From a9e4583695dd59399641fba7dccf402e6bdea4d2 Mon Sep 17 00:00:00 2001
From: Siddhant Rai <siddhant.rai.5686@gmail.com>
Date: Mon, 23 Jun 2025 09:22:17 +0530
Subject: [PATCH] refactor: use DuckDuckGo and Brave as tools instead of
 retrievers

---
 application/agents/classic_agent.py        |   2 +-
 application/agents/tools/brave.py          | 125 ++++++++-------------
 application/agents/tools/duckduckgo.py     | 114 +++++++++++++++++++
 application/api/user/routes.py             |  23 ----
 application/core/settings.py               |   3 +-
 application/retriever/brave_search.py      | 112 ------------------
 application/retriever/duckduck_search.py   | 111 ------------------
 application/retriever/retriever_creator.py |   4 -
 8 files changed, 161 insertions(+), 333 deletions(-)
 create mode 100644 application/agents/tools/duckduckgo.py
 delete mode 100644 application/retriever/brave_search.py
 delete mode 100644 application/retriever/duckduck_search.py

diff --git a/application/agents/classic_agent.py b/application/agents/classic_agent.py
index 6fe73de0..fbffe77c 100644
--- a/application/agents/classic_agent.py
+++ b/application/agents/classic_agent.py
@@ -8,7 +8,7 @@ logger = logging.getLogger(__name__)
 
 
 class ClassicAgent(BaseAgent):
-    """A simplified classic agent with clear execution flow.
+    """A simplified agent with clear execution flow.
 
     Usage:
     1. Processes a query through retrieval
diff --git a/application/agents/tools/brave.py b/application/agents/tools/brave.py
index 1428bea4..33843ac0 100644
--- a/application/agents/tools/brave.py
+++ b/application/agents/tools/brave.py
@@ -25,27 +25,35 @@ class BraveSearchTool(Tool):
         else:
             raise ValueError(f"Unknown action: {action_name}")
 
-    def _web_search(self, query, country="ALL", search_lang="en", count=10, 
-                   offset=0, safesearch="off", freshness=None, 
-                   result_filter=None, extra_snippets=False, summary=False):
+    def _web_search(
+        self,
+        query,
+        country="ALL",
+        search_lang="en",
+        count=10,
+        offset=0,
+        safesearch="off",
+        freshness=None,
+        result_filter=None,
+        extra_snippets=False,
+        summary=False,
+    ):
         """
         Performs a web search using the Brave Search API.
         """
         print(f"Performing Brave web search for: {query}")
-        
+
         url = f"{self.base_url}/web/search"
-        
-        # Build query parameters
+
         params = {
             "q": query,
             "country": country,
             "search_lang": search_lang,
             "count": min(count, 20),
             "offset": min(offset, 9),
-            "safesearch": safesearch
+            "safesearch": safesearch,
         }
-        
-        # Add optional parameters only if they have values
+
         if freshness:
             params["freshness"] = freshness
         if result_filter:
@@ -54,68 +62,69 @@ class BraveSearchTool(Tool):
             params["extra_snippets"] = 1
         if summary:
             params["summary"] = 1
-        
-        # Set up headers
         headers = {
             "Accept": "application/json",
             "Accept-Encoding": "gzip",
-            "X-Subscription-Token": self.token
+            "X-Subscription-Token": self.token,
         }
-        
-        # Make the request
+
         response = requests.get(url, params=params, headers=headers)
-        
+
         if response.status_code == 200:
             return {
                 "status_code": response.status_code,
                 "results": response.json(),
-                "message": "Search completed successfully."
+                "message": "Search completed successfully.",
             }
         else:
             return {
                 "status_code": response.status_code,
-                "message": f"Search failed with status code: {response.status_code}."
+                "message": f"Search failed with status code: {response.status_code}.",
             }
-    
-    def _image_search(self, query, country="ALL", search_lang="en", count=5, 
-                     safesearch="off", spellcheck=False):
+
+    def _image_search(
+        self,
+        query,
+        country="ALL",
+        search_lang="en",
+        count=5,
+        safesearch="off",
+        spellcheck=False,
+    ):
         """
         Performs an image search using the Brave Search API.
         """
         print(f"Performing Brave image search for: {query}")
-        
+
         url = f"{self.base_url}/images/search"
-        
-        # Build query parameters
+
         params = {
             "q": query,
             "country": country,
             "search_lang": search_lang,
             "count": min(count, 100),  # API max is 100
             "safesearch": safesearch,
-            "spellcheck": 1 if spellcheck else 0
+            "spellcheck": 1 if spellcheck else 0,
         }
-        
-        # Set up headers
+
         headers = {
             "Accept": "application/json",
             "Accept-Encoding": "gzip",
-            "X-Subscription-Token": self.token
+            "X-Subscription-Token": self.token,
         }
-        
-        # Make the request
+
         response = requests.get(url, params=params, headers=headers)
-        
+
         if response.status_code == 200:
             return {
                 "status_code": response.status_code,
                 "results": response.json(),
-                "message": "Image search completed successfully."
+                "message": "Image search completed successfully.",
             }
         else:
             return {
                 "status_code": response.status_code,
-                "message": f"Image search failed with status code: {response.status_code}."
+                "message": f"Image search failed with status code: {response.status_code}.",
             }
 
     def get_actions_metadata(self):
@@ -130,42 +139,14 @@ class BraveSearchTool(Tool):
                             "type": "string",
                             "description": "The search query (max 400 characters, 50 words)",
                         },
-                        # "country": {
-                        #     "type": "string",
-                        #     "description": "The 2-character country code (default: US)",
-                        # },
                         "search_lang": {
                             "type": "string",
                             "description": "The search language preference (default: en)",
                         },
-                        # "count": {
-                        #     "type": "integer",
-                        #     "description": "Number of results to return (max 20, default: 10)",
-                        # },
-                        # "offset": {
-                        #     "type": "integer",
-                        #     "description": "Pagination offset (max 9, default: 0)",
-                        # },
-                        # "safesearch": {
-                        #     "type": "string",
-                        #     "description": "Filter level for adult content (off, moderate, strict)",
-                        # },
                         "freshness": {
                             "type": "string",
                             "description": "Time filter for results (pd: last 24h, pw: last week, pm: last month, py: last year)",
                         },
-                        # "result_filter": {
-                        #     "type": "string",
-                        #     "description": "Comma-delimited list of result types to include",
-                        # },
-                        # "extra_snippets": {
-                        #     "type": "boolean",
-                        #     "description": "Get additional excerpts from result pages",
-                        # },
-                        # "summary": {
-                        #     "type": "boolean",
-                        #     "description": "Enable summary generation in search results",
-                        # }
                     },
                     "required": ["query"],
                     "additionalProperties": False,
@@ -181,37 +162,21 @@ class BraveSearchTool(Tool):
                             "type": "string",
                             "description": "The search query (max 400 characters, 50 words)",
                         },
-                        # "country": {
-                        #     "type": "string",
-                        #     "description": "The 2-character country code (default: US)",
-                        # },
-                        # "search_lang": {
-                        #     "type": "string",
-                        #     "description": "The search language preference (default: en)",
-                        # },
                         "count": {
                             "type": "integer",
                             "description": "Number of results to return (max 100, default: 5)",
                         },
-                        # "safesearch": {
-                        #     "type": "string",
-                        #     "description": "Filter level for adult content (off, strict). Default: strict",
-                        # },
-                        # "spellcheck": {
-                        #     "type": "boolean",
-                        #     "description": "Whether to spellcheck provided query (default: true)",
-                        # }
                     },
                     "required": ["query"],
                     "additionalProperties": False,
                 },
-            }
+            },
         ]
 
     def get_config_requirements(self):
         return {
             "token": {
-                "type": "string", 
-                "description": "Brave Search API key for authentication"
+                "type": "string",
+                "description": "Brave Search API key for authentication",
             },
-        }
\ No newline at end of file
+        }
diff --git a/application/agents/tools/duckduckgo.py b/application/agents/tools/duckduckgo.py
new file mode 100644
index 00000000..87c1bc7e
--- /dev/null
+++ b/application/agents/tools/duckduckgo.py
@@ -0,0 +1,114 @@
+from application.agents.tools.base import Tool
+from duckduckgo_search import DDGS
+
+
+class DuckDuckGoSearchTool(Tool):
+    """
+    DuckDuckGo Search
+    A tool for performing web and image searches using DuckDuckGo.
+
+    Each action returns a dict with "status_code" and "message" (plus "results"
+    on success); search failures are reported as status_code 500, not raised.
+    """
+
+    DEFAULT_MAX_RESULTS = 5
+    MAX_IMAGE_RESULTS = 50  # upper bound advertised in get_actions_metadata
+
+    def __init__(self, config):
+        self.config = config
+
+    def execute_action(self, action_name, **kwargs):
+        """Run the named action with kwargs; raise ValueError if it is unknown."""
+        actions = {
+            "ddg_web_search": self._web_search,
+            "ddg_image_search": self._image_search,
+        }
+        if action_name not in actions:
+            raise ValueError(f"Unknown action: {action_name}")
+        return actions[action_name](**kwargs)
+
+    def _normalize_max_results(self, max_results, upper=None):
+        """Coerce max_results to an int >= 1, capped at upper when given."""
+        try:
+            count = int(max_results)
+        except (TypeError, ValueError, OverflowError):
+            # Tool arguments come from a model and may be null or malformed.
+            count = self.DEFAULT_MAX_RESULTS
+        count = max(count, 1)
+        return count if upper is None else min(count, upper)
+
+    def _run_search(self, label, search):
+        """Call search(); report any exception as a 500 response dict."""
+        try:
+            results = search()
+        except Exception as e:
+            return {
+                "status_code": 500,
+                "message": f"{label} search failed: {str(e)}",
+            }
+        return {
+            "status_code": 200,
+            "results": results,
+            "message": f"{label} search completed successfully.",
+        }
+
+    def _web_search(self, query, max_results=5):
+        """Return the response dict for a DuckDuckGo text search of query."""
+        print(f"Performing DuckDuckGo web search for: {query}")
+        limit = self._normalize_max_results(max_results)
+        return self._run_search("Web", lambda: DDGS().text(query, max_results=limit))
+
+    def _image_search(self, query, max_results=5):
+        """Return the response dict for an image search (at most 50 results)."""
+        print(f"Performing DuckDuckGo image search for: {query}")
+        limit = self._normalize_max_results(max_results, self.MAX_IMAGE_RESULTS)
+        return self._run_search(
+            "Image", lambda: DDGS().images(keywords=query, max_results=limit)
+        )
+
+    def get_actions_metadata(self):
+        """Describe the two search actions and their JSON-schema parameters."""
+        return [
+            {
+                "name": "ddg_web_search",
+                "description": "Perform a web search using DuckDuckGo.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Search query",
+                        },
+                        "max_results": {
+                            "type": "integer",
+                            "description": "Number of results to return (default: 5)",
+                        },
+                    },
+                    "required": ["query"],
+                    "additionalProperties": False,
+                },
+            },
+            {
+                "name": "ddg_image_search",
+                "description": "Perform an image search using DuckDuckGo.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Search query",
+                        },
+                        "max_results": {
+                            "type": "integer",
+                            "description": "Number of results to return (default: 5, max: 50)",
+                        },
+                    },
+                    "required": ["query"],
+                    "additionalProperties": False,
+                },
+            },
+        ]
+
+    def get_config_requirements(self):
+        """Return an empty mapping; this tool declares no config keys."""
+        return {}
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index b1058203..c2f89761 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -879,29 +879,6 @@ class CombinedJson(Resource):
                         "syncFrequency": index.get("sync_frequency", ""),
                     }
                 )
-            if "duckduck_search" in settings.RETRIEVERS_ENABLED:
-                data.append(
-                    {
-                        "name": "DuckDuckGo Search",
-                        "date": "duckduck_search",
-                        "model": settings.EMBEDDINGS_NAME,
-                        "location": "custom",
-                        "tokens": "",
-                        "retriever": "duckduck_search",
-                    }
-                )
-            if "brave_search" in settings.RETRIEVERS_ENABLED:
-                data.append(
-                    {
-                        "name": "Brave Search",
-                        "language": "en",
-                        "date": "brave_search",
-                        "model": settings.EMBEDDINGS_NAME,
-                        "location": "custom",
-                        "tokens": "",
-                        "retriever": "brave_search",
-                    }
-                )
         except Exception as err:
             current_app.logger.error(f"Error retrieving sources: {err}", exc_info=True)
             return make_response(jsonify({"success": False}), 400)
diff --git a/application/core/settings.py b/application/core/settings.py
index 7030022a..35e1bb75 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
     VECTOR_STORE: str = (
         "faiss"  #  "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
     )
-    RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"]  # also brave_search
+    RETRIEVERS_ENABLED: list = ["classic_rag"]
     AGENT_NAME: str = "classic"
     FALLBACK_LLM_PROVIDER: Optional[str] = None  # provider for fallback llm
     FALLBACK_LLM_NAME: Optional[str] = None  # model name for fallback llm
@@ -99,7 +99,6 @@ class Settings(BaseSettings):
     LANCEDB_TABLE_NAME: Optional[str] = (
         "docsgpts"  # Name of the table to use for storing vectors
     )
-    BRAVE_SEARCH_API_KEY: Optional[str] = None
 
     FLASK_DEBUG_MODE: bool = False
     STORAGE_TYPE: str = "local"  # local or s3
diff --git a/application/retriever/brave_search.py b/application/retriever/brave_search.py
deleted file mode 100644
index 123000e4..00000000
--- a/application/retriever/brave_search.py
+++ /dev/null
@@ -1,112 +0,0 @@
-import json
-
-from langchain_community.tools import BraveSearch
-
-from application.core.settings import settings
-from application.llm.llm_creator import LLMCreator
-from application.retriever.base import BaseRetriever
-
-
-class BraveRetSearch(BaseRetriever):
-
-    def __init__(
-        self,
-        source,
-        chat_history,
-        prompt,
-        chunks=2,
-        token_limit=150,
-        gpt_model="docsgpt",
-        user_api_key=None,
-        decoded_token=None,
-    ):
-        self.question = ""
-        self.source = source
-        self.chat_history = chat_history
-        self.prompt = prompt
-        self.chunks = chunks
-        self.gpt_model = gpt_model
-        self.token_limit = (
-            token_limit
-            if token_limit
-            < settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-            else settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-        )
-        self.user_api_key = user_api_key
-        self.decoded_token = decoded_token
-
-    def _get_data(self):
-        if self.chunks == 0:
-            docs = []
-        else:
-            search = BraveSearch.from_api_key(
-                api_key=settings.BRAVE_SEARCH_API_KEY,
-                search_kwargs={"count": int(self.chunks)},
-            )
-            results = search.run(self.question)
-            results = json.loads(results)
-
-            docs = []
-            for i in results:
-                try:
-                    title = i["title"]
-                    link = i["link"]
-                    snippet = i["snippet"]
-                    docs.append({"text": snippet, "title": title, "link": link})
-                except IndexError:
-                    pass
-        if settings.LLM_PROVIDER == "llama.cpp":
-            docs = [docs[0]]
-
-        return docs
-
-    def gen(self):
-        docs = self._get_data()
-
-        # join all page_content together with a newline
-        docs_together = "\n".join([doc["text"] for doc in docs])
-        p_chat_combine = self.prompt.replace("{summaries}", docs_together)
-        messages_combine = [{"role": "system", "content": p_chat_combine}]
-        for doc in docs:
-            yield {"source": doc}
-
-        if len(self.chat_history) > 0:
-            for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    messages_combine.append({"role": "user", "content": i["prompt"]})
-                    messages_combine.append(
-                        {"role": "assistant", "content": i["response"]}
-                    )
-        messages_combine.append({"role": "user", "content": self.question})
-
-        llm = LLMCreator.create_llm(
-            settings.LLM_PROVIDER,
-            api_key=settings.API_KEY,
-            user_api_key=self.user_api_key,
-            decoded_token=self.decoded_token,
-        )
-
-        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
-        for line in completion:
-            yield {"answer": str(line)}
-
-    def search(self, query: str = ""):
-        if query:
-            self.question = query
-        return self._get_data()
-
-    def get_params(self):
-        return {
-            "question": self.question,
-            "source": self.source,
-            "chat_history": self.chat_history,
-            "prompt": self.prompt,
-            "chunks": self.chunks,
-            "token_limit": self.token_limit,
-            "gpt_model": self.gpt_model,
-            "user_api_key": self.user_api_key,
-        }
diff --git a/application/retriever/duckduck_search.py b/application/retriever/duckduck_search.py
deleted file mode 100644
index 5abe5edd..00000000
--- a/application/retriever/duckduck_search.py
+++ /dev/null
@@ -1,111 +0,0 @@
-from langchain_community.tools import DuckDuckGoSearchResults
-from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
-
-from application.core.settings import settings
-from application.llm.llm_creator import LLMCreator
-from application.retriever.base import BaseRetriever
-
-
-class DuckDuckSearch(BaseRetriever):
-
-    def __init__(
-        self,
-        source,
-        chat_history,
-        prompt,
-        chunks=2,
-        token_limit=150,
-        gpt_model="docsgpt",
-        user_api_key=None,
-        decoded_token=None,
-    ):
-        self.question = ""
-        self.source = source
-        self.chat_history = chat_history
-        self.prompt = prompt
-        self.chunks = chunks
-        self.gpt_model = gpt_model
-        self.token_limit = (
-            token_limit
-            if token_limit
-            < settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-            else settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-        )
-        self.user_api_key = user_api_key
-        self.decoded_token = decoded_token
-
-    def _get_data(self):
-        if self.chunks == 0:
-            docs = []
-        else:
-            wrapper = DuckDuckGoSearchAPIWrapper(max_results=self.chunks)
-            search = DuckDuckGoSearchResults(api_wrapper=wrapper, output_format="list")
-            results = search.run(self.question)
-
-            docs = []
-            for i in results:
-                try:
-                    docs.append(
-                        {
-                            "text": i.get("snippet", "").strip(),
-                            "title": i.get("title", "").strip(),
-                            "link": i.get("link", "").strip(),
-                        }
-                    )
-                except IndexError:
-                    pass
-        if settings.LLM_PROVIDER == "llama.cpp":
-            docs = [docs[0]]
-
-        return docs
-
-    def gen(self):
-        docs = self._get_data()
-
-        # join all page_content together with a newline
-        docs_together = "\n".join([doc["text"] for doc in docs])
-        p_chat_combine = self.prompt.replace("{summaries}", docs_together)
-        messages_combine = [{"role": "system", "content": p_chat_combine}]
-        for doc in docs:
-            yield {"source": doc}
-
-        if len(self.chat_history) > 0:
-            for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    messages_combine.append({"role": "user", "content": i["prompt"]})
-                    messages_combine.append(
-                        {"role": "assistant", "content": i["response"]}
-                    )
-        messages_combine.append({"role": "user", "content": self.question})
-
-        llm = LLMCreator.create_llm(
-            settings.LLM_PROVIDER,
-            api_key=settings.API_KEY,
-            user_api_key=self.user_api_key,
-            decoded_token=self.decoded_token,
-        )
-
-        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
-        for line in completion:
-            yield {"answer": str(line)}
-
-    def search(self, query: str = ""):
-        if query:
-            self.question = query
-        return self._get_data()
-
-    def get_params(self):
-        return {
-            "question": self.question,
-            "source": self.source,
-            "chat_history": self.chat_history,
-            "prompt": self.prompt,
-            "chunks": self.chunks,
-            "token_limit": self.token_limit,
-            "gpt_model": self.gpt_model,
-            "user_api_key": self.user_api_key,
-        }
diff --git a/application/retriever/retriever_creator.py b/application/retriever/retriever_creator.py
index 26cb41ca..e51be42f 100644
--- a/application/retriever/retriever_creator.py
+++ b/application/retriever/retriever_creator.py
@@ -1,13 +1,9 @@
 from application.retriever.classic_rag import ClassicRAG
-from application.retriever.duckduck_search import DuckDuckSearch
-from application.retriever.brave_search import BraveRetSearch
 
 
 class RetrieverCreator:
     retrievers = {
         "classic": ClassicRAG,
-        "duckduck_search": DuckDuckSearch,
-        "brave_search": BraveRetSearch,
         "default": ClassicRAG,
     }