refactor: use DuckDuckGo and Brave as tools instead of retrievers

2026-03-06 22:03:39 +00:00 · 2025-06-23 09:22:17 +05:30
parent f0908af3c0
commit a9e4583695
8 changed files with 161 additions and 333 deletions
--- a/application/agents/classic_agent.py
+++ b/application/agents/classic_agent.py
@@ -8,7 +8,7 @@ logger = logging.getLogger(__name__)


 class ClassicAgent(BaseAgent):
-    """A simplified classic agent with clear execution flow.
+    """A simplified agent with clear execution flow.

    Usage:
    1. Processes a query through retrieval
--- a/application/agents/tools/brave.py
+++ b/application/agents/tools/brave.py
@@ -25,27 +25,35 @@ class BraveSearchTool(Tool):
        else:
            raise ValueError(f"Unknown action: {action_name}")

-    def _web_search(self, query, country="ALL", search_lang="en", count=10, 
-                   offset=0, safesearch="off", freshness=None, 
-                   result_filter=None, extra_snippets=False, summary=False):
+    def _web_search(
+        self,
+        query,
+        country="ALL",
+        search_lang="en",
+        count=10,
+        offset=0,
+        safesearch="off",
+        freshness=None,
+        result_filter=None,
+        extra_snippets=False,
+        summary=False,
+    ):
        """
        Performs a web search using the Brave Search API.
        """
        print(f"Performing Brave web search for: {query}")
-        
+
        url = f"{self.base_url}/web/search"
-        
-        # Build query parameters
+
        params = {
            "q": query,
            "country": country,
            "search_lang": search_lang,
            "count": min(count, 20),
            "offset": min(offset, 9),
-            "safesearch": safesearch
+            "safesearch": safesearch,
        }
-        
-        # Add optional parameters only if they have values
+
        if freshness:
            params["freshness"] = freshness
        if result_filter:
@@ -54,68 +62,69 @@ class BraveSearchTool(Tool):
            params["extra_snippets"] = 1
        if summary:
            params["summary"] = 1
-        
-        # Set up headers
        headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
-            "X-Subscription-Token": self.token
+            "X-Subscription-Token": self.token,
        }
-        
-        # Make the request
+
        response = requests.get(url, params=params, headers=headers)
-        
+
        if response.status_code == 200:
            return {
                "status_code": response.status_code,
                "results": response.json(),
-                "message": "Search completed successfully."
+                "message": "Search completed successfully.",
            }
        else:
            return {
                "status_code": response.status_code,
-                "message": f"Search failed with status code: {response.status_code}."
+                "message": f"Search failed with status code: {response.status_code}.",
            }
-    
-    def _image_search(self, query, country="ALL", search_lang="en", count=5, 
-                     safesearch="off", spellcheck=False):
+
+    def _image_search(
+        self,
+        query,
+        country="ALL",
+        search_lang="en",
+        count=5,
+        safesearch="off",
+        spellcheck=False,
+    ):
        """
        Performs an image search using the Brave Search API.
        """
        print(f"Performing Brave image search for: {query}")
-        
+
        url = f"{self.base_url}/images/search"
-        
-        # Build query parameters
+
        params = {
            "q": query,
            "country": country,
            "search_lang": search_lang,
            "count": min(count, 100),  # API max is 100
            "safesearch": safesearch,
-            "spellcheck": 1 if spellcheck else 0
+            "spellcheck": 1 if spellcheck else 0,
        }
-        
-        # Set up headers
+
        headers = {
            "Accept": "application/json",
            "Accept-Encoding": "gzip",
-            "X-Subscription-Token": self.token
+            "X-Subscription-Token": self.token,
        }
-        
-        # Make the request
+
        response = requests.get(url, params=params, headers=headers)
-        
+
        if response.status_code == 200:
            return {
                "status_code": response.status_code,
                "results": response.json(),
-                "message": "Image search completed successfully."
+                "message": "Image search completed successfully.",
            }
        else:
            return {
                "status_code": response.status_code,
-                "message": f"Image search failed with status code: {response.status_code}."
+                "message": f"Image search failed with status code: {response.status_code}.",
            }

    def get_actions_metadata(self):
@@ -130,42 +139,14 @@ class BraveSearchTool(Tool):
                            "type": "string",
                            "description": "The search query (max 400 characters, 50 words)",
                        },
-                        # "country": {
-                        #     "type": "string",
-                        #     "description": "The 2-character country code (default: US)",
-                        # },
                        "search_lang": {
                            "type": "string",
                            "description": "The search language preference (default: en)",
                        },
-                        # "count": {
-                        #     "type": "integer",
-                        #     "description": "Number of results to return (max 20, default: 10)",
-                        # },
-                        # "offset": {
-                        #     "type": "integer",
-                        #     "description": "Pagination offset (max 9, default: 0)",
-                        # },
-                        # "safesearch": {
-                        #     "type": "string",
-                        #     "description": "Filter level for adult content (off, moderate, strict)",
-                        # },
                        "freshness": {
                            "type": "string",
                            "description": "Time filter for results (pd: last 24h, pw: last week, pm: last month, py: last year)",
                        },
-                        # "result_filter": {
-                        #     "type": "string",
-                        #     "description": "Comma-delimited list of result types to include",
-                        # },
-                        # "extra_snippets": {
-                        #     "type": "boolean",
-                        #     "description": "Get additional excerpts from result pages",
-                        # },
-                        # "summary": {
-                        #     "type": "boolean",
-                        #     "description": "Enable summary generation in search results",
-                        # }
                    },
                    "required": ["query"],
                    "additionalProperties": False,
@@ -181,37 +162,21 @@ class BraveSearchTool(Tool):
                            "type": "string",
                            "description": "The search query (max 400 characters, 50 words)",
                        },
-                        # "country": {
-                        #     "type": "string",
-                        #     "description": "The 2-character country code (default: US)",
-                        # },
-                        # "search_lang": {
-                        #     "type": "string",
-                        #     "description": "The search language preference (default: en)",
-                        # },
                        "count": {
                            "type": "integer",
                            "description": "Number of results to return (max 100, default: 5)",
                        },
-                        # "safesearch": {
-                        #     "type": "string",
-                        #     "description": "Filter level for adult content (off, strict). Default: strict",
-                        # },
-                        # "spellcheck": {
-                        #     "type": "boolean",
-                        #     "description": "Whether to spellcheck provided query (default: true)",
-                        # }
                    },
                    "required": ["query"],
                    "additionalProperties": False,
                },
-            }
+            },
        ]

    def get_config_requirements(self):
        return {
            "token": {
-                "type": "string", 
-                "description": "Brave Search API key for authentication"
+                "type": "string",
+                "description": "Brave Search API key for authentication",
            },
-        }
+        }
--- a/application/agents/tools/duckduckgo.py
+++ b/application/agents/tools/duckduckgo.py
@@ -0,0 +1,114 @@
+from application.agents.tools.base import Tool
+from duckduckgo_search import DDGS
+
+
+class DuckDuckGoSearchTool(Tool):
+    """
+    DuckDuckGo Search
+    A tool for performing web and image searches using DuckDuckGo.
+    """
+
+    def __init__(self, config):
+        self.config = config
+
+    def execute_action(self, action_name, **kwargs):
+        actions = {
+            "ddg_web_search": self._web_search,
+            "ddg_image_search": self._image_search,
+        }
+
+        if action_name in actions:
+            return actions[action_name](**kwargs)
+        else:
+            raise ValueError(f"Unknown action: {action_name}")
+
+    def _web_search(
+        self,
+        query,
+        max_results=5,
+    ):
+        print(f"Performing DuckDuckGo web search for: {query}")
+
+        try:
+            results = DDGS().text(
+                query,
+                max_results=max_results,
+            )
+
+            return {
+                "status_code": 200,
+                "results": results,
+                "message": "Web search completed successfully.",
+            }
+        except Exception as e:
+            return {
+                "status_code": 500,
+                "message": f"Web search failed: {str(e)}",
+            }
+
+    def _image_search(
+        self,
+        query,
+        max_results=5,
+    ):
+        print(f"Performing DuckDuckGo image search for: {query}")
+
+        try:
+            results = DDGS().images(
+                keywords=query,
+                max_results=max_results,
+            )
+
+            return {
+                "status_code": 200,
+                "results": results,
+                "message": "Image search completed successfully.",
+            }
+        except Exception as e:
+            return {
+                "status_code": 500,
+                "message": f"Image search failed: {str(e)}",
+            }
+
+    def get_actions_metadata(self):
+        return [
+            {
+                "name": "ddg_web_search",
+                "description": "Perform a web search using DuckDuckGo.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Search query",
+                        },
+                        "max_results": {
+                            "type": "integer",
+                            "description": "Number of results to return (default: 5)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+            {
+                "name": "ddg_image_search",
+                "description": "Perform an image search using DuckDuckGo.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "query": {
+                            "type": "string",
+                            "description": "Search query",
+                        },
+                        "max_results": {
+                            "type": "integer",
+                            "description": "Number of results to return (default: 5, max: 50)",
+                        },
+                    },
+                    "required": ["query"],
+                },
+            },
+        ]
+
+    def get_config_requirements(self):
+        return {}
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -879,29 +879,6 @@ class CombinedJson(Resource):
                        "syncFrequency": index.get("sync_frequency", ""),
                    }
                )
-            if "duckduck_search" in settings.RETRIEVERS_ENABLED:
-                data.append(
-                    {
-                        "name": "DuckDuckGo Search",
-                        "date": "duckduck_search",
-                        "model": settings.EMBEDDINGS_NAME,
-                        "location": "custom",
-                        "tokens": "",
-                        "retriever": "duckduck_search",
-                    }
-                )
-            if "brave_search" in settings.RETRIEVERS_ENABLED:
-                data.append(
-                    {
-                        "name": "Brave Search",
-                        "language": "en",
-                        "date": "brave_search",
-                        "model": settings.EMBEDDINGS_NAME,
-                        "location": "custom",
-                        "tokens": "",
-                        "retriever": "brave_search",
-                    }
-                )
        except Exception as err:
            current_app.logger.error(f"Error retrieving sources: {err}", exc_info=True)
            return make_response(jsonify({"success": False}), 400)
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
    VECTOR_STORE: str = (
        "faiss"  #  "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
    )
-    RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"]  # also brave_search
+    RETRIEVERS_ENABLED: list = ["classic_rag"]
    AGENT_NAME: str = "classic"
    FALLBACK_LLM_PROVIDER: Optional[str] = None  # provider for fallback llm
    FALLBACK_LLM_NAME: Optional[str] = None  # model name for fallback llm
@@ -99,7 +99,6 @@ class Settings(BaseSettings):
    LANCEDB_TABLE_NAME: Optional[str] = (
        "docsgpts"  # Name of the table to use for storing vectors
    )
-    BRAVE_SEARCH_API_KEY: Optional[str] = None

    FLASK_DEBUG_MODE: bool = False
    STORAGE_TYPE: str = "local"  # local or s3
--- a/application/retriever/brave_search.py
+++ b/application/retriever/brave_search.py
@@ -1,112 +0,0 @@
-import json
-
-from langchain_community.tools import BraveSearch
-
-from application.core.settings import settings
-from application.llm.llm_creator import LLMCreator
-from application.retriever.base import BaseRetriever
-
-
-class BraveRetSearch(BaseRetriever):
-
-    def __init__(
-        self,
-        source,
-        chat_history,
-        prompt,
-        chunks=2,
-        token_limit=150,
-        gpt_model="docsgpt",
-        user_api_key=None,
-        decoded_token=None,
-    ):
-        self.question = ""
-        self.source = source
-        self.chat_history = chat_history
-        self.prompt = prompt
-        self.chunks = chunks
-        self.gpt_model = gpt_model
-        self.token_limit = (
-            token_limit
-            if token_limit
-            < settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-            else settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-        )
-        self.user_api_key = user_api_key
-        self.decoded_token = decoded_token
-
-    def _get_data(self):
-        if self.chunks == 0:
-            docs = []
-        else:
-            search = BraveSearch.from_api_key(
-                api_key=settings.BRAVE_SEARCH_API_KEY,
-                search_kwargs={"count": int(self.chunks)},
-            )
-            results = search.run(self.question)
-            results = json.loads(results)
-
-            docs = []
-            for i in results:
-                try:
-                    title = i["title"]
-                    link = i["link"]
-                    snippet = i["snippet"]
-                    docs.append({"text": snippet, "title": title, "link": link})
-                except IndexError:
-                    pass
-        if settings.LLM_PROVIDER == "llama.cpp":
-            docs = [docs[0]]
-
-        return docs
-
-    def gen(self):
-        docs = self._get_data()
-
-        # join all page_content together with a newline
-        docs_together = "\n".join([doc["text"] for doc in docs])
-        p_chat_combine = self.prompt.replace("{summaries}", docs_together)
-        messages_combine = [{"role": "system", "content": p_chat_combine}]
-        for doc in docs:
-            yield {"source": doc}
-
-        if len(self.chat_history) > 0:
-            for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    messages_combine.append({"role": "user", "content": i["prompt"]})
-                    messages_combine.append(
-                        {"role": "assistant", "content": i["response"]}
-                    )
-        messages_combine.append({"role": "user", "content": self.question})
-
-        llm = LLMCreator.create_llm(
-            settings.LLM_PROVIDER,
-            api_key=settings.API_KEY,
-            user_api_key=self.user_api_key,
-            decoded_token=self.decoded_token,
-        )
-
-        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
-        for line in completion:
-            yield {"answer": str(line)}
-
-    def search(self, query: str = ""):
-        if query:
-            self.question = query
-        return self._get_data()
-
-    def get_params(self):
-        return {
-            "question": self.question,
-            "source": self.source,
-            "chat_history": self.chat_history,
-            "prompt": self.prompt,
-            "chunks": self.chunks,
-            "token_limit": self.token_limit,
-            "gpt_model": self.gpt_model,
-            "user_api_key": self.user_api_key,
-        }
--- a/application/retriever/duckduck_search.py
+++ b/application/retriever/duckduck_search.py
@@ -1,111 +0,0 @@
-from langchain_community.tools import DuckDuckGoSearchResults
-from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
-
-from application.core.settings import settings
-from application.llm.llm_creator import LLMCreator
-from application.retriever.base import BaseRetriever
-
-
-class DuckDuckSearch(BaseRetriever):
-
-    def __init__(
-        self,
-        source,
-        chat_history,
-        prompt,
-        chunks=2,
-        token_limit=150,
-        gpt_model="docsgpt",
-        user_api_key=None,
-        decoded_token=None,
-    ):
-        self.question = ""
-        self.source = source
-        self.chat_history = chat_history
-        self.prompt = prompt
-        self.chunks = chunks
-        self.gpt_model = gpt_model
-        self.token_limit = (
-            token_limit
-            if token_limit
-            < settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-            else settings.LLM_TOKEN_LIMITS.get(
-                self.gpt_model, settings.DEFAULT_MAX_HISTORY
-            )
-        )
-        self.user_api_key = user_api_key
-        self.decoded_token = decoded_token
-
-    def _get_data(self):
-        if self.chunks == 0:
-            docs = []
-        else:
-            wrapper = DuckDuckGoSearchAPIWrapper(max_results=self.chunks)
-            search = DuckDuckGoSearchResults(api_wrapper=wrapper, output_format="list")
-            results = search.run(self.question)
-
-            docs = []
-            for i in results:
-                try:
-                    docs.append(
-                        {
-                            "text": i.get("snippet", "").strip(),
-                            "title": i.get("title", "").strip(),
-                            "link": i.get("link", "").strip(),
-                        }
-                    )
-                except IndexError:
-                    pass
-        if settings.LLM_PROVIDER == "llama.cpp":
-            docs = [docs[0]]
-
-        return docs
-
-    def gen(self):
-        docs = self._get_data()
-
-        # join all page_content together with a newline
-        docs_together = "\n".join([doc["text"] for doc in docs])
-        p_chat_combine = self.prompt.replace("{summaries}", docs_together)
-        messages_combine = [{"role": "system", "content": p_chat_combine}]
-        for doc in docs:
-            yield {"source": doc}
-
-        if len(self.chat_history) > 0:
-            for i in self.chat_history:
-                if "prompt" in i and "response" in i:
-                    messages_combine.append({"role": "user", "content": i["prompt"]})
-                    messages_combine.append(
-                        {"role": "assistant", "content": i["response"]}
-                    )
-        messages_combine.append({"role": "user", "content": self.question})
-
-        llm = LLMCreator.create_llm(
-            settings.LLM_PROVIDER,
-            api_key=settings.API_KEY,
-            user_api_key=self.user_api_key,
-            decoded_token=self.decoded_token,
-        )
-
-        completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
-        for line in completion:
-            yield {"answer": str(line)}
-
-    def search(self, query: str = ""):
-        if query:
-            self.question = query
-        return self._get_data()
-
-    def get_params(self):
-        return {
-            "question": self.question,
-            "source": self.source,
-            "chat_history": self.chat_history,
-            "prompt": self.prompt,
-            "chunks": self.chunks,
-            "token_limit": self.token_limit,
-            "gpt_model": self.gpt_model,
-            "user_api_key": self.user_api_key,
-        }
--- a/application/retriever/retriever_creator.py
+++ b/application/retriever/retriever_creator.py
@@ -1,13 +1,9 @@
 from application.retriever.classic_rag import ClassicRAG
-from application.retriever.duckduck_search import DuckDuckSearch
-from application.retriever.brave_search import BraveRetSearch


 class RetrieverCreator:
    retrievers = {
        "classic": ClassicRAG,
-        "duckduck_search": DuckDuckSearch,
-        "brave_search": BraveRetSearch,
        "default": ClassicRAG,
    }