From a9e4583695dd59399641fba7dccf402e6bdea4d2 Mon Sep 17 00:00:00 2001 From: Siddhant Rai Date: Mon, 23 Jun 2025 09:22:17 +0530 Subject: [PATCH] refactor: use DuckDuckGo and Brave as tools instead of retrievers --- application/agents/classic_agent.py | 2 +- application/agents/tools/brave.py | 125 ++++++++------------- application/agents/tools/duckduckgo.py | 114 +++++++++++++++++++ application/api/user/routes.py | 23 ---- application/core/settings.py | 3 +- application/retriever/brave_search.py | 112 ------------------ application/retriever/duckduck_search.py | 111 ------------------ application/retriever/retriever_creator.py | 4 - 8 files changed, 161 insertions(+), 333 deletions(-) create mode 100644 application/agents/tools/duckduckgo.py delete mode 100644 application/retriever/brave_search.py delete mode 100644 application/retriever/duckduck_search.py diff --git a/application/agents/classic_agent.py b/application/agents/classic_agent.py index 6fe73de0..fbffe77c 100644 --- a/application/agents/classic_agent.py +++ b/application/agents/classic_agent.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) class ClassicAgent(BaseAgent): - """A simplified classic agent with clear execution flow. + """A simplified agent with clear execution flow. Usage: 1. Processes a query through retrieval diff --git a/application/agents/tools/brave.py b/application/agents/tools/brave.py index 1428bea4..33843ac0 100644 --- a/application/agents/tools/brave.py +++ b/application/agents/tools/brave.py @@ -25,27 +25,35 @@ class BraveSearchTool(Tool): else: raise ValueError(f"Unknown action: {action_name}") - def _web_search(self, query, country="ALL", search_lang="en", count=10, - offset=0, safesearch="off", freshness=None, - result_filter=None, extra_snippets=False, summary=False): + def _web_search( + self, + query, + country="ALL", + search_lang="en", + count=10, + offset=0, + safesearch="off", + freshness=None, + result_filter=None, + extra_snippets=False, + summary=False, + ): """ Performs a web search using the Brave Search API. """ print(f"Performing Brave web search for: {query}") - + url = f"{self.base_url}/web/search" - - # Build query parameters + params = { "q": query, "country": country, "search_lang": search_lang, "count": min(count, 20), "offset": min(offset, 9), - "safesearch": safesearch + "safesearch": safesearch, } - - # Add optional parameters only if they have values + if freshness: params["freshness"] = freshness if result_filter: @@ -54,68 +62,69 @@ class BraveSearchTool(Tool): params["extra_snippets"] = 1 if summary: params["summary"] = 1 - - # Set up headers headers = { "Accept": "application/json", "Accept-Encoding": "gzip", - "X-Subscription-Token": self.token + "X-Subscription-Token": self.token, } - - # Make the request + response = requests.get(url, params=params, headers=headers) - + if response.status_code == 200: return { "status_code": response.status_code, "results": response.json(), - "message": "Search completed successfully." + "message": "Search completed successfully.", } else: return { "status_code": response.status_code, - "message": f"Search failed with status code: {response.status_code}." + "message": f"Search failed with status code: {response.status_code}.", } - - def _image_search(self, query, country="ALL", search_lang="en", count=5, - safesearch="off", spellcheck=False): + + def _image_search( + self, + query, + country="ALL", + search_lang="en", + count=5, + safesearch="off", + spellcheck=False, + ): """ Performs an image search using the Brave Search API. """ print(f"Performing Brave image search for: {query}") - + url = f"{self.base_url}/images/search" - - # Build query parameters + params = { "q": query, "country": country, "search_lang": search_lang, "count": min(count, 100), # API max is 100 "safesearch": safesearch, - "spellcheck": 1 if spellcheck else 0 + "spellcheck": 1 if spellcheck else 0, } - - # Set up headers + headers = { "Accept": "application/json", "Accept-Encoding": "gzip", - "X-Subscription-Token": self.token + "X-Subscription-Token": self.token, } - - # Make the request + response = requests.get(url, params=params, headers=headers) - + if response.status_code == 200: return { "status_code": response.status_code, "results": response.json(), - "message": "Image search completed successfully." + "message": "Image search completed successfully.", } else: return { "status_code": response.status_code, - "message": f"Image search failed with status code: {response.status_code}." + "message": f"Image search failed with status code: {response.status_code}.", } def get_actions_metadata(self): @@ -130,42 +139,14 @@ class BraveSearchTool(Tool): "type": "string", "description": "The search query (max 400 characters, 50 words)", }, - # "country": { - # "type": "string", - # "description": "The 2-character country code (default: US)", - # }, "search_lang": { "type": "string", "description": "The search language preference (default: en)", }, - # "count": { - # "type": "integer", - # "description": "Number of results to return (max 20, default: 10)", - # }, - # "offset": { - # "type": "integer", - # "description": "Pagination offset (max 9, default: 0)", - # }, - # "safesearch": { - # "type": "string", - # "description": "Filter level for adult content (off, moderate, strict)", - # }, "freshness": { "type": "string", "description": "Time filter for results (pd: last 24h, pw: last week, pm: last month, py: last year)", }, - # "result_filter": { - # "type": "string", - # "description": "Comma-delimited list of result types to include", - # }, - # "extra_snippets": { - # "type": "boolean", - # "description": "Get additional excerpts from result pages", - # }, - # "summary": { - # "type": "boolean", - # "description": "Enable summary generation in search results", - # } }, "required": ["query"], "additionalProperties": False, @@ -181,37 +162,21 @@ class BraveSearchTool(Tool): "type": "string", "description": "The search query (max 400 characters, 50 words)", }, - # "country": { - # "type": "string", - # "description": "The 2-character country code (default: US)", - # }, - # "search_lang": { - # "type": "string", - # "description": "The search language preference (default: en)", - # }, "count": { "type": "integer", "description": "Number of results to return (max 100, default: 5)", }, - # "safesearch": { - # "type": "string", - # "description": "Filter level for adult content (off, strict). Default: strict", - # }, - # "spellcheck": { - # "type": "boolean", - # "description": "Whether to spellcheck provided query (default: true)", - # } }, "required": ["query"], "additionalProperties": False, }, - } + }, ] def get_config_requirements(self): return { "token": { - "type": "string", - "description": "Brave Search API key for authentication" + "type": "string", + "description": "Brave Search API key for authentication", }, - } \ No newline at end of file + } diff --git a/application/agents/tools/duckduckgo.py b/application/agents/tools/duckduckgo.py new file mode 100644 index 00000000..87c1bc7e --- /dev/null +++ b/application/agents/tools/duckduckgo.py @@ -0,0 +1,114 @@ +from application.agents.tools.base import Tool +from duckduckgo_search import DDGS + + +class DuckDuckGoSearchTool(Tool): + """ + DuckDuckGo Search + A tool for performing web and image searches using DuckDuckGo. + """ + + def __init__(self, config): + self.config = config + + def execute_action(self, action_name, **kwargs): + actions = { + "ddg_web_search": self._web_search, + "ddg_image_search": self._image_search, + } + + if action_name in actions: + return actions[action_name](**kwargs) + else: + raise ValueError(f"Unknown action: {action_name}") + + def _web_search( + self, + query, + max_results=5, + ): + print(f"Performing DuckDuckGo web search for: {query}") + + try: + results = DDGS().text( + query, + max_results=max_results, + ) + + return { + "status_code": 200, + "results": results, + "message": "Web search completed successfully.", + } + except Exception as e: + return { + "status_code": 500, + "message": f"Web search failed: {str(e)}", + } + + def _image_search( + self, + query, + max_results=5, + ): + print(f"Performing DuckDuckGo image search for: {query}") + + try: + results = DDGS().images( + keywords=query, + max_results=max_results, + ) + + return { + "status_code": 200, + "results": results, + "message": "Image search completed successfully.", + } + except Exception as e: + return { + "status_code": 500, + "message": f"Image search failed: {str(e)}", + } + + def get_actions_metadata(self): + return [ + { + "name": "ddg_web_search", + "description": "Perform a web search using DuckDuckGo.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query", + }, + "max_results": { + "type": "integer", + "description": "Number of results to return (default: 5)", + }, + }, + "required": ["query"], + }, + }, + { + "name": "ddg_image_search", + "description": "Perform an image search using DuckDuckGo.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query", + }, + "max_results": { + "type": "integer", + "description": "Number of results to return (default: 5, max: 50)", + }, + }, + "required": ["query"], + }, + }, + ] + + def get_config_requirements(self): + return {} diff --git a/application/api/user/routes.py b/application/api/user/routes.py index b1058203..c2f89761 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -879,29 +879,6 @@ class CombinedJson(Resource): "syncFrequency": index.get("sync_frequency", ""), } ) - if "duckduck_search" in settings.RETRIEVERS_ENABLED: - data.append( - { - "name": "DuckDuckGo Search", - "date": "duckduck_search", - "model": settings.EMBEDDINGS_NAME, - "location": "custom", - "tokens": "", - "retriever": "duckduck_search", - } - ) - if "brave_search" in settings.RETRIEVERS_ENABLED: - data.append( - { - "name": "Brave Search", - "language": "en", - "date": "brave_search", - "model": settings.EMBEDDINGS_NAME, - "location": "custom", - "tokens": "", - "retriever": "brave_search", - } - ) except Exception as err: current_app.logger.error(f"Error retrieving sources: {err}", exc_info=True) return make_response(jsonify({"success": False}), 400) diff --git a/application/core/settings.py b/application/core/settings.py index 7030022a..35e1bb75 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -33,7 +33,7 @@ class Settings(BaseSettings): VECTOR_STORE: str = ( "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb" ) - RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search + RETRIEVERS_ENABLED: list = ["classic_rag"] AGENT_NAME: str = "classic" FALLBACK_LLM_PROVIDER: Optional[str] = None # provider for fallback llm FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm @@ -99,7 +99,6 @@ class Settings(BaseSettings): LANCEDB_TABLE_NAME: Optional[str] = ( "docsgpts" # Name of the table to use for storing vectors ) - BRAVE_SEARCH_API_KEY: Optional[str] = None FLASK_DEBUG_MODE: bool = False STORAGE_TYPE: str = "local" # local or s3 diff --git a/application/retriever/brave_search.py b/application/retriever/brave_search.py deleted file mode 100644 index 123000e4..00000000 --- a/application/retriever/brave_search.py +++ /dev/null @@ -1,112 +0,0 @@ -import json - -from langchain_community.tools import BraveSearch - -from application.core.settings import settings -from application.llm.llm_creator import LLMCreator -from application.retriever.base import BaseRetriever - - -class BraveRetSearch(BaseRetriever): - - def __init__( - self, - source, - chat_history, - prompt, - chunks=2, - token_limit=150, - gpt_model="docsgpt", - user_api_key=None, - decoded_token=None, - ): - self.question = "" - self.source = source - self.chat_history = chat_history - self.prompt = prompt - self.chunks = chunks - self.gpt_model = gpt_model - self.token_limit = ( - token_limit - if token_limit - < settings.LLM_TOKEN_LIMITS.get( - self.gpt_model, settings.DEFAULT_MAX_HISTORY - ) - else settings.LLM_TOKEN_LIMITS.get( - self.gpt_model, settings.DEFAULT_MAX_HISTORY - ) - ) - self.user_api_key = user_api_key - self.decoded_token = decoded_token - - def _get_data(self): - if self.chunks == 0: - docs = [] - else: - search = BraveSearch.from_api_key( - api_key=settings.BRAVE_SEARCH_API_KEY, - search_kwargs={"count": int(self.chunks)}, - ) - results = search.run(self.question) - results = json.loads(results) - - docs = [] - for i in results: - try: - title = i["title"] - link = i["link"] - snippet = i["snippet"] - docs.append({"text": snippet, "title": title, "link": link}) - except IndexError: - pass - if settings.LLM_PROVIDER == "llama.cpp": - docs = [docs[0]] - - return docs - - def gen(self): - docs = self._get_data() - - # join all page_content together with a newline - docs_together = "\n".join([doc["text"] for doc in docs]) - p_chat_combine = self.prompt.replace("{summaries}", docs_together) - messages_combine = [{"role": "system", "content": p_chat_combine}] - for doc in docs: - yield {"source": doc} - - if len(self.chat_history) > 0: - for i in self.chat_history: - if "prompt" in i and "response" in i: - messages_combine.append({"role": "user", "content": i["prompt"]}) - messages_combine.append( - {"role": "assistant", "content": i["response"]} - ) - messages_combine.append({"role": "user", "content": self.question}) - - llm = LLMCreator.create_llm( - settings.LLM_PROVIDER, - api_key=settings.API_KEY, - user_api_key=self.user_api_key, - decoded_token=self.decoded_token, - ) - - completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine) - for line in completion: - yield {"answer": str(line)} - - def search(self, query: str = ""): - if query: - self.question = query - return self._get_data() - - def get_params(self): - return { - "question": self.question, - "source": self.source, - "chat_history": self.chat_history, - "prompt": self.prompt, - "chunks": self.chunks, - "token_limit": self.token_limit, - "gpt_model": self.gpt_model, - "user_api_key": self.user_api_key, - } diff --git a/application/retriever/duckduck_search.py b/application/retriever/duckduck_search.py deleted file mode 100644 index 5abe5edd..00000000 --- a/application/retriever/duckduck_search.py +++ /dev/null @@ -1,111 +0,0 @@ -from langchain_community.tools import DuckDuckGoSearchResults -from langchain_community.utilities import DuckDuckGoSearchAPIWrapper - -from application.core.settings import settings -from application.llm.llm_creator import LLMCreator -from application.retriever.base import BaseRetriever - - -class DuckDuckSearch(BaseRetriever): - - def __init__( - self, - source, - chat_history, - prompt, - chunks=2, - token_limit=150, - gpt_model="docsgpt", - user_api_key=None, - decoded_token=None, - ): - self.question = "" - self.source = source - self.chat_history = chat_history - self.prompt = prompt - self.chunks = chunks - self.gpt_model = gpt_model - self.token_limit = ( - token_limit - if token_limit - < settings.LLM_TOKEN_LIMITS.get( - self.gpt_model, settings.DEFAULT_MAX_HISTORY - ) - else settings.LLM_TOKEN_LIMITS.get( - self.gpt_model, settings.DEFAULT_MAX_HISTORY - ) - ) - self.user_api_key = user_api_key - self.decoded_token = decoded_token - - def _get_data(self): - if self.chunks == 0: - docs = [] - else: - wrapper = DuckDuckGoSearchAPIWrapper(max_results=self.chunks) - search = DuckDuckGoSearchResults(api_wrapper=wrapper, output_format="list") - results = search.run(self.question) - - docs = [] - for i in results: - try: - docs.append( - { - "text": i.get("snippet", "").strip(), - "title": i.get("title", "").strip(), - "link": i.get("link", "").strip(), - } - ) - except IndexError: - pass - if settings.LLM_PROVIDER == "llama.cpp": - docs = [docs[0]] - - return docs - - def gen(self): - docs = self._get_data() - - # join all page_content together with a newline - docs_together = "\n".join([doc["text"] for doc in docs]) - p_chat_combine = self.prompt.replace("{summaries}", docs_together) - messages_combine = [{"role": "system", "content": p_chat_combine}] - for doc in docs: - yield {"source": doc} - - if len(self.chat_history) > 0: - for i in self.chat_history: - if "prompt" in i and "response" in i: - messages_combine.append({"role": "user", "content": i["prompt"]}) - messages_combine.append( - {"role": "assistant", "content": i["response"]} - ) - messages_combine.append({"role": "user", "content": self.question}) - - llm = LLMCreator.create_llm( - settings.LLM_PROVIDER, - api_key=settings.API_KEY, - user_api_key=self.user_api_key, - decoded_token=self.decoded_token, - ) - - completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine) - for line in completion: - yield {"answer": str(line)} - - def search(self, query: str = ""): - if query: - self.question = query - return self._get_data() - - def get_params(self): - return { - "question": self.question, - "source": self.source, - "chat_history": self.chat_history, - "prompt": self.prompt, - "chunks": self.chunks, - "token_limit": self.token_limit, - "gpt_model": self.gpt_model, - "user_api_key": self.user_api_key, - } diff --git a/application/retriever/retriever_creator.py b/application/retriever/retriever_creator.py index 26cb41ca..e51be42f 100644 --- a/application/retriever/retriever_creator.py +++ b/application/retriever/retriever_creator.py @@ -1,13 +1,9 @@ from application.retriever.classic_rag import ClassicRAG -from application.retriever.duckduck_search import DuckDuckSearch -from application.retriever.brave_search import BraveRetSearch class RetrieverCreator: retrievers = { "classic": ClassicRAG, - "duckduck_search": DuckDuckSearch, - "brave_search": BraveRetSearch, "default": ClassicRAG, }