mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
refactor: use DuckDuckGo and Brave as tools instead of retrievers
This commit is contained in:
@@ -8,7 +8,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ClassicAgent(BaseAgent):
|
||||
"""A simplified classic agent with clear execution flow.
|
||||
"""A simplified agent with clear execution flow.
|
||||
|
||||
Usage:
|
||||
1. Processes a query through retrieval
|
||||
|
||||
@@ -25,27 +25,35 @@ class BraveSearchTool(Tool):
|
||||
else:
|
||||
raise ValueError(f"Unknown action: {action_name}")
|
||||
|
||||
def _web_search(self, query, country="ALL", search_lang="en", count=10,
|
||||
offset=0, safesearch="off", freshness=None,
|
||||
result_filter=None, extra_snippets=False, summary=False):
|
||||
def _web_search(
|
||||
self,
|
||||
query,
|
||||
country="ALL",
|
||||
search_lang="en",
|
||||
count=10,
|
||||
offset=0,
|
||||
safesearch="off",
|
||||
freshness=None,
|
||||
result_filter=None,
|
||||
extra_snippets=False,
|
||||
summary=False,
|
||||
):
|
||||
"""
|
||||
Performs a web search using the Brave Search API.
|
||||
"""
|
||||
print(f"Performing Brave web search for: {query}")
|
||||
|
||||
|
||||
url = f"{self.base_url}/web/search"
|
||||
|
||||
# Build query parameters
|
||||
|
||||
params = {
|
||||
"q": query,
|
||||
"country": country,
|
||||
"search_lang": search_lang,
|
||||
"count": min(count, 20),
|
||||
"offset": min(offset, 9),
|
||||
"safesearch": safesearch
|
||||
"safesearch": safesearch,
|
||||
}
|
||||
|
||||
# Add optional parameters only if they have values
|
||||
|
||||
if freshness:
|
||||
params["freshness"] = freshness
|
||||
if result_filter:
|
||||
@@ -54,68 +62,69 @@ class BraveSearchTool(Tool):
|
||||
params["extra_snippets"] = 1
|
||||
if summary:
|
||||
params["summary"] = 1
|
||||
|
||||
# Set up headers
|
||||
headers = {
|
||||
"Accept": "application/json",
|
||||
"Accept-Encoding": "gzip",
|
||||
"X-Subscription-Token": self.token
|
||||
"X-Subscription-Token": self.token,
|
||||
}
|
||||
|
||||
# Make the request
|
||||
|
||||
response = requests.get(url, params=params, headers=headers)
|
||||
|
||||
|
||||
if response.status_code == 200:
|
||||
return {
|
||||
"status_code": response.status_code,
|
||||
"results": response.json(),
|
||||
"message": "Search completed successfully."
|
||||
"message": "Search completed successfully.",
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"status_code": response.status_code,
|
||||
"message": f"Search failed with status code: {response.status_code}."
|
||||
"message": f"Search failed with status code: {response.status_code}.",
|
||||
}
|
||||
|
||||
def _image_search(self, query, country="ALL", search_lang="en", count=5,
|
||||
safesearch="off", spellcheck=False):
|
||||
|
||||
def _image_search(
|
||||
self,
|
||||
query,
|
||||
country="ALL",
|
||||
search_lang="en",
|
||||
count=5,
|
||||
safesearch="off",
|
||||
spellcheck=False,
|
||||
):
|
||||
"""
|
||||
Performs an image search using the Brave Search API.
|
||||
"""
|
||||
print(f"Performing Brave image search for: {query}")
|
||||
|
||||
|
||||
url = f"{self.base_url}/images/search"
|
||||
|
||||
# Build query parameters
|
||||
|
||||
params = {
|
||||
"q": query,
|
||||
"country": country,
|
||||
"search_lang": search_lang,
|
||||
"count": min(count, 100), # API max is 100
|
||||
"safesearch": safesearch,
|
||||
"spellcheck": 1 if spellcheck else 0
|
||||
"spellcheck": 1 if spellcheck else 0,
|
||||
}
|
||||
|
||||
# Set up headers
|
||||
|
||||
headers = {
|
||||
"Accept": "application/json",
|
||||
"Accept-Encoding": "gzip",
|
||||
"X-Subscription-Token": self.token
|
||||
"X-Subscription-Token": self.token,
|
||||
}
|
||||
|
||||
# Make the request
|
||||
|
||||
response = requests.get(url, params=params, headers=headers)
|
||||
|
||||
|
||||
if response.status_code == 200:
|
||||
return {
|
||||
"status_code": response.status_code,
|
||||
"results": response.json(),
|
||||
"message": "Image search completed successfully."
|
||||
"message": "Image search completed successfully.",
|
||||
}
|
||||
else:
|
||||
return {
|
||||
"status_code": response.status_code,
|
||||
"message": f"Image search failed with status code: {response.status_code}."
|
||||
"message": f"Image search failed with status code: {response.status_code}.",
|
||||
}
|
||||
|
||||
def get_actions_metadata(self):
|
||||
@@ -130,42 +139,14 @@ class BraveSearchTool(Tool):
|
||||
"type": "string",
|
||||
"description": "The search query (max 400 characters, 50 words)",
|
||||
},
|
||||
# "country": {
|
||||
# "type": "string",
|
||||
# "description": "The 2-character country code (default: US)",
|
||||
# },
|
||||
"search_lang": {
|
||||
"type": "string",
|
||||
"description": "The search language preference (default: en)",
|
||||
},
|
||||
# "count": {
|
||||
# "type": "integer",
|
||||
# "description": "Number of results to return (max 20, default: 10)",
|
||||
# },
|
||||
# "offset": {
|
||||
# "type": "integer",
|
||||
# "description": "Pagination offset (max 9, default: 0)",
|
||||
# },
|
||||
# "safesearch": {
|
||||
# "type": "string",
|
||||
# "description": "Filter level for adult content (off, moderate, strict)",
|
||||
# },
|
||||
"freshness": {
|
||||
"type": "string",
|
||||
"description": "Time filter for results (pd: last 24h, pw: last week, pm: last month, py: last year)",
|
||||
},
|
||||
# "result_filter": {
|
||||
# "type": "string",
|
||||
# "description": "Comma-delimited list of result types to include",
|
||||
# },
|
||||
# "extra_snippets": {
|
||||
# "type": "boolean",
|
||||
# "description": "Get additional excerpts from result pages",
|
||||
# },
|
||||
# "summary": {
|
||||
# "type": "boolean",
|
||||
# "description": "Enable summary generation in search results",
|
||||
# }
|
||||
},
|
||||
"required": ["query"],
|
||||
"additionalProperties": False,
|
||||
@@ -181,37 +162,21 @@ class BraveSearchTool(Tool):
|
||||
"type": "string",
|
||||
"description": "The search query (max 400 characters, 50 words)",
|
||||
},
|
||||
# "country": {
|
||||
# "type": "string",
|
||||
# "description": "The 2-character country code (default: US)",
|
||||
# },
|
||||
# "search_lang": {
|
||||
# "type": "string",
|
||||
# "description": "The search language preference (default: en)",
|
||||
# },
|
||||
"count": {
|
||||
"type": "integer",
|
||||
"description": "Number of results to return (max 100, default: 5)",
|
||||
},
|
||||
# "safesearch": {
|
||||
# "type": "string",
|
||||
# "description": "Filter level for adult content (off, strict). Default: strict",
|
||||
# },
|
||||
# "spellcheck": {
|
||||
# "type": "boolean",
|
||||
# "description": "Whether to spellcheck provided query (default: true)",
|
||||
# }
|
||||
},
|
||||
"required": ["query"],
|
||||
"additionalProperties": False,
|
||||
},
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
def get_config_requirements(self):
|
||||
return {
|
||||
"token": {
|
||||
"type": "string",
|
||||
"description": "Brave Search API key for authentication"
|
||||
"type": "string",
|
||||
"description": "Brave Search API key for authentication",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
114
application/agents/tools/duckduckgo.py
Normal file
114
application/agents/tools/duckduckgo.py
Normal file
@@ -0,0 +1,114 @@
|
||||
from application.agents.tools.base import Tool
|
||||
from duckduckgo_search import DDGS
|
||||
|
||||
|
||||
class DuckDuckGoSearchTool(Tool):
|
||||
"""
|
||||
DuckDuckGo Search
|
||||
A tool for performing web and image searches using DuckDuckGo.
|
||||
"""
|
||||
|
||||
def __init__(self, config):
|
||||
self.config = config
|
||||
|
||||
def execute_action(self, action_name, **kwargs):
|
||||
actions = {
|
||||
"ddg_web_search": self._web_search,
|
||||
"ddg_image_search": self._image_search,
|
||||
}
|
||||
|
||||
if action_name in actions:
|
||||
return actions[action_name](**kwargs)
|
||||
else:
|
||||
raise ValueError(f"Unknown action: {action_name}")
|
||||
|
||||
def _web_search(
|
||||
self,
|
||||
query,
|
||||
max_results=5,
|
||||
):
|
||||
print(f"Performing DuckDuckGo web search for: {query}")
|
||||
|
||||
try:
|
||||
results = DDGS().text(
|
||||
query,
|
||||
max_results=max_results,
|
||||
)
|
||||
|
||||
return {
|
||||
"status_code": 200,
|
||||
"results": results,
|
||||
"message": "Web search completed successfully.",
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"status_code": 500,
|
||||
"message": f"Web search failed: {str(e)}",
|
||||
}
|
||||
|
||||
def _image_search(
|
||||
self,
|
||||
query,
|
||||
max_results=5,
|
||||
):
|
||||
print(f"Performing DuckDuckGo image search for: {query}")
|
||||
|
||||
try:
|
||||
results = DDGS().images(
|
||||
keywords=query,
|
||||
max_results=max_results,
|
||||
)
|
||||
|
||||
return {
|
||||
"status_code": 200,
|
||||
"results": results,
|
||||
"message": "Image search completed successfully.",
|
||||
}
|
||||
except Exception as e:
|
||||
return {
|
||||
"status_code": 500,
|
||||
"message": f"Image search failed: {str(e)}",
|
||||
}
|
||||
|
||||
def get_actions_metadata(self):
|
||||
return [
|
||||
{
|
||||
"name": "ddg_web_search",
|
||||
"description": "Perform a web search using DuckDuckGo.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query",
|
||||
},
|
||||
"max_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of results to return (default: 5)",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"name": "ddg_image_search",
|
||||
"description": "Perform an image search using DuckDuckGo.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query",
|
||||
},
|
||||
"max_results": {
|
||||
"type": "integer",
|
||||
"description": "Number of results to return (default: 5, max: 50)",
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def get_config_requirements(self):
|
||||
return {}
|
||||
@@ -879,29 +879,6 @@ class CombinedJson(Resource):
|
||||
"syncFrequency": index.get("sync_frequency", ""),
|
||||
}
|
||||
)
|
||||
if "duckduck_search" in settings.RETRIEVERS_ENABLED:
|
||||
data.append(
|
||||
{
|
||||
"name": "DuckDuckGo Search",
|
||||
"date": "duckduck_search",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "custom",
|
||||
"tokens": "",
|
||||
"retriever": "duckduck_search",
|
||||
}
|
||||
)
|
||||
if "brave_search" in settings.RETRIEVERS_ENABLED:
|
||||
data.append(
|
||||
{
|
||||
"name": "Brave Search",
|
||||
"language": "en",
|
||||
"date": "brave_search",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "custom",
|
||||
"tokens": "",
|
||||
"retriever": "brave_search",
|
||||
}
|
||||
)
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving sources: {err}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
|
||||
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
|
||||
VECTOR_STORE: str = (
|
||||
"faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
|
||||
)
|
||||
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
|
||||
RETRIEVERS_ENABLED: list = ["classic_rag"]
|
||||
AGENT_NAME: str = "classic"
|
||||
FALLBACK_LLM_PROVIDER: Optional[str] = None # provider for fallback llm
|
||||
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
|
||||
@@ -99,7 +99,6 @@ class Settings(BaseSettings):
|
||||
LANCEDB_TABLE_NAME: Optional[str] = (
|
||||
"docsgpts" # Name of the table to use for storing vectors
|
||||
)
|
||||
BRAVE_SEARCH_API_KEY: Optional[str] = None
|
||||
|
||||
FLASK_DEBUG_MODE: bool = False
|
||||
STORAGE_TYPE: str = "local" # local or s3
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
import json
|
||||
|
||||
from langchain_community.tools import BraveSearch
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.retriever.base import BaseRetriever
|
||||
|
||||
|
||||
class BraveRetSearch(BaseRetriever):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
source,
|
||||
chat_history,
|
||||
prompt,
|
||||
chunks=2,
|
||||
token_limit=150,
|
||||
gpt_model="docsgpt",
|
||||
user_api_key=None,
|
||||
decoded_token=None,
|
||||
):
|
||||
self.question = ""
|
||||
self.source = source
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
self.gpt_model = gpt_model
|
||||
self.token_limit = (
|
||||
token_limit
|
||||
if token_limit
|
||||
< settings.LLM_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
else settings.LLM_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
self.decoded_token = decoded_token
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
else:
|
||||
search = BraveSearch.from_api_key(
|
||||
api_key=settings.BRAVE_SEARCH_API_KEY,
|
||||
search_kwargs={"count": int(self.chunks)},
|
||||
)
|
||||
results = search.run(self.question)
|
||||
results = json.loads(results)
|
||||
|
||||
docs = []
|
||||
for i in results:
|
||||
try:
|
||||
title = i["title"]
|
||||
link = i["link"]
|
||||
snippet = i["snippet"]
|
||||
docs.append({"text": snippet, "title": title, "link": link})
|
||||
except IndexError:
|
||||
pass
|
||||
if settings.LLM_PROVIDER == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
|
||||
return docs
|
||||
|
||||
def gen(self):
|
||||
docs = self._get_data()
|
||||
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc["text"] for doc in docs])
|
||||
p_chat_combine = self.prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
for doc in docs:
|
||||
yield {"source": doc}
|
||||
|
||||
if len(self.chat_history) > 0:
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append(
|
||||
{"role": "assistant", "content": i["response"]}
|
||||
)
|
||||
messages_combine.append({"role": "user", "content": self.question})
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_PROVIDER,
|
||||
api_key=settings.API_KEY,
|
||||
user_api_key=self.user_api_key,
|
||||
decoded_token=self.decoded_token,
|
||||
)
|
||||
|
||||
completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
|
||||
for line in completion:
|
||||
yield {"answer": str(line)}
|
||||
|
||||
def search(self, query: str = ""):
|
||||
if query:
|
||||
self.question = query
|
||||
return self._get_data()
|
||||
|
||||
def get_params(self):
|
||||
return {
|
||||
"question": self.question,
|
||||
"source": self.source,
|
||||
"chat_history": self.chat_history,
|
||||
"prompt": self.prompt,
|
||||
"chunks": self.chunks,
|
||||
"token_limit": self.token_limit,
|
||||
"gpt_model": self.gpt_model,
|
||||
"user_api_key": self.user_api_key,
|
||||
}
|
||||
@@ -1,111 +0,0 @@
|
||||
from langchain_community.tools import DuckDuckGoSearchResults
|
||||
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.retriever.base import BaseRetriever
|
||||
|
||||
|
||||
class DuckDuckSearch(BaseRetriever):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
source,
|
||||
chat_history,
|
||||
prompt,
|
||||
chunks=2,
|
||||
token_limit=150,
|
||||
gpt_model="docsgpt",
|
||||
user_api_key=None,
|
||||
decoded_token=None,
|
||||
):
|
||||
self.question = ""
|
||||
self.source = source
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
self.gpt_model = gpt_model
|
||||
self.token_limit = (
|
||||
token_limit
|
||||
if token_limit
|
||||
< settings.LLM_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
else settings.LLM_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
self.decoded_token = decoded_token
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
else:
|
||||
wrapper = DuckDuckGoSearchAPIWrapper(max_results=self.chunks)
|
||||
search = DuckDuckGoSearchResults(api_wrapper=wrapper, output_format="list")
|
||||
results = search.run(self.question)
|
||||
|
||||
docs = []
|
||||
for i in results:
|
||||
try:
|
||||
docs.append(
|
||||
{
|
||||
"text": i.get("snippet", "").strip(),
|
||||
"title": i.get("title", "").strip(),
|
||||
"link": i.get("link", "").strip(),
|
||||
}
|
||||
)
|
||||
except IndexError:
|
||||
pass
|
||||
if settings.LLM_PROVIDER == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
|
||||
return docs
|
||||
|
||||
def gen(self):
|
||||
docs = self._get_data()
|
||||
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc["text"] for doc in docs])
|
||||
p_chat_combine = self.prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
for doc in docs:
|
||||
yield {"source": doc}
|
||||
|
||||
if len(self.chat_history) > 0:
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append(
|
||||
{"role": "assistant", "content": i["response"]}
|
||||
)
|
||||
messages_combine.append({"role": "user", "content": self.question})
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_PROVIDER,
|
||||
api_key=settings.API_KEY,
|
||||
user_api_key=self.user_api_key,
|
||||
decoded_token=self.decoded_token,
|
||||
)
|
||||
|
||||
completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
|
||||
for line in completion:
|
||||
yield {"answer": str(line)}
|
||||
|
||||
def search(self, query: str = ""):
|
||||
if query:
|
||||
self.question = query
|
||||
return self._get_data()
|
||||
|
||||
def get_params(self):
|
||||
return {
|
||||
"question": self.question,
|
||||
"source": self.source,
|
||||
"chat_history": self.chat_history,
|
||||
"prompt": self.prompt,
|
||||
"chunks": self.chunks,
|
||||
"token_limit": self.token_limit,
|
||||
"gpt_model": self.gpt_model,
|
||||
"user_api_key": self.user_api_key,
|
||||
}
|
||||
@@ -1,13 +1,9 @@
|
||||
from application.retriever.classic_rag import ClassicRAG
|
||||
from application.retriever.duckduck_search import DuckDuckSearch
|
||||
from application.retriever.brave_search import BraveRetSearch
|
||||
|
||||
|
||||
class RetrieverCreator:
|
||||
retrievers = {
|
||||
"classic": ClassicRAG,
|
||||
"duckduck_search": DuckDuckSearch,
|
||||
"brave_search": BraveRetSearch,
|
||||
"default": ClassicRAG,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user