GH Action - Upstream Sync
2025-06-12 01:43:21 +00:00
33 changed files with 913 additions and 649 deletions

View File

@@ -2,16 +2,18 @@ import uuid
from abc import ABC, abstractmethod
from typing import Dict, Generator, List, Optional
from application.agents.llm_handler import get_llm_handler
from bson.objectid import ObjectId
from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.llm.handlers.handler_creator import LLMHandlerCreator
from application.llm.llm_creator import LLMCreator
from application.logging import build_stack_data, log_activity, LogContext
from application.retriever.base import BaseRetriever
from application.core.settings import settings
from bson.objectid import ObjectId
class BaseAgent(ABC):
@@ -45,7 +47,9 @@ class BaseAgent(ABC):
user_api_key=user_api_key,
decoded_token=decoded_token,
)
self.llm_handler = get_llm_handler(llm_name)
self.llm_handler = LLMHandlerCreator.create_handler(
llm_name if llm_name else "default"
)
self.attachments = attachments or []
@log_activity()
@@ -132,6 +136,15 @@ class BaseAgent(ABC):
parser = ToolActionParser(self.llm.__class__.__name__)
tool_id, action_name, call_args = parser.parse_args(call)
call_id = getattr(call, "id", None) or str(uuid.uuid4())
tool_call_data = {
"tool_name": tools_dict[tool_id]["name"],
"call_id": call_id,
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "pending"}}
tool_data = tools_dict[tool_id]
action_data = (
tool_data["config"]["actions"][action_name]
@@ -184,19 +197,29 @@ class BaseAgent(ABC):
else:
print(f"Executing tool: {action_name} with args: {call_args}")
result = tool.execute_action(action_name, **parameters)
call_id = getattr(call, "id", None)
tool_call_data["result"] = (
f"{str(result)[:50]}..." if len(str(result)) > 50 else result
)
tool_call_data = {
"tool_name": tool_data["name"],
"call_id": call_id if call_id is not None else "None",
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
"result": result,
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "completed"}}
self.tool_calls.append(tool_call_data)
return result, call_id
def _get_truncated_tool_calls(self):
return [
{
**tool_call,
"result": (
f"{str(tool_call['result'])[:50]}..."
if len(str(tool_call["result"])) > 50
else tool_call["result"]
),
"status": "completed",
}
for tool_call in self.tool_calls
]
def _build_messages(
self,
system_prompt: str,
@@ -252,9 +275,16 @@ class BaseAgent(ABC):
return retrieved_data
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
resp = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=self.tools
)
gen_kwargs = {"model": self.gpt_model, "messages": messages}
if (
hasattr(self.llm, "_supports_tools")
and self.llm._supports_tools
and self.tools
):
gen_kwargs["tools"] = self.tools
resp = self.llm.gen_stream(**gen_kwargs)
if log_context:
data = build_stack_data(self.llm, exclude_attributes=["client"])
log_context.stacks.append({"component": "llm", "data": data})
@@ -268,10 +298,30 @@ class BaseAgent(ABC):
log_context: Optional[LogContext] = None,
attachments: Optional[List[Dict]] = None,
):
resp = self.llm_handler.handle_response(
self, resp, tools_dict, messages, attachments
resp = self.llm_handler.process_message_flow(
self, resp, tools_dict, messages, attachments, True
)
if log_context:
data = build_stack_data(self.llm_handler, exclude_attributes=["tool_calls"])
log_context.stacks.append({"component": "llm_handler", "data": data})
return resp
def _handle_response(self, response, tools_dict, messages, log_context):
if isinstance(response, str):
yield {"answer": response}
return
if hasattr(response, "message") and getattr(response.message, "content", None):
yield {"answer": response.message.content}
return
processed_response_gen = self._llm_handler(
response, tools_dict, messages, log_context, self.attachments
)
for event in processed_response_gen:
if isinstance(event, str):
yield {"answer": event}
elif hasattr(event, "message") and getattr(event.message, "content", None):
yield {"answer": event.message.content}
elif isinstance(event, dict) and "type" in event:
yield event
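For reference, a minimal sketch of the event payloads the reworked tool-call path yields (field names mirror tool_call_data above; the concrete values are invented for illustration):

# Sketch only: keys follow tool_call_data in the diff above; values are illustrative.
pending_event = {
    "type": "tool_call",
    "data": {
        "tool_name": "example_tool",                 # tools_dict[tool_id]["name"]
        "call_id": "call-123",                       # LLM-supplied id, or a generated uuid4
        "action_name": "read_webpage_42",            # f"{action_name}_{tool_id}"
        "arguments": {"url": "https://example.com"},
        "status": "pending",
    },
}
completed_event = {
    "type": "tool_call",
    "data": {
        **pending_event["data"],
        "result": "first 50 characters of the tool output...",
        "status": "completed",
    },
}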

View File

@@ -1,8 +1,6 @@
from typing import Dict, Generator
from application.agents.base import BaseAgent
from application.logging import LogContext
from application.retriever.base import BaseRetriever
import logging
@@ -10,55 +8,46 @@ logger = logging.getLogger(__name__)
class ClassicAgent(BaseAgent):
"""A simplified classic agent with clear execution flow.
Usage:
1. Processes a query through retrieval
2. Sets up available tools
3. Generates responses using LLM
4. Handles tool interactions if needed
5. Returns standardized outputs
Easy to extend by overriding specific steps.
"""
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
) -> Generator[Dict, None, None]:
# Step 1: Retrieve relevant data
retrieved_data = self._retriever_search(retriever, query, log_context)
if self.user_api_key:
tools_dict = self._get_tools(self.user_api_key)
else:
tools_dict = self._get_user_tools(self.user)
# Step 2: Prepare tools
tools_dict = (
self._get_user_tools(self.user)
if not self.user_api_key
else self._get_tools(self.user_api_key)
)
self._prepare_tools(tools_dict)
# Step 3: Build and process messages
messages = self._build_messages(self.prompt, query, retrieved_data)
llm_response = self._llm_gen(messages, log_context)
resp = self._llm_gen(messages, log_context)
# Step 4: Handle the response
yield from self._handle_response(
llm_response, tools_dict, messages, log_context
)
attachments = self.attachments
if isinstance(resp, str):
yield {"answer": resp}
return
if (
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
yield {"answer": resp.message.content}
return
resp = self._llm_handler(resp, tools_dict, messages, log_context, attachments)
if isinstance(resp, str):
yield {"answer": resp}
elif (
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
yield {"answer": resp.message.content}
else:
for line in resp:
if isinstance(line, str):
yield {"answer": line}
# Step 5: Return metadata
yield {"sources": retrieved_data}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Log tool calls for debugging
log_context.stacks.append(
{"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
)
yield {"sources": retrieved_data}
# clean tool_call_data: only send the first 50 characters of tool_call['result']
for tool_call in self.tool_calls:
if len(str(tool_call["result"])) > 50:
tool_call["result"] = str(tool_call["result"])[:50] + "..."
yield {"tool_calls": self.tool_calls.copy()}

View File

@@ -1,351 +0,0 @@
import json
import logging
from abc import ABC, abstractmethod
from application.logging import build_stack_data
logger = logging.getLogger(__name__)
class LLMHandler(ABC):
def __init__(self):
self.llm_calls = []
self.tool_calls = []
@abstractmethod
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, **kwargs):
pass
def prepare_messages_with_attachments(self, agent, messages, attachments=None):
"""
Prepare messages with attachment content if available.
Args:
agent: The current agent instance.
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content.
Returns:
list: Messages with attachment context added to the system prompt.
"""
if not attachments:
return messages
logger.info(f"Preparing messages with {len(attachments)} attachments")
supported_types = agent.llm.get_supported_attachment_types()
supported_attachments = []
unsupported_attachments = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if mime_type in supported_types:
supported_attachments.append(attachment)
else:
unsupported_attachments.append(attachment)
# Process supported attachments with the LLM's custom method
prepared_messages = messages
if supported_attachments:
logger.info(f"Processing {len(supported_attachments)} supported attachments with {agent.llm.__class__.__name__}'s method")
prepared_messages = agent.llm.prepare_messages_with_attachments(messages, supported_attachments)
# Process unsupported attachments with the default method
if unsupported_attachments:
logger.info(f"Processing {len(unsupported_attachments)} unsupported attachments with default method")
prepared_messages = self._append_attachment_content_to_system(prepared_messages, unsupported_attachments)
return prepared_messages
def _append_attachment_content_to_system(self, messages, attachments):
"""
Default method to append attachment content to the system prompt.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content.
Returns:
list: Messages with attachment context added to the system prompt.
"""
prepared_messages = messages.copy()
attachment_texts = []
for attachment in attachments:
logger.info(f"Adding attachment {attachment.get('id')} to context")
if 'content' in attachment:
attachment_texts.append(f"Attached file content:\n\n{attachment['content']}")
if attachment_texts:
combined_attachment_text = "\n\n".join(attachment_texts)
system_found = False
for i in range(len(prepared_messages)):
if prepared_messages[i].get("role") == "system":
prepared_messages[i]["content"] += f"\n\n{combined_attachment_text}"
system_found = True
break
if not system_found:
prepared_messages.insert(0, {"role": "system", "content": combined_attachment_text})
return prepared_messages
class OpenAILLMHandler(LLMHandler):
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
logger.info(f"Messages with attachments: {messages}")
if not stream:
while hasattr(resp, "finish_reason") and resp.finish_reason == "tool_calls":
message = json.loads(resp.model_dump_json())["message"]
keys_to_remove = {"audio", "function_call", "refusal"}
filtered_data = {
k: v for k, v in message.items() if k not in keys_to_remove
}
messages.append(filtered_data)
tool_calls = resp.message.tool_calls
for call in tool_calls:
try:
self.tool_calls.append(call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, call
)
function_call_dict = {
"function_call": {
"name": call.function.name,
"args": call.function.arguments,
"call_id": call_id,
}
}
function_response_dict = {
"function_response": {
"name": call.function.name,
"response": {"result": tool_response},
"call_id": call_id,
}
}
messages.append(
{"role": "assistant", "content": [function_call_dict]}
)
messages.append(
{"role": "tool", "content": [function_response_dict]}
)
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
except Exception as e:
logging.error(f"Error executing tool: {str(e)}", exc_info=True)
messages.append(
{
"role": "tool",
"content": f"Error executing tool: {str(e)}",
"tool_call_id": call_id,
}
)
resp = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
return resp
else:
text_buffer = ""
while True:
tool_calls = {}
for chunk in resp:
if isinstance(chunk, str) and len(chunk) > 0:
yield chunk
continue
elif hasattr(chunk, "delta"):
chunk_delta = chunk.delta
if (
hasattr(chunk_delta, "tool_calls")
and chunk_delta.tool_calls is not None
):
for tool_call in chunk_delta.tool_calls:
index = tool_call.index
if index not in tool_calls:
tool_calls[index] = {
"id": "",
"function": {"name": "", "arguments": ""},
}
current = tool_calls[index]
if tool_call.id:
current["id"] = tool_call.id
if tool_call.function.name:
current["function"][
"name"
] = tool_call.function.name
if tool_call.function.arguments:
current["function"][
"arguments"
] += tool_call.function.arguments
tool_calls[index] = current
if (
hasattr(chunk, "finish_reason")
and chunk.finish_reason == "tool_calls"
):
for index in sorted(tool_calls.keys()):
call = tool_calls[index]
try:
self.tool_calls.append(call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, call
)
if isinstance(call["function"]["arguments"], str):
call["function"]["arguments"] = json.loads(call["function"]["arguments"])
function_call_dict = {
"function_call": {
"name": call["function"]["name"],
"args": call["function"]["arguments"],
"call_id": call["id"],
}
}
function_response_dict = {
"function_response": {
"name": call["function"]["name"],
"response": {"result": tool_response},
"call_id": call["id"],
}
}
messages.append(
{
"role": "assistant",
"content": [function_call_dict],
}
)
messages.append(
{
"role": "tool",
"content": [function_response_dict],
}
)
except Exception as e:
logging.error(f"Error executing tool: {str(e)}", exc_info=True)
messages.append(
{
"role": "assistant",
"content": f"Error executing tool: {str(e)}",
}
)
tool_calls = {}
if hasattr(chunk_delta, "content") and chunk_delta.content:
# Add to buffer or yield immediately based on your preference
text_buffer += chunk_delta.content
yield text_buffer
text_buffer = ""
if (
hasattr(chunk, "finish_reason")
and chunk.finish_reason == "stop"
):
return resp
elif isinstance(chunk, str) and len(chunk) == 0:
continue
logger.info(f"Regenerating with messages: {messages}")
resp = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
class GoogleLLMHandler(LLMHandler):
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
from google.genai import types
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
while True:
if not stream:
response = agent.llm.gen(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
if response.candidates and response.candidates[0].content.parts:
tool_call_found = False
for part in response.candidates[0].content.parts:
if part.function_call:
tool_call_found = True
self.tool_calls.append(part.function_call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, part.function_call
)
function_response_part = types.Part.from_function_response(
name=part.function_call.name,
response={"result": tool_response},
)
messages.append(
{"role": "model", "content": [part.to_json_dict()]}
)
messages.append(
{
"role": "tool",
"content": [function_response_part.to_json_dict()],
}
)
if (
not tool_call_found
and response.candidates[0].content.parts
and response.candidates[0].content.parts[0].text
):
return response.candidates[0].content.parts[0].text
elif not tool_call_found:
return response.candidates[0].content.parts
else:
return response
else:
response = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
tool_call_found = False
for result in response:
if hasattr(result, "function_call"):
tool_call_found = True
self.tool_calls.append(result.function_call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, result.function_call
)
function_response_part = types.Part.from_function_response(
name=result.function_call.name,
response={"result": tool_response},
)
messages.append(
{"role": "model", "content": [result.to_json_dict()]}
)
messages.append(
{
"role": "tool",
"content": [function_response_part.to_json_dict()],
}
)
else:
tool_call_found = False
yield result
if not tool_call_found:
return response
def get_llm_handler(llm_type):
handlers = {
"openai": OpenAILLMHandler(),
"google": GoogleLLMHandler(),
}
return handlers.get(llm_type, OpenAILLMHandler())

View File

@@ -17,26 +17,21 @@ class ToolActionParser:
return parser(call)
def _parse_openai_llm(self, call):
if isinstance(call, dict):
try:
call_args = json.loads(call["function"]["arguments"])
tool_id = call["function"]["name"].split("_")[-1]
action_name = call["function"]["name"].rsplit("_", 1)[0]
except (KeyError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
else:
try:
call_args = json.loads(call.function.arguments)
tool_id = call.function.name.split("_")[-1]
action_name = call.function.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
try:
call_args = json.loads(call.arguments)
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
def _parse_google_llm(self, call):
call_args = call.args
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
try:
call_args = call.arguments
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing Google LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
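A hedged usage sketch of the updated parser, assuming the standardized ToolCall object introduced later in this commit; the "OpenAILLM" parser key and the tool name are assumptions for illustration:

from application.agents.tools.tool_action_parser import ToolActionParser
from application.llm.handlers.base import ToolCall

# Hypothetical call: OpenAI-style LLMs deliver arguments as a JSON string.
call = ToolCall(id="call-1", name="read_webpage_42", arguments='{"url": "https://example.com"}')
parser = ToolActionParser("OpenAILLM")  # keyed by agent.llm.__class__.__name__ (assumed key)
tool_id, action_name, call_args = parser.parse_args(call)
# tool_id == "42", action_name == "read_webpage", call_args == {"url": "https://example.com"}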

View File

@@ -37,17 +37,17 @@ api.add_namespace(answer_ns)
gpt_model = ""
# to have some kind of default behaviour
if settings.LLM_NAME == "openai":
if settings.LLM_PROVIDER == "openai":
gpt_model = "gpt-4o-mini"
elif settings.LLM_NAME == "anthropic":
elif settings.LLM_PROVIDER == "anthropic":
gpt_model = "claude-2"
elif settings.LLM_NAME == "groq":
elif settings.LLM_PROVIDER == "groq":
gpt_model = "llama3-8b-8192"
elif settings.LLM_NAME == "novita":
elif settings.LLM_PROVIDER == "novita":
gpt_model = "deepseek/deepseek-r1"
if settings.MODEL_NAME: # in case there is a particular model name configured
gpt_model = settings.MODEL_NAME
if settings.LLM_NAME: # in case there is a particular model name configured
gpt_model = settings.LLM_NAME
# load the prompts
current_dir = os.path.dirname(
@@ -307,19 +307,20 @@ def complete_stream(
yield f"data: {data}\n\n"
elif "tool_calls" in line:
tool_calls = line["tool_calls"]
data = json.dumps({"type": "tool_calls", "tool_calls": tool_calls})
yield f"data: {data}\n\n"
elif "thought" in line:
thought += line["thought"]
data = json.dumps({"type": "thought", "thought": line["thought"]})
yield f"data: {data}\n\n"
elif "type" in line:
data = json.dumps(line)
yield f"data: {data}\n\n"
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,
@@ -451,9 +452,7 @@ class Stream(Resource):
agent_type = settings.AGENT_NAME
decoded_token = getattr(request, "decoded_token", None)
user_sub = decoded_token.get("sub") if decoded_token else None
agent_key, is_shared_usage, shared_token = get_agent_key(
agent_id, user_sub
)
agent_key, is_shared_usage, shared_token = get_agent_key(agent_id, user_sub)
if agent_key:
data.update({"api_key": agent_key})
@@ -504,7 +503,7 @@ class Stream(Resource):
agent = AgentCreator.create_agent(
agent_type,
endpoint="stream",
llm_name=settings.LLM_NAME,
llm_name=settings.LLM_PROVIDER,
gpt_model=gpt_model,
api_key=settings.API_KEY,
user_api_key=user_api_key,
@@ -658,7 +657,7 @@ class Answer(Resource):
agent = AgentCreator.create_agent(
agent_type,
endpoint="api/answer",
llm_name=settings.LLM_NAME,
llm_name=settings.LLM_PROVIDER,
gpt_model=gpt_model,
api_key=settings.API_KEY,
user_api_key=user_api_key,
@@ -727,7 +726,7 @@ class Answer(Resource):
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,

View File

@@ -11,18 +11,18 @@ current_dir = os.path.dirname(
class Settings(BaseSettings):
AUTH_TYPE: Optional[str] = None
LLM_NAME: str = "docsgpt"
MODEL_NAME: Optional[str] = (
None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
LLM_PROVIDER: str = "docsgpt"
LLM_NAME: Optional[str] = (
None # if LLM_PROVIDER is openai, LLM_NAME can be gpt-4 or gpt-3.5-turbo
)
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
MONGO_DB_NAME: str = "docsgpt"
MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
DEFAULT_MAX_HISTORY: int = 150
MODEL_TOKEN_LIMITS: dict = {
LLM_TOKEN_LIMITS: dict = {
"gpt-4o-mini": 128000,
"gpt-3.5-turbo": 4096,
"claude-2": 1e5,
@@ -35,6 +35,9 @@ class Settings(BaseSettings):
)
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
AGENT_NAME: str = "classic"
FALLBACK_LLM_PROVIDER: Optional[str] = None # provider for fallback llm
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
@@ -99,8 +102,7 @@ class Settings(BaseSettings):
BRAVE_SEARCH_API_KEY: Optional[str] = None
FLASK_DEBUG_MODE: bool = False
STORAGE_TYPE: str = "local" # local or s3
STORAGE_TYPE: str = "local" # local or s3
JWT_SECRET_KEY: str = ""
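The settings changes above are mostly a rename pass plus new fallback options; a quick reference taken from the diff itself:

# Renamed settings (old -> new), defaults as in the diff above:
#   LLM_NAME           -> LLM_PROVIDER      ("docsgpt"; selects the provider/backend)
#   MODEL_NAME         -> LLM_NAME          (None; selects the model for that provider)
#   MODEL_PATH         -> LLM_PATH          ("models/docsgpt-7b-f16.gguf")
#   MODEL_TOKEN_LIMITS -> LLM_TOKEN_LIMITS  (per-model context limits)
# New optional settings: FALLBACK_LLM_PROVIDER, FALLBACK_LLM_NAME, FALLBACK_LLM_API_KEY.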

View File

@@ -1,53 +1,117 @@
import logging
from abc import ABC, abstractmethod
from application.cache import gen_cache, stream_cache
from application.core.settings import settings
from application.usage import gen_token_usage, stream_token_usage
logger = logging.getLogger(__name__)
class BaseLLM(ABC):
def __init__(self, decoded_token=None):
def __init__(
self,
decoded_token=None,
):
self.decoded_token = decoded_token
self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
self.fallback_provider = settings.FALLBACK_LLM_PROVIDER
self.fallback_model_name = settings.FALLBACK_LLM_NAME
self.fallback_llm_api_key = settings.FALLBACK_LLM_API_KEY
self._fallback_llm = None
def _apply_decorator(self, method, decorators, *args, **kwargs):
for decorator in decorators:
method = decorator(method)
return method(self, *args, **kwargs)
@property
def fallback_llm(self):
"""Lazy-loaded fallback LLM instance."""
if (
self._fallback_llm is None
and self.fallback_provider
and self.fallback_model_name
):
try:
from application.llm.llm_creator import LLMCreator
self._fallback_llm = LLMCreator.create_llm(
self.fallback_provider,
self.fallback_llm_api_key,
None,
self.decoded_token,
)
except Exception as e:
logger.error(
f"Failed to initialize fallback LLM: {str(e)}", exc_info=True
)
return self._fallback_llm
def _execute_with_fallback(
self, method_name: str, decorators: list, *args, **kwargs
):
"""
Unified method execution with fallback support.
Args:
method_name: Name of the raw method ('_raw_gen' or '_raw_gen_stream')
decorators: List of decorators to apply
*args: Positional arguments
**kwargs: Keyword arguments
"""
def decorated_method():
method = getattr(self, method_name)
for decorator in decorators:
method = decorator(method)
return method(self, *args, **kwargs)
try:
return decorated_method()
except Exception as e:
if not self.fallback_llm:
logger.error(f"Primary LLM failed and no fallback available: {str(e)}")
raise
logger.warning(
f"Falling back to {self.fallback_provider}/{self.fallback_model_name}. Error: {str(e)}"
)
fallback_method = getattr(
self.fallback_llm, method_name.replace("_raw_", "")
)
return fallback_method(*args, **kwargs)
def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
decorators = [gen_token_usage, gen_cache]
return self._execute_with_fallback(
"_raw_gen",
decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs,
)
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
decorators = [stream_cache, stream_token_usage]
return self._execute_with_fallback(
"_raw_gen_stream",
decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs,
)
@abstractmethod
def _raw_gen(self, model, messages, stream, tools, *args, **kwargs):
pass
def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
decorators = [gen_token_usage, gen_cache]
return self._apply_decorator(
self._raw_gen,
decorators=decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs
)
@abstractmethod
def _raw_gen_stream(self, model, messages, stream, *args, **kwargs):
pass
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
decorators = [stream_cache, stream_token_usage]
return self._apply_decorator(
self._raw_gen_stream,
decorators=decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs
)
def supports_tools(self):
return hasattr(self, "_supports_tools") and callable(
getattr(self, "_supports_tools")
@@ -55,11 +119,11 @@ class BaseLLM(ABC):
def _supports_tools(self):
raise NotImplementedError("Subclass must implement _supports_tools method")
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by this LLM for file uploads.
Returns:
list: List of supported MIME types
"""

View File

View File

@@ -0,0 +1,335 @@
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union
from application.logging import build_stack_data
logger = logging.getLogger(__name__)
@dataclass
class ToolCall:
"""Represents a tool/function call from the LLM."""
id: str
name: str
arguments: Union[str, Dict]
index: Optional[int] = None
@classmethod
def from_dict(cls, data: Dict) -> "ToolCall":
"""Create ToolCall from dictionary."""
return cls(
id=data.get("id", ""),
name=data.get("name", ""),
arguments=data.get("arguments", {}),
index=data.get("index"),
)
@dataclass
class LLMResponse:
"""Represents a response from the LLM."""
content: str
tool_calls: List[ToolCall]
finish_reason: str
raw_response: Any
@property
def requires_tool_call(self) -> bool:
"""Check if the response requires tool calls."""
return bool(self.tool_calls) and self.finish_reason == "tool_calls"
class LLMHandler(ABC):
"""Abstract base class for LLM handlers."""
def __init__(self):
self.llm_calls = []
self.tool_calls = []
@abstractmethod
def parse_response(self, response: Any) -> LLMResponse:
"""Parse raw LLM response into standardized format."""
pass
@abstractmethod
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create a tool result message for the conversation history."""
pass
@abstractmethod
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through streaming response chunks."""
pass
def process_message_flow(
self,
agent,
initial_response,
tools_dict: Dict,
messages: List[Dict],
attachments: Optional[List] = None,
stream: bool = False,
) -> Union[str, Generator]:
"""
Main orchestration method for processing LLM message flow.
Args:
agent: The agent instance
initial_response: Initial LLM response
tools_dict: Dictionary of available tools
messages: Conversation history
attachments: Optional attachments
stream: Whether to use streaming
Returns:
Final response or generator for streaming
"""
messages = self.prepare_messages(agent, messages, attachments)
if stream:
return self.handle_streaming(agent, initial_response, tools_dict, messages)
else:
return self.handle_non_streaming(
agent, initial_response, tools_dict, messages
)
def prepare_messages(
self, agent, messages: List[Dict], attachments: Optional[List] = None
) -> List[Dict]:
"""
Prepare messages with attachments and provider-specific formatting.
Args:
agent: The agent instance
messages: Original messages
attachments: List of attachments
Returns:
Prepared messages list
"""
if not attachments:
return messages
logger.info(f"Preparing messages with {len(attachments)} attachments")
supported_types = agent.llm.get_supported_attachment_types()
supported_attachments = [
a for a in attachments if a.get("mime_type") in supported_types
]
unsupported_attachments = [
a for a in attachments if a.get("mime_type") not in supported_types
]
# Process supported attachments with the LLM's custom method
if supported_attachments:
logger.info(
f"Processing {len(supported_attachments)} supported attachments"
)
messages = agent.llm.prepare_messages_with_attachments(
messages, supported_attachments
)
# Process unsupported attachments with default method
if unsupported_attachments:
logger.info(
f"Processing {len(unsupported_attachments)} unsupported attachments"
)
messages = self._append_unsupported_attachments(
messages, unsupported_attachments
)
return messages
def _append_unsupported_attachments(
self, messages: List[Dict], attachments: List[Dict]
) -> List[Dict]:
"""
Default method to append unsupported attachment content to system prompt.
Args:
messages: Current messages
attachments: List of unsupported attachments
Returns:
Updated messages list
"""
prepared_messages = messages.copy()
attachment_texts = []
for attachment in attachments:
logger.info(f"Adding attachment {attachment.get('id')} to context")
if "content" in attachment:
attachment_texts.append(
f"Attached file content:\n\n{attachment['content']}"
)
if attachment_texts:
combined_text = "\n\n".join(attachment_texts)
system_msg = next(
(msg for msg in prepared_messages if msg.get("role") == "system"),
{"role": "system", "content": ""},
)
if system_msg not in prepared_messages:
prepared_messages.insert(0, system_msg)
system_msg["content"] += f"\n\n{combined_text}"
return prepared_messages
def handle_tool_calls(
self, agent, tool_calls: List[ToolCall], tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Execute tool calls and update conversation history.
Args:
agent: The agent instance
tool_calls: List of tool calls to execute
tools_dict: Available tools dictionary
messages: Current conversation history
Returns:
Updated messages list
"""
updated_messages = messages.copy()
for call in tool_calls:
try:
self.tool_calls.append(call)
tool_executor_gen = agent._execute_tool_action(tools_dict, call)
while True:
try:
yield next(tool_executor_gen)
except StopIteration as e:
tool_response, call_id = e.value
break
updated_messages.append(
{
"role": "assistant",
"content": [
{
"function_call": {
"name": call.name,
"args": call.arguments,
"call_id": call_id,
}
}
],
}
)
updated_messages.append(self.create_tool_message(call, tool_response))
except Exception as e:
logger.error(f"Error executing tool: {str(e)}", exc_info=True)
updated_messages.append(
{
"role": "tool",
"content": f"Error executing tool: {str(e)}",
"tool_call_id": call.id,
}
)
return updated_messages
def handle_non_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Handle non-streaming response flow.
Args:
agent: The agent instance
response: Current LLM response
tools_dict: Available tools dictionary
messages: Conversation history
Returns:
Final response after processing all tool calls
"""
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
while parsed.requires_tool_call:
tool_handler_gen = self.handle_tool_calls(
agent, parsed.tool_calls, tools_dict, messages
)
while True:
try:
yield next(tool_handler_gen)
except StopIteration as e:
messages = e.value
break
response = agent.llm.gen(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
return parsed.content
def handle_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Handle streaming response flow.
Args:
agent: The agent instance
response: Current LLM response
tools_dict: Available tools dictionary
messages: Conversation history
Yields:
Streaming response chunks
"""
buffer = ""
tool_calls = {}
for chunk in self._iterate_stream(response):
if isinstance(chunk, str):
yield chunk
continue
parsed = self.parse_response(chunk)
if parsed.tool_calls:
for call in parsed.tool_calls:
if call.index not in tool_calls:
tool_calls[call.index] = call
else:
existing = tool_calls[call.index]
if call.id:
existing.id = call.id
if call.name:
existing.name = call.name
if call.arguments:
existing.arguments += call.arguments
if parsed.finish_reason == "tool_calls":
tool_handler_gen = self.handle_tool_calls(
agent, list(tool_calls.values()), tools_dict, messages
)
while True:
try:
yield next(tool_handler_gen)
except StopIteration as e:
messages = e.value
break
tool_calls = {}
response = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
yield from self.handle_streaming(agent, response, tools_dict, messages)
return
if parsed.content:
buffer += parsed.content
yield buffer
buffer = ""
if parsed.finish_reason == "stop":
return
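A small sketch of the contract the concrete handlers fill in: parse_response() normalizes provider output into LLMResponse, whose requires_tool_call property drives the tool loop above (the objects below are built by hand purely for illustration):

from application.llm.handlers.base import LLMResponse, ToolCall

parsed = LLMResponse(
    content="",
    tool_calls=[ToolCall(id="call-1", name="read_webpage_42", arguments='{"url": "..."}')],
    finish_reason="tool_calls",
    raw_response=None,
)
assert parsed.requires_tool_call      # True -> handle_tool_calls() runs, then the LLM is re-queried

final = LLMResponse(content="Done.", tool_calls=[], finish_reason="stop", raw_response=None)
assert not final.requires_tool_call   # plain text response, no tool loop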

View File

@@ -0,0 +1,78 @@
import uuid
from typing import Any, Dict, Generator
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
class GoogleLLMHandler(LLMHandler):
"""Handler for Google's GenAI API."""
def parse_response(self, response: Any) -> LLMResponse:
"""Parse Google response into standardized format."""
if isinstance(response, str):
return LLMResponse(
content=response,
tool_calls=[],
finish_reason="stop",
raw_response=response,
)
if hasattr(response, "candidates"):
parts = response.candidates[0].content.parts if response.candidates else []
tool_calls = [
ToolCall(
id=str(uuid.uuid4()),
name=part.function_call.name,
arguments=part.function_call.args,
)
for part in parts
if hasattr(part, "function_call") and part.function_call is not None
]
content = " ".join(
part.text
for part in parts
if hasattr(part, "text") and part.text is not None
)
return LLMResponse(
content=content,
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
else:
tool_calls = []
if hasattr(response, "function_call"):
tool_calls.append(
ToolCall(
id=str(uuid.uuid4()),
name=response.function_call.name,
arguments=response.function_call.args,
)
)
return LLMResponse(
content=response.text if hasattr(response, "text") else "",
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create Google-style tool message."""
from google.genai import types
return {
"role": "tool",
"content": [
types.Part.from_function_response(
name=tool_call.name, response={"result": result}
).to_json_dict()
],
}
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through Google streaming response."""
for chunk in response:
yield chunk

View File

@@ -0,0 +1,18 @@
from application.llm.handlers.base import LLMHandler
from application.llm.handlers.google import GoogleLLMHandler
from application.llm.handlers.openai import OpenAILLMHandler
class LLMHandlerCreator:
handlers = {
"openai": OpenAILLMHandler,
"google": GoogleLLMHandler,
"default": OpenAILLMHandler,
}
@classmethod
def create_handler(cls, llm_type: str, *args, **kwargs) -> LLMHandler:
handler_class = cls.handlers.get(llm_type.lower())
if not handler_class:
raise ValueError(f"No LLM handler class found for type {llm_type}")
return handler_class(*args, **kwargs)
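Usage is a one-liner; a short sketch (the keys come from the handlers mapping above, anything else raises ValueError):

from application.llm.handlers.handler_creator import LLMHandlerCreator

handler = LLMHandlerCreator.create_handler("openai")     # -> OpenAILLMHandler
default = LLMHandlerCreator.create_handler("default")    # -> OpenAILLMHandler as well
google = LLMHandlerCreator.create_handler("Google")      # lookup lower-cases the type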

View File

@@ -0,0 +1,57 @@
from typing import Any, Dict, Generator
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
class OpenAILLMHandler(LLMHandler):
"""Handler for OpenAI API."""
def parse_response(self, response: Any) -> LLMResponse:
"""Parse OpenAI response into standardized format."""
if isinstance(response, str):
return LLMResponse(
content=response,
tool_calls=[],
finish_reason="stop",
raw_response=response,
)
message = getattr(response, "message", None) or getattr(response, "delta", None)
tool_calls = []
if hasattr(message, "tool_calls"):
tool_calls = [
ToolCall(
id=getattr(tc, "id", ""),
name=getattr(tc.function, "name", ""),
arguments=getattr(tc.function, "arguments", ""),
index=getattr(tc, "index", None),
)
for tc in message.tool_calls or []
]
return LLMResponse(
content=getattr(message, "content", ""),
tool_calls=tool_calls,
finish_reason=getattr(response, "finish_reason", ""),
raw_response=response,
)
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create OpenAI-style tool message."""
return {
"role": "tool",
"content": [
{
"function_response": {
"name": tool_call.name,
"response": {"result": result},
"call_id": tool_call.id,
}
}
],
}
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through OpenAI streaming response."""
for chunk in response:
yield chunk

View File

@@ -2,6 +2,7 @@ from application.llm.base import BaseLLM
from application.core.settings import settings
import threading
class LlamaSingleton:
_instances = {}
_lock = threading.Lock() # Add a lock for thread synchronization
@@ -29,7 +30,7 @@ class LlamaCpp(BaseLLM):
self,
api_key=None,
user_api_key=None,
llm_name=settings.MODEL_PATH,
llm_name=settings.LLM_PATH,
*args,
**kwargs,
):
@@ -42,14 +43,18 @@ class LlamaCpp(BaseLLM):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = LlamaSingleton.query_model(self.llama, prompt, max_tokens=150, echo=False)
result = LlamaSingleton.query_model(
self.llama, prompt, max_tokens=150, echo=False
)
return result["choices"][0]["text"].split("### Answer \n")[-1]
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = LlamaSingleton.query_model(self.llama, prompt, max_tokens=150, echo=False, stream=stream)
result = LlamaSingleton.query_model(
self.llama, prompt, max_tokens=150, echo=False, stream=stream
)
for item in result:
for choice in item["choices"]:
yield choice["text"]
yield choice["text"]

View File

@@ -29,10 +29,10 @@ class BraveRetSearch(BaseRetriever):
self.token_limit = (
token_limit
if token_limit
< settings.MODEL_TOKEN_LIMITS.get(
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.MODEL_TOKEN_LIMITS.get(
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
@@ -59,7 +59,7 @@ class BraveRetSearch(BaseRetriever):
docs.append({"text": snippet, "title": title, "link": link})
except IndexError:
pass
if settings.LLM_NAME == "llama.cpp":
if settings.LLM_PROVIDER == "llama.cpp":
docs = [docs[0]]
return docs
@@ -84,7 +84,7 @@ class BraveRetSearch(BaseRetriever):
messages_combine.append({"role": "user", "content": self.question})
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=self.user_api_key,
decoded_token=self.decoded_token,

View File

@@ -16,7 +16,7 @@ class ClassicRAG(BaseRetriever):
token_limit=150,
gpt_model="docsgpt",
user_api_key=None,
llm_name=settings.LLM_NAME,
llm_name=settings.LLM_PROVIDER,
api_key=settings.API_KEY,
decoded_token=None,
):
@@ -28,10 +28,10 @@ class ClassicRAG(BaseRetriever):
self.token_limit = (
token_limit
if token_limit
< settings.MODEL_TOKEN_LIMITS.get(
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.MODEL_TOKEN_LIMITS.get(
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
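The token-limit clamp repeated across the retrievers above is a verbose minimum; a compact equivalent, shown only as a reading aid (the helper name is invented):

from application.core.settings import settings

def clamp_token_limit(token_limit: int, gpt_model: str) -> int:
    # Same effect as the conditional expression used in the retrievers above.
    model_limit = settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
    return min(token_limit, model_limit)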

View File

@@ -28,10 +28,10 @@ class DuckDuckSearch(BaseRetriever):
self.token_limit = (
token_limit
if token_limit
< settings.MODEL_TOKEN_LIMITS.get(
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.MODEL_TOKEN_LIMITS.get(
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
@@ -58,7 +58,7 @@ class DuckDuckSearch(BaseRetriever):
)
except IndexError:
pass
if settings.LLM_NAME == "llama.cpp":
if settings.LLM_PROVIDER == "llama.cpp":
docs = [docs[0]]
return docs
@@ -83,7 +83,7 @@ class DuckDuckSearch(BaseRetriever):
messages_combine.append({"role": "user", "content": self.question})
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=self.user_api_key,
decoded_token=self.decoded_token,

View File

@@ -102,8 +102,8 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
max_token_limit
if max_token_limit
and max_token_limit
< settings.MODEL_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
else settings.MODEL_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
< settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
)
if not history:

View File

@@ -143,8 +143,8 @@ def run_agent_logic(agent_config, input_data):
agent = AgentCreator.create_agent(
agent_type,
endpoint="webhook",
llm_name=settings.LLM_NAME,
gpt_model=settings.MODEL_NAME,
llm_name=settings.LLM_PROVIDER,
gpt_model=settings.LLM_NAME,
api_key=settings.API_KEY,
user_api_key=user_api_key,
prompt=prompt,
@@ -159,7 +159,7 @@ def run_agent_logic(agent_config, input_data):
prompt=prompt,
chunks=chunks,
token_limit=settings.DEFAULT_MAX_HISTORY,
gpt_model=settings.MODEL_NAME,
gpt_model=settings.LLM_NAME,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
@@ -452,7 +452,7 @@ def attachment_worker(self, file_info, user):
try:
self.update_state(state="PROGRESS", meta={"current": 10})
storage = StorageCreator.get_storage()
self.update_state(
state="PROGRESS", meta={"current": 30, "status": "Processing content"}
)
@@ -461,9 +461,11 @@ def attachment_worker(self, file_info, user):
relative_path,
lambda local_path, **kwargs: SimpleDirectoryReader(
input_files=[local_path], exclude_hidden=True, errors="ignore"
).load_data()[0].text
)
.load_data()[0]
.text,
)
token_count = num_tokens_from_string(content)
self.update_state(
@@ -491,9 +493,7 @@ def attachment_worker(self, file_info, user):
f"Stored attachment with ID: {attachment_id}", extra={"user": user}
)
self.update_state(
state="PROGRESS", meta={"current": 100, "status": "Complete"}
)
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
return {
"filename": filename,