From f9ad4c068afcf8cd040d6a5200e9427984290dba Mon Sep 17 00:00:00 2001
From: ManishMadan2882
Date: Thu, 3 Apr 2025 03:26:37 +0530
Subject: [PATCH] (feat:attach) fallback strategy to process docs

---
 application/agents/base.py        |  7 ++--
 application/agents/llm_handler.py | 53 +++++++++++++++++++------------
 application/llm/base.py           |  3 --
 3 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/application/agents/base.py b/application/agents/base.py
index 7dd8ceea..14ecad49 100644
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -23,7 +23,7 @@ class BaseAgent(ABC):
         prompt: str = "",
         chat_history: Optional[List[Dict]] = None,
         decoded_token: Optional[Dict] = None,
-        attachments: Optional[str]=None,
+        attachments: Optional[List[Dict]]=None,
     ):
         self.endpoint = endpoint
         self.llm_name = llm_name
@@ -44,7 +44,7 @@ class BaseAgent(ABC):
             decoded_token=decoded_token,
         )
         self.llm_handler = get_llm_handler(llm_name)
-        set.attachments = attachments or []
+        self.attachments = attachments or []
 
     @log_activity()
     def gen(
@@ -243,8 +243,9 @@ class BaseAgent(ABC):
         tools_dict: Dict,
         messages: List[Dict],
         log_context: Optional[LogContext] = None,
+        attachments: Optional[List[Dict]] = None
     ):
-        resp = self.llm_handler.handle_response(self, resp, tools_dict, messages)
+        resp = self.llm_handler.handle_response(self, resp, tools_dict, messages, attachments)
         if log_context:
             data = build_stack_data(self.llm_handler)
             log_context.stacks.append({"component": "llm_handler", "data": data})
diff --git a/application/agents/llm_handler.py b/application/agents/llm_handler.py
index 229db1ef..21d972d9 100644
--- a/application/agents/llm_handler.py
+++ b/application/agents/llm_handler.py
@@ -26,38 +26,48 @@ class LLMHandler(ABC):
             attachments (list): List of attachment dictionaries with content.
 
         Returns:
-            list: Messages with attachment context added.
+            list: Messages with attachment context added to the system prompt.
         """
         if not attachments:
             return messages
-
+
         logger.info(f"Preparing messages with {len(attachments)} attachments")
 
-        # If the LLM has its own attachment handling, use that
-        if hasattr(agent.llm, "prepare_messages_with_attachments"):
+        # Check if the LLM has its own custom attachment handling implementation
+        if hasattr(agent.llm, "prepare_messages_with_attachments") and agent.llm.__class__.__name__ != "BaseLLM":
+            logger.info(f"Using {agent.llm.__class__.__name__}'s own prepare_messages_with_attachments method")
             return agent.llm.prepare_messages_with_attachments(messages, attachments)
-
-        # Otherwise, use a generic approach:
-        # Insert attachment context after system messages, before user messages.
-        attachment_context = []
+
+        # Otherwise, append attachment content to the system prompt
+        prepared_messages = messages.copy()
+
+        # Build attachment content string
+        attachment_texts = []
         for attachment in attachments:
             logger.info(f"Adding attachment {attachment.get('id')} to context")
-            attachment_context.append({
-                "role": "system",
-                "content": f"The user has attached a file with the following content:\n\n{attachment['content']}"
-            })
-
-        system_messages = [msg for msg in messages if msg.get("role") == "system"]
-        user_messages = [msg for msg in messages if msg.get("role") != "system"]
+            if 'content' in attachment:
+                attachment_texts.append(f"Attached file content:\n\n{attachment['content']}")
 
-        return system_messages + attachment_context + user_messages
-
+        if attachment_texts:
+            combined_attachment_text = "\n\n".join(attachment_texts)
+
+            system_found = False
+            for i in range(len(prepared_messages)):
+                if prepared_messages[i].get("role") == "system":
+                    prepared_messages[i]["content"] += f"\n\n{combined_attachment_text}"
+                    system_found = True
+                    break
+
+            if not system_found:
+                prepared_messages.insert(0, {"role": "system", "content": combined_attachment_text})
+
+        return prepared_messages
 
 class OpenAILLMHandler(LLMHandler):
     def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
-
-        messages = self.prepare_messages_with_attachments(agent, messages, attachments)
+        messages = self.prepare_messages_with_attachments(agent, messages, attachments)
+        logger.info(f"Messages with attachments: {messages}")
         if not stream:
             while hasattr(resp, "finish_reason") and resp.finish_reason == "tool_calls":
                 message = json.loads(resp.model_dump_json())["message"]
@@ -96,6 +106,7 @@ class OpenAILLMHandler(LLMHandler):
                         {"role": "tool", "content": [function_response_dict]}
                     )
 
+                    messages = self.prepare_messages_with_attachments(agent, messages, attachments)
                 except Exception as e:
                     messages.append(
                         {
@@ -111,6 +122,7 @@ class OpenAILLMHandler(LLMHandler):
 
             return resp
         else:
+
             while True:
                 tool_calls = {}
                 for chunk in resp:
@@ -202,7 +214,8 @@ class OpenAILLMHandler(LLMHandler):
                     return
                 elif isinstance(chunk, str) and len(chunk) == 0:
                     continue
-
+
+                logger.info(f"Regenerating with messages: {messages}")
             resp = agent.llm.gen_stream(
                 model=agent.gpt_model, messages=messages, tools=agent.tools
             )
diff --git a/application/llm/base.py b/application/llm/base.py
index 1305a98b..0fce208c 100644
--- a/application/llm/base.py
+++ b/application/llm/base.py
@@ -55,6 +55,3 @@ class BaseLLM(ABC):
 
     def _supports_tools(self):
         raise NotImplementedError("Subclass must implement _supports_tools method")
-
-    def prepare_messages_with_attachments(self, messages, attachments=None):
-        return messages
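
A minimal sketch of what the new fallback in LLMHandler.prepare_messages_with_attachments does to a message list, assuming the message and attachment shapes used in the diff above. The values are illustrative placeholders, not part of the patch; the for/else loop below is an equivalent condensation of the system_found flag in the diff.

# Illustrative input, shapes taken from the diff above (not part of the patch)
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize the attached file."},
]
attachments = [{"id": "doc-1", "content": "Quarterly revenue grew 12%."}]

# Fallback path: collect attachment text, then append it to the existing
# system message, or insert a new system message when none exists.
attachment_texts = [
    f"Attached file content:\n\n{a['content']}" for a in attachments if "content" in a
]
if attachment_texts:
    combined = "\n\n".join(attachment_texts)
    for msg in messages:
        if msg.get("role") == "system":
            msg["content"] += f"\n\n{combined}"
            break
    else:
        messages.insert(0, {"role": "system", "content": combined})

# messages[0]["content"] is now:
# "You are a helpful assistant.\n\nAttached file content:\n\nQuarterly revenue grew 12%."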