(feat:attach) add fallback strategy for processing attached documents

2025-11-29 08:33:20 +00:00 · 2025-04-03 03:26:37 +05:30
parent 19d68252cd
commit f9ad4c068a
3 changed files with 37 additions and 26 deletions
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -23,7 +23,7 @@ class BaseAgent(ABC):
        prompt: str = "",
        chat_history: Optional[List[Dict]] = None,
        decoded_token: Optional[Dict] = None,
-        attachments: Optional[str]=None,
+        attachments: Optional[List[Dict]]=None,
    ):
        self.endpoint = endpoint
        self.llm_name = llm_name
@@ -44,7 +44,7 @@ class BaseAgent(ABC):
            decoded_token=decoded_token,
        )
        self.llm_handler = get_llm_handler(llm_name)
-        set.attachments = attachments or []
+        self.attachments = attachments or []

    @log_activity()
    def gen(
@@ -243,8 +243,9 @@ class BaseAgent(ABC):
        tools_dict: Dict,
        messages: List[Dict],
        log_context: Optional[LogContext] = None,
+        attachments: Optional[List[Dict]] = None
    ):
-        resp = self.llm_handler.handle_response(self, resp, tools_dict, messages)
+        resp = self.llm_handler.handle_response(self, resp, tools_dict, messages, attachments)
        if log_context:
            data = build_stack_data(self.llm_handler)
            log_context.stacks.append({"component": "llm_handler", "data": data})
--- a/application/agents/llm_handler.py
+++ b/application/agents/llm_handler.py
@@ -26,38 +26,48 @@ class LLMHandler(ABC):
            attachments (list): List of attachment dictionaries with content.
            
        Returns:
-            list: Messages with attachment context added.
+            list: Messages with attachment context added to the system prompt.
        """
        if not attachments:
            return messages
        
        logger.info(f"Preparing messages with {len(attachments)} attachments")
        
-        # If the LLM has its own attachment handling, use that
-        if hasattr(agent.llm, "prepare_messages_with_attachments"):
+        # Check if the LLM has its own custom attachment handling implementation
+        if hasattr(agent.llm, "prepare_messages_with_attachments") and agent.llm.__class__.__name__ != "BaseLLM":
+            logger.info(f"Using {agent.llm.__class__.__name__}'s own prepare_messages_with_attachments method")
            return agent.llm.prepare_messages_with_attachments(messages, attachments)
        
-        # Otherwise, use a generic approach:
-        # Insert attachment context after system messages, before user messages.
-        attachment_context = []
+        # Otherwise, append attachment content to the system prompt
+        prepared_messages = messages.copy()
+        
+        # Build attachment content string
+        attachment_texts = []
        for attachment in attachments:
            logger.info(f"Adding attachment {attachment.get('id')} to context")
-            attachment_context.append({
-                "role": "system",
-                "content": f"The user has attached a file with the following content:\n\n{attachment['content']}"
-            })
+            if 'content' in attachment:
+                attachment_texts.append(f"Attached file content:\n\n{attachment['content']}")
        
-        system_messages = [msg for msg in messages if msg.get("role") == "system"]
-        user_messages = [msg for msg in messages if msg.get("role") != "system"]
+        if attachment_texts:
+            combined_attachment_text = "\n\n".join(attachment_texts)
            
-        return system_messages + attachment_context + user_messages
+            system_found = False
+            for i in range(len(prepared_messages)):
+                if prepared_messages[i].get("role") == "system":
+                    prepared_messages[i]["content"] += f"\n\n{combined_attachment_text}"
+                    system_found = True
+                    break
            
+            if not system_found:
+                prepared_messages.insert(0, {"role": "system", "content": combined_attachment_text})
+        
+        return prepared_messages

 class OpenAILLMHandler(LLMHandler):
    def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
        
        messages = self.prepare_messages_with_attachments(agent, messages, attachments)
-        
+        logger.info(f"Messages with attachments: {messages}")
        if not stream:
            while hasattr(resp, "finish_reason") and resp.finish_reason == "tool_calls":
                message = json.loads(resp.model_dump_json())["message"]
@@ -96,6 +106,7 @@ class OpenAILLMHandler(LLMHandler):
                            {"role": "tool", "content": [function_response_dict]}
                        )

+                        messages = self.prepare_messages_with_attachments(agent, messages, attachments)
                    except Exception as e:
                        messages.append(
                            {
@@ -111,6 +122,7 @@ class OpenAILLMHandler(LLMHandler):
            return resp

        else:
+            
            while True:
                tool_calls = {}
                for chunk in resp:
@@ -203,6 +215,7 @@ class OpenAILLMHandler(LLMHandler):
                    elif isinstance(chunk, str) and len(chunk) == 0:
                            continue
                
+                logger.info(f"Regenerating with messages: {messages}")
                resp = agent.llm.gen_stream(
                    model=agent.gpt_model, messages=messages, tools=agent.tools
                )
--- a/application/llm/base.py
+++ b/application/llm/base.py
@@ -55,6 +55,3 @@ class BaseLLM(ABC):

    def _supports_tools(self):
        raise NotImplementedError("Subclass must implement _supports_tools method")
-
-    def prepare_messages_with_attachments(self, messages, attachments=None):
-        return messages