Merge branch 'main' of https://github.com/arc53/DocsGPT

2026-02-23 12:53:43 +00:00 · 2025-08-27 01:36:38 +00:00
parent 578c68205a 545caacfa3
commit f08067a161
3 changed files with 83 additions and 4 deletions
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -1,9 +1,12 @@
 import logging
 import uuid
 from abc import ABC, abstractmethod
 from typing import Dict, Generator, List, Optional
 from bson.objectid import ObjectId
 logger = logging.getLogger(__name__)
 from application.agents.tools.tool_action_parser import ToolActionParser
 from application.agents.tools.tool_manager import ToolManager
@@ -139,6 +142,40 @@ class BaseAgent(ABC):
        tool_id, action_name, call_args = parser.parse_args(call)
        call_id = getattr(call, "id", None) or str(uuid.uuid4())
        # Check if parsing failed
        if tool_id is None or action_name is None:
            error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
            logger.error(error_message)
            tool_call_data = {
                "tool_name": "unknown",
                "call_id": call_id,
                "action_name": getattr(call, 'name', 'unknown'),
                "arguments": call_args or {},
                "result": f"Failed to parse tool call. Invalid tool name format: {getattr(call, 'name', 'unknown')}",
            }
            yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
            self.tool_calls.append(tool_call_data)
            return f"Failed to parse tool call.", call_id
        # Check if tool_id exists in available tools
        if tool_id not in tools_dict:
            error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
            logger.error(error_message)
            # Return error result
            tool_call_data = {
                "tool_name": "unknown",
                "call_id": call_id,
                "action_name": f"{action_name}_{tool_id}",
                "arguments": call_args,
                "result": f"Tool with ID {tool_id} not found. Available tools: {list(tools_dict.keys())}",
            }
            yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
            self.tool_calls.append(tool_call_data)
            return f"Tool with ID {tool_id} not found.", call_id
        tool_call_data = {
            "tool_name": tools_dict[tool_id]["name"],
            "call_id": call_id,
--- a/application/agents/tools/tool_action_parser.py
+++ b/application/agents/tools/tool_action_parser.py
@@ -19,8 +19,20 @@ class ToolActionParser:
    def _parse_openai_llm(self, call):
        try:
            call_args = json.loads(call.arguments)
-            tool_id = call.name.split("_")[-1]
+            tool_parts = call.name.split("_")
-            action_name = call.name.rsplit("_", 1)[0]
+            
            # If the tool name doesn't contain an underscore, it's likely a hallucinated tool
            if len(tool_parts) < 2:
                logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
                return None, None, None
            tool_id = tool_parts[-1]
            action_name = "_".join(tool_parts[:-1])
            # Validate that tool_id looks like a numerical ID
            if not tool_id.isdigit():
                logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
        except (AttributeError, TypeError) as e:
            logger.error(f"Error parsing OpenAI LLM call: {e}")
            return None, None, None
@@ -29,8 +41,20 @@ class ToolActionParser:
    def _parse_google_llm(self, call):
        try:
            call_args = call.arguments
-            tool_id = call.name.split("_")[-1]
+            tool_parts = call.name.split("_")
-            action_name = call.name.rsplit("_", 1)[0]
+            
            # If the tool name doesn't contain an underscore, it's likely a hallucinated tool
            if len(tool_parts) < 2:
                logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
                return None, None, None
            tool_id = tool_parts[-1]
            action_name = "_".join(tool_parts[:-1])
            # Validate that tool_id looks like a numerical ID
            if not tool_id.isdigit():
                logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
        except (AttributeError, TypeError) as e:
            logger.error(f"Error parsing Google LLM call: {e}")
            return None, None, None
--- a/application/parser/embedding_pipeline.py
+++ b/application/parser/embedding_pipeline.py
@@ -6,6 +6,21 @@ from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
 def sanitize_content(content: str) -> str:
    """
    Strip NUL characters from text so vector store ingestion does not fail.

    Falsy input (for example an empty string or None) is handed back
    untouched; anything else is returned with every NUL character dropped.

    Args:
        content (str): Raw text that may contain NUL characters

    Returns:
        str: The same text with all NUL characters removed
    """
    nul_char = '\x00'
    # Short-circuit on falsy values so they are returned as-is.
    return content.replace(nul_char, '') if content else content
@retry(tries=10, delay=60)
 def add_text_to_store_with_retry(store, doc, source_id):
    """
@@ -16,6 +31,9 @@ def add_text_to_store_with_retry(store, doc, source_id):
        source_id: Unique identifier for the source.
    """
    try:
        # Sanitize content to remove NUL characters that cause ingestion failures
        doc.page_content = sanitize_content(doc.page_content)
        doc.metadata["source_id"] = str(source_id)
        store.add_texts([doc.page_content], metadatas=[doc.metadata])
    except Exception as e: