mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
Merge branch 'main' of https://github.com/arc53/DocsGPT
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
import logging
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Generator, List, Optional
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from application.agents.tools.tool_action_parser import ToolActionParser
|
||||
from application.agents.tools.tool_manager import ToolManager
|
||||
|
||||
@@ -139,6 +142,40 @@ class BaseAgent(ABC):
|
||||
tool_id, action_name, call_args = parser.parse_args(call)
|
||||
|
||||
call_id = getattr(call, "id", None) or str(uuid.uuid4())
|
||||
|
||||
# Check if parsing failed
|
||||
if tool_id is None or action_name is None:
|
||||
error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
|
||||
logger.error(error_message)
|
||||
|
||||
tool_call_data = {
|
||||
"tool_name": "unknown",
|
||||
"call_id": call_id,
|
||||
"action_name": getattr(call, 'name', 'unknown'),
|
||||
"arguments": call_args or {},
|
||||
"result": f"Failed to parse tool call. Invalid tool name format: {getattr(call, 'name', 'unknown')}",
|
||||
}
|
||||
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
|
||||
self.tool_calls.append(tool_call_data)
|
||||
return f"Failed to parse tool call.", call_id
|
||||
|
||||
# Check if tool_id exists in available tools
|
||||
if tool_id not in tools_dict:
|
||||
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
|
||||
logger.error(error_message)
|
||||
|
||||
# Return error result
|
||||
tool_call_data = {
|
||||
"tool_name": "unknown",
|
||||
"call_id": call_id,
|
||||
"action_name": f"{action_name}_{tool_id}",
|
||||
"arguments": call_args,
|
||||
"result": f"Tool with ID {tool_id} not found. Available tools: {list(tools_dict.keys())}",
|
||||
}
|
||||
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
|
||||
self.tool_calls.append(tool_call_data)
|
||||
return f"Tool with ID {tool_id} not found.", call_id
|
||||
|
||||
tool_call_data = {
|
||||
"tool_name": tools_dict[tool_id]["name"],
|
||||
"call_id": call_id,
|
||||
|
||||
@@ -19,8 +19,20 @@ class ToolActionParser:
|
||||
def _parse_openai_llm(self, call):
|
||||
try:
|
||||
call_args = json.loads(call.arguments)
|
||||
tool_id = call.name.split("_")[-1]
|
||||
action_name = call.name.rsplit("_", 1)[0]
|
||||
tool_parts = call.name.split("_")
|
||||
|
||||
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
|
||||
if len(tool_parts) < 2:
|
||||
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
|
||||
return None, None, None
|
||||
|
||||
tool_id = tool_parts[-1]
|
||||
action_name = "_".join(tool_parts[:-1])
|
||||
|
||||
# Validate that tool_id looks like a numerical ID
|
||||
if not tool_id.isdigit():
|
||||
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
|
||||
|
||||
except (AttributeError, TypeError) as e:
|
||||
logger.error(f"Error parsing OpenAI LLM call: {e}")
|
||||
return None, None, None
|
||||
@@ -29,8 +41,20 @@ class ToolActionParser:
|
||||
def _parse_google_llm(self, call):
|
||||
try:
|
||||
call_args = call.arguments
|
||||
tool_id = call.name.split("_")[-1]
|
||||
action_name = call.name.rsplit("_", 1)[0]
|
||||
tool_parts = call.name.split("_")
|
||||
|
||||
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
|
||||
if len(tool_parts) < 2:
|
||||
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
|
||||
return None, None, None
|
||||
|
||||
tool_id = tool_parts[-1]
|
||||
action_name = "_".join(tool_parts[:-1])
|
||||
|
||||
# Validate that tool_id looks like a numerical ID
|
||||
if not tool_id.isdigit():
|
||||
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
|
||||
|
||||
except (AttributeError, TypeError) as e:
|
||||
logger.error(f"Error parsing Google LLM call: {e}")
|
||||
return None, None, None
|
||||
|
||||
@@ -6,6 +6,21 @@ from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
|
||||
def sanitize_content(content: str) -> str:
|
||||
"""
|
||||
Remove NUL characters that can cause vector store ingestion to fail.
|
||||
|
||||
Args:
|
||||
content (str): Raw content that may contain NUL characters
|
||||
|
||||
Returns:
|
||||
str: Sanitized content with NUL characters removed
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
return content.replace('\x00', '')
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def add_text_to_store_with_retry(store, doc, source_id):
|
||||
"""
|
||||
@@ -16,6 +31,9 @@ def add_text_to_store_with_retry(store, doc, source_id):
|
||||
source_id: Unique identifier for the source.
|
||||
"""
|
||||
try:
|
||||
# Sanitize content to remove NUL characters that cause ingestion failures
|
||||
doc.page_content = sanitize_content(doc.page_content)
|
||||
|
||||
doc.metadata["source_id"] = str(source_id)
|
||||
store.add_texts([doc.page_content], metadatas=[doc.metadata])
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user