This commit is contained in:
GH Action - Upstream Sync
2025-08-27 01:36:38 +00:00
3 changed files with 83 additions and 4 deletions

View File

@@ -1,9 +1,12 @@
import logging
import uuid import uuid
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, Generator, List, Optional from typing import Dict, Generator, List, Optional
from bson.objectid import ObjectId from bson.objectid import ObjectId
logger = logging.getLogger(__name__)
from application.agents.tools.tool_action_parser import ToolActionParser from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager from application.agents.tools.tool_manager import ToolManager
@@ -139,6 +142,40 @@ class BaseAgent(ABC):
tool_id, action_name, call_args = parser.parse_args(call) tool_id, action_name, call_args = parser.parse_args(call)
call_id = getattr(call, "id", None) or str(uuid.uuid4()) call_id = getattr(call, "id", None) or str(uuid.uuid4())
# Check if parsing failed
if tool_id is None or action_name is None:
error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
logger.error(error_message)
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
"action_name": getattr(call, 'name', 'unknown'),
"arguments": call_args or {},
"result": f"Failed to parse tool call. Invalid tool name format: {getattr(call, 'name', 'unknown')}",
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return f"Failed to parse tool call.", call_id
# Check if tool_id exists in available tools
if tool_id not in tools_dict:
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
logger.error(error_message)
# Return error result
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
"result": f"Tool with ID {tool_id} not found. Available tools: {list(tools_dict.keys())}",
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return f"Tool with ID {tool_id} not found.", call_id
tool_call_data = { tool_call_data = {
"tool_name": tools_dict[tool_id]["name"], "tool_name": tools_dict[tool_id]["name"],
"call_id": call_id, "call_id": call_id,

View File

@@ -19,8 +19,20 @@ class ToolActionParser:
def _parse_openai_llm(self, call): def _parse_openai_llm(self, call):
try: try:
call_args = json.loads(call.arguments) call_args = json.loads(call.arguments)
tool_id = call.name.split("_")[-1] tool_parts = call.name.split("_")
action_name = call.name.rsplit("_", 1)[0]
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
if len(tool_parts) < 2:
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
return None, None, None
tool_id = tool_parts[-1]
action_name = "_".join(tool_parts[:-1])
# Validate that tool_id looks like a numerical ID
if not tool_id.isdigit():
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
except (AttributeError, TypeError) as e: except (AttributeError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}") logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None return None, None, None
@@ -29,8 +41,20 @@ class ToolActionParser:
def _parse_google_llm(self, call): def _parse_google_llm(self, call):
try: try:
call_args = call.arguments call_args = call.arguments
tool_id = call.name.split("_")[-1] tool_parts = call.name.split("_")
action_name = call.name.rsplit("_", 1)[0]
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
if len(tool_parts) < 2:
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
return None, None, None
tool_id = tool_parts[-1]
action_name = "_".join(tool_parts[:-1])
# Validate that tool_id looks like a numerical ID
if not tool_id.isdigit():
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
except (AttributeError, TypeError) as e: except (AttributeError, TypeError) as e:
logger.error(f"Error parsing Google LLM call: {e}") logger.error(f"Error parsing Google LLM call: {e}")
return None, None, None return None, None, None

View File

@@ -6,6 +6,21 @@ from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator from application.vectorstore.vector_creator import VectorCreator
def sanitize_content(content: str) -> str:
    """Remove NUL characters that can cause vector store ingestion to fail.

    Args:
        content (str): Raw content that may contain NUL characters.

    Returns:
        str: Sanitized content with NUL characters removed. Falsy input
        (an empty string or ``None``) is returned unchanged.
    """
    # Pass falsy values straight through: there is nothing to strip, and
    # calling .replace() on None would raise AttributeError.
    if not content:
        return content
    return content.replace("\x00", "")
@retry(tries=10, delay=60) @retry(tries=10, delay=60)
def add_text_to_store_with_retry(store, doc, source_id): def add_text_to_store_with_retry(store, doc, source_id):
""" """
@@ -16,6 +31,9 @@ def add_text_to_store_with_retry(store, doc, source_id):
source_id: Unique identifier for the source. source_id: Unique identifier for the source.
""" """
try: try:
# Sanitize content to remove NUL characters that cause ingestion failures
doc.page_content = sanitize_content(doc.page_content)
doc.metadata["source_id"] = str(source_id) doc.metadata["source_id"] = str(source_id)
store.add_texts([doc.page_content], metadatas=[doc.metadata]) store.add_texts([doc.page_content], metadatas=[doc.metadata])
except Exception as e: except Exception as e: