Mirror of https://github.com/arc53/DocsGPT.git, synced 2025-11-29 08:33:20 +00:00.
feat: template-based prompt rendering with dynamic namespace injection (#2091)
* feat: template-based prompt rendering with dynamic namespace injection
* refactor: improve template engine initialization with clearer formatting
* refactor: streamline ReActAgent methods and improve content extraction logic
  feat: enhance error handling in NamespaceManager and TemplateEngine
  fix: update NewAgent component to ensure consistent form data submission
  test: modify tests for ReActAgent and prompt renderer to reflect method changes and improve coverage
* feat: tools namespace + three-tier token budget
* refactor: remove unused variable assignment in message building tests
* Enhance prompt customization and tool pre-fetching functionality
* ruff lint fix
* refactor: cleaner error handling and reduce code clutter

---------

Co-authored-by: Alex <a@tushynski.me>
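The TemplateEngine and NamespaceManager named above are not part of this diff, so as a reading aid, here is a minimal hypothetical sketch of the idea the commit message describes: a prompt template rendered with values pulled from registered namespaces. Every name and signature below is an assumption for illustration, not the PR's actual API.

from string import Template


class NamespaceManager:
    """Registry of named data sources ("namespaces") available to prompts."""

    def __init__(self):
        self._namespaces = {}

    def register(self, name, resolver):
        """resolver: a zero-argument callable returning a string."""
        self._namespaces[name] = resolver

    def resolve(self):
        resolved = {}
        for name, resolver in self._namespaces.items():
            try:
                resolved[name] = resolver()
            except Exception:
                resolved[name] = ""  # fail soft so the prompt stays renderable
        return resolved


def render_prompt(template_text, namespaces):
    # safe_substitute leaves unknown $placeholders intact instead of raising
    return Template(template_text).safe_substitute(namespaces.resolve())


ns = NamespaceManager()
ns.register("summaries", lambda: "doc1: ...")
ns.register("tools", lambda: "api_tool, memory_tool")
print(render_prompt("Context:\n$summaries\nTools: $tools", ns))

The agent hunks below consume the output of such rendering: BaseAgent now receives a pre-rendered system prompt rather than substituting {summaries} into it itself.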
@@ -12,7 +12,6 @@ from application.core.settings import settings
 from application.llm.handlers.handler_creator import LLMHandlerCreator
 from application.llm.llm_creator import LLMCreator
 from application.logging import build_stack_data, log_activity, LogContext
-from application.retriever.base import BaseRetriever
 
 logger = logging.getLogger(__name__)
 
@@ -27,6 +26,7 @@ class BaseAgent(ABC):
         user_api_key: Optional[str] = None,
         prompt: str = "",
         chat_history: Optional[List[Dict]] = None,
+        retrieved_docs: Optional[List[Dict]] = None,
         decoded_token: Optional[Dict] = None,
         attachments: Optional[List[Dict]] = None,
         json_schema: Optional[Dict] = None,
@@ -53,6 +53,7 @@ class BaseAgent(ABC):
             user_api_key=user_api_key,
             decoded_token=decoded_token,
         )
+        self.retrieved_docs = retrieved_docs or []
         self.llm_handler = LLMHandlerCreator.create_handler(
             llm_name if llm_name else "default"
         )
@@ -65,13 +66,13 @@ class BaseAgent(ABC):
 
     @log_activity()
     def gen(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext = None
+        self, query: str, log_context: LogContext = None
     ) -> Generator[Dict, None, None]:
-        yield from self._gen_inner(query, retriever, log_context)
+        yield from self._gen_inner(query, log_context)
 
     @abstractmethod
     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
    ) -> Generator[Dict, None, None]:
         pass
 
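The caller-facing effect of this signature change, sketched under the assumption that retrieval now happens before the agent is constructed (constructor keyword arguments beyond retrieved_docs are omitted because they are not shown in this hunk):

def run_query(agent_cls, retriever, query, **agent_kwargs):
    """Sketch only: retrieve first, then construct the agent."""
    retrieved_docs = retriever.search(query)  # retrieval moves to the caller
    agent = agent_cls(retrieved_docs=retrieved_docs, **agent_kwargs)
    yield from agent.gen(query)  # gen() no longer takes a retriever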
@@ -150,6 +151,7 @@ class BaseAgent(ABC):
             call_id = getattr(call, "id", None) or str(uuid.uuid4())
+
             # Check if parsing failed
 
             if tool_id is None or action_name is None:
                 error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
                 logger.error(error_message)
@@ -164,13 +166,14 @@ class BaseAgent(ABC):
                 yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
                 self.tool_calls.append(tool_call_data)
                 return "Failed to parse tool call.", call_id
+
             # Check if tool_id exists in available tools
 
             if tool_id not in tools_dict:
                 error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
                 logger.error(error_message)
+
                 # Return error result
-
                 tool_call_data = {
                     "tool_name": "unknown",
                     "call_id": call_id,
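For reference, the error event yielded in this branch has the following shape, restricted to the fields visible in the hunk:

import uuid

call_id = str(uuid.uuid4())  # fallback used when the LLM call carries no id
event = {
    "type": "tool_call",
    "data": {"tool_name": "unknown", "call_id": call_id, "status": "error"},
}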
@@ -181,7 +184,6 @@ class BaseAgent(ABC):
                 yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
                 self.tool_calls.append(tool_call_data)
                 return f"Tool with ID {tool_id} not found.", call_id
-
             tool_call_data = {
                 "tool_name": tools_dict[tool_id]["name"],
                 "call_id": call_id,
@@ -223,6 +225,7 @@ class BaseAgent(ABC):
         tm = ToolManager(config={})
+
         # Prepare tool_config and add tool_id for memory tools
 
         if tool_data["name"] == "api_tool":
             tool_config = {
                 "url": tool_data["config"]["actions"][action_name]["url"],
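The api_tool branch implies a config layout where actions maps an action name to request details. A sketch of that shape; only the url key actually appears in the hunk, the rest is assumed:

tool_data = {
    "name": "api_tool",
    "config": {
        "actions": {
            # hypothetical action entry; only "url" is visible in the diff
            "get_weather": {"url": "https://api.example.com/weather"},
        }
    },
}
action_name = "get_weather"
tool_config = {"url": tool_data["config"]["actions"][action_name]["url"]}
print(tool_config)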
@@ -234,8 +237,8 @@ class BaseAgent(ABC):
             tool_config = tool_data["config"].copy() if tool_data["config"] else {}
-            # Add tool_id from MongoDB _id for tools that need instance isolation (like memory tool)
-            tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
-
+            # Use MongoDB _id if available, otherwise fall back to enumerated tool_id
+            tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
         tool = tm.load_tool(
             tool_data["name"],
             tool_config=tool_config,
@@ -276,24 +279,14 @@ class BaseAgent(ABC):
         self,
         system_prompt: str,
         query: str,
-        retrieved_data: List[Dict],
     ) -> List[Dict]:
-        docs_with_filenames = []
-        for doc in retrieved_data:
-            filename = doc.get("filename") or doc.get("title") or doc.get("source")
-            if filename:
-                chunk_header = str(filename)
-                docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
-            else:
-                docs_with_filenames.append(doc["text"])
-        docs_together = "\n\n".join(docs_with_filenames)
-        p_chat_combine = system_prompt.replace("{summaries}", docs_together)
-        messages_combine = [{"role": "system", "content": p_chat_combine}]
+        """Build messages using pre-rendered system prompt"""
+        messages = [{"role": "system", "content": system_prompt}]
 
         for i in self.chat_history:
             if "prompt" in i and "response" in i:
-                messages_combine.append({"role": "user", "content": i["prompt"]})
-                messages_combine.append({"role": "assistant", "content": i["response"]})
+                messages.append({"role": "user", "content": i["prompt"]})
+                messages.append({"role": "assistant", "content": i["response"]})
             if "tool_calls" in i:
                 for tool_call in i["tool_calls"]:
                     call_id = tool_call.get("call_id") or str(uuid.uuid4())
@@ -313,26 +306,14 @@ class BaseAgent(ABC):
                         }
                     }
 
-                    messages_combine.append(
+                    messages.append(
                         {"role": "assistant", "content": [function_call_dict]}
                     )
-                    messages_combine.append(
+                    messages.append(
                         {"role": "tool", "content": [function_response_dict]}
                     )
-        messages_combine.append({"role": "user", "content": query})
-        return messages_combine
-
-    def _retriever_search(
-        self,
-        retriever: BaseRetriever,
-        query: str,
-        log_context: Optional[LogContext] = None,
-    ) -> List[Dict]:
-        retrieved_data = retriever.search(query)
-        if log_context:
-            data = build_stack_data(retriever, exclude_attributes=["llm"])
-            log_context.stacks.append({"component": "retriever", "data": data})
-        return retrieved_data
+        messages.append({"role": "user", "content": query})
+        return messages
 
     def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
         gen_kwargs = {"model": self.gpt_model, "messages": messages}
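After this refactor _build_messages no longer folds retrieved documents into the system prompt. With one prior turn and no tool calls it returns a list shaped like this (placeholder values):

messages = [
    {"role": "system", "content": "<pre-rendered system prompt>"},
    {"role": "user", "content": "<previous question>"},
    {"role": "assistant", "content": "<previous answer>"},
    {"role": "user", "content": "<current query>"},
]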
@@ -343,7 +324,6 @@ class BaseAgent(ABC):
             and self.tools
         ):
             gen_kwargs["tools"] = self.tools
-
         if (
             self.json_schema
             and hasattr(self.llm, "_supports_structured_output")
@@ -357,7 +337,6 @@ class BaseAgent(ABC):
                 gen_kwargs["response_format"] = structured_format
             elif self.llm_name == "google":
                 gen_kwargs["response_schema"] = structured_format
-
         resp = self.llm.gen_stream(**gen_kwargs)
 
         if log_context:
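The structured-output branch dispatches on provider. The same logic as a standalone helper for illustration; this function does not exist in the PR, and the condition guarding the OpenAI-style branch is not fully visible in the hunk, so the else here is an assumption:

def apply_structured_format(gen_kwargs, llm_name, structured_format):
    if llm_name == "google":
        gen_kwargs["response_schema"] = structured_format  # Google-style kwarg
    else:
        gen_kwargs["response_format"] = structured_format  # OpenAI-style kwarg
    return gen_kwargs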
@@ -1,32 +1,20 @@
+import logging
 from typing import Dict, Generator
 
 from application.agents.base import BaseAgent
 from application.logging import LogContext
-from application.retriever.base import BaseRetriever
-import logging
 
 logger = logging.getLogger(__name__)
 
+
 class ClassicAgent(BaseAgent):
-    """A simplified agent with clear execution flow.
-
-    Usage:
-    1. Processes a query through retrieval
-    2. Sets up available tools
-    3. Generates responses using LLM
-    4. Handles tool interactions if needed
-    5. Returns standardized outputs
-
-    Easy to extend by overriding specific steps.
-    """
+    """A simplified agent with clear execution flow"""
 
     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
     ) -> Generator[Dict, None, None]:
-        # Step 1: Retrieve relevant data
-        retrieved_data = self._retriever_search(retriever, query, log_context)
+        """Core generator function for ClassicAgent execution flow"""
 
-        # Step 2: Prepare tools
         tools_dict = (
             self._get_user_tools(self.user)
             if not self.user_api_key
@@ -34,20 +22,16 @@ class ClassicAgent(BaseAgent):
         )
         self._prepare_tools(tools_dict)
 
-        # Step 3: Build and process messages
-        messages = self._build_messages(self.prompt, query, retrieved_data)
+        messages = self._build_messages(self.prompt, query)
         llm_response = self._llm_gen(messages, log_context)
 
-        # Step 4: Handle the response
         yield from self._handle_response(
             llm_response, tools_dict, messages, log_context
         )
 
-        # Step 5: Return metadata
-        yield {"sources": retrieved_data}
+        yield {"sources": self.retrieved_docs}
         yield {"tool_calls": self._get_truncated_tool_calls()}
 
-        # Log tool calls for debugging
         log_context.stacks.append(
             {"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
         )
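A consumer of ClassicAgent's stream sees dict events keyed by payload type. A minimal assumed consumption loop; the answer key comes from the response handler, which this diff does not show:

def print_stream(agent, query):
    for event in agent.gen(query):
        if "answer" in event:
            print(event["answer"], end="")
        elif "sources" in event:
            print(f"\n[{len(event['sources'])} sources]")
        elif "tool_calls" in event:
            print(f"[{len(event['tool_calls'])} tool calls]")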
@@ -1,284 +1,238 @@
-import os
-from typing import Dict, Generator, List, Any
 import logging
+import os
+from typing import Any, Dict, Generator, List
 
 from application.agents.base import BaseAgent
 from application.logging import build_stack_data, LogContext
-from application.retriever.base import BaseRetriever
 
 logger = logging.getLogger(__name__)
 
+MAX_ITERATIONS_REASONING = 10
+
 current_dir = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 )
 with open(
     os.path.join(current_dir, "application/prompts", "react_planning_prompt.txt"), "r"
 ) as f:
-    planning_prompt_template = f.read()
+    PLANNING_PROMPT_TEMPLATE = f.read()
 with open(
-    os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"),
-    "r",
+    os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"), "r"
 ) as f:
-    final_prompt_template = f.read()
-
-MAX_ITERATIONS_REASONING = 10
+    FINAL_PROMPT_TEMPLATE = f.read()
 
 
 class ReActAgent(BaseAgent):
     """
     Research and Action (ReAct) Agent - Advanced reasoning agent with iterative planning.
 
     Implements a think-act-observe loop for complex problem-solving:
     1. Creates a strategic plan based on the query
     2. Executes tools and gathers observations
     3. Iteratively refines approach until satisfied
     4. Synthesizes final answer from all observations
     """
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.plan: str = ""
         self.observations: List[str] = []
 
-    def _extract_content_from_llm_response(self, resp: Any) -> str:
-        """
-        Helper to extract string content from various LLM response types.
-        Handles strings, message objects (OpenAI-like), and streams.
-        Adapt stream handling for your specific LLM client if not OpenAI.
-        """
-        collected_content = []
-        if isinstance(resp, str):
-            collected_content.append(resp)
-        elif (  # OpenAI non-streaming or Anthropic non-streaming (older SDK style)
-            hasattr(resp, "message")
-            and hasattr(resp.message, "content")
-            and resp.message.content is not None
-        ):
-            collected_content.append(resp.message.content)
-        elif (  # OpenAI non-streaming (Pydantic model), Anthropic new SDK non-streaming
-            hasattr(resp, "choices")
-            and resp.choices
-            and hasattr(resp.choices[0], "message")
-            and hasattr(resp.choices[0].message, "content")
-            and resp.choices[0].message.content is not None
-        ):
-            collected_content.append(resp.choices[0].message.content)  # OpenAI
-        elif (  # Anthropic new SDK non-streaming content block
-            hasattr(resp, "content")
-            and isinstance(resp.content, list)
-            and resp.content
-            and hasattr(resp.content[0], "text")
-        ):
-            collected_content.append(resp.content[0].text)  # Anthropic
-        else:
-            # Assume resp is a stream if not a recognized object
-            chunk = None
-            try:
-                for (
-                    chunk
-                ) in (
-                    resp
-                ):  # This will fail if resp is not iterable (e.g. a non-streaming response object)
-                    content_piece = ""
-                    # OpenAI-like stream
-                    if (
-                        hasattr(chunk, "choices")
-                        and len(chunk.choices) > 0
-                        and hasattr(chunk.choices[0], "delta")
-                        and hasattr(chunk.choices[0].delta, "content")
-                        and chunk.choices[0].delta.content is not None
-                    ):
-                        content_piece = chunk.choices[0].delta.content
-                    # Anthropic-like stream (ContentBlockDelta)
-                    elif (
-                        hasattr(chunk, "type")
-                        and chunk.type == "content_block_delta"
-                        and hasattr(chunk, "delta")
-                        and hasattr(chunk.delta, "text")
-                    ):
-                        content_piece = chunk.delta.text
-                    elif isinstance(chunk, str):  # Simplest case: stream of strings
-                        content_piece = chunk
-
-                    if content_piece:
-                        collected_content.append(content_piece)
-            except (
-                TypeError
-            ):  # If resp is not iterable (e.g. a final response object that wasn't caught above)
-                logger.debug(
-                    f"Response type {type(resp)} could not be iterated as a stream. It might be a non-streaming object not handled by specific checks."
-                )
-            except Exception as e:
-                logger.error(
-                    f"Error processing potential stream chunk: {e}, chunk was: {getattr(chunk, '__dict__', chunk) if chunk is not None else 'N/A'}"
-                )
-
-        return "".join(collected_content)
-
     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
     ) -> Generator[Dict, None, None]:
-        # Reset state for this generation call
-        self.plan = ""
-        self.observations = []
-        retrieved_data = self._retriever_search(retriever, query, log_context)
+        """Execute ReAct reasoning loop with planning, action, and observation cycles"""
 
-        if self.user_api_key:
-            tools_dict = self._get_tools(self.user_api_key)
-        else:
-            tools_dict = self._get_user_tools(self.user)
+        self._reset_state()
+
+        tools_dict = (
+            self._get_tools(self.user_api_key)
+            if self.user_api_key
+            else self._get_user_tools(self.user)
+        )
         self._prepare_tools(tools_dict)
 
-        docs_together = "\n".join([doc["text"] for doc in retrieved_data])
-        iterating_reasoning = 0
-        while iterating_reasoning < MAX_ITERATIONS_REASONING:
-            iterating_reasoning += 1
-            # 1. Create Plan
-            logger.info("ReActAgent: Creating plan...")
-            plan_stream = self._create_plan(query, docs_together, log_context)
-            current_plan_parts = []
-            yield {"thought": f"Reasoning... (iteration {iterating_reasoning})\n\n"}
-            for line_chunk in plan_stream:
-                current_plan_parts.append(line_chunk)
-                yield {"thought": line_chunk}
-            self.plan = "".join(current_plan_parts)
-            if self.plan:
-                self.observations.append(
-                    f"Plan: {self.plan} Iteration: {iterating_reasoning}"
-                )
+        for iteration in range(1, MAX_ITERATIONS_REASONING + 1):
+            yield {"thought": f"Reasoning... (iteration {iteration})\n\n"}
 
-            max_obs_len = 20000
-            obs_str = "\n".join(self.observations)
-            if len(obs_str) > max_obs_len:
-                obs_str = obs_str[:max_obs_len] + "\n...[observations truncated]"
-            execution_prompt_str = (
-                (self.prompt or "")
-                + f"\n\nFollow this plan:\n{self.plan}"
-                + f"\n\nObservations:\n{obs_str}"
-                + f"\n\nIf there is enough data to complete user query '{query}', Respond with 'SATISFIED' only. Otherwise, continue. Dont Menstion 'SATISFIED' in your response if you are not ready. "
-            )
+            yield from self._planning_phase(query, log_context)
 
-            messages = self._build_messages(execution_prompt_str, query, retrieved_data)
-
-            resp_from_llm_gen = self._llm_gen(messages, log_context)
-
-            initial_llm_thought_content = self._extract_content_from_llm_response(
-                resp_from_llm_gen
-            )
-            if initial_llm_thought_content:
-                self.observations.append(
-                    f"Initial thought/response: {initial_llm_thought_content}"
-                )
-            else:
-                logger.info(
-                    "ReActAgent: Initial LLM response (before handler) had no textual content (might be only tool calls)."
-                )
-            resp_after_handler = self._llm_handler(
-                resp_from_llm_gen, tools_dict, messages, log_context
-            )
-
-            for (
-                tool_call_info
-            ) in (
-                self.tool_calls
-            ):  # Iterate over self.tool_calls populated by _llm_handler
-                observation_string = (
-                    f"Executed Action: Tool '{tool_call_info.get('tool_name', 'N/A')}' "
-                    f"with arguments '{tool_call_info.get('arguments', '{}')}'. Result: '{str(tool_call_info.get('result', ''))[:200]}...'"
-                )
-                self.observations.append(observation_string)
-
-            content_after_handler = self._extract_content_from_llm_response(
-                resp_after_handler
-            )
-            if content_after_handler:
-                self.observations.append(
-                    f"Response after tool execution: {content_after_handler}"
-                )
-            else:
-                logger.info(
-                    "ReActAgent: LLM response after handler had no textual content."
-                )
-
-            if log_context:
-                log_context.stacks.append(
-                    {
-                        "component": "agent_tool_calls",
-                        "data": {"tool_calls": self.tool_calls.copy()},
-                    }
-                )
-
-            yield {"sources": retrieved_data}
-
-            display_tool_calls = []
-            for tc in self.tool_calls:
-                cleaned_tc = tc.copy()
-                if len(str(cleaned_tc.get("result", ""))) > 50:
-                    cleaned_tc["result"] = str(cleaned_tc["result"])[:50] + "..."
-                display_tool_calls.append(cleaned_tc)
-            if display_tool_calls:
-                yield {"tool_calls": display_tool_calls}
-
-            if "SATISFIED" in content_after_handler:
-                logger.info(
-                    "ReActAgent: LLM satisfied with the plan and data. Stopping reasoning."
+            if not self.plan:
+                logger.warning(
+                    f"ReActAgent: No plan generated in iteration {iteration}"
                 )
                 break
+            self.observations.append(f"Plan (iteration {iteration}): {self.plan}")
 
-        # 3. Create Final Answer based on all observations
-        final_answer_stream = self._create_final_answer(
-            query, self.observations, log_context
-        )
-        for answer_chunk in final_answer_stream:
-            yield {"answer": answer_chunk}
-        logger.info("ReActAgent: Finished generating final answer.")
+            satisfied = yield from self._execution_phase(query, tools_dict, log_context)
 
-    def _create_plan(
-        self, query: str, docs_data: str, log_context: LogContext = None
-    ) -> Generator[str, None, None]:
-        plan_prompt_filled = planning_prompt_template.replace("{query}", query)
-        if "{summaries}" in plan_prompt_filled:
-            summaries = docs_data if docs_data else "No documents retrieved."
-            plan_prompt_filled = plan_prompt_filled.replace("{summaries}", summaries)
-        plan_prompt_filled = plan_prompt_filled.replace("{prompt}", self.prompt or "")
-        plan_prompt_filled = plan_prompt_filled.replace(
-            "{observations}", "\n".join(self.observations)
-        )
+            if satisfied:
+                logger.info("ReActAgent: Goal satisfied, stopping reasoning loop")
+                break
 
+        yield from self._synthesis_phase(query, log_context)
 
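The line `satisfied = yield from self._execution_phase(...)` leans on a Python detail worth spelling out: a generator's return value becomes the value of the yield from expression. A minimal standalone demonstration:

def phase():
    yield {"thought": "working..."}
    return True  # surfaces as the value of `yield from phase()`


def loop():
    satisfied = yield from phase()
    if satisfied:
        yield {"answer": "done"}


print(list(loop()))  # [{'thought': 'working...'}, {'answer': 'done'}]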
-        messages = [{"role": "user", "content": plan_prompt_filled}]
+    def _reset_state(self):
+        """Reset agent state for new query"""
+        self.plan = ""
+        self.observations = []
 
-        plan_stream_from_llm = self.llm.gen_stream(
+    def _planning_phase(
+        self, query: str, log_context: LogContext
+    ) -> Generator[Dict, None, None]:
+        """Generate strategic plan for query"""
+        logger.info("ReActAgent: Creating plan...")
+
+        plan_prompt = self._build_planning_prompt(query)
+        messages = [{"role": "user", "content": plan_prompt}]
+
+        plan_stream = self.llm.gen_stream(
             model=self.gpt_model,
             messages=messages,
-            tools=getattr(self, "tools", None),  # Use self.tools
+            tools=self.tools if self.tools else None,
         )
 
         if log_context:
-            data = build_stack_data(self.llm)
-            log_context.stacks.append({"component": "planning_llm", "data": data})
-
-        for chunk in plan_stream_from_llm:
-            content_piece = self._extract_content_from_llm_response(chunk)
-            if content_piece:
-                yield content_piece
-
-    def _create_final_answer(
-        self, query: str, observations: List[str], log_context: LogContext = None
-    ) -> Generator[str, None, None]:
-        observation_string = "\n".join(observations)
-        max_obs_len = 10000
-        if len(observation_string) > max_obs_len:
-            observation_string = (
-                observation_string[:max_obs_len] + "\n...[observations truncated]"
-            )
-            logger.warning(
-                "ReActAgent: Truncated observations for final answer prompt due to length."
+            log_context.stacks.append(
+                {"component": "planning_llm", "data": build_stack_data(self.llm)}
             )
+        plan_parts = []
+        for chunk in plan_stream:
+            content = self._extract_content(chunk)
+            if content:
+                plan_parts.append(content)
+                yield {"thought": content}
+        self.plan = "".join(plan_parts)
 
-        final_answer_prompt_filled = final_prompt_template.format(
-            query=query, observations=observation_string
+    def _execution_phase(
+        self, query: str, tools_dict: Dict, log_context: LogContext
+    ) -> Generator[bool, None, None]:
+        """Execute plan with tool calls and observations"""
+        execution_prompt = self._build_execution_prompt(query)
+        messages = self._build_messages(execution_prompt, query)
+
+        llm_response = self._llm_gen(messages, log_context)
+        initial_content = self._extract_content(llm_response)
+
+        if initial_content:
+            self.observations.append(f"Initial response: {initial_content}")
+        processed_response = self._llm_handler(
+            llm_response, tools_dict, messages, log_context
         )
 
-        messages = [{"role": "user", "content": final_answer_prompt_filled}]
+        for tool_call in self.tool_calls:
+            observation = (
+                f"Executed: {tool_call.get('tool_name', 'Unknown')} "
+                f"with args {tool_call.get('arguments', {})}. "
+                f"Result: {str(tool_call.get('result', ''))[:200]}"
+            )
+            self.observations.append(observation)
+        final_content = self._extract_content(processed_response)
+        if final_content:
+            self.observations.append(f"Response after tools: {final_content}")
+        if log_context:
+            log_context.stacks.append(
+                {
+                    "component": "agent_tool_calls",
+                    "data": {"tool_calls": self.tool_calls.copy()},
+                }
+            )
+        yield {"sources": self.retrieved_docs}
+        yield {"tool_calls": self._get_truncated_tool_calls()}
 
-        # Final answer should synthesize, not call tools.
-        final_answer_stream_from_llm = self.llm.gen_stream(
+        return "SATISFIED" in (final_content or "")
 
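_get_truncated_tool_calls itself is not shown in this diff, but the inline loop it replaces (removed from the old _gen_inner above, with a 50-character cap on results) suggests its behavior. A reconstruction from that removed code; the real helper lives in BaseAgent and may differ:

def get_truncated_tool_calls(tool_calls, limit=50):
    """Sketch based on the removed inline loop, not the actual helper."""
    truncated = []
    for tc in tool_calls:
        cleaned = tc.copy()
        result = str(cleaned.get("result", ""))
        if len(result) > limit:
            cleaned["result"] = result[:limit] + "..."
        truncated.append(cleaned)
    return truncated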
+    def _synthesis_phase(
+        self, query: str, log_context: LogContext
+    ) -> Generator[Dict, None, None]:
+        """Synthesize final answer from all observations"""
+        logger.info("ReActAgent: Generating final answer...")
+
+        final_prompt = self._build_final_answer_prompt(query)
+        messages = [{"role": "user", "content": final_prompt}]
+
+        final_stream = self.llm.gen_stream(
             model=self.gpt_model, messages=messages, tools=None
         )
-        if log_context:
-            data = build_stack_data(self.llm)
-            log_context.stacks.append({"component": "final_answer_llm", "data": data})
-
-        for chunk in final_answer_stream_from_llm:
-            content_piece = self._extract_content_from_llm_response(chunk)
-            if content_piece:
-                yield content_piece
+        if log_context:
+            log_context.stacks.append(
+                {"component": "final_answer_llm", "data": build_stack_data(self.llm)}
+            )
+        for chunk in final_stream:
+            content = self._extract_content(chunk)
+            if content:
+                yield {"answer": content}
 
+    def _build_planning_prompt(self, query: str) -> str:
+        """Build planning phase prompt"""
+        prompt = PLANNING_PROMPT_TEMPLATE.replace("{query}", query)
+        prompt = prompt.replace("{prompt}", self.prompt or "")
+        prompt = prompt.replace("{summaries}", "")
+        prompt = prompt.replace("{observations}", "\n".join(self.observations))
+        return prompt
+
+    def _build_execution_prompt(self, query: str) -> str:
+        """Build execution phase prompt with plan and observations"""
+        observations_str = "\n".join(self.observations)
+
+        if len(observations_str) > 20000:
+            observations_str = observations_str[:20000] + "\n...[truncated]"
+        return (
+            f"{self.prompt or ''}\n\n"
+            f"Follow this plan:\n{self.plan}\n\n"
+            f"Observations:\n{observations_str}\n\n"
+            f"If sufficient data exists to answer '{query}', respond with 'SATISFIED'. "
+            f"Otherwise, continue executing the plan."
+        )
+
+    def _build_final_answer_prompt(self, query: str) -> str:
+        """Build final synthesis prompt"""
+        observations_str = "\n".join(self.observations)
+
+        if len(observations_str) > 10000:
+            observations_str = observations_str[:10000] + "\n...[truncated]"
+            logger.warning("ReActAgent: Observations truncated for final answer")
+        return FINAL_PROMPT_TEMPLATE.format(query=query, observations=observations_str)
 
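The commit message calls the budgeting scheme a three-tier token budget; what is visible in this file is two character caps applied to the joined observations with the same pattern (20000 in _build_execution_prompt, 10000 in _build_final_answer_prompt). Factored out for illustration:

def truncate_observations(observations, budget):
    # budget is a character cap, not a true token count, per the hunks above
    joined = "\n".join(observations)
    if len(joined) > budget:
        return joined[:budget] + "\n...[truncated]"
    return joined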
+    def _extract_content(self, response: Any) -> str:
+        """Extract text content from various LLM response formats"""
+        if not response:
+            return ""
+        collected = []
+
+        if isinstance(response, str):
+            return response
+        if hasattr(response, "message") and hasattr(response.message, "content"):
+            if response.message.content:
+                return response.message.content
+        if hasattr(response, "choices") and response.choices:
+            if hasattr(response.choices[0], "message"):
+                content = response.choices[0].message.content
+                if content:
+                    return content
+        if hasattr(response, "content") and isinstance(response.content, list):
+            if response.content and hasattr(response.content[0], "text"):
+                return response.content[0].text
+        try:
+            for chunk in response:
+                content_piece = ""
+
+                if hasattr(chunk, "choices") and chunk.choices:
+                    if hasattr(chunk.choices[0], "delta"):
+                        delta_content = chunk.choices[0].delta.content
+                        if delta_content:
+                            content_piece = delta_content
+                elif hasattr(chunk, "type") and chunk.type == "content_block_delta":
+                    if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"):
+                        content_piece = chunk.delta.text
+                elif isinstance(chunk, str):
+                    content_piece = chunk
+                if content_piece:
+                    collected.append(content_piece)
+        except (TypeError, AttributeError):
+            logger.debug(
+                f"Response not iterable or unexpected format: {type(response)}"
+            )
+        except Exception as e:
+            logger.error(f"Error extracting content: {e}")
+        return "".join(collected)
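A quick check of _extract_content's simplest path, the isinstance(chunk, str) branch: a stream of plain strings is concatenated. Chunks from real SDKs would exercise the choices[0].delta and content_block_delta branches instead:

chunks = iter(["The answer ", "is ", "42."])
collected = []
for chunk in chunks:
    if isinstance(chunk, str):  # simplest case: stream of strings
        collected.append(chunk)
print("".join(collected))  # -> The answer is 42.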