feat: template-based prompt rendering with dynamic namespace injection (#2091)

* feat: template-based prompt rendering with dynamic namespace injection
* refactor: improve template engine initialization with clearer formatting
* refactor: streamline ReActAgent methods and improve content extraction logic
  feat: enhance error handling in NamespaceManager and TemplateEngine
  fix: update NewAgent component to ensure consistent form data submission
  test: modify tests for ReActAgent and prompt renderer to reflect method changes and improve coverage
* feat: tools namespace + three-tier token budget
* refactor: remove unused variable assignment in message building tests
* Enhance prompt customization and tool pre-fetching functionality
* ruff lint fix
* refactor: cleaner error handling and reduce code clutter

---------

Co-authored-by: Alex <a@tushynski.me>
2026-02-11 00:31:02 +00:00 · 2025-10-31 18:17:44 +05:30
parent a7d61b9d59
commit 21e5c261ef
33 changed files with 2917 additions and 646 deletions
--- a/application/agents/react_agent.py
+++ b/application/agents/react_agent.py
@@ -1,284 +1,238 @@
-import os
-from typing import Dict, Generator, List, Any
 import logging
+import os
+from typing import Any, Dict, Generator, List

 from application.agents.base import BaseAgent
 from application.logging import build_stack_data, LogContext
-from application.retriever.base import BaseRetriever

 logger = logging.getLogger(__name__)

+MAX_ITERATIONS_REASONING = 10  # upper bound on plan/execute iterations per query (loop bound in _gen_inner)
+
 current_dir = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 )
 with open(
     os.path.join(current_dir, "application/prompts", "react_planning_prompt.txt"), "r"
 ) as f:
-    planning_prompt_template = f.read()
+    PLANNING_PROMPT_TEMPLATE = f.read()  # read once at import; placeholders are filled with str.replace
 with open(
-    os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"),
-    "r",
+    os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"), "r"
 ) as f:
-    final_prompt_template = f.read()
-
-MAX_ITERATIONS_REASONING = 10
+    FINAL_PROMPT_TEMPLATE = f.read()  # read once at import; filled with str.format(query=..., observations=...)


 class ReActAgent(BaseAgent):
+    """
+    Reasoning and Acting (ReAct) Agent - Advanced reasoning agent with iterative planning.
+
+    Implements a think-act-observe loop for complex problem-solving:
+    1. Creates a strategic plan based on the query
+    2. Executes tools and gathers observations
+    3. Iteratively refines approach until satisfied
+    4. Synthesizes final answer from all observations
+    """
+
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.plan: str = ""
        self.observations: List[str] = []

-    def _extract_content_from_llm_response(self, resp: Any) -> str:
-        """
-        Helper to extract string content from various LLM response types.
-        Handles strings, message objects (OpenAI-like), and streams.
-        Adapt stream handling for your specific LLM client if not OpenAI.
-        """
-        collected_content = []
-        if isinstance(resp, str):
-            collected_content.append(resp)
-        elif (  # OpenAI non-streaming or Anthropic non-streaming (older SDK style)
-            hasattr(resp, "message")
-            and hasattr(resp.message, "content")
-            and resp.message.content is not None
-        ):
-            collected_content.append(resp.message.content)
-        elif (  # OpenAI non-streaming (Pydantic model), Anthropic new SDK non-streaming
-            hasattr(resp, "choices")
-            and resp.choices
-            and hasattr(resp.choices[0], "message")
-            and hasattr(resp.choices[0].message, "content")
-            and resp.choices[0].message.content is not None
-        ):
-            collected_content.append(resp.choices[0].message.content)  # OpenAI
-        elif (  # Anthropic new SDK non-streaming content block
-            hasattr(resp, "content")
-            and isinstance(resp.content, list)
-            and resp.content
-            and hasattr(resp.content[0], "text")
-        ):
-            collected_content.append(resp.content[0].text)  # Anthropic
-        else:
-            # Assume resp is a stream if not a recognized object
-            chunk = None
-            try:
-                for (
-                    chunk
-                ) in (
-                    resp
-                ):  # This will fail if resp is not iterable (e.g. a non-streaming response object)
-                    content_piece = ""
-                    # OpenAI-like stream
-                    if (
-                        hasattr(chunk, "choices")
-                        and len(chunk.choices) > 0
-                        and hasattr(chunk.choices[0], "delta")
-                        and hasattr(chunk.choices[0].delta, "content")
-                        and chunk.choices[0].delta.content is not None
-                    ):
-                        content_piece = chunk.choices[0].delta.content
-                    # Anthropic-like stream (ContentBlockDelta)
-                    elif (
-                        hasattr(chunk, "type")
-                        and chunk.type == "content_block_delta"
-                        and hasattr(chunk, "delta")
-                        and hasattr(chunk.delta, "text")
-                    ):
-                        content_piece = chunk.delta.text
-                    elif isinstance(chunk, str):  # Simplest case: stream of strings
-                        content_piece = chunk
-
-                    if content_piece:
-                        collected_content.append(content_piece)
-            except (
-                TypeError
-            ):  # If resp is not iterable (e.g. a final response object that wasn't caught above)
-                logger.debug(
-                    f"Response type {type(resp)} could not be iterated as a stream. It might be a non-streaming object not handled by specific checks."
-                )
-            except Exception as e:
-                logger.error(
-                    f"Error processing potential stream chunk: {e}, chunk was: {getattr(chunk, '__dict__', chunk) if chunk is not None else 'N/A'}"
-                )
-
-        return "".join(collected_content)
-
     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
     ) -> Generator[Dict, None, None]:
-        # Reset state for this generation call
-        self.plan = ""
-        self.observations = []
-        retrieved_data = self._retriever_search(retriever, query, log_context)
+        """Execute ReAct reasoning loop with planning, action, and observation cycles"""

-        if self.user_api_key:
-            tools_dict = self._get_tools(self.user_api_key)
-        else:
-            tools_dict = self._get_user_tools(self.user)
+        self._reset_state()  # drop plan/observations left over from a previous call
+
+        tools_dict = (  # tools looked up by API key when one is set, otherwise by user
+            self._get_tools(self.user_api_key)
+            if self.user_api_key
+            else self._get_user_tools(self.user)
+        )
         self._prepare_tools(tools_dict)

-        docs_together = "\n".join([doc["text"] for doc in retrieved_data])
-        iterating_reasoning = 0
-        while iterating_reasoning < MAX_ITERATIONS_REASONING:
-            iterating_reasoning += 1
-            # 1. Create Plan
-            logger.info("ReActAgent: Creating plan...")
-            plan_stream = self._create_plan(query, docs_together, log_context)
-            current_plan_parts = []
-            yield {"thought": f"Reasoning... (iteration {iterating_reasoning})\n\n"}
-            for line_chunk in plan_stream:
-                current_plan_parts.append(line_chunk)
-                yield {"thought": line_chunk}
-            self.plan = "".join(current_plan_parts)
-            if self.plan:
-                self.observations.append(
-                    f"Plan: {self.plan} Iteration: {iterating_reasoning}"
-                )
+        for iteration in range(1, MAX_ITERATIONS_REASONING + 1):  # 1-based counter, bounded loop
+            yield {"thought": f"Reasoning... (iteration {iteration})\n\n"}

-            max_obs_len = 20000
-            obs_str = "\n".join(self.observations)
-            if len(obs_str) > max_obs_len:
-                obs_str = obs_str[:max_obs_len] + "\n...[observations truncated]"
-            execution_prompt_str = (
-                (self.prompt or "")
-                + f"\n\nFollow this plan:\n{self.plan}"
-                + f"\n\nObservations:\n{obs_str}"
-                + f"\n\nIf there is enough data to complete user query '{query}', Respond with 'SATISFIED' only. Otherwise, continue. Dont Menstion 'SATISFIED' in your response if you are not ready. "
-            )
+            yield from self._planning_phase(query, log_context)  # streams "thought" chunks and sets self.plan

-            messages = self._build_messages(execution_prompt_str, query, retrieved_data)
-
-            resp_from_llm_gen = self._llm_gen(messages, log_context)
-
-            initial_llm_thought_content = self._extract_content_from_llm_response(
-                resp_from_llm_gen
-            )
-            if initial_llm_thought_content:
-                self.observations.append(
-                    f"Initial thought/response: {initial_llm_thought_content}"
-                )
-            else:
-                logger.info(
-                    "ReActAgent: Initial LLM response (before handler) had no textual content (might be only tool calls)."
-                )
-            resp_after_handler = self._llm_handler(
-                resp_from_llm_gen, tools_dict, messages, log_context
-            )
-
-            for (
-                tool_call_info
-            ) in (
-                self.tool_calls
-            ):  # Iterate over self.tool_calls populated by _llm_handler
-                observation_string = (
-                    f"Executed Action: Tool '{tool_call_info.get('tool_name', 'N/A')}' "
-                    f"with arguments '{tool_call_info.get('arguments', '{}')}'. Result: '{str(tool_call_info.get('result', ''))[:200]}...'"
-                )
-                self.observations.append(observation_string)
-
-            content_after_handler = self._extract_content_from_llm_response(
-                resp_after_handler
-            )
-            if content_after_handler:
-                self.observations.append(
-                    f"Response after tool execution: {content_after_handler}"
-                )
-            else:
-                logger.info(
-                    "ReActAgent: LLM response after handler had no textual content."
-                )
-
-            if log_context:
-                log_context.stacks.append(
-                    {
-                        "component": "agent_tool_calls",
-                        "data": {"tool_calls": self.tool_calls.copy()},
-                    }
-                )
-
-            yield {"sources": retrieved_data}
-
-            display_tool_calls = []
-            for tc in self.tool_calls:
-                cleaned_tc = tc.copy()
-                if len(str(cleaned_tc.get("result", ""))) > 50:
-                    cleaned_tc["result"] = str(cleaned_tc["result"])[:50] + "..."
-                display_tool_calls.append(cleaned_tc)
-            if display_tool_calls:
-                yield {"tool_calls": display_tool_calls}
-
-            if "SATISFIED" in content_after_handler:
-                logger.info(
-                    "ReActAgent: LLM satisfied with the plan and data. Stopping reasoning."
+            if not self.plan:  # planning produced no text: stop iterating, synthesis below still runs
+                logger.warning(
+                    f"ReActAgent: No plan generated in iteration {iteration}"
                 )
                 break
+            self.observations.append(f"Plan (iteration {iteration}): {self.plan}")  # recorded so later prompts include the plan

-        # 3. Create Final Answer based on all observations
-        final_answer_stream = self._create_final_answer(
-            query, self.observations, log_context
-        )
-        for answer_chunk in final_answer_stream:
-            yield {"answer": answer_chunk}
-        logger.info("ReActAgent: Finished generating final answer.")
+            satisfied = yield from self._execution_phase(query, tools_dict, log_context)  # bool is the sub-generator's return value

-    def _create_plan(
-        self, query: str, docs_data: str, log_context: LogContext = None
-    ) -> Generator[str, None, None]:
-        plan_prompt_filled = planning_prompt_template.replace("{query}", query)
-        if "{summaries}" in plan_prompt_filled:
-            summaries = docs_data if docs_data else "No documents retrieved."
-            plan_prompt_filled = plan_prompt_filled.replace("{summaries}", summaries)
-        plan_prompt_filled = plan_prompt_filled.replace("{prompt}", self.prompt or "")
-        plan_prompt_filled = plan_prompt_filled.replace(
-            "{observations}", "\n".join(self.observations)
-        )
+            if satisfied:
+                logger.info("ReActAgent: Goal satisfied, stopping reasoning loop")
+                break
+        yield from self._synthesis_phase(query, log_context)  # runs after the loop on every exit path, including both breaks

-        messages = [{"role": "user", "content": plan_prompt_filled}]
+    def _reset_state(self):  # called at the start of _gen_inner
+        """Reset agent state for new query"""
+        self.plan = ""  # text of the most recent plan; empty until _planning_phase fills it
+        self.observations = []  # plan / response / tool-result strings accumulated during the loop

-        plan_stream_from_llm = self.llm.gen_stream(
+    def _planning_phase(
+        self, query: str, log_context: LogContext
+    ) -> Generator[Dict, None, None]:  # yields {"thought": str}; the plan itself is left in self.plan
+        """Generate strategic plan for query"""
+        logger.info("ReActAgent: Creating plan...")
+
+        plan_prompt = self._build_planning_prompt(query)
+        messages = [{"role": "user", "content": plan_prompt}]  # single user message carrying the filled template
+
+        plan_stream = self.llm.gen_stream(
             model=self.gpt_model,
             messages=messages,
-            tools=getattr(self, "tools", None),  # Use self.tools
+            tools=self.tools if self.tools else None,  # NOTE(review): no getattr fallback any more; self.tools must exist
         )
+
         if log_context:
-            data = build_stack_data(self.llm)
-            log_context.stacks.append({"component": "planning_llm", "data": data})
-
-        for chunk in plan_stream_from_llm:
-            content_piece = self._extract_content_from_llm_response(chunk)
-            if content_piece:
-                yield content_piece
-
-    def _create_final_answer(
-        self, query: str, observations: List[str], log_context: LogContext = None
-    ) -> Generator[str, None, None]:
-        observation_string = "\n".join(observations)
-        max_obs_len = 10000
-        if len(observation_string) > max_obs_len:
-            observation_string = (
-                observation_string[:max_obs_len] + "\n...[observations truncated]"
-            )
-            logger.warning(
-                "ReActAgent: Truncated observations for final answer prompt due to length."
+            log_context.stacks.append(
+                {"component": "planning_llm", "data": build_stack_data(self.llm)}
             )
+        plan_parts = []  # non-empty text pieces, in arrival order
+        for chunk in plan_stream:
+            content = self._extract_content(chunk)
+            if content:  # chunks without text are skipped
+                plan_parts.append(content)
+                yield {"thought": content}  # forwarded to the caller as it arrives
+        self.plan = "".join(plan_parts)  # set only after the stream is exhausted; read by _gen_inner

-        final_answer_prompt_filled = final_prompt_template.format(
-            query=query, observations=observation_string
+    def _execution_phase(
+        self, query: str, tools_dict: Dict, log_context: LogContext
+    ) -> Generator[bool, None, None]:  # NOTE(review): body yields dicts and *returns* a bool, i.e. Generator[Dict, None, bool]
+        """Execute plan with tool calls and observations"""
+        execution_prompt = self._build_execution_prompt(query)
+        messages = self._build_messages(execution_prompt, query)
+
+        llm_response = self._llm_gen(messages, log_context)
+        initial_content = self._extract_content(llm_response)  # NOTE(review): iterates llm_response; if it is a one-shot stream it is consumed before _llm_handler sees it - confirm
+
+        if initial_content:
+            self.observations.append(f"Initial response: {initial_content}")
+        processed_response = self._llm_handler(
+            llm_response, tools_dict, messages, log_context
         )

-        messages = [{"role": "user", "content": final_answer_prompt_filled}]
+        for tool_call in self.tool_calls:  # self.tool_calls is set outside this method (presumably by _llm_handler - verify)
+            observation = (
+                f"Executed: {tool_call.get('tool_name', 'Unknown')} "
+                f"with args {tool_call.get('arguments', {})}. "
+                f"Result: {str(tool_call.get('result', ''))[:200]}"  # result text capped at 200 characters
+            )
+            self.observations.append(observation)
+        final_content = self._extract_content(processed_response)
+        if final_content:
+            self.observations.append(f"Response after tools: {final_content}")
+        if log_context:
+            log_context.stacks.append(
+                {
+                    "component": "agent_tool_calls",
+                    "data": {"tool_calls": self.tool_calls.copy()},  # shallow copy of the list for the log
+                }
+            )
+        yield {"sources": self.retrieved_docs}  # attribute not assigned in this class; presumably provided by BaseAgent
+        yield {"tool_calls": self._get_truncated_tool_calls()}  # emitted unconditionally; the removed code skipped empty lists

-        # Final answer should synthesize, not call tools.
-        final_answer_stream_from_llm = self.llm.gen_stream(
+        return "SATISFIED" in (final_content or "")  # substring test; value is received via `yield from` in _gen_inner
+
+    def _synthesis_phase(
+        self, query: str, log_context: LogContext
+    ) -> Generator[Dict, None, None]:  # yields {"answer": str} chunks
+        """Synthesize final answer from all observations"""
+        logger.info("ReActAgent: Generating final answer...")
+
+        final_prompt = self._build_final_answer_prompt(query)
+        messages = [{"role": "user", "content": final_prompt}]  # single user message; no prior conversation is sent
+
+        final_stream = self.llm.gen_stream(  # called with tools=None: this pass only writes the answer
             model=self.gpt_model, messages=messages, tools=None
         )
-        if log_context:
-            data = build_stack_data(self.llm)
-            log_context.stacks.append({"component": "final_answer_llm", "data": data})

-        for chunk in final_answer_stream_from_llm:
-            content_piece = self._extract_content_from_llm_response(chunk)
-            if content_piece:
-                yield content_piece
+        if log_context:
+            log_context.stacks.append(
+                {"component": "final_answer_llm", "data": build_stack_data(self.llm)}
+            )
+        for chunk in final_stream:
+            content = self._extract_content(chunk)
+            if content:  # chunks without text are skipped
+                yield {"answer": content}  # forwarded to the caller as it arrives
+
+    def _build_planning_prompt(self, query: str) -> str:
+        """Build planning phase prompt"""
+        prompt = PLANNING_PROMPT_TEMPLATE.replace("{query}", query)  # literal str.replace, so other braces in the template are untouched
+        prompt = prompt.replace("{prompt}", self.prompt or "")  # NOTE(review): replaces run in sequence, so a literal "{prompt}" inside the query is substituted too
+        prompt = prompt.replace("{summaries}", "")  # always blanked here; the removed code filled it with retrieved document text
+        prompt = prompt.replace("{observations}", "\n".join(self.observations))  # observations joined one per line, untruncated
+        return prompt
+
+    def _build_execution_prompt(self, query: str) -> str:
+        """Build execution phase prompt with plan and observations"""
+        observations_str = "\n".join(self.observations)
+
+        if len(observations_str) > 20000:  # limit is in characters, not tokens
+            observations_str = observations_str[:20000] + "\n...[truncated]"  # keeps the first 20000 characters, i.e. the earliest observations
+        return (
+            f"{self.prompt or ''}\n\n"  # a missing agent prompt becomes an empty string
+            f"Follow this plan:\n{self.plan}\n\n"
+            f"Observations:\n{observations_str}\n\n"
+            f"If sufficient data exists to answer '{query}', respond with 'SATISFIED'. "  # _execution_phase checks the reply for this substring
+            f"Otherwise, continue executing the plan."
+        )
+
+    def _build_final_answer_prompt(self, query: str) -> str:
+        """Build final synthesis prompt"""
+        observations_str = "\n".join(self.observations)
+
+        if len(observations_str) > 10000:  # character limit; half of the 20000 used for the execution prompt
+            observations_str = observations_str[:10000] + "\n...[truncated]"  # keeps the first 10000 characters
+            logger.warning("ReActAgent: Observations truncated for final answer")
+        return FINAL_PROMPT_TEMPLATE.format(query=query, observations=observations_str)  # str.format: any other {field} in the template file would raise
+
+    def _extract_content(self, response: Any) -> str:
+        """Extract text content from various LLM response formats"""
+        if not response:  # None, "" and any other falsy response give ""
+            return ""
+        collected = []  # text pieces gathered when the response is iterated as a stream
+
+        if isinstance(response, str):  # already plain text
+            return response
+        if hasattr(response, "message") and hasattr(response.message, "content"):  # shape: response.message.content
+            if response.message.content:
+                return response.message.content
+        if hasattr(response, "choices") and response.choices:  # shape: response.choices[0].message.content
+            if hasattr(response.choices[0], "message"):
+                content = response.choices[0].message.content
+                if content:
+                    return content
+        if hasattr(response, "content") and isinstance(response.content, list):  # shape: list of blocks carrying .text
+            if response.content and hasattr(response.content[0], "text"):
+                return response.content[0].text  # only the first block is read
+        try:  # no shape above returned text: treat the response as an iterable of chunks
+            for chunk in response:
+                content_piece = ""
+
+                if hasattr(chunk, "choices") and chunk.choices:  # shape: chunk.choices[0].delta.content
+                    if hasattr(chunk.choices[0], "delta"):
+                        delta_content = chunk.choices[0].delta.content
+                        if delta_content:
+                            content_piece = delta_content
+                elif hasattr(chunk, "type") and chunk.type == "content_block_delta":  # shape: chunk.delta.text
+                    if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"):
+                        content_piece = chunk.delta.text
+                elif isinstance(chunk, str):  # stream of bare strings
+                    content_piece = chunk
+                if content_piece:
+                    collected.append(content_piece)
+        except (TypeError, AttributeError):  # not iterable, or a chunk lacked an expected attribute; iteration stops here
+            logger.debug(
+                f"Response not iterable or unexpected format: {type(response)}"
+            )
+        except Exception as e:  # any other failure is logged rather than raised
+            logger.error(f"Error extracting content: {e}")
+        return "".join(collected)  # whatever was collected before any error; "" if nothing