feat: template-based prompt rendering with dynamic namespace injection (#2091)

* feat: template-based prompt rendering with dynamic namespace injection

* refactor: improve template engine initialization with clearer formatting

* refactor: streamline ReActAgent methods and improve content extraction logic

feat: enhance error handling in NamespaceManager and TemplateEngine

fix: update NewAgent component to ensure consistent form data submission

test: modify tests for ReActAgent and prompt renderer to reflect method changes and improve coverage

* feat: tools namespace + three-tier token budget

* refactor: remove unused variable assignment in message building tests

* feat: enhance prompt customization and tool pre-fetching functionality

* fix: resolve ruff lint errors

* refactor: clean up error handling and reduce code clutter

---------

Co-authored-by: Alex <a@tushynski.me>
Siddhant Rai
2025-10-31 18:17:44 +05:30
committed by GitHub
parent a7d61b9d59
commit 21e5c261ef
33 changed files with 2917 additions and 646 deletions
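
At its core, the change replaces plain {summaries} string substitution with Jinja2 rendering over a merged namespace context. A minimal sketch of the idea, assuming Jinja2 semantics and illustrative namespace keys (the repo's TemplateEngine and NamespaceManager are more involved):

from jinja2 import Environment, StrictUndefined

# Illustrative only: per-request namespaces are merged into one context
# dict, then the prompt template is rendered against it.
context = {
    "docs_together": "guide.md\nWelcome to DocsGPT...",   # pre-fetched RAG docs
    "passthrough": {"tier": "pro"},                       # from the request body
}
env = Environment(undefined=StrictUndefined)  # fail loudly on missing variables
template = env.from_string(
    "Use these sources:\n{{ docs_together }}\n\nUser tier: {{ passthrough.tier }}"
)
print(template.render(**context))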

View File

@@ -12,7 +12,6 @@ from application.core.settings import settings
from application.llm.handlers.handler_creator import LLMHandlerCreator
from application.llm.llm_creator import LLMCreator
from application.logging import build_stack_data, log_activity, LogContext
from application.retriever.base import BaseRetriever
logger = logging.getLogger(__name__)
@@ -27,6 +26,7 @@ class BaseAgent(ABC):
user_api_key: Optional[str] = None,
prompt: str = "",
chat_history: Optional[List[Dict]] = None,
retrieved_docs: Optional[List[Dict]] = None,
decoded_token: Optional[Dict] = None,
attachments: Optional[List[Dict]] = None,
json_schema: Optional[Dict] = None,
@@ -53,6 +53,7 @@ class BaseAgent(ABC):
user_api_key=user_api_key,
decoded_token=decoded_token,
)
self.retrieved_docs = retrieved_docs or []
self.llm_handler = LLMHandlerCreator.create_handler(
llm_name if llm_name else "default"
)
@@ -65,13 +66,13 @@ class BaseAgent(ABC):
@log_activity()
def gen(
self, query: str, retriever: BaseRetriever, log_context: LogContext = None
self, query: str, log_context: LogContext = None
) -> Generator[Dict, None, None]:
yield from self._gen_inner(query, retriever, log_context)
yield from self._gen_inner(query, log_context)
@abstractmethod
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
pass
@@ -150,6 +151,7 @@ class BaseAgent(ABC):
call_id = getattr(call, "id", None) or str(uuid.uuid4())
# Check if parsing failed
if tool_id is None or action_name is None:
error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
logger.error(error_message)
@@ -164,13 +166,14 @@ class BaseAgent(ABC):
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return "Failed to parse tool call.", call_id
# Check if tool_id exists in available tools
if tool_id not in tools_dict:
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
logger.error(error_message)
# Return error result
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
@@ -181,7 +184,6 @@ class BaseAgent(ABC):
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return f"Tool with ID {tool_id} not found.", call_id
tool_call_data = {
"tool_name": tools_dict[tool_id]["name"],
"call_id": call_id,
@@ -223,6 +225,7 @@ class BaseAgent(ABC):
tm = ToolManager(config={})
# Prepare tool_config and add tool_id for memory tools
if tool_data["name"] == "api_tool":
tool_config = {
"url": tool_data["config"]["actions"][action_name]["url"],
@@ -234,8 +237,8 @@ class BaseAgent(ABC):
tool_config = tool_data["config"].copy() if tool_data["config"] else {}
# Add tool_id from MongoDB _id for tools that need instance isolation (like memory tool)
# Use MongoDB _id if available, otherwise fall back to enumerated tool_id
tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
tool = tm.load_tool(
tool_data["name"],
tool_config=tool_config,
@@ -276,24 +279,14 @@ class BaseAgent(ABC):
self,
system_prompt: str,
query: str,
retrieved_data: List[Dict],
) -> List[Dict]:
docs_with_filenames = []
for doc in retrieved_data:
filename = doc.get("filename") or doc.get("title") or doc.get("source")
if filename:
chunk_header = str(filename)
docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
else:
docs_with_filenames.append(doc["text"])
docs_together = "\n\n".join(docs_with_filenames)
p_chat_combine = system_prompt.replace("{summaries}", docs_together)
messages_combine = [{"role": "system", "content": p_chat_combine}]
"""Build messages using pre-rendered system prompt"""
messages = [{"role": "system", "content": system_prompt}]
for i in self.chat_history:
if "prompt" in i and "response" in i:
messages_combine.append({"role": "user", "content": i["prompt"]})
messages_combine.append({"role": "assistant", "content": i["response"]})
messages.append({"role": "user", "content": i["prompt"]})
messages.append({"role": "assistant", "content": i["response"]})
if "tool_calls" in i:
for tool_call in i["tool_calls"]:
call_id = tool_call.get("call_id") or str(uuid.uuid4())
@@ -313,26 +306,14 @@ class BaseAgent(ABC):
}
}
messages_combine.append(
messages.append(
{"role": "assistant", "content": [function_call_dict]}
)
messages_combine.append(
messages.append(
{"role": "tool", "content": [function_response_dict]}
)
messages_combine.append({"role": "user", "content": query})
return messages_combine
def _retriever_search(
self,
retriever: BaseRetriever,
query: str,
log_context: Optional[LogContext] = None,
) -> List[Dict]:
retrieved_data = retriever.search(query)
if log_context:
data = build_stack_data(retriever, exclude_attributes=["llm"])
log_context.stacks.append({"component": "retriever", "data": data})
return retrieved_data
messages.append({"role": "user", "content": query})
return messages
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
gen_kwargs = {"model": self.gpt_model, "messages": messages}
@@ -343,7 +324,6 @@ class BaseAgent(ABC):
and self.tools
):
gen_kwargs["tools"] = self.tools
if (
self.json_schema
and hasattr(self.llm, "_supports_structured_output")
@@ -357,7 +337,6 @@ class BaseAgent(ABC):
gen_kwargs["response_format"] = structured_format
elif self.llm_name == "google":
gen_kwargs["response_schema"] = structured_format
resp = self.llm.gen_stream(**gen_kwargs)
if log_context:

View File

@@ -1,32 +1,20 @@
import logging
from typing import Dict, Generator
from application.agents.base import BaseAgent
from application.logging import LogContext
from application.retriever.base import BaseRetriever
import logging
logger = logging.getLogger(__name__)
class ClassicAgent(BaseAgent):
"""A simplified agent with clear execution flow.
Usage:
1. Processes a query through retrieval
2. Sets up available tools
3. Generates responses using LLM
4. Handles tool interactions if needed
5. Returns standardized outputs
Easy to extend by overriding specific steps.
"""
"""A simplified agent with clear execution flow"""
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
# Step 1: Retrieve relevant data
retrieved_data = self._retriever_search(retriever, query, log_context)
"""Core generator function for ClassicAgent execution flow"""
# Step 2: Prepare tools
tools_dict = (
self._get_user_tools(self.user)
if not self.user_api_key
@@ -34,20 +22,16 @@ class ClassicAgent(BaseAgent):
)
self._prepare_tools(tools_dict)
# Step 3: Build and process messages
messages = self._build_messages(self.prompt, query, retrieved_data)
messages = self._build_messages(self.prompt, query)
llm_response = self._llm_gen(messages, log_context)
# Step 4: Handle the response
yield from self._handle_response(
llm_response, tools_dict, messages, log_context
)
# Step 5: Return metadata
yield {"sources": retrieved_data}
yield {"sources": self.retrieved_docs}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Log tool calls for debugging
log_context.stacks.append(
{"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
)

View File

@@ -1,284 +1,238 @@
import os
from typing import Dict, Generator, List, Any
import logging
import os
from typing import Any, Dict, Generator, List
from application.agents.base import BaseAgent
from application.logging import build_stack_data, LogContext
from application.retriever.base import BaseRetriever
logger = logging.getLogger(__name__)
MAX_ITERATIONS_REASONING = 10
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
with open(
os.path.join(current_dir, "application/prompts", "react_planning_prompt.txt"), "r"
) as f:
planning_prompt_template = f.read()
PLANNING_PROMPT_TEMPLATE = f.read()
with open(
os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"),
"r",
os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"), "r"
) as f:
final_prompt_template = f.read()
MAX_ITERATIONS_REASONING = 10
FINAL_PROMPT_TEMPLATE = f.read()
class ReActAgent(BaseAgent):
"""
Research and Action (ReAct) Agent - Advanced reasoning agent with iterative planning.
Implements a think-act-observe loop for complex problem-solving:
1. Creates a strategic plan based on the query
2. Executes tools and gathers observations
3. Iteratively refines approach until satisfied
4. Synthesizes final answer from all observations
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.plan: str = ""
self.observations: List[str] = []
def _extract_content_from_llm_response(self, resp: Any) -> str:
"""
Helper to extract string content from various LLM response types.
Handles strings, message objects (OpenAI-like), and streams.
Adapt stream handling for your specific LLM client if not OpenAI.
"""
collected_content = []
if isinstance(resp, str):
collected_content.append(resp)
elif ( # OpenAI non-streaming or Anthropic non-streaming (older SDK style)
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
collected_content.append(resp.message.content)
elif ( # OpenAI non-streaming (Pydantic model), Anthropic new SDK non-streaming
hasattr(resp, "choices")
and resp.choices
and hasattr(resp.choices[0], "message")
and hasattr(resp.choices[0].message, "content")
and resp.choices[0].message.content is not None
):
collected_content.append(resp.choices[0].message.content) # OpenAI
elif ( # Anthropic new SDK non-streaming content block
hasattr(resp, "content")
and isinstance(resp.content, list)
and resp.content
and hasattr(resp.content[0], "text")
):
collected_content.append(resp.content[0].text) # Anthropic
else:
# Assume resp is a stream if not a recognized object
chunk = None
try:
for (
chunk
) in (
resp
): # This will fail if resp is not iterable (e.g. a non-streaming response object)
content_piece = ""
# OpenAI-like stream
if (
hasattr(chunk, "choices")
and len(chunk.choices) > 0
and hasattr(chunk.choices[0], "delta")
and hasattr(chunk.choices[0].delta, "content")
and chunk.choices[0].delta.content is not None
):
content_piece = chunk.choices[0].delta.content
# Anthropic-like stream (ContentBlockDelta)
elif (
hasattr(chunk, "type")
and chunk.type == "content_block_delta"
and hasattr(chunk, "delta")
and hasattr(chunk.delta, "text")
):
content_piece = chunk.delta.text
elif isinstance(chunk, str): # Simplest case: stream of strings
content_piece = chunk
if content_piece:
collected_content.append(content_piece)
except (
TypeError
): # If resp is not iterable (e.g. a final response object that wasn't caught above)
logger.debug(
f"Response type {type(resp)} could not be iterated as a stream. It might be a non-streaming object not handled by specific checks."
)
except Exception as e:
logger.error(
f"Error processing potential stream chunk: {e}, chunk was: {getattr(chunk, '__dict__', chunk) if chunk is not None else 'N/A'}"
)
return "".join(collected_content)
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
# Reset state for this generation call
self.plan = ""
self.observations = []
retrieved_data = self._retriever_search(retriever, query, log_context)
"""Execute ReAct reasoning loop with planning, action, and observation cycles"""
if self.user_api_key:
tools_dict = self._get_tools(self.user_api_key)
else:
tools_dict = self._get_user_tools(self.user)
self._reset_state()
tools_dict = (
self._get_tools(self.user_api_key)
if self.user_api_key
else self._get_user_tools(self.user)
)
self._prepare_tools(tools_dict)
docs_together = "\n".join([doc["text"] for doc in retrieved_data])
iterating_reasoning = 0
while iterating_reasoning < MAX_ITERATIONS_REASONING:
iterating_reasoning += 1
# 1. Create Plan
logger.info("ReActAgent: Creating plan...")
plan_stream = self._create_plan(query, docs_together, log_context)
current_plan_parts = []
yield {"thought": f"Reasoning... (iteration {iterating_reasoning})\n\n"}
for line_chunk in plan_stream:
current_plan_parts.append(line_chunk)
yield {"thought": line_chunk}
self.plan = "".join(current_plan_parts)
if self.plan:
self.observations.append(
f"Plan: {self.plan} Iteration: {iterating_reasoning}"
)
for iteration in range(1, MAX_ITERATIONS_REASONING + 1):
yield {"thought": f"Reasoning... (iteration {iteration})\n\n"}
max_obs_len = 20000
obs_str = "\n".join(self.observations)
if len(obs_str) > max_obs_len:
obs_str = obs_str[:max_obs_len] + "\n...[observations truncated]"
execution_prompt_str = (
(self.prompt or "")
+ f"\n\nFollow this plan:\n{self.plan}"
+ f"\n\nObservations:\n{obs_str}"
+ f"\n\nIf there is enough data to complete user query '{query}', Respond with 'SATISFIED' only. Otherwise, continue. Dont Menstion 'SATISFIED' in your response if you are not ready. "
)
yield from self._planning_phase(query, log_context)
messages = self._build_messages(execution_prompt_str, query, retrieved_data)
resp_from_llm_gen = self._llm_gen(messages, log_context)
initial_llm_thought_content = self._extract_content_from_llm_response(
resp_from_llm_gen
)
if initial_llm_thought_content:
self.observations.append(
f"Initial thought/response: {initial_llm_thought_content}"
)
else:
logger.info(
"ReActAgent: Initial LLM response (before handler) had no textual content (might be only tool calls)."
)
resp_after_handler = self._llm_handler(
resp_from_llm_gen, tools_dict, messages, log_context
)
for (
tool_call_info
) in (
self.tool_calls
): # Iterate over self.tool_calls populated by _llm_handler
observation_string = (
f"Executed Action: Tool '{tool_call_info.get('tool_name', 'N/A')}' "
f"with arguments '{tool_call_info.get('arguments', '{}')}'. Result: '{str(tool_call_info.get('result', ''))[:200]}...'"
)
self.observations.append(observation_string)
content_after_handler = self._extract_content_from_llm_response(
resp_after_handler
)
if content_after_handler:
self.observations.append(
f"Response after tool execution: {content_after_handler}"
)
else:
logger.info(
"ReActAgent: LLM response after handler had no textual content."
)
if log_context:
log_context.stacks.append(
{
"component": "agent_tool_calls",
"data": {"tool_calls": self.tool_calls.copy()},
}
)
yield {"sources": retrieved_data}
display_tool_calls = []
for tc in self.tool_calls:
cleaned_tc = tc.copy()
if len(str(cleaned_tc.get("result", ""))) > 50:
cleaned_tc["result"] = str(cleaned_tc["result"])[:50] + "..."
display_tool_calls.append(cleaned_tc)
if display_tool_calls:
yield {"tool_calls": display_tool_calls}
if "SATISFIED" in content_after_handler:
logger.info(
"ReActAgent: LLM satisfied with the plan and data. Stopping reasoning."
if not self.plan:
logger.warning(
f"ReActAgent: No plan generated in iteration {iteration}"
)
break
self.observations.append(f"Plan (iteration {iteration}): {self.plan}")
# 3. Create Final Answer based on all observations
final_answer_stream = self._create_final_answer(
query, self.observations, log_context
)
for answer_chunk in final_answer_stream:
yield {"answer": answer_chunk}
logger.info("ReActAgent: Finished generating final answer.")
satisfied = yield from self._execution_phase(query, tools_dict, log_context)
def _create_plan(
self, query: str, docs_data: str, log_context: LogContext = None
) -> Generator[str, None, None]:
plan_prompt_filled = planning_prompt_template.replace("{query}", query)
if "{summaries}" in plan_prompt_filled:
summaries = docs_data if docs_data else "No documents retrieved."
plan_prompt_filled = plan_prompt_filled.replace("{summaries}", summaries)
plan_prompt_filled = plan_prompt_filled.replace("{prompt}", self.prompt or "")
plan_prompt_filled = plan_prompt_filled.replace(
"{observations}", "\n".join(self.observations)
)
if satisfied:
logger.info("ReActAgent: Goal satisfied, stopping reasoning loop")
break
yield from self._synthesis_phase(query, log_context)
messages = [{"role": "user", "content": plan_prompt_filled}]
def _reset_state(self):
"""Reset agent state for new query"""
self.plan = ""
self.observations = []
plan_stream_from_llm = self.llm.gen_stream(
def _planning_phase(
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
"""Generate strategic plan for query"""
logger.info("ReActAgent: Creating plan...")
plan_prompt = self._build_planning_prompt(query)
messages = [{"role": "user", "content": plan_prompt}]
plan_stream = self.llm.gen_stream(
model=self.gpt_model,
messages=messages,
tools=getattr(self, "tools", None), # Use self.tools
tools=self.tools if self.tools else None,
)
if log_context:
data = build_stack_data(self.llm)
log_context.stacks.append({"component": "planning_llm", "data": data})
for chunk in plan_stream_from_llm:
content_piece = self._extract_content_from_llm_response(chunk)
if content_piece:
yield content_piece
def _create_final_answer(
self, query: str, observations: List[str], log_context: LogContext = None
) -> Generator[str, None, None]:
observation_string = "\n".join(observations)
max_obs_len = 10000
if len(observation_string) > max_obs_len:
observation_string = (
observation_string[:max_obs_len] + "\n...[observations truncated]"
)
logger.warning(
"ReActAgent: Truncated observations for final answer prompt due to length."
log_context.stacks.append(
{"component": "planning_llm", "data": build_stack_data(self.llm)}
)
plan_parts = []
for chunk in plan_stream:
content = self._extract_content(chunk)
if content:
plan_parts.append(content)
yield {"thought": content}
self.plan = "".join(plan_parts)
final_answer_prompt_filled = final_prompt_template.format(
query=query, observations=observation_string
def _execution_phase(
self, query: str, tools_dict: Dict, log_context: LogContext
) -> Generator[bool, None, None]:
"""Execute plan with tool calls and observations"""
execution_prompt = self._build_execution_prompt(query)
messages = self._build_messages(execution_prompt, query)
llm_response = self._llm_gen(messages, log_context)
initial_content = self._extract_content(llm_response)
if initial_content:
self.observations.append(f"Initial response: {initial_content}")
processed_response = self._llm_handler(
llm_response, tools_dict, messages, log_context
)
messages = [{"role": "user", "content": final_answer_prompt_filled}]
for tool_call in self.tool_calls:
observation = (
f"Executed: {tool_call.get('tool_name', 'Unknown')} "
f"with args {tool_call.get('arguments', {})}. "
f"Result: {str(tool_call.get('result', ''))[:200]}"
)
self.observations.append(observation)
final_content = self._extract_content(processed_response)
if final_content:
self.observations.append(f"Response after tools: {final_content}")
if log_context:
log_context.stacks.append(
{
"component": "agent_tool_calls",
"data": {"tool_calls": self.tool_calls.copy()},
}
)
yield {"sources": self.retrieved_docs}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Final answer should synthesize, not call tools.
final_answer_stream_from_llm = self.llm.gen_stream(
return "SATISFIED" in (final_content or "")
def _synthesis_phase(
self, query: str, log_context: LogContext
) -> Generator[Dict, None, None]:
"""Synthesize final answer from all observations"""
logger.info("ReActAgent: Generating final answer...")
final_prompt = self._build_final_answer_prompt(query)
messages = [{"role": "user", "content": final_prompt}]
final_stream = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=None
)
if log_context:
data = build_stack_data(self.llm)
log_context.stacks.append({"component": "final_answer_llm", "data": data})
for chunk in final_answer_stream_from_llm:
content_piece = self._extract_content_from_llm_response(chunk)
if content_piece:
yield content_piece
if log_context:
log_context.stacks.append(
{"component": "final_answer_llm", "data": build_stack_data(self.llm)}
)
for chunk in final_stream:
content = self._extract_content(chunk)
if content:
yield {"answer": content}
def _build_planning_prompt(self, query: str) -> str:
"""Build planning phase prompt"""
prompt = PLANNING_PROMPT_TEMPLATE.replace("{query}", query)
prompt = prompt.replace("{prompt}", self.prompt or "")
prompt = prompt.replace("{summaries}", "")
prompt = prompt.replace("{observations}", "\n".join(self.observations))
return prompt
def _build_execution_prompt(self, query: str) -> str:
"""Build execution phase prompt with plan and observations"""
observations_str = "\n".join(self.observations)
if len(observations_str) > 20000:
observations_str = observations_str[:20000] + "\n...[truncated]"
return (
f"{self.prompt or ''}\n\n"
f"Follow this plan:\n{self.plan}\n\n"
f"Observations:\n{observations_str}\n\n"
f"If sufficient data exists to answer '{query}', respond with 'SATISFIED'. "
f"Otherwise, continue executing the plan."
)
def _build_final_answer_prompt(self, query: str) -> str:
"""Build final synthesis prompt"""
observations_str = "\n".join(self.observations)
if len(observations_str) > 10000:
observations_str = observations_str[:10000] + "\n...[truncated]"
logger.warning("ReActAgent: Observations truncated for final answer")
return FINAL_PROMPT_TEMPLATE.format(query=query, observations=observations_str)
def _extract_content(self, response: Any) -> str:
"""Extract text content from various LLM response formats"""
if not response:
return ""
collected = []
if isinstance(response, str):
return response
if hasattr(response, "message") and hasattr(response.message, "content"):
if response.message.content:
return response.message.content
if hasattr(response, "choices") and response.choices:
if hasattr(response.choices[0], "message"):
content = response.choices[0].message.content
if content:
return content
if hasattr(response, "content") and isinstance(response.content, list):
if response.content and hasattr(response.content[0], "text"):
return response.content[0].text
try:
for chunk in response:
content_piece = ""
if hasattr(chunk, "choices") and chunk.choices:
if hasattr(chunk.choices[0], "delta"):
delta_content = chunk.choices[0].delta.content
if delta_content:
content_piece = delta_content
elif hasattr(chunk, "type") and chunk.type == "content_block_delta":
if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"):
content_piece = chunk.delta.text
elif isinstance(chunk, str):
content_piece = chunk
if content_piece:
collected.append(content_piece)
except (TypeError, AttributeError):
logger.debug(
f"Response not iterable or unexpected format: {type(response)}"
)
except Exception as e:
logger.error(f"Error extracting content: {e}")
return "".join(collected)

View File

@@ -54,6 +54,10 @@ class AnswerResource(Resource, BaseAnswerResource):
default=True,
description="Whether to save the conversation",
),
"passthrough": fields.Raw(
required=False,
description="Dynamic parameters to inject into prompt template",
),
},
)
@@ -69,8 +73,17 @@ class AnswerResource(Resource, BaseAnswerResource):
processor.initialize()
if not processor.decoded_token:
return make_response({"error": "Unauthorized"}, 401)
agent = processor.create_agent()
retriever = processor.create_retriever()
docs_together, docs_list = processor.pre_fetch_docs(
data.get("question", "")
)
tools_data = processor.pre_fetch_tools()
agent = processor.create_agent(
docs_together=docs_together,
docs=docs_list,
tools_data=tools_data,
)
if error := self.check_usage(processor.agent_config):
return error
@@ -78,7 +91,6 @@ class AnswerResource(Resource, BaseAnswerResource):
stream = self.complete_stream(
question=data["question"],
agent=agent,
retriever=retriever,
conversation_id=processor.conversation_id,
user_api_key=processor.agent_config.get("user_api_key"),
decoded_token=processor.decoded_token,

View File

@@ -3,7 +3,7 @@ import json
import logging
from typing import Any, Dict, Generator, List, Optional
from flask import Response, make_response, jsonify
from flask import jsonify, make_response, Response
from flask_restx import Namespace
from application.api.answer.services.conversation_service import ConversationService
@@ -41,9 +41,7 @@ class BaseAnswerResource:
return missing_fields
return None
def check_usage(
self, agent_config: Dict
) -> Optional[Response]:
def check_usage(self, agent_config: Dict) -> Optional[Response]:
"""Check if there is a usage limit and if it is exceeded
Args:
@@ -51,30 +49,40 @@ class BaseAnswerResource:
Returns:
None or Response if either of limits exceeded.
"""
api_key = agent_config.get("user_api_key")
if not api_key:
return None
agents_collection = self.db["agents"]
agent = agents_collection.find_one({"key": api_key})
if not agent:
return make_response(
jsonify(
{
"success": False,
"message": "Invalid API key."
}
),
401
jsonify({"success": False, "message": "Invalid API key."}), 401
)
limited_token_mode = agent.get("limited_token_mode", False)
limited_request_mode = agent.get("limited_request_mode", False)
token_limit = int(agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]))
request_limit = int(agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]))
limited_token_mode_raw = agent.get("limited_token_mode", False)
limited_request_mode_raw = agent.get("limited_request_mode", False)
limited_token_mode = (
limited_token_mode_raw
if isinstance(limited_token_mode_raw, bool)
else limited_token_mode_raw == "True"
)
limited_request_mode = (
limited_request_mode_raw
if isinstance(limited_request_mode_raw, bool)
else limited_request_mode_raw == "True"
)
token_limit = int(
agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"])
)
request_limit = int(
agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"])
)
token_usage_collection = self.db["token_usage"]
@@ -83,18 +91,20 @@ class BaseAnswerResource:
match_query = {
"timestamp": {"$gte": start_date, "$lte": end_date},
"api_key": api_key
"api_key": api_key,
}
if limited_token_mode:
token_pipeline = [
{"$match": match_query},
{
"$group": {
"_id": None,
"total_tokens": {"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}}
"total_tokens": {
"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
},
}
}
},
]
token_result = list(token_usage_collection.aggregate(token_pipeline))
daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
@@ -108,26 +118,33 @@ class BaseAnswerResource:
if not limited_token_mode and not limited_request_mode:
return None
elif limited_token_mode and token_limit > daily_token_usage:
return None
elif limited_request_mode and request_limit > daily_request_usage:
return None
return make_response(
jsonify(
{
"success": False,
"message": "Exceeding usage limit, please try again later."
}
),
429, # too many requests
token_exceeded = (
limited_token_mode and token_limit > 0 and daily_token_usage >= token_limit
)
request_exceeded = (
limited_request_mode
and request_limit > 0
and daily_request_usage >= request_limit
)
if token_exceeded or request_exceeded:
return make_response(
jsonify(
{
"success": False,
"message": "Exceeding usage limit, please try again later.",
}
),
429,
)
return None
def complete_stream(
self,
question: str,
agent: Any,
retriever: Any,
conversation_id: Optional[str],
user_api_key: Optional[str],
decoded_token: Dict[str, Any],
@@ -156,6 +173,7 @@ class BaseAnswerResource:
agent_id: ID of agent used
is_shared_usage: Flag for shared agent usage
shared_token: Token for shared agent
retrieved_docs: Pre-fetched documents for sources (optional)
Yields:
Server-sent event strings
@@ -166,7 +184,7 @@ class BaseAnswerResource:
schema_info = None
structured_chunks = []
for line in agent.gen(query=question, retriever=retriever):
for line in agent.gen(query=question):
if "answer" in line:
response_full += str(line["answer"])
if line.get("structured"):
@@ -247,7 +265,6 @@ class BaseAnswerResource:
data = json.dumps(id_data)
yield f"data: {data}\n\n"
retriever_params = retriever.get_params()
log_data = {
"action": "stream_answer",
"level": "info",
@@ -256,7 +273,6 @@ class BaseAnswerResource:
"question": question,
"response": response_full,
"sources": source_log_docs,
"retriever_params": retriever_params,
"attachments": attachment_ids,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
@@ -264,24 +280,19 @@ class BaseAnswerResource:
log_data["structured_output"] = True
if schema_info:
log_data["schema"] = schema_info
# clean up text fields to be no longer than 10000 characters
# Clean up text fields to be no longer than 10000 characters
for key, value in log_data.items():
if isinstance(value, str) and len(value) > 10000:
log_data[key] = value[:10000]
self.user_logs_collection.insert_one(log_data)
# End of stream
self.user_logs_collection.insert_one(log_data)
data = json.dumps({"type": "end"})
yield f"data: {data}\n\n"
except GeneratorExit:
# Client aborted the connection
logger.info(
f"Stream aborted by client for question: {question[:50]}... "
)
# Save partial response to database before exiting
logger.info(f"Stream aborted by client for question: {question[:50]}... ")
# Save partial response
if should_save_conversation and response_full:
try:
if isNoneDoc:
@@ -311,7 +322,9 @@ class BaseAnswerResource:
attachment_ids=attachment_ids,
)
except Exception as e:
logger.error(f"Error saving partial response: {str(e)}", exc_info=True)
logger.error(
f"Error saving partial response: {str(e)}", exc_info=True
)
raise
except Exception as e:
logger.error(f"Error in stream: {str(e)}", exc_info=True)

View File

@@ -60,6 +60,10 @@ class StreamResource(Resource, BaseAnswerResource):
"attachments": fields.List(
fields.String, required=False, description="List of attachment IDs"
),
"passthrough": fields.Raw(
required=False,
description="Dynamic parameters to inject into prompt template",
),
},
)
@@ -73,17 +77,20 @@ class StreamResource(Resource, BaseAnswerResource):
processor = StreamProcessor(data, decoded_token)
try:
processor.initialize()
agent = processor.create_agent()
retriever = processor.create_retriever()
docs_together, docs_list = processor.pre_fetch_docs(data["question"])
tools_data = processor.pre_fetch_tools()
agent = processor.create_agent(
docs_together=docs_together, docs=docs_list, tools_data=tools_data
)
if error := self.check_usage(processor.agent_config):
return error
return Response(
self.complete_stream(
question=data["question"],
agent=agent,
retriever=retriever,
conversation_id=processor.conversation_id,
user_api_key=processor.agent_config.get("user_api_key"),
decoded_token=processor.decoded_token,
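
For reference, a hedged sketch of a client request exercising the new passthrough field; the /stream URL and port are assumptions (typical DocsGPT defaults), and the prompt template is assumed to reference {{ passthrough.* }}:

import requests

payload = {
    "question": "Summarize the onboarding guide",
    # Injected into the prompt template, e.g. as {{ passthrough.tier }}
    "passthrough": {"tier": "pro", "locale": "en"},
    # Optional per-request opt-out of tool pre-fetching (see StreamProcessor)
    "disable_tool_prefetch": True,
}
resp = requests.post("http://localhost:7091/stream", json=payload, stream=True)
for line in resp.iter_lines():
    print(line.decode())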

View File

@@ -133,10 +133,9 @@ class ConversationService:
messages_summary = [
{
"role": "assistant",
"content": "Summarise following conversation in no more than 3 "
"words, respond ONLY with the summary, use the same "
"language as the user query",
"role": "system",
"content": "You are a helpful assistant that creates concise conversation titles. "
"Summarize conversations in 3 words or less using the same language as the user.",
},
{
"role": "user",

View File

@@ -0,0 +1,97 @@
import logging
from typing import Any, Dict, Optional
from application.templates.namespaces import NamespaceManager
from application.templates.template_engine import TemplateEngine, TemplateRenderError
logger = logging.getLogger(__name__)
class PromptRenderer:
"""Service for rendering prompts with dynamic context using namespaces"""
def __init__(self):
self.template_engine = TemplateEngine()
self.namespace_manager = NamespaceManager()
def render_prompt(
self,
prompt_content: str,
user_id: Optional[str] = None,
request_id: Optional[str] = None,
passthrough_data: Optional[Dict[str, Any]] = None,
docs: Optional[list] = None,
docs_together: Optional[str] = None,
tools_data: Optional[Dict[str, Any]] = None,
**kwargs,
) -> str:
"""
Render prompt with full context from all namespaces.
Args:
prompt_content: Raw prompt template string
user_id: Current user identifier
request_id: Unique request identifier
passthrough_data: Parameters from web request
docs: RAG retrieved documents
docs_together: Concatenated document content
tools_data: Pre-fetched tool results organized by tool name
**kwargs: Additional parameters for namespace builders
Returns:
Rendered prompt string with all variables substituted
Raises:
TemplateRenderError: If template rendering fails
"""
if not prompt_content:
return ""
uses_template = self._uses_template_syntax(prompt_content)
if not uses_template:
return self._apply_legacy_substitutions(prompt_content, docs_together)
try:
context = self.namespace_manager.build_context(
user_id=user_id,
request_id=request_id,
passthrough_data=passthrough_data,
docs=docs,
docs_together=docs_together,
tools_data=tools_data,
**kwargs,
)
return self.template_engine.render(prompt_content, context)
except TemplateRenderError:
raise
except Exception as e:
error_msg = f"Prompt rendering failed: {str(e)}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
def _uses_template_syntax(self, prompt_content: str) -> bool:
"""Check if prompt uses Jinja2 template syntax"""
return "{{" in prompt_content and "}}" in prompt_content
def _apply_legacy_substitutions(
self, prompt_content: str, docs_together: Optional[str] = None
) -> str:
"""
Apply backward-compatible substitutions for old prompt format.
Handles legacy {summaries} and {query} placeholders during transition period.
"""
if docs_together:
prompt_content = prompt_content.replace("{summaries}", docs_together)
return prompt_content
def validate_template(self, prompt_content: str) -> bool:
"""Validate prompt template syntax"""
return self.template_engine.validate_template(prompt_content)
def extract_variables(self, prompt_content: str) -> set[str]:
"""Extract all variable names from prompt template"""
return self.template_engine.extract_variables(prompt_content)
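
A hedged usage sketch for the renderer above; the docs_together context key is an assumption about what NamespaceManager.build_context exposes:

renderer = PromptRenderer()

# Jinja2-style prompts go through the template engine:
rendered = renderer.render_prompt(
    prompt_content="Answer using:\n{{ docs_together }}",
    user_id="user-123",
    docs_together="guide.md\nWelcome to DocsGPT...",
)

# Prompts without {{ }} take the legacy path: only {summaries} is replaced.
legacy = renderer.render_prompt(
    prompt_content="Context: {summaries}",
    docs_together="guide.md\nWelcome to DocsGPT...",
)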

View File

@@ -3,7 +3,7 @@ import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Set
from bson.dbref import DBRef
@@ -11,10 +11,15 @@ from bson.objectid import ObjectId
from application.agents.agent_creator import AgentCreator
from application.api.answer.services.conversation_service import ConversationService
from application.api.answer.services.prompt_renderer import PromptRenderer
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.retriever.retriever_creator import RetrieverCreator
from application.utils import get_gpt_model, limit_chat_history
from application.utils import (
calculate_doc_token_budget,
get_gpt_model,
limit_chat_history,
)
logger = logging.getLogger(__name__)
@@ -73,12 +78,16 @@ class StreamProcessor:
self.all_sources = []
self.attachments = []
self.history = []
self.retrieved_docs = []
self.agent_config = {}
self.retriever_config = {}
self.is_shared_usage = False
self.shared_token = None
self.gpt_model = get_gpt_model()
self.conversation_service = ConversationService()
self.prompt_renderer = PromptRenderer()
self._prompt_content: Optional[str] = None
self._required_tool_actions: Optional[Dict[str, Set[Optional[str]]]] = None
def initialize(self):
"""Initialize all required components for processing"""
@@ -311,19 +320,312 @@ class StreamProcessor:
)
def _configure_retriever(self):
"""Configure the retriever based on request data"""
history_token_limit = int(self.data.get("token_limit", 2000))
doc_token_limit = calculate_doc_token_budget(
gpt_model=self.gpt_model, history_token_limit=history_token_limit
)
self.retriever_config = {
"retriever_name": self.data.get("retriever", "classic"),
"chunks": int(self.data.get("chunks", 2)),
"token_limit": self.data.get("token_limit", settings.DEFAULT_MAX_HISTORY),
"doc_token_limit": doc_token_limit,
"history_token_limit": history_token_limit,
}
api_key = self.data.get("api_key") or self.agent_key
if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
self.retriever_config["chunks"] = 0
def create_agent(self):
"""Create and return the configured agent"""
def create_retriever(self):
return RetrieverCreator.create_retriever(
self.retriever_config["retriever_name"],
source=self.source,
chat_history=self.history,
prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
chunks=self.retriever_config["chunks"],
doc_token_limit=self.retriever_config.get("doc_token_limit", 50000),
gpt_model=self.gpt_model,
user_api_key=self.agent_config["user_api_key"],
decoded_token=self.decoded_token,
)
def pre_fetch_docs(self, question: str) -> tuple[Optional[str], Optional[list]]:
"""Pre-fetch documents for template rendering before agent creation"""
if self.data.get("isNoneDoc", False):
logger.info("Pre-fetch skipped: isNoneDoc=True")
return None, None
try:
retriever = self.create_retriever()
logger.info(
f"Pre-fetching docs with chunks={retriever.chunks}, doc_token_limit={retriever.doc_token_limit}"
)
docs = retriever.search(question)
logger.info(f"Pre-fetch retrieved {len(docs) if docs else 0} documents")
if not docs:
logger.info("Pre-fetch: No documents returned from search")
return None, None
self.retrieved_docs = docs
docs_with_filenames = []
for doc in docs:
filename = doc.get("filename") or doc.get("title") or doc.get("source")
if filename:
chunk_header = str(filename)
docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
else:
docs_with_filenames.append(doc["text"])
docs_together = "\n\n".join(docs_with_filenames)
logger.info(f"Pre-fetch docs_together size: {len(docs_together)} chars")
return docs_together, docs
except Exception as e:
logger.error(f"Failed to pre-fetch docs: {str(e)}", exc_info=True)
return None, None
def pre_fetch_tools(self) -> Optional[Dict[str, Any]]:
"""Pre-fetch tool data for template rendering before agent creation
Can be controlled via:
1. Global setting: ENABLE_TOOL_PREFETCH in .env
2. Per-request: disable_tool_prefetch in request data
"""
if not settings.ENABLE_TOOL_PREFETCH:
logger.info(
"Tool pre-fetching disabled globally via ENABLE_TOOL_PREFETCH setting"
)
return None
if self.data.get("disable_tool_prefetch", False):
logger.info("Tool pre-fetching disabled for this request")
return None
required_tool_actions = self._get_required_tool_actions()
filtering_enabled = required_tool_actions is not None
try:
user_tools_collection = self.db["user_tools"]
user_id = self.initial_user_id or "local"
user_tools = list(
user_tools_collection.find({"user": user_id, "status": True})
)
if not user_tools:
return None
tools_data = {}
for tool_doc in user_tools:
tool_name = tool_doc.get("name")
tool_id = str(tool_doc.get("_id"))
if filtering_enabled:
required_actions_by_name = required_tool_actions.get(
tool_name, set()
)
required_actions_by_id = required_tool_actions.get(tool_id, set())
required_actions = required_actions_by_name | required_actions_by_id
if not required_actions:
continue
else:
required_actions = None
tool_data = self._fetch_tool_data(tool_doc, required_actions)
if tool_data:
tools_data[tool_name] = tool_data
tools_data[tool_id] = tool_data
return tools_data if tools_data else None
except Exception as e:
logger.warning(f"Failed to pre-fetch tools: {type(e).__name__}")
return None
def _fetch_tool_data(
self,
tool_doc: Dict[str, Any],
required_actions: Optional[Set[Optional[str]]],
) -> Optional[Dict[str, Any]]:
"""Fetch and execute tool actions with saved parameters"""
try:
from application.agents.tools.tool_manager import ToolManager
tool_name = tool_doc.get("name")
tool_config = tool_doc.get("config", {}).copy()
tool_config["tool_id"] = str(tool_doc["_id"])
tool_manager = ToolManager(config={tool_name: tool_config})
user_id = self.initial_user_id or "local"
tool = tool_manager.load_tool(tool_name, tool_config, user_id=user_id)
if not tool:
logger.debug(f"Tool '{tool_name}' failed to load")
return None
tool_actions = tool.get_actions_metadata()
if not tool_actions:
logger.debug(f"Tool '{tool_name}' has no actions")
return None
saved_actions = tool_doc.get("actions", [])
include_all_actions = required_actions is None or (
required_actions and None in required_actions
)
allowed_actions: Set[str] = (
{action for action in required_actions if isinstance(action, str)}
if required_actions
else set()
)
action_results = {}
for action_meta in tool_actions:
action_name = action_meta.get("name")
if action_name is None:
continue
if (
not include_all_actions
and allowed_actions
and action_name not in allowed_actions
):
continue
try:
saved_action = None
for sa in saved_actions:
if sa.get("name") == action_name:
saved_action = sa
break
action_params = action_meta.get("parameters", {})
properties = action_params.get("properties", {})
kwargs = {}
for param_name, param_spec in properties.items():
if saved_action:
saved_props = saved_action.get("parameters", {}).get(
"properties", {}
)
if param_name in saved_props:
param_value = saved_props[param_name].get("value")
if param_value is not None:
kwargs[param_name] = param_value
continue
if param_name in tool_config:
kwargs[param_name] = tool_config[param_name]
elif "default" in param_spec:
kwargs[param_name] = param_spec["default"]
result = tool.execute_action(action_name, **kwargs)
action_results[action_name] = result
except Exception as e:
logger.debug(
f"Action '{action_name}' execution failed: {type(e).__name__}"
)
continue
return action_results if action_results else None
except Exception as e:
logger.debug(f"Tool pre-fetch failed for '{tool_name}': {type(e).__name__}")
return None
def _get_prompt_content(self) -> Optional[str]:
"""Retrieve and cache the raw prompt content for the current agent configuration."""
if self._prompt_content is not None:
return self._prompt_content
prompt_id = (
self.agent_config.get("prompt_id")
if isinstance(self.agent_config, dict)
else None
)
if not prompt_id:
return None
try:
self._prompt_content = get_prompt(prompt_id, self.prompts_collection)
except ValueError as e:
logger.debug(f"Invalid prompt ID '{prompt_id}': {str(e)}")
self._prompt_content = None
except Exception as e:
logger.debug(f"Failed to fetch prompt '{prompt_id}': {type(e).__name__}")
self._prompt_content = None
return self._prompt_content
def _get_required_tool_actions(self) -> Optional[Dict[str, Set[Optional[str]]]]:
"""Determine which tool actions are referenced in the prompt template"""
if self._required_tool_actions is not None:
return self._required_tool_actions
prompt_content = self._get_prompt_content()
if prompt_content is None:
return None
if "{{" not in prompt_content or "}}" not in prompt_content:
self._required_tool_actions = {}
return self._required_tool_actions
try:
from application.templates.template_engine import TemplateEngine
template_engine = TemplateEngine()
usages = template_engine.extract_tool_usages(prompt_content)
self._required_tool_actions = usages
return self._required_tool_actions
except Exception as e:
logger.debug(f"Failed to extract tool usages: {type(e).__name__}")
self._required_tool_actions = {}
return self._required_tool_actions
def _fetch_memory_tool_data(
self, tool_doc: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
"""Fetch memory tool data for pre-injection into prompt"""
try:
tool_config = tool_doc.get("config", {}).copy()
tool_config["tool_id"] = str(tool_doc["_id"])
from application.agents.tools.memory import MemoryTool
memory_tool = MemoryTool(tool_config, self.initial_user_id)
root_view = memory_tool.execute_action("view", path="/")
if "Error:" in root_view or not root_view.strip():
return None
return {"root": root_view, "available": True}
except Exception as e:
logger.warning(f"Failed to fetch memory tool data: {str(e)}")
return None
def create_agent(
self,
docs_together: Optional[str] = None,
docs: Optional[list] = None,
tools_data: Optional[Dict[str, Any]] = None,
):
"""Create and return the configured agent with rendered prompt"""
raw_prompt = self._get_prompt_content()
if raw_prompt is None:
raw_prompt = get_prompt(
self.agent_config["prompt_id"], self.prompts_collection
)
self._prompt_content = raw_prompt
rendered_prompt = self.prompt_renderer.render_prompt(
prompt_content=raw_prompt,
user_id=self.initial_user_id,
request_id=self.data.get("request_id"),
passthrough_data=self.data.get("passthrough"),
docs=docs,
docs_together=docs_together,
tools_data=tools_data,
)
return AgentCreator.create_agent(
self.agent_config["agent_type"],
endpoint="stream",
@@ -331,23 +633,10 @@ class StreamProcessor:
gpt_model=self.gpt_model,
api_key=settings.API_KEY,
user_api_key=self.agent_config["user_api_key"],
prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
prompt=rendered_prompt,
chat_history=self.history,
retrieved_docs=self.retrieved_docs,
decoded_token=self.decoded_token,
attachments=self.attachments,
json_schema=self.agent_config.get("json_schema"),
)
def create_retriever(self):
"""Create and return the configured retriever"""
return RetrieverCreator.create_retriever(
self.retriever_config["retriever_name"],
source=self.source,
chat_history=self.history,
prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
chunks=self.retriever_config["chunks"],
token_limit=self.retriever_config["token_limit"],
gpt_model=self.gpt_model,
user_api_key=self.agent_config["user_api_key"],
decoded_token=self.decoded_token,
)

View File

@@ -10,7 +10,6 @@ from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.core.settings import settings
from application.api.user.base import (
agents_collection,
db,
@@ -20,6 +19,7 @@ from application.api.user.base import (
storage,
users_collection,
)
from application.core.settings import settings
from application.utils import (
check_required_fields,
generate_image_url,
@@ -76,9 +76,13 @@ class GetAgent(Resource):
"status": agent.get("status", ""),
"json_schema": agent.get("json_schema"),
"limited_token_mode": agent.get("limited_token_mode", False),
"token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
"token_limit": agent.get(
"token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]
),
"limited_request_mode": agent.get("limited_request_mode", False),
"request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
"request_limit": agent.get(
"request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]
),
"created_at": agent.get("createdAt", ""),
"updated_at": agent.get("updatedAt", ""),
"last_used_at": agent.get("lastUsedAt", ""),
@@ -149,9 +153,13 @@ class GetAgents(Resource):
"status": agent.get("status", ""),
"json_schema": agent.get("json_schema"),
"limited_token_mode": agent.get("limited_token_mode", False),
"token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
"token_limit": agent.get(
"token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]
),
"limited_request_mode": agent.get("limited_request_mode", False),
"request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
"request_limit": agent.get(
"request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]
),
"created_at": agent.get("createdAt", ""),
"updated_at": agent.get("updatedAt", ""),
"last_used_at": agent.get("lastUsedAt", ""),
@@ -209,21 +217,19 @@ class CreateAgent(Resource):
description="JSON schema for enforcing structured output format",
),
"limited_token_mode": fields.Boolean(
required=False,
description="Whether the agent is in limited token mode"
required=False, description="Whether the agent is in limited token mode"
),
"token_limit": fields.Integer(
required=False,
description="Token limit for the agent in limited mode"
required=False, description="Token limit for the agent in limited mode"
),
"limited_request_mode": fields.Boolean(
required=False,
description="Whether the agent is in limited request mode"
description="Whether the agent is in limited request mode",
),
"request_limit": fields.Integer(
required=False,
description="Request limit for the agent in limited mode"
)
description="Request limit for the agent in limited mode",
),
},
)
@@ -369,10 +375,26 @@ class CreateAgent(Resource):
"agent_type": data.get("agent_type", ""),
"status": data.get("status"),
"json_schema": data.get("json_schema"),
"limited_token_mode": data.get("limited_token_mode", False),
"token_limit": data.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
"limited_request_mode": data.get("limited_request_mode", False),
"request_limit": data.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
"limited_token_mode": (
data.get("limited_token_mode") == "True"
if isinstance(data.get("limited_token_mode"), str)
else bool(data.get("limited_token_mode", False))
),
"token_limit": int(
data.get(
"token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]
)
),
"limited_request_mode": (
data.get("limited_request_mode") == "True"
if isinstance(data.get("limited_request_mode"), str)
else bool(data.get("limited_request_mode", False))
),
"request_limit": int(
data.get(
"request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]
)
),
"createdAt": datetime.datetime.now(datetime.timezone.utc),
"updatedAt": datetime.datetime.now(datetime.timezone.utc),
"lastUsedAt": None,
@@ -429,21 +451,19 @@ class UpdateAgent(Resource):
description="JSON schema for enforcing structured output format",
),
"limited_token_mode": fields.Boolean(
required=False,
description="Whether the agent is in limited token mode"
required=False, description="Whether the agent is in limited token mode"
),
"token_limit": fields.Integer(
required=False,
description="Token limit for the agent in limited mode"
required=False, description="Token limit for the agent in limited mode"
),
"limited_request_mode": fields.Boolean(
require=False,
description="Whether the agent is in limited request mode"
description="Whether the agent is in limited request mode",
),
"request_limit": fields.Integer(
required=False,
description="Request limit for the agent in limited mode"
)
description="Request limit for the agent in limited mode",
),
},
)
@@ -534,7 +554,7 @@ class UpdateAgent(Resource):
"limited_token_mode",
"token_limit",
"limited_request_mode",
"request_limit"
"request_limit",
]
for field in allowed_fields:
@@ -652,8 +672,15 @@ class UpdateAgent(Resource):
else:
update_fields[field] = None
elif field == "limited_token_mode":
is_mode_enabled = data.get("limited_token_mode", False)
if is_mode_enabled and data.get("token_limit") is None:
raw_value = data.get("limited_token_mode", False)
bool_value = (
raw_value == "True"
if isinstance(raw_value, str)
else bool(raw_value)
)
update_fields[field] = bool_value
if bool_value and data.get("token_limit") is None:
return make_response(
jsonify(
{
@@ -664,8 +691,15 @@ class UpdateAgent(Resource):
400,
)
elif field == "limited_request_mode":
is_mode_enabled = data.get("limited_request_mode", False)
if is_mode_enabled and data.get("request_limit") is None:
raw_value = data.get("limited_request_mode", False)
bool_value = (
raw_value == "True"
if isinstance(raw_value, str)
else bool(raw_value)
)
update_fields[field] = bool_value
if bool_value and data.get("request_limit") is None:
return make_response(
jsonify(
{
@@ -677,7 +711,11 @@ class UpdateAgent(Resource):
)
elif field == "token_limit":
token_limit = data.get("token_limit")
if token_limit is not None and not data.get("limited_token_mode"):
# Convert to int and store
update_fields[field] = int(token_limit) if token_limit else 0
# Validate consistency with mode
if update_fields[field] > 0 and not data.get("limited_token_mode"):
return make_response(
jsonify(
{
@@ -689,7 +727,9 @@ class UpdateAgent(Resource):
)
elif field == "request_limit":
request_limit = data.get("request_limit")
if request_limit is not None and not data.get("limited_request_mode"):
update_fields[field] = int(request_limit) if request_limit else 0
if update_fields[field] > 0 and not data.get("limited_request_mode"):
return make_response(
jsonify(
{

View File

@@ -23,10 +23,18 @@ class Settings(BaseSettings):
LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
DEFAULT_MAX_HISTORY: int = 150
LLM_TOKEN_LIMITS: dict = {
"gpt-4o": 128000,
"gpt-4o-mini": 128000,
"gpt-4": 8192,
"gpt-3.5-turbo": 4096,
"claude-2": 1e5,
"gemini-2.5-flash": 1e6,
"claude-2": int(1e5),
"gemini-2.5-flash": int(1e6),
}
DEFAULT_LLM_TOKEN_LIMIT: int = 128000
RESERVED_TOKENS: dict = {
"system_prompt": 500,
"current_query": 500,
"safety_buffer": 1000,
}
DEFAULT_AGENT_LIMITS: dict = {
"token_limit": 50000,
@@ -133,5 +141,8 @@ class Settings(BaseSettings):
TTS_PROVIDER: str = "google_tts" # google_tts or elevenlabs
ELEVENLABS_API_KEY: Optional[str] = None
# Tool pre-fetch settings
ENABLE_TOOL_PREFETCH: bool = True
path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
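
The commit's "three-tier token budget" splits the context window into chat history, reserved overhead, and a document budget. calculate_doc_token_budget is imported from application.utils by the stream processor; a plausible sketch of that math, assuming a simple subtraction (the actual implementation may differ):

def calculate_doc_token_budget(gpt_model: str, history_token_limit: int) -> int:
    """Context window minus history and reserved overhead = budget for docs."""
    model_limit = settings.LLM_TOKEN_LIMITS.get(
        gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT
    )
    reserved = sum(settings.RESERVED_TOKENS.values())  # 2000 with the defaults above
    return max(model_limit - reserved - history_token_limit, 0)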

View File

@@ -44,6 +44,12 @@ class BaseLLM(ABC):
)
return self._fallback_llm
@staticmethod
def _remove_null_values(args_dict):
if not isinstance(args_dict, dict):
return args_dict
return {k: v for k, v in args_dict.items() if v is not None}
def _execute_with_fallback(
self, method_name: str, decorators: list, *args, **kwargs
):
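
The new helper's behavior, restated as a quick example:

BaseLLM._remove_null_values({"path": "/", "depth": None})  # -> {"path": "/"}
BaseLLM._remove_null_values(["not", "a", "dict"])          # returned unchanged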

View File

@@ -33,14 +33,15 @@ class DocsGPTAPILLM(BaseLLM):
{"role": role, "content": item["text"]}
)
elif "function_call" in item:
cleaned_args = self._remove_null_values(
item["function_call"]["args"]
)
tool_call = {
"id": item["function_call"]["call_id"],
"type": "function",
"function": {
"name": item["function_call"]["name"],
"arguments": json.dumps(
item["function_call"]["args"]
),
"arguments": json.dumps(cleaned_args),
},
}
cleaned_messages.append(

View File

@@ -163,10 +163,14 @@ class GoogleLLM(BaseLLM):
if "text" in item:
parts.append(types.Part.from_text(text=item["text"]))
elif "function_call" in item:
# Remove null values from args to avoid API errors
cleaned_args = self._remove_null_values(
item["function_call"]["args"]
)
parts.append(
types.Part.from_function_call(
name=item["function_call"]["name"],
args=item["function_call"]["args"],
args=cleaned_args,
)
)
elif "function_response" in item:
@@ -386,7 +390,7 @@ class GoogleLLM(BaseLLM):
elif hasattr(chunk, "text"):
yield chunk.text
finally:
if hasattr(response, 'close'):
if hasattr(response, "close"):
response.close()
def _supports_tools(self):

View File

@@ -44,14 +44,15 @@ class OpenAILLM(BaseLLM):
{"role": role, "content": item["text"]}
)
elif "function_call" in item:
cleaned_args = self._remove_null_values(
item["function_call"]["args"]
)
tool_call = {
"id": item["function_call"]["call_id"],
"type": "function",
"function": {
"name": item["function_call"]["name"],
"arguments": json.dumps(
item["function_call"]["args"]
),
"arguments": json.dumps(cleaned_args),
},
}
cleaned_messages.append(
@@ -181,7 +182,7 @@ class OpenAILLM(BaseLLM):
elif len(line.choices) > 0:
yield line.choices[0]
finally:
if hasattr(response, 'close'):
if hasattr(response, "close"):
response.close()
def _supports_tools(self):

View File

@@ -8,7 +8,3 @@ class BaseRetriever(ABC):
@abstractmethod
def search(self, *args, **kwargs):
pass
@abstractmethod
def get_params(self):
pass

View File

@@ -4,7 +4,7 @@ import os
from application.core.settings import settings
from application.llm.llm_creator import LLMCreator
from application.retriever.base import BaseRetriever
from application.utils import num_tokens_from_string
from application.vectorstore.vector_creator import VectorCreator
@@ -15,14 +15,13 @@ class ClassicRAG(BaseRetriever):
chat_history=None,
prompt="",
chunks=2,
token_limit=150,
doc_token_limit=50000,
gpt_model="docsgpt",
user_api_key=None,
llm_name=settings.LLM_PROVIDER,
api_key=settings.API_KEY,
decoded_token=None,
):
"""Initialize ClassicRAG retriever with vectorstore sources and LLM configuration"""
self.original_question = source.get("question", "")
self.chat_history = chat_history if chat_history is not None else []
self.prompt = prompt
@@ -42,16 +41,7 @@ class ClassicRAG(BaseRetriever):
f"sources={'active_docs' in source and source['active_docs'] is not None}"
)
self.gpt_model = gpt_model
self.token_limit = (
token_limit
if token_limit
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
self.doc_token_limit = doc_token_limit
self.user_api_key = user_api_key
self.llm_name = llm_name
self.api_key = api_key
@@ -118,21 +108,17 @@ class ClassicRAG(BaseRetriever):
return self.original_question
def _get_data(self):
"""Retrieve relevant documents from configured vectorstores"""
if self.chunks == 0 or not self.vectorstores:
logging.info(
f"ClassicRAG._get_data: Skipping retrieval - chunks={self.chunks}, "
f"vectorstores_count={len(self.vectorstores) if self.vectorstores else 0}"
)
return []
all_docs = []
chunks_per_source = max(1, self.chunks // len(self.vectorstores))
logging.info(
f"ClassicRAG._get_data: Starting retrieval with chunks={self.chunks}, "
f"vectorstores={self.vectorstores}, chunks_per_source={chunks_per_source}, "
f"query='{self.question[:50]}...'"
)
token_budget = max(int(self.doc_token_limit * 0.9), 100)
cumulative_tokens = 0
for vectorstore_id in self.vectorstores:
if vectorstore_id:
@@ -140,15 +126,21 @@ class ClassicRAG(BaseRetriever):
docsearch = VectorCreator.create_vectorstore(
settings.VECTOR_STORE, vectorstore_id, settings.EMBEDDINGS_KEY
)
docs_temp = docsearch.search(self.question, k=chunks_per_source)
docs_temp = docsearch.search(
self.question, k=max(chunks_per_source * 2, 20)
)
for doc in docs_temp:
if cumulative_tokens >= token_budget:
break
if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
page_content = doc.page_content
metadata = doc.metadata
else:
page_content = doc.get("text", doc.get("page_content", ""))
metadata = doc.get("metadata", {})
title = metadata.get(
"title", metadata.get("post_title", page_content)
)
@@ -168,23 +160,35 @@ class ClassicRAG(BaseRetriever):
if not filename:
filename = title
source_path = metadata.get("source") or vectorstore_id
all_docs.append(
{
"title": title,
"text": page_content,
"source": source_path,
"filename": filename,
}
)
doc_text_with_header = f"{filename}\n{page_content}"
doc_tokens = num_tokens_from_string(doc_text_with_header)
if cumulative_tokens + doc_tokens < token_budget:
all_docs.append(
{
"title": title,
"text": page_content,
"source": source_path,
"filename": filename,
}
)
cumulative_tokens += doc_tokens
if cumulative_tokens >= token_budget:
break
except Exception as e:
logging.error(
f"Error searching vectorstore {vectorstore_id}: {e}",
exc_info=True,
)
continue
logging.info(
f"ClassicRAG._get_data: Retrieval complete - retrieved {len(all_docs)} documents "
f"(requested chunks={self.chunks}, chunks_per_source={chunks_per_source})"
f"(requested chunks={self.chunks}, chunks_per_source={chunks_per_source}, "
f"cumulative_tokens={cumulative_tokens}/{token_budget})"
)
return all_docs
@@ -194,15 +198,3 @@ class ClassicRAG(BaseRetriever):
self.original_question = query
self.question = self._rephrase_query()
return self._get_data()
def get_params(self):
"""Return current retriever configuration parameters"""
return {
"question": self.original_question,
"rephrased_question": self.question,
"sources": self.vectorstores,
"chunks": self.chunks,
"token_limit": self.token_limit,
"gpt_model": self.gpt_model,
"user_api_key": self.user_api_key,
}
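
The loop above now over-fetches candidates (k = max(chunks_per_source * 2, 20)) and admits them greedily against 90% of doc_token_limit. A standalone sketch of that admission policy, with hypothetical helper names:

def admit_docs(docs, doc_token_limit, count_tokens):
    """Greedy token-budget admission mirroring ClassicRAG._get_data (sketch)."""
    budget = max(int(doc_token_limit * 0.9), 100)  # keep a ~10% safety margin
    admitted, used = [], 0
    for doc in docs:
        cost = count_tokens(f"{doc['filename']}\n{doc['text']}")
        if used + cost < budget:  # skip docs that would overflow, keep scanning
            admitted.append(doc)
            used += cost
        if used >= budget:
            break
    return admitted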

View File

@@ -0,0 +1,190 @@
import logging
import uuid
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class NamespaceBuilder(ABC):
"""Base class for building template context namespaces"""
@abstractmethod
def build(self, **kwargs) -> Dict[str, Any]:
"""Build namespace context dictionary"""
pass
@property
@abstractmethod
def namespace_name(self) -> str:
"""Name of this namespace for template access"""
pass
class SystemNamespace(NamespaceBuilder):
"""System metadata namespace: {{ system.* }}"""
@property
def namespace_name(self) -> str:
return "system"
def build(
self, request_id: Optional[str] = None, user_id: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
"""
Build system context with metadata.
Args:
request_id: Unique request identifier
user_id: Current user identifier
Returns:
Dictionary with system variables
"""
now = datetime.now(timezone.utc)
return {
"date": now.strftime("%Y-%m-%d"),
"time": now.strftime("%H:%M:%S"),
"timestamp": now.isoformat(),
"request_id": request_id or str(uuid.uuid4()),
"user_id": user_id,
}
class PassthroughNamespace(NamespaceBuilder):
"""Request parameters namespace: {{ passthrough.* }}"""
@property
def namespace_name(self) -> str:
return "passthrough"
def build(
self, passthrough_data: Optional[Dict[str, Any]] = None, **kwargs
) -> Dict[str, Any]:
"""
Build passthrough context from request parameters.
Args:
passthrough_data: Dictionary of parameters from web request
Returns:
Dictionary with passthrough variables
"""
if not passthrough_data:
return {}
safe_data = {}
for key, value in passthrough_data.items():
if isinstance(value, (str, int, float, bool, type(None))):
safe_data[key] = value
else:
logger.warning(
f"Skipping non-serializable passthrough value for key '{key}': {type(value)}"
)
return safe_data
class SourceNamespace(NamespaceBuilder):
"""RAG source documents namespace: {{ source.* }}"""
@property
def namespace_name(self) -> str:
return "source"
def build(
self, docs: Optional[list] = None, docs_together: Optional[str] = None, **kwargs
) -> Dict[str, Any]:
"""
Build source context from RAG retrieval results.
Args:
docs: List of retrieved documents
docs_together: Concatenated document content (for backward compatibility)
Returns:
Dictionary with source variables
"""
context = {}
if docs:
context["documents"] = docs
context["count"] = len(docs)
if docs_together:
context["docs_together"] = docs_together # Add docs_together for custom templates
context["content"] = docs_together
context["summaries"] = docs_together
return context
class ToolsNamespace(NamespaceBuilder):
"""Pre-executed tools namespace: {{ tools.* }}"""
@property
def namespace_name(self) -> str:
return "tools"
def build(
self, tools_data: Optional[Dict[str, Any]] = None, **kwargs
) -> Dict[str, Any]:
"""
Build tools context with pre-executed tool results.
Args:
tools_data: Dictionary of pre-fetched tool results organized by tool name
e.g., {"memory": {"notes": "content", "tasks": "list"}}
Returns:
Dictionary with tool results organized by tool name
"""
if not tools_data:
return {}
safe_data = {}
for tool_name, tool_result in tools_data.items():
if isinstance(tool_result, (str, dict, list, int, float, bool, type(None))):
safe_data[tool_name] = tool_result
else:
logger.warning(
f"Skipping non-serializable tool result for '{tool_name}': {type(tool_result)}"
)
return safe_data
class NamespaceManager:
"""Manages all namespace builders and context assembly"""
def __init__(self):
self._builders = {
"system": SystemNamespace(),
"passthrough": PassthroughNamespace(),
"source": SourceNamespace(),
"tools": ToolsNamespace(),
}
def build_context(self, **kwargs) -> Dict[str, Any]:
"""
Build complete template context from all namespaces.
Args:
**kwargs: Parameters to pass to namespace builders
Returns:
Complete context dictionary for template rendering
"""
context = {}
for namespace_name, builder in self._builders.items():
try:
namespace_context = builder.build(**kwargs)
# Always include namespace, even if empty, to prevent undefined errors
context[namespace_name] = namespace_context if namespace_context else {}
except Exception as e:
logger.error(f"Failed to build {namespace_name} namespace: {str(e)}")
# Include empty namespace on error to prevent template failures
context[namespace_name] = {}
return context
def get_builder(self, namespace_name: str) -> Optional[NamespaceBuilder]:
"""Get specific namespace builder"""
return self._builders.get(namespace_name)
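
Taken together, the builders yield one sub-dictionary per namespace. A minimal usage sketch (illustrative values; each builder ignores keyword arguments it does not use):

manager = NamespaceManager()
context = manager.build_context(
    request_id="req-123",
    user_id="user-42",
    passthrough_data={"plan": "pro", "locale": "en"},
    docs=[{"title": "Guide", "text": "..."}],
    docs_together="Guide\n...",
    tools_data={"memory": {"notes": "remember X"}},
)
# context["system"]["request_id"]     -> "req-123"
# context["passthrough"]["plan"]      -> "pro"
# context["source"]["count"]          -> 1
# context["tools"]["memory"]["notes"] -> "remember X"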

View File

@@ -0,0 +1,161 @@
import logging
from typing import Any, Dict, List, Optional, Set
from jinja2 import (
ChainableUndefined,
Environment,
meta,
nodes,
select_autoescape,
TemplateSyntaxError,
)
from jinja2.exceptions import UndefinedError
logger = logging.getLogger(__name__)
class TemplateRenderError(Exception):
"""Raised when template rendering fails"""
pass
class TemplateEngine:
"""Jinja2-based template engine for dynamic prompt rendering"""
def __init__(self):
self._env = Environment(
undefined=ChainableUndefined,
trim_blocks=True,
lstrip_blocks=True,
autoescape=select_autoescape(default_for_string=True, default=True),
)
def render(self, template_content: str, context: Dict[str, Any]) -> str:
"""
Render template with provided context.
Args:
template_content: Raw template string with Jinja2 syntax
context: Dictionary of variables to inject into template
Returns:
Rendered template string
Raises:
TemplateRenderError: If template syntax is invalid or variables undefined
"""
if not template_content:
return ""
try:
template = self._env.from_string(template_content)
return template.render(**context)
except TemplateSyntaxError as e:
error_msg = f"Template syntax error at line {e.lineno}: {e.message}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
except UndefinedError as e:
error_msg = f"Undefined variable in template: {e.message}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
except Exception as e:
error_msg = f"Template rendering failed: {str(e)}"
logger.error(error_msg)
raise TemplateRenderError(error_msg) from e
def validate_template(self, template_content: str) -> bool:
"""
Validate template syntax without rendering.
Args:
template_content: Template string to validate
Returns:
True if template is syntactically valid
"""
if not template_content:
return True
try:
self._env.from_string(template_content)
return True
except TemplateSyntaxError as e:
logger.debug(f"Template syntax invalid at line {e.lineno}: {e.message}")
return False
except Exception as e:
logger.debug(f"Template validation error: {type(e).__name__}: {str(e)}")
return False
def extract_variables(self, template_content: str) -> Set[str]:
"""
Extract all variable names from template.
Args:
template_content: Template string to analyze
Returns:
Set of variable names found in template
"""
if not template_content:
return set()
try:
ast = self._env.parse(template_content)
return meta.find_undeclared_variables(ast)
except TemplateSyntaxError as e:
logger.debug(f"Cannot extract variables - syntax error at line {e.lineno}")
return set()
except Exception as e:
logger.debug(f"Cannot extract variables: {type(e).__name__}")
return set()
def extract_tool_usages(
self, template_content: str
) -> Dict[str, Set[Optional[str]]]:
"""Extract tool and action references from a template"""
if not template_content:
return {}
try:
ast = self._env.parse(template_content)
except TemplateSyntaxError as e:
logger.debug(f"extract_tool_usages - syntax error at line {e.lineno}")
return {}
except Exception as e:
logger.debug(f"extract_tool_usages - parse error: {type(e).__name__}")
return {}
usages: Dict[str, Set[Optional[str]]] = {}
def record(path: List[str]) -> None:
if not path:
return
tool_name = path[0]
action_name = path[1] if len(path) > 1 else None
if not tool_name:
return
tool_entry = usages.setdefault(tool_name, set())
tool_entry.add(action_name)
for node in ast.find_all(nodes.Getattr):
path = []
current = node
while isinstance(current, nodes.Getattr):
path.append(current.attr)
current = current.node
if isinstance(current, nodes.Name) and current.name == "tools":
path.reverse()
record(path)
for node in ast.find_all(nodes.Getitem):
path = []
current = node
while isinstance(current, nodes.Getitem):
key = current.arg
if isinstance(key, nodes.Const) and isinstance(key.value, str):
path.append(key.value)
else:
path = []
break
current = current.node
if path and isinstance(current, nodes.Name) and current.name == "tools":
path.reverse()
record(path)
return usages
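
End to end, the engine validates, introspects, and renders templates written against the namespaces above. A short sketch (illustrative template and context):

engine = TemplateEngine()
template = "Today is {{ system.date }}. Notes: {{ tools.memory.notes }}"

engine.validate_template(template)    # True
engine.extract_tool_usages(template)  # {"memory": {"notes"}}

rendered = engine.render(
    template,
    {"system": {"date": "2025-10-31"}, "tools": {"memory": {"notes": "hi"}}},
)
# "Today is 2025-10-31. Notes: hi"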

View File

@@ -74,6 +74,17 @@ def count_tokens_docs(docs):
return tokens
def calculate_doc_token_budget(
gpt_model: str = "gpt-4o", history_token_limit: int = 2000
) -> int:
total_context = settings.LLM_TOKEN_LIMITS.get(
gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT
)
reserved = sum(settings.RESERVED_TOKENS.values())
doc_budget = total_context - history_token_limit - reserved
return max(doc_budget, 1000)
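# Worked example (illustrative): for a model with a 128000-token context,
# history_token_limit=2000 and 2000 reserved tokens (500 + 500 + 1000),
# the document budget is 128000 - 2000 - 2000 = 124000.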
def get_missing_fields(data, required_fields):
"""Check for missing required fields. Returns list of missing field names."""
return [field for field in required_fields if field not in data]
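# e.g. get_missing_fields({"a": 1}, ["a", "b"]) -> ["b"]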
@@ -141,8 +152,8 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
max_token_limit
if max_token_limit
and max_token_limit
< settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
< settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
)
if not history: