mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-01-20 05:50:58 +00:00
feat: template-based prompt rendering with dynamic namespace injection (#2091)
* feat: template-based prompt rendering with dynamic namespace injection
* refactor: improve template engine initialization with clearer formatting
* refactor: streamline ReActAgent methods and improve content extraction logic
  feat: enhance error handling in NamespaceManager and TemplateEngine
  fix: update NewAgent component to ensure consistent form data submission
  test: modify tests for ReActAgent and prompt renderer to reflect method changes and improve coverage
* feat: tools namespace + three-tier token budget
* refactor: remove unused variable assignment in message building tests
* Enhance prompt customization and tool pre-fetching functionality
* ruff lint fix
* refactor: cleaner error handling and reduce code clutter

Co-authored-by: Alex <a@tushynski.me>
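The headline feature: agent prompts can now be Jinja2-style templates rendered per request, with dynamic values injected through namespaces. A minimal sketch of the request/template pairing — the `passthrough` request field and the `{{ ... }}` syntax check are in this commit, but the exact variable names available inside a template depend on the namespace builders, so treat `passthrough.company` as an illustrative assumption:

# Prompt template stored for the agent (illustrative):
#   "You are a support assistant for {{ passthrough.company }}."
# Request payload supplying the dynamic value:
payload = {
    "question": "How do I rotate my API key?",
    "passthrough": {"company": "Acme Corp"},  # injected into the prompt template
}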
@@ -12,7 +12,6 @@ from application.core.settings import settings
 from application.llm.handlers.handler_creator import LLMHandlerCreator
 from application.llm.llm_creator import LLMCreator
 from application.logging import build_stack_data, log_activity, LogContext
-from application.retriever.base import BaseRetriever

 logger = logging.getLogger(__name__)

@@ -27,6 +26,7 @@ class BaseAgent(ABC):
         user_api_key: Optional[str] = None,
         prompt: str = "",
         chat_history: Optional[List[Dict]] = None,
+        retrieved_docs: Optional[List[Dict]] = None,
         decoded_token: Optional[Dict] = None,
         attachments: Optional[List[Dict]] = None,
         json_schema: Optional[Dict] = None,
@@ -53,6 +53,7 @@ class BaseAgent(ABC):
             user_api_key=user_api_key,
             decoded_token=decoded_token,
         )
+        self.retrieved_docs = retrieved_docs or []
         self.llm_handler = LLMHandlerCreator.create_handler(
             llm_name if llm_name else "default"
         )
@@ -65,13 +66,13 @@ class BaseAgent(ABC):

     @log_activity()
     def gen(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext = None
+        self, query: str, log_context: LogContext = None
     ) -> Generator[Dict, None, None]:
-        yield from self._gen_inner(query, retriever, log_context)
+        yield from self._gen_inner(query, log_context)

     @abstractmethod
     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
     ) -> Generator[Dict, None, None]:
         pass
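Retrieval is no longer threaded through `gen()`: documents are fetched before the agent exists and passed in as `retrieved_docs`, so `_gen_inner` implementations only see the query. A rough sketch of the new calling convention (wiring abbreviated; `processor` is the `StreamProcessor` shown later in this commit):

# Before: for line in agent.gen(query=question, retriever=retriever): ...
docs_together, docs = processor.pre_fetch_docs(question)  # retrieval happens up front
agent = processor.create_agent(docs_together=docs_together, docs=docs)
for event in agent.gen(query=question):  # no retriever argument anymore
    ...  # events carry "answer", "sources", "tool_calls", "thought"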
@@ -150,6 +151,7 @@ class BaseAgent(ABC):
         call_id = getattr(call, "id", None) or str(uuid.uuid4())

         # Check if parsing failed
+
         if tool_id is None or action_name is None:
             error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
             logger.error(error_message)
@@ -164,13 +166,14 @@ class BaseAgent(ABC):
             yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
             self.tool_calls.append(tool_call_data)
             return "Failed to parse tool call.", call_id

         # Check if tool_id exists in available tools
+
         if tool_id not in tools_dict:
             error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
             logger.error(error_message)

             # Return error result
             tool_call_data = {
                 "tool_name": "unknown",
                 "call_id": call_id,
@@ -181,7 +184,6 @@ class BaseAgent(ABC):
             yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
             self.tool_calls.append(tool_call_data)
             return f"Tool with ID {tool_id} not found.", call_id
-
         tool_call_data = {
             "tool_name": tools_dict[tool_id]["name"],
             "call_id": call_id,
@@ -223,6 +225,7 @@ class BaseAgent(ABC):
         tm = ToolManager(config={})

+        # Prepare tool_config and add tool_id for memory tools
         if tool_data["name"] == "api_tool":
             tool_config = {
                 "url": tool_data["config"]["actions"][action_name]["url"],
@@ -234,8 +237,8 @@ class BaseAgent(ABC):
             tool_config = tool_data["config"].copy() if tool_data["config"] else {}
-            # Add tool_id from MongoDB _id for tools that need instance isolation (like memory tool)
+            # Use MongoDB _id if available, otherwise fall back to enumerated tool_id
             tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
-
-        tool_config["tool_id"] = str(tool_data.get("_id", tool_id))
         tool = tm.load_tool(
             tool_data["name"],
             tool_config=tool_config,
@@ -276,24 +279,14 @@ class BaseAgent(ABC):
         self,
         system_prompt: str,
         query: str,
-        retrieved_data: List[Dict],
     ) -> List[Dict]:
-        docs_with_filenames = []
-        for doc in retrieved_data:
-            filename = doc.get("filename") or doc.get("title") or doc.get("source")
-            if filename:
-                chunk_header = str(filename)
-                docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
-            else:
-                docs_with_filenames.append(doc["text"])
-        docs_together = "\n\n".join(docs_with_filenames)
-        p_chat_combine = system_prompt.replace("{summaries}", docs_together)
-        messages_combine = [{"role": "system", "content": p_chat_combine}]
+        """Build messages using pre-rendered system prompt"""
+        messages = [{"role": "system", "content": system_prompt}]

         for i in self.chat_history:
             if "prompt" in i and "response" in i:
-                messages_combine.append({"role": "user", "content": i["prompt"]})
-                messages_combine.append({"role": "assistant", "content": i["response"]})
+                messages.append({"role": "user", "content": i["prompt"]})
+                messages.append({"role": "assistant", "content": i["response"]})
                 if "tool_calls" in i:
                     for tool_call in i["tool_calls"]:
                         call_id = tool_call.get("call_id") or str(uuid.uuid4())
@@ -313,26 +306,14 @@ class BaseAgent(ABC):
                             }
                         }

-                        messages_combine.append(
+                        messages.append(
                             {"role": "assistant", "content": [function_call_dict]}
                         )
-                        messages_combine.append(
+                        messages.append(
                             {"role": "tool", "content": [function_response_dict]}
                         )
-        messages_combine.append({"role": "user", "content": query})
-        return messages_combine
-
-    def _retriever_search(
-        self,
-        retriever: BaseRetriever,
-        query: str,
-        log_context: Optional[LogContext] = None,
-    ) -> List[Dict]:
-        retrieved_data = retriever.search(query)
-        if log_context:
-            data = build_stack_data(retriever, exclude_attributes=["llm"])
-            log_context.stacks.append({"component": "retriever", "data": data})
-        return retrieved_data
+        messages.append({"role": "user", "content": query})
+        return messages

     def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
         gen_kwargs = {"model": self.gpt_model, "messages": messages}
@@ -343,7 +324,6 @@ class BaseAgent(ABC):
             and self.tools
         ):
             gen_kwargs["tools"] = self.tools
-
         if (
             self.json_schema
             and hasattr(self.llm, "_supports_structured_output")
@@ -357,7 +337,6 @@ class BaseAgent(ABC):
                 gen_kwargs["response_format"] = structured_format
             elif self.llm_name == "google":
                 gen_kwargs["response_schema"] = structured_format
-
         resp = self.llm.gen_stream(**gen_kwargs)

         if log_context:
@@ -1,32 +1,20 @@
+import logging
 from typing import Dict, Generator

 from application.agents.base import BaseAgent
 from application.logging import LogContext
-from application.retriever.base import BaseRetriever
-import logging

 logger = logging.getLogger(__name__)


 class ClassicAgent(BaseAgent):
-    """A simplified agent with clear execution flow.
-
-    Usage:
-    1. Processes a query through retrieval
-    2. Sets up available tools
-    3. Generates responses using LLM
-    4. Handles tool interactions if needed
-    5. Returns standardized outputs
-
-    Easy to extend by overriding specific steps.
-    """
+    """A simplified agent with clear execution flow"""

     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
     ) -> Generator[Dict, None, None]:
-        # Step 1: Retrieve relevant data
-        retrieved_data = self._retriever_search(retriever, query, log_context)
+        """Core generator function for ClassicAgent execution flow"""

-        # Step 2: Prepare tools
         tools_dict = (
             self._get_user_tools(self.user)
             if not self.user_api_key
@@ -34,20 +22,16 @@ class ClassicAgent(BaseAgent):
         )
         self._prepare_tools(tools_dict)

-        # Step 3: Build and process messages
-        messages = self._build_messages(self.prompt, query, retrieved_data)
+        messages = self._build_messages(self.prompt, query)
         llm_response = self._llm_gen(messages, log_context)

-        # Step 4: Handle the response
         yield from self._handle_response(
             llm_response, tools_dict, messages, log_context
         )

-        # Step 5: Return metadata
-        yield {"sources": retrieved_data}
+        yield {"sources": self.retrieved_docs}
         yield {"tool_calls": self._get_truncated_tool_calls()}

-        # Log tool calls for debugging
         log_context.stacks.append(
             {"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
         )
@@ -1,284 +1,238 @@
-import os
-from typing import Dict, Generator, List, Any
 import logging
+import os
+from typing import Any, Dict, Generator, List

 from application.agents.base import BaseAgent
 from application.logging import build_stack_data, LogContext
-from application.retriever.base import BaseRetriever

 logger = logging.getLogger(__name__)

+MAX_ITERATIONS_REASONING = 10
+
 current_dir = os.path.dirname(
     os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 )
 with open(
     os.path.join(current_dir, "application/prompts", "react_planning_prompt.txt"), "r"
 ) as f:
-    planning_prompt_template = f.read()
+    PLANNING_PROMPT_TEMPLATE = f.read()
 with open(
-    os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"),
-    "r",
+    os.path.join(current_dir, "application/prompts", "react_final_prompt.txt"), "r"
 ) as f:
-    final_prompt_template = f.read()
-
-MAX_ITERATIONS_REASONING = 10
+    FINAL_PROMPT_TEMPLATE = f.read()


 class ReActAgent(BaseAgent):
     """
     Research and Action (ReAct) Agent - Advanced reasoning agent with iterative planning.

     Implements a think-act-observe loop for complex problem-solving:
     1. Creates a strategic plan based on the query
     2. Executes tools and gathers observations
     3. Iteratively refines approach until satisfied
     4. Synthesizes final answer from all observations
     """

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.plan: str = ""
         self.observations: List[str] = []

-    def _extract_content_from_llm_response(self, resp: Any) -> str:
-        """
-        Helper to extract string content from various LLM response types.
-        Handles strings, message objects (OpenAI-like), and streams.
-        Adapt stream handling for your specific LLM client if not OpenAI.
-        """
-        collected_content = []
-        if isinstance(resp, str):
-            collected_content.append(resp)
-        elif (  # OpenAI non-streaming or Anthropic non-streaming (older SDK style)
-            hasattr(resp, "message")
-            and hasattr(resp.message, "content")
-            and resp.message.content is not None
-        ):
-            collected_content.append(resp.message.content)
-        elif (  # OpenAI non-streaming (Pydantic model), Anthropic new SDK non-streaming
-            hasattr(resp, "choices")
-            and resp.choices
-            and hasattr(resp.choices[0], "message")
-            and hasattr(resp.choices[0].message, "content")
-            and resp.choices[0].message.content is not None
-        ):
-            collected_content.append(resp.choices[0].message.content)  # OpenAI
-        elif (  # Anthropic new SDK non-streaming content block
-            hasattr(resp, "content")
-            and isinstance(resp.content, list)
-            and resp.content
-            and hasattr(resp.content[0], "text")
-        ):
-            collected_content.append(resp.content[0].text)  # Anthropic
-        else:
-            # Assume resp is a stream if not a recognized object
-            chunk = None
-            try:
-                for (
-                    chunk
-                ) in (
-                    resp
-                ):  # This will fail if resp is not iterable (e.g. a non-streaming response object)
-                    content_piece = ""
-                    # OpenAI-like stream
-                    if (
-                        hasattr(chunk, "choices")
-                        and len(chunk.choices) > 0
-                        and hasattr(chunk.choices[0], "delta")
-                        and hasattr(chunk.choices[0].delta, "content")
-                        and chunk.choices[0].delta.content is not None
-                    ):
-                        content_piece = chunk.choices[0].delta.content
-                    # Anthropic-like stream (ContentBlockDelta)
-                    elif (
-                        hasattr(chunk, "type")
-                        and chunk.type == "content_block_delta"
-                        and hasattr(chunk, "delta")
-                        and hasattr(chunk.delta, "text")
-                    ):
-                        content_piece = chunk.delta.text
-                    elif isinstance(chunk, str):  # Simplest case: stream of strings
-                        content_piece = chunk
-
-                    if content_piece:
-                        collected_content.append(content_piece)
-            except (
-                TypeError
-            ):  # If resp is not iterable (e.g. a final response object that wasn't caught above)
-                logger.debug(
-                    f"Response type {type(resp)} could not be iterated as a stream. It might be a non-streaming object not handled by specific checks."
-                )
-            except Exception as e:
-                logger.error(
-                    f"Error processing potential stream chunk: {e}, chunk was: {getattr(chunk, '__dict__', chunk) if chunk is not None else 'N/A'}"
-                )
-
-        return "".join(collected_content)
-
     def _gen_inner(
-        self, query: str, retriever: BaseRetriever, log_context: LogContext
+        self, query: str, log_context: LogContext
     ) -> Generator[Dict, None, None]:
-        # Reset state for this generation call
-        self.plan = ""
-        self.observations = []
-        retrieved_data = self._retriever_search(retriever, query, log_context)
+        """Execute ReAct reasoning loop with planning, action, and observation cycles"""

-        if self.user_api_key:
-            tools_dict = self._get_tools(self.user_api_key)
-        else:
-            tools_dict = self._get_user_tools(self.user)
+        self._reset_state()

+        tools_dict = (
+            self._get_tools(self.user_api_key)
+            if self.user_api_key
+            else self._get_user_tools(self.user)
+        )
         self._prepare_tools(tools_dict)

-        docs_together = "\n".join([doc["text"] for doc in retrieved_data])
-        iterating_reasoning = 0
-        while iterating_reasoning < MAX_ITERATIONS_REASONING:
-            iterating_reasoning += 1
-            # 1. Create Plan
-            logger.info("ReActAgent: Creating plan...")
-            plan_stream = self._create_plan(query, docs_together, log_context)
-            current_plan_parts = []
-            yield {"thought": f"Reasoning... (iteration {iterating_reasoning})\n\n"}
-            for line_chunk in plan_stream:
-                current_plan_parts.append(line_chunk)
-                yield {"thought": line_chunk}
-            self.plan = "".join(current_plan_parts)
-            if self.plan:
-                self.observations.append(
-                    f"Plan: {self.plan} Iteration: {iterating_reasoning}"
-                )
+        for iteration in range(1, MAX_ITERATIONS_REASONING + 1):
+            yield {"thought": f"Reasoning... (iteration {iteration})\n\n"}

-            max_obs_len = 20000
-            obs_str = "\n".join(self.observations)
-            if len(obs_str) > max_obs_len:
-                obs_str = obs_str[:max_obs_len] + "\n...[observations truncated]"
-            execution_prompt_str = (
-                (self.prompt or "")
-                + f"\n\nFollow this plan:\n{self.plan}"
-                + f"\n\nObservations:\n{obs_str}"
-                + f"\n\nIf there is enough data to complete user query '{query}', Respond with 'SATISFIED' only. Otherwise, continue. Dont Menstion 'SATISFIED' in your response if you are not ready. "
-            )
+            yield from self._planning_phase(query, log_context)

-            messages = self._build_messages(execution_prompt_str, query, retrieved_data)
-
-            resp_from_llm_gen = self._llm_gen(messages, log_context)
-
-            initial_llm_thought_content = self._extract_content_from_llm_response(
-                resp_from_llm_gen
-            )
-            if initial_llm_thought_content:
-                self.observations.append(
-                    f"Initial thought/response: {initial_llm_thought_content}"
-                )
-            else:
-                logger.info(
-                    "ReActAgent: Initial LLM response (before handler) had no textual content (might be only tool calls)."
-                )
-            resp_after_handler = self._llm_handler(
-                resp_from_llm_gen, tools_dict, messages, log_context
-            )
-
-            for (
-                tool_call_info
-            ) in (
-                self.tool_calls
-            ):  # Iterate over self.tool_calls populated by _llm_handler
-                observation_string = (
-                    f"Executed Action: Tool '{tool_call_info.get('tool_name', 'N/A')}' "
-                    f"with arguments '{tool_call_info.get('arguments', '{}')}'. Result: '{str(tool_call_info.get('result', ''))[:200]}...'"
-                )
-                self.observations.append(observation_string)
-
-            content_after_handler = self._extract_content_from_llm_response(
-                resp_after_handler
-            )
-            if content_after_handler:
-                self.observations.append(
-                    f"Response after tool execution: {content_after_handler}"
-                )
-            else:
-                logger.info(
-                    "ReActAgent: LLM response after handler had no textual content."
-                )
-
-            if log_context:
-                log_context.stacks.append(
-                    {
-                        "component": "agent_tool_calls",
-                        "data": {"tool_calls": self.tool_calls.copy()},
-                    }
-                )
-
-            yield {"sources": retrieved_data}
-
-            display_tool_calls = []
-            for tc in self.tool_calls:
-                cleaned_tc = tc.copy()
-                if len(str(cleaned_tc.get("result", ""))) > 50:
-                    cleaned_tc["result"] = str(cleaned_tc["result"])[:50] + "..."
-                display_tool_calls.append(cleaned_tc)
-            if display_tool_calls:
-                yield {"tool_calls": display_tool_calls}
-
-            if "SATISFIED" in content_after_handler:
-                logger.info(
-                    "ReActAgent: LLM satisfied with the plan and data. Stopping reasoning."
+            if not self.plan:
+                logger.warning(
+                    f"ReActAgent: No plan generated in iteration {iteration}"
                 )
                 break
+            self.observations.append(f"Plan (iteration {iteration}): {self.plan}")

-        # 3. Create Final Answer based on all observations
-        final_answer_stream = self._create_final_answer(
-            query, self.observations, log_context
-        )
-        for answer_chunk in final_answer_stream:
-            yield {"answer": answer_chunk}
-        logger.info("ReActAgent: Finished generating final answer.")
+            satisfied = yield from self._execution_phase(query, tools_dict, log_context)

-    def _create_plan(
-        self, query: str, docs_data: str, log_context: LogContext = None
-    ) -> Generator[str, None, None]:
-        plan_prompt_filled = planning_prompt_template.replace("{query}", query)
-        if "{summaries}" in plan_prompt_filled:
-            summaries = docs_data if docs_data else "No documents retrieved."
-            plan_prompt_filled = plan_prompt_filled.replace("{summaries}", summaries)
-        plan_prompt_filled = plan_prompt_filled.replace("{prompt}", self.prompt or "")
-        plan_prompt_filled = plan_prompt_filled.replace(
-            "{observations}", "\n".join(self.observations)
-        )
+            if satisfied:
+                logger.info("ReActAgent: Goal satisfied, stopping reasoning loop")
+                break
+        yield from self._synthesis_phase(query, log_context)

-        messages = [{"role": "user", "content": plan_prompt_filled}]
+    def _reset_state(self):
+        """Reset agent state for new query"""
+        self.plan = ""
+        self.observations = []

-        plan_stream_from_llm = self.llm.gen_stream(
+    def _planning_phase(
+        self, query: str, log_context: LogContext
+    ) -> Generator[Dict, None, None]:
+        """Generate strategic plan for query"""
+        logger.info("ReActAgent: Creating plan...")
+
+        plan_prompt = self._build_planning_prompt(query)
+        messages = [{"role": "user", "content": plan_prompt}]
+
+        plan_stream = self.llm.gen_stream(
             model=self.gpt_model,
             messages=messages,
-            tools=getattr(self, "tools", None),  # Use self.tools
+            tools=self.tools if self.tools else None,
         )
-
         if log_context:
-            data = build_stack_data(self.llm)
-            log_context.stacks.append({"component": "planning_llm", "data": data})
-
-        for chunk in plan_stream_from_llm:
-            content_piece = self._extract_content_from_llm_response(chunk)
-            if content_piece:
-                yield content_piece
-
-    def _create_final_answer(
-        self, query: str, observations: List[str], log_context: LogContext = None
-    ) -> Generator[str, None, None]:
-        observation_string = "\n".join(observations)
-        max_obs_len = 10000
-        if len(observation_string) > max_obs_len:
-            observation_string = (
-                observation_string[:max_obs_len] + "\n...[observations truncated]"
-            )
-            logger.warning(
-                "ReActAgent: Truncated observations for final answer prompt due to length."
+            log_context.stacks.append(
+                {"component": "planning_llm", "data": build_stack_data(self.llm)}
             )
+        plan_parts = []
+        for chunk in plan_stream:
+            content = self._extract_content(chunk)
+            if content:
+                plan_parts.append(content)
+                yield {"thought": content}
+        self.plan = "".join(plan_parts)

-        final_answer_prompt_filled = final_prompt_template.format(
-            query=query, observations=observation_string
+    def _execution_phase(
+        self, query: str, tools_dict: Dict, log_context: LogContext
+    ) -> Generator[bool, None, None]:
+        """Execute plan with tool calls and observations"""
+        execution_prompt = self._build_execution_prompt(query)
+        messages = self._build_messages(execution_prompt, query)
+
+        llm_response = self._llm_gen(messages, log_context)
+        initial_content = self._extract_content(llm_response)
+
+        if initial_content:
+            self.observations.append(f"Initial response: {initial_content}")
+        processed_response = self._llm_handler(
+            llm_response, tools_dict, messages, log_context
         )

-        messages = [{"role": "user", "content": final_answer_prompt_filled}]
+        for tool_call in self.tool_calls:
+            observation = (
+                f"Executed: {tool_call.get('tool_name', 'Unknown')} "
+                f"with args {tool_call.get('arguments', {})}. "
+                f"Result: {str(tool_call.get('result', ''))[:200]}"
+            )
+            self.observations.append(observation)
+        final_content = self._extract_content(processed_response)
+        if final_content:
+            self.observations.append(f"Response after tools: {final_content}")
+        if log_context:
+            log_context.stacks.append(
+                {
+                    "component": "agent_tool_calls",
+                    "data": {"tool_calls": self.tool_calls.copy()},
+                }
+            )
+        yield {"sources": self.retrieved_docs}
+        yield {"tool_calls": self._get_truncated_tool_calls()}

-        # Final answer should synthesize, not call tools.
-        final_answer_stream_from_llm = self.llm.gen_stream(
+        return "SATISFIED" in (final_content or "")
+
+    def _synthesis_phase(
+        self, query: str, log_context: LogContext
+    ) -> Generator[Dict, None, None]:
+        """Synthesize final answer from all observations"""
+        logger.info("ReActAgent: Generating final answer...")
+
+        final_prompt = self._build_final_answer_prompt(query)
+        messages = [{"role": "user", "content": final_prompt}]
+
+        final_stream = self.llm.gen_stream(
             model=self.gpt_model, messages=messages, tools=None
         )
         if log_context:
-            data = build_stack_data(self.llm)
-            log_context.stacks.append({"component": "final_answer_llm", "data": data})
-
-        for chunk in final_answer_stream_from_llm:
-            content_piece = self._extract_content_from_llm_response(chunk)
-            if content_piece:
-                yield content_piece
+            log_context.stacks.append(
+                {"component": "final_answer_llm", "data": build_stack_data(self.llm)}
+            )
+        for chunk in final_stream:
+            content = self._extract_content(chunk)
+            if content:
+                yield {"answer": content}

+    def _build_planning_prompt(self, query: str) -> str:
+        """Build planning phase prompt"""
+        prompt = PLANNING_PROMPT_TEMPLATE.replace("{query}", query)
+        prompt = prompt.replace("{prompt}", self.prompt or "")
+        prompt = prompt.replace("{summaries}", "")
+        prompt = prompt.replace("{observations}", "\n".join(self.observations))
+        return prompt
+
+    def _build_execution_prompt(self, query: str) -> str:
+        """Build execution phase prompt with plan and observations"""
+        observations_str = "\n".join(self.observations)
+
+        if len(observations_str) > 20000:
+            observations_str = observations_str[:20000] + "\n...[truncated]"
+        return (
+            f"{self.prompt or ''}\n\n"
+            f"Follow this plan:\n{self.plan}\n\n"
+            f"Observations:\n{observations_str}\n\n"
+            f"If sufficient data exists to answer '{query}', respond with 'SATISFIED'. "
+            f"Otherwise, continue executing the plan."
+        )
+
+    def _build_final_answer_prompt(self, query: str) -> str:
+        """Build final synthesis prompt"""
+        observations_str = "\n".join(self.observations)
+
+        if len(observations_str) > 10000:
+            observations_str = observations_str[:10000] + "\n...[truncated]"
+            logger.warning("ReActAgent: Observations truncated for final answer")
+        return FINAL_PROMPT_TEMPLATE.format(query=query, observations=observations_str)
+
+    def _extract_content(self, response: Any) -> str:
+        """Extract text content from various LLM response formats"""
+        if not response:
+            return ""
+        collected = []
+
+        if isinstance(response, str):
+            return response
+        if hasattr(response, "message") and hasattr(response.message, "content"):
+            if response.message.content:
+                return response.message.content
+        if hasattr(response, "choices") and response.choices:
+            if hasattr(response.choices[0], "message"):
+                content = response.choices[0].message.content
+                if content:
+                    return content
+        if hasattr(response, "content") and isinstance(response.content, list):
+            if response.content and hasattr(response.content[0], "text"):
+                return response.content[0].text
+        try:
+            for chunk in response:
+                content_piece = ""
+
+                if hasattr(chunk, "choices") and chunk.choices:
+                    if hasattr(chunk.choices[0], "delta"):
+                        delta_content = chunk.choices[0].delta.content
+                        if delta_content:
+                            content_piece = delta_content
+                elif hasattr(chunk, "type") and chunk.type == "content_block_delta":
+                    if hasattr(chunk, "delta") and hasattr(chunk.delta, "text"):
+                        content_piece = chunk.delta.text
+                elif isinstance(chunk, str):
+                    content_piece = chunk
+                if content_piece:
+                    collected.append(content_piece)
+        except (TypeError, AttributeError):
+            logger.debug(
+                f"Response not iterable or unexpected format: {type(response)}"
+            )
+        except Exception as e:
+            logger.error(f"Error extracting content: {e}")
+        return "".join(collected)
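The new `_extract_content` helper replaces the sprawling `_extract_content_from_llm_response` and normalizes strings, OpenAI/Anthropic-style response objects, and raw streams to plain text. A small usage sketch of the branches visible above (constructing an agent is elided; only the helper's contract is shown):

# Stream fallback: a bare iterator of strings is the simplest accepted shape.
assert agent._extract_content(iter(["Hello", ", ", "world"])) == "Hello, world"
# Plain strings pass through, and falsy inputs collapse to "".
assert agent._extract_content("already text") == "already text"
assert agent._extract_content(None) == ""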
@@ -54,6 +54,10 @@ class AnswerResource(Resource, BaseAnswerResource):
                 default=True,
                 description="Whether to save the conversation",
             ),
+            "passthrough": fields.Raw(
+                required=False,
+                description="Dynamic parameters to inject into prompt template",
+            ),
         },
     )
@@ -69,8 +73,17 @@ class AnswerResource(Resource, BaseAnswerResource):
             processor.initialize()
             if not processor.decoded_token:
                 return make_response({"error": "Unauthorized"}, 401)
-            agent = processor.create_agent()
             retriever = processor.create_retriever()

+            docs_together, docs_list = processor.pre_fetch_docs(
+                data.get("question", "")
+            )
+            tools_data = processor.pre_fetch_tools()
+
+            agent = processor.create_agent(
+                docs_together=docs_together,
+                docs=docs_list,
+                tools_data=tools_data,
+            )
+
             if error := self.check_usage(processor.agent_config):
                 return error
@@ -78,7 +91,6 @@ class AnswerResource(Resource, BaseAnswerResource):
             stream = self.complete_stream(
                 question=data["question"],
                 agent=agent,
                 retriever=retriever,
                 conversation_id=processor.conversation_id,
                 user_api_key=processor.agent_config.get("user_api_key"),
                 decoded_token=processor.decoded_token,
@@ -3,7 +3,7 @@ import json
 import logging
 from typing import Any, Dict, Generator, List, Optional

-from flask import Response, make_response, jsonify
+from flask import jsonify, make_response, Response
 from flask_restx import Namespace

 from application.api.answer.services.conversation_service import ConversationService
@@ -41,9 +41,7 @@ class BaseAnswerResource:
             return missing_fields
         return None

-    def check_usage(
-        self, agent_config: Dict
-    ) -> Optional[Response]:
+    def check_usage(self, agent_config: Dict) -> Optional[Response]:
         """Check if there is a usage limit and if it is exceeded

         Args:
@@ -51,30 +49,40 @@ class BaseAnswerResource:

         Returns:
             None or Response if either of limits exceeded.
-
-
         """
         api_key = agent_config.get("user_api_key")
         if not api_key:
             return None
-
-
         agents_collection = self.db["agents"]
         agent = agents_collection.find_one({"key": api_key})

         if not agent:
             return make_response(
-                jsonify(
-                    {
-                        "success": False,
-                        "message": "Invalid API key."
-                    }
-                ),
-                401
+                jsonify({"success": False, "message": "Invalid API key."}), 401
             )

-        limited_token_mode = agent.get("limited_token_mode", False)
-        limited_request_mode = agent.get("limited_request_mode", False)
-        token_limit = int(agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]))
-        request_limit = int(agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]))
+        limited_token_mode_raw = agent.get("limited_token_mode", False)
+        limited_request_mode_raw = agent.get("limited_request_mode", False)
+
+        limited_token_mode = (
+            limited_token_mode_raw
+            if isinstance(limited_token_mode_raw, bool)
+            else limited_token_mode_raw == "True"
+        )
+        limited_request_mode = (
+            limited_request_mode_raw
+            if isinstance(limited_request_mode_raw, bool)
+            else limited_request_mode_raw == "True"
+        )
+
+        token_limit = int(
+            agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"])
+        )
+        request_limit = int(
+            agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"])
+        )

         token_usage_collection = self.db["token_usage"]
@@ -83,18 +91,20 @@ class BaseAnswerResource:

         match_query = {
             "timestamp": {"$gte": start_date, "$lte": end_date},
-            "api_key": api_key
+            "api_key": api_key,
         }

         if limited_token_mode:
             token_pipeline = [
                 {"$match": match_query},
                 {
                     "$group": {
                         "_id": None,
-                        "total_tokens": {"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}}
+                        "total_tokens": {
+                            "$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
+                        },
                     }
                 },
             ]
             token_result = list(token_usage_collection.aggregate(token_pipeline))
             daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
@@ -108,26 +118,33 @@ class BaseAnswerResource:

-        if not limited_token_mode and not limited_request_mode:
-            return None
-        elif limited_token_mode and token_limit > daily_token_usage:
-            return None
-        elif limited_request_mode and request_limit > daily_request_usage:
-            return None
-
-        return make_response(
-            jsonify(
-                {
-                    "success": False,
-                    "message": "Exceeding usage limit, please try again later."
-                }
-            ),
-            429,  # too many requests
+        token_exceeded = (
+            limited_token_mode and token_limit > 0 and daily_token_usage >= token_limit
         )
+        request_exceeded = (
+            limited_request_mode
+            and request_limit > 0
+            and daily_request_usage >= request_limit
+        )
+
+        if token_exceeded or request_exceeded:
+            return make_response(
+                jsonify(
+                    {
+                        "success": False,
+                        "message": "Exceeding usage limit, please try again later.",
+                    }
+                ),
+                429,
+            )
+
+        return None

     def complete_stream(
         self,
         question: str,
         agent: Any,
         retriever: Any,
         conversation_id: Optional[str],
         user_api_key: Optional[str],
         decoded_token: Dict[str, Any],
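The rewritten limit check deserves a worked example, because the old `elif` chain returned early once the first enabled limit looked fine. With both modes enabled, healthy tokens but exhausted requests (made-up numbers):

limited_token_mode, limited_request_mode = True, True
token_limit, daily_token_usage = 10_000, 500      # fine
request_limit, daily_request_usage = 100, 100     # exhausted

token_exceeded = limited_token_mode and token_limit > 0 and daily_token_usage >= token_limit
request_exceeded = (
    limited_request_mode and request_limit > 0 and daily_request_usage >= request_limit
)
print(token_exceeded or request_exceeded)  # True -> HTTP 429
# The old chain hit `elif limited_token_mode and token_limit > daily_token_usage`
# first and returned None, letting this request through; the `> 0` guards also
# make a zero limit mean "no limit" now.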
@@ -156,6 +173,7 @@ class BaseAnswerResource:
             agent_id: ID of agent used
             is_shared_usage: Flag for shared agent usage
             shared_token: Token for shared agent
+            retrieved_docs: Pre-fetched documents for sources (optional)

         Yields:
             Server-sent event strings
@@ -166,7 +184,7 @@ class BaseAnswerResource:
             schema_info = None
             structured_chunks = []

-            for line in agent.gen(query=question, retriever=retriever):
+            for line in agent.gen(query=question):
                 if "answer" in line:
                     response_full += str(line["answer"])
                     if line.get("structured"):
@@ -247,7 +265,6 @@ class BaseAnswerResource:
                 data = json.dumps(id_data)
                 yield f"data: {data}\n\n"

-            retriever_params = retriever.get_params()
             log_data = {
                 "action": "stream_answer",
                 "level": "info",
@@ -256,7 +273,6 @@ class BaseAnswerResource:
                 "question": question,
                 "response": response_full,
                 "sources": source_log_docs,
-                "retriever_params": retriever_params,
                 "attachments": attachment_ids,
                 "timestamp": datetime.datetime.now(datetime.timezone.utc),
             }
@@ -264,24 +280,19 @@ class BaseAnswerResource:
                 log_data["structured_output"] = True
                 if schema_info:
                     log_data["schema"] = schema_info

-            # clean up text fields to be no longer than 10000 characters
+            # Clean up text fields to be no longer than 10000 characters
             for key, value in log_data.items():
                 if isinstance(value, str) and len(value) > 10000:
                     log_data[key] = value[:10000]
-
-            self.user_logs_collection.insert_one(log_data)
-
-            # End of stream
+            self.user_logs_collection.insert_one(log_data)

             data = json.dumps({"type": "end"})
             yield f"data: {data}\n\n"
         except GeneratorExit:
             # Client aborted the connection
-            logger.info(
-                f"Stream aborted by client for question: {question[:50]}... "
-            )
-            # Save partial response to database before exiting
+            logger.info(f"Stream aborted by client for question: {question[:50]}... ")
+            # Save partial response
             if should_save_conversation and response_full:
                 try:
                     if isNoneDoc:
@@ -311,7 +322,9 @@ class BaseAnswerResource:
                         attachment_ids=attachment_ids,
                     )
                 except Exception as e:
-                    logger.error(f"Error saving partial response: {str(e)}", exc_info=True)
+                    logger.error(
+                        f"Error saving partial response: {str(e)}", exc_info=True
+                    )
                     raise
         except Exception as e:
             logger.error(f"Error in stream: {str(e)}", exc_info=True)
@@ -60,6 +60,10 @@ class StreamResource(Resource, BaseAnswerResource):
             "attachments": fields.List(
                 fields.String, required=False, description="List of attachment IDs"
             ),
+            "passthrough": fields.Raw(
+                required=False,
+                description="Dynamic parameters to inject into prompt template",
+            ),
         },
     )
@@ -73,17 +77,20 @@ class StreamResource(Resource, BaseAnswerResource):
         processor = StreamProcessor(data, decoded_token)
         try:
             processor.initialize()
-            agent = processor.create_agent()
             retriever = processor.create_retriever()

+            docs_together, docs_list = processor.pre_fetch_docs(data["question"])
+            tools_data = processor.pre_fetch_tools()
+
+            agent = processor.create_agent(
+                docs_together=docs_together, docs=docs_list, tools_data=tools_data
+            )
+
             if error := self.check_usage(processor.agent_config):
                 return error

             return Response(
                 self.complete_stream(
                     question=data["question"],
                     agent=agent,
                     retriever=retriever,
                     conversation_id=processor.conversation_id,
                     user_api_key=processor.agent_config.get("user_api_key"),
                     decoded_token=processor.decoded_token,
@@ -133,10 +133,9 @@ class ConversationService:

         messages_summary = [
             {
-                "role": "assistant",
-                "content": "Summarise following conversation in no more than 3 "
-                "words, respond ONLY with the summary, use the same "
-                "language as the user query",
+                "role": "system",
+                "content": "You are a helpful assistant that creates concise conversation titles. "
+                "Summarize conversations in 3 words or less using the same language as the user.",
             },
             {
                 "role": "user",
application/api/answer/services/prompt_renderer.py (new file, 97 lines)
|
||||
import logging
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from application.templates.namespaces import NamespaceManager
|
||||
|
||||
from application.templates.template_engine import TemplateEngine, TemplateRenderError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PromptRenderer:
|
||||
"""Service for rendering prompts with dynamic context using namespaces"""
|
||||
|
||||
def __init__(self):
|
||||
self.template_engine = TemplateEngine()
|
||||
self.namespace_manager = NamespaceManager()
|
||||
|
||||
def render_prompt(
|
||||
self,
|
||||
prompt_content: str,
|
||||
user_id: Optional[str] = None,
|
||||
request_id: Optional[str] = None,
|
||||
passthrough_data: Optional[Dict[str, Any]] = None,
|
||||
docs: Optional[list] = None,
|
||||
docs_together: Optional[str] = None,
|
||||
tools_data: Optional[Dict[str, Any]] = None,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
"""
|
||||
Render prompt with full context from all namespaces.
|
||||
|
||||
Args:
|
||||
prompt_content: Raw prompt template string
|
||||
user_id: Current user identifier
|
||||
request_id: Unique request identifier
|
||||
passthrough_data: Parameters from web request
|
||||
docs: RAG retrieved documents
|
||||
docs_together: Concatenated document content
|
||||
tools_data: Pre-fetched tool results organized by tool name
|
||||
**kwargs: Additional parameters for namespace builders
|
||||
|
||||
Returns:
|
||||
Rendered prompt string with all variables substituted
|
||||
|
||||
Raises:
|
||||
TemplateRenderError: If template rendering fails
|
||||
"""
|
||||
if not prompt_content:
|
||||
return ""
|
||||
|
||||
uses_template = self._uses_template_syntax(prompt_content)
|
||||
|
||||
if not uses_template:
|
||||
return self._apply_legacy_substitutions(prompt_content, docs_together)
|
||||
|
||||
try:
|
||||
context = self.namespace_manager.build_context(
|
||||
user_id=user_id,
|
||||
request_id=request_id,
|
||||
passthrough_data=passthrough_data,
|
||||
docs=docs,
|
||||
docs_together=docs_together,
|
||||
tools_data=tools_data,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
return self.template_engine.render(prompt_content, context)
|
||||
except TemplateRenderError:
|
||||
raise
|
||||
except Exception as e:
|
||||
error_msg = f"Prompt rendering failed: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
raise TemplateRenderError(error_msg) from e
|
||||
|
||||
def _uses_template_syntax(self, prompt_content: str) -> bool:
|
||||
"""Check if prompt uses Jinja2 template syntax"""
|
||||
return "{{" in prompt_content and "}}" in prompt_content
|
||||
|
||||
def _apply_legacy_substitutions(
|
||||
self, prompt_content: str, docs_together: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
Apply backward-compatible substitutions for old prompt format.
|
||||
|
||||
Handles legacy {summaries} and {query} placeholders during transition period.
|
||||
"""
|
||||
if docs_together:
|
||||
prompt_content = prompt_content.replace("{summaries}", docs_together)
|
||||
return prompt_content
|
||||
|
||||
def validate_template(self, prompt_content: str) -> bool:
|
||||
"""Validate prompt template syntax"""
|
||||
return self.template_engine.validate_template(prompt_content)
|
||||
|
||||
def extract_variables(self, prompt_content: str) -> set[str]:
|
||||
"""Extract all variable names from prompt template"""
|
||||
return self.template_engine.extract_variables(prompt_content)
|
||||
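A usage sketch for the new service (hypothetical values; which variables a template can reference is decided by `NamespaceManager.build_context`, which is not part of this diff):

renderer = PromptRenderer()

# Jinja2-style prompt: routed through the template engine.
templated = "Answer as {{ passthrough.persona }}. Context:\n{{ docs_together }}"
rendered = renderer.render_prompt(
    templated,
    user_id="local",
    passthrough_data={"persona": "a concise SRE"},
    docs_together="deploy.md\nUse docker compose up.",
)

# Legacy prompt: no {{ }} syntax, so only the old {summaries} placeholder is swapped in.
legacy = renderer.render_prompt("Context:\n{summaries}", docs_together="deploy.md ...")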
@@ -3,7 +3,7 @@ import json
 import logging
 import os
 from pathlib import Path
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Set

 from bson.dbref import DBRef

@@ -11,10 +11,15 @@ from bson.objectid import ObjectId

 from application.agents.agent_creator import AgentCreator
 from application.api.answer.services.conversation_service import ConversationService
+from application.api.answer.services.prompt_renderer import PromptRenderer
 from application.core.mongo_db import MongoDB
 from application.core.settings import settings
 from application.retriever.retriever_creator import RetrieverCreator
-from application.utils import get_gpt_model, limit_chat_history
+from application.utils import (
+    calculate_doc_token_budget,
+    get_gpt_model,
+    limit_chat_history,
+)

 logger = logging.getLogger(__name__)

@@ -73,12 +78,16 @@ class StreamProcessor:
         self.all_sources = []
         self.attachments = []
         self.history = []
+        self.retrieved_docs = []
         self.agent_config = {}
         self.retriever_config = {}
         self.is_shared_usage = False
         self.shared_token = None
         self.gpt_model = get_gpt_model()
         self.conversation_service = ConversationService()
+        self.prompt_renderer = PromptRenderer()
+        self._prompt_content: Optional[str] = None
+        self._required_tool_actions: Optional[Dict[str, Set[Optional[str]]]] = None

     def initialize(self):
         """Initialize all required components for processing"""
@@ -311,19 +320,312 @@ class StreamProcessor:
         )

     def _configure_retriever(self):
         """Configure the retriever based on request data"""
+        history_token_limit = int(self.data.get("token_limit", 2000))
+        doc_token_limit = calculate_doc_token_budget(
+            gpt_model=self.gpt_model, history_token_limit=history_token_limit
+        )
+
         self.retriever_config = {
             "retriever_name": self.data.get("retriever", "classic"),
             "chunks": int(self.data.get("chunks", 2)),
             "token_limit": self.data.get("token_limit", settings.DEFAULT_MAX_HISTORY),
+            "doc_token_limit": doc_token_limit,
+            "history_token_limit": history_token_limit,
         }

         api_key = self.data.get("api_key") or self.agent_key
         if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
             self.retriever_config["chunks"] = 0
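`calculate_doc_token_budget` itself lives in `application/utils` and is not shown in this diff; the commit message calls the scheme a three-tier token budget. A sketch of what such a helper plausibly computes — the constants, lookup table, and exact formula below are assumptions, not the shipped code:

MODEL_CONTEXT_WINDOWS = {"gpt-4o": 128_000}  # tier 1: the model's context window
RESERVED_FOR_COMPLETION = 8_000              # tier 3: headroom for the answer

def calculate_doc_token_budget(gpt_model: str, history_token_limit: int) -> int:
    window = MODEL_CONTEXT_WINDOWS.get(gpt_model, 32_000)
    # Tier 2 is the chat-history budget; retrieved documents get the remainder.
    return max(window - history_token_limit - RESERVED_FOR_COMPLETION, 0)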
-    def create_agent(self):
-        """Create and return the configured agent"""
+    def create_retriever(self):
+        return RetrieverCreator.create_retriever(
+            self.retriever_config["retriever_name"],
+            source=self.source,
+            chat_history=self.history,
+            prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
+            chunks=self.retriever_config["chunks"],
+            doc_token_limit=self.retriever_config.get("doc_token_limit", 50000),
+            gpt_model=self.gpt_model,
+            user_api_key=self.agent_config["user_api_key"],
+            decoded_token=self.decoded_token,
+        )
+
+    def pre_fetch_docs(self, question: str) -> tuple[Optional[str], Optional[list]]:
+        """Pre-fetch documents for template rendering before agent creation"""
+        if self.data.get("isNoneDoc", False):
+            logger.info("Pre-fetch skipped: isNoneDoc=True")
+            return None, None
+        try:
+            retriever = self.create_retriever()
+            logger.info(
+                f"Pre-fetching docs with chunks={retriever.chunks}, doc_token_limit={retriever.doc_token_limit}"
+            )
+            docs = retriever.search(question)
+            logger.info(f"Pre-fetch retrieved {len(docs) if docs else 0} documents")
+
+            if not docs:
+                logger.info("Pre-fetch: No documents returned from search")
+                return None, None
+            self.retrieved_docs = docs
+
+            docs_with_filenames = []
+            for doc in docs:
+                filename = doc.get("filename") or doc.get("title") or doc.get("source")
+                if filename:
+                    chunk_header = str(filename)
+                    docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
+                else:
+                    docs_with_filenames.append(doc["text"])
+            docs_together = "\n\n".join(docs_with_filenames)
+
+            logger.info(f"Pre-fetch docs_together size: {len(docs_together)} chars")
+
+            return docs_together, docs
+        except Exception as e:
+            logger.error(f"Failed to pre-fetch docs: {str(e)}", exc_info=True)
+            return None, None
+
+    def pre_fetch_tools(self) -> Optional[Dict[str, Any]]:
+        """Pre-fetch tool data for template rendering before agent creation
+
+        Can be controlled via:
+        1. Global setting: ENABLE_TOOL_PREFETCH in .env
+        2. Per-request: disable_tool_prefetch in request data
+        """
+        if not settings.ENABLE_TOOL_PREFETCH:
+            logger.info(
+                "Tool pre-fetching disabled globally via ENABLE_TOOL_PREFETCH setting"
+            )
+            return None
+
+        if self.data.get("disable_tool_prefetch", False):
+            logger.info("Tool pre-fetching disabled for this request")
+            return None
+
+        required_tool_actions = self._get_required_tool_actions()
+        filtering_enabled = required_tool_actions is not None
+
+        try:
+            user_tools_collection = self.db["user_tools"]
+            user_id = self.initial_user_id or "local"
+
+            user_tools = list(
+                user_tools_collection.find({"user": user_id, "status": True})
+            )
+
+            if not user_tools:
+                return None
+
+            tools_data = {}
+
+            for tool_doc in user_tools:
+                tool_name = tool_doc.get("name")
+                tool_id = str(tool_doc.get("_id"))
+
+                if filtering_enabled:
+                    required_actions_by_name = required_tool_actions.get(
+                        tool_name, set()
+                    )
+                    required_actions_by_id = required_tool_actions.get(tool_id, set())
+
+                    required_actions = required_actions_by_name | required_actions_by_id
+
+                    if not required_actions:
+                        continue
+                else:
+                    required_actions = None
+
+                tool_data = self._fetch_tool_data(tool_doc, required_actions)
+                if tool_data:
+                    tools_data[tool_name] = tool_data
+                    tools_data[tool_id] = tool_data
+
+            return tools_data if tools_data else None
+        except Exception as e:
+            logger.warning(f"Failed to pre-fetch tools: {type(e).__name__}")
+            return None
+
+    def _fetch_tool_data(
+        self,
+        tool_doc: Dict[str, Any],
+        required_actions: Optional[Set[Optional[str]]],
+    ) -> Optional[Dict[str, Any]]:
+        """Fetch and execute tool actions with saved parameters"""
+        try:
+            from application.agents.tools.tool_manager import ToolManager
+
+            tool_name = tool_doc.get("name")
+            tool_config = tool_doc.get("config", {}).copy()
+            tool_config["tool_id"] = str(tool_doc["_id"])
+
+            tool_manager = ToolManager(config={tool_name: tool_config})
+            user_id = self.initial_user_id or "local"
+            tool = tool_manager.load_tool(tool_name, tool_config, user_id=user_id)
+
+            if not tool:
+                logger.debug(f"Tool '{tool_name}' failed to load")
+                return None
+
+            tool_actions = tool.get_actions_metadata()
+            if not tool_actions:
+                logger.debug(f"Tool '{tool_name}' has no actions")
+                return None
+
+            saved_actions = tool_doc.get("actions", [])
+
+            include_all_actions = required_actions is None or (
+                required_actions and None in required_actions
+            )
+            allowed_actions: Set[str] = (
+                {action for action in required_actions if isinstance(action, str)}
+                if required_actions
+                else set()
+            )
+
+            action_results = {}
+            for action_meta in tool_actions:
+                action_name = action_meta.get("name")
+                if action_name is None:
+                    continue
+                if (
+                    not include_all_actions
+                    and allowed_actions
+                    and action_name not in allowed_actions
+                ):
+                    continue
+
+                try:
+                    saved_action = None
+                    for sa in saved_actions:
+                        if sa.get("name") == action_name:
+                            saved_action = sa
+                            break
+
+                    action_params = action_meta.get("parameters", {})
+                    properties = action_params.get("properties", {})
+
+                    kwargs = {}
+                    for param_name, param_spec in properties.items():
+                        if saved_action:
+                            saved_props = saved_action.get("parameters", {}).get(
+                                "properties", {}
+                            )
+                            if param_name in saved_props:
+                                param_value = saved_props[param_name].get("value")
+                                if param_value is not None:
+                                    kwargs[param_name] = param_value
+                                    continue
+
+                        if param_name in tool_config:
+                            kwargs[param_name] = tool_config[param_name]
+                        elif "default" in param_spec:
+                            kwargs[param_name] = param_spec["default"]
+
+                    result = tool.execute_action(action_name, **kwargs)
+                    action_results[action_name] = result
+                except Exception as e:
+                    logger.debug(
+                        f"Action '{action_name}' execution failed: {type(e).__name__}"
+                    )
+                    continue
+
+            return action_results if action_results else None
+
+        except Exception as e:
+            logger.debug(f"Tool pre-fetch failed for '{tool_name}': {type(e).__name__}")
+            return None
+
+    def _get_prompt_content(self) -> Optional[str]:
+        """Retrieve and cache the raw prompt content for the current agent configuration."""
+        if self._prompt_content is not None:
+            return self._prompt_content
+        prompt_id = (
+            self.agent_config.get("prompt_id")
+            if isinstance(self.agent_config, dict)
+            else None
+        )
+        if not prompt_id:
+            return None
+        try:
+            self._prompt_content = get_prompt(prompt_id, self.prompts_collection)
+        except ValueError as e:
+            logger.debug(f"Invalid prompt ID '{prompt_id}': {str(e)}")
+            self._prompt_content = None
+        except Exception as e:
+            logger.debug(f"Failed to fetch prompt '{prompt_id}': {type(e).__name__}")
+            self._prompt_content = None
+        return self._prompt_content
+
+    def _get_required_tool_actions(self) -> Optional[Dict[str, Set[Optional[str]]]]:
+        """Determine which tool actions are referenced in the prompt template"""
+        if self._required_tool_actions is not None:
+            return self._required_tool_actions
+
+        prompt_content = self._get_prompt_content()
+        if prompt_content is None:
+            return None
+
+        if "{{" not in prompt_content or "}}" not in prompt_content:
+            self._required_tool_actions = {}
+            return self._required_tool_actions
+
+        try:
+            from application.templates.template_engine import TemplateEngine
+
+            template_engine = TemplateEngine()
+            usages = template_engine.extract_tool_usages(prompt_content)
+            self._required_tool_actions = usages
+            return self._required_tool_actions
+        except Exception as e:
+            logger.debug(f"Failed to extract tool usages: {type(e).__name__}")
+            self._required_tool_actions = {}
+            return self._required_tool_actions
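Per its annotation, `_get_required_tool_actions` yields a `Dict[str, Set[Optional[str]]]` keyed by tool name or id. Assuming `extract_tool_usages` parses references such as `{{ tools.memory.view }}` out of a prompt (the template syntax is an assumption; only the return type appears in this diff), the mapping and its `None` convention would look like:

required = {"memory": {"view"}, "weather_api": {None}}  # None -> pre-fetch every action
for tool, actions in required.items():
    if None in actions:
        print(f"{tool}: all actions")
    else:
        print(f"{tool}: {sorted(a for a in actions if a)}")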
+    def _fetch_memory_tool_data(
+        self, tool_doc: Dict[str, Any]
+    ) -> Optional[Dict[str, Any]]:
+        """Fetch memory tool data for pre-injection into prompt"""
+        try:
+            tool_config = tool_doc.get("config", {}).copy()
+            tool_config["tool_id"] = str(tool_doc["_id"])
+
+            from application.agents.tools.memory import MemoryTool
+
+            memory_tool = MemoryTool(tool_config, self.initial_user_id)
+
+            root_view = memory_tool.execute_action("view", path="/")
+
+            if "Error:" in root_view or not root_view.strip():
+                return None
+
+            return {"root": root_view, "available": True}
+        except Exception as e:
+            logger.warning(f"Failed to fetch memory tool data: {str(e)}")
+            return None
+
+    def create_agent(
+        self,
+        docs_together: Optional[str] = None,
+        docs: Optional[list] = None,
+        tools_data: Optional[Dict[str, Any]] = None,
+    ):
+        """Create and return the configured agent with rendered prompt"""
+        raw_prompt = self._get_prompt_content()
+        if raw_prompt is None:
+            raw_prompt = get_prompt(
+                self.agent_config["prompt_id"], self.prompts_collection
+            )
+            self._prompt_content = raw_prompt
+
+        rendered_prompt = self.prompt_renderer.render_prompt(
+            prompt_content=raw_prompt,
+            user_id=self.initial_user_id,
+            request_id=self.data.get("request_id"),
+            passthrough_data=self.data.get("passthrough"),
+            docs=docs,
+            docs_together=docs_together,
+            tools_data=tools_data,
+        )
+
         return AgentCreator.create_agent(
             self.agent_config["agent_type"],
             endpoint="stream",
@@ -331,23 +633,10 @@ class StreamProcessor:
             gpt_model=self.gpt_model,
             api_key=settings.API_KEY,
             user_api_key=self.agent_config["user_api_key"],
-            prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
+            prompt=rendered_prompt,
             chat_history=self.history,
+            retrieved_docs=self.retrieved_docs,
             decoded_token=self.decoded_token,
             attachments=self.attachments,
             json_schema=self.agent_config.get("json_schema"),
         )
-
-    def create_retriever(self):
-        """Create and return the configured retriever"""
-        return RetrieverCreator.create_retriever(
-            self.retriever_config["retriever_name"],
-            source=self.source,
-            chat_history=self.history,
-            prompt=get_prompt(self.agent_config["prompt_id"], self.prompts_collection),
-            chunks=self.retriever_config["chunks"],
-            token_limit=self.retriever_config["token_limit"],
-            gpt_model=self.gpt_model,
-            user_api_key=self.agent_config["user_api_key"],
-            decoded_token=self.decoded_token,
-        )
@@ -10,7 +10,6 @@ from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource

from application.api import api
from application.core.settings import settings
from application.api.user.base import (
    agents_collection,
    db,
@@ -20,6 +19,7 @@ from application.api.user.base import (
    storage,
    users_collection,
)
from application.core.settings import settings
from application.utils import (
    check_required_fields,
    generate_image_url,
@@ -76,9 +76,13 @@ class GetAgent(Resource):
            "status": agent.get("status", ""),
            "json_schema": agent.get("json_schema"),
            "limited_token_mode": agent.get("limited_token_mode", False),
            "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
            "token_limit": agent.get(
                "token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]
            ),
            "limited_request_mode": agent.get("limited_request_mode", False),
            "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
            "request_limit": agent.get(
                "request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]
            ),
            "created_at": agent.get("createdAt", ""),
            "updated_at": agent.get("updatedAt", ""),
            "last_used_at": agent.get("lastUsedAt", ""),
@@ -149,9 +153,13 @@ class GetAgents(Resource):
            "status": agent.get("status", ""),
            "json_schema": agent.get("json_schema"),
            "limited_token_mode": agent.get("limited_token_mode", False),
            "token_limit": agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
            "token_limit": agent.get(
                "token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]
            ),
            "limited_request_mode": agent.get("limited_request_mode", False),
            "request_limit": agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
            "request_limit": agent.get(
                "request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]
            ),
            "created_at": agent.get("createdAt", ""),
            "updated_at": agent.get("updatedAt", ""),
            "last_used_at": agent.get("lastUsedAt", ""),
@@ -209,21 +217,19 @@ class CreateAgent(Resource):
            description="JSON schema for enforcing structured output format",
        ),
        "limited_token_mode": fields.Boolean(
            required=False,
            description="Whether the agent is in limited token mode"
            required=False, description="Whether the agent is in limited token mode"
        ),
        "token_limit": fields.Integer(
            required=False,
            description="Token limit for the agent in limited mode"
            required=False, description="Token limit for the agent in limited mode"
        ),
        "limited_request_mode": fields.Boolean(
            required=False,
            description="Whether the agent is in limited request mode"
            description="Whether the agent is in limited request mode",
        ),
        "request_limit": fields.Integer(
            required=False,
            description="Request limit for the agent in limited mode"
        )
            description="Request limit for the agent in limited mode",
        ),
    },
)

@@ -369,10 +375,26 @@ class CreateAgent(Resource):
            "agent_type": data.get("agent_type", ""),
            "status": data.get("status"),
            "json_schema": data.get("json_schema"),
            "limited_token_mode": data.get("limited_token_mode", False),
            "token_limit": data.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]),
            "limited_request_mode": data.get("limited_request_mode", False),
            "request_limit": data.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]),
            "limited_token_mode": (
                data.get("limited_token_mode") == "True"
                if isinstance(data.get("limited_token_mode"), str)
                else bool(data.get("limited_token_mode", False))
            ),
            "token_limit": int(
                data.get(
                    "token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]
                )
            ),
            "limited_request_mode": (
                data.get("limited_request_mode") == "True"
                if isinstance(data.get("limited_request_mode"), str)
                else bool(data.get("limited_request_mode", False))
            ),
            "request_limit": int(
                data.get(
                    "request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]
                )
            ),
            "createdAt": datetime.datetime.now(datetime.timezone.utc),
            "updatedAt": datetime.datetime.now(datetime.timezone.utc),
            "lastUsedAt": None,
@@ -429,21 +451,19 @@ class UpdateAgent(Resource):
            description="JSON schema for enforcing structured output format",
        ),
        "limited_token_mode": fields.Boolean(
            required=False,
            description="Whether the agent is in limited token mode"
            required=False, description="Whether the agent is in limited token mode"
        ),
        "token_limit": fields.Integer(
            required=False,
            description="Token limit for the agent in limited mode"
            required=False, description="Token limit for the agent in limited mode"
        ),
        "limited_request_mode": fields.Boolean(
            required=False,
            description="Whether the agent is in limited request mode"
            description="Whether the agent is in limited request mode",
        ),
        "request_limit": fields.Integer(
            required=False,
            description="Request limit for the agent in limited mode"
        )
            description="Request limit for the agent in limited mode",
        ),
    },
)

@@ -534,7 +554,7 @@ class UpdateAgent(Resource):
            "limited_token_mode",
            "token_limit",
            "limited_request_mode",
            "request_limit"
            "request_limit",
        ]

        for field in allowed_fields:
@@ -652,8 +672,15 @@ class UpdateAgent(Resource):
                else:
                    update_fields[field] = None
            elif field == "limited_token_mode":
                is_mode_enabled = data.get("limited_token_mode", False)
                if is_mode_enabled and data.get("token_limit") is None:
                raw_value = data.get("limited_token_mode", False)
                bool_value = (
                    raw_value == "True"
                    if isinstance(raw_value, str)
                    else bool(raw_value)
                )
                update_fields[field] = bool_value

                if bool_value and data.get("token_limit") is None:
                    return make_response(
                        jsonify(
                            {
@@ -664,8 +691,15 @@ class UpdateAgent(Resource):
                        400,
                    )
            elif field == "limited_request_mode":
                is_mode_enabled = data.get("limited_request_mode", False)
                if is_mode_enabled and data.get("request_limit") is None:
                raw_value = data.get("limited_request_mode", False)
                bool_value = (
                    raw_value == "True"
                    if isinstance(raw_value, str)
                    else bool(raw_value)
                )
                update_fields[field] = bool_value

                if bool_value and data.get("request_limit") is None:
                    return make_response(
                        jsonify(
                            {
@@ -677,7 +711,11 @@ class UpdateAgent(Resource):
                    )
            elif field == "token_limit":
                token_limit = data.get("token_limit")
                if token_limit is not None and not data.get("limited_token_mode"):
                # Convert to int and store
                update_fields[field] = int(token_limit) if token_limit else 0

                # Validate consistency with mode
                if update_fields[field] > 0 and not data.get("limited_token_mode"):
                    return make_response(
                        jsonify(
                            {
@@ -689,7 +727,9 @@ class UpdateAgent(Resource):
                    )
            elif field == "request_limit":
                request_limit = data.get("request_limit")
                if request_limit is not None and not data.get("limited_request_mode"):
                update_fields[field] = int(request_limit) if request_limit else 0

                if update_fields[field] > 0 and not data.get("limited_request_mode"):
                    return make_response(
                        jsonify(
                            {
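The repeated == "True" checks above exist because multipart form submissions deliver booleans as the strings "True"/"False", while JSON bodies carry real booleans. The same coercion, pulled out into a standalone sketch (the helper name is made up for illustration):

    def coerce_form_bool(raw_value):
        # Form-encoded requests send "True"/"False" strings; JSON sends bools.
        if isinstance(raw_value, str):
            return raw_value == "True"
        return bool(raw_value)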
@@ -23,10 +23,18 @@ class Settings(BaseSettings):
    LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
    DEFAULT_MAX_HISTORY: int = 150
    LLM_TOKEN_LIMITS: dict = {
        "gpt-4o": 128000,
        "gpt-4o-mini": 128000,
        "gpt-4": 8192,
        "gpt-3.5-turbo": 4096,
        "claude-2": 1e5,
        "gemini-2.5-flash": 1e6,
        "claude-2": int(1e5),
        "gemini-2.5-flash": int(1e6),
    }
    DEFAULT_LLM_TOKEN_LIMIT: int = 128000
    RESERVED_TOKENS: dict = {
        "system_prompt": 500,
        "current_query": 500,
        "safety_buffer": 1000,
    }
    DEFAULT_AGENT_LIMITS: dict = {
        "token_limit": 50000,
@@ -133,5 +141,8 @@ class Settings(BaseSettings):
    TTS_PROVIDER: str = "google_tts"  # google_tts or elevenlabs
    ELEVENLABS_API_KEY: Optional[str] = None

    # Tool pre-fetch settings
    ENABLE_TOOL_PREFETCH: bool = True

path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
@@ -44,6 +44,12 @@ class BaseLLM(ABC):
        )
        return self._fallback_llm

    @staticmethod
    def _remove_null_values(args_dict):
        if not isinstance(args_dict, dict):
            return args_dict
        return {k: v for k, v in args_dict.items() if v is not None}

    def _execute_with_fallback(
        self, method_name: str, decorators: list, *args, **kwargs
    ):
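_remove_null_values exists because models sometimes emit tool-call arguments with explicit nulls, which some provider APIs reject. Its behavior on made-up inputs:

    BaseLLM._remove_null_values({"path": "/", "depth": None})  # {"path": "/"}
    BaseLLM._remove_null_values(["not", "a", "dict"])          # non-dicts pass through unchanged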
@@ -33,14 +33,15 @@ class DocsGPTAPILLM(BaseLLM):
                        {"role": role, "content": item["text"]}
                    )
                elif "function_call" in item:
                    cleaned_args = self._remove_null_values(
                        item["function_call"]["args"]
                    )
                    tool_call = {
                        "id": item["function_call"]["call_id"],
                        "type": "function",
                        "function": {
                            "name": item["function_call"]["name"],
                            "arguments": json.dumps(
                                item["function_call"]["args"]
                            ),
                            "arguments": json.dumps(cleaned_args),
                        },
                    }
                    cleaned_messages.append(
@@ -163,10 +163,14 @@ class GoogleLLM(BaseLLM):
                if "text" in item:
                    parts.append(types.Part.from_text(text=item["text"]))
                elif "function_call" in item:
                    # Remove null values from args to avoid API errors
                    cleaned_args = self._remove_null_values(
                        item["function_call"]["args"]
                    )
                    parts.append(
                        types.Part.from_function_call(
                            name=item["function_call"]["name"],
                            args=item["function_call"]["args"],
                            args=cleaned_args,
                        )
                    )
                elif "function_response" in item:
@@ -386,7 +390,7 @@ class GoogleLLM(BaseLLM):
            elif hasattr(chunk, "text"):
                yield chunk.text
        finally:
            if hasattr(response, 'close'):
            if hasattr(response, "close"):
                response.close()

    def _supports_tools(self):
@@ -44,14 +44,15 @@ class OpenAILLM(BaseLLM):
                        {"role": role, "content": item["text"]}
                    )
                elif "function_call" in item:
                    cleaned_args = self._remove_null_values(
                        item["function_call"]["args"]
                    )
                    tool_call = {
                        "id": item["function_call"]["call_id"],
                        "type": "function",
                        "function": {
                            "name": item["function_call"]["name"],
                            "arguments": json.dumps(
                                item["function_call"]["args"]
                            ),
                            "arguments": json.dumps(cleaned_args),
                        },
                    }
                    cleaned_messages.append(
@@ -181,7 +182,7 @@ class OpenAILLM(BaseLLM):
            elif len(line.choices) > 0:
                yield line.choices[0]
        finally:
            if hasattr(response, 'close'):
            if hasattr(response, "close"):
                response.close()

    def _supports_tools(self):
@@ -8,7 +8,3 @@ class BaseRetriever(ABC):
    @abstractmethod
    def search(self, *args, **kwargs):
        pass

    @abstractmethod
    def get_params(self):
        pass
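With the ABC trimmed down to search and get_params, a conforming retriever only needs those two methods. A minimal, purely illustrative subclass:

    from application.retriever.base import BaseRetriever

    class EchoRetriever(BaseRetriever):
        # Toy retriever: echoes the query back as a single "document".
        def search(self, query="", *args, **kwargs):
            return [{"title": "echo", "text": query, "source": "local", "filename": "echo"}]

        def get_params(self):
            return {"question": None, "chunks": 0}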
@@ -4,7 +4,7 @@ import os
from application.core.settings import settings
from application.llm.llm_creator import LLMCreator
from application.retriever.base import BaseRetriever

from application.utils import num_tokens_from_string
from application.vectorstore.vector_creator import VectorCreator


@@ -15,14 +15,13 @@ class ClassicRAG(BaseRetriever):
        chat_history=None,
        prompt="",
        chunks=2,
        token_limit=150,
        doc_token_limit=50000,
        gpt_model="docsgpt",
        user_api_key=None,
        llm_name=settings.LLM_PROVIDER,
        api_key=settings.API_KEY,
        decoded_token=None,
    ):
        """Initialize ClassicRAG retriever with vectorstore sources and LLM configuration"""
        self.original_question = source.get("question", "")
        self.chat_history = chat_history if chat_history is not None else []
        self.prompt = prompt
@@ -42,16 +41,7 @@ class ClassicRAG(BaseRetriever):
            f"sources={'active_docs' in source and source['active_docs'] is not None}"
        )
        self.gpt_model = gpt_model
        self.token_limit = (
            token_limit
            if token_limit
            < settings.LLM_TOKEN_LIMITS.get(
                self.gpt_model, settings.DEFAULT_MAX_HISTORY
            )
            else settings.LLM_TOKEN_LIMITS.get(
                self.gpt_model, settings.DEFAULT_MAX_HISTORY
            )
        )
        self.doc_token_limit = doc_token_limit
        self.user_api_key = user_api_key
        self.llm_name = llm_name
        self.api_key = api_key
@@ -118,21 +108,17 @@ class ClassicRAG(BaseRetriever):
        return self.original_question

    def _get_data(self):
        """Retrieve relevant documents from configured vectorstores"""
        if self.chunks == 0 or not self.vectorstores:
            logging.info(
                f"ClassicRAG._get_data: Skipping retrieval - chunks={self.chunks}, "
                f"vectorstores_count={len(self.vectorstores) if self.vectorstores else 0}"
            )
            return []

        all_docs = []
        chunks_per_source = max(1, self.chunks // len(self.vectorstores))

        logging.info(
            f"ClassicRAG._get_data: Starting retrieval with chunks={self.chunks}, "
            f"vectorstores={self.vectorstores}, chunks_per_source={chunks_per_source}, "
            f"query='{self.question[:50]}...'"
        )
        token_budget = max(int(self.doc_token_limit * 0.9), 100)
        cumulative_tokens = 0

        for vectorstore_id in self.vectorstores:
            if vectorstore_id:
@@ -140,15 +126,21 @@ class ClassicRAG(BaseRetriever):
                docsearch = VectorCreator.create_vectorstore(
                    settings.VECTOR_STORE, vectorstore_id, settings.EMBEDDINGS_KEY
                )
                docs_temp = docsearch.search(self.question, k=chunks_per_source)
                docs_temp = docsearch.search(
                    self.question, k=max(chunks_per_source * 2, 20)
                )

                for doc in docs_temp:
                    if cumulative_tokens >= token_budget:
                        break

                    if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
                        page_content = doc.page_content
                        metadata = doc.metadata
                    else:
                        page_content = doc.get("text", doc.get("page_content", ""))
                        metadata = doc.get("metadata", {})

                    title = metadata.get(
                        "title", metadata.get("post_title", page_content)
                    )
@@ -168,23 +160,35 @@ class ClassicRAG(BaseRetriever):
                    if not filename:
                        filename = title
                    source_path = metadata.get("source") or vectorstore_id
                    all_docs.append(
                        {
                            "title": title,
                            "text": page_content,
                            "source": source_path,
                            "filename": filename,
                        }
                    )

                    doc_text_with_header = f"{filename}\n{page_content}"
                    doc_tokens = num_tokens_from_string(doc_text_with_header)

                    if cumulative_tokens + doc_tokens < token_budget:
                        all_docs.append(
                            {
                                "title": title,
                                "text": page_content,
                                "source": source_path,
                                "filename": filename,
                            }
                        )
                        cumulative_tokens += doc_tokens

                    if cumulative_tokens >= token_budget:
                        break

            except Exception as e:
                logging.error(
                    f"Error searching vectorstore {vectorstore_id}: {e}",
                    exc_info=True,
                )
                continue

        logging.info(
            f"ClassicRAG._get_data: Retrieval complete - retrieved {len(all_docs)} documents "
            f"(requested chunks={self.chunks}, chunks_per_source={chunks_per_source})"
            f"(requested chunks={self.chunks}, chunks_per_source={chunks_per_source}, "
            f"cumulative_tokens={cumulative_tokens}/{token_budget})"
        )
        return all_docs

@@ -194,15 +198,3 @@ class ClassicRAG(BaseRetriever):
        self.original_question = query
        self.question = self._rephrase_query()
        return self._get_data()

    def get_params(self):
        """Return current retriever configuration parameters"""
        return {
            "question": self.original_question,
            "rephrased_question": self.question,
            "sources": self.vectorstores,
            "chunks": self.chunks,
            "token_limit": self.token_limit,
            "gpt_model": self.gpt_model,
            "user_api_key": self.user_api_key,
        }
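The admission loop above reserves 10% headroom: with the default doc_token_limit of 50000 the effective budget is 45000 tokens, and a document is only appended if it fits entirely. A condensed sketch of the same gate with made-up token counts:

    token_budget = max(int(50000 * 0.9), 100)  # 45000
    cumulative_tokens = 0
    for doc_tokens in (12000, 20000, 15000):
        if cumulative_tokens + doc_tokens < token_budget:
            cumulative_tokens += doc_tokens
    # Accepts 12000 and 20000 (total 32000); 15000 is skipped because
    # 32000 + 15000 >= 45000, but the loop keeps scanning for smaller docs.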
0  application/templates/__init__.py  Normal file
190  application/templates/namespaces.py  Normal file
@@ -0,0 +1,190 @@
import logging
import uuid
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from typing import Any, Dict, Optional

logger = logging.getLogger(__name__)


class NamespaceBuilder(ABC):
    """Base class for building template context namespaces"""

    @abstractmethod
    def build(self, **kwargs) -> Dict[str, Any]:
        """Build namespace context dictionary"""
        pass

    @property
    @abstractmethod
    def namespace_name(self) -> str:
        """Name of this namespace for template access"""
        pass


class SystemNamespace(NamespaceBuilder):
    """System metadata namespace: {{ system.* }}"""

    @property
    def namespace_name(self) -> str:
        return "system"

    def build(
        self, request_id: Optional[str] = None, user_id: Optional[str] = None, **kwargs
    ) -> Dict[str, Any]:
        """
        Build system context with metadata.

        Args:
            request_id: Unique request identifier
            user_id: Current user identifier

        Returns:
            Dictionary with system variables
        """
        now = datetime.now(timezone.utc)

        return {
            "date": now.strftime("%Y-%m-%d"),
            "time": now.strftime("%H:%M:%S"),
            "timestamp": now.isoformat(),
            "request_id": request_id or str(uuid.uuid4()),
            "user_id": user_id,
        }


class PassthroughNamespace(NamespaceBuilder):
    """Request parameters namespace: {{ passthrough.* }}"""

    @property
    def namespace_name(self) -> str:
        return "passthrough"

    def build(
        self, passthrough_data: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Dict[str, Any]:
        """
        Build passthrough context from request parameters.

        Args:
            passthrough_data: Dictionary of parameters from web request

        Returns:
            Dictionary with passthrough variables
        """
        if not passthrough_data:
            return {}
        safe_data = {}
        for key, value in passthrough_data.items():
            if isinstance(value, (str, int, float, bool, type(None))):
                safe_data[key] = value
            else:
                logger.warning(
                    f"Skipping non-serializable passthrough value for key '{key}': {type(value)}"
                )
        return safe_data


class SourceNamespace(NamespaceBuilder):
    """RAG source documents namespace: {{ source.* }}"""

    @property
    def namespace_name(self) -> str:
        return "source"

    def build(
        self, docs: Optional[list] = None, docs_together: Optional[str] = None, **kwargs
    ) -> Dict[str, Any]:
        """
        Build source context from RAG retrieval results.

        Args:
            docs: List of retrieved documents
            docs_together: Concatenated document content (for backward compatibility)

        Returns:
            Dictionary with source variables
        """
        context = {}

        if docs:
            context["documents"] = docs
            context["count"] = len(docs)
        if docs_together:
            context["docs_together"] = docs_together  # Add docs_together for custom templates
            context["content"] = docs_together
            context["summaries"] = docs_together
        return context


class ToolsNamespace(NamespaceBuilder):
    """Pre-executed tools namespace: {{ tools.* }}"""

    @property
    def namespace_name(self) -> str:
        return "tools"

    def build(
        self, tools_data: Optional[Dict[str, Any]] = None, **kwargs
    ) -> Dict[str, Any]:
        """
        Build tools context with pre-executed tool results.

        Args:
            tools_data: Dictionary of pre-fetched tool results organized by tool name
                e.g., {"memory": {"notes": "content", "tasks": "list"}}

        Returns:
            Dictionary with tool results organized by tool name
        """
        if not tools_data:
            return {}

        safe_data = {}
        for tool_name, tool_result in tools_data.items():
            if isinstance(tool_result, (str, dict, list, int, float, bool, type(None))):
                safe_data[tool_name] = tool_result
            else:
                logger.warning(
                    f"Skipping non-serializable tool result for '{tool_name}': {type(tool_result)}"
                )
        return safe_data


class NamespaceManager:
    """Manages all namespace builders and context assembly"""

    def __init__(self):
        self._builders = {
            "system": SystemNamespace(),
            "passthrough": PassthroughNamespace(),
            "source": SourceNamespace(),
            "tools": ToolsNamespace(),
        }

    def build_context(self, **kwargs) -> Dict[str, Any]:
        """
        Build complete template context from all namespaces.

        Args:
            **kwargs: Parameters to pass to namespace builders

        Returns:
            Complete context dictionary for template rendering
        """
        context = {}

        for namespace_name, builder in self._builders.items():
            try:
                namespace_context = builder.build(**kwargs)
                # Always include namespace, even if empty, to prevent undefined errors
                context[namespace_name] = namespace_context if namespace_context else {}
            except Exception as e:
                logger.error(f"Failed to build {namespace_name} namespace: {str(e)}")
                # Include empty namespace on error to prevent template failures
                context[namespace_name] = {}
        return context

    def get_builder(self, namespace_name: str) -> Optional[NamespaceBuilder]:
        """Get specific namespace builder"""
        return self._builders.get(namespace_name)
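Taken together, the builders produce one context dict keyed by namespace. A usage sketch with made-up values:

    manager = NamespaceManager()
    context = manager.build_context(
        request_id="req-1",
        user_id="user-42",
        passthrough_data={"plan": "pro"},
        docs_together="retrieved text",
        tools_data={"memory": {"root": "- buy milk"}},
    )
    context["passthrough"]["plan"]      # "pro"
    context["tools"]["memory"]["root"]  # "- buy milk"
    context["source"]["content"]        # "retrieved text"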
161  application/templates/template_engine.py  Normal file
@@ -0,0 +1,161 @@
import logging
from typing import Any, Dict, List, Optional, Set

from jinja2 import (
    ChainableUndefined,
    Environment,
    meta,
    nodes,
    select_autoescape,
    TemplateSyntaxError,
)
from jinja2.exceptions import UndefinedError

logger = logging.getLogger(__name__)


class TemplateRenderError(Exception):
    """Raised when template rendering fails"""

    pass


class TemplateEngine:
    """Jinja2-based template engine for dynamic prompt rendering"""

    def __init__(self):
        self._env = Environment(
            undefined=ChainableUndefined,
            trim_blocks=True,
            lstrip_blocks=True,
            autoescape=select_autoescape(default_for_string=True, default=True),
        )

    def render(self, template_content: str, context: Dict[str, Any]) -> str:
        """
        Render template with provided context.

        Args:
            template_content: Raw template string with Jinja2 syntax
            context: Dictionary of variables to inject into template

        Returns:
            Rendered template string

        Raises:
            TemplateRenderError: If template syntax is invalid or variables undefined
        """
        if not template_content:
            return ""
        try:
            template = self._env.from_string(template_content)
            return template.render(**context)
        except TemplateSyntaxError as e:
            error_msg = f"Template syntax error at line {e.lineno}: {e.message}"
            logger.error(error_msg)
            raise TemplateRenderError(error_msg) from e
        except UndefinedError as e:
            error_msg = f"Undefined variable in template: {e.message}"
            logger.error(error_msg)
            raise TemplateRenderError(error_msg) from e
        except Exception as e:
            error_msg = f"Template rendering failed: {str(e)}"
            logger.error(error_msg)
            raise TemplateRenderError(error_msg) from e

    def validate_template(self, template_content: str) -> bool:
        """
        Validate template syntax without rendering.

        Args:
            template_content: Template string to validate

        Returns:
            True if template is syntactically valid
        """
        if not template_content:
            return True
        try:
            self._env.from_string(template_content)
            return True
        except TemplateSyntaxError as e:
            logger.debug(f"Template syntax invalid at line {e.lineno}: {e.message}")
            return False
        except Exception as e:
            logger.debug(f"Template validation error: {type(e).__name__}: {str(e)}")
            return False

    def extract_variables(self, template_content: str) -> Set[str]:
        """
        Extract all variable names from template.

        Args:
            template_content: Template string to analyze

        Returns:
            Set of variable names found in template
        """
        if not template_content:
            return set()
        try:
            ast = self._env.parse(template_content)
            return set(meta.find_undeclared_variables(ast))
        except TemplateSyntaxError as e:
            logger.debug(f"Cannot extract variables - syntax error at line {e.lineno}")
            return set()
        except Exception as e:
            logger.debug(f"Cannot extract variables: {type(e).__name__}")
            return set()

    def extract_tool_usages(
        self, template_content: str
    ) -> Dict[str, Set[Optional[str]]]:
        """Extract tool and action references from a template"""
        if not template_content:
            return {}
        try:
            ast = self._env.parse(template_content)
        except TemplateSyntaxError as e:
            logger.debug(f"extract_tool_usages - syntax error at line {e.lineno}")
            return {}
        except Exception as e:
            logger.debug(f"extract_tool_usages - parse error: {type(e).__name__}")
            return {}

        usages: Dict[str, Set[Optional[str]]] = {}

        def record(path: List[str]) -> None:
            if not path:
                return
            tool_name = path[0]
            action_name = path[1] if len(path) > 1 else None
            if not tool_name:
                return
            tool_entry = usages.setdefault(tool_name, set())
            tool_entry.add(action_name)

        for node in ast.find_all(nodes.Getattr):
            path = []
            current = node
            while isinstance(current, nodes.Getattr):
                path.append(current.attr)
                current = current.node
            if isinstance(current, nodes.Name) and current.name == "tools":
                path.reverse()
                record(path)

        for node in ast.find_all(nodes.Getitem):
            path = []
            current = node
            while isinstance(current, nodes.Getitem):
                key = current.arg
                if isinstance(key, nodes.Const) and isinstance(key.value, str):
                    path.append(key.value)
                else:
                    path = []
                    break
                current = current.node
            if path and isinstance(current, nodes.Name) and current.name == "tools":
                path.reverse()
                record(path)

        return usages
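End to end, the engine both renders a context and reports which tools a template touches, which is what drives pre-fetching. A short sketch using the class above (values made up):

    engine = TemplateEngine()
    template = "{{ system.date }}: {{ tools.memory.root }}"
    engine.extract_tool_usages(template)  # {"memory": {"root"}}
    engine.render(
        template,
        {"system": {"date": "2025-01-20"}, "tools": {"memory": {"root": "- notes"}}},
    )  # "2025-01-20: - notes"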
@@ -74,6 +74,17 @@ def count_tokens_docs(docs):
    return tokens


def calculate_doc_token_budget(
    gpt_model: str = "gpt-4o", history_token_limit: int = 2000
) -> int:
    total_context = settings.LLM_TOKEN_LIMITS.get(
        gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT
    )
    reserved = sum(settings.RESERVED_TOKENS.values())
    doc_budget = total_context - history_token_limit - reserved
    return max(doc_budget, 1000)

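With the settings shown earlier, the budget arithmetic works out as total context minus the history allowance minus the 2000 reserved tokens (500 + 500 + 1000), clamped to at least 1000:

    calculate_doc_token_budget("gpt-4o")         # 128000 - 2000 - 2000 = 124000
    calculate_doc_token_budget("gpt-3.5-turbo")  # 4096 - 2000 - 2000 = 96 -> clamped to 1000
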
def get_missing_fields(data, required_fields):
    """Check for missing required fields. Returns list of missing field names."""
    return [field for field in required_fields if field not in data]
@@ -141,8 +152,8 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
        max_token_limit
        if max_token_limit
        and max_token_limit
        < settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
        else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
        < settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
        else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
    )

    if not history: