feat: template-based prompt rendering with dynamic namespace injection (#2091)

* feat: template-based prompt rendering with dynamic namespace injection

* refactor: improve template engine initialization with clearer formatting

* refactor: streamline ReActAgent methods and improve content extraction logic

feat: enhance error handling in NamespaceManager and TemplateEngine

fix: update NewAgent component to ensure consistent form data submission

test: modify tests for ReActAgent and prompt renderer to reflect method changes and improve coverage

* feat: tools namespace + three-tier token budget

* refactor: remove unused variable assignment in message building tests

* Enhance prompt customization and tool pre-fetching functionality

* ruff lint fix

* refactor: cleaner error handling and reduce code clutter

---------

Co-authored-by: Alex <a@tushynski.me>
This commit is contained in:
Siddhant Rai
2025-10-31 18:17:44 +05:30
committed by GitHub
parent a7d61b9d59
commit 21e5c261ef
33 changed files with 2917 additions and 646 deletions

View File

@@ -3,7 +3,7 @@ import json
import logging
from typing import Any, Dict, Generator, List, Optional
from flask import Response, make_response, jsonify
from flask import jsonify, make_response, Response
from flask_restx import Namespace
from application.api.answer.services.conversation_service import ConversationService
@@ -41,9 +41,7 @@ class BaseAnswerResource:
return missing_fields
return None
def check_usage(
self, agent_config: Dict
) -> Optional[Response]:
def check_usage(self, agent_config: Dict) -> Optional[Response]:
"""Check if there is a usage limit and if it is exceeded
Args:
@@ -51,30 +49,40 @@ class BaseAnswerResource:
Returns:
None or Response if either of limits exceeded.
"""
api_key = agent_config.get("user_api_key")
if not api_key:
return None
agents_collection = self.db["agents"]
agent = agents_collection.find_one({"key": api_key})
if not agent:
return make_response(
jsonify(
{
"success": False,
"message": "Invalid API key."
}
),
401
jsonify({"success": False, "message": "Invalid API key."}), 401
)
limited_token_mode = agent.get("limited_token_mode", False)
limited_request_mode = agent.get("limited_request_mode", False)
token_limit = int(agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"]))
request_limit = int(agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"]))
limited_token_mode_raw = agent.get("limited_token_mode", False)
limited_request_mode_raw = agent.get("limited_request_mode", False)
limited_token_mode = (
limited_token_mode_raw
if isinstance(limited_token_mode_raw, bool)
else limited_token_mode_raw == "True"
)
limited_request_mode = (
limited_request_mode_raw
if isinstance(limited_request_mode_raw, bool)
else limited_request_mode_raw == "True"
)
token_limit = int(
agent.get("token_limit", settings.DEFAULT_AGENT_LIMITS["token_limit"])
)
request_limit = int(
agent.get("request_limit", settings.DEFAULT_AGENT_LIMITS["request_limit"])
)
token_usage_collection = self.db["token_usage"]
@@ -83,18 +91,20 @@ class BaseAnswerResource:
match_query = {
"timestamp": {"$gte": start_date, "$lte": end_date},
"api_key": api_key
"api_key": api_key,
}
if limited_token_mode:
token_pipeline = [
{"$match": match_query},
{
"$group": {
"_id": None,
"total_tokens": {"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}}
"total_tokens": {
"$sum": {"$add": ["$prompt_tokens", "$generated_tokens"]}
},
}
}
},
]
token_result = list(token_usage_collection.aggregate(token_pipeline))
daily_token_usage = token_result[0]["total_tokens"] if token_result else 0
@@ -108,26 +118,33 @@ class BaseAnswerResource:
if not limited_token_mode and not limited_request_mode:
return None
elif limited_token_mode and token_limit > daily_token_usage:
return None
elif limited_request_mode and request_limit > daily_request_usage:
return None
return make_response(
jsonify(
{
"success": False,
"message": "Exceeding usage limit, please try again later."
}
),
429, # too many requests
token_exceeded = (
limited_token_mode and token_limit > 0 and daily_token_usage >= token_limit
)
request_exceeded = (
limited_request_mode
and request_limit > 0
and daily_request_usage >= request_limit
)
if token_exceeded or request_exceeded:
return make_response(
jsonify(
{
"success": False,
"message": "Exceeding usage limit, please try again later.",
}
),
429,
)
return None
def complete_stream(
self,
question: str,
agent: Any,
retriever: Any,
conversation_id: Optional[str],
user_api_key: Optional[str],
decoded_token: Dict[str, Any],
@@ -156,6 +173,7 @@ class BaseAnswerResource:
agent_id: ID of agent used
is_shared_usage: Flag for shared agent usage
shared_token: Token for shared agent
retrieved_docs: Pre-fetched documents for sources (optional)
Yields:
Server-sent event strings
@@ -166,7 +184,7 @@ class BaseAnswerResource:
schema_info = None
structured_chunks = []
for line in agent.gen(query=question, retriever=retriever):
for line in agent.gen(query=question):
if "answer" in line:
response_full += str(line["answer"])
if line.get("structured"):
@@ -247,7 +265,6 @@ class BaseAnswerResource:
data = json.dumps(id_data)
yield f"data: {data}\n\n"
retriever_params = retriever.get_params()
log_data = {
"action": "stream_answer",
"level": "info",
@@ -256,7 +273,6 @@ class BaseAnswerResource:
"question": question,
"response": response_full,
"sources": source_log_docs,
"retriever_params": retriever_params,
"attachments": attachment_ids,
"timestamp": datetime.datetime.now(datetime.timezone.utc),
}
@@ -264,24 +280,19 @@ class BaseAnswerResource:
log_data["structured_output"] = True
if schema_info:
log_data["schema"] = schema_info
# clean up text fields to be no longer than 10000 characters
# Clean up text fields to be no longer than 10000 characters
for key, value in log_data.items():
if isinstance(value, str) and len(value) > 10000:
log_data[key] = value[:10000]
self.user_logs_collection.insert_one(log_data)
# End of stream
self.user_logs_collection.insert_one(log_data)
data = json.dumps({"type": "end"})
yield f"data: {data}\n\n"
except GeneratorExit:
# Client aborted the connection
logger.info(
f"Stream aborted by client for question: {question[:50]}... "
)
# Save partial response to database before exiting
logger.info(f"Stream aborted by client for question: {question[:50]}... ")
# Save partial response
if should_save_conversation and response_full:
try:
if isNoneDoc:
@@ -311,7 +322,9 @@ class BaseAnswerResource:
attachment_ids=attachment_ids,
)
except Exception as e:
logger.error(f"Error saving partial response: {str(e)}", exc_info=True)
logger.error(
f"Error saving partial response: {str(e)}", exc_info=True
)
raise
except Exception as e:
logger.error(f"Error in stream: {str(e)}", exc_info=True)