fix: count history tokens from chunks, remove old UI setting limit (#2196)

2026-03-07 22:33:36 +00:00 · 2025-12-17 01:34:17 +00:00
parent aacf281222
commit af3e16c4fc
12 changed files with 3 additions and 77 deletions
--- a/application/api/answer/routes/answer.py
+++ b/application/api/answer/routes/answer.py
@@ -40,7 +40,6 @@ class AnswerResource(Resource, BaseAnswerResource):
            "chunks": fields.Integer(
                required=False, default=2, description="Number of chunks"
            ),
-            "token_limit": fields.Integer(required=False, description="Token limit"),
            "retriever": fields.String(required=False, description="Retriever type"),
            "api_key": fields.String(required=False, description="API key"),
            "active_docs": fields.String(
--- a/application/api/answer/routes/stream.py
+++ b/application/api/answer/routes/stream.py
@@ -40,7 +40,6 @@ class StreamResource(Resource, BaseAnswerResource):
            "chunks": fields.Integer(
                required=False, default=2, description="Number of chunks"
            ),
-            "token_limit": fields.Integer(required=False, description="Token limit"),
            "retriever": fields.String(required=False, description="Retriever type"),
            "api_key": fields.String(required=False, description="API key"),
            "active_docs": fields.String(
--- a/application/api/answer/services/stream_processor.py
+++ b/application/api/answer/services/stream_processor.py
@@ -420,16 +420,14 @@ class StreamProcessor:
            )

    def _configure_retriever(self):
-        history_token_limit = int(self.data.get("token_limit", 2000))
        doc_token_limit = calculate_doc_token_budget(
-            model_id=self.model_id, history_token_limit=history_token_limit
+            model_id=self.model_id
        )

        self.retriever_config = {
            "retriever_name": self.data.get("retriever", "classic"),
            "chunks": int(self.data.get("chunks", 2)),
            "doc_token_limit": doc_token_limit,
-            "history_token_limit": history_token_limit,
        }

        api_key = self.data.get("api_key") or self.agent_key
--- a/application/utils.py
+++ b/application/utils.py
@@ -77,11 +77,11 @@ def count_tokens_docs(docs):


 def calculate_doc_token_budget(
-    model_id: str = "gpt-4o", history_token_limit: int = 2000
+    model_id: str = "gpt-4o"
 ) -> int:
    total_context = get_token_limit(model_id)
    reserved = sum(settings.RESERVED_TOKENS.values())
-    doc_budget = total_context - history_token_limit - reserved
+    doc_budget = total_context - reserved
    return max(doc_budget, 1000)