mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-01-20 05:50:58 +00:00
fix: count history tokens from chunks, remove old UI setting limit (#2196)
This commit is contained in:
@@ -40,7 +40,6 @@ class AnswerResource(Resource, BaseAnswerResource):
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"token_limit": fields.Integer(required=False, description="Token limit"),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"api_key": fields.String(required=False, description="API key"),
|
||||
"active_docs": fields.String(
|
||||
|
||||
@@ -40,7 +40,6 @@ class StreamResource(Resource, BaseAnswerResource):
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"token_limit": fields.Integer(required=False, description="Token limit"),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"api_key": fields.String(required=False, description="API key"),
|
||||
"active_docs": fields.String(
|
||||
|
||||
@@ -420,16 +420,14 @@ class StreamProcessor:
|
||||
)
|
||||
|
||||
def _configure_retriever(self):
|
||||
history_token_limit = int(self.data.get("token_limit", 2000))
|
||||
doc_token_limit = calculate_doc_token_budget(
|
||||
model_id=self.model_id, history_token_limit=history_token_limit
|
||||
model_id=self.model_id
|
||||
)
|
||||
|
||||
self.retriever_config = {
|
||||
"retriever_name": self.data.get("retriever", "classic"),
|
||||
"chunks": int(self.data.get("chunks", 2)),
|
||||
"doc_token_limit": doc_token_limit,
|
||||
"history_token_limit": history_token_limit,
|
||||
}
|
||||
|
||||
api_key = self.data.get("api_key") or self.agent_key
|
||||
|
||||
@@ -77,11 +77,11 @@ def count_tokens_docs(docs):
|
||||
|
||||
|
||||
def calculate_doc_token_budget(
|
||||
model_id: str = "gpt-4o", history_token_limit: int = 2000
|
||||
model_id: str = "gpt-4o"
|
||||
) -> int:
|
||||
total_context = get_token_limit(model_id)
|
||||
reserved = sum(settings.RESERVED_TOKENS.values())
|
||||
doc_budget = total_context - history_token_limit - reserved
|
||||
doc_budget = total_context - reserved
|
||||
return max(doc_budget, 1000)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user