From af3e16c4fcc1fb96717e0208d2dffbd3a9a6ebf2 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Wed, 17 Dec 2025 01:34:17 +0000
Subject: [PATCH] fix: count history tokens from chunks, remove old UI setting
 limit (#2196)

---
 application/api/answer/routes/answer.py       |  1 -
 application/api/answer/routes/stream.py       |  1 -
 .../api/answer/services/stream_processor.py   |  4 +--
 application/utils.py                          |  4 +--
 frontend/src/agents/agentPreviewSlice.ts      |  2 --
 .../src/conversation/conversationHandlers.ts  |  6 ----
 .../src/conversation/conversationModels.ts    |  1 -
 .../src/conversation/conversationSlice.ts     |  2 --
 frontend/src/preferences/preferenceSlice.ts   | 20 -----------
 frontend/src/settings/General.tsx             | 36 -------------------
 frontend/src/store.ts                         |  2 --
 tests/api/conftest.py                         |  1 -
 12 files changed, 3 insertions(+), 77 deletions(-)

diff --git a/application/api/answer/routes/answer.py b/application/api/answer/routes/answer.py
index bc7ec58c..e79ea378 100644
--- a/application/api/answer/routes/answer.py
+++ b/application/api/answer/routes/answer.py
@@ -40,7 +40,6 @@ class AnswerResource(Resource, BaseAnswerResource):
             "chunks": fields.Integer(
                 required=False, default=2, description="Number of chunks"
             ),
-            "token_limit": fields.Integer(required=False, description="Token limit"),
             "retriever": fields.String(required=False, description="Retriever type"),
             "api_key": fields.String(required=False, description="API key"),
             "active_docs": fields.String(
diff --git a/application/api/answer/routes/stream.py b/application/api/answer/routes/stream.py
index b2827a93..7583aa0b 100644
--- a/application/api/answer/routes/stream.py
+++ b/application/api/answer/routes/stream.py
@@ -40,7 +40,6 @@ class StreamResource(Resource, BaseAnswerResource):
             "chunks": fields.Integer(
                 required=False, default=2, description="Number of chunks"
             ),
-            "token_limit": fields.Integer(required=False, description="Token limit"),
             "retriever": fields.String(required=False, description="Retriever type"),
             "api_key": fields.String(required=False, description="API key"),
             "active_docs": fields.String(
diff --git a/application/api/answer/services/stream_processor.py b/application/api/answer/services/stream_processor.py
index 912aff65..cf5fc1f9 100644
--- a/application/api/answer/services/stream_processor.py
+++ b/application/api/answer/services/stream_processor.py
@@ -420,16 +420,14 @@ class StreamProcessor:
             )
 
     def _configure_retriever(self):
-        history_token_limit = int(self.data.get("token_limit", 2000))
         doc_token_limit = calculate_doc_token_budget(
-            model_id=self.model_id, history_token_limit=history_token_limit
+            model_id=self.model_id
         )
 
         self.retriever_config = {
             "retriever_name": self.data.get("retriever", "classic"),
             "chunks": int(self.data.get("chunks", 2)),
             "doc_token_limit": doc_token_limit,
-            "history_token_limit": history_token_limit,
         }
 
         api_key = self.data.get("api_key") or self.agent_key
diff --git a/application/utils.py b/application/utils.py
index b25c4717..35b61036 100644
--- a/application/utils.py
+++ b/application/utils.py
@@ -77,11 +77,11 @@ def count_tokens_docs(docs):
 
 
 def calculate_doc_token_budget(
-    model_id: str = "gpt-4o", history_token_limit: int = 2000
+    model_id: str = "gpt-4o"
 ) -> int:
     total_context = get_token_limit(model_id)
     reserved = sum(settings.RESERVED_TOKENS.values())
-    doc_budget = total_context - history_token_limit - reserved
+    doc_budget = total_context - reserved
     return max(doc_budget, 1000)
 
 
diff --git a/frontend/src/agents/agentPreviewSlice.ts b/frontend/src/agents/agentPreviewSlice.ts
index 3e601229..d765b789 100644
--- a/frontend/src/agents/agentPreviewSlice.ts
+++ b/frontend/src/agents/agentPreviewSlice.ts
@@ -65,7 +65,6 @@ export const fetchPreviewAnswer = createAsyncThunk<
           null, // No conversation ID for previews
           state.preference.prompt.id,
           state.preference.chunks,
-          state.preference.token_limit,
           (event: MessageEvent) => {
             const data = JSON.parse(event.data);
             const targetIndex = indx ?? state.agentPreview.queries.length - 1;
@@ -136,7 +135,6 @@ export const fetchPreviewAnswer = createAsyncThunk<
           null,
           state.preference.prompt.id,
           state.preference.chunks,
-          state.preference.token_limit,
           state.preference.selectedAgent?.id,
           attachmentIds,
           false,
diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts
index b6222150..e55952fe 100644
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -11,7 +11,6 @@ export function handleFetchAnswer(
   conversationId: string | null,
   promptId: string | null,
   chunks: string,
-  token_limit: number,
   agentId?: string,
   attachments?: string[],
   save_conversation = true,
@@ -42,7 +41,6 @@ export function handleFetchAnswer(
     conversation_id: conversationId,
     prompt_id: promptId,
     chunks: chunks,
-    token_limit: token_limit,
     isNoneDoc: selectedDocs.length === 0,
     agent_id: agentId,
     save_conversation: save_conversation,
@@ -100,7 +98,6 @@ export function handleFetchAnswerSteaming(
   conversationId: string | null,
   promptId: string | null,
   chunks: string,
-  token_limit: number,
   onEvent: (event: MessageEvent) => void,
   indx?: number,
   agentId?: string,
@@ -113,7 +110,6 @@ export function handleFetchAnswerSteaming(
     conversation_id: conversationId,
     prompt_id: promptId,
     chunks: chunks,
-    token_limit: token_limit,
     isNoneDoc: selectedDocs.length === 0,
     index: indx,
     agent_id: agentId,
@@ -198,13 +194,11 @@ export function handleSearch(
   selectedDocs: Doc[],
   conversation_id: string | null,
   chunks: string,
-  token_limit: number,
 ) {
   const payload: RetrievalPayload = {
     question: question,
     conversation_id: conversation_id,
     chunks: chunks,
-    token_limit: token_limit,
     isNoneDoc: selectedDocs.length === 0,
   };
   if (selectedDocs.length > 0) {
diff --git a/frontend/src/conversation/conversationModels.ts b/frontend/src/conversation/conversationModels.ts
index cbcb644e..904683a7 100644
--- a/frontend/src/conversation/conversationModels.ts
+++ b/frontend/src/conversation/conversationModels.ts
@@ -59,7 +59,6 @@ export interface RetrievalPayload {
   conversation_id: string | null;
   prompt_id?: string | null;
   chunks: string;
-  token_limit: number;
   isNoneDoc: boolean;
   index?: number;
   agent_id?: string;
diff --git a/frontend/src/conversation/conversationSlice.ts b/frontend/src/conversation/conversationSlice.ts
index 8b4eea0e..0298d78c 100644
--- a/frontend/src/conversation/conversationSlice.ts
+++ b/frontend/src/conversation/conversationSlice.ts
@@ -63,7 +63,6 @@ export const fetchAnswer = createAsyncThunk<
         currentConversationId,
         state.preference.prompt.id,
         state.preference.chunks,
-        state.preference.token_limit,
         (event) => {
           const data = JSON.parse(event.data);
           const targetIndex = indx ?? state.conversation.queries.length - 1;
@@ -171,7 +170,6 @@ export const fetchAnswer = createAsyncThunk<
         state.conversation.conversationId,
         state.preference.prompt.id,
         state.preference.chunks,
-        state.preference.token_limit,
         state.preference.selectedAgent?.id,
         attachmentIds,
         true,
diff --git a/frontend/src/preferences/preferenceSlice.ts b/frontend/src/preferences/preferenceSlice.ts
index 2bd5faea..89239b30 100644
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -21,7 +21,6 @@ export interface Preference {
   prompt: { name: string; id: string; type: string };
   prompts: Prompt[];
   chunks: string;
-  token_limit: number;
   selectedDocs: Doc[];
   sourceDocs: Doc[] | null;
   conversations: {
@@ -49,7 +48,6 @@ const initialState: Preference = {
     { name: 'strict', id: 'strict', type: 'public' },
   ],
   chunks: '2',
-  token_limit: 2000,
   selectedDocs: [
     {
       id: 'default',
@@ -108,9 +106,6 @@ export const prefSlice = createSlice({
     setChunks: (state, action) => {
       state.chunks = action.payload;
     },
-    setTokenLimit: (state, action) => {
-      state.token_limit = action.payload;
-    },
     setModalStateDeleteConv: (state, action: PayloadAction<ActiveState>) => {
       state.modalState = action.payload;
     },
@@ -147,7 +142,6 @@ export const {
   setPrompt,
   setPrompts,
   setChunks,
-  setTokenLimit,
   setModalStateDeleteConv,
   setPaginatedDocuments,
   setTemplateAgents,
@@ -200,18 +194,6 @@ prefListenerMiddleware.startListening({
   },
 });
 
-prefListenerMiddleware.startListening({
-  matcher: isAnyOf(setTokenLimit),
-  effect: (action, listenerApi) => {
-    localStorage.setItem(
-      'DocsGPTTokenLimit',
-      JSON.stringify(
-        (listenerApi.getState() as RootState).preference.token_limit,
-      ),
-    );
-  },
-});
-
 prefListenerMiddleware.startListening({
   matcher: isAnyOf(setSourceDocs),
   effect: (_action, listenerApi) => {
@@ -281,8 +263,6 @@ export const selectToken = (state: RootState) => state.preference.token;
 export const selectPrompt = (state: RootState) => state.preference.prompt;
 export const selectPrompts = (state: RootState) => state.preference.prompts;
 export const selectChunks = (state: RootState) => state.preference.chunks;
-export const selectTokenLimit = (state: RootState) =>
-  state.preference.token_limit;
 export const selectPaginatedDocuments = (state: RootState) =>
   state.preference.paginatedDocuments;
 export const selectTemplateAgents = (state: RootState) =>
diff --git a/frontend/src/settings/General.tsx b/frontend/src/settings/General.tsx
index 8b2c53cc..74479832 100644
--- a/frontend/src/settings/General.tsx
+++ b/frontend/src/settings/General.tsx
@@ -8,12 +8,10 @@ import {
   selectChunks,
   selectPrompt,
   selectPrompts,
-  selectTokenLimit,
   setChunks,
   setModalStateDeleteConv,
   setPrompt,
   setPrompts,
-  setTokenLimit,
 } from '../preferences/preferenceSlice';
 import Prompts from './Prompts';
 
@@ -37,17 +35,8 @@ export default function General() {
     { label: 'Русский', value: 'ru' },
   ];
   const chunks = ['0', '2', '4', '6', '8', '10'];
-  const token_limits = new Map([
-    [0, t('settings.general.none')],
-    [100, t('settings.general.low')],
-    [1000, t('settings.general.medium')],
-    [2000, t('settings.general.default')],
-    [4000, t('settings.general.high')],
-    [1e9, t('settings.general.unlimited')],
-  ]);
   const prompts = useSelector(selectPrompts);
   const selectedChunks = useSelector(selectChunks);
-  const selectedTokenLimit = useSelector(selectTokenLimit);
   const [isDarkTheme, toggleTheme] = useDarkTheme();
   const [selectedTheme, setSelectedTheme] = React.useState(
     isDarkTheme ? 'Dark' : 'Light',
@@ -118,31 +107,6 @@ export default function General() {
           border="border"
         />
       </div>
-      <div className="flex flex-col gap-4">
-        <label className="text-jet dark:text-bright-gray text-base font-medium">
-          {t('settings.general.convHistory')}
-        </label>
-        <Dropdown
-          options={Array.from(token_limits, ([value, desc]) => ({
-            value: value,
-            description: desc,
-          }))}
-          selectedValue={{
-            value: selectedTokenLimit,
-            description: token_limits.get(selectedTokenLimit) as string,
-          }}
-          onSelect={({
-            value,
-            description,
-          }: {
-            value: number;
-            description: string;
-          }) => dispatch(setTokenLimit(value))}
-          size="w-56"
-          rounded="3xl"
-          border="border"
-        />
-      </div>
       <div className="flex flex-col gap-4">
         <Prompts
           prompts={prompts}
diff --git a/frontend/src/store.ts b/frontend/src/store.ts
index a3092c82..68100698 100644
--- a/frontend/src/store.ts
+++ b/frontend/src/store.ts
@@ -13,7 +13,6 @@ import uploadReducer from './upload/uploadSlice';
 const key = localStorage.getItem('DocsGPTApiKey');
 const prompt = localStorage.getItem('DocsGPTPrompt');
 const chunks = localStorage.getItem('DocsGPTChunks');
-const token_limit = localStorage.getItem('DocsGPTTokenLimit');
 const doc = localStorage.getItem('DocsGPTRecentDocs');
 const selectedModel = localStorage.getItem('DocsGPTSelectedModel');
 
@@ -31,7 +30,6 @@ const preloadedState: { preference: Preference } = {
       { name: 'strict', id: 'strict', type: 'public' },
     ],
     chunks: JSON.parse(chunks ?? '2').toString(),
-    token_limit: token_limit ? parseInt(token_limit) : 2000,
     selectedDocs: doc !== null ? JSON.parse(doc) : [],
     conversations: {
       data: null,
diff --git a/tests/api/conftest.py b/tests/api/conftest.py
index 2c98b14f..60223a5d 100644
--- a/tests/api/conftest.py
+++ b/tests/api/conftest.py
@@ -73,7 +73,6 @@ def sample_answer_request():
         "conversation_id": None,
         "prompt_id": "default",
         "chunks": 2,
-        "token_limit": 1000,
         "retriever": "classic_rag",
         "active_docs": "local/test/",
         "isNoneDoc": False,