From af3e16c4fcc1fb96717e0208d2dffbd3a9a6ebf2 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 17 Dec 2025 01:34:17 +0000 Subject: [PATCH] fix: count history tokens from chunks, remove old UI setting limit (#2196) --- application/api/answer/routes/answer.py | 1 - application/api/answer/routes/stream.py | 1 - .../api/answer/services/stream_processor.py | 4 +-- application/utils.py | 4 +-- frontend/src/agents/agentPreviewSlice.ts | 2 -- .../src/conversation/conversationHandlers.ts | 6 ---- .../src/conversation/conversationModels.ts | 1 - .../src/conversation/conversationSlice.ts | 2 -- frontend/src/preferences/preferenceSlice.ts | 20 ----------- frontend/src/settings/General.tsx | 36 ------------------- frontend/src/store.ts | 2 -- tests/api/conftest.py | 1 - 12 files changed, 3 insertions(+), 77 deletions(-) diff --git a/application/api/answer/routes/answer.py b/application/api/answer/routes/answer.py index bc7ec58c..e79ea378 100644 --- a/application/api/answer/routes/answer.py +++ b/application/api/answer/routes/answer.py @@ -40,7 +40,6 @@ class AnswerResource(Resource, BaseAnswerResource): "chunks": fields.Integer( required=False, default=2, description="Number of chunks" ), - "token_limit": fields.Integer(required=False, description="Token limit"), "retriever": fields.String(required=False, description="Retriever type"), "api_key": fields.String(required=False, description="API key"), "active_docs": fields.String( diff --git a/application/api/answer/routes/stream.py b/application/api/answer/routes/stream.py index b2827a93..7583aa0b 100644 --- a/application/api/answer/routes/stream.py +++ b/application/api/answer/routes/stream.py @@ -40,7 +40,6 @@ class StreamResource(Resource, BaseAnswerResource): "chunks": fields.Integer( required=False, default=2, description="Number of chunks" ), - "token_limit": fields.Integer(required=False, description="Token limit"), "retriever": fields.String(required=False, description="Retriever type"), "api_key": fields.String(required=False, description="API key"), "active_docs": fields.String( diff --git a/application/api/answer/services/stream_processor.py b/application/api/answer/services/stream_processor.py index 912aff65..cf5fc1f9 100644 --- a/application/api/answer/services/stream_processor.py +++ b/application/api/answer/services/stream_processor.py @@ -420,16 +420,14 @@ class StreamProcessor: ) def _configure_retriever(self): - history_token_limit = int(self.data.get("token_limit", 2000)) doc_token_limit = calculate_doc_token_budget( - model_id=self.model_id, history_token_limit=history_token_limit + model_id=self.model_id ) self.retriever_config = { "retriever_name": self.data.get("retriever", "classic"), "chunks": int(self.data.get("chunks", 2)), "doc_token_limit": doc_token_limit, - "history_token_limit": history_token_limit, } api_key = self.data.get("api_key") or self.agent_key diff --git a/application/utils.py b/application/utils.py index b25c4717..35b61036 100644 --- a/application/utils.py +++ b/application/utils.py @@ -77,11 +77,11 @@ def count_tokens_docs(docs): def calculate_doc_token_budget( - model_id: str = "gpt-4o", history_token_limit: int = 2000 + model_id: str = "gpt-4o" ) -> int: total_context = get_token_limit(model_id) reserved = sum(settings.RESERVED_TOKENS.values()) - doc_budget = total_context - history_token_limit - reserved + doc_budget = total_context - reserved return max(doc_budget, 1000) diff --git a/frontend/src/agents/agentPreviewSlice.ts b/frontend/src/agents/agentPreviewSlice.ts index 3e601229..d765b789 100644 --- a/frontend/src/agents/agentPreviewSlice.ts +++ b/frontend/src/agents/agentPreviewSlice.ts @@ -65,7 +65,6 @@ export const fetchPreviewAnswer = createAsyncThunk< null, // No conversation ID for previews state.preference.prompt.id, state.preference.chunks, - state.preference.token_limit, (event: MessageEvent) => { const data = JSON.parse(event.data); const targetIndex = indx ?? state.agentPreview.queries.length - 1; @@ -136,7 +135,6 @@ export const fetchPreviewAnswer = createAsyncThunk< null, state.preference.prompt.id, state.preference.chunks, - state.preference.token_limit, state.preference.selectedAgent?.id, attachmentIds, false, diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts index b6222150..e55952fe 100644 --- a/frontend/src/conversation/conversationHandlers.ts +++ b/frontend/src/conversation/conversationHandlers.ts @@ -11,7 +11,6 @@ export function handleFetchAnswer( conversationId: string | null, promptId: string | null, chunks: string, - token_limit: number, agentId?: string, attachments?: string[], save_conversation = true, @@ -42,7 +41,6 @@ export function handleFetchAnswer( conversation_id: conversationId, prompt_id: promptId, chunks: chunks, - token_limit: token_limit, isNoneDoc: selectedDocs.length === 0, agent_id: agentId, save_conversation: save_conversation, @@ -100,7 +98,6 @@ export function handleFetchAnswerSteaming( conversationId: string | null, promptId: string | null, chunks: string, - token_limit: number, onEvent: (event: MessageEvent) => void, indx?: number, agentId?: string, @@ -113,7 +110,6 @@ export function handleFetchAnswerSteaming( conversation_id: conversationId, prompt_id: promptId, chunks: chunks, - token_limit: token_limit, isNoneDoc: selectedDocs.length === 0, index: indx, agent_id: agentId, @@ -198,13 +194,11 @@ export function handleSearch( selectedDocs: Doc[], conversation_id: string | null, chunks: string, - token_limit: number, ) { const payload: RetrievalPayload = { question: question, conversation_id: conversation_id, chunks: chunks, - token_limit: token_limit, isNoneDoc: selectedDocs.length === 0, }; if (selectedDocs.length > 0) { diff --git a/frontend/src/conversation/conversationModels.ts b/frontend/src/conversation/conversationModels.ts index cbcb644e..904683a7 100644 --- a/frontend/src/conversation/conversationModels.ts +++ b/frontend/src/conversation/conversationModels.ts @@ -59,7 +59,6 @@ export interface RetrievalPayload { conversation_id: string | null; prompt_id?: string | null; chunks: string; - token_limit: number; isNoneDoc: boolean; index?: number; agent_id?: string; diff --git a/frontend/src/conversation/conversationSlice.ts b/frontend/src/conversation/conversationSlice.ts index 8b4eea0e..0298d78c 100644 --- a/frontend/src/conversation/conversationSlice.ts +++ b/frontend/src/conversation/conversationSlice.ts @@ -63,7 +63,6 @@ export const fetchAnswer = createAsyncThunk< currentConversationId, state.preference.prompt.id, state.preference.chunks, - state.preference.token_limit, (event) => { const data = JSON.parse(event.data); const targetIndex = indx ?? state.conversation.queries.length - 1; @@ -171,7 +170,6 @@ export const fetchAnswer = createAsyncThunk< state.conversation.conversationId, state.preference.prompt.id, state.preference.chunks, - state.preference.token_limit, state.preference.selectedAgent?.id, attachmentIds, true, diff --git a/frontend/src/preferences/preferenceSlice.ts b/frontend/src/preferences/preferenceSlice.ts index 2bd5faea..89239b30 100644 --- a/frontend/src/preferences/preferenceSlice.ts +++ b/frontend/src/preferences/preferenceSlice.ts @@ -21,7 +21,6 @@ export interface Preference { prompt: { name: string; id: string; type: string }; prompts: Prompt[]; chunks: string; - token_limit: number; selectedDocs: Doc[]; sourceDocs: Doc[] | null; conversations: { @@ -49,7 +48,6 @@ const initialState: Preference = { { name: 'strict', id: 'strict', type: 'public' }, ], chunks: '2', - token_limit: 2000, selectedDocs: [ { id: 'default', @@ -108,9 +106,6 @@ export const prefSlice = createSlice({ setChunks: (state, action) => { state.chunks = action.payload; }, - setTokenLimit: (state, action) => { - state.token_limit = action.payload; - }, setModalStateDeleteConv: (state, action: PayloadAction) => { state.modalState = action.payload; }, @@ -147,7 +142,6 @@ export const { setPrompt, setPrompts, setChunks, - setTokenLimit, setModalStateDeleteConv, setPaginatedDocuments, setTemplateAgents, @@ -200,18 +194,6 @@ prefListenerMiddleware.startListening({ }, }); -prefListenerMiddleware.startListening({ - matcher: isAnyOf(setTokenLimit), - effect: (action, listenerApi) => { - localStorage.setItem( - 'DocsGPTTokenLimit', - JSON.stringify( - (listenerApi.getState() as RootState).preference.token_limit, - ), - ); - }, -}); - prefListenerMiddleware.startListening({ matcher: isAnyOf(setSourceDocs), effect: (_action, listenerApi) => { @@ -281,8 +263,6 @@ export const selectToken = (state: RootState) => state.preference.token; export const selectPrompt = (state: RootState) => state.preference.prompt; export const selectPrompts = (state: RootState) => state.preference.prompts; export const selectChunks = (state: RootState) => state.preference.chunks; -export const selectTokenLimit = (state: RootState) => - state.preference.token_limit; export const selectPaginatedDocuments = (state: RootState) => state.preference.paginatedDocuments; export const selectTemplateAgents = (state: RootState) => diff --git a/frontend/src/settings/General.tsx b/frontend/src/settings/General.tsx index 8b2c53cc..74479832 100644 --- a/frontend/src/settings/General.tsx +++ b/frontend/src/settings/General.tsx @@ -8,12 +8,10 @@ import { selectChunks, selectPrompt, selectPrompts, - selectTokenLimit, setChunks, setModalStateDeleteConv, setPrompt, setPrompts, - setTokenLimit, } from '../preferences/preferenceSlice'; import Prompts from './Prompts'; @@ -37,17 +35,8 @@ export default function General() { { label: 'Русский', value: 'ru' }, ]; const chunks = ['0', '2', '4', '6', '8', '10']; - const token_limits = new Map([ - [0, t('settings.general.none')], - [100, t('settings.general.low')], - [1000, t('settings.general.medium')], - [2000, t('settings.general.default')], - [4000, t('settings.general.high')], - [1e9, t('settings.general.unlimited')], - ]); const prompts = useSelector(selectPrompts); const selectedChunks = useSelector(selectChunks); - const selectedTokenLimit = useSelector(selectTokenLimit); const [isDarkTheme, toggleTheme] = useDarkTheme(); const [selectedTheme, setSelectedTheme] = React.useState( isDarkTheme ? 'Dark' : 'Light', @@ -118,31 +107,6 @@ export default function General() { border="border" /> -
- - ({ - value: value, - description: desc, - }))} - selectedValue={{ - value: selectedTokenLimit, - description: token_limits.get(selectedTokenLimit) as string, - }} - onSelect={({ - value, - description, - }: { - value: number; - description: string; - }) => dispatch(setTokenLimit(value))} - size="w-56" - rounded="3xl" - border="border" - /> -