fix: count history tokens from chunks, remove old UI setting limit (#2196)

This commit is contained in:
Alex
2025-12-17 01:34:17 +00:00
committed by GitHub
parent aacf281222
commit af3e16c4fc
12 changed files with 3 additions and 77 deletions

View File

@@ -40,7 +40,6 @@ class AnswerResource(Resource, BaseAnswerResource):
"chunks": fields.Integer(
required=False, default=2, description="Number of chunks"
),
"token_limit": fields.Integer(required=False, description="Token limit"),
"retriever": fields.String(required=False, description="Retriever type"),
"api_key": fields.String(required=False, description="API key"),
"active_docs": fields.String(

View File

@@ -40,7 +40,6 @@ class StreamResource(Resource, BaseAnswerResource):
"chunks": fields.Integer(
required=False, default=2, description="Number of chunks"
),
"token_limit": fields.Integer(required=False, description="Token limit"),
"retriever": fields.String(required=False, description="Retriever type"),
"api_key": fields.String(required=False, description="API key"),
"active_docs": fields.String(

View File

@@ -420,16 +420,14 @@ class StreamProcessor:
)
 def _configure_retriever(self):
-    history_token_limit = int(self.data.get("token_limit", 2000))
     doc_token_limit = calculate_doc_token_budget(
-        model_id=self.model_id, history_token_limit=history_token_limit
+        model_id=self.model_id
     )
     self.retriever_config = {
         "retriever_name": self.data.get("retriever", "classic"),
         "chunks": int(self.data.get("chunks", 2)),
         "doc_token_limit": doc_token_limit,
-        "history_token_limit": history_token_limit,
     }
     api_key = self.data.get("api_key") or self.agent_key
api_key = self.data.get("api_key") or self.agent_key

View File

@@ -77,11 +77,11 @@ def count_tokens_docs(docs):
 def calculate_doc_token_budget(
-    model_id: str = "gpt-4o", history_token_limit: int = 2000
+    model_id: str = "gpt-4o"
 ) -> int:
     total_context = get_token_limit(model_id)
     reserved = sum(settings.RESERVED_TOKENS.values())
-    doc_budget = total_context - history_token_limit - reserved
+    doc_budget = total_context - reserved
     return max(doc_budget, 1000)

View File

@@ -65,7 +65,6 @@ export const fetchPreviewAnswer = createAsyncThunk<
null, // No conversation ID for previews
state.preference.prompt.id,
state.preference.chunks,
state.preference.token_limit,
(event: MessageEvent) => {
const data = JSON.parse(event.data);
const targetIndex = indx ?? state.agentPreview.queries.length - 1;
@@ -136,7 +135,6 @@ export const fetchPreviewAnswer = createAsyncThunk<
null,
state.preference.prompt.id,
state.preference.chunks,
state.preference.token_limit,
state.preference.selectedAgent?.id,
attachmentIds,
false,

View File

@@ -11,7 +11,6 @@ export function handleFetchAnswer(
conversationId: string | null,
promptId: string | null,
chunks: string,
token_limit: number,
agentId?: string,
attachments?: string[],
save_conversation = true,
@@ -42,7 +41,6 @@ export function handleFetchAnswer(
conversation_id: conversationId,
prompt_id: promptId,
chunks: chunks,
token_limit: token_limit,
isNoneDoc: selectedDocs.length === 0,
agent_id: agentId,
save_conversation: save_conversation,
@@ -100,7 +98,6 @@ export function handleFetchAnswerSteaming(
conversationId: string | null,
promptId: string | null,
chunks: string,
token_limit: number,
onEvent: (event: MessageEvent) => void,
indx?: number,
agentId?: string,
@@ -113,7 +110,6 @@ export function handleFetchAnswerSteaming(
conversation_id: conversationId,
prompt_id: promptId,
chunks: chunks,
token_limit: token_limit,
isNoneDoc: selectedDocs.length === 0,
index: indx,
agent_id: agentId,
@@ -198,13 +194,11 @@ export function handleSearch(
selectedDocs: Doc[],
conversation_id: string | null,
chunks: string,
token_limit: number,
) {
const payload: RetrievalPayload = {
question: question,
conversation_id: conversation_id,
chunks: chunks,
token_limit: token_limit,
isNoneDoc: selectedDocs.length === 0,
};
if (selectedDocs.length > 0) {

View File

@@ -59,7 +59,6 @@ export interface RetrievalPayload {
conversation_id: string | null;
prompt_id?: string | null;
chunks: string;
token_limit: number;
isNoneDoc: boolean;
index?: number;
agent_id?: string;

View File

@@ -63,7 +63,6 @@ export const fetchAnswer = createAsyncThunk<
currentConversationId,
state.preference.prompt.id,
state.preference.chunks,
state.preference.token_limit,
(event) => {
const data = JSON.parse(event.data);
const targetIndex = indx ?? state.conversation.queries.length - 1;
@@ -171,7 +170,6 @@ export const fetchAnswer = createAsyncThunk<
state.conversation.conversationId,
state.preference.prompt.id,
state.preference.chunks,
state.preference.token_limit,
state.preference.selectedAgent?.id,
attachmentIds,
true,

View File

@@ -21,7 +21,6 @@ export interface Preference {
prompt: { name: string; id: string; type: string };
prompts: Prompt[];
chunks: string;
token_limit: number;
selectedDocs: Doc[];
sourceDocs: Doc[] | null;
conversations: {
@@ -49,7 +48,6 @@ const initialState: Preference = {
{ name: 'strict', id: 'strict', type: 'public' },
],
chunks: '2',
token_limit: 2000,
selectedDocs: [
{
id: 'default',
@@ -108,9 +106,6 @@ export const prefSlice = createSlice({
setChunks: (state, action) => {
state.chunks = action.payload;
},
setTokenLimit: (state, action) => {
state.token_limit = action.payload;
},
setModalStateDeleteConv: (state, action: PayloadAction<ActiveState>) => {
state.modalState = action.payload;
},
@@ -147,7 +142,6 @@ export const {
setPrompt,
setPrompts,
setChunks,
setTokenLimit,
setModalStateDeleteConv,
setPaginatedDocuments,
setTemplateAgents,
@@ -200,18 +194,6 @@ prefListenerMiddleware.startListening({
},
});
prefListenerMiddleware.startListening({
matcher: isAnyOf(setTokenLimit),
effect: (action, listenerApi) => {
localStorage.setItem(
'DocsGPTTokenLimit',
JSON.stringify(
(listenerApi.getState() as RootState).preference.token_limit,
),
);
},
});
prefListenerMiddleware.startListening({
matcher: isAnyOf(setSourceDocs),
effect: (_action, listenerApi) => {
@@ -281,8 +263,6 @@ export const selectToken = (state: RootState) => state.preference.token;
export const selectPrompt = (state: RootState) => state.preference.prompt;
export const selectPrompts = (state: RootState) => state.preference.prompts;
export const selectChunks = (state: RootState) => state.preference.chunks;
export const selectTokenLimit = (state: RootState) =>
state.preference.token_limit;
export const selectPaginatedDocuments = (state: RootState) =>
state.preference.paginatedDocuments;
export const selectTemplateAgents = (state: RootState) =>

View File

@@ -8,12 +8,10 @@ import {
selectChunks,
selectPrompt,
selectPrompts,
selectTokenLimit,
setChunks,
setModalStateDeleteConv,
setPrompt,
setPrompts,
setTokenLimit,
} from '../preferences/preferenceSlice';
import Prompts from './Prompts';
@@ -37,17 +35,8 @@ export default function General() {
{ label: 'Русский', value: 'ru' },
];
const chunks = ['0', '2', '4', '6', '8', '10'];
const token_limits = new Map([
[0, t('settings.general.none')],
[100, t('settings.general.low')],
[1000, t('settings.general.medium')],
[2000, t('settings.general.default')],
[4000, t('settings.general.high')],
[1e9, t('settings.general.unlimited')],
]);
const prompts = useSelector(selectPrompts);
const selectedChunks = useSelector(selectChunks);
const selectedTokenLimit = useSelector(selectTokenLimit);
const [isDarkTheme, toggleTheme] = useDarkTheme();
const [selectedTheme, setSelectedTheme] = React.useState(
isDarkTheme ? 'Dark' : 'Light',
@@ -118,31 +107,6 @@ export default function General() {
border="border"
/>
</div>
<div className="flex flex-col gap-4">
<label className="text-jet dark:text-bright-gray text-base font-medium">
{t('settings.general.convHistory')}
</label>
<Dropdown
options={Array.from(token_limits, ([value, desc]) => ({
value: value,
description: desc,
}))}
selectedValue={{
value: selectedTokenLimit,
description: token_limits.get(selectedTokenLimit) as string,
}}
onSelect={({
value,
description,
}: {
value: number;
description: string;
}) => dispatch(setTokenLimit(value))}
size="w-56"
rounded="3xl"
border="border"
/>
</div>
<div className="flex flex-col gap-4">
<Prompts
prompts={prompts}

View File

@@ -13,7 +13,6 @@ import uploadReducer from './upload/uploadSlice';
const key = localStorage.getItem('DocsGPTApiKey');
const prompt = localStorage.getItem('DocsGPTPrompt');
const chunks = localStorage.getItem('DocsGPTChunks');
const token_limit = localStorage.getItem('DocsGPTTokenLimit');
const doc = localStorage.getItem('DocsGPTRecentDocs');
const selectedModel = localStorage.getItem('DocsGPTSelectedModel');
@@ -31,7 +30,6 @@ const preloadedState: { preference: Preference } = {
{ name: 'strict', id: 'strict', type: 'public' },
],
chunks: JSON.parse(chunks ?? '2').toString(),
token_limit: token_limit ? parseInt(token_limit) : 2000,
selectedDocs: doc !== null ? JSON.parse(doc) : [],
conversations: {
data: null,

View File

@@ -73,7 +73,6 @@ def sample_answer_request():
"conversation_id": None,
"prompt_id": "default",
"chunks": 2,
"token_limit": 1000,
"retriever": "classic_rag",
"active_docs": "local/test/",
"isNoneDoc": False,