Mirror of https://github.com/arc53/DocsGPT.git (synced 2025-11-29 16:43:16 +00:00)
(refactor) remove preprocessing in retrieval
@@ -455,7 +455,7 @@ class Answer(Resource):
         try:
             question = data["question"]
-            history = data.get("history", [])
+            history = str(limit_chat_history(json.loads(data.get("history", []))))
             conversation_id = data.get("conversation_id")
             prompt_id = data.get("prompt_id", "default")
             chunks = int(data.get("chunks", 2))
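The route now calls limit_chat_history(), but the helper itself is not part of this diff. A minimal sketch of what such a helper might look like, assuming it keeps the newest turns that fit inside a fixed token budget; the function body, the 2000-token default, and the use of num_tokens_from_string here are assumptions, not code from this commit:

    from application.utils import num_tokens_from_string

    def limit_chat_history(history, max_tokens=2000):
        """Hypothetical sketch: keep the most recent turns under a token budget."""
        limited = []
        used = 0
        for turn in reversed(history):  # newest first, so recent context survives
            cost = num_tokens_from_string(turn.get("prompt", "")) + num_tokens_from_string(
                turn.get("response", "")
            )
            if used + cost > max_tokens:
                break
            used += cost
            limited.insert(0, turn)  # restore chronological order
        return limited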
@@ -2,7 +2,6 @@ import json

 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
-from application.utils import num_tokens_from_string
 from langchain_community.tools import BraveSearch
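For context, the BraveSearch tool this hunk keeps is typically wired up as in the sketch below; the API key and search_kwargs are placeholders, not values from this commit:

    from langchain_community.tools import BraveSearch

    # Placeholder key; BraveSearch.from_api_key is the documented constructor.
    search = BraveSearch.from_api_key(
        api_key="BSA...your-key...", search_kwargs={"count": 3}
    )
    results = search.run("DocsGPT documentation assistant")  # results as a string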
@@ -73,15 +72,7 @@ class BraveRetSearch(BaseRetriever):
             yield {"source": doc}

         if len(self.chat_history) > 1:
-            tokens_current_history = 0
-            # count tokens in history
             for i in self.chat_history:
                 if "prompt" in i and "response" in i:
-                    tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
-                        i["response"]
-                    )
-                    if tokens_current_history + tokens_batch < self.token_limit:
-                        tokens_current_history += tokens_batch
-                        messages_combine.append(
-                            {"role": "user", "content": i["prompt"]}
-                        )
+                    messages_combine.append(
+                        {"role": "user", "content": i["prompt"]}
+                    )
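num_tokens_from_string, whose import and call sites this commit removes from the retrievers, is commonly implemented with tiktoken. A sketch of such a helper, not necessarily DocsGPT's exact implementation:

    import tiktoken

    def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
        # Tokenize with the named encoding and count the resulting tokens.
        encoding = tiktoken.get_encoding(encoding_name)
        return len(encoding.encode(string))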
@@ -3,7 +3,6 @@ from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
 from application.llm.llm_creator import LLMCreator

-from application.utils import num_tokens_from_string


 class ClassicRAG(BaseRetriever):
@@ -73,15 +72,7 @@ class ClassicRAG(BaseRetriever):
             yield {"source": doc}

         if len(self.chat_history) > 1:
-            tokens_current_history = 0
-            # count tokens in history
             for i in self.chat_history:
                 if "prompt" in i and "response" in i:
-                    tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
-                        i["response"]
-                    )
-                    if tokens_current_history + tokens_batch < self.token_limit:
-                        tokens_current_history += tokens_batch
-                        messages_combine.append(
-                            {"role": "user", "content": i["prompt"]}
-                        )
+                    messages_combine.append(
+                        {"role": "user", "content": i["prompt"]}
+                    )
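The ClassicRAG change mirrors the BraveRetSearch one: with the token budget enforced upstream in the route, the retrievers simply copy the already-trimmed history into the message list. A runnable illustration of the simplified pattern, with invented data:

    # Invented sample data; the loop is the post-refactor logic from the diff.
    chat_history = [
        {"prompt": "What is DocsGPT?", "response": "An open-source docs assistant."},
        {"prompt": "Where are the docs?", "response": "See the project README."},
    ]
    messages_combine = [{"role": "system", "content": "You are a helpful assistant."}]
    if len(chat_history) > 1:
        for i in chat_history:
            if "prompt" in i and "response" in i:
                messages_combine.append({"role": "user", "content": i["prompt"]})
    print(messages_combine)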
@@ -1,7 +1,6 @@
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
-from application.utils import num_tokens_from_string
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
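The DuckDuckGo imports kept here are usually combined as in this sketch; the max_results value and the query are illustrative, not DocsGPT's configuration:

    from langchain_community.tools import DuckDuckGoSearchResults
    from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

    wrapper = DuckDuckGoSearchAPIWrapper(max_results=5)  # illustrative limit
    search = DuckDuckGoSearchResults(api_wrapper=wrapper)
    results = search.run("DocsGPT retrieval")  # returns results as a string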
@@ -89,16 +88,8 @@ class DuckDuckSearch(BaseRetriever):
         for doc in docs:
             yield {"source": doc}
         if len(self.chat_history) > 1:
-            tokens_current_history = 0
-            # count tokens in history
-            if len(self.chat_history) > 1:
-                for i in self.chat_history:
-                    if "prompt" in i and "response" in i:
-                        tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
-                            i["response"]
-                        )
-                        if tokens_current_history + tokens_batch < self.token_limit:
-                            tokens_current_history += tokens_batch
-                            messages_combine.append(
-                                {"role": "user", "content": i["prompt"]}
-                            )
+            for i in self.chat_history:
+                if "prompt" in i and "response" in i:
+                    messages_combine.append(
+                        {"role": "user", "content": i["prompt"]}
+                    )
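End to end, the route now expects history as a JSON-encoded string, which it parses and trims once before any retriever runs. An illustrative request against the Answer resource; the payload keys come from the route code above, while the endpoint URL, port, and values are assumptions for this sketch:

    import json
    import requests

    payload = {
        "question": "How do I configure DocsGPT?",
        "history": json.dumps(
            [{"prompt": "What is DocsGPT?", "response": "An open-source docs assistant."}]
        ),
        "prompt_id": "default",
        "chunks": 2,
    }
    # Endpoint path and port are assumptions, not taken from this diff.
    r = requests.post("http://localhost:7091/api/answer", json=payload)
    print(r.json())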