From e0a9f086322d9596dcd46b5bbfa74203ac593db6 Mon Sep 17 00:00:00 2001
From: Alex
Date: Wed, 10 Dec 2025 21:53:59 +0000
Subject: [PATCH] refactor and deps (#2184)

---
 .../answer/services/conversation_service.py |   5 +-
 application/llm/docsgpt_provider.py         | 148 +++++-------
 application/llm/groq.py                     |  44 ++----
 application/llm/handlers/base.py            |   5 +-
 application/llm/huggingface.py              |  68 --------
 application/llm/llm_creator.py              |   2 -
 application/llm/novita.py                   |  41 ++---
 application/llm/openai.py                   |   5 +-
 application/parser/file/base.py             |   2 +-
 application/parser/remote/base.py           |   2 +-
 application/parser/schema/base.py           |   2 +-
 application/requirements.txt                |  90 +++++------
 tests/parser/remote/test_crawler_loader.py  |   2 +-
 tests/parser/remote/test_web_loader.py      |   2 +-
 14 files changed, 120 insertions(+), 298 deletions(-)
 delete mode 100644 application/llm/huggingface.py

diff --git a/application/api/answer/services/conversation_service.py b/application/api/answer/services/conversation_service.py
index bf55801c..5d37e32b 100644
--- a/application/api/answer/services/conversation_service.py
+++ b/application/api/answer/services/conversation_service.py
@@ -148,9 +148,12 @@ class ConversationService:
         ]
 
         completion = llm.gen(
-            model=model_id, messages=messages_summary, max_tokens=30
+            model=model_id, messages=messages_summary, max_tokens=500
        )
 
+        if not completion or not completion.strip():
+            completion = question[:50] if question else "New Conversation"
+
         conversation_data = {
             "user": user_id,
             "date": current_time,
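Note on the conversation_service.py hunk: raising max_tokens from 30 to 500 gives the title-summary model room to finish, and an empty or whitespace-only completion now falls back to a title derived from the question. Pulled out of context, the behavior is roughly the sketch below (llm, model_id, messages_summary, and question are the surrounding service variables; the helper name is hypothetical):

    def _fallback_title(llm, model_id, messages_summary, question):
        # 500 tokens leaves headroom for the model to finish a short title.
        completion = llm.gen(model=model_id, messages=messages_summary, max_tokens=500)
        # Guard against None or whitespace-only output before storing it.
        if not completion or not completion.strip():
            completion = question[:50] if question else "New Conversation"
        return completion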
raise ValueError(f"Unexpected content type: {type(content)}") - return cleaned_messages +class DocsGPTAPILLM(OpenAILLM): + def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs): + super().__init__( + api_key=DOCSGPT_API_KEY, + user_api_key=user_api_key, + base_url=DOCSGPT_BASE_URL, + *args, + **kwargs, + ) def _raw_gen( self, @@ -79,23 +23,19 @@ class DocsGPTAPILLM(BaseLLM): stream=False, tools=None, engine=settings.AZURE_DEPLOYMENT_NAME, + response_format=None, **kwargs, ): - messages = self._clean_messages_openai(messages) - if tools: - response = self.client.chat.completions.create( - model="docsgpt", - messages=messages, - stream=stream, - tools=tools, - **kwargs, - ) - return response.choices[0] - else: - response = self.client.chat.completions.create( - model="docsgpt", messages=messages, stream=stream, **kwargs - ) - return response.choices[0].message.content + return super()._raw_gen( + baseself, + DOCSGPT_MODEL, + messages, + stream=stream, + tools=tools, + engine=engine, + response_format=response_format, + **kwargs, + ) def _raw_gen_stream( self, @@ -105,34 +45,16 @@ class DocsGPTAPILLM(BaseLLM): stream=True, tools=None, engine=settings.AZURE_DEPLOYMENT_NAME, + response_format=None, **kwargs, ): - messages = self._clean_messages_openai(messages) - if tools: - response = self.client.chat.completions.create( - model="docsgpt", - messages=messages, - stream=stream, - tools=tools, - **kwargs, - ) - else: - response = self.client.chat.completions.create( - model="docsgpt", messages=messages, stream=stream, **kwargs - ) - try: - for line in response: - if ( - len(line.choices) > 0 - and line.choices[0].delta.content is not None - and len(line.choices[0].delta.content) > 0 - ): - yield line.choices[0].delta.content - elif len(line.choices) > 0: - yield line.choices[0] - finally: - if hasattr(response, "close"): - response.close() - - def _supports_tools(self): - return True + return super()._raw_gen_stream( + baseself, + DOCSGPT_MODEL, + messages, + stream=stream, + tools=tools, + engine=engine, + response_format=response_format, + **kwargs, + ) diff --git a/application/llm/groq.py b/application/llm/groq.py index c2ae40ee..9d7c1713 100644 --- a/application/llm/groq.py +++ b/application/llm/groq.py @@ -1,37 +1,15 @@ -from openai import OpenAI - from application.core.settings import settings -from application.llm.base import BaseLLM +from application.llm.openai import OpenAILLM + +GROQ_BASE_URL = "https://api.groq.com/openai/v1" -class GroqLLM(BaseLLM): - def __init__(self, api_key=None, user_api_key=None, *args, **kwargs): - - super().__init__(*args, **kwargs) - self.api_key = api_key or settings.GROQ_API_KEY or settings.API_KEY - self.user_api_key = user_api_key - self.client = OpenAI( - api_key=self.api_key, base_url="https://api.groq.com/openai/v1" +class GroqLLM(OpenAILLM): + def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs): + super().__init__( + api_key=api_key or settings.GROQ_API_KEY or settings.API_KEY, + user_api_key=user_api_key, + base_url=base_url or GROQ_BASE_URL, + *args, + **kwargs, ) - - def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs): - if tools: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, tools=tools, **kwargs - ) - return response.choices[0] - else: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, **kwargs - ) - return response.choices[0].message.content - - def 
diff --git a/application/llm/handlers/base.py b/application/llm/handlers/base.py
index b11654c5..dbc5a879 100644
--- a/application/llm/handlers/base.py
+++ b/application/llm/handlers/base.py
@@ -833,7 +833,10 @@ class LLMHandler(ABC):
                 if call.name:
                     existing.name = call.name
                 if call.arguments:
-                    existing.arguments += call.arguments
+                    if existing.arguments is None:
+                        existing.arguments = call.arguments
+                    else:
+                        existing.arguments += call.arguments
                 # Preserve thought_signature for Google Gemini 3 models
                 if call.thought_signature:
                     existing.thought_signature = call.thought_signature
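The None-guard above fixes a real streaming edge case: providers deliver tool calls as incremental deltas, where the first chunk typically carries the call id and name with arguments still unset, and later chunks carry argument fragments. If existing.arguments starts as None, the old "existing.arguments += call.arguments" raised a TypeError on the first fragment. A standalone sketch of the accumulation (ToolCall is a simplified stand-in for the handler's actual type):

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class ToolCall:  # simplified stand-in for the handler's tool-call type
        id: Optional[str] = None
        name: Optional[str] = None
        arguments: Optional[str] = None


    def merge_delta(existing: ToolCall, call: ToolCall) -> None:
        if call.name:
            existing.name = call.name
        if call.arguments:
            # The first fragment can arrive while existing.arguments is still
            # None; "None += str" would raise TypeError, hence the guard.
            if existing.arguments is None:
                existing.arguments = call.arguments
            else:
                existing.arguments += call.arguments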
"anthropic": AnthropicLLM, "docsgpt": DocsGPTAPILLM, diff --git a/application/llm/novita.py b/application/llm/novita.py index 8d6ac042..b741c4f3 100644 --- a/application/llm/novita.py +++ b/application/llm/novita.py @@ -1,32 +1,15 @@ -from application.llm.base import BaseLLM -from openai import OpenAI +from application.core.settings import settings +from application.llm.openai import OpenAILLM + +NOVITA_BASE_URL = "https://api.novita.ai/v3/openai" -class NovitaLLM(BaseLLM): - def __init__(self, api_key=None, user_api_key=None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.client = OpenAI(api_key=api_key, base_url="https://api.novita.ai/v3/openai") - self.api_key = api_key - self.user_api_key = user_api_key - - def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs): - if tools: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, tools=tools, **kwargs - ) - return response.choices[0] - else: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, **kwargs - ) - return response.choices[0].message.content - - def _raw_gen_stream( - self, baseself, model, messages, stream=True, tools=None, **kwargs - ): - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, **kwargs +class NovitaLLM(OpenAILLM): + def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs): + super().__init__( + api_key=api_key or settings.API_KEY, + user_api_key=user_api_key, + base_url=base_url or NOVITA_BASE_URL, + *args, + **kwargs, ) - for line in response: - if line.choices[0].delta.content is not None: - yield line.choices[0].delta.content diff --git a/application/llm/openai.py b/application/llm/openai.py index 3917cbf7..e851f078 100644 --- a/application/llm/openai.py +++ b/application/llm/openai.py @@ -127,6 +127,7 @@ class OpenAILLM(BaseLLM): **kwargs, ): messages = self._clean_messages_openai(messages) + logging.info(f"Cleaned messages: {messages}") # Convert max_tokens to max_completion_tokens for newer models if "max_tokens" in kwargs: @@ -144,7 +145,7 @@ class OpenAILLM(BaseLLM): if response_format: request_params["response_format"] = response_format response = self.client.chat.completions.create(**request_params) - + logging.info(f"OpenAI response: {response}") if tools: return response.choices[0] else: @@ -162,6 +163,7 @@ class OpenAILLM(BaseLLM): **kwargs, ): messages = self._clean_messages_openai(messages) + logging.info(f"Cleaned messages: {messages}") # Convert max_tokens to max_completion_tokens for newer models if "max_tokens" in kwargs: @@ -182,6 +184,7 @@ class OpenAILLM(BaseLLM): try: for line in response: + logging.debug(f"OpenAI stream line: {line}") if ( len(line.choices) > 0 and line.choices[0].delta.content is not None diff --git a/application/parser/file/base.py b/application/parser/file/base.py index f63e8ef6..8e9b1015 100644 --- a/application/parser/file/base.py +++ b/application/parser/file/base.py @@ -2,7 +2,7 @@ from abc import abstractmethod from typing import Any, List -from langchain.docstore.document import Document as LCDocument +from langchain_core.documents import Document as LCDocument from application.parser.schema.base import Document diff --git a/application/parser/remote/base.py b/application/parser/remote/base.py index 91313f22..74b6fce7 100644 --- a/application/parser/remote/base.py +++ b/application/parser/remote/base.py @@ -2,7 +2,7 @@ from abc import abstractmethod from typing 
diff --git a/application/parser/schema/base.py b/application/parser/schema/base.py
index 61670f9a..a7453dd7 100644
--- a/application/parser/schema/base.py
+++ b/application/parser/schema/base.py
@@ -1,7 +1,7 @@
 """Base schema for readers."""
 from dataclasses import dataclass
 
-from langchain.docstore.document import Document as LCDocument
+from langchain_core.documents import Document as LCDocument
 
 from application.parser.schema.schema import BaseDocument
 
diff --git a/application/requirements.txt b/application/requirements.txt
index cb58247b..ca8be5cc 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -1,91 +1,91 @@
-anthropic==0.49.0
-boto3==1.38.18
-beautifulsoup4==4.13.4
-celery==5.4.0
-cryptography==42.0.8
+anthropic==0.75.0
+boto3==1.42.6
+beautifulsoup4==4.14.3
+celery==5.6.0
+cryptography==46.0.3
 dataclasses-json==0.6.7
 docx2txt==0.8
-duckduckgo-search==7.5.2
+duckduckgo-search==8.1.1
 ebooklib==0.18
 escodegen==1.0.11
 esprima==4.0.1
 esutils==1.0.1
-elevenlabs==2.17.0
-Flask==3.1.1
-faiss-cpu==1.9.0.post1
-fastmcp==2.11.0
-flask-restx==1.3.0
-google-genai==1.49.0
-google-api-python-client==2.179.0
+elevenlabs==2.26.1
+Flask==3.1.2
+faiss-cpu==1.13.1
+fastmcp==2.13.3
+flask-restx==1.3.2
+google-genai==1.54.0
+google-api-python-client==2.187.0
 google-auth-httplib2==0.2.0
 google-auth-oauthlib==1.2.2
 gTTS==2.5.4
 gunicorn==23.0.0
 javalang==0.13.0
 jinja2==3.1.6
-jiter==0.8.2
+jiter==0.12.0
 jmespath==1.0.1
 joblib==1.4.2
 jsonpatch==1.33
 jsonpointer==3.0.0
-kombu==5.4.2
-langchain==0.3.20
-langchain-community==0.3.19
-langchain-core==0.3.59
-langchain-openai==0.3.16
-langchain-text-splitters==0.3.8
-langsmith==0.3.42
+kombu==5.6.1
+langchain==1.1.3
+langchain-community==0.4.1
+langchain-core==1.1.3
+langchain-openai==1.1.1
+langchain-text-splitters==1.0.0
+langsmith==0.4.58
 lazy-object-proxy==1.10.0
-lxml==5.3.1
+lxml==6.0.2
 markupsafe==3.0.2
 marshmallow==3.26.1
 mpmath==1.3.0
-multidict==6.4.3
+multidict==6.7.0
 mypy-extensions==1.0.0
-networkx==3.4.2
+networkx==3.6.1
 numpy==2.2.1
-openai==1.78.1
+openai==2.9.0
 openapi3-parser==1.1.21
-orjson==3.10.14
+orjson==3.11.5
 packaging==24.2
 pandas==2.2.3
 openpyxl==3.1.5
 pathable==0.4.4
-pillow==11.1.0
+pillow==12.0.0
 portalocker>=2.7.0,<3.0.0
 prance==23.6.21.0
 prompt-toolkit==3.0.51
 protobuf==5.29.3
-psycopg2-binary==2.9.10
+psycopg2-binary==2.9.11
 py==1.11.0
 pydantic
 pydantic-core
 pydantic-settings
-pymongo==4.11.3
-pypdf==5.5.0
+pymongo==4.15.5
+pypdf==6.4.1
 python-dateutil==2.9.0.post0
 python-dotenv
 python-jose==3.4.0
 python-pptx==1.0.2
-redis==5.2.1
+redis==7.1.0
 referencing>=0.28.0,<0.31.0
-regex==2024.11.6
-requests==2.32.3
+regex==2025.11.3
+requests==2.32.5
 retry==0.9.2
-sentence-transformers==3.3.1
-tiktoken==0.8.0
-tokenizers==0.21.0
+sentence-transformers==5.1.2
+tiktoken==0.12.0
+tokenizers==0.22.1
 torch==2.7.0
 tqdm==4.67.1
-transformers==4.51.3
-typing-extensions==4.12.2
+transformers==4.57.3
+typing-extensions==4.15.0
 typing-inspect==0.9.0
-tzdata==2024.2
-urllib3==2.3.0
+tzdata==2025.2
+urllib3==2.6.1
 vine==5.1.0
 wcwidth==0.2.13
-werkzeug>=3.1.0,<3.1.2
-yarl==1.20.0
-markdownify==1.1.0
-tldextract==5.1.3
-websockets==14.1
+werkzeug>=3.1.0
+yarl==1.22.0
+markdownify==1.2.2
+tldextract==5.3.0
+websockets==15.0.1
\ No newline at end of file
diff --git a/tests/parser/remote/test_crawler_loader.py b/tests/parser/remote/test_crawler_loader.py
index 0a100abb..92ffdc84 100644
--- a/tests/parser/remote/test_crawler_loader.py
+++ b/tests/parser/remote/test_crawler_loader.py
@@ -2,7 +2,7 @@ from unittest.mock import MagicMock, patch
 
 from application.parser.remote.crawler_loader import CrawlerLoader
 from application.parser.schema.base import Document
-from langchain.docstore.document import Document as LCDocument
+from langchain_core.documents import Document as LCDocument
 
 
 class DummyResponse:
diff --git a/tests/parser/remote/test_web_loader.py b/tests/parser/remote/test_web_loader.py
index ca539f0a..73e368a5 100644
--- a/tests/parser/remote/test_web_loader.py
+++ b/tests/parser/remote/test_web_loader.py
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
 
 from application.parser.remote.web_loader import WebLoader, headers
 from application.parser.schema.base import Document
-from langchain.docstore.document import Document as LCDocument
+from langchain_core.documents import Document as LCDocument
 
 
 @pytest.fixture
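Taken together, the provider refactor means DocsGPT, Groq, and Novita all exercise the same OpenAILLM code path end to end. A usage sketch, assuming BaseLLM.gen wraps _raw_gen the way the conversation_service call above implies:

    from application.llm.docsgpt_provider import DocsGPTAPILLM

    # API key, base URL, and model are pinned inside the subclass.
    llm = DocsGPTAPILLM()

    # _raw_gen overrides whatever model is passed with DOCSGPT_MODEL ("docsgpt").
    answer = llm.gen(
        model="docsgpt",
        messages=[{"role": "user", "content": "What does this PR change?"}],
    )
    print(answer)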