From e0a9f086322d9596dcd46b5bbfa74203ac593db6 Mon Sep 17 00:00:00 2001
From: Alex
Date: Wed, 10 Dec 2025 21:53:59 +0000
Subject: [PATCH] refactor and deps (#2184)

---
 .../answer/services/conversation_service.py |   5 +-
 application/llm/docsgpt_provider.py         | 148 +++++-------
 application/llm/groq.py                     |  44 ++----
 application/llm/handlers/base.py            |   5 +-
 application/llm/huggingface.py              |  68 --------
 application/llm/llm_creator.py              |   2 -
 application/llm/novita.py                   |  41 ++---
 application/llm/openai.py                   |   5 +-
 application/parser/file/base.py             |   2 +-
 application/parser/remote/base.py           |   2 +-
 application/parser/schema/base.py           |   2 +-
 application/requirements.txt                |  90 +++++------
 tests/parser/remote/test_crawler_loader.py  |   2 +-
 tests/parser/remote/test_web_loader.py      |   2 +-
 14 files changed, 120 insertions(+), 298 deletions(-)
 delete mode 100644 application/llm/huggingface.py

diff --git a/application/api/answer/services/conversation_service.py b/application/api/answer/services/conversation_service.py
index bf55801c..5d37e32b 100644
--- a/application/api/answer/services/conversation_service.py
+++ b/application/api/answer/services/conversation_service.py
@@ -148,9 +148,12 @@ class ConversationService:
         ]
 
         completion = llm.gen(
-            model=model_id, messages=messages_summary, max_tokens=30
+            model=model_id, messages=messages_summary, max_tokens=500
        )
 
+        if not completion or not completion.strip():
+            completion = question[:50] if question else "New Conversation"
+
         conversation_data = {
             "user": user_id,
             "date": current_time,
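Note on the conversation_service.py hunk: raising max_tokens from 30 to 500 gives the title-summary model room to finish, and an empty or whitespace-only completion now falls back to a title derived from the question. Pulled out of context, the behavior is roughly the sketch below (llm, model_id, messages_summary, and question are the surrounding service variables; the helper name is hypothetical):

    def _fallback_title(llm, model_id, messages_summary, question):
        # 500 tokens leaves headroom for the model to finish a short title.
        completion = llm.gen(model=model_id, messages=messages_summary, max_tokens=500)
        # Guard against None or whitespace-only output before storing it.
        if not completion or not completion.strip():
            completion = question[:50] if question else "New Conversation"
        return completion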
raise ValueError(f"Unexpected content type: {type(content)}") - return cleaned_messages +class DocsGPTAPILLM(OpenAILLM): + def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs): + super().__init__( + api_key=DOCSGPT_API_KEY, + user_api_key=user_api_key, + base_url=DOCSGPT_BASE_URL, + *args, + **kwargs, + ) def _raw_gen( self, @@ -79,23 +23,19 @@ class DocsGPTAPILLM(BaseLLM): stream=False, tools=None, engine=settings.AZURE_DEPLOYMENT_NAME, + response_format=None, **kwargs, ): - messages = self._clean_messages_openai(messages) - if tools: - response = self.client.chat.completions.create( - model="docsgpt", - messages=messages, - stream=stream, - tools=tools, - **kwargs, - ) - return response.choices[0] - else: - response = self.client.chat.completions.create( - model="docsgpt", messages=messages, stream=stream, **kwargs - ) - return response.choices[0].message.content + return super()._raw_gen( + baseself, + DOCSGPT_MODEL, + messages, + stream=stream, + tools=tools, + engine=engine, + response_format=response_format, + **kwargs, + ) def _raw_gen_stream( self, @@ -105,34 +45,16 @@ class DocsGPTAPILLM(BaseLLM): stream=True, tools=None, engine=settings.AZURE_DEPLOYMENT_NAME, + response_format=None, **kwargs, ): - messages = self._clean_messages_openai(messages) - if tools: - response = self.client.chat.completions.create( - model="docsgpt", - messages=messages, - stream=stream, - tools=tools, - **kwargs, - ) - else: - response = self.client.chat.completions.create( - model="docsgpt", messages=messages, stream=stream, **kwargs - ) - try: - for line in response: - if ( - len(line.choices) > 0 - and line.choices[0].delta.content is not None - and len(line.choices[0].delta.content) > 0 - ): - yield line.choices[0].delta.content - elif len(line.choices) > 0: - yield line.choices[0] - finally: - if hasattr(response, "close"): - response.close() - - def _supports_tools(self): - return True + return super()._raw_gen_stream( + baseself, + DOCSGPT_MODEL, + messages, + stream=stream, + tools=tools, + engine=engine, + response_format=response_format, + **kwargs, + ) diff --git a/application/llm/groq.py b/application/llm/groq.py index c2ae40ee..9d7c1713 100644 --- a/application/llm/groq.py +++ b/application/llm/groq.py @@ -1,37 +1,15 @@ -from openai import OpenAI - from application.core.settings import settings -from application.llm.base import BaseLLM +from application.llm.openai import OpenAILLM + +GROQ_BASE_URL = "https://api.groq.com/openai/v1" -class GroqLLM(BaseLLM): - def __init__(self, api_key=None, user_api_key=None, *args, **kwargs): - - super().__init__(*args, **kwargs) - self.api_key = api_key or settings.GROQ_API_KEY or settings.API_KEY - self.user_api_key = user_api_key - self.client = OpenAI( - api_key=self.api_key, base_url="https://api.groq.com/openai/v1" +class GroqLLM(OpenAILLM): + def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs): + super().__init__( + api_key=api_key or settings.GROQ_API_KEY or settings.API_KEY, + user_api_key=user_api_key, + base_url=base_url or GROQ_BASE_URL, + *args, + **kwargs, ) - - def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs): - if tools: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, tools=tools, **kwargs - ) - return response.choices[0] - else: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, **kwargs - ) - return response.choices[0].message.content - - def 
diff --git a/application/llm/handlers/base.py b/application/llm/handlers/base.py
index b11654c5..dbc5a879 100644
--- a/application/llm/handlers/base.py
+++ b/application/llm/handlers/base.py
@@ -833,7 +833,10 @@ class LLMHandler(ABC):
                 if call.name:
                     existing.name = call.name
                 if call.arguments:
-                    existing.arguments += call.arguments
+                    if existing.arguments is None:
+                        existing.arguments = call.arguments
+                    else:
+                        existing.arguments += call.arguments
                 # Preserve thought_signature for Google Gemini 3 models
                 if call.thought_signature:
                     existing.thought_signature = call.thought_signature
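The None-guard above fixes a real streaming edge case: providers deliver tool calls as incremental deltas, where the first chunk typically carries the call id and name with arguments still unset, and later chunks carry argument fragments. If existing.arguments starts as None, the old "existing.arguments += call.arguments" raised a TypeError on the first fragment. A standalone sketch of the accumulation (ToolCall is a simplified stand-in for the handler's actual type):

    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class ToolCall:  # simplified stand-in for the handler's tool-call type
        id: Optional[str] = None
        name: Optional[str] = None
        arguments: Optional[str] = None


    def merge_delta(existing: ToolCall, call: ToolCall) -> None:
        if call.name:
            existing.name = call.name
        if call.arguments:
            # The first fragment can arrive while existing.arguments is still
            # None; "None += str" would raise TypeError, hence the guard.
            if existing.arguments is None:
                existing.arguments = call.arguments
            else:
                existing.arguments += call.arguments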
"anthropic": AnthropicLLM, "docsgpt": DocsGPTAPILLM, diff --git a/application/llm/novita.py b/application/llm/novita.py index 8d6ac042..b741c4f3 100644 --- a/application/llm/novita.py +++ b/application/llm/novita.py @@ -1,32 +1,15 @@ -from application.llm.base import BaseLLM -from openai import OpenAI +from application.core.settings import settings +from application.llm.openai import OpenAILLM + +NOVITA_BASE_URL = "https://api.novita.ai/v3/openai" -class NovitaLLM(BaseLLM): - def __init__(self, api_key=None, user_api_key=None, *args, **kwargs): - super().__init__(*args, **kwargs) - self.client = OpenAI(api_key=api_key, base_url="https://api.novita.ai/v3/openai") - self.api_key = api_key - self.user_api_key = user_api_key - - def _raw_gen(self, baseself, model, messages, stream=False, tools=None, **kwargs): - if tools: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, tools=tools, **kwargs - ) - return response.choices[0] - else: - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, **kwargs - ) - return response.choices[0].message.content - - def _raw_gen_stream( - self, baseself, model, messages, stream=True, tools=None, **kwargs - ): - response = self.client.chat.completions.create( - model=model, messages=messages, stream=stream, **kwargs +class NovitaLLM(OpenAILLM): + def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs): + super().__init__( + api_key=api_key or settings.API_KEY, + user_api_key=user_api_key, + base_url=base_url or NOVITA_BASE_URL, + *args, + **kwargs, ) - for line in response: - if line.choices[0].delta.content is not None: - yield line.choices[0].delta.content diff --git a/application/llm/openai.py b/application/llm/openai.py index 3917cbf7..e851f078 100644 --- a/application/llm/openai.py +++ b/application/llm/openai.py @@ -127,6 +127,7 @@ class OpenAILLM(BaseLLM): **kwargs, ): messages = self._clean_messages_openai(messages) + logging.info(f"Cleaned messages: {messages}") # Convert max_tokens to max_completion_tokens for newer models if "max_tokens" in kwargs: @@ -144,7 +145,7 @@ class OpenAILLM(BaseLLM): if response_format: request_params["response_format"] = response_format response = self.client.chat.completions.create(**request_params) - + logging.info(f"OpenAI response: {response}") if tools: return response.choices[0] else: @@ -162,6 +163,7 @@ class OpenAILLM(BaseLLM): **kwargs, ): messages = self._clean_messages_openai(messages) + logging.info(f"Cleaned messages: {messages}") # Convert max_tokens to max_completion_tokens for newer models if "max_tokens" in kwargs: @@ -182,6 +184,7 @@ class OpenAILLM(BaseLLM): try: for line in response: + logging.debug(f"OpenAI stream line: {line}") if ( len(line.choices) > 0 and line.choices[0].delta.content is not None diff --git a/application/parser/file/base.py b/application/parser/file/base.py index f63e8ef6..8e9b1015 100644 --- a/application/parser/file/base.py +++ b/application/parser/file/base.py @@ -2,7 +2,7 @@ from abc import abstractmethod from typing import Any, List -from langchain.docstore.document import Document as LCDocument +from langchain_core.documents import Document as LCDocument from application.parser.schema.base import Document diff --git a/application/parser/remote/base.py b/application/parser/remote/base.py index 91313f22..74b6fce7 100644 --- a/application/parser/remote/base.py +++ b/application/parser/remote/base.py @@ -2,7 +2,7 @@ from abc import abstractmethod from typing 
diff --git a/application/parser/schema/base.py b/application/parser/schema/base.py
index 61670f9a..a7453dd7 100644
--- a/application/parser/schema/base.py
+++ b/application/parser/schema/base.py
@@ -1,7 +1,7 @@
 """Base schema for readers."""
 from dataclasses import dataclass
 
-from langchain.docstore.document import Document as LCDocument
+from langchain_core.documents import Document as LCDocument
 
 from application.parser.schema.schema import BaseDocument
 
diff --git a/application/requirements.txt b/application/requirements.txt
index cb58247b..ca8be5cc 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -1,91 +1,91 @@
-anthropic==0.49.0
-boto3==1.38.18
-beautifulsoup4==4.13.4
-celery==5.4.0
-cryptography==42.0.8
+anthropic==0.75.0
+boto3==1.42.6
+beautifulsoup4==4.14.3
+celery==5.6.0
+cryptography==46.0.3
 dataclasses-json==0.6.7
 docx2txt==0.8
-duckduckgo-search==7.5.2
+duckduckgo-search==8.1.1
 ebooklib==0.18
 escodegen==1.0.11
 esprima==4.0.1
 esutils==1.0.1
-elevenlabs==2.17.0
-Flask==3.1.1
-faiss-cpu==1.9.0.post1
-fastmcp==2.11.0
-flask-restx==1.3.0
-google-genai==1.49.0
-google-api-python-client==2.179.0
+elevenlabs==2.26.1
+Flask==3.1.2
+faiss-cpu==1.13.1
+fastmcp==2.13.3
+flask-restx==1.3.2
+google-genai==1.54.0
+google-api-python-client==2.187.0
 google-auth-httplib2==0.2.0
 google-auth-oauthlib==1.2.2
 gTTS==2.5.4
 gunicorn==23.0.0
 javalang==0.13.0
 jinja2==3.1.6
-jiter==0.8.2
+jiter==0.12.0
 jmespath==1.0.1
 joblib==1.4.2
 jsonpatch==1.33
 jsonpointer==3.0.0
-kombu==5.4.2
-langchain==0.3.20
-langchain-community==0.3.19
-langchain-core==0.3.59
-langchain-openai==0.3.16
-langchain-text-splitters==0.3.8
-langsmith==0.3.42
+kombu==5.6.1
+langchain==1.1.3
+langchain-community==0.4.1
+langchain-core==1.1.3
+langchain-openai==1.1.1
+langchain-text-splitters==1.0.0
+langsmith==0.4.58
 lazy-object-proxy==1.10.0
-lxml==5.3.1
+lxml==6.0.2
 markupsafe==3.0.2
 marshmallow==3.26.1
 mpmath==1.3.0
-multidict==6.4.3
+multidict==6.7.0
 mypy-extensions==1.0.0
-networkx==3.4.2
+networkx==3.6.1
 numpy==2.2.1
-openai==1.78.1
+openai==2.9.0
 openapi3-parser==1.1.21
-orjson==3.10.14
+orjson==3.11.5
 packaging==24.2
 pandas==2.2.3
 openpyxl==3.1.5
 pathable==0.4.4
-pillow==11.1.0
+pillow==12.0.0
 portalocker>=2.7.0,<3.0.0
 prance==23.6.21.0
 prompt-toolkit==3.0.51
 protobuf==5.29.3
-psycopg2-binary==2.9.10
+psycopg2-binary==2.9.11
 py==1.11.0
 pydantic
 pydantic-core
 pydantic-settings
-pymongo==4.11.3
-pypdf==5.5.0
+pymongo==4.15.5
+pypdf==6.4.1
 python-dateutil==2.9.0.post0
 python-dotenv
 python-jose==3.4.0
 python-pptx==1.0.2
-redis==5.2.1
+redis==7.1.0
 referencing>=0.28.0,<0.31.0
-regex==2024.11.6
-requests==2.32.3
+regex==2025.11.3
+requests==2.32.5
 retry==0.9.2
-sentence-transformers==3.3.1
-tiktoken==0.8.0
-tokenizers==0.21.0
+sentence-transformers==5.1.2
+tiktoken==0.12.0
+tokenizers==0.22.1
 torch==2.7.0
 tqdm==4.67.1
-transformers==4.51.3
-typing-extensions==4.12.2
+transformers==4.57.3
+typing-extensions==4.15.0
 typing-inspect==0.9.0
-tzdata==2024.2
-urllib3==2.3.0
+tzdata==2025.2
+urllib3==2.6.1
 vine==5.1.0
 wcwidth==0.2.13
-werkzeug>=3.1.0,<3.1.2
-yarl==1.20.0
-markdownify==1.1.0
-tldextract==5.1.3
-websockets==14.1
+werkzeug>=3.1.0
+yarl==1.22.0
+markdownify==1.2.2
+tldextract==5.3.0
+websockets==15.0.1
\ No newline at end of file
diff --git a/tests/parser/remote/test_crawler_loader.py b/tests/parser/remote/test_crawler_loader.py
index 0a100abb..92ffdc84 100644
--- a/tests/parser/remote/test_crawler_loader.py
+++ b/tests/parser/remote/test_crawler_loader.py
@@ -2,7 +2,7 @@ from unittest.mock import MagicMock, patch
 
 from application.parser.remote.crawler_loader import CrawlerLoader
 from application.parser.schema.base import Document
-from langchain.docstore.document import Document as LCDocument
+from langchain_core.documents import Document as LCDocument
 
 
 class DummyResponse:
diff --git a/tests/parser/remote/test_web_loader.py b/tests/parser/remote/test_web_loader.py
index ca539f0a..73e368a5 100644
--- a/tests/parser/remote/test_web_loader.py
+++ b/tests/parser/remote/test_web_loader.py
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
 
 from application.parser.remote.web_loader import WebLoader, headers
 from application.parser.schema.base import Document
-from langchain.docstore.document import Document as LCDocument
+from langchain_core.documents import Document as LCDocument
 
 
 @pytest.fixture
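Taken together, the provider refactor means DocsGPT, Groq, and Novita all exercise the same OpenAILLM code path end to end. A usage sketch, assuming BaseLLM.gen wraps _raw_gen the way the conversation_service call above implies:

    from application.llm.docsgpt_provider import DocsGPTAPILLM

    # API key, base URL, and model are pinned inside the subclass.
    llm = DocsGPTAPILLM()

    # _raw_gen overrides whatever model is passed with DOCSGPT_MODEL ("docsgpt").
    answer = llm.gen(
        model="docsgpt",
        messages=[{"role": "user", "content": "What does this PR change?"}],
    )
    print(answer)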