feat: model registry and capabilities for multi-provider support (#2158)

* feat: Implement model registry and capabilities for multi-provider support
  - Added ModelRegistry to manage available models and their capabilities.
  - Introduced ModelProvider enum for different LLM providers.
  - Created ModelCapabilities dataclass to define model features.
  - Implemented methods to load models based on API keys and settings.
  - Added utility functions for model management in model_utils.py.
  - Updated settings.py to include provider-specific API keys.
  - Refactored LLM classes (Anthropic, OpenAI, Google, etc.) to utilize new model registry.
  - Enhanced utility functions to handle token limits and model validation.
  - Improved code structure and logging for better maintainability.
* feat: Add model selection feature with API integration and UI component
* feat: Add model selection and default model functionality in agent management
* test: Update assertions and formatting in stream processing tests
* refactor(llm): Standardize model identifier to model_id
* fix tests

---------

Co-authored-by: Alex <a@tushynski.me>
2026-03-03 20:33:45 +00:00 · 2025-11-14 16:43:19 +05:30
parent fbf7cf874b
commit 3f7de867cc
54 changed files with 1388 additions and 226 deletions
--- a/application/utils.py
+++ b/application/utils.py
@@ -7,6 +7,8 @@ import tiktoken
 from flask import jsonify, make_response
 from werkzeug.utils import secure_filename

+from application.core.model_utils import get_token_limit
+
 from application.core.settings import settings


@@ -75,11 +77,9 @@ def count_tokens_docs(docs):


 def calculate_doc_token_budget(
-    gpt_model: str = "gpt-4o", history_token_limit: int = 2000
+    model_id: str = "gpt-4o", history_token_limit: int = 2000
 ) -> int:
-    total_context = settings.LLM_TOKEN_LIMITS.get(
-        gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT
-    )
+    total_context = get_token_limit(model_id)
    reserved = sum(settings.RESERVED_TOKENS.values())
    doc_budget = total_context - history_token_limit - reserved
    return max(doc_budget, 1000)
@@ -144,16 +144,13 @@ def get_hash(data):
    return hashlib.md5(data.encode(), usedforsecurity=False).hexdigest()


-def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
+def limit_chat_history(history, max_token_limit=None, model_id="docsgpt-local"):
    """Limit chat history to fit within token limit."""
-    from application.core.settings import settings
-
+    model_token_limit = get_token_limit(model_id)
    max_token_limit = (
        max_token_limit
-        if max_token_limit
-        and max_token_limit
-        < settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
-        else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_LLM_TOKEN_LIMIT)
+        if max_token_limit and max_token_limit < model_token_limit
+        else model_token_limit
    )

    if not history:
@@ -205,37 +202,44 @@ def clean_text_for_tts(text: str) -> str:
    clean text for Text-to-Speech processing.
    """
    # Handle code blocks and links
-    text = re.sub(r'```mermaid[\s\S]*?```', ' flowchart, ', text)  ## ```mermaid...```
-    text = re.sub(r'```[\s\S]*?```', ' code block, ', text)  ## ```code```
-    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  ## [text](url)
-    text = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', '', text)  ## ![alt](url)
+
+    text = re.sub(r"```mermaid[\s\S]*?```", " flowchart, ", text)  ## ```mermaid...```
+    text = re.sub(r"```[\s\S]*?```", " code block, ", text)  ## ```code```
+    text = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", text)  ## [text](url)
+    text = re.sub(r"!\[([^\]]*)\]\([^\)]+\)", "", text)  ## ![alt](url)

    # Remove markdown formatting
-    text = re.sub(r'`([^`]+)`', r'\1', text)  ## `code`
-    text = re.sub(r'\{([^}]*)\}', r' \1 ', text)  ## {text}
-    text = re.sub(r'[{}]', ' ', text)  ## unmatched {}
-    text = re.sub(r'\[([^\]]+)\]', r' \1 ', text)  ## [text]
-    text = re.sub(r'[\[\]]', ' ', text)  ## unmatched []
-    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)  ## **bold** __bold__
-    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)  ## *italic* _italic_
-    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)  ## # headers
-    text = re.sub(r'^>\s+', '', text, flags=re.MULTILINE)  ## > blockquotes
-    text = re.sub(r'^[\s]*[-\*\+]\s+', '', text, flags=re.MULTILINE)  ## - * + lists
-    text = re.sub(r'^[\s]*\d+\.\s+', '', text, flags=re.MULTILINE)  ## 1. numbered lists
-    text = re.sub(r'^[\*\-_]{3,}\s*$', '', text, flags=re.MULTILINE)  ## --- *** ___ rules
-    text = re.sub(r'<[^>]*>', '', text)  ## <html> tags

-    #Remove non-ASCII (emojis, special Unicode)
-    text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text)
+    text = re.sub(r"`([^`]+)`", r"\1", text)  ## `code`
+    text = re.sub(r"\{([^}]*)\}", r" \1 ", text)  ## {text}
+    text = re.sub(r"[{}]", " ", text)  ## unmatched {}
+    text = re.sub(r"\[([^\]]+)\]", r" \1 ", text)  ## [text]
+    text = re.sub(r"[\[\]]", " ", text)  ## unmatched []
+    text = re.sub(r"(\*\*|__)(.*?)\1", r"\2", text)  ## **bold** __bold__
+    text = re.sub(r"(\*|_)(.*?)\1", r"\2", text)  ## *italic* _italic_
+    text = re.sub(r"^#{1,6}\s+", "", text, flags=re.MULTILINE)  ## # headers
+    text = re.sub(r"^>\s+", "", text, flags=re.MULTILINE)  ## > blockquotes
+    text = re.sub(r"^[\s]*[-\*\+]\s+", "", text, flags=re.MULTILINE)  ## - * + lists
+    text = re.sub(r"^[\s]*\d+\.\s+", "", text, flags=re.MULTILINE)  ## 1. numbered lists
+    text = re.sub(
+        r"^[\*\-_]{3,}\s*$", "", text, flags=re.MULTILINE
+    )  ## --- *** ___ rules
+    text = re.sub(r"<[^>]*>", "", text)  ## <html> tags

-    #Replace special sequences
-    text = re.sub(r'-->', ', ', text)  ## -->
-    text = re.sub(r'<--', ', ', text)  ## <--
-    text = re.sub(r'=>', ', ', text)  ## =>
-    text = re.sub(r'::', ' ', text)  ## ::
+    # Remove non-ASCII (emojis, special Unicode)

-    #Normalize whitespace
-    text = re.sub(r'\s+', ' ', text)
+    text = re.sub(r"[^\x20-\x7E\n\r\t]", "", text)
+
+    # Replace special sequences
+
+    text = re.sub(r"-->", ", ", text)  ## -->
+    text = re.sub(r"<--", ", ", text)  ## <--
+    text = re.sub(r"=>", ", ", text)  ## =>
+    text = re.sub(r"::", " ", text)  ## ::
+
+    # Normalize whitespace
+
+    text = re.sub(r"\s+", " ", text)
    text = text.strip()

    return text