refactor: enhance LLM fallback handling and streamline method execution

2025-11-29 08:33:20 +00:00 · 2025-06-06 16:55:57 +05:30
parent e9530d5ec5
commit 5f5c31cd5b
2 changed files with 110 additions and 36 deletions
--- a/application/agents/base.py
+++ b/application/agents/base.py
@@ -256,12 +256,21 @@ class BaseAgent(ABC):
        return retrieved_data
    def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
        """Start a streaming LLM call for ``messages`` and return its result.

        This span is shown as a diff: lines prefixed with ``-`` are the
        previous unconditional ``gen_stream(model=..., messages=..., tools=...)``
        call, lines prefixed with ``+`` (and the unprefixed lines after them)
        are the replacement that builds ``gen_kwargs`` first.

        ``tools`` is added to the call only when ``self.llm`` has a truthy
        ``_supports_tools`` attribute and ``self.tools`` is non-empty.

        NOTE(review): ``_supports_tools`` is truth-tested here, not called.
        If it is a method, the test passes whenever the attribute exists,
        regardless of what the method would return -- confirm this is intended.

        Args:
            messages: Conversation messages forwarded as ``messages=``.
            log_context: Optional log context; when truthy, an ``"llm"``
                stack entry is appended to ``log_context.stacks``.

        Returns:
            Whatever ``self.llm.gen_stream`` returns.
        """
-        resp = self.llm.gen_stream(
+        gen_kwargs = {"model": self.gpt_model, "messages": messages}
-            model=self.gpt_model, messages=messages, tools=self.tools
+
-        )
+        if (
            hasattr(self.llm, "_supports_tools")
            and self.llm._supports_tools
            and self.tools
        ):
            gen_kwargs["tools"] = self.tools
        resp = self.llm.gen_stream(**gen_kwargs)
        # Logging is optional: record the LLM's stack data (with its
        # ``client`` attribute excluded) only when a log context was supplied.
        if log_context:
            data = build_stack_data(self.llm, exclude_attributes=["client"])
            log_context.stacks.append({"component": "llm", "data": data})
        return resp
    def _llm_handler(
--- a/application/llm/base.py
+++ b/application/llm/base.py
@@ -1,53 +1,118 @@
 import logging
 from abc import ABC, abstractmethod
 from application.cache import gen_cache, stream_cache
 from application.core.settings import settings
 from application.usage import gen_token_usage, stream_token_usage
 logger = logging.getLogger(__name__)
 class BaseLLM(ABC):
-    def __init__(self, decoded_token=None):
+    def __init__(
        self,
        decoded_token=None,
    ):
        """Initialise token counters and the fallback-LLM configuration.

        Args:
            decoded_token: Stored as ``self.decoded_token``; it is also handed
                to the fallback LLM when that instance is created. Defaults
                to ``None``.
        """
        self.decoded_token = decoded_token
        # Token counters, both initialised to zero.
        self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
        # Fallback configuration is read from application settings at
        # construction time.
        self.fallback_provider = settings.FALLBACK_LLM_PROVIDER
        self.fallback_model_name = settings.FALLBACK_LLM_NAME
        self.fallback_llm_api_key = settings.FALLBACK_LLM_API_KEY
        # Cache slot for the fallback instance; filled lazily by the
        # ``fallback_llm`` property.
        self._fallback_llm = None
-    def _apply_decorator(self, method, decorators, *args, **kwargs):
+    @property
    def fallback_llm(self):
        """Return the fallback LLM instance, creating it on first use.

        The instance is only built when both ``fallback_provider`` and
        ``fallback_model_name`` are set. Once construction succeeds the
        instance is cached in ``_fallback_llm`` and returned on later calls.

        Returns:
            The cached fallback LLM, or ``None`` when no fallback is
            configured or its construction failed (the failure is logged,
            not raised).
        """
        if self._fallback_llm is not None:
            return self._fallback_llm
        if not (self.fallback_provider and self.fallback_model_name):
            return None

        try:
            # Imported inside the method rather than at module level
            # (presumably to avoid a circular import -- confirm). The path is
            # rooted at ``application`` like every other import in this
            # module; the previous ``llm.llm_creator`` path does not match
            # the package layout, so the import failed and the error was
            # swallowed below, leaving the fallback permanently unavailable.
            from application.llm.llm_creator import LLMCreator

            # NOTE(review): LLMCreator is constructed directly here. If it
            # exposes a factory method instead, this call raises and is
            # swallowed below -- confirm against LLMCreator's API.
            self._fallback_llm = LLMCreator(
                self.fallback_provider,
                self.fallback_llm_api_key,
                None,
                self.decoded_token,
            )
        except Exception as e:
            # Best effort: a broken fallback must not mask the primary
            # error, so log and fall through to return None.
            logger.error(
                f"Failed to initialize fallback LLM: {str(e)}", exc_info=True
            )
        return self._fallback_llm
    def _execute_with_fallback(
        self, method_name: str, decorators: list, *args, **kwargs
    ):
        """
        Unified method execution with fallback support.

        The raw method is looked up on this instance, wrapped by each
        decorator in list order (so the last decorator ends up outermost) and
        called with this instance as an extra leading positional argument.
        If that raises and a fallback LLM is available, the matching public
        method on the fallback (``method_name`` without ``_raw_``) is called
        instead, with ``model`` replaced by ``fallback_model_name``.

        Args:
            method_name: Name of the raw method ('_raw_gen' or '_raw_gen_stream')
            decorators: List of decorators to apply
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            The decorated primary method's result, or the fallback method's
            result when the primary call raised.

        Raises:
            Exception: The primary error is re-raised unchanged when no
                fallback LLM is available. Errors raised by the fallback
                call itself propagate to the caller.
        """
        try:
            method = getattr(self, method_name)
            for decorator in decorators:
                method = decorator(method)
            # NOTE: only errors raised by this call are caught. If the method
            # returns a lazy generator, errors raised while the caller
            # iterates it happen outside this try block and do not trigger
            # the fallback.
            return method(self, *args, **kwargs)
        except Exception as e:
            fallback = self.fallback_llm
            if not fallback:
                logger.error(f"Primary LLM failed and no fallback available: {str(e)}")
                raise

            logger.warning(
                f"Falling back to {self.fallback_provider}/{self.fallback_model_name}. Error: {str(e)}"
            )
            # Retry through the fallback's public entry point rather than
            # re-wrapping with this instance's decorators (kept from the
            # original intent: accurate token tracking).
            fallback_method = getattr(fallback, method_name.replace("_raw_", ""))

            # The primary's model id is meaningless to a different provider:
            # ask the fallback for the configured fallback model instead.
            # Only a keyword ``model`` is overridden so a positionally passed
            # model cannot collide with it.
            fallback_kwargs = dict(kwargs)
            if "model" in fallback_kwargs and self.fallback_model_name:
                fallback_kwargs["model"] = self.fallback_model_name
            return fallback_method(*args, **fallback_kwargs)
    def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
        """Run ``_raw_gen`` through ``_execute_with_fallback``.

        The raw method is wrapped by ``gen_token_usage`` and then
        ``gen_cache``. ``model``, ``messages``, ``stream`` and ``tools`` are
        always forwarded by keyword; extra positional and keyword arguments
        are passed through unchanged.

        Returns:
            Whatever ``_execute_with_fallback`` returns.
        """
        wrappers = [gen_token_usage, gen_cache]
        forwarded = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "tools": tools,
            **kwargs,
        }
        return self._execute_with_fallback("_raw_gen", wrappers, *args, **forwarded)
    def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
        """Run ``_raw_gen_stream`` through ``_execute_with_fallback``.

        The raw method is wrapped by ``stream_cache`` and then
        ``stream_token_usage``. ``model``, ``messages``, ``stream`` and
        ``tools`` are always forwarded by keyword; extra positional and
        keyword arguments are passed through unchanged.

        Returns:
            Whatever ``_execute_with_fallback`` returns.
        """
        wrappers = [stream_cache, stream_token_usage]
        forwarded = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "tools": tools,
            **kwargs,
        }
        return self._execute_with_fallback(
            "_raw_gen_stream", wrappers, *args, **forwarded
        )
    @abstractmethod
    def _raw_gen(self, model, messages, stream, tools, *args, **kwargs):
        """Provider-specific generation hook; subclasses must override it.

        ``gen`` dispatches to this method by name. The base implementation
        does nothing and returns ``None``.

        NOTE(review): ``_execute_with_fallback`` calls the bound method with
        the instance as an extra leading positional argument, so concrete
        implementations presumably accept one more parameter than this
        signature declares -- confirm against the subclasses.
        """
        return None
    def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
        """Run ``_raw_gen`` through ``_apply_decorator``.

        The decorator list is ``[gen_token_usage, gen_cache]`` and is passed
        by keyword. ``model``, ``messages``, ``stream`` and ``tools`` are
        forwarded by keyword; extra positional and keyword arguments are
        passed through unchanged.

        NOTE(review): ``_apply_decorator`` only appears on a removed (``-``)
        line in this diff view, so this looks like the pre-refactor body of
        ``gen`` -- confirm before relying on it.

        Returns:
            Whatever ``_apply_decorator`` returns.
        """
        wrappers = [gen_token_usage, gen_cache]
        forwarded = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "tools": tools,
            **kwargs,
        }
        return self._apply_decorator(
            self._raw_gen, *args, decorators=wrappers, **forwarded
        )
    @abstractmethod
    def _raw_gen_stream(self, model, messages, stream, *args, **kwargs):
        """Provider-specific streaming hook; subclasses must override it.

        ``gen_stream`` dispatches to this method. ``tools`` is not a named
        parameter here, so when it is passed by keyword it arrives in
        ``**kwargs``. The base implementation does nothing and returns
        ``None``.
        """
        return None
    def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
        """Run ``_raw_gen_stream`` through ``_apply_decorator``.

        The decorator list is ``[stream_cache, stream_token_usage]`` and is
        passed by keyword. ``model``, ``messages``, ``stream`` and ``tools``
        are forwarded by keyword; extra positional and keyword arguments are
        passed through unchanged.

        NOTE(review): ``_apply_decorator`` only appears on a removed (``-``)
        line in this diff view, so this looks like the pre-refactor body of
        ``gen_stream`` -- confirm before relying on it.

        Returns:
            Whatever ``_apply_decorator`` returns.
        """
        wrappers = [stream_cache, stream_token_usage]
        forwarded = {
            "model": model,
            "messages": messages,
            "stream": stream,
            "tools": tools,
            **kwargs,
        }
        return self._apply_decorator(
            self._raw_gen_stream, *args, decorators=wrappers, **forwarded
        )
    def supports_tools(self):
        return hasattr(self, "_supports_tools") and callable(
            getattr(self, "_supports_tools")