GH Action - Upstream Sync
2025-06-12 01:43:21 +00:00
33 changed files with 913 additions and 649 deletions

View File

@@ -2,16 +2,18 @@ import uuid
from abc import ABC, abstractmethod
from typing import Dict, Generator, List, Optional
from application.agents.llm_handler import get_llm_handler
from bson.objectid import ObjectId
from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.llm.handlers.handler_creator import LLMHandlerCreator
from application.llm.llm_creator import LLMCreator
from application.logging import build_stack_data, log_activity, LogContext
from application.retriever.base import BaseRetriever
from application.core.settings import settings
from bson.objectid import ObjectId
class BaseAgent(ABC):
@@ -45,7 +47,9 @@ class BaseAgent(ABC):
user_api_key=user_api_key,
decoded_token=decoded_token,
)
self.llm_handler = get_llm_handler(llm_name)
self.llm_handler = LLMHandlerCreator.create_handler(
llm_name if llm_name else "default"
)
self.attachments = attachments or []
@log_activity()
@@ -132,6 +136,15 @@ class BaseAgent(ABC):
parser = ToolActionParser(self.llm.__class__.__name__)
tool_id, action_name, call_args = parser.parse_args(call)
call_id = getattr(call, "id", None) or str(uuid.uuid4())
tool_call_data = {
"tool_name": tools_dict[tool_id]["name"],
"call_id": call_id,
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "pending"}}
tool_data = tools_dict[tool_id]
action_data = (
tool_data["config"]["actions"][action_name]
@@ -184,19 +197,29 @@ class BaseAgent(ABC):
else:
print(f"Executing tool: {action_name} with args: {call_args}")
result = tool.execute_action(action_name, **parameters)
call_id = getattr(call, "id", None)
tool_call_data["result"] = (
f"{str(result)[:50]}..." if len(str(result)) > 50 else result
)
tool_call_data = {
"tool_name": tool_data["name"],
"call_id": call_id if call_id is not None else "None",
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
"result": result,
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "completed"}}
self.tool_calls.append(tool_call_data)
return result, call_id
def _get_truncated_tool_calls(self):
return [
{
**tool_call,
"result": (
f"{str(tool_call['result'])[:50]}..."
if len(str(tool_call["result"])) > 50
else tool_call["result"]
),
"status": "completed",
}
for tool_call in self.tool_calls
]
def _build_messages(
self,
system_prompt: str,
@@ -252,9 +275,16 @@ class BaseAgent(ABC):
return retrieved_data
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
resp = self.llm.gen_stream(
model=self.gpt_model, messages=messages, tools=self.tools
)
gen_kwargs = {"model": self.gpt_model, "messages": messages}
if (
hasattr(self.llm, "_supports_tools")
and self.llm._supports_tools
and self.tools
):
gen_kwargs["tools"] = self.tools
resp = self.llm.gen_stream(**gen_kwargs)
if log_context:
data = build_stack_data(self.llm, exclude_attributes=["client"])
log_context.stacks.append({"component": "llm", "data": data})
@@ -268,10 +298,30 @@ class BaseAgent(ABC):
log_context: Optional[LogContext] = None,
attachments: Optional[List[Dict]] = None,
):
resp = self.llm_handler.handle_response(
self, resp, tools_dict, messages, attachments
resp = self.llm_handler.process_message_flow(
self, resp, tools_dict, messages, attachments, True
)
if log_context:
data = build_stack_data(self.llm_handler, exclude_attributes=["tool_calls"])
log_context.stacks.append({"component": "llm_handler", "data": data})
return resp
def _handle_response(self, response, tools_dict, messages, log_context):
if isinstance(response, str):
yield {"answer": response}
return
if hasattr(response, "message") and getattr(response.message, "content", None):
yield {"answer": response.message.content}
return
processed_response_gen = self._llm_handler(
response, tools_dict, messages, log_context, self.attachments
)
for event in processed_response_gen:
if isinstance(event, str):
yield {"answer": event}
elif hasattr(event, "message") and getattr(event.message, "content", None):
yield {"answer": event.message.content}
elif isinstance(event, dict) and "type" in event:
yield event
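For reference, a minimal sketch of the event payloads the reworked tool-call path yields (field names mirror tool_call_data above; the concrete values are invented for illustration):

# Sketch only: keys follow tool_call_data in the diff above; values are illustrative.
pending_event = {
    "type": "tool_call",
    "data": {
        "tool_name": "example_tool",                 # tools_dict[tool_id]["name"]
        "call_id": "call-123",                       # LLM-supplied id, or a generated uuid4
        "action_name": "read_webpage_42",            # f"{action_name}_{tool_id}"
        "arguments": {"url": "https://example.com"},
        "status": "pending",
    },
}
completed_event = {
    "type": "tool_call",
    "data": {
        **pending_event["data"],
        "result": "first 50 characters of the tool output...",
        "status": "completed",
    },
}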

View File

@@ -1,8 +1,6 @@
from typing import Dict, Generator
from application.agents.base import BaseAgent
from application.logging import LogContext
from application.retriever.base import BaseRetriever
import logging
@@ -10,55 +8,46 @@ logger = logging.getLogger(__name__)
class ClassicAgent(BaseAgent):
"""A simplified classic agent with clear execution flow.
Usage:
1. Processes a query through retrieval
2. Sets up available tools
3. Generates responses using LLM
4. Handles tool interactions if needed
5. Returns standardized outputs
Easy to extend by overriding specific steps.
"""
def _gen_inner(
self, query: str, retriever: BaseRetriever, log_context: LogContext
) -> Generator[Dict, None, None]:
# Step 1: Retrieve relevant data
retrieved_data = self._retriever_search(retriever, query, log_context)
if self.user_api_key:
tools_dict = self._get_tools(self.user_api_key)
else:
tools_dict = self._get_user_tools(self.user)
# Step 2: Prepare tools
tools_dict = (
self._get_user_tools(self.user)
if not self.user_api_key
else self._get_tools(self.user_api_key)
)
self._prepare_tools(tools_dict)
# Step 3: Build and process messages
messages = self._build_messages(self.prompt, query, retrieved_data)
llm_response = self._llm_gen(messages, log_context)
resp = self._llm_gen(messages, log_context)
# Step 4: Handle the response
yield from self._handle_response(
llm_response, tools_dict, messages, log_context
)
attachments = self.attachments
if isinstance(resp, str):
yield {"answer": resp}
return
if (
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
yield {"answer": resp.message.content}
return
resp = self._llm_handler(resp, tools_dict, messages, log_context, attachments)
if isinstance(resp, str):
yield {"answer": resp}
elif (
hasattr(resp, "message")
and hasattr(resp.message, "content")
and resp.message.content is not None
):
yield {"answer": resp.message.content}
else:
for line in resp:
if isinstance(line, str):
yield {"answer": line}
# Step 5: Return metadata
yield {"sources": retrieved_data}
yield {"tool_calls": self._get_truncated_tool_calls()}
# Log tool calls for debugging
log_context.stacks.append(
{"component": "agent", "data": {"tool_calls": self.tool_calls.copy()}}
)
yield {"sources": retrieved_data}
# clean tool_call_data: only send the first 50 characters of tool_call['result']
for tool_call in self.tool_calls:
if len(str(tool_call["result"])) > 50:
tool_call["result"] = str(tool_call["result"])[:50] + "..."
yield {"tool_calls": self.tool_calls.copy()}

View File

@@ -1,351 +0,0 @@
import json
import logging
from abc import ABC, abstractmethod
from application.logging import build_stack_data
logger = logging.getLogger(__name__)
class LLMHandler(ABC):
def __init__(self):
self.llm_calls = []
self.tool_calls = []
@abstractmethod
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, **kwargs):
pass
def prepare_messages_with_attachments(self, agent, messages, attachments=None):
"""
Prepare messages with attachment content if available.
Args:
agent: The current agent instance.
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content.
Returns:
list: Messages with attachment context added to the system prompt.
"""
if not attachments:
return messages
logger.info(f"Preparing messages with {len(attachments)} attachments")
supported_types = agent.llm.get_supported_attachment_types()
supported_attachments = []
unsupported_attachments = []
for attachment in attachments:
mime_type = attachment.get('mime_type')
if mime_type in supported_types:
supported_attachments.append(attachment)
else:
unsupported_attachments.append(attachment)
# Process supported attachments with the LLM's custom method
prepared_messages = messages
if supported_attachments:
logger.info(f"Processing {len(supported_attachments)} supported attachments with {agent.llm.__class__.__name__}'s method")
prepared_messages = agent.llm.prepare_messages_with_attachments(messages, supported_attachments)
# Process unsupported attachments with the default method
if unsupported_attachments:
logger.info(f"Processing {len(unsupported_attachments)} unsupported attachments with default method")
prepared_messages = self._append_attachment_content_to_system(prepared_messages, unsupported_attachments)
return prepared_messages
def _append_attachment_content_to_system(self, messages, attachments):
"""
Default method to append attachment content to the system prompt.
Args:
messages (list): List of message dictionaries.
attachments (list): List of attachment dictionaries with content.
Returns:
list: Messages with attachment context added to the system prompt.
"""
prepared_messages = messages.copy()
attachment_texts = []
for attachment in attachments:
logger.info(f"Adding attachment {attachment.get('id')} to context")
if 'content' in attachment:
attachment_texts.append(f"Attached file content:\n\n{attachment['content']}")
if attachment_texts:
combined_attachment_text = "\n\n".join(attachment_texts)
system_found = False
for i in range(len(prepared_messages)):
if prepared_messages[i].get("role") == "system":
prepared_messages[i]["content"] += f"\n\n{combined_attachment_text}"
system_found = True
break
if not system_found:
prepared_messages.insert(0, {"role": "system", "content": combined_attachment_text})
return prepared_messages
class OpenAILLMHandler(LLMHandler):
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
logger.info(f"Messages with attachments: {messages}")
if not stream:
while hasattr(resp, "finish_reason") and resp.finish_reason == "tool_calls":
message = json.loads(resp.model_dump_json())["message"]
keys_to_remove = {"audio", "function_call", "refusal"}
filtered_data = {
k: v for k, v in message.items() if k not in keys_to_remove
}
messages.append(filtered_data)
tool_calls = resp.message.tool_calls
for call in tool_calls:
try:
self.tool_calls.append(call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, call
)
function_call_dict = {
"function_call": {
"name": call.function.name,
"args": call.function.arguments,
"call_id": call_id,
}
}
function_response_dict = {
"function_response": {
"name": call.function.name,
"response": {"result": tool_response},
"call_id": call_id,
}
}
messages.append(
{"role": "assistant", "content": [function_call_dict]}
)
messages.append(
{"role": "tool", "content": [function_response_dict]}
)
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
except Exception as e:
logging.error(f"Error executing tool: {str(e)}", exc_info=True)
messages.append(
{
"role": "tool",
"content": f"Error executing tool: {str(e)}",
"tool_call_id": call_id,
}
)
resp = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
return resp
else:
text_buffer = ""
while True:
tool_calls = {}
for chunk in resp:
if isinstance(chunk, str) and len(chunk) > 0:
yield chunk
continue
elif hasattr(chunk, "delta"):
chunk_delta = chunk.delta
if (
hasattr(chunk_delta, "tool_calls")
and chunk_delta.tool_calls is not None
):
for tool_call in chunk_delta.tool_calls:
index = tool_call.index
if index not in tool_calls:
tool_calls[index] = {
"id": "",
"function": {"name": "", "arguments": ""},
}
current = tool_calls[index]
if tool_call.id:
current["id"] = tool_call.id
if tool_call.function.name:
current["function"][
"name"
] = tool_call.function.name
if tool_call.function.arguments:
current["function"][
"arguments"
] += tool_call.function.arguments
tool_calls[index] = current
if (
hasattr(chunk, "finish_reason")
and chunk.finish_reason == "tool_calls"
):
for index in sorted(tool_calls.keys()):
call = tool_calls[index]
try:
self.tool_calls.append(call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, call
)
if isinstance(call["function"]["arguments"], str):
call["function"]["arguments"] = json.loads(call["function"]["arguments"])
function_call_dict = {
"function_call": {
"name": call["function"]["name"],
"args": call["function"]["arguments"],
"call_id": call["id"],
}
}
function_response_dict = {
"function_response": {
"name": call["function"]["name"],
"response": {"result": tool_response},
"call_id": call["id"],
}
}
messages.append(
{
"role": "assistant",
"content": [function_call_dict],
}
)
messages.append(
{
"role": "tool",
"content": [function_response_dict],
}
)
except Exception as e:
logging.error(f"Error executing tool: {str(e)}", exc_info=True)
messages.append(
{
"role": "assistant",
"content": f"Error executing tool: {str(e)}",
}
)
tool_calls = {}
if hasattr(chunk_delta, "content") and chunk_delta.content:
# Add to buffer or yield immediately based on your preference
text_buffer += chunk_delta.content
yield text_buffer
text_buffer = ""
if (
hasattr(chunk, "finish_reason")
and chunk.finish_reason == "stop"
):
return resp
elif isinstance(chunk, str) and len(chunk) == 0:
continue
logger.info(f"Regenerating with messages: {messages}")
resp = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
class GoogleLLMHandler(LLMHandler):
def handle_response(self, agent, resp, tools_dict, messages, attachments=None, stream: bool = True):
from google.genai import types
messages = self.prepare_messages_with_attachments(agent, messages, attachments)
while True:
if not stream:
response = agent.llm.gen(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
if response.candidates and response.candidates[0].content.parts:
tool_call_found = False
for part in response.candidates[0].content.parts:
if part.function_call:
tool_call_found = True
self.tool_calls.append(part.function_call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, part.function_call
)
function_response_part = types.Part.from_function_response(
name=part.function_call.name,
response={"result": tool_response},
)
messages.append(
{"role": "model", "content": [part.to_json_dict()]}
)
messages.append(
{
"role": "tool",
"content": [function_response_part.to_json_dict()],
}
)
if (
not tool_call_found
and response.candidates[0].content.parts
and response.candidates[0].content.parts[0].text
):
return response.candidates[0].content.parts[0].text
elif not tool_call_found:
return response.candidates[0].content.parts
else:
return response
else:
response = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
tool_call_found = False
for result in response:
if hasattr(result, "function_call"):
tool_call_found = True
self.tool_calls.append(result.function_call)
tool_response, call_id = agent._execute_tool_action(
tools_dict, result.function_call
)
function_response_part = types.Part.from_function_response(
name=result.function_call.name,
response={"result": tool_response},
)
messages.append(
{"role": "model", "content": [result.to_json_dict()]}
)
messages.append(
{
"role": "tool",
"content": [function_response_part.to_json_dict()],
}
)
else:
tool_call_found = False
yield result
if not tool_call_found:
return response
def get_llm_handler(llm_type):
handlers = {
"openai": OpenAILLMHandler(),
"google": GoogleLLMHandler(),
}
return handlers.get(llm_type, OpenAILLMHandler())

View File

@@ -17,26 +17,21 @@ class ToolActionParser:
return parser(call)
def _parse_openai_llm(self, call):
if isinstance(call, dict):
try:
call_args = json.loads(call["function"]["arguments"])
tool_id = call["function"]["name"].split("_")[-1]
action_name = call["function"]["name"].rsplit("_", 1)[0]
except (KeyError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
else:
try:
call_args = json.loads(call.function.arguments)
tool_id = call.function.name.split("_")[-1]
action_name = call.function.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
try:
call_args = json.loads(call.arguments)
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
def _parse_google_llm(self, call):
call_args = call.args
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
try:
call_args = call.arguments
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing Google LLM call: {e}")
return None, None, None
return tool_id, action_name, call_args
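A hedged usage sketch of the updated parser, assuming the standardized ToolCall object introduced later in this commit; the "OpenAILLM" parser key and the tool name are assumptions for illustration:

from application.agents.tools.tool_action_parser import ToolActionParser
from application.llm.handlers.base import ToolCall

# Hypothetical call: OpenAI-style LLMs deliver arguments as a JSON string.
call = ToolCall(id="call-1", name="read_webpage_42", arguments='{"url": "https://example.com"}')
parser = ToolActionParser("OpenAILLM")  # keyed by agent.llm.__class__.__name__ (assumed key)
tool_id, action_name, call_args = parser.parse_args(call)
# tool_id == "42", action_name == "read_webpage", call_args == {"url": "https://example.com"}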

View File

@@ -37,17 +37,17 @@ api.add_namespace(answer_ns)
gpt_model = ""
# to have some kind of default behaviour
if settings.LLM_NAME == "openai":
if settings.LLM_PROVIDER == "openai":
gpt_model = "gpt-4o-mini"
elif settings.LLM_NAME == "anthropic":
elif settings.LLM_PROVIDER == "anthropic":
gpt_model = "claude-2"
elif settings.LLM_NAME == "groq":
elif settings.LLM_PROVIDER == "groq":
gpt_model = "llama3-8b-8192"
elif settings.LLM_NAME == "novita":
elif settings.LLM_PROVIDER == "novita":
gpt_model = "deepseek/deepseek-r1"
if settings.MODEL_NAME: # in case there is a particular model name configured
gpt_model = settings.MODEL_NAME
if settings.LLM_NAME: # in case there is a particular model name configured
gpt_model = settings.LLM_NAME
# load the prompts
current_dir = os.path.dirname(
@@ -307,19 +307,20 @@ def complete_stream(
yield f"data: {data}\n\n"
elif "tool_calls" in line:
tool_calls = line["tool_calls"]
data = json.dumps({"type": "tool_calls", "tool_calls": tool_calls})
yield f"data: {data}\n\n"
elif "thought" in line:
thought += line["thought"]
data = json.dumps({"type": "thought", "thought": line["thought"]})
yield f"data: {data}\n\n"
elif "type" in line:
data = json.dumps(line)
yield f"data: {data}\n\n"
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,
@@ -451,9 +452,7 @@ class Stream(Resource):
agent_type = settings.AGENT_NAME
decoded_token = getattr(request, "decoded_token", None)
user_sub = decoded_token.get("sub") if decoded_token else None
agent_key, is_shared_usage, shared_token = get_agent_key(
agent_id, user_sub
)
agent_key, is_shared_usage, shared_token = get_agent_key(agent_id, user_sub)
if agent_key:
data.update({"api_key": agent_key})
@@ -504,7 +503,7 @@ class Stream(Resource):
agent = AgentCreator.create_agent(
agent_type,
endpoint="stream",
llm_name=settings.LLM_NAME,
llm_name=settings.LLM_PROVIDER,
gpt_model=gpt_model,
api_key=settings.API_KEY,
user_api_key=user_api_key,
@@ -658,7 +657,7 @@ class Answer(Resource):
agent = AgentCreator.create_agent(
agent_type,
endpoint="api/answer",
llm_name=settings.LLM_NAME,
llm_name=settings.LLM_PROVIDER,
gpt_model=gpt_model,
api_key=settings.API_KEY,
user_api_key=user_api_key,
@@ -727,7 +726,7 @@ class Answer(Resource):
doc["source"] = "None"
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=user_api_key,
decoded_token=decoded_token,

View File

@@ -11,18 +11,18 @@ current_dir = os.path.dirname(
class Settings(BaseSettings):
AUTH_TYPE: Optional[str] = None
LLM_NAME: str = "docsgpt"
MODEL_NAME: Optional[str] = (
None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
LLM_PROVIDER: str = "docsgpt"
LLM_NAME: Optional[str] = (
None # if LLM_PROVIDER is openai, LLM_NAME can be gpt-4 or gpt-3.5-turbo
)
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
MONGO_DB_NAME: str = "docsgpt"
MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
LLM_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
DEFAULT_MAX_HISTORY: int = 150
MODEL_TOKEN_LIMITS: dict = {
LLM_TOKEN_LIMITS: dict = {
"gpt-4o-mini": 128000,
"gpt-3.5-turbo": 4096,
"claude-2": 1e5,
@@ -35,6 +35,9 @@ class Settings(BaseSettings):
)
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
AGENT_NAME: str = "classic"
FALLBACK_LLM_PROVIDER: Optional[str] = None # provider for fallback llm
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
@@ -99,8 +102,7 @@ class Settings(BaseSettings):
BRAVE_SEARCH_API_KEY: Optional[str] = None
FLASK_DEBUG_MODE: bool = False
STORAGE_TYPE: str = "local" # local or s3
STORAGE_TYPE: str = "local" # local or s3
JWT_SECRET_KEY: str = ""
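The settings changes above are mostly a rename pass plus new fallback options; a quick reference taken from the diff itself:

# Renamed settings (old -> new), defaults as in the diff above:
#   LLM_NAME           -> LLM_PROVIDER      ("docsgpt"; selects the provider/backend)
#   MODEL_NAME         -> LLM_NAME          (None; selects the model for that provider)
#   MODEL_PATH         -> LLM_PATH          ("models/docsgpt-7b-f16.gguf")
#   MODEL_TOKEN_LIMITS -> LLM_TOKEN_LIMITS  (per-model context limits)
# New optional settings: FALLBACK_LLM_PROVIDER, FALLBACK_LLM_NAME, FALLBACK_LLM_API_KEY.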

View File

@@ -1,53 +1,117 @@
import logging
from abc import ABC, abstractmethod
from application.cache import gen_cache, stream_cache
from application.core.settings import settings
from application.usage import gen_token_usage, stream_token_usage
logger = logging.getLogger(__name__)
class BaseLLM(ABC):
def __init__(self, decoded_token=None):
def __init__(
self,
decoded_token=None,
):
self.decoded_token = decoded_token
self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
self.fallback_provider = settings.FALLBACK_LLM_PROVIDER
self.fallback_model_name = settings.FALLBACK_LLM_NAME
self.fallback_llm_api_key = settings.FALLBACK_LLM_API_KEY
self._fallback_llm = None
def _apply_decorator(self, method, decorators, *args, **kwargs):
for decorator in decorators:
method = decorator(method)
return method(self, *args, **kwargs)
@property
def fallback_llm(self):
"""Lazy-loaded fallback LLM instance."""
if (
self._fallback_llm is None
and self.fallback_provider
and self.fallback_model_name
):
try:
from application.llm.llm_creator import LLMCreator
self._fallback_llm = LLMCreator.create_llm(
self.fallback_provider,
self.fallback_llm_api_key,
None,
self.decoded_token,
)
except Exception as e:
logger.error(
f"Failed to initialize fallback LLM: {str(e)}", exc_info=True
)
return self._fallback_llm
def _execute_with_fallback(
self, method_name: str, decorators: list, *args, **kwargs
):
"""
Unified method execution with fallback support.
Args:
method_name: Name of the raw method ('_raw_gen' or '_raw_gen_stream')
decorators: List of decorators to apply
*args: Positional arguments
**kwargs: Keyword arguments
"""
def decorated_method():
method = getattr(self, method_name)
for decorator in decorators:
method = decorator(method)
return method(self, *args, **kwargs)
try:
return decorated_method()
except Exception as e:
if not self.fallback_llm:
logger.error(f"Primary LLM failed and no fallback available: {str(e)}")
raise
logger.warning(
f"Falling back to {self.fallback_provider}/{self.fallback_model_name}. Error: {str(e)}"
)
fallback_method = getattr(
self.fallback_llm, method_name.replace("_raw_", "")
)
return fallback_method(*args, **kwargs)
def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
decorators = [gen_token_usage, gen_cache]
return self._execute_with_fallback(
"_raw_gen",
decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs,
)
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
decorators = [stream_cache, stream_token_usage]
return self._execute_with_fallback(
"_raw_gen_stream",
decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs,
)
@abstractmethod
def _raw_gen(self, model, messages, stream, tools, *args, **kwargs):
pass
def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
decorators = [gen_token_usage, gen_cache]
return self._apply_decorator(
self._raw_gen,
decorators=decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs
)
@abstractmethod
def _raw_gen_stream(self, model, messages, stream, *args, **kwargs):
pass
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
decorators = [stream_cache, stream_token_usage]
return self._apply_decorator(
self._raw_gen_stream,
decorators=decorators,
model=model,
messages=messages,
stream=stream,
tools=tools,
*args,
**kwargs
)
def supports_tools(self):
return hasattr(self, "_supports_tools") and callable(
getattr(self, "_supports_tools")
@@ -55,11 +119,11 @@ class BaseLLM(ABC):
def _supports_tools(self):
raise NotImplementedError("Subclass must implement _supports_tools method")
def get_supported_attachment_types(self):
"""
Return a list of MIME types supported by this LLM for file uploads.
Returns:
list: List of supported MIME types
"""

View File

View File

@@ -0,0 +1,335 @@
import logging
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, Generator, List, Optional, Union
from application.logging import build_stack_data
logger = logging.getLogger(__name__)
@dataclass
class ToolCall:
"""Represents a tool/function call from the LLM."""
id: str
name: str
arguments: Union[str, Dict]
index: Optional[int] = None
@classmethod
def from_dict(cls, data: Dict) -> "ToolCall":
"""Create ToolCall from dictionary."""
return cls(
id=data.get("id", ""),
name=data.get("name", ""),
arguments=data.get("arguments", {}),
index=data.get("index"),
)
@dataclass
class LLMResponse:
"""Represents a response from the LLM."""
content: str
tool_calls: List[ToolCall]
finish_reason: str
raw_response: Any
@property
def requires_tool_call(self) -> bool:
"""Check if the response requires tool calls."""
return bool(self.tool_calls) and self.finish_reason == "tool_calls"
class LLMHandler(ABC):
"""Abstract base class for LLM handlers."""
def __init__(self):
self.llm_calls = []
self.tool_calls = []
@abstractmethod
def parse_response(self, response: Any) -> LLMResponse:
"""Parse raw LLM response into standardized format."""
pass
@abstractmethod
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create a tool result message for the conversation history."""
pass
@abstractmethod
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through streaming response chunks."""
pass
def process_message_flow(
self,
agent,
initial_response,
tools_dict: Dict,
messages: List[Dict],
attachments: Optional[List] = None,
stream: bool = False,
) -> Union[str, Generator]:
"""
Main orchestration method for processing LLM message flow.
Args:
agent: The agent instance
initial_response: Initial LLM response
tools_dict: Dictionary of available tools
messages: Conversation history
attachments: Optional attachments
stream: Whether to use streaming
Returns:
Final response or generator for streaming
"""
messages = self.prepare_messages(agent, messages, attachments)
if stream:
return self.handle_streaming(agent, initial_response, tools_dict, messages)
else:
return self.handle_non_streaming(
agent, initial_response, tools_dict, messages
)
def prepare_messages(
self, agent, messages: List[Dict], attachments: Optional[List] = None
) -> List[Dict]:
"""
Prepare messages with attachments and provider-specific formatting.
Args:
agent: The agent instance
messages: Original messages
attachments: List of attachments
Returns:
Prepared messages list
"""
if not attachments:
return messages
logger.info(f"Preparing messages with {len(attachments)} attachments")
supported_types = agent.llm.get_supported_attachment_types()
supported_attachments = [
a for a in attachments if a.get("mime_type") in supported_types
]
unsupported_attachments = [
a for a in attachments if a.get("mime_type") not in supported_types
]
# Process supported attachments with the LLM's custom method
if supported_attachments:
logger.info(
f"Processing {len(supported_attachments)} supported attachments"
)
messages = agent.llm.prepare_messages_with_attachments(
messages, supported_attachments
)
# Process unsupported attachments with default method
if unsupported_attachments:
logger.info(
f"Processing {len(unsupported_attachments)} unsupported attachments"
)
messages = self._append_unsupported_attachments(
messages, unsupported_attachments
)
return messages
def _append_unsupported_attachments(
self, messages: List[Dict], attachments: List[Dict]
) -> List[Dict]:
"""
Default method to append unsupported attachment content to system prompt.
Args:
messages: Current messages
attachments: List of unsupported attachments
Returns:
Updated messages list
"""
prepared_messages = messages.copy()
attachment_texts = []
for attachment in attachments:
logger.info(f"Adding attachment {attachment.get('id')} to context")
if "content" in attachment:
attachment_texts.append(
f"Attached file content:\n\n{attachment['content']}"
)
if attachment_texts:
combined_text = "\n\n".join(attachment_texts)
system_msg = next(
(msg for msg in prepared_messages if msg.get("role") == "system"),
{"role": "system", "content": ""},
)
if system_msg not in prepared_messages:
prepared_messages.insert(0, system_msg)
system_msg["content"] += f"\n\n{combined_text}"
return prepared_messages
def handle_tool_calls(
self, agent, tool_calls: List[ToolCall], tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Execute tool calls and update conversation history.
Args:
agent: The agent instance
tool_calls: List of tool calls to execute
tools_dict: Available tools dictionary
messages: Current conversation history
Returns:
Updated messages list
"""
updated_messages = messages.copy()
for call in tool_calls:
try:
self.tool_calls.append(call)
tool_executor_gen = agent._execute_tool_action(tools_dict, call)
while True:
try:
yield next(tool_executor_gen)
except StopIteration as e:
tool_response, call_id = e.value
break
updated_messages.append(
{
"role": "assistant",
"content": [
{
"function_call": {
"name": call.name,
"args": call.arguments,
"call_id": call_id,
}
}
],
}
)
updated_messages.append(self.create_tool_message(call, tool_response))
except Exception as e:
logger.error(f"Error executing tool: {str(e)}", exc_info=True)
updated_messages.append(
{
"role": "tool",
"content": f"Error executing tool: {str(e)}",
"tool_call_id": call.id,
}
)
return updated_messages
def handle_non_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Handle non-streaming response flow.
Args:
agent: The agent instance
response: Current LLM response
tools_dict: Available tools dictionary
messages: Conversation history
Returns:
Final response after processing all tool calls
"""
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
while parsed.requires_tool_call:
tool_handler_gen = self.handle_tool_calls(
agent, parsed.tool_calls, tools_dict, messages
)
while True:
try:
yield next(tool_handler_gen)
except StopIteration as e:
messages = e.value
break
response = agent.llm.gen(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
return parsed.content
def handle_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
) -> Generator:
"""
Handle streaming response flow.
Args:
agent: The agent instance
response: Current LLM response
tools_dict: Available tools dictionary
messages: Conversation history
Yields:
Streaming response chunks
"""
buffer = ""
tool_calls = {}
for chunk in self._iterate_stream(response):
if isinstance(chunk, str):
yield chunk
continue
parsed = self.parse_response(chunk)
if parsed.tool_calls:
for call in parsed.tool_calls:
if call.index not in tool_calls:
tool_calls[call.index] = call
else:
existing = tool_calls[call.index]
if call.id:
existing.id = call.id
if call.name:
existing.name = call.name
if call.arguments:
existing.arguments += call.arguments
if parsed.finish_reason == "tool_calls":
tool_handler_gen = self.handle_tool_calls(
agent, list(tool_calls.values()), tools_dict, messages
)
while True:
try:
yield next(tool_handler_gen)
except StopIteration as e:
messages = e.value
break
tool_calls = {}
response = agent.llm.gen_stream(
model=agent.gpt_model, messages=messages, tools=agent.tools
)
self.llm_calls.append(build_stack_data(agent.llm))
yield from self.handle_streaming(agent, response, tools_dict, messages)
return
if parsed.content:
buffer += parsed.content
yield buffer
buffer = ""
if parsed.finish_reason == "stop":
return
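A small sketch of the contract the concrete handlers fill in: parse_response() normalizes provider output into LLMResponse, whose requires_tool_call property drives the tool loop above (the objects below are built by hand purely for illustration):

from application.llm.handlers.base import LLMResponse, ToolCall

parsed = LLMResponse(
    content="",
    tool_calls=[ToolCall(id="call-1", name="read_webpage_42", arguments='{"url": "..."}')],
    finish_reason="tool_calls",
    raw_response=None,
)
assert parsed.requires_tool_call      # True -> handle_tool_calls() runs, then the LLM is re-queried

final = LLMResponse(content="Done.", tool_calls=[], finish_reason="stop", raw_response=None)
assert not final.requires_tool_call   # plain text response, no tool loop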

View File

@@ -0,0 +1,78 @@
import uuid
from typing import Any, Dict, Generator
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
class GoogleLLMHandler(LLMHandler):
"""Handler for Google's GenAI API."""
def parse_response(self, response: Any) -> LLMResponse:
"""Parse Google response into standardized format."""
if isinstance(response, str):
return LLMResponse(
content=response,
tool_calls=[],
finish_reason="stop",
raw_response=response,
)
if hasattr(response, "candidates"):
parts = response.candidates[0].content.parts if response.candidates else []
tool_calls = [
ToolCall(
id=str(uuid.uuid4()),
name=part.function_call.name,
arguments=part.function_call.args,
)
for part in parts
if hasattr(part, "function_call") and part.function_call is not None
]
content = " ".join(
part.text
for part in parts
if hasattr(part, "text") and part.text is not None
)
return LLMResponse(
content=content,
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
else:
tool_calls = []
if hasattr(response, "function_call"):
tool_calls.append(
ToolCall(
id=str(uuid.uuid4()),
name=response.function_call.name,
arguments=response.function_call.args,
)
)
return LLMResponse(
content=response.text if hasattr(response, "text") else "",
tool_calls=tool_calls,
finish_reason="tool_calls" if tool_calls else "stop",
raw_response=response,
)
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create Google-style tool message."""
from google.genai import types
return {
"role": "tool",
"content": [
types.Part.from_function_response(
name=tool_call.name, response={"result": result}
).to_json_dict()
],
}
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through Google streaming response."""
for chunk in response:
yield chunk

View File

@@ -0,0 +1,18 @@
from application.llm.handlers.base import LLMHandler
from application.llm.handlers.google import GoogleLLMHandler
from application.llm.handlers.openai import OpenAILLMHandler
class LLMHandlerCreator:
handlers = {
"openai": OpenAILLMHandler,
"google": GoogleLLMHandler,
"default": OpenAILLMHandler,
}
@classmethod
def create_handler(cls, llm_type: str, *args, **kwargs) -> LLMHandler:
handler_class = cls.handlers.get(llm_type.lower())
if not handler_class:
raise ValueError(f"No LLM handler class found for type {llm_type}")
return handler_class(*args, **kwargs)
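Usage is a one-liner; a short sketch (the keys come from the handlers mapping above, anything else raises ValueError):

from application.llm.handlers.handler_creator import LLMHandlerCreator

handler = LLMHandlerCreator.create_handler("openai")     # -> OpenAILLMHandler
default = LLMHandlerCreator.create_handler("default")    # -> OpenAILLMHandler as well
google = LLMHandlerCreator.create_handler("Google")      # lookup lower-cases the type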

View File

@@ -0,0 +1,57 @@
from typing import Any, Dict, Generator
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
class OpenAILLMHandler(LLMHandler):
"""Handler for OpenAI API."""
def parse_response(self, response: Any) -> LLMResponse:
"""Parse OpenAI response into standardized format."""
if isinstance(response, str):
return LLMResponse(
content=response,
tool_calls=[],
finish_reason="stop",
raw_response=response,
)
message = getattr(response, "message", None) or getattr(response, "delta", None)
tool_calls = []
if hasattr(message, "tool_calls"):
tool_calls = [
ToolCall(
id=getattr(tc, "id", ""),
name=getattr(tc.function, "name", ""),
arguments=getattr(tc.function, "arguments", ""),
index=getattr(tc, "index", None),
)
for tc in message.tool_calls or []
]
return LLMResponse(
content=getattr(message, "content", ""),
tool_calls=tool_calls,
finish_reason=getattr(response, "finish_reason", ""),
raw_response=response,
)
def create_tool_message(self, tool_call: ToolCall, result: Any) -> Dict:
"""Create OpenAI-style tool message."""
return {
"role": "tool",
"content": [
{
"function_response": {
"name": tool_call.name,
"response": {"result": result},
"call_id": tool_call.id,
}
}
],
}
def _iterate_stream(self, response: Any) -> Generator:
"""Iterate through OpenAI streaming response."""
for chunk in response:
yield chunk

View File

@@ -2,6 +2,7 @@ from application.llm.base import BaseLLM
from application.core.settings import settings
import threading
class LlamaSingleton:
_instances = {}
_lock = threading.Lock() # Add a lock for thread synchronization
@@ -29,7 +30,7 @@ class LlamaCpp(BaseLLM):
self,
api_key=None,
user_api_key=None,
llm_name=settings.MODEL_PATH,
llm_name=settings.LLM_PATH,
*args,
**kwargs,
):
@@ -42,14 +43,18 @@ class LlamaCpp(BaseLLM):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = LlamaSingleton.query_model(self.llama, prompt, max_tokens=150, echo=False)
result = LlamaSingleton.query_model(
self.llama, prompt, max_tokens=150, echo=False
)
return result["choices"][0]["text"].split("### Answer \n")[-1]
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
context = messages[0]["content"]
user_question = messages[-1]["content"]
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
result = LlamaSingleton.query_model(self.llama, prompt, max_tokens=150, echo=False, stream=stream)
result = LlamaSingleton.query_model(
self.llama, prompt, max_tokens=150, echo=False, stream=stream
)
for item in result:
for choice in item["choices"]:
yield choice["text"]
yield choice["text"]

View File

@@ -29,10 +29,10 @@ class BraveRetSearch(BaseRetriever):
self.token_limit = (
token_limit
if token_limit
< settings.MODEL_TOKEN_LIMITS.get(
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.MODEL_TOKEN_LIMITS.get(
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
@@ -59,7 +59,7 @@ class BraveRetSearch(BaseRetriever):
docs.append({"text": snippet, "title": title, "link": link})
except IndexError:
pass
if settings.LLM_NAME == "llama.cpp":
if settings.LLM_PROVIDER == "llama.cpp":
docs = [docs[0]]
return docs
@@ -84,7 +84,7 @@ class BraveRetSearch(BaseRetriever):
messages_combine.append({"role": "user", "content": self.question})
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=self.user_api_key,
decoded_token=self.decoded_token,

View File

@@ -16,7 +16,7 @@ class ClassicRAG(BaseRetriever):
token_limit=150,
gpt_model="docsgpt",
user_api_key=None,
llm_name=settings.LLM_NAME,
llm_name=settings.LLM_PROVIDER,
api_key=settings.API_KEY,
decoded_token=None,
):
@@ -28,10 +28,10 @@ class ClassicRAG(BaseRetriever):
self.token_limit = (
token_limit
if token_limit
< settings.MODEL_TOKEN_LIMITS.get(
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.MODEL_TOKEN_LIMITS.get(
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
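The token-limit clamp repeated across the retrievers above is a verbose minimum; a compact equivalent, shown only as a reading aid (the helper name is invented):

from application.core.settings import settings

def clamp_token_limit(token_limit: int, gpt_model: str) -> int:
    # Same effect as the conditional expression used in the retrievers above.
    model_limit = settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
    return min(token_limit, model_limit)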

View File

@@ -28,10 +28,10 @@ class DuckDuckSearch(BaseRetriever):
self.token_limit = (
token_limit
if token_limit
< settings.MODEL_TOKEN_LIMITS.get(
< settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
else settings.MODEL_TOKEN_LIMITS.get(
else settings.LLM_TOKEN_LIMITS.get(
self.gpt_model, settings.DEFAULT_MAX_HISTORY
)
)
@@ -58,7 +58,7 @@ class DuckDuckSearch(BaseRetriever):
)
except IndexError:
pass
if settings.LLM_NAME == "llama.cpp":
if settings.LLM_PROVIDER == "llama.cpp":
docs = [docs[0]]
return docs
@@ -83,7 +83,7 @@ class DuckDuckSearch(BaseRetriever):
messages_combine.append({"role": "user", "content": self.question})
llm = LLMCreator.create_llm(
settings.LLM_NAME,
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
user_api_key=self.user_api_key,
decoded_token=self.decoded_token,

View File

@@ -102,8 +102,8 @@ def limit_chat_history(history, max_token_limit=None, gpt_model="docsgpt"):
max_token_limit
if max_token_limit
and max_token_limit
< settings.MODEL_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
else settings.MODEL_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
< settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
else settings.LLM_TOKEN_LIMITS.get(gpt_model, settings.DEFAULT_MAX_HISTORY)
)
if not history:

View File

@@ -143,8 +143,8 @@ def run_agent_logic(agent_config, input_data):
agent = AgentCreator.create_agent(
agent_type,
endpoint="webhook",
llm_name=settings.LLM_NAME,
gpt_model=settings.MODEL_NAME,
llm_name=settings.LLM_PROVIDER,
gpt_model=settings.LLM_NAME,
api_key=settings.API_KEY,
user_api_key=user_api_key,
prompt=prompt,
@@ -159,7 +159,7 @@ def run_agent_logic(agent_config, input_data):
prompt=prompt,
chunks=chunks,
token_limit=settings.DEFAULT_MAX_HISTORY,
gpt_model=settings.MODEL_NAME,
gpt_model=settings.LLM_NAME,
user_api_key=user_api_key,
decoded_token=decoded_token,
)
@@ -452,7 +452,7 @@ def attachment_worker(self, file_info, user):
try:
self.update_state(state="PROGRESS", meta={"current": 10})
storage = StorageCreator.get_storage()
self.update_state(
state="PROGRESS", meta={"current": 30, "status": "Processing content"}
)
@@ -461,9 +461,11 @@ def attachment_worker(self, file_info, user):
relative_path,
lambda local_path, **kwargs: SimpleDirectoryReader(
input_files=[local_path], exclude_hidden=True, errors="ignore"
).load_data()[0].text
)
.load_data()[0]
.text,
)
token_count = num_tokens_from_string(content)
self.update_state(
@@ -491,9 +493,7 @@ def attachment_worker(self, file_info, user):
f"Stored attachment with ID: {attachment_id}", extra={"user": user}
)
self.update_state(
state="PROGRESS", meta={"current": 100, "status": "Complete"}
)
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
return {
"filename": filename,