Compare commits
1 Commits
feat-model
...
dependabot
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c0793992a1 |
@@ -35,5 +35,8 @@ MICROSOFT_TENANT_ID=your-azure-ad-tenant-id
|
||||
#Alternatively, use "https://login.microsoftonline.com/common" for multi-tenant app.
|
||||
MICROSOFT_AUTHORITY=https://{tenantId}.ciamlogin.com/{tenantId}
|
||||
|
||||
|
||||
# User-data Postgres DB (Phase 0 of the MongoDB→Postgres migration).
|
||||
# Standard Postgres URI — `postgres://` and `postgresql://` both work.
|
||||
# Leave unset while the migration is still being rolled out; the app will
|
||||
# fall back to MongoDB for user data until POSTGRES_URI is configured.
|
||||
# POSTGRES_URI=postgresql://docsgpt:docsgpt@localhost:5432/docsgpt
|
||||
|
||||
18
AGENTS.md
@@ -37,22 +37,6 @@ Run the Flask API (if needed):
|
||||
flask --app application/app.py run --host=0.0.0.0 --port=7091
|
||||
```
|
||||
|
||||
That's the fast inner-loop option — quick startup, the Werkzeug interactive
|
||||
debugger still works, and it hot-reloads on source changes. It serves the
|
||||
Flask routes only (`/api/*`, `/stream`, etc.).
|
||||
|
||||
If you need to exercise the full ASGI stack — the `/mcp` FastMCP endpoint,
|
||||
or to match the production runtime exactly — run the ASGI composition under
|
||||
uvicorn instead:
|
||||
|
||||
```bash
|
||||
uvicorn application.asgi:asgi_app --host 0.0.0.0 --port 7091 --reload
|
||||
```
|
||||
|
||||
Production uses `gunicorn -k uvicorn_worker.UvicornWorker` against the same
|
||||
`application.asgi:asgi_app` target; see `application/Dockerfile` for the
|
||||
full flag set.
|
||||
|
||||
Run the Celery worker in a separate terminal (if needed):
|
||||
|
||||
```bash
|
||||
@@ -115,7 +99,7 @@ vale .
|
||||
- `frontend/`: Vite + React + TypeScript application.
|
||||
- `frontend/src/`: main UI code, including `components`, `conversation`, `hooks`, `locale`, `settings`, `upload`, and Redux store wiring in `store.ts`.
|
||||
- `docs/`: separate documentation site built with Next.js/Nextra.
|
||||
- `extensions/`: integrations and widgets — currently the Chatwoot webhook bridge and the React widget (published to npm as `docsgpt`). The Discord bot, Slack bot, and Chrome extension have been moved to their own repos under `arc53/`.
|
||||
- `extensions/`: integrations and widgets such as Chatwoot, Chrome, Discord, React widget, Slack bot, and web widget.
|
||||
- `deployment/`: Docker Compose variants and Kubernetes manifests.
|
||||
|
||||
## Coding rules
|
||||
|
||||
@@ -88,15 +88,5 @@ EXPOSE 7091
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
CMD ["gunicorn", \
|
||||
"-w", "1", \
|
||||
"-k", "uvicorn_worker.UvicornWorker", \
|
||||
"--bind", "0.0.0.0:7091", \
|
||||
"--timeout", "180", \
|
||||
"--graceful-timeout", "120", \
|
||||
"--keep-alive", "5", \
|
||||
"--worker-tmp-dir", "/dev/shm", \
|
||||
"--max-requests", "1000", \
|
||||
"--max-requests-jitter", "100", \
|
||||
"--config", "application/gunicorn_conf.py", \
|
||||
"application.asgi:asgi_app"]
|
||||
# Start Gunicorn
|
||||
CMD ["gunicorn", "-w", "1", "--timeout", "120", "--bind", "0.0.0.0:7091", "--preload", "application.wsgi:app"]
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
"""0002 app_metadata — singleton key/value table for instance-wide state.
|
||||
|
||||
Used by the startup version-check client to persist the anonymous
|
||||
instance UUID and a one-shot "notice shown" flag. Both values are tiny
|
||||
plain-text strings; this is a deliberate generic-config table rather
|
||||
than dedicated columns so future one-off settings (telemetry opt-in
|
||||
timestamps, feature-flag overrides, etc.) don't each need their own
|
||||
migration.
|
||||
|
||||
Revision ID: 0002_app_metadata
|
||||
Revises: 0001_initial
|
||||
"""
|
||||
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
|
||||
|
||||
revision: str = "0002_app_metadata"
|
||||
down_revision: Union[str, None] = "0001_initial"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE app_metadata (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
);
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.execute("DROP TABLE IF EXISTS app_metadata;")
|
||||
@@ -1,21 +1,21 @@
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from flask import make_response, request
|
||||
from flask_restx import fields, Resource
|
||||
|
||||
from application.api.answer.routes.base import answer_ns
|
||||
from application.services.search_service import (
|
||||
InvalidAPIKey,
|
||||
SearchFailed,
|
||||
search,
|
||||
)
|
||||
from application.core.settings import settings
|
||||
from application.storage.db.repositories.agents import AgentsRepository
|
||||
from application.storage.db.session import db_readonly
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@answer_ns.route("/api/search")
|
||||
class SearchResource(Resource):
|
||||
"""Fast search endpoint for retrieving relevant documents."""
|
||||
"""Fast search endpoint for retrieving relevant documents"""
|
||||
|
||||
search_model = answer_ns.model(
|
||||
"SearchModel",
|
||||
@@ -32,10 +32,102 @@ class SearchResource(Resource):
|
||||
},
|
||||
)
|
||||
|
||||
def _get_sources_from_api_key(self, api_key: str) -> List[str]:
|
||||
"""Get source IDs connected to the API key/agent."""
|
||||
with db_readonly() as conn:
|
||||
agent_data = AgentsRepository(conn).find_by_key(api_key)
|
||||
if not agent_data:
|
||||
return []
|
||||
|
||||
source_ids: List[str] = []
|
||||
# extra_source_ids is a PG ARRAY(UUID) of source UUIDs.
|
||||
extra = agent_data.get("extra_source_ids") or []
|
||||
for src in extra:
|
||||
if src:
|
||||
source_ids.append(str(src))
|
||||
|
||||
if not source_ids:
|
||||
single = agent_data.get("source_id")
|
||||
if single:
|
||||
source_ids.append(str(single))
|
||||
|
||||
return source_ids
|
||||
|
||||
def _search_vectorstores(
|
||||
self, query: str, source_ids: List[str], chunks: int
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Search across vectorstores and return results"""
|
||||
if not source_ids:
|
||||
return []
|
||||
|
||||
results = []
|
||||
chunks_per_source = max(1, chunks // len(source_ids))
|
||||
seen_texts = set()
|
||||
|
||||
for source_id in source_ids:
|
||||
if not source_id or not source_id.strip():
|
||||
continue
|
||||
|
||||
try:
|
||||
docsearch = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE, source_id, settings.EMBEDDINGS_KEY
|
||||
)
|
||||
docs = docsearch.search(query, k=chunks_per_source * 2)
|
||||
|
||||
for doc in docs:
|
||||
if len(results) >= chunks:
|
||||
break
|
||||
|
||||
if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
|
||||
page_content = doc.page_content
|
||||
metadata = doc.metadata
|
||||
else:
|
||||
page_content = doc.get("text", doc.get("page_content", ""))
|
||||
metadata = doc.get("metadata", {})
|
||||
|
||||
# Skip duplicates
|
||||
text_hash = hash(page_content[:200])
|
||||
if text_hash in seen_texts:
|
||||
continue
|
||||
seen_texts.add(text_hash)
|
||||
|
||||
title = metadata.get(
|
||||
"title", metadata.get("post_title", "")
|
||||
)
|
||||
if not isinstance(title, str):
|
||||
title = str(title) if title else ""
|
||||
|
||||
# Clean up title
|
||||
if title:
|
||||
title = title.split("/")[-1]
|
||||
else:
|
||||
# Use filename or first part of content as title
|
||||
title = metadata.get("filename", page_content[:50] + "...")
|
||||
|
||||
source = metadata.get("source", source_id)
|
||||
|
||||
results.append({
|
||||
"text": page_content,
|
||||
"title": title,
|
||||
"source": source,
|
||||
})
|
||||
|
||||
if len(results) >= chunks:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error searching vectorstore {source_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
continue
|
||||
|
||||
return results[:chunks]
|
||||
|
||||
@answer_ns.expect(search_model)
|
||||
@answer_ns.doc(description="Search for relevant documents based on query")
|
||||
def post(self):
|
||||
data = request.get_json() or {}
|
||||
data = request.get_json()
|
||||
|
||||
question = data.get("question")
|
||||
api_key = data.get("api_key")
|
||||
@@ -43,13 +135,32 @@ class SearchResource(Resource):
|
||||
|
||||
if not question:
|
||||
return make_response({"error": "question is required"}, 400)
|
||||
|
||||
if not api_key:
|
||||
return make_response({"error": "api_key is required"}, 400)
|
||||
|
||||
try:
|
||||
return make_response(search(api_key, question, chunks), 200)
|
||||
except InvalidAPIKey:
|
||||
# Validate API key
|
||||
with db_readonly() as conn:
|
||||
agent = AgentsRepository(conn).find_by_key(api_key)
|
||||
if not agent:
|
||||
return make_response({"error": "Invalid API key"}, 401)
|
||||
except SearchFailed:
|
||||
logger.exception("/api/search failed")
|
||||
|
||||
try:
|
||||
# Get sources connected to this API key
|
||||
source_ids = self._get_sources_from_api_key(api_key)
|
||||
|
||||
if not source_ids:
|
||||
return make_response([], 200)
|
||||
|
||||
# Perform search
|
||||
results = self._search_vectorstores(question, source_ids, chunks)
|
||||
|
||||
return make_response(results, 200)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"/api/search - error: {str(e)}",
|
||||
extra={"error": str(e)},
|
||||
exc_info=True,
|
||||
)
|
||||
return make_response({"error": "Search failed"}, 500)
|
||||
|
||||
@@ -140,11 +140,6 @@ def setup_periodic_tasks(sender, **kwargs):
|
||||
cleanup_pending_tool_state.s(),
|
||||
name="cleanup-pending-tool-state",
|
||||
)
|
||||
sender.add_periodic_task(
|
||||
timedelta(hours=7),
|
||||
version_check_task.s(),
|
||||
name="version-check",
|
||||
)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
@@ -181,16 +176,3 @@ def cleanup_pending_tool_state(self):
|
||||
with engine.begin() as conn:
|
||||
deleted = PendingToolStateRepository(conn).cleanup_expired()
|
||||
return {"deleted": deleted}
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def version_check_task(self):
|
||||
"""Periodic anonymous version check.
|
||||
|
||||
Complements the ``worker_ready`` boot trigger so long-running
|
||||
deployments (>6h cache TTL) still refresh advisories. ``run_check``
|
||||
is fail-silent and coordinates across replicas via Redis lock +
|
||||
cache (see ``application.updates.version_check``).
|
||||
"""
|
||||
from application.updates.version_check import run_check
|
||||
run_check()
|
||||
|
||||
@@ -4,7 +4,7 @@ import platform
|
||||
import uuid
|
||||
|
||||
import dotenv
|
||||
from flask import Flask, Response, jsonify, redirect, request
|
||||
from flask import Flask, jsonify, redirect, request
|
||||
from jose import jwt
|
||||
|
||||
from application.auth import handle_auth
|
||||
@@ -149,11 +149,12 @@ def authenticate_request():
|
||||
|
||||
|
||||
@app.after_request
|
||||
def after_request(response: Response) -> Response:
|
||||
"""Add CORS headers for the pure Flask development entrypoint."""
|
||||
response.headers["Access-Control-Allow-Origin"] = "*"
|
||||
response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
|
||||
response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, DELETE, OPTIONS"
|
||||
def after_request(response):
|
||||
response.headers.add("Access-Control-Allow-Origin", "*")
|
||||
response.headers.add("Access-Control-Allow-Headers", "Content-Type, Authorization")
|
||||
response.headers.add(
|
||||
"Access-Control-Allow-Methods", "GET, POST, PUT, DELETE, OPTIONS"
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
"""ASGI entrypoint: Flask (WSGI) + FastMCP on the same process."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from a2wsgi import WSGIMiddleware
|
||||
from starlette.applications import Starlette
|
||||
from starlette.middleware import Middleware
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
from starlette.routing import Mount
|
||||
|
||||
from application.app import app as flask_app
|
||||
from application.mcp_server import mcp
|
||||
|
||||
_WSGI_THREADPOOL = 32
|
||||
|
||||
mcp_app = mcp.http_app(path="/")
|
||||
|
||||
asgi_app = Starlette(
|
||||
routes=[
|
||||
Mount("/mcp", app=mcp_app),
|
||||
Mount("/", app=WSGIMiddleware(flask_app, workers=_WSGI_THREADPOOL)),
|
||||
],
|
||||
middleware=[
|
||||
Middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
|
||||
allow_headers=["Content-Type", "Authorization", "Mcp-Session-Id"],
|
||||
expose_headers=["Mcp-Session-Id"],
|
||||
),
|
||||
],
|
||||
lifespan=mcp_app.lifespan,
|
||||
)
|
||||
@@ -1,8 +1,6 @@
|
||||
import threading
|
||||
|
||||
from celery import Celery
|
||||
from application.core.settings import settings
|
||||
from celery.signals import setup_logging, worker_process_init, worker_ready
|
||||
from celery.signals import setup_logging, worker_process_init
|
||||
|
||||
|
||||
def make_celery(app_name=__name__):
|
||||
@@ -41,25 +39,5 @@ def _dispose_db_engine_on_fork(*args, **kwargs):
|
||||
dispose_engine()
|
||||
|
||||
|
||||
@worker_ready.connect
|
||||
def _run_version_check(*args, **kwargs):
|
||||
"""Kick off the anonymous version check on worker startup.
|
||||
|
||||
Runs in a daemon thread so a slow endpoint or bad DNS never holds
|
||||
up the worker becoming ready for tasks. The check itself is
|
||||
fail-silent (see ``application.updates.version_check.run_check``);
|
||||
this handler's only job is to launch it and get out of the way.
|
||||
|
||||
Import is lazy so the symbol resolution never fires at module
|
||||
import time — consistent with the ``_dispose_db_engine_on_fork``
|
||||
pattern above.
|
||||
"""
|
||||
try:
|
||||
from application.updates.version_check import run_check
|
||||
except Exception:
|
||||
return
|
||||
threading.Thread(target=run_check, name="version-check", daemon=True).start()
|
||||
|
||||
|
||||
celery = make_celery()
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
|
||||
@@ -9,8 +9,3 @@ accept_content = ['json']
|
||||
|
||||
# Autodiscover tasks
|
||||
imports = ('application.api.user.tasks',)
|
||||
|
||||
beat_scheduler = "redbeat.RedBeatScheduler"
|
||||
redbeat_redis_url = broker_url
|
||||
redbeat_key_prefix = "redbeat:docsgpt:"
|
||||
redbeat_lock_timeout = 90
|
||||
|
||||
@@ -1,45 +1,11 @@
|
||||
import logging
|
||||
import os
|
||||
from logging.config import dictConfig
|
||||
|
||||
|
||||
def _otlp_logs_enabled() -> bool:
|
||||
"""Return True when the user has opted in to OTLP log export.
|
||||
|
||||
Gated by the standard OTEL env vars so no project-specific knob is needed:
|
||||
set ``OTEL_LOGS_EXPORTER=otlp`` (and leave ``OTEL_SDK_DISABLED`` unset or
|
||||
false) to flip it on. When false, ``setup_logging`` keeps its original
|
||||
console-only behavior.
|
||||
"""
|
||||
exporter = os.getenv("OTEL_LOGS_EXPORTER", "").strip().lower()
|
||||
disabled = os.getenv("OTEL_SDK_DISABLED", "false").strip().lower() == "true"
|
||||
return exporter == "otlp" and not disabled
|
||||
|
||||
|
||||
def setup_logging() -> None:
|
||||
"""Configure the root logger with a stdout console handler.
|
||||
|
||||
When OTLP log export is enabled, ``opentelemetry-instrument`` attaches a
|
||||
``LoggingHandler`` to the root logger before this function runs. The
|
||||
``dictConfig`` call below replaces ``root.handlers`` with the console
|
||||
handler, which would silently drop the OTEL handler. To make OTLP log
|
||||
export work without forcing every contributor to opt in, snapshot the
|
||||
OTEL handlers up front and re-attach them after ``dictConfig``.
|
||||
"""
|
||||
preserved_handlers: list[logging.Handler] = []
|
||||
if _otlp_logs_enabled():
|
||||
preserved_handlers = [
|
||||
h
|
||||
for h in logging.getLogger().handlers
|
||||
if h.__class__.__module__.startswith("opentelemetry")
|
||||
]
|
||||
|
||||
def setup_logging():
|
||||
dictConfig({
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
"formatters": {
|
||||
"default": {
|
||||
"format": "[%(asctime)s] %(levelname)s in %(module)s: %(message)s",
|
||||
'version': 1,
|
||||
'formatters': {
|
||||
'default': {
|
||||
'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
|
||||
}
|
||||
},
|
||||
"handlers": {
|
||||
@@ -49,14 +15,8 @@ def setup_logging() -> None:
|
||||
"formatter": "default",
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"level": "INFO",
|
||||
"handlers": ["console"],
|
||||
'root': {
|
||||
'level': 'INFO',
|
||||
'handlers': ['console'],
|
||||
},
|
||||
})
|
||||
|
||||
if preserved_handlers:
|
||||
root = logging.getLogger()
|
||||
for handler in preserved_handlers:
|
||||
if handler not in root.handlers:
|
||||
root.addHandler(handler)
|
||||
})
|
||||
266
application/core/model_configs.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""
|
||||
Model configurations for all supported LLM providers.
|
||||
"""
|
||||
|
||||
from application.core.model_settings import (
|
||||
AvailableModel,
|
||||
ModelCapabilities,
|
||||
ModelProvider,
|
||||
)
|
||||
|
||||
# Base image attachment types supported by most vision-capable LLMs
|
||||
IMAGE_ATTACHMENTS = [
|
||||
"image/png",
|
||||
"image/jpeg",
|
||||
"image/jpg",
|
||||
"image/webp",
|
||||
"image/gif",
|
||||
]
|
||||
|
||||
# PDF excluded: most OpenAI-compatible endpoints don't support native PDF uploads.
|
||||
# When excluded, PDFs are synthetically processed by converting pages to images.
|
||||
OPENAI_ATTACHMENTS = IMAGE_ATTACHMENTS
|
||||
|
||||
GOOGLE_ATTACHMENTS = ["application/pdf"] + IMAGE_ATTACHMENTS
|
||||
|
||||
ANTHROPIC_ATTACHMENTS = IMAGE_ATTACHMENTS
|
||||
|
||||
OPENROUTER_ATTACHMENTS = IMAGE_ATTACHMENTS
|
||||
|
||||
NOVITA_ATTACHMENTS = IMAGE_ATTACHMENTS
|
||||
|
||||
|
||||
OPENAI_MODELS = [
|
||||
AvailableModel(
|
||||
id="gpt-5.1",
|
||||
provider=ModelProvider.OPENAI,
|
||||
display_name="GPT-5.1",
|
||||
description="Flagship model with enhanced reasoning, coding, and agentic capabilities",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=OPENAI_ATTACHMENTS,
|
||||
context_window=200000,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="gpt-5-mini",
|
||||
provider=ModelProvider.OPENAI,
|
||||
display_name="GPT-5 Mini",
|
||||
description="Faster, cost-effective variant of GPT-5.1",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=OPENAI_ATTACHMENTS,
|
||||
context_window=200000,
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
ANTHROPIC_MODELS = [
|
||||
AvailableModel(
|
||||
id="claude-3-5-sonnet-20241022",
|
||||
provider=ModelProvider.ANTHROPIC,
|
||||
display_name="Claude 3.5 Sonnet (Latest)",
|
||||
description="Latest Claude 3.5 Sonnet with enhanced capabilities",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supported_attachment_types=ANTHROPIC_ATTACHMENTS,
|
||||
context_window=200000,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="claude-3-5-sonnet",
|
||||
provider=ModelProvider.ANTHROPIC,
|
||||
display_name="Claude 3.5 Sonnet",
|
||||
description="Balanced performance and capability",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supported_attachment_types=ANTHROPIC_ATTACHMENTS,
|
||||
context_window=200000,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="claude-3-opus",
|
||||
provider=ModelProvider.ANTHROPIC,
|
||||
display_name="Claude 3 Opus",
|
||||
description="Most capable Claude model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supported_attachment_types=ANTHROPIC_ATTACHMENTS,
|
||||
context_window=200000,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="claude-3-haiku",
|
||||
provider=ModelProvider.ANTHROPIC,
|
||||
display_name="Claude 3 Haiku",
|
||||
description="Fastest Claude model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supported_attachment_types=ANTHROPIC_ATTACHMENTS,
|
||||
context_window=200000,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
GOOGLE_MODELS = [
|
||||
AvailableModel(
|
||||
id="gemini-flash-latest",
|
||||
provider=ModelProvider.GOOGLE,
|
||||
display_name="Gemini Flash (Latest)",
|
||||
description="Latest experimental Gemini model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=GOOGLE_ATTACHMENTS,
|
||||
context_window=int(1e6),
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="gemini-flash-lite-latest",
|
||||
provider=ModelProvider.GOOGLE,
|
||||
display_name="Gemini Flash Lite (Latest)",
|
||||
description="Fast with huge context window",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=GOOGLE_ATTACHMENTS,
|
||||
context_window=int(1e6),
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="gemini-3-pro-preview",
|
||||
provider=ModelProvider.GOOGLE,
|
||||
display_name="Gemini 3 Pro",
|
||||
description="Most capable Gemini model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=GOOGLE_ATTACHMENTS,
|
||||
context_window=2000000,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
GROQ_MODELS = [
|
||||
AvailableModel(
|
||||
id="llama-3.3-70b-versatile",
|
||||
provider=ModelProvider.GROQ,
|
||||
display_name="Llama 3.3 70B",
|
||||
description="Latest Llama model with high-speed inference",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
context_window=128000,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="openai/gpt-oss-120b",
|
||||
provider=ModelProvider.GROQ,
|
||||
display_name="GPT-OSS 120B",
|
||||
description="Open-source GPT model optimized for speed",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
context_window=128000,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
OPENROUTER_MODELS = [
|
||||
AvailableModel(
|
||||
id="qwen/qwen3-coder:free",
|
||||
provider=ModelProvider.OPENROUTER,
|
||||
display_name="Qwen 3 Coder",
|
||||
description="Latest Qwen model with high-speed inference",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
context_window=128000,
|
||||
supported_attachment_types=OPENROUTER_ATTACHMENTS
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="google/gemma-3-27b-it:free",
|
||||
provider=ModelProvider.OPENROUTER,
|
||||
display_name="Gemma 3 27B",
|
||||
description="Latest Gemma model with high-speed inference",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
context_window=128000,
|
||||
supported_attachment_types=OPENROUTER_ATTACHMENTS
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
NOVITA_MODELS = [
|
||||
AvailableModel(
|
||||
id="moonshotai/kimi-k2.5",
|
||||
provider=ModelProvider.NOVITA,
|
||||
display_name="Kimi K2.5",
|
||||
description="MoE model with function calling, structured output, reasoning, and vision",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=NOVITA_ATTACHMENTS,
|
||||
context_window=262144,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="zai-org/glm-5",
|
||||
provider=ModelProvider.NOVITA,
|
||||
display_name="GLM-5",
|
||||
description="MoE model with function calling, structured output, and reasoning",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=[],
|
||||
context_window=202800,
|
||||
),
|
||||
),
|
||||
AvailableModel(
|
||||
id="minimax/minimax-m2.5",
|
||||
provider=ModelProvider.NOVITA,
|
||||
display_name="MiniMax M2.5",
|
||||
description="MoE model with function calling, structured output, and reasoning",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=[],
|
||||
context_window=204800,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
AZURE_OPENAI_MODELS = [
|
||||
AvailableModel(
|
||||
id="azure-gpt-4",
|
||||
provider=ModelProvider.AZURE_OPENAI,
|
||||
display_name="Azure OpenAI GPT-4",
|
||||
description="Azure-hosted GPT model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supports_structured_output=True,
|
||||
supported_attachment_types=OPENAI_ATTACHMENTS,
|
||||
context_window=8192,
|
||||
),
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
def create_custom_openai_model(model_name: str, base_url: str) -> AvailableModel:
|
||||
"""Create a custom OpenAI-compatible model (e.g., LM Studio, Ollama)."""
|
||||
return AvailableModel(
|
||||
id=model_name,
|
||||
provider=ModelProvider.OPENAI,
|
||||
display_name=model_name,
|
||||
description=f"Custom OpenAI-compatible model at {base_url}",
|
||||
base_url=base_url,
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=True,
|
||||
supported_attachment_types=OPENAI_ATTACHMENTS,
|
||||
),
|
||||
)
|
||||
@@ -1,164 +0,0 @@
|
||||
"""Layered model registry.
|
||||
|
||||
Loads model catalogs from YAML files (built-in + operator-supplied),
|
||||
groups them by provider name, then for each registered provider plugin
|
||||
calls ``get_models`` to produce the final per-provider model list.
|
||||
|
||||
The ``user_id`` parameter on lookup methods is reserved for the future
|
||||
end-user BYOM (per-user model records in Postgres). It is currently
|
||||
ignored — defaulted to ``None`` everywhere — so call sites can be
|
||||
threaded through without a wide refactor when BYOM lands.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from application.core.model_settings import AvailableModel
|
||||
from application.core.model_yaml import (
|
||||
BUILTIN_MODELS_DIR,
|
||||
ProviderCatalog,
|
||||
load_model_yamls,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ModelRegistry:
|
||||
"""Singleton registry of available models."""
|
||||
|
||||
_instance: Optional["ModelRegistry"] = None
|
||||
_initialized: bool = False
|
||||
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
if not ModelRegistry._initialized:
|
||||
self.models: Dict[str, AvailableModel] = {}
|
||||
self.default_model_id: Optional[str] = None
|
||||
self._load_models()
|
||||
ModelRegistry._initialized = True
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls) -> "ModelRegistry":
|
||||
return cls()
|
||||
|
||||
@classmethod
|
||||
def reset(cls) -> None:
|
||||
"""Clear the singleton. Intended for test fixtures."""
|
||||
cls._instance = None
|
||||
cls._initialized = False
|
||||
|
||||
def _load_models(self) -> None:
|
||||
from pathlib import Path
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.llm.providers import ALL_PROVIDERS
|
||||
|
||||
directories = [BUILTIN_MODELS_DIR]
|
||||
operator_dir = getattr(settings, "MODELS_CONFIG_DIR", None)
|
||||
if operator_dir:
|
||||
op_path = Path(operator_dir)
|
||||
if not op_path.exists():
|
||||
logger.warning(
|
||||
"MODELS_CONFIG_DIR=%s does not exist; no operator "
|
||||
"model YAMLs will be loaded.",
|
||||
operator_dir,
|
||||
)
|
||||
elif not op_path.is_dir():
|
||||
logger.warning(
|
||||
"MODELS_CONFIG_DIR=%s is not a directory; no operator "
|
||||
"model YAMLs will be loaded.",
|
||||
operator_dir,
|
||||
)
|
||||
else:
|
||||
directories.append(op_path)
|
||||
|
||||
catalogs = load_model_yamls(directories)
|
||||
|
||||
# Validate every catalog targets a known plugin before doing any
|
||||
# registry work, so an unknown provider name in YAML aborts boot
|
||||
# with a clear error.
|
||||
plugin_names = {p.name for p in ALL_PROVIDERS}
|
||||
for c in catalogs:
|
||||
if c.provider not in plugin_names:
|
||||
raise ValueError(
|
||||
f"{c.source_path}: YAML declares unknown provider "
|
||||
f"{c.provider!r}; no Provider plugin is registered "
|
||||
f"under that name. Known: {sorted(plugin_names)}"
|
||||
)
|
||||
|
||||
catalogs_by_provider: Dict[str, List[ProviderCatalog]] = defaultdict(list)
|
||||
for c in catalogs:
|
||||
catalogs_by_provider[c.provider].append(c)
|
||||
|
||||
self.models.clear()
|
||||
for provider in ALL_PROVIDERS:
|
||||
if not provider.is_enabled(settings):
|
||||
continue
|
||||
for model in provider.get_models(
|
||||
settings, catalogs_by_provider.get(provider.name, [])
|
||||
):
|
||||
self.models[model.id] = model
|
||||
|
||||
self.default_model_id = self._resolve_default(settings)
|
||||
|
||||
logger.info(
|
||||
"ModelRegistry loaded %d models, default: %s",
|
||||
len(self.models),
|
||||
self.default_model_id,
|
||||
)
|
||||
|
||||
def _resolve_default(self, settings) -> Optional[str]:
|
||||
if settings.LLM_NAME:
|
||||
for name in self._parse_model_names(settings.LLM_NAME):
|
||||
if name in self.models:
|
||||
return name
|
||||
if settings.LLM_NAME in self.models:
|
||||
return settings.LLM_NAME
|
||||
|
||||
if settings.LLM_PROVIDER and settings.API_KEY:
|
||||
for model_id, model in self.models.items():
|
||||
if model.provider.value == settings.LLM_PROVIDER:
|
||||
return model_id
|
||||
|
||||
if self.models:
|
||||
return next(iter(self.models.keys()))
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _parse_model_names(llm_name: str) -> List[str]:
|
||||
if not llm_name:
|
||||
return []
|
||||
return [name.strip() for name in llm_name.split(",") if name.strip()]
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Lookup API. ``user_id`` is reserved for the future BYOM and
|
||||
# is ignored today — but threading it through every call site now
|
||||
# means BYOM doesn't require a wide refactor when we build it.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def get_model(
|
||||
self, model_id: str, user_id: Optional[str] = None
|
||||
) -> Optional[AvailableModel]:
|
||||
return self.models.get(model_id)
|
||||
|
||||
def get_all_models(
|
||||
self, user_id: Optional[str] = None
|
||||
) -> List[AvailableModel]:
|
||||
return list(self.models.values())
|
||||
|
||||
def get_enabled_models(
|
||||
self, user_id: Optional[str] = None
|
||||
) -> List[AvailableModel]:
|
||||
return [m for m in self.models.values() if m.enabled]
|
||||
|
||||
def model_exists(
|
||||
self, model_id: str, user_id: Optional[str] = None
|
||||
) -> bool:
|
||||
return model_id in self.models
|
||||
@@ -5,16 +5,9 @@ from typing import Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Re-exported here so existing call sites (and tests) that do
|
||||
# ``from application.core.model_settings import ModelRegistry`` keep
|
||||
# working. The implementation lives in ``application/core/model_registry.py``.
|
||||
# Imported lazily inside ``__getattr__`` to avoid an import cycle with
|
||||
# ``model_yaml`` → ``model_settings`` (this file).
|
||||
|
||||
|
||||
class ModelProvider(str, Enum):
|
||||
OPENAI = "openai"
|
||||
OPENAI_COMPATIBLE = "openai_compatible"
|
||||
OPENROUTER = "openrouter"
|
||||
AZURE_OPENAI = "azure_openai"
|
||||
ANTHROPIC = "anthropic"
|
||||
@@ -48,20 +41,11 @@ class AvailableModel:
|
||||
capabilities: ModelCapabilities = field(default_factory=ModelCapabilities)
|
||||
enabled: bool = True
|
||||
base_url: Optional[str] = None
|
||||
# User-facing label distinct from the dispatch ``provider``. Used by
|
||||
# openai_compatible YAMLs so a Mistral model shows "mistral" in the
|
||||
# API response while still routing through the OpenAI wire format.
|
||||
display_provider: Optional[str] = None
|
||||
# Per-record API key. Operator YAMLs leave this None; populated for
|
||||
# openai_compatible models (resolved from the YAML's ``api_key_env``)
|
||||
# and reserved for the future end-user BYOM phase. Never serialized
|
||||
# into to_dict().
|
||||
api_key: Optional[str] = field(default=None, repr=False, compare=False)
|
||||
|
||||
def to_dict(self) -> Dict:
|
||||
result = {
|
||||
"id": self.id,
|
||||
"provider": self.display_provider or self.provider.value,
|
||||
"provider": self.provider.value,
|
||||
"display_name": self.display_name,
|
||||
"description": self.description,
|
||||
"supported_attachment_types": self.capabilities.supported_attachment_types,
|
||||
@@ -76,14 +60,255 @@ class AvailableModel:
|
||||
return result
|
||||
|
||||
|
||||
def __getattr__(name):
|
||||
"""Lazy re-export of ``ModelRegistry`` from ``model_registry.py``.
|
||||
class ModelRegistry:
|
||||
_instance = None
|
||||
_initialized = False
|
||||
|
||||
Done lazily to avoid an import cycle: ``model_registry`` imports
|
||||
``model_yaml`` which imports the dataclasses from this file.
|
||||
"""
|
||||
if name == "ModelRegistry":
|
||||
from application.core.model_registry import ModelRegistry as _MR
|
||||
def __new__(cls):
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
return cls._instance
|
||||
|
||||
return _MR
|
||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||
def __init__(self):
|
||||
if not ModelRegistry._initialized:
|
||||
self.models: Dict[str, AvailableModel] = {}
|
||||
self.default_model_id: Optional[str] = None
|
||||
self._load_models()
|
||||
ModelRegistry._initialized = True
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls) -> "ModelRegistry":
|
||||
return cls()
|
||||
|
||||
def _load_models(self):
|
||||
from application.core.settings import settings
|
||||
|
||||
self.models.clear()
|
||||
|
||||
# Skip DocsGPT model if using custom OpenAI-compatible endpoint
|
||||
if not settings.OPENAI_BASE_URL:
|
||||
self._add_docsgpt_models(settings)
|
||||
if (
|
||||
settings.OPENAI_API_KEY
|
||||
or (settings.LLM_PROVIDER == "openai" and settings.API_KEY)
|
||||
or settings.OPENAI_BASE_URL
|
||||
):
|
||||
self._add_openai_models(settings)
|
||||
if settings.OPENAI_API_BASE or (
|
||||
settings.LLM_PROVIDER == "azure_openai" and settings.API_KEY
|
||||
):
|
||||
self._add_azure_openai_models(settings)
|
||||
if settings.ANTHROPIC_API_KEY or (
|
||||
settings.LLM_PROVIDER == "anthropic" and settings.API_KEY
|
||||
):
|
||||
self._add_anthropic_models(settings)
|
||||
if settings.GOOGLE_API_KEY or (
|
||||
settings.LLM_PROVIDER == "google" and settings.API_KEY
|
||||
):
|
||||
self._add_google_models(settings)
|
||||
if settings.GROQ_API_KEY or (
|
||||
settings.LLM_PROVIDER == "groq" and settings.API_KEY
|
||||
):
|
||||
self._add_groq_models(settings)
|
||||
if settings.OPEN_ROUTER_API_KEY or (
|
||||
settings.LLM_PROVIDER == "openrouter" and settings.API_KEY
|
||||
):
|
||||
self._add_openrouter_models(settings)
|
||||
if settings.NOVITA_API_KEY or (
|
||||
settings.LLM_PROVIDER == "novita" and settings.API_KEY
|
||||
):
|
||||
self._add_novita_models(settings)
|
||||
if settings.HUGGINGFACE_API_KEY or (
|
||||
settings.LLM_PROVIDER == "huggingface" and settings.API_KEY
|
||||
):
|
||||
self._add_huggingface_models(settings)
|
||||
# Default model selection
|
||||
if settings.LLM_NAME:
|
||||
# Parse LLM_NAME (may be comma-separated)
|
||||
model_names = self._parse_model_names(settings.LLM_NAME)
|
||||
# First model in the list becomes default
|
||||
for model_name in model_names:
|
||||
if model_name in self.models:
|
||||
self.default_model_id = model_name
|
||||
break
|
||||
# Backward compat: try exact match if no parsed model found
|
||||
if not self.default_model_id and settings.LLM_NAME in self.models:
|
||||
self.default_model_id = settings.LLM_NAME
|
||||
|
||||
if not self.default_model_id:
|
||||
if settings.LLM_PROVIDER and settings.API_KEY:
|
||||
for model_id, model in self.models.items():
|
||||
if model.provider.value == settings.LLM_PROVIDER:
|
||||
self.default_model_id = model_id
|
||||
break
|
||||
|
||||
if not self.default_model_id and self.models:
|
||||
self.default_model_id = next(iter(self.models.keys()))
|
||||
logger.info(
|
||||
f"ModelRegistry loaded {len(self.models)} models, default: {self.default_model_id}"
|
||||
)
|
||||
|
||||
def _add_openai_models(self, settings):
|
||||
from application.core.model_configs import (
|
||||
OPENAI_MODELS,
|
||||
create_custom_openai_model,
|
||||
)
|
||||
|
||||
# Check if using local OpenAI-compatible endpoint (Ollama, LM Studio, etc.)
|
||||
using_local_endpoint = bool(
|
||||
settings.OPENAI_BASE_URL and settings.OPENAI_BASE_URL.strip()
|
||||
)
|
||||
|
||||
if using_local_endpoint:
|
||||
# When OPENAI_BASE_URL is set, ONLY register custom models from LLM_NAME
|
||||
# Do NOT add standard OpenAI models (gpt-5.1, etc.)
|
||||
if settings.LLM_NAME:
|
||||
model_names = self._parse_model_names(settings.LLM_NAME)
|
||||
for model_name in model_names:
|
||||
custom_model = create_custom_openai_model(
|
||||
model_name, settings.OPENAI_BASE_URL
|
||||
)
|
||||
self.models[model_name] = custom_model
|
||||
logger.info(
|
||||
f"Registered custom OpenAI model: {model_name} at {settings.OPENAI_BASE_URL}"
|
||||
)
|
||||
else:
|
||||
# Standard OpenAI API usage - add standard models if API key is valid
|
||||
if settings.OPENAI_API_KEY:
|
||||
for model in OPENAI_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_azure_openai_models(self, settings):
|
||||
from application.core.model_configs import AZURE_OPENAI_MODELS
|
||||
|
||||
if settings.LLM_PROVIDER == "azure_openai" and settings.LLM_NAME:
|
||||
for model in AZURE_OPENAI_MODELS:
|
||||
if model.id == settings.LLM_NAME:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
for model in AZURE_OPENAI_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_anthropic_models(self, settings):
|
||||
from application.core.model_configs import ANTHROPIC_MODELS
|
||||
|
||||
if settings.ANTHROPIC_API_KEY:
|
||||
for model in ANTHROPIC_MODELS:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
if settings.LLM_PROVIDER == "anthropic" and settings.LLM_NAME:
|
||||
for model in ANTHROPIC_MODELS:
|
||||
if model.id == settings.LLM_NAME:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
for model in ANTHROPIC_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_google_models(self, settings):
|
||||
from application.core.model_configs import GOOGLE_MODELS
|
||||
|
||||
if settings.GOOGLE_API_KEY:
|
||||
for model in GOOGLE_MODELS:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
if settings.LLM_PROVIDER == "google" and settings.LLM_NAME:
|
||||
for model in GOOGLE_MODELS:
|
||||
if model.id == settings.LLM_NAME:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
for model in GOOGLE_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_groq_models(self, settings):
|
||||
from application.core.model_configs import GROQ_MODELS
|
||||
|
||||
if settings.GROQ_API_KEY:
|
||||
for model in GROQ_MODELS:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
if settings.LLM_PROVIDER == "groq" and settings.LLM_NAME:
|
||||
for model in GROQ_MODELS:
|
||||
if model.id == settings.LLM_NAME:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
for model in GROQ_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_openrouter_models(self, settings):
|
||||
from application.core.model_configs import OPENROUTER_MODELS
|
||||
|
||||
if settings.OPEN_ROUTER_API_KEY:
|
||||
for model in OPENROUTER_MODELS:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
if settings.LLM_PROVIDER == "openrouter" and settings.LLM_NAME:
|
||||
for model in OPENROUTER_MODELS:
|
||||
if model.id == settings.LLM_NAME:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
for model in OPENROUTER_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_novita_models(self, settings):
|
||||
from application.core.model_configs import NOVITA_MODELS
|
||||
|
||||
if settings.NOVITA_API_KEY:
|
||||
for model in NOVITA_MODELS:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
if settings.LLM_PROVIDER == "novita" and settings.LLM_NAME:
|
||||
for model in NOVITA_MODELS:
|
||||
if model.id == settings.LLM_NAME:
|
||||
self.models[model.id] = model
|
||||
return
|
||||
for model in NOVITA_MODELS:
|
||||
self.models[model.id] = model
|
||||
|
||||
def _add_docsgpt_models(self, settings):
|
||||
model_id = "docsgpt-local"
|
||||
model = AvailableModel(
|
||||
id=model_id,
|
||||
provider=ModelProvider.DOCSGPT,
|
||||
display_name="DocsGPT Model",
|
||||
description="Local model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=False,
|
||||
supported_attachment_types=[],
|
||||
),
|
||||
)
|
||||
self.models[model_id] = model
|
||||
|
||||
def _add_huggingface_models(self, settings):
|
||||
model_id = "huggingface-local"
|
||||
model = AvailableModel(
|
||||
id=model_id,
|
||||
provider=ModelProvider.HUGGINGFACE,
|
||||
display_name="Hugging Face Model",
|
||||
description="Local Hugging Face model",
|
||||
capabilities=ModelCapabilities(
|
||||
supports_tools=False,
|
||||
supported_attachment_types=[],
|
||||
),
|
||||
)
|
||||
self.models[model_id] = model
|
||||
|
||||
def _parse_model_names(self, llm_name: str) -> List[str]:
|
||||
"""
|
||||
Parse LLM_NAME which may contain comma-separated model names.
|
||||
E.g., 'deepseek-r1:1.5b,gemma:2b' -> ['deepseek-r1:1.5b', 'gemma:2b']
|
||||
"""
|
||||
if not llm_name:
|
||||
return []
|
||||
return [name.strip() for name in llm_name.split(",") if name.strip()]
|
||||
|
||||
def get_model(self, model_id: str) -> Optional[AvailableModel]:
|
||||
return self.models.get(model_id)
|
||||
|
||||
def get_all_models(self) -> List[AvailableModel]:
|
||||
return list(self.models.values())
|
||||
|
||||
def get_enabled_models(self) -> List[AvailableModel]:
|
||||
return [m for m in self.models.values() if m.enabled]
|
||||
|
||||
def model_exists(self, model_id: str) -> bool:
|
||||
return model_id in self.models
|
||||
|
||||
@@ -1,22 +1,28 @@
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from application.core.model_registry import ModelRegistry
|
||||
from application.core.model_settings import ModelRegistry
|
||||
|
||||
|
||||
def get_api_key_for_provider(provider: str) -> Optional[str]:
|
||||
"""Get the appropriate API key for a provider.
|
||||
|
||||
Delegates to the provider plugin's ``get_api_key``. Falls back to the
|
||||
generic ``settings.API_KEY`` for unknown providers.
|
||||
"""
|
||||
"""Get the appropriate API key for a provider"""
|
||||
from application.core.settings import settings
|
||||
from application.llm.providers import PROVIDERS_BY_NAME
|
||||
|
||||
plugin = PROVIDERS_BY_NAME.get(provider)
|
||||
if plugin is not None:
|
||||
key = plugin.get_api_key(settings)
|
||||
if key:
|
||||
return key
|
||||
provider_key_map = {
|
||||
"openai": settings.OPENAI_API_KEY,
|
||||
"openrouter": settings.OPEN_ROUTER_API_KEY,
|
||||
"novita": settings.NOVITA_API_KEY,
|
||||
"anthropic": settings.ANTHROPIC_API_KEY,
|
||||
"google": settings.GOOGLE_API_KEY,
|
||||
"groq": settings.GROQ_API_KEY,
|
||||
"huggingface": settings.HUGGINGFACE_API_KEY,
|
||||
"azure_openai": settings.API_KEY,
|
||||
"docsgpt": None,
|
||||
"llama.cpp": None,
|
||||
}
|
||||
|
||||
provider_key = provider_key_map.get(provider)
|
||||
if provider_key:
|
||||
return provider_key
|
||||
return settings.API_KEY
|
||||
|
||||
|
||||
@@ -85,21 +91,3 @@ def get_base_url_for_model(model_id: str) -> Optional[str]:
|
||||
if model:
|
||||
return model.base_url
|
||||
return None
|
||||
|
||||
|
||||
def get_api_key_for_model(model_id: str) -> Optional[str]:
|
||||
"""
|
||||
Resolve the API key to use when invoking ``model_id``.
|
||||
|
||||
Priority:
|
||||
1. The model record's own ``api_key`` (reserved for future end-user
|
||||
BYOM where credentials travel with the record).
|
||||
2. The provider plugin's settings-based key.
|
||||
"""
|
||||
registry = ModelRegistry.get_instance()
|
||||
model = registry.get_model(model_id)
|
||||
if model is not None and model.api_key:
|
||||
return model.api_key
|
||||
if model is not None:
|
||||
return get_api_key_for_provider(model.provider.value)
|
||||
return None
|
||||
|
||||
@@ -1,325 +0,0 @@
|
||||
"""YAML loader for model catalog files under ``application/core/models/``.
|
||||
|
||||
Each ``*.yaml`` file declares one provider's static model catalog. Files
|
||||
are validated with Pydantic at load time; any parse, schema, or alias
|
||||
error aborts startup with the offending file path in the message.
|
||||
|
||||
For most providers, one YAML maps to one catalog. The
|
||||
``openai_compatible`` provider is special: each YAML file represents a
|
||||
distinct logical endpoint (Mistral, Together, Ollama, ...) with its own
|
||||
``api_key_env`` and ``base_url``. The loader returns a flat list so the
|
||||
registry can distinguish multiple files with the same ``provider:`` value.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Sequence
|
||||
|
||||
import yaml
|
||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
||||
|
||||
from application.core.model_settings import (
|
||||
AvailableModel,
|
||||
ModelCapabilities,
|
||||
ModelProvider,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
BUILTIN_MODELS_DIR = Path(__file__).parent / "models"
|
||||
DEFAULTS_FILENAME = "_defaults.yaml"
|
||||
|
||||
|
||||
class _DefaultsFile(BaseModel):
|
||||
"""Schema for ``_defaults.yaml``. Currently just attachment aliases."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
attachment_aliases: Dict[str, List[str]] = Field(default_factory=dict)
|
||||
|
||||
|
||||
class _CapabilityFields(BaseModel):
|
||||
"""Capability fields shared between provider ``defaults:`` and per-model overrides.
|
||||
|
||||
All fields are optional so a per-model override can selectively replace
|
||||
a single field from the provider-level defaults.
|
||||
"""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
supports_tools: Optional[bool] = None
|
||||
supports_structured_output: Optional[bool] = None
|
||||
supports_streaming: Optional[bool] = None
|
||||
attachments: Optional[List[str]] = None
|
||||
context_window: Optional[int] = None
|
||||
input_cost_per_token: Optional[float] = None
|
||||
output_cost_per_token: Optional[float] = None
|
||||
|
||||
|
||||
class _ModelEntry(_CapabilityFields):
|
||||
"""Schema for one model row inside a YAML's ``models:`` list."""
|
||||
|
||||
id: str
|
||||
display_name: Optional[str] = None
|
||||
description: str = ""
|
||||
enabled: bool = True
|
||||
base_url: Optional[str] = None
|
||||
aliases: List[str] = Field(default_factory=list)
|
||||
|
||||
@field_validator("id")
|
||||
@classmethod
|
||||
def _id_nonempty(cls, v: str) -> str:
|
||||
if not v or not v.strip():
|
||||
raise ValueError("model id must be a non-empty string")
|
||||
return v
|
||||
|
||||
|
||||
class _ProviderFile(BaseModel):
|
||||
"""Schema for one ``<provider>.yaml`` catalog file."""
|
||||
|
||||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
provider: str
|
||||
defaults: _CapabilityFields = Field(default_factory=_CapabilityFields)
|
||||
models: List[_ModelEntry] = Field(default_factory=list)
|
||||
# openai_compatible metadata. Optional for other providers.
|
||||
display_provider: Optional[str] = None
|
||||
api_key_env: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
|
||||
|
||||
class ProviderCatalog(BaseModel):
|
||||
"""One YAML file's parsed contents, ready for the registry.
|
||||
|
||||
For most providers, multiple catalogs with the same ``provider`` get
|
||||
merged later by the registry. The ``openai_compatible`` provider is
|
||||
the exception: each catalog is treated as a distinct endpoint, with
|
||||
its own ``api_key_env`` and ``base_url``.
|
||||
"""
|
||||
|
||||
provider: str
|
||||
models: List[AvailableModel]
|
||||
source_path: Optional[Path] = None
|
||||
display_provider: Optional[str] = None
|
||||
api_key_env: Optional[str] = None
|
||||
base_url: Optional[str] = None
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
|
||||
class ModelYAMLError(ValueError):
|
||||
"""Raised when a model YAML fails parsing, schema, or alias validation."""
|
||||
|
||||
|
||||
def _expand_attachments(
|
||||
attachments: Sequence[str], aliases: Dict[str, List[str]], source: str
|
||||
) -> List[str]:
|
||||
"""Resolve attachment shorthands (``image``, ``pdf``) to MIME types.
|
||||
|
||||
Raw MIME-typed entries (containing ``/``) pass through unchanged.
|
||||
Unknown aliases raise ``ModelYAMLError``.
|
||||
"""
|
||||
expanded: List[str] = []
|
||||
seen: set = set()
|
||||
for entry in attachments:
|
||||
if "/" in entry:
|
||||
if entry not in seen:
|
||||
expanded.append(entry)
|
||||
seen.add(entry)
|
||||
continue
|
||||
if entry not in aliases:
|
||||
valid = ", ".join(sorted(aliases.keys())) or "<none defined>"
|
||||
raise ModelYAMLError(
|
||||
f"{source}: unknown attachment alias '{entry}'. "
|
||||
f"Valid aliases: {valid}. "
|
||||
"(Or use a raw MIME type like 'image/png'.)"
|
||||
)
|
||||
for mime in aliases[entry]:
|
||||
if mime not in seen:
|
||||
expanded.append(mime)
|
||||
seen.add(mime)
|
||||
return expanded
|
||||
|
||||
|
||||
def _load_defaults(directory: Path) -> Dict[str, List[str]]:
|
||||
"""Load ``_defaults.yaml`` from ``directory`` if it exists."""
|
||||
path = directory / DEFAULTS_FILENAME
|
||||
if not path.exists():
|
||||
return {}
|
||||
try:
|
||||
raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
|
||||
except yaml.YAMLError as e:
|
||||
raise ModelYAMLError(f"{path}: invalid YAML: {e}") from e
|
||||
try:
|
||||
parsed = _DefaultsFile.model_validate(raw)
|
||||
except Exception as e:
|
||||
raise ModelYAMLError(f"{path}: schema error: {e}") from e
|
||||
return parsed.attachment_aliases
|
||||
|
||||
|
||||
def _resolve_provider_enum(name: str, source: Path) -> ModelProvider:
|
||||
try:
|
||||
return ModelProvider(name)
|
||||
except ValueError as e:
|
||||
valid = ", ".join(p.value for p in ModelProvider)
|
||||
raise ModelYAMLError(
|
||||
f"{source}: unknown provider '{name}'. Valid: {valid}"
|
||||
) from e
|
||||
|
||||
|
||||
def _build_model(
|
||||
entry: _ModelEntry,
|
||||
defaults: _CapabilityFields,
|
||||
provider: ModelProvider,
|
||||
aliases: Dict[str, List[str]],
|
||||
source: Path,
|
||||
display_provider: Optional[str] = None,
|
||||
) -> AvailableModel:
|
||||
"""Merge defaults + per-model overrides into a final ``AvailableModel``."""
|
||||
|
||||
def pick(field_name: str, fallback):
|
||||
v = getattr(entry, field_name)
|
||||
if v is not None:
|
||||
return v
|
||||
d = getattr(defaults, field_name)
|
||||
if d is not None:
|
||||
return d
|
||||
return fallback
|
||||
|
||||
raw_attachments = entry.attachments
|
||||
if raw_attachments is None:
|
||||
raw_attachments = defaults.attachments
|
||||
if raw_attachments is None:
|
||||
raw_attachments = []
|
||||
expanded = _expand_attachments(
|
||||
raw_attachments, aliases, f"{source} [model={entry.id}]"
|
||||
)
|
||||
|
||||
caps = ModelCapabilities(
|
||||
supports_tools=pick("supports_tools", False),
|
||||
supports_structured_output=pick("supports_structured_output", False),
|
||||
supports_streaming=pick("supports_streaming", True),
|
||||
supported_attachment_types=expanded,
|
||||
context_window=pick("context_window", 128000),
|
||||
input_cost_per_token=pick("input_cost_per_token", None),
|
||||
output_cost_per_token=pick("output_cost_per_token", None),
|
||||
)
|
||||
|
||||
return AvailableModel(
|
||||
id=entry.id,
|
||||
provider=provider,
|
||||
display_name=entry.display_name or entry.id,
|
||||
description=entry.description,
|
||||
capabilities=caps,
|
||||
enabled=entry.enabled,
|
||||
base_url=entry.base_url,
|
||||
display_provider=display_provider,
|
||||
)
|
||||
|
||||
|
||||
def _load_one_yaml(
|
||||
path: Path, aliases: Dict[str, List[str]]
|
||||
) -> ProviderCatalog:
|
||||
try:
|
||||
raw = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
|
||||
except yaml.YAMLError as e:
|
||||
raise ModelYAMLError(f"{path}: invalid YAML: {e}") from e
|
||||
try:
|
||||
parsed = _ProviderFile.model_validate(raw)
|
||||
except Exception as e:
|
||||
raise ModelYAMLError(f"{path}: schema error: {e}") from e
|
||||
|
||||
provider_enum = _resolve_provider_enum(parsed.provider, path)
|
||||
models = [
|
||||
_build_model(
|
||||
entry,
|
||||
parsed.defaults,
|
||||
provider_enum,
|
||||
aliases,
|
||||
path,
|
||||
display_provider=parsed.display_provider,
|
||||
)
|
||||
for entry in parsed.models
|
||||
]
|
||||
|
||||
return ProviderCatalog(
|
||||
provider=parsed.provider,
|
||||
models=models,
|
||||
source_path=path,
|
||||
display_provider=parsed.display_provider,
|
||||
api_key_env=parsed.api_key_env,
|
||||
base_url=parsed.base_url,
|
||||
)
|
||||
|
||||
|
||||
_BUILTIN_ALIASES_CACHE: Optional[Dict[str, List[str]]] = None
|
||||
|
||||
|
||||
def builtin_attachment_aliases() -> Dict[str, List[str]]:
|
||||
"""Return the built-in attachment alias map from ``_defaults.yaml``.
|
||||
|
||||
Cached after first read so repeat calls are cheap.
|
||||
"""
|
||||
global _BUILTIN_ALIASES_CACHE
|
||||
if _BUILTIN_ALIASES_CACHE is None:
|
||||
_BUILTIN_ALIASES_CACHE = _load_defaults(BUILTIN_MODELS_DIR)
|
||||
return _BUILTIN_ALIASES_CACHE
|
||||
|
||||
|
||||
def resolve_attachment_alias(alias: str) -> List[str]:
|
||||
"""Resolve a single attachment alias (e.g. ``"image"``) to its
|
||||
canonical MIME-type list. Raises ``ModelYAMLError`` if unknown.
|
||||
"""
|
||||
aliases = builtin_attachment_aliases()
|
||||
if alias not in aliases:
|
||||
valid = ", ".join(sorted(aliases.keys())) or "<none defined>"
|
||||
raise ModelYAMLError(
|
||||
f"Unknown attachment alias '{alias}'. Valid: {valid}"
|
||||
)
|
||||
return list(aliases[alias])
|
||||
|
||||
|
||||
def load_model_yamls(directories: Sequence[Path]) -> List[ProviderCatalog]:
|
||||
"""Load every ``*.yaml`` file (excluding ``_defaults.yaml``) under each
|
||||
directory in order and return a flat list of catalogs.
|
||||
|
||||
Caller is responsible for merging multiple catalogs that target the
|
||||
same provider plugin. The flat-list shape lets ``openai_compatible``
|
||||
keep each file separate (one logical endpoint per file).
|
||||
|
||||
When the same model ``id`` appears in more than one YAML across the
|
||||
directory list, a warning is logged. Order in the returned list
|
||||
preserves load order, so the registry's "later wins" merge gives the
|
||||
later directory's definition.
|
||||
"""
|
||||
catalogs: List[ProviderCatalog] = []
|
||||
seen_ids: Dict[str, Path] = {}
|
||||
|
||||
aliases: Dict[str, List[str]] = {}
|
||||
for d in directories:
|
||||
if not d or not d.exists():
|
||||
continue
|
||||
aliases.update(_load_defaults(d))
|
||||
|
||||
for d in directories:
|
||||
if not d or not d.exists():
|
||||
continue
|
||||
for path in sorted(d.glob("*.yaml")):
|
||||
if path.name == DEFAULTS_FILENAME:
|
||||
continue
|
||||
catalog = _load_one_yaml(path, aliases)
|
||||
catalogs.append(catalog)
|
||||
for m in catalog.models:
|
||||
prior = seen_ids.get(m.id)
|
||||
if prior is not None and prior != path:
|
||||
logger.warning(
|
||||
"Model id %r redefined: %s overrides %s (later wins)",
|
||||
m.id,
|
||||
path,
|
||||
prior,
|
||||
)
|
||||
seen_ids[m.id] = path
|
||||
|
||||
return catalogs
|
||||
@@ -1,213 +0,0 @@
|
||||
# Model catalogs
|
||||
|
||||
Each `*.yaml` file in this directory declares one provider's model
|
||||
catalog. The registry loads every YAML at boot and joins it to the
|
||||
matching provider plugin under `application/llm/providers/`.
|
||||
|
||||
To add or edit models, you almost always only touch a YAML here — no
|
||||
Python code required.
|
||||
|
||||
## Add a model to an existing provider
|
||||
|
||||
Open the provider's YAML (e.g. `anthropic.yaml`) and append two lines
|
||||
under `models:`:
|
||||
|
||||
```yaml
|
||||
models:
|
||||
- id: claude-3-7-sonnet
|
||||
display_name: Claude 3.7 Sonnet
|
||||
```
|
||||
|
||||
Capabilities default to the provider's `defaults:` block. Override
|
||||
per-model only when needed:
|
||||
|
||||
```yaml
|
||||
- id: claude-3-7-sonnet
|
||||
display_name: Claude 3.7 Sonnet
|
||||
context_window: 500000
|
||||
```
|
||||
|
||||
Restart the app. The new model appears in `/api/models`.
|
||||
|
||||
> The model `id` is what gets stored in agent / workflow records. Once
|
||||
> users start picking the model, **don't rename it** — agent and
|
||||
> workflow rows reference it as a free-form string and silently fall
|
||||
> back to the system default if the id disappears.
|
||||
|
||||
## Add an OpenAI-compatible provider (zero Python)
|
||||
|
||||
Drop a YAML in this directory (or in your `MODELS_CONFIG_DIR`) that uses
|
||||
the `openai_compatible` plugin. Set the env var named in `api_key_env`
|
||||
and you're done — no Python, no settings.py edit, no LLMCreator change:
|
||||
|
||||
```yaml
|
||||
# mistral.yaml
|
||||
provider: openai_compatible
|
||||
display_provider: mistral # shown in /api/models response
|
||||
api_key_env: MISTRAL_API_KEY # env var the plugin reads at boot
|
||||
base_url: https://api.mistral.ai/v1
|
||||
defaults:
|
||||
supports_tools: true
|
||||
context_window: 128000
|
||||
models:
|
||||
- id: mistral-large-latest
|
||||
display_name: Mistral Large
|
||||
- id: mistral-small-latest
|
||||
display_name: Mistral Small
|
||||
```
|
||||
|
||||
`MISTRAL_API_KEY=sk-... ; restart` — Mistral models appear in
|
||||
`/api/models` with `provider: "mistral"`. They route through the OpenAI
|
||||
wire format (it's `OpenAILLM` under the hood) but with Mistral's
|
||||
endpoint and key.
|
||||
|
||||
Multiple `openai_compatible` YAMLs coexist: each file is one logical
|
||||
endpoint with its own `api_key_env` and `base_url`. Drop in
|
||||
`together.yaml`, `fireworks.yaml`, etc. side by side. If an env var
|
||||
isn't set, that catalog is silently skipped at boot (logged at INFO) —
|
||||
no error.
|
||||
|
||||
Working example: `examples/mistral.yaml.example`. Files inside
|
||||
`examples/` aren't loaded by the registry; the glob only picks up
|
||||
`*.yaml` at the top level.
|
||||
|
||||
## Add a provider with its own SDK
|
||||
|
||||
For a provider that doesn't speak OpenAI's wire format, add one Python
|
||||
file to `application/llm/providers/<name>.py`:
|
||||
|
||||
```python
|
||||
from application.llm.providers.base import Provider
|
||||
from application.llm.my_provider import MyLLM
|
||||
|
||||
class MyProvider(Provider):
|
||||
name = "my_provider"
|
||||
llm_class = MyLLM
|
||||
|
||||
def get_api_key(self, settings):
|
||||
return settings.MY_PROVIDER_API_KEY
|
||||
```
|
||||
|
||||
Register it in `application/llm/providers/__init__.py` (one line in
|
||||
`ALL_PROVIDERS`), add `MY_PROVIDER_API_KEY` to `settings.py`, and create
|
||||
`my_provider.yaml` here with the model catalog.
|
||||
|
||||
## Schema reference
|
||||
|
||||
```yaml
|
||||
provider: <string, required> # matches the Provider plugin's `name`
|
||||
|
||||
# openai_compatible only — required for that provider, ignored for others
|
||||
display_provider: <string> # label shown in /api/models response
|
||||
api_key_env: <string> # name of the env var carrying the key
|
||||
base_url: <string> # endpoint URL
|
||||
|
||||
defaults: # optional, applied to every model below
|
||||
supports_tools: bool # default false
|
||||
supports_structured_output: bool # default false
|
||||
supports_streaming: bool # default true
|
||||
attachments: [<alias-or-mime>, ...] # default []
|
||||
context_window: int # default 128000
|
||||
input_cost_per_token: float # default null
|
||||
output_cost_per_token: float # default null
|
||||
|
||||
models: # required
|
||||
- id: <string, required> # the value persisted in agent records
|
||||
display_name: <string> # default: id
|
||||
description: <string> # default: ""
|
||||
enabled: bool # default true; false hides from /api/models
|
||||
base_url: <string> # optional custom endpoint for this model
|
||||
# All `defaults:` fields above can be overridden here per-model.
|
||||
```
|
||||
|
||||
### Attachment aliases
|
||||
|
||||
The `attachments:` list can mix human-readable aliases with raw MIME
|
||||
types. Aliases are defined in `_defaults.yaml`:
|
||||
|
||||
| Alias | Expands to |
|
||||
|---|---|
|
||||
| `image` | `image/png`, `image/jpeg`, `image/jpg`, `image/webp`, `image/gif` |
|
||||
| `pdf` | `application/pdf` |
|
||||
| `audio` | `audio/mpeg`, `audio/wav`, `audio/ogg` |
|
||||
|
||||
Use raw MIME types when you need surgical control:
|
||||
|
||||
```yaml
|
||||
attachments: [image/png, image/webp] # only these two
|
||||
```
|
||||
|
||||
## Operator-supplied YAMLs (`MODELS_CONFIG_DIR`)
|
||||
|
||||
Set the `MODELS_CONFIG_DIR` env var (or `.env` entry) to a directory
|
||||
path. Every `*.yaml` in that directory is loaded **after** the built-in
|
||||
catalog under `application/core/models/`. Operators use this to:
|
||||
|
||||
- Add new `openai_compatible` providers (Mistral, Together, Fireworks,
|
||||
Ollama, ...) without forking the repo.
|
||||
- Extend an existing provider's catalog with extra models — append
|
||||
models under `provider: anthropic` and they show up alongside the
|
||||
built-ins.
|
||||
- Override a built-in model's capabilities — declare the same `id`
|
||||
with different fields (e.g. a higher `context_window`). Later wins;
|
||||
the override is logged as a `WARNING` so you can audit it.
|
||||
|
||||
Things you cannot do via `MODELS_CONFIG_DIR`:
|
||||
|
||||
- Add a brand-new non-OpenAI provider — that needs a Python plugin
|
||||
under `application/llm/providers/` (see "Add a provider with its own
|
||||
SDK" above). Operator YAMLs may only target a `provider:` value that
|
||||
already has a registered plugin.
|
||||
|
||||
### Example: Docker
|
||||
|
||||
Mount your model YAMLs into the container and point the env var at the
|
||||
mount path:
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
app:
|
||||
image: arc53/docsgpt
|
||||
environment:
|
||||
MODELS_CONFIG_DIR: /etc/docsgpt/models
|
||||
MISTRAL_API_KEY: ${MISTRAL_API_KEY}
|
||||
volumes:
|
||||
- ./my-models:/etc/docsgpt/models:ro
|
||||
```
|
||||
|
||||
Then `./my-models/mistral.yaml` (the file from
|
||||
`examples/mistral.yaml.example`) gets picked up at boot.
|
||||
|
||||
### Example: Kubernetes
|
||||
|
||||
Mount a `ConfigMap` containing your YAMLs at a known path and set
|
||||
`MODELS_CONFIG_DIR` on the deployment. The same `examples/mistral.yaml.example`
|
||||
becomes a key in the ConfigMap.
|
||||
|
||||
### Misconfiguration
|
||||
|
||||
If `MODELS_CONFIG_DIR` is set but the path doesn't exist (or isn't a
|
||||
directory), the app logs a `WARNING` at boot and continues with just
|
||||
the built-in catalog. The app does *not* fail to start — operators can
|
||||
ship config drift without taking down the service — but the warning is
|
||||
loud enough to surface in any reasonable log aggregator.
|
||||
|
||||
## Validation
|
||||
|
||||
YAMLs are parsed with Pydantic at boot. The app fails to start with a
|
||||
clear error message if:
|
||||
|
||||
- a top-level key is unknown
|
||||
- a model is missing `id`
|
||||
- an attachment alias isn't defined
|
||||
- the `provider:` value isn't registered as a plugin
|
||||
|
||||
This is intentional — silent fallbacks would mean users don't notice
|
||||
their model picks broke until they hit the API.
|
||||
|
||||
## Reserved fields (not yet implemented)
|
||||
|
||||
- `aliases:` on a model — old IDs that resolve to this model. Reserved
|
||||
for future renames; the schema accepts the field but it is not yet
|
||||
acted on.
|
||||
@@ -1,18 +0,0 @@
|
||||
# Global defaults applied across every model YAML in this directory.
|
||||
# Keep this file sparse — per-provider `defaults:` blocks are clearer
|
||||
# than a deep global default chain. This file is for things that
|
||||
# genuinely never vary, like the meaning of "image".
|
||||
|
||||
attachment_aliases:
|
||||
image:
|
||||
- image/png
|
||||
- image/jpeg
|
||||
- image/jpg
|
||||
- image/webp
|
||||
- image/gif
|
||||
pdf:
|
||||
- application/pdf
|
||||
audio:
|
||||
- audio/mpeg
|
||||
- audio/wav
|
||||
- audio/ogg
|
||||
@@ -1,23 +0,0 @@
|
||||
provider: anthropic
|
||||
defaults:
|
||||
supports_tools: true
|
||||
attachments: [image]
|
||||
context_window: 200000
|
||||
|
||||
models:
|
||||
- id: claude-opus-4-7
|
||||
display_name: Claude Opus 4.7
|
||||
description: Most capable Claude model for complex reasoning and agentic coding
|
||||
context_window: 1000000
|
||||
supports_structured_output: true
|
||||
|
||||
- id: claude-sonnet-4-6
|
||||
display_name: Claude Sonnet 4.6
|
||||
description: Best balance of speed and intelligence with extended thinking
|
||||
context_window: 1000000
|
||||
supports_structured_output: true
|
||||
|
||||
- id: claude-haiku-4-5
|
||||
display_name: Claude Haiku 4.5
|
||||
description: Fastest Claude model with near-frontier intelligence
|
||||
supports_structured_output: true
|
||||
@@ -1,31 +0,0 @@
|
||||
# Azure OpenAI catalog.
|
||||
#
|
||||
# IMPORTANT: For Azure OpenAI, the `id` field is the **deployment name**, not
|
||||
# a model name. Deployment names are arbitrary strings the operator chooses
|
||||
# in Azure portal (or via ARM/Bicep/Terraform) when they create a deployment
|
||||
# for a given underlying model + version.
|
||||
#
|
||||
# The IDs below are sensible defaults that mirror the underlying OpenAI
|
||||
# model name (prefixed with `azure-`). Operators almost always need to
|
||||
# override them via `MODELS_CONFIG_DIR` to match the deployment names that
|
||||
# actually exist in their Azure resource. The `display_name`, capability
|
||||
# flags, and `context_window` reflect the underlying OpenAI model.
|
||||
provider: azure_openai
|
||||
|
||||
defaults:
|
||||
supports_tools: true
|
||||
supports_structured_output: true
|
||||
attachments: [image]
|
||||
context_window: 400000
|
||||
|
||||
models:
|
||||
- id: azure-gpt-5.5
|
||||
display_name: Azure OpenAI GPT-5.5
|
||||
description: Azure-hosted flagship frontier model for complex reasoning, coding, and agentic work with a 1M-token context window
|
||||
context_window: 1050000
|
||||
- id: azure-gpt-5.4-mini
|
||||
display_name: Azure OpenAI GPT-5.4 Mini
|
||||
description: Azure-hosted cost-efficient GPT-5.4-class model for high-volume coding, computer use, and subagent workloads
|
||||
- id: azure-gpt-5.4-nano
|
||||
display_name: Azure OpenAI GPT-5.4 Nano
|
||||
description: Azure-hosted cheapest GPT-5.4-class model, optimized for simple high-volume tasks where speed and cost matter most
|
||||
@@ -1,7 +0,0 @@
|
||||
provider: docsgpt
|
||||
|
||||
models:
|
||||
- id: docsgpt-local
|
||||
display_name: DocsGPT Model
|
||||
description: Local model
|
||||
supports_tools: false
|
||||
@@ -1,31 +0,0 @@
|
||||
# EXAMPLE — copy this file to ../mistral.yaml (or to your
|
||||
# MODELS_CONFIG_DIR) and set MISTRAL_API_KEY in your environment.
|
||||
#
|
||||
# This is the entire integration. No Python required: the
|
||||
# `openai_compatible` plugin reads `api_key_env` and `base_url` from
|
||||
# the file and routes calls through the OpenAI wire format.
|
||||
#
|
||||
# Files in this `examples/` directory are NOT loaded by the registry
|
||||
# (the loader globs *.yaml at the top level only).
|
||||
|
||||
provider: openai_compatible
|
||||
display_provider: mistral # shown in /api/models response
|
||||
api_key_env: MISTRAL_API_KEY # env var the plugin reads
|
||||
base_url: https://api.mistral.ai/v1 # OpenAI-compatible endpoint
|
||||
|
||||
defaults:
|
||||
supports_tools: true
|
||||
context_window: 128000
|
||||
|
||||
models:
|
||||
- id: mistral-large-latest
|
||||
display_name: Mistral Large
|
||||
description: Top-tier reasoning model
|
||||
|
||||
- id: mistral-small-latest
|
||||
display_name: Mistral Small
|
||||
description: Fast, cost-efficient
|
||||
|
||||
- id: codestral-latest
|
||||
display_name: Codestral
|
||||
description: Code-specialized model
|
||||
@@ -1,17 +0,0 @@
|
||||
provider: google
|
||||
defaults:
|
||||
supports_tools: true
|
||||
supports_structured_output: true
|
||||
attachments: [pdf, image]
|
||||
context_window: 1048576
|
||||
|
||||
models:
|
||||
- id: gemini-3.1-pro-preview
|
||||
display_name: Gemini 3.1 Pro
|
||||
description: Most capable Gemini 3 model with advanced reasoning and agentic coding (preview)
|
||||
- id: gemini-3-flash-preview
|
||||
display_name: Gemini 3 Flash
|
||||
description: Frontier-class performance for low-latency, high-volume tasks (preview)
|
||||
- id: gemini-3.1-flash-lite-preview
|
||||
display_name: Gemini 3.1 Flash-Lite
|
||||
description: Cost-efficient frontier-class multimodal model for high-throughput workloads (preview)
|
||||
@@ -1,16 +0,0 @@
|
||||
provider: groq
|
||||
defaults:
|
||||
supports_tools: true
|
||||
context_window: 131072
|
||||
|
||||
models:
|
||||
- id: openai/gpt-oss-120b
|
||||
display_name: GPT-OSS 120B
|
||||
description: OpenAI's open-weight 120B flagship served on Groq's LPU hardware; strong general reasoning with strict structured output support
|
||||
supports_structured_output: true
|
||||
- id: llama-3.3-70b-versatile
|
||||
display_name: Llama 3.3 70B Versatile
|
||||
description: Meta's Llama 3.3 70B for general-purpose chat with parallel tool use
|
||||
- id: llama-3.1-8b-instant
|
||||
display_name: Llama 3.1 8B Instant
|
||||
description: Small, very low-latency Llama model (~560 tok/s) with parallel tool use
|
||||
@@ -1,7 +0,0 @@
|
||||
provider: huggingface
|
||||
|
||||
models:
|
||||
- id: huggingface-local
|
||||
display_name: Hugging Face Model
|
||||
description: Local Hugging Face model
|
||||
supports_tools: false
|
||||
@@ -1,21 +0,0 @@
|
||||
provider: novita
|
||||
defaults:
|
||||
supports_tools: true
|
||||
supports_structured_output: true
|
||||
|
||||
models:
|
||||
- id: deepseek/deepseek-v4-pro
|
||||
display_name: DeepSeek V4 Pro
|
||||
description: 1.6T MoE (49B active) with 1M context, hybrid CSA/HCA attention, top-tier reasoning and agentic coding
|
||||
context_window: 1048576
|
||||
|
||||
- id: moonshotai/kimi-k2.6
|
||||
display_name: Kimi K2.6
|
||||
description: 1T-parameter open-weight MoE with native vision/video, multi-step tool calling, and agentic long-horizon execution
|
||||
attachments: [image]
|
||||
context_window: 262144
|
||||
|
||||
- id: zai-org/glm-5
|
||||
display_name: GLM-5
|
||||
description: Z.AI 754B-parameter MoE with strong general reasoning, function calling, and structured output
|
||||
context_window: 202800
|
||||
@@ -1,18 +0,0 @@
|
||||
provider: openai
|
||||
defaults:
|
||||
supports_tools: true
|
||||
supports_structured_output: true
|
||||
attachments: [image]
|
||||
context_window: 400000
|
||||
|
||||
models:
|
||||
- id: gpt-5.5
|
||||
display_name: GPT-5.5
|
||||
description: Flagship frontier model for complex reasoning, coding, and agentic work with a 1M-token context window
|
||||
context_window: 1050000
|
||||
- id: gpt-5.4-mini
|
||||
display_name: GPT-5.4 Mini
|
||||
description: Cost-efficient GPT-5.4-class model for high-volume coding, computer use, and subagent workloads
|
||||
- id: gpt-5.4-nano
|
||||
display_name: GPT-5.4 Nano
|
||||
description: Cheapest GPT-5.4-class model, optimized for simple high-volume tasks where speed and cost matter most
|
||||
@@ -1,25 +0,0 @@
|
||||
provider: openrouter
|
||||
defaults:
|
||||
supports_tools: true
|
||||
attachments: [image]
|
||||
context_window: 128000
|
||||
|
||||
models:
|
||||
- id: qwen/qwen3-coder:free
|
||||
display_name: Qwen3 Coder (free)
|
||||
description: Free-tier 480B MoE coder model with strong agentic tool use; rate-limited
|
||||
context_window: 262000
|
||||
attachments: []
|
||||
|
||||
- id: deepseek/deepseek-v3.2
|
||||
display_name: DeepSeek V3.2
|
||||
description: Open-weights reasoning model, very low cost (~$0.25 in / $0.38 out per 1M)
|
||||
context_window: 131072
|
||||
attachments: []
|
||||
supports_structured_output: true
|
||||
|
||||
- id: anthropic/claude-sonnet-4.6
|
||||
display_name: Claude Sonnet 4.6 (via OpenRouter)
|
||||
description: Frontier Sonnet-class model with 1M context, vision, and extended thinking
|
||||
context_window: 1000000
|
||||
supports_structured_output: true
|
||||
@@ -23,10 +23,6 @@ class Settings(BaseSettings):
|
||||
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
|
||||
EMBEDDINGS_BASE_URL: Optional[str] = None # Remote embeddings API URL (OpenAI-compatible)
|
||||
EMBEDDINGS_KEY: Optional[str] = None # api key for embeddings (if using openai, just copy API_KEY)
|
||||
# Optional directory of operator-supplied model YAMLs, loaded after the
|
||||
# built-in catalog under application/core/models/. Later wins on
|
||||
# duplicate model id. See application/core/models/README.md.
|
||||
MODELS_CONFIG_DIR: Optional[str] = None
|
||||
|
||||
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
|
||||
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
|
||||
@@ -153,9 +149,6 @@ class Settings(BaseSettings):
|
||||
|
||||
FLASK_DEBUG_MODE: bool = False
|
||||
STORAGE_TYPE: str = "local" # local or s3
|
||||
|
||||
# Anonymous startup version check for security issues.
|
||||
VERSION_CHECK: bool = True
|
||||
URL_STRATEGY: str = "backend" # backend or s3
|
||||
|
||||
JWT_SECRET_KEY: str = ""
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
"""Gunicorn config — keeps uvicorn's access log in NCSA format."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import logging.config
|
||||
|
||||
# NCSA common log format:
|
||||
# %(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"
|
||||
# Uvicorn's access formatter exposes a ``client_addr``/``request_line``/
|
||||
# ``status_code`` trio but not the full NCSA field set, so we re-derive
|
||||
# what we can.
|
||||
_NCSA_FMT = (
|
||||
'%(client_addr)s - - [%(asctime)s] "%(request_line)s" %(status_code)s'
|
||||
)
|
||||
|
||||
logconfig_dict = {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
"formatters": {
|
||||
"ncsa_access": {
|
||||
"()": "uvicorn.logging.AccessFormatter",
|
||||
"fmt": _NCSA_FMT,
|
||||
"datefmt": "%d/%b/%Y:%H:%M:%S %z",
|
||||
"use_colors": False,
|
||||
},
|
||||
"default": {
|
||||
"format": "[%(asctime)s] [%(process)d] [%(levelname)s] %(name)s: %(message)s",
|
||||
},
|
||||
},
|
||||
"handlers": {
|
||||
"access": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "ncsa_access",
|
||||
"stream": "ext://sys.stdout",
|
||||
},
|
||||
"default": {
|
||||
"class": "logging.StreamHandler",
|
||||
"formatter": "default",
|
||||
"stream": "ext://sys.stderr",
|
||||
},
|
||||
},
|
||||
"loggers": {
|
||||
"uvicorn": {"handlers": ["default"], "level": "INFO", "propagate": False},
|
||||
"uvicorn.error": {
|
||||
"handlers": ["default"],
|
||||
"level": "INFO",
|
||||
"propagate": False,
|
||||
},
|
||||
"uvicorn.access": {
|
||||
"handlers": ["access"],
|
||||
"level": "INFO",
|
||||
"propagate": False,
|
||||
},
|
||||
"gunicorn.error": {
|
||||
"handlers": ["default"],
|
||||
"level": "INFO",
|
||||
"propagate": False,
|
||||
},
|
||||
"gunicorn.access": {
|
||||
"handlers": ["access"],
|
||||
"level": "INFO",
|
||||
"propagate": False,
|
||||
},
|
||||
},
|
||||
"root": {"handlers": ["default"], "level": "INFO"},
|
||||
}
|
||||
|
||||
|
||||
def on_starting(server): # pragma: no cover — gunicorn hook
|
||||
"""Ensure gunicorn's own loggers use the configured handlers."""
|
||||
logging.config.dictConfig(logconfig_dict)
|
||||
@@ -1,11 +1,34 @@
|
||||
import logging
|
||||
|
||||
from application.llm.providers import PROVIDERS_BY_NAME
|
||||
from application.llm.anthropic import AnthropicLLM
|
||||
from application.llm.docsgpt_provider import DocsGPTAPILLM
|
||||
from application.llm.google_ai import GoogleLLM
|
||||
from application.llm.groq import GroqLLM
|
||||
from application.llm.llama_cpp import LlamaCpp
|
||||
from application.llm.novita import NovitaLLM
|
||||
from application.llm.openai import AzureOpenAILLM, OpenAILLM
|
||||
from application.llm.premai import PremAILLM
|
||||
from application.llm.sagemaker import SagemakerAPILLM
|
||||
from application.llm.open_router import OpenRouterLLM
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LLMCreator:
|
||||
llms = {
|
||||
"openai": OpenAILLM,
|
||||
"azure_openai": AzureOpenAILLM,
|
||||
"sagemaker": SagemakerAPILLM,
|
||||
"llama.cpp": LlamaCpp,
|
||||
"anthropic": AnthropicLLM,
|
||||
"docsgpt": DocsGPTAPILLM,
|
||||
"premai": PremAILLM,
|
||||
"groq": GroqLLM,
|
||||
"google": GoogleLLM,
|
||||
"novita": NovitaLLM,
|
||||
"openrouter": OpenRouterLLM,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_llm(
|
||||
cls,
|
||||
@@ -19,27 +42,18 @@ class LLMCreator:
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
from application.core.model_registry import ModelRegistry
|
||||
from application.core.model_utils import get_base_url_for_model
|
||||
|
||||
plugin = PROVIDERS_BY_NAME.get(type.lower())
|
||||
if plugin is None or plugin.llm_class is None:
|
||||
llm_class = cls.llms.get(type.lower())
|
||||
if not llm_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
|
||||
# Prefer per-model endpoint config from the registry. This is what
|
||||
# makes openai_compatible (and the future end-user BYOM phase)
|
||||
# work without changing every call site: if the registered
|
||||
# AvailableModel carries its own api_key / base_url, they win
|
||||
# over whatever the caller resolved via the provider plugin.
|
||||
# Extract base_url from model configuration if model_id is provided
|
||||
base_url = None
|
||||
if model_id:
|
||||
model = ModelRegistry.get_instance().get_model(model_id)
|
||||
if model is not None:
|
||||
if model.api_key:
|
||||
api_key = model.api_key
|
||||
if model.base_url:
|
||||
base_url = model.base_url
|
||||
base_url = get_base_url_for_model(model_id)
|
||||
|
||||
return plugin.llm_class(
|
||||
return llm_class(
|
||||
api_key,
|
||||
user_api_key,
|
||||
decoded_token=decoded_token,
|
||||
|
||||
@@ -389,8 +389,8 @@ class OpenAILLM(BaseLLM):
|
||||
Returns:
|
||||
list: List of supported MIME types
|
||||
"""
|
||||
from application.core.model_yaml import resolve_attachment_alias
|
||||
return resolve_attachment_alias("image")
|
||||
from application.core.model_configs import OPENAI_ATTACHMENTS
|
||||
return OPENAI_ATTACHMENTS
|
||||
|
||||
def prepare_messages_with_attachments(self, messages, attachments=None):
|
||||
"""
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
"""Provider plugin registry.
|
||||
|
||||
Plugins are imported eagerly so import errors surface at app boot rather
|
||||
than at first request. ``ALL_PROVIDERS`` is the canonical ordered list;
|
||||
``PROVIDERS_BY_NAME`` is a name-keyed lookup for LLMCreator and the
|
||||
model registry.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from application.llm.providers.anthropic import AnthropicProvider
|
||||
from application.llm.providers.azure_openai import AzureOpenAIProvider
|
||||
from application.llm.providers.base import Provider
|
||||
from application.llm.providers.docsgpt import DocsGPTProvider
|
||||
from application.llm.providers.google import GoogleProvider
|
||||
from application.llm.providers.groq import GroqProvider
|
||||
from application.llm.providers.huggingface import HuggingFaceProvider
|
||||
from application.llm.providers.llama_cpp import LlamaCppProvider
|
||||
from application.llm.providers.novita import NovitaProvider
|
||||
from application.llm.providers.openai import OpenAIProvider
|
||||
from application.llm.providers.openai_compatible import OpenAICompatibleProvider
|
||||
from application.llm.providers.openrouter import OpenRouterProvider
|
||||
from application.llm.providers.premai import PremAIProvider
|
||||
from application.llm.providers.sagemaker import SagemakerProvider
|
||||
|
||||
# Order here is the order the registry iterates providers (and therefore
|
||||
# the order ``/api/models`` reports them). Match the historical order
|
||||
# from the old ModelRegistry._load_models for byte-stable output during
|
||||
# the migration. ``openai_compatible`` slots in right after ``openai``
|
||||
# so legacy ``OPENAI_BASE_URL`` models keep landing in the same place.
|
||||
ALL_PROVIDERS: List[Provider] = [
|
||||
DocsGPTProvider(),
|
||||
OpenAIProvider(),
|
||||
OpenAICompatibleProvider(),
|
||||
AzureOpenAIProvider(),
|
||||
AnthropicProvider(),
|
||||
GoogleProvider(),
|
||||
GroqProvider(),
|
||||
OpenRouterProvider(),
|
||||
NovitaProvider(),
|
||||
HuggingFaceProvider(),
|
||||
LlamaCppProvider(),
|
||||
PremAIProvider(),
|
||||
SagemakerProvider(),
|
||||
]
|
||||
|
||||
PROVIDERS_BY_NAME: Dict[str, Provider] = {p.name: p for p in ALL_PROVIDERS}
|
||||
|
||||
__all__ = ["ALL_PROVIDERS", "PROVIDERS_BY_NAME", "Provider"]
|
||||
@@ -1,51 +0,0 @@
|
||||
"""Shared helper for providers that follow the
|
||||
``<X>_API_KEY or (LLM_PROVIDER==X and API_KEY)`` pattern.
|
||||
|
||||
This is the dominant pattern across Anthropic, Google, Groq, OpenRouter,
|
||||
and Novita. Extracted here so each plugin stays a few lines long.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from application.core.model_settings import AvailableModel
|
||||
|
||||
|
||||
def get_api_key(
|
||||
settings,
|
||||
provider_name: str,
|
||||
provider_specific_key: Optional[str],
|
||||
) -> Optional[str]:
|
||||
if provider_specific_key:
|
||||
return provider_specific_key
|
||||
if settings.LLM_PROVIDER == provider_name and settings.API_KEY:
|
||||
return settings.API_KEY
|
||||
return None
|
||||
|
||||
|
||||
def filter_models_by_llm_name(
|
||||
settings,
|
||||
provider_name: str,
|
||||
provider_specific_key: Optional[str],
|
||||
models: List[AvailableModel],
|
||||
) -> List[AvailableModel]:
|
||||
"""Mirrors the historical ``_add_<X>_models`` selection logic.
|
||||
|
||||
Behavior:
|
||||
- If the provider-specific API key is set → load all models.
|
||||
- Else if ``LLM_PROVIDER`` matches and ``LLM_NAME`` matches a known
|
||||
model → load just that model.
|
||||
- Otherwise → load all models (preserved "load anyway" branch from
|
||||
the original methods).
|
||||
"""
|
||||
if provider_specific_key:
|
||||
return models
|
||||
if (
|
||||
settings.LLM_PROVIDER == provider_name
|
||||
and settings.LLM_NAME
|
||||
):
|
||||
named = [m for m in models if m.id == settings.LLM_NAME]
|
||||
if named:
|
||||
return named
|
||||
return models
|
||||
@@ -1,23 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.anthropic import AnthropicLLM
|
||||
from application.llm.providers._apikey_or_llm_name import (
|
||||
filter_models_by_llm_name,
|
||||
get_api_key,
|
||||
)
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class AnthropicProvider(Provider):
|
||||
name = "anthropic"
|
||||
llm_class = AnthropicLLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
return get_api_key(settings, self.name, settings.ANTHROPIC_API_KEY)
|
||||
|
||||
def filter_yaml_models(self, settings, models):
|
||||
return filter_models_by_llm_name(
|
||||
settings, self.name, settings.ANTHROPIC_API_KEY, models
|
||||
)
|
||||
@@ -1,30 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.openai import AzureOpenAILLM
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class AzureOpenAIProvider(Provider):
|
||||
name = "azure_openai"
|
||||
llm_class = AzureOpenAILLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
# Azure historically uses the generic API_KEY field.
|
||||
return settings.API_KEY
|
||||
|
||||
def is_enabled(self, settings) -> bool:
|
||||
if settings.OPENAI_API_BASE:
|
||||
return True
|
||||
return settings.LLM_PROVIDER == self.name and bool(settings.API_KEY)
|
||||
|
||||
def filter_yaml_models(self, settings, models):
|
||||
# Mirrors _add_azure_openai_models: when LLM_PROVIDER==azure_openai
|
||||
# and LLM_NAME matches a known model, narrow to that one model.
|
||||
# Otherwise load the entire catalog.
|
||||
if settings.LLM_PROVIDER == self.name and settings.LLM_NAME:
|
||||
named = [m for m in models if m.id == settings.LLM_NAME]
|
||||
if named:
|
||||
return named
|
||||
return models
|
||||
@@ -1,74 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import TYPE_CHECKING, ClassVar, List, Optional, Type
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from application.core.model_settings import AvailableModel
|
||||
from application.core.model_yaml import ProviderCatalog
|
||||
from application.core.settings import Settings
|
||||
from application.llm.base import BaseLLM
|
||||
|
||||
|
||||
class Provider(ABC):
|
||||
"""Owns the *behavior* of an LLM provider.
|
||||
|
||||
Concrete providers declare their name, the LLM class to instantiate,
|
||||
and how to resolve credentials from settings. Static model catalogs
|
||||
live in YAML under ``application/core/models/`` and are joined to the
|
||||
provider by name at registry load time.
|
||||
|
||||
Most plugins receive zero or one catalog at registry-build time. The
|
||||
``openai_compatible`` plugin is the exception: it receives one catalog
|
||||
per matching YAML file, each with its own ``api_key_env`` and
|
||||
``base_url``. Plugins that need per-catalog metadata override
|
||||
``get_models``; the default implementation merges catalogs and routes
|
||||
through ``filter_yaml_models`` + ``extra_models``.
|
||||
"""
|
||||
|
||||
name: ClassVar[str]
|
||||
# ``None`` means the provider appears in the catalog but isn't
|
||||
# dispatchable through LLMCreator (e.g. Hugging Face today, where the
|
||||
# original LLMCreator dict had no entry).
|
||||
llm_class: ClassVar[Optional[Type["BaseLLM"]]] = None
|
||||
|
||||
@abstractmethod
|
||||
def get_api_key(self, settings: "Settings") -> Optional[str]:
|
||||
"""Return the API key for this provider, or None if unavailable."""
|
||||
|
||||
def is_enabled(self, settings: "Settings") -> bool:
|
||||
"""Whether this provider should contribute models to the registry."""
|
||||
return bool(self.get_api_key(settings))
|
||||
|
||||
def filter_yaml_models(
|
||||
self, settings: "Settings", models: List["AvailableModel"]
|
||||
) -> List["AvailableModel"]:
|
||||
"""Hook to filter YAML-loaded models. Default: return all."""
|
||||
return models
|
||||
|
||||
def extra_models(self, settings: "Settings") -> List["AvailableModel"]:
|
||||
"""Hook to add dynamic models not declared in YAML. Default: none."""
|
||||
return []
|
||||
|
||||
def get_models(
|
||||
self,
|
||||
settings: "Settings",
|
||||
catalogs: List["ProviderCatalog"],
|
||||
) -> List["AvailableModel"]:
|
||||
"""Final list of models this plugin contributes.
|
||||
|
||||
Default: merge the models across all matched catalogs (later
|
||||
catalog wins on duplicate id), filter via ``filter_yaml_models``,
|
||||
then append ``extra_models``. Override when per-catalog metadata
|
||||
matters (see ``OpenAICompatibleProvider``).
|
||||
"""
|
||||
merged: List["AvailableModel"] = []
|
||||
seen: dict = {}
|
||||
for c in catalogs:
|
||||
for m in c.models:
|
||||
if m.id in seen:
|
||||
merged[seen[m.id]] = m
|
||||
else:
|
||||
seen[m.id] = len(merged)
|
||||
merged.append(m)
|
||||
return self.filter_yaml_models(settings, merged) + self.extra_models(settings)
|
||||
@@ -1,22 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.docsgpt_provider import DocsGPTAPILLM
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class DocsGPTProvider(Provider):
|
||||
name = "docsgpt"
|
||||
llm_class = DocsGPTAPILLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
# No provider-specific key; the LLM class can use the generic
|
||||
# API_KEY fallback if it needs one. Mirrors model_utils' historical
|
||||
# behavior of returning settings.API_KEY when no specific key exists.
|
||||
return settings.API_KEY
|
||||
|
||||
def is_enabled(self, settings) -> bool:
|
||||
# The hosted DocsGPT model is hidden when the deployment is
|
||||
# pointed at a custom OpenAI-compatible endpoint.
|
||||
return not settings.OPENAI_BASE_URL
|
||||
@@ -1,23 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.google_ai import GoogleLLM
|
||||
from application.llm.providers._apikey_or_llm_name import (
|
||||
filter_models_by_llm_name,
|
||||
get_api_key,
|
||||
)
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class GoogleProvider(Provider):
|
||||
name = "google"
|
||||
llm_class = GoogleLLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
return get_api_key(settings, self.name, settings.GOOGLE_API_KEY)
|
||||
|
||||
def filter_yaml_models(self, settings, models):
|
||||
return filter_models_by_llm_name(
|
||||
settings, self.name, settings.GOOGLE_API_KEY, models
|
||||
)
|
||||
@@ -1,23 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.groq import GroqLLM
|
||||
from application.llm.providers._apikey_or_llm_name import (
|
||||
filter_models_by_llm_name,
|
||||
get_api_key,
|
||||
)
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class GroqProvider(Provider):
|
||||
name = "groq"
|
||||
llm_class = GroqLLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
return get_api_key(settings, self.name, settings.GROQ_API_KEY)
|
||||
|
||||
def filter_yaml_models(self, settings, models):
|
||||
return filter_models_by_llm_name(
|
||||
settings, self.name, settings.GROQ_API_KEY, models
|
||||
)
|
||||
@@ -1,25 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.providers._apikey_or_llm_name import (
|
||||
get_api_key as shared_get_api_key,
|
||||
)
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class HuggingFaceProvider(Provider):
|
||||
"""Surfaces ``huggingface-local`` to the model catalog.
|
||||
|
||||
Not dispatchable through LLMCreator — historically there was no
|
||||
HuggingFaceLLM entry in ``LLMCreator.llms``, and calling ``create_llm``
|
||||
with ``"huggingface"`` raised ``ValueError``. We preserve that
|
||||
behavior: the model appears in ``/api/models`` but selecting it
|
||||
surfaces the same error it always did.
|
||||
"""
|
||||
|
||||
name = "huggingface"
|
||||
llm_class = None # not dispatchable
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
return shared_get_api_key(settings, self.name, settings.HUGGINGFACE_API_KEY)
|
||||
@@ -1,19 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.llama_cpp import LlamaCpp
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class LlamaCppProvider(Provider):
|
||||
"""LLMCreator-only plugin: invocable via LLM_PROVIDER but not in the catalog."""
|
||||
|
||||
name = "llama.cpp"
|
||||
llm_class = LlamaCpp
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
return settings.API_KEY
|
||||
|
||||
def is_enabled(self, settings) -> bool:
|
||||
return False
|
||||
@@ -1,23 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.novita import NovitaLLM
|
||||
from application.llm.providers._apikey_or_llm_name import (
|
||||
filter_models_by_llm_name,
|
||||
get_api_key,
|
||||
)
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class NovitaProvider(Provider):
|
||||
name = "novita"
|
||||
llm_class = NovitaLLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
return get_api_key(settings, self.name, settings.NOVITA_API_KEY)
|
||||
|
||||
def filter_yaml_models(self, settings, models):
|
||||
return filter_models_by_llm_name(
|
||||
settings, self.name, settings.NOVITA_API_KEY, models
|
||||
)
|
||||
@@ -1,37 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.openai import OpenAILLM
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class OpenAIProvider(Provider):
|
||||
name = "openai"
|
||||
llm_class = OpenAILLM
|
||||
|
||||
def get_api_key(self, settings) -> Optional[str]:
|
||||
if settings.OPENAI_API_KEY:
|
||||
return settings.OPENAI_API_KEY
|
||||
if settings.LLM_PROVIDER == self.name and settings.API_KEY:
|
||||
return settings.API_KEY
|
||||
return None
|
||||
|
||||
def is_enabled(self, settings) -> bool:
|
||||
# When the deployment is pointed at a custom OpenAI-compatible
|
||||
# endpoint (Ollama, LM Studio, ...), the cloud-OpenAI catalog is
|
||||
# suppressed but ``is_enabled`` stays True — necessary so the
|
||||
# filter below still gets to drop the catalog (rather than the
|
||||
# registry skipping the provider entirely and missing the rule).
|
||||
if settings.OPENAI_BASE_URL:
|
||||
return True
|
||||
return bool(self.get_api_key(settings))
|
||||
|
||||
def filter_yaml_models(self, settings, models):
|
||||
# Legacy local-endpoint mode hides the cloud catalog. The
|
||||
# corresponding dynamic models live in OpenAICompatibleProvider.
|
||||
if settings.OPENAI_BASE_URL:
|
||||
return []
|
||||
if not settings.OPENAI_API_KEY:
|
||||
return []
|
||||
return models
|
||||
@@ -1,149 +0,0 @@
|
||||
"""Generic provider for OpenAI-wire-compatible endpoints.
|
||||
|
||||
Each ``openai_compatible`` YAML file describes one logical endpoint
|
||||
(Mistral, Together, Fireworks, Ollama, ...) with its own
|
||||
``api_key_env`` and ``base_url``. Multiple files can coexist; the
|
||||
plugin produces one set of models per file, each pre-configured with
|
||||
the right credentials and URL.
|
||||
|
||||
The plugin also handles the **legacy** ``OPENAI_BASE_URL`` + ``LLM_NAME``
|
||||
local-endpoint pattern that previously lived in ``OpenAIProvider``. That
|
||||
path generates models dynamically from ``LLM_NAME``, using
|
||||
``OPENAI_BASE_URL`` and ``OPENAI_API_KEY`` as the endpoint config.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
from application.core.model_settings import (
|
||||
AvailableModel,
|
||||
ModelCapabilities,
|
||||
ModelProvider,
|
||||
)
|
||||
from application.llm.openai import OpenAILLM
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _parse_model_names(llm_name: Optional[str]) -> List[str]:
|
||||
if not llm_name:
|
||||
return []
|
||||
return [name.strip() for name in llm_name.split(",") if name.strip()]
|
||||
|
||||
|
||||
class OpenAICompatibleProvider(Provider):
    """Catalog provider for arbitrary OpenAI-wire-compatible endpoints.

    Two sources of models:

    * ``openai_compatible`` YAML catalogs — one logical endpoint per
      file, each with its own ``api_key_env`` and ``base_url``.
    * The legacy ``OPENAI_BASE_URL`` + ``LLM_NAME`` local-endpoint
      pattern (Ollama, LM Studio, ...), kept for backward compatibility.
    """

    name = "openai_compatible"
    llm_class = OpenAILLM

    def get_api_key(self, settings) -> Optional[str]:
        # Per-model: each catalog supplies its own ``api_key_env``. There
        # is no single plugin-wide key. LLMCreator reads the per-model
        # ``api_key`` set during catalog materialization.
        return None

    def is_enabled(self, settings) -> bool:
        # Concrete enablement happens per catalog (in ``get_models``).
        # Returning True lets the registry call ``get_models`` so we can
        # decide per-file whether to contribute models.
        return True

    def get_models(self, settings, catalogs) -> List[AvailableModel]:
        """Collect models from every YAML catalog plus the legacy path.

        Both sources can contribute at once: the legacy local-endpoint
        models are appended after all catalog models.
        """
        out: List[AvailableModel] = []

        for catalog in catalogs:
            out.extend(self._materialize_yaml_catalog(catalog))

        if settings.OPENAI_BASE_URL and settings.LLM_NAME:
            out.extend(self._materialize_legacy_local_endpoint(settings))

        return out

    def _materialize_yaml_catalog(self, catalog) -> List[AvailableModel]:
        """Resolve one openai_compatible YAML into ready-to-dispatch models.

        Skipped (with an INFO-level log) if ``api_key_env`` resolves to
        nothing — no point publishing models the user can't actually
        call. INFO rather than WARNING because operators may legitimately
        drop multiple provider YAMLs as templates and only set the env
        vars for the ones they actually use; a missing key is ambiguous,
        not necessarily a misconfig.

        Raises:
            ValueError: if the YAML is missing ``base_url`` or
                ``api_key_env`` — a structural error, unlike the
                merely-unset env var above.
        """
        if not catalog.base_url:
            raise ValueError(
                f"{catalog.source_path}: openai_compatible YAML must set "
                "'base_url'."
            )
        if not catalog.api_key_env:
            raise ValueError(
                f"{catalog.source_path}: openai_compatible YAML must set "
                "'api_key_env'."
            )

        api_key = os.environ.get(catalog.api_key_env)
        if not api_key:
            logger.info(
                "openai_compatible catalog %s skipped: env var %s is not set",
                catalog.source_path,
                catalog.api_key_env,
            )
            return []

        out: List[AvailableModel] = []
        for m in catalog.models:
            out.append(self._with_endpoint(m, catalog.base_url, api_key))
        return out

    def _materialize_legacy_local_endpoint(self, settings) -> List[AvailableModel]:
        """Generate AvailableModels from ``LLM_NAME`` for the legacy
        ``OPENAI_BASE_URL`` deployment pattern (Ollama, LM Studio, ...).

        Preserves the historical ``provider="openai"`` display behavior
        by setting ``display_provider="openai"``.
        """
        # Function-scope import — presumably to avoid a circular import
        # at module load; TODO confirm against application.core.model_yaml.
        from application.core.model_yaml import resolve_attachment_alias

        attachments = resolve_attachment_alias("image")
        # Legacy precedence: the OpenAI-specific key wins over the generic one.
        api_key = settings.OPENAI_API_KEY or settings.API_KEY
        out: List[AvailableModel] = []
        for model_name in _parse_model_names(settings.LLM_NAME):
            out.append(
                AvailableModel(
                    id=model_name,
                    provider=ModelProvider.OPENAI_COMPATIBLE,
                    display_name=model_name,
                    description=f"Custom OpenAI-compatible model at {settings.OPENAI_BASE_URL}",
                    base_url=settings.OPENAI_BASE_URL,
                    capabilities=ModelCapabilities(
                        supports_tools=True,
                        supported_attachment_types=attachments,
                    ),
                    api_key=api_key,
                    display_provider="openai",
                )
            )
        return out

    @staticmethod
    def _with_endpoint(
        model: AvailableModel, base_url: str, api_key: str
    ) -> AvailableModel:
        """Return a copy of ``model`` carrying the catalog's endpoint config.

        The catalog-level ``base_url`` is the default; an explicit
        per-model ``base_url`` in the YAML wins.
        """
        # NOTE(review): copies only the fields listed below — if
        # AvailableModel grows additional fields they will be silently
        # dropped here; confirm against the dataclass definition.
        return AvailableModel(
            id=model.id,
            provider=model.provider,
            display_name=model.display_name,
            description=model.description,
            capabilities=model.capabilities,
            enabled=model.enabled,
            base_url=model.base_url or base_url,
            display_provider=model.display_provider,
            api_key=api_key,
        )
|
||||
@@ -1,23 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.open_router import OpenRouterLLM
|
||||
from application.llm.providers._apikey_or_llm_name import (
|
||||
filter_models_by_llm_name,
|
||||
get_api_key,
|
||||
)
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class OpenRouterProvider(Provider):
    """Catalog provider for OpenRouter.

    Thin delegation layer: API-key resolution and catalog filtering are
    both handled by the shared ``_apikey_or_llm_name`` helpers, keyed on
    ``OPEN_ROUTER_API_KEY``.
    """

    name = "openrouter"
    llm_class = OpenRouterLLM

    def get_api_key(self, settings) -> Optional[str]:
        # NOTE(review): exact fallback semantics live in the shared
        # helper (_apikey_or_llm_name.get_api_key) — see that module.
        return get_api_key(settings, self.name, settings.OPEN_ROUTER_API_KEY)

    def filter_yaml_models(self, settings, models):
        # Same shared LLM_NAME-aware filter that sibling providers use.
        return filter_models_by_llm_name(
            settings, self.name, settings.OPEN_ROUTER_API_KEY, models
        )
|
||||
@@ -1,19 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.premai import PremAILLM
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class PremAIProvider(Provider):
    """LLMCreator-only plugin: invocable via LLM_PROVIDER but not in the catalog."""

    name = "premai"
    llm_class = PremAILLM

    def get_api_key(self, settings) -> Optional[str]:
        # Uses the generic API_KEY setting — PremAI has no dedicated
        # key setting in this plugin.
        return settings.API_KEY

    def is_enabled(self, settings) -> bool:
        # Never contributes models to the catalog (see class docstring);
        # the LLM class remains reachable via LLM_PROVIDER dispatch.
        return False
|
||||
@@ -1,24 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from application.llm.sagemaker import SagemakerAPILLM
|
||||
from application.llm.providers.base import Provider
|
||||
|
||||
|
||||
class SagemakerProvider(Provider):
    """LLMCreator-only plugin: invocable via LLM_PROVIDER but not in the catalog.

    SageMaker reads its credentials from ``SAGEMAKER_*`` settings inside
    the LLM class itself; this plugin's ``get_api_key`` exists only for
    LLMCreator's symmetry.
    """

    name = "sagemaker"
    llm_class = SagemakerAPILLM

    def get_api_key(self, settings) -> Optional[str]:
        # Symmetry only — see class docstring; this value is not a real
        # SageMaker credential.
        return settings.API_KEY

    def is_enabled(self, settings) -> bool:
        # Never appears in the /api/models catalog.
        return False
|
||||
@@ -1,59 +0,0 @@
|
||||
"""FastMCP server exposing DocsGPT retrieval over streamable HTTP.
|
||||
|
||||
Mounted at ``/mcp`` by ``application/asgi.py``. Bearer tokens are the
|
||||
existing DocsGPT agent API keys — no new credential surface.
|
||||
|
||||
The tool reads the ``Authorization`` header directly via
|
||||
``get_http_headers(include={"authorization"})``. The ``include`` kwarg
|
||||
is required: by default ``get_http_headers`` strips ``authorization``
|
||||
(and a handful of other hop-by-hop headers) so they aren't forwarded
|
||||
to downstream services — since we deliberately want the caller's
|
||||
token, we opt it back in.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from fastmcp import FastMCP
|
||||
from fastmcp.server.dependencies import get_http_headers
|
||||
|
||||
from application.services.search_service import (
|
||||
InvalidAPIKey,
|
||||
SearchFailed,
|
||||
search,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
mcp = FastMCP("docsgpt")
|
||||
|
||||
|
||||
def _extract_bearer_token() -> str | None:
|
||||
auth = get_http_headers(include={"authorization"}).get("authorization", "")
|
||||
parts = auth.split(None, 1)
|
||||
if len(parts) != 2 or parts[0].lower() != "bearer" or not parts[1]:
|
||||
return None
|
||||
return parts[1]
|
||||
|
||||
|
||||
@mcp.tool
async def search_docs(query: str, chunks: int = 5) -> list[dict]:
    """Search the caller's DocsGPT knowledge base.

    Authentication is via ``Authorization: Bearer <agent-api-key>`` on
    the MCP request — the same opaque key that ``/api/search`` accepts
    in its JSON body. Returns at most ``chunks`` hits, each a dict with
    ``text``, ``title``, ``source`` keys.

    Raises:
        PermissionError: Bearer token is missing or does not resolve
            to an agent.
        SearchFailed: unexpected retrieval-infrastructure error.
    """
    api_key = _extract_bearer_token()
    if not api_key:
        raise PermissionError("Missing Bearer token")
    try:
        # ``search`` is synchronous (DB + vectorstore I/O) — run it on a
        # worker thread so the event loop isn't blocked.
        return await asyncio.to_thread(search, api_key, query, chunks)
    except InvalidAPIKey as exc:
        # Map the domain error onto PermissionError for the MCP layer.
        raise PermissionError("Invalid API key") from exc
    except SearchFailed:
        # Infrastructure failure: log with traceback, propagate as-is.
        logger.exception("search_docs failed")
        raise
|
||||
@@ -1,12 +1,9 @@
|
||||
a2wsgi==1.10.10
|
||||
alembic>=1.13,<2
|
||||
anthropic==0.88.0
|
||||
asgiref>=3.11.1
|
||||
boto3==1.42.83
|
||||
beautifulsoup4==4.14.3
|
||||
cel-python==0.5.0
|
||||
celery==5.6.3
|
||||
celery-redbeat==2.3.3
|
||||
cryptography==46.0.7
|
||||
dataclasses-json==0.6.7
|
||||
defusedxml==0.7.1
|
||||
@@ -17,7 +14,7 @@ docx2txt==0.9
|
||||
ddgs>=8.0.0
|
||||
fast-ebook
|
||||
elevenlabs==2.43.0
|
||||
Flask==3.1.1
|
||||
Flask==3.1.3
|
||||
faiss-cpu==1.13.2
|
||||
fastmcp==3.2.4
|
||||
flask-restx==1.3.2
|
||||
@@ -38,7 +35,7 @@ langchain==1.2.3
|
||||
langchain-community==0.4.1
|
||||
langchain-core==1.2.29
|
||||
langchain-openai==1.1.12
|
||||
langchain-text-splitters==1.1.1
|
||||
langchain-text-splitters==1.1.2
|
||||
langsmith==0.7.31
|
||||
lazy-object-proxy==1.12.0
|
||||
lxml==6.0.2
|
||||
@@ -52,16 +49,6 @@ networkx==3.6.1
|
||||
numpy==2.4.4
|
||||
openai==2.32.0
|
||||
openapi3-parser==1.1.22
|
||||
opentelemetry-distro>=0.50b0,<1
|
||||
opentelemetry-exporter-otlp>=1.29.0,<2
|
||||
opentelemetry-instrumentation-celery>=0.50b0,<1
|
||||
opentelemetry-instrumentation-flask>=0.50b0,<1
|
||||
opentelemetry-instrumentation-logging>=0.50b0,<1
|
||||
opentelemetry-instrumentation-psycopg>=0.50b0,<1
|
||||
opentelemetry-instrumentation-redis>=0.50b0,<1
|
||||
opentelemetry-instrumentation-requests>=0.50b0,<1
|
||||
opentelemetry-instrumentation-sqlalchemy>=0.50b0,<1
|
||||
opentelemetry-instrumentation-starlette>=0.50b0,<1
|
||||
orjson==3.11.7
|
||||
packaging==26.0
|
||||
pandas==3.0.2
|
||||
@@ -71,7 +58,7 @@ pdf2image>=1.17.0
|
||||
pillow
|
||||
portalocker>=2.7.0,<4.0.0
|
||||
prompt-toolkit==3.0.52
|
||||
protobuf==6.33.6
|
||||
protobuf==7.34.1
|
||||
psycopg[binary,pool]>=3.1,<4
|
||||
py==1.11.0
|
||||
pydantic
|
||||
@@ -82,7 +69,6 @@ python-dateutil==2.9.0.post0
|
||||
python-dotenv
|
||||
python-jose==3.5.0
|
||||
python-pptx==1.0.2
|
||||
PyYAML
|
||||
redis==7.4.0
|
||||
referencing>=0.28.0,<0.38.0
|
||||
regex==2026.4.4
|
||||
@@ -90,7 +76,6 @@ requests==2.33.1
|
||||
retry==0.9.2
|
||||
sentence-transformers==5.3.0
|
||||
sqlalchemy>=2.0,<3
|
||||
starlette>=1.0,<2
|
||||
tiktoken==0.12.0
|
||||
tokenizers==0.22.2
|
||||
torch==2.11.0
|
||||
@@ -100,8 +85,6 @@ typing-extensions==4.15.0
|
||||
typing-inspect==0.9.0
|
||||
tzdata==2026.1
|
||||
urllib3==2.6.3
|
||||
uvicorn[standard]>=0.30,<1
|
||||
uvicorn-worker>=0.4,<1
|
||||
vine==5.1.0
|
||||
wcwidth==0.6.0
|
||||
werkzeug>=3.1.0
|
||||
|
||||
@@ -1,153 +0,0 @@
|
||||
"""Shared retrieval service used by the HTTP search route and the MCP tool.
|
||||
|
||||
Flask-free. Raises domain exceptions (``InvalidAPIKey``, ``SearchFailed``)
|
||||
that callers translate into their own wire protocol (HTTP status codes,
|
||||
MCP error responses, etc.).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, List
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.storage.db.repositories.agents import AgentsRepository
|
||||
from application.storage.db.session import db_readonly
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class InvalidAPIKey(Exception):
    """The supplied ``api_key`` does not resolve to an agent.

    Callers translate this into their own wire protocol (e.g. the MCP
    tool maps it to ``PermissionError``).
    """
|
||||
|
||||
|
||||
class SearchFailed(Exception):
    """Unexpected error during retrieval (e.g. DB outage). Caller maps to 5xx.

    Raised with the underlying exception chained via ``raise ... from``.
    """
|
||||
|
||||
|
||||
def _collect_source_ids(agent: Dict[str, Any]) -> List[str]:
|
||||
"""Extract the ordered list of source UUIDs to search.
|
||||
|
||||
Prefers ``extra_source_ids`` (PG ARRAY(UUID) of multi-source agents);
|
||||
falls back to the legacy single ``source_id`` field.
|
||||
"""
|
||||
source_ids: List[str] = []
|
||||
extra = agent.get("extra_source_ids") or []
|
||||
for src in extra:
|
||||
if src:
|
||||
source_ids.append(str(src))
|
||||
if not source_ids:
|
||||
single = agent.get("source_id")
|
||||
if single:
|
||||
source_ids.append(str(single))
|
||||
return source_ids
|
||||
|
||||
|
||||
def _search_sources(
    query: str, source_ids: List[str], chunks: int
) -> List[Dict[str, Any]]:
    """Search across each source's vectorstore and return up to ``chunks`` hits.

    Per-source errors are logged and skipped so one broken index doesn't
    take down the whole search. Results are de-duplicated by content hash.
    """
    if chunks <= 0 or not source_ids:
        return []

    results: List[Dict[str, Any]] = []
    # NOTE(review): the per-source quota is computed before blank ids are
    # skipped below, so blank entries shrink every source's share — confirm
    # whether blank ids can actually reach this point.
    chunks_per_source = max(1, chunks // len(source_ids))
    seen_texts: set[int] = set()

    for source_id in source_ids:
        if not source_id or not source_id.strip():
            continue

        try:
            docsearch = VectorCreator.create_vectorstore(
                settings.VECTOR_STORE, source_id, settings.EMBEDDINGS_KEY
            )
            # Overfetch 2x the quota so dedup still leaves enough hits.
            docs = docsearch.search(query, k=chunks_per_source * 2)

            for doc in docs:
                if len(results) >= chunks:
                    break

                # Duck-typed result handling: Document-like objects expose
                # .page_content/.metadata; other stores return plain dicts.
                if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
                    page_content = doc.page_content
                    metadata = doc.metadata
                else:
                    page_content = doc.get("text", doc.get("page_content", ""))
                    metadata = doc.get("metadata", {})

                # Dedup key: hash of the first 200 chars — near-identical
                # chunks from overlapping sources collapse to one hit.
                text_hash = hash(page_content[:200])
                if text_hash in seen_texts:
                    continue
                seen_texts.add(text_hash)

                title = metadata.get("title", metadata.get("post_title", ""))
                if not isinstance(title, str):
                    title = str(title) if title else ""

                if title:
                    # Path-like titles are reduced to their basename.
                    title = title.split("/")[-1]
                else:
                    # Fall back to the filename, then a content snippet.
                    title = metadata.get("filename", page_content[:50] + "...")

                source = metadata.get("source", source_id)

                results.append(
                    {
                        "text": page_content,
                        "title": title,
                        "source": source,
                    }
                )

                if len(results) >= chunks:
                    break

        except Exception as e:
            # One broken index must not fail the whole search.
            logger.error(
                f"Error searching vectorstore {source_id}: {e}",
                exc_info=True,
            )
            continue

    return results[:chunks]
|
||||
|
||||
|
||||
def search(api_key: str, query: str, chunks: int = 5) -> List[Dict[str, Any]]:
    """Look up the agent behind ``api_key`` and search its sources.

    Args:
        api_key: Agent API key (the opaque string stored on
            ``agents.key`` in Postgres).
        query: Free-text search query.
        chunks: Maximum number of hits to return.

    Returns:
        Up to ``chunks`` hit dicts with ``text``, ``title``, ``source``
        keys; an empty list when ``chunks`` is non-positive or the agent
        has no sources configured.

    Raises:
        InvalidAPIKey: if ``api_key`` does not resolve to an agent.
        SearchFailed: on unexpected DB / infrastructure errors.
    """
    if chunks <= 0:
        return []

    try:
        with db_readonly() as conn:
            agent = AgentsRepository(conn).find_by_key(api_key)
    except Exception as lookup_error:
        raise SearchFailed("agent lookup failed") from lookup_error

    if not agent:
        raise InvalidAPIKey()

    sources = _collect_source_ids(agent)
    return _search_sources(query, sources, chunks) if sources else []
|
||||
@@ -117,16 +117,6 @@ stack_logs_table = Table(
|
||||
Column("timestamp", DateTime(timezone=True), nullable=False, server_default=func.now()),
|
||||
)
|
||||
|
||||
# Singleton key/value table for instance-wide state (e.g. anonymous
# instance UUID, one-shot notice flags). Added in migration
# ``0002_app_metadata``. Both columns are plain text, so future one-off
# config values can piggyback without a new migration each time.
app_metadata_table = Table(
    "app_metadata",
    metadata,
    Column("key", Text, primary_key=True),   # lookup key, e.g. "instance_id"
    Column("value", Text, nullable=False),   # stringified value
)
|
||||
|
||||
|
||||
# --- Phase 2, Tier 2 --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Repository for the ``agents`` table.
|
||||
|
||||
Covers every write operation the legacy Mongo code performs on ``agents_collection``:
|
||||
This is the most complex Phase 2 repository. Covers every write operation
|
||||
the legacy Mongo code performs on ``agents_collection``:
|
||||
|
||||
- create, update, delete
|
||||
- find by key (API key lookup)
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
"""Repository for the ``app_metadata`` singleton key/value table.
|
||||
|
||||
Owns the instance-wide state the version-check client needs:
|
||||
``instance_id`` (anonymous UUID sent with each check) and
|
||||
``version_check_notice_shown`` (one-shot flag for the first-run
|
||||
telemetry notice). Kept deliberately generic so future one-off config
|
||||
values can piggyback without a new migration each time.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import Connection, text
|
||||
|
||||
|
||||
class AppMetadataRepository:
    """Postgres-backed ``app_metadata`` store. Tiny by design."""

    def __init__(self, conn: Connection) -> None:
        self._conn = conn

    def get(self, key: str) -> Optional[str]:
        """Return the stored value for ``key``, or ``None`` when absent."""
        result = self._conn.execute(
            text("SELECT value FROM app_metadata WHERE key = :key"),
            {"key": key},
        )
        row = result.fetchone()
        if row is None:
            return None
        return row[0]

    def set(self, key: str, value: str) -> None:
        """Upsert ``value`` under ``key``."""
        upsert = text(
            "INSERT INTO app_metadata (key, value) VALUES (:key, :value) "
            "ON CONFLICT (key) DO UPDATE SET value = EXCLUDED.value"
        )
        self._conn.execute(upsert, {"key": key, "value": value})

    def get_or_create_instance_id(self) -> str:
        """Return the anonymous instance UUID, generating one if absent.

        Uses ``INSERT ... ON CONFLICT DO NOTHING`` + re-read so two
        workers racing on the very first startup converge on a single
        UUID instead of each persisting their own.
        """
        current = self.get("instance_id")
        if current:
            return current

        fresh = str(uuid.uuid4())
        insert_if_absent = text(
            "INSERT INTO app_metadata (key, value) VALUES ('instance_id', :value) "
            "ON CONFLICT (key) DO NOTHING"
        )
        self._conn.execute(insert_if_absent, {"value": fresh})
        # Re-read: if another worker won the race, their UUID is now authoritative.
        winner = self.get("instance_id")
        return winner or fresh
|
||||
@@ -1,304 +0,0 @@
|
||||
"""Anonymous version-check client.
|
||||
|
||||
Fired on every Celery worker boot (see ``application/celery_init.py``
|
||||
``worker_ready`` handler) and on a 7h periodic schedule (see the
|
||||
``version-check`` entry in ``application/api/user/tasks.py``). Posts
|
||||
the running version + anonymous instance UUID to
|
||||
``gptcloud.arc53.com/api/check``, caches the response in Redis, and
|
||||
surfaces any advisories to stdout + logs.
|
||||
|
||||
Design invariants — all enforced by a broad ``try/except`` at the top
|
||||
of :func:`run_check`:
|
||||
|
||||
* Never blocks worker startup (fired from a daemon thread).
|
||||
* Never raises to the caller (every failure is swallowed + logged at
|
||||
``DEBUG``).
|
||||
* Opt-out via ``VERSION_CHECK=0`` short-circuits before any Postgres
|
||||
write, Redis access, or outbound request.
|
||||
* Redis coordinates multi-worker and multi-replica deployments — the
|
||||
first worker to acquire ``docsgpt:version_check:lock`` fetches, the
|
||||
rest read from the cached response on the next cycle.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import socket
|
||||
import sys
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from application.cache import get_redis_instance
|
||||
from application.core.settings import settings
|
||||
from application.storage.db.repositories.app_metadata import AppMetadataRepository
|
||||
from application.storage.db.session import db_session
|
||||
from application.version import get_version
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
ENDPOINT_URL = "https://gptcloud.arc53.com/api/check"
|
||||
CLIENT_NAME = "docsgpt-backend"
|
||||
REQUEST_TIMEOUT_SECONDS = 5
|
||||
|
||||
CACHE_KEY = "docsgpt:version_check:response"
|
||||
LOCK_KEY = "docsgpt:version_check:lock"
|
||||
CACHE_TTL_SECONDS = 6 * 3600 # 6h default; shortened by response `next_check_after`.
|
||||
LOCK_TTL_SECONDS = 60
|
||||
|
||||
NOTICE_KEY = "version_check_notice_shown"
|
||||
INSTANCE_ID_KEY = "instance_id"
|
||||
|
||||
_HIGH_SEVERITIES = {"high", "critical"}
|
||||
|
||||
_ANSI_RESET = "\033[0m"
|
||||
_ANSI_RED = "\033[31m"
|
||||
_ANSI_YELLOW = "\033[33m"
|
||||
|
||||
|
||||
def run_check() -> None:
    """Entry point for the worker-startup daemon thread.

    Safe to call unconditionally: the opt-out, Redis-outage, and
    Postgres-outage paths all return silently. No exception propagates.
    """
    try:
        _run_check_inner()
    except Exception as exc:  # noqa: BLE001 — belt-and-braces; nothing escapes.
        # DEBUG on purpose: a failed phone-home must never spam worker logs.
        logger.debug("version check crashed: %s", exc, exc_info=True)
|
||||
|
||||
|
||||
def _run_check_inner() -> None:
    """One version-check cycle: opt-out gate, cache read, lock, fetch, render.

    Flow:
      1. Bail out when ``VERSION_CHECK`` is disabled.
      2. Resolve the anonymous instance UUID; skip entirely if Postgres
         is unavailable.
      3. Serve from the Redis cache when a previous cycle is still fresh.
      4. Otherwise take the Redis lock (when Redis is up), fetch, cache,
         and render advisories.
    """
    if not settings.VERSION_CHECK:
        return

    instance_id = _resolve_instance_id_and_notice()
    if instance_id is None:
        # Postgres unavailable — per spec we skip the check entirely
        # rather than phone home with a synthetic/ephemeral UUID.
        return

    redis_client = get_redis_instance()

    cached = _read_cache(redis_client)
    if cached is not None:
        _render_advisories(cached)
        return

    # Cache miss. Try to win the lock; if another worker has it, skip.
    # ``redis_client is None`` here means Redis is unreachable — per the
    # spec we still proceed uncached (acceptable duplicate calls in
    # multi-worker Redis-less deploys).
    holding_lock = False
    if redis_client is not None:
        if not _acquire_lock(redis_client):
            return
        holding_lock = True

    # Fix: release the lock in a ``finally`` block. Previously, an
    # exception in _write_cache/_render_advisories (swallowed upstream
    # by run_check) leaked the lock until its 60s TTL expired, stalling
    # other workers' cycles for no reason.
    try:
        response = _fetch(instance_id)
        if response is None:
            return
        _write_cache(redis_client, response)
        _render_advisories(response)
    finally:
        if holding_lock:
            _release_lock(redis_client)
|
||||
|
||||
|
||||
def _resolve_instance_id_and_notice() -> Optional[str]:
    """Load (or create) the instance UUID and emit the first-run notice.

    The notice is printed at most once across the lifetime of the
    installation — tracked via the ``version_check_notice_shown`` row
    in ``app_metadata``. Both reads and the write happen inside one
    short transaction so two racing workers can't each emit the notice.

    Returns:
        The instance UUID, or ``None`` when Postgres is unreachable —
        the caller then skips the whole check.
    """
    try:
        with db_session() as conn:
            repo = AppMetadataRepository(conn)
            instance_id = repo.get_or_create_instance_id()
            if repo.get(NOTICE_KEY) is None:
                _print_first_run_notice()
                repo.set(NOTICE_KEY, "1")
            return instance_id
    except Exception as exc:  # noqa: BLE001 — Postgres down, bad URI, etc.
        logger.debug("version check: Postgres unavailable (%s)", exc, exc_info=True)
        return None
|
||||
|
||||
|
||||
def _print_first_run_notice() -> None:
|
||||
message = (
|
||||
"Anonymous version check enabled — sends version to "
|
||||
"gptcloud.arc53.com.\nDisable with VERSION_CHECK=0."
|
||||
)
|
||||
print(message, flush=True)
|
||||
logger.info("version check: first-run notice shown")
|
||||
|
||||
|
||||
def _read_cache(redis_client) -> Optional[Dict[str, Any]]:
|
||||
if redis_client is None:
|
||||
return None
|
||||
try:
|
||||
raw = redis_client.get(CACHE_KEY)
|
||||
except Exception as exc: # noqa: BLE001 — Redis transient errors.
|
||||
logger.debug("version check: cache GET failed (%s)", exc, exc_info=True)
|
||||
return None
|
||||
if raw is None:
|
||||
return None
|
||||
try:
|
||||
return json.loads(raw.decode("utf-8") if isinstance(raw, bytes) else raw)
|
||||
except (ValueError, AttributeError) as exc:
|
||||
logger.debug("version check: cache decode failed (%s)", exc, exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
def _write_cache(redis_client, response: Dict[str, Any]) -> None:
    """Cache ``response`` in Redis for the computed TTL.

    No-op when Redis is unavailable. Write failures are swallowed and
    logged at DEBUG — a cold cache only costs an extra fetch next cycle.
    """
    if redis_client is None:
        return
    ttl = _compute_ttl(response)
    try:
        redis_client.setex(CACHE_KEY, ttl, json.dumps(response))
    except Exception as exc:  # noqa: BLE001
        logger.debug("version check: cache SETEX failed (%s)", exc, exc_info=True)
|
||||
|
||||
|
||||
def _compute_ttl(response: Dict[str, Any]) -> int:
|
||||
"""Cap the cache at 6h but honor a shorter server-specified window."""
|
||||
next_after = response.get("next_check_after")
|
||||
if isinstance(next_after, (int, float)) and next_after > 0:
|
||||
return max(1, min(CACHE_TTL_SECONDS, int(next_after)))
|
||||
return CACHE_TTL_SECONDS
|
||||
|
||||
|
||||
def _acquire_lock(redis_client) -> bool:
|
||||
try:
|
||||
owner = f"{socket.gethostname()}:{os.getpid()}"
|
||||
return bool(
|
||||
redis_client.set(LOCK_KEY, owner, nx=True, ex=LOCK_TTL_SECONDS)
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
# Treat a failing Redis the same as "no lock infra" — skip rather
|
||||
# than fire without coordination, because Redis outage is
|
||||
# usually transient and one missed cycle is harmless.
|
||||
logger.debug("version check: lock acquire failed (%s)", exc, exc_info=True)
|
||||
return False
|
||||
|
||||
|
||||
def _release_lock(redis_client) -> None:
    """Best-effort release of the fetch lock.

    Failures are swallowed: the lock self-expires after
    ``LOCK_TTL_SECONDS`` anyway, so a missed delete only delays the
    next fetch slightly.
    """
    try:
        redis_client.delete(LOCK_KEY)
    except Exception as exc:  # noqa: BLE001
        logger.debug("version check: lock release failed (%s)", exc, exc_info=True)
|
||||
|
||||
|
||||
def _fetch(instance_id: str) -> Optional[Dict[str, Any]]:
    """POST the anonymous version payload and return the parsed JSON reply.

    Returns ``None`` on every failure path: missing/unknown local
    version, network error, non-2xx status, or an unparseable body.
    """
    version = get_version()
    if version in ("", "unknown"):
        # The endpoint rejects payloads without a valid semver, and the
        # rejection is otherwise logged at DEBUG — invisible under the
        # usual ``-l INFO`` Celery worker start. Surface it loudly so a
        # misconfigured release (missing or unset ``__version__``) is
        # obvious instead of silently disabling the check.
        logger.warning(
            "version check: skipping — get_version() returned %r. "
            "Set __version__ in application/version.py to a valid "
            "version string.",
            version,
        )
        return None
    # Anonymous payload: version, random instance UUID, and coarse
    # platform info only — no hostnames, paths, or user data.
    payload = {
        "version": version,
        "instance_id": instance_id,
        "python_version": platform.python_version(),
        "platform": sys.platform,
        "client": CLIENT_NAME,
    }
    try:
        resp = requests.post(
            ENDPOINT_URL,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        logger.debug("version check: request failed (%s)", exc, exc_info=True)
        return None
    if resp.status_code >= 400:
        logger.debug("version check: non-2xx response %s", resp.status_code)
        return None
    try:
        return resp.json()
    except ValueError as exc:
        logger.debug("version check: response decode failed (%s)", exc, exc_info=True)
        return None
|
||||
|
||||
|
||||
def _render_advisories(response: Dict[str, Any]) -> None:
    """Surface any security advisories carried in the check response.

    Every well-formed advisory is logged at WARNING; high/critical ones
    additionally get a colorized console banner. Malformed payloads
    (non-list ``advisories``, non-dict items) are ignored silently.
    """
    advisories = response.get("advisories") or []
    if not isinstance(advisories, list):
        return
    current_version = get_version()
    for advisory in advisories:
        if not isinstance(advisory, dict):
            continue
        severity = str(advisory.get("severity", "")).lower()
        advisory_id = advisory.get("id", "UNKNOWN")
        title = advisory.get("title", "")
        url = advisory.get("url", "")
        fixed_in = advisory.get("fixed_in")
        summary = advisory.get(
            "summary",
            f"Your DocsGPT version {current_version} is vulnerable.",
        )

        logger.warning(
            "security advisory %s (severity=%s) affects version %s: %s%s%s",
            advisory_id,
            severity or "unknown",
            current_version,
            title or summary,
            f" — fixed in {fixed_in}" if fixed_in else "",
            f" — {url}" if url else "",
        )

        # Console banner only for high/critical so routine advisories
        # don't spam stdout.
        if severity in _HIGH_SEVERITIES:
            _print_console_advisory(
                advisory_id=advisory_id,
                title=title,
                severity=severity,
                summary=summary,
                fixed_in=fixed_in,
                url=url,
            )
|
||||
|
||||
|
||||
def _print_console_advisory(
|
||||
*,
|
||||
advisory_id: str,
|
||||
title: str,
|
||||
severity: str,
|
||||
summary: str,
|
||||
fixed_in: Optional[str],
|
||||
url: str,
|
||||
) -> None:
|
||||
color = _ANSI_RED if severity == "critical" else _ANSI_YELLOW
|
||||
bar = "=" * 60
|
||||
upgrade_line = ""
|
||||
if fixed_in and url:
|
||||
upgrade_line = f" Upgrade to {fixed_in}+ — {url}"
|
||||
elif fixed_in:
|
||||
upgrade_line = f" Upgrade to {fixed_in}+"
|
||||
elif url:
|
||||
upgrade_line = f" {url}"
|
||||
|
||||
lines = [
|
||||
bar,
|
||||
f"\u26a0 SECURITY ADVISORY: {advisory_id}",
|
||||
f" {summary}",
|
||||
f" {title} (severity: {severity})" if title else f" severity: {severity}",
|
||||
]
|
||||
if upgrade_line:
|
||||
lines.append(upgrade_line)
|
||||
lines.append(bar)
|
||||
print(f"{color}{chr(10).join(lines)}{_ANSI_RESET}", flush=True)
|
||||
@@ -1,10 +0,0 @@
|
||||
"""DocsGPT backend version string."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
__version__ = "0.17.0"
|
||||
|
||||
|
||||
def get_version() -> str:
|
||||
"""Return the DocsGPT backend version."""
|
||||
return __version__
|
||||
@@ -348,16 +348,6 @@ def run_agent_logic(agent_config, input_data):
|
||||
model_id = agent_default_model
|
||||
else:
|
||||
model_id = get_default_model_id()
|
||||
if agent_default_model:
|
||||
# Stored model_id no longer resolves in the registry. Log so
|
||||
# operators can detect bad YAML edits before users complain;
|
||||
# behavior matches the historical silent fallback.
|
||||
logging.warning(
|
||||
"Agent %s references unknown model_id %r; falling back to %r",
|
||||
agent_id,
|
||||
agent_default_model,
|
||||
model_id,
|
||||
)
|
||||
|
||||
# Get provider and API key for the selected model
|
||||
provider = get_provider_from_model_id(model_id) if model_id else settings.LLM_PROVIDER
|
||||
|
||||
@@ -104,15 +104,7 @@ To run the DocsGPT backend locally, you'll need to set up a Python environment a
|
||||
flask --app application/app.py run --host=0.0.0.0 --port=7091
|
||||
```
|
||||
|
||||
This command will launch the backend server, making it accessible on `http://localhost:7091`. It's the fastest inner-loop option for day-to-day development — the Werkzeug interactive debugger still works and it hot-reloads on source changes. It serves the Flask routes only.
|
||||
|
||||
If you need to exercise the full ASGI stack — the `/mcp` endpoint (FastMCP server), or to match the production runtime — run the ASGI composition under uvicorn instead:
|
||||
|
||||
```bash
|
||||
uvicorn application.asgi:asgi_app --host 0.0.0.0 --port 7091 --reload
|
||||
```
|
||||
|
||||
Production uses `gunicorn -k uvicorn_worker.UvicornWorker` against the same `application.asgi:asgi_app` target.
|
||||
This command will launch the backend server, making it accessible on `http://localhost:7091`.
|
||||
|
||||
6. **Start the Celery Worker:**
|
||||
|
||||
|
||||
@@ -99,82 +99,6 @@ EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2 # You can al
|
||||
|
||||
In this case, even though you are using Ollama locally, `LLM_PROVIDER` is set to `openai` because Ollama (and many other local inference engines) are designed to be API-compatible with OpenAI. `OPENAI_BASE_URL` points DocsGPT to the local Ollama server.
|
||||
|
||||
## Adding Custom Models (`MODELS_CONFIG_DIR`)
|
||||
|
||||
DocsGPT ships with a built-in catalog of models for the providers it
|
||||
supports out of the box (OpenAI, Anthropic, Google, Groq, OpenRouter,
|
||||
Novita, Azure OpenAI, Hugging Face, DocsGPT). To add **your own
|
||||
models** without forking the repo — for example, a Mistral or Together
|
||||
account, a self-hosted vLLM endpoint, or any other OpenAI-compatible
|
||||
API — point `MODELS_CONFIG_DIR` at a directory of YAML files.
|
||||
|
||||
```
|
||||
MODELS_CONFIG_DIR=/etc/docsgpt/models
|
||||
MISTRAL_API_KEY=sk-...
|
||||
```
|
||||
|
||||
A minimal YAML for one provider:
|
||||
|
||||
```yaml
|
||||
# /etc/docsgpt/models/mistral.yaml
|
||||
provider: openai_compatible
|
||||
display_provider: mistral
|
||||
api_key_env: MISTRAL_API_KEY
|
||||
base_url: https://api.mistral.ai/v1
|
||||
defaults:
|
||||
supports_tools: true
|
||||
context_window: 128000
|
||||
models:
|
||||
- id: mistral-large-latest
|
||||
display_name: Mistral Large
|
||||
- id: mistral-small-latest
|
||||
display_name: Mistral Small
|
||||
```
|
||||
|
||||
After restart, those models appear in `/api/models` and are selectable
|
||||
in the UI. A working template lives at
|
||||
`application/core/models/examples/mistral.yaml.example`.
|
||||
|
||||
**What you can do:**
|
||||
|
||||
- Add new `openai_compatible` providers (Mistral, Together, Fireworks,
|
||||
Ollama, vLLM, ...) — one YAML per provider, each with its own
|
||||
`api_key_env` and `base_url`.
|
||||
- Extend an existing provider's catalog by dropping a YAML with the
|
||||
same `provider:` value as the built-in (e.g. `provider: anthropic`
|
||||
with extra models).
|
||||
- Override a built-in model's capabilities by re-declaring the same
|
||||
`id` — later wins, override is logged at `WARNING`.
|
||||
|
||||
**What you cannot do via `MODELS_CONFIG_DIR`:** add a brand-new
|
||||
non-OpenAI provider. That requires a Python plugin under
|
||||
`application/llm/providers/`. See
|
||||
`application/core/models/README.md` for the full schema reference.
|
||||
|
||||
### Docker
|
||||
|
||||
Mount the directory and set the env var:
|
||||
|
||||
```yaml
|
||||
# docker-compose.yml
|
||||
services:
|
||||
app:
|
||||
image: arc53/docsgpt
|
||||
environment:
|
||||
MODELS_CONFIG_DIR: /etc/docsgpt/models
|
||||
MISTRAL_API_KEY: ${MISTRAL_API_KEY}
|
||||
volumes:
|
||||
- ./my-models:/etc/docsgpt/models:ro
|
||||
```
|
||||
|
||||
### Misconfiguration
|
||||
|
||||
If `MODELS_CONFIG_DIR` is set but the path doesn't exist (or isn't a
|
||||
directory), the app logs a `WARNING` at boot and continues with just
|
||||
the built-in catalog — it does **not** fail to start. If a YAML
|
||||
declares an unknown provider name or has a schema error, the app
|
||||
**does** fail to start, with the offending file path in the message.
|
||||
|
||||
## Speech-to-Text Settings
|
||||
|
||||
DocsGPT can transcribe audio in two places:
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
---
|
||||
title: Observability
|
||||
description: Send traces, metrics, and logs from DocsGPT to any OpenTelemetry-compatible backend (Axiom, Honeycomb, Grafana, Datadog, Jaeger, etc.).
|
||||
---
|
||||
|
||||
import { Callout } from 'nextra/components'
|
||||
|
||||
# Observability
|
||||
|
||||
DocsGPT bundles the OpenTelemetry SDK and auto-instrumentation packages
|
||||
in `application/requirements.txt` — they install with the rest of the
|
||||
backend deps. Telemetry is **off by default**; opt in by prefixing the
|
||||
launch command with `opentelemetry-instrument` and setting OTLP env
|
||||
vars.
|
||||
|
||||
Auto-instrumentation covers Flask, Starlette, Celery, SQLAlchemy,
|
||||
psycopg, Redis, requests, and Python logging. LLM/retriever calls are
|
||||
not captured at this layer — see *Going further* below.
|
||||
|
||||
## Enabling
|
||||
|
||||
Set these env vars in your `.env` (or compose `environment:` block):
|
||||
|
||||
```bash
|
||||
OTEL_SDK_DISABLED=false
|
||||
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=https://your-collector.example.com
|
||||
OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20<token>
|
||||
OTEL_TRACES_EXPORTER=otlp
|
||||
OTEL_METRICS_EXPORTER=otlp
|
||||
OTEL_LOGS_EXPORTER=otlp
|
||||
OTEL_PYTHON_LOG_CORRELATION=true
|
||||
OTEL_RESOURCE_ATTRIBUTES=service.name=docsgpt-backend,deployment.environment=prod
|
||||
```
|
||||
|
||||
Then prefix the process command with `opentelemetry-instrument`. The
|
||||
simplest way is a compose override (no image rebuild):
|
||||
|
||||
```yaml
|
||||
# deployment/docker-compose.override.yaml
|
||||
services:
|
||||
backend:
|
||||
command: >
|
||||
opentelemetry-instrument gunicorn -w 1 -k uvicorn_worker.UvicornWorker
|
||||
--bind 0.0.0.0:7091 --config application/gunicorn_conf.py
|
||||
application.asgi:asgi_app
|
||||
environment:
|
||||
- OTEL_SERVICE_NAME=docsgpt-backend
|
||||
worker:
|
||||
command: opentelemetry-instrument celery -A application.app.celery worker -l INFO -B
|
||||
environment:
|
||||
- OTEL_SERVICE_NAME=docsgpt-celery-worker
|
||||
```
|
||||
|
||||
For local dev, prepend `dotenv run --` so the `OTEL_*` vars from `.env`
|
||||
reach `opentelemetry-instrument` before it boots the SDK:
|
||||
|
||||
```bash
|
||||
dotenv run -- opentelemetry-instrument flask --app application/app.py run --port=7091
|
||||
dotenv run -- opentelemetry-instrument celery -A application.app.celery worker -l INFO --pool=solo
|
||||
```
|
||||
|
||||
|
||||
<Callout type="info" emoji="ℹ️">
|
||||
Logs are exported in-process when `OTEL_LOGS_EXPORTER=otlp` is set —
|
||||
`application/core/logging_config.py` detects the flag and preserves
|
||||
the OTEL log handler. Without it, `logging` writes only to stdout.
|
||||
</Callout>
|
||||
|
||||
## Backend examples
|
||||
|
||||
### Axiom
|
||||
|
||||
```bash
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=https://api.axiom.co
|
||||
OTEL_EXPORTER_OTLP_HEADERS=Authorization=Bearer%20xaat-XXXX,X-Axiom-Dataset=docsgpt
|
||||
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf
|
||||
```
|
||||
|
||||
`%20` is the URL-encoded space between `Bearer` and the token. Create
|
||||
the dataset in the Axiom UI before sending.
|
||||
|
||||
### Self-hosted OTLP collector / Jaeger / Tempo
|
||||
|
||||
```bash
|
||||
OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
|
||||
OTEL_EXPORTER_OTLP_PROTOCOL=grpc
|
||||
```
|
||||
|
||||
### Honeycomb / Grafana Cloud / Datadog
|
||||
|
||||
Each vendor publishes a single-line `OTEL_EXPORTER_OTLP_ENDPOINT` plus
|
||||
`OTEL_EXPORTER_OTLP_HEADERS` recipe — drop them in alongside the
|
||||
service-name override.
|
||||
|
||||
## Caveats
|
||||
|
||||
- The Dockerfile uses `gunicorn -w 1`. If you raise worker count, move
|
||||
SDK init into a `post_worker_init` hook to avoid one-thread-per-process
|
||||
exporter contention.
|
||||
- `asgi.py` wraps Flask in Starlette's `WSGIMiddleware`. Both
|
||||
instrumentors are installed, so each request produces a Starlette
|
||||
span enclosing a Flask span. Drop
|
||||
`opentelemetry-instrumentation-flask` from `requirements.txt` if the
|
||||
duplication is noisy.
|
||||
- OTEL packages add ~50 MB to the image. They install on every build —
  the runtime cost is zero unless you prefix the launch command with
  `opentelemetry-instrument` and set the OTLP env vars.
|
||||
- The OTEL exporter ecosystem currently caps `protobuf` at `<7`, so the
|
||||
backend runs on protobuf 6.x. This will catch up in a future OTEL
|
||||
release.
|
||||
@@ -23,10 +23,6 @@ export default {
|
||||
"title": "🐘 PostgreSQL for User Data",
|
||||
"href": "/Deploying/Postgres-Migration"
|
||||
},
|
||||
"Observability": {
|
||||
"title": "🔭 Observability",
|
||||
"href": "/Deploying/Observability"
|
||||
},
|
||||
"Amazon-Lightsail": {
|
||||
"title": "Hosting DocsGPT on Amazon Lightsail",
|
||||
"href": "/Deploying/Amazon-Lightsail",
|
||||
|
||||
38
docs/content/Extensions/Chrome-extension.mdx
Normal file
@@ -0,0 +1,38 @@
|
||||
---
|
||||
title: Add DocsGPT Chrome Extension to Your Browser
|
||||
description: Install the DocsGPT Chrome extension to access AI-powered document assistance directly from your browser for enhanced productivity.
|
||||
---
|
||||
|
||||
import {Steps} from 'nextra/components'
|
||||
import { Callout } from 'nextra/components'
|
||||
|
||||
|
||||
## Chrome Extension Setup Guide
|
||||
|
||||
To enhance your DocsGPT experience, you can install the DocsGPT Chrome extension. Here's how:
|
||||
<Steps >
|
||||
### Step 1
|
||||
|
||||
|
||||
|
||||
In the DocsGPT GitHub repository, click on the **Code** button and select **Download ZIP**.
|
||||
### Step 2
|
||||
Unzip the downloaded file to a location you can easily access.
|
||||
### Step 3
|
||||
Open the Google Chrome browser and click on the three dots menu (upper right corner).
|
||||
### Step 4
|
||||
Select **More Tools** and then **Extensions**.
|
||||
### Step 5
|
||||
Turn on the **Developer mode** switch in the top right corner of the **Extensions page**.
|
||||
### Step 6
|
||||
Click on the **Load unpacked** button.
|
||||
### Step 7
|
||||
Select the **Chrome** folder where the DocsGPT files have been unzipped (docsgpt-main > extensions > chrome).
|
||||
### Step 8
|
||||
The extension should now be added to Google Chrome and can be managed on the Extensions page.
|
||||
### Step 9
|
||||
To disable or remove the extension, simply turn off the toggle switch on the extension card or click the **Remove** button.
|
||||
</Steps>
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,10 @@ export default {
|
||||
"title": "🔎 Search Widget",
|
||||
"href": "/Extensions/search-widget"
|
||||
},
|
||||
"Chrome-extension": {
|
||||
"title": "🌐 Chrome Extension",
|
||||
"href": "/Extensions/Chrome-extension"
|
||||
},
|
||||
"Chatwoot-extension": {
|
||||
"title": "🗣️ Chatwoot Extension",
|
||||
"href": "/Extensions/Chatwoot-extension"
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
export default {
|
||||
"index": "Home",
|
||||
"quickstart": "Quickstart",
|
||||
"upgrading": "Upgrading",
|
||||
"Deploying": "Deploying",
|
||||
"Models": "Models",
|
||||
"Tools": "Tools",
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
---
|
||||
title: Upgrading DocsGPT
|
||||
description: Upgrade your DocsGPT deployment across Docker Compose, source builds, and Kubernetes.
|
||||
---
|
||||
|
||||
import { Callout } from 'nextra/components'
|
||||
|
||||
# Upgrading DocsGPT
|
||||
|
||||
<Callout type="warning">
|
||||
**Upgrading from 0.16.x?** User data moved from MongoDB to Postgres in 0.17.0. Follow the [Postgres Migration guide](/Deploying/Postgres-Migration) before running `docker compose pull` or `git pull` — existing deployments will not start cleanly without it.
|
||||
</Callout>
|
||||
|
||||
## Check your version
|
||||
|
||||
```bash
|
||||
docker compose exec backend python -c "from application.version import get_version; print(get_version())"
|
||||
```
|
||||
|
||||
Release notes: [changelog](/changelog). Tags: [GitHub releases](https://github.com/arc53/DocsGPT/releases).
|
||||
|
||||
## Docker Compose — hub images
|
||||
|
||||
```bash
|
||||
cd DocsGPT/deployment
|
||||
docker compose -f docker-compose-hub.yaml pull
|
||||
docker compose -f docker-compose-hub.yaml up -d
|
||||
```
|
||||
|
||||
`pull` fetches the latest image for whichever tag your compose file references. To move to a specific release, edit `image: arc53/docsgpt:<tag>` first.
|
||||
|
||||
## Docker Compose — from source
|
||||
|
||||
```bash
|
||||
cd DocsGPT
|
||||
git pull
|
||||
docker compose -f deployment/docker-compose.yaml build
|
||||
docker compose -f deployment/docker-compose.yaml up -d
|
||||
```
|
||||
|
||||
Swap `git pull` for `git checkout <tag>` if you want to pin a specific release.
|
||||
|
||||
## Kubernetes
|
||||
|
||||
```bash
|
||||
kubectl set image deployment/docsgpt-backend backend=arc53/docsgpt:<tag>
|
||||
kubectl set image deployment/docsgpt-worker worker=arc53/docsgpt:<tag>
|
||||
kubectl rollout status deployment/docsgpt-backend
|
||||
kubectl rollout status deployment/docsgpt-worker
|
||||
```
|
||||
|
||||
Full manifests: [Kubernetes deployment guide](/Deploying/Kubernetes-Deploying).
|
||||
|
||||
## Migrations
|
||||
|
||||
Alembic migrations run on worker startup. To apply manually:
|
||||
|
||||
```bash
|
||||
docker compose exec backend alembic -c application/alembic.ini upgrade head
|
||||
```
|
||||
|
||||
`upgrade head` is idempotent.
|
||||
|
||||
## Rollback
|
||||
|
||||
Set the image tag to the previous release and `up -d` again. Schema changes are not reversible without a backup — take one before upgrading any release that mentions migrations in the changelog.
|
||||
66
extensions/chrome/_locales/en/messages.json
Normal file
@@ -0,0 +1,66 @@
|
||||
{
|
||||
"l10nTabName": {
|
||||
"message":"Localization"
|
||||
,"description":"name of the localization tab"
|
||||
}
|
||||
,"l10nHeader": {
|
||||
"message":"It does localization too! (this whole tab is, actually)"
|
||||
,"description":"Header text for the localization section"
|
||||
}
|
||||
,"l10nIntro": {
|
||||
"message":"'L10n' refers to 'Localization' - 'L' an 'n' are obvious, and 10 comes from the number of letters between those two. It is the process/whatever of displaying something in the language of choice. It uses 'I18n', 'Internationalization', which refers to the tools / framework supporting L10n. I.e., something is internationalized if it has I18n support, and can be localized. Something is localized for you if it is in your language / dialect."
|
||||
,"description":"introduce the basic idea."
|
||||
}
|
||||
,"l10nProd": {
|
||||
"message":"You <strong>are</strong> planning to allow localization, right? You have <em>no idea</em> who will be using your extension! You have no idea who will be translating it! At least support the basics, it's not hard, and having the framework in place will let you transition much more easily later on."
|
||||
,"description":"drive the point home. It's good for you."
|
||||
}
|
||||
,"l10nFirstParagraph": {
|
||||
"message":"When the options page loads, elements decorated with <strong>data-l10n</strong> will automatically be localized!"
|
||||
,"description":"inform that <el data-l10n='' /> elements will be localized on load"
|
||||
}
|
||||
,"l10nSecondParagraph": {
|
||||
"message":"If you need more complex localization, you can also define <strong>data-l10n-args</strong>. This should contain <span class='code'>$containerType$</span> filled with <span class='code'>$dataType$</span>, which will be passed into Chrome's i18n API as <span class='code'>$functionArgs$</span>. In fact, this paragraph does just that, and wraps the args in mono-space font. Easy!"
|
||||
,"description":"introduce the data-l10n-args attribute. End on a lame note."
|
||||
,"placeholders": {
|
||||
"containerType": {
|
||||
"content":"$1"
|
||||
,"example":"'array', 'list', or something similar"
|
||||
,"description":"type of the args container"
|
||||
}
|
||||
,"dataType": {
|
||||
"content":"$2"
|
||||
,"example":"string"
|
||||
,"description":"type of data in each array index"
|
||||
}
|
||||
,"functionArgs": {
|
||||
"content":"$3"
|
||||
,"example":"arguments"
|
||||
,"description":"whatever you call what you pass into a function/method. args, params, etc."
|
||||
}
|
||||
}
|
||||
}
|
||||
,"l10nThirdParagraph": {
|
||||
"message":"Message contents are passed right into innerHTML without processing - include any tags (or even scripts) that you feel like. If you have an input field, the placeholder will be set instead, and buttons will have the value attribute set."
|
||||
,"description":"inform that we handle placeholders, buttons, and direct HTML input"
|
||||
}
|
||||
,"l10nButtonsBefore": {
|
||||
"message":"Different types of buttons are handled as well. <button> elements have their html set:"
|
||||
}
|
||||
,"l10nButton": {
|
||||
"message":"in a <strong>button</strong>"
|
||||
}
|
||||
,"l10nButtonsBetween": {
|
||||
"message":"while <input type='submit'> and <input type='button'> get their 'value' set (note: no HTML):"
|
||||
}
|
||||
,"l10nSubmit": {
|
||||
"message":"a <strong>submit</strong> value"
|
||||
}
|
||||
,"l10nButtonsAfter": {
|
||||
"message":"Awesome, no?"
|
||||
}
|
||||
,"l10nExtras": {
|
||||
"message":"You can even set <span class='code'>data-l10n</span> on things like the <title> tag, which lets you have translatable page titles, or fieldset <legend> tags, or anywhere else - the default <span class='code'>Boil.localize()</span> behavior will check every tag in the document, not just the body."
|
||||
,"description":"inform about places which may not be obvious, like <title>, etc"
|
||||
}
|
||||
}
|
||||
678
extensions/chrome/dist/output.css
vendored
Normal file
@@ -0,0 +1,678 @@
|
||||
/*
|
||||
! tailwindcss v3.2.7 | MIT License | https://tailwindcss.com
|
||||
*/
|
||||
|
||||
/*
|
||||
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
|
||||
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
|
||||
*/
|
||||
|
||||
*,
|
||||
::before,
|
||||
::after {
|
||||
box-sizing: border-box;
|
||||
/* 1 */
|
||||
border-width: 0;
|
||||
/* 2 */
|
||||
border-style: solid;
|
||||
/* 2 */
|
||||
border-color: #e5e7eb;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
::before,
|
||||
::after {
|
||||
--tw-content: '';
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use a consistent sensible line-height in all browsers.
|
||||
2. Prevent adjustments of font size after orientation changes in iOS.
|
||||
3. Use a more readable tab size.
|
||||
4. Use the user's configured `sans` font-family by default.
|
||||
5. Use the user's configured `sans` font-feature-settings by default.
|
||||
*/
|
||||
|
||||
html {
|
||||
line-height: 1.5;
|
||||
/* 1 */
|
||||
-webkit-text-size-adjust: 100%;
|
||||
/* 2 */
|
||||
-moz-tab-size: 4;
|
||||
/* 3 */
|
||||
-o-tab-size: 4;
|
||||
tab-size: 4;
|
||||
/* 3 */
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
||||
/* 4 */
|
||||
font-feature-settings: normal;
|
||||
/* 5 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove the margin in all browsers.
|
||||
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
|
||||
*/
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Add the correct height in Firefox.
|
||||
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
|
||||
3. Ensure horizontal rules are visible by default.
|
||||
*/
|
||||
|
||||
hr {
|
||||
height: 0;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 2 */
|
||||
border-top-width: 1px;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct text decoration in Chrome, Edge, and Safari.
|
||||
*/
|
||||
|
||||
abbr:where([title]) {
|
||||
-webkit-text-decoration: underline dotted;
|
||||
text-decoration: underline dotted;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the default font size and weight for headings.
|
||||
*/
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
font-size: inherit;
|
||||
font-weight: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Reset links to optimize for opt-in styling instead of opt-out.
|
||||
*/
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font weight in Edge and Safari.
|
||||
*/
|
||||
|
||||
b,
|
||||
strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use the user's configured `mono` font family by default.
|
||||
2. Correct the odd `em` font sizing in all browsers.
|
||||
*/
|
||||
|
||||
code,
|
||||
kbd,
|
||||
samp,
|
||||
pre {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
/* 1 */
|
||||
font-size: 1em;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font size in all browsers.
|
||||
*/
|
||||
|
||||
small {
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
|
||||
*/
|
||||
|
||||
sub,
|
||||
sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
|
||||
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
|
||||
3. Remove gaps between table borders by default.
|
||||
*/
|
||||
|
||||
table {
|
||||
text-indent: 0;
|
||||
/* 1 */
|
||||
border-color: inherit;
|
||||
/* 2 */
|
||||
border-collapse: collapse;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Change the font styles in all browsers.
|
||||
2. Remove the margin in Firefox and Safari.
|
||||
3. Remove default padding in all browsers.
|
||||
*/
|
||||
|
||||
button,
|
||||
input,
|
||||
optgroup,
|
||||
select,
|
||||
textarea {
|
||||
font-family: inherit;
|
||||
/* 1 */
|
||||
font-size: 100%;
|
||||
/* 1 */
|
||||
font-weight: inherit;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 1 */
|
||||
margin: 0;
|
||||
/* 2 */
|
||||
padding: 0;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inheritance of text transform in Edge and Firefox.
|
||||
*/
|
||||
|
||||
button,
|
||||
select {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Remove default button styles.
|
||||
*/
|
||||
|
||||
button,
|
||||
[type='button'],
|
||||
[type='reset'],
|
||||
[type='submit'] {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
background-color: transparent;
|
||||
/* 2 */
|
||||
background-image: none;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Use the modern Firefox focus style for all focusable elements.
|
||||
*/
|
||||
|
||||
:-moz-focusring {
|
||||
outline: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
|
||||
*/
|
||||
|
||||
:-moz-ui-invalid {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct vertical alignment in Chrome and Firefox.
|
||||
*/
|
||||
|
||||
progress {
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
/*
|
||||
Correct the cursor style of increment and decrement buttons in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-inner-spin-button,
|
||||
::-webkit-outer-spin-button {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the odd appearance in Chrome and Safari.
|
||||
2. Correct the outline style in Safari.
|
||||
*/
|
||||
|
||||
[type='search'] {
|
||||
-webkit-appearance: textfield;
|
||||
/* 1 */
|
||||
outline-offset: -2px;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inner padding in Chrome and Safari on macOS.
|
||||
*/
|
||||
|
||||
::-webkit-search-decoration {
|
||||
-webkit-appearance: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Change font properties to `inherit` in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-file-upload-button {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
font: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct display in Chrome and Safari.
|
||||
*/
|
||||
|
||||
summary {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/*
|
||||
Removes the default spacing and border for appropriate elements.
|
||||
*/
|
||||
|
||||
blockquote,
|
||||
dl,
|
||||
dd,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
hr,
|
||||
figure,
|
||||
p,
|
||||
pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
fieldset {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
legend {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ol,
|
||||
ul,
|
||||
menu {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent resizing textareas horizontally by default.
|
||||
*/
|
||||
|
||||
textarea {
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
|
||||
2. Set the default placeholder color to the user's configured gray 400 color.
|
||||
*/
|
||||
|
||||
input::-moz-placeholder, textarea::-moz-placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
input::placeholder,
|
||||
textarea::placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Set the default cursor for buttons.
|
||||
*/
|
||||
|
||||
button,
|
||||
[role="button"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure disabled buttons don't get the pointer cursor.
|
||||
*/
|
||||
|
||||
:disabled {
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block;
|
||||
/* 1 */
|
||||
vertical-align: middle;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Make elements with the HTML hidden attribute stay hidden by default */
|
||||
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
*, ::before, ::after {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
::backdrop {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
.mb-2 {
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.ml-2 {
|
||||
margin-left: 0.5rem;
|
||||
}
|
||||
|
||||
.mr-2 {
|
||||
margin-right: 0.5rem;
|
||||
}
|
||||
|
||||
.mt-4 {
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.w-\[26rem\] {
|
||||
width: 26rem;
|
||||
}
|
||||
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.flex-col {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.items-center {
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.justify-between {
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.self-start {
|
||||
align-self: flex-start;
|
||||
}
|
||||
|
||||
.self-end {
|
||||
align-self: flex-end;
|
||||
}
|
||||
|
||||
.rounded-lg {
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
.bg-blue-500 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(59 130 246 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-gray-200 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(229 231 235 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-gray-900 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(17 24 39 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-indigo-500 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(99 102 241 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-white {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.p-2 {
|
||||
padding: 0.5rem;
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.text-lg {
|
||||
font-size: 1.125rem;
|
||||
line-height: 1.75rem;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.25rem;
|
||||
}
|
||||
|
||||
.font-medium {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.text-blue-500 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(59 130 246 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(55 65 81 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-white {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(255 255 255 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.shadow {
|
||||
--tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
|
||||
--tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
|
||||
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
|
||||
}
|
||||
|
||||
#chat-container {
|
||||
width: 500px;
|
||||
height: 450px;
|
||||
background-color: white;
|
||||
padding: 10px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.bg-gray-200 {
|
||||
background-color: #edf2f7;
|
||||
}
|
||||
|
||||
.bg-gray-900 {
|
||||
background-color: #1a202c;
|
||||
}
|
||||
|
||||
.rounded-lg {
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
.shadow {
|
||||
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
color: #4a5568;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1.5rem;
|
||||
}
|
||||
|
||||
.hover\:text-blue-800:hover {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(30 64 175 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
|
||||
|
||||
BIN
extensions/chrome/icons/icon128.png
Normal file
|
After Width: | Height: | Size: 15 KiB |
BIN
extensions/chrome/icons/icon16 2.png
Normal file
|
After Width: | Height: | Size: 509 B |
BIN
extensions/chrome/icons/icon16.png
Normal file
|
After Width: | Height: | Size: 789 B |
BIN
extensions/chrome/icons/icon19.png
Normal file
|
After Width: | Height: | Size: 749 B |
BIN
extensions/chrome/icons/icon24.png
Normal file
|
After Width: | Height: | Size: 930 B |
BIN
extensions/chrome/icons/icon256.png
Normal file
|
After Width: | Height: | Size: 62 KiB |
BIN
extensions/chrome/icons/icon32.png
Normal file
|
After Width: | Height: | Size: 1.4 KiB |
BIN
extensions/chrome/icons/icon48.png
Normal file
|
After Width: | Height: | Size: 2.7 KiB |
3
extensions/chrome/js/jquery/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
build
|
||||
jquery-migrate.js
|
||||
jquery-migrate.min.js
|
||||
4
extensions/chrome/js/jquery/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
jQuery Component
|
||||
================
|
||||
|
||||
Shim repository for jQuery.
|
||||
13
extensions/chrome/js/jquery/bower.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "jquery",
|
||||
"version": "2.0.0",
|
||||
"description": "jQuery component",
|
||||
"keywords": [
|
||||
"jquery",
|
||||
"component"
|
||||
],
|
||||
"scripts": [
|
||||
"jquery.js"
|
||||
],
|
||||
"license": "MIT"
|
||||
}
|
||||
21
extensions/chrome/js/jquery/component.json
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"name": "jquery",
|
||||
"version": "2.0.0",
|
||||
"description": "jQuery component",
|
||||
"keywords": [
|
||||
"jquery",
|
||||
"component"
|
||||
],
|
||||
"scripts": [
|
||||
"jquery.js"
|
||||
],
|
||||
"license": "MIT",
|
||||
"gitHead": "46f8412bd1bb9b1b30b5b0eb88560e2d4196509c",
|
||||
"readme": "jQuery Component\n================\n\nShim repository for jQuery.\n",
|
||||
"readmeFilename": "README.md",
|
||||
"_id": "jquery@2.0.0",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "git://github.com/components/jquery.git"
|
||||
}
|
||||
}
|
||||
30
extensions/chrome/js/jquery/composer.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"name": "components/jquery",
|
||||
"description": "jQuery JavaScript Library",
|
||||
"type": "component",
|
||||
"homepage": "http://jquery.com",
|
||||
"license": "MIT",
|
||||
"support": {
|
||||
"irc": "irc://irc.freenode.org/jquery",
|
||||
"issues": "http://bugs.jquery.com",
|
||||
"forum": "http://forum.jquery.com",
|
||||
"wiki": "http://docs.jquery.com/",
|
||||
"source": "https://github.com/jquery/jquery"
|
||||
},
|
||||
"authors": [
|
||||
{
|
||||
"name": "John Resig",
|
||||
"email": "jeresig@gmail.com"
|
||||
}
|
||||
],
|
||||
"require": {
|
||||
"robloach/component-installer": "*"
|
||||
},
|
||||
"extra": {
|
||||
"component": {
|
||||
"scripts": [
|
||||
"jquery.js"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
8755
extensions/chrome/js/jquery/jquery.js
vendored
Normal file
6
extensions/chrome/js/jquery/jquery.min.js
vendored
Normal file
7
extensions/chrome/js/jquery/package.json
Normal file
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"name": "components-jquery",
|
||||
"version": "2.0.0",
|
||||
"description": "jQuery component",
|
||||
"keywords": ["jquery"],
|
||||
"main": "./jquery.js"
|
||||
}
|
||||
28
extensions/chrome/manifest.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"name": "DocsGPT - Documentation AI butler",
|
||||
"version": "0.0.1",
|
||||
"manifest_version": 3,
|
||||
"description": "AI assistant for developers, that helps you answer your questions about the documentation you are reading.",
|
||||
"icons": {
|
||||
"16": "icons/icon16.png",
|
||||
"48": "icons/icon48.png",
|
||||
"128": "icons/icon128.png"
|
||||
},
|
||||
"default_locale": "en",
|
||||
"background": {
|
||||
"service_worker": "src/bg/service-worker.js"
|
||||
},
|
||||
"action": {
|
||||
"default_title": "DocsGPT - Documentation AI butler",
|
||||
"default_popup": "popup.html"
|
||||
},
|
||||
"permissions": ["activeTab", "storage"],
|
||||
"host_permissions": [
|
||||
"*://*/*"
|
||||
],
|
||||
"content_scripts": [{
|
||||
"js": ["popup.js"],
|
||||
"matches": ["https://github.com/*"]
|
||||
}]
|
||||
|
||||
}
|
||||
1346
extensions/chrome/package-lock.json
generated
Normal file
20
extensions/chrome/package.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"name": "docsgpt-chrome-extension",
|
||||
"version": "0.0.1",
|
||||
"description": "DocsGPT - Documentation AI butler",
|
||||
"main": "popup.js",
|
||||
"author": "",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"dev": "npx tailwindcss -i ./styles.css -o ./dist/output.css --watch"
|
||||
},
|
||||
"keywords": [
|
||||
"DocsGPT",
|
||||
"Documentation",
|
||||
"Chrome",
|
||||
"extension"
|
||||
],
|
||||
"devDependencies": {
|
||||
"tailwindcss": "^3.2.4"
|
||||
}
|
||||
}
|
||||
56
extensions/chrome/popup.html
Normal file
@@ -0,0 +1,56 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Chat Extension</title>
|
||||
<link href="/dist/output.css" rel="stylesheet">
|
||||
<!-- <link rel="stylesheet" href="styles.css"> -->
|
||||
|
||||
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<header class="bg-white p-2 flex justify-between items-center">
|
||||
<h1 class="text-lg font-medium">DocsGPT</h1>
|
||||
<a href="#about" class="text-blue-500 hover:text-blue-800">About</a>
|
||||
</header>
|
||||
<div class="w-full flex flex-col">
|
||||
<div id="chat-container">
|
||||
|
||||
<div id="messages" class="w-full flex flex-col">
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Hello, ask me anything about this library. I'm here to help</p>
|
||||
</div>
|
||||
<div class="bg-purple-30 text-white p-2 rounded-lg mb-2 self-end">
|
||||
<p class="text-sm">How to create API key for Api gateway?</p>
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Import the boto3 library and create a client for the API Gateway service:</p>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<code class="text-sm">client = boto3.client('apigateway')</code>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Create an API key:</p>
|
||||
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<code class="text-sm">response = client.create_api_key(<br>name='API_KEY_NAME',<br>description='API key description',<br>enabled=True)<br>api_key = response['value']</code>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class=" flex mt-4 mb-2">
|
||||
<form id="message-form">
|
||||
<input id="message-input" class="bg-white p-2 rounded-lg ml-2 w-[26rem]" type="text" placeholder="Type your message here...">
|
||||
<button class="bg-purple-30 text-white p-2 rounded-lg ml-2 mr-2 ml-2" type="submit">Send</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
<script src="popup.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
46
extensions/chrome/popup.js
Normal file
@@ -0,0 +1,46 @@
|
||||
// Handles the popup chat form: echoes the user's message into the chat log,
// notifies the background service worker, and asks the local DocsGPT API for
// an answer, which is appended to the conversation.
document.getElementById("message-form").addEventListener("submit", function (event) {
  event.preventDefault();
  var message = document.getElementById("message-input").value;

  // Append one chat bubble and keep the log scrolled to the bottom.
  // Built with createElement/textContent instead of `innerHTML +=` so that
  // neither the user's text nor the API answer can inject markup (XSS fix:
  // the original concatenated raw strings into innerHTML, and also leaked
  // `msg_html` as an implicit global).
  function appendBubble(tag, bubbleClass, text) {
    var bubble = document.createElement("div");
    bubble.className = bubbleClass;
    var inner = document.createElement(tag);
    inner.className = "text-sm";
    inner.textContent = text;
    bubble.appendChild(inner);
    document.getElementById("messages").appendChild(bubble);
    var chatWindow = document.getElementById("chat-container");
    chatWindow.scrollTop = chatWindow.scrollHeight;
  }

  // Ping the service worker, then render the user's own message.
  chrome.runtime.sendMessage({ msg: "sendMessage", message: message }, function (response) {
    console.log(response.response);
    appendBubble("p", "bg-purple-30 text-white p-2 rounded-lg mb-2 self-end", message);
  });

  document.getElementById("message-input").value = "";

  // Initialise conversation state on first use.
  var conversation_state = localStorage.getItem("conversation_state");
  if (conversation_state == null) {
    conversation_state = 0;
    localStorage.setItem("conversation_state", conversation_state);
  }

  // Send the question to the local DocsGPT backend.
  fetch("http://127.0.0.1:7091/api/answer", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ question: message, history: null }),
  })
    .then(function (response) { return response.json(); })
    .then(function (data) {
      console.log("Success:", data);
      appendBubble("code", "bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start", data.answer);
    })
    .catch(function (err) {
      // Surface network/API failures instead of failing silently.
      console.error("DocsGPT request failed:", err);
    });
});
|
||||
|
||||
12
extensions/chrome/src/bg/service-worker.js
Normal file
@@ -0,0 +1,12 @@
|
||||
// Background service worker for the extension (the manifest-v3 replacement
// for a v2 background script); runs in its own context when the extension
// is installed, refreshed, or its console is opened.
console.log("This prints to the console of the service worker (background script)");

// Acknowledge "sendMessage" requests coming from the popup.
chrome.runtime.onMessage.addListener(function (request, sender, sendResponse) {
  if (request.msg === "sendMessage") {
    sendResponse({ response: "Message received" });
  }
});
|
||||
45
extensions/chrome/styles.css
Normal file
@@ -0,0 +1,45 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
|
||||
#chat-container {
|
||||
width: 500px;
|
||||
height: 450px;
|
||||
background-color: white;
|
||||
padding: 10px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
|
||||
|
||||
.bg-gray-200 {
|
||||
background-color: #edf2f7;
|
||||
}
|
||||
|
||||
.bg-gray-900 {
|
||||
background-color: #1a202c;
|
||||
}
|
||||
|
||||
.rounded-lg {
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
.shadow {
|
||||
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
color: #4a5568;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1.5rem;
|
||||
}
|
||||
|
||||
|
||||
|
||||
7
extensions/chrome/tailwind.config.js
Normal file
@@ -0,0 +1,7 @@
|
||||
module.exports = {
|
||||
content: ["./src/**/*.{html,js}", "./*.{html,js,css}"],
|
||||
theme: {
|
||||
extend: {},
|
||||
},
|
||||
plugins: [],
|
||||
}
|
||||
158
extensions/discord/bot.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
import aiohttp
|
||||
import discord
|
||||
from discord.ext import commands
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Enable logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Bot configuration
|
||||
TOKEN = os.getenv("DISCORD_TOKEN")
|
||||
PREFIX = '!' # Command prefix
|
||||
BASE_API_URL = os.getenv("API_BASE", "https://gptcloud.arc53.com")
|
||||
API_URL = BASE_API_URL + "/api/answer"
|
||||
API_KEY = os.getenv("API_KEY")
|
||||
|
||||
intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
|
||||
bot = commands.Bot(command_prefix=PREFIX, intents=intents)
|
||||
|
||||
# Store conversation history per user
|
||||
conversation_histories = {}
|
||||
|
||||
def chunk_string(text, max_length=2000):
    """Split *text* into pieces no longer than *max_length* characters.

    Prefers to cut at the last space before the limit so words stay intact;
    a stretch with no space is cut hard at *max_length*. The remainder is
    always appended, so the result list is never empty (an empty input
    yields [""]).
    """
    pieces = []
    remaining = text
    while len(remaining) > max_length:
        # Look for a word boundary inside the allowed window.
        cut = remaining.rfind(' ', 0, max_length)
        if cut == -1:
            # No space available: hard cut at the limit.
            pieces.append(remaining[:max_length])
            remaining = remaining[max_length:]
        else:
            # Split at the space and drop the space itself.
            pieces.append(remaining[:cut])
            remaining = remaining[cut + 1:]
    # Whatever is left fits within max_length.
    pieces.append(remaining)
    return pieces
|
||||
|
||||
def escape_markdown(text):
    """Escape Discord markdown control characters in *text*.

    Prefixes every markdown-significant character (asterisks, underscores,
    brackets, parentheses, etc.) with a backslash so the text renders
    literally in Discord instead of being interpreted as formatting.
    """
    # Fix: the character class previously contained '$$$$', an extraction
    # artifact of the square brackets '[]'; brackets are required to escape
    # Discord's masked-link syntax.
    escape_chars = r'\*_[]()~>#+-=|{}.!'
    return re.sub(f'([{re.escape(escape_chars)}])', r'\\\1', text)
|
||||
|
||||
def split_string(input_str):
    """Split a message into (mention, body) when it opens with a bot mention.

    Returns (str(bot.user.id), remaining_text) if *input_str* starts with a
    mention of this bot (either the <@id> or <@!id> form); otherwise
    (None, input_str) with the text untouched.
    """
    mention_pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
    hit = re.match(mention_pattern, input_str)
    if hit is None:
        return None, input_str
    body = input_str[hit.end():].strip()
    return str(bot.user.id), body
|
||||
|
||||
@bot.event
async def on_ready():
    """Log a startup notice once the gateway connection is established."""
    print(f'{bot.user.name} has connected to Discord!')
|
||||
|
||||
async def generate_answer(question, messages, conversation_id):
    """Ask the DocsGPT API for an answer.

    POSTs the question, conversation history, and conversation id to
    API_URL (60-second total timeout) and returns a dict with "answer" and
    "conversation_id" keys. Any non-200 response yields a fallback apology
    with conversation_id None.
    """
    request_body = {
        "question": question,
        "api_key": API_KEY,
        "history": messages,
        "conversation_id": conversation_id,
    }
    request_headers = {
        "Content-Type": "application/json; charset=utf-8",
    }
    client_timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession(timeout=client_timeout) as http:
        async with http.post(API_URL, json=request_body, headers=request_headers) as reply:
            if reply.status != 200:
                # API error: degrade gracefully rather than raise.
                return {"answer": "Sorry, I couldn't find an answer.", "conversation_id": None}
            payload = await reply.json()
            return {
                "answer": payload.get("answer", "Sorry, I couldn't find an answer."),
                "conversation_id": payload.get("conversation_id"),
            }
|
||||
|
||||
@bot.command(name="start")
async def start(ctx):
    """Greet the invoking user in response to the !start command."""
    greeting = f"Hi {ctx.author.mention}! How can I assist you today?"
    await ctx.send(greeting)
|
||||
|
||||
@bot.command(name="custom_help")
async def custom_help_command(ctx):
    """Send the list of supported commands for the !custom_help command."""
    lines = [
        "Here are the available commands:",
        "`!start` - Begin a new conversation with the bot",
        "`!help` - Display this help message",
        "",
        "You can also mention me or send a direct message to ask a question!",
    ]
    await ctx.send("\n".join(lines))
|
||||
|
||||
@bot.event
async def on_message(message):
    """Answer DMs and @-mentions by relaying the question to the API.

    Commands are dispatched first; free-form messages are answered only
    when they arrive via DM or open with a mention of this bot. A per-user
    record in conversation_histories carries the rolling history (trimmed
    to the last 10 entries) and the API-assigned conversation id.
    """
    # Never react to our own messages.
    if message.author == bot.user:
        return

    # Let the command framework handle !start / !custom_help first.
    await bot.process_commands(message)

    content = message.content.strip()
    if not isinstance(message.channel, discord.DMChannel):
        # In guild channels, only respond when the message starts with a
        # mention of this bot.
        prefix, content = split_string(content)
        if prefix is None:
            return
        if prefix != str(bot.user.id):
            return  # Bot not mentioned at the start, so do not process.

    # Lazily create this user's conversation record.
    user_id = message.author.id
    if user_id not in conversation_histories:
        conversation_histories[user_id] = {
            "history": [],
            "conversation_id": None,
        }
    record = conversation_histories[user_id]
    record["history"].append({"prompt": content})

    # Ask the backend, threading through the stored conversation id.
    api_result = await generate_answer(
        content,
        record["history"],
        record["conversation_id"],
    )
    answer = api_result["answer"]

    # Discord caps messages at 2000 characters, so send in chunks.
    for piece in chunk_string(answer):
        await message.channel.send(piece)

    record["history"][-1]["response"] = answer
    record["conversation_id"] = api_result["conversation_id"]
    # Bound memory: keep only the most recent 10 history entries.
    record["history"] = record["history"][-10:]
|
||||
|
||||
bot.run(TOKEN)
|
||||
182
extensions/react-widget/package-lock.json
generated
@@ -36,8 +36,8 @@
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-dom": "^19.2.3",
|
||||
"@typescript-eslint/eslint-plugin": "^8.59.0",
|
||||
"@typescript-eslint/parser": "^8.59.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"babel-loader": "^10.1.1",
|
||||
"eslint": "^9.39.4",
|
||||
"eslint-config-prettier": "^10.1.8",
|
||||
@@ -49,7 +49,7 @@
|
||||
"prettier": "^3.8.1",
|
||||
"process": "^0.11.10",
|
||||
"svgo": "^4.0.1",
|
||||
"typescript": "^6.0.3"
|
||||
"typescript": "^5.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/code-frame": {
|
||||
@@ -499,12 +499,11 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/plugin-syntax-flow": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.28.6.tgz",
|
||||
"integrity": "sha512-D+OrJumc9McXNEBI/JmFnc/0uCM2/Y3PEBG3gfV3QIYkKv5pvnpzFrl1kYCrcHJP8nOeFB/SHi1IHz29pNGuew==",
|
||||
"license": "MIT",
|
||||
"version": "7.24.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.24.6.tgz",
|
||||
"integrity": "sha512-gNkksSdV8RbsCoHF9sjVYrHfYACMl/8U32UfUhJ9+84/ASXw8dlx+eHyyF0m6ncQJ9IBSxfuCkB36GJqYdXTOA==",
|
||||
"dependencies": {
|
||||
"@babel/helper-plugin-utils": "^7.28.6"
|
||||
"@babel/helper-plugin-utils": "^7.24.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -865,13 +864,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/plugin-transform-flow-strip-types": {
|
||||
"version": "7.27.1",
|
||||
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-flow-strip-types/-/plugin-transform-flow-strip-types-7.27.1.tgz",
|
||||
"integrity": "sha512-G5eDKsu50udECw7DL2AcsysXiQyB7Nfg521t2OAJ4tbfTJ27doHLeF/vlI1NZGlLdbb/v+ibvtL1YBQqYOwJGg==",
|
||||
"license": "MIT",
|
||||
"version": "7.24.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-flow-strip-types/-/plugin-transform-flow-strip-types-7.24.6.tgz",
|
||||
"integrity": "sha512-1l8b24NoCpaQ13Vi6FtLG1nv6kNoi8PWvQb1AYO7GHZDpFfBYc3lbXArx1lP2KRt8b4pej1eWc/zrRmsQTfOdQ==",
|
||||
"dependencies": {
|
||||
"@babel/helper-plugin-utils": "^7.27.1",
|
||||
"@babel/plugin-syntax-flow": "^7.27.1"
|
||||
"@babel/helper-plugin-utils": "^7.24.6",
|
||||
"@babel/plugin-syntax-flow": "^7.24.6"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
@@ -4540,20 +4538,20 @@
|
||||
"devOptional": true
|
||||
},
|
||||
"node_modules/@typescript-eslint/eslint-plugin": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.0.tgz",
|
||||
"integrity": "sha512-HyAZtpdkgZwpq8Sz3FSUvCR4c+ScbuWa9AksK2Jweub7w4M3yTz4O11AqVJzLYjy/B9ZWPyc81I+mOdJU/bDQw==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.57.2.tgz",
|
||||
"integrity": "sha512-NZZgp0Fm2IkD+La5PR81sd+g+8oS6JwJje+aRWsDocxHkjyRw0J5L5ZTlN3LI1LlOcGL7ph3eaIUmTXMIjLk0w==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@eslint-community/regexpp": "^4.12.2",
|
||||
"@typescript-eslint/scope-manager": "8.59.0",
|
||||
"@typescript-eslint/type-utils": "8.59.0",
|
||||
"@typescript-eslint/utils": "8.59.0",
|
||||
"@typescript-eslint/visitor-keys": "8.59.0",
|
||||
"@typescript-eslint/scope-manager": "8.57.2",
|
||||
"@typescript-eslint/type-utils": "8.57.2",
|
||||
"@typescript-eslint/utils": "8.57.2",
|
||||
"@typescript-eslint/visitor-keys": "8.57.2",
|
||||
"ignore": "^7.0.5",
|
||||
"natural-compare": "^1.4.0",
|
||||
"ts-api-utils": "^2.5.0"
|
||||
"ts-api-utils": "^2.4.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
@@ -4563,22 +4561,22 @@
|
||||
"url": "https://opencollective.com/typescript-eslint"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@typescript-eslint/parser": "^8.59.0",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/parser": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.0.tgz",
|
||||
"integrity": "sha512-TI1XGwKbDpo9tRW8UDIXCOeLk55qe9ZFGs8MTKU6/M08HWTw52DD/IYhfQtOEhEdPhLMT26Ka/x7p70nd3dzDg==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.57.2.tgz",
|
||||
"integrity": "sha512-30ScMRHIAD33JJQkgfGW1t8CURZtjc2JpTrq5n2HFhOefbAhb7ucc7xJwdWcrEtqUIYJ73Nybpsggii6GtAHjA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/scope-manager": "8.59.0",
|
||||
"@typescript-eslint/types": "8.59.0",
|
||||
"@typescript-eslint/typescript-estree": "8.59.0",
|
||||
"@typescript-eslint/visitor-keys": "8.59.0",
|
||||
"@typescript-eslint/scope-manager": "8.57.2",
|
||||
"@typescript-eslint/types": "8.57.2",
|
||||
"@typescript-eslint/typescript-estree": "8.57.2",
|
||||
"@typescript-eslint/visitor-keys": "8.57.2",
|
||||
"debug": "^4.4.3"
|
||||
},
|
||||
"engines": {
|
||||
@@ -4590,18 +4588,18 @@
|
||||
},
|
||||
"peerDependencies": {
|
||||
"eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/project-service": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.0.tgz",
|
||||
"integrity": "sha512-Lw5ITrR5s5TbC19YSvlr63ZfLaJoU6vtKTHyB0GQOpX0W7d5/Ir6vUahWi/8Sps/nOukZQ0IB3SmlxZnjaKVnw==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.57.2.tgz",
|
||||
"integrity": "sha512-FuH0wipFywXRTHf+bTTjNyuNQQsQC3qh/dYzaM4I4W0jrCqjCVuUh99+xd9KamUfmCGPvbO8NDngo/vsnNVqgw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/tsconfig-utils": "^8.59.0",
|
||||
"@typescript-eslint/types": "^8.59.0",
|
||||
"@typescript-eslint/tsconfig-utils": "^8.57.2",
|
||||
"@typescript-eslint/types": "^8.57.2",
|
||||
"debug": "^4.4.3"
|
||||
},
|
||||
"engines": {
|
||||
@@ -4612,18 +4610,18 @@
|
||||
"url": "https://opencollective.com/typescript-eslint"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/scope-manager": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.0.tgz",
|
||||
"integrity": "sha512-UzR16Ut8IpA3Mc4DbgAShlPPkVm8xXMWafXxB0BocaVRHs8ZGakAxGRskF7FId3sdk9lgGD73GSFaWmWFDE4dg==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.57.2.tgz",
|
||||
"integrity": "sha512-snZKH+W4WbWkrBqj4gUNRIGb/jipDW3qMqVJ4C9rzdFc+wLwruxk+2a5D+uoFcKPAqyqEnSb4l2ULuZf95eSkw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/types": "8.59.0",
|
||||
"@typescript-eslint/visitor-keys": "8.59.0"
|
||||
"@typescript-eslint/types": "8.57.2",
|
||||
"@typescript-eslint/visitor-keys": "8.57.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
@@ -4634,9 +4632,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/tsconfig-utils": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.0.tgz",
|
||||
"integrity": "sha512-91Sbl3s4Kb3SybliIY6muFBmHVv+pYXfybC4Oolp3dvk8BvIE3wOPc+403CWIT7mJNkfQRGtdqghzs2+Z91Tqg==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.57.2.tgz",
|
||||
"integrity": "sha512-3Lm5DSM+DCowsUOJC+YqHHnKEfFh5CoGkj5Z31NQSNF4l5wdOwqGn99wmwN/LImhfY3KJnmordBq/4+VDe2eKw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
@@ -4647,21 +4645,21 @@
|
||||
"url": "https://opencollective.com/typescript-eslint"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/type-utils": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.0.tgz",
|
||||
"integrity": "sha512-3TRiZaQSltGqGeNrJzzr1+8YcEobKH9rHnqIp/1psfKFmhRQDNMGP5hBufanYTGznwShzVLs3Mz+gDN7HkWfXg==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.57.2.tgz",
|
||||
"integrity": "sha512-Co6ZCShm6kIbAM/s+oYVpKFfW7LBc6FXoPXjTRQ449PPNBY8U0KZXuevz5IFuuUj2H9ss40atTaf9dlGLzbWZg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/types": "8.59.0",
|
||||
"@typescript-eslint/typescript-estree": "8.59.0",
|
||||
"@typescript-eslint/utils": "8.59.0",
|
||||
"@typescript-eslint/types": "8.57.2",
|
||||
"@typescript-eslint/typescript-estree": "8.57.2",
|
||||
"@typescript-eslint/utils": "8.57.2",
|
||||
"debug": "^4.4.3",
|
||||
"ts-api-utils": "^2.5.0"
|
||||
"ts-api-utils": "^2.4.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
@@ -4672,13 +4670,13 @@
|
||||
},
|
||||
"peerDependencies": {
|
||||
"eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/types": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.0.tgz",
|
||||
"integrity": "sha512-nLzdsT1gdOgFxxxwrlNVUBzSNBEEHJ86bblmk4QAS6stfig7rcJzWKqCyxFy3YRRHXDWEkb2NralA1nOYkkm/A==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.57.2.tgz",
|
||||
"integrity": "sha512-/iZM6FnM4tnx9csuTxspMW4BOSegshwX5oBDznJ7S4WggL7Vczz5d2W11ecc4vRrQMQHXRSxzrCsyG5EsPPTbA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
@@ -4690,21 +4688,21 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.0.tgz",
|
||||
"integrity": "sha512-O9Re9P1BmBLFJyikRbQpLku/QA3/AueZNO9WePLBwQrvkixTmDe8u76B6CYUAITRl/rHawggEqUGn5QIkVRLMw==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.57.2.tgz",
|
||||
"integrity": "sha512-2MKM+I6g8tJxfSmFKOnHv2t8Sk3T6rF20A1Puk0svLK+uVapDZB/4pfAeB7nE83uAZrU6OxW+HmOd5wHVdXwXA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/project-service": "8.59.0",
|
||||
"@typescript-eslint/tsconfig-utils": "8.59.0",
|
||||
"@typescript-eslint/types": "8.59.0",
|
||||
"@typescript-eslint/visitor-keys": "8.59.0",
|
||||
"@typescript-eslint/project-service": "8.57.2",
|
||||
"@typescript-eslint/tsconfig-utils": "8.57.2",
|
||||
"@typescript-eslint/types": "8.57.2",
|
||||
"@typescript-eslint/visitor-keys": "8.57.2",
|
||||
"debug": "^4.4.3",
|
||||
"minimatch": "^10.2.2",
|
||||
"semver": "^7.7.3",
|
||||
"tinyglobby": "^0.2.15",
|
||||
"ts-api-utils": "^2.5.0"
|
||||
"ts-api-utils": "^2.4.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
@@ -4714,7 +4712,7 @@
|
||||
"url": "https://opencollective.com/typescript-eslint"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/typescript-estree/node_modules/semver": {
|
||||
@@ -4731,16 +4729,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/utils": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.0.tgz",
|
||||
"integrity": "sha512-I1R/K7V07XsMJ12Oaxg/O9GfrysGTmCRhvZJBv0RE0NcULMzjqVpR5kRRQjHsz3J/bElU7HwCO7zkqL+MSUz+g==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.57.2.tgz",
|
||||
"integrity": "sha512-krRIbvPK1ju1WBKIefiX+bngPs+odIQUtR7kymzPfo1POVw3jlF+nLkmexdSSd4UCbDcQn+wMBATOOmpBbqgKg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@eslint-community/eslint-utils": "^4.9.1",
|
||||
"@typescript-eslint/scope-manager": "8.59.0",
|
||||
"@typescript-eslint/types": "8.59.0",
|
||||
"@typescript-eslint/typescript-estree": "8.59.0"
|
||||
"@typescript-eslint/scope-manager": "8.57.2",
|
||||
"@typescript-eslint/types": "8.57.2",
|
||||
"@typescript-eslint/typescript-estree": "8.57.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
|
||||
@@ -4751,17 +4749,17 @@
|
||||
},
|
||||
"peerDependencies": {
|
||||
"eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
|
||||
"typescript": ">=4.8.4 <6.1.0"
|
||||
"typescript": ">=4.8.4 <6.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@typescript-eslint/visitor-keys": {
|
||||
"version": "8.59.0",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.0.tgz",
|
||||
"integrity": "sha512-/uejZt4dSere1bx12WLlPfv8GktzcaDtuJ7s42/HEZ5zGj9oxRaD4bj7qwSunXkf+pbAhFt2zjpHYUiT5lHf0Q==",
|
||||
"version": "8.57.2",
|
||||
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.57.2.tgz",
|
||||
"integrity": "sha512-zhahknjobV2FiD6Ee9iLbS7OV9zi10rG26odsQdfBO/hjSzUQbkIYgda+iNKK1zNiW2ey+Lf8MU5btN17V3dUw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@typescript-eslint/types": "8.59.0",
|
||||
"@typescript-eslint/types": "8.57.2",
|
||||
"eslint-visitor-keys": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
@@ -7806,13 +7804,13 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "10.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
|
||||
"integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
|
||||
"version": "10.2.4",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz",
|
||||
"integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==",
|
||||
"dev": true,
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^5.0.5"
|
||||
"brace-expansion": "^5.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "18 || 20 || >=22"
|
||||
@@ -8332,9 +8330,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/prettier": {
|
||||
"version": "3.8.3",
|
||||
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.3.tgz",
|
||||
"integrity": "sha512-7igPTM53cGHMW8xWuVTydi2KO233VFiTNyF5hLJqpilHfmn8C8gPf+PS7dUT64YcXFbiMGZxS9pCSxL/Dxm/Jw==",
|
||||
"version": "3.8.1",
|
||||
"resolved": "https://registry.npmjs.org/prettier/-/prettier-3.8.1.tgz",
|
||||
"integrity": "sha512-UOnG6LftzbdaHZcKoPFtOcCKztrQ57WkHDeRD9t/PTQtmT0NHSeWWepj6pS0z/N7+08BHFDQVUrfmfMRcZwbMg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
@@ -9097,14 +9095,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/tinyglobby": {
|
||||
"version": "0.2.16",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
|
||||
"integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
|
||||
"version": "0.2.15",
|
||||
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
|
||||
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fdir": "^6.5.0",
|
||||
"picomatch": "^4.0.4"
|
||||
"picomatch": "^4.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
@@ -9248,9 +9246,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/typescript": {
|
||||
"version": "6.0.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz",
|
||||
"integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==",
|
||||
"version": "5.9.3",
|
||||
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
|
||||
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"tsc": "bin/tsc",
|
||||
|
||||
@@ -69,8 +69,8 @@
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/react": "^19.2.14",
|
||||
"@types/react-dom": "^19.2.3",
|
||||
"@typescript-eslint/eslint-plugin": "^8.59.0",
|
||||
"@typescript-eslint/parser": "^8.59.0",
|
||||
"@typescript-eslint/eslint-plugin": "^8.57.2",
|
||||
"@typescript-eslint/parser": "^8.57.2",
|
||||
"babel-loader": "^10.1.1",
|
||||
"eslint": "^9.39.4",
|
||||
"eslint-config-prettier": "^10.1.8",
|
||||
@@ -82,7 +82,7 @@
|
||||
"prettier": "^3.8.1",
|
||||
"process": "^0.11.10",
|
||||
"svgo": "^4.0.1",
|
||||
"typescript": "^6.0.3"
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
"publishConfig": {
|
||||
"access": "public"
|
||||
|
||||
3
extensions/slack-bot/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
.env
|
||||
.venv/
|
||||
get-pip.py
|
||||
84
extensions/slack-bot/Readme.md
Normal file
@@ -0,0 +1,84 @@
|
||||
|
||||
# Slack Bot Configuration Guide
|
||||
|
||||
> **Note:** The following guidelines must be followed on the [Slack API website](https://api.slack.com/) for setting up your Slack app and generating the necessary tokens.
|
||||
|
||||
## Step-by-Step Instructions
|
||||
|
||||
### 1. Navigate to Your Apps
|
||||
- Go to the Slack API page for apps and select **Create an App** from the “From Scratch” option.
|
||||
|
||||
### 2. App Creation
|
||||
- Name your app and choose the workspace where you wish to add the assistant.
|
||||
|
||||
### 3. Enabling Socket Mode
|
||||
- Navigate to **Settings > Socket Mode** and enable **Socket Mode**.
|
||||
- This action will generate an App-level token. Select the `connections:write` scope and copy the App-level token for future use.
|
||||
|
||||
### 4. Socket Naming
|
||||
- Assign a name to your socket as per your preference.
|
||||
|
||||
### 5. Basic Information Setup
|
||||
- Go to **Basic Information** (under **Settings**) and configure the following:
|
||||
- Assistant name
|
||||
- App icon
|
||||
- Background color
|
||||
|
||||
### 6. Bot Token and Permissions
|
||||
- In the **OAuth & Permissions** option found under the **Features** section, retrieve the Bot Token. Save it for future usage.
|
||||
- You will also need to add specific bot token scopes:
|
||||
- `app_mentions:read`
|
||||
- `assistant:write`
|
||||
- `chat:write`
|
||||
- `chat:write.public`
|
||||
- `im:history`
|
||||
|
||||
### 7. Enable Events
|
||||
- From **Event Subscriptions**, enable events and add the following Bot User events:
|
||||
- `app_mention`
|
||||
- `assistant_thread_context_changed`
|
||||
- `assistant_thread_started`
|
||||
- `message.im`
|
||||
|
||||
### 8. Agent/Assistant Toggle
|
||||
- In the **Features > Agent & Assistants** section, toggle on the Agent or Assistant option.
|
||||
- In the **Suggested Prompts** setting, leave it as `dynamic` (this is the default setting).
|
||||
|
||||
---
|
||||
|
||||
## Code-Side Configuration Guide
|
||||
|
||||
This section focuses on generating and setting up the necessary tokens in the `.env` file, using the `.env-example` as a template.
|
||||
|
||||
### Step 1: Generating Required Keys
|
||||
|
||||
1. **SLACK_APP_TOKEN**
|
||||
- Navigate to **Settings > Socket Mode** in the Slack API and enable **Socket Mode**.
|
||||
- Copy the App-level token generated (usually starts with `xapp-`).
|
||||
|
||||
2. **SLACK_BOT_TOKEN**
|
||||
- Go to **OAuth & Permissions** (under the **Features** section in Slack API).
|
||||
- Retrieve the **Bot Token** (starts with `xoxb-`).
|
||||
|
||||
3. **DOCSGPT_API_KEY**
|
||||
- Go to the **DocsGPT website**.
|
||||
- Navigate to **Settings > Chatbots > Create New** to generate a DocsGPT API Key.
|
||||
- Copy the generated key for use.
|
||||
|
||||
### Step 2: Creating and Updating the `.env` File
|
||||
|
||||
1. Create a new `.env` file in the root of your project (if it doesn’t already exist).
|
||||
2. Use the `.env-example` as a reference and update the file with the following keys and values:
|
||||
|
||||
```bash
|
||||
# .env file
|
||||
SLACK_APP_TOKEN=xapp-your-generated-app-token
|
||||
SLACK_BOT_TOKEN=xoxb-your-generated-bot-token
|
||||
DOCSGPT_API_KEY=your-docsgpt-generated-api-key
|
||||
```
|
||||
|
||||
Replace the placeholder values with the actual tokens generated from the Slack API and DocsGPT as per the steps outlined above.
|
||||
|
||||
---
|
||||
|
||||
This concludes the guide for both setting up the Slack API and configuring the `.env` file on the code side.
|
||||
112
extensions/slack-bot/app.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import os
import hashlib
import httpx
import re
from slack_bolt.async_app import AsyncApp
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
from dotenv import load_dotenv

# Load configuration from a local .env file (see Readme.md for the keys).
load_dotenv()

# Base URL of the DocsGPT API; override via API_BASE for self-hosted deployments.
API_BASE = os.getenv("API_BASE", "https://gptcloud.arc53.com")
API_URL = API_BASE + "/api/answer"

# Slack tokens: bot token (xoxb-, from OAuth & Permissions) and
# app-level token (xapp-, from Socket Mode settings).
SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN")
SLACK_APP_TOKEN = os.getenv("SLACK_APP_TOKEN")

# DocsGPT API key (generated under Settings > Chatbots on the DocsGPT site).
DOCSGPT_API_KEY = os.getenv("DOCSGPT_API_KEY")

# Initialize the Slack Bolt app (async variant, used with Socket Mode).
app = AsyncApp(token=SLACK_BOT_TOKEN)
|
||||
|
||||
def encode_conversation_id(conversation_id: str) -> str:
    """Derive a stable 24-character hex id from a Slack conversation id.

    DocsGPT expects a 24-character conversation id, while Slack thread
    timestamps are shorter, so the Slack id is hashed with SHA-256 and the
    digest is truncated. The mapping is deterministic: the same Slack id
    always yields the same DocsGPT id.

    Args:
        conversation_id (str): The Slack conversation/thread identifier.

    Returns:
        str: The first 24 hex characters of the SHA-256 digest.
    """
    digest = hashlib.sha256(conversation_id.encode()).hexdigest()
    return digest[:24]
|
||||
|
||||
async def generate_answer(question: str, messages: list, conversation_id: str | None) -> dict:
    """Generate an answer by calling the DocsGPT /api/answer endpoint.

    Args:
        question: The user's question text.
        messages: Prior conversation history to send along (may be empty).
        conversation_id: 24-char DocsGPT conversation id, or None to start
            a fresh conversation.

    Returns:
        dict: {"answer": str, "conversation_id": str | None}. On any API or
        transport failure the fallback answer is returned instead of raising,
        so the Slack handler never crashes on a flaky backend.
    """
    payload = {
        "question": question,
        "api_key": DOCSGPT_API_KEY,
        "history": messages,
        "conversation_id": conversation_id,
    }
    headers = {
        "Content-Type": "application/json; charset=utf-8"
    }
    fallback = {"answer": "Sorry, I couldn't find an answer.", "conversation_id": None}
    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(API_URL, json=payload, headers=headers, timeout=60.0)
    except httpx.HTTPError as exc:
        # Timeouts / connection errors previously propagated and killed the
        # message handler; degrade to the fallback answer instead.
        print(f"DocsGPT request failed: {exc}")
        return fallback

    if response.status_code == 200:
        data = response.json()
        conversation_id = data.get("conversation_id")
        answer = data.get("answer", "Sorry, I couldn't find an answer.")
        return {"answer": answer, "conversation_id": conversation_id}
    # Error bodies are not guaranteed to be JSON (e.g. HTML gateway errors),
    # so log the raw text rather than calling response.json() and raising.
    print(response.text)
    return fallback
|
||||
|
||||
@app.message(".*")
|
||||
async def message_docs(message, say):
|
||||
client = app.client
|
||||
channel = message['channel']
|
||||
thread_ts = message['thread_ts']
|
||||
user_query = message['text']
|
||||
await client.assistant_threads_setStatus(
|
||||
channel_id = channel,
|
||||
thread_ts = thread_ts,
|
||||
status = "is generating your answer...",
|
||||
)
|
||||
|
||||
docs_gpt_channel_id = encode_conversation_id(thread_ts)
|
||||
|
||||
# Get response from DocsGPT
|
||||
response = await generate_answer(user_query,[], docs_gpt_channel_id)
|
||||
answer = convert_to_slack_markdown(response['answer'])
|
||||
|
||||
# Respond in Slack
|
||||
await client.chat_postMessage(text = answer, mrkdwn= True, channel= message['channel'],
|
||||
thread_ts = message['thread_ts'],)
|
||||
|
||||
def convert_to_slack_markdown(markdown_text: str):
    """Translate common Markdown constructs into Slack's mrkdwn dialect.

    Rules, applied in order: **bold** becomes *bold*; italics and inline code
    already use Slack-compatible syntax; top-level bullets become filled
    bullets (•); indented bullets become hollow bullets (◦); headers
    (# .. ######) become bold lines.
    """
    # (pattern, replacement, flags) substitutions applied sequentially.
    rules = (
        (r'\*\*(.*?)\*\*', r'*\1*', 0),                    # **text** -> *text*
        (r'_(.*?)_', r'_\1_', 0),                          # italics: same syntax in Slack
        (r'`(.*?)`', r'`\1`', 0),                          # inline code: same syntax in Slack
        (r'^\s{0,1}[-*]\s+', ' • ', re.MULTILINE),         # top-level bullets -> •
        (r'^\s{2,}[-*]\s+', '\t◦ ', re.MULTILINE),         # indented bullets -> ◦
        (r'^\s*#{1,6}\s*(.*?)$', r'*\1*', re.MULTILINE),   # headers -> bold lines
    )
    slack_text = markdown_text
    for pattern, replacement, flags in rules:
        slack_text = re.sub(pattern, replacement, slack_text, flags=flags)
    return slack_text
|
||||
|
||||
async def main():
    """Open the Socket Mode connection and serve Slack events until stopped."""
    # Socket Mode authenticates with the app-level (xapp-) token,
    # not the bot (xoxb-) token.
    handler = AsyncSocketModeHandler(app, os.environ["SLACK_APP_TOKEN"])
    await handler.start_async()

# Start the app
if __name__ == "__main__":
    import asyncio
    asyncio.run(main())
|
||||
9
extensions/slack-bot/requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
aiohttp>=3,<4
|
||||
certifi==2024.7.4
|
||||
h11==0.14.0
|
||||
httpcore==1.0.5
|
||||
httpx==0.27.0
|
||||
idna==3.7
|
||||
python-dotenv==1.0.1
|
||||
sniffio==1.3.1
|
||||
slack-bolt==1.21.0
|
||||