feat: Bring Your Own Model (BYOM)

2026-05-07 06:30:03 +00:00 · 2026-04-27 21:50:45 +01:00
145 changed files with 1006 additions and 15717 deletions
--- a/README.md
+++ b/README.md
@@ -47,13 +47,11 @@
 </ul>

 ## Roadmap
- [x] Agent Workflow Builder with conditional nodes ( February 2026 )
- [x] SharePoint & Confluence connectors ( March – April 2026 )
- [x] Research mode ( March 2026 )
- [x] Postgres migration for user data ( April 2026 )
- [x] OpenTelemetry observability ( April 2026 )
- [x] Bring Your Own Model (BYOM) ( April 2026 )
- [ ] Agent scheduling (RedBeat-backed) ( Q2 2026 )
+- [x] Add OAuth 2.0 authentication for MCP ( September 2025 )
+- [x] Deep Agents ( October 2025 )
+- [x] Prompt Templating ( October 2025 )
+- [x] Full API tooling ( December 2025 )
+- [ ] Agent scheduling ( January 2026 )

 You can find our full roadmap [here](https://github.com/orgs/arc53/projects/2). Please don't hesitate to contribute or create issues, it helps us improve DocsGPT!

--- a/application/agents/tool_executor.py
+++ b/application/agents/tool_executor.py
@@ -1,107 +1,18 @@
 import logging
 import uuid
 from collections import Counter
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple

 from application.agents.tools.tool_action_parser import ToolActionParser
 from application.agents.tools.tool_manager import ToolManager
 from application.security.encryption import decrypt_credentials
-from application.storage.db.base_repository import looks_like_uuid
 from application.storage.db.repositories.agents import AgentsRepository
-from application.storage.db.repositories.tool_call_attempts import (
-    ToolCallAttemptsRepository,
-)
 from application.storage.db.repositories.user_tools import UserToolsRepository
-from application.storage.db.session import db_readonly, db_session
+from application.storage.db.session import db_readonly

 logger = logging.getLogger(__name__)


-def _record_proposed(
-    call_id: str,
-    tool_name: str,
-    action_name: str,
-    arguments: Any,
-    *,
-    tool_id: Optional[str] = None,
-) -> bool:
-    """Insert a ``proposed`` row; swallow infra failures so tool calls
-    still run when the journal is unreachable. Returns True iff the row
-    is now journaled (newly created or already present).
-    """
-    try:
-        with db_session() as conn:
-            inserted = ToolCallAttemptsRepository(conn).record_proposed(
-                call_id,
-                tool_name,
-                action_name,
-                arguments,
-                tool_id=tool_id if tool_id and looks_like_uuid(tool_id) else None,
-            )
-        if not inserted:
-            logger.warning(
-                "tool_call_attempts duplicate call_id=%s; existing row left in place",
-                call_id,
-                extra={"alert": "tool_call_id_collision", "call_id": call_id},
-            )
-        return True
-    except Exception:
-        logger.exception("tool_call_attempts proposed write failed for %s", call_id)
-        return False
-
-
-def _mark_executed(
-    call_id: str,
-    result: Any,
-    *,
-    message_id: Optional[str] = None,
-    artifact_id: Optional[str] = None,
-    proposed_ok: bool = True,
-    tool_name: Optional[str] = None,
-    action_name: Optional[str] = None,
-    arguments: Any = None,
-    tool_id: Optional[str] = None,
-) -> None:
-    """Flip the row to ``executed``. If ``proposed_ok`` is False (the
-    proposed write failed earlier), upsert a fresh row in ``executed`` so
-    the reconciler can still see the attempt — without this, the side
-    effect would be invisible to the journal.
-    """
-    try:
-        with db_session() as conn:
-            repo = ToolCallAttemptsRepository(conn)
-            if proposed_ok:
-                updated = repo.mark_executed(
-                    call_id,
-                    result,
-                    message_id=message_id,
-                    artifact_id=artifact_id,
-                )
-                if updated:
-                    return
-            # Fallback synthesizes the row so the journal isn't lost.
-            repo.upsert_executed(
-                call_id,
-                tool_name=tool_name or "unknown",
-                action_name=action_name or "",
-                arguments=arguments if arguments is not None else {},
-                result=result,
-                tool_id=tool_id if tool_id and looks_like_uuid(tool_id) else None,
-                message_id=message_id,
-                artifact_id=artifact_id,
-            )
-    except Exception:
-        logger.exception("tool_call_attempts executed write failed for %s", call_id)
-
-
-def _mark_failed(call_id: str, error: str) -> None:
-    try:
-        with db_session() as conn:
-            ToolCallAttemptsRepository(conn).mark_failed(call_id, error)
-    except Exception:
-        logger.exception("tool_call_attempts failed-write failed for %s", call_id)
-
-
 class ToolExecutor:
    """Handles tool discovery, preparation, and execution.

@@ -120,7 +31,6 @@ class ToolExecutor:
        self.tool_calls: List[Dict] = []
        self._loaded_tools: Dict[str, object] = {}
        self.conversation_id: Optional[str] = None
-        self.message_id: Optional[str] = None
        self.client_tools: Optional[List[Dict]] = None
        self._name_to_tool: Dict[str, Tuple[str, str]] = {}
        self._tool_to_name: Dict[Tuple[str, str], str] = {}
@@ -364,14 +274,7 @@ class ToolExecutor:

        if tool_id is None or action_name is None:
            error_message = f"Error: Failed to parse LLM tool call. Tool name: {llm_name}"
-            logger.error(
-                "tool_call_parse_failed",
-                extra={
-                    "llm_class_name": llm_class_name,
-                    "llm_tool_name": llm_name,
-                    "call_id": call_id,
-                },
-            )
+            logger.error(error_message)

            tool_call_data = {
                "tool_name": "unknown",
@@ -386,15 +289,7 @@ class ToolExecutor:

        if tool_id not in tools_dict:
            error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
-            logger.error(
-                "tool_id_not_found",
-                extra={
-                    "tool_id": tool_id,
-                    "llm_tool_name": llm_name,
-                    "call_id": call_id,
-                    "available_tool_count": len(tools_dict),
-                },
-            )
+            logger.error(error_message)

            tool_call_data = {
                "tool_name": "unknown",
@@ -413,36 +308,9 @@ class ToolExecutor:
            "action_name": llm_name,
            "arguments": call_args,
        }
-        tool_data = tools_dict[tool_id]
-        # Journal first so the reconciler sees malformed calls and any
-        # subsequent ``_mark_failed`` actually updates a real row.
-        proposed_ok = _record_proposed(
-            call_id,
-            tool_data["name"],
-            action_name,
-            call_args if isinstance(call_args, dict) else {},
-            tool_id=tool_data.get("id"),
-        )
-        # Defensive guard: a non-dict ``call_args`` (e.g. malformed
-        # JSON on the resume path) would crash the param walk below
-        # with AttributeError on ``.items()``. Surface a clean error
-        # event and flip the journal row to ``failed`` instead of
-        # killing the stream.
-        if not isinstance(call_args, dict):
-            error_message = (
-                f"Tool call arguments must be a JSON object, got "
-                f"{type(call_args).__name__}."
-            )
-            tool_call_data["result"] = error_message
-            tool_call_data["arguments"] = {}
-            _mark_failed(call_id, error_message)
-            yield {
-                "type": "tool_call",
-                "data": {**tool_call_data, "status": "error"},
-            }
-            self.tool_calls.append(tool_call_data)
-            return error_message, call_id
        yield {"type": "tool_call", "data": {**tool_call_data, "status": "pending"}}
+
+        tool_data = tools_dict[tool_id]
        action_data = (
            tool_data["config"]["actions"][action_name]
            if tool_data["name"] == "api_tool"
@@ -488,17 +356,8 @@ class ToolExecutor:
                f"Failed to load tool '{tool_data.get('name')}' (tool_id key={tool_id}): "
                "missing 'id' on tool row."
            )
-            logger.error(
-                "tool_load_failed",
-                extra={
-                    "tool_name": tool_data.get("name"),
-                    "tool_id": tool_id,
-                    "action_name": action_name,
-                    "call_id": call_id,
-                },
-            )
+            logger.error(error_message)
            tool_call_data["result"] = error_message
-            _mark_failed(call_id, error_message)
            yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
            self.tool_calls.append(tool_call_data)
            return error_message, call_id
@@ -508,18 +367,14 @@ class ToolExecutor:
            if tool_data["name"] == "api_tool"
            else parameters
        )
-        try:
-            if tool_data["name"] == "api_tool":
-                logger.debug(
-                    f"Executing api: {action_name} with query_params: {query_params}, headers: {headers}, body: {body}"
-                )
-                result = tool.execute_action(action_name, **body)
-            else:
-                logger.debug(f"Executing tool: {action_name} with args: {call_args}")
-                result = tool.execute_action(action_name, **parameters)
-        except Exception as exc:
-            _mark_failed(call_id, str(exc))
-            raise
+        if tool_data["name"] == "api_tool":
+            logger.debug(
+                f"Executing api: {action_name} with query_params: {query_params}, headers: {headers}, body: {body}"
+            )
+            result = tool.execute_action(action_name, **body)
+        else:
+            logger.debug(f"Executing tool: {action_name} with args: {call_args}")
+            result = tool.execute_action(action_name, **parameters)

        get_artifact_id = (
            getattr(tool, "get_artifact_id", None)
@@ -548,22 +403,6 @@ class ToolExecutor:
            f"{result_full[:50]}..." if len(result_full) > 50 else result_full
        )

-        # Tool side effect has run; flip the journal row so the
-        # message-finalize path can later confirm it. If the proposed
-        # write failed (DB outage), upsert a fresh row in ``executed`` so
-        # the reconciler still sees the side effect.
-        _mark_executed(
-            call_id,
-            result_full,
-            message_id=self.message_id,
-            artifact_id=artifact_id or None,
-            proposed_ok=proposed_ok,
-            tool_name=tool_data["name"],
-            action_name=action_name,
-            arguments=call_args,
-            tool_id=tool_data.get("id"),
-        )
-
        stream_tool_call_data = {
            key: value
            for key, value in tool_call_data.items()
@@ -612,12 +451,10 @@ class ToolExecutor:
            row_id = tool_data.get("id")
            if not row_id:
                logger.error(
-                    "tool_missing_row_id",
-                    extra={
-                        "tool_name": tool_data.get("name"),
-                        "tool_id": tool_id,
-                        "action_name": action_name,
-                    },
+                    "Tool data missing 'id' for tool name=%s (enumerate-key tool_id=%s); "
+                    "skipping load to avoid binding a non-UUID downstream.",
+                    tool_data.get("name"),
+                    tool_id,
                )
                return None
            tool_config["tool_id"] = str(row_id)
--- a/application/agents/tools/postgres.py
+++ b/application/agents/tools/postgres.py
@@ -177,4 +177,3 @@ class PostgresTool(Tool):
                "order": 1,
            },
        }
-
--- a/application/agents/tools/tool_action_parser.py
+++ b/application/agents/tools/tool_action_parser.py
@@ -57,29 +57,6 @@ class ToolActionParser:
    def _parse_google_llm(self, call):
        try:
            call_args = call.arguments
-            # Gemini's SDK natively returns ``args`` as a dict, but the
-            # resume path (``gen_continuation``) stringifies it for the
-            # assistant message. Coerce a JSON string back into a dict;
-            # fall back to an empty dict on malformed input so downstream
-            # ``call_args.items()`` doesn't crash the stream.
-            if isinstance(call_args, str):
-                try:
-                    call_args = json.loads(call_args)
-                except (json.JSONDecodeError, TypeError):
-                    logger.warning(
-                        "Google call.arguments was not valid JSON; "
-                        "falling back to empty args for %s",
-                        getattr(call, "name", "<unknown>"),
-                    )
-                    call_args = {}
-            if not isinstance(call_args, dict):
-                logger.warning(
-                    "Google call.arguments has unexpected type %s; "
-                    "falling back to empty args for %s",
-                    type(call_args).__name__,
-                    getattr(call, "name", "<unknown>"),
-                )
-                call_args = {}

            resolved = self._resolve_via_mapping(call.name)
            if resolved:
--- a/application/alembic/versions/0004_durability_foundation.py
+++ b/application/alembic/versions/0004_durability_foundation.py
@@ -1,217 +0,0 @@
-"""0004 durability foundation — idempotency, tool-call log, ingest checkpoint.
-
-Adds ``task_dedup``, ``webhook_dedup``, ``tool_call_attempts``,
-``ingest_chunk_progress``, and per-row status flags on
-``conversation_messages`` and ``pending_tool_state``. Also adds
-``token_usage.source`` and ``token_usage.request_id`` so per-channel
-cost attribution (``agent_stream`` / ``title`` / ``compression`` /
-``rag_condense`` / ``fallback``) is queryable and multi-call agent runs
-can be DISTINCT-collapsed into a single user request for rate limiting.
-
-Revision ID: 0004_durability_foundation
-Revises: 0003_user_custom_models
-"""
-
-from typing import Sequence, Union
-
-from alembic import op
-
-
-revision: str = "0004_durability_foundation"
-down_revision: Union[str, None] = "0003_user_custom_models"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-    # ------------------------------------------------------------------
-    # New tables
-    # ------------------------------------------------------------------
-    # ``attempt_count`` bounds the per-Celery-task idempotency wrapper's
-    # retry loop so a poison message can't run forever; default 0 means
-    # existing rows behave as if no attempts have run yet.
-    op.execute(
-        """
-        CREATE TABLE task_dedup (
-            idempotency_key TEXT PRIMARY KEY,
-            task_name       TEXT NOT NULL,
-            task_id         TEXT NOT NULL,
-            result_json     JSONB,
-            status          TEXT NOT NULL
-                            CHECK (status IN ('pending', 'completed', 'failed')),
-            attempt_count   INT  NOT NULL DEFAULT 0,
-            created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
-        );
-        """
-    )
-
-    op.execute(
-        """
-        CREATE TABLE webhook_dedup (
-            idempotency_key TEXT PRIMARY KEY,
-            agent_id        UUID NOT NULL,
-            task_id         TEXT NOT NULL,
-            response_json   JSONB,
-            created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
-        );
-        """
-    )
-
-    # FK on ``message_id`` uses ``ON DELETE SET NULL`` so the journal row
-    # survives parent-message deletion (compliance / cost-attribution).
-    op.execute(
-        """
-        CREATE TABLE tool_call_attempts (
-            call_id      TEXT PRIMARY KEY,
-            message_id   UUID
-                         REFERENCES conversation_messages (id)
-                         ON DELETE SET NULL,
-            tool_id      UUID,
-            tool_name    TEXT NOT NULL,
-            action_name  TEXT NOT NULL,
-            arguments    JSONB NOT NULL,
-            result       JSONB,
-            error        TEXT,
-            status       TEXT NOT NULL
-                         CHECK (status IN (
-                             'proposed', 'executed', 'confirmed',
-                             'compensated', 'failed'
-                         )),
-            attempted_at TIMESTAMPTZ NOT NULL DEFAULT now(),
-            updated_at   TIMESTAMPTZ NOT NULL DEFAULT now()
-        );
-        """
-    )
-
-    op.execute(
-        """
-        CREATE TABLE ingest_chunk_progress (
-            source_id        UUID PRIMARY KEY,
-            total_chunks     INT NOT NULL,
-            embedded_chunks  INT NOT NULL DEFAULT 0,
-            last_index       INT NOT NULL DEFAULT -1,
-            last_updated     TIMESTAMPTZ NOT NULL DEFAULT now()
-        );
-        """
-    )
-
-    # ------------------------------------------------------------------
-    # Column additions on existing tables
-    # ------------------------------------------------------------------
-    # DEFAULT 'complete' backfills existing rows — they're already done.
-    op.execute(
-        """
-        ALTER TABLE conversation_messages
-            ADD COLUMN status TEXT NOT NULL DEFAULT 'complete'
-                CHECK (status IN ('pending', 'streaming', 'complete', 'failed')),
-            ADD COLUMN request_id TEXT;
-        """
-    )
-
-    op.execute(
-        """
-        ALTER TABLE pending_tool_state
-            ADD COLUMN status TEXT NOT NULL DEFAULT 'pending'
-                CHECK (status IN ('pending', 'resuming')),
-            ADD COLUMN resumed_at TIMESTAMPTZ;
-        """
-    )
-
-    # Default ``agent_stream`` backfills historical rows under the
-    # assumption they were written from the primary path — pre-fix the
-    # only path that wrote was the error branch reading agent.llm.
-    # ``request_id`` is the stream-scoped UUID stamped by the route on
-    # ``agent.llm`` so multi-tool agent runs (which produce N rows)
-    # collapse to one request via DISTINCT in ``count_in_range``.
-    # Side-channel sources (``title`` / ``compression`` / ``rag_condense``
-    # / ``fallback``) leave it NULL and are excluded from the request
-    # count by source filter.
-    op.execute(
-        """
-        ALTER TABLE token_usage
-            ADD COLUMN source     TEXT NOT NULL DEFAULT 'agent_stream',
-            ADD COLUMN request_id TEXT;
-        """
-    )
-
-    # ------------------------------------------------------------------
-    # Indexes — partial where the predicate selects only non-terminal rows
-    # ------------------------------------------------------------------
-    op.execute(
-        "CREATE INDEX conversation_messages_pending_ts_idx "
-        "ON conversation_messages (timestamp) "
-        "WHERE status IN ('pending', 'streaming');"
-    )
-    op.execute(
-        "CREATE INDEX tool_call_attempts_pending_ts_idx "
-        "ON tool_call_attempts (attempted_at) "
-        "WHERE status IN ('proposed', 'executed');"
-    )
-    op.execute(
-        "CREATE INDEX tool_call_attempts_message_idx "
-        "ON tool_call_attempts (message_id) "
-        "WHERE message_id IS NOT NULL;"
-    )
-    op.execute(
-        "CREATE INDEX pending_tool_state_resuming_ts_idx "
-        "ON pending_tool_state (resumed_at) "
-        "WHERE status = 'resuming';"
-    )
-    op.execute(
-        "CREATE INDEX webhook_dedup_agent_idx "
-        "ON webhook_dedup (agent_id);"
-    )
-    op.execute(
-        "CREATE INDEX task_dedup_pending_attempts_idx "
-        "ON task_dedup (attempt_count) WHERE status = 'pending';"
-    )
-    # Cost-attribution dashboards filter ``token_usage`` by
-    # ``(timestamp, source)``; index the same shape so they stay cheap.
-    op.execute(
-        "CREATE INDEX token_usage_source_ts_idx "
-        "ON token_usage (source, timestamp);"
-    )
-    # Partial index — only rows with a stamped request_id participate
-    # in the DISTINCT count. NULL rows fall through to the COUNT(*)
-    # branch in the repository query.
-    op.execute(
-        "CREATE INDEX token_usage_request_id_idx "
-        "ON token_usage (request_id) "
-        "WHERE request_id IS NOT NULL;"
-    )
-
-    op.execute(
-        "CREATE TRIGGER tool_call_attempts_set_updated_at "
-        "BEFORE UPDATE ON tool_call_attempts "
-        "FOR EACH ROW WHEN (OLD.* IS DISTINCT FROM NEW.*) "
-        "EXECUTE FUNCTION set_updated_at();"
-    )
-
-
-def downgrade() -> None:
-    # CASCADE so the downgrade stays safe if later migrations FK into these.
-    for table in (
-        "ingest_chunk_progress",
-        "tool_call_attempts",
-        "webhook_dedup",
-        "task_dedup",
-    ):
-        op.execute(f"DROP TABLE IF EXISTS {table} CASCADE;")
-
-    op.execute(
-        "ALTER TABLE conversation_messages "
-        "DROP COLUMN IF EXISTS request_id, "
-        "DROP COLUMN IF EXISTS status;"
-    )
-    op.execute(
-        "ALTER TABLE pending_tool_state "
-        "DROP COLUMN IF EXISTS resumed_at, "
-        "DROP COLUMN IF EXISTS status;"
-    )
-    op.execute("DROP INDEX IF EXISTS token_usage_request_id_idx;")
-    op.execute("DROP INDEX IF EXISTS token_usage_source_ts_idx;")
-    op.execute(
-        "ALTER TABLE token_usage "
-        "DROP COLUMN IF EXISTS request_id, "
-        "DROP COLUMN IF EXISTS source;"
-    )
--- a/application/alembic/versions/0005_ingest_attempt_id.py
+++ b/application/alembic/versions/0005_ingest_attempt_id.py
@@ -1,44 +0,0 @@
-"""0005 ingest_chunk_progress.attempt_id — per-attempt resume scoping.
-
-Without this column, a completed checkpoint row poisoned every later
-embed call on the same ``source_id``: a sync after an upload finished
-read the upload's terminal ``last_index`` and either embedded zero
-chunks (if new ``total_docs <= last_index + 1``) or stacked new chunks
-on top of the old vectors (if ``total_docs > last_index + 1``).
-
-``attempt_id`` is stamped from ``self.request.id`` (Celery's stable
-task id, which survives ``acks_late`` retries of the same task but
-differs across separate task invocations). The repository's
-``init_progress`` upsert resets ``last_index`` / ``embedded_chunks``
-when the incoming ``attempt_id`` differs from the stored one — so a
-fresh sync starts from chunk 0 while a retry of the same task resumes
-from the last checkpointed chunk.
-
-Revision ID: 0005_ingest_attempt_id
-Revises: 0004_durability_foundation
-"""
-
-from typing import Sequence, Union
-
-from alembic import op
-
-
-revision: str = "0005_ingest_attempt_id"
-down_revision: Union[str, None] = "0004_durability_foundation"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-    op.execute(
-        """
-        ALTER TABLE ingest_chunk_progress
-            ADD COLUMN attempt_id TEXT;
-        """
-    )
-
-
-def downgrade() -> None:
-    op.execute(
-        "ALTER TABLE ingest_chunk_progress DROP COLUMN IF EXISTS attempt_id;"
-    )
--- a/application/alembic/versions/0006_idempotency_lease.py
+++ b/application/alembic/versions/0006_idempotency_lease.py
@@ -1,57 +0,0 @@
-"""0006 task_dedup lease columns — running-lease for in-flight tasks.
-
-Without these, ``with_idempotency`` only short-circuits *completed*
-rows. A late-ack redelivery (Redis ``visibility_timeout`` exceeded by a
-long ingest, or a hung-but-alive worker) hands the same message to a
-second worker; ``_claim_or_bump`` only bumped the attempt counter and
-both workers ran the task body in parallel — duplicate vector writes,
-duplicate token spend, duplicate webhook side effects.
-
-``lease_owner_id`` + ``lease_expires_at`` turn that into an atomic
-compare-and-swap. The wrapper claims a lease at entry, refreshes it via
-a 30 s heartbeat thread, and finalises (which makes the lease moot via
-``status='completed'``). A second worker hitting the same key sees a
-fresh lease and ``self.retry(countdown=LEASE_TTL)``s instead of running.
-A crashed worker's lease expires after ``LEASE_TTL`` seconds and the
-next retry can claim it.
-
-Revision ID: 0006_idempotency_lease
-Revises: 0005_ingest_attempt_id
-"""
-
-from typing import Sequence, Union
-
-from alembic import op
-
-
-revision: str = "0006_idempotency_lease"
-down_revision: Union[str, None] = "0005_ingest_attempt_id"
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
-    op.execute(
-        """
-        ALTER TABLE task_dedup
-            ADD COLUMN lease_owner_id   TEXT,
-            ADD COLUMN lease_expires_at TIMESTAMPTZ;
-        """
-    )
-    # Reconciler's stuck-pending sweep filters by
-    # ``(status='pending', lease_expires_at < now() - 60s, attempt_count >= 5)``.
-    # Partial index keeps the scan small even under heavy task throughput.
-    op.execute(
-        "CREATE INDEX task_dedup_pending_lease_idx "
-        "ON task_dedup (lease_expires_at) "
-        "WHERE status = 'pending';"
-    )
-
-
-def downgrade() -> None:
-    op.execute("DROP INDEX IF EXISTS task_dedup_pending_lease_idx;")
-    op.execute(
-        "ALTER TABLE task_dedup "
-        "DROP COLUMN IF EXISTS lease_expires_at, "
-        "DROP COLUMN IF EXISTS lease_owner_id;"
-    )
--- a/application/api/answer/routes/answer.py
+++ b/application/api/answer/routes/answer.py
@@ -102,8 +102,6 @@ class AnswerResource(Resource, BaseAnswerResource):
                        "tools_dict": tools_dict,
                        "pending_tool_calls": pending_tool_calls,
                        "tool_actions": tool_actions,
-                        "reserved_message_id": processor.reserved_message_id,
-                        "request_id": processor.request_id,
                    },
                )
            else:
--- a/application/api/answer/routes/base.py
+++ b/application/api/answer/routes/base.py
@@ -1,18 +1,13 @@
 import datetime
 import json
 import logging
-import time
-import uuid
 from typing import Any, Dict, Generator, List, Optional

 from flask import jsonify, make_response, Response
 from flask_restx import Namespace

 from application.api.answer.services.continuation_service import ContinuationService
-from application.api.answer.services.conversation_service import (
-    ConversationService,
-    TERMINATED_RESPONSE_PLACEHOLDER,
-)
+from application.api.answer.services.conversation_service import ConversationService
 from application.core.model_utils import (
    get_api_key_for_provider,
    get_default_model_id,
@@ -208,118 +203,13 @@ class BaseAnswerResource:
        Yields:
            Server-sent event strings
        """
-        response_full, thought, source_log_docs, tool_calls = "", "", [], []
-        is_structured = False
-        schema_info = None
-        structured_chunks = []
-        query_metadata: Dict[str, Any] = {}
-        paused = False
-
-        # One id shared across the WAL row, primary LLM (token_usage
-        # attribution), the SSE event, and resumed continuations.
-        request_id = (
-            _continuation.get("request_id") if _continuation else None
-        ) or str(uuid.uuid4())
-
-        # Reserve the placeholder row before the LLM call so a crash
-        # mid-stream still leaves the question queryable. Continuations
-        # reuse the original placeholder.
-        reserved_message_id: Optional[str] = None
-        wal_eligible = should_save_conversation and not _continuation
-        if wal_eligible:
-            try:
-                reservation = self.conversation_service.save_user_question(
-                    conversation_id=conversation_id,
-                    question=question,
-                    decoded_token=decoded_token,
-                    attachment_ids=attachment_ids,
-                    api_key=user_api_key,
-                    agent_id=agent_id,
-                    is_shared_usage=is_shared_usage,
-                    shared_token=shared_token,
-                    model_id=model_id or self.default_model_id,
-                    request_id=request_id,
-                    index=index,
-                )
-                conversation_id = reservation["conversation_id"]
-                reserved_message_id = reservation["message_id"]
-            except Exception as e:
-                logger.error(
-                    f"Failed to reserve message row before stream: {e}",
-                    exc_info=True,
-                )
-        elif _continuation and _continuation.get("reserved_message_id"):
-            reserved_message_id = _continuation["reserved_message_id"]
-
-        primary_llm = getattr(agent, "llm", None)
-        if primary_llm is not None:
-            primary_llm._request_id = request_id
-
-        # Flipped to ``streaming`` on first chunk; reconciler uses this
-        # to tell "never started" from "in flight".
-        streaming_marked = False
-        # Heartbeat goes into ``metadata.last_heartbeat_at`` (not
-        # ``updated_at``, which reconciler-side writes share) and uses
-        # ``time.monotonic`` so a blocked event loop can't fake fresh.
-        STREAM_HEARTBEAT_INTERVAL = 60
-        last_heartbeat_at = time.monotonic()
-
-        def _mark_streaming_once() -> None:
-            nonlocal streaming_marked, last_heartbeat_at
-            if streaming_marked or not reserved_message_id:
-                return
-            try:
-                self.conversation_service.update_message_status(
-                    reserved_message_id, "streaming",
-                )
-            except Exception:
-                logger.exception(
-                    "update_message_status streaming failed for %s",
-                    reserved_message_id,
-                )
-            streaming_marked = True
-            last_heartbeat_at = time.monotonic()
-
-        def _heartbeat_streaming() -> None:
-            nonlocal last_heartbeat_at
-            if not reserved_message_id or not streaming_marked:
-                return
-            now_mono = time.monotonic()
-            if now_mono - last_heartbeat_at < STREAM_HEARTBEAT_INTERVAL:
-                return
-            try:
-                self.conversation_service.heartbeat_message(
-                    reserved_message_id,
-                )
-            except Exception:
-                logger.exception(
-                    "stream heartbeat update failed for %s",
-                    reserved_message_id,
-                )
-            last_heartbeat_at = now_mono
-
-        # Correlates tool_call_attempts rows with this message.
-        if reserved_message_id and getattr(agent, "tool_executor", None):
-            try:
-                agent.tool_executor.message_id = reserved_message_id
-            except Exception:
-                pass
-
        try:
-            # Surface the placeholder id before any LLM tokens so a
-            # mid-handshake disconnect still has a row to tail-poll.
-            if reserved_message_id:
-                early_event = json.dumps(
-                    {
-                        "type": "message_id",
-                        "message_id": reserved_message_id,
-                        "conversation_id": (
-                            str(conversation_id) if conversation_id else None
-                        ),
-                        "request_id": request_id,
-                    }
-                )
-                yield f"data: {early_event}\n\n"
+            response_full, thought, source_log_docs, tool_calls = "", "", [], []
+            is_structured = False
+            schema_info = None
+            structured_chunks = []
+            query_metadata = {}
+            paused = False

            if _continuation:
                gen_iter = agent.gen_continuation(
@@ -332,13 +222,9 @@ class BaseAnswerResource:
                gen_iter = agent.gen(query=question)

            for line in gen_iter:
-                # Cheap closure check that only hits the DB when the
-                # heartbeat interval has elapsed.
-                _heartbeat_streaming()
                if "metadata" in line:
                    query_metadata.update(line["metadata"])
                elif "answer" in line:
-                    _mark_streaming_once()
                    response_full += str(line["answer"])
                    if line.get("structured"):
                        is_structured = True
@@ -348,7 +234,6 @@ class BaseAnswerResource:
                        data = json.dumps({"type": "answer", "answer": line["answer"]})
                        yield f"data: {data}\n\n"
                elif "sources" in line:
-                    _mark_streaming_once()
                    truncated_sources = []
                    source_log_docs = line["sources"]
                    for source in line["sources"]:
@@ -401,9 +286,12 @@ class BaseAnswerResource:
            if paused:
                continuation = getattr(agent, "_pending_continuation", None)
                if continuation:
-                    # First-turn pause needs a conversation row to attach to.
+                    # Ensure we have a conversation_id — create a partial
+                    # conversation if this is the first turn.
                    if not conversation_id and should_save_conversation:
                        try:
+                            # Use model-owner scope so shared-agent
+                            # owner-BYOM resolves to its registered plugin.
                            provider = (
                                get_provider_from_model_id(
                                    model_id,
@@ -464,8 +352,8 @@ class BaseAnswerResource:
                                tool_schemas=getattr(agent, "tools", []),
                                agent_config={
                                    "model_id": model_id or self.default_model_id,
-                                    # BYOM scope; without it resume falls
-                                    # back to caller's layer.
+                                    # Persist BYOM scope so resume doesn't
+                                    # fall back to caller's layer.
                                    "model_user_id": model_user_id,
                                    "llm_name": getattr(agent, "llm_name", settings.LLM_PROVIDER),
                                    "api_key": getattr(agent, "api_key", None),
@@ -475,11 +363,6 @@ class BaseAnswerResource:
                                    "prompt": getattr(agent, "prompt", ""),
                                    "json_schema": getattr(agent, "json_schema", None),
                                    "retriever_config": getattr(agent, "retriever_config", None),
-                                    # Reused on resume so the same WAL row
-                                    # is finalised and request_id stays
-                                    # consistent across token_usage rows.
-                                    "reserved_message_id": reserved_message_id,
-                                    "request_id": request_id,
                                },
                                client_tools=getattr(
                                    agent.tool_executor, "client_tools", None
@@ -502,7 +385,8 @@ class BaseAnswerResource:
            if isNoneDoc:
                for doc in source_log_docs:
                    doc["source"] = "None"
-            # Model-owner scope so title-gen uses owner's BYOM key.
+            # Run under model-owner scope so title-gen LLM inside
+            # save_conversation uses the owner's BYOM provider/key.
            provider = (
                get_provider_from_model_id(
                    model_id,
@@ -523,49 +407,26 @@ class BaseAnswerResource:
                agent_id=agent_id,
                model_user_id=model_user_id,
            )
-            # Title-gen only; agent stream tokens live on ``agent.llm``.
-            llm._token_usage_source = "title"

            if should_save_conversation:
-                if reserved_message_id is not None:
-                    self.conversation_service.finalize_message(
-                        reserved_message_id,
-                        response_full,
-                        thought=thought,
-                        sources=source_log_docs,
-                        tool_calls=tool_calls,
-                        model_id=model_id or self.default_model_id,
-                        metadata=query_metadata if query_metadata else None,
-                        status="complete",
-                        title_inputs={
-                            "llm": llm,
-                            "question": question,
-                            "response": response_full,
-                            "model_id": model_id or self.default_model_id,
-                            "fallback_name": (
-                                question[:50] if question else "New Conversation"
-                            ),
-                        },
-                    )
-                else:
-                    conversation_id = self.conversation_service.save_conversation(
-                        conversation_id,
-                        question,
-                        response_full,
-                        thought,
-                        source_log_docs,
-                        tool_calls,
-                        llm,
-                        model_id or self.default_model_id,
-                        decoded_token,
-                        index=index,
-                        api_key=user_api_key,
-                        agent_id=agent_id,
-                        is_shared_usage=is_shared_usage,
-                        shared_token=shared_token,
-                        attachment_ids=attachment_ids,
-                        metadata=query_metadata if query_metadata else None,
-                    )
+                conversation_id = self.conversation_service.save_conversation(
+                    conversation_id,
+                    question,
+                    response_full,
+                    thought,
+                    source_log_docs,
+                    tool_calls,
+                    llm,
+                    model_id or self.default_model_id,
+                    decoded_token,
+                    index=index,
+                    api_key=user_api_key,
+                    agent_id=agent_id,
+                    is_shared_usage=is_shared_usage,
+                    shared_token=shared_token,
+                    attachment_ids=attachment_ids,
+                    metadata=query_metadata if query_metadata else None,
+                )
                # Persist compression metadata/summary if it exists and wasn't saved mid-execution
                compression_meta = getattr(agent, "compression_metadata", None)
                compression_saved = getattr(agent, "compression_saved", False)
@@ -588,21 +449,6 @@ class BaseAnswerResource:
                        )
            else:
                conversation_id = None
-            # Resume finished cleanly; drop the continuation row.
-            # Crash-paths leave it ``resuming`` for the janitor to revert.
-            if _continuation and conversation_id:
-                try:
-                    cont_service = ContinuationService()
-                    cont_service.delete_state(
-                        str(conversation_id),
-                        decoded_token.get("sub", "local"),
-                    )
-                except Exception as e:
-                    logger.error(
-                        f"Failed to delete continuation state on resume "
-                        f"completion: {e}",
-                        exc_info=True,
-                    )
            id_data = {"type": "id", "id": str(conversation_id)}
            data = json.dumps(id_data)
            yield f"data: {data}\n\n"
@@ -657,8 +503,10 @@ class BaseAnswerResource:
                    if isNoneDoc:
                        for doc in source_log_docs:
                            doc["source"] = "None"
-                    # Resolve under model-owner scope so shared-agent
-                    # title-gen uses owner BYOM, not deployment default.
+                    # Mirror the normal-path provider resolution so the
+                    # partial-save title LLM uses the model-owner's BYOM
+                    # registration (shared-agent dispatch) rather than
+                    # the deployment default with the instance api key.
                    provider = (
                        get_provider_from_model_id(
                            model_id,
@@ -684,46 +532,24 @@ class BaseAnswerResource:
                        agent_id=agent_id,
                        model_user_id=model_user_id,
                    )
-                    llm._token_usage_source = "title"
-                    if reserved_message_id is not None:
-                        self.conversation_service.finalize_message(
-                            reserved_message_id,
-                            response_full,
-                            thought=thought,
-                            sources=source_log_docs,
-                            tool_calls=tool_calls,
-                            model_id=model_id or self.default_model_id,
-                            metadata=query_metadata if query_metadata else None,
-                            status="complete",
-                            title_inputs={
-                                "llm": llm,
-                                "question": question,
-                                "response": response_full,
-                                "model_id": model_id or self.default_model_id,
-                                "fallback_name": (
-                                    question[:50] if question else "New Conversation"
-                                ),
-                            },
-                        )
-                    else:
-                        self.conversation_service.save_conversation(
-                            conversation_id,
-                            question,
-                            response_full,
-                            thought,
-                            source_log_docs,
-                            tool_calls,
-                            llm,
-                            model_id or self.default_model_id,
-                            decoded_token,
-                            index=index,
-                            api_key=user_api_key,
-                            agent_id=agent_id,
-                            is_shared_usage=is_shared_usage,
-                            shared_token=shared_token,
-                            attachment_ids=attachment_ids,
-                            metadata=query_metadata if query_metadata else None,
-                        )
+                    self.conversation_service.save_conversation(
+                        conversation_id,
+                        question,
+                        response_full,
+                        thought,
+                        source_log_docs,
+                        tool_calls,
+                        llm,
+                        model_id or self.default_model_id,
+                        decoded_token,
+                        index=index,
+                        api_key=user_api_key,
+                        agent_id=agent_id,
+                        is_shared_usage=is_shared_usage,
+                        shared_token=shared_token,
+                        attachment_ids=attachment_ids,
+                        metadata=query_metadata if query_metadata else None,
+                    )
                    compression_meta = getattr(agent, "compression_metadata", None)
                    compression_saved = getattr(agent, "compression_saved", False)
                    if conversation_id and compression_meta and not compression_saved:
@@ -750,24 +576,6 @@ class BaseAnswerResource:
            raise
        except Exception as e:
            logger.error(f"Error in stream: {str(e)}", exc_info=True)
-            if reserved_message_id is not None:
-                try:
-                    self.conversation_service.finalize_message(
-                        reserved_message_id,
-                        response_full or TERMINATED_RESPONSE_PLACEHOLDER,
-                        thought=thought,
-                        sources=source_log_docs,
-                        tool_calls=tool_calls,
-                        model_id=model_id or self.default_model_id,
-                        metadata=query_metadata if query_metadata else None,
-                        status="failed",
-                        error=e,
-                    )
-                except Exception as fin_err:
-                    logger.error(
-                        f"Failed to finalize errored message: {fin_err}",
-                        exc_info=True,
-                    )
            data = json.dumps(
                {
                    "type": "error",
--- a/application/api/answer/routes/stream.py
+++ b/application/api/answer/routes/stream.py
@@ -115,8 +115,6 @@ class StreamResource(Resource, BaseAnswerResource):
                            "tools_dict": tools_dict,
                            "pending_tool_calls": pending_tool_calls,
                            "tool_actions": tool_actions,
-                            "reserved_message_id": processor.reserved_message_id,
-                            "request_id": processor.request_id,
                        },
                    ),
                    mimetype="text/event-stream",
--- a/application/api/answer/services/compression/orchestrator.py
+++ b/application/api/answer/services/compression/orchestrator.py
@@ -160,9 +160,6 @@ class CompressionOrchestrator:
                agent_id=conversation.get("agent_id"),
                model_user_id=registry_user_id,
            )
-            # Side-channel LLM tag — distinguishes compression rows
-            # from primary stream rows for cost-attribution dashboards.
-            compression_llm._token_usage_source = "compression"

            # Create compression service with DB update capability
            compression_service = CompressionService(
--- a/application/api/answer/services/compression/token_counter.py
+++ b/application/api/answer/services/compression/token_counter.py
@@ -12,12 +12,6 @@ logger = logging.getLogger(__name__)
 class TokenCounter:
    """Centralized token counting for conversations and messages."""

-    # Per-image token estimate. Provider tokenizers vary widely
-    # (Gemini ~258, GPT-4o 85-1500, Claude ~1500) and the actual cost
-    # depends on resolution/detail we can't see here. Errs slightly high
-    # so the threshold check stays conservative.
-    _IMAGE_PART_TOKEN_ESTIMATE = 1500
-
    @staticmethod
    def count_message_tokens(messages: List[Dict]) -> int:
        """
@@ -35,36 +29,12 @@ class TokenCounter:
            if isinstance(content, str):
                total_tokens += num_tokens_from_string(content)
            elif isinstance(content, list):
-                # Handle structured content (tool calls, image parts, etc.)
+                # Handle structured content (tool calls, etc.)
                for item in content:
                    if isinstance(item, dict):
-                        total_tokens += TokenCounter._count_content_part(item)
+                        total_tokens += num_tokens_from_string(str(item))
        return total_tokens

-    @staticmethod
-    def _count_content_part(item: Dict) -> int:
-        # Image/file attachments are billed by the provider per image,
-        # not proportional to the inline bytes/base64 string.
-        # ``str(item)`` on a 1MB image inflates the count by ~10000x,
-        # which trips spurious compression and overflows downstream
-        # input limits.
-        item_type = item.get("type")
-
-        if "files" in item:
-            files = item.get("files")
-            count = len(files) if isinstance(files, list) and files else 1
-            return TokenCounter._IMAGE_PART_TOKEN_ESTIMATE * count
-
-        if "image_url" in item or item_type in {
-            "image",
-            "image_url",
-            "input_image",
-            "file",
-        }:
-            return TokenCounter._IMAGE_PART_TOKEN_ESTIMATE
-
-        return num_tokens_from_string(str(item))
-
    @staticmethod
    def count_query_tokens(
        queries: List[Dict[str, Any]], include_tool_calls: bool = True
--- a/application/api/answer/services/continuation_service.py
+++ b/application/api/answer/services/continuation_service.py
@@ -7,13 +7,13 @@ resume later by sending tool_actions.

 import logging
 from typing import Any, Dict, List, Optional
+from uuid import UUID

 from application.storage.db.base_repository import looks_like_uuid
 from application.storage.db.repositories.conversations import ConversationsRepository
 from application.storage.db.repositories.pending_tool_state import (
    PendingToolStateRepository,
 )
-from application.storage.db.serialization import coerce_pg_native as _make_serializable
 from application.storage.db.session import db_readonly, db_session

 logger = logging.getLogger(__name__)
@@ -21,9 +21,23 @@ logger = logging.getLogger(__name__)
 # TTL for pending states — auto-cleaned after this period
 PENDING_STATE_TTL_SECONDS = 30 * 60  # 30 minutes

-# Re-export so the existing tests at tests/api/answer/services/test_continuation_service_pg.py
-# can keep importing ``_make_serializable`` from here.
-__all__ = ["_make_serializable", "ContinuationService", "PENDING_STATE_TTL_SECONDS"]
+
+def _make_serializable(obj: Any) -> Any:
+    """Recursively coerce non-JSON values into JSON-safe forms.
+
+    ``uuid.UUID`` (from PG columns) -> ``str``; ``bytes`` -> UTF-8 text with
+    replacement; dicts, lists and tuples are rebuilt recursively (keys
+    stringified, tuples as lists). No ObjectId branch post-Mongo-cutover.
+    """
+    if isinstance(obj, UUID):
+        return str(obj)
+    if isinstance(obj, dict):
+        return {str(k): _make_serializable(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):  # tuples may hide nested UUIDs/bytes too
+        return [_make_serializable(v) for v in obj]
+    if isinstance(obj, bytes):
+        return obj.decode("utf-8", errors="replace")
+    return obj


 class ContinuationService:
@@ -141,23 +155,3 @@ class ContinuationService:
                f"Deleted continuation state for conversation {conversation_id}"
            )
        return deleted
-
-    def mark_resuming(self, conversation_id: str, user: str) -> bool:
-        """Flip the pending row to ``resuming`` so a crashed resume can be retried."""
-        with db_session() as conn:
-            conv = ConversationsRepository(conn).get_by_legacy_id(conversation_id)
-            if conv is not None:
-                pg_conv_id = conv["id"]
-            elif looks_like_uuid(conversation_id):
-                pg_conv_id = conversation_id
-            else:
-                return False
-            flipped = PendingToolStateRepository(conn).mark_resuming(
-                pg_conv_id, user
-            )
-        if flipped:
-            logger.info(
-                f"Marked continuation state as resuming for conversation "
-                f"{conversation_id}"
-            )
-        return flipped
--- a/application/api/answer/services/conversation_service.py
+++ b/application/api/answer/services/conversation_service.py
@@ -6,7 +6,6 @@ than held for the duration of a stream.
 """

 import logging
-import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, List, Optional

@@ -22,12 +21,6 @@ from application.storage.db.session import db_readonly, db_session
 logger = logging.getLogger(__name__)


-# Shown to the user if the worker dies mid-stream and the response is never finalised.
-TERMINATED_RESPONSE_PLACEHOLDER = (
-    "Response was terminated prior to completion, try regenerating."
-)
-
-
 class ConversationService:
    def get_conversation(
        self, conversation_id: str, user_id: str
@@ -186,236 +179,6 @@ class ConversationService:
                repo.append_message(conv_pg_id, append_payload)
            return conv_pg_id

-    def save_user_question(
-        self,
-        conversation_id: Optional[str],
-        question: str,
-        decoded_token: Dict[str, Any],
-        *,
-        attachment_ids: Optional[List[str]] = None,
-        api_key: Optional[str] = None,
-        agent_id: Optional[str] = None,
-        is_shared_usage: bool = False,
-        shared_token: Optional[str] = None,
-        model_id: Optional[str] = None,
-        request_id: Optional[str] = None,
-        status: str = "pending",
-        index: Optional[int] = None,
-    ) -> Dict[str, str]:
-        """Reserve the placeholder message row before the LLM call.
-
-        ``index`` triggers regenerate semantics: messages at
-        ``position >= index`` are truncated so the new placeholder
-        lands at ``position = index`` rather than appending.
-
-        Returns ``{"conversation_id", "message_id", "request_id"}``.
-        """
-        if decoded_token is None:
-            raise ValueError("Invalid or missing authentication token")
-        user_id = decoded_token.get("sub")
-        if not user_id:
-            raise ValueError("User ID not found in token")
-
-        request_id = request_id or str(uuid.uuid4())
-
-        resolved_api_key: Optional[str] = None
-        resolved_agent_id: Optional[str] = None
-        if api_key and not conversation_id:
-            with db_readonly() as conn:
-                agent = AgentsRepository(conn).find_by_key(api_key)
-            if agent:
-                resolved_api_key = agent.get("key")
-            if agent_id:
-                resolved_agent_id = agent_id
-
-        with db_session() as conn:
-            repo = ConversationsRepository(conn)
-            if conversation_id:
-                conv = repo.get_any(conversation_id, user_id)
-                if conv is None:
-                    raise ValueError("Conversation not found or unauthorized")
-                conv_pg_id = str(conv["id"])
-                # Regenerate / edit-prior-question: drop the message at
-                # ``index`` and everything after it so the new
-                # ``reserve_message`` lands at ``position=index`` rather
-                # than appending at the end of the conversation.
-                if isinstance(index, int) and index >= 0:
-                    repo.truncate_after(conv_pg_id, keep_up_to=index - 1)
-            else:
-                fallback_name = (question[:50] if question else "New Conversation")
-                conv = repo.create(
-                    user_id,
-                    fallback_name,
-                    agent_id=resolved_agent_id,
-                    api_key=resolved_api_key,
-                    is_shared_usage=bool(resolved_agent_id and is_shared_usage),
-                    shared_token=(
-                        shared_token
-                        if (resolved_agent_id and is_shared_usage)
-                        else None
-                    ),
-                )
-                conv_pg_id = str(conv["id"])
-
-            row = repo.reserve_message(
-                conv_pg_id,
-                prompt=question,
-                placeholder_response=TERMINATED_RESPONSE_PLACEHOLDER,
-                request_id=request_id,
-                status=status,
-                attachments=attachment_ids,
-                model_id=model_id,
-            )
-            message_id = str(row["id"])
-
-        return {
-            "conversation_id": conv_pg_id,
-            "message_id": message_id,
-            "request_id": request_id,
-        }
-
-    def update_message_status(self, message_id: str, status: str) -> bool:
-        """Cheap status-only transition (e.g. ``pending → streaming``)."""
-        if not message_id:
-            return False
-        with db_session() as conn:
-            return ConversationsRepository(conn).update_message_status(
-                message_id, status,
-            )
-
-    def heartbeat_message(self, message_id: str) -> bool:
-        """Bump ``message_metadata.last_heartbeat_at`` so the reconciler's
-        staleness sweep counts the row as alive. No-ops on terminal rows.
-        """
-        if not message_id:
-            return False
-        with db_session() as conn:
-            return ConversationsRepository(conn).heartbeat_message(message_id)
-
-    def finalize_message(
-        self,
-        message_id: str,
-        response: str,
-        *,
-        thought: str = "",
-        sources: Optional[List[Dict[str, Any]]] = None,
-        tool_calls: Optional[List[Dict[str, Any]]] = None,
-        model_id: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
-        status: str = "complete",
-        error: Optional[BaseException] = None,
-        title_inputs: Optional[Dict[str, Any]] = None,
-    ) -> bool:
-        """Commit the response and tool_call confirms in one transaction."""
-        if not message_id:
-            return False
-        sources = sources or []
-        for source in sources:
-            if "text" in source and isinstance(source["text"], str):
-                source["text"] = source["text"][:1000]
-
-        merged_metadata: Dict[str, Any] = dict(metadata or {})
-        if status == "failed" and error is not None:
-            merged_metadata.setdefault(
-                "error", f"{type(error).__name__}: {str(error)}"
-            )
-
-        update_fields: Dict[str, Any] = {
-            "response": response,
-            "status": status,
-            "thought": thought,
-            "sources": sources,
-            "tool_calls": tool_calls or [],
-            "metadata": merged_metadata,
-        }
-        if model_id is not None:
-            update_fields["model_id"] = model_id
-
-        # Atomic message update + tool_call_attempts confirm; the
-        # ``only_if_non_terminal`` guard prevents a late stream from
-        # retracting a row the reconciler already escalated.
-        with db_session() as conn:
-            repo = ConversationsRepository(conn)
-            ok = repo.update_message_by_id(
-                message_id, update_fields,
-                only_if_non_terminal=True,
-            )
-            if not ok:
-                logger.warning(
-                    f"finalize_message: no row updated for message_id={message_id} "
-                    f"(possibly already terminal — reconciler may have escalated)"
-                )
-                return False
-            repo.confirm_executed_tool_calls(message_id)
-
-        # Outside the txn — title-gen is a multi-second LLM round trip.
-        if title_inputs and status == "complete":
-            try:
-                with db_session() as conn:
-                    self._maybe_generate_title(conn, message_id, title_inputs)
-            except Exception as e:
-                logger.error(
-                    f"finalize_message title generation failed: {e}",
-                    exc_info=True,
-                )
-        return True
-
-    def _maybe_generate_title(
-        self,
-        conn,
-        message_id: str,
-        title_inputs: Dict[str, Any],
-    ) -> None:
-        """Generate an LLM-summarised conversation name if one isn't set yet."""
-        llm = title_inputs.get("llm")
-        question = title_inputs.get("question") or ""
-        response = title_inputs.get("response") or ""
-        fallback_name = title_inputs.get("fallback_name") or question[:50]
-        if llm is None:
-            return
-
-        row = conn.execute(
-            sql_text(
-                "SELECT c.id, c.name FROM conversation_messages m "
-                "JOIN conversations c ON c.id = m.conversation_id "
-                "WHERE m.id = CAST(:mid AS uuid)"
-            ),
-            {"mid": message_id},
-        ).fetchone()
-        if row is None:
-            return
-        conv_id, current_name = str(row[0]), row[1]
-        if current_name and current_name != fallback_name:
-            return
-
-        messages_summary = [
-            {
-                "role": "system",
-                "content": "You are a helpful assistant that creates concise conversation titles. "
-                "Summarize conversations in 3 words or less using the same language as the user.",
-            },
-            {
-                "role": "user",
-                "content": "Summarise following conversation in no more than 3 words, "
-                "respond ONLY with the summary, use the same language as the "
-                "user query \n\nUser: " + question + "\n\n" + "AI: " + response,
-            },
-        ]
-        completion = llm.gen(
-            model=getattr(llm, "model_id", None) or title_inputs.get("model_id"),
-            messages=messages_summary,
-            max_tokens=500,
-        )
-        if not completion or not completion.strip():
-            completion = fallback_name or "New Conversation"
-        conn.execute(
-            sql_text(
-                "UPDATE conversations SET name = :name, updated_at = now() "
-                "WHERE id = CAST(:id AS uuid)"
-            ),
-            {"id": conv_id, "name": completion.strip()},
-        )
-
    def update_compression_metadata(
        self, conversation_id: str, compression_metadata: Dict[str, Any]
    ) -> None:
--- a/application/api/answer/services/stream_processor.py
+++ b/application/api/answer/services/stream_processor.py
@@ -123,10 +123,6 @@ class StreamProcessor:
        self.model_id: Optional[str] = None
        # BYOM-resolution scope, set by _validate_and_set_model.
        self.model_user_id: Optional[str] = None
-        # WAL placeholder id pulled from continuation state on resume.
-        self.reserved_message_id: Optional[str] = None
-        # Carried through resumes so multi-pause runs keep one request_id.
-        self.request_id: Optional[str] = None
        self.conversation_service = ConversationService()
        self.compression_orchestrator = CompressionOrchestrator(
            self.conversation_service
@@ -932,20 +928,6 @@ class StreamProcessor:
        if not state:
            raise ValueError("No pending tool state found for this conversation")

-        # Claim the resume up-front. ``mark_resuming`` only flips ``pending``
-        # → ``resuming``; if it returns False, another resume already
-        # claimed this row (status='resuming') — bail before any further
-        # LLM/tool work to avoid double-execution. The cleanup janitor
-        # reverts a stale ``resuming`` claim back to ``pending`` after the
-        # 10-minute grace window so the user can retry.
-        if not cont_service.mark_resuming(
-            conversation_id, self.initial_user_id,
-        ):
-            raise ValueError(
-                "Resume already in progress for this conversation; "
-                "retry after the grace window if it stalls."
-            )
-
        messages = state["messages"]
        pending_tool_calls = state["pending_tool_calls"]
        tools_dict = state["tools_dict"]
@@ -1040,10 +1022,9 @@ class StreamProcessor:
        self.agent_id = agent_id
        self.agent_config["user_api_key"] = user_api_key
        self.conversation_id = conversation_id
-        # Reused on resume so the same WAL row gets finalised and
-        # request_id stays consistent across token_usage rows.
-        self.reserved_message_id = agent_config.get("reserved_message_id")
-        self.request_id = agent_config.get("request_id")
+
+        # Delete state so it can't be replayed
+        cont_service.delete_state(conversation_id, self.initial_user_id)

        return agent, messages, tools_dict, pending_tool_calls, tool_actions

--- a/application/api/user/agents/routes.py
+++ b/application/api/user/agents/routes.py
@@ -46,9 +46,7 @@ AGENT_TYPE_SCHEMAS = {
            "prompt_id",
        ],
        "required_draft": ["name"],
-        # ``prompt_id`` intentionally omitted — the "default" sentinel
-        # is acceptable and maps to NULL downstream.
-        "validate_published": ["name", "description"],
+        "validate_published": ["name", "description", "prompt_id"],
        "validate_draft": [],
        "require_source": True,
        "fields": [
@@ -1011,16 +1009,12 @@ class UpdateAgent(Resource):
                                400,
                            )
                    else:
-                        # ``prompt_id`` is intentionally omitted: the
-                        # frontend's "default" choice maps to NULL here
-                        # (see the prompt_id branch above), and NULL
-                        # means "use the built-in default prompt" which
-                        # is a valid published-agent state.
                        missing_published_fields = []
                        for req_field, field_label in (
                            ("name", "Agent name"),
                            ("description", "Agent description"),
                            ("chunks", "Chunks count"),
+                            ("prompt_id", "Prompt"),
                            ("agent_type", "Agent type"),
                        ):
                            final_value = update_fields.get(
@@ -1034,23 +1028,8 @@ class UpdateAgent(Resource):
                        extra_final = update_fields.get(
                            "extra_source_ids", existing_agent.get("extra_source_ids") or [],
                        )
-                        # ``retriever`` carries the runtime identity for
-                        # agents that publish against the synthetic
-                        # "Default" source (frontend's auto-selected
-                        # ``{name: "Default", retriever: "classic"}``
-                        # entry has no ``id``, so ``source_id`` ends up
-                        # NULL even though the user picked something).
-                        # Without this fallback the most common new-agent
-                        # publish flow gets a 400.
-                        retriever_final = update_fields.get(
-                            "retriever", existing_agent.get("retriever"),
-                        )
-                        if (
-                            not source_final
-                            and not extra_final
-                            and not retriever_final
-                        ):
-                            missing_published_fields.append("Source or retriever")
+                        if not source_final and not extra_final:
+                            missing_published_fields.append("Source")
                        if missing_published_fields:
                            return make_response(
                                jsonify(
--- a/application/api/user/agents/webhooks.py
+++ b/application/api/user/agents/webhooks.py
@@ -1,19 +1,15 @@
 """Agent management webhook handlers."""

 import secrets
-import uuid

 from flask import current_app, jsonify, make_response, request
 from flask_restx import Namespace, Resource
-from sqlalchemy import text as sql_text

 from application.api import api
 from application.api.user.base import require_agent
 from application.api.user.tasks import process_agent_webhook
 from application.core.settings import settings
-from application.storage.db.base_repository import looks_like_uuid
 from application.storage.db.repositories.agents import AgentsRepository
-from application.storage.db.repositories.idempotency import IdempotencyRepository
 from application.storage.db.session import db_readonly, db_session


@@ -22,37 +18,6 @@ agents_webhooks_ns = Namespace(
 )


-_IDEMPOTENCY_KEY_MAX_LEN = 256
-
-
-def _read_idempotency_key():
-    """Return (key, error_response). Empty header → (None, None); oversized → (None, 400)."""
-    key = request.headers.get("Idempotency-Key")
-    if not key:
-        return None, None
-    if len(key) > _IDEMPOTENCY_KEY_MAX_LEN:
-        return None, make_response(
-            jsonify(
-                {
-                    "success": False,
-                    "message": (
-                        f"Idempotency-Key exceeds maximum length of "
-                        f"{_IDEMPOTENCY_KEY_MAX_LEN} characters"
-                    ),
-                }
-            ),
-            400,
-        )
-    return key, None
-
-
-def _scoped_idempotency_key(idempotency_key, scope):
-    """``{scope}:{key}`` so different agents can't collide on the same key."""
-    if not idempotency_key or not scope:
-        return None
-    return f"{scope}:{idempotency_key}"
-
-
@agents_webhooks_ns.route("/agent_webhook")
 class AgentWebhook(Resource):
    @api.doc(
@@ -103,7 +68,7 @@ class AgentWebhook(Resource):
 class AgentWebhookListener(Resource):
    method_decorators = [require_agent]

-    def _enqueue_webhook_task(self, agent_id_str, payload, source_method, agent=None):
+    def _enqueue_webhook_task(self, agent_id_str, payload, source_method):
        if not payload:
            current_app.logger.warning(
                f"Webhook ({source_method}) received for agent {agent_id_str} with empty payload."
@@ -112,94 +77,26 @@ class AgentWebhookListener(Resource):
            f"Incoming {source_method} webhook for agent {agent_id_str}. Enqueuing task with payload: {payload}"
        )

-        idempotency_key, key_error = _read_idempotency_key()
-        if key_error is not None:
-            return key_error
-        # Resolve to PG UUID first so dedup writes don't crash on legacy ids.
-        agent_uuid = None
-        if agent is not None:
-            candidate = str(agent.get("id") or "")
-            if looks_like_uuid(candidate):
-                agent_uuid = candidate
-        if idempotency_key and agent_uuid is None:
-            current_app.logger.warning(
-                "Skipping webhook idempotency dedup: agent %s has non-UUID id",
-                agent_id_str,
-            )
-            idempotency_key = None
-        # Agent-scoped (webhooks have no user_id).
-        scoped_key = _scoped_idempotency_key(idempotency_key, agent_uuid)
-        # Claim before enqueue; the loser returns the winner's task_id.
-        predetermined_task_id = None
-        if scoped_key:
-            predetermined_task_id = str(uuid.uuid4())
-            with db_session() as conn:
-                claimed = IdempotencyRepository(conn).record_webhook(
-                    key=scoped_key,
-                    agent_id=agent_uuid,
-                    task_id=predetermined_task_id,
-                    response_json={
-                        "success": True, "task_id": predetermined_task_id,
-                    },
-                )
-            if claimed is None:
-                with db_readonly() as conn:
-                    cached = IdempotencyRepository(conn).get_webhook(scoped_key)
-                if cached is not None:
-                    return make_response(jsonify(cached["response_json"]), 200)
-                return make_response(
-                    jsonify({"success": True, "task_id": "deduplicated"}), 200
-                )
-
        try:
-            apply_kwargs = dict(
-                kwargs={
-                    "agent_id": agent_id_str,
-                    "payload": payload,
-                    # Scoped so the worker dedup row matches the HTTP claim.
-                    "idempotency_key": scoped_key or idempotency_key,
-                },
+            task = process_agent_webhook.delay(
+                agent_id=agent_id_str,
+                payload=payload,
            )
-            if predetermined_task_id is not None:
-                apply_kwargs["task_id"] = predetermined_task_id
-            task = process_agent_webhook.apply_async(**apply_kwargs)
            current_app.logger.info(
                f"Task {task.id} enqueued for agent {agent_id_str} ({source_method})."
            )
-            response_payload = {"success": True, "task_id": task.id}
-            return make_response(jsonify(response_payload), 200)
+            return make_response(jsonify({"success": True, "task_id": task.id}), 200)
        except Exception as err:
            current_app.logger.error(
                f"Error enqueuing webhook task ({source_method}) for agent {agent_id_str}: {err}",
                exc_info=True,
            )
-            if scoped_key:
-                # Roll back the claim so a retry can succeed.
-                try:
-                    with db_session() as conn:
-                        conn.execute(
-                            sql_text(
-                                "DELETE FROM webhook_dedup "
-                                "WHERE idempotency_key = :k"
-                            ),
-                            {"k": scoped_key},
-                        )
-                except Exception:
-                    current_app.logger.exception(
-                        "Failed to release webhook_dedup claim for key=%s",
-                        scoped_key,
-                    )
            return make_response(
                jsonify({"success": False, "message": "Error processing webhook"}), 500
            )

    @api.doc(
-        description=(
-            "Webhook listener for agent events (POST). Expects JSON payload, which "
-            "is used to trigger processing. Honors an optional ``Idempotency-Key`` "
-            "header: a repeat request with the same key within 24h returns the "
-            "original cached response and does not re-enqueue the task."
-        ),
+        description="Webhook listener for agent events (POST). Expects JSON payload, which is used to trigger processing.",
    )
    def post(self, webhook_token, agent, agent_id_str):
        payload = request.get_json()
@@ -213,20 +110,11 @@ class AgentWebhookListener(Resource):
                ),
                400,
            )
-        return self._enqueue_webhook_task(
-            agent_id_str, payload, source_method="POST", agent=agent,
-        )
+        return self._enqueue_webhook_task(agent_id_str, payload, source_method="POST")

    @api.doc(
-        description=(
-            "Webhook listener for agent events (GET). Uses URL query parameters as "
-            "payload to trigger processing. Honors an optional ``Idempotency-Key`` "
-            "header: a repeat request with the same key within 24h returns the "
-            "original cached response and does not re-enqueue the task."
-        ),
+        description="Webhook listener for agent events (GET). Uses URL query parameters as payload to trigger processing.",
    )
    def get(self, webhook_token, agent, agent_id_str):
        payload = request.args.to_dict(flat=True)
-        return self._enqueue_webhook_task(
-            agent_id_str, payload, source_method="GET", agent=agent,
-        )
+        return self._enqueue_webhook_task(agent_id_str, payload, source_method="GET")
--- a/application/api/user/conversations/routes.py
+++ b/application/api/user/conversations/routes.py
@@ -4,10 +4,8 @@ import datetime

 from flask import current_app, jsonify, make_response, request
 from flask_restx import fields, Namespace, Resource
-from sqlalchemy import text as sql_text

 from application.api import api
-from application.storage.db.base_repository import looks_like_uuid, row_to_dict
 from application.storage.db.repositories.attachments import AttachmentsRepository
 from application.storage.db.repositories.conversations import ConversationsRepository
 from application.storage.db.session import db_readonly, db_session
@@ -135,7 +133,6 @@ class GetSingleConversation(Resource):
                attachments_repo = AttachmentsRepository(conn)
                queries = []
                for msg in messages:
-                    metadata = msg.get("metadata") or {}
                    query = {
                        "prompt": msg.get("prompt"),
                        "response": msg.get("response"),
@@ -144,15 +141,9 @@ class GetSingleConversation(Resource):
                        "tool_calls": msg.get("tool_calls") or [],
                        "timestamp": msg.get("timestamp"),
                        "model_id": msg.get("model_id"),
-                        # Lets the client distinguish placeholder rows from
-                        # finalised answers and tail-poll in-flight ones.
-                        "message_id": str(msg["id"]) if msg.get("id") else None,
-                        "status": msg.get("status"),
-                        "request_id": msg.get("request_id"),
-                        "last_heartbeat_at": metadata.get("last_heartbeat_at"),
                    }
-                    if metadata:
-                        query["metadata"] = metadata
+                    if msg.get("metadata"):
+                        query["metadata"] = msg["metadata"]
                    # Feedback on conversation_messages is a JSONB blob with
                    # shape {"text": <str>, "timestamp": <iso>}. The legacy
                    # frontend consumed a flat scalar feedback string, so
@@ -310,61 +301,3 @@ class SubmitFeedback(Resource):
            current_app.logger.error(f"Error submitting feedback: {err}", exc_info=True)
            return make_response(jsonify({"success": False}), 400)
        return make_response(jsonify({"success": True}), 200)
-
-
-@conversations_ns.route("/messages/<string:message_id>/tail")
-class GetMessageTail(Resource):
-    @api.doc(
-        description=(
-            "Current state of one conversation_messages row, scoped to the "
-            "authenticated user. Used to reconnect to an in-flight stream "
-            "after a refresh."
-        ),
-        params={"message_id": "Message UUID"},
-    )
-    def get(self, message_id):
-        decoded_token = request.decoded_token
-        if not decoded_token:
-            return make_response(jsonify({"success": False}), 401)
-        if not looks_like_uuid(message_id):
-            return make_response(
-                jsonify({"success": False, "message": "Invalid message id"}), 400
-            )
-        user_id = decoded_token.get("sub")
-        try:
-            with db_readonly() as conn:
-                # Owner-or-shared, matching ``ConversationsRepository.get``.
-                row = conn.execute(
-                    sql_text(
-                        "SELECT m.* FROM conversation_messages m "
-                        "JOIN conversations c ON c.id = m.conversation_id "
-                        "WHERE m.id = CAST(:mid AS uuid) "
-                        "AND (c.user_id = :uid OR :uid = ANY(c.shared_with))"
-                    ),
-                    {"mid": message_id, "uid": user_id},
-                ).fetchone()
-                if row is None:
-                    return make_response(jsonify({"status": "not found"}), 404)
-                msg = row_to_dict(row)
-        except Exception as err:
-            current_app.logger.error(
-                f"Error tailing message {message_id}: {err}", exc_info=True
-            )
-            return make_response(jsonify({"success": False}), 400)
-        metadata = msg.get("message_metadata") or {}
-        return make_response(
-            jsonify(
-                {
-                    "message_id": str(msg["id"]),
-                    "status": msg.get("status"),
-                    "response": msg.get("response"),
-                    "thought": msg.get("thought"),
-                    "sources": msg.get("sources") or [],
-                    "tool_calls": msg.get("tool_calls") or [],
-                    "request_id": msg.get("request_id"),
-                    "last_heartbeat_at": metadata.get("last_heartbeat_at"),
-                    "error": metadata.get("error"),
-                }
-            ),
-            200,
-        )
--- a/application/api/user/idempotency.py
+++ b/application/api/user/idempotency.py
@@ -1,237 +0,0 @@
-"""Per-Celery-task idempotency wrapper backed by ``task_dedup``."""
-
-from __future__ import annotations
-
-import functools
-import logging
-import threading
-import uuid
-from typing import Any, Callable, Optional
-
-from application.storage.db.repositories.idempotency import IdempotencyRepository
-from application.storage.db.session import db_readonly, db_session
-
-
-logger = logging.getLogger(__name__)
-
-
-# Poison-loop cap; transient-failure headroom without infinite retry.
-MAX_TASK_ATTEMPTS = 5
-
-# 30s heartbeat / 60s TTL → ~2 missed ticks of slack before reclaim.
-LEASE_TTL_SECONDS = 60
-LEASE_HEARTBEAT_INTERVAL = 30
-
-# 10 × 60s ≈ 5 min of deferral before giving up on a held lease.
-LEASE_RETRY_MAX = 10
-
-
-def with_idempotency(task_name: str) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
-    """Short-circuit on completed key; gate concurrent runs via a lease.
-
-    Entry short-circuits:
-      - completed row → return cached result
-      - live lease held → retry(countdown=LEASE_TTL_SECONDS)
-      - attempt_count > MAX_TASK_ATTEMPTS → poison-loop alert
-    Success writes ``completed``; exceptions leave ``pending`` for
-    autoretry until the poison-loop guard trips.
-    """
-
-    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
-        @functools.wraps(fn)
-        def wrapper(self, *args: Any, idempotency_key: Any = None, **kwargs: Any) -> Any:
-            key = idempotency_key if isinstance(idempotency_key, str) and idempotency_key else None
-            if key is None:
-                return fn(self, *args, idempotency_key=idempotency_key, **kwargs)
-
-            cached = _lookup_completed(key)
-            if cached is not None:
-                logger.info(
-                    "idempotency hit for task=%s key=%s — returning cached result",
-                    task_name, key,
-                )
-                return cached
-
-            owner_id = str(uuid.uuid4())
-            attempt = _try_claim_lease(
-                key, task_name, _safe_task_id(self), owner_id,
-            )
-            if attempt is None:
-                # Live lease held by another worker. Re-queue and bail
-                # quickly — by the time the retry fires (LEASE_TTL
-                # seconds), Worker 1 has either finalised (we'll hit
-                # ``_lookup_completed`` and return cached) or its lease
-                # has expired and we can claim.
-                logger.info(
-                    "idempotency: live lease held; deferring task=%s key=%s",
-                    task_name, key,
-                )
-                raise self.retry(
-                    countdown=LEASE_TTL_SECONDS,
-                    max_retries=LEASE_RETRY_MAX,
-                )
-
-            if attempt > MAX_TASK_ATTEMPTS:
-                logger.error(
-                    "idempotency poison-loop guard: task=%s key=%s attempts=%s",
-                    task_name, key, attempt,
-                    extra={
-                        "alert": "idempotency_poison_loop",
-                        "task_name": task_name,
-                        "idempotency_key": key,
-                        "attempts": attempt,
-                    },
-                )
-                poisoned = {
-                    "success": False,
-                    "error": "idempotency poison-loop guard tripped",
-                    "attempts": attempt,
-                }
-                _finalize(key, poisoned, status="failed")
-                return poisoned
-
-            heartbeat_thread, heartbeat_stop = _start_lease_heartbeat(
-                key, owner_id,
-            )
-            try:
-                result = fn(self, *args, idempotency_key=idempotency_key, **kwargs)
-                _finalize(key, result, status="completed")
-                return result
-            except Exception:
-                # Drop the lease so the next retry doesn't wait LEASE_TTL.
-                _release_lease(key, owner_id)
-                raise
-            finally:
-                _stop_lease_heartbeat(heartbeat_thread, heartbeat_stop)
-
-        return wrapper
-
-    return decorator
-
-
-def _lookup_completed(key: str) -> Any:
-    """Return cached ``result_json`` if a completed row exists for ``key``, else None."""
-    with db_readonly() as conn:
-        row = IdempotencyRepository(conn).get_task(key)
-    if row is None:
-        return None
-    if row.get("status") != "completed":
-        return None
-    return row.get("result_json")
-
-
-def _try_claim_lease(
-    key: str, task_name: str, task_id: str, owner_id: str,
-) -> Optional[int]:
-    """Atomic CAS; returns ``attempt_count`` or ``None`` when held.
-
-    DB outage → treated as ``attempt=1`` so transient failures don't
-    block all task execution; reconciler repairs the lease columns.
-    """
-    try:
-        with db_session() as conn:
-            return IdempotencyRepository(conn).try_claim_lease(
-                key=key,
-                task_name=task_name,
-                task_id=task_id,
-                owner_id=owner_id,
-                ttl_seconds=LEASE_TTL_SECONDS,
-            )
-    except Exception:
-        logger.exception(
-            "idempotency lease-claim failed for key=%s task=%s", key, task_name,
-        )
-        return 1
-
-
-def _finalize(key: str, result_json: Any, *, status: str) -> None:
-    """Best-effort terminal write. Never let DB outage fail the task."""
-    try:
-        with db_session() as conn:
-            IdempotencyRepository(conn).finalize_task(
-                key=key, result_json=result_json, status=status,
-            )
-    except Exception:
-        logger.exception(
-            "idempotency finalize failed for key=%s status=%s", key, status,
-        )
-
-
-def _release_lease(key: str, owner_id: str) -> None:
-    """Best-effort lease release on the wrapper's exception path."""
-    try:
-        with db_session() as conn:
-            IdempotencyRepository(conn).release_lease(key, owner_id)
-    except Exception:
-        logger.exception("idempotency release-lease failed for key=%s", key)
-
-
-def _start_lease_heartbeat(
-    key: str, owner_id: str,
-) -> tuple[threading.Thread, threading.Event]:
-    """Spawn a daemon thread that bumps ``lease_expires_at`` every
-    :data:`LEASE_HEARTBEAT_INTERVAL` seconds until ``stop_event`` fires.
-
-    Mirrors ``application.worker._start_ingest_heartbeat`` so the two
-    durability heartbeats share shape and cadence.
-    """
-    stop_event = threading.Event()
-    thread = threading.Thread(
-        target=_lease_heartbeat_loop,
-        args=(key, owner_id, stop_event, LEASE_HEARTBEAT_INTERVAL),
-        daemon=True,
-        name=f"idempotency-lease-heartbeat:{key[:32]}",
-    )
-    thread.start()
-    return thread, stop_event
-
-
-def _stop_lease_heartbeat(
-    thread: threading.Thread, stop_event: threading.Event,
-) -> None:
-    """Signal the heartbeat thread to exit and join with a short timeout."""
-    stop_event.set()
-    thread.join(timeout=10)
-
-
-def _lease_heartbeat_loop(
-    key: str,
-    owner_id: str,
-    stop_event: threading.Event,
-    interval: int,
-) -> None:
-    """Refresh the lease until ``stop_event`` is set or ownership is lost.
-
-    A failed refresh (rowcount 0) means another worker stole the lease
-    after expiry — at that point the damage is already possible, so we
-    log and keep ticking. Don't escalate to thread death; the main task
-    body needs to keep running so its outcome is at least *recorded*.
-    """
-    while not stop_event.wait(interval):
-        try:
-            with db_session() as conn:
-                still_owned = IdempotencyRepository(conn).refresh_lease(
-                    key=key, owner_id=owner_id, ttl_seconds=LEASE_TTL_SECONDS,
-                )
-            if not still_owned:
-                logger.warning(
-                    "idempotency lease lost mid-task for key=%s "
-                    "(another worker may have taken over)",
-                    key,
-                )
-        except Exception:
-            logger.exception(
-                "idempotency lease-heartbeat tick failed for key=%s", key,
-            )
-
-
-def _safe_task_id(task_self: Any) -> str:
-    """Best-effort extraction of ``self.request.id`` from a Celery task."""
-    try:
-        request = getattr(task_self, "request", None)
-        task_id: Optional[str] = (
-            getattr(request, "id", None) if request is not None else None
-        )
-    except Exception:
-        task_id = None
-    return task_id or "unknown"
--- a/application/api/user/reconciliation.py
+++ b/application/api/user/reconciliation.py
@@ -1,196 +0,0 @@
-"""Reconciler tick: sweep stuck rows and escalate to terminal status + alert."""
-
-from __future__ import annotations
-
-import logging
-import uuid
-from typing import Any, Dict, Optional
-
-from sqlalchemy import Connection
-
-from application.api.user.idempotency import MAX_TASK_ATTEMPTS
-from application.core.settings import settings
-from application.storage.db.engine import get_engine
-from application.storage.db.repositories.reconciliation import (
-    ReconciliationRepository,
-)
-from application.storage.db.repositories.stack_logs import StackLogsRepository
-
-logger = logging.getLogger(__name__)
-
-
-MAX_MESSAGE_RECONCILE_ATTEMPTS = 3
-
-
-def run_reconciliation() -> Dict[str, Any]:
-    """Single tick of the reconciler. Five sweeps, FOR UPDATE SKIP LOCKED.
-
-    Stuck ``executed`` tool calls always flip to ``failed`` — operators
-    handle cleanup manually via the structured alert. The side effect is
-    assumed to have committed; no automated rollback is attempted.
-
-    Stuck ``task_dedup`` rows (lease expired AND attempts >= max)
-    promote to ``failed`` so a same-key retry can re-claim instead of
-    sitting in ``pending`` until 24 h TTL.
-    """
-    if not settings.POSTGRES_URI:
-        return {
-            "messages_failed": 0,
-            "tool_calls_failed": 0,
-            "skipped": "POSTGRES_URI not set",
-        }
-
-    engine = get_engine()
-    summary = {
-        "messages_failed": 0,
-        "tool_calls_failed": 0,
-        "ingests_stalled": 0,
-        "idempotency_pending_failed": 0,
-    }
-
-    with engine.begin() as conn:
-        repo = ReconciliationRepository(conn)
-        for msg in repo.find_and_lock_stuck_messages():
-            new_count = repo.increment_message_reconcile_attempts(msg["id"])
-            if new_count >= MAX_MESSAGE_RECONCILE_ATTEMPTS:
-                repo.mark_message_failed(
-                    msg["id"],
-                    error=(
-                        "reconciler: stuck in pending/streaming for >5 min "
-                        f"after {new_count} attempts"
-                    ),
-                )
-                summary["messages_failed"] += 1
-                _emit_alert(
-                    conn,
-                    name="reconciler_message_failed",
-                    user_id=msg.get("user_id"),
-                    detail={
-                        "message_id": str(msg["id"]),
-                        "attempts": new_count,
-                    },
-                )
-
-    with engine.begin() as conn:
-        repo = ReconciliationRepository(conn)
-        for row in repo.find_and_lock_proposed_tool_calls():
-            repo.mark_tool_call_failed(
-                row["call_id"],
-                error=(
-                    "reconciler: stuck in 'proposed' for >5 min; "
-                    "side effect status unknown"
-                ),
-            )
-            summary["tool_calls_failed"] += 1
-            _emit_alert(
-                conn,
-                name="reconciler_tool_call_failed_proposed",
-                user_id=None,
-                detail={
-                    "call_id": row["call_id"],
-                    "tool_name": row.get("tool_name"),
-                },
-            )
-
-    with engine.begin() as conn:
-        repo = ReconciliationRepository(conn)
-        for row in repo.find_and_lock_executed_tool_calls():
-            repo.mark_tool_call_failed(
-                row["call_id"],
-                error=(
-                    "reconciler: executed-not-confirmed; side effect "
-                    "assumed committed, manual cleanup required"
-                ),
-            )
-            summary["tool_calls_failed"] += 1
-            _emit_alert(
-                conn,
-                name="reconciler_tool_call_failed_executed",
-                user_id=None,
-                detail={
-                    "call_id": row["call_id"],
-                    "tool_name": row.get("tool_name"),
-                    "action_name": row.get("action_name"),
-                },
-            )
-
-    # Q4: ingest checkpoints whose heartbeat has gone silent. The
-    # reconciler only escalates (alerts) — it doesn't kill the worker
-    # or roll back the partial embed. The next dispatch resumes from
-    # ``last_index`` thanks to the per-chunk checkpoint, so this is an
-    # observability sweep, not a recovery action.
-    with engine.begin() as conn:
-        repo = ReconciliationRepository(conn)
-        for row in repo.find_and_lock_stalled_ingests():
-            summary["ingests_stalled"] += 1
-            _emit_alert(
-                conn,
-                name="reconciler_ingest_stalled",
-                user_id=None,
-                detail={
-                    "source_id": str(row.get("source_id")),
-                    "embedded_chunks": row.get("embedded_chunks"),
-                    "total_chunks": row.get("total_chunks"),
-                    "last_updated": str(row.get("last_updated")),
-                },
-            )
-            # Bump the heartbeat so we don't re-alert every tick.
-            repo.touch_ingest_progress(str(row["source_id"]))
-
-    # Q5: idempotency rows whose lease expired with attempts exhausted.
-    # The wrapper's poison-loop guard normally finalises these, but if
-    # the wrapper itself died mid-task (worker SIGKILL, OOM during
-    # heartbeat) the row sits in ``pending`` blocking same-key retries
-    # via ``_lookup_completed`` returning None for the whole 24 h TTL.
-    # Promote to ``failed`` so a retry can re-claim and either resume
-    # or fail loudly.
-    with engine.begin() as conn:
-        repo = ReconciliationRepository(conn)
-        for row in repo.find_stuck_idempotency_pending(
-            max_attempts=MAX_TASK_ATTEMPTS,
-        ):
-            error_msg = (
-                "reconciler: idempotency lease expired with attempts "
-                f"({row['attempt_count']}) >= {MAX_TASK_ATTEMPTS}; "
-                "task abandoned"
-            )
-            repo.mark_idempotency_pending_failed(
-                row["idempotency_key"], error=error_msg,
-            )
-            summary["idempotency_pending_failed"] += 1
-            _emit_alert(
-                conn,
-                name="reconciler_idempotency_pending_failed",
-                user_id=None,
-                detail={
-                    "idempotency_key": row["idempotency_key"],
-                    "task_name": row.get("task_name"),
-                    "task_id": row.get("task_id"),
-                    "attempts": row.get("attempt_count"),
-                },
-            )
-
-    return summary
-
-
-def _emit_alert(
-    conn: Connection,
-    *,
-    name: str,
-    user_id: Optional[str],
-    detail: Dict[str, Any],
-) -> None:
-    """Structured ``logger.error`` plus a ``stack_logs`` row for operators."""
-    extra = {"alert": name, **detail}
-    logger.error("reconciler alert: %s", name, extra=extra)
-    try:
-        StackLogsRepository(conn).insert(
-            activity_id=str(uuid.uuid4()),
-            endpoint="reconciliation_worker",
-            level="ERROR",
-            user_id=user_id,
-            query=name,
-            stacks=[extra],
-        )
-    except Exception:
-        logger.exception("reconciler: failed to write stack_logs row for %s", name)
--- a/application/api/user/sources/upload.py
+++ b/application/api/user/sources/upload.py
@@ -3,19 +3,16 @@
 import json
 import os
 import tempfile
-import uuid
 import zipfile

 from flask import current_app, jsonify, make_response, request
 from flask_restx import fields, Namespace, Resource
-from sqlalchemy import text as sql_text

 from application.api import api
 from application.api.user.tasks import ingest, ingest_connector_task, ingest_remote
 from application.core.settings import settings
 from application.parser.connectors.connector_creator import ConnectorCreator
 from application.parser.file.constants import SUPPORTED_SOURCE_EXTENSIONS
-from application.storage.db.repositories.idempotency import IdempotencyRepository
 from application.storage.db.repositories.sources import SourcesRepository
 from application.storage.db.session import db_readonly, db_session
 from application.storage.storage_creator import StorageCreator
@@ -33,79 +30,6 @@ sources_upload_ns = Namespace(
 )


-_IDEMPOTENCY_KEY_MAX_LEN = 256
-
-
-def _read_idempotency_key():
-    """Return (key, error_response). Empty header → (None, None); oversized → (None, 400)."""
-    key = request.headers.get("Idempotency-Key")
-    if not key:
-        return None, None
-    if len(key) > _IDEMPOTENCY_KEY_MAX_LEN:
-        return None, make_response(
-            jsonify(
-                {
-                    "success": False,
-                    "message": (
-                        f"Idempotency-Key exceeds maximum length of "
-                        f"{_IDEMPOTENCY_KEY_MAX_LEN} characters"
-                    ),
-                }
-            ),
-            400,
-        )
-    return key, None
-
-
-def _scoped_idempotency_key(idempotency_key, scope):
-    """``{scope}:{key}`` so different users can't collide on the same key."""
-    if not idempotency_key or not scope:
-        return None
-    return f"{scope}:{idempotency_key}"
-
-
-def _claim_task_or_get_cached(key, task_name):
-    """Claim ``key`` for this request OR return the winner's cached payload.
-
-    Pre-generates the celery task_id so a losing writer sees the same
-    id immediately. Returns ``(task_id, cached_response)``; non-None
-    cached means the caller should return without enqueuing.
-    """
-    predetermined_id = str(uuid.uuid4())
-    with db_session() as conn:
-        claimed = IdempotencyRepository(conn).claim_task(
-            key=key, task_name=task_name, task_id=predetermined_id,
-        )
-    if claimed is not None:
-        return claimed["task_id"], None
-    with db_readonly() as conn:
-        existing = IdempotencyRepository(conn).get_task(key)
-    cached_id = existing.get("task_id") if existing else None
-    return None, {
-        "success": True,
-        "task_id": cached_id or "deduplicated",
-    }
-
-
-def _release_claim(key):
-    """Drop a pending claim so a client retry can re-claim it."""
-    try:
-        with db_session() as conn:
-            conn.execute(
-                sql_text(
-                    "DELETE FROM task_dedup WHERE idempotency_key = :k "
-                    "AND status = 'pending'"
-                ),
-                {"k": key},
-            )
-    except Exception:
-        current_app.logger.exception(
-            "Failed to release task_dedup claim for key=%s", key,
-        )
-
-
-
-
 def _enforce_audio_path_size_limit(file_path: str, filename: str) -> None:
    if not is_audio_filename(filename):
        return
@@ -125,38 +49,17 @@ class UploadFile(Resource):
        )
    )
    @api.doc(
-        description=(
-            "Uploads a file to be vectorized and indexed. Honors an optional "
-            "``Idempotency-Key`` header: a repeat request with the same key "
-            "within 24h returns the original cached response without re-enqueuing."
-        ),
+        description="Uploads a file to be vectorized and indexed",
    )
    def post(self):
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)
-        user = decoded_token.get("sub")
-        idempotency_key, key_error = _read_idempotency_key()
-        if key_error is not None:
-            return key_error
-        # User-scoped to avoid cross-user collisions; also feeds
-        # ``_derive_source_id`` so uuid5 stays user-disjoint.
-        scoped_key = _scoped_idempotency_key(idempotency_key, user)
-        # Claim before enqueue; the loser returns the winner's task_id.
-        predetermined_task_id = None
-        if scoped_key:
-            predetermined_task_id, cached = _claim_task_or_get_cached(
-                scoped_key, "ingest",
-            )
-            if cached is not None:
-                return make_response(jsonify(cached), 200)
        data = request.form
        files = request.files.getlist("file")
        required_fields = ["user", "name"]
        missing_fields = check_required_fields(data, required_fields)
        if missing_fields or not files or all(file.filename == "" for file in files):
-            if scoped_key:
-                _release_claim(scoped_key)
            return make_response(
                jsonify(
                    {
@@ -166,6 +69,7 @@ class UploadFile(Resource):
                ),
                400,
            )
+        user = decoded_token.get("sub")
        job_name = request.form["name"]

        # Create safe versions for filesystem operations
@@ -236,27 +140,16 @@ class UploadFile(Resource):
                        file_path = f"{base_path}/{safe_file}"
                        with open(temp_file_path, "rb") as f:
                            storage.save_file(f, file_path)
-            ingest_kwargs = dict(
-                args=(
-                    settings.UPLOAD_FOLDER,
-                    list(SUPPORTED_SOURCE_EXTENSIONS),
-                    job_name,
-                    user,
-                ),
-                kwargs={
-                    "file_path": base_path,
-                    "filename": dir_name,
-                    "file_name_map": file_name_map,
-                    # Scoped so the worker dedup row matches the HTTP claim.
-                    "idempotency_key": scoped_key or idempotency_key,
-                },
+            task = ingest.delay(
+                settings.UPLOAD_FOLDER,
+                list(SUPPORTED_SOURCE_EXTENSIONS),
+                job_name,
+                user,
+                file_path=base_path,
+                filename=dir_name,
+                file_name_map=file_name_map,
            )
-            if predetermined_task_id is not None:
-                ingest_kwargs["task_id"] = predetermined_task_id
-            task = ingest.apply_async(**ingest_kwargs)
        except AudioFileTooLargeError:
-            if scoped_key:
-                _release_claim(scoped_key)
            return make_response(
                jsonify(
                    {
@@ -268,13 +161,8 @@ class UploadFile(Resource):
            )
        except Exception as err:
            current_app.logger.error(f"Error uploading file: {err}", exc_info=True)
-            if scoped_key:
-                _release_claim(scoped_key)
            return make_response(jsonify({"success": False}), 400)
-        # Predetermined id matches the dedup-claim row; loser GET sees same.
-        response_task_id = predetermined_task_id or task.id
-        response_payload = {"success": True, "task_id": response_task_id}
-        return make_response(jsonify(response_payload), 200)
+        return make_response(jsonify({"success": True, "task_id": task.id}), 200)


@sources_upload_ns.route("/remote")
@@ -294,38 +182,17 @@ class UploadRemote(Resource):
        )
    )
    @api.doc(
-        description=(
-            "Uploads remote source for vectorization. Honors an optional "
-            "``Idempotency-Key`` header: a repeat request with the same key "
-            "within 24h returns the original cached response without re-enqueuing."
-        ),
+        description="Uploads remote source for vectorization",
    )
    def post(self):
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)
-        user = decoded_token.get("sub")
-        idempotency_key, key_error = _read_idempotency_key()
-        if key_error is not None:
-            return key_error
-        scoped_key = _scoped_idempotency_key(idempotency_key, user)
        data = request.form
        required_fields = ["user", "source", "name", "data"]
        missing_fields = check_required_fields(data, required_fields)
        if missing_fields:
            return missing_fields
-        task_name_for_dedup = (
-            "ingest_connector_task"
-            if data.get("source") in ConnectorCreator.get_supported_connectors()
-            else "ingest_remote"
-        )
-        predetermined_task_id = None
-        if scoped_key:
-            predetermined_task_id, cached = _claim_task_or_get_cached(
-                scoped_key, task_name_for_dedup,
-            )
-            if cached is not None:
-                return make_response(jsonify(cached), 200)
        try:
            config = json.loads(data["data"])
            source_data = None
@@ -341,8 +208,6 @@ class UploadRemote(Resource):
            elif data["source"] in ConnectorCreator.get_supported_connectors():
                session_token = config.get("session_token")
                if not session_token:
-                    if scoped_key:
-                        _release_claim(scoped_key)
                    return make_response(
                        jsonify(
                            {
@@ -371,47 +236,31 @@ class UploadRemote(Resource):
                config["file_ids"] = file_ids
                config["folder_ids"] = folder_ids

-                connector_kwargs = {
-                    "kwargs": {
-                        "job_name": data["name"],
-                        "user": user,
-                        "source_type": data["source"],
-                        "session_token": session_token,
-                        "file_ids": file_ids,
-                        "folder_ids": folder_ids,
-                        "recursive": config.get("recursive", False),
-                        "retriever": config.get("retriever", "classic"),
-                        "idempotency_key": scoped_key or idempotency_key,
-                    },
-                }
-                if predetermined_task_id is not None:
-                    connector_kwargs["task_id"] = predetermined_task_id
-                task = ingest_connector_task.apply_async(**connector_kwargs)
-                response_task_id = predetermined_task_id or task.id
-                response_payload = {"success": True, "task_id": response_task_id}
-                return make_response(jsonify(response_payload), 200)
-            remote_kwargs = {
-                "kwargs": {
-                    "source_data": source_data,
-                    "job_name": data["name"],
-                    "user": user,
-                    "loader": data["source"],
-                    "idempotency_key": scoped_key or idempotency_key,
-                },
-            }
-            if predetermined_task_id is not None:
-                remote_kwargs["task_id"] = predetermined_task_id
-            task = ingest_remote.apply_async(**remote_kwargs)
+                task = ingest_connector_task.delay(
+                    job_name=data["name"],
+                    user=decoded_token.get("sub"),
+                    source_type=data["source"],
+                    session_token=session_token,
+                    file_ids=file_ids,
+                    folder_ids=folder_ids,
+                    recursive=config.get("recursive", False),
+                    retriever=config.get("retriever", "classic"),
+                )
+                return make_response(
+                    jsonify({"success": True, "task_id": task.id}), 200
+                )
+            task = ingest_remote.delay(
+                source_data=source_data,
+                job_name=data["name"],
+                user=decoded_token.get("sub"),
+                loader=data["source"],
+            )
        except Exception as err:
            current_app.logger.error(
                f"Error uploading remote source: {err}", exc_info=True
            )
-            if scoped_key:
-                _release_claim(scoped_key)
            return make_response(jsonify({"success": False}), 400)
-        response_task_id = predetermined_task_id or task.id
-        response_payload = {"success": True, "task_id": response_task_id}
-        return make_response(jsonify(response_payload), 200)
+        return make_response(jsonify({"success": True, "task_id": task.id}), 200)


@sources_upload_ns.route("/manage_source_files")
@@ -456,10 +305,6 @@ class ManageSourceFiles(Resource):
                jsonify({"success": False, "message": "Unauthorized"}), 401
            )
        user = decoded_token.get("sub")
-        idempotency_key, key_error = _read_idempotency_key()
-        if key_error is not None:
-            return key_error
-        scoped_key = _scoped_idempotency_key(idempotency_key, user)
        source_id = request.form.get("source_id")
        operation = request.form.get("operation")

@@ -502,12 +347,6 @@ class ManageSourceFiles(Resource):
                jsonify({"success": False, "message": "Database error"}), 500
            )
        resolved_source_id = str(source["id"])
-        # Flips to True after each branch's ``apply_async`` returns
-        # successfully — at that point the worker owns the predetermined
-        # task_id. The outer ``except`` only releases the claim while
-        # this is False, so a post-``apply_async`` failure (jsonify,
-        # make_response, etc.) doesn't double-enqueue on the next retry.
-        claim_transferred = False
        try:
            storage = StorageCreator.get_storage()
            source_file_path = source.get("file_path", "")
@@ -540,21 +379,6 @@ class ManageSourceFiles(Resource):
                        ),
                        400,
                    )
-
-                # Claim before any storage mutation so a duplicate request
-                # short-circuits without touching the filesystem. Mirrors
-                # the pattern in ``UploadFile.post`` / ``UploadRemote.post``
-                # — without it ``.delay()`` would enqueue twice for two
-                # racing same-key POSTs (the worker decorator only
-                # deduplicates *after* completion).
-                predetermined_task_id = None
-                if scoped_key:
-                    predetermined_task_id, cached = _claim_task_or_get_cached(
-                        scoped_key, "reingest_source_task",
-                    )
-                    if cached is not None:
-                        return make_response(jsonify(cached), 200)
-
                added_files = []
                map_updated = False

@@ -590,15 +414,9 @@ class ManageSourceFiles(Resource):

                from application.api.user.tasks import reingest_source_task

-                task = reingest_source_task.apply_async(
-                    kwargs={
-                        "source_id": resolved_source_id,
-                        "user": user,
-                        "idempotency_key": scoped_key or idempotency_key,
-                    },
-                    task_id=predetermined_task_id,
+                task = reingest_source_task.delay(
+                    source_id=resolved_source_id, user=user
                )
-                claim_transferred = True

                return make_response(
                    jsonify(
@@ -637,8 +455,10 @@ class ManageSourceFiles(Resource):
                        ),
                        400,
                    )
-                # Path-traversal guard runs *before* the claim so a 400
-                # for an invalid path doesn't leave a pending dedup row.
+                # Remove files from storage and directory structure
+
+                removed_files = []
+                map_updated = False
                for file_path in file_paths:
                    if ".." in str(file_path) or str(file_path).startswith("/"):
                        return make_response(
@@ -650,22 +470,6 @@ class ManageSourceFiles(Resource):
                            ),
                            400,
                        )
-
-                # Claim before any storage mutation. See ``add`` branch
-                # comment for rationale.
-                predetermined_task_id = None
-                if scoped_key:
-                    predetermined_task_id, cached = _claim_task_or_get_cached(
-                        scoped_key, "reingest_source_task",
-                    )
-                    if cached is not None:
-                        return make_response(jsonify(cached), 200)
-
-                # Remove files from storage and directory structure
-
-                removed_files = []
-                map_updated = False
-                for file_path in file_paths:
                    full_path = f"{source_file_path}/{file_path}"

                    # Remove from storage
@@ -687,15 +491,9 @@ class ManageSourceFiles(Resource):

                from application.api.user.tasks import reingest_source_task

-                task = reingest_source_task.apply_async(
-                    kwargs={
-                        "source_id": resolved_source_id,
-                        "user": user,
-                        "idempotency_key": scoped_key or idempotency_key,
-                    },
-                    task_id=predetermined_task_id,
+                task = reingest_source_task.delay(
+                    source_id=resolved_source_id, user=user
                )
-                claim_transferred = True

                return make_response(
                    jsonify(
@@ -754,16 +552,6 @@ class ManageSourceFiles(Resource):
                        ),
                        404,
                    )
-
-                # Claim before mutation. See ``add`` branch for rationale.
-                predetermined_task_id = None
-                if scoped_key:
-                    predetermined_task_id, cached = _claim_task_or_get_cached(
-                        scoped_key, "reingest_source_task",
-                    )
-                    if cached is not None:
-                        return make_response(jsonify(cached), 200)
-
                success = storage.remove_directory(full_directory_path)

                if not success:
@@ -772,11 +560,6 @@ class ManageSourceFiles(Resource):
                        f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
                        f"Full path: {full_directory_path}"
                    )
-                    # Release so a client retry can reclaim — otherwise
-                    # the next request would silently 200-cache to the
-                    # task_id that never enqueued.
-                    if scoped_key:
-                        _release_claim(scoped_key)
                    return make_response(
                        jsonify(
                            {"success": False, "message": "Failed to remove directory"}
@@ -808,15 +591,9 @@ class ManageSourceFiles(Resource):

                from application.api.user.tasks import reingest_source_task

-                task = reingest_source_task.apply_async(
-                    kwargs={
-                        "source_id": resolved_source_id,
-                        "user": user,
-                        "idempotency_key": scoped_key or idempotency_key,
-                    },
-                    task_id=predetermined_task_id,
+                task = reingest_source_task.delay(
+                    source_id=resolved_source_id, user=user
                )
-                claim_transferred = True

                return make_response(
                    jsonify(
@@ -830,14 +607,6 @@ class ManageSourceFiles(Resource):
                    200,
                )
        except Exception as err:
-            # Release the dedup claim only if it wasn't transferred to
-            # a worker. Without this, a same-key retry within the 24h
-            # TTL would 200-cache to a predetermined task_id whose
-            # ``apply_async`` never ran (or ran but the response builder
-            # blew up afterward — only the first case matters in
-            # practice; the flag protects both).
-            if scoped_key and not claim_transferred:
-                _release_claim(scoped_key)
            error_context = f"operation={operation}, user={user}, source_id={source_id}"
            if operation == "remove_directory":
                directory_path = request.form.get("directory_path", "")
--- a/application/api/user/tasks.py
+++ b/application/api/user/tasks.py
@@ -1,6 +1,5 @@
 from datetime import timedelta

-from application.api.user.idempotency import with_idempotency
 from application.celery_init import celery
 from application.worker import (
    agent_webhook_worker,
@@ -14,32 +13,9 @@ from application.worker import (
 )


-# Shared decorator config for long-running, side-effecting tasks. ``acks_late``
-# is also the celeryconfig default but stays explicit here so each task's
-# durability story is grep-able next to the body. Combined with
-# ``autoretry_for=(Exception,)`` and a bounded ``max_retries`` so a poison
-# message can't loop forever.
-DURABLE_TASK = dict(
-    bind=True,
-    acks_late=True,
-    autoretry_for=(Exception,),
-    retry_kwargs={"max_retries": 3, "countdown": 60},
-    retry_backoff=True,
-)
-
-
-@celery.task(**DURABLE_TASK)
-@with_idempotency(task_name="ingest")
+@celery.task(bind=True)
 def ingest(
-    self,
-    directory,
-    formats,
-    job_name,
-    user,
-    file_path,
-    filename,
-    file_name_map=None,
-    idempotency_key=None,
+    self, directory, formats, job_name, user, file_path, filename, file_name_map=None
 ):
    resp = ingest_worker(
        self,
@@ -50,35 +26,25 @@ def ingest(
        filename,
        user,
        file_name_map=file_name_map,
-        idempotency_key=idempotency_key,
    )
    return resp


-@celery.task(**DURABLE_TASK)
-@with_idempotency(task_name="ingest_remote")
-def ingest_remote(self, source_data, job_name, user, loader, idempotency_key=None):
-    resp = remote_worker(
-        self, source_data, job_name, user, loader,
-        idempotency_key=idempotency_key,
-    )
+@celery.task(bind=True)
+def ingest_remote(self, source_data, job_name, user, loader):
+    resp = remote_worker(self, source_data, job_name, user, loader)
    return resp


-@celery.task(**DURABLE_TASK)
-@with_idempotency(task_name="reingest_source_task")
-def reingest_source_task(self, source_id, user, idempotency_key=None):
+@celery.task(bind=True)
+def reingest_source_task(self, source_id, user):
    from application.worker import reingest_source_worker

    resp = reingest_source_worker(self, source_id, user)
    return resp


-# Beat-driven dispatch tasks default to ``acks_late=False``: a SIGKILL
-# of a beat tick is harmless to redeliver only if the dispatch itself is
-# idempotent. We keep these early-ACK so the broker doesn't replay a
-# dispatch that already enqueued downstream work.
-@celery.task(bind=True, acks_late=False)
+@celery.task(bind=True)
 def schedule_syncs(self, frequency):
    resp = sync_worker(self, frequency)
    return resp
@@ -108,22 +74,19 @@ def sync_source(
    return resp


-@celery.task(**DURABLE_TASK)
-@with_idempotency(task_name="store_attachment")
-def store_attachment(self, file_info, user, idempotency_key=None):
+@celery.task(bind=True)
+def store_attachment(self, file_info, user):
    resp = attachment_worker(self, file_info, user)
    return resp


-@celery.task(**DURABLE_TASK)
-@with_idempotency(task_name="process_agent_webhook")
-def process_agent_webhook(self, agent_id, payload, idempotency_key=None):
+@celery.task(bind=True)
+def process_agent_webhook(self, agent_id, payload):
    resp = agent_webhook_worker(self, agent_id, payload)
    return resp


-@celery.task(**DURABLE_TASK)
-@with_idempotency(task_name="ingest_connector_task")
+@celery.task(bind=True)
 def ingest_connector_task(
    self,
    job_name,
@@ -137,7 +100,6 @@ def ingest_connector_task(
    operation_mode="upload",
    doc_id=None,
    sync_frequency="never",
-    idempotency_key=None,
 ):
    from application.worker import ingest_connector

@@ -154,7 +116,6 @@ def ingest_connector_task(
        operation_mode=operation_mode,
        doc_id=doc_id,
        sync_frequency=sync_frequency,
-        idempotency_key=idempotency_key,
    )
    return resp

@@ -179,19 +140,6 @@ def setup_periodic_tasks(sender, **kwargs):
        cleanup_pending_tool_state.s(),
        name="cleanup-pending-tool-state",
    )
-    # Pure housekeeping for ``task_dedup`` / ``webhook_dedup`` — the
-    # upsert paths already handle stale rows, so cadence only bounds
-    # table size. Hourly is plenty for typical traffic.
-    sender.add_periodic_task(
-        timedelta(hours=1),
-        cleanup_idempotency_dedup.s(),
-        name="cleanup-idempotency-dedup",
-    )
-    sender.add_periodic_task(
-        timedelta(seconds=30),
-        reconciliation_task.s(),
-        name="reconciliation",
-    )
    sender.add_periodic_task(
        timedelta(hours=7),
        version_check_task.s(),
@@ -211,12 +159,18 @@ def mcp_oauth_status_task(self, task_id):
    return resp


-@celery.task(bind=True, acks_late=False)
+@celery.task(bind=True)
 def cleanup_pending_tool_state(self):
-    """Revert stale ``resuming`` rows, then delete TTL-expired rows."""
+    """Delete pending_tool_state rows past their TTL.
+
+    Replaces Mongo's ``expireAfterSeconds=0`` TTL index — Postgres has
+    no native TTL, so this task runs every 60 seconds to keep
+    ``pending_tool_state`` bounded. No-ops if ``POSTGRES_URI`` isn't
+    configured (keeps the task runnable in Mongo-only environments).
+    """
    from application.core.settings import settings
    if not settings.POSTGRES_URI:
-        return {"deleted": 0, "reverted": 0, "skipped": "POSTGRES_URI not set"}
+        return {"deleted": 0, "skipped": "POSTGRES_URI not set"}

    from application.storage.db.engine import get_engine
    from application.storage.db.repositories.pending_tool_state import (
@@ -225,47 +179,11 @@ def cleanup_pending_tool_state(self):

    engine = get_engine()
    with engine.begin() as conn:
-        repo = PendingToolStateRepository(conn)
-        reverted = repo.revert_stale_resuming(grace_seconds=600)
-        deleted = repo.cleanup_expired()
-    return {"deleted": deleted, "reverted": reverted}
+        deleted = PendingToolStateRepository(conn).cleanup_expired()
+    return {"deleted": deleted}


-@celery.task(bind=True, acks_late=False)
-def cleanup_idempotency_dedup(self):
-    """Delete TTL-expired rows from ``task_dedup`` and ``webhook_dedup``.
-
-    Pure housekeeping — the upsert paths already ignore stale rows
-    (TTL-aware ``ON CONFLICT DO UPDATE``), so this only bounds table
-    growth and keeps SELECT planning tight on large deployments.
-    """
-    from application.core.settings import settings
-    if not settings.POSTGRES_URI:
-        return {
-            "task_dedup_deleted": 0,
-            "webhook_dedup_deleted": 0,
-            "skipped": "POSTGRES_URI not set",
-        }
-
-    from application.storage.db.engine import get_engine
-    from application.storage.db.repositories.idempotency import (
-        IdempotencyRepository,
-    )
-
-    engine = get_engine()
-    with engine.begin() as conn:
-        return IdempotencyRepository(conn).cleanup_expired()
-
-
-@celery.task(bind=True, acks_late=False)
-def reconciliation_task(self):
-    """Sweep stuck durability rows and escalate them to terminal status + alert."""
-    from application.api.user.reconciliation import run_reconciliation
-
-    return run_reconciliation()
-
-
-@celery.task(bind=True, acks_late=False)
+@celery.task(bind=True)
 def version_check_task(self):
    """Periodic anonymous version check.

--- a/application/api/v1/routes.py
+++ b/application/api/v1/routes.py
@@ -9,7 +9,6 @@ import json
 import logging
 import time
 import traceback
-from datetime import datetime
 from typing import Any, Dict, Generator, Optional

 from flask import Blueprint, jsonify, make_response, request, Response
@@ -307,16 +306,7 @@ def list_models():
                    401,
                )

-        # Repository rows now go through ``coerce_pg_native`` at SELECT
-        # time, so timestamps arrive as ISO 8601 strings. Parse before
-        # taking ``.timestamp()``; fall back to ``time.time()`` only when
-        # the value is genuinely missing or unparseable.
        created = agent.get("created_at") or agent.get("createdAt")
-        if isinstance(created, str):
-            try:
-                created = datetime.fromisoformat(created)
-            except (ValueError, TypeError):
-                created = None
        created_ts = (
            int(created.timestamp()) if hasattr(created, "timestamp")
            else int(time.time())
--- a/application/app.py
+++ b/application/app.py
@@ -9,7 +9,6 @@ from jose import jwt

 from application.auth import handle_auth

-from application.core import log_context
 from application.core.logging_config import setup_logging

 setup_logging()
@@ -113,38 +112,6 @@ def generate_token():
    return jsonify({"error": "Token generation not allowed in current auth mode"}), 400


-_LOG_CTX_TOKEN_ATTR = "_log_ctx_token"
-
-
-@app.before_request
-def _bind_log_context():
-    """Bind activity_id + endpoint for the duration of this request.
-
-    Runs before ``authenticate_request``; ``user_id`` is overlaid in a
-    follow-up handler once the JWT has been decoded.
-    """
-    if request.method == "OPTIONS":
-        return None
-    activity_id = str(uuid.uuid4())
-    request.activity_id = activity_id
-    token = log_context.bind(
-        activity_id=activity_id,
-        endpoint=request.endpoint,
-    )
-    setattr(request, _LOG_CTX_TOKEN_ATTR, token)
-    return None
-
-
-@app.teardown_request
-def _reset_log_context(_exc):
-    # SSE streams keep yielding after teardown fires, but a2wsgi runs each
-    # request inside ``copy_context().run(...)``, so this reset doesn't
-    # leak into the stream's view of the context.
-    token = getattr(request, _LOG_CTX_TOKEN_ATTR, None)
-    if token is not None:
-        log_context.reset(token)
-
-
@app.before_request
 def enforce_stt_request_size_limits():
    if request.method == "OPTIONS":
@@ -181,28 +148,11 @@ def authenticate_request():
        request.decoded_token = decoded_token


-@app.before_request
-def _bind_user_id_to_log_context():
-    # Registered after ``authenticate_request`` (Flask runs before_request
-    # handlers in registration order), so ``request.decoded_token`` is
-    # populated by the time we read it. ``teardown_request`` unwinds the
-    # whole request-level bind, so no separate reset token is needed here.
-    if request.method == "OPTIONS":
-        return None
-    decoded_token = getattr(request, "decoded_token", None)
-    user_id = decoded_token.get("sub") if isinstance(decoded_token, dict) else None
-    if user_id:
-        log_context.bind(user_id=user_id)
-    return None
-
-
@app.after_request
 def after_request(response: Response) -> Response:
    """Add CORS headers for the pure Flask development entrypoint."""
    response.headers["Access-Control-Allow-Origin"] = "*"
-    response.headers["Access-Control-Allow-Headers"] = (
-        "Content-Type, Authorization, Idempotency-Key"
-    )
+    response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
    response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, PATCH, DELETE, OPTIONS"
    return response

--- a/application/asgi.py
+++ b/application/asgi.py
@@ -25,12 +25,7 @@ asgi_app = Starlette(
            CORSMiddleware,
            allow_origins=["*"],
            allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
-            allow_headers=[
-                "Content-Type",
-                "Authorization",
-                "Mcp-Session-Id",
-                "Idempotency-Key",
-            ],
+            allow_headers=["Content-Type", "Authorization", "Mcp-Session-Id"],
            expose_headers=["Mcp-Session-Id"],
        ),
    ],
--- a/application/cache.py
+++ b/application/cache.py
@@ -1,4 +1,3 @@
-import hashlib
 import json
 import logging
 import time
@@ -11,14 +10,6 @@ from application.utils import get_hash

 logger = logging.getLogger(__name__)

-
-def _cache_default(value):
-    # Image attachments arrive inline as bytes (see GoogleLLM.prepare_messages_with_attachments);
-    # hash so the cache key stays bounded in size and stable across identical content.
-    if isinstance(value, (bytes, bytearray, memoryview)):
-        return f"<bytes:sha256:{hashlib.sha256(bytes(value)).hexdigest()}>"
-    return repr(value)
-
 _redis_instance = None
 _redis_creation_failed = False
 _instance_lock = Lock()
@@ -45,7 +36,7 @@ def get_redis_instance():
 def gen_cache_key(messages, model="docgpt", tools=None):
    if not all(isinstance(msg, dict) for msg in messages):
        raise ValueError("All messages must be dictionaries.")
-    messages_str = json.dumps(messages, default=_cache_default)
+    messages_str = json.dumps(messages)
    tools_str = json.dumps(str(tools)) if tools else ""
    combined = f"{model}_{messages_str}_{tools_str}"
    cache_key = get_hash(combined)
--- a/application/celery_init.py
+++ b/application/celery_init.py
@@ -1,17 +1,8 @@
-import inspect
-import logging
 import threading

 from celery import Celery
-from application.core import log_context
 from application.core.settings import settings
-from celery.signals import (
-    setup_logging,
-    task_postrun,
-    task_prerun,
-    worker_process_init,
-    worker_ready,
-)
+from celery.signals import setup_logging, worker_process_init, worker_ready


 def make_celery(app_name=__name__):
@@ -50,54 +41,6 @@ def _dispose_db_engine_on_fork(*args, **kwargs):
    dispose_engine()


-# Most tasks in this repo accept ``user`` where the log context wants
-# ``user_id``; map task parameter names to context keys explicitly.
-_TASK_PARAM_TO_CTX_KEY: dict[str, str] = {
-    "user": "user_id",
-    "user_id": "user_id",
-    "agent_id": "agent_id",
-    "conversation_id": "conversation_id",
-}
-
-_task_log_tokens: dict[str, object] = {}
-
-
-@task_prerun.connect
-def _bind_task_log_context(task_id, task, args, kwargs, **_):
-    # Resolve task args by parameter name — nearly every task in this repo
-    # is called positionally, so ``kwargs.get('user')`` would bind nothing.
-    ctx = {"activity_id": task_id}
-    try:
-        sig = inspect.signature(task.run)
-        bound = sig.bind_partial(*args, **kwargs).arguments
-    except (TypeError, ValueError):
-        bound = dict(kwargs)
-    for param_name, value in bound.items():
-        ctx_key = _TASK_PARAM_TO_CTX_KEY.get(param_name)
-        if ctx_key and value:
-            ctx[ctx_key] = value
-    _task_log_tokens[task_id] = log_context.bind(**ctx)
-
-
-@task_postrun.connect
-def _unbind_task_log_context(task_id, **_):
-    # ``task_postrun`` fires on both success and failure. Required for
-    # Celery: unlike the Flask path, tasks aren't isolated in their own
-    # ``copy_context().run(...)``, so a missing reset would leak the
-    # bind onto the next task on the same worker.
-    token = _task_log_tokens.pop(task_id, None)
-    if token is None:
-        return
-    try:
-        log_context.reset(token)
-    except ValueError:
-        # task_prerun and task_postrun ran on different threads (non-default
-        # Celery pool); the token isn't valid in this context. Drop it.
-        logging.getLogger(__name__).debug(
-            "log_context reset skipped for task %s", task_id
-        )
-
-
@worker_ready.connect
 def _run_version_check(*args, **kwargs):
    """Kick off the anonymous version check on worker startup.
--- a/application/celeryconfig.py
+++ b/application/celeryconfig.py
@@ -1,10 +1,7 @@
-from application.core.settings import settings
+import os

-# Pydantic loads .env into ``settings`` but does not inject values into
-# ``os.environ`` — read directly from settings so beat startup (which
-# imports this module before any explicit env load) sees a real URL.
-broker_url = settings.CELERY_BROKER_URL
-result_backend = settings.CELERY_RESULT_BACKEND
+broker_url = os.getenv("CELERY_BROKER_URL")
+result_backend = os.getenv("CELERY_RESULT_BACKEND")

 task_serializer = 'json'
 result_serializer = 'json'
@@ -13,21 +10,7 @@ accept_content = ['json']
 # Autodiscover tasks
 imports = ('application.api.user.tasks',)

-# Project-scoped queue so a stray sibling worker on the same broker
-# (other repo, same default ``celery`` queue) can't grab DocsGPT tasks.
-task_default_queue = "docsgpt"
-task_default_exchange = "docsgpt"
-task_default_routing_key = "docsgpt"
-
 beat_scheduler = "redbeat.RedBeatScheduler"
 redbeat_redis_url = broker_url
 redbeat_key_prefix = "redbeat:docsgpt:"
 redbeat_lock_timeout = 90
-
-# Survive worker SIGKILL/OOM without silently dropping in-flight tasks.
-task_acks_late = True
-task_reject_on_worker_lost = True
-worker_prefetch_multiplier = settings.CELERY_WORKER_PREFETCH_MULTIPLIER
-broker_transport_options = {"visibility_timeout": settings.CELERY_VISIBILITY_TIMEOUT}
-result_expires = 86400 * 7
-task_track_started = True
--- a/application/core/log_context.py
+++ b/application/core/log_context.py
@@ -1,57 +0,0 @@
-"""Per-activity logging context backed by ``contextvars``.
-
-The ``_ContextFilter`` installed by ``logging_config.setup_logging`` stamps
-every ``LogRecord`` emitted inside a ``bind`` block with the bound keys, so
-they land as first-class attributes on the OTLP log export rather than being
-buried inside formatted message bodies.
-
-A single ``ContextVar`` holds a dict so nested binds reset atomically (LIFO)
-via the token returned by ``bind``.
-"""
-
-from __future__ import annotations
-
-from contextvars import ContextVar, Token
-from typing import Mapping
-
-
-_CTX_KEYS: frozenset[str] = frozenset(
-    {
-        "activity_id",
-        "parent_activity_id",
-        "user_id",
-        "agent_id",
-        "conversation_id",
-        "endpoint",
-        "model",
-    }
-)
-
-_ctx: ContextVar[Mapping[str, str]] = ContextVar("log_ctx", default={})
-
-
-def bind(**kwargs: object) -> Token:
-    """Overlay the given keys onto the current context.
-
-    Returns a ``Token`` so the caller can ``reset`` in a ``finally`` block.
-    Keys outside :data:`_CTX_KEYS` are silently dropped (so a typo can't
-    stamp a stray field name onto every record), as are ``None`` values
-    (a missing attribute is more useful than the literal string ``"None"``).
-    """
-    overlay = {
-        k: str(v)
-        for k, v in kwargs.items()
-        if k in _CTX_KEYS and v is not None
-    }
-    new = {**_ctx.get(), **overlay}
-    return _ctx.set(new)
-
-
-def reset(token: Token) -> None:
-    """Restore the context to the snapshot captured by the matching ``bind``."""
-    _ctx.reset(token)
-
-
-def snapshot() -> Mapping[str, str]:
-    """Return the current context dict. Treat as read-only; use :func:`bind`."""
-    return _ctx.get()
--- a/application/core/logging_config.py
+++ b/application/core/logging_config.py
@@ -2,36 +2,6 @@ import logging
 import os
 from logging.config import dictConfig

-from application.core.log_context import snapshot as _ctx_snapshot
-
-
-# Loggers with ``propagate=False`` don't share root's handlers, so the
-# context filter has to be installed on their handlers directly.
-_NON_PROPAGATING_LOGGERS: tuple[str, ...] = (
-    "uvicorn",
-    "uvicorn.access",
-    "uvicorn.error",
-    "celery.app.trace",
-    "celery.worker.strategy",
-    "gunicorn.error",
-    "gunicorn.access",
-)
-
-
-class _ContextFilter(logging.Filter):
-    """Stamp the current ``log_context`` snapshot onto every ``LogRecord``.
-
-    Must be installed on **handlers**, not loggers: Python skips logger-level
-    filters when a child logger's record propagates up. The ``hasattr`` guard
-    keeps an explicit ``logger.info(..., extra={...})`` from being overwritten.
-    """
-
-    def filter(self, record: logging.LogRecord) -> bool:
-        for key, value in _ctx_snapshot().items():
-            if not hasattr(record, key):
-                setattr(record, key, value)
-        return True
-

 def _otlp_logs_enabled() -> bool:
    """Return True when the user has opted in to OTLP log export.
@@ -90,23 +60,3 @@ def setup_logging() -> None:
        for handler in preserved_handlers:
            if handler not in root.handlers:
                root.addHandler(handler)
-
-    _install_context_filter()
-
-
-def _install_context_filter() -> None:
-    """Attach :class:`_ContextFilter` to root's handlers + every handler on
-    the known non-propagating loggers. Skipping handlers that already carry
-    one keeps repeat ``setup_logging`` calls from stacking filters.
-    """
-
-    def _has_ctx_filter(handler: logging.Handler) -> bool:
-        return any(isinstance(f, _ContextFilter) for f in handler.filters)
-
-    for handler in logging.getLogger().handlers:
-        if not _has_ctx_filter(handler):
-            handler.addFilter(_ContextFilter())
-    for name in _NON_PROPAGATING_LOGGERS:
-        for handler in logging.getLogger(name).handlers:
-            if not _has_ctx_filter(handler):
-                handler.addFilter(_ContextFilter())
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -30,12 +30,6 @@ class Settings(BaseSettings):

    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
-    # Prefetch=1 caps SIGKILL loss to one task. Visibility timeout must exceed
-    # the longest legitimate task runtime (ingest, agent webhook) but stay
-    # short enough that SIGKILLed tasks redeliver promptly. 1h matches Onyx
-    # and Dify defaults; long ingests can override via env.
-    CELERY_WORKER_PREFETCH_MULTIPLIER: int = 1
-    CELERY_VISIBILITY_TIMEOUT: int = 3600
    # Only consulted when VECTOR_STORE=mongodb or when running scripts/db/backfill.py; user data lives in Postgres.
    MONGO_URI: Optional[str] = None
    # User-data Postgres DB.
--- a/application/llm/anthropic.py
+++ b/application/llm/anthropic.py
@@ -11,7 +11,6 @@ logger = logging.getLogger(__name__)


 class AnthropicLLM(BaseLLM):
-    provider_name = "anthropic"

    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):

--- a/application/llm/base.py
+++ b/application/llm/base.py
@@ -1,6 +1,5 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import ClassVar

 from application.cache import gen_cache, stream_cache

@@ -11,10 +10,6 @@ logger = logging.getLogger(__name__)


 class BaseLLM(ABC):
-    # Stamped onto the ``llm_stream_start`` event so dashboards can group
-    # calls by vendor. Subclasses override.
-    provider_name: ClassVar[str] = "unknown"
-
    def __init__(
        self,
        decoded_token=None,
@@ -80,14 +75,6 @@ class BaseLLM(ABC):
                    agent_id=self.agent_id,
                    model_user_id=self.model_user_id,
                )
-                # Tag the fallback LLM so its rows land as
-                # ``source='fallback'`` in cost-attribution dashboards.
-                # Propagate the parent's ``_request_id`` so a user
-                # request that ran fallback is still grouped under one id.
-                self._fallback_llm._token_usage_source = "fallback"
-                self._fallback_llm._request_id = getattr(
-                    self, "_request_id", None,
-                )
                logger.info(
                    f"Fallback LLM initialized from agent backup model: "
                    f"{provider}/{backup_model_id}"
@@ -114,11 +101,6 @@ class BaseLLM(ABC):
                    agent_id=self.agent_id,
                    model_user_id=self.model_user_id,
                )
-                # Same rationale as the agent-backup branch.
-                self._fallback_llm._token_usage_source = "fallback"
-                self._fallback_llm._request_id = getattr(
-                    self, "_request_id", None,
-                )
                logger.info(
                    f"Fallback LLM initialized from global settings: "
                    f"{settings.FALLBACK_LLM_PROVIDER}/{settings.FALLBACK_LLM_NAME}"
@@ -136,26 +118,6 @@ class BaseLLM(ABC):
            return args_dict
        return {k: v for k, v in args_dict.items() if v is not None}

-    @staticmethod
-    def _is_non_retriable_client_error(exc: BaseException) -> bool:
-        """4xx errors mean the request itself is malformed — retrying with
-        a different model fails identically and doubles the work. Only
-        transient/5xx/connection errors should trigger fallback."""
-        try:
-            from google.genai.errors import ClientError as _GenaiClientError
-
-            if isinstance(exc, _GenaiClientError):
-                return True
-        except ImportError:
-            pass
-        for attr in ("status_code", "code", "http_status"):
-            v = getattr(exc, attr, None)
-            if isinstance(v, int) and 400 <= v < 500:
-                return True
-        resp = getattr(exc, "response", None)
-        v = getattr(resp, "status_code", None)
-        return isinstance(v, int) and 400 <= v < 500
-
    def _execute_with_fallback(
        self, method_name: str, decorators: list, *args, **kwargs
    ):
@@ -179,18 +141,12 @@ class BaseLLM(ABC):

        if is_stream:
            return self._stream_with_fallback(
-                decorated_method, method_name, decorators, *args, **kwargs
+                decorated_method, method_name, *args, **kwargs
            )

        try:
            return decorated_method()
        except Exception as e:
-            if self._is_non_retriable_client_error(e):
-                logger.error(
-                    f"Primary LLM failed with non-retriable client error; "
-                    f"skipping fallback: {str(e)}"
-                )
-                raise
            if not self.fallback_llm:
                logger.error(f"Primary LLM failed and no fallback configured: {str(e)}")
                raise
@@ -200,27 +156,14 @@ class BaseLLM(ABC):
                f"{fallback.model_id}. Error: {str(e)}"
            )

-            # Apply decorators to fallback's raw method directly — calling
-            # fallback.gen() would re-enter the orchestrator and recurse via
-            # fallback.fallback_llm.
-            fallback_method = getattr(fallback, method_name)
-            for decorator in decorators:
-                fallback_method = decorator(fallback_method)
+            fallback_method = getattr(
+                fallback, method_name.replace("_raw_", "")
+            )
            fallback_kwargs = {**kwargs, "model": fallback.model_id}
-            try:
-                return fallback_method(fallback, *args, **fallback_kwargs)
-            except Exception as e2:
-                if self._is_non_retriable_client_error(e2):
-                    logger.error(
-                        f"Fallback LLM failed with non-retriable client "
-                        f"error; giving up: {str(e2)}"
-                    )
-                else:
-                    logger.error(f"Fallback LLM also failed; giving up: {str(e2)}")
-                raise
+            return fallback_method(*args, **fallback_kwargs)

    def _stream_with_fallback(
-        self, decorated_method, method_name, decorators, *args, **kwargs
+        self, decorated_method, method_name, *args, **kwargs
    ):
        """
        Wrapper generator that catches mid-stream errors and falls back.
@@ -233,12 +176,6 @@ class BaseLLM(ABC):
        try:
            yield from decorated_method()
        except Exception as e:
-            if self._is_non_retriable_client_error(e):
-                logger.error(
-                    f"Primary LLM failed mid-stream with non-retriable client "
-                    f"error; skipping fallback: {str(e)}"
-                )
-                raise
            if not self.fallback_llm:
                logger.error(
                    f"Primary LLM failed and no fallback configured: {str(e)}"
@@ -249,37 +186,11 @@ class BaseLLM(ABC):
                f"Primary LLM failed mid-stream. Falling back to "
                f"{fallback.model_id}. Error: {str(e)}"
            )
-            # Apply decorators to fallback's raw stream method directly —
-            # calling fallback.gen_stream() would re-enter the orchestrator
-            # and recurse via fallback.fallback_llm. Emit the stream-start
-            # event manually so dashboards still see the fallback's
-            # provider/model when the response actually comes from it.
-            fallback._emit_stream_start_log(
-                fallback.model_id,
-                kwargs.get("messages"),
-                kwargs.get("tools"),
-                bool(
-                    kwargs.get("_usage_attachments")
-                    or kwargs.get("attachments")
-                ),
+            fallback_method = getattr(
+                fallback, method_name.replace("_raw_", "")
            )
-            fallback_method = getattr(fallback, method_name)
-            for decorator in decorators:
-                fallback_method = decorator(fallback_method)
            fallback_kwargs = {**kwargs, "model": fallback.model_id}
-            try:
-                yield from fallback_method(fallback, *args, **fallback_kwargs)
-            except Exception as e2:
-                if self._is_non_retriable_client_error(e2):
-                    logger.error(
-                        f"Fallback LLM failed mid-stream with non-retriable "
-                        f"client error; giving up: {str(e2)}"
-                    )
-                else:
-                    logger.error(
-                        f"Fallback LLM also failed mid-stream; giving up: {str(e2)}"
-                    )
-                raise
+            yield from fallback_method(*args, **fallback_kwargs)

    def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
        decorators = [gen_token_usage, gen_cache]
@@ -294,58 +205,7 @@ class BaseLLM(ABC):
            **kwargs,
        )

-    def _emit_stream_start_log(self, model, messages, tools, has_attachments):
-        # Stamped with ``self.provider_name`` so dashboards can group calls
-        # by vendor; the fallback path emits its own copy on the fallback
-        # instance so the actual responding provider is recorded.
-        logging.info(
-            "llm_stream_start",
-            extra={
-                "model": model,
-                "provider": self.provider_name,
-                "message_count": len(messages) if messages is not None else 0,
-                "has_attachments": bool(has_attachments),
-                "has_tools": bool(tools),
-            },
-        )
-
-    def _emit_stream_finished_log(
-        self,
-        model,
-        *,
-        prompt_tokens,
-        completion_tokens,
-        latency_ms,
-        cached_tokens=None,
-        error=None,
-    ):
-        # Paired with ``llm_stream_start`` so cost dashboards can sum tokens
-        # by user/agent/provider. Token counts are client-side estimates
-        # from ``stream_token_usage``; vendor-reported counts (incl.
-        # ``cached_tokens`` for prompt caching) require per-provider
-        # extraction in each ``_raw_gen_stream`` and aren't wired yet.
-        extra = {
-            "model": model,
-            "provider": self.provider_name,
-            "prompt_tokens": int(prompt_tokens),
-            "completion_tokens": int(completion_tokens),
-            "latency_ms": int(latency_ms),
-            "status": "error" if error is not None else "ok",
-        }
-        if cached_tokens is not None:
-            extra["cached_tokens"] = int(cached_tokens)
-        if error is not None:
-            extra["error_class"] = type(error).__name__
-        logging.info("llm_stream_finished", extra=extra)
-
    def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
-        # Attachments arrive as ``_usage_attachments`` from ``Agent._llm_gen``;
-        # the ``stream_token_usage`` decorator pops that key, but the log
-        # fires before the decorator runs so it's still in ``kwargs`` here.
-        has_attachments = bool(
-            kwargs.get("_usage_attachments") or kwargs.get("attachments")
-        )
-        self._emit_stream_start_log(model, messages, tools, has_attachments)
        decorators = [stream_cache, stream_token_usage]
        return self._execute_with_fallback(
            "_raw_gen_stream",
--- a/application/llm/docsgpt_provider.py
+++ b/application/llm/docsgpt_provider.py
@@ -6,8 +6,6 @@ DOCSGPT_BASE_URL = "https://oai.arc53.com"
 DOCSGPT_MODEL = "docsgpt"

 class DocsGPTAPILLM(OpenAILLM):
-    provider_name = "docsgpt"
-
    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
        super().__init__(
            api_key=DOCSGPT_API_KEY,
--- a/application/llm/google_ai.py
+++ b/application/llm/google_ai.py
@@ -6,13 +6,10 @@ from google.genai import types
 from application.core.settings import settings

 from application.llm.base import BaseLLM
-from application.llm.handlers.google import _decode_thought_signature
 from application.storage.storage_creator import StorageCreator


 class GoogleLLM(BaseLLM):
-    provider_name = "google"
-
    def __init__(
        self, api_key=None, user_api_key=None, decoded_token=None, *args, **kwargs
    ):
@@ -82,39 +79,24 @@ class GoogleLLM(BaseLLM):
        for attachment in attachments:
            mime_type = attachment.get("mime_type")

-            if mime_type not in self.get_supported_attachment_types():
-                continue
-            try:
-                # Images go inline as bytes per Google's guidance for
-                # requests under 20MB; the Files API can return before
-                # the upload reaches ACTIVE state and yield an empty URI.
-                if mime_type.startswith("image/"):
-                    file_bytes = self._read_attachment_bytes(attachment)
-                    files.append(
-                        {"file_bytes": file_bytes, "mime_type": mime_type}
-                    )
-                else:
+            if mime_type in self.get_supported_attachment_types():
+                try:
                    file_uri = self._upload_file_to_google(attachment)
-                    if not file_uri:
-                        raise ValueError(
-                            f"Google Files API returned empty URI for "
-                            f"{attachment.get('path', 'unknown')}"
-                        )
                    logging.info(
                        f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}"
                    )
                    files.append({"file_uri": file_uri, "mime_type": mime_type})
-            except Exception as e:
-                logging.error(
-                    f"GoogleLLM: Error processing attachment: {e}", exc_info=True
-                )
-                if "content" in attachment:
-                    prepared_messages[user_message_index]["content"].append(
-                        {
-                            "type": "text",
-                            "text": f"[File could not be processed: {attachment.get('path', 'unknown')}]",
-                        }
+                except Exception as e:
+                    logging.error(
+                        f"GoogleLLM: Error uploading file: {e}", exc_info=True
                    )
+                    if "content" in attachment:
+                        prepared_messages[user_message_index]["content"].append(
+                            {
+                                "type": "text",
+                                "text": f"[File could not be processed: {attachment.get('path', 'unknown')}]",
+                            }
+                        )
        if files:
            logging.info(f"GoogleLLM: Adding {len(files)} files to message")
            prepared_messages[user_message_index]["content"].append({"files": files})
@@ -130,9 +112,7 @@ class GoogleLLM(BaseLLM):
        Returns:
            str: Google AI file URI for the uploaded file.
        """
-        # Truthy check, not membership: a poisoned cache row of "" or
-        # None must be treated as a miss and trigger a fresh upload.
-        if attachment.get("google_file_uri"):
+        if "google_file_uri" in attachment:
            return attachment["google_file_uri"]
        file_path = attachment.get("path")
        if not file_path:
@@ -146,10 +126,6 @@ class GoogleLLM(BaseLLM):
                    file=local_path
                ).uri,
            )
-            if not file_uri:
-                raise ValueError(
-                    f"Google Files API upload returned empty URI for {file_path}"
-                )

            # Cache the Google file URI on the attachment row so we don't
            # re-upload on the next LLM call. Accept either a PG UUID
@@ -183,26 +159,6 @@ class GoogleLLM(BaseLLM):
            logging.error(f"Error uploading file to Google AI: {e}", exc_info=True)
            raise

-    def _read_attachment_bytes(self, attachment):
-        """
-        Read attachment bytes from storage for inline transmission.
-
-        Args:
-            attachment (dict): Attachment dictionary with path and metadata.
-
-        Returns:
-            bytes: Raw file bytes.
-        """
-        file_path = attachment.get("path")
-        if not file_path:
-            raise ValueError("No file path provided in attachment")
-        if not self.storage.file_exists(file_path):
-            raise FileNotFoundError(f"File not found: {file_path}")
-        return self.storage.process_file(
-            file_path,
-            lambda local_path, **kwargs: open(local_path, "rb").read(),
-        )
-
    def _clean_messages_google(self, messages):
        """
        Convert OpenAI format messages to Google AI format and collect system prompts.
@@ -259,7 +215,7 @@ class GoogleLLM(BaseLLM):
                        except (_json.JSONDecodeError, TypeError):
                            args = {}
                    cleaned_args = self._remove_null_values(args)
-                    thought_sig = _decode_thought_signature(tc.get("thought_signature"))
+                    thought_sig = tc.get("thought_signature")
                    if thought_sig:
                        parts.append(
                            types.Part(
@@ -323,9 +279,7 @@ class GoogleLLM(BaseLLM):
                                            name=item["function_call"]["name"],
                                            args=cleaned_args,
                                        ),
-                                        thoughtSignature=_decode_thought_signature(
-                                            item["thought_signature"]
-                                        ),
+                                        thoughtSignature=item["thought_signature"],
                                    )
                                )
                            else:
@@ -344,24 +298,12 @@ class GoogleLLM(BaseLLM):
                            )
                        elif "files" in item:
                            for file_data in item["files"]:
-                                if "file_bytes" in file_data:
-                                    parts.append(
-                                        types.Part.from_bytes(
-                                            data=file_data["file_bytes"],
-                                            mime_type=file_data["mime_type"],
-                                        )
-                                    )
-                                elif file_data.get("file_uri"):
-                                    parts.append(
-                                        types.Part.from_uri(
-                                            file_uri=file_data["file_uri"],
-                                            mime_type=file_data["mime_type"],
-                                        )
-                                    )
-                                else:
-                                    logging.warning(
-                                        "GoogleLLM: dropping file part with empty URI and no bytes"
+                                parts.append(
+                                    types.Part.from_uri(
+                                        file_uri=file_data["file_uri"],
+                                        mime_type=file_data["mime_type"],
                                    )
+                                )
                        else:
                            raise ValueError(
                                f"Unexpected content dictionary format:{item}"
@@ -599,6 +541,22 @@ class GoogleLLM(BaseLLM):
            config.response_mime_type = "application/json"
        # Check if we have both tools and file attachments

+        has_attachments = False
+        for message in messages:
+            for part in message.parts:
+                if hasattr(part, "file_data") and part.file_data is not None:
+                    has_attachments = True
+                    break
+            if has_attachments:
+                break
+        messages_summary = self._summarize_messages_for_log(messages)
+        logging.info(
+            "GoogleLLM: Starting stream generation. Model: %s, Messages: %s, Has attachments: %s",
+            model,
+            messages_summary,
+            has_attachments,
+        )
+
        response = client.models.generate_content_stream(
            model=model,
            contents=messages,
--- a/application/llm/groq.py
+++ b/application/llm/groq.py
@@ -5,8 +5,6 @@ GROQ_BASE_URL = "https://api.groq.com/openai/v1"


 class GroqLLM(OpenAILLM):
-    provider_name = "groq"
-
    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
        super().__init__(
            api_key=api_key or settings.GROQ_API_KEY or settings.API_KEY,
--- a/application/llm/handlers/base.py
+++ b/application/llm/handlers/base.py
@@ -10,18 +10,6 @@ from application.logging import build_stack_data
 logger = logging.getLogger(__name__)


-# Cap the agent tool-call loop. Without this an LLM that keeps
-# requesting more tool calls (preview models, sparse tool results,
-# under-specified prompts) can chain searches indefinitely and the
-# stream never finalises. 25 mirrors Dify's default.
-MAX_TOOL_ITERATIONS = 25
-_FINALIZE_INSTRUCTION = (
-    f"You have made {MAX_TOOL_ITERATIONS} tool calls. Provide a final "
-    "response to the user based on what you have, without making any "
-    "additional tool calls."
-)
-
-
@dataclass
 class ToolCall:
    """Represents a tool/function call from the LLM."""
@@ -292,26 +280,7 @@ class LLMHandler(ABC):
                        # Keep serialized function calls/responses so the compressor sees actions
                        parts_text.append(str(item))
                    elif "files" in item:
-                        # Image attachments arrive with raw bytes / base64
-                        # inline (see GoogleLLM.prepare_messages_with_attachments).
-                        # ``str(item)`` would dump the whole byte/base64
-                        # blob into the compression prompt and bust the
-                        # compression LLM's input limit.
-                        files = item.get("files") or []
-                        descriptors = []
-                        if isinstance(files, list):
-                            for f in files:
-                                if isinstance(f, dict):
-                                    descriptors.append(
-                                        f.get("mime_type") or "file"
-                                    )
-                                elif isinstance(f, str):
-                                    descriptors.append(f)
-                        if not descriptors:
-                            descriptors = ["file"]
-                        parts_text.append(
-                            f"[attachment: {', '.join(descriptors)}]"
-                        )
+                        parts_text.append(str(item))
            return "\n".join(parts_text)
        return ""

@@ -636,10 +605,6 @@ class LLMHandler(ABC):
                agent_id=getattr(agent, "agent_id", None),
                model_user_id=compression_user_id,
            )
-            # Side-channel LLM tag — see ``orchestrator.py`` for rationale.
-            compression_llm._token_usage_source = "compression"
-            compression_llm._request_id = getattr(agent, "_request_id", None) \
-                or getattr(getattr(agent, "llm", None), "_request_id", None)

            # Create service without DB persistence capability
            compression_service = CompressionService(
@@ -950,9 +915,7 @@ class LLMHandler(ABC):
        parsed = self.parse_response(response)
        self.llm_calls.append(build_stack_data(agent.llm))

-        iteration = 0
        while parsed.requires_tool_call:
-            iteration += 1
            tool_handler_gen = self.handle_tool_calls(
                agent, parsed.tool_calls, tools_dict, messages
            )
@@ -976,25 +939,6 @@ class LLMHandler(ABC):
                }
                return ""

-            # Cap reached: force one final tool-less call so the stream
-            # always ends with content rather than cutting off.
-            if iteration >= MAX_TOOL_ITERATIONS:
-                logger.warning(
-                    "agent tool loop hit cap (%d); forcing finalize",
-                    MAX_TOOL_ITERATIONS,
-                )
-                messages.append(
-                    {"role": "system", "content": _FINALIZE_INSTRUCTION},
-                )
-                response = agent.llm.gen(
-                    model=getattr(agent.llm, "model_id", None) or agent.model_id,
-                    messages=messages,
-                    tools=None,
-                )
-                parsed = self.parse_response(response)
-                self.llm_calls.append(build_stack_data(agent.llm))
-                break
-
            # ``agent.model_id`` is the registry id (a UUID for BYOM
            # records). Use the LLM's own model_id, which LLMCreator
            # already resolved to the upstream model name. Built-ins:
@@ -1010,12 +954,7 @@ class LLMHandler(ABC):
        return parsed.content

    def handle_streaming(
-        self,
-        agent,
-        response: Any,
-        tools_dict: Dict,
-        messages: List[Dict],
-        _iteration: int = 0,
+        self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
    ) -> Generator:
        """
        Handle streaming response flow.
@@ -1084,9 +1023,6 @@ class LLMHandler(ABC):
                    }
                    return

-                next_iteration = _iteration + 1
-                cap_reached = next_iteration >= MAX_TOOL_ITERATIONS
-
                # Check if context limit was reached during tool execution
                if hasattr(agent, 'context_limit_reached') and agent.context_limit_reached:
                    # Add system message warning about context limit
@@ -1099,32 +1035,16 @@ class LLMHandler(ABC):
                        )
                    })
                    logger.info("Context limit reached - instructing agent to wrap up")
-                elif cap_reached:
-                    logger.warning(
-                        "agent tool loop hit cap (%d); forcing finalize",
-                        MAX_TOOL_ITERATIONS,
-                    )
-                    messages.append(
-                        {"role": "system", "content": _FINALIZE_INSTRUCTION},
-                    )

                # See note above on agent.model_id vs llm.model_id.
                response = agent.llm.gen_stream(
                    model=getattr(agent.llm, "model_id", None) or agent.model_id,
                    messages=messages,
-                    tools=(
-                        None
-                        if cap_reached
-                        or getattr(agent, "context_limit_reached", False)
-                        else agent.tools
-                    ),
+                    tools=None if getattr(agent, "context_limit_reached", False) else agent.tools,
                )
                self.llm_calls.append(build_stack_data(agent.llm))

-                yield from self.handle_streaming(
-                    agent, response, tools_dict, messages,
-                    _iteration=next_iteration,
-                )
+                yield from self.handle_streaming(agent, response, tools_dict, messages)
                return
            if parsed.content:
                buffer += parsed.content
--- a/application/llm/handlers/google.py
+++ b/application/llm/handlers/google.py
@@ -1,35 +1,9 @@
-import base64
-import binascii
 import uuid
-from typing import Any, Dict, Generator, Optional, Union
+from typing import Any, Dict, Generator

 from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall


-def _encode_thought_signature(sig: Optional[Union[bytes, str]]) -> Optional[str]:
-    # Gemini's Python SDK returns thought_signature as raw bytes, but the
-    # field is typed Optional[str] downstream and gets json.dumps'd into
-    # SSE events. Encode once at ingress so callers only ever see a str.
-    if isinstance(sig, bytes):
-        return base64.b64encode(sig).decode("ascii")
-    return sig
-
-
-def _decode_thought_signature(
-    sig: Optional[Union[bytes, str]],
-) -> Optional[Union[bytes, str]]:
-    # Reverse of _encode_thought_signature — Gemini's SDK expects bytes
-    # back when we replay a tool call. ``validate=True`` keeps ASCII
-    # strings that happen to be loosely decodable from being silently
-    # turned into bytes; non-base64 inputs pass through unchanged.
-    if isinstance(sig, str):
-        try:
-            return base64.b64decode(sig.encode("ascii"), validate=True)
-        except (binascii.Error, ValueError):
-            return sig
-    return sig
-
-
 class GoogleLLMHandler(LLMHandler):
    """Handler for Google's GenAI API."""

@@ -49,7 +23,7 @@ class GoogleLLMHandler(LLMHandler):
            for idx, part in enumerate(parts):
                if hasattr(part, "function_call") and part.function_call is not None:
                    has_sig = hasattr(part, "thought_signature") and part.thought_signature is not None
-                    thought_sig = _encode_thought_signature(part.thought_signature) if has_sig else None
+                    thought_sig = part.thought_signature if has_sig else None
                    tool_calls.append(
                        ToolCall(
                            id=str(uuid.uuid4()),
@@ -76,7 +50,7 @@ class GoogleLLMHandler(LLMHandler):
            tool_calls = []
            if hasattr(response, "function_call") and response.function_call is not None:
                has_sig = hasattr(response, "thought_signature") and response.thought_signature is not None
-                thought_sig = _encode_thought_signature(response.thought_signature) if has_sig else None
+                thought_sig = response.thought_signature if has_sig else None
                tool_calls.append(
                    ToolCall(
                        id=str(uuid.uuid4()),
@@ -96,15 +70,8 @@ class GoogleLLMHandler(LLMHandler):
        """Create a tool result message in the standard internal format."""
        import json as _json

-        from application.storage.db.serialization import PGNativeJSONEncoder
-
-        # PostgresTool results commonly include PG-native types
-        # (datetime / UUID / Decimal / bytea) when SELECT touches
-        # timestamptz / numeric / uuid / bytea columns. The shared
-        # encoder handles all five — bytes get base64 (lossless) instead
-        # of the ``str(b'...')`` repr that ``default=str`` would emit.
        content = (
-            _json.dumps(result, cls=PGNativeJSONEncoder)
+            _json.dumps(result, default=str)
            if not isinstance(result, str)
            else result
        )
--- a/application/llm/handlers/openai.py
+++ b/application/llm/handlers/openai.py
@@ -40,15 +40,8 @@ class OpenAILLMHandler(LLMHandler):
        """Create a tool result message in the standard internal format."""
        import json as _json

-        from application.storage.db.serialization import PGNativeJSONEncoder
-
-        # PostgresTool results commonly include PG-native types
-        # (datetime / UUID / Decimal / bytea) when SELECT touches
-        # timestamptz / numeric / uuid / bytea columns. The shared
-        # encoder handles all five — bytes get base64 (lossless) instead
-        # of the ``str(b'...')`` repr that ``default=str`` would emit.
        content = (
-            _json.dumps(result, cls=PGNativeJSONEncoder)
+            _json.dumps(result, default=str)
            if not isinstance(result, str)
            else result
        )
--- a/application/llm/llama_cpp.py
+++ b/application/llm/llama_cpp.py
@@ -26,8 +26,6 @@ class LlamaSingleton:


 class LlamaCpp(BaseLLM):
-    provider_name = "llama_cpp"
-
    def __init__(
        self,
        api_key=None,
--- a/application/llm/novita.py
+++ b/application/llm/novita.py
@@ -5,8 +5,6 @@ NOVITA_BASE_URL = "https://api.novita.ai/openai"


 class NovitaLLM(OpenAILLM):
-    provider_name = "novita"
-
    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
        super().__init__(
            api_key=api_key or settings.NOVITA_API_KEY or settings.API_KEY,
--- a/application/llm/open_router.py
+++ b/application/llm/open_router.py
@@ -5,8 +5,6 @@ OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"


 class OpenRouterLLM(OpenAILLM):
-    provider_name = "openrouter"
-
    def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
        super().__init__(
            api_key=api_key or settings.OPEN_ROUTER_API_KEY or settings.API_KEY,
--- a/application/llm/openai.py
+++ b/application/llm/openai.py
@@ -61,7 +61,6 @@ def _truncate_base64_for_logging(messages):


 class OpenAILLM(BaseLLM):
-    provider_name = "openai"

    def __init__(
        self,
--- a/application/llm/premai.py
+++ b/application/llm/premai.py
@@ -3,7 +3,6 @@ from application.core.settings import settings


 class PremAILLM(BaseLLM):
-    provider_name = "premai"

    def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
        from premai import Prem
--- a/application/llm/sagemaker.py
+++ b/application/llm/sagemaker.py
@@ -59,7 +59,6 @@ class LineIterator:


 class SagemakerAPILLM(BaseLLM):
-    provider_name = "sagemaker"

    def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
        import boto3
--- a/application/logging.py
+++ b/application/logging.py
@@ -1,13 +1,11 @@
 import datetime
 import functools
 import inspect
-import time

 import logging
 import uuid
 from typing import Any, Callable, Dict, Generator, List

-from application.core import log_context
 from application.storage.db.repositories.stack_logs import StackLogsRepository
 from application.storage.db.session import db_session

@@ -24,15 +22,6 @@ class LogContext:
        self.api_key = api_key
        self.query = query
        self.stacks = []
-        # Per-activity response aggregates populated by ``_consume_and_log``
-        # while it forwards stream items, then flushed onto the
-        # ``activity_finished`` event so every Flask request gets the
-        # same summary that ``run_agent_logic`` used to log only for the
-        # Celery webhook path.
-        self.answer_length = 0
-        self.thought_length = 0
-        self.source_count = 0
-        self.tool_call_count = 0


 def build_stack_data(
@@ -89,125 +78,25 @@ def log_activity() -> Callable:
            user = data.get("user", "local")
            api_key = data.get("user_api_key", "")
            query = kwargs.get("query", getattr(args[0], "query", ""))
-            agent_id = getattr(args[0], "agent_id", None) or kwargs.get("agent_id")
-            conversation_id = (
-                kwargs.get("conversation_id")
-                or getattr(args[0], "conversation_id", None)
-            )
-            model = getattr(args[0], "gpt_model", None) or getattr(args[0], "model", None)
-
-            # Capture the surrounding activity_id before overlaying ours,
-            # so nested activities record the parent → child link.
-            parent_activity_id = log_context.snapshot().get("activity_id")

            context = LogContext(endpoint, activity_id, user, api_key, query)
            kwargs["log_context"] = context

-            ctx_token = log_context.bind(
-                activity_id=activity_id,
-                parent_activity_id=parent_activity_id,
-                user_id=user,
-                agent_id=agent_id,
-                conversation_id=conversation_id,
-                endpoint=endpoint,
-                model=model,
-            )
-
-            started_at = time.monotonic()
            logging.info(
-                "activity_started",
-                extra={
-                    "activity_id": activity_id,
-                    "parent_activity_id": parent_activity_id,
-                    "user_id": user,
-                    "agent_id": agent_id,
-                    "conversation_id": conversation_id,
-                    "endpoint": endpoint,
-                    "model": model,
-                },
+                f"Starting activity: {endpoint} - {activity_id} - User: {user}"
            )

-            error: BaseException | None = None
-            try:
-                generator = func(*args, **kwargs)
-                yield from _consume_and_log(generator, context)
-            except Exception as exc:
-                # Only ``Exception`` counts as an activity error; ``GeneratorExit``
-                # (consumer disconnected mid-stream) and ``KeyboardInterrupt``
-                # flow through the finally as ``status="ok"``, matching
-                # ``_consume_and_log``.
-                error = exc
-                raise
-            finally:
-                _emit_activity_finished(
-                    context=context,
-                    parent_activity_id=parent_activity_id,
-                    started_at=started_at,
-                    error=error,
-                )
-                log_context.reset(ctx_token)
+            generator = func(*args, **kwargs)
+            yield from _consume_and_log(generator, context)

        return wrapper

    return decorator


-def _emit_activity_finished(
-    *,
-    context: "LogContext",
-    parent_activity_id: str | None,
-    started_at: float,
-    error: BaseException | None,
-) -> None:
-    """Emit the paired ``activity_finished`` event with duration, outcome,
-    and per-activity response aggregates accumulated in ``_consume_and_log``.
-    """
-    duration_ms = int((time.monotonic() - started_at) * 1000)
-    logging.info(
-        "activity_finished",
-        extra={
-            "activity_id": context.activity_id,
-            "parent_activity_id": parent_activity_id,
-            "user_id": context.user,
-            "endpoint": context.endpoint,
-            "duration_ms": duration_ms,
-            "status": "error" if error is not None else "ok",
-            "error_class": type(error).__name__ if error is not None else None,
-            "answer_length": context.answer_length,
-            "thought_length": context.thought_length,
-            "source_count": context.source_count,
-            "tool_call_count": context.tool_call_count,
-        },
-    )
-
-
-def _accumulate_response_summary(item: Any, context: "LogContext") -> None:
-    """Mirror the per-line aggregation that ``run_agent_logic`` did for the
-    Celery webhook path, but at the generator-consumption layer so every
-    ``Agent.gen`` activity (Flask streaming, sub-agents, workflow agents)
-    gets the same summary.
-    """
-    if not isinstance(item, dict):
-        return
-    if "answer" in item:
-        context.answer_length += len(str(item["answer"]))
-        return
-    if "thought" in item:
-        context.thought_length += len(str(item["thought"]))
-        return
-    sources = item.get("sources") if "sources" in item else None
-    if isinstance(sources, list):
-        context.source_count += len(sources)
-        return
-    tool_calls = item.get("tool_calls") if "tool_calls" in item else None
-    if isinstance(tool_calls, list):
-        context.tool_call_count += len(tool_calls)
-
-
 def _consume_and_log(generator: Generator, context: "LogContext"):
    try:
        for item in generator:
-            _accumulate_response_summary(item, context)
            yield item
    except Exception as e:
        logging.exception(f"Error in {context.endpoint} - {context.activity_id}: {e}")
--- a/application/parser/embedding_pipeline.py
+++ b/application/parser/embedding_pipeline.py
@@ -1,27 +1,12 @@
 import os
 import logging
-from typing import Any, List, Optional
+from typing import List, Any
 from retry import retry
 from tqdm import tqdm
 from application.core.settings import settings
-from application.storage.db.repositories.ingest_chunk_progress import (
-    IngestChunkProgressRepository,
-)
-from application.storage.db.session import db_session
 from application.vectorstore.vector_creator import VectorCreator


-class EmbeddingPipelineError(Exception):
-    """Raised when the per-chunk embed loop produces a partial index.
-
-    Escapes into Celery's ``autoretry_for`` so a transient cause (rate
-    limit, network blip) gets another shot. The chunk-progress
-    checkpoint makes retries cheap — only the failed-and-after chunks
-    re-run. After ``MAX_TASK_ATTEMPTS`` the poison-loop guard in
-    ``with_idempotency`` finalises the row as ``failed``.
-    """
-
-
 def sanitize_content(content: str) -> str:
    """
    Remove NUL characters that can cause vector store ingestion to fail.
@@ -37,11 +22,7 @@ def sanitize_content(content: str) -> str:
    return content.replace('\x00', '')


-# Per-chunk inline retry. Aggressive defaults (tries=10, delay=60) blocked
-# the loop for up to 9 min per chunk and wedged the heartbeat: lower the
-# tail so a transient failure fails-fast and the chunk-progress checkpoint
-# resumes cleanly on next dispatch.
-@retry(tries=3, delay=5, backoff=2)
+@retry(tries=10, delay=60)
 def add_text_to_store_with_retry(store: Any, doc: Any, source_id: str) -> None:
    """Add a document's text and metadata to the vector store with retry logic.
    
@@ -64,119 +45,21 @@ def add_text_to_store_with_retry(store: Any, doc: Any, source_id: str) -> None:
        raise


-def _init_progress_and_resume_index(
-    source_id: str, total_chunks: int, attempt_id: Optional[str],
-) -> int:
-    """Upsert the progress row and return the next chunk index to embed.
-
-    The repository's upsert preserves ``last_index`` only when the
-    incoming ``attempt_id`` matches the stored one (a Celery autoretry
-    of the same task). On a fresh attempt — including any caller that
-    doesn't pass an ``attempt_id``, e.g. legacy code or tests — the
-    row's checkpoint is reset so the loop starts from chunk 0. This
-    is what prevents a completed checkpoint from any prior run
-    silently no-op'ing the next sync/reingest.
-
-    Best-effort: a DB outage falls back to ``0`` (fresh run from
-    chunk 0). The embed loop's own re-raise still ensures partial
-    runs don't get cached as complete.
-    """
-    try:
-        with db_session() as conn:
-            progress = IngestChunkProgressRepository(conn).init_progress(
-                source_id, total_chunks, attempt_id,
-            )
-    except Exception as e:
-        logging.warning(
-            f"Could not init ingest progress for {source_id}: {e}",
-            exc_info=True,
-        )
-        return 0
-    if not progress:
-        return 0
-    last_index = progress.get("last_index", -1)
-    if last_index is None or last_index < 0:
-        return 0
-    return int(last_index) + 1
-
-
-def _record_progress(source_id: str, last_index: int, embedded_chunks: int) -> None:
-    """Best-effort checkpoint after each chunk; logged but never raised."""
-    try:
-        with db_session() as conn:
-            IngestChunkProgressRepository(conn).record_chunk(
-                source_id, last_index=last_index, embedded_chunks=embedded_chunks
-            )
-    except Exception as e:
-        logging.warning(
-            f"Could not record ingest progress for {source_id}: {e}", exc_info=True
-        )
-
-
-def assert_index_complete(source_id: str) -> None:
-    """Raise ``EmbeddingPipelineError`` if ``ingest_chunk_progress``
-    shows a partial embed for ``source_id``.
-
-    Defense-in-depth tripwire that workers run after
-    ``embed_and_store_documents`` to catch any future swallow path
-    that bypasses the function's own re-raise — the chunk-progress
-    row is the authoritative record of how many chunks landed.
-    No-op when no row exists (zero-doc validation raised before init,
-    or progress repo was unreachable).
-    """
-    try:
-        with db_session() as conn:
-            progress = IngestChunkProgressRepository(conn).get_progress(source_id)
-    except Exception as e:
-        logging.warning(
-            f"assert_index_complete: progress lookup failed for "
-            f"{source_id}: {e}",
-            exc_info=True,
-        )
-        return
-    if not progress:
-        return
-    embedded = int(progress.get("embedded_chunks") or 0)
-    total = int(progress.get("total_chunks") or 0)
-    if embedded < total:
-        raise EmbeddingPipelineError(
-            f"partial index for source {source_id}: "
-            f"{embedded}/{total} chunks embedded"
-        )
-
-
-def embed_and_store_documents(
-    docs: List[Any],
-    folder_name: str,
-    source_id: str,
-    task_status: Any,
-    *,
-    attempt_id: Optional[str] = None,
-) -> None:
+def embed_and_store_documents(docs: List[Any], folder_name: str, source_id: str, task_status: Any) -> None:
    """Embeds documents and stores them in a vector store.

-    Resumable across Celery autoretries of the *same* task: when
-    ``attempt_id`` matches the stored checkpoint's ``attempt_id``,
-    the loop resumes from ``last_index + 1``. A different
-    ``attempt_id`` (a fresh sync / reingest invocation) resets the
-    checkpoint so the index is rebuilt from chunk 0 — this is what
-    keeps a completed checkpoint from poisoning the next sync.
-
    Args:
        docs: List of documents to be embedded and stored.
        folder_name: Directory to save the vector store.
        source_id: Unique identifier for the source.
        task_status: Task state manager for progress updates.
-        attempt_id: Stable id of the current task invocation,
-            typically ``self.request.id`` from the Celery task body.
-            ``None`` is treated as a fresh attempt every time.

    Returns:
        None
-
+        
    Raises:
        OSError: If unable to create folder or save vector store.
-        EmbeddingPipelineError: If a chunk fails after retries.
+        Exception: If vector store creation or document embedding fails.
    """
    # Ensure the folder exists
    if not os.path.exists(folder_name):
@@ -186,77 +69,33 @@ def embed_and_store_documents(
    if not docs:
        raise ValueError("No documents to embed - check file format and extension")

-    total_docs = len(docs)
-    # Atomic upsert that preserves checkpoint state on attempt-id match
-    # (autoretry of same task) and resets it on mismatch (fresh sync /
-    # reingest). Returns the new resume index — 0 means "start fresh".
-    resume_index = _init_progress_and_resume_index(
-        source_id, total_docs, attempt_id,
-    )
-    is_resume = resume_index > 0
-
    # Initialize vector store
    if settings.VECTOR_STORE == "faiss":
-        if is_resume:
-            # Load the existing FAISS index from storage so chunks
-            # already embedded by the prior attempt survive the
-            # save_local rewrite at the end of this run.
-            store = VectorCreator.create_vectorstore(
-                settings.VECTOR_STORE,
-                source_id=source_id,
-                embeddings_key=os.getenv("EMBEDDINGS_KEY"),
-            )
-            loop_start = resume_index
-        else:
-            # FAISS requires at least one doc to construct the store;
-            # seed with ``docs[0]`` and let the loop pick up at index 1.
-            store = VectorCreator.create_vectorstore(
-                settings.VECTOR_STORE,
-                docs_init=[docs[0]],
-                source_id=source_id,
-                embeddings_key=os.getenv("EMBEDDINGS_KEY"),
-            )
-            # Record the seeded chunk so single-doc ingests don't fail
-            # ``assert_index_complete`` — the loop never runs for
-            # ``total_docs == 1`` and would otherwise leave
-            # ``embedded_chunks`` at 0 / ``last_index`` at -1. The loop
-            # body's per-iteration ``_record_progress`` overshoots
-            # correctly for multi-chunk runs (counts seed + iterations),
-            # so writing this checkpoint up-front is a no-op for those.
-            _record_progress(source_id, last_index=0, embedded_chunks=1)
-            loop_start = 1
+        docs_init, docs = [docs[0]], docs[1:]
+        store = VectorCreator.create_vectorstore(
+            settings.VECTOR_STORE,
+            docs_init=docs_init,
+            source_id=source_id,
+            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
+        )
    else:
        store = VectorCreator.create_vectorstore(
            settings.VECTOR_STORE,
            source_id=source_id,
            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
        )
-        # Only wipe the index on a fresh run — a resume must keep the
-        # chunks that earlier attempts already embedded.
-        if not is_resume:
-            store.delete_index()
-        loop_start = resume_index
+        store.delete_index()

-    if is_resume and loop_start >= total_docs:
-        # Nothing left to do; the loop runs zero iterations and
-        # downstream finalize logic still executes. This is only
-        # reachable on a same-attempt retry of a task whose previous
-        # attempt finished — typically a Celery acks_late redelivery
-        # after the task already returned. The ``assert_index_complete``
-        # tripwire still validates ``embedded == total`` afterwards.
-        loop_start = total_docs
+    total_docs = len(docs)

    # Process and embed documents
-    chunk_error: Exception | None = None
-    failed_idx: int | None = None
-    for idx in tqdm(
-        range(loop_start, total_docs),
+    for idx, doc in tqdm(
+        enumerate(docs),
        desc="Embedding 🦖",
        unit="docs",
-        total=total_docs - loop_start,
+        total=total_docs,
        bar_format="{l_bar}{bar}| Time Left: {remaining}",
    ):
-        doc = docs[idx]
        try:
            # Update task status for progress tracking
            progress = int(((idx + 1) / total_docs) * 100)
@@ -264,10 +103,7 @@ def embed_and_store_documents(

            # Add document to vector store
            add_text_to_store_with_retry(store, doc, source_id)
-            _record_progress(source_id, last_index=idx, embedded_chunks=idx + 1)
        except Exception as e:
-            chunk_error = e
-            failed_idx = idx
            logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
            logging.info(f"Saving progress at document {idx} out of {total_docs}")
            try:
@@ -288,16 +124,3 @@ def embed_and_store_documents(
            raise OSError(f"Unable to save vector store to {folder_name}: {e}") from e
    else:
        logging.info("Vector store saved successfully.")
-
-    # Re-raise after the partial save: the chunks that *did* embed are
-    # flushed to disk and recorded in ``ingest_chunk_progress``, so a
-    # Celery autoretry resumes via ``_read_resume_index`` and only
-    # re-runs the failed-and-after chunks. Without the raise, the
-    # task body returns success and ``with_idempotency`` finalises
-    # ``task_dedup`` as ``completed`` for a partial index — poisoning
-    # the cache for 24h.
-    if chunk_error is not None:
-        raise EmbeddingPipelineError(
-            f"embed failure at chunk {failed_idx}/{total_docs} "
-            f"for source {source_id}"
-        ) from chunk_error
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -60,9 +60,6 @@ class ClassicRAG(BaseRetriever):
            agent_id=self.agent_id,
            model_user_id=self.model_user_id,
        )
-        # Query-rephrase LLM is a side channel — tag it so its rows
-        # land as ``source='rag_condense'`` in cost-attribution.
-        self.llm._token_usage_source = "rag_condense"

        if "active_docs" in source and source["active_docs"] is not None:
            if isinstance(source["active_docs"], list):
--- a/application/storage/db/base_repository.py
+++ b/application/storage/db/base_repository.py
@@ -11,8 +11,6 @@ import re
 from typing import Any, Mapping
 from uuid import UUID

-from application.storage.db.serialization import coerce_pg_native
-

 _UUID_RE = re.compile(
    r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
@@ -36,17 +34,12 @@ def looks_like_uuid(value: Any) -> bool:


 def row_to_dict(row: Any) -> dict:
-    """Convert a SQLAlchemy ``Row`` to a plain JSON-safe dict.
+    """Convert a SQLAlchemy ``Row`` to a plain dict with Mongo-compatible ids.

-    Normalises PG-native types at the SELECT boundary: UUID, datetime,
-    date, Decimal, and bytes are coerced to JSON-safe forms via
-    :func:`coerce_pg_native`. Downstream serialisation (SSE events,
-    JSONB writes, API responses) becomes safe by default — repository
-    consumers no longer need to know that PG returns a different type
-    set than Mongo did.
-
-    Also emits ``_id`` alongside ``id`` for the duration of the Mongo→PG
-    cutover so legacy serializers expecting Mongo's shape keep working.
+    During the migration window, API responses and downstream code still
+    expect a string ``_id`` field (matching the Mongo shape). This helper
+    normalizes UUID columns to strings and emits both ``id`` and ``_id`` so
+    existing serializers keep working unchanged.

    Args:
        row: A SQLAlchemy ``Row`` object, or ``None``.
@@ -59,9 +52,10 @@ def row_to_dict(row: Any) -> dict:

    # Row has a ``._mapping`` attribute exposing a MappingProxy view.
    mapping: Mapping[str, Any] = row._mapping  # type: ignore[attr-defined]
-    out = coerce_pg_native(dict(mapping))
+    out = dict(mapping)

    if "id" in out and out["id"] is not None:
+        out["id"] = str(out["id"]) if isinstance(out["id"], UUID) else out["id"]
        out["_id"] = out["id"]

    return out
--- a/application/storage/db/models.py
+++ b/application/storage/db/models.py
@@ -91,16 +91,6 @@ token_usage_table = Table(
    Column("prompt_tokens", Integer, nullable=False, server_default="0"),
    Column("generated_tokens", Integer, nullable=False, server_default="0"),
    Column("timestamp", DateTime(timezone=True), nullable=False, server_default=func.now()),
-    # Added in ``0004_durability_foundation``. Distinguishes
-    # ``agent_stream`` (primary completion) from side-channel inserts
-    # (``title`` / ``compression`` / ``rag_condense`` / ``fallback``)
-    # so cost attribution dashboards can group by call source.
-    Column("source", Text, nullable=False, server_default="agent_stream"),
-    # Added in ``0005_token_usage_request_id``. Stream-scoped UUID stamped
-    # on the agent's primary LLM so multi-call agent runs (which produce
-    # N rows) count as a single request via DISTINCT in the repository
-    # query. NULL on side-channel sources by design.
-    Column("request_id", Text),
 )

 user_logs_table = Table(
@@ -355,11 +345,6 @@ conversation_messages_table = Table(
    Column("feedback", JSONB),
    Column("timestamp", DateTime(timezone=True), nullable=False, server_default=func.now()),
    Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
-    # Added in 0004_durability_foundation. ``status`` is the WAL state
-    # machine (pending|streaming|complete|failed); ``request_id`` ties a
-    # row to a specific HTTP request for log correlation.
-    Column("status", Text, nullable=False, server_default="complete"),
-    Column("request_id", Text),
    UniqueConstraint("conversation_id", "position", name="conversation_messages_conv_pos_uidx"),
 )

@@ -392,101 +377,9 @@ pending_tool_state_table = Table(
    Column("client_tools", JSONB),
    Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
    Column("expires_at", DateTime(timezone=True), nullable=False),
-    # Added in ``0004_durability_foundation``. ``status`` is the
-    # ``pending|resuming`` claim flag for the resumed-run path;
-    # ``resumed_at`` stamps when ``mark_resuming`` flipped the row so
-    # the cleanup janitor can revert stale claims after the grace
-    # window.
-    Column("status", Text, nullable=False, server_default="pending"),
-    Column("resumed_at", DateTime(timezone=True)),
    UniqueConstraint("conversation_id", "user_id", name="pending_tool_state_conv_user_uidx"),
 )

-
-# --- Tier 1 durability foundation (migration 0004) --------------------------
-# CHECK constraints (status enums) and partial indexes are intentionally
-# omitted from these declarations — the DB is the authority. Repositories
-# use raw ``text(...)`` SQL against these tables, not the Core objects.
-
-task_dedup_table = Table(
-    "task_dedup",
-    metadata,
-    Column("idempotency_key", Text, primary_key=True),
-    Column("task_name", Text, nullable=False),
-    Column("task_id", Text, nullable=False),
-    Column("result_json", JSONB),
-    # CHECK (status IN ('pending', 'completed', 'failed')) lives in 0004.
-    Column("status", Text, nullable=False),
-    # Bumped each time the per-Celery-task wrapper re-enters; the
-    # poison-loop guard (``MAX_TASK_ATTEMPTS=5``) refuses to run fn once
-    # this exceeds the threshold.
-    Column("attempt_count", Integer, nullable=False, server_default="0"),
-    Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
-    # Added in ``0006_idempotency_lease``. Per-invocation random id
-    # written by the wrapper at lease claim; refreshed every 30 s by a
-    # heartbeat thread. Other workers seeing a fresh lease (NOT NULL
-    # AND ``lease_expires_at > now()``) refuse to run the task body.
-    Column("lease_owner_id", Text),
-    Column("lease_expires_at", DateTime(timezone=True)),
-)
-
-webhook_dedup_table = Table(
-    "webhook_dedup",
-    metadata,
-    Column("idempotency_key", Text, primary_key=True),
-    Column("agent_id", UUID(as_uuid=True), nullable=False),
-    Column("task_id", Text, nullable=False),
-    Column("response_json", JSONB),
-    Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
-)
-
-# Three-phase tool-call journal: ``proposed → executed → confirmed``
-# (terminal: ``failed``; ``compensated`` is grandfathered in the CHECK
-# from migration 0004 but no code writes it). The reconciler sweeps
-# stuck rows via the partial ``tool_call_attempts_pending_ts_idx``.
-tool_call_attempts_table = Table(
-    "tool_call_attempts",
-    metadata,
-    Column("call_id", Text, primary_key=True),
-    # ON DELETE SET NULL preserves the journal even after the parent
-    # message is deleted — useful for cost-attribution / compliance.
-    Column(
-        "message_id",
-        UUID(as_uuid=True),
-        ForeignKey("conversation_messages.id", ondelete="SET NULL"),
-    ),
-    Column("tool_id", UUID(as_uuid=True)),
-    Column("tool_name", Text, nullable=False),
-    Column("action_name", Text, nullable=False),
-    Column("arguments", JSONB, nullable=False),
-    Column("result", JSONB),
-    Column("error", Text),
-    # CHECK (status IN ('proposed', 'executed', 'confirmed',
-    # 'compensated', 'failed')) lives in 0004.
-    Column("status", Text, nullable=False),
-    Column("attempted_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
-    Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
-)
-
-# Per-source ingest checkpoint. Heartbeat thread bumps ``last_updated``
-# every 30s while a worker embeds; the reconciler escalates when it
-# stops ticking.
-ingest_chunk_progress_table = Table(
-    "ingest_chunk_progress",
-    metadata,
-    Column("source_id", UUID(as_uuid=True), primary_key=True),
-    Column("total_chunks", Integer, nullable=False),
-    Column("embedded_chunks", Integer, nullable=False, server_default="0"),
-    Column("last_index", Integer, nullable=False, server_default="-1"),
-    Column("last_updated", DateTime(timezone=True), nullable=False, server_default=func.now()),
-    # Added in ``0005_ingest_attempt_id``. Stamped from
-    # ``self.request.id`` (Celery's stable task id) so a retry of the
-    # same task resumes from the checkpoint, but a separate invocation
-    # (manual reingest, scheduled sync) resets to a clean re-index.
-    Column("attempt_id", Text),
-)
-
-
 workflows_table = Table(
    "workflows",
    metadata,
--- a/application/storage/db/repositories/attachments.py
+++ b/application/storage/db/repositories/attachments.py
@@ -17,21 +17,6 @@ _UPDATABLE_SCALARS = {
 _UPDATABLE_JSONB = {"metadata"}


-def _attachment_to_dict(row: Any) -> dict:
-    """row_to_dict + ``upload_path``→``path`` alias.
-
-    Pre-Postgres, the Mongo attachment shape used ``path``. The PG column
-    is ``upload_path``; LLM provider code (google_ai/openai/anthropic and
-    handlers/base) still reads ``attachment.get("path")``. Mirroring the
-    ``id``/``_id`` dual-emit in row_to_dict so consumers don't need to
-    know which storage backend produced the dict.
-    """
-    out = row_to_dict(row)
-    if "upload_path" in out and out.get("path") is None:
-        out["path"] = out["upload_path"]
-    return out
-
-
 class AttachmentsRepository:
    def __init__(self, conn: Connection) -> None:
        self._conn = conn
@@ -81,7 +66,7 @@ class AttachmentsRepository:
                "legacy_mongo_id": legacy_mongo_id,
            },
        )
-        return _attachment_to_dict(result.fetchone())
+        return row_to_dict(result.fetchone())

    def get(self, attachment_id: str, user_id: str) -> Optional[dict]:
        result = self._conn.execute(
@@ -91,7 +76,7 @@ class AttachmentsRepository:
            {"id": attachment_id, "user_id": user_id},
        )
        row = result.fetchone()
-        return _attachment_to_dict(row) if row is not None else None
+        return row_to_dict(row) if row is not None else None

    def get_any(self, attachment_id: str, user_id: str) -> Optional[dict]:
        """Resolve an attachment by either PG UUID or legacy Mongo ObjectId string."""
@@ -170,14 +155,14 @@ class AttachmentsRepository:
            params["user_id"] = user_id
        result = self._conn.execute(text(sql), params)
        row = result.fetchone()
-        return _attachment_to_dict(row) if row is not None else None
+        return row_to_dict(row) if row is not None else None

    def list_for_user(self, user_id: str) -> list[dict]:
        result = self._conn.execute(
            text("SELECT * FROM attachments WHERE user_id = :user_id ORDER BY created_at DESC"),
            {"user_id": user_id},
        )
-        return [_attachment_to_dict(r) for r in result.fetchall()]
+        return [row_to_dict(r) for r in result.fetchall()]

    def update(self, attachment_id: str, user_id: str, fields: dict) -> bool:
        """Partial update. Used by the LLM providers to cache their
--- a/application/storage/db/repositories/connector_sessions.py
+++ b/application/storage/db/repositories/connector_sessions.py
@@ -25,7 +25,6 @@ from typing import Any, Optional
 from sqlalchemy import Connection, text

 from application.storage.db.base_repository import row_to_dict
-from application.storage.db.serialization import PGNativeJSONEncoder


 _UPDATABLE_SCALARS = {
@@ -37,7 +36,7 @@ _UPDATABLE_JSONB = {"session_data", "token_info"}
 def _jsonb(value: Any) -> Any:
    if value is None:
        return None
-    return json.dumps(value, cls=PGNativeJSONEncoder)
+    return json.dumps(value, default=str)


 class ConnectorSessionsRepository:
--- a/application/storage/db/repositories/conversations.py
+++ b/application/storage/db/repositories/conversations.py
@@ -22,7 +22,6 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert

 from application.storage.db.base_repository import looks_like_uuid, row_to_dict
 from application.storage.db.models import conversations_table, conversation_messages_table
-from application.storage.db.serialization import PGNativeJSONEncoder


 def _message_row_to_dict(row) -> dict:
@@ -453,7 +452,7 @@ class ConversationsRepository:
            ),
            {
                "id": conversation_id,
-                "point": json.dumps(point, cls=PGNativeJSONEncoder),
+                "point": json.dumps(point, default=str),
                "max_points": int(max_points),
            },
        )
@@ -633,200 +632,6 @@ class ConversationsRepository:
        result = self._conn.execute(text(sql), params)
        return result.rowcount > 0

-    def reserve_message(
-        self,
-        conversation_id: str,
-        *,
-        prompt: str,
-        placeholder_response: str,
-        request_id: str | None = None,
-        status: str = "pending",
-        attachments: list[str] | None = None,
-        model_id: str | None = None,
-        metadata: dict | None = None,
-    ) -> dict:
-        """Pre-persist a placeholder assistant message before the LLM call."""
-        self._conn.execute(
-            text(
-                "SELECT id FROM conversations "
-                "WHERE id = CAST(:conv_id AS uuid) FOR UPDATE"
-            ),
-            {"conv_id": conversation_id},
-        )
-        next_pos = self._conn.execute(
-            text(
-                "SELECT COALESCE(MAX(position), -1) + 1 AS next_pos "
-                "FROM conversation_messages "
-                "WHERE conversation_id = CAST(:conv_id AS uuid)"
-            ),
-            {"conv_id": conversation_id},
-        ).scalar()
-
-        values = {
-            "conversation_id": conversation_id,
-            "position": next_pos,
-            "prompt": prompt,
-            "response": placeholder_response,
-            "status": status,
-            "request_id": request_id,
-            "model_id": model_id,
-            "message_metadata": metadata or {},
-        }
-        if attachments:
-            resolved = self._resolve_attachment_refs(
-                [str(a) for a in attachments],
-            )
-            if resolved:
-                values["attachments"] = resolved
-
-        stmt = (
-            pg_insert(conversation_messages_table)
-            .values(**values)
-            .returning(conversation_messages_table)
-        )
-        result = self._conn.execute(stmt)
-        self._conn.execute(
-            text(
-                "UPDATE conversations SET updated_at = now() "
-                "WHERE id = CAST(:id AS uuid)"
-            ),
-            {"id": conversation_id},
-        )
-        return _message_row_to_dict(result.fetchone())
-
-    def update_message_by_id(
-        self, message_id: str, fields: dict,
-        *, only_if_non_terminal: bool = False,
-    ) -> bool:
-        """Update specific fields on a message identified by its UUID.
-
-        ``metadata`` is merged into the existing JSONB rather than
-        overwritten, so a reconciler-set ``reconcile_attempts`` survives
-        a successful late finalize. When ``only_if_non_terminal`` is
-        True, the update is gated so a late finalize cannot retract a
-        reconciler-set ``failed`` (or a prior ``complete``).
-        """
-        if not looks_like_uuid(message_id):
-            return False
-        allowed = {
-            "prompt", "response", "thought", "sources", "tool_calls",
-            "attachments", "model_id", "metadata", "timestamp", "status",
-            "request_id", "feedback", "feedback_timestamp",
-        }
-        filtered = {k: v for k, v in fields.items() if k in allowed}
-        if not filtered:
-            return False
-
-        api_to_col = {"metadata": "message_metadata"}
-
-        set_parts = []
-        params: dict = {"id": message_id}
-        for key, val in filtered.items():
-            col = api_to_col.get(key, key)
-            if key == "metadata":
-                if val is None:
-                    set_parts.append(f"{col} = NULL")
-                else:
-                    set_parts.append(
-                        f"{col} = COALESCE({col}, '{{}}'::jsonb) "
-                        f"|| CAST(:{col} AS jsonb)"
-                    )
-                    params[col] = (
-                        json.dumps(val) if not isinstance(val, str) else val
-                    )
-            elif key in ("sources", "tool_calls", "feedback"):
-                set_parts.append(f"{col} = CAST(:{col} AS jsonb)")
-                if val is None:
-                    params[col] = None
-                else:
-                    params[col] = (
-                        json.dumps(val) if not isinstance(val, str) else val
-                    )
-            elif key == "attachments":
-                set_parts.append(f"{col} = CAST(:{col} AS uuid[])")
-                params[col] = self._resolve_attachment_refs(
-                    [str(a) for a in val] if val else [],
-                )
-            else:
-                set_parts.append(f"{col} = :{col}")
-                params[col] = val
-
-        set_parts.append("updated_at = now()")
-        where_clauses = ["id = CAST(:id AS uuid)"]
-        if only_if_non_terminal:
-            where_clauses.append("status NOT IN ('complete', 'failed')")
-        sql = (
-            f"UPDATE conversation_messages SET {', '.join(set_parts)} "
-            f"WHERE {' AND '.join(where_clauses)}"
-        )
-        result = self._conn.execute(text(sql), params)
-        return result.rowcount > 0
-
-    def update_message_status(
-        self, message_id: str, status: str,
-    ) -> bool:
-        """Cheap status-only transition (e.g. pending → streaming).
-
-        Only flips non-terminal rows: a reconciler-set ``failed`` row
-        stays put so the late streaming chunk doesn't silently retract
-        the alert.
-        """
-        if not looks_like_uuid(message_id):
-            return False
-        result = self._conn.execute(
-            text(
-                "UPDATE conversation_messages SET status = :status, "
-                "updated_at = now() "
-                "WHERE id = CAST(:id AS uuid) "
-                "AND status NOT IN ('complete', 'failed')"
-            ),
-            {"id": message_id, "status": status},
-        )
-        return result.rowcount > 0
-
-    def heartbeat_message(self, message_id: str) -> bool:
-        """Stamp ``message_metadata.last_heartbeat_at`` with ``clock_timestamp()``.
-
-        The reconciler's staleness check uses ``GREATEST(timestamp,
-        last_heartbeat_at)``, so this call extends a long-running
-        stream's effective freshness without touching ``timestamp`` (the
-        creation time, used for history sort) or ``status`` (the WAL
-        marker). Skips terminal rows so a late heartbeat can't silently
-        retract a reconciler-set ``failed``.
-        """
-        if not looks_like_uuid(message_id):
-            return False
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE conversation_messages
-                SET message_metadata = jsonb_set(
-                    COALESCE(message_metadata, '{}'::jsonb),
-                    '{last_heartbeat_at}',
-                    to_jsonb(clock_timestamp())
-                )
-                WHERE id = CAST(:id AS uuid)
-                  AND status NOT IN ('complete', 'failed')
-                """
-            ),
-            {"id": message_id},
-        )
-        return result.rowcount > 0
-
-    def confirm_executed_tool_calls(self, message_id: str) -> int:
-        """Flip ``tool_call_attempts.status='executed' → 'confirmed'`` for the message."""
-        if not looks_like_uuid(message_id):
-            return 0
-        result = self._conn.execute(
-            text(
-                "UPDATE tool_call_attempts SET status = 'confirmed', "
-                "updated_at = now() "
-                "WHERE message_id = CAST(:mid AS uuid) AND status = 'executed'"
-            ),
-            {"mid": message_id},
-        )
-        return result.rowcount or 0
-
    def truncate_after(self, conversation_id: str, keep_up_to: int) -> int:
        """Delete messages with position > keep_up_to.

--- a/application/storage/db/repositories/idempotency.py
+++ b/application/storage/db/repositories/idempotency.py
@@ -1,346 +0,0 @@
-"""Repository for ``webhook_dedup`` and ``task_dedup``; 24h TTL enforced at read."""
-
-from __future__ import annotations
-
-import json
-from typing import Any, Optional
-
-from sqlalchemy import Connection, text
-
-from application.storage.db.base_repository import row_to_dict
-from application.storage.db.serialization import PGNativeJSONEncoder
-
-# 24h TTL is the contract surfaced in the upload/webhook docstrings; the
-# read filters and the stale-row replacement predicate must agree, or the
-# upsert can fall into a window where the row is "fresh" to the writer
-# but "expired" to the reader (or vice versa). Keep one constant so any
-# future change moves both directions in lockstep.
-DEDUP_TTL_INTERVAL = "24 hours"
-
-
-def _jsonb(value: Any) -> Any:
-    if value is None:
-        return None
-    return json.dumps(value, cls=PGNativeJSONEncoder)
-
-
-class IdempotencyRepository:
-    def __init__(self, conn: Connection) -> None:
-        self._conn = conn
-
-    # --- webhook_dedup -----------------------------------------------------
-
-    def get_webhook(self, key: str) -> Optional[dict]:
-        """Return the cached webhook row for ``key`` if still within the 24h window."""
-        row = self._conn.execute(
-            text(
-                """
-                SELECT * FROM webhook_dedup
-                WHERE idempotency_key = :key
-                  AND created_at > now() - CAST(:ttl AS interval)
-                """
-            ),
-            {"key": key, "ttl": DEDUP_TTL_INTERVAL},
-        ).fetchone()
-        return row_to_dict(row) if row is not None else None
-
-    def record_webhook(
-        self,
-        key: str,
-        agent_id: str,
-        task_id: str,
-        response_json: dict,
-    ) -> Optional[dict]:
-        """Insert a webhook dedup row; return None if another writer raced and won.
-
-        ``ON CONFLICT`` replaces an existing row only when its ``created_at``
-        is past TTL — atomic stale-row recycling under the row lock. A
-        within-TTL conflict yields no row; the caller resolves it via
-        :meth:`get_webhook`.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                INSERT INTO webhook_dedup (
-                    idempotency_key, agent_id, task_id, response_json
-                )
-                VALUES (
-                    :key, CAST(:agent_id AS uuid), :task_id,
-                    CAST(:response_json AS jsonb)
-                )
-                ON CONFLICT (idempotency_key) DO UPDATE
-                   SET agent_id      = EXCLUDED.agent_id,
-                       task_id       = EXCLUDED.task_id,
-                       response_json = EXCLUDED.response_json,
-                       created_at    = now()
-                   WHERE webhook_dedup.created_at
-                         <= now() - CAST(:ttl AS interval)
-                RETURNING *
-                """
-            ),
-            {
-                "key": key,
-                "agent_id": agent_id,
-                "task_id": task_id,
-                "response_json": _jsonb(response_json),
-                "ttl": DEDUP_TTL_INTERVAL,
-            },
-        )
-        row = result.fetchone()
-        return row_to_dict(row) if row is not None else None
-
-    # --- task_dedup --------------------------------------------------------
-
-    def get_task(self, key: str) -> Optional[dict]:
-        """Return the cached task row for ``key`` if still within the 24h window."""
-        row = self._conn.execute(
-            text(
-                """
-                SELECT * FROM task_dedup
-                WHERE idempotency_key = :key
-                  AND created_at > now() - CAST(:ttl AS interval)
-                """
-            ),
-            {"key": key, "ttl": DEDUP_TTL_INTERVAL},
-        ).fetchone()
-        return row_to_dict(row) if row is not None else None
-
-    def claim_task(
-        self,
-        key: str,
-        task_name: str,
-        task_id: str,
-    ) -> Optional[dict]:
-        """Claim ``key`` for this task. Returns the inserted row, or None if
-        another writer raced and won. The HTTP entry must call this *before*
-        ``.delay()`` so only the winner enqueues the Celery task.
-
-        ``ON CONFLICT`` replaces an existing row in two cases:
-
-        - **status='failed'**: the worker's poison-loop guard or the
-          reconciler's stuck-pending sweep finalised the prior attempt
-          as failed. Both explicitly intend a same-key retry to re-run
-          (see ``run_reconciliation`` Q5 docstring) — letting the row
-          block for 24 h would silently undo that intent.
-        - **created_at past TTL**: a stale claim from any status no
-          longer represents a meaningful dedup signal.
-
-        ``status='completed'`` rows still block within TTL — that's the
-        cached-success contract callers rely on. ``status='pending'``
-        rows still block within TTL so concurrent same-key requests
-        collapse onto the in-flight task. Result/attempt fields are
-        reset to their fresh-claim defaults during replacement.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                INSERT INTO task_dedup (
-                    idempotency_key, task_name, task_id, result_json, status
-                )
-                VALUES (
-                    :key, :task_name, :task_id, NULL, 'pending'
-                )
-                ON CONFLICT (idempotency_key) DO UPDATE
-                   SET task_name     = EXCLUDED.task_name,
-                       task_id       = EXCLUDED.task_id,
-                       result_json   = NULL,
-                       status        = 'pending',
-                       attempt_count = 0,
-                       created_at    = now()
-                   WHERE task_dedup.status = 'failed'
-                      OR task_dedup.created_at
-                         <= now() - CAST(:ttl AS interval)
-                RETURNING *
-                """
-            ),
-            {
-                "key": key,
-                "task_name": task_name,
-                "task_id": task_id,
-                "ttl": DEDUP_TTL_INTERVAL,
-            },
-        )
-        row = result.fetchone()
-        return row_to_dict(row) if row is not None else None
-
-    def try_claim_lease(
-        self,
-        key: str,
-        task_name: str,
-        task_id: str,
-        owner_id: str,
-        ttl_seconds: int = 60,
-    ) -> Optional[int]:
-        """Atomically claim the running lease for ``key``.
-
-        Returns the new ``attempt_count`` if this caller now owns the
-        lease (fresh insert OR existing row whose lease was empty/expired),
-        or ``None`` if a different worker holds a live lease.
-
-        The conflict path also bumps ``attempt_count`` so the
-        poison-loop guard in :func:`with_idempotency` can fire after
-        :data:`MAX_TASK_ATTEMPTS` reclaims. ``status='completed'`` rows
-        are deliberately untouched — :func:`_lookup_completed` is the
-        cache short-circuit and runs before this. Uses
-        ``clock_timestamp()`` so a same-transaction refresh actually
-        moves the expiry forward (``now()`` is frozen at txn start).
-        """
-        result = self._conn.execute(
-            text(
-                """
-                INSERT INTO task_dedup (
-                    idempotency_key, task_name, task_id, status, attempt_count,
-                    lease_owner_id, lease_expires_at
-                ) VALUES (
-                    :key, :task_name, :task_id, 'pending', 1,
-                    :owner,
-                    clock_timestamp() + make_interval(secs => :ttl)
-                )
-                ON CONFLICT (idempotency_key) DO UPDATE
-                   SET attempt_count    = task_dedup.attempt_count + 1,
-                       task_name        = EXCLUDED.task_name,
-                       lease_owner_id   = EXCLUDED.lease_owner_id,
-                       lease_expires_at = EXCLUDED.lease_expires_at
-                   WHERE task_dedup.status <> 'completed'
-                     AND (task_dedup.lease_expires_at IS NULL
-                          OR task_dedup.lease_expires_at <= clock_timestamp())
-                RETURNING attempt_count
-                """
-            ),
-            {
-                "key": key,
-                "task_name": task_name,
-                "task_id": task_id,
-                "owner": owner_id,
-                "ttl": int(ttl_seconds),
-            },
-        )
-        row = result.fetchone()
-        return int(row[0]) if row is not None else None
-
-    def refresh_lease(
-        self,
-        key: str,
-        owner_id: str,
-        ttl_seconds: int = 60,
-    ) -> bool:
-        """Bump ``lease_expires_at`` if this caller still owns the lease.
-
-        Returns False when ownership was lost (lease stolen by another
-        worker after expiry, or row finalised). The heartbeat thread
-        logs that as a warning but doesn't try to abort the running
-        task — at-most-one-worker is bounded by ``ttl_seconds``, the
-        damage from a brief overlap window is unavoidable in this case.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE task_dedup
-                SET lease_expires_at =
-                        clock_timestamp() + make_interval(secs => :ttl)
-                WHERE idempotency_key = :key
-                  AND lease_owner_id = :owner
-                  AND status = 'pending'
-                """
-            ),
-            {
-                "key": key,
-                "owner": owner_id,
-                "ttl": int(ttl_seconds),
-            },
-        )
-        return result.rowcount > 0
-
-    def release_lease(self, key: str, owner_id: str) -> bool:
-        """Clear ``lease_owner_id`` / ``lease_expires_at`` on the
-        wrapper's exception path so Celery's autoretry_for doesn't have
-        to wait the full ``ttl_seconds`` before the next worker can
-        re-claim. No-op if a different worker has since taken over the
-        lease — that case is benign (we'd just be acknowledging we
-        weren't the owner anymore).
-        """
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE task_dedup
-                SET lease_owner_id   = NULL,
-                    lease_expires_at = NULL
-                WHERE idempotency_key = :key
-                  AND lease_owner_id = :owner
-                  AND status = 'pending'
-                """
-            ),
-            {"key": key, "owner": owner_id},
-        )
-        return result.rowcount > 0
-
-    def finalize_task(
-        self,
-        key: str,
-        *,
-        result_json: Optional[dict],
-        status: str,
-    ) -> bool:
-        """Promote ``status='pending'`` → ``completed|failed`` with the
-        recorded result. Also clears the lease columns so a stale
-        ``lease_expires_at`` doesn't show up in operator dashboards.
-        No-op if the row is already terminal — preserves the first
-        writer's outcome on a crash + retry.
-        """
-        if status not in ("completed", "failed"):
-            raise ValueError(f"finalize_task: invalid status {status!r}")
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE task_dedup
-                SET status           = :status,
-                    result_json      = CAST(:result_json AS jsonb),
-                    lease_owner_id   = NULL,
-                    lease_expires_at = NULL
-                WHERE idempotency_key = :key
-                  AND status = 'pending'
-                """
-            ),
-            {
-                "key": key,
-                "status": status,
-                "result_json": _jsonb(result_json),
-            },
-        )
-        return result.rowcount > 0
-
-    # --- housekeeping ------------------------------------------------------
-
-    def cleanup_expired(self) -> dict:
-        """Delete rows past TTL from both dedup tables; return per-table counts.
-
-        The TTL-aware upserts already prevent stale rows from blocking new
-        work, so this is purely housekeeping — bounds table growth and
-        keeps test isolation cheap. Safe to run concurrently with other
-        writers: a same-key INSERT racing the DELETE will either find no
-        row (acts as a fresh insert) or find a fresh row (re-created
-        between DELETE and conflict-check), neither of which is wrong.
-        """
-        task_deleted = self._conn.execute(
-            text(
-                """
-                DELETE FROM task_dedup
-                WHERE created_at <= now() - CAST(:ttl AS interval)
-                """
-            ),
-            {"ttl": DEDUP_TTL_INTERVAL},
-        ).rowcount
-        webhook_deleted = self._conn.execute(
-            text(
-                """
-                DELETE FROM webhook_dedup
-                WHERE created_at <= now() - CAST(:ttl AS interval)
-                """
-            ),
-            {"ttl": DEDUP_TTL_INTERVAL},
-        ).rowcount
-        return {
-            "task_dedup_deleted": int(task_deleted or 0),
-            "webhook_dedup_deleted": int(webhook_deleted or 0),
-        }
-
--- a/application/storage/db/repositories/ingest_chunk_progress.py
+++ b/application/storage/db/repositories/ingest_chunk_progress.py
@@ -1,127 +0,0 @@
-"""Repository for ``ingest_chunk_progress``; per-source resume + heartbeat."""
-
-from __future__ import annotations
-
-from typing import Optional
-
-from sqlalchemy import Connection, text
-
-from application.storage.db.base_repository import row_to_dict
-
-
-class IngestChunkProgressRepository:
-    """Read/write helpers for ``ingest_chunk_progress``."""
-
-    def __init__(self, conn: Connection) -> None:
-        self._conn = conn
-
-    def init_progress(
-        self,
-        source_id: str,
-        total_chunks: int,
-        attempt_id: Optional[str] = None,
-    ) -> dict:
-        """Upsert the progress row, scoped by ``attempt_id``.
-
-        On conflict the upsert distinguishes two cases:
-
-        - **Same attempt** (``attempt_id`` matches the stored value):
-          this is a Celery autoretry of the same task — preserve
-          ``last_index`` / ``embedded_chunks`` so the embed loop resumes
-          from the checkpoint. Only ``total_chunks`` and
-          ``last_updated`` get refreshed.
-        - **Different attempt** (a fresh invocation: manual reingest,
-          scheduled sync, or any caller that didn't pass an
-          ``attempt_id``): reset ``last_index`` to ``-1`` and
-          ``embedded_chunks`` to ``0`` so the loop starts from chunk 0.
-          This prevents a completed checkpoint from any prior run
-          poisoning the index.
-
-        ``IS NOT DISTINCT FROM`` treats two NULLs as equal — so legacy
-        rows with NULL ``attempt_id`` resume against another NULL
-        caller (e.g. test fixtures), but get reset the moment a real
-        ``attempt_id`` arrives.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                INSERT INTO ingest_chunk_progress (
-                    source_id, total_chunks, embedded_chunks, last_index,
-                    attempt_id, last_updated
-                )
-                VALUES (
-                    CAST(:source_id AS uuid), :total_chunks, 0, -1,
-                    :attempt_id, now()
-                )
-                ON CONFLICT (source_id) DO UPDATE SET
-                    total_chunks = EXCLUDED.total_chunks,
-                    last_updated = now(),
-                    last_index = CASE
-                        WHEN ingest_chunk_progress.attempt_id
-                             IS NOT DISTINCT FROM EXCLUDED.attempt_id
-                        THEN ingest_chunk_progress.last_index
-                        ELSE -1
-                    END,
-                    embedded_chunks = CASE
-                        WHEN ingest_chunk_progress.attempt_id
-                             IS NOT DISTINCT FROM EXCLUDED.attempt_id
-                        THEN ingest_chunk_progress.embedded_chunks
-                        ELSE 0
-                    END,
-                    attempt_id = EXCLUDED.attempt_id
-                RETURNING *
-                """
-            ),
-            {
-                "source_id": str(source_id),
-                "total_chunks": int(total_chunks),
-                "attempt_id": attempt_id,
-            },
-        )
-        return row_to_dict(result.fetchone())
-
-    def record_chunk(
-        self, source_id: str, last_index: int, embedded_chunks: int
-    ) -> None:
-        """Persist progress after a chunk is embedded."""
-        self._conn.execute(
-            text(
-                """
-                UPDATE ingest_chunk_progress
-                SET last_index = :last_index,
-                    embedded_chunks = :embedded_chunks,
-                    last_updated = now()
-                WHERE source_id = CAST(:source_id AS uuid)
-                """
-            ),
-            {
-                "source_id": str(source_id),
-                "last_index": int(last_index),
-                "embedded_chunks": int(embedded_chunks),
-            },
-        )
-
-    def get_progress(self, source_id: str) -> Optional[dict]:
-        """Return the progress row for ``source_id`` if it exists."""
-        result = self._conn.execute(
-            text(
-                "SELECT * FROM ingest_chunk_progress "
-                "WHERE source_id = CAST(:source_id AS uuid)"
-            ),
-            {"source_id": str(source_id)},
-        )
-        row = result.fetchone()
-        return row_to_dict(row) if row is not None else None
-
-    def bump_heartbeat(self, source_id: str) -> None:
-        """Refresh ``last_updated`` so the row looks alive to the reconciler."""
-        self._conn.execute(
-            text(
-                """
-                UPDATE ingest_chunk_progress
-                SET last_updated = now()
-                WHERE source_id = CAST(:source_id AS uuid)
-                """
-            ),
-            {"source_id": str(source_id)},
-        )
--- a/application/storage/db/repositories/pending_tool_state.py
+++ b/application/storage/db/repositories/pending_tool_state.py
@@ -7,11 +7,6 @@ Mirrors the continuation service's three operations on
 - load_state  → find_one by (conversation_id, user_id)
 - delete_state → delete_one by (conversation_id, user_id)

-Adds ``mark_resuming`` so a resumed run can claim a row without
-deleting it; a separate ``revert_stale_resuming`` flips abandoned
-``resuming`` rows back to ``pending`` so a crashed worker doesn't
-strand the user.
-
 Plus a cleanup method for the Celery beat task that replaces Mongo's
 TTL index.
 """
@@ -25,7 +20,6 @@ from typing import Optional
 from sqlalchemy import Connection, text

 from application.storage.db.base_repository import row_to_dict
-from application.storage.db.serialization import PGNativeJSONEncoder

 PENDING_STATE_TTL_SECONDS = 30 * 60  # 1800 seconds

@@ -77,24 +71,19 @@ class PendingToolStateRepository:
                    agent_config = EXCLUDED.agent_config,
                    client_tools = EXCLUDED.client_tools,
                    created_at = EXCLUDED.created_at,
-                    expires_at = EXCLUDED.expires_at,
-                    status = 'pending',
-                    resumed_at = NULL
+                    expires_at = EXCLUDED.expires_at
                RETURNING *
                """
            ),
            {
                "conv_id": conversation_id,
                "user_id": user_id,
-                "messages": json.dumps(messages, cls=PGNativeJSONEncoder),
-                "pending": json.dumps(pending_tool_calls, cls=PGNativeJSONEncoder),
-                "tools_dict": json.dumps(tools_dict, cls=PGNativeJSONEncoder),
-                "schemas": json.dumps(tool_schemas, cls=PGNativeJSONEncoder),
-                "agent_config": json.dumps(agent_config, cls=PGNativeJSONEncoder),
-                "client_tools": (
-                    json.dumps(client_tools, cls=PGNativeJSONEncoder)
-                    if client_tools is not None else None
-                ),
+                "messages": json.dumps(messages),
+                "pending": json.dumps(pending_tool_calls),
+                "tools_dict": json.dumps(tools_dict),
+                "schemas": json.dumps(tool_schemas),
+                "agent_config": json.dumps(agent_config),
+                "client_tools": json.dumps(client_tools) if client_tools is not None else None,
                "created_at": now,
                "expires_at": expires,
            },
@@ -124,45 +113,6 @@ class PendingToolStateRepository:
        )
        return result.rowcount > 0

-    def mark_resuming(self, conversation_id: str, user_id: str) -> bool:
-        """Flip a pending row to ``resuming`` and stamp ``resumed_at``."""
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE pending_tool_state
-                SET status = 'resuming', resumed_at = clock_timestamp()
-                WHERE conversation_id = CAST(:conv_id AS uuid)
-                  AND user_id = :user_id
-                  AND status = 'pending'
-                """
-            ),
-            {"conv_id": conversation_id, "user_id": user_id},
-        )
-        return result.rowcount > 0
-
-    def revert_stale_resuming(
-        self,
-        grace_seconds: int = 600,
-        ttl_extension_seconds: int = PENDING_STATE_TTL_SECONDS,
-    ) -> int:
-        """Revert ``resuming`` rows older than ``grace_seconds`` to ``pending``; bump TTL."""
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE pending_tool_state
-                SET status = 'pending',
-                    resumed_at = NULL,
-                    expires_at = clock_timestamp()
-                                 + make_interval(secs => :ttl)
-                WHERE status = 'resuming'
-                  AND resumed_at
-                      < clock_timestamp() - make_interval(secs => :grace)
-                """
-            ),
-            {"grace": grace_seconds, "ttl": ttl_extension_seconds},
-        )
-        return result.rowcount
-
    def cleanup_expired(self) -> int:
        """Delete rows where ``expires_at < now()``.

--- a/application/storage/db/repositories/reconciliation.py
+++ b/application/storage/db/repositories/reconciliation.py
@@ -1,273 +0,0 @@
-"""Repository for reconciliation sweeps over stuck durability rows."""
-
-from __future__ import annotations
-
-from sqlalchemy import Connection, text
-
-from application.storage.db.base_repository import row_to_dict
-
-
-class ReconciliationRepository:
-    """Sweeps and terminal writes for the reconciler beat task."""
-
-    def __init__(self, conn: Connection) -> None:
-        self._conn = conn
-
-    def find_and_lock_stuck_messages(
-        self, *, age_minutes: int = 5, limit: int = 100,
-    ) -> list[dict]:
-        """Lock stuck pending/streaming messages skipping live resumes.
-
-        Staleness rides on the **later of** ``cm.timestamp`` (creation)
-        and ``message_metadata.last_heartbeat_at`` (route heartbeat). An
-        in-flight stream that re-stamps the heartbeat each minute stays
-        out of the sweep; reconciler-side writes deliberately don't
-        touch either column so the per-row attempts counter advances
-        across ticks. Liveness exemption covers both ``pending`` (paused
-        waiting for resume) and ``resuming`` (actively executing)
-        ``pending_tool_state`` rows so a paused message survives until
-        the PT row's own TTL retires it.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                SELECT cm.id, cm.conversation_id, cm.user_id, cm.timestamp,
-                       cm.message_metadata
-                FROM conversation_messages cm
-                WHERE cm.status IN ('pending', 'streaming')
-                  AND cm.timestamp < now() - make_interval(mins => :age)
-                  AND COALESCE(
-                      (cm.message_metadata->>'last_heartbeat_at')::timestamptz,
-                      cm.timestamp
-                  ) < now() - make_interval(mins => :age)
-                  AND NOT EXISTS (
-                      SELECT 1
-                      FROM pending_tool_state pts
-                      WHERE pts.conversation_id = cm.conversation_id
-                        AND (
-                            (pts.status = 'pending'
-                             AND pts.expires_at > now())
-                            OR
-                            (pts.status = 'resuming'
-                             AND pts.resumed_at
-                                 > now() - interval '10 minutes')
-                        )
-                  )
-                ORDER BY cm.timestamp ASC
-                LIMIT :limit
-                FOR UPDATE OF cm SKIP LOCKED
-                """
-            ),
-            {"age": age_minutes, "limit": limit},
-        )
-        return [row_to_dict(r) for r in result.fetchall()]
-
-    def find_and_lock_proposed_tool_calls(
-        self, *, age_minutes: int = 5, limit: int = 100,
-    ) -> list[dict]:
-        """Lock tool_call_attempts that never advanced past ``proposed``."""
-        result = self._conn.execute(
-            text(
-                """
-                SELECT call_id, message_id, tool_id, tool_name, action_name,
-                       arguments, attempted_at, updated_at
-                FROM tool_call_attempts
-                WHERE status = 'proposed'
-                  AND attempted_at < now() - make_interval(mins => :age)
-                ORDER BY attempted_at ASC
-                LIMIT :limit
-                FOR UPDATE SKIP LOCKED
-                """
-            ),
-            {"age": age_minutes, "limit": limit},
-        )
-        return [row_to_dict(r) for r in result.fetchall()]
-
-    def find_and_lock_executed_tool_calls(
-        self, *, age_minutes: int = 15, limit: int = 100,
-    ) -> list[dict]:
-        """Lock tool_call_attempts stuck in ``executed`` past confirm window."""
-        result = self._conn.execute(
-            text(
-                """
-                SELECT call_id, message_id, tool_id, tool_name, action_name,
-                       arguments, result, attempted_at, updated_at
-                FROM tool_call_attempts
-                WHERE status = 'executed'
-                  AND updated_at < now() - make_interval(mins => :age)
-                ORDER BY updated_at ASC
-                LIMIT :limit
-                FOR UPDATE SKIP LOCKED
-                """
-            ),
-            {"age": age_minutes, "limit": limit},
-        )
-        return [row_to_dict(r) for r in result.fetchall()]
-
-    def find_and_lock_stalled_ingests(
-        self, *, age_minutes: int = 30, limit: int = 100,
-    ) -> list[dict]:
-        """Lock ingest checkpoints whose heartbeat hasn't ticked recently."""
-        result = self._conn.execute(
-            text(
-                """
-                SELECT source_id, total_chunks, embedded_chunks,
-                       last_index, last_updated
-                FROM ingest_chunk_progress
-                WHERE last_updated < now() - make_interval(mins => :age)
-                  AND embedded_chunks < total_chunks
-                ORDER BY last_updated ASC
-                LIMIT :limit
-                FOR UPDATE SKIP LOCKED
-                """
-            ),
-            {"age": age_minutes, "limit": limit},
-        )
-        return [row_to_dict(r) for r in result.fetchall()]
-
-    def touch_ingest_progress(self, source_id: str) -> bool:
-        """Bump ``last_updated`` so a once-stalled ingest re-enters the watch window."""
-        result = self._conn.execute(
-            text(
-                "UPDATE ingest_chunk_progress SET last_updated = now() "
-                "WHERE source_id = CAST(:sid AS uuid)"
-            ),
-            {"sid": str(source_id)},
-        )
-        return result.rowcount > 0
-
-    def increment_message_reconcile_attempts(self, message_id: str) -> int:
-        """Bump ``message_metadata.reconcile_attempts`` and return the new count."""
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE conversation_messages
-                SET message_metadata = jsonb_set(
-                    COALESCE(message_metadata, '{}'::jsonb),
-                    '{reconcile_attempts}',
-                    to_jsonb(
-                        COALESCE(
-                            (message_metadata->>'reconcile_attempts')::int,
-                            0
-                        ) + 1
-                    )
-                )
-                WHERE id = CAST(:message_id AS uuid)
-                RETURNING (message_metadata->>'reconcile_attempts')::int
-                         AS new_count
-                """
-            ),
-            {"message_id": message_id},
-        )
-        row = result.fetchone()
-        return int(row[0]) if row is not None else 0
-
-    def mark_message_failed(self, message_id: str, *, error: str) -> bool:
-        """Flip a message to ``status='failed'`` and stash ``error`` in metadata."""
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE conversation_messages
-                SET status = 'failed',
-                    message_metadata = jsonb_set(
-                        COALESCE(message_metadata, '{}'::jsonb),
-                        '{error}',
-                        to_jsonb(CAST(:error AS text))
-                    )
-                WHERE id = CAST(:message_id AS uuid)
-                """
-            ),
-            {"message_id": message_id, "error": error},
-        )
-        return result.rowcount > 0
-
-    def mark_tool_call_failed(self, call_id: str, *, error: str) -> bool:
-        """Flip a tool_call_attempts row to ``failed`` with ``error``."""
-        result = self._conn.execute(
-            text(
-                "UPDATE tool_call_attempts SET status = 'failed', "
-                "error = :error WHERE call_id = :call_id"
-            ),
-            {"call_id": call_id, "error": error},
-        )
-        return result.rowcount > 0
-
-    def find_stuck_idempotency_pending(
-        self,
-        *,
-        max_attempts: int,
-        lease_grace_seconds: int = 60,
-        limit: int = 100,
-    ) -> list[dict]:
-        """Lock ``task_dedup`` rows abandoned past the lease + retry budget.
-
-        A row is "stuck" when:
-
-        - ``status='pending'`` (lease was claimed but never finalised)
-        - ``lease_expires_at`` is past by at least ``lease_grace_seconds``
-          (the heartbeat thread is gone — the lease isn't going to come
-          back)
-        - ``attempt_count >= max_attempts`` (the poison-loop guard
-          should already have escalated this; if it hasn't, the wrapper
-          died before getting there)
-
-        These rows would otherwise sit in ``pending`` until the 24 h
-        TTL aged them out, blocking same-key retries via
-        ``_lookup_completed`` returning None for the whole window.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                SELECT idempotency_key, task_name, task_id, attempt_count,
-                       lease_owner_id, lease_expires_at, created_at
-                FROM task_dedup
-                WHERE status = 'pending'
-                  AND lease_expires_at IS NOT NULL
-                  AND lease_expires_at
-                      < now() - make_interval(secs => :grace)
-                  AND attempt_count >= :max_attempts
-                ORDER BY created_at ASC
-                LIMIT :limit
-                FOR UPDATE SKIP LOCKED
-                """
-            ),
-            {
-                "max_attempts": int(max_attempts),
-                "grace": int(lease_grace_seconds),
-                "limit": int(limit),
-            },
-        )
-        return [row_to_dict(r) for r in result.fetchall()]
-
-    def mark_idempotency_pending_failed(
-        self, key: str, *, error: str,
-    ) -> bool:
-        """Promote a stuck pending ``task_dedup`` row to ``failed``."""
-        from application.storage.db.serialization import PGNativeJSONEncoder
-        import json
-
-        result = self._conn.execute(
-            text(
-                """
-                UPDATE task_dedup
-                SET status = 'failed',
-                    result_json = CAST(:result AS jsonb),
-                    lease_owner_id = NULL,
-                    lease_expires_at = NULL
-                WHERE idempotency_key = :key
-                  AND status = 'pending'
-                """
-            ),
-            {
-                "key": key,
-                "result": json.dumps(
-                    {
-                        "success": False,
-                        "error": error,
-                        "reconciled": True,
-                    },
-                    cls=PGNativeJSONEncoder,
-                ),
-            },
-        )
-        return result.rowcount > 0
--- a/application/storage/db/repositories/stack_logs.py
+++ b/application/storage/db/repositories/stack_logs.py
@@ -13,8 +13,6 @@ import json
 from datetime import datetime
 from typing import Optional

-from application.storage.db.serialization import PGNativeJSONEncoder
-
 from sqlalchemy import Connection, text


@@ -54,7 +52,7 @@ class StackLogsRepository:
                "user_id": user_id,
                "api_key": api_key,
                "query": query,
-                "stacks": json.dumps(stacks or [], cls=PGNativeJSONEncoder),
+                "stacks": json.dumps(stacks or []),
                "timestamp": timestamp,
            },
        )
--- a/application/storage/db/repositories/token_usage.py
+++ b/application/storage/db/repositories/token_usage.py
@@ -31,8 +31,6 @@ class TokenUsageRepository:
        agent_id: Optional[str] = None,
        prompt_tokens: int = 0,
        generated_tokens: int = 0,
-        source: str = "agent_stream",
-        request_id: Optional[str] = None,
        timestamp: Optional[datetime] = None,
    ) -> None:
        # Attribution guard: the ``token_usage_attribution_chk`` CHECK
@@ -56,16 +54,12 @@ class TokenUsageRepository:
        self._conn.execute(
            text(
                """
-                INSERT INTO token_usage (
-                    user_id, api_key, agent_id,
-                    prompt_tokens, generated_tokens,
-                    source, request_id, timestamp
-                )
+                INSERT INTO token_usage (user_id, api_key, agent_id, prompt_tokens, generated_tokens, timestamp)
                VALUES (
                    :user_id, :api_key,
                    CAST(:agent_id AS uuid),
                    :prompt_tokens, :generated_tokens,
-                    :source, :request_id, COALESCE(:timestamp, now())
+                    COALESCE(:timestamp, now())
                )
                """
            ),
@@ -75,8 +69,6 @@ class TokenUsageRepository:
                "agent_id": agent_id_uuid,
                "prompt_tokens": prompt_tokens,
                "generated_tokens": generated_tokens,
-                "source": source,
-                "request_id": request_id,
                "timestamp": timestamp,
            },
        )
@@ -181,22 +173,8 @@ class TokenUsageRepository:
        user_id: Optional[str] = None,
        api_key: Optional[str] = None,
    ) -> int:
-        """Count user-initiated requests in the given time range.
-
-        A request = one ``agent_stream`` invocation. Multi-tool agent
-        runs produce multiple rows (one per LLM call) tagged with the
-        same ``request_id``; we DISTINCT on that to count the request
-        once. Pre-migration rows have ``request_id=NULL`` and are
-        counted one-per-row via the second branch (back-compat).
-        Side-channel sources (``title`` / ``compression`` /
-        ``rag_condense`` / ``fallback``) are excluded — they aren't
-        user-initiated and shouldn't tick the request limit.
-        """
-        clauses = [
-            "timestamp >= :start",
-            "timestamp <= :end",
-            "source = 'agent_stream'",
-        ]
+        """Count of token_usage rows in the given time range (for request limiting)."""
+        clauses = ["timestamp >= :start", "timestamp <= :end"]
        params: dict = {"start": start, "end": end}
        if user_id is not None:
            clauses.append("user_id = :user_id")
@@ -206,15 +184,7 @@ class TokenUsageRepository:
            params["api_key"] = api_key
        where = " AND ".join(clauses)
        result = self._conn.execute(
-            text(
-                f"""
-                SELECT
-                    COUNT(DISTINCT request_id) FILTER (WHERE request_id IS NOT NULL)
-                    + COUNT(*) FILTER (WHERE request_id IS NULL)
-                FROM token_usage
-                WHERE {where}
-                """
-            ),
+            text(f"SELECT COUNT(*) FROM token_usage WHERE {where}"),
            params,
        )
        return result.scalar()
--- a/application/storage/db/repositories/tool_call_attempts.py
+++ b/application/storage/db/repositories/tool_call_attempts.py
@@ -1,144 +0,0 @@
-"""Repository for ``tool_call_attempts``; executor's proposed/executed/failed writes."""
-
-from __future__ import annotations
-
-import json
-from typing import Any, Optional
-
-from sqlalchemy import Connection, text
-
-from application.storage.db.serialization import PGNativeJSONEncoder
-
-
-class ToolCallAttemptsRepository:
-    def __init__(self, conn: Connection) -> None:
-        self._conn = conn
-
-    def record_proposed(
-        self,
-        call_id: str,
-        tool_name: str,
-        action_name: str,
-        arguments: Any,
-        *,
-        tool_id: Optional[str] = None,
-    ) -> bool:
-        """Insert a ``proposed`` row before the tool executes.
-
-        Returns True if a new row was created. ``ON CONFLICT DO NOTHING``
-        guards against the LLM emitting a duplicate ``call_id``: the
-        existing row stays put rather than a re-insert raising
-        ``IntegrityError``.
-        """
-        result = self._conn.execute(
-            text(
-                """
-                INSERT INTO tool_call_attempts
-                    (call_id, tool_id, tool_name, action_name, arguments, status)
-                VALUES
-                    (:call_id, CAST(:tool_id AS uuid), :tool_name,
-                     :action_name, CAST(:arguments AS jsonb), 'proposed')
-                ON CONFLICT (call_id) DO NOTHING
-                """
-            ),
-            {
-                "call_id": call_id,
-                "tool_id": tool_id,
-                "tool_name": tool_name,
-                "action_name": action_name,
-                "arguments": json.dumps(arguments if arguments is not None else {}, cls=PGNativeJSONEncoder),
-            },
-        )
-        return result.rowcount > 0
-
-    def upsert_executed(
-        self,
-        call_id: str,
-        tool_name: str,
-        action_name: str,
-        arguments: Any,
-        result: Any,
-        *,
-        tool_id: Optional[str] = None,
-        message_id: Optional[str] = None,
-        artifact_id: Optional[str] = None,
-    ) -> None:
-        """Insert OR upgrade a row to ``executed``.
-
-        Used as a fallback when ``record_proposed`` failed (DB outage)
-        and the tool ran anyway — preserves the journal so the
-        reconciler can still see the attempt.
-        """
-        result_payload: dict = {"result": result}
-        if artifact_id:
-            result_payload["artifact_id"] = artifact_id
-        self._conn.execute(
-            text(
-                """
-                INSERT INTO tool_call_attempts
-                    (call_id, tool_id, tool_name, action_name, arguments,
-                     result, message_id, status)
-                VALUES
-                    (:call_id, CAST(:tool_id AS uuid), :tool_name,
-                     :action_name, CAST(:arguments AS jsonb),
-                     CAST(:result AS jsonb), CAST(:message_id AS uuid),
-                     'executed')
-                ON CONFLICT (call_id) DO UPDATE
-                   SET status     = 'executed',
-                       result     = EXCLUDED.result,
-                       message_id = COALESCE(EXCLUDED.message_id, tool_call_attempts.message_id)
-                """
-            ),
-            {
-                "call_id": call_id,
-                "tool_id": tool_id,
-                "tool_name": tool_name,
-                "action_name": action_name,
-                "arguments": json.dumps(arguments if arguments is not None else {}, cls=PGNativeJSONEncoder),
-                "result": json.dumps(result_payload, cls=PGNativeJSONEncoder),
-                "message_id": message_id,
-            },
-        )
-
-    def mark_executed(
-        self,
-        call_id: str,
-        result: Any,
-        *,
-        message_id: Optional[str] = None,
-        artifact_id: Optional[str] = None,
-    ) -> bool:
-        """Flip ``proposed`` → ``executed`` with the tool result.
-
-        ``artifact_id`` (when present) is stored alongside ``result`` in
-        the JSONB as audit data — the reconciler reads it for diagnostic
-        alerts when escalating stuck rows to ``failed``.
-        """
-        result_payload: dict = {"result": result}
-        if artifact_id:
-            result_payload["artifact_id"] = artifact_id
-        sql = (
-            "UPDATE tool_call_attempts SET "
-            "status = 'executed', result = CAST(:result AS jsonb)"
-        )
-        params: dict[str, Any] = {
-            "call_id": call_id,
-            "result": json.dumps(result_payload, cls=PGNativeJSONEncoder),
-        }
-        if message_id is not None:
-            sql += ", message_id = CAST(:message_id AS uuid)"
-            params["message_id"] = message_id
-        sql += " WHERE call_id = :call_id"
-        result_proxy = self._conn.execute(text(sql), params)
-        return result_proxy.rowcount > 0
-
-    def mark_failed(self, call_id: str, error: str) -> bool:
-        """Flip ``proposed`` → ``failed`` with the exception text."""
-        result = self._conn.execute(
-            text(
-                "UPDATE tool_call_attempts SET status = 'failed', error = :error "
-                "WHERE call_id = :call_id"
-            ),
-            {"call_id": call_id, "error": error},
-        )
-        return result.rowcount > 0
--- a/application/storage/db/repositories/user_logs.py
+++ b/application/storage/db/repositories/user_logs.py
@@ -20,7 +20,6 @@ from typing import Optional
 from sqlalchemy import Connection, text

 from application.storage.db.base_repository import row_to_dict
-from application.storage.db.serialization import PGNativeJSONEncoder


 class UserLogsRepository:
@@ -47,7 +46,7 @@ class UserLogsRepository:
            {
                "user_id": user_id,
                "endpoint": endpoint,
-                "data": json.dumps(data, cls=PGNativeJSONEncoder) if data is not None else None,
+                "data": json.dumps(data, default=str) if data is not None else None,
                "timestamp": timestamp,
            },
        )
--- a/application/storage/db/serialization.py
+++ b/application/storage/db/serialization.py
@@ -1,93 +0,0 @@
-"""JSON-safe coercion for PG-native Python types.
-
-Postgres (via psycopg) returns native Python types — ``uuid.UUID``,
-``datetime.datetime``/``datetime.date``, ``decimal.Decimal``, ``bytes``
-— that ``json.dumps`` rejects. This module is the single place those
-coercion rules live; everywhere else should call into it.
-
-Two interfaces with identical coverage:
-
-* :func:`coerce_pg_native` — recursive walk returning a JSON-safe copy.
-  Use when you need to inspect the dict yourself or pass it to a
-  serializer that doesn't accept a custom encoder (e.g. SQLAlchemy
-  parameter binding for a JSONB column).
-* :class:`PGNativeJSONEncoder` — ``JSONEncoder`` subclass. Use as
-  ``json.dumps(obj, cls=PGNativeJSONEncoder)`` for serialise-once flows
-  where the extra recursive walk is wasted work.
-
-Coercion rules:
-
-* ``UUID`` → canonical hex string.
-* ``datetime`` / ``date`` → ISO 8601 string.
-* ``Decimal`` → numeric string (preserves precision; ``float()`` would not).
-* ``bytes`` → base64 string. Lossless and universally JSON-safe;
-  prior code used UTF-8 with ``errors="replace"`` which silently
-  corrupted binary payloads (e.g. Gemini's ``thought_signature``).
-"""
-
-from __future__ import annotations
-
-import base64
-import binascii
-import json
-from datetime import date, datetime
-from decimal import Decimal
-from typing import Any
-from uuid import UUID
-
-
-def _coerce_scalar(obj: Any) -> Any:
-    if isinstance(obj, UUID):
-        return str(obj)
-    if isinstance(obj, (datetime, date)):
-        return obj.isoformat()
-    if isinstance(obj, Decimal):
-        return str(obj)
-    if isinstance(obj, bytes):
-        return base64.b64encode(obj).decode("ascii")
-    return obj
-
-
-def coerce_pg_native(obj: Any) -> Any:
-    """Recursively coerce PG-native types to JSON-safe equivalents.
-
-    Recurses into ``dict`` (stringifying keys, matching prior helper
-    behavior) and ``list``/``tuple`` (tuples flatten to lists since JSON
-    has no tuple type). Any other type passes through unchanged.
-    """
-    if isinstance(obj, dict):
-        return {str(k): coerce_pg_native(v) for k, v in obj.items()}
-    if isinstance(obj, (list, tuple)):
-        return [coerce_pg_native(v) for v in obj]
-    return _coerce_scalar(obj)
-
-
-def decode_base64_bytes(value: Any) -> Any:
-    """Reverse ``coerce_pg_native``'s bytes-to-base64 step.
-
-    Useful at egress points that need the original bytes back (e.g.
-    sending Gemini's ``thought_signature`` to the SDK on resume). Uses
-    ``validate=True`` so plain ASCII strings that happen to be
-    permissively decodable (e.g. ``"abcd"``) are not silently turned
-    into bytes — the original value passes through.
-    """
-    if isinstance(value, str):
-        try:
-            return base64.b64decode(value.encode("ascii"), validate=True)
-        except (binascii.Error, ValueError):
-            return value
-    return value
-
-
-class PGNativeJSONEncoder(json.JSONEncoder):
-    """``JSONEncoder`` covering UUID / datetime / date / Decimal / bytes.
-
-    Use as ``json.dumps(obj, cls=PGNativeJSONEncoder)``. Equivalent in
-    coverage to :func:`coerce_pg_native` but skips the eager walk.
-    """
-
-    def default(self, obj: Any) -> Any:
-        coerced = _coerce_scalar(obj)
-        if coerced is obj:
-            return super().default(obj)
-        return coerced
--- a/application/usage.py
+++ b/application/usage.py
@@ -1,5 +1,6 @@
+import sys
 import logging
-import time
+from datetime import datetime

 from application.storage.db.repositories.token_usage import TokenUsageRepository
 from application.storage.db.session import db_session
@@ -19,15 +20,6 @@ def _serialize_for_token_count(value):
    if value is None:
        return ""

-    # Raw binary payloads (image/file attachments arrive as ``bytes`` from
-    # ``GoogleLLM.prepare_messages_with_attachments``) — without this
-    # branch they fall through to ``str(value)`` below, which produces a
-    # multi-megabyte ``"b'\\x89PNG...'"`` repr-string and inflates
-    # ``prompt_tokens`` by orders of magnitude. Same intent as the
-    # data-URL skip above.
-    if isinstance(value, (bytes, bytearray, memoryview)):
-        return ""
-
    if isinstance(value, list):
        return [_serialize_for_token_count(item) for item in value]

@@ -91,62 +83,33 @@ def _count_prompt_tokens(messages, tools=None, usage_attachments=None, **kwargs)
    return prompt_tokens


-def _persist_call_usage(llm, call_usage):
-    """Write one ``token_usage`` row per LLM call. Always-on; no flag.
-
-    Source defaults to ``agent_stream`` and can be overridden per
-    instance via ``_token_usage_source`` (set on side-channel LLMs:
-    title / compression / rag_condense / fallback). A ``_request_id``
-    stamped on the LLM lets ``count_in_range`` deduplicate the multiple
-    rows produced by a single multi-tool agent run.
-    """
-    if call_usage["prompt_tokens"] == 0 and call_usage["generated_tokens"] == 0:
+def update_token_usage(decoded_token, user_api_key, token_usage, agent_id=None):
+    if "pytest" in sys.modules:
        return
-    decoded_token = getattr(llm, "decoded_token", None)
-    user_id = (
-        decoded_token.get("sub") if isinstance(decoded_token, dict) else None
-    )
-    user_api_key = getattr(llm, "user_api_key", None)
-    agent_id = getattr(llm, "agent_id", None)
-    if not user_id and not user_api_key:
-        # Repository would raise on the attribution check — log instead
-        # so operators see the gap rather than crashing the stream.
+    user_id = decoded_token.get("sub") if isinstance(decoded_token, dict) else None
+    normalized_agent_id = str(agent_id) if agent_id else None
+
+    if not user_id and not user_api_key and not normalized_agent_id:
        logger.warning(
-            "token_usage skip: no user_id/api_key on LLM instance",
-            extra={
-                "source": getattr(llm, "_token_usage_source", "agent_stream"),
-            },
+            "Skipping token usage insert: missing user_id, api_key, and agent_id"
        )
        return
+
    try:
        with db_session() as conn:
-            # ``timestamp`` is omitted so Postgres ``server_default
-            # = func.now()`` populates a tz-aware UTC value; passing
-            # naive ``datetime.now()`` would silently shift on
-            # non-UTC servers.
            TokenUsageRepository(conn).insert(
                user_id=user_id,
                api_key=user_api_key,
-                agent_id=str(agent_id) if agent_id else None,
-                prompt_tokens=call_usage["prompt_tokens"],
-                generated_tokens=call_usage["generated_tokens"],
-                source=(
-                    getattr(llm, "_token_usage_source", None) or "agent_stream"
-                ),
-                request_id=getattr(llm, "_request_id", None),
+                agent_id=normalized_agent_id,
+                prompt_tokens=token_usage["prompt_tokens"],
+                generated_tokens=token_usage["generated_tokens"],
+                timestamp=datetime.now(),
            )
-    except Exception:
-        logger.exception("token_usage persist failed")
+    except Exception as e:
+        logger.error(f"Failed to record token usage: {e}", exc_info=True)


 def gen_token_usage(func):
-    """Accumulate per-call token counts and write a ``token_usage`` row.
-
-    The accumulator on ``self.token_usage`` stays in place for code
-    paths that introspect it (e.g., logging, response payloads). DB
-    persistence happens here for every call so primary streams,
-    side-channel LLMs, and no-save flows all produce rows uniformly.
-    """
    def wrapper(self, model, messages, stream, tools, **kwargs):
        usage_attachments = kwargs.pop("_usage_attachments", None)
        call_usage = {"prompt_tokens": 0, "generated_tokens": 0}
@@ -160,14 +123,18 @@ def gen_token_usage(func):
        call_usage["generated_tokens"] += _count_tokens(result)
        self.token_usage["prompt_tokens"] += call_usage["prompt_tokens"]
        self.token_usage["generated_tokens"] += call_usage["generated_tokens"]
-        _persist_call_usage(self, call_usage)
+        update_token_usage(
+            self.decoded_token,
+            self.user_api_key,
+            call_usage,
+            getattr(self, "agent_id", None),
+        )
        return result

    return wrapper


 def stream_token_usage(func):
-    """Stream variant of ``gen_token_usage``. Same persistence contract."""
    def wrapper(self, model, messages, stream, tools, **kwargs):
        usage_attachments = kwargs.pop("_usage_attachments", None)
        call_usage = {"prompt_tokens": 0, "generated_tokens": 0}
@@ -178,36 +145,19 @@ def stream_token_usage(func):
            **kwargs,
        )
        batch = []
-        started_at = time.monotonic()
-        error: BaseException | None = None
-        try:
-            result = func(self, model, messages, stream, tools, **kwargs)
-            for r in result:
-                batch.append(r)
-                yield r
-        except Exception as exc:
-            # ``GeneratorExit`` (consumer disconnected) and KeyboardInterrupt
-            # flow through as ``status="ok"`` — same convention as
-            # ``application.logging._consume_and_log``.
-            error = exc
-            raise
-        finally:
-            for line in batch:
-                call_usage["generated_tokens"] += _count_tokens(line)
-            self.token_usage["prompt_tokens"] += call_usage["prompt_tokens"]
-            self.token_usage["generated_tokens"] += call_usage["generated_tokens"]
-            _persist_call_usage(self, call_usage)
-            emit = getattr(self, "_emit_stream_finished_log", None)
-            if callable(emit):
-                try:
-                    emit(
-                        model,
-                        prompt_tokens=call_usage["prompt_tokens"],
-                        completion_tokens=call_usage["generated_tokens"],
-                        latency_ms=int((time.monotonic() - started_at) * 1000),
-                        error=error,
-                    )
-                except Exception:
-                    logger.exception("Failed to emit llm_stream_finished")
+        result = func(self, model, messages, stream, tools, **kwargs)
+        for r in result:
+            batch.append(r)
+            yield r
+        for line in batch:
+            call_usage["generated_tokens"] += _count_tokens(line)
+        self.token_usage["prompt_tokens"] += call_usage["prompt_tokens"]
+        self.token_usage["generated_tokens"] += call_usage["generated_tokens"]
+        update_token_usage(
+            self.decoded_token,
+            self.user_api_key,
+            call_usage,
+            getattr(self, "agent_id", None),
+        )

    return wrapper
--- a/application/worker.py
+++ b/application/worker.py
@@ -6,7 +6,6 @@ import os
 import shutil
 import string
 import tempfile
-import threading
 from typing import Any, Dict
 import zipfile

@@ -23,10 +22,7 @@ from application.cache import get_redis_instance
 from application.core.settings import settings
 from application.parser.chunking import Chunker
 from application.parser.connectors.connector_creator import ConnectorCreator
-from application.parser.embedding_pipeline import (
-    assert_index_complete,
-    embed_and_store_documents,
-)
+from application.parser.embedding_pipeline import embed_and_store_documents
 from application.parser.file.bulk import SimpleDirectoryReader, get_default_file_extractor
 from application.parser.file.constants import SUPPORTED_SOURCE_EXTENSIONS
 from application.parser.remote.remote_creator import RemoteCreator
@@ -36,9 +32,6 @@ from application.retriever.retriever_creator import RetrieverCreator
 from application.storage.db.base_repository import looks_like_uuid
 from application.storage.db.repositories.agents import AgentsRepository
 from application.storage.db.repositories.attachments import AttachmentsRepository
-from application.storage.db.repositories.ingest_chunk_progress import (
-    IngestChunkProgressRepository,
-)
 from application.storage.db.repositories.sources import SourcesRepository
 from application.storage.db.session import db_readonly, db_session
 from application.storage.storage_creator import StorageCreator
@@ -50,53 +43,6 @@ from application.utils import count_tokens_docs, num_tokens_from_string, safe_fi
 MIN_TOKENS = 150
 MAX_TOKENS = 1250
 RECURSION_DEPTH = 2
-INGEST_HEARTBEAT_INTERVAL_SECONDS = 30
-
-# Stable namespace for deterministic source IDs derived from idempotency keys.
-# Pinned literal — do not change. Re-rolling this would mint different
-# source_ids for the same idempotency_keys across deploys, defeating the
-# retry-resume contract.
-DOCSGPT_INGEST_NAMESPACE = uuid.UUID("fa25d5d1-398b-46df-ac89-8d1c360b9bea")
-
-
-def _derive_source_id(idempotency_key):
-    """``uuid5(NS, key)`` when a key is supplied; ``uuid4()`` otherwise."""
-    if isinstance(idempotency_key, str) and idempotency_key:
-        return uuid.uuid5(DOCSGPT_INGEST_NAMESPACE, idempotency_key)
-    return uuid.uuid4()
-
-
-def _ingest_heartbeat_loop(source_id, stop_event, interval=INGEST_HEARTBEAT_INTERVAL_SECONDS):
-    """Bump ``ingest_chunk_progress.last_updated`` until ``stop_event`` is set."""
-    while not stop_event.wait(interval):
-        try:
-            with db_session() as conn:
-                IngestChunkProgressRepository(conn).bump_heartbeat(source_id)
-        except Exception as e:
-            logging.warning(
-                f"Heartbeat failed for {source_id}: {e}", exc_info=True
-            )
-
-
-def _start_ingest_heartbeat(source_id):
-    """Spawn the heartbeat daemon and return ``(thread, stop_event)``."""
-    stop_event = threading.Event()
-    thread = threading.Thread(
-        target=_ingest_heartbeat_loop,
-        args=(str(source_id), stop_event),
-        daemon=True,
-        name=f"ingest-heartbeat-{source_id}",
-    )
-    thread.start()
-    return thread, stop_event
-
-
-def _stop_ingest_heartbeat(thread, stop_event):
-    """Signal the heartbeat daemon to exit and wait briefly for it."""
-    if stop_event is not None:
-        stop_event.set()
-    if thread is not None:
-        thread.join(timeout=5)


 # Define a function to extract metadata from a given filename.
@@ -486,10 +432,7 @@ def run_agent_logic(agent_config, input_data):
            "tool_calls": tool_calls,
            "thought": thought,
        }
-        # Per-activity summary fields (answer_length, thought_length,
-        # source_count, tool_call_count) now ride on the inner
-        # ``activity_finished`` event emitted by ``log_activity`` around
-        # ``Agent.gen`` above; no separate ``agent_response`` log needed.
+        logging.info(f"Agent response: {result}")
        return result
    except Exception as e:
        logging.error(f"Error in run_agent_logic: {e}", exc_info=True)
@@ -509,7 +452,6 @@ def ingest_worker(
    user,
    retriever="classic",
    file_name_map=None,
-    idempotency_key=None,
 ):
    """
    Ingest and process documents.
@@ -524,9 +466,6 @@ def ingest_worker(
        user (str): Identifier for the user initiating the ingestion (original, unsanitized).
        retriever (str): Type of retriever to use for processing the documents.
        file_name_map (dict|str|None): Optional mapping of safe relative paths to original filenames.
-        idempotency_key (str|None): When provided, the ``source_id`` is derived
-            deterministically from the key so a retried task reuses the same
-            source row instead of duplicating it.

    Returns:
        dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
@@ -633,23 +572,12 @@ def ingest_worker(

            docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

-            id = _derive_source_id(idempotency_key)
+            id = uuid.uuid4()

            vector_store_path = os.path.join(temp_dir, "vector_store")
            os.makedirs(vector_store_path, exist_ok=True)

-            heartbeat_thread, heartbeat_stop = _start_ingest_heartbeat(id)
-            try:
-                embed_and_store_documents(
-                    docs, vector_store_path, id, self,
-                    attempt_id=getattr(self.request, "id", None),
-                )
-            finally:
-                _stop_ingest_heartbeat(heartbeat_thread, heartbeat_stop)
-            # Defense-in-depth: chunk-progress is the authoritative
-            # record of how many chunks landed; mismatch raises so the
-            # task fails loud rather than caching a partial index.
-            assert_index_complete(id)
+            embed_and_store_documents(docs, vector_store_path, id, self)

            tokens = count_tokens_docs(docs)

@@ -1012,7 +940,6 @@ def remote_worker(
    sync_frequency="never",
    operation_mode="upload",
    doc_id=None,
-    idempotency_key=None,
 ):
    safe_user = safe_filename(user)
    full_path = os.path.join(directory, safe_user, uuid.uuid4().hex)
@@ -1105,22 +1032,14 @@ def remote_worker(
        )

        if operation_mode == "upload":
-            id = _derive_source_id(idempotency_key)
-            embed_and_store_documents(
-                docs, full_path, id, self,
-                attempt_id=getattr(self.request, "id", None),
-            )
-            assert_index_complete(id)
+            id = uuid.uuid4()
+            embed_and_store_documents(docs, full_path, id, self)
        elif operation_mode == "sync":
            if not doc_id:
                logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
                raise ValueError("doc_id must be provided for sync operation.")
            id = str(doc_id)
-            embed_and_store_documents(
-                docs, full_path, id, self,
-                attempt_id=getattr(self.request, "id", None),
-            )
-            assert_index_complete(id)
+            embed_and_store_documents(docs, full_path, id, self)
        self.update_state(state="PROGRESS", meta={"current": 100})

        # Serialize remote_data as JSON if it's a dict (for S3, Reddit, etc.)
@@ -1326,10 +1245,16 @@ def attachment_worker(self, file_info, user):


 def agent_webhook_worker(self, agent_id, payload):
-    """Process the webhook payload for an agent.
+    """
+    Process the webhook payload for an agent.

-    Raises on failure: Celery treats a returned dict as success and
-    would skip retries, leaving the caller with a stale 200.
+    Args:
+        self: Reference to the instance of the task.
+        agent_id (str): Unique identifier for the agent.
+        payload (dict): The payload data from the webhook.
+
+    Returns:
+        dict: Information about the processed webhook.
    """
    self.update_state(state="PROGRESS", meta={"current": 1})
    try:
@@ -1355,13 +1280,13 @@ def agent_webhook_worker(self, agent_id, payload):
        input_data = json.dumps(payload)
    except Exception as e:
        logging.error(f"Error processing agent webhook: {e}", exc_info=True)
-        raise
+        return {"status": "error", "error": str(e)}
    self.update_state(state="PROGRESS", meta={"current": 50})
    try:
        result = run_agent_logic(agent_config, input_data)
    except Exception as e:
        logging.error(f"Error running agent logic: {e}", exc_info=True)
-        raise
+        return {"status": "error"}
    else:
        logging.info(
            f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
@@ -1384,7 +1309,6 @@ def ingest_connector(
    operation_mode: str = "upload",
    doc_id=None,
    sync_frequency: str = "never",
-    idempotency_key=None,
 ) -> Dict[str, Any]:
    """
    Ingestion for internal knowledge bases (GoogleDrive, etc.).
@@ -1401,8 +1325,6 @@ def ingest_connector(
        operation_mode: "upload" for initial ingestion, "sync" for incremental sync
        doc_id: Document ID for sync operations (required when operation_mode="sync")
        sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")
-        idempotency_key: When provided, the ``source_id`` is derived
-            deterministically so a retried upload reuses the same source row.
    """
    logging.info(
        f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}"
@@ -1498,7 +1420,7 @@ def ingest_connector(
            docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

            if operation_mode == "upload":
-                id = _derive_source_id(idempotency_key)
+                id = uuid.uuid4()
            elif operation_mode == "sync":
                if not doc_id:
                    logging.error(
@@ -1515,11 +1437,7 @@ def ingest_connector(
            self.update_state(
                state="PROGRESS", meta={"current": 80, "status": "Storing documents"}
            )
-            embed_and_store_documents(
-                docs, vector_store_path, id, self,
-                attempt_id=getattr(self.request, "id", None),
-            )
-            assert_index_complete(id)
+            embed_and_store_documents(docs, vector_store_path, id, self)

            tokens = count_tokens_docs(docs)

--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -4448,9 +4448,10 @@
      }
    },
    "node_modules/@xmldom/xmldom": {
-      "version": "0.9.10",
-      "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.9.10.tgz",
-      "integrity": "sha512-A9gOqLdi6cV4ibazAjcQufGj0B1y/vDqYrcuP6d/6x8P27gRS8643Dj9o1dEKtB6O7fwxb2FgBmJS2mX7gpvdw==",
+      "version": "0.9.9",
+      "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.9.9.tgz",
+      "integrity": "sha512-qycIHAucxy/LXAYIjmLmtQ8q9GPnMbnjG1KXhWm9o5sCr6pOYDATkMPiTNa6/v8eELyqOQ2FsEqeoFYmgv/gJg==",
+      "deprecated": "this version has critical issues, please update to the latest version",
      "license": "MIT",
      "engines": {
        "node": ">=14.6"
@@ -11834,12 +11835,12 @@
      }
    },
    "node_modules/speech-rule-engine": {
-      "version": "4.1.4",
-      "resolved": "https://registry.npmjs.org/speech-rule-engine/-/speech-rule-engine-4.1.4.tgz",
-      "integrity": "sha512-i/VCLG1fvRc95pMHRqG4aQNscv+9aIsqA2oI7ZQS51sTdUcDHYX6cpT8/tqZ+enjs1tKVwbRBWgxut9SWn+f9g==",
+      "version": "4.1.3",
+      "resolved": "https://registry.npmjs.org/speech-rule-engine/-/speech-rule-engine-4.1.3.tgz",
+      "integrity": "sha512-SBMgkuJYvP4F62daRfBNwYC2nXTEhNXAfsBZ/BB7Ly85/KnbnjmKM7/45ZrFbH6jIMiAliDUDPSZFUuXDvcg6A==",
      "license": "Apache-2.0",
      "dependencies": {
-        "@xmldom/xmldom": "0.9.10",
+        "@xmldom/xmldom": "0.9.9",
        "commander": "13.1.0",
        "wicked-good-xpath": "1.3.0"
      },
--- a/extensions/react-widget/package-lock.json
+++ b/extensions/react-widget/package-lock.json
@@ -19,7 +19,7 @@
        "class-variance-authority": "^0.7.0",
        "clsx": "^2.1.0",
        "dompurify": "^3.1.5",
-        "flow-bin": "^0.311.0",
+        "flow-bin": "^0.309.0",
        "markdown-it": "^14.1.0",
        "react": "^19.2.5",
        "react-dom": "^19.2.5",
@@ -44,7 +44,7 @@
        "eslint-plugin-prettier": "^5.5.5",
        "eslint-plugin-react": "^7.37.5",
        "eslint-plugin-unused-imports": "^4.4.1",
-        "globals": "^17.5.0",
+        "globals": "^15.15.0",
        "parcel": "^2.16.4",
        "prettier": "^3.8.1",
        "process": "^0.11.10",
@@ -546,13 +546,12 @@
      }
    },
    "node_modules/@babel/plugin-syntax-jsx": {
-      "version": "7.28.6",
-      "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.28.6.tgz",
-      "integrity": "sha512-wgEmr06G6sIpqr8YDwA2dSRTE3bJ+V0IfpzfSY3Lfgd7YWOaAdlykvJi13ZKBt8cZHfgH1IXN+CL656W3uUa4w==",
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.24.6.tgz",
+      "integrity": "sha512-lWfvAIFNWMlCsU0DRUun2GpFwZdGTukLaHJqRh1JRb80NdAP5Sb1HDHB5X9P9OtgZHQl089UzQkpYlBq2VTPRw==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
-        "@babel/helper-plugin-utils": "^7.28.6"
+        "@babel/helper-plugin-utils": "^7.24.6"
      },
      "engines": {
        "node": ">=6.9.0"
@@ -1253,13 +1252,12 @@
      }
    },
    "node_modules/@babel/plugin-transform-react-display-name": {
-      "version": "7.28.0",
-      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.28.0.tgz",
-      "integrity": "sha512-D6Eujc2zMxKjfa4Zxl4GHMsmhKKZ9VpcqIchJLvwTxad9zWIYulwYItBovpDOoNLISpcZSXoDJ5gaGbQUDqViA==",
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.24.6.tgz",
+      "integrity": "sha512-/3iiEEHDsJuj9QU09gbyWGSUxDboFcD7Nj6dnHIlboWSodxXAoaY/zlNMHeYAC0WsERMqgO9a7UaM77CsYgWcg==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
-        "@babel/helper-plugin-utils": "^7.27.1"
+        "@babel/helper-plugin-utils": "^7.24.6"
      },
      "engines": {
        "node": ">=6.9.0"
@@ -1269,17 +1267,16 @@
      }
    },
    "node_modules/@babel/plugin-transform-react-jsx": {
-      "version": "7.28.6",
-      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.28.6.tgz",
-      "integrity": "sha512-61bxqhiRfAACulXSLd/GxqmAedUSrRZIu/cbaT18T1CetkTmtDN15it7i80ru4DVqRK1WMxQhXs+Lf9kajm5Ow==",
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.24.6.tgz",
+      "integrity": "sha512-pCtPHhpRZHfwdA5G1Gpk5mIzMA99hv0R8S/Ket50Rw+S+8hkt3wBWqdqHaPw0CuUYxdshUgsPiLQ5fAs4ASMhw==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
-        "@babel/helper-annotate-as-pure": "^7.27.3",
-        "@babel/helper-module-imports": "^7.28.6",
-        "@babel/helper-plugin-utils": "^7.28.6",
-        "@babel/plugin-syntax-jsx": "^7.28.6",
-        "@babel/types": "^7.28.6"
+        "@babel/helper-annotate-as-pure": "^7.24.6",
+        "@babel/helper-module-imports": "^7.24.6",
+        "@babel/helper-plugin-utils": "^7.24.6",
+        "@babel/plugin-syntax-jsx": "^7.24.6",
+        "@babel/types": "^7.24.6"
      },
      "engines": {
        "node": ">=6.9.0"
@@ -1289,13 +1286,12 @@
      }
    },
    "node_modules/@babel/plugin-transform-react-jsx-development": {
-      "version": "7.27.1",
-      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.27.1.tgz",
-      "integrity": "sha512-ykDdF5yI4f1WrAolLqeF3hmYU12j9ntLQl/AOG1HAS21jxyg1Q0/J/tpREuYLfatGdGmXp/3yS0ZA76kOlVq9Q==",
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.24.6.tgz",
+      "integrity": "sha512-F7EsNp5StNDouSSdYyDSxh4J+xvj/JqG+Cb6s2fA+jCyHOzigG5vTwgH8tU2U8Voyiu5zCG9bAK49wTr/wPH0w==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
-        "@babel/plugin-transform-react-jsx": "^7.27.1"
+        "@babel/plugin-transform-react-jsx": "^7.24.6"
      },
      "engines": {
        "node": ">=6.9.0"
@@ -1305,14 +1301,13 @@
      }
    },
    "node_modules/@babel/plugin-transform-react-pure-annotations": {
-      "version": "7.27.1",
-      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.27.1.tgz",
-      "integrity": "sha512-JfuinvDOsD9FVMTHpzA/pBLisxpv1aSf+OIV8lgH3MuWrks19R27e6a6DipIg4aX1Zm9Wpb04p8wljfKrVSnPA==",
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.24.6.tgz",
+      "integrity": "sha512-0HoDQlFJJkXRyV2N+xOpUETbKHcouSwijRQbKWVtxsPoq5bbB30qZag9/pSc5xcWVYjTHlLsBsY+hZDnzQTPNw==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
-        "@babel/helper-annotate-as-pure": "^7.27.1",
-        "@babel/helper-plugin-utils": "^7.27.1"
+        "@babel/helper-annotate-as-pure": "^7.24.6",
+        "@babel/helper-plugin-utils": "^7.24.6"
      },
      "engines": {
        "node": ">=6.9.0"
@@ -1618,18 +1613,17 @@
      }
    },
    "node_modules/@babel/preset-react": {
-      "version": "7.28.5",
-      "resolved": "https://registry.npmjs.org/@babel/preset-react/-/preset-react-7.28.5.tgz",
-      "integrity": "sha512-Z3J8vhRq7CeLjdC58jLv4lnZ5RKFUJWqH5emvxmv9Hv3BD1T9R/Im713R4MTKwvFaV74ejZ3sM01LyEKk4ugNQ==",
+      "version": "7.24.6",
+      "resolved": "https://registry.npmjs.org/@babel/preset-react/-/preset-react-7.24.6.tgz",
+      "integrity": "sha512-8mpzh1bWvmINmwM3xpz6ahu57mNaWavMm+wBNjQ4AFu1nghKBiIRET7l/Wmj4drXany/BBGjJZngICcD98F1iw==",
      "dev": true,
-      "license": "MIT",
      "dependencies": {
-        "@babel/helper-plugin-utils": "^7.27.1",
-        "@babel/helper-validator-option": "^7.27.1",
-        "@babel/plugin-transform-react-display-name": "^7.28.0",
-        "@babel/plugin-transform-react-jsx": "^7.27.1",
-        "@babel/plugin-transform-react-jsx-development": "^7.27.1",
-        "@babel/plugin-transform-react-pure-annotations": "^7.27.1"
+        "@babel/helper-plugin-utils": "^7.24.6",
+        "@babel/helper-validator-option": "^7.24.6",
+        "@babel/plugin-transform-react-display-name": "^7.24.6",
+        "@babel/plugin-transform-react-jsx": "^7.24.6",
+        "@babel/plugin-transform-react-jsx-development": "^7.24.6",
+        "@babel/plugin-transform-react-pure-annotations": "^7.24.6"
      },
      "engines": {
        "node": ">=6.9.0"
@@ -4546,17 +4540,17 @@
      "devOptional": true
    },
    "node_modules/@typescript-eslint/eslint-plugin": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.1.tgz",
-      "integrity": "sha512-BOziFIfE+6osHO9FoJG4zjoHUcvI7fTNBSpdAwrNH0/TLvzjsk2oo8XSSOT2HhqUyhZPfHv4UOffoJ9oEEQ7Ag==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.0.tgz",
+      "integrity": "sha512-HyAZtpdkgZwpq8Sz3FSUvCR4c+ScbuWa9AksK2Jweub7w4M3yTz4O11AqVJzLYjy/B9ZWPyc81I+mOdJU/bDQw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "@eslint-community/regexpp": "^4.12.2",
-        "@typescript-eslint/scope-manager": "8.59.1",
-        "@typescript-eslint/type-utils": "8.59.1",
-        "@typescript-eslint/utils": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1",
+        "@typescript-eslint/scope-manager": "8.59.0",
+        "@typescript-eslint/type-utils": "8.59.0",
+        "@typescript-eslint/utils": "8.59.0",
+        "@typescript-eslint/visitor-keys": "8.59.0",
        "ignore": "^7.0.5",
        "natural-compare": "^1.4.0",
        "ts-api-utils": "^2.5.0"
@@ -4569,22 +4563,22 @@
        "url": "https://opencollective.com/typescript-eslint"
      },
      "peerDependencies": {
-        "@typescript-eslint/parser": "^8.59.1",
+        "@typescript-eslint/parser": "^8.59.0",
        "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
        "typescript": ">=4.8.4 <6.1.0"
      }
    },
    "node_modules/@typescript-eslint/parser": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.1.tgz",
-      "integrity": "sha512-HDQH9O/47Dxi1ceDhBXdaldtf/WV9yRYMjbjCuNk3qnaTD564qwv61Y7+gTxwxRKzSrgO5uhtw584igXVuuZkA==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.0.tgz",
+      "integrity": "sha512-TI1XGwKbDpo9tRW8UDIXCOeLk55qe9ZFGs8MTKU6/M08HWTw52DD/IYhfQtOEhEdPhLMT26Ka/x7p70nd3dzDg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/scope-manager": "8.59.1",
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/typescript-estree": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1",
+        "@typescript-eslint/scope-manager": "8.59.0",
+        "@typescript-eslint/types": "8.59.0",
+        "@typescript-eslint/typescript-estree": "8.59.0",
+        "@typescript-eslint/visitor-keys": "8.59.0",
        "debug": "^4.4.3"
      },
      "engines": {
@@ -4600,14 +4594,14 @@
      }
    },
    "node_modules/@typescript-eslint/project-service": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.1.tgz",
-      "integrity": "sha512-+MuHQlHiEr00Of/IQbE/MmEoi44znZHbR/Pz7Opq4HryUOlRi+/44dro9Ycy8Fyo+/024IWtw8m4JUMCGTYxDg==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.0.tgz",
+      "integrity": "sha512-Lw5ITrR5s5TbC19YSvlr63ZfLaJoU6vtKTHyB0GQOpX0W7d5/Ir6vUahWi/8Sps/nOukZQ0IB3SmlxZnjaKVnw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/tsconfig-utils": "^8.59.1",
-        "@typescript-eslint/types": "^8.59.1",
+        "@typescript-eslint/tsconfig-utils": "^8.59.0",
+        "@typescript-eslint/types": "^8.59.0",
        "debug": "^4.4.3"
      },
      "engines": {
@@ -4622,14 +4616,14 @@
      }
    },
    "node_modules/@typescript-eslint/scope-manager": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.1.tgz",
-      "integrity": "sha512-LwuHQI4pDOYVKvmH2dkaJo6YZCSgouVgnS/z7yBPKBMvgtBvyLqiLy9Z6b7+m/TRcX1NFYUqZetI5Y+aT4GEfg==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.0.tgz",
+      "integrity": "sha512-UzR16Ut8IpA3Mc4DbgAShlPPkVm8xXMWafXxB0BocaVRHs8ZGakAxGRskF7FId3sdk9lgGD73GSFaWmWFDE4dg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1"
+        "@typescript-eslint/types": "8.59.0",
+        "@typescript-eslint/visitor-keys": "8.59.0"
      },
      "engines": {
        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -4640,9 +4634,9 @@
      }
    },
    "node_modules/@typescript-eslint/tsconfig-utils": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.1.tgz",
-      "integrity": "sha512-/0nEyPbX7gRsk0Uwfe4ALwwgxuA66d/l2mhRDNlAvaj4U3juhUtJNq0DsY8M2AYwwb9rEq2hrC3IcIcEt++iJA==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.0.tgz",
+      "integrity": "sha512-91Sbl3s4Kb3SybliIY6muFBmHVv+pYXfybC4Oolp3dvk8BvIE3wOPc+403CWIT7mJNkfQRGtdqghzs2+Z91Tqg==",
      "dev": true,
      "license": "MIT",
      "engines": {
@@ -4657,15 +4651,15 @@
      }
    },
    "node_modules/@typescript-eslint/type-utils": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.1.tgz",
-      "integrity": "sha512-klWPBR2ciQHS3f++ug/mVnWKPjBUo7icEL3FAO1lhAR1Z1i5NQYZ1EannMSRYcq5qCv5wNALlXr6fksRHyYl7w==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.0.tgz",
+      "integrity": "sha512-3TRiZaQSltGqGeNrJzzr1+8YcEobKH9rHnqIp/1psfKFmhRQDNMGP5hBufanYTGznwShzVLs3Mz+gDN7HkWfXg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/typescript-estree": "8.59.1",
-        "@typescript-eslint/utils": "8.59.1",
+        "@typescript-eslint/types": "8.59.0",
+        "@typescript-eslint/typescript-estree": "8.59.0",
+        "@typescript-eslint/utils": "8.59.0",
        "debug": "^4.4.3",
        "ts-api-utils": "^2.5.0"
      },
@@ -4682,9 +4676,9 @@
      }
    },
    "node_modules/@typescript-eslint/types": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.1.tgz",
-      "integrity": "sha512-ZDCjgccSdYPw5Bxh+my4Z0lJU96ZDN7jbBzvmEn0FZx3RtU1C7VWl6NbDx94bwY3V5YsgwRzJPOgeY2Q/nLG8A==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.0.tgz",
+      "integrity": "sha512-nLzdsT1gdOgFxxxwrlNVUBzSNBEEHJ86bblmk4QAS6stfig7rcJzWKqCyxFy3YRRHXDWEkb2NralA1nOYkkm/A==",
      "dev": true,
      "license": "MIT",
      "engines": {
@@ -4696,16 +4690,16 @@
      }
    },
    "node_modules/@typescript-eslint/typescript-estree": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.1.tgz",
-      "integrity": "sha512-OUd+vJS05sSkOip+BkZ/2NS8RMxrAAJemsC6vU3kmfLyeaJT0TftHkV9mcx2107MmsBVXXexhVu4F0TZXyMl4g==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.0.tgz",
+      "integrity": "sha512-O9Re9P1BmBLFJyikRbQpLku/QA3/AueZNO9WePLBwQrvkixTmDe8u76B6CYUAITRl/rHawggEqUGn5QIkVRLMw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/project-service": "8.59.1",
-        "@typescript-eslint/tsconfig-utils": "8.59.1",
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1",
+        "@typescript-eslint/project-service": "8.59.0",
+        "@typescript-eslint/tsconfig-utils": "8.59.0",
+        "@typescript-eslint/types": "8.59.0",
+        "@typescript-eslint/visitor-keys": "8.59.0",
        "debug": "^4.4.3",
        "minimatch": "^10.2.2",
        "semver": "^7.7.3",
@@ -4737,16 +4731,16 @@
      }
    },
    "node_modules/@typescript-eslint/utils": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.1.tgz",
-      "integrity": "sha512-3pIeoXhCeYH9FSCBI8P3iNwJlGuzPlYKkTlen2O9T1DSeeg8UG8jstq6BLk+Mda0qup7mgk4z4XL4OzRaxZ8LA==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.0.tgz",
+      "integrity": "sha512-I1R/K7V07XsMJ12Oaxg/O9GfrysGTmCRhvZJBv0RE0NcULMzjqVpR5kRRQjHsz3J/bElU7HwCO7zkqL+MSUz+g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "@eslint-community/eslint-utils": "^4.9.1",
-        "@typescript-eslint/scope-manager": "8.59.1",
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/typescript-estree": "8.59.1"
+        "@typescript-eslint/scope-manager": "8.59.0",
+        "@typescript-eslint/types": "8.59.0",
+        "@typescript-eslint/typescript-estree": "8.59.0"
      },
      "engines": {
        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -4761,13 +4755,13 @@
      }
    },
    "node_modules/@typescript-eslint/visitor-keys": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.1.tgz",
-      "integrity": "sha512-LdDNl6C5iJExcM0Yh0PwAIBb9PrSiCsWamF/JyEZawm3kFDnRoaq3LGE4bpyRao/fWeGKKyw7icx0YxrLFC5Cg==",
+      "version": "8.59.0",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.0.tgz",
+      "integrity": "sha512-/uejZt4dSere1bx12WLlPfv8GktzcaDtuJ7s42/HEZ5zGj9oxRaD4bj7qwSunXkf+pbAhFt2zjpHYUiT5lHf0Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/types": "8.59.0",
        "eslint-visitor-keys": "^5.0.0"
      },
      "engines": {
@@ -6492,9 +6486,9 @@
      "license": "ISC"
    },
    "node_modules/flow-bin": {
-      "version": "0.311.0",
-      "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.311.0.tgz",
-      "integrity": "sha512-4lXxjhPdmkeizju3F0HDCMYGkoL7hiq0W9bAW4pQpQTi56op+QZrVyMENjbCGZc+KlFBLwWkur+EkyfPTsa6xw==",
+      "version": "0.309.0",
+      "resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.309.0.tgz",
+      "integrity": "sha512-/RH68gcCY8OHzcdSVTUCw+fhDSEYmNHoovfK0EcbB4rs1Xbc5HhxhHTvr7U+h55De4bDRlE52ghH23MRP625cQ==",
      "license": "MIT",
      "bin": {
        "flow": "cli.js"
@@ -6657,9 +6651,9 @@
      }
    },
    "node_modules/globals": {
-      "version": "17.5.0",
-      "resolved": "https://registry.npmjs.org/globals/-/globals-17.5.0.tgz",
-      "integrity": "sha512-qoV+HK2yFl/366t2/Cb3+xxPUo5BuMynomoDmiaZBIdbs+0pYbjfZU+twLhGKp4uCZ/+NbtpVepH5bGCxRyy2g==",
+      "version": "15.15.0",
+      "resolved": "https://registry.npmjs.org/globals/-/globals-15.15.0.tgz",
+      "integrity": "sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==",
      "dev": true,
      "license": "MIT",
      "engines": {
@@ -8976,9 +8970,9 @@
      }
    },
    "node_modules/styled-components": {
-      "version": "6.4.1",
-      "resolved": "https://registry.npmjs.org/styled-components/-/styled-components-6.4.1.tgz",
-      "integrity": "sha512-ADu2dF53esUzzM4I0ewxhxFtsDd6v4V6dNkg3vG0iFKhnt06sJneTZnRvujAosZwW0XD58IKgGMQoqri4wHRqg==",
+      "version": "6.4.0",
+      "resolved": "https://registry.npmjs.org/styled-components/-/styled-components-6.4.0.tgz",
+      "integrity": "sha512-BL1EDFpt+q10eAeZB0q9ps6pSlPejaBQWBkiuM16pyoVTG4NhZrPrZK0cqNbrozxSsYwUsJ9SQYN6NyeKJYX9A==",
      "license": "MIT",
      "dependencies": {
        "@emotion/is-prop-valid": "1.4.0",
--- a/extensions/react-widget/package.json
+++ b/extensions/react-widget/package.json
@@ -52,7 +52,7 @@
    "class-variance-authority": "^0.7.0",
    "clsx": "^2.1.0",
    "dompurify": "^3.1.5",
-    "flow-bin": "^0.311.0",
+    "flow-bin": "^0.309.0",
    "markdown-it": "^14.1.0",
    "react": "^19.2.5",
    "react-dom": "^19.2.5",
@@ -77,7 +77,7 @@
    "eslint-plugin-prettier": "^5.5.5",
    "eslint-plugin-react": "^7.37.5",
    "eslint-plugin-unused-imports": "^4.4.1",
-    "globals": "^17.5.0",
+    "globals": "^15.15.0",
    "parcel": "^2.16.4",
    "prettier": "^3.8.1",
    "process": "^0.11.10",
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -28,12 +28,12 @@
        "react": "^19.1.0",
        "react-chartjs-2": "^5.3.0",
        "react-dom": "^19.2.5",
-        "react-dropzone": "^15.0.0",
+        "react-dropzone": "^14.3.8",
        "react-google-drive-picker": "^1.2.2",
-        "react-i18next": "^17.0.6",
+        "react-i18next": "^17.0.2",
        "react-markdown": "^9.0.1",
        "react-redux": "^9.2.0",
-        "react-router-dom": "^7.14.2",
+        "react-router-dom": "^7.14.1",
        "react-syntax-highlighter": "^16.1.1",
        "reactflow": "^11.11.4",
        "rehype-katex": "^7.0.1",
@@ -47,7 +47,7 @@
        "@types/react": "^19.2.14",
        "@types/react-dom": "^19.2.3",
        "@types/react-syntax-highlighter": "^15.5.13",
-        "@typescript-eslint/eslint-plugin": "^8.59.1",
+        "@typescript-eslint/eslint-plugin": "^8.58.2",
        "@typescript-eslint/parser": "^8.46.3",
        "@vitejs/plugin-react": "^6.0.1",
        "eslint": "^9.39.1",
@@ -60,13 +60,13 @@
        "eslint-plugin-unused-imports": "^4.1.4",
        "husky": "^9.1.7",
        "lint-staged": "^16.4.0",
-        "postcss": "^8.5.12",
+        "postcss": "^8.4.49",
        "prettier": "^3.5.3",
        "prettier-plugin-tailwindcss": "^0.7.2",
        "tailwindcss": "^4.2.2",
        "tw-animate-css": "^1.4.0",
-        "typescript": "^6.0.3",
-        "vite": "^8.0.10",
+        "typescript": "^5.8.3",
+        "vite": "^8.0.8",
        "vite-plugin-svgr": "^4.3.0"
      }
    },
@@ -418,9 +418,9 @@
      "license": "Apache-2.0"
    },
    "node_modules/@emnapi/core": {
-      "version": "1.10.0",
-      "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz",
-      "integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==",
+      "version": "1.9.2",
+      "resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.2.tgz",
+      "integrity": "sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==",
      "dev": true,
      "license": "MIT",
      "optional": true,
@@ -430,9 +430,9 @@
      }
    },
    "node_modules/@emnapi/runtime": {
-      "version": "1.10.0",
-      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz",
-      "integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==",
+      "version": "1.9.2",
+      "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz",
+      "integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==",
      "dev": true,
      "license": "MIT",
      "optional": true,
@@ -791,9 +791,9 @@
      }
    },
    "node_modules/@oxc-project/types": {
-      "version": "0.127.0",
-      "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.127.0.tgz",
-      "integrity": "sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==",
+      "version": "0.124.0",
+      "resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.124.0.tgz",
+      "integrity": "sha512-VBFWMTBvHxS11Z5Lvlr3IWgrwhMTXV+Md+EQF0Xf60+wAdsGFTBx7X7K/hP4pi8N7dcm1RvcHwDxZ16Qx8keUg==",
      "dev": true,
      "license": "MIT",
      "funding": {
@@ -2584,9 +2584,9 @@
      }
    },
    "node_modules/@rolldown/binding-android-arm64": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.17.tgz",
-      "integrity": "sha512-s70pVGhw4zqGeFnXWvAzJDlvxhlRollagdCCKRgOsgUOH3N1l0LIxf83AtGzmb5SiVM4Hjl5HyarMRfdfj3DaQ==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.15.tgz",
+      "integrity": "sha512-YYe6aWruPZDtHNpwu7+qAHEMbQ/yRl6atqb/AhznLTnD3UY99Q1jE7ihLSahNWkF4EqRPVC4SiR4O0UkLK02tA==",
      "cpu": [
        "arm64"
      ],
@@ -2601,9 +2601,9 @@
      }
    },
    "node_modules/@rolldown/binding-darwin-arm64": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.17.tgz",
-      "integrity": "sha512-4ksWc9n0mhlZpZ9PMZgTGjeOPRu8MB1Z3Tz0Mo02eWfWCHMW1zN82Qz/pL/rC+yQa+8ZnutMF0JjJe7PjwasYw==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.15.tgz",
+      "integrity": "sha512-oArR/ig8wNTPYsXL+Mzhs0oxhxfuHRfG7Ikw7jXsw8mYOtk71W0OkF2VEVh699pdmzjPQsTjlD1JIOoHkLP1Fg==",
      "cpu": [
        "arm64"
      ],
@@ -2618,9 +2618,9 @@
      }
    },
    "node_modules/@rolldown/binding-darwin-x64": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.17.tgz",
-      "integrity": "sha512-SUSDOI6WwUVNcWxd02QEBjLdY1VPHvlEkw6T/8nYG322iYWCTxRb1vzk4E+mWWYehTp7ERibq54LSJGjmouOsw==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.15.tgz",
+      "integrity": "sha512-YzeVqOqjPYvUbJSWJ4EDL8ahbmsIXQpgL3JVipmN+MX0XnXMeWomLN3Fb+nwCmP/jfyqte5I3XRSm7OfQrbyxw==",
      "cpu": [
        "x64"
      ],
@@ -2635,9 +2635,9 @@
      }
    },
    "node_modules/@rolldown/binding-freebsd-x64": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.17.tgz",
-      "integrity": "sha512-hwnz3nw9dbJ05EDO/PvcjaaewqqDy7Y1rn1UO81l8iIK1GjenME75dl16ajbvSSMfv66WXSRCYKIqfgq2KCfxw==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.15.tgz",
+      "integrity": "sha512-9Erhx956jeQ0nNTyif1+QWAXDRD38ZNjr//bSHrt6wDwB+QkAfl2q6Mn1k6OBPerznjRmbM10lgRb1Pli4xZPw==",
      "cpu": [
        "x64"
      ],
@@ -2652,9 +2652,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-arm-gnueabihf": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.17.tgz",
-      "integrity": "sha512-IS+W7epTcwANmFSQFrS1SivEXHtl1JtuQA9wlxrZTcNi6mx+FDOYrakGevvvTwgj2JvWiK8B29/qD9BELZPyXQ==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.15.tgz",
+      "integrity": "sha512-cVwk0w8QbZJGTnP/AHQBs5yNwmpgGYStL88t4UIaqcvYJWBfS0s3oqVLZPwsPU6M0zlW4GqjP0Zq5MnAGwFeGA==",
      "cpu": [
        "arm"
      ],
@@ -2669,9 +2669,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-arm64-gnu": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.17.tgz",
-      "integrity": "sha512-e6usGaHKW5BMNZOymS1UcEYGowQMWcgZ71Z17Sl/h2+ZziNJ1a9n3Zvcz6LdRyIW5572wBCTH/Z+bKuZouGk9Q==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.15.tgz",
+      "integrity": "sha512-eBZ/u8iAK9SoHGanqe/jrPnY0JvBN6iXbVOsbO38mbz+ZJsaobExAm1Iu+rxa4S1l2FjG0qEZn4Rc6X8n+9M+w==",
      "cpu": [
        "arm64"
      ],
@@ -2686,9 +2686,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-arm64-musl": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.17.tgz",
-      "integrity": "sha512-b/CgbwAJpmrRLp02RPfhbudf5tZnN9nsPWK82znefso832etkem8H7FSZwxrOI9djcdTP7U6YfNhbRnh7djErg==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.15.tgz",
+      "integrity": "sha512-ZvRYMGrAklV9PEkgt4LQM6MjQX2P58HPAuecwYObY2DhS2t35R0I810bKi0wmaYORt6m/2Sm+Z+nFgb0WhXNcQ==",
      "cpu": [
        "arm64"
      ],
@@ -2703,9 +2703,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-ppc64-gnu": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.17.tgz",
-      "integrity": "sha512-4EII1iNGRUN5WwGbF/kOh/EIkoDN9HsupgLQoXfY+D1oyJm7/F4t5PYU5n8SWZgG0FEwakyM8pGgwcBYruGTlA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.15.tgz",
+      "integrity": "sha512-VDpgGBzgfg5hLg+uBpCLoFG5kVvEyafmfxGUV0UHLcL5irxAK7PKNeC2MwClgk6ZAiNhmo9FLhRYgvMmedLtnQ==",
      "cpu": [
        "ppc64"
      ],
@@ -2720,9 +2720,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-s390x-gnu": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.17.tgz",
-      "integrity": "sha512-AH8oq3XqQo4IibpVXvPeLDI5pzkpYn0WiZAfT05kFzoJ6tQNzwRdDYQ45M8I/gslbodRZwW8uxLhbSBbkv96rA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.15.tgz",
+      "integrity": "sha512-y1uXY3qQWCzcPgRJATPSOUP4tCemh4uBdY7e3EZbVwCJTY3gLJWnQABgeUetvED+bt1FQ01OeZwvhLS2bpNrAQ==",
      "cpu": [
        "s390x"
      ],
@@ -2737,9 +2737,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-x64-gnu": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.17.tgz",
-      "integrity": "sha512-cLnjV3xfo7KslbU41Z7z8BH/E1y5mzUYzAqih1d1MDaIGZRCMqTijqLv76/P7fyHuvUcfGsIpqCdddbxLLK9rA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.15.tgz",
+      "integrity": "sha512-023bTPBod7J3Y/4fzAN6QtpkSABR0rigtrwaP+qSEabUh5zf6ELr9Nc7GujaROuPY3uwdSIXWrvhn1KxOvurWA==",
      "cpu": [
        "x64"
      ],
@@ -2754,9 +2754,9 @@
      }
    },
    "node_modules/@rolldown/binding-linux-x64-musl": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.17.tgz",
-      "integrity": "sha512-0phclDw1spsL7dUB37sIARuis2tAgomCJXAHZlpt8PXZ4Ba0dRP1e+66lsRqrfhISeN9bEGNjQs+T/Fbd7oYGw==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.15.tgz",
+      "integrity": "sha512-witB2O0/hU4CgfOOKUoeFgQ4GktPi1eEbAhaLAIpgD6+ZnhcPkUtPsoKKHRzmOoWPZue46IThdSgdo4XneOLYw==",
      "cpu": [
        "x64"
      ],
@@ -2771,9 +2771,9 @@
      }
    },
    "node_modules/@rolldown/binding-openharmony-arm64": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.17.tgz",
-      "integrity": "sha512-0ag/hEgXOwgw4t8QyQvUCxvEg+V0KBcA6YuOx9g0r02MprutRF5dyljgm3EmR02O292UX7UeS6HzWHAl6KgyhA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.15.tgz",
+      "integrity": "sha512-UCL68NJ0Ud5zRipXZE9dF5PmirzJE4E4BCIOOssEnM7wLDsxjc6Qb0sGDxTNRTP53I6MZpygyCpY8Aa8sPfKPg==",
      "cpu": [
        "arm64"
      ],
@@ -2788,9 +2788,9 @@
      }
    },
    "node_modules/@rolldown/binding-wasm32-wasi": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.17.tgz",
-      "integrity": "sha512-LEXei6vo0E5wTGwpkJ4KoT3OZJRnglwldt5ziLzOlc6qqb55z4tWNq2A+PFqCJuvWWdP53CVhG1Z9NtToDPJrA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.15.tgz",
+      "integrity": "sha512-ApLruZq/ig+nhaE7OJm4lDjayUnOHVUa77zGeqnqZ9pn0ovdVbbNPerVibLXDmWeUZXjIYIT8V3xkT58Rm9u5Q==",
      "cpu": [
        "wasm32"
      ],
@@ -2798,18 +2798,18 @@
      "license": "MIT",
      "optional": true,
      "dependencies": {
-        "@emnapi/core": "1.10.0",
-        "@emnapi/runtime": "1.10.0",
-        "@napi-rs/wasm-runtime": "^1.1.4"
+        "@emnapi/core": "1.9.2",
+        "@emnapi/runtime": "1.9.2",
+        "@napi-rs/wasm-runtime": "^1.1.3"
      },
      "engines": {
-        "node": "^20.19.0 || >=22.12.0"
+        "node": ">=14.0.0"
      }
    },
    "node_modules/@rolldown/binding-win32-arm64-msvc": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.17.tgz",
-      "integrity": "sha512-gUmyzBl3SPMa6hrqFUth9sVfcLBlYsbMzBx5PlexMroZStgzGqlZ26pYG89rBb45Mnia+oil6YAIFeEWGWhoZA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.15.tgz",
+      "integrity": "sha512-KmoUoU7HnN+Si5YWJigfTws1jz1bKBYDQKdbLspz0UaqjjFkddHsqorgiW1mxcAj88lYUE6NC/zJNwT+SloqtA==",
      "cpu": [
        "arm64"
      ],
@@ -2824,9 +2824,9 @@
      }
    },
    "node_modules/@rolldown/binding-win32-x64-msvc": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.17.tgz",
-      "integrity": "sha512-3hkiolcUAvPB9FLb3UZdfjVVNWherN1f/skkGWJP/fgSQhYUZpSIRr0/I8ZK9TkF3F7kxvJAk0+IcKvPHk9qQg==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.15.tgz",
+      "integrity": "sha512-3P2A8L+x75qavWLe/Dll3EYBJLQmtkJN8rfh+U/eR3MqMgL/h98PhYI+JFfXuDPgPeCB7iZAKiqii5vqOvnA0g==",
      "cpu": [
        "x64"
      ],
@@ -3862,17 +3862,17 @@
      "license": "MIT"
    },
    "node_modules/@typescript-eslint/eslint-plugin": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.1.tgz",
-      "integrity": "sha512-BOziFIfE+6osHO9FoJG4zjoHUcvI7fTNBSpdAwrNH0/TLvzjsk2oo8XSSOT2HhqUyhZPfHv4UOffoJ9oEEQ7Ag==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.58.2.tgz",
+      "integrity": "sha512-aC2qc5thQahutKjP+cl8cgN9DWe3ZUqVko30CMSZHnFEHyhOYoZSzkGtAI2mcwZ38xeImDucI4dnqsHiOYuuCw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "@eslint-community/regexpp": "^4.12.2",
-        "@typescript-eslint/scope-manager": "8.59.1",
-        "@typescript-eslint/type-utils": "8.59.1",
-        "@typescript-eslint/utils": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1",
+        "@typescript-eslint/scope-manager": "8.58.2",
+        "@typescript-eslint/type-utils": "8.58.2",
+        "@typescript-eslint/utils": "8.58.2",
+        "@typescript-eslint/visitor-keys": "8.58.2",
        "ignore": "^7.0.5",
        "natural-compare": "^1.4.0",
        "ts-api-utils": "^2.5.0"
@@ -3885,7 +3885,7 @@
        "url": "https://opencollective.com/typescript-eslint"
      },
      "peerDependencies": {
-        "@typescript-eslint/parser": "^8.59.1",
+        "@typescript-eslint/parser": "^8.58.2",
        "eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
        "typescript": ">=4.8.4 <6.1.0"
      }
@@ -3901,16 +3901,16 @@
      }
    },
    "node_modules/@typescript-eslint/parser": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.1.tgz",
-      "integrity": "sha512-HDQH9O/47Dxi1ceDhBXdaldtf/WV9yRYMjbjCuNk3qnaTD564qwv61Y7+gTxwxRKzSrgO5uhtw584igXVuuZkA==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.58.2.tgz",
+      "integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/scope-manager": "8.59.1",
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/typescript-estree": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1",
+        "@typescript-eslint/scope-manager": "8.58.2",
+        "@typescript-eslint/types": "8.58.2",
+        "@typescript-eslint/typescript-estree": "8.58.2",
+        "@typescript-eslint/visitor-keys": "8.58.2",
        "debug": "^4.4.3"
      },
      "engines": {
@@ -3926,14 +3926,14 @@
      }
    },
    "node_modules/@typescript-eslint/project-service": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.1.tgz",
-      "integrity": "sha512-+MuHQlHiEr00Of/IQbE/MmEoi44znZHbR/Pz7Opq4HryUOlRi+/44dro9Ycy8Fyo+/024IWtw8m4JUMCGTYxDg==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.58.2.tgz",
+      "integrity": "sha512-Cq6UfpZZk15+r87BkIh5rDpi38W4b+Sjnb8wQCPPDDweS/LRCFjCyViEbzHk5Ck3f2QDfgmlxqSa7S7clDtlfg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/tsconfig-utils": "^8.59.1",
-        "@typescript-eslint/types": "^8.59.1",
+        "@typescript-eslint/tsconfig-utils": "^8.58.2",
+        "@typescript-eslint/types": "^8.58.2",
        "debug": "^4.4.3"
      },
      "engines": {
@@ -3948,14 +3948,14 @@
      }
    },
    "node_modules/@typescript-eslint/scope-manager": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.1.tgz",
-      "integrity": "sha512-LwuHQI4pDOYVKvmH2dkaJo6YZCSgouVgnS/z7yBPKBMvgtBvyLqiLy9Z6b7+m/TRcX1NFYUqZetI5Y+aT4GEfg==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.58.2.tgz",
+      "integrity": "sha512-SgmyvDPexWETQek+qzZnrG6844IaO02UVyOLhI4wpo82dpZJY9+6YZCKAMFzXb7qhx37mFK1QcPQ18tud+vo6Q==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1"
+        "@typescript-eslint/types": "8.58.2",
+        "@typescript-eslint/visitor-keys": "8.58.2"
      },
      "engines": {
        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -3966,9 +3966,9 @@
      }
    },
    "node_modules/@typescript-eslint/tsconfig-utils": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.1.tgz",
-      "integrity": "sha512-/0nEyPbX7gRsk0Uwfe4ALwwgxuA66d/l2mhRDNlAvaj4U3juhUtJNq0DsY8M2AYwwb9rEq2hrC3IcIcEt++iJA==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.58.2.tgz",
+      "integrity": "sha512-3SR+RukipDvkkKp/d0jP0dyzuls3DbGmwDpVEc5wqk5f38KFThakqAAO0XMirWAE+kT00oTauTbzMFGPoAzB0A==",
      "dev": true,
      "license": "MIT",
      "engines": {
@@ -3983,15 +3983,15 @@
      }
    },
    "node_modules/@typescript-eslint/type-utils": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.1.tgz",
-      "integrity": "sha512-klWPBR2ciQHS3f++ug/mVnWKPjBUo7icEL3FAO1lhAR1Z1i5NQYZ1EannMSRYcq5qCv5wNALlXr6fksRHyYl7w==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.58.2.tgz",
+      "integrity": "sha512-Z7EloNR/B389FvabdGeTo2XMs4W9TjtPiO9DAsmT0yom0bwlPyRjkJ1uCdW1DvrrrYP50AJZ9Xc3sByZA9+dcg==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/typescript-estree": "8.59.1",
-        "@typescript-eslint/utils": "8.59.1",
+        "@typescript-eslint/types": "8.58.2",
+        "@typescript-eslint/typescript-estree": "8.58.2",
+        "@typescript-eslint/utils": "8.58.2",
        "debug": "^4.4.3",
        "ts-api-utils": "^2.5.0"
      },
@@ -4008,9 +4008,9 @@
      }
    },
    "node_modules/@typescript-eslint/types": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.1.tgz",
-      "integrity": "sha512-ZDCjgccSdYPw5Bxh+my4Z0lJU96ZDN7jbBzvmEn0FZx3RtU1C7VWl6NbDx94bwY3V5YsgwRzJPOgeY2Q/nLG8A==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.58.2.tgz",
+      "integrity": "sha512-9TukXyATBQf/Jq9AMQXfvurk+G5R2MwfqQGDR2GzGz28HvY/lXNKGhkY+6IOubwcquikWk5cjlgPvD2uAA7htQ==",
      "dev": true,
      "license": "MIT",
      "engines": {
@@ -4022,16 +4022,16 @@
      }
    },
    "node_modules/@typescript-eslint/typescript-estree": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.1.tgz",
-      "integrity": "sha512-OUd+vJS05sSkOip+BkZ/2NS8RMxrAAJemsC6vU3kmfLyeaJT0TftHkV9mcx2107MmsBVXXexhVu4F0TZXyMl4g==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.58.2.tgz",
+      "integrity": "sha512-ELGuoofuhhoCvNbQjFFiobFcGgcDCEm0ThWdmO4Z0UzLqPXS3KFvnEZ+SHewwOYHjM09tkzOWXNTv9u6Gqtyuw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/project-service": "8.59.1",
-        "@typescript-eslint/tsconfig-utils": "8.59.1",
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/visitor-keys": "8.59.1",
+        "@typescript-eslint/project-service": "8.58.2",
+        "@typescript-eslint/tsconfig-utils": "8.58.2",
+        "@typescript-eslint/types": "8.58.2",
+        "@typescript-eslint/visitor-keys": "8.58.2",
        "debug": "^4.4.3",
        "minimatch": "^10.2.2",
        "semver": "^7.7.3",
@@ -4066,16 +4066,16 @@
      }
    },
    "node_modules/@typescript-eslint/utils": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.1.tgz",
-      "integrity": "sha512-3pIeoXhCeYH9FSCBI8P3iNwJlGuzPlYKkTlen2O9T1DSeeg8UG8jstq6BLk+Mda0qup7mgk4z4XL4OzRaxZ8LA==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.58.2.tgz",
+      "integrity": "sha512-QZfjHNEzPY8+l0+fIXMvuQ2sJlplB4zgDZvA+NmvZsZv3EQwOcc1DuIU1VJUTWZ/RKouBMhDyNaBMx4sWvrzRA==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "@eslint-community/eslint-utils": "^4.9.1",
-        "@typescript-eslint/scope-manager": "8.59.1",
-        "@typescript-eslint/types": "8.59.1",
-        "@typescript-eslint/typescript-estree": "8.59.1"
+        "@typescript-eslint/scope-manager": "8.58.2",
+        "@typescript-eslint/types": "8.58.2",
+        "@typescript-eslint/typescript-estree": "8.58.2"
      },
      "engines": {
        "node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -4090,13 +4090,13 @@
      }
    },
    "node_modules/@typescript-eslint/visitor-keys": {
-      "version": "8.59.1",
-      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.1.tgz",
-      "integrity": "sha512-LdDNl6C5iJExcM0Yh0PwAIBb9PrSiCsWamF/JyEZawm3kFDnRoaq3LGE4bpyRao/fWeGKKyw7icx0YxrLFC5Cg==",
+      "version": "8.58.2",
+      "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.58.2.tgz",
+      "integrity": "sha512-f1WO2Lx8a9t8DARmcWAUPJbu0G20bJlj8L4z72K00TMeJAoyLr/tHhI/pzYBLrR4dXWkcxO1cWYZEOX8DKHTqA==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@typescript-eslint/types": "8.59.1",
+        "@typescript-eslint/types": "8.58.2",
        "eslint-visitor-keys": "^5.0.0"
      },
      "engines": {
@@ -8356,9 +8356,9 @@
      "license": "MIT"
    },
    "node_modules/lodash-es": {
-      "version": "4.18.1",
-      "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.18.1.tgz",
-      "integrity": "sha512-J8xewKD/Gk22OZbhpOVSwcs60zhd95ESDwezOFuA3/099925PdHJ7OFHNTGtajL3AlZkykD32HykiMo+BIBI8A==",
+      "version": "4.17.23",
+      "resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz",
+      "integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==",
      "license": "MIT"
    },
    "node_modules/lodash.merge": {
@@ -9951,9 +9951,9 @@
      }
    },
    "node_modules/postcss": {
-      "version": "8.5.12",
-      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
-      "integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
+      "version": "8.5.8",
+      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz",
+      "integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==",
      "dev": true,
      "funding": [
        {
@@ -10280,9 +10280,9 @@
      }
    },
    "node_modules/react-dropzone": {
-      "version": "15.0.0",
-      "resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-15.0.0.tgz",
-      "integrity": "sha512-lGjYV/EoqEjEWPnmiSvH4v5IoIAwQM2W4Z1C0Q/Pw2xD0eVzKPS359BQTUMum+1fa0kH2nrKjuavmTPOGhpLPg==",
+      "version": "14.3.8",
+      "resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.8.tgz",
+      "integrity": "sha512-sBgODnq+lcA4P296DY4wacOZz3JFpD99fp+hb//iBO2HHnyeZU3FwWyXJ6salNpqQdsZrgMrotuko/BdJMV8Ug==",
      "license": "MIT",
      "dependencies": {
        "attr-accept": "^2.2.4",
@@ -10307,9 +10307,9 @@
      }
    },
    "node_modules/react-i18next": {
-      "version": "17.0.6",
-      "resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-17.0.6.tgz",
-      "integrity": "sha512-WzJ6SMKF+GTD7JZZqxSR1AKKmXjaSu39sClUrNlwxS4Tl7a99O+ltFy6yhPMO+wgZuxpQjJ2PZkfrQKmAqrLhw==",
+      "version": "17.0.2",
+      "resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-17.0.2.tgz",
+      "integrity": "sha512-shBftH2vaTWK2Bsp7FiL+cevx3xFJlvFxmsDFQSrJc+6twHkP0tv/bGa01VVWzpreUVVwU+3Hev5iFqRg65RwA==",
      "license": "MIT",
      "dependencies": {
        "@babel/runtime": "^7.29.2",
@@ -10437,9 +10437,9 @@
      }
    },
    "node_modules/react-router": {
-      "version": "7.14.2",
-      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.2.tgz",
-      "integrity": "sha512-yCqNne6I8IB6rVCH7XUvlBK7/QKyqypBFGv+8dj4QBFJiiRX+FG7/nkdAvGElyvVZ/HQP5N19wzteuTARXi5Gw==",
+      "version": "7.14.1",
+      "resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.1.tgz",
+      "integrity": "sha512-5BCvFskyAAVumqhEKh/iPhLOIkfxcEUz8WqFIARCkMg8hZZzDYX9CtwxXA0e+qT8zAxmMC0x3Ckb9iMONwc5jg==",
      "license": "MIT",
      "dependencies": {
        "cookie": "^1.0.1",
@@ -10459,12 +10459,12 @@
      }
    },
    "node_modules/react-router-dom": {
-      "version": "7.14.2",
-      "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.2.tgz",
-      "integrity": "sha512-YZcM5ES8jJSM+KrJ9BdvHHqlnGTg5tH3sC5ChFRj4inosKctdyzBDhOyyHdGk597q2OT6NTrCA1OvB/YDwfekQ==",
+      "version": "7.14.1",
+      "resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.1.tgz",
+      "integrity": "sha512-ZkrQuwwhGibjQLqH1eCdyiZyLWglPxzxdl5tgwgKEyCSGC76vmAjleGocRe3J/MLfzMUIKwaFJWpFVJhK3d2xA==",
      "license": "MIT",
      "dependencies": {
-        "react-router": "7.14.2"
+        "react-router": "7.14.1"
      },
      "engines": {
        "node": ">=20.0.0"
@@ -10788,14 +10788,14 @@
      "license": "Unlicense"
    },
    "node_modules/rolldown": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.17.tgz",
-      "integrity": "sha512-ZrT53oAKrtA4+YtBWPQbtPOxIbVDbxT0orcYERKd63VJTF13zPcgXTvD4843L8pcsI7M6MErt8QtON6lrB9tyA==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.15.tgz",
+      "integrity": "sha512-Ff31guA5zT6WjnGp0SXw76X6hzGRk/OQq2hE+1lcDe+lJdHSgnSX6nK3erbONHyCbpSj9a9E+uX/OvytZoWp2g==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "@oxc-project/types": "=0.127.0",
-        "@rolldown/pluginutils": "1.0.0-rc.17"
+        "@oxc-project/types": "=0.124.0",
+        "@rolldown/pluginutils": "1.0.0-rc.15"
      },
      "bin": {
        "rolldown": "bin/cli.mjs"
@@ -10804,27 +10804,27 @@
        "node": "^20.19.0 || >=22.12.0"
      },
      "optionalDependencies": {
-        "@rolldown/binding-android-arm64": "1.0.0-rc.17",
-        "@rolldown/binding-darwin-arm64": "1.0.0-rc.17",
-        "@rolldown/binding-darwin-x64": "1.0.0-rc.17",
-        "@rolldown/binding-freebsd-x64": "1.0.0-rc.17",
-        "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.17",
-        "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.17",
-        "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.17",
-        "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.17",
-        "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.17",
-        "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.17",
-        "@rolldown/binding-linux-x64-musl": "1.0.0-rc.17",
-        "@rolldown/binding-openharmony-arm64": "1.0.0-rc.17",
-        "@rolldown/binding-wasm32-wasi": "1.0.0-rc.17",
-        "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.17",
-        "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.17"
+        "@rolldown/binding-android-arm64": "1.0.0-rc.15",
+        "@rolldown/binding-darwin-arm64": "1.0.0-rc.15",
+        "@rolldown/binding-darwin-x64": "1.0.0-rc.15",
+        "@rolldown/binding-freebsd-x64": "1.0.0-rc.15",
+        "@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.15",
+        "@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.15",
+        "@rolldown/binding-linux-arm64-musl": "1.0.0-rc.15",
+        "@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.15",
+        "@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.15",
+        "@rolldown/binding-linux-x64-gnu": "1.0.0-rc.15",
+        "@rolldown/binding-linux-x64-musl": "1.0.0-rc.15",
+        "@rolldown/binding-openharmony-arm64": "1.0.0-rc.15",
+        "@rolldown/binding-wasm32-wasi": "1.0.0-rc.15",
+        "@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.15",
+        "@rolldown/binding-win32-x64-msvc": "1.0.0-rc.15"
      }
    },
    "node_modules/rolldown/node_modules/@rolldown/pluginutils": {
-      "version": "1.0.0-rc.17",
-      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.17.tgz",
-      "integrity": "sha512-n8iosDOt6Ig1UhJ2AYqoIhHWh/isz0xpicHTzpKBeotdVsTEcxsSA/i3EVM7gQAj0rU27OLAxCjzlj15IWY7bg==",
+      "version": "1.0.0-rc.15",
+      "resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.15.tgz",
+      "integrity": "sha512-UromN0peaE53IaBRe9W7CjrZgXl90fqGpK+mIZbA3qSTeYqg3pqpROBdIPvOG3F5ereDHNwoHBI2e50n1BDr1g==",
      "dev": true,
      "license": "MIT"
    },
@@ -11460,14 +11460,14 @@
      }
    },
    "node_modules/tinyglobby": {
-      "version": "0.2.16",
-      "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
-      "integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
+      "version": "0.2.15",
+      "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
+      "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "fdir": "^6.5.0",
-        "picomatch": "^4.0.4"
+        "picomatch": "^4.0.3"
      },
      "engines": {
        "node": ">=12.0.0"
@@ -11699,9 +11699,9 @@
      }
    },
    "node_modules/typescript": {
-      "version": "6.0.3",
-      "resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz",
-      "integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==",
+      "version": "5.9.3",
+      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
+      "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
      "devOptional": true,
      "license": "Apache-2.0",
      "bin": {
@@ -12001,17 +12001,17 @@
      }
    },
    "node_modules/vite": {
-      "version": "8.0.10",
-      "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.10.tgz",
-      "integrity": "sha512-rZuUu9j6J5uotLDs+cAA4O5H4K1SfPliUlQwqa6YEwSrWDZzP4rhm00oJR5snMewjxF5V/K3D4kctsUTsIU9Mw==",
+      "version": "8.0.8",
+      "resolved": "https://registry.npmjs.org/vite/-/vite-8.0.8.tgz",
+      "integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
        "lightningcss": "^1.32.0",
        "picomatch": "^4.0.4",
-        "postcss": "^8.5.10",
-        "rolldown": "1.0.0-rc.17",
-        "tinyglobby": "^0.2.16"
+        "postcss": "^8.5.8",
+        "rolldown": "1.0.0-rc.15",
+        "tinyglobby": "^0.2.15"
      },
      "bin": {
        "vite": "bin/vite.js"
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -39,12 +39,12 @@
    "react": "^19.1.0",
    "react-chartjs-2": "^5.3.0",
    "react-dom": "^19.2.5",
-    "react-dropzone": "^15.0.0",
+    "react-dropzone": "^14.3.8",
    "react-google-drive-picker": "^1.2.2",
-    "react-i18next": "^17.0.6",
+    "react-i18next": "^17.0.2",
    "react-markdown": "^9.0.1",
    "react-redux": "^9.2.0",
-    "react-router-dom": "^7.14.2",
+    "react-router-dom": "^7.14.1",
    "react-syntax-highlighter": "^16.1.1",
    "reactflow": "^11.11.4",
    "rehype-katex": "^7.0.1",
@@ -58,7 +58,7 @@
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
    "@types/react-syntax-highlighter": "^15.5.13",
-    "@typescript-eslint/eslint-plugin": "^8.59.1",
+    "@typescript-eslint/eslint-plugin": "^8.58.2",
    "@typescript-eslint/parser": "^8.46.3",
    "@vitejs/plugin-react": "^6.0.1",
    "eslint": "^9.39.1",
@@ -71,13 +71,13 @@
    "eslint-plugin-unused-imports": "^4.1.4",
    "husky": "^9.1.7",
    "lint-staged": "^16.4.0",
-    "postcss": "^8.5.12",
+    "postcss": "^8.4.49",
    "prettier": "^3.5.3",
    "prettier-plugin-tailwindcss": "^0.7.2",
    "tailwindcss": "^4.2.2",
    "tw-animate-css": "^1.4.0",
-    "typescript": "^6.0.3",
-    "vite": "^8.0.10",
+    "typescript": "^5.8.3",
+    "vite": "^8.0.8",
    "vite-plugin-svgr": "^4.3.0"
  }
 }
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -85,13 +85,6 @@ export default function App() {
          }
        >
          <Route index element={<Conversation />} />
-          {/* One dynamic route (accepting "new" or a UUID) so the
-              /c/new → /c/<id> replace doesn't remount Conversation. */}
-          <Route path="/c/:conversationId" element={<Conversation />} />
-          <Route
-            path="/agents/:agentId/c/:conversationId"
-            element={<Conversation />}
-          />
          <Route path="/settings/*" element={<Setting />} />
          <Route path="/agents/*" element={<Agents />} />
        </Route>
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -25,7 +25,6 @@ import UnPin from './assets/unpin.svg';
 import Help from './components/Help';
 import {
  handleAbort,
-  loadConversation,
  selectQueries,
  setConversation,
  updateConversationId,
@@ -51,7 +50,6 @@ import {
  setSelectedAgent,
  setSharedAgents,
 } from './preferences/preferenceSlice';
-import { AppDispatch } from './store';
 import Upload from './upload/Upload';

 interface NavigationProps {
@@ -60,7 +58,7 @@ interface NavigationProps {
 }

 export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
-  const dispatch = useDispatch<AppDispatch>();
+  const dispatch = useDispatch();
  const navigate = useNavigate();

  const { t } = useTranslation();
@@ -184,7 +182,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
    resetConversation();
    dispatch(setSelectedAgent(agent));
    if (isMobile || isTablet) setNavOpen(!navOpen);
-    navigate(agent.id ? `/agents/${agent.id}/c/new` : '/c/new');
+    navigate('/');
  };

  const handleTogglePin = (agent: Agent) => {
@@ -202,21 +200,20 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
    try {
      dispatch(setSelectedAgent(null));

-      // Pre-fetch to choose the route shape (owned-agent / shared / none).
-      const result = await dispatch(
-        loadConversation({ id: index, force: true }),
-      ).unwrap();
-      // Stale: a newer load has already updated Redux; the URL is
-      // wherever that newer flow lands, leave it alone.
-      if (result.stale) return;
-      const data = result.data;
-      if (!data) {
-        navigate('/c/new');
+      const response = await conversationService.getConversation(index, token);
+      if (!response.ok) {
+        navigate('/');
        return;
      }

+      const data = await response.json();
+      if (!data) return;
+
+      dispatch(setConversation(data.queries));
+      dispatch(updateConversationId({ query: { conversationId: index } }));
+
      if (!data.agent_id) {
-        navigate(`/c/${index}`);
+        navigate('/');
        return;
      }

@@ -227,7 +224,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
          token,
        );
        if (!sharedResponse.ok) {
-          navigate(`/c/${index}`);
+          navigate('/');
          return;
        }
        agent = await sharedResponse.json();
@@ -235,7 +232,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
      } else {
        const agentResponse = await userService.getAgent(data.agent_id, token);
        if (!agentResponse.ok) {
-          navigate(`/c/${index}`);
+          navigate('/');
          return;
        }
        agent = await agentResponse.json();
@@ -243,12 +240,12 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
          navigate(`/agents/shared/${agent.shared_token}`);
        } else {
          await Promise.resolve(dispatch(setSelectedAgent(agent)));
-          navigate(`/agents/${data.agent_id}/c/${index}`);
+          navigate('/');
        }
      }
    } catch (error) {
      console.error('Error handling conversation click:', error);
-      navigate('/c/new');
+      navigate('/');
    }
  };

@@ -267,7 +264,6 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
    if (queries && queries?.length > 0) {
      resetConversation();
    }
-    navigate('/c/new');
  };

  async function updateConversationName(updatedConversation: {
@@ -279,6 +275,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
      .then((response) => response.json())
      .then((data) => {
        if (data) {
+          navigate('/');
          fetchConversations();
        }
      })
@@ -373,7 +370,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
          </button>
        </div>
        <NavLink
-          to={'/c/new'}
+          to={'/'}
          onClick={() => {
            if (isMobile || isTablet) {
              setNavOpen(!navOpen);
--- a/frontend/src/agents/AgentCard.tsx
+++ b/frontend/src/agents/AgentCard.tsx
@@ -174,7 +174,7 @@ export default function AgentCard({
    if (section === 'user') {
      if (agent.status === 'published') {
        dispatch(setSelectedAgent(agent));
-        navigate(agent.id ? `/agents/${agent.id}/c/new` : '/c/new');
+        navigate(`/`);
      }
    }
    if (section === 'shared') {
--- a/frontend/src/agents/NewAgent.tsx
+++ b/frontend/src/agents/NewAgent.tsx
@@ -565,22 +565,8 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
          setJsonSchemaText(jsonText);
          setJsonSchemaValid(true);
        }
-        // Backfill required fields so older agents (created before
-        // agent_type / prompt_id / models existed) don't fail
-        // ``isPublishable()`` and leave Save permanently disabled.
-        const normalized = {
-          ...data,
-          agent_type: data.agent_type || 'classic',
-          prompt_id: data.prompt_id || 'default',
-          retriever: data.retriever || 'classic',
-          chunks: data.chunks || '2',
-          tools: data.tools || [],
-          sources: data.sources || [],
-          models: data.models || [],
-          default_model_id: data.default_model_id || '',
-        };
-        setAgent(normalized);
-        initialAgentRef.current = normalized;
+        setAgent(data);
+        initialAgentRef.current = data;
      };
      getAgent();
    }
--- a/frontend/src/agents/SharedAgentCard.tsx
+++ b/frontend/src/agents/SharedAgentCard.tsx
@@ -1,18 +1,8 @@
-import { useTranslation } from 'react-i18next';
-
-import EditIcon from '../assets/edit.svg';
 import AgentImage from '../components/AgentImage';
 import { getToolDisplayName } from '../utils/toolUtils';
 import { Agent } from './types';

-export default function SharedAgentCard({
-  agent,
-  onEdit,
-}: {
-  agent: Agent;
-  onEdit?: () => void;
-}) {
-  const { t } = useTranslation();
+export default function SharedAgentCard({ agent }: { agent: Agent }) {
  // Check if shared metadata exists and has properties (type is 'any' so we validate it's a non-empty object)
  const hasSharedMetadata =
    agent.shared_metadata &&
@@ -21,14 +11,14 @@ export default function SharedAgentCard({
    Object.keys(agent.shared_metadata).length > 0;
  return (
    <div className="border-border dark:border-border flex w-full max-w-[720px] flex-col rounded-3xl border p-6 shadow-xs sm:w-fit sm:min-w-[480px]">
-      <div className="flex items-start gap-3">
+      <div className="flex items-center gap-3">
        <div className="flex h-12 w-12 items-center justify-center overflow-hidden rounded-full p-1">
          <AgentImage
            src={agent.image}
            className="h-full w-full rounded-full object-contain"
          />
        </div>
-        <div className="flex max-h-[92px] flex-1 flex-col gap-px">
+        <div className="flex max-h-[92px] w-[80%] flex-col gap-px">
          <h2 className="text-foreground text-base font-semibold sm:text-lg">
            {agent.name}
          </h2>
@@ -36,17 +26,6 @@ export default function SharedAgentCard({
            {agent.description}
          </p>
        </div>
-        {onEdit && (
-          <button
-            type="button"
-            onClick={onEdit}
-            className="border-border hover:bg-accent text-foreground flex shrink-0 items-center gap-1.5 rounded-full border px-3 py-1.5 text-sm font-medium transition-colors"
-            aria-label={t('agents.edit')}
-          >
-            <img src={EditIcon} alt="" className="h-3.5 w-3.5" />
-            {t('agents.edit')}
-          </button>
-        )}
      </div>
      {hasSharedMetadata && (
        <div className="mt-4 flex items-center gap-8">
--- a/frontend/src/api/endpoints.ts
+++ b/frontend/src/api/endpoints.ts
@@ -92,7 +92,6 @@ const endpoints = {
    FEEDBACK: '/api/feedback',
    CONVERSATION: (id: string) => `/api/get_single_conversation?id=${id}`,
    CONVERSATIONS: '/api/get_conversations',
-    MESSAGE_TAIL: (messageId: string) => `/api/messages/${messageId}/tail`,
    SHARE_CONVERSATION: (isPromptable: boolean) =>
      `/api/share?isPromptable=${isPromptable}`,
    SHARED_CONVERSATION: (identifier: string) =>
--- a/frontend/src/api/services/conversationService.ts
+++ b/frontend/src/api/services/conversationService.ts
@@ -6,20 +6,18 @@ const conversationService = {
    data: any,
    token: string | null,
    signal: AbortSignal,
-    headers: Record<string, string> = {},
  ): Promise<any> =>
-    apiClient.post(endpoints.CONVERSATION.ANSWER, data, token, headers, signal),
+    apiClient.post(endpoints.CONVERSATION.ANSWER, data, token, {}, signal),
  answerStream: (
    data: any,
    token: string | null,
    signal: AbortSignal,
-    headers: Record<string, string> = {},
  ): Promise<any> =>
    apiClient.post(
      endpoints.CONVERSATION.ANSWER_STREAMING,
      data,
      token,
-      headers,
+      {},
      signal,
    ),
  search: (data: any, token: string | null): Promise<any> =>
@@ -28,8 +26,6 @@ const conversationService = {
    apiClient.post(endpoints.CONVERSATION.FEEDBACK, data, token, {}),
  getConversation: (id: string, token: string | null): Promise<any> =>
    apiClient.get(endpoints.CONVERSATION.CONVERSATION(id), token, {}),
-  tailMessage: (messageId: string, token: string | null): Promise<any> =>
-    apiClient.get(endpoints.CONVERSATION.MESSAGE_TAIL(messageId), token, {}),
  getConversations: (token: string | null): Promise<any> =>
    apiClient.get(endpoints.CONVERSATION.CONVERSATIONS, token, {}),
  shareConversation: (
--- a/frontend/src/components/ActionButtons.tsx
+++ b/frontend/src/components/ActionButtons.tsx
@@ -40,7 +40,7 @@ export default function ActionButtons({
        query: { conversationId: null },
      }),
    );
-    navigate('/c/new');
+    navigate('/');
  };
  return (
    <div
--- a/frontend/src/conversation/Conversation.tsx
+++ b/frontend/src/conversation/Conversation.tsx
@@ -1,11 +1,8 @@
 import { useCallback, useEffect, useRef, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 import { useDispatch, useSelector } from 'react-redux';
-import { useNavigate, useParams } from 'react-router-dom';

-import userService from '../api/services/userService';
 import SharedAgentCard from '../agents/SharedAgentCard';
-import { Agent } from '../agents/types';
 import ArtifactSidebar from '../components/ArtifactSidebar';
 import MessageInput from '../components/MessageInput';
 import { useMediaQuery } from '../hooks';
@@ -13,7 +10,6 @@ import {
  selectConversationId,
  selectSelectedAgent,
  selectToken,
-  setSelectedAgent,
 } from '../preferences/preferenceSlice';
 import { AppDispatch } from '../store';
 import { handleSendFeedback } from './conversationHandlers';
@@ -23,9 +19,7 @@ import { ToolCallsType } from './types';
 import {
  addQuery,
  fetchAnswer,
-  loadConversation,
  resendQuery,
-  resetConversation,
  selectQueries,
  selectStatus,
  submitToolActions,
@@ -37,16 +31,6 @@ export default function Conversation() {
  const { t } = useTranslation();
  const { isMobile } = useMediaQuery();
  const dispatch = useDispatch<AppDispatch>();
-  const navigate = useNavigate();
-  const params = useParams<{
-    conversationId?: string;
-    agentId?: string;
-  }>();
-  const urlConversationId = params.conversationId;
-  const urlAgentId = params.agentId;
-  // ``new`` is treated as empty-chat intent, not a real id to fetch.
-  const isNewChatRoute =
-    urlConversationId === undefined || urlConversationId === 'new';

  const token = useSelector(selectToken);
  const queries = useSelector(selectQueries);
@@ -58,65 +42,6 @@ export default function Conversation() {
  const [lastQueryReturnedErr, setLastQueryReturnedErr] =
    useState<boolean>(false);

-  // URL → state. Thunk short-circuits when Redux already matches.
-  useEffect(() => {
-    if (isNewChatRoute) {
-      // Skip when nothing to reset; avoids wiping the in-flight stream
-      // during the null → assigned-id replace below.
-      if (conversationId !== null) {
-        dispatch(resetConversation());
-      }
-      return;
-    }
-    if (urlConversationId && urlConversationId !== conversationId) {
-      dispatch(loadConversation({ id: urlConversationId }))
-        .unwrap()
-        .then((result) => {
-          if (result.stale) return;
-          if (result.data === null) {
-            navigate('/c/new', { replace: true });
-          }
-        })
-        .catch(() => navigate('/c/new', { replace: true }));
-    }
-  }, [urlConversationId, isNewChatRoute]);
-
-  // Agent context follows the URL. ``cancelled`` covers two races:
-  // the user switches agents before the fetch resolves, or leaves the
-  // agent route entirely; either way the late dispatch must be dropped.
-  useEffect(() => {
-    let cancelled = false;
-    if (urlAgentId) {
-      if (selectedAgent?.id !== urlAgentId) {
-        userService
-          .getAgent(urlAgentId, token)
-          .then((response) => (response.ok ? response.json() : null))
-          .then((agent: Agent | null) => {
-            if (cancelled) return;
-            if (agent) dispatch(setSelectedAgent(agent));
-          })
-          .catch((err) => {
-            if (!cancelled) console.error('Failed to load agent:', err);
-          });
-      }
-    } else if (selectedAgent !== null) {
-      dispatch(setSelectedAgent(null));
-    }
-    return () => {
-      cancelled = true;
-    };
-  }, [urlAgentId, token]);
-
-  // State → URL. ``replace`` so Back doesn't return to /c/new and
-  // reset the just-streamed chat.
-  useEffect(() => {
-    if (!isNewChatRoute || !conversationId) return;
-    const target = urlAgentId
-      ? `/agents/${urlAgentId}/c/${conversationId}`
-      : `/c/${conversationId}`;
-    navigate(target, { replace: true });
-  }, [conversationId, isNewChatRoute, urlAgentId]);
-
  const handleToolAction = useCallback(
    (callId: string, decision: 'approved' | 'denied', comment?: string) => {
      dispatch(
@@ -176,13 +101,7 @@ export default function Conversation() {
        .map((a) => ({ id: a.id as string, fileName: a.fileName }));

      if (index !== undefined) {
-        dispatch(
-          resendQuery({
-            index,
-            prompt: trimmedQuestion,
-            keepIdempotencyKey: isRetry,
-          }),
-        );
+        dispatch(resendQuery({ index, prompt: trimmedQuestion }));
        handleFetchAnswer({ question: trimmedQuestion, index });
      } else {
        if (!isRetry)
@@ -232,22 +151,17 @@ export default function Conversation() {
    } else if (question && status !== 'loading') {
      if (lastQueryReturnedErr && queries.length > 0) {
        const retryIndex = queries.length - 1;
-        // Different prompt = new logical action, fresh idempotency key.
-        const prevPrompt = queries[retryIndex].prompt;
-        const isSamePrompt = prevPrompt === question;
-        if (!isSamePrompt) {
-          dispatch(
-            updateQuery({
-              index: retryIndex,
-              query: {
-                prompt: question,
-              },
-            }),
-          );
-        }
+        dispatch(
+          updateQuery({
+            index: retryIndex,
+            query: {
+              prompt: question,
+            },
+          }),
+        );
        handleQuestion({
          question,
-          isRetry: isSamePrompt,
+          isRetry: true,
          index: retryIndex,
        });
      } else {
@@ -336,19 +250,7 @@ export default function Conversation() {
            headerContent={
              selectedAgent ? (
                <div className="flex w-full items-center justify-center py-4">
-                  <SharedAgentCard
-                    agent={selectedAgent}
-                    onEdit={
-                      selectedAgent.id
-                        ? () =>
-                            navigate(
-                              selectedAgent.agent_type === 'workflow'
-                                ? `/agents/workflow/edit/${selectedAgent.id}`
-                                : `/agents/edit/${selectedAgent.id}`,
-                            )
-                        : undefined
-                    }
-                  />
+                  <SharedAgentCard agent={selectedAgent} />
                </div>
              ) : undefined
            }
--- a/frontend/src/conversation/ConversationMessages.tsx
+++ b/frontend/src/conversation/ConversationMessages.tsx
@@ -248,8 +248,32 @@ export default function ConversationMessages({
      ? LAST_BUBBLE_MARGIN
      : DEFAULT_BUBBLE_MARGIN;

-    // Error first; reconciler-failed rows may carry partial thought/
-    // tool_calls and would otherwise fall into the answer branch.
+    if (query.thought || query.response || query.tool_calls || query.research) {
+      const isCurrentlyStreaming =
+        status === 'loading' && index === queries.length - 1;
+      return (
+        <ConversationBubble
+          className={bubbleMargin}
+          key={`${index}-ANSWER`}
+          message={query.response}
+          type={'ANSWER'}
+          thought={query.thought}
+          sources={query.sources}
+          toolCalls={query.tool_calls}
+          research={query.research}
+          onOpenArtifact={onOpenArtifact}
+          onToolAction={onToolAction}
+          feedback={query.feedback}
+          isStreaming={isCurrentlyStreaming}
+          handleFeedback={
+            handleFeedback
+              ? (feedback) => handleFeedback(query, feedback, index)
+              : undefined
+          }
+        />
+      );
+    }
+
    if (query.error) {
      const retryButton = (
        <button
@@ -279,38 +303,6 @@ export default function ConversationMessages({
      );
    }

-    // tool_calls.length, not tool_calls — empty arrays are JS-truthy.
-    const hasContent =
-      query.thought ||
-      query.response ||
-      (query.tool_calls && query.tool_calls.length > 0) ||
-      query.research;
-    if (hasContent) {
-      const isCurrentlyStreaming =
-        status === 'loading' && index === queries.length - 1;
-      return (
-        <ConversationBubble
-          className={bubbleMargin}
-          key={`${index}-ANSWER`}
-          message={query.response}
-          type={'ANSWER'}
-          thought={query.thought}
-          sources={query.sources}
-          toolCalls={query.tool_calls}
-          research={query.research}
-          onOpenArtifact={onOpenArtifact}
-          onToolAction={onToolAction}
-          feedback={query.feedback}
-          isStreaming={isCurrentlyStreaming}
-          handleFeedback={
-            handleFeedback
-              ? (feedback) => handleFeedback(query, feedback, index)
-              : undefined
-          }
-        />
-      );
-    }
-
    if (status === 'loading' && isLastMessage) {
      return (
        <div
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -15,7 +15,6 @@ export function handleFetchAnswer(
  attachments?: string[],
  save_conversation = true,
  modelId?: string,
-  idempotencyKey?: string,
 ): Promise<
  | {
      result: any;
@@ -67,10 +66,8 @@ export function handleFetchAnswer(
      payload.retriever = selectedDocs[0].retriever as string;
    }
  }
-  const headers: Record<string, string> = {};
-  if (idempotencyKey) headers['Idempotency-Key'] = idempotencyKey;
  return conversationService
-    .answer(payload, token, signal, headers)
+    .answer(payload, token, signal)
    .then((response) => {
      if (response.ok) {
        return response.json();
@@ -107,7 +104,6 @@ export function handleFetchAnswerSteaming(
  attachments?: string[],
  save_conversation = true,
  modelId?: string,
-  idempotencyKey?: string,
 ): Promise<Answer> {
  const payload: RetrievalPayload = {
    question: question,
@@ -141,11 +137,9 @@ export function handleFetchAnswerSteaming(
    }
  }

-  const headers: Record<string, string> = {};
-  if (idempotencyKey) headers['Idempotency-Key'] = idempotencyKey;
  return new Promise<Answer>((resolve, reject) => {
    conversationService
-      .answerStream(payload, token, signal, headers)
+      .answerStream(payload, token, signal)
      .then((response) => {
        if (!response.body) throw Error('No response body');

@@ -205,18 +199,15 @@ export function handleSubmitToolActions(
  token: string | null,
  signal: AbortSignal,
  onEvent: (event: MessageEvent) => void,
-  idempotencyKey?: string,
 ): Promise<Answer> {
  const payload = {
    conversation_id: conversationId,
    tool_actions: toolActions,
  };

-  const headers: Record<string, string> = {};
-  if (idempotencyKey) headers['Idempotency-Key'] = idempotencyKey;
  return new Promise<Answer>((resolve, reject) => {
    conversationService
-      .answerStream(payload, token, signal, headers)
+      .answerStream(payload, token, signal)
      .then((response) => {
        if (!response.body) throw Error('No response body');

--- a/frontend/src/conversation/conversationModels.ts
+++ b/frontend/src/conversation/conversationModels.ts
@@ -3,8 +3,6 @@ import { ToolCallsType } from './types';
 export type MESSAGE_TYPE = 'QUESTION' | 'ANSWER' | 'ERROR';
 export type Status = 'idle' | 'loading' | 'failed' | 'awaiting_tool_actions';
 export type FEEDBACK = 'LIKE' | 'DISLIKE' | null;
-// Mirrors ``conversation_messages.status``.
-export type MessageStatus = 'pending' | 'streaming' | 'complete' | 'failed';

 export interface Message {
  text: string;
@@ -67,13 +65,6 @@ export interface Query {
  structured?: boolean;
  schema?: object;
  research?: ResearchState;
-  // WAL placeholder id; lets the client tail an in-flight stream.
-  messageId?: string;
-  messageStatus?: MessageStatus;
-  requestId?: string;
-  lastHeartbeatAt?: string;
-  // Persisted so Retry can re-send the same key for server-side dedup.
-  idempotencyKey?: string;
 }

 export interface RetrievalPayload {
--- a/frontend/src/conversation/conversationSlice.ts
+++ b/frontend/src/conversation/conversationSlice.ts
@@ -1,6 +1,5 @@
 import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';

-import conversationService from '../api/services/conversationService';
 import { getConversations } from '../preferences/preferenceApi';
 import { setConversations } from '../preferences/preferenceSlice';
 import store from '../store';
@@ -8,7 +7,6 @@ import {
  clearAttachments,
  selectCompletedAttachments,
 } from '../upload/uploadSlice';
-import { newIdempotencyKey } from '../utils/idempotency';
 import {
  handleFetchAnswer,
  handleFetchAnswerSteaming,
@@ -18,61 +16,12 @@ import {
 import {
  Answer,
  ConversationState,
-  MessageStatus,
  Query,
  ResearchStep,
  Status,
 } from './conversationModels';
 import { ToolCallsType } from './types';

-// Maps a server message dict into the client ``Query`` shape. Only
-// terminal ``complete`` rows expose ``response``; non-terminal rows
-// would carry the WAL placeholder text, which must never render.
-// ``failed`` rows surface as ``error`` so they pick up Retry.
-export function mapServerQueryToClient(raw: any): Query {
-  const status = raw?.status as MessageStatus | undefined;
-  const isTerminalComplete = status === 'complete';
-  const isFailed = status === 'failed';
-  const metadata = raw?.metadata || {};
-
-  // Empty arrays are JS-truthy; coercing to undefined keeps the
-  // renderer from rendering a blank bubble for in-flight rows and
-  // matches the shape live-stream queries start with.
-  const toolCalls = Array.isArray(raw?.tool_calls) ? raw.tool_calls : undefined;
-  const sources = Array.isArray(raw?.sources) ? raw.sources : undefined;
-  const query: Query = {
-    prompt: raw?.prompt ?? '',
-    feedback: raw?.feedback ?? undefined,
-    thought: raw?.thought ?? undefined,
-    sources: sources && sources.length > 0 ? sources : undefined,
-    tool_calls: toolCalls && toolCalls.length > 0 ? toolCalls : undefined,
-    attachments: raw?.attachments ?? undefined,
-    messageId: raw?.message_id ?? undefined,
-    messageStatus: status,
-    requestId: raw?.request_id ?? undefined,
-    lastHeartbeatAt: raw?.last_heartbeat_at ?? undefined,
-  };
-
-  if (isTerminalComplete) {
-    query.response = raw?.response ?? '';
-  }
-  if (isFailed) {
-    query.error =
-      (typeof metadata.error === 'string' && metadata.error) ||
-      'Generation failed before completing.';
-  }
-  return query;
-}
-
-// Placeholder still being produced server-side; client should tail
-// rather than treat as idle.
-export function isInFlightMessage(query: Query | undefined): boolean {
-  if (!query) return false;
-  return (
-    query.messageStatus === 'pending' || query.messageStatus === 'streaming'
-  );
-}
-
 const initialState: ConversationState = {
  queries: [],
  status: 'idle',
@@ -90,63 +39,6 @@ export function handleAbort() {
  }
 }

-// Loads a conversation and applies it to the slice. Returns
-// ``{data, stale}``: ``stale`` true means a newer load has superseded
-// this one (or Redux already matches), so callers should not react to
-// the returned data; ``data`` null with ``stale`` false means 404.
-export type LoadConversationResult = {
-  data: any | null;
-  stale: boolean;
-};
-
-let loadSeq = 0;
-
-export const loadConversation = createAsyncThunk<
-  LoadConversationResult,
-  { id: string; force?: boolean }
->('loadConversation', async ({ id, force }, { dispatch, getState }) => {
-  const seq = ++loadSeq;
-  const state = getState() as RootState;
-  const token = state.preference.token;
-  if (!force && state.conversation.conversationId === id) {
-    return { data: null, stale: true };
-  }
-  const response = await conversationService.getConversation(id, token);
-  if (!response.ok) {
-    return { data: null, stale: false };
-  }
-  const data = await response.json();
-  if (!data) return { data: null, stale: false };
-
-  // A later loadConversation has been issued; drop our writes so its
-  // result wins, and tell the caller not to navigate off our return.
-  if (seq !== loadSeq) {
-    return { data: null, stale: true };
-  }
-
-  const mappedQueries = (data.queries || []).map(mapServerQueryToClient);
-  dispatch(conversationSlice.actions.setConversation(mappedQueries));
-  dispatch(
-    conversationSlice.actions.updateConversationId({
-      query: { conversationId: id },
-    }),
-  );
-
-  // Only tail the trailing message; earlier in-flight rows are rare.
-  const lastIdx = mappedQueries.length - 1;
-  const lastQuery = mappedQueries[lastIdx];
-  if (lastQuery && lastQuery.messageId && isInFlightMessage(lastQuery)) {
-    dispatch(
-      tailInFlightMessage({
-        messageId: lastQuery.messageId,
-        index: lastIdx,
-        conversationId: id,
-      }),
-    );
-  }
-  return { data, stale: false };
-});
-
 export const fetchAnswer = createAsyncThunk<
  Answer,
  { question: string; indx?: number }
@@ -165,30 +57,11 @@ export const fetchAnswer = createAsyncThunk<
    dispatch(clearAttachments());
  }

-  // Mutable so the SSE handler can adopt a server-assigned id and
-  // keep passing it to reducer guards once the early ``message_id``
-  // event lands.
-  let currentConversationId = state.conversation.conversationId;
+  const currentConversationId = state.conversation.conversationId;
  const modelId =
    state.preference.selectedAgent?.default_model_id ||
    state.preference.selectedModel?.id;

-  // Reuse the key on the target Query when present (retry path),
-  // else mint and persist so a later retry can re-send it.
-  const targetIndexForKey =
-    indx ?? Math.max(state.conversation.queries.length - 1, 0);
-  let idempotencyKey =
-    state.conversation.queries[targetIndexForKey]?.idempotencyKey;
-  if (!idempotencyKey) {
-    idempotencyKey = newIdempotencyKey();
-    dispatch(
-      conversationSlice.actions.updateQuery({
-        index: targetIndexForKey,
-        query: { idempotencyKey },
-      }),
-    );
-  }
-
  if (state.preference) {
    const agentKey = state.preference.selectedAgent?.key;
    if (USE_V1_API && agentKey) {
@@ -206,11 +79,7 @@ export const fetchAnswer = createAsyncThunk<
          const data = JSON.parse(event.data);
          const targetIndex = indx ?? state.conversation.queries.length - 1;

-          // Live Redux check; the closure ``state`` is a stale snapshot.
-          if (
-            currentConversationId ===
-            (getState() as RootState).conversation.conversationId
-          ) {
+          if (currentConversationId === state.conversation.conversationId) {
            if (data.type === 'end') {
              dispatch(conversationSlice.actions.setStatus('idle'));
              getConversations(state.preference.token)
@@ -238,28 +107,6 @@ export const fetchAnswer = createAsyncThunk<
                  }),
                );
              }
-            } else if (data.type === 'message_id') {
-              if (data.conversation_id) {
-                const currentState = getState() as RootState;
-                if (currentState.conversation.conversationId === null) {
-                  // setConversationId leaves status='loading'; the
-                  // status-touching updateConversationId would flip it
-                  // to 'idle' and drop subsequent chunks.
-                  dispatch(
-                    conversationSlice.actions.setConversationId(
-                      data.conversation_id,
-                    ),
-                  );
-                  currentConversationId = data.conversation_id;
-                }
-              }
-              dispatch(
-                conversationSlice.actions.updateMessageMeta({
-                  index: targetIndex,
-                  messageId: data.message_id,
-                  requestId: data.request_id,
-                }),
-              );
            } else if (data.type === 'thought') {
              dispatch(
                updateThought({
@@ -324,11 +171,8 @@ export const fetchAnswer = createAsyncThunk<
          const data = JSON.parse(event.data);
          const targetIndex = indx ?? state.conversation.queries.length - 1;

-          // Live Redux check; the closure ``state`` is a stale snapshot.
-          if (
-            currentConversationId ===
-            (getState() as RootState).conversation.conversationId
-          ) {
+          // Only process events if they match the current conversation
+          if (currentConversationId === state.conversation.conversationId) {
            if (data.type === 'end') {
              dispatch(conversationSlice.actions.setStatus('idle'));
              // Only update research status if this query has research data
@@ -367,28 +211,6 @@ export const fetchAnswer = createAsyncThunk<
                  }),
                );
              }
-            } else if (data.type === 'message_id') {
-              if (data.conversation_id) {
-                const currentState = getState() as RootState;
-                if (currentState.conversation.conversationId === null) {
-                  // setConversationId leaves status='loading'; the
-                  // status-touching updateConversationId would flip it
-                  // to 'idle' and drop subsequent chunks.
-                  dispatch(
-                    conversationSlice.actions.setConversationId(
-                      data.conversation_id,
-                    ),
-                  );
-                  currentConversationId = data.conversation_id;
-                }
-              }
-              dispatch(
-                conversationSlice.actions.updateMessageMeta({
-                  index: targetIndex,
-                  messageId: data.message_id,
-                  requestId: data.request_id,
-                }),
-              );
            } else if (data.type === 'thought') {
              const result = data.thought;
              dispatch(
@@ -471,7 +293,6 @@ export const fetchAnswer = createAsyncThunk<
        attachmentIds,
        true,
        modelId,
-        idempotencyKey,
      );
    } else {
      const answer = await handleFetchAnswer(
@@ -486,7 +307,6 @@ export const fetchAnswer = createAsyncThunk<
        attachmentIds,
        true,
        modelId,
-        idempotencyKey,
      );
      if (answer) {
        let sourcesPrepped = [];
@@ -542,67 +362,6 @@ export const fetchAnswer = createAsyncThunk<
  };
 });

-// Tail-polls the placeholder until terminal status, navigation away,
-// or hard timeout. First poll fires immediately so rows that are
-// already terminal resolve without delay.
-const TAIL_POLL_INTERVAL_MS = 2000;
-const TAIL_MAX_POLL_DURATION_MS = 10 * 60 * 1000;
-
-export const tailInFlightMessage = createAsyncThunk<
-  void,
-  { messageId: string; index: number; conversationId: string }
->(
-  'tailInFlightMessage',
-  async ({ messageId, index, conversationId }, { dispatch, getState }) => {
-    const initialState = getState() as RootState;
-    const token = initialState.preference.token;
-    const start = Date.now();
-    dispatch(conversationSlice.actions.setStatus('loading'));
-
-    while (Date.now() - start < TAIL_MAX_POLL_DURATION_MS) {
-      const cur = (getState() as RootState).conversation.conversationId;
-      if (cur !== conversationId) return;
-
-      let resp: Response;
-      try {
-        resp = await conversationService.tailMessage(messageId, token);
-      } catch {
-        await new Promise((r) => setTimeout(r, TAIL_POLL_INTERVAL_MS));
-        continue;
-      }
-
-      // 404 → row deleted (e.g. conversation wiped); bail quietly.
-      if (resp.status === 404) {
-        dispatch(conversationSlice.actions.setStatus('idle'));
-        return;
-      }
-
-      if (!resp.ok) {
-        await new Promise((r) => setTimeout(r, TAIL_POLL_INTERVAL_MS));
-        continue;
-      }
-
-      const data = await resp.json();
-      dispatch(
-        conversationSlice.actions.applyMessageTail({ index, tail: data }),
-      );
-
-      const status = data?.status as MessageStatus | undefined;
-      if (status === 'complete' || status === 'failed') {
-        dispatch(
-          conversationSlice.actions.setStatus(
-            status === 'failed' ? 'failed' : 'idle',
-          ),
-        );
-        return;
-      }
-      await new Promise((r) => setTimeout(r, TAIL_POLL_INTERVAL_MS));
-    }
-    // Hard timeout: drop status to idle so the user can interact again.
-    dispatch(conversationSlice.actions.setStatus('idle'));
-  },
-);
-
 export const submitToolActions = createAsyncThunk<
  void,
  {
@@ -620,26 +379,10 @@ export const submitToolActions = createAsyncThunk<

  const state = getState() as RootState;
  const conversationId = state.conversation.conversationId;
-  if (!conversationId) {
-    const targetIndex = state.conversation.queries.length - 1;
-    if (targetIndex >= 0) {
-      dispatch(
-        conversationSlice.actions.raiseError({
-          conversationId: null,
-          index: targetIndex,
-          message:
-            'Cannot submit decision — the conversation was not initialized. Please retry the question.',
-        }),
-      );
-    }
-    dispatch(conversationSlice.actions.setStatus('failed'));
-    return;
-  }
+  if (!conversationId) return;

  dispatch(conversationSlice.actions.setStatus('loading'));

-  // Fresh per submission: a tool decision is its own logical action.
-  const idempotencyKey = newIdempotencyKey();
  await handleSubmitToolActions(
    conversationId,
    toolActions,
@@ -660,15 +403,6 @@ export const submitToolActions = createAsyncThunk<
          });
      } else if (data.type === 'id') {
        // conversation ID already set
-      } else if (data.type === 'message_id') {
-        // Re-stamp; continuation reuses the original placeholder.
-        dispatch(
-          conversationSlice.actions.updateMessageMeta({
-            index: targetIndex,
-            messageId: data.message_id,
-            requestId: data.request_id,
-          }),
-        );
      } else if (data.type === 'thought') {
        dispatch(
          updateThought({
@@ -713,7 +447,6 @@ export const submitToolActions = createAsyncThunk<
        );
      }
    },
-    idempotencyKey,
  );
 });

@@ -729,13 +462,9 @@ export const conversationSlice = createSlice({
    },
    resendQuery(
      state,
-      action: PayloadAction<{
-        index: number;
-        prompt: string;
-        keepIdempotencyKey?: boolean;
-      }>,
+      action: PayloadAction<{ index: number; prompt: string }>,
    ) {
-      const { index, prompt, keepIdempotencyKey } = action.payload;
+      const { index, prompt } = action.payload;
      if (index < 0 || index >= state.queries.length) return;

      state.queries.splice(index + 1);
@@ -749,15 +478,6 @@ export const conversationSlice = createSlice({
      delete state.queries[index].schema;
      delete state.queries[index].feedback;
      delete state.queries[index].research;
-      // Drop stale WAL refs; the next stream's message_id event repopulates.
-      delete state.queries[index].messageId;
-      delete state.queries[index].messageStatus;
-      delete state.queries[index].requestId;
-      delete state.queries[index].lastHeartbeatAt;
-      // Retry keeps the key so the server can dedupe; Edit drops it.
-      if (!keepIdempotencyKey) {
-        delete state.queries[index].idempotencyKey;
-      }
    },
    updateStreamingQuery(
      state,
@@ -792,11 +512,6 @@ export const conversationSlice = createSlice({
      state.conversationId = action.payload.query.conversationId ?? null;
      state.status = 'idle';
    },
-    // Sets id without touching status; used mid-stream where the
-    // status-flipping updateConversationId would drop later chunks.
-    setConversationId(state, action: PayloadAction<string | null>) {
-      state.conversationId = action.payload;
-    },
    updateThought(
      state,
      action: PayloadAction<{
@@ -931,47 +646,6 @@ export const conversationSlice = createSlice({
    setStatus(state, action: PayloadAction<Status>) {
      state.status = action.payload;
    },
-    updateMessageMeta(
-      state,
-      action: PayloadAction<{
-        index: number;
-        messageId?: string;
-        requestId?: string;
-      }>,
-    ) {
-      const { index, messageId, requestId } = action.payload;
-      const query = state.queries[index];
-      if (!query) return;
-      if (messageId) query.messageId = messageId;
-      if (requestId) query.requestId = requestId;
-      // Mirror the server-side default so a refresh sees 'pending'.
-      if (!query.messageStatus) query.messageStatus = 'pending';
-    },
-    applyMessageTail(
-      state,
-      action: PayloadAction<{ index: number; tail: any }>,
-    ) {
-      const { index, tail } = action.payload;
-      const query = state.queries[index];
-      if (!query) return;
-      const status = tail?.status as MessageStatus | undefined;
-      query.messageStatus = status;
-      query.lastHeartbeatAt = tail?.last_heartbeat_at ?? query.lastHeartbeatAt;
-      if (status === 'complete') {
-        query.response = tail?.response ?? '';
-        query.thought = tail?.thought ?? query.thought;
-        query.sources = tail?.sources ?? query.sources;
-        query.tool_calls = tail?.tool_calls ?? query.tool_calls;
-        delete query.error;
-      } else if (status === 'failed') {
-        // Surface as error so the placeholder text never renders.
-        query.error =
-          (typeof tail?.error === 'string' && tail.error) ||
-          'Generation failed before completing.';
-        delete query.response;
-      }
-      // pending / streaming: untouched; spinner keeps showing.
-    },
    raiseError(
      state,
      action: PayloadAction<{
@@ -1030,11 +704,8 @@ export const {
  updateResearchPlan,
  updateResearchProgress,
  setConversation,
-  setConversationId,
  setStatus,
  raiseError,
  resetConversation,
-  applyMessageTail,
-  updateMessageMeta,
 } = conversationSlice.actions;
 export default conversationSlice.reducer;
--- a/frontend/src/locale/de.json
+++ b/frontend/src/locale/de.json
@@ -591,7 +591,6 @@
  },
  "agents": {
    "title": "Agenten",
-    "edit": "Bearbeiten",
    "description": "Entdecke und erstelle benutzerdefinierte Versionen von DocsGPT, die Anweisungen, zusätzliches Wissen und beliebige Kombinationen von Fähigkeiten kombinieren",
    "newAgent": "Neuer Agent",
    "backToAll": "Zurück zu allen Agenten",
--- a/frontend/src/locale/en.json
+++ b/frontend/src/locale/en.json
@@ -621,7 +621,6 @@
  },
  "agents": {
    "title": "Agents",
-    "edit": "Edit",
    "description": "Discover and create custom versions of DocsGPT that combine instructions, extra knowledge, and any combination of skills",
    "newAgent": "New Agent",
    "backToAll": "Back to all agents",
--- a/frontend/src/locale/es.json
+++ b/frontend/src/locale/es.json
@@ -609,7 +609,6 @@
  },
  "agents": {
    "title": "Agentes",
-    "edit": "Editar",
    "description": "Descubre y crea versiones personalizadas de DocsGPT que combinan instrucciones, conocimiento adicional y cualquier combinación de habilidades",
    "newAgent": "Nuevo Agente",
    "backToAll": "Volver a todos los agentes",
--- a/frontend/src/locale/jp.json
+++ b/frontend/src/locale/jp.json
@@ -609,7 +609,6 @@
  },
  "agents": {
    "title": "エージェント",
-    "edit": "編集",
    "description": "指示、追加知識、スキルの組み合わせを含むDocsGPTのカスタムバージョンを発見して作成します",
    "newAgent": "新しいエージェント",
    "backToAll": "すべてのエージェントに戻る",
--- a/frontend/src/locale/ru.json
+++ b/frontend/src/locale/ru.json
@@ -609,7 +609,6 @@
  },
  "agents": {
    "title": "Агенты",
-    "edit": "Редактировать",
    "description": "Откройте и создайте пользовательские версии DocsGPT, которые объединяют инструкции, дополнительные знания и любую комбинацию навыков",
    "newAgent": "Новый Агент",
    "backToAll": "Вернуться ко всем агентам",
--- a/frontend/src/locale/zh-TW.json
+++ b/frontend/src/locale/zh-TW.json
@@ -609,7 +609,6 @@
  },
  "agents": {
    "title": "代理",
-    "edit": "編輯",
    "description": "探索並創建結合指令、額外知識和任意技能組合的DocsGPT自訂版本",
    "newAgent": "新建代理",
    "backToAll": "返回所有代理",
--- a/frontend/src/locale/zh.json
+++ b/frontend/src/locale/zh.json
@@ -609,7 +609,6 @@
  },
  "agents": {
    "title": "代理",
-    "edit": "编辑",
    "description": "发现并创建结合指令、额外知识和任意技能组合的DocsGPT自定义版本",
    "newAgent": "新建代理",
    "backToAll": "返回所有代理",
--- a/frontend/src/upload/Upload.tsx
+++ b/frontend/src/upload/Upload.tsx
@@ -535,7 +535,6 @@ function Upload({

    xhr.open('POST', `${apiHost}/api/upload`);
    xhr.setRequestHeader('Authorization', `Bearer ${token}`);
-    xhr.setRequestHeader('Idempotency-Key', clientTaskId);
    xhr.send(formData);
  };

@@ -663,7 +662,6 @@ function Upload({

    xhr.open('POST', endpoint);
    xhr.setRequestHeader('Authorization', `Bearer ${token}`);
-    xhr.setRequestHeader('Idempotency-Key', clientTaskId);
    xhr.send(formData);
  };

--- a/frontend/src/utils/idempotency.ts
+++ b/frontend/src/utils/idempotency.ts
@@ -1,13 +0,0 @@
-// Per-user-action key for the ``Idempotency-Key`` header. Server
-// scopes by user, so cross-user reuse is harmless.
-export function newIdempotencyKey(): string {
-  if (
-    typeof crypto !== 'undefined' &&
-    typeof crypto.randomUUID === 'function'
-  ) {
-    return crypto.randomUUID();
-  }
-  // Fallback for older Safari / jsdom; uniqueness is enough.
-  const rand = () => Math.random().toString(16).slice(2, 10);
-  return `${rand()}-${rand()}-${rand()}-${rand()}`;
-}
--- a/frontend/tsconfig.json
+++ b/frontend/tsconfig.json
@@ -5,17 +5,18 @@
    "lib": ["DOM", "DOM.Iterable", "ESNext"],
    "allowJs": false,
    "skipLibCheck": true,
-    "esModuleInterop": true,
+    "esModuleInterop": false,
    "allowSyntheticDefaultImports": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "module": "ESNext",
-    "moduleResolution": "Bundler",
+    "moduleResolution": "Node",
    "resolveJsonModule": true,
    "types": ["vite-plugin-svgr/client"],
    "isolatedModules": true,
    "noEmit": true,
    "jsx": "react-jsx",
+    "baseUrl": ".",
    "paths": {
      "@/*": ["./src/*"]
    }
--- a/frontend/tsconfig.node.json
+++ b/frontend/tsconfig.node.json
@@ -2,7 +2,7 @@
  "compilerOptions": {
    "composite": true,
    "module": "ESNext",
-    "moduleResolution": "Bundler",
+    "moduleResolution": "Node",
    "allowSyntheticDefaultImports": true
  },
  "include": ["vite.config.ts"]
--- a/scripts/e2e/mock_llm.py
+++ b/scripts/e2e/mock_llm.py
@@ -286,15 +286,8 @@ def _stream_chat_response(
    content: str,
    tool_calls: list[dict[str, Any]] | None,
    finish_reason: str,
-    chunk_delay_ms: int = 0,
 ):
-    """Generator yielding SSE frames that match the OpenAI streaming protocol.
-
-    ``chunk_delay_ms`` (controlled by ``X-Mock-LLM-Stream-Chunk-Delay-Ms``
-    header) sleeps that many milliseconds between successive SSE frames.
-    Used by durability E2E tests to simulate slow streams that survive a
-    mid-flight ``kill -9`` against the consumer.
-    """
+    """Generator yielding SSE frames that match the OpenAI streaming protocol."""

    created = int(time.time())
    completion_id = f"chatcmpl-e2e-{digest[:12]}"
@@ -314,60 +307,23 @@ def _stream_chat_response(
            ],
        }

-    def _maybe_sleep() -> None:
-        if chunk_delay_ms > 0:
-            time.sleep(chunk_delay_ms / 1000.0)
-
    # Opening role delta — matches OpenAI's real behavior.
    yield _sse(_base_chunk({"role": "assistant", "content": ""}))

    if tool_calls:
        # Emit tool calls in one delta; content streaming is skipped when
        # tool_calls are present, matching what RAG code paths expect.
-        _maybe_sleep()
        yield _sse(_base_chunk({"tool_calls": tool_calls}))
        yield _sse(_base_chunk({}, final=True))
    else:
        chunks = _split_into_chunks(content, STREAM_CHUNK_COUNT)
        last_index = len(chunks) - 1
        for i, piece in enumerate(chunks):
-            _maybe_sleep()
            yield _sse(_base_chunk({"content": piece}, final=(i == last_index)))

    yield "data: [DONE]\n\n"


-def _read_int_header(name: str, default: int = 0, ceiling: int = 600_000) -> int:
-    """Parse an integer header with a sane upper bound (10 minutes)."""
-    raw = request.headers.get(name)
-    if not raw:
-        return default
-    try:
-        value = int(raw)
-    except (TypeError, ValueError):
-        return default
-    if value < 0:
-        return default
-    return min(value, ceiling)
-
-
-def _read_int_env(name: str, default: int = 0, ceiling: int = 600_000) -> int:
-    """Same as ``_read_int_header`` but for env vars — the durability E2E
-    script sets ``MOCK_LLM_FORCE_*_DELAY_MS`` so it can drive slow streams
-    through DocsGPT's OpenAI client without injecting per-request
-    headers."""
-    raw = os.environ.get(name)
-    if not raw:
-        return default
-    try:
-        value = int(raw)
-    except (TypeError, ValueError):
-        return default
-    if value < 0:
-        return default
-    return min(value, ceiling)
-
-
@app.post("/v1/chat/completions")
 def chat_completions() -> Response:
    payload = request.get_json(silent=True) or {}
@@ -377,18 +333,6 @@ def chat_completions() -> Response:
    digest = _compute_request_digest(payload)
    content, tool_calls, finish_reason, usage = _resolve_chat_response(payload, digest)

-    # Durability E2E hooks: per-request OR per-process delays so tests can
-    # simulate slow providers without touching fixtures or recompiling the
-    # stub. Headers win over env so a single fixture run can opt in/out.
-    upfront_delay_ms = _read_int_header("X-Mock-LLM-Total-Delay-Ms") or _read_int_env(
-        "MOCK_LLM_FORCE_TOTAL_DELAY_MS"
-    )
-    chunk_delay_ms = _read_int_header(
-        "X-Mock-LLM-Stream-Chunk-Delay-Ms"
-    ) or _read_int_env("MOCK_LLM_FORCE_STREAM_CHUNK_DELAY_MS")
-    if upfront_delay_ms > 0:
-        time.sleep(upfront_delay_ms / 1000.0)
-
    if stream:
        generator = _stream_chat_response(
            digest=digest,
@@ -396,7 +340,6 @@ def chat_completions() -> Response:
            content=content,
            tool_calls=tool_calls,
            finish_reason=finish_reason,
-            chunk_delay_ms=chunk_delay_ms,
        )
        response = Response(
            stream_with_context(generator),
--- a/scripts/qa/durability_e2e.py
+++ b/scripts/qa/durability_e2e.py
--- a/tests/agents/test_tool_action_parser.py
+++ b/tests/agents/test_tool_action_parser.py
@@ -137,39 +137,6 @@ class TestToolActionParser:
        assert action_name is None
        assert call_args is None

-    def test_parse_google_llm_string_arguments_from_resume(self):
-        # Resume path stringifies dict args for the assistant message format
-        # before re-invoking _execute_tool_action. The Google parser must
-        # decode the JSON string back to a dict so the executor's
-        # ``call_args.items()`` loop doesn't AttributeError.
-        parser = ToolActionParser("GoogleLLM")
-
-        call = Mock()
-        call.name = "search_docs_42"
-        call.arguments = '{"query": "workflows", "limit": 5}'
-
-        tool_id, action_name, call_args = parser.parse_args(call)
-
-        assert tool_id == "42"
-        assert action_name == "search_docs"
-        assert call_args == {"query": "workflows", "limit": 5}
-
-    def test_parse_google_llm_non_json_string_arguments_fall_back_to_empty_dict(self):
-        # Malformed string args fall back to ``{}`` so the executor's
-        # ``call_args.items()`` walk doesn't crash. The executor still
-        # journals the malformed call via its own type guard.
-        parser = ToolActionParser("GoogleLLM")
-
-        call = Mock()
-        call.name = "act_7"
-        call.arguments = "not json"
-
-        tool_id, action_name, call_args = parser.parse_args(call)
-
-        assert tool_id == "7"
-        assert action_name == "act"
-        assert call_args == {}
-
    def test_parse_unknown_llm_type_defaults_to_openai(self):
        parser = ToolActionParser("UnknownLLM")

--- a/tests/agents/test_tool_executor_three_phase.py
+++ b/tests/agents/test_tool_executor_three_phase.py
@@ -1,265 +0,0 @@
-"""Tests for the journaled execute path on ToolExecutor.
-
-Each tool call inserts a row into ``tool_call_attempts`` then flips
-through ``proposed → executed`` (or ``proposed → failed``). The flip
-to ``confirmed`` is owned by the message-finalize path and is only
-asserted indirectly here (rows stay in ``executed`` so the reconciler
-can pick them up).
-"""
-
-from contextlib import contextmanager
-from unittest.mock import Mock
-
-import pytest
-from sqlalchemy import text
-
-from application.agents.tool_executor import ToolExecutor
-
-
-@contextmanager
-def _yield_pg(conn):
-    """Adapter so the executor's ``db_session()`` writes land on ``pg_conn``."""
-
-    @contextmanager
-    def _yield():
-        yield conn
-
-    return _yield
-
-
-def _patch_db(monkeypatch, pg_conn):
-    """Patch all ``db_session`` entry points used by the executor and tools.
-
-    Each module imports ``db_session`` / ``db_readonly`` by name so each
-    module-level binding has to be replaced individually.
-    """
-
-    @contextmanager
-    def _use_pg():
-        yield pg_conn
-
-    targets = (
-        "application.agents.tool_executor",
-        "application.agents.tools.notes",
-        "application.agents.tools.todo_list",
-        "application.storage.db.session",
-    )
-    for module in targets:
-        monkeypatch.setattr(f"{module}.db_session", _use_pg, raising=False)
-        monkeypatch.setattr(f"{module}.db_readonly", _use_pg, raising=False)
-
-
-def _drain(gen):
-    """Exhaust a generator, returning ``(events, return_value)``."""
-    events = []
-    while True:
-        try:
-            events.append(next(gen))
-        except StopIteration as exc:
-            return events, exc.value
-
-
-def _select_attempt(pg_conn, call_id):
-    row = pg_conn.execute(
-        text("SELECT * FROM tool_call_attempts WHERE call_id = :cid"),
-        {"cid": call_id},
-    ).fetchone()
-    return row._mapping if row is not None else None
-
-
-def _make_call(name="test_action_t1", call_id="c1"):
-    call = Mock()
-    call.name = name
-    call.id = call_id
-    call.arguments = "{}"
-    return call
-
-
-@pytest.mark.unit
-class TestExecuteJournaling:
-    def test_happy_path_proposed_then_executed(
-        self, pg_conn, mock_tool_manager, monkeypatch
-    ):
-        executor = ToolExecutor(user="u")
-        monkeypatch.setattr(
-            "application.agents.tool_executor.ToolActionParser",
-            lambda _cls, **kw: Mock(
-                parse_args=Mock(return_value=("t1", "test_action", {"q": "v"}))
-            ),
-        )
-        _patch_db(monkeypatch, pg_conn)
-
-        tools_dict = {
-            "t1": {
-                "id": "00000000-0000-0000-0000-000000000001",
-                "name": "test_tool",
-                "config": {"key": "val"},
-                "actions": [
-                    {"name": "test_action", "description": "T", "parameters": {"properties": {}}},
-                ],
-            }
-        }
-
-        events, result = _drain(executor.execute(tools_dict, _make_call(), "MockLLM"))
-        assert result[0] == "Tool result"
-
-        row = _select_attempt(pg_conn, "c1")
-        assert row is not None
-        assert row["status"] == "executed"
-        assert row["tool_name"] == "test_tool"
-        assert row["action_name"] == "test_action"
-        assert row["arguments"] == {"q": "v"}
-        # Result is wrapped so a future ``artifact_id`` can ride alongside.
-        assert row["result"] == {"result": "Tool result"}
-        assert row["error"] is None
-        assert row["message_id"] is None
-
-    def test_executor_message_id_is_persisted_on_executed_row(
-        self, pg_conn, mock_tool_manager, monkeypatch
-    ):
-        """When the route stamps a placeholder message_id on the executor,
-        the journal row carries it forward so ``confirm_executed_tool_calls``
-        can later flip it to ``confirmed``.
-        """
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        # FK constraint: message_id must reference a real row.
-        repo = ConversationsRepository(pg_conn)
-        conv = repo.create("u-mid", "msg-id-test")
-        msg = repo.reserve_message(
-            str(conv["id"]),
-            prompt="q?",
-            placeholder_response="...",
-            request_id="req-mid-1",
-            status="pending",
-        )
-        message_uuid = str(msg["id"])
-
-        executor = ToolExecutor(user="u")
-        executor.message_id = message_uuid
-        monkeypatch.setattr(
-            "application.agents.tool_executor.ToolActionParser",
-            lambda _cls, **kw: Mock(
-                parse_args=Mock(return_value=("t1", "test_action", {}))
-            ),
-        )
-        _patch_db(monkeypatch, pg_conn)
-
-        tools_dict = {
-            "t1": {
-                "id": "00000000-0000-0000-0000-000000000001",
-                "name": "test_tool",
-                "config": {"key": "val"},
-                "actions": [
-                    {"name": "test_action", "description": "T", "parameters": {"properties": {}}},
-                ],
-            }
-        }
-
-        _drain(executor.execute(tools_dict, _make_call(call_id="cm1"), "MockLLM"))
-
-        row = _select_attempt(pg_conn, "cm1")
-        assert row is not None
-        assert row["status"] == "executed"
-        assert str(row["message_id"]) == message_uuid
-
-    def test_tool_raises_marks_failed_and_reraises(
-        self, pg_conn, mock_tool_manager, monkeypatch
-    ):
-        executor = ToolExecutor(user="u")
-        monkeypatch.setattr(
-            "application.agents.tool_executor.ToolActionParser",
-            lambda _cls, **kw: Mock(
-                parse_args=Mock(return_value=("t1", "test_action", {}))
-            ),
-        )
-        _patch_db(monkeypatch, pg_conn)
-        mock_tool_manager.load_tool.return_value.execute_action.side_effect = (
-            RuntimeError("boom")
-        )
-
-        tools_dict = {
-            "t1": {
-                "id": "00000000-0000-0000-0000-000000000001",
-                "name": "test_tool",
-                "config": {"key": "val"},
-                "actions": [
-                    {"name": "test_action", "description": "T", "parameters": {"properties": {}}},
-                ],
-            }
-        }
-
-        gen = executor.execute(tools_dict, _make_call(call_id="c2"), "MockLLM")
-        with pytest.raises(RuntimeError, match="boom"):
-            _drain(gen)
-
-        row = _select_attempt(pg_conn, "c2")
-        assert row is not None
-        assert row["status"] == "failed"
-        assert row["error"] == "boom"
-
-    def test_executed_row_lingers_for_reconciler_when_no_confirm(
-        self, pg_conn, mock_tool_manager, monkeypatch
-    ):
-        """No finalize_message call → row sits in ``executed``."""
-        executor = ToolExecutor(user="u")
-        monkeypatch.setattr(
-            "application.agents.tool_executor.ToolActionParser",
-            lambda _cls, **kw: Mock(
-                parse_args=Mock(return_value=("t1", "test_action", {}))
-            ),
-        )
-        _patch_db(monkeypatch, pg_conn)
-
-        tools_dict = {
-            "t1": {
-                "id": "00000000-0000-0000-0000-000000000001",
-                "name": "test_tool",
-                "config": {"key": "val"},
-                "actions": [
-                    {"name": "test_action", "description": "T", "parameters": {"properties": {}}},
-                ],
-            }
-        }
-
-        _drain(executor.execute(tools_dict, _make_call(call_id="c3"), "MockLLM"))
-
-        row = _select_attempt(pg_conn, "c3")
-        assert row["status"] == "executed"
-        # Partial index `tool_call_attempts_pending_ts_idx` selects rows
-        # in ('proposed','executed') — the reconciler reads those.
-        assert row["status"] in ("proposed", "executed")
-
-
-@pytest.mark.unit
-class TestRepository:
-    def test_proposed_then_executed_round_trip(self, pg_conn):
-        from application.storage.db.repositories.tool_call_attempts import (
-            ToolCallAttemptsRepository,
-        )
-
-        repo = ToolCallAttemptsRepository(pg_conn)
-        assert repo.record_proposed("c-x", "tool", "act", {"a": 1}) is True
-        # Duplicate insert is a no-op; original row stays put.
-        assert repo.record_proposed("c-x", "tool", "act", {"a": 1}) is False
-        row = _select_attempt(pg_conn, "c-x")
-        assert row["status"] == "proposed"
-
-        assert repo.mark_executed("c-x", {"out": "ok"}) is True
-        row = _select_attempt(pg_conn, "c-x")
-        assert row["status"] == "executed"
-        assert row["result"] == {"result": {"out": "ok"}}
-
-    def test_mark_failed_sets_error(self, pg_conn):
-        from application.storage.db.repositories.tool_call_attempts import (
-            ToolCallAttemptsRepository,
-        )
-
-        repo = ToolCallAttemptsRepository(pg_conn)
-        repo.record_proposed("c-y", "tool", "act", {})
-        assert repo.mark_failed("c-y", "kaboom") is True
-        row = _select_attempt(pg_conn, "c-y")
-        assert row["status"] == "failed"
-        assert row["error"] == "kaboom"
--- a/tests/api/answer/routes/test_base.py
+++ b/tests/api/answer/routes/test_base.py
@@ -218,19 +218,10 @@ class TestCompleteStreamMethod:

            decoded_token = {"sub": "user123"}

-            # The fresh-question path now reserves a row before agent.gen()
-            # and calls finalize_message at end of stream — assert both fire.
            with patch.object(
-                resource.conversation_service, "save_user_question"
-            ) as mock_reserve, patch.object(
-                resource.conversation_service, "finalize_message"
-            ) as mock_finalize:
-                mock_reserve.return_value = {
-                    "conversation_id": str(uuid.uuid4()),
-                    "message_id": str(uuid.uuid4()),
-                    "request_id": "req-1",
-                }
-                mock_finalize.return_value = True
+                resource.conversation_service, "save_conversation"
+            ) as mock_save:
+                mock_save.return_value = str(uuid.uuid4())

                list(
                    resource.complete_stream(
@@ -243,8 +234,7 @@ class TestCompleteStreamMethod:
                    )
                )

-                mock_reserve.assert_called_once()
-                mock_finalize.assert_called_once()
+                mock_save.assert_called_once()



--- a/tests/api/answer/services/test_continuation_service_pg.py
+++ b/tests/api/answer/services/test_continuation_service_pg.py
@@ -45,26 +45,19 @@ class TestMakeSerializable:
        got = _make_serializable([u, {"x": u}, 1])
        assert got == [str(u), {"x": str(u)}, 1]

-    def test_bytes_base64_encoded(self):
-        # Migrated from UTF-8-replace to base64 once the helper moved to
-        # the shared serialization module — base64 is lossless and round-
-        # trippable (UTF-8-replace silently corrupted binary payloads).
-        import base64
+    def test_bytes_decoded_to_string(self):
        from application.api.answer.services.continuation_service import (
            _make_serializable,
        )
-        got = _make_serializable(b"hello")
-        assert got == base64.b64encode(b"hello").decode("ascii")
+        assert _make_serializable(b"hello") == "hello"

-    def test_bytes_arbitrary_binary_roundtrips(self):
-        import base64
+    def test_bytes_invalid_utf8_replaced(self):
        from application.api.answer.services.continuation_service import (
            _make_serializable,
        )
-        raw = b"\xff\xfe\x00\x10"
-        got = _make_serializable(raw)
+        # Invalid UTF-8 byte sequence
+        got = _make_serializable(b"\xff\xfe")
        assert isinstance(got, str)
-        assert base64.b64decode(got) == raw

    def test_passes_through_primitives(self):
        from application.api.answer.services.continuation_service import (
@@ -75,50 +68,6 @@ class TestMakeSerializable:
        assert _make_serializable(None) is None
        assert _make_serializable(True) is True

-    def test_datetime_becomes_iso_string(self):
-        # PG SELECT * pulls timestamptz columns through as datetime —
-        # tools_dict carries ``created_at``/``updated_at`` from user_tools
-        # rows, which would otherwise blow up json.dumps in pending_tool_state.
-        import json
-        from datetime import datetime, timezone
-        from application.api.answer.services.continuation_service import (
-            _make_serializable,
-        )
-
-        ts = datetime(2026, 5, 2, 12, 14, 32, tzinfo=timezone.utc)
-        got = _make_serializable(ts)
-        assert got == "2026-05-02T12:14:32+00:00"
-        json.dumps(got)  # would raise on raw datetime
-
-    def test_datetime_nested_in_tools_dict(self):
-        # Mirrors the production failure: tools_dict is a dict-of-dicts
-        # where each tool row has timestamp fields buried under string keys.
-        import json
-        from datetime import datetime, timezone
-        from application.api.answer.services.continuation_service import (
-            _make_serializable,
-        )
-
-        ts = datetime(2026, 5, 2, 12, 14, 32, tzinfo=timezone.utc)
-        tools_dict = {
-            "0": {
-                "name": "mcp_tool",
-                "actions": [{"name": "search", "active": True}],
-                "created_at": ts,
-                "updated_at": ts,
-            }
-        }
-        got = _make_serializable(tools_dict)
-        json.dumps(got)
-        assert got["0"]["created_at"] == "2026-05-02T12:14:32+00:00"
-
-    def test_date_becomes_iso_string(self):
-        from datetime import date
-        from application.api.answer.services.continuation_service import (
-            _make_serializable,
-        )
-        assert _make_serializable(date(2026, 5, 2)) == "2026-05-02"
-

 class TestContinuationServiceSaveLoad:
    def test_save_and_load_state(self, pg_conn):
--- a/tests/api/answer/services/test_conversation_service.py
+++ b/tests/api/answer/services/test_conversation_service.py
@@ -229,489 +229,6 @@ class TestConversationServiceSave:
        assert got["name"] == "q-fallback"


-class TestSaveUserQuestion:
-    def test_creates_conversation_and_reserves_message(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-            TERMINATED_RESPONSE_PLACEHOLDER,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-wal-new"
-        with _patch_db(pg_conn):
-            result = ConversationService().save_user_question(
-                conversation_id=None,
-                question="what is python?",
-                decoded_token={"sub": user},
-            )
-        assert result["conversation_id"]
-        assert result["message_id"]
-        assert result["request_id"]
-
-        repo = ConversationsRepository(pg_conn)
-        conv = repo.get_any(result["conversation_id"], user)
-        assert conv is not None
-        messages = repo.get_messages(result["conversation_id"])
-        assert len(messages) == 1
-        assert messages[0]["status"] == "pending"
-        assert messages[0]["prompt"] == "what is python?"
-        assert messages[0]["response"] == TERMINATED_RESPONSE_PLACEHOLDER
-        assert messages[0]["request_id"] == result["request_id"]
-
-    def test_appends_to_existing_conversation(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-wal-existing"
-        repo = ConversationsRepository(pg_conn)
-        conv = repo.create(user, name="hi")
-        conv_id = str(conv["id"])
-
-        with _patch_db(pg_conn):
-            result = ConversationService().save_user_question(
-                conversation_id=conv_id,
-                question="follow-up",
-                decoded_token={"sub": user},
-            )
-        assert result["conversation_id"] == conv_id
-        msgs = repo.get_messages(conv_id)
-        assert len(msgs) == 1
-        assert msgs[0]["prompt"] == "follow-up"
-
-    def test_raises_when_token_missing(self):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        with pytest.raises(ValueError):
-            ConversationService().save_user_question(
-                conversation_id=None,
-                question="q",
-                decoded_token=None,
-            )
-
-    def test_regenerate_at_index_replaces_old_message(self, pg_conn):
-        """Regenerate at ``index`` truncates the old message *and
-        everything after* before reserving the placeholder, so the new
-        WAL row lands at ``position=index`` rather than appending at
-        the end. Pre-fix the WAL path appended unconditionally and the
-        old answer survived alongside the regenerated one.
-        """
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-wal-regen"
-        repo = ConversationsRepository(pg_conn)
-        conv = repo.create(user, name="regen-test")
-        conv_id = str(conv["id"])
-
-        # Seed five completed messages at positions 0..4.
-        for i in range(5):
-            repo.append_message(
-                conv_id,
-                {
-                    "prompt": f"q{i}",
-                    "response": f"a{i}",
-                    "thought": "",
-                    "sources": [],
-                    "tool_calls": [],
-                    "metadata": {},
-                },
-            )
-        seeded = repo.get_messages(conv_id)
-        assert len(seeded) == 5
-        assert [m["position"] for m in seeded] == [0, 1, 2, 3, 4]
-
-        with _patch_db(pg_conn):
-            result = ConversationService().save_user_question(
-                conversation_id=conv_id,
-                question="q3-regen",
-                decoded_token={"sub": user},
-                index=3,
-            )
-
-        msgs = repo.get_messages(conv_id)
-        # Positions 0,1,2 from the seed plus the new placeholder at 3.
-        assert [m["position"] for m in msgs] == [0, 1, 2, 3]
-        # The placeholder carries the regenerated prompt.
-        regen = next(m for m in msgs if m["position"] == 3)
-        assert regen["prompt"] == "q3-regen"
-        assert regen["status"] == "pending"
-        assert str(regen["id"]) == result["message_id"]
-        # The old answer at index 3 is gone.
-        assert not any(m["response"] == "a3" for m in msgs)
-        # And anything after index 3 was truncated.
-        assert not any(m["prompt"] == "q4" for m in msgs)
-
-    def test_regenerate_at_index_zero_truncates_everything(self, pg_conn):
-        """``index=0`` is a valid edge: it should drop every prior
-        message and reseat the placeholder at position 0.
-        """
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-wal-regen-zero"
-        repo = ConversationsRepository(pg_conn)
-        conv = repo.create(user, name="regen-zero")
-        conv_id = str(conv["id"])
-        for i in range(3):
-            repo.append_message(
-                conv_id,
-                {
-                    "prompt": f"old-{i}",
-                    "response": f"old-a-{i}",
-                    "thought": "",
-                    "sources": [],
-                    "tool_calls": [],
-                    "metadata": {},
-                },
-            )
-
-        with _patch_db(pg_conn):
-            ConversationService().save_user_question(
-                conversation_id=conv_id,
-                question="fresh-from-start",
-                decoded_token={"sub": user},
-                index=0,
-            )
-
-        msgs = repo.get_messages(conv_id)
-        assert len(msgs) == 1
-        assert msgs[0]["position"] == 0
-        assert msgs[0]["prompt"] == "fresh-from-start"
-
-    def test_regenerate_index_ignored_without_conversation_id(self, pg_conn):
-        """``index`` only makes sense against an existing conversation;
-        the create-then-reserve path silently treats it as a no-op
-        rather than truncating a freshly-created conversation.
-        """
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-wal-regen-no-conv"
-        with _patch_db(pg_conn):
-            result = ConversationService().save_user_question(
-                conversation_id=None,
-                question="brand new q",
-                decoded_token={"sub": user},
-                index=2,
-            )
-
-        repo = ConversationsRepository(pg_conn)
-        msgs = repo.get_messages(result["conversation_id"])
-        assert len(msgs) == 1
-        assert msgs[0]["position"] == 0
-        assert msgs[0]["prompt"] == "brand new q"
-
-    def test_raises_when_conversation_unauthorized(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        with _patch_db(pg_conn), pytest.raises(ValueError):
-            ConversationService().save_user_question(
-                conversation_id="00000000-0000-0000-0000-000000000000",
-                question="q",
-                decoded_token={"sub": "u"},
-            )
-
-
-class TestFinalizeMessage:
-    def test_finalizes_complete(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-fin-ok"
-        with _patch_db(pg_conn):
-            svc = ConversationService()
-            res = svc.save_user_question(
-                conversation_id=None,
-                question="q",
-                decoded_token={"sub": user},
-            )
-            assert svc.finalize_message(
-                res["message_id"],
-                "real answer",
-                thought="thinking",
-                sources=[{"text": "x" * 2000, "title": "doc"}],
-                tool_calls=[{"name": "search"}],
-                model_id="gpt-4",
-                metadata={"foo": "bar"},
-                status="complete",
-            ) is True
-
-        msgs = ConversationsRepository(pg_conn).get_messages(
-            res["conversation_id"],
-        )
-        assert msgs[0]["response"] == "real answer"
-        assert msgs[0]["status"] == "complete"
-        assert msgs[0]["thought"] == "thinking"
-        assert msgs[0]["model_id"] == "gpt-4"
-        # source text trimmed to 1000 chars at finalize time
-        assert len(msgs[0]["sources"][0]["text"]) == 1000
-        assert msgs[0]["metadata"]["foo"] == "bar"
-
-    def test_finalizes_failed_records_error(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-fin-fail"
-        with _patch_db(pg_conn):
-            svc = ConversationService()
-            res = svc.save_user_question(
-                conversation_id=None,
-                question="q",
-                decoded_token={"sub": user},
-            )
-            err = RuntimeError("provider down")
-            assert svc.finalize_message(
-                res["message_id"],
-                "fallback text",
-                status="failed",
-                error=err,
-            ) is True
-
-        msgs = ConversationsRepository(pg_conn).get_messages(
-            res["conversation_id"],
-        )
-        assert msgs[0]["status"] == "failed"
-        assert msgs[0]["metadata"]["error"] == "RuntimeError: provider down"
-
-    def test_finalize_flips_executed_tool_calls(self, pg_conn):
-        """finalize_message must mark tool_call_attempts.status='executed'
-        rows as 'confirmed' for the same message_id."""
-        from sqlalchemy import text as sql_text
-
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-
-        user = "u-fin-tools"
-        with _patch_db(pg_conn):
-            svc = ConversationService()
-            res = svc.save_user_question(
-                conversation_id=None,
-                question="q",
-                decoded_token={"sub": user},
-            )
-            pg_conn.execute(
-                sql_text(
-                    "INSERT INTO tool_call_attempts "
-                    "(call_id, message_id, tool_name, action_name, arguments, status) "
-                    "VALUES (:cid, CAST(:mid AS uuid), 't', 'a', '{}'::jsonb, 'executed')"
-                ),
-                {"cid": "c1", "mid": res["message_id"]},
-            )
-            assert svc.finalize_message(
-                res["message_id"], "ans", status="complete",
-            ) is True
-
-        status = pg_conn.execute(
-            sql_text("SELECT status FROM tool_call_attempts WHERE call_id = :cid"),
-            {"cid": "c1"},
-        ).scalar()
-        assert status == "confirmed"
-
-    def test_finalize_returns_false_for_unknown_message(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        with _patch_db(pg_conn):
-            assert ConversationService().finalize_message(
-                "00000000-0000-0000-0000-000000000000",
-                "x",
-                status="complete",
-            ) is False
-
-    def test_finalize_rolls_back_tool_call_confirm_on_message_update_failure(
-        self, pg_conn
-    ):
-        """Atomicity: if ``update_message_by_id`` raises after the
-        tool_call_attempts confirm ran on the same connection, the
-        confirm rolls back with the rest of the transaction. The
-        ``pg_conn`` fixture pins one connection inside an outer
-        rolled-back transaction; we patch ``db_session`` to wrap each
-        call in a SAVEPOINT so the production-code ``with`` block
-        actually rolls back when the message-update raises.
-        """
-        from contextlib import contextmanager
-
-        from sqlalchemy import text as sql_text
-
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories import conversations as conv_module
-
-        user = "u-fin-rollback"
-
-        @contextmanager
-        def _savepoint_session():
-            nested = pg_conn.begin_nested()
-            try:
-                yield pg_conn
-                nested.commit()
-            except Exception:
-                nested.rollback()
-                raise
-
-        with patch(
-            "application.api.answer.services.conversation_service.db_session",
-            _savepoint_session,
-        ), patch(
-            "application.api.answer.services.conversation_service.db_readonly",
-            _savepoint_session,
-        ):
-            svc = ConversationService()
-            res = svc.save_user_question(
-                conversation_id=None,
-                question="q",
-                decoded_token={"sub": user},
-            )
-            pg_conn.execute(
-                sql_text(
-                    "INSERT INTO tool_call_attempts "
-                    "(call_id, message_id, tool_name, action_name, "
-                    "arguments, status) VALUES (:cid, CAST(:mid AS uuid), "
-                    "'t', 'a', '{}'::jsonb, 'executed')"
-                ),
-                {"cid": "rb-1", "mid": res["message_id"]},
-            )
-            original = conv_module.ConversationsRepository.update_message_by_id
-
-            def boom(self, *args, **kwargs):
-                _ = (args, kwargs)
-                raise RuntimeError("simulated message-update failure")
-
-            conv_module.ConversationsRepository.update_message_by_id = boom
-            try:
-                with pytest.raises(RuntimeError):
-                    svc.finalize_message(
-                        res["message_id"], "answer", status="complete",
-                    )
-            finally:
-                conv_module.ConversationsRepository.update_message_by_id = original
-
-        # The tool_call confirm rolled back: row stays at ``executed``.
-        status = pg_conn.execute(
-            sql_text(
-                "SELECT status FROM tool_call_attempts WHERE call_id = :cid"
-            ),
-            {"cid": "rb-1"},
-        ).scalar()
-        assert status == "executed"
-        msg_status = pg_conn.execute(
-            sql_text(
-                "SELECT status FROM conversation_messages "
-                "WHERE id = CAST(:mid AS uuid)"
-            ),
-            {"mid": res["message_id"]},
-        ).scalar()
-        assert msg_status == "pending"
-
-    def test_finalize_generates_title_when_provided(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-fin-title"
-        mock_llm = MagicMock()
-        mock_llm.gen.return_value = "Short Title"
-        with _patch_db(pg_conn):
-            svc = ConversationService()
-            res = svc.save_user_question(
-                conversation_id=None,
-                question="long question that becomes the fallback name",
-                decoded_token={"sub": user},
-            )
-            assert svc.finalize_message(
-                res["message_id"],
-                "answer",
-                status="complete",
-                title_inputs={
-                    "llm": mock_llm,
-                    "question": "long question that becomes the fallback name",
-                    "response": "answer",
-                    "model_id": "gpt-4",
-                    "fallback_name": (
-                        "long question that becomes the fallback name"[:50]
-                    ),
-                },
-            ) is True
-
-        repo = ConversationsRepository(pg_conn)
-        conv = repo.get_any(res["conversation_id"], user)
-        assert conv["name"] == "Short Title"
-
-
-class TestSaveUserQuestionFinalizeFailedFlow:
-    """LLM fails immediately; question stays queryable with status='failed' + error metadata."""
-
-    def test_failed_llm_leaves_question_persisted(self, pg_conn):
-        from application.api.answer.services.conversation_service import (
-            ConversationService,
-        )
-        from application.storage.db.repositories.conversations import (
-            ConversationsRepository,
-        )
-
-        user = "u-acceptance"
-        with _patch_db(pg_conn):
-            svc = ConversationService()
-            # Simulates the WAL pre-persist before LLM call.
-            res = svc.save_user_question(
-                conversation_id=None,
-                question="why did this fail?",
-                decoded_token={"sub": user},
-            )
-            # Simulates the LLM raising immediately, caught by complete_stream.
-            try:
-                raise RuntimeError("upstream 503")
-            except RuntimeError as e:
-                svc.finalize_message(
-                    res["message_id"],
-                    "",
-                    status="failed",
-                    error=e,
-                )
-
-        msgs = ConversationsRepository(pg_conn).get_messages(
-            res["conversation_id"],
-        )
-        assert len(msgs) == 1
-        assert msgs[0]["prompt"] == "why did this fail?"
-        assert msgs[0]["status"] == "failed"
-        assert "RuntimeError" in msgs[0]["metadata"]["error"]
-        assert "upstream 503" in msgs[0]["metadata"]["error"]
-
-
 class TestCompressionMetadata:
    def test_update_compression_metadata(self, pg_conn):
        from application.api.answer.services.conversation_service import (
--- a/Show More
+++ b/Show More