Compare commits

...

43 Commits

Author SHA1 Message Date
Alex
26e2e7d353 fix: tests 2026-05-04 23:00:21 +01:00
Alex
42c33f4e0d fix: better json validation 2026-05-04 18:09:40 +01:00
Alex
073f9fc003 fix: mini issues 2026-05-04 17:51:09 +01:00
Alex
9b974af210 fix: tests 2026-05-04 17:17:15 +01:00
Alex
fe1edc6b79 feat: more durable frontend 2026-05-04 16:32:28 +01:00
Alex
e550b11f39 feat: durability and idempotency keys 2026-05-03 18:36:02 +01:00
Manish Madan
d23679dd93 Merge pull request #2437 from arc53/dependabot/npm_and_yarn/frontend/react-router-dom-7.14.2
chore(deps): bump react-router-dom from 7.14.1 to 7.14.2 in /frontend
2026-04-29 09:33:36 +05:30
dependabot[bot]
1b2239e54b chore(deps): bump react-router-dom from 7.14.1 to 7.14.2 in /frontend
Bumps [react-router-dom](https://github.com/remix-run/react-router/tree/HEAD/packages/react-router-dom) from 7.14.1 to 7.14.2.
- [Release notes](https://github.com/remix-run/react-router/releases)
- [Changelog](https://github.com/remix-run/react-router/blob/main/packages/react-router-dom/CHANGELOG.md)
- [Commits](https://github.com/remix-run/react-router/commits/react-router-dom@7.14.2/packages/react-router-dom)

---
updated-dependencies:
- dependency-name: react-router-dom
  dependency-version: 7.14.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-29 04:02:16 +00:00
Manish Madan
5ceb99f946 Merge pull request #2436 from arc53/dependabot/npm_and_yarn/frontend/vite-8.0.10
chore(deps-dev): bump vite from 8.0.8 to 8.0.10 in /frontend
2026-04-29 09:30:44 +05:30
dependabot[bot]
892908cef5 chore(deps-dev): bump vite from 8.0.8 to 8.0.10 in /frontend
Bumps [vite](https://github.com/vitejs/vite/tree/HEAD/packages/vite) from 8.0.8 to 8.0.10.
- [Release notes](https://github.com/vitejs/vite/releases)
- [Changelog](https://github.com/vitejs/vite/blob/main/packages/vite/CHANGELOG.md)
- [Commits](https://github.com/vitejs/vite/commits/v8.0.10/packages/vite)

---
updated-dependencies:
- dependency-name: vite
  dependency-version: 8.0.10
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-29 03:58:13 +00:00
Manish Madan
99ffe439c7 Merge pull request #2438 from arc53/dependabot/npm_and_yarn/frontend/typescript-eslint/eslint-plugin-8.59.1
chore(deps-dev): bump @typescript-eslint/eslint-plugin from 8.58.2 to 8.59.1 in /frontend
2026-04-29 09:26:52 +05:30
dependabot[bot]
ed87972ca6 chore(deps-dev): bump @typescript-eslint/eslint-plugin in /frontend
Bumps [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 8.58.2 to 8.59.1.
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/eslint-plugin/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.59.1/packages/eslint-plugin)

---
updated-dependencies:
- dependency-name: "@typescript-eslint/eslint-plugin"
  dependency-version: 8.59.1
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-29 03:52:58 +00:00
Manish Madan
6ad9022dd3 Merge pull request #2408 from arc53/dependabot/npm_and_yarn/frontend/typescript-6.0.3
chore(deps-dev): bump typescript from 5.9.3 to 6.0.3 in /frontend
2026-04-29 09:05:55 +05:30
ManishMadan2882
9b8fe2d5d0 fix(frontend): migrate tsconfig off TS 6.0 deprecated options
- esModuleInterop: false -> true (modern default)
- moduleResolution: Node -> Bundler (recommended for Vite)
- remove baseUrl; paths resolves relative to tsconfig
2026-04-29 08:57:32 +05:30
ManishMadan2882
d1dc8de27c fix(frontend): silence TS 6.0 deprecation errors in tsconfig 2026-04-29 08:55:14 +05:30
Manish Madan
a29fa44b51 Merge pull request #2426 from arc53/dependabot/npm_and_yarn/docs/npm_and_yarn-707cc257f8
chore(deps): bump @xmldom/xmldom from 0.9.9 to 0.9.10 in /docs in the npm_and_yarn group across 1 directory
2026-04-28 19:50:12 +05:30
dependabot[bot]
026371d024 chore(deps): bump @xmldom/xmldom
Bumps the npm_and_yarn group with 1 update in the /docs directory: [@xmldom/xmldom](https://github.com/xmldom/xmldom).


Updates `@xmldom/xmldom` from 0.9.9 to 0.9.10
- [Release notes](https://github.com/xmldom/xmldom/releases)
- [Changelog](https://github.com/xmldom/xmldom/blob/master/CHANGELOG.md)
- [Commits](https://github.com/xmldom/xmldom/compare/0.9.9...0.9.10)

---
updated-dependencies:
- dependency-name: "@xmldom/xmldom"
  dependency-version: 0.9.10
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 14:17:21 +00:00
Manish Madan
b0df2a479b Merge pull request #2434 from arc53/dependabot/npm_and_yarn/extensions/react-widget/typescript-eslint/eslint-plugin-8.59.1
chore(deps-dev): bump @typescript-eslint/eslint-plugin from 8.59.0 to 8.59.1 in /extensions/react-widget
2026-04-28 19:44:18 +05:30
dependabot[bot]
5eae83af1b chore(deps-dev): bump @typescript-eslint/eslint-plugin
Bumps [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 8.59.0 to 8.59.1.
- [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases)
- [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/eslint-plugin/CHANGELOG.md)
- [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.59.1/packages/eslint-plugin)

---
updated-dependencies:
- dependency-name: "@typescript-eslint/eslint-plugin"
  dependency-version: 8.59.1
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 14:11:59 +00:00
Manish Madan
9c875c83c2 Merge pull request #2431 from arc53/dependabot/npm_and_yarn/extensions/react-widget/babel/preset-react-7.28.5
chore(deps-dev): bump @babel/preset-react from 7.24.6 to 7.28.5 in /extensions/react-widget
2026-04-28 19:38:57 +05:30
dependabot[bot]
e6e671faf1 chore(deps-dev): bump @babel/preset-react in /extensions/react-widget
Bumps [@babel/preset-react](https://github.com/babel/babel/tree/HEAD/packages/babel-preset-react) from 7.24.6 to 7.28.5.
- [Release notes](https://github.com/babel/babel/releases)
- [Changelog](https://github.com/babel/babel/blob/main/CHANGELOG.md)
- [Commits](https://github.com/babel/babel/commits/v7.28.5/packages/babel-preset-react)

---
updated-dependencies:
- dependency-name: "@babel/preset-react"
  dependency-version: 7.28.5
  dependency-type: direct:development
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 14:03:26 +00:00
Manish Madan
a31ec97bd7 Merge pull request #2435 from arc53/dependabot/npm_and_yarn/frontend/npm_and_yarn-5f44a83626
chore(deps-dev): bump postcss from 8.5.8 to 8.5.12 in /frontend in the npm_and_yarn group across 1 directory
2026-04-28 19:30:19 +05:30
dependabot[bot]
ebe752d103 chore(deps-dev): bump postcss
Bumps the npm_and_yarn group with 1 update in the /frontend directory: [postcss](https://github.com/postcss/postcss).


Updates `postcss` from 8.5.8 to 8.5.12
- [Release notes](https://github.com/postcss/postcss/releases)
- [Changelog](https://github.com/postcss/postcss/blob/main/CHANGELOG.md)
- [Commits](https://github.com/postcss/postcss/compare/8.5.8...8.5.12)

---
updated-dependencies:
- dependency-name: postcss
  dependency-version: 8.5.12
  dependency-type: direct:development
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 13:53:25 +00:00
Manish Madan
8c30c1c880 Merge pull request #2430 from arc53/dependabot/npm_and_yarn/extensions/react-widget/flow-bin-0.311.0
chore(deps): bump flow-bin from 0.309.0 to 0.311.0 in /extensions/react-widget
2026-04-28 19:20:48 +05:30
dependabot[bot]
4a598e062c chore(deps): bump flow-bin in /extensions/react-widget
Bumps [flow-bin](https://github.com/flowtype/flow-bin) from 0.309.0 to 0.311.0.
- [Release notes](https://github.com/flowtype/flow-bin/releases)
- [Commits](https://github.com/flowtype/flow-bin/commits)

---
updated-dependencies:
- dependency-name: flow-bin
  dependency-version: 0.311.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 13:49:15 +00:00
Manish Madan
e285b47170 Merge pull request #2429 from arc53/dependabot/npm_and_yarn/extensions/react-widget/styled-components-6.4.1
chore(deps): bump styled-components from 6.4.0 to 6.4.1 in /extensions/react-widget
2026-04-28 19:13:52 +05:30
dependabot[bot]
2d884a3df1 chore(deps): bump styled-components in /extensions/react-widget
Bumps [styled-components](https://github.com/styled-components/styled-components) from 6.4.0 to 6.4.1.
- [Release notes](https://github.com/styled-components/styled-components/releases)
- [Commits](https://github.com/styled-components/styled-components/compare/styled-components@6.4.0...styled-components@6.4.1)

---
updated-dependencies:
- dependency-name: styled-components
  dependency-version: 6.4.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 13:42:42 +00:00
Manish Madan
b9920731e0 Merge pull request #2428 from arc53/dependabot/npm_and_yarn/extensions/react-widget/globals-17.5.0
chore(deps-dev): bump globals from 15.15.0 to 17.5.0 in /extensions/react-widget
2026-04-28 18:37:13 +05:30
Manish Madan
f5f4c07e59 Merge pull request #2417 from arc53/dependabot/npm_and_yarn/frontend/react-dropzone-15.0.0
chore(deps): bump react-dropzone from 14.3.8 to 15.0.0 in /frontend
2026-04-28 17:12:19 +05:30
dependabot[bot]
e87dc42ad0 chore(deps): bump react-dropzone from 14.3.8 to 15.0.0 in /frontend
Bumps [react-dropzone](https://github.com/react-dropzone/react-dropzone) from 14.3.8 to 15.0.0.
- [Release notes](https://github.com/react-dropzone/react-dropzone/releases)
- [Commits](https://github.com/react-dropzone/react-dropzone/compare/v14.3.8...v15.0.0)

---
updated-dependencies:
- dependency-name: react-dropzone
  dependency-version: 15.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 11:27:14 +00:00
Manish Madan
40a30054bc Merge pull request #2412 from arc53/dependabot/npm_and_yarn/frontend/npm_and_yarn-7d3393088f
chore(deps): bump lodash-es from 4.17.23 to 4.18.1 in /frontend in the npm_and_yarn group across 1 directory
2026-04-28 16:54:41 +05:30
dependabot[bot]
707e782ac8 chore(deps): bump lodash-es
Bumps the npm_and_yarn group with 1 update in the /frontend directory: [lodash-es](https://github.com/lodash/lodash).


Updates `lodash-es` from 4.17.23 to 4.18.1
- [Release notes](https://github.com/lodash/lodash/releases)
- [Commits](https://github.com/lodash/lodash/compare/4.17.23...4.18.1)

---
updated-dependencies:
- dependency-name: lodash-es
  dependency-version: 4.18.1
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 10:19:29 +00:00
dependabot[bot]
2bc0b6946b chore(deps-dev): bump typescript from 5.9.3 to 6.0.3 in /frontend
Bumps [typescript](https://github.com/microsoft/TypeScript) from 5.9.3 to 6.0.3.
- [Release notes](https://github.com/microsoft/TypeScript/releases)
- [Commits](https://github.com/microsoft/TypeScript/compare/v5.9.3...v6.0.3)

---
updated-dependencies:
- dependency-name: typescript
  dependency-version: 6.0.3
  dependency-type: direct:development
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 10:19:22 +00:00
Manish Madan
fbd686b725 Merge pull request #2415 from arc53/dependabot/npm_and_yarn/frontend/react-i18next-17.0.4
chore(deps): bump react-i18next from 17.0.2 to 17.0.6 in /frontend
2026-04-28 15:47:59 +05:30
dependabot[bot]
29320eb9fd chore(deps): bump react-i18next from 17.0.2 to 17.0.6 in /frontend
Bumps [react-i18next](https://github.com/i18next/react-i18next) from 17.0.2 to 17.0.6.
- [Changelog](https://github.com/i18next/react-i18next/blob/master/CHANGELOG.md)
- [Commits](https://github.com/i18next/react-i18next/compare/v17.0.2...v17.0.6)

---
updated-dependencies:
- dependency-name: react-i18next
  dependency-version: 17.0.6
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-28 09:54:11 +00:00
Alex
0d2a8e11f4 feat: better token serialiser 2026-04-28 02:36:40 +01:00
Alex
f0c39dec23 feat: more logs on stream finish 2026-04-28 02:27:02 +01:00
Alex
552bfe016a fix: better token counting and fixes cache 2026-04-28 01:47:53 +01:00
Alex
a6a5db631b chore: updated roadmap 2026-04-28 01:03:52 +01:00
Alex
8e9f661efc fix: attachments 2026-04-28 00:38:27 +01:00
Alex
82c71be819 feat: better logging 2026-04-28 00:14:43 +01:00
Alex
318de18d43 feat: BYOM (#2433) 2026-04-27 22:09:33 +01:00
dependabot[bot]
fb24f9cf5e chore(deps-dev): bump globals in /extensions/react-widget
Bumps [globals](https://github.com/sindresorhus/globals) from 15.15.0 to 17.5.0.
- [Release notes](https://github.com/sindresorhus/globals/releases)
- [Commits](https://github.com/sindresorhus/globals/compare/v15.15.0...v17.5.0)

---
updated-dependencies:
- dependency-name: globals
  dependency-version: 17.5.0
  dependency-type: direct:development
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-24 20:53:35 +00:00
184 changed files with 23352 additions and 1278 deletions

View File

@@ -47,11 +47,13 @@
</ul>
## Roadmap
- [x] Add OAuth 2.0 authentication for MCP ( September 2025 )
- [x] Deep Agents ( October 2025 )
- [x] Prompt Templating ( October 2025 )
- [x] Full api tooling ( Dec 2025 )
- [ ] Agent scheduling ( Jan 2026 )
- [x] Agent Workflow Builder with conditional nodes ( February 2026 )
- [x] SharePoint & Confluence connectors ( March–April 2026 )
- [x] Research mode ( March 2026 )
- [x] Postgres migration for user data ( April 2026 )
- [x] OpenTelemetry observability ( April 2026 )
- [x] Bring Your Own Model (BYOM) ( April 2026 )
- [ ] Agent scheduling (RedBeat-backed) ( Q2 2026 )
You can find our full roadmap [here](https://github.com/orgs/arc53/projects/2). Please don't hesitate to contribute or create issues, it helps us improve DocsGPT!

View File

@@ -42,6 +42,7 @@ class BaseAgent(ABC):
llm_handler=None,
tool_executor: Optional[ToolExecutor] = None,
backup_models: Optional[List[str]] = None,
model_user_id: Optional[str] = None,
):
self.endpoint = endpoint
self.llm_name = llm_name
@@ -52,10 +53,13 @@ class BaseAgent(ABC):
self.prompt = prompt
self.decoded_token = decoded_token or {}
self.user: str = self.decoded_token.get("sub")
# BYOM-resolution scope: owner for shared agents, caller for
# caller-owned BYOM, None for built-ins. Falls back to self.user
# for worker/legacy callers that don't thread model_user_id.
self.model_user_id = model_user_id
self.tools: List[Dict] = []
self.chat_history: List[Dict] = chat_history if chat_history is not None else []
# Dependency injection for LLM — fall back to creating if not provided
if llm is not None:
self.llm = llm
else:
@@ -67,8 +71,16 @@ class BaseAgent(ABC):
model_id=model_id,
agent_id=agent_id,
backup_models=backup_models,
model_user_id=model_user_id,
)
# For BYOM, registry id (UUID) differs from upstream model id
# (e.g. ``mistral-large-latest``). LLMCreator resolved this onto
# the LLM instance; cache it for subsequent gen calls.
self.upstream_model_id = (
getattr(self.llm, "model_id", None) or model_id
)
self.retrieved_docs = retrieved_docs or []
if llm_handler is not None:
@@ -306,7 +318,9 @@ class BaseAgent(ABC):
try:
current_tokens = self._calculate_current_context_tokens(messages)
self.current_token_count = current_tokens
context_limit = get_token_limit(self.model_id)
context_limit = get_token_limit(
self.model_id, user_id=self.model_user_id or self.user
)
threshold = int(context_limit * settings.COMPRESSION_THRESHOLD_PERCENTAGE)
if current_tokens >= threshold:
@@ -325,7 +339,9 @@ class BaseAgent(ABC):
current_tokens = self._calculate_current_context_tokens(messages)
self.current_token_count = current_tokens
context_limit = get_token_limit(self.model_id)
context_limit = get_token_limit(
self.model_id, user_id=self.model_user_id or self.user
)
percentage = (current_tokens / context_limit) * 100
if current_tokens >= context_limit:
@@ -387,7 +403,9 @@ class BaseAgent(ABC):
)
system_prompt = system_prompt + compression_context
context_limit = get_token_limit(self.model_id)
context_limit = get_token_limit(
self.model_id, user_id=self.model_user_id or self.user
)
system_tokens = num_tokens_from_string(system_prompt)
safety_buffer = int(context_limit * 0.1)
@@ -497,7 +515,10 @@ class BaseAgent(ABC):
def _llm_gen(self, messages: List[Dict], log_context: Optional[LogContext] = None):
self._validate_context_size(messages)
gen_kwargs = {"model": self.model_id, "messages": messages}
# Use the upstream id resolved by LLMCreator (see __init__).
# Built-in models: same as self.model_id. BYOM: the user's
# typed model name, not the internal UUID.
gen_kwargs = {"model": self.upstream_model_id, "messages": messages}
if self.attachments:
gen_kwargs["_usage_attachments"] = self.attachments

View File

@@ -312,7 +312,7 @@ class ResearchAgent(BaseAgent):
try:
response = self.llm.gen(
model=self.model_id,
model=self.upstream_model_id,
messages=messages,
tools=None,
response_format={"type": "json_object"},
@@ -390,7 +390,7 @@ class ResearchAgent(BaseAgent):
try:
response = self.llm.gen(
model=self.model_id,
model=self.upstream_model_id,
messages=messages,
tools=None,
response_format={"type": "json_object"},
@@ -506,7 +506,7 @@ class ResearchAgent(BaseAgent):
try:
response = self.llm.gen(
model=self.model_id,
model=self.upstream_model_id,
messages=messages,
tools=self.tools if self.tools else None,
)
@@ -537,7 +537,7 @@ class ResearchAgent(BaseAgent):
)
try:
response = self.llm.gen(
model=self.model_id, messages=messages, tools=None
model=self.upstream_model_id, messages=messages, tools=None
)
self._track_tokens(self._snapshot_llm_tokens())
text = self._extract_text(response)
@@ -664,7 +664,7 @@ class ResearchAgent(BaseAgent):
]
llm_response = self.llm.gen_stream(
model=self.model_id, messages=messages, tools=None
model=self.upstream_model_id, messages=messages, tools=None
)
if log_context:

View File

@@ -1,18 +1,107 @@
import logging
import uuid
from collections import Counter
from typing import Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple
from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager
from application.security.encryption import decrypt_credentials
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.agents import AgentsRepository
from application.storage.db.repositories.tool_call_attempts import (
ToolCallAttemptsRepository,
)
from application.storage.db.repositories.user_tools import UserToolsRepository
from application.storage.db.session import db_readonly
from application.storage.db.session import db_readonly, db_session
logger = logging.getLogger(__name__)
def _record_proposed(
call_id: str,
tool_name: str,
action_name: str,
arguments: Any,
*,
tool_id: Optional[str] = None,
) -> bool:
"""Insert a ``proposed`` row; swallow infra failures so tool calls
still run when the journal is unreachable. Returns True iff the row
is now journaled (newly created or already present).
"""
try:
with db_session() as conn:
inserted = ToolCallAttemptsRepository(conn).record_proposed(
call_id,
tool_name,
action_name,
arguments,
tool_id=tool_id if tool_id and looks_like_uuid(tool_id) else None,
)
if not inserted:
logger.warning(
"tool_call_attempts duplicate call_id=%s; existing row left in place",
call_id,
extra={"alert": "tool_call_id_collision", "call_id": call_id},
)
return True
except Exception:
logger.exception("tool_call_attempts proposed write failed for %s", call_id)
return False
def _mark_executed(
call_id: str,
result: Any,
*,
message_id: Optional[str] = None,
artifact_id: Optional[str] = None,
proposed_ok: bool = True,
tool_name: Optional[str] = None,
action_name: Optional[str] = None,
arguments: Any = None,
tool_id: Optional[str] = None,
) -> None:
"""Flip the row to ``executed``. If ``proposed_ok`` is False (the
proposed write failed earlier), upsert a fresh row in ``executed`` so
the reconciler can still see the attempt — without this, the side
effect would be invisible to the journal.
"""
try:
with db_session() as conn:
repo = ToolCallAttemptsRepository(conn)
if proposed_ok:
updated = repo.mark_executed(
call_id,
result,
message_id=message_id,
artifact_id=artifact_id,
)
if updated:
return
# Fallback synthesizes the row so the journal isn't lost.
repo.upsert_executed(
call_id,
tool_name=tool_name or "unknown",
action_name=action_name or "",
arguments=arguments if arguments is not None else {},
result=result,
tool_id=tool_id if tool_id and looks_like_uuid(tool_id) else None,
message_id=message_id,
artifact_id=artifact_id,
)
except Exception:
logger.exception("tool_call_attempts executed write failed for %s", call_id)
def _mark_failed(call_id: str, error: str) -> None:
try:
with db_session() as conn:
ToolCallAttemptsRepository(conn).mark_failed(call_id, error)
except Exception:
logger.exception("tool_call_attempts failed-write failed for %s", call_id)
class ToolExecutor:
"""Handles tool discovery, preparation, and execution.
@@ -31,6 +120,7 @@ class ToolExecutor:
self.tool_calls: List[Dict] = []
self._loaded_tools: Dict[str, object] = {}
self.conversation_id: Optional[str] = None
self.message_id: Optional[str] = None
self.client_tools: Optional[List[Dict]] = None
self._name_to_tool: Dict[str, Tuple[str, str]] = {}
self._tool_to_name: Dict[Tuple[str, str], str] = {}
@@ -274,7 +364,14 @@ class ToolExecutor:
if tool_id is None or action_name is None:
error_message = f"Error: Failed to parse LLM tool call. Tool name: {llm_name}"
logger.error(error_message)
logger.error(
"tool_call_parse_failed",
extra={
"llm_class_name": llm_class_name,
"llm_tool_name": llm_name,
"call_id": call_id,
},
)
tool_call_data = {
"tool_name": "unknown",
@@ -289,7 +386,15 @@ class ToolExecutor:
if tool_id not in tools_dict:
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
logger.error(error_message)
logger.error(
"tool_id_not_found",
extra={
"tool_id": tool_id,
"llm_tool_name": llm_name,
"call_id": call_id,
"available_tool_count": len(tools_dict),
},
)
tool_call_data = {
"tool_name": "unknown",
@@ -308,9 +413,36 @@ class ToolExecutor:
"action_name": llm_name,
"arguments": call_args,
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "pending"}}
tool_data = tools_dict[tool_id]
# Journal first so the reconciler sees malformed calls and any
# subsequent ``_mark_failed`` actually updates a real row.
proposed_ok = _record_proposed(
call_id,
tool_data["name"],
action_name,
call_args if isinstance(call_args, dict) else {},
tool_id=tool_data.get("id"),
)
# Defensive guard: a non-dict ``call_args`` (e.g. malformed
# JSON on the resume path) would crash the param walk below
# with AttributeError on ``.items()``. Surface a clean error
# event and flip the journal row to ``failed`` instead of
# killing the stream.
if not isinstance(call_args, dict):
error_message = (
f"Tool call arguments must be a JSON object, got "
f"{type(call_args).__name__}."
)
tool_call_data["result"] = error_message
tool_call_data["arguments"] = {}
_mark_failed(call_id, error_message)
yield {
"type": "tool_call",
"data": {**tool_call_data, "status": "error"},
}
self.tool_calls.append(tool_call_data)
return error_message, call_id
yield {"type": "tool_call", "data": {**tool_call_data, "status": "pending"}}
action_data = (
tool_data["config"]["actions"][action_name]
if tool_data["name"] == "api_tool"
@@ -356,8 +488,17 @@ class ToolExecutor:
f"Failed to load tool '{tool_data.get('name')}' (tool_id key={tool_id}): "
"missing 'id' on tool row."
)
logger.error(error_message)
logger.error(
"tool_load_failed",
extra={
"tool_name": tool_data.get("name"),
"tool_id": tool_id,
"action_name": action_name,
"call_id": call_id,
},
)
tool_call_data["result"] = error_message
_mark_failed(call_id, error_message)
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return error_message, call_id
@@ -367,14 +508,18 @@ class ToolExecutor:
if tool_data["name"] == "api_tool"
else parameters
)
if tool_data["name"] == "api_tool":
logger.debug(
f"Executing api: {action_name} with query_params: {query_params}, headers: {headers}, body: {body}"
)
result = tool.execute_action(action_name, **body)
else:
logger.debug(f"Executing tool: {action_name} with args: {call_args}")
result = tool.execute_action(action_name, **parameters)
try:
if tool_data["name"] == "api_tool":
logger.debug(
f"Executing api: {action_name} with query_params: {query_params}, headers: {headers}, body: {body}"
)
result = tool.execute_action(action_name, **body)
else:
logger.debug(f"Executing tool: {action_name} with args: {call_args}")
result = tool.execute_action(action_name, **parameters)
except Exception as exc:
_mark_failed(call_id, str(exc))
raise
get_artifact_id = (
getattr(tool, "get_artifact_id", None)
@@ -403,6 +548,22 @@ class ToolExecutor:
f"{result_full[:50]}..." if len(result_full) > 50 else result_full
)
# Tool side effect has run; flip the journal row so the
# message-finalize path can later confirm it. If the proposed
# write failed (DB outage), upsert a fresh row in ``executed`` so
# the reconciler still sees the side effect.
_mark_executed(
call_id,
result_full,
message_id=self.message_id,
artifact_id=artifact_id or None,
proposed_ok=proposed_ok,
tool_name=tool_data["name"],
action_name=action_name,
arguments=call_args,
tool_id=tool_data.get("id"),
)
stream_tool_call_data = {
key: value
for key, value in tool_call_data.items()
@@ -451,10 +612,12 @@ class ToolExecutor:
row_id = tool_data.get("id")
if not row_id:
logger.error(
"Tool data missing 'id' for tool name=%s (enumerate-key tool_id=%s); "
"skipping load to avoid binding a non-UUID downstream.",
tool_data.get("name"),
tool_id,
"tool_missing_row_id",
extra={
"tool_name": tool_data.get("name"),
"tool_id": tool_id,
"action_name": action_name,
},
)
return None
tool_config["tool_id"] = str(row_id)

View File

@@ -39,6 +39,7 @@ class InternalSearchTool(Tool):
chunks=int(self.config.get("chunks", 2)),
doc_token_limit=int(self.config.get("doc_token_limit", 50000)),
model_id=self.config.get("model_id", "docsgpt-local"),
model_user_id=self.config.get("model_user_id"),
user_api_key=self.config.get("user_api_key"),
agent_id=self.config.get("agent_id"),
llm_name=self.config.get("llm_name", settings.LLM_PROVIDER),
@@ -435,6 +436,7 @@ def build_internal_tool_config(
chunks: int = 2,
doc_token_limit: int = 50000,
model_id: str = "docsgpt-local",
model_user_id: Optional[str] = None,
user_api_key: Optional[str] = None,
agent_id: Optional[str] = None,
llm_name: str = None,
@@ -449,6 +451,7 @@ def build_internal_tool_config(
"chunks": chunks,
"doc_token_limit": doc_token_limit,
"model_id": model_id,
"model_user_id": model_user_id,
"user_api_key": user_api_key,
"agent_id": agent_id,
"llm_name": llm_name or settings.LLM_PROVIDER,

View File

@@ -177,3 +177,4 @@ class PostgresTool(Tool):
"order": 1,
},
}

View File

@@ -57,6 +57,29 @@ class ToolActionParser:
def _parse_google_llm(self, call):
try:
call_args = call.arguments
# Gemini's SDK natively returns ``args`` as a dict, but the
# resume path (``gen_continuation``) stringifies it for the
# assistant message. Coerce a JSON string back into a dict;
# fall back to an empty dict on malformed input so downstream
# ``call_args.items()`` doesn't crash the stream.
if isinstance(call_args, str):
try:
call_args = json.loads(call_args)
except (json.JSONDecodeError, TypeError):
logger.warning(
"Google call.arguments was not valid JSON; "
"falling back to empty args for %s",
getattr(call, "name", "<unknown>"),
)
call_args = {}
if not isinstance(call_args, dict):
logger.warning(
"Google call.arguments has unexpected type %s; "
"falling back to empty args for %s",
type(call_args).__name__,
getattr(call, "name", "<unknown>"),
)
call_args = {}
resolved = self._resolve_via_mapping(call.name)
if resolved:

View File

@@ -211,15 +211,26 @@ class WorkflowEngine:
node_config.json_schema, node.title
)
node_model_id = node_config.model_id or self.agent.model_id
# Inherit BYOM scope from parent agent so owner-stored BYOM
# resolves on shared workflows.
node_user_id = getattr(self.agent, "model_user_id", None) or (
self.agent.decoded_token.get("sub")
if isinstance(self.agent.decoded_token, dict)
else None
)
node_llm_name = (
node_config.llm_name
or get_provider_from_model_id(node_model_id or "")
or get_provider_from_model_id(
node_model_id or "", user_id=node_user_id
)
or self.agent.llm_name
)
node_api_key = get_api_key_for_provider(node_llm_name) or self.agent.api_key
if node_json_schema and node_model_id:
model_capabilities = get_model_capabilities(node_model_id)
model_capabilities = get_model_capabilities(
node_model_id, user_id=node_user_id
)
if model_capabilities and not model_capabilities.get(
"supports_structured_output", False
):
@@ -232,6 +243,7 @@ class WorkflowEngine:
"endpoint": self.agent.endpoint,
"llm_name": node_llm_name,
"model_id": node_model_id,
"model_user_id": getattr(self.agent, "model_user_id", None),
"api_key": node_api_key,
"tool_ids": node_config.tools,
"prompt": node_config.system_prompt,

View File

@@ -0,0 +1,65 @@
"""0003 user_custom_models — per-user OpenAI-compatible model registrations.
Revision ID: 0003_user_custom_models
Revises: 0002_app_metadata
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0003_user_custom_models"
down_revision: Union[str, None] = "0002_app_metadata"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
op.execute(
"""
CREATE TABLE user_custom_models (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id TEXT NOT NULL,
upstream_model_id TEXT NOT NULL,
display_name TEXT NOT NULL,
description TEXT NOT NULL DEFAULT '',
base_url TEXT NOT NULL,
api_key_encrypted TEXT NOT NULL,
capabilities JSONB NOT NULL DEFAULT '{}'::jsonb,
enabled BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
"""
)
op.execute(
"CREATE INDEX user_custom_models_user_id_idx "
"ON user_custom_models (user_id);"
)
# Mirror the project-wide invariants set up in 0001_initial:
# * user_id FK with ON DELETE RESTRICT (deferrable),
# * ensure_user_exists() trigger so the parent users row autocreates,
# * set_updated_at() trigger.
op.execute(
"ALTER TABLE user_custom_models "
"ADD CONSTRAINT user_custom_models_user_id_fk "
"FOREIGN KEY (user_id) REFERENCES users(user_id) "
"ON DELETE RESTRICT DEFERRABLE INITIALLY IMMEDIATE;"
)
op.execute(
"CREATE TRIGGER user_custom_models_ensure_user "
"BEFORE INSERT OR UPDATE OF user_id ON user_custom_models "
"FOR EACH ROW EXECUTE FUNCTION ensure_user_exists();"
)
op.execute(
"CREATE TRIGGER user_custom_models_set_updated_at "
"BEFORE UPDATE ON user_custom_models "
"FOR EACH ROW WHEN (OLD.* IS DISTINCT FROM NEW.*) "
"EXECUTE FUNCTION set_updated_at();"
)
def downgrade() -> None:
op.execute("DROP TABLE IF EXISTS user_custom_models;")

View File

@@ -0,0 +1,217 @@
"""0004 durability foundation — idempotency, tool-call log, ingest checkpoint.
Adds ``task_dedup``, ``webhook_dedup``, ``tool_call_attempts``,
``ingest_chunk_progress``, and per-row status flags on
``conversation_messages`` and ``pending_tool_state``. Also adds
``token_usage.source`` and ``token_usage.request_id`` so per-channel
cost attribution (``agent_stream`` / ``title`` / ``compression`` /
``rag_condense`` / ``fallback``) is queryable and multi-call agent runs
can be DISTINCT-collapsed into a single user request for rate limiting.
Revision ID: 0004_durability_foundation
Revises: 0003_user_custom_models
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0004_durability_foundation"
down_revision: Union[str, None] = "0003_user_custom_models"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the durability tables, add status/attribution columns to
    existing tables, and index the non-terminal rows the reconciler and
    rate-limit queries scan.
    """
    # ------------------------------------------------------------------
    # New tables
    # ------------------------------------------------------------------
    # ``attempt_count`` bounds the per-Celery-task idempotency wrapper's
    # retry loop so a poison message can't run forever; default 0 means
    # existing rows behave as if no attempts have run yet.
    op.execute(
        """
        CREATE TABLE task_dedup (
            idempotency_key TEXT PRIMARY KEY,
            task_name TEXT NOT NULL,
            task_id TEXT NOT NULL,
            result_json JSONB,
            status TEXT NOT NULL
                CHECK (status IN ('pending', 'completed', 'failed')),
            attempt_count INT NOT NULL DEFAULT 0,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        """
    )
    # NOTE(review): ``response_json`` is presumably replayed to answer a
    # duplicate webhook delivery without re-running the task — confirm
    # against the webhook route.
    op.execute(
        """
        CREATE TABLE webhook_dedup (
            idempotency_key TEXT PRIMARY KEY,
            agent_id UUID NOT NULL,
            task_id TEXT NOT NULL,
            response_json JSONB,
            created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        """
    )
    # FK on ``message_id`` uses ``ON DELETE SET NULL`` so the journal row
    # survives parent-message deletion (compliance / cost-attribution).
    op.execute(
        """
        CREATE TABLE tool_call_attempts (
            call_id TEXT PRIMARY KEY,
            message_id UUID
                REFERENCES conversation_messages (id)
                ON DELETE SET NULL,
            tool_id UUID,
            tool_name TEXT NOT NULL,
            action_name TEXT NOT NULL,
            arguments JSONB NOT NULL,
            result JSONB,
            error TEXT,
            status TEXT NOT NULL
                CHECK (status IN (
                    'proposed', 'executed', 'confirmed',
                    'compensated', 'failed'
                )),
            attempted_at TIMESTAMPTZ NOT NULL DEFAULT now(),
            updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        """
    )
    # ``last_index`` DEFAULT -1 encodes "no chunk embedded yet".
    op.execute(
        """
        CREATE TABLE ingest_chunk_progress (
            source_id UUID PRIMARY KEY,
            total_chunks INT NOT NULL,
            embedded_chunks INT NOT NULL DEFAULT 0,
            last_index INT NOT NULL DEFAULT -1,
            last_updated TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        """
    )
    # ------------------------------------------------------------------
    # Column additions on existing tables
    # ------------------------------------------------------------------
    # DEFAULT 'complete' backfills existing rows — they're already done.
    op.execute(
        """
        ALTER TABLE conversation_messages
            ADD COLUMN status TEXT NOT NULL DEFAULT 'complete'
                CHECK (status IN ('pending', 'streaming', 'complete', 'failed')),
            ADD COLUMN request_id TEXT;
        """
    )
    op.execute(
        """
        ALTER TABLE pending_tool_state
            ADD COLUMN status TEXT NOT NULL DEFAULT 'pending'
                CHECK (status IN ('pending', 'resuming')),
            ADD COLUMN resumed_at TIMESTAMPTZ;
        """
    )
    # DEFAULT ``agent_stream`` backfills historical rows on the
    # assumption they came from the primary path (before this change the
    # only writer was the error branch reading ``agent.llm``).
    # ``request_id`` is the stream-scoped UUID stamped by the route on
    # ``agent.llm`` so multi-tool agent runs (which produce N rows)
    # collapse to one request via DISTINCT in ``count_in_range``.
    # Side-channel sources (``title`` / ``compression`` / ``rag_condense``
    # / ``fallback``) leave it NULL and are excluded from the request
    # count by source filter.
    op.execute(
        """
        ALTER TABLE token_usage
            ADD COLUMN source TEXT NOT NULL DEFAULT 'agent_stream',
            ADD COLUMN request_id TEXT;
        """
    )
    # ------------------------------------------------------------------
    # Indexes — partial where the predicate selects only non-terminal rows
    # ------------------------------------------------------------------
    op.execute(
        "CREATE INDEX conversation_messages_pending_ts_idx "
        "ON conversation_messages (timestamp) "
        "WHERE status IN ('pending', 'streaming');"
    )
    op.execute(
        "CREATE INDEX tool_call_attempts_pending_ts_idx "
        "ON tool_call_attempts (attempted_at) "
        "WHERE status IN ('proposed', 'executed');"
    )
    op.execute(
        "CREATE INDEX tool_call_attempts_message_idx "
        "ON tool_call_attempts (message_id) "
        "WHERE message_id IS NOT NULL;"
    )
    op.execute(
        "CREATE INDEX pending_tool_state_resuming_ts_idx "
        "ON pending_tool_state (resumed_at) "
        "WHERE status = 'resuming';"
    )
    op.execute(
        "CREATE INDEX webhook_dedup_agent_idx "
        "ON webhook_dedup (agent_id);"
    )
    op.execute(
        "CREATE INDEX task_dedup_pending_attempts_idx "
        "ON task_dedup (attempt_count) WHERE status = 'pending';"
    )
    # Cost-attribution dashboards filter ``token_usage`` by
    # ``(timestamp, source)``; index the same shape so they stay cheap.
    op.execute(
        "CREATE INDEX token_usage_source_ts_idx "
        "ON token_usage (source, timestamp);"
    )
    # Partial index — only rows with a stamped request_id participate
    # in the DISTINCT count. NULL rows fall through to the COUNT(*)
    # branch in the repository query.
    op.execute(
        "CREATE INDEX token_usage_request_id_idx "
        "ON token_usage (request_id) "
        "WHERE request_id IS NOT NULL;"
    )
    # Reuse set_updated_at() from 0001_initial; the WHEN clause skips
    # no-op UPDATEs.
    op.execute(
        "CREATE TRIGGER tool_call_attempts_set_updated_at "
        "BEFORE UPDATE ON tool_call_attempts "
        "FOR EACH ROW WHEN (OLD.* IS DISTINCT FROM NEW.*) "
        "EXECUTE FUNCTION set_updated_at();"
    )
def downgrade() -> None:
    """Revert 0004: drop the durability tables, then strip the columns
    added to the pre-existing tables.

    Postgres auto-drops the partial indexes on ``conversation_messages``
    and ``pending_tool_state`` when their ``status`` columns go; the
    ``token_usage`` indexes are dropped explicitly first.
    """
    # CASCADE so the downgrade stays safe if later migrations FK into these.
    new_tables = (
        "ingest_chunk_progress",
        "tool_call_attempts",
        "webhook_dedup",
        "task_dedup",
    )
    statements = [
        f"DROP TABLE IF EXISTS {name} CASCADE;" for name in new_tables
    ]
    statements += [
        "ALTER TABLE conversation_messages "
        "DROP COLUMN IF EXISTS request_id, "
        "DROP COLUMN IF EXISTS status;",
        "ALTER TABLE pending_tool_state "
        "DROP COLUMN IF EXISTS resumed_at, "
        "DROP COLUMN IF EXISTS status;",
        "DROP INDEX IF EXISTS token_usage_request_id_idx;",
        "DROP INDEX IF EXISTS token_usage_source_ts_idx;",
        "ALTER TABLE token_usage "
        "DROP COLUMN IF EXISTS request_id, "
        "DROP COLUMN IF EXISTS source;",
    ]
    for statement in statements:
        op.execute(statement)

View File

@@ -0,0 +1,44 @@
"""0005 ingest_chunk_progress.attempt_id — per-attempt resume scoping.
Without this column, a completed checkpoint row poisoned every later
embed call on the same ``source_id``: a sync after an upload finished
read the upload's terminal ``last_index`` and either embedded zero
chunks (if new ``total_docs <= last_index + 1``) or stacked new chunks
on top of the old vectors (if ``total_docs > last_index + 1``).
``attempt_id`` is stamped from ``self.request.id`` (Celery's stable
task id, which survives ``acks_late`` retries of the same task but
differs across separate task invocations). The repository's
``init_progress`` upsert resets ``last_index`` / ``embedded_chunks``
when the incoming ``attempt_id`` differs from the stored one — so a
fresh sync starts from chunk 0 while a retry of the same task resumes
from the last checkpointed chunk.
Revision ID: 0005_ingest_attempt_id
Revises: 0004_durability_foundation
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0005_ingest_attempt_id"
down_revision: Union[str, None] = "0004_durability_foundation"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add nullable ``attempt_id`` — the Celery task id that wrote the
    checkpoint, so ``init_progress`` can reset stale rows (see module
    docstring). NULL on existing rows, i.e. "unknown attempt".
    """
    op.execute(
        """
        ALTER TABLE ingest_chunk_progress
            ADD COLUMN attempt_id TEXT;
        """
    )
def downgrade() -> None:
    """Undo 0005: remove the per-attempt scoping column."""
    drop_column = (
        "ALTER TABLE ingest_chunk_progress DROP COLUMN IF EXISTS attempt_id;"
    )
    op.execute(drop_column)

View File

@@ -0,0 +1,57 @@
"""0006 task_dedup lease columns — running-lease for in-flight tasks.
Without these, ``with_idempotency`` only short-circuits *completed*
rows. A late-ack redelivery (Redis ``visibility_timeout`` exceeded by a
long ingest, or a hung-but-alive worker) hands the same message to a
second worker; ``_claim_or_bump`` only bumped the attempt counter and
both workers ran the task body in parallel — duplicate vector writes,
duplicate token spend, duplicate webhook side effects.
``lease_owner_id`` + ``lease_expires_at`` turn that into an atomic
compare-and-swap. The wrapper claims a lease at entry, refreshes it via
a 30 s heartbeat thread, and finalises (which makes the lease moot via
``status='completed'``). A second worker hitting the same key sees a
fresh lease and ``self.retry(countdown=LEASE_TTL)``s instead of running.
A crashed worker's lease expires after ``LEASE_TTL`` seconds and the
next retry can claim it.
Revision ID: 0006_idempotency_lease
Revises: 0005_ingest_attempt_id
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0006_idempotency_lease"
down_revision: Union[str, None] = "0005_ingest_attempt_id"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add the running-lease columns and the reconciler's partial index.

    Both columns are nullable: pre-existing rows carry no lease, and the
    wrapper's compare-and-swap (see module docstring) fills them at claim
    time.
    """
    op.execute(
        """
        ALTER TABLE task_dedup
            ADD COLUMN lease_owner_id TEXT,
            ADD COLUMN lease_expires_at TIMESTAMPTZ;
        """
    )
    # Reconciler's stuck-pending sweep filters by
    # ``(status='pending', lease_expires_at < now() - 60s, attempt_count >= 5)``.
    # Partial index keeps the scan small even under heavy task throughput.
    op.execute(
        "CREATE INDEX task_dedup_pending_lease_idx "
        "ON task_dedup (lease_expires_at) "
        "WHERE status = 'pending';"
    )
def downgrade() -> None:
    """Undo 0006: drop the lease index, then the two lease columns."""
    op.execute("DROP INDEX IF EXISTS task_dedup_pending_lease_idx;")
    drop_columns = (
        "ALTER TABLE task_dedup "
        "DROP COLUMN IF EXISTS lease_expires_at, "
        "DROP COLUMN IF EXISTS lease_owner_id;"
    )
    op.execute(drop_columns)

View File

@@ -102,6 +102,8 @@ class AnswerResource(Resource, BaseAnswerResource):
"tools_dict": tools_dict,
"pending_tool_calls": pending_tool_calls,
"tool_actions": tool_actions,
"reserved_message_id": processor.reserved_message_id,
"request_id": processor.request_id,
},
)
else:

View File

@@ -1,13 +1,18 @@
import datetime
import json
import logging
import time
import uuid
from typing import Any, Dict, Generator, List, Optional
from flask import jsonify, make_response, Response
from flask_restx import Namespace
from application.api.answer.services.continuation_service import ContinuationService
from application.api.answer.services.conversation_service import ConversationService
from application.api.answer.services.conversation_service import (
ConversationService,
TERMINATED_RESPONSE_PLACEHOLDER,
)
from application.core.model_utils import (
get_api_key_for_provider,
get_default_model_id,
@@ -177,6 +182,7 @@ class BaseAnswerResource:
is_shared_usage: bool = False,
shared_token: Optional[str] = None,
model_id: Optional[str] = None,
model_user_id: Optional[str] = None,
_continuation: Optional[Dict] = None,
) -> Generator[str, None, None]:
"""
@@ -202,13 +208,118 @@ class BaseAnswerResource:
Yields:
Server-sent event strings
"""
response_full, thought, source_log_docs, tool_calls = "", "", [], []
is_structured = False
schema_info = None
structured_chunks = []
query_metadata: Dict[str, Any] = {}
paused = False
# One id shared across the WAL row, primary LLM (token_usage
# attribution), the SSE event, and resumed continuations.
request_id = (
_continuation.get("request_id") if _continuation else None
) or str(uuid.uuid4())
# Reserve the placeholder row before the LLM call so a crash
# mid-stream still leaves the question queryable. Continuations
# reuse the original placeholder.
reserved_message_id: Optional[str] = None
wal_eligible = should_save_conversation and not _continuation
if wal_eligible:
try:
reservation = self.conversation_service.save_user_question(
conversation_id=conversation_id,
question=question,
decoded_token=decoded_token,
attachment_ids=attachment_ids,
api_key=user_api_key,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
model_id=model_id or self.default_model_id,
request_id=request_id,
index=index,
)
conversation_id = reservation["conversation_id"]
reserved_message_id = reservation["message_id"]
except Exception as e:
logger.error(
f"Failed to reserve message row before stream: {e}",
exc_info=True,
)
elif _continuation and _continuation.get("reserved_message_id"):
reserved_message_id = _continuation["reserved_message_id"]
primary_llm = getattr(agent, "llm", None)
if primary_llm is not None:
primary_llm._request_id = request_id
# Flipped to ``streaming`` on first chunk; reconciler uses this
# to tell "never started" from "in flight".
streaming_marked = False
# Heartbeat goes into ``metadata.last_heartbeat_at`` (not
# ``updated_at``, which reconciler-side writes share) and uses
# ``time.monotonic`` so a blocked event loop can't fake fresh.
STREAM_HEARTBEAT_INTERVAL = 60
last_heartbeat_at = time.monotonic()
def _mark_streaming_once() -> None:
nonlocal streaming_marked, last_heartbeat_at
if streaming_marked or not reserved_message_id:
return
try:
self.conversation_service.update_message_status(
reserved_message_id, "streaming",
)
except Exception:
logger.exception(
"update_message_status streaming failed for %s",
reserved_message_id,
)
streaming_marked = True
last_heartbeat_at = time.monotonic()
def _heartbeat_streaming() -> None:
nonlocal last_heartbeat_at
if not reserved_message_id or not streaming_marked:
return
now_mono = time.monotonic()
if now_mono - last_heartbeat_at < STREAM_HEARTBEAT_INTERVAL:
return
try:
self.conversation_service.heartbeat_message(
reserved_message_id,
)
except Exception:
logger.exception(
"stream heartbeat update failed for %s",
reserved_message_id,
)
last_heartbeat_at = now_mono
# Correlates tool_call_attempts rows with this message.
if reserved_message_id and getattr(agent, "tool_executor", None):
try:
agent.tool_executor.message_id = reserved_message_id
except Exception:
pass
try:
response_full, thought, source_log_docs, tool_calls = "", "", [], []
is_structured = False
schema_info = None
structured_chunks = []
query_metadata = {}
paused = False
# Surface the placeholder id before any LLM tokens so a
# mid-handshake disconnect still has a row to tail-poll.
if reserved_message_id:
early_event = json.dumps(
{
"type": "message_id",
"message_id": reserved_message_id,
"conversation_id": (
str(conversation_id) if conversation_id else None
),
"request_id": request_id,
}
)
yield f"data: {early_event}\n\n"
if _continuation:
gen_iter = agent.gen_continuation(
@@ -221,9 +332,13 @@ class BaseAnswerResource:
gen_iter = agent.gen(query=question)
for line in gen_iter:
# Cheap closure check that only hits the DB when the
# heartbeat interval has elapsed.
_heartbeat_streaming()
if "metadata" in line:
query_metadata.update(line["metadata"])
elif "answer" in line:
_mark_streaming_once()
response_full += str(line["answer"])
if line.get("structured"):
is_structured = True
@@ -233,6 +348,7 @@ class BaseAnswerResource:
data = json.dumps({"type": "answer", "answer": line["answer"]})
yield f"data: {data}\n\n"
elif "sources" in line:
_mark_streaming_once()
truncated_sources = []
source_log_docs = line["sources"]
for source in line["sources"]:
@@ -285,12 +401,19 @@ class BaseAnswerResource:
if paused:
continuation = getattr(agent, "_pending_continuation", None)
if continuation:
# Ensure we have a conversation_id — create a partial
# conversation if this is the first turn.
# First-turn pause needs a conversation row to attach to.
if not conversation_id and should_save_conversation:
try:
provider = (
get_provider_from_model_id(model_id)
get_provider_from_model_id(
model_id,
user_id=model_user_id
or (
decoded_token.get("sub")
if decoded_token
else None
),
)
if model_id
else settings.LLM_PROVIDER
)
@@ -304,6 +427,7 @@ class BaseAnswerResource:
decoded_token=decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)
conversation_id = (
self.conversation_service.save_conversation(
@@ -340,6 +464,9 @@ class BaseAnswerResource:
tool_schemas=getattr(agent, "tools", []),
agent_config={
"model_id": model_id or self.default_model_id,
# BYOM scope; without it resume falls
# back to caller's layer.
"model_user_id": model_user_id,
"llm_name": getattr(agent, "llm_name", settings.LLM_PROVIDER),
"api_key": getattr(agent, "api_key", None),
"user_api_key": user_api_key,
@@ -348,6 +475,11 @@ class BaseAnswerResource:
"prompt": getattr(agent, "prompt", ""),
"json_schema": getattr(agent, "json_schema", None),
"retriever_config": getattr(agent, "retriever_config", None),
# Reused on resume so the same WAL row
# is finalised and request_id stays
# consistent across token_usage rows.
"reserved_message_id": reserved_message_id,
"request_id": request_id,
},
client_tools=getattr(
agent.tool_executor, "client_tools", None
@@ -370,8 +502,13 @@ class BaseAnswerResource:
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
# Model-owner scope so title-gen uses owner's BYOM key.
provider = (
get_provider_from_model_id(model_id)
get_provider_from_model_id(
model_id,
user_id=model_user_id
or (decoded_token.get("sub") if decoded_token else None),
)
if model_id
else settings.LLM_PROVIDER
)
@@ -384,27 +521,51 @@ class BaseAnswerResource:
decoded_token=decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)
# Title-gen only; agent stream tokens live on ``agent.llm``.
llm._token_usage_source = "title"
if should_save_conversation:
conversation_id = self.conversation_service.save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
model_id or self.default_model_id,
decoded_token,
index=index,
api_key=user_api_key,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
attachment_ids=attachment_ids,
metadata=query_metadata if query_metadata else None,
)
if reserved_message_id is not None:
self.conversation_service.finalize_message(
reserved_message_id,
response_full,
thought=thought,
sources=source_log_docs,
tool_calls=tool_calls,
model_id=model_id or self.default_model_id,
metadata=query_metadata if query_metadata else None,
status="complete",
title_inputs={
"llm": llm,
"question": question,
"response": response_full,
"model_id": model_id or self.default_model_id,
"fallback_name": (
question[:50] if question else "New Conversation"
),
},
)
else:
conversation_id = self.conversation_service.save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
model_id or self.default_model_id,
decoded_token,
index=index,
api_key=user_api_key,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
attachment_ids=attachment_ids,
metadata=query_metadata if query_metadata else None,
)
# Persist compression metadata/summary if it exists and wasn't saved mid-execution
compression_meta = getattr(agent, "compression_metadata", None)
compression_saved = getattr(agent, "compression_saved", False)
@@ -427,6 +588,21 @@ class BaseAnswerResource:
)
else:
conversation_id = None
# Resume finished cleanly; drop the continuation row.
# Crash-paths leave it ``resuming`` for the janitor to revert.
if _continuation and conversation_id:
try:
cont_service = ContinuationService()
cont_service.delete_state(
str(conversation_id),
decoded_token.get("sub", "local"),
)
except Exception as e:
logger.error(
f"Failed to delete continuation state on resume "
f"completion: {e}",
exc_info=True,
)
id_data = {"type": "id", "id": str(conversation_id)}
data = json.dumps(id_data)
yield f"data: {data}\n\n"
@@ -481,31 +657,73 @@ class BaseAnswerResource:
if isNoneDoc:
for doc in source_log_docs:
doc["source"] = "None"
# Resolve under model-owner scope so shared-agent
# title-gen uses owner BYOM, not deployment default.
provider = (
get_provider_from_model_id(
model_id,
user_id=model_user_id
or (
decoded_token.get("sub")
if decoded_token
else None
),
)
if model_id
else settings.LLM_PROVIDER
)
sys_api_key = get_api_key_for_provider(
provider or settings.LLM_PROVIDER
)
llm = LLMCreator.create_llm(
settings.LLM_PROVIDER,
api_key=settings.API_KEY,
provider or settings.LLM_PROVIDER,
api_key=sys_api_key,
user_api_key=user_api_key,
decoded_token=decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)
self.conversation_service.save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
model_id or self.default_model_id,
decoded_token,
index=index,
api_key=user_api_key,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
attachment_ids=attachment_ids,
metadata=query_metadata if query_metadata else None,
)
llm._token_usage_source = "title"
if reserved_message_id is not None:
self.conversation_service.finalize_message(
reserved_message_id,
response_full,
thought=thought,
sources=source_log_docs,
tool_calls=tool_calls,
model_id=model_id or self.default_model_id,
metadata=query_metadata if query_metadata else None,
status="complete",
title_inputs={
"llm": llm,
"question": question,
"response": response_full,
"model_id": model_id or self.default_model_id,
"fallback_name": (
question[:50] if question else "New Conversation"
),
},
)
else:
self.conversation_service.save_conversation(
conversation_id,
question,
response_full,
thought,
source_log_docs,
tool_calls,
llm,
model_id or self.default_model_id,
decoded_token,
index=index,
api_key=user_api_key,
agent_id=agent_id,
is_shared_usage=is_shared_usage,
shared_token=shared_token,
attachment_ids=attachment_ids,
metadata=query_metadata if query_metadata else None,
)
compression_meta = getattr(agent, "compression_metadata", None)
compression_saved = getattr(agent, "compression_saved", False)
if conversation_id and compression_meta and not compression_saved:
@@ -532,6 +750,24 @@ class BaseAnswerResource:
raise
except Exception as e:
logger.error(f"Error in stream: {str(e)}", exc_info=True)
if reserved_message_id is not None:
try:
self.conversation_service.finalize_message(
reserved_message_id,
response_full or TERMINATED_RESPONSE_PLACEHOLDER,
thought=thought,
sources=source_log_docs,
tool_calls=tool_calls,
model_id=model_id or self.default_model_id,
metadata=query_metadata if query_metadata else None,
status="failed",
error=e,
)
except Exception as fin_err:
logger.error(
f"Failed to finalize errored message: {fin_err}",
exc_info=True,
)
data = json.dumps(
{
"type": "error",

View File

@@ -109,11 +109,14 @@ class StreamResource(Resource, BaseAnswerResource):
decoded_token=processor.decoded_token,
agent_id=processor.agent_id,
model_id=processor.model_id,
model_user_id=processor.model_user_id,
_continuation={
"messages": messages,
"tools_dict": tools_dict,
"pending_tool_calls": pending_tool_calls,
"tool_actions": tool_actions,
"reserved_message_id": processor.reserved_message_id,
"request_id": processor.request_id,
},
),
mimetype="text/event-stream",
@@ -145,6 +148,7 @@ class StreamResource(Resource, BaseAnswerResource):
is_shared_usage=processor.is_shared_usage,
shared_token=processor.shared_token,
model_id=processor.model_id,
model_user_id=processor.model_user_id,
),
mimetype="text/event-stream",
)

View File

@@ -49,6 +49,7 @@ class CompressionOrchestrator:
model_id: str,
decoded_token: Dict[str, Any],
current_query_tokens: int = 500,
model_user_id: Optional[str] = None,
) -> CompressionResult:
"""
Check if compression is needed and perform it if so.
@@ -57,16 +58,18 @@ class CompressionOrchestrator:
Args:
conversation_id: Conversation ID
user_id: User ID
user_id: Caller's user id — used for conversation access checks
model_id: Model being used for conversation
decoded_token: User's decoded JWT token
current_query_tokens: Estimated tokens for current query
model_user_id: BYOM-resolution scope (model owner); defaults
to ``user_id`` for built-in / caller-owned models.
Returns:
CompressionResult with summary and recent queries
"""
try:
# Load conversation
# Conversation row is owned by the caller, not the model owner.
conversation = self.conversation_service.get_conversation(
conversation_id, user_id
)
@@ -77,9 +80,14 @@ class CompressionOrchestrator:
)
return CompressionResult.failure("Conversation not found")
# Check if compression is needed
# Use model-owner scope so per-user BYOM context windows
# (e.g. 8k) compute the threshold against the right limit.
registry_user_id = model_user_id or user_id
if not self.threshold_checker.should_compress(
conversation, model_id, current_query_tokens
conversation,
model_id,
current_query_tokens,
user_id=registry_user_id,
):
# No compression needed, return full history
queries = conversation.get("queries", [])
@@ -87,7 +95,12 @@ class CompressionOrchestrator:
# Perform compression
return self._perform_compression(
conversation_id, conversation, model_id, decoded_token
conversation_id,
conversation,
model_id,
decoded_token,
user_id=user_id,
model_user_id=model_user_id,
)
except Exception as e:
@@ -102,6 +115,8 @@ class CompressionOrchestrator:
conversation: Dict[str, Any],
model_id: str,
decoded_token: Dict[str, Any],
user_id: Optional[str] = None,
model_user_id: Optional[str] = None,
) -> CompressionResult:
"""
Perform the actual compression operation.
@@ -111,6 +126,8 @@ class CompressionOrchestrator:
conversation: Conversation document
model_id: Model ID for conversation
decoded_token: User token
user_id: Caller's id (for conversation reload after compression)
model_user_id: BYOM-resolution scope (model owner)
Returns:
CompressionResult
@@ -123,11 +140,17 @@ class CompressionOrchestrator:
else model_id
)
# Get provider and API key for compression model
provider = get_provider_from_model_id(compression_model)
# Use model-owner scope so provider/api_key resolves to the
# owner's BYOM record (shared-agent dispatch).
caller_user_id = user_id
if caller_user_id is None and isinstance(decoded_token, dict):
caller_user_id = decoded_token.get("sub")
registry_user_id = model_user_id or caller_user_id
provider = get_provider_from_model_id(
compression_model, user_id=registry_user_id
)
api_key = get_api_key_for_provider(provider)
# Create compression LLM
compression_llm = LLMCreator.create_llm(
provider,
api_key=api_key,
@@ -135,7 +158,11 @@ class CompressionOrchestrator:
decoded_token=decoded_token,
model_id=compression_model,
agent_id=conversation.get("agent_id"),
model_user_id=registry_user_id,
)
# Side-channel LLM tag — distinguishes compression rows
# from primary stream rows for cost-attribution dashboards.
compression_llm._token_usage_source = "compression"
# Create compression service with DB update capability
compression_service = CompressionService(
@@ -167,9 +194,12 @@ class CompressionOrchestrator:
f"saved {metadata.original_token_count - metadata.compressed_token_count} tokens"
)
# Reload conversation with updated metadata
# Reload under caller (conversation is owned by caller).
reload_user_id = caller_user_id
if reload_user_id is None and isinstance(decoded_token, dict):
reload_user_id = decoded_token.get("sub")
conversation = self.conversation_service.get_conversation(
conversation_id, user_id=decoded_token.get("sub")
conversation_id, user_id=reload_user_id
)
# Get compressed context
@@ -192,16 +222,21 @@ class CompressionOrchestrator:
model_id: str,
decoded_token: Dict[str, Any],
current_conversation: Optional[Dict[str, Any]] = None,
model_user_id: Optional[str] = None,
) -> CompressionResult:
"""
Perform compression during tool execution.
Args:
conversation_id: Conversation ID
user_id: User ID
user_id: Caller's user id — used for conversation access checks
model_id: Model ID
decoded_token: User token
current_conversation: Pre-loaded conversation (optional)
model_user_id: BYOM-resolution scope (model owner). For
shared-agent dispatch this is the agent owner; defaults
to ``user_id`` so built-in / caller-owned models are
unaffected.
Returns:
CompressionResult
@@ -223,7 +258,12 @@ class CompressionOrchestrator:
# Perform compression
return self._perform_compression(
conversation_id, conversation, model_id, decoded_token
conversation_id,
conversation,
model_id,
decoded_token,
user_id=user_id,
model_user_id=model_user_id,
)
except Exception as e:

View File

@@ -106,8 +106,13 @@ class CompressionService:
f"using model {self.model_id}"
)
# See note in conversation_service.py: ``self.model_id`` is
# the registry id (UUID for BYOM); the LLM's own model_id is
# what the provider's API actually expects.
response = self.llm.gen(
model=self.model_id, messages=messages, max_tokens=4000
model=getattr(self.llm, "model_id", None) or self.model_id,
messages=messages,
max_tokens=4000,
)
# Extract summary from response

View File

@@ -30,6 +30,7 @@ class CompressionThresholdChecker:
conversation: Dict[str, Any],
model_id: str,
current_query_tokens: int = 500,
user_id: str | None = None,
) -> bool:
"""
Determine if compression is needed.
@@ -38,6 +39,8 @@ class CompressionThresholdChecker:
conversation: Full conversation document
model_id: Target model for this request
current_query_tokens: Estimated tokens for current query
user_id: Owner — needed so per-user BYOM custom-model UUIDs
resolve when looking up the context window.
Returns:
True if tokens >= threshold% of context window
@@ -48,7 +51,7 @@ class CompressionThresholdChecker:
total_tokens += current_query_tokens
# Get context window limit for model
context_limit = get_token_limit(model_id)
context_limit = get_token_limit(model_id, user_id=user_id)
# Calculate threshold
threshold = int(context_limit * self.threshold_percentage)
@@ -73,20 +76,24 @@ class CompressionThresholdChecker:
logger.error(f"Error checking compression need: {str(e)}", exc_info=True)
return False
def check_message_tokens(self, messages: list, model_id: str) -> bool:
def check_message_tokens(
self, messages: list, model_id: str, user_id: str | None = None
) -> bool:
"""
Check if message list exceeds threshold.
Args:
messages: List of message dicts
model_id: Target model
user_id: Owner — needed so per-user BYOM custom-model UUIDs
resolve when looking up the context window.
Returns:
True if at or above threshold
"""
try:
current_tokens = TokenCounter.count_message_tokens(messages)
context_limit = get_token_limit(model_id)
context_limit = get_token_limit(model_id, user_id=user_id)
threshold = int(context_limit * self.threshold_percentage)
if current_tokens >= threshold:

View File

@@ -12,6 +12,12 @@ logger = logging.getLogger(__name__)
class TokenCounter:
"""Centralized token counting for conversations and messages."""
# Per-image token estimate. Provider tokenizers vary widely
# (Gemini ~258, GPT-4o 85-1500, Claude ~1500) and the actual cost
# depends on resolution/detail we can't see here. Errs slightly high
# so the threshold check stays conservative.
_IMAGE_PART_TOKEN_ESTIMATE = 1500
@staticmethod
def count_message_tokens(messages: List[Dict]) -> int:
"""
@@ -29,12 +35,36 @@ class TokenCounter:
if isinstance(content, str):
total_tokens += num_tokens_from_string(content)
elif isinstance(content, list):
# Handle structured content (tool calls, etc.)
# Handle structured content (tool calls, image parts, etc.)
for item in content:
if isinstance(item, dict):
total_tokens += num_tokens_from_string(str(item))
total_tokens += TokenCounter._count_content_part(item)
return total_tokens
@staticmethod
def _count_content_part(item: Dict) -> int:
# Image/file attachments are billed by the provider per image,
# not proportional to the inline bytes/base64 string.
# ``str(item)`` on a 1MB image inflates the count by ~10000x,
# which trips spurious compression and overflows downstream
# input limits.
item_type = item.get("type")
if "files" in item:
files = item.get("files")
count = len(files) if isinstance(files, list) and files else 1
return TokenCounter._IMAGE_PART_TOKEN_ESTIMATE * count
if "image_url" in item or item_type in {
"image",
"image_url",
"input_image",
"file",
}:
return TokenCounter._IMAGE_PART_TOKEN_ESTIMATE
return num_tokens_from_string(str(item))
@staticmethod
def count_query_tokens(
queries: List[Dict[str, Any]], include_tool_calls: bool = True

View File

@@ -7,13 +7,13 @@ resume later by sending tool_actions.
import logging
from typing import Any, Dict, List, Optional
from uuid import UUID
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.conversations import ConversationsRepository
from application.storage.db.repositories.pending_tool_state import (
PendingToolStateRepository,
)
from application.storage.db.serialization import coerce_pg_native as _make_serializable
from application.storage.db.session import db_readonly, db_session
logger = logging.getLogger(__name__)
@@ -21,23 +21,9 @@ logger = logging.getLogger(__name__)
# TTL for pending states — auto-cleaned after this period
PENDING_STATE_TTL_SECONDS = 30 * 60 # 30 minutes
def _make_serializable(obj: Any) -> Any:
"""Recursively coerce non-JSON values into JSON-safe forms.
Handles ``uuid.UUID`` (from PG columns), ``bytes``, and recurses into
dicts/lists. Post-Mongo-cutover the ObjectId branch is gone — none of
our writers produce them anymore.
"""
if isinstance(obj, UUID):
return str(obj)
if isinstance(obj, dict):
return {str(k): _make_serializable(v) for k, v in obj.items()}
if isinstance(obj, list):
return [_make_serializable(v) for v in obj]
if isinstance(obj, bytes):
return obj.decode("utf-8", errors="replace")
return obj
# Re-export so the existing tests at tests/api/answer/services/test_continuation_service_pg.py
# can keep importing ``_make_serializable`` from here.
__all__ = ["_make_serializable", "ContinuationService", "PENDING_STATE_TTL_SECONDS"]
class ContinuationService:
@@ -155,3 +141,23 @@ class ContinuationService:
f"Deleted continuation state for conversation {conversation_id}"
)
return deleted
def mark_resuming(self, conversation_id: str, user: str) -> bool:
    """Flip the pending row to ``resuming`` so a crashed resume can be retried."""
    with db_session() as conn:
        # Resolve the legacy id to its PG UUID; accept a raw UUID as-is.
        conversation = ConversationsRepository(conn).get_by_legacy_id(conversation_id)
        if conversation is not None:
            pg_conv_id = conversation["id"]
        else:
            if not looks_like_uuid(conversation_id):
                return False
            pg_conv_id = conversation_id
        flipped = PendingToolStateRepository(conn).mark_resuming(pg_conv_id, user)
        if flipped:
            logger.info(
                f"Marked continuation state as resuming for conversation "
                f"{conversation_id}"
            )
        return flipped

View File

@@ -6,6 +6,7 @@ than held for the duration of a stream.
"""
import logging
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
@@ -21,6 +22,12 @@ from application.storage.db.session import db_readonly, db_session
logger = logging.getLogger(__name__)
# Shown to the user if the worker dies mid-stream and the response is never finalised.
TERMINATED_RESPONSE_PLACEHOLDER = (
"Response was terminated prior to completion, try regenerating."
)
class ConversationService:
def get_conversation(
self, conversation_id: str, user_id: str
@@ -136,8 +143,14 @@ class ConversationService:
},
]
# ``model_id`` here is the registry id (a UUID for BYOM
# records). The LLM's own ``model_id`` is the upstream name
# LLMCreator resolved at construction time — that's what
# the provider's API expects. Built-ins are unaffected.
completion = llm.gen(
model=model_id, messages=messages_summary, max_tokens=500
model=getattr(llm, "model_id", None) or model_id,
messages=messages_summary,
max_tokens=500,
)
if not completion or not completion.strip():
@@ -173,6 +186,236 @@ class ConversationService:
repo.append_message(conv_pg_id, append_payload)
return conv_pg_id
def save_user_question(
    self,
    conversation_id: Optional[str],
    question: str,
    decoded_token: Dict[str, Any],
    *,
    attachment_ids: Optional[List[str]] = None,
    api_key: Optional[str] = None,
    agent_id: Optional[str] = None,
    is_shared_usage: bool = False,
    shared_token: Optional[str] = None,
    model_id: Optional[str] = None,
    request_id: Optional[str] = None,
    status: str = "pending",
    index: Optional[int] = None,
) -> Dict[str, str]:
    """Reserve the placeholder message row before the LLM call.

    ``index`` triggers regenerate semantics: messages at
    ``position >= index`` are truncated so the new placeholder
    lands at ``position = index`` rather than appending.

    Returns ``{"conversation_id", "message_id", "request_id"}``.

    Raises:
        ValueError: when ``decoded_token`` is missing or carries no
            ``sub``, or when ``conversation_id`` doesn't resolve to a
            conversation this user may access.
    """
    if decoded_token is None:
        raise ValueError("Invalid or missing authentication token")
    user_id = decoded_token.get("sub")
    if not user_id:
        raise ValueError("User ID not found in token")
    # Always have a request_id so the row stays traceable across
    # retries/resumes even when the caller didn't supply one.
    request_id = request_id or str(uuid.uuid4())
    resolved_api_key: Optional[str] = None
    resolved_agent_id: Optional[str] = None
    # Agent/api_key association only matters for brand-new
    # conversations; existing ones already carry it.
    if api_key and not conversation_id:
        with db_readonly() as conn:
            agent = AgentsRepository(conn).find_by_key(api_key)
            if agent:
                resolved_api_key = agent.get("key")
                if agent_id:
                    resolved_agent_id = agent_id
    with db_session() as conn:
        repo = ConversationsRepository(conn)
        if conversation_id:
            conv = repo.get_any(conversation_id, user_id)
            if conv is None:
                raise ValueError("Conversation not found or unauthorized")
            conv_pg_id = str(conv["id"])
            # Regenerate / edit-prior-question: drop the message at
            # ``index`` and everything after it so the new
            # ``reserve_message`` lands at ``position=index`` rather
            # than appending at the end of the conversation.
            if isinstance(index, int) and index >= 0:
                repo.truncate_after(conv_pg_id, keep_up_to=index - 1)
        else:
            # New conversation: the question prefix serves as a
            # provisional name until the title LLM replaces it.
            fallback_name = (question[:50] if question else "New Conversation")
            conv = repo.create(
                user_id,
                fallback_name,
                agent_id=resolved_agent_id,
                api_key=resolved_api_key,
                is_shared_usage=bool(resolved_agent_id and is_shared_usage),
                shared_token=(
                    shared_token
                    if (resolved_agent_id and is_shared_usage)
                    else None
                ),
            )
            conv_pg_id = str(conv["id"])
        # Placeholder response is shown if the worker dies mid-stream
        # and the row is never finalised.
        row = repo.reserve_message(
            conv_pg_id,
            prompt=question,
            placeholder_response=TERMINATED_RESPONSE_PLACEHOLDER,
            request_id=request_id,
            status=status,
            attachments=attachment_ids,
            model_id=model_id,
        )
        message_id = str(row["id"])
    return {
        "conversation_id": conv_pg_id,
        "message_id": message_id,
        "request_id": request_id,
    }
def update_message_status(self, message_id: str, status: str) -> bool:
    """Cheap status-only transition (e.g. ``pending → streaming``)."""
    if not message_id:
        return False
    with db_session() as conn:
        repo = ConversationsRepository(conn)
        return repo.update_message_status(message_id, status)
def heartbeat_message(self, message_id: str) -> bool:
    """Bump ``message_metadata.last_heartbeat_at`` so the reconciler's
    staleness sweep counts the row as alive. No-ops on terminal rows.
    """
    if not message_id:
        return False
    with db_session() as conn:
        repo = ConversationsRepository(conn)
        return repo.heartbeat_message(message_id)
def finalize_message(
    self,
    message_id: str,
    response: str,
    *,
    thought: str = "",
    sources: Optional[List[Dict[str, Any]]] = None,
    tool_calls: Optional[List[Dict[str, Any]]] = None,
    model_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    status: str = "complete",
    error: Optional[BaseException] = None,
    title_inputs: Optional[Dict[str, Any]] = None,
) -> bool:
    """Commit the response and tool_call confirms in one transaction.

    Args:
        message_id: WAL placeholder row to finalise; falsy → returns False.
        response: final response text.
        thought, sources, tool_calls, metadata: persisted alongside the
            response; source ``text`` fields are clipped to 1000 chars.
        model_id: stored only when not None.
        status: terminal status to write ("complete", "failed", ...).
        error: when ``status == "failed"``, recorded under
            ``metadata["error"]`` unless the caller already set one.
        title_inputs: optional title-generation inputs; the LLM round
            trip runs outside the transaction on success.

    Returns:
        True when the row was updated; False when ``message_id`` is
        falsy or the row was already terminal.
    """
    if not message_id:
        return False
    # Fix: shallow-copy each source before truncating. The original
    # clipped ``source["text"]`` in place, mutating the caller's dicts.
    sources = [dict(source) for source in (sources or [])]
    for source in sources:
        if "text" in source and isinstance(source["text"], str):
            source["text"] = source["text"][:1000]
    merged_metadata: Dict[str, Any] = dict(metadata or {})
    if status == "failed" and error is not None:
        merged_metadata.setdefault(
            "error", f"{type(error).__name__}: {str(error)}"
        )
    update_fields: Dict[str, Any] = {
        "response": response,
        "status": status,
        "thought": thought,
        "sources": sources,
        "tool_calls": tool_calls or [],
        "metadata": merged_metadata,
    }
    if model_id is not None:
        update_fields["model_id"] = model_id
    # Atomic message update + tool_call_attempts confirm; the
    # ``only_if_non_terminal`` guard prevents a late stream from
    # retracting a row the reconciler already escalated.
    with db_session() as conn:
        repo = ConversationsRepository(conn)
        ok = repo.update_message_by_id(
            message_id, update_fields,
            only_if_non_terminal=True,
        )
        if not ok:
            logger.warning(
                f"finalize_message: no row updated for message_id={message_id} "
                f"(possibly already terminal — reconciler may have escalated)"
            )
            return False
        repo.confirm_executed_tool_calls(message_id)
    # Outside the txn — title-gen is a multi-second LLM round trip.
    if title_inputs and status == "complete":
        try:
            with db_session() as conn:
                self._maybe_generate_title(conn, message_id, title_inputs)
        except Exception as e:
            logger.error(
                f"finalize_message title generation failed: {e}",
                exc_info=True,
            )
    return True
def _maybe_generate_title(
    self,
    conn,
    message_id: str,
    title_inputs: Dict[str, Any],
) -> None:
    """Generate an LLM-summarised conversation name if one isn't set yet.

    Args:
        conn: open DB connection owned by the caller.
        message_id: conversation_messages row used to locate the parent
            conversation.
        title_inputs: dict with ``llm``, ``question``, ``response``,
            ``fallback_name``, and optionally ``model_id``.
    """
    llm = title_inputs.get("llm")
    question = title_inputs.get("question") or ""
    response = title_inputs.get("response") or ""
    fallback_name = title_inputs.get("fallback_name") or question[:50]
    # No LLM supplied → nothing to do; the fallback name stands.
    if llm is None:
        return
    row = conn.execute(
        sql_text(
            "SELECT c.id, c.name FROM conversation_messages m "
            "JOIN conversations c ON c.id = m.conversation_id "
            "WHERE m.id = CAST(:mid AS uuid)"
        ),
        {"mid": message_id},
    ).fetchone()
    if row is None:
        return
    conv_id, current_name = str(row[0]), row[1]
    # A name that differs from the fallback means a real title already
    # exists (user-set or previously generated) — don't overwrite it.
    if current_name and current_name != fallback_name:
        return
    messages_summary = [
        {
            "role": "system",
            "content": "You are a helpful assistant that creates concise conversation titles. "
            "Summarize conversations in 3 words or less using the same language as the user.",
        },
        {
            "role": "user",
            "content": "Summarise following conversation in no more than 3 words, "
            "respond ONLY with the summary, use the same language as the "
            "user query \n\nUser: " + question + "\n\n" + "AI: " + response,
        },
    ]
    # ``llm.model_id`` is the upstream/provider-facing name resolved at
    # construction; fall back to the registry id from title_inputs.
    completion = llm.gen(
        model=getattr(llm, "model_id", None) or title_inputs.get("model_id"),
        messages=messages_summary,
        max_tokens=500,
    )
    # Empty/whitespace completion → keep a usable name.
    if not completion or not completion.strip():
        completion = fallback_name or "New Conversation"
    conn.execute(
        sql_text(
            "UPDATE conversations SET name = :name, updated_at = now() "
            "WHERE id = CAST(:id AS uuid)"
        ),
        {"id": conv_id, "name": completion.strip()},
    )
def update_compression_metadata(
self, conversation_id: str, compression_metadata: Dict[str, Any]
) -> None:

View File

@@ -121,6 +121,12 @@ class StreamProcessor:
self.agent_id = self.data.get("agent_id")
self.agent_key = None
self.model_id: Optional[str] = None
# BYOM-resolution scope, set by _validate_and_set_model.
self.model_user_id: Optional[str] = None
# WAL placeholder id pulled from continuation state on resume.
self.reserved_message_id: Optional[str] = None
# Carried through resumes so multi-pause runs keep one request_id.
self.request_id: Optional[str] = None
self.conversation_service = ConversationService()
self.compression_orchestrator = CompressionOrchestrator(
self.conversation_service
@@ -191,16 +197,23 @@ class StreamProcessor:
for query in conversation.get("queries", [])
]
else:
# model_user_id keeps history trim aligned with the BYOM's
# actual context window instead of the default 128k.
self.history = limit_chat_history(
json.loads(self.data.get("history", "[]")), model_id=self.model_id
json.loads(self.data.get("history", "[]")),
model_id=self.model_id,
user_id=self.model_user_id,
)
def _handle_compression(self, conversation: Dict[str, Any]):
"""Handle conversation compression logic using orchestrator."""
try:
# initial_user_id for conversation access; model_user_id
# for BYOM context-window / provider lookups.
result = self.compression_orchestrator.compress_if_needed(
conversation_id=self.conversation_id,
user_id=self.initial_user_id,
model_user_id=self.model_user_id,
model_id=self.model_id,
decoded_token=self.decoded_token,
)
@@ -284,11 +297,18 @@ class StreamProcessor:
from application.core.model_settings import ModelRegistry
requested_model = self.data.get("model_id")
# Caller picks from their own BYOM layer; agent defaults resolve
# under the owner's layer (shared agents have caller != owner).
caller_user_id = self.initial_user_id
owner_user_id = self.agent_config.get("user_id") or caller_user_id
if requested_model:
if not validate_model_id(requested_model):
if not validate_model_id(requested_model, user_id=caller_user_id):
registry = ModelRegistry.get_instance()
available_models = [m.id for m in registry.get_enabled_models()]
available_models = [
m.id
for m in registry.get_enabled_models(user_id=caller_user_id)
]
raise ValueError(
f"Invalid model_id '{requested_model}'. "
f"Available models: {', '.join(available_models[:5])}"
@@ -299,12 +319,17 @@ class StreamProcessor:
)
)
self.model_id = requested_model
self.model_user_id = caller_user_id
else:
agent_default_model = self.agent_config.get("default_model_id", "")
if agent_default_model and validate_model_id(agent_default_model):
if agent_default_model and validate_model_id(
agent_default_model, user_id=owner_user_id
):
self.model_id = agent_default_model
self.model_user_id = owner_user_id
else:
self.model_id = get_default_model_id()
self.model_user_id = None
def _get_agent_key(self, agent_id: Optional[str], user_id: Optional[str]) -> tuple:
"""Get API key for agent with access control."""
@@ -514,6 +539,10 @@ class StreamProcessor:
"allow_system_prompt_override": self._agent_data.get(
"allow_system_prompt_override", False
),
# Owner identity — _validate_and_set_model reads this to
# resolve owner-stored BYOM default_model_id against the
# owner's per-user model layer rather than the caller's.
"user_id": self._agent_data.get("user"),
}
)
@@ -561,7 +590,13 @@ class StreamProcessor:
def _configure_retriever(self):
"""Assemble retriever config with precedence: request > agent > default."""
doc_token_limit = calculate_doc_token_budget(model_id=self.model_id)
# BYOM scope: owner for shared-agent BYOM, caller for own BYOM,
# None for built-ins. Without ``user_id`` here, the doc budget
# falls back to settings.DEFAULT_LLM_TOKEN_LIMIT and overfills
# the upstream context window for any small (e.g. 8k/32k) BYOM.
doc_token_limit = calculate_doc_token_budget(
model_id=self.model_id, user_id=self.model_user_id
)
# Start with defaults
retriever_name = "classic"
@@ -612,6 +647,7 @@ class StreamProcessor:
chunks=self.retriever_config["chunks"],
doc_token_limit=self.retriever_config.get("doc_token_limit", 50000),
model_id=self.model_id,
model_user_id=self.model_user_id,
user_api_key=self.agent_config["user_api_key"],
agent_id=self.agent_id,
decoded_token=self.decoded_token,
@@ -896,6 +932,20 @@ class StreamProcessor:
if not state:
raise ValueError("No pending tool state found for this conversation")
# Claim the resume up-front. ``mark_resuming`` only flips ``pending``
# → ``resuming``; if it returns False, another resume already
# claimed this row (status='resuming') — bail before any further
# LLM/tool work to avoid double-execution. The cleanup janitor
# reverts a stale ``resuming`` claim back to ``pending`` after the
# 10-minute grace window so the user can retry.
if not cont_service.mark_resuming(
conversation_id, self.initial_user_id,
):
raise ValueError(
"Resume already in progress for this conversation; "
"retry after the grace window if it stalls."
)
messages = state["messages"]
pending_tool_calls = state["pending_tool_calls"]
tools_dict = state["tools_dict"]
@@ -903,6 +953,11 @@ class StreamProcessor:
agent_config = state["agent_config"]
model_id = agent_config.get("model_id")
# BYOM scope captured at initial dispatch. None for built-ins or
# caller-owned BYOM where decoded_token['sub'] is already the
# right scope; non-None for shared-agent owner BYOM where the
# caller's identity differs from the model owner's.
model_user_id = agent_config.get("model_user_id")
llm_name = agent_config.get("llm_name", settings.LLM_PROVIDER)
api_key = agent_config.get("api_key")
user_api_key = agent_config.get("user_api_key")
@@ -920,6 +975,7 @@ class StreamProcessor:
decoded_token=self.decoded_token,
model_id=model_id,
agent_id=agent_id,
model_user_id=model_user_id,
)
llm_handler = LLMHandlerCreator.create_handler(llm_name or "default")
tool_executor = ToolExecutor(
@@ -949,6 +1005,7 @@ class StreamProcessor:
"endpoint": "stream",
"llm_name": llm_name,
"model_id": model_id,
"model_user_id": model_user_id,
"api_key": system_api_key,
"agent_id": agent_id,
"user_api_key": user_api_key,
@@ -971,12 +1028,22 @@ class StreamProcessor:
# Store config for the route layer
self.model_id = model_id
# Mirror ``model_user_id`` back onto the processor so the route
# layer (StreamResource) reads the owner scope captured at
# initial dispatch. Without this, ``processor.model_user_id``
# stays at the __init__ default (None) and complete_stream
# falls back to the caller's sub: the post-resume title-LLM
# save misses the owner's BYOM layer, and any second tool
# pause persists ``model_user_id=None`` — losing owner scope
# for every subsequent resume of this conversation.
self.model_user_id = model_user_id
self.agent_id = agent_id
self.agent_config["user_api_key"] = user_api_key
self.conversation_id = conversation_id
# Delete state so it can't be replayed
cont_service.delete_state(conversation_id, self.initial_user_id)
# Reused on resume so the same WAL row gets finalised and
# request_id stays consistent across token_usage rows.
self.reserved_message_id = agent_config.get("reserved_message_id")
self.request_id = agent_config.get("request_id")
return agent, messages, tools_dict, pending_tool_calls, tool_actions
@@ -1022,8 +1089,11 @@ class StreamProcessor:
tools_data=tools_data,
)
# Use the user_id that resolved the model so owner-scoped BYOM
# records dispatch correctly on shared-agent requests.
model_user_id = getattr(self, "model_user_id", self.initial_user_id)
provider = (
get_provider_from_model_id(self.model_id)
get_provider_from_model_id(self.model_id, user_id=model_user_id)
if self.model_id
else settings.LLM_PROVIDER
)
@@ -1048,6 +1118,8 @@ class StreamProcessor:
model_id=self.model_id,
agent_id=self.agent_id,
backup_models=backup_models,
# Owner-scope on shared-agent BYOM dispatch.
model_user_id=model_user_id,
)
llm_handler = LLMHandlerCreator.create_handler(
provider if provider else "default"
@@ -1070,6 +1142,7 @@ class StreamProcessor:
"endpoint": "stream",
"llm_name": provider or settings.LLM_PROVIDER,
"model_id": self.model_id,
"model_user_id": self.model_user_id,
"api_key": system_api_key,
"agent_id": self.agent_id,
"user_api_key": self.agent_config["user_api_key"],
@@ -1097,6 +1170,7 @@ class StreamProcessor:
"doc_token_limit", 50000
),
"model_id": self.model_id,
"model_user_id": self.model_user_id,
"user_api_key": self.agent_config["user_api_key"],
"agent_id": self.agent_id,
"llm_name": provider or settings.LLM_PROVIDER,

View File

@@ -46,7 +46,9 @@ AGENT_TYPE_SCHEMAS = {
"prompt_id",
],
"required_draft": ["name"],
"validate_published": ["name", "description", "prompt_id"],
# ``prompt_id`` intentionally omitted — the "default" sentinel
# is acceptable and maps to NULL downstream.
"validate_published": ["name", "description"],
"validate_draft": [],
"require_source": True,
"fields": [
@@ -1009,12 +1011,16 @@ class UpdateAgent(Resource):
400,
)
else:
# ``prompt_id`` is intentionally omitted: the
# frontend's "default" choice maps to NULL here
# (see the prompt_id branch above), and NULL
# means "use the built-in default prompt" which
# is a valid published-agent state.
missing_published_fields = []
for req_field, field_label in (
("name", "Agent name"),
("description", "Agent description"),
("chunks", "Chunks count"),
("prompt_id", "Prompt"),
("agent_type", "Agent type"),
):
final_value = update_fields.get(
@@ -1028,8 +1034,23 @@ class UpdateAgent(Resource):
extra_final = update_fields.get(
"extra_source_ids", existing_agent.get("extra_source_ids") or [],
)
if not source_final and not extra_final:
missing_published_fields.append("Source")
# ``retriever`` carries the runtime identity for
# agents that publish against the synthetic
# "Default" source (frontend's auto-selected
# ``{name: "Default", retriever: "classic"}``
# entry has no ``id``, so ``source_id`` ends up
# NULL even though the user picked something).
# Without this fallback the most common new-agent
# publish flow gets a 400.
retriever_final = update_fields.get(
"retriever", existing_agent.get("retriever"),
)
if (
not source_final
and not extra_final
and not retriever_final
):
missing_published_fields.append("Source or retriever")
if missing_published_fields:
return make_response(
jsonify(

View File

@@ -1,15 +1,19 @@
"""Agent management webhook handlers."""
import secrets
import uuid
from flask import current_app, jsonify, make_response, request
from flask_restx import Namespace, Resource
from sqlalchemy import text as sql_text
from application.api import api
from application.api.user.base import require_agent
from application.api.user.tasks import process_agent_webhook
from application.core.settings import settings
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.agents import AgentsRepository
from application.storage.db.repositories.idempotency import IdempotencyRepository
from application.storage.db.session import db_readonly, db_session
@@ -18,6 +22,37 @@ agents_webhooks_ns = Namespace(
)
_IDEMPOTENCY_KEY_MAX_LEN = 256
def _read_idempotency_key():
    """Return (key, error_response). Empty header → (None, None); oversized → (None, 400)."""
    key = request.headers.get("Idempotency-Key")
    if not key:
        return None, None
    if len(key) <= _IDEMPOTENCY_KEY_MAX_LEN:
        return key, None
    # Oversized keys are rejected up-front instead of truncated, so the
    # dedup table never stores an ambiguous prefix.
    message = (
        f"Idempotency-Key exceeds maximum length of "
        f"{_IDEMPOTENCY_KEY_MAX_LEN} characters"
    )
    error = make_response(jsonify({"success": False, "message": message}), 400)
    return None, error
def _scoped_idempotency_key(idempotency_key, scope):
"""``{scope}:{key}`` so different agents can't collide on the same key."""
if not idempotency_key or not scope:
return None
return f"{scope}:{idempotency_key}"
@agents_webhooks_ns.route("/agent_webhook")
class AgentWebhook(Resource):
@api.doc(
@@ -68,7 +103,7 @@ class AgentWebhook(Resource):
class AgentWebhookListener(Resource):
method_decorators = [require_agent]
def _enqueue_webhook_task(self, agent_id_str, payload, source_method):
def _enqueue_webhook_task(self, agent_id_str, payload, source_method, agent=None):
if not payload:
current_app.logger.warning(
f"Webhook ({source_method}) received for agent {agent_id_str} with empty payload."
@@ -77,26 +112,94 @@ class AgentWebhookListener(Resource):
f"Incoming {source_method} webhook for agent {agent_id_str}. Enqueuing task with payload: {payload}"
)
try:
task = process_agent_webhook.delay(
agent_id=agent_id_str,
payload=payload,
idempotency_key, key_error = _read_idempotency_key()
if key_error is not None:
return key_error
# Resolve to PG UUID first so dedup writes don't crash on legacy ids.
agent_uuid = None
if agent is not None:
candidate = str(agent.get("id") or "")
if looks_like_uuid(candidate):
agent_uuid = candidate
if idempotency_key and agent_uuid is None:
current_app.logger.warning(
"Skipping webhook idempotency dedup: agent %s has non-UUID id",
agent_id_str,
)
idempotency_key = None
# Agent-scoped (webhooks have no user_id).
scoped_key = _scoped_idempotency_key(idempotency_key, agent_uuid)
# Claim before enqueue; the loser returns the winner's task_id.
predetermined_task_id = None
if scoped_key:
predetermined_task_id = str(uuid.uuid4())
with db_session() as conn:
claimed = IdempotencyRepository(conn).record_webhook(
key=scoped_key,
agent_id=agent_uuid,
task_id=predetermined_task_id,
response_json={
"success": True, "task_id": predetermined_task_id,
},
)
if claimed is None:
with db_readonly() as conn:
cached = IdempotencyRepository(conn).get_webhook(scoped_key)
if cached is not None:
return make_response(jsonify(cached["response_json"]), 200)
return make_response(
jsonify({"success": True, "task_id": "deduplicated"}), 200
)
try:
apply_kwargs = dict(
kwargs={
"agent_id": agent_id_str,
"payload": payload,
# Scoped so the worker dedup row matches the HTTP claim.
"idempotency_key": scoped_key or idempotency_key,
},
)
if predetermined_task_id is not None:
apply_kwargs["task_id"] = predetermined_task_id
task = process_agent_webhook.apply_async(**apply_kwargs)
current_app.logger.info(
f"Task {task.id} enqueued for agent {agent_id_str} ({source_method})."
)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
response_payload = {"success": True, "task_id": task.id}
return make_response(jsonify(response_payload), 200)
except Exception as err:
current_app.logger.error(
f"Error enqueuing webhook task ({source_method}) for agent {agent_id_str}: {err}",
exc_info=True,
)
if scoped_key:
# Roll back the claim so a retry can succeed.
try:
with db_session() as conn:
conn.execute(
sql_text(
"DELETE FROM webhook_dedup "
"WHERE idempotency_key = :k"
),
{"k": scoped_key},
)
except Exception:
current_app.logger.exception(
"Failed to release webhook_dedup claim for key=%s",
scoped_key,
)
return make_response(
jsonify({"success": False, "message": "Error processing webhook"}), 500
)
@api.doc(
description="Webhook listener for agent events (POST). Expects JSON payload, which is used to trigger processing.",
description=(
"Webhook listener for agent events (POST). Expects JSON payload, which "
"is used to trigger processing. Honors an optional ``Idempotency-Key`` "
"header: a repeat request with the same key within 24h returns the "
"original cached response and does not re-enqueue the task."
),
)
def post(self, webhook_token, agent, agent_id_str):
payload = request.get_json()
@@ -110,11 +213,20 @@ class AgentWebhookListener(Resource):
),
400,
)
return self._enqueue_webhook_task(agent_id_str, payload, source_method="POST")
return self._enqueue_webhook_task(
agent_id_str, payload, source_method="POST", agent=agent,
)
@api.doc(
description="Webhook listener for agent events (GET). Uses URL query parameters as payload to trigger processing.",
description=(
"Webhook listener for agent events (GET). Uses URL query parameters as "
"payload to trigger processing. Honors an optional ``Idempotency-Key`` "
"header: a repeat request with the same key within 24h returns the "
"original cached response and does not re-enqueue the task."
),
)
def get(self, webhook_token, agent, agent_id_str):
payload = request.args.to_dict(flat=True)
return self._enqueue_webhook_task(agent_id_str, payload, source_method="GET")
return self._enqueue_webhook_task(
agent_id_str, payload, source_method="GET", agent=agent,
)

View File

@@ -4,8 +4,10 @@ import datetime
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from sqlalchemy import text as sql_text
from application.api import api
from application.storage.db.base_repository import looks_like_uuid, row_to_dict
from application.storage.db.repositories.attachments import AttachmentsRepository
from application.storage.db.repositories.conversations import ConversationsRepository
from application.storage.db.session import db_readonly, db_session
@@ -133,6 +135,7 @@ class GetSingleConversation(Resource):
attachments_repo = AttachmentsRepository(conn)
queries = []
for msg in messages:
metadata = msg.get("metadata") or {}
query = {
"prompt": msg.get("prompt"),
"response": msg.get("response"),
@@ -141,9 +144,15 @@ class GetSingleConversation(Resource):
"tool_calls": msg.get("tool_calls") or [],
"timestamp": msg.get("timestamp"),
"model_id": msg.get("model_id"),
# Lets the client distinguish placeholder rows from
# finalised answers and tail-poll in-flight ones.
"message_id": str(msg["id"]) if msg.get("id") else None,
"status": msg.get("status"),
"request_id": msg.get("request_id"),
"last_heartbeat_at": metadata.get("last_heartbeat_at"),
}
if msg.get("metadata"):
query["metadata"] = msg["metadata"]
if metadata:
query["metadata"] = metadata
# Feedback on conversation_messages is a JSONB blob with
# shape {"text": <str>, "timestamp": <iso>}. The legacy
# frontend consumed a flat scalar feedback string, so
@@ -301,3 +310,61 @@ class SubmitFeedback(Resource):
current_app.logger.error(f"Error submitting feedback: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@conversations_ns.route("/messages/<string:message_id>/tail")
class GetMessageTail(Resource):
    # Polled by the client to reconnect to an in-flight stream; exposes
    # only the fields the tail UI needs, not the whole message row.
    @api.doc(
        description=(
            "Current state of one conversation_messages row, scoped to the "
            "authenticated user. Used to reconnect to an in-flight stream "
            "after a refresh."
        ),
        params={"message_id": "Message UUID"},
    )
    def get(self, message_id):
        # NOTE(review): ``request.decoded_token`` is presumably attached
        # by upstream auth middleware — confirm against the app setup.
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)
        # Reject non-UUID ids before the SQL CAST can blow up.
        if not looks_like_uuid(message_id):
            return make_response(
                jsonify({"success": False, "message": "Invalid message id"}), 400
            )
        user_id = decoded_token.get("sub")
        try:
            with db_readonly() as conn:
                # Owner-or-shared, matching ``ConversationsRepository.get``.
                row = conn.execute(
                    sql_text(
                        "SELECT m.* FROM conversation_messages m "
                        "JOIN conversations c ON c.id = m.conversation_id "
                        "WHERE m.id = CAST(:mid AS uuid) "
                        "AND (c.user_id = :uid OR :uid = ANY(c.shared_with))"
                    ),
                    {"mid": message_id, "uid": user_id},
                ).fetchone()
            if row is None:
                return make_response(jsonify({"status": "not found"}), 404)
            msg = row_to_dict(row)
        except Exception as err:
            current_app.logger.error(
                f"Error tailing message {message_id}: {err}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 400)
        # Heartbeat and error details live in the JSONB metadata column.
        metadata = msg.get("message_metadata") or {}
        return make_response(
            jsonify(
                {
                    "message_id": str(msg["id"]),
                    "status": msg.get("status"),
                    "response": msg.get("response"),
                    "thought": msg.get("thought"),
                    "sources": msg.get("sources") or [],
                    "tool_calls": msg.get("tool_calls") or [],
                    "request_id": msg.get("request_id"),
                    "last_heartbeat_at": metadata.get("last_heartbeat_at"),
                    "error": metadata.get("error"),
                }
            ),
            200,
        )

View File

@@ -0,0 +1,237 @@
"""Per-Celery-task idempotency wrapper backed by ``task_dedup``."""
from __future__ import annotations
import functools
import logging
import threading
import uuid
from typing import Any, Callable, Optional
from application.storage.db.repositories.idempotency import IdempotencyRepository
from application.storage.db.session import db_readonly, db_session
logger = logging.getLogger(__name__)
# Poison-loop cap; transient-failure headroom without infinite retry.
MAX_TASK_ATTEMPTS = 5
# 30s heartbeat / 60s TTL → ~2 missed ticks of slack before reclaim.
LEASE_TTL_SECONDS = 60
LEASE_HEARTBEAT_INTERVAL = 30
# 10 × 60s ≈ 5 min of deferral before giving up on a held lease.
LEASE_RETRY_MAX = 10
def with_idempotency(task_name: str) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
    """Short-circuit on completed key; gate concurrent runs via a lease.

    Entry short-circuits:
    - completed row → return cached result
    - live lease held → retry(countdown=LEASE_TTL_SECONDS)
    - attempt_count > MAX_TASK_ATTEMPTS → poison-loop alert

    Success writes ``completed``; exceptions leave ``pending`` for
    autoretry until the poison-loop guard trips.
    """
    def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
        @functools.wraps(fn)
        def wrapper(self, *args: Any, idempotency_key: Any = None, **kwargs: Any) -> Any:
            # Only non-empty string keys participate in dedup; anything
            # else runs the task directly, passing the raw value through.
            key = idempotency_key if isinstance(idempotency_key, str) and idempotency_key else None
            if key is None:
                return fn(self, *args, idempotency_key=idempotency_key, **kwargs)
            cached = _lookup_completed(key)
            if cached is not None:
                logger.info(
                    "idempotency hit for task=%s key=%s — returning cached result",
                    task_name, key,
                )
                return cached
            # Random owner id distinguishes this worker's lease from any
            # competing claim on the same key.
            owner_id = str(uuid.uuid4())
            attempt = _try_claim_lease(
                key, task_name, _safe_task_id(self), owner_id,
            )
            if attempt is None:
                # Live lease held by another worker. Re-queue and bail
                # quickly — by the time the retry fires (LEASE_TTL
                # seconds), Worker 1 has either finalised (we'll hit
                # ``_lookup_completed`` and return cached) or its lease
                # has expired and we can claim.
                logger.info(
                    "idempotency: live lease held; deferring task=%s key=%s",
                    task_name, key,
                )
                raise self.retry(
                    countdown=LEASE_TTL_SECONDS,
                    max_retries=LEASE_RETRY_MAX,
                )
            if attempt > MAX_TASK_ATTEMPTS:
                logger.error(
                    "idempotency poison-loop guard: task=%s key=%s attempts=%s",
                    task_name, key, attempt,
                    extra={
                        "alert": "idempotency_poison_loop",
                        "task_name": task_name,
                        "idempotency_key": key,
                        "attempts": attempt,
                    },
                )
                poisoned = {
                    "success": False,
                    "error": "idempotency poison-loop guard tripped",
                    "attempts": attempt,
                }
                # Write ``failed`` so later deliveries of the same key
                # short-circuit instead of re-running the poisoned payload.
                _finalize(key, poisoned, status="failed")
                return poisoned
            # Background heartbeat keeps the lease alive across a run
            # that outlives LEASE_TTL_SECONDS.
            heartbeat_thread, heartbeat_stop = _start_lease_heartbeat(
                key, owner_id,
            )
            try:
                result = fn(self, *args, idempotency_key=idempotency_key, **kwargs)
                _finalize(key, result, status="completed")
                return result
            except Exception:
                # Drop the lease so the next retry doesn't wait LEASE_TTL.
                _release_lease(key, owner_id)
                raise
            finally:
                _stop_lease_heartbeat(heartbeat_thread, heartbeat_stop)
        return wrapper
    return decorator
def _lookup_completed(key: str) -> Any:
    """Fetch the cached ``result_json`` for ``key`` when its dedup row
    is already ``completed``; return ``None`` in every other case."""
    with db_readonly() as conn:
        record = IdempotencyRepository(conn).get_task(key)
    if record is not None and record.get("status") == "completed":
        return record.get("result_json")
    return None
def _try_claim_lease(
    key: str, task_name: str, task_id: str, owner_id: str,
) -> Optional[int]:
    """Atomic compare-and-swap on the lease row.

    Returns the row's ``attempt_count`` when the claim succeeds, or
    ``None`` when a live lease is held elsewhere. A DB outage is
    treated as ``attempt=1`` so transient failures don't block all
    task execution; the reconciler later repairs the lease columns.
    """
    try:
        with db_session() as conn:
            repo = IdempotencyRepository(conn)
            return repo.try_claim_lease(
                key=key,
                task_name=task_name,
                task_id=task_id,
                owner_id=owner_id,
                ttl_seconds=LEASE_TTL_SECONDS,
            )
    except Exception:
        logger.exception(
            "idempotency lease-claim failed for key=%s task=%s", key, task_name,
        )
        return 1
def _finalize(key: str, result_json: Any, *, status: str) -> None:
    """Write the terminal row for ``key`` on a best-effort basis.

    A DB outage must never turn a finished task into a failure, so
    all exceptions are logged and swallowed.
    """
    try:
        with db_session() as conn:
            repo = IdempotencyRepository(conn)
            repo.finalize_task(
                key=key, result_json=result_json, status=status,
            )
    except Exception:
        logger.exception(
            "idempotency finalize failed for key=%s status=%s", key, status,
        )
def _release_lease(key: str, owner_id: str) -> None:
    """Best-effort lease release used on the wrapper's exception path.

    Failures are logged and ignored — the lease will simply expire.
    """
    try:
        with db_session() as conn:
            repo = IdempotencyRepository(conn)
            repo.release_lease(key, owner_id)
    except Exception:
        logger.exception("idempotency release-lease failed for key=%s", key)
def _start_lease_heartbeat(
    key: str, owner_id: str,
) -> tuple[threading.Thread, threading.Event]:
    """Launch a daemon thread that bumps ``lease_expires_at`` every
    :data:`LEASE_HEARTBEAT_INTERVAL` seconds until the returned event
    is set.

    Mirrors ``application.worker._start_ingest_heartbeat`` so the two
    durability heartbeats share shape and cadence.
    """
    halt = threading.Event()
    worker = threading.Thread(
        target=_lease_heartbeat_loop,
        args=(key, owner_id, halt, LEASE_HEARTBEAT_INTERVAL),
        daemon=True,
        name=f"idempotency-lease-heartbeat:{key[:32]}",
    )
    worker.start()
    return worker, halt
def _stop_lease_heartbeat(
thread: threading.Thread, stop_event: threading.Event,
) -> None:
"""Signal the heartbeat thread to exit and join with a short timeout."""
stop_event.set()
thread.join(timeout=10)
def _lease_heartbeat_loop(
    key: str,
    owner_id: str,
    stop_event: threading.Event,
    interval: int,
) -> None:
    """Refresh the lease until ``stop_event`` is set or ownership is lost.

    A failed refresh (rowcount 0) means another worker stole the lease
    after expiry — at that point the damage is already possible, so we
    log and keep ticking. Don't escalate to thread death; the main task
    body needs to keep running so its outcome is at least *recorded*.
    """
    # ``Event.wait`` doubles as the sleep: returns True (exit loop) as
    # soon as the stop event fires, False after ``interval`` seconds.
    while not stop_event.wait(interval):
        try:
            with db_session() as conn:
                still_owned = IdempotencyRepository(conn).refresh_lease(
                    key=key, owner_id=owner_id, ttl_seconds=LEASE_TTL_SECONDS,
                )
            if not still_owned:
                logger.warning(
                    "idempotency lease lost mid-task for key=%s "
                    "(another worker may have taken over)",
                    key,
                )
        except Exception:
            # A failed tick is tolerable — the next tick retries, and
            # the lease TTL gives slack for transient DB errors.
            logger.exception(
                "idempotency lease-heartbeat tick failed for key=%s", key,
            )
def _safe_task_id(task_self: Any) -> str:
"""Best-effort extraction of ``self.request.id`` from a Celery task."""
try:
request = getattr(task_self, "request", None)
task_id: Optional[str] = (
getattr(request, "id", None) if request is not None else None
)
except Exception:
task_id = None
return task_id or "unknown"

View File

@@ -1,18 +1,135 @@
from flask import current_app, jsonify, make_response
"""Model routes.
- ``GET /api/models`` — list available models for the current user.
Combines the built-in catalog with the user's BYOM records.
- ``GET/POST/PATCH/DELETE /api/user/models[/<id>]`` — CRUD for the
user's own OpenAI-compatible model registrations (BYOM).
- ``POST /api/user/models/<id>/test`` — sanity-check the upstream
endpoint with a tiny request.
Every BYOM endpoint is user-scoped at the repository layer
(every query filters on ``user_id`` from ``request.decoded_token``).
"""
from __future__ import annotations
import logging
import requests
from flask import current_app, jsonify, make_response, request
from flask_restx import Namespace, Resource
from application.core.model_settings import ModelRegistry
from application.api import api
from application.core.model_registry import ModelRegistry
from application.security.safe_url import (
UnsafeUserUrlError,
pinned_post,
validate_user_base_url,
)
from application.storage.db.repositories.user_custom_models import (
UserCustomModelsRepository,
)
from application.storage.db.session import db_readonly, db_session
from application.utils import check_required_fields
logger = logging.getLogger(__name__)
models_ns = Namespace("models", description="Available models", path="/api")
_CONTEXT_WINDOW_MIN = 1_000
_CONTEXT_WINDOW_MAX = 10_000_000
def _user_id_or_401():
    """Resolve the authenticated user id from the request token.

    Returns ``(user_id, None)`` on success, or ``(None, response)``
    with a 401 when the token is missing or carries no ``sub`` claim.
    """
    token = request.decoded_token
    user_id = token.get("sub") if token else None
    if not user_id:
        return None, make_response(jsonify({"success": False}), 401)
    return user_id, None
def _normalize_capabilities(raw) -> dict:
"""Coerce + bound the user-supplied capabilities payload."""
raw = raw or {}
out = {}
if "supports_tools" in raw:
out["supports_tools"] = bool(raw["supports_tools"])
if "supports_structured_output" in raw:
out["supports_structured_output"] = bool(raw["supports_structured_output"])
if "supports_streaming" in raw:
out["supports_streaming"] = bool(raw["supports_streaming"])
if "attachments" in raw:
atts = raw["attachments"] or []
if not isinstance(atts, list):
raise ValueError("'capabilities.attachments' must be a list")
coerced = [str(a) for a in atts]
# Reject unknown aliases at the API boundary so bad payloads
# never reach the registry layer (where lenient expansion just
# drops them). Raw MIME types (containing ``/``) pass through
# unchanged for parity with the built-in YAML schema.
from application.core.model_yaml import builtin_attachment_aliases
aliases = builtin_attachment_aliases()
for entry in coerced:
if "/" in entry:
continue
if entry not in aliases:
valid = ", ".join(sorted(aliases.keys())) or "<none defined>"
raise ValueError(
f"unknown attachment alias '{entry}' in "
f"'capabilities.attachments'. Valid aliases: {valid}, "
f"or use a raw MIME type like 'image/png'."
)
out["attachments"] = coerced
if "context_window" in raw:
try:
cw = int(raw["context_window"])
except (TypeError, ValueError):
raise ValueError("'capabilities.context_window' must be an integer")
if not (_CONTEXT_WINDOW_MIN <= cw <= _CONTEXT_WINDOW_MAX):
raise ValueError(
f"'capabilities.context_window' must be between "
f"{_CONTEXT_WINDOW_MIN} and {_CONTEXT_WINDOW_MAX}"
)
out["context_window"] = cw
return out
def _row_to_response(row: dict) -> dict:
"""Wire-format projection — never includes the API key."""
return {
"id": str(row["id"]),
"upstream_model_id": row["upstream_model_id"],
"display_name": row["display_name"],
"description": row.get("description") or "",
"base_url": row["base_url"],
"capabilities": row.get("capabilities") or {},
"enabled": bool(row.get("enabled", True)),
"source": "user",
}
@models_ns.route("/models")
class ModelsListResource(Resource):
def get(self):
"""Get list of available models with their capabilities."""
"""Get list of available models with their capabilities.
When the request is authenticated, the response includes the
user's own BYOM registrations alongside the built-in catalog.
"""
try:
user_id = None
decoded_token = getattr(request, "decoded_token", None)
if decoded_token:
user_id = decoded_token.get("sub")
registry = ModelRegistry.get_instance()
models = registry.get_enabled_models()
models = registry.get_enabled_models(user_id=user_id)
response = {
"models": [model.to_dict() for model in models],
@@ -23,3 +140,382 @@ class ModelsListResource(Resource):
current_app.logger.error(f"Error fetching models: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 500)
return make_response(jsonify(response), 200)
@models_ns.route("/user/models")
class UserModelsCollectionResource(Resource):
    """Collection endpoint for the current user's BYOM registrations."""

    @api.doc(description="List the current user's BYOM custom models")
    def get(self):
        """Return every BYOM record owned by the authenticated user."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        try:
            with db_readonly() as conn:
                rows = UserCustomModelsRepository(conn).list_for_user(user_id)
            return make_response(
                jsonify({"models": [_row_to_response(r) for r in rows]}), 200
            )
        except Exception as e:
            current_app.logger.error(
                f"Error listing user custom models: {e}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 500)

    @api.doc(description="Register a new BYOM custom model")
    def post(self):
        """Create a BYOM record: validate, SSRF-check, persist, bust cache."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        data = request.get_json() or {}
        missing = check_required_fields(
            data,
            ["upstream_model_id", "display_name", "base_url", "api_key"],
        )
        if missing:
            return missing
        # SECURITY: reject blank api_key — would leak instance API key
        # to the user-supplied base_url via LLMCreator fallback.
        for required_nonblank in (
            "upstream_model_id",
            "display_name",
            "base_url",
            "api_key",
        ):
            value = data.get(required_nonblank)
            if not isinstance(value, str) or not value.strip():
                return make_response(
                    jsonify(
                        {
                            "success": False,
                            "error": f"'{required_nonblank}' must be a non-empty string",
                        }
                    ),
                    400,
                )
        # SSRF guard at create time. Re-runs at dispatch time (LLMCreator)
        # as defense in depth against DNS rebinding and pre-guard rows.
        try:
            validate_user_base_url(data["base_url"])
        except UnsafeUserUrlError as e:
            return make_response(
                jsonify({"success": False, "error": str(e)}), 400
            )
        # Validation errors from the capabilities payload map to 400.
        try:
            capabilities = _normalize_capabilities(data.get("capabilities"))
        except ValueError as e:
            return make_response(
                jsonify({"success": False, "error": str(e)}), 400
            )
        try:
            with db_session() as conn:
                row = UserCustomModelsRepository(conn).create(
                    user_id=user_id,
                    upstream_model_id=data["upstream_model_id"],
                    display_name=data["display_name"],
                    description=data.get("description") or "",
                    base_url=data["base_url"],
                    api_key_plaintext=data["api_key"],
                    capabilities=capabilities,
                    enabled=bool(data.get("enabled", True)),
                )
        except Exception as e:
            current_app.logger.error(
                f"Error creating user custom model: {e}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 500)
        # Bust the per-user registry cache so GET /api/models sees the
        # new record immediately.
        ModelRegistry.invalidate_user(user_id)
        return make_response(jsonify(_row_to_response(row)), 201)
@models_ns.route("/user/models/<string:model_id>")
class UserModelResource(Resource):
    """Item endpoint for one BYOM record; every query is user-scoped."""

    @api.doc(description="Get one BYOM custom model")
    def get(self, model_id):
        """Fetch one record; 404 when absent or owned by another user."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        try:
            with db_readonly() as conn:
                row = UserCustomModelsRepository(conn).get(model_id, user_id)
        except Exception as e:
            current_app.logger.error(
                f"Error fetching user custom model: {e}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 500)
        if row is None:
            return make_response(jsonify({"success": False}), 404)
        return make_response(jsonify(_row_to_response(row)), 200)

    @api.doc(description="Update a BYOM custom model (partial)")
    def patch(self, model_id):
        """Partially update a record; blank api_key means keep existing."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        data = request.get_json() or {}
        # Reject present-but-blank values for fields where blank doesn't
        # mean "no change". (The api_key special case — blank means "keep
        # existing" — is handled below.)
        for required_nonblank in (
            "upstream_model_id",
            "display_name",
            "base_url",
        ):
            if required_nonblank in data:
                value = data[required_nonblank]
                if not isinstance(value, str) or not value.strip():
                    return make_response(
                        jsonify(
                            {
                                "success": False,
                                "error": f"'{required_nonblank}' cannot be blank",
                            }
                        ),
                        400,
                    )
        # Re-run the SSRF guard whenever the base_url changes.
        if "base_url" in data and data["base_url"]:
            try:
                validate_user_base_url(data["base_url"])
            except UnsafeUserUrlError as e:
                return make_response(
                    jsonify({"success": False, "error": str(e)}), 400
                )
        update_fields: dict = {}
        for k in (
            "upstream_model_id",
            "display_name",
            "description",
            "base_url",
            "enabled",
        ):
            if k in data:
                update_fields[k] = data[k]
        if "capabilities" in data:
            try:
                update_fields["capabilities"] = _normalize_capabilities(
                    data["capabilities"]
                )
            except ValueError as e:
                return make_response(
                    jsonify({"success": False, "error": str(e)}), 400
                )
        # PATCH semantics: blank/missing api_key → keep the existing
        # ciphertext; non-empty api_key → re-encrypt and replace.
        if data.get("api_key"):
            update_fields["api_key_plaintext"] = data["api_key"]
        if not update_fields:
            return make_response(
                jsonify({"success": False, "error": "no updatable fields"}), 400
            )
        try:
            with db_session() as conn:
                ok = UserCustomModelsRepository(conn).update(
                    model_id, user_id, update_fields
                )
        except Exception as e:
            current_app.logger.error(
                f"Error updating user custom model: {e}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 500)
        if not ok:
            return make_response(jsonify({"success": False}), 404)
        ModelRegistry.invalidate_user(user_id)
        # NOTE(review): this re-read runs outside any try/except — a DB
        # error here, or a concurrent delete leaving ``row`` as None,
        # would raise unhandled. Confirm whether a raw 500 here is
        # acceptable or should mirror the structured handling above.
        with db_readonly() as conn:
            row = UserCustomModelsRepository(conn).get(model_id, user_id)
        return make_response(jsonify(_row_to_response(row)), 200)

    @api.doc(description="Delete a BYOM custom model")
    def delete(self, model_id):
        """Delete a record; 404 when it isn't the caller's to delete."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        try:
            with db_session() as conn:
                ok = UserCustomModelsRepository(conn).delete(model_id, user_id)
        except Exception as e:
            current_app.logger.error(
                f"Error deleting user custom model: {e}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 500)
        if not ok:
            return make_response(jsonify({"success": False}), 404)
        ModelRegistry.invalidate_user(user_id)
        return make_response(jsonify({"success": True}), 200)
def _run_connection_test(
    base_url: str, api_key: str, upstream_model_id: str
):
    """Send a 1-token chat-completion to verify a BYOM endpoint.

    Returns ``(body, http_status)``. Upstream errors return 200 with
    ``ok=False`` so the UI can render inline errors; only local SSRF
    rejection returns 400.
    """
    # OpenAI-compatible path; trailing slash stripped to avoid "//".
    url = base_url.rstrip("/") + "/chat/completions"
    payload = {
        "model": upstream_model_id,
        "messages": [{"role": "user", "content": "hi"}],
        "max_tokens": 1,
        "stream": False,
    }
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    try:
        # pinned_post closes the DNS-rebinding window. Redirects off
        # because 3xx could bounce to an internal address (the SSRF
        # guard only validates the supplied URL).
        resp = pinned_post(
            url,
            json=payload,
            headers=headers,
            timeout=5,
            allow_redirects=False,
        )
    except UnsafeUserUrlError as e:
        # Local rejection: the only path that returns a non-200 status.
        return {"ok": False, "error": str(e)}, 400
    except requests.RequestException as e:
        return {"ok": False, "error": f"connection error: {e}"}, 200
    # Treat any redirect as a failure rather than following it.
    if 300 <= resp.status_code < 400:
        return (
            {
                "ok": False,
                "error": (
                    f"upstream returned HTTP {resp.status_code} "
                    "redirect; refusing to follow"
                ),
            },
            200,
        )
    if resp.status_code >= 400:
        # Cap and only reflect JSON to avoid body-exfil via non-API responses.
        content_type = (resp.headers.get("Content-Type") or "").lower()
        if "application/json" in content_type:
            text = (resp.text or "")[:500]
            error_msg = f"upstream returned HTTP {resp.status_code}: {text}"
        else:
            error_msg = f"upstream returned HTTP {resp.status_code}"
        return {"ok": False, "error": error_msg}, 200
    return {"ok": True}, 200
@models_ns.route("/user/models/test")
class UserModelTestPayloadResource(Resource):
    """Ad-hoc connection test for an unsaved BYOM payload."""

    @api.doc(
        description=(
            "Test an arbitrary BYOM payload (display_name / model id / "
            "base_url / api_key) without saving. Used by the UI's 'Test "
            "connection' button so the user can validate before they "
            "Save. Same SSRF guard, same 1-token request, same 5s "
            "timeout as the by-id variant."
        )
    )
    def post(self):
        """Validate the payload fields, then run the shared probe."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        data = request.get_json() or {}
        missing = check_required_fields(
            data, ["base_url", "api_key", "upstream_model_id"]
        )
        if missing:
            return missing
        # All SSRF/upstream handling lives in _run_connection_test.
        body, status = _run_connection_test(
            data["base_url"], data["api_key"], data["upstream_model_id"]
        )
        return make_response(jsonify(body), status)
@models_ns.route("/user/models/<string:model_id>/test")
class UserModelTestResource(Resource):
    """Connection test for a saved BYOM record, with optional overrides."""

    @api.doc(
        description=(
            "Test a saved BYOM record. Defaults to the stored "
            "base_url / upstream_model_id / encrypted api_key, but "
            "any of those can be overridden via the request body so "
            "the UI can test in-flight edits before saving. Used by "
            "the 'Test connection' button in edit mode."
        )
    )
    def post(self, model_id):
        """Resolve stored-vs-override fields, then run the shared probe."""
        user_id, err = _user_id_or_401()
        if err:
            return err
        data = request.get_json() or {}
        # Per-field overrides; blank/missing falls back to stored value.
        override_base_url = (data.get("base_url") or "").strip() or None
        override_upstream_model_id = (
            data.get("upstream_model_id") or ""
        ).strip() or None
        override_api_key = (data.get("api_key") or "").strip() or None
        try:
            with db_readonly() as conn:
                repo = UserCustomModelsRepository(conn)
                row = repo.get(model_id, user_id)
                if row is None:
                    return make_response(jsonify({"success": False}), 404)
                # Only decrypt when no override key was supplied.
                # NOTE(review): calls the repository's private
                # ``_decrypt_api_key`` — consider promoting it to a
                # public helper so this coupling is explicit.
                stored_api_key = (
                    repo._decrypt_api_key(
                        row.get("api_key_encrypted", ""), user_id
                    )
                    if not override_api_key
                    else None
                )
        except Exception as e:
            current_app.logger.error(
                f"Error loading user custom model for test: {e}", exc_info=True
            )
            return make_response(
                jsonify({"ok": False, "error": "internal error loading model"}),
                500,
            )
        api_key = override_api_key or stored_api_key
        # Neither override nor a decryptable stored key → cannot probe.
        if not api_key:
            return make_response(
                jsonify(
                    {
                        "ok": False,
                        "error": (
                            "Stored API key could not be decrypted. The "
                            "encryption secret may have rotated. Re-save "
                            "the model with the API key to recover."
                        ),
                    }
                ),
                400,
            )
        base_url = override_base_url or row["base_url"]
        upstream_model_id = (
            override_upstream_model_id or row["upstream_model_id"]
        )
        body, status = _run_connection_test(
            base_url, api_key, upstream_model_id
        )
        return make_response(jsonify(body), status)

View File

@@ -0,0 +1,196 @@
"""Reconciler tick: sweep stuck rows and escalate to terminal status + alert."""
from __future__ import annotations
import logging
import uuid
from typing import Any, Dict, Optional
from sqlalchemy import Connection
from application.api.user.idempotency import MAX_TASK_ATTEMPTS
from application.core.settings import settings
from application.storage.db.engine import get_engine
from application.storage.db.repositories.reconciliation import (
ReconciliationRepository,
)
from application.storage.db.repositories.stack_logs import StackLogsRepository
logger = logging.getLogger(__name__)
MAX_MESSAGE_RECONCILE_ATTEMPTS = 3
def run_reconciliation() -> Dict[str, Any]:
    """Single tick of the reconciler. Five sweeps, FOR UPDATE SKIP LOCKED.

    Stuck ``executed`` tool calls always flip to ``failed`` — operators
    handle cleanup manually via the structured alert. The side effect is
    assumed to have committed; no automated rollback is attempted.

    Stuck ``task_dedup`` rows (lease expired AND attempts >= max)
    promote to ``failed`` so a same-key retry can re-claim instead of
    sitting in ``pending`` until 24 h TTL.

    Returns:
        A summary dict of per-sweep counters (or a ``skipped`` marker
        when no Postgres URI is configured).
    """
    # No DB configured → nothing to sweep; report and exit cleanly.
    if not settings.POSTGRES_URI:
        return {
            "messages_failed": 0,
            "tool_calls_failed": 0,
            "skipped": "POSTGRES_URI not set",
        }
    engine = get_engine()
    summary = {
        "messages_failed": 0,
        "tool_calls_failed": 0,
        "ingests_stalled": 0,
        "idempotency_pending_failed": 0,
    }
    # Sweep 1: messages stuck in pending/streaming. Each message gets
    # MAX_MESSAGE_RECONCILE_ATTEMPTS grace ticks before being failed.
    with engine.begin() as conn:
        repo = ReconciliationRepository(conn)
        for msg in repo.find_and_lock_stuck_messages():
            new_count = repo.increment_message_reconcile_attempts(msg["id"])
            if new_count >= MAX_MESSAGE_RECONCILE_ATTEMPTS:
                repo.mark_message_failed(
                    msg["id"],
                    error=(
                        "reconciler: stuck in pending/streaming for >5 min "
                        f"after {new_count} attempts"
                    ),
                )
                summary["messages_failed"] += 1
                _emit_alert(
                    conn,
                    name="reconciler_message_failed",
                    user_id=msg.get("user_id"),
                    detail={
                        "message_id": str(msg["id"]),
                        "attempts": new_count,
                    },
                )
    # Sweep 2: tool calls stuck in 'proposed' — failed immediately,
    # no grace counter (the side effect never started).
    with engine.begin() as conn:
        repo = ReconciliationRepository(conn)
        for row in repo.find_and_lock_proposed_tool_calls():
            repo.mark_tool_call_failed(
                row["call_id"],
                error=(
                    "reconciler: stuck in 'proposed' for >5 min; "
                    "side effect status unknown"
                ),
            )
            summary["tool_calls_failed"] += 1
            _emit_alert(
                conn,
                name="reconciler_tool_call_failed_proposed",
                user_id=None,
                detail={
                    "call_id": row["call_id"],
                    "tool_name": row.get("tool_name"),
                },
            )
    # Sweep 3: tool calls stuck in 'executed' — side effect assumed
    # committed; flip to failed and alert for manual cleanup.
    with engine.begin() as conn:
        repo = ReconciliationRepository(conn)
        for row in repo.find_and_lock_executed_tool_calls():
            repo.mark_tool_call_failed(
                row["call_id"],
                error=(
                    "reconciler: executed-not-confirmed; side effect "
                    "assumed committed, manual cleanup required"
                ),
            )
            summary["tool_calls_failed"] += 1
            _emit_alert(
                conn,
                name="reconciler_tool_call_failed_executed",
                user_id=None,
                detail={
                    "call_id": row["call_id"],
                    "tool_name": row.get("tool_name"),
                    "action_name": row.get("action_name"),
                },
            )
    # Q4: ingest checkpoints whose heartbeat has gone silent. The
    # reconciler only escalates (alerts) — it doesn't kill the worker
    # or roll back the partial embed. The next dispatch resumes from
    # ``last_index`` thanks to the per-chunk checkpoint, so this is an
    # observability sweep, not a recovery action.
    with engine.begin() as conn:
        repo = ReconciliationRepository(conn)
        for row in repo.find_and_lock_stalled_ingests():
            summary["ingests_stalled"] += 1
            _emit_alert(
                conn,
                name="reconciler_ingest_stalled",
                user_id=None,
                detail={
                    "source_id": str(row.get("source_id")),
                    "embedded_chunks": row.get("embedded_chunks"),
                    "total_chunks": row.get("total_chunks"),
                    "last_updated": str(row.get("last_updated")),
                },
            )
            # Bump the heartbeat so we don't re-alert every tick.
            repo.touch_ingest_progress(str(row["source_id"]))
    # Q5: idempotency rows whose lease expired with attempts exhausted.
    # The wrapper's poison-loop guard normally finalises these, but if
    # the wrapper itself died mid-task (worker SIGKILL, OOM during
    # heartbeat) the row sits in ``pending`` blocking same-key retries
    # via ``_lookup_completed`` returning None for the whole 24 h TTL.
    # Promote to ``failed`` so a retry can re-claim and either resume
    # or fail loudly.
    with engine.begin() as conn:
        repo = ReconciliationRepository(conn)
        for row in repo.find_stuck_idempotency_pending(
            max_attempts=MAX_TASK_ATTEMPTS,
        ):
            error_msg = (
                "reconciler: idempotency lease expired with attempts "
                f"({row['attempt_count']}) >= {MAX_TASK_ATTEMPTS}; "
                "task abandoned"
            )
            repo.mark_idempotency_pending_failed(
                row["idempotency_key"], error=error_msg,
            )
            summary["idempotency_pending_failed"] += 1
            _emit_alert(
                conn,
                name="reconciler_idempotency_pending_failed",
                user_id=None,
                detail={
                    "idempotency_key": row["idempotency_key"],
                    "task_name": row.get("task_name"),
                    "task_id": row.get("task_id"),
                    "attempts": row.get("attempt_count"),
                },
            )
    return summary
def _emit_alert(
    conn: Connection,
    *,
    name: str,
    user_id: Optional[str],
    detail: Dict[str, Any],
) -> None:
    """Structured ``logger.error`` plus a ``stack_logs`` row for operators.

    Args:
        conn: Open transaction the stack_logs row is written into.
        name: Alert identifier; goes into the log ``extra`` and the row.
        user_id: Affected user when known, else None.
        detail: Alert-specific fields merged into the log ``extra``.
    """
    extra = {"alert": name, **detail}
    logger.error("reconciler alert: %s", name, extra=extra)
    # Best-effort: a failed stack_logs insert must not abort the
    # reconciler sweep that triggered the alert.
    try:
        StackLogsRepository(conn).insert(
            activity_id=str(uuid.uuid4()),
            endpoint="reconciliation_worker",
            level="ERROR",
            user_id=user_id,
            query=name,
            stacks=[extra],
        )
    except Exception:
        logger.exception("reconciler: failed to write stack_logs row for %s", name)

View File

@@ -3,16 +3,19 @@
import json
import os
import tempfile
import uuid
import zipfile
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from sqlalchemy import text as sql_text
from application.api import api
from application.api.user.tasks import ingest, ingest_connector_task, ingest_remote
from application.core.settings import settings
from application.parser.connectors.connector_creator import ConnectorCreator
from application.parser.file.constants import SUPPORTED_SOURCE_EXTENSIONS
from application.storage.db.repositories.idempotency import IdempotencyRepository
from application.storage.db.repositories.sources import SourcesRepository
from application.storage.db.session import db_readonly, db_session
from application.storage.storage_creator import StorageCreator
@@ -30,6 +33,79 @@ sources_upload_ns = Namespace(
)
_IDEMPOTENCY_KEY_MAX_LEN = 256
def _read_idempotency_key():
    """Return (key, error_response). Empty header → (None, None); oversized → (None, 400)."""
    key = request.headers.get("Idempotency-Key")
    # Absent or empty header: idempotency simply not requested.
    if not key:
        return None, None
    # Bound the key so it can't be abused as an unbounded DB payload.
    if len(key) > _IDEMPOTENCY_KEY_MAX_LEN:
        return None, make_response(
            jsonify(
                {
                    "success": False,
                    "message": (
                        f"Idempotency-Key exceeds maximum length of "
                        f"{_IDEMPOTENCY_KEY_MAX_LEN} characters"
                    ),
                }
            ),
            400,
        )
    return key, None
def _scoped_idempotency_key(idempotency_key, scope):
"""``{scope}:{key}`` so different users can't collide on the same key."""
if not idempotency_key or not scope:
return None
return f"{scope}:{idempotency_key}"
def _claim_task_or_get_cached(key, task_name):
    """Claim ``key`` for this request OR return the winner's cached payload.

    Pre-generates the celery task_id so a losing writer sees the same
    id immediately. Returns ``(task_id, cached_response)``; non-None
    cached means the caller should return without enqueuing.
    """
    predetermined_id = str(uuid.uuid4())
    with db_session() as conn:
        claimed = IdempotencyRepository(conn).claim_task(
            key=key, task_name=task_name, task_id=predetermined_id,
        )
    if claimed is not None:
        return claimed["task_id"], None
    # Lost the race: surface whatever task_id the winner recorded so
    # the client can poll the same task.
    with db_readonly() as conn:
        existing = IdempotencyRepository(conn).get_task(key)
    cached_id = existing.get("task_id") if existing else None
    return None, {
        "success": True,
        "task_id": cached_id or "deduplicated",
    }
def _release_claim(key):
    """Drop a pending claim so a client retry can re-claim it."""
    # Best-effort: only 'pending' rows are deleted, so a claim that
    # already finalized is never clobbered. Failures are logged and
    # swallowed — the 24h TTL is the backstop.
    try:
        with db_session() as conn:
            conn.execute(
                sql_text(
                    "DELETE FROM task_dedup WHERE idempotency_key = :k "
                    "AND status = 'pending'"
                ),
                {"k": key},
            )
    except Exception:
        current_app.logger.exception(
            "Failed to release task_dedup claim for key=%s", key,
        )
def _enforce_audio_path_size_limit(file_path: str, filename: str) -> None:
if not is_audio_filename(filename):
return
@@ -49,17 +125,38 @@ class UploadFile(Resource):
)
)
@api.doc(
description="Uploads a file to be vectorized and indexed",
description=(
"Uploads a file to be vectorized and indexed. Honors an optional "
"``Idempotency-Key`` header: a repeat request with the same key "
"within 24h returns the original cached response without re-enqueuing."
),
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
idempotency_key, key_error = _read_idempotency_key()
if key_error is not None:
return key_error
# User-scoped to avoid cross-user collisions; also feeds
# ``_derive_source_id`` so uuid5 stays user-disjoint.
scoped_key = _scoped_idempotency_key(idempotency_key, user)
# Claim before enqueue; the loser returns the winner's task_id.
predetermined_task_id = None
if scoped_key:
predetermined_task_id, cached = _claim_task_or_get_cached(
scoped_key, "ingest",
)
if cached is not None:
return make_response(jsonify(cached), 200)
data = request.form
files = request.files.getlist("file")
required_fields = ["user", "name"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields or not files or all(file.filename == "" for file in files):
if scoped_key:
_release_claim(scoped_key)
return make_response(
jsonify(
{
@@ -69,7 +166,6 @@ class UploadFile(Resource):
),
400,
)
user = decoded_token.get("sub")
job_name = request.form["name"]
# Create safe versions for filesystem operations
@@ -140,16 +236,27 @@ class UploadFile(Resource):
file_path = f"{base_path}/{safe_file}"
with open(temp_file_path, "rb") as f:
storage.save_file(f, file_path)
task = ingest.delay(
settings.UPLOAD_FOLDER,
list(SUPPORTED_SOURCE_EXTENSIONS),
job_name,
user,
file_path=base_path,
filename=dir_name,
file_name_map=file_name_map,
ingest_kwargs = dict(
args=(
settings.UPLOAD_FOLDER,
list(SUPPORTED_SOURCE_EXTENSIONS),
job_name,
user,
),
kwargs={
"file_path": base_path,
"filename": dir_name,
"file_name_map": file_name_map,
# Scoped so the worker dedup row matches the HTTP claim.
"idempotency_key": scoped_key or idempotency_key,
},
)
if predetermined_task_id is not None:
ingest_kwargs["task_id"] = predetermined_task_id
task = ingest.apply_async(**ingest_kwargs)
except AudioFileTooLargeError:
if scoped_key:
_release_claim(scoped_key)
return make_response(
jsonify(
{
@@ -161,8 +268,13 @@ class UploadFile(Resource):
)
except Exception as err:
current_app.logger.error(f"Error uploading file: {err}", exc_info=True)
if scoped_key:
_release_claim(scoped_key)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
# Predetermined id matches the dedup-claim row; loser GET sees same.
response_task_id = predetermined_task_id or task.id
response_payload = {"success": True, "task_id": response_task_id}
return make_response(jsonify(response_payload), 200)
@sources_upload_ns.route("/remote")
@@ -182,17 +294,38 @@ class UploadRemote(Resource):
)
)
@api.doc(
description="Uploads remote source for vectorization",
description=(
"Uploads remote source for vectorization. Honors an optional "
"``Idempotency-Key`` header: a repeat request with the same key "
"within 24h returns the original cached response without re-enqueuing."
),
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
idempotency_key, key_error = _read_idempotency_key()
if key_error is not None:
return key_error
scoped_key = _scoped_idempotency_key(idempotency_key, user)
data = request.form
required_fields = ["user", "source", "name", "data"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
task_name_for_dedup = (
"ingest_connector_task"
if data.get("source") in ConnectorCreator.get_supported_connectors()
else "ingest_remote"
)
predetermined_task_id = None
if scoped_key:
predetermined_task_id, cached = _claim_task_or_get_cached(
scoped_key, task_name_for_dedup,
)
if cached is not None:
return make_response(jsonify(cached), 200)
try:
config = json.loads(data["data"])
source_data = None
@@ -208,6 +341,8 @@ class UploadRemote(Resource):
elif data["source"] in ConnectorCreator.get_supported_connectors():
session_token = config.get("session_token")
if not session_token:
if scoped_key:
_release_claim(scoped_key)
return make_response(
jsonify(
{
@@ -236,31 +371,47 @@ class UploadRemote(Resource):
config["file_ids"] = file_ids
config["folder_ids"] = folder_ids
task = ingest_connector_task.delay(
job_name=data["name"],
user=decoded_token.get("sub"),
source_type=data["source"],
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=config.get("recursive", False),
retriever=config.get("retriever", "classic"),
)
return make_response(
jsonify({"success": True, "task_id": task.id}), 200
)
task = ingest_remote.delay(
source_data=source_data,
job_name=data["name"],
user=decoded_token.get("sub"),
loader=data["source"],
)
connector_kwargs = {
"kwargs": {
"job_name": data["name"],
"user": user,
"source_type": data["source"],
"session_token": session_token,
"file_ids": file_ids,
"folder_ids": folder_ids,
"recursive": config.get("recursive", False),
"retriever": config.get("retriever", "classic"),
"idempotency_key": scoped_key or idempotency_key,
},
}
if predetermined_task_id is not None:
connector_kwargs["task_id"] = predetermined_task_id
task = ingest_connector_task.apply_async(**connector_kwargs)
response_task_id = predetermined_task_id or task.id
response_payload = {"success": True, "task_id": response_task_id}
return make_response(jsonify(response_payload), 200)
remote_kwargs = {
"kwargs": {
"source_data": source_data,
"job_name": data["name"],
"user": user,
"loader": data["source"],
"idempotency_key": scoped_key or idempotency_key,
},
}
if predetermined_task_id is not None:
remote_kwargs["task_id"] = predetermined_task_id
task = ingest_remote.apply_async(**remote_kwargs)
except Exception as err:
current_app.logger.error(
f"Error uploading remote source: {err}", exc_info=True
)
if scoped_key:
_release_claim(scoped_key)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
response_task_id = predetermined_task_id or task.id
response_payload = {"success": True, "task_id": response_task_id}
return make_response(jsonify(response_payload), 200)
@sources_upload_ns.route("/manage_source_files")
@@ -305,6 +456,10 @@ class ManageSourceFiles(Resource):
jsonify({"success": False, "message": "Unauthorized"}), 401
)
user = decoded_token.get("sub")
idempotency_key, key_error = _read_idempotency_key()
if key_error is not None:
return key_error
scoped_key = _scoped_idempotency_key(idempotency_key, user)
source_id = request.form.get("source_id")
operation = request.form.get("operation")
@@ -347,6 +502,12 @@ class ManageSourceFiles(Resource):
jsonify({"success": False, "message": "Database error"}), 500
)
resolved_source_id = str(source["id"])
# Flips to True after each branch's ``apply_async`` returns
# successfully — at that point the worker owns the predetermined
# task_id. The outer ``except`` only releases the claim while
# this is False, so a post-``apply_async`` failure (jsonify,
# make_response, etc.) doesn't double-enqueue on the next retry.
claim_transferred = False
try:
storage = StorageCreator.get_storage()
source_file_path = source.get("file_path", "")
@@ -379,6 +540,21 @@ class ManageSourceFiles(Resource):
),
400,
)
# Claim before any storage mutation so a duplicate request
# short-circuits without touching the filesystem. Mirrors
# the pattern in ``UploadFile.post`` / ``UploadRemote.post``
# — without it ``.delay()`` would enqueue twice for two
# racing same-key POSTs (the worker decorator only
# deduplicates *after* completion).
predetermined_task_id = None
if scoped_key:
predetermined_task_id, cached = _claim_task_or_get_cached(
scoped_key, "reingest_source_task",
)
if cached is not None:
return make_response(jsonify(cached), 200)
added_files = []
map_updated = False
@@ -414,9 +590,15 @@ class ManageSourceFiles(Resource):
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(
source_id=resolved_source_id, user=user
task = reingest_source_task.apply_async(
kwargs={
"source_id": resolved_source_id,
"user": user,
"idempotency_key": scoped_key or idempotency_key,
},
task_id=predetermined_task_id,
)
claim_transferred = True
return make_response(
jsonify(
@@ -455,10 +637,8 @@ class ManageSourceFiles(Resource):
),
400,
)
# Remove files from storage and directory structure
removed_files = []
map_updated = False
# Path-traversal guard runs *before* the claim so a 400
# for an invalid path doesn't leave a pending dedup row.
for file_path in file_paths:
if ".." in str(file_path) or str(file_path).startswith("/"):
return make_response(
@@ -470,6 +650,22 @@ class ManageSourceFiles(Resource):
),
400,
)
# Claim before any storage mutation. See ``add`` branch
# comment for rationale.
predetermined_task_id = None
if scoped_key:
predetermined_task_id, cached = _claim_task_or_get_cached(
scoped_key, "reingest_source_task",
)
if cached is not None:
return make_response(jsonify(cached), 200)
# Remove files from storage and directory structure
removed_files = []
map_updated = False
for file_path in file_paths:
full_path = f"{source_file_path}/{file_path}"
# Remove from storage
@@ -491,9 +687,15 @@ class ManageSourceFiles(Resource):
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(
source_id=resolved_source_id, user=user
task = reingest_source_task.apply_async(
kwargs={
"source_id": resolved_source_id,
"user": user,
"idempotency_key": scoped_key or idempotency_key,
},
task_id=predetermined_task_id,
)
claim_transferred = True
return make_response(
jsonify(
@@ -552,6 +754,16 @@ class ManageSourceFiles(Resource):
),
404,
)
# Claim before mutation. See ``add`` branch for rationale.
predetermined_task_id = None
if scoped_key:
predetermined_task_id, cached = _claim_task_or_get_cached(
scoped_key, "reingest_source_task",
)
if cached is not None:
return make_response(jsonify(cached), 200)
success = storage.remove_directory(full_directory_path)
if not success:
@@ -560,6 +772,11 @@ class ManageSourceFiles(Resource):
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
f"Full path: {full_directory_path}"
)
# Release so a client retry can reclaim — otherwise
# the next request would silently 200-cache to the
# task_id that never enqueued.
if scoped_key:
_release_claim(scoped_key)
return make_response(
jsonify(
{"success": False, "message": "Failed to remove directory"}
@@ -591,9 +808,15 @@ class ManageSourceFiles(Resource):
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(
source_id=resolved_source_id, user=user
task = reingest_source_task.apply_async(
kwargs={
"source_id": resolved_source_id,
"user": user,
"idempotency_key": scoped_key or idempotency_key,
},
task_id=predetermined_task_id,
)
claim_transferred = True
return make_response(
jsonify(
@@ -607,6 +830,14 @@ class ManageSourceFiles(Resource):
200,
)
except Exception as err:
# Release the dedup claim only if it wasn't transferred to
# a worker. Without this, a same-key retry within the 24h
# TTL would 200-cache to a predetermined task_id whose
# ``apply_async`` never ran (or ran but the response builder
# blew up afterward — only the first case matters in
# practice; the flag protects both).
if scoped_key and not claim_transferred:
_release_claim(scoped_key)
error_context = f"operation={operation}, user={user}, source_id={source_id}"
if operation == "remove_directory":
directory_path = request.form.get("directory_path", "")

View File

@@ -1,5 +1,6 @@
from datetime import timedelta
from application.api.user.idempotency import with_idempotency
from application.celery_init import celery
from application.worker import (
agent_webhook_worker,
@@ -13,9 +14,32 @@ from application.worker import (
)
@celery.task(bind=True)
# Shared decorator config for long-running, side-effecting tasks.
# ``acks_late`` matches the celeryconfig default but is kept explicit so
# each task's durability story is grep-able next to its body. Paired with
# ``autoretry_for=(Exception,)`` and a bounded ``max_retries`` so a poison
# message cannot loop forever.
DURABLE_TASK = {
    "bind": True,
    "acks_late": True,
    "autoretry_for": (Exception,),
    "retry_kwargs": {"max_retries": 3, "countdown": 60},
    "retry_backoff": True,
}
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="ingest")
def ingest(
self, directory, formats, job_name, user, file_path, filename, file_name_map=None
self,
directory,
formats,
job_name,
user,
file_path,
filename,
file_name_map=None,
idempotency_key=None,
):
resp = ingest_worker(
self,
@@ -26,25 +50,35 @@ def ingest(
filename,
user,
file_name_map=file_name_map,
idempotency_key=idempotency_key,
)
return resp
@celery.task(bind=True)
def ingest_remote(self, source_data, job_name, user, loader):
resp = remote_worker(self, source_data, job_name, user, loader)
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="ingest_remote")
def ingest_remote(self, source_data, job_name, user, loader, idempotency_key=None):
    """Durable Celery entry point that delegates remote ingestion to ``remote_worker``."""
    return remote_worker(
        self,
        source_data,
        job_name,
        user,
        loader,
        idempotency_key=idempotency_key,
    )
@celery.task(bind=True)
def reingest_source_task(self, source_id, user):
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="reingest_source_task")
def reingest_source_task(self, source_id, user, idempotency_key=None):
    """Durable Celery entry point that delegates to ``reingest_source_worker``."""
    from application.worker import reingest_source_worker

    # NOTE(review): ``idempotency_key`` is accepted but not forwarded to the
    # worker (unlike ``ingest_remote``) — presumably consumed by the
    # ``with_idempotency`` decorator alone; confirm that is intentional.
    return reingest_source_worker(self, source_id, user)
@celery.task(bind=True)
# Beat-driven dispatch stays early-ACK (``acks_late=False``): redelivering
# a SIGKILLed beat tick is only harmless when the dispatch itself is
# idempotent, so we avoid having the broker replay a dispatch that already
# enqueued downstream work.
@celery.task(bind=True, acks_late=False)
def schedule_syncs(self, frequency):
    """Beat entry point: delegate the ``frequency`` tick to ``sync_worker``."""
    return sync_worker(self, frequency)
@@ -74,19 +108,22 @@ def sync_source(
return resp
@celery.task(bind=True)
def store_attachment(self, file_info, user):
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="store_attachment")
def store_attachment(self, file_info, user, idempotency_key=None):
    """Durable Celery entry point that delegates to ``attachment_worker``."""
    # ``idempotency_key`` is not passed through — presumably handled entirely
    # by ``with_idempotency``; confirm against the decorator's contract.
    return attachment_worker(self, file_info, user)
@celery.task(bind=True)
def process_agent_webhook(self, agent_id, payload):
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="process_agent_webhook")
def process_agent_webhook(self, agent_id, payload, idempotency_key=None):
    """Durable Celery entry point that delegates to ``agent_webhook_worker``."""
    # ``idempotency_key`` is not passed through — presumably handled entirely
    # by ``with_idempotency``; confirm against the decorator's contract.
    return agent_webhook_worker(self, agent_id, payload)
@celery.task(bind=True)
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="ingest_connector_task")
def ingest_connector_task(
self,
job_name,
@@ -100,6 +137,7 @@ def ingest_connector_task(
operation_mode="upload",
doc_id=None,
sync_frequency="never",
idempotency_key=None,
):
from application.worker import ingest_connector
@@ -116,6 +154,7 @@ def ingest_connector_task(
operation_mode=operation_mode,
doc_id=doc_id,
sync_frequency=sync_frequency,
idempotency_key=idempotency_key,
)
return resp
@@ -140,6 +179,19 @@ def setup_periodic_tasks(sender, **kwargs):
cleanup_pending_tool_state.s(),
name="cleanup-pending-tool-state",
)
# Pure housekeeping for ``task_dedup`` / ``webhook_dedup`` — the
# upsert paths already handle stale rows, so cadence only bounds
# table size. Hourly is plenty for typical traffic.
sender.add_periodic_task(
timedelta(hours=1),
cleanup_idempotency_dedup.s(),
name="cleanup-idempotency-dedup",
)
sender.add_periodic_task(
timedelta(seconds=30),
reconciliation_task.s(),
name="reconciliation",
)
sender.add_periodic_task(
timedelta(hours=7),
version_check_task.s(),
@@ -159,18 +211,12 @@ def mcp_oauth_status_task(self, task_id):
return resp
@celery.task(bind=True)
@celery.task(bind=True, acks_late=False)
def cleanup_pending_tool_state(self):
"""Delete pending_tool_state rows past their TTL.
Replaces Mongo's ``expireAfterSeconds=0`` TTL index — Postgres has
no native TTL, so this task runs every 60 seconds to keep
``pending_tool_state`` bounded. No-ops if ``POSTGRES_URI`` isn't
configured (keeps the task runnable in Mongo-only environments).
"""
"""Revert stale ``resuming`` rows, then delete TTL-expired rows."""
from application.core.settings import settings
if not settings.POSTGRES_URI:
return {"deleted": 0, "skipped": "POSTGRES_URI not set"}
return {"deleted": 0, "reverted": 0, "skipped": "POSTGRES_URI not set"}
from application.storage.db.engine import get_engine
from application.storage.db.repositories.pending_tool_state import (
@@ -179,11 +225,47 @@ def cleanup_pending_tool_state(self):
engine = get_engine()
with engine.begin() as conn:
deleted = PendingToolStateRepository(conn).cleanup_expired()
return {"deleted": deleted}
repo = PendingToolStateRepository(conn)
reverted = repo.revert_stale_resuming(grace_seconds=600)
deleted = repo.cleanup_expired()
return {"deleted": deleted, "reverted": reverted}
@celery.task(bind=True)
@celery.task(bind=True, acks_late=False)
def cleanup_idempotency_dedup(self):
    """Delete TTL-expired rows from ``task_dedup`` and ``webhook_dedup``.

    Pure housekeeping: the upsert paths already ignore stale rows
    (TTL-aware ``ON CONFLICT DO UPDATE``), so this only bounds table
    growth and keeps SELECT planning tight on large deployments.
    No-ops when ``POSTGRES_URI`` is unset.
    """
    from application.core.settings import settings

    if settings.POSTGRES_URI:
        from application.storage.db.engine import get_engine
        from application.storage.db.repositories.idempotency import (
            IdempotencyRepository,
        )

        engine = get_engine()
        with engine.begin() as conn:
            return IdempotencyRepository(conn).cleanup_expired()
    return {
        "task_dedup_deleted": 0,
        "webhook_dedup_deleted": 0,
        "skipped": "POSTGRES_URI not set",
    }
@celery.task(bind=True, acks_late=False)
def reconciliation_task(self):
    """Sweep stuck durability rows and escalate them to terminal status + alert."""
    from application.api.user.reconciliation import run_reconciliation

    outcome = run_reconciliation()
    return outcome
@celery.task(bind=True, acks_late=False)
def version_check_task(self):
"""Periodic anonymous version check.

View File

@@ -198,8 +198,14 @@ def normalize_agent_node_json_schemas(nodes: List[Dict]) -> List[Dict]:
return normalized_nodes
def validate_workflow_structure(nodes: List[Dict], edges: List[Dict]) -> List[str]:
"""Validate workflow graph structure."""
def validate_workflow_structure(
nodes: List[Dict], edges: List[Dict], user_id: str | None = None
) -> List[str]:
"""Validate workflow graph structure.
``user_id`` is required so per-user BYOM custom-model UUIDs resolve
when checking each agent node's structured-output capability.
"""
errors = []
if not nodes:
@@ -343,7 +349,7 @@ def validate_workflow_structure(nodes: List[Dict], edges: List[Dict]) -> List[st
model_id = raw_config.get("model_id")
if has_json_schema and isinstance(model_id, str) and model_id.strip():
capabilities = get_model_capabilities(model_id.strip())
capabilities = get_model_capabilities(model_id.strip(), user_id=user_id)
if capabilities and not capabilities.get("supports_structured_output", False):
errors.append(
f"Agent node '{agent_title}' selected model does not support structured output"
@@ -389,7 +395,9 @@ class WorkflowList(Resource):
nodes_data = data.get("nodes", [])
edges_data = data.get("edges", [])
validation_errors = validate_workflow_structure(nodes_data, edges_data)
validation_errors = validate_workflow_structure(
nodes_data, edges_data, user_id=user_id
)
if validation_errors:
return error_response(
"Workflow validation failed", errors=validation_errors
@@ -451,7 +459,9 @@ class WorkflowDetail(Resource):
nodes_data = data.get("nodes", [])
edges_data = data.get("edges", [])
validation_errors = validate_workflow_structure(nodes_data, edges_data)
validation_errors = validate_workflow_structure(
nodes_data, edges_data, user_id=user_id
)
if validation_errors:
return error_response(
"Workflow validation failed", errors=validation_errors

View File

@@ -9,6 +9,7 @@ import json
import logging
import time
import traceback
from datetime import datetime
from typing import Any, Dict, Generator, Optional
from flask import Blueprint, jsonify, make_response, request, Response
@@ -213,6 +214,7 @@ def _stream_response(
decoded_token=processor.decoded_token,
agent_id=processor.agent_id,
model_id=processor.model_id,
model_user_id=processor.model_user_id,
should_save_conversation=should_save_conversation,
_continuation=continuation,
)
@@ -257,6 +259,7 @@ def _non_stream_response(
decoded_token=processor.decoded_token,
agent_id=processor.agent_id,
model_id=processor.model_id,
model_user_id=processor.model_user_id,
should_save_conversation=should_save_conversation,
_continuation=continuation,
)
@@ -304,7 +307,16 @@ def list_models():
401,
)
# Repository rows now go through ``coerce_pg_native`` at SELECT
# time, so timestamps arrive as ISO 8601 strings. Parse before
# taking ``.timestamp()``; fall back to ``time.time()`` only when
# the value is genuinely missing or unparseable.
created = agent.get("created_at") or agent.get("createdAt")
if isinstance(created, str):
try:
created = datetime.fromisoformat(created)
except (ValueError, TypeError):
created = None
created_ts = (
int(created.timestamp()) if hasattr(created, "timestamp")
else int(time.time())

View File

@@ -9,6 +9,7 @@ from jose import jwt
from application.auth import handle_auth
from application.core import log_context
from application.core.logging_config import setup_logging
setup_logging()
@@ -112,6 +113,38 @@ def generate_token():
return jsonify({"error": "Token generation not allowed in current auth mode"}), 400
_LOG_CTX_TOKEN_ATTR = "_log_ctx_token"
@app.before_request
def _bind_log_context():
    """Bind ``activity_id`` + ``endpoint`` for the duration of this request.

    Runs before ``authenticate_request``; ``user_id`` is overlaid by a
    follow-up before_request handler once the JWT has been decoded.
    """
    if request.method == "OPTIONS":
        return None
    request.activity_id = str(uuid.uuid4())
    setattr(
        request,
        _LOG_CTX_TOKEN_ATTR,
        log_context.bind(
            activity_id=request.activity_id,
            endpoint=request.endpoint,
        ),
    )
    return None
@app.teardown_request
def _reset_log_context(_exc):
    """Unwind the request-level log-context bind; no-op when none was made."""
    # SSE streams keep yielding after teardown fires, but a2wsgi runs each
    # request inside ``copy_context().run(...)``, so this reset doesn't
    # leak into the stream's view of the context.
    token = getattr(request, _LOG_CTX_TOKEN_ATTR, None)
    if token is None:
        return
    log_context.reset(token)
@app.before_request
def enforce_stt_request_size_limits():
if request.method == "OPTIONS":
@@ -148,12 +181,29 @@ def authenticate_request():
request.decoded_token = decoded_token
@app.before_request
def _bind_user_id_to_log_context():
    """Overlay ``user_id`` on the log context after JWT authentication.

    Registered after ``authenticate_request`` (Flask runs before_request
    handlers in registration order), so ``request.decoded_token`` is already
    populated when this reads it. ``teardown_request`` unwinds the whole
    request-level bind, so no separate reset token is kept here.
    """
    if request.method == "OPTIONS":
        return None
    decoded = getattr(request, "decoded_token", None)
    if isinstance(decoded, dict) and decoded.get("sub"):
        log_context.bind(user_id=decoded["sub"])
    return None
@app.after_request
def after_request(response: Response) -> Response:
    """Add CORS headers for the pure Flask development entrypoint."""
    # The original block assigned Allow-Headers/Allow-Methods twice (stale
    # pre-Idempotency-Key values followed by the new ones); the first
    # assignments were dead — keep only the current values.
    response.headers["Access-Control-Allow-Origin"] = "*"
    response.headers["Access-Control-Allow-Headers"] = (
        "Content-Type, Authorization, Idempotency-Key"
    )
    response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, PATCH, DELETE, OPTIONS"
    return response

View File

@@ -24,8 +24,13 @@ asgi_app = Starlette(
Middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
allow_headers=["Content-Type", "Authorization", "Mcp-Session-Id"],
allow_methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
allow_headers=[
"Content-Type",
"Authorization",
"Mcp-Session-Id",
"Idempotency-Key",
],
expose_headers=["Mcp-Session-Id"],
),
],

View File

@@ -1,3 +1,4 @@
import hashlib
import json
import logging
import time
@@ -10,6 +11,14 @@ from application.utils import get_hash
logger = logging.getLogger(__name__)
def _cache_default(value):
# Image attachments arrive inline as bytes (see GoogleLLM.prepare_messages_with_attachments);
# hash so the cache key stays bounded in size and stable across identical content.
if isinstance(value, (bytes, bytearray, memoryview)):
return f"<bytes:sha256:{hashlib.sha256(bytes(value)).hexdigest()}>"
return repr(value)
_redis_instance = None
_redis_creation_failed = False
_instance_lock = Lock()
@@ -36,7 +45,7 @@ def get_redis_instance():
def gen_cache_key(messages, model="docgpt", tools=None):
if not all(isinstance(msg, dict) for msg in messages):
raise ValueError("All messages must be dictionaries.")
messages_str = json.dumps(messages)
messages_str = json.dumps(messages, default=_cache_default)
tools_str = json.dumps(str(tools)) if tools else ""
combined = f"{model}_{messages_str}_{tools_str}"
cache_key = get_hash(combined)

View File

@@ -1,8 +1,17 @@
import inspect
import logging
import threading
from celery import Celery
from application.core import log_context
from application.core.settings import settings
from celery.signals import setup_logging, worker_process_init, worker_ready
from celery.signals import (
setup_logging,
task_postrun,
task_prerun,
worker_process_init,
worker_ready,
)
def make_celery(app_name=__name__):
@@ -41,6 +50,54 @@ def _dispose_db_engine_on_fork(*args, **kwargs):
dispose_engine()
# Most tasks in this repo accept ``user`` where the log context wants
# ``user_id``; map task parameter names to context keys explicitly.
_TASK_PARAM_TO_CTX_KEY: dict[str, str] = {
"user": "user_id",
"user_id": "user_id",
"agent_id": "agent_id",
"conversation_id": "conversation_id",
}
_task_log_tokens: dict[str, object] = {}
@task_prerun.connect
def _bind_task_log_context(task_id, task, args, kwargs, **_):
    """Bind ``activity_id`` plus mapped task parameters to the log context.

    Task arguments are resolved by parameter name via the run signature —
    nearly every task in this repo is invoked positionally, so a plain
    ``kwargs.get("user")`` would bind nothing.
    """
    context = {"activity_id": task_id}
    try:
        params = inspect.signature(task.run).bind_partial(*args, **kwargs).arguments
    except (TypeError, ValueError):
        params = dict(kwargs)
    for name, value in params.items():
        mapped_key = _TASK_PARAM_TO_CTX_KEY.get(name)
        if mapped_key and value:
            context[mapped_key] = value
    _task_log_tokens[task_id] = log_context.bind(**context)
@task_postrun.connect
def _unbind_task_log_context(task_id, **_):
    """Reset the log-context bind made by ``_bind_task_log_context``.

    ``task_postrun`` fires on success and failure alike. Unlike the Flask
    path, Celery tasks aren't isolated in their own
    ``copy_context().run(...)``, so skipping this reset would leak the bind
    onto the next task executed by the same worker.
    """
    token = _task_log_tokens.pop(task_id, None)
    if token is None:
        return
    try:
        log_context.reset(token)
    except ValueError:
        # prerun and postrun ran on different threads (non-default Celery
        # pool), so the token is invalid in this context. Drop it.
        logging.getLogger(__name__).debug(
            "log_context reset skipped for task %s", task_id
        )
@worker_ready.connect
def _run_version_check(*args, **kwargs):
"""Kick off the anonymous version check on worker startup.

View File

@@ -1,7 +1,10 @@
import os
from application.core.settings import settings
broker_url = os.getenv("CELERY_BROKER_URL")
result_backend = os.getenv("CELERY_RESULT_BACKEND")
# Pydantic loads .env into ``settings`` but does not inject values into
# ``os.environ`` — read directly from settings so beat startup (which
# imports this module before any explicit env load) sees a real URL.
broker_url = settings.CELERY_BROKER_URL
result_backend = settings.CELERY_RESULT_BACKEND
task_serializer = 'json'
result_serializer = 'json'
@@ -10,7 +13,21 @@ accept_content = ['json']
# Autodiscover tasks
imports = ('application.api.user.tasks',)
# Project-scoped queue so a stray sibling worker on the same broker
# (other repo, same default ``celery`` queue) can't grab DocsGPT tasks.
task_default_queue = "docsgpt"
task_default_exchange = "docsgpt"
task_default_routing_key = "docsgpt"
beat_scheduler = "redbeat.RedBeatScheduler"
redbeat_redis_url = broker_url
redbeat_key_prefix = "redbeat:docsgpt:"
redbeat_lock_timeout = 90
# Survive worker SIGKILL/OOM without silently dropping in-flight tasks.
task_acks_late = True
task_reject_on_worker_lost = True
worker_prefetch_multiplier = settings.CELERY_WORKER_PREFETCH_MULTIPLIER
broker_transport_options = {"visibility_timeout": settings.CELERY_VISIBILITY_TIMEOUT}
result_expires = 86400 * 7
task_track_started = True

View File

@@ -0,0 +1,57 @@
"""Per-activity logging context backed by ``contextvars``.
The ``_ContextFilter`` installed by ``logging_config.setup_logging`` stamps
every ``LogRecord`` emitted inside a ``bind`` block with the bound keys, so
they land as first-class attributes on the OTLP log export rather than being
buried inside formatted message bodies.
A single ``ContextVar`` holds a dict so nested binds reset atomically (LIFO)
via the token returned by ``bind``.
"""
from __future__ import annotations
from contextvars import ContextVar, Token
from typing import Mapping
# Context keys that are allowed to be stamped onto LogRecords. Anything
# else passed to ``bind`` is silently dropped.
_CTX_KEYS: frozenset[str] = frozenset(
    (
        "activity_id",
        "parent_activity_id",
        "user_id",
        "agent_id",
        "conversation_id",
        "endpoint",
        "model",
    )
)

# One ContextVar holding a dict, so nested binds reset atomically (LIFO)
# via the token returned by ``bind``.
_ctx: ContextVar[Mapping[str, str]] = ContextVar("log_ctx", default={})


def bind(**kwargs: object) -> Token:
    """Overlay the given keys onto the current context.

    Returns a ``Token`` so the caller can ``reset`` in a ``finally`` block.
    Keys outside :data:`_CTX_KEYS` are silently dropped (a typo can't stamp
    a stray field name onto every record), as are ``None`` values (a
    missing attribute is more useful than the literal string ``"None"``).
    """
    merged = dict(_ctx.get())
    for key, value in kwargs.items():
        if key in _CTX_KEYS and value is not None:
            merged[key] = str(value)
    return _ctx.set(merged)


def reset(token: Token) -> None:
    """Restore the context to the snapshot captured by the matching ``bind``."""
    _ctx.reset(token)


def snapshot() -> Mapping[str, str]:
    """Return the current context dict. Treat as read-only; use :func:`bind`."""
    return _ctx.get()

View File

@@ -2,6 +2,36 @@ import logging
import os
from logging.config import dictConfig
from application.core.log_context import snapshot as _ctx_snapshot
# Loggers with ``propagate=False`` don't share root's handlers, so the
# context filter has to be installed on their handlers directly.
_NON_PROPAGATING_LOGGERS: tuple[str, ...] = (
"uvicorn",
"uvicorn.access",
"uvicorn.error",
"celery.app.trace",
"celery.worker.strategy",
"gunicorn.error",
"gunicorn.access",
)
class _ContextFilter(logging.Filter):
    """Stamp the current ``log_context`` snapshot onto every ``LogRecord``.

    Must be installed on **handlers**, not loggers: Python skips
    logger-level filters when a child logger's record propagates up. The
    ``hasattr`` guard keeps an explicit ``logger.info(..., extra={...})``
    from being overwritten.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        snapshot = _ctx_snapshot()
        for key in snapshot:
            if not hasattr(record, key):
                setattr(record, key, snapshot[key])
        return True
def _otlp_logs_enabled() -> bool:
"""Return True when the user has opted in to OTLP log export.
@@ -60,3 +90,23 @@ def setup_logging() -> None:
for handler in preserved_handlers:
if handler not in root.handlers:
root.addHandler(handler)
_install_context_filter()
def _install_context_filter() -> None:
    """Attach :class:`_ContextFilter` to root's handlers plus every handler
    on the known non-propagating loggers. Handlers that already carry one
    are skipped, so repeat ``setup_logging`` calls never stack filters.
    """

    def _missing_ctx_filter(handler: logging.Handler) -> bool:
        return not any(isinstance(f, _ContextFilter) for f in handler.filters)

    loggers = [logging.getLogger()]
    loggers.extend(logging.getLogger(name) for name in _NON_PROPAGATING_LOGGERS)
    for target in loggers:
        for handler in target.handlers:
            if _missing_ctx_filter(handler):
                handler.addFilter(_ContextFilter())

View File

@@ -4,17 +4,28 @@ Loads model catalogs from YAML files (built-in + operator-supplied),
groups them by provider name, then for each registered provider plugin
calls ``get_models`` to produce the final per-provider model list.
The ``user_id`` parameter on lookup methods is reserved for the future
end-user BYOM (per-user model records in Postgres). It is currently
ignored — defaulted to ``None`` everywhere — so call sites can be
threaded through without a wide refactor when BYOM lands.
End-user BYOM (per-user model records in Postgres) is layered on top:
when a lookup arrives with a ``user_id``, the registry consults a
per-user cache first (loaded from the ``user_custom_models`` table on
miss) and falls through to the built-in catalog.
Cross-process invalidation: ``ModelRegistry`` is a per-process
singleton, so a CRUD write only evicts the cache in the process that
served it. Other gunicorn workers and Celery workers would otherwise
keep using a deleted/disabled/key-rotated BYOM record indefinitely.
``invalidate_user`` therefore both drops the local layer *and* bumps a
Redis-side version counter; other processes notice the bump on their
next access (after the local TTL window) and reload from Postgres. If
Redis is unreachable the per-process TTL still bounds staleness — pure
TTL semantics, no regression.
"""
from __future__ import annotations
import logging
import time
from collections import defaultdict
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Tuple
from application.core.model_settings import AvailableModel
from application.core.model_yaml import (
@@ -25,6 +36,9 @@ from application.core.model_yaml import (
logger = logging.getLogger(__name__)
_USER_CACHE_TTL_SECONDS = 60.0
_USER_VERSION_KEY_PREFIX = "byom:registry_version:"
class ModelRegistry:
"""Singleton registry of available models."""
@@ -41,6 +55,18 @@ class ModelRegistry:
if not ModelRegistry._initialized:
self.models: Dict[str, AvailableModel] = {}
self.default_model_id: Optional[str] = None
# Per-user BYOM cache. Each entry is
# ``(layer, version_at_load, loaded_at_monotonic)``:
# * ``layer`` — {model_id: AvailableModel}
# * ``version_at_load`` — Redis-side counter snapshot at
# reload time, or ``None`` if Redis was unreachable
# * ``loaded_at_monotonic`` — for TTL bookkeeping
# Populated lazily, evicted by TTL + cross-process
# invalidation (see ``invalidate_user``).
self._user_models: Dict[
str,
Tuple[Dict[str, AvailableModel], Optional[int], float],
] = {}
self._load_models()
ModelRegistry._initialized = True
@@ -54,6 +80,59 @@ class ModelRegistry:
cls._instance = None
cls._initialized = False
@classmethod
def invalidate_user(cls, user_id: str) -> None:
    """Drop the cached per-user model layer for ``user_id``.

    Called by the BYOM REST routes after every create/update/delete.
    Two effects:

    * Local: pop the entry from this process's cache so the next lookup
      re-reads from Postgres immediately.
    * Cross-process: ``INCR`` a Redis-side version counter for this user;
      other gunicorn/Celery processes notice the changed counter on their
      next TTL-driven recheck (see ``_user_models_for``) and reload. When
      Redis is unreachable we log and continue — local invalidation still
      happened, and peers fall back to TTL-only staleness bounds.
    """
    instance = cls._instance
    if instance is not None:
        instance._user_models.pop(user_id, None)
    try:
        from application.cache import get_redis_instance

        redis_client = get_redis_instance()
        if redis_client is not None:
            redis_client.incr(_USER_VERSION_KEY_PREFIX + user_id)
    except Exception as e:
        logger.warning(
            "BYOM invalidate: failed to publish version bump for "
            "user %s (Redis unreachable?): %s",
            user_id,
            e,
        )
@classmethod
def _read_user_version(cls, user_id: str) -> Optional[int]:
    """Return the Redis-side invalidation counter for ``user_id``.

    ``0`` if the key has never been bumped; ``None`` if Redis is
    unreachable or the read failed (callers fall back to TTL-only
    staleness in that case).
    """
    try:
        from application.cache import get_redis_instance

        redis_client = get_redis_instance()
        if redis_client is None:
            return None
        raw_value = redis_client.get(_USER_VERSION_KEY_PREFIX + user_id)
        return 0 if raw_value is None else int(raw_value)
    except Exception:
        return None
def _load_models(self) -> None:
from pathlib import Path
@@ -137,28 +216,170 @@ class ModelRegistry:
return []
return [name.strip() for name in llm_name.split(",") if name.strip()]
# ------------------------------------------------------------------
# Lookup API. ``user_id`` is reserved for the future BYOM and
# is ignored today — but threading it through every call site now
# means BYOM doesn't require a wide refactor when we build it.
# ------------------------------------------------------------------
# Per-user (BYOM) layer
def _user_models_for(self, user_id: str) -> Dict[str, AvailableModel]:
    """Return the user's BYOM models keyed by registry id (UUID).

    Loaded lazily from Postgres on first access; cached subject to
    a per-process TTL (``_USER_CACHE_TTL_SECONDS``) and a Redis-
    backed version counter for cross-process invalidation. The TTL
    bounds staleness even when Redis is unreachable, while the
    version stamp lets peers refresh without a DB read on the
    common case (no invalidation since last load). Decryption
    failures and DB errors yield an empty layer (logged) — the
    user simply doesn't see their custom models on this request,
    never a 500.
    """
    # Cache entry shape: (layer dict, version stamp at load, monotonic load time).
    cached = self._user_models.get(user_id)
    now = time.monotonic()
    if cached is not None:
        layer, cached_version, loaded_at = cached
        if (now - loaded_at) < _USER_CACHE_TTL_SECONDS:
            return layer
        # TTL elapsed: peek at the cross-process counter. If it
        # matches what we saw at load time, no invalidation has
        # happened — extend the TTL without touching Postgres. If
        # Redis is unreachable (``current_version is None``) we
        # fall through to a real reload, which keeps staleness
        # bounded to the TTL.
        current_version = self._read_user_version(user_id)
        if (
            current_version is not None
            and cached_version is not None
            and current_version == cached_version
        ):
            self._user_models[user_id] = (layer, cached_version, now)
            return layer
    # Capture the counter *before* the DB read so a CRUD that lands
    # mid-reload doesn't get masked: the next access will see a
    # newer version and reload again.
    version_before_read = self._read_user_version(user_id)
    layer: Dict[str, AvailableModel] = {}
    try:
        # Local imports keep registry import-time free of DB deps.
        from application.core.model_settings import (
            ModelCapabilities,
            ModelProvider,
        )
        from application.storage.db.repositories.user_custom_models import (
            UserCustomModelsRepository,
        )
        from application.storage.db.session import db_readonly

        with db_readonly() as conn:
            repo = UserCustomModelsRepository(conn)
            rows = repo.list_for_user(user_id)
            for row in rows:
                api_key = repo._decrypt_api_key(
                    row.get("api_key_encrypted", ""), user_id
                )
                if not api_key:
                    # SECURITY: do NOT register an unroutable BYOM
                    # record. If we did, LLMCreator would fall back
                    # to the caller-passed api_key (settings.API_KEY
                    # for openai_compatible) and POST it to the
                    # user-supplied base_url — leaking the instance
                    # credential to the user's chosen endpoint.
                    # Most likely cause is ENCRYPTION_SECRET_KEY
                    # having rotated; user must re-save the model.
                    logger.warning(
                        "user_custom_models: skipping model %s for "
                        "user %s — api_key could not be decrypted "
                        "(rotated ENCRYPTION_SECRET_KEY?). Re-save "
                        "the model to recover.",
                        row.get("id"),
                        user_id,
                    )
                    continue
                caps_raw = row.get("capabilities") or {}
                # Stored attachments may be aliases (``image``) or
                # raw MIME types. Built-in YAML models expand at
                # load time; mirror that here so downstream MIME-
                # type comparisons (handlers/base.prepare_messages)
                # match concrete types like ``image/png`` rather
                # than the bare alias.
                from application.core.model_yaml import (
                    expand_attachments_lenient,
                )

                raw_attachments = caps_raw.get("attachments", []) or []
                expanded_attachments = expand_attachments_lenient(
                    raw_attachments,
                    f"user_custom_models[user={user_id}, model={row.get('id')}]",
                )
                # Missing/zero context_window falls back to 128000 tokens.
                caps = ModelCapabilities(
                    supports_tools=bool(caps_raw.get("supports_tools", False)),
                    supports_structured_output=bool(
                        caps_raw.get("supports_structured_output", False)
                    ),
                    supports_streaming=bool(
                        caps_raw.get("supports_streaming", True)
                    ),
                    supported_attachment_types=expanded_attachments,
                    context_window=int(
                        caps_raw.get("context_window") or 128000
                    ),
                )
                model_id = str(row["id"])
                # BYOM records always route via the OpenAI wire format.
                layer[model_id] = AvailableModel(
                    id=model_id,
                    provider=ModelProvider.OPENAI_COMPATIBLE,
                    display_name=row["display_name"],
                    description=row.get("description") or "",
                    capabilities=caps,
                    enabled=bool(row.get("enabled", True)),
                    base_url=row["base_url"],
                    upstream_model_id=row["upstream_model_id"],
                    source="user",
                    api_key=api_key,
                )
    except Exception as e:
        # Any failure degrades to "no custom models", never a 500.
        logger.warning(
            "user_custom_models: failed to load layer for user %s: %s",
            user_id,
            e,
        )
        layer = {}
    self._user_models[user_id] = (layer, version_before_read, now)
    return layer
# Lookup API. ``user_id`` enables the BYOM per-user layer; without
# it, callers see only the built-in + operator catalog.
def get_model(
    self, model_id: str, user_id: Optional[str] = None
) -> Optional["AvailableModel"]:
    """Resolve ``model_id`` to its registry record, or ``None``.

    The caller's BYOM layer (when ``user_id`` is supplied) takes
    precedence over the shared built-in + operator catalog.
    """
    if user_id:
        user_layer = self._user_models_for(user_id)
        try:
            return user_layer[model_id]
        except KeyError:
            pass  # not a BYOM id — fall back to the shared catalog
    return self.models.get(model_id)
def get_all_models(
    self, user_id: Optional[str] = None
) -> List["AvailableModel"]:
    """Return every registered model, enabled or not.

    Bug fix: a stray early ``return list(self.models.values())`` made
    the code below unreachable, so the per-user BYOM layer was never
    merged. With ``user_id`` supplied, the user's custom models are
    appended after the built-in + operator catalog.
    """
    out = list(self.models.values())
    if user_id:
        out.extend(self._user_models_for(user_id).values())
    return out
def get_enabled_models(
    self, user_id: Optional[str] = None
) -> List["AvailableModel"]:
    """Return only the models whose ``enabled`` flag is set.

    Bug fix: a stray early ``return`` made the code below unreachable,
    so enabled BYOM models were never included. With ``user_id``
    supplied, the user's enabled custom models are appended after the
    enabled built-in + operator models.
    """
    out = [m for m in self.models.values() if m.enabled]
    if user_id:
        out.extend(
            m for m in self._user_models_for(user_id).values() if m.enabled
        )
    return out
def model_exists(
    self, model_id: str, user_id: Optional[str] = None
) -> bool:
    """True when ``model_id`` resolves in either the caller's BYOM
    layer (only consulted when ``user_id`` is given) or the shared
    built-in + operator catalog."""
    in_user_layer = bool(user_id) and model_id in self._user_models_for(user_id)
    return in_user_layer or model_id in self.models

View File

@@ -48,14 +48,15 @@ class AvailableModel:
capabilities: ModelCapabilities = field(default_factory=ModelCapabilities)
enabled: bool = True
base_url: Optional[str] = None
# User-facing label distinct from the dispatch ``provider``. Used by
# openai_compatible YAMLs so a Mistral model shows "mistral" in the
# API response while still routing through the OpenAI wire format.
# User-facing label distinct from dispatch provider (e.g. mistral
# routed through openai_compatible).
display_provider: Optional[str] = None
# Per-record API key. Operator YAMLs leave this None; populated for
# openai_compatible models (resolved from the YAML's ``api_key_env``)
# and reserved for the future end-user BYOM phase. Never serialized
# into to_dict().
# Sent in the API call's ``model`` field; falls back to ``self.id``
# for built-ins where id IS the upstream name.
upstream_model_id: Optional[str] = None
# "builtin" for catalog YAMLs, "user" for BYOM records.
source: str = "builtin"
# Decrypted/resolved at registry-merge time. Never serialized.
api_key: Optional[str] = field(default=None, repr=False, compare=False)
def to_dict(self) -> Dict:
@@ -70,6 +71,7 @@ class AvailableModel:
"supports_streaming": self.capabilities.supports_streaming,
"context_window": self.capabilities.context_window,
"enabled": self.enabled,
"source": self.source,
}
if self.base_url:
result["base_url"] = self.base_url

View File

@@ -20,22 +20,40 @@ def get_api_key_for_provider(provider: str) -> Optional[str]:
return settings.API_KEY
def get_all_available_models() -> Dict[str, Dict[str, Any]]:
"""Get all available models with metadata for API response"""
def get_all_available_models(
user_id: Optional[str] = None,
) -> Dict[str, Dict[str, Any]]:
"""Get all available models with metadata for API response.
When ``user_id`` is supplied, the user's BYOM custom-model records
are merged into the result alongside the built-in catalog.
"""
registry = ModelRegistry.get_instance()
return {model.id: model.to_dict() for model in registry.get_enabled_models()}
return {
model.id: model.to_dict()
for model in registry.get_enabled_models(user_id=user_id)
}
def validate_model_id(model_id: str) -> bool:
"""Check if a model ID exists in registry"""
def validate_model_id(model_id: str, user_id: Optional[str] = None) -> bool:
"""Check if a model ID exists in registry.
``user_id`` enables resolution of per-user BYOM records (UUIDs).
Without it, only built-in catalog ids resolve.
"""
registry = ModelRegistry.get_instance()
return registry.model_exists(model_id)
return registry.model_exists(model_id, user_id=user_id)
def get_model_capabilities(model_id: str) -> Optional[Dict[str, Any]]:
"""Get capabilities for a specific model"""
def get_model_capabilities(
model_id: str, user_id: Optional[str] = None
) -> Optional[Dict[str, Any]]:
"""Get capabilities for a specific model.
``user_id`` enables resolution of per-user BYOM records.
"""
registry = ModelRegistry.get_instance()
model = registry.get_model(model_id)
model = registry.get_model(model_id, user_id=user_id)
if model:
return {
"supported_attachment_types": model.capabilities.supported_attachment_types,
@@ -52,52 +70,66 @@ def get_default_model_id() -> str:
return registry.default_model_id
def get_provider_from_model_id(model_id: str) -> Optional[str]:
"""Get the provider name for a given model_id"""
def get_provider_from_model_id(
model_id: str, user_id: Optional[str] = None
) -> Optional[str]:
"""Get the provider name for a given model_id.
``user_id`` enables resolution of per-user BYOM records (UUIDs).
Without it, BYOM model ids return ``None`` and the caller falls
back to the deployment default.
"""
registry = ModelRegistry.get_instance()
model = registry.get_model(model_id)
model = registry.get_model(model_id, user_id=user_id)
if model:
return model.provider.value
return None
def get_token_limit(model_id: str) -> int:
"""
Get context window (token limit) for a model.
Returns model's context_window or default 128000 if model not found.
def get_token_limit(model_id: str, user_id: Optional[str] = None) -> int:
"""Get context window (token limit) for a model.
Returns the model's ``context_window`` or ``DEFAULT_LLM_TOKEN_LIMIT``
if not found. ``user_id`` enables resolution of per-user BYOM records.
"""
from application.core.settings import settings
registry = ModelRegistry.get_instance()
model = registry.get_model(model_id)
model = registry.get_model(model_id, user_id=user_id)
if model:
return model.capabilities.context_window
return settings.DEFAULT_LLM_TOKEN_LIMIT
def get_base_url_for_model(model_id: str) -> Optional[str]:
"""
Get the custom base_url for a specific model if configured.
Returns None if no custom base_url is set.
def get_base_url_for_model(
model_id: str, user_id: Optional[str] = None
) -> Optional[str]:
"""Get the custom base_url for a specific model if configured.
Returns ``None`` if no custom base_url is set. ``user_id`` enables
resolution of per-user BYOM records.
"""
registry = ModelRegistry.get_instance()
model = registry.get_model(model_id)
model = registry.get_model(model_id, user_id=user_id)
if model:
return model.base_url
return None
def get_api_key_for_model(model_id: str) -> Optional[str]:
"""
Resolve the API key to use when invoking ``model_id``.
def get_api_key_for_model(
model_id: str, user_id: Optional[str] = None
) -> Optional[str]:
"""Resolve the API key to use when invoking ``model_id``.
Priority:
1. The model record's own ``api_key`` (reserved for future end-user
BYOM where credentials travel with the record).
1. The model record's own ``api_key`` (BYOM records and
``openai_compatible`` YAMLs populate this).
2. The provider plugin's settings-based key.
``user_id`` enables resolution of per-user BYOM records.
"""
registry = ModelRegistry.get_instance()
model = registry.get_model(model_id)
model = registry.get_model(model_id, user_id=user_id)
if model is not None and model.api_key:
return model.api_key
if model is not None:

View File

@@ -281,6 +281,39 @@ def resolve_attachment_alias(alias: str) -> List[str]:
return list(aliases[alias])
def expand_attachments_lenient(
    attachments: Sequence[str], source: str
) -> List[str]:
    """Expand attachment aliases to MIME types, tolerating unknowns.

    Mirrors ``_expand_attachments`` but logs-and-skips unknown aliases
    rather than raising. Intended for runtime call sites (BYOM registry
    load) where an operator-side alias-map edit must not drop the
    entire user's BYOM layer; the strict raise still happens at the
    API validation boundary. Entries containing ``/`` are treated as
    concrete MIME types and passed through verbatim.
    """
    aliases = builtin_attachment_aliases()
    result: List[str] = []
    emitted: set = set()

    def _add(mime: str) -> None:
        # Deduplicate while preserving first-seen order.
        if mime not in emitted:
            result.append(mime)
            emitted.add(mime)

    for entry in attachments:
        if "/" in entry:
            _add(entry)
            continue
        mime_list = aliases.get(entry)
        if mime_list is None:
            logger.warning(
                "%s: skipping unknown attachment alias %r", source, entry,
            )
            continue
        for mime in mime_list:
            _add(mime)
    return result
def load_model_yamls(directories: Sequence[Path]) -> List[ProviderCatalog]:
"""Load every ``*.yaml`` file (excluding ``_defaults.yaml``) under each
directory in order and return a flat list of catalogs.

View File

@@ -30,6 +30,12 @@ class Settings(BaseSettings):
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
# Prefetch=1 caps SIGKILL loss to one task. Visibility timeout must exceed
# the longest legitimate task runtime (ingest, agent webhook) but stay
# short enough that SIGKILLed tasks redeliver promptly. 1h matches Onyx
# and Dify defaults; long ingests can override via env.
CELERY_WORKER_PREFETCH_MULTIPLIER: int = 1
CELERY_VISIBILITY_TIMEOUT: int = 3600
# Only consulted when VECTOR_STORE=mongodb or when running scripts/db/backfill.py; user data lives in Postgres.
MONGO_URI: Optional[str] = None
# User-data Postgres DB.

View File

@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
class AnthropicLLM(BaseLLM):
provider_name = "anthropic"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):

View File

@@ -1,5 +1,6 @@
import logging
from abc import ABC, abstractmethod
from typing import ClassVar
from application.cache import gen_cache, stream_cache
@@ -10,6 +11,10 @@ logger = logging.getLogger(__name__)
class BaseLLM(ABC):
# Stamped onto the ``llm_stream_start`` event so dashboards can group
# calls by vendor. Subclasses override.
provider_name: ClassVar[str] = "unknown"
def __init__(
self,
decoded_token=None,
@@ -17,6 +22,8 @@ class BaseLLM(ABC):
model_id=None,
base_url=None,
backup_models=None,
model_user_id=None,
capabilities=None,
):
self.decoded_token = decoded_token
self.agent_id = str(agent_id) if agent_id else None
@@ -25,6 +32,12 @@ class BaseLLM(ABC):
self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
self._backup_models = backup_models or []
self._fallback_llm = None
# Registry-resolved per-model capability overrides (BYOM caps,
# operator YAML). None falls back to provider-class defaults.
self.capabilities = capabilities
# BYOM-resolution scope captured at LLM creation time so backup
# / fallback lookups hit the same per-user layer as the primary.
self.model_user_id = model_user_id
@property
def fallback_llm(self):
@@ -39,10 +52,19 @@ class BaseLLM(ABC):
get_api_key_for_provider,
)
# Try per-agent backup models first
# model_user_id (BYOM scope) takes precedence over the caller's
# sub so shared-agent backups resolve under the owner's layer.
caller_sub = (
self.decoded_token.get("sub")
if isinstance(self.decoded_token, dict)
else None
)
backup_user_id = self.model_user_id or caller_sub
for backup_model_id in self._backup_models:
try:
provider = get_provider_from_model_id(backup_model_id)
provider = get_provider_from_model_id(
backup_model_id, user_id=backup_user_id
)
if not provider:
logger.warning(
f"Could not resolve provider for backup model: {backup_model_id}"
@@ -56,6 +78,15 @@ class BaseLLM(ABC):
decoded_token=self.decoded_token,
model_id=backup_model_id,
agent_id=self.agent_id,
model_user_id=self.model_user_id,
)
# Tag the fallback LLM so its rows land as
# ``source='fallback'`` in cost-attribution dashboards.
# Propagate the parent's ``_request_id`` so a user
# request that ran fallback is still grouped under one id.
self._fallback_llm._token_usage_source = "fallback"
self._fallback_llm._request_id = getattr(
self, "_request_id", None,
)
logger.info(
f"Fallback LLM initialized from agent backup model: "
@@ -68,7 +99,10 @@ class BaseLLM(ABC):
)
continue
# Fall back to global FALLBACK_* settings
# Fall back to global FALLBACK_* settings. Forward
# ``model_user_id`` here too: deployments can configure
# ``FALLBACK_LLM_NAME`` to a BYOM UUID, and that UUID is owned
# by the same user the primary model was resolved under.
if settings.FALLBACK_LLM_PROVIDER:
try:
self._fallback_llm = LLMCreator.create_llm(
@@ -78,6 +112,12 @@ class BaseLLM(ABC):
decoded_token=self.decoded_token,
model_id=settings.FALLBACK_LLM_NAME,
agent_id=self.agent_id,
model_user_id=self.model_user_id,
)
# Same rationale as the agent-backup branch.
self._fallback_llm._token_usage_source = "fallback"
self._fallback_llm._request_id = getattr(
self, "_request_id", None,
)
logger.info(
f"Fallback LLM initialized from global settings: "
@@ -96,6 +136,26 @@ class BaseLLM(ABC):
return args_dict
return {k: v for k, v in args_dict.items() if v is not None}
@staticmethod
def _is_non_retriable_client_error(exc: BaseException) -> bool:
"""4xx errors mean the request itself is malformed — retrying with
a different model fails identically and doubles the work. Only
transient/5xx/connection errors should trigger fallback."""
try:
from google.genai.errors import ClientError as _GenaiClientError
if isinstance(exc, _GenaiClientError):
return True
except ImportError:
pass
for attr in ("status_code", "code", "http_status"):
v = getattr(exc, attr, None)
if isinstance(v, int) and 400 <= v < 500:
return True
resp = getattr(exc, "response", None)
v = getattr(resp, "status_code", None)
return isinstance(v, int) and 400 <= v < 500
def _execute_with_fallback(
self, method_name: str, decorators: list, *args, **kwargs
):
@@ -119,12 +179,18 @@ class BaseLLM(ABC):
if is_stream:
return self._stream_with_fallback(
decorated_method, method_name, *args, **kwargs
decorated_method, method_name, decorators, *args, **kwargs
)
try:
return decorated_method()
except Exception as e:
if self._is_non_retriable_client_error(e):
logger.error(
f"Primary LLM failed with non-retriable client error; "
f"skipping fallback: {str(e)}"
)
raise
if not self.fallback_llm:
logger.error(f"Primary LLM failed and no fallback configured: {str(e)}")
raise
@@ -134,14 +200,27 @@ class BaseLLM(ABC):
f"{fallback.model_id}. Error: {str(e)}"
)
fallback_method = getattr(
fallback, method_name.replace("_raw_", "")
)
# Apply decorators to fallback's raw method directly — calling
# fallback.gen() would re-enter the orchestrator and recurse via
# fallback.fallback_llm.
fallback_method = getattr(fallback, method_name)
for decorator in decorators:
fallback_method = decorator(fallback_method)
fallback_kwargs = {**kwargs, "model": fallback.model_id}
return fallback_method(*args, **fallback_kwargs)
try:
return fallback_method(fallback, *args, **fallback_kwargs)
except Exception as e2:
if self._is_non_retriable_client_error(e2):
logger.error(
f"Fallback LLM failed with non-retriable client "
f"error; giving up: {str(e2)}"
)
else:
logger.error(f"Fallback LLM also failed; giving up: {str(e2)}")
raise
def _stream_with_fallback(
self, decorated_method, method_name, *args, **kwargs
self, decorated_method, method_name, decorators, *args, **kwargs
):
"""
Wrapper generator that catches mid-stream errors and falls back.
@@ -154,6 +233,12 @@ class BaseLLM(ABC):
try:
yield from decorated_method()
except Exception as e:
if self._is_non_retriable_client_error(e):
logger.error(
f"Primary LLM failed mid-stream with non-retriable client "
f"error; skipping fallback: {str(e)}"
)
raise
if not self.fallback_llm:
logger.error(
f"Primary LLM failed and no fallback configured: {str(e)}"
@@ -164,11 +249,37 @@ class BaseLLM(ABC):
f"Primary LLM failed mid-stream. Falling back to "
f"{fallback.model_id}. Error: {str(e)}"
)
fallback_method = getattr(
fallback, method_name.replace("_raw_", "")
# Apply decorators to fallback's raw stream method directly —
# calling fallback.gen_stream() would re-enter the orchestrator
# and recurse via fallback.fallback_llm. Emit the stream-start
# event manually so dashboards still see the fallback's
# provider/model when the response actually comes from it.
fallback._emit_stream_start_log(
fallback.model_id,
kwargs.get("messages"),
kwargs.get("tools"),
bool(
kwargs.get("_usage_attachments")
or kwargs.get("attachments")
),
)
fallback_method = getattr(fallback, method_name)
for decorator in decorators:
fallback_method = decorator(fallback_method)
fallback_kwargs = {**kwargs, "model": fallback.model_id}
yield from fallback_method(*args, **fallback_kwargs)
try:
yield from fallback_method(fallback, *args, **fallback_kwargs)
except Exception as e2:
if self._is_non_retriable_client_error(e2):
logger.error(
f"Fallback LLM failed mid-stream with non-retriable "
f"client error; giving up: {str(e2)}"
)
else:
logger.error(
f"Fallback LLM also failed mid-stream; giving up: {str(e2)}"
)
raise
def gen(self, model, messages, stream=False, tools=None, *args, **kwargs):
decorators = [gen_token_usage, gen_cache]
@@ -183,7 +294,58 @@ class BaseLLM(ABC):
**kwargs,
)
def _emit_stream_start_log(self, model, messages, tools, has_attachments):
# Stamped with ``self.provider_name`` so dashboards can group calls
# by vendor; the fallback path emits its own copy on the fallback
# instance so the actual responding provider is recorded.
logging.info(
"llm_stream_start",
extra={
"model": model,
"provider": self.provider_name,
"message_count": len(messages) if messages is not None else 0,
"has_attachments": bool(has_attachments),
"has_tools": bool(tools),
},
)
def _emit_stream_finished_log(
self,
model,
*,
prompt_tokens,
completion_tokens,
latency_ms,
cached_tokens=None,
error=None,
):
# Paired with ``llm_stream_start`` so cost dashboards can sum tokens
# by user/agent/provider. Token counts are client-side estimates
# from ``stream_token_usage``; vendor-reported counts (incl.
# ``cached_tokens`` for prompt caching) require per-provider
# extraction in each ``_raw_gen_stream`` and aren't wired yet.
extra = {
"model": model,
"provider": self.provider_name,
"prompt_tokens": int(prompt_tokens),
"completion_tokens": int(completion_tokens),
"latency_ms": int(latency_ms),
"status": "error" if error is not None else "ok",
}
if cached_tokens is not None:
extra["cached_tokens"] = int(cached_tokens)
if error is not None:
extra["error_class"] = type(error).__name__
logging.info("llm_stream_finished", extra=extra)
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
# Attachments arrive as ``_usage_attachments`` from ``Agent._llm_gen``;
# the ``stream_token_usage`` decorator pops that key, but the log
# fires before the decorator runs so it's still in ``kwargs`` here.
has_attachments = bool(
kwargs.get("_usage_attachments") or kwargs.get("attachments")
)
self._emit_stream_start_log(model, messages, tools, has_attachments)
decorators = [stream_cache, stream_token_usage]
return self._execute_with_fallback(
"_raw_gen_stream",

View File

@@ -6,6 +6,8 @@ DOCSGPT_BASE_URL = "https://oai.arc53.com"
DOCSGPT_MODEL = "docsgpt"
class DocsGPTAPILLM(OpenAILLM):
provider_name = "docsgpt"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=DOCSGPT_API_KEY,

View File

@@ -6,10 +6,13 @@ from google.genai import types
from application.core.settings import settings
from application.llm.base import BaseLLM
from application.llm.handlers.google import _decode_thought_signature
from application.storage.storage_creator import StorageCreator
class GoogleLLM(BaseLLM):
provider_name = "google"
def __init__(
self, api_key=None, user_api_key=None, decoded_token=None, *args, **kwargs
):
@@ -79,24 +82,39 @@ class GoogleLLM(BaseLLM):
for attachment in attachments:
mime_type = attachment.get("mime_type")
if mime_type in self.get_supported_attachment_types():
try:
if mime_type not in self.get_supported_attachment_types():
continue
try:
# Images go inline as bytes per Google's guidance for
# requests under 20MB; the Files API can return before
# the upload reaches ACTIVE state and yield an empty URI.
if mime_type.startswith("image/"):
file_bytes = self._read_attachment_bytes(attachment)
files.append(
{"file_bytes": file_bytes, "mime_type": mime_type}
)
else:
file_uri = self._upload_file_to_google(attachment)
if not file_uri:
raise ValueError(
f"Google Files API returned empty URI for "
f"{attachment.get('path', 'unknown')}"
)
logging.info(
f"GoogleLLM: Successfully uploaded file, got URI: {file_uri}"
)
files.append({"file_uri": file_uri, "mime_type": mime_type})
except Exception as e:
logging.error(
f"GoogleLLM: Error uploading file: {e}", exc_info=True
except Exception as e:
logging.error(
f"GoogleLLM: Error processing attachment: {e}", exc_info=True
)
if "content" in attachment:
prepared_messages[user_message_index]["content"].append(
{
"type": "text",
"text": f"[File could not be processed: {attachment.get('path', 'unknown')}]",
}
)
if "content" in attachment:
prepared_messages[user_message_index]["content"].append(
{
"type": "text",
"text": f"[File could not be processed: {attachment.get('path', 'unknown')}]",
}
)
if files:
logging.info(f"GoogleLLM: Adding {len(files)} files to message")
prepared_messages[user_message_index]["content"].append({"files": files})
@@ -112,7 +130,9 @@ class GoogleLLM(BaseLLM):
Returns:
str: Google AI file URI for the uploaded file.
"""
if "google_file_uri" in attachment:
# Truthy check, not membership: a poisoned cache row of "" or
# None must be treated as a miss and trigger a fresh upload.
if attachment.get("google_file_uri"):
return attachment["google_file_uri"]
file_path = attachment.get("path")
if not file_path:
@@ -126,6 +146,10 @@ class GoogleLLM(BaseLLM):
file=local_path
).uri,
)
if not file_uri:
raise ValueError(
f"Google Files API upload returned empty URI for {file_path}"
)
# Cache the Google file URI on the attachment row so we don't
# re-upload on the next LLM call. Accept either a PG UUID
@@ -159,6 +183,26 @@ class GoogleLLM(BaseLLM):
logging.error(f"Error uploading file to Google AI: {e}", exc_info=True)
raise
def _read_attachment_bytes(self, attachment):
"""
Read attachment bytes from storage for inline transmission.
Args:
attachment (dict): Attachment dictionary with path and metadata.
Returns:
bytes: Raw file bytes.
"""
file_path = attachment.get("path")
if not file_path:
raise ValueError("No file path provided in attachment")
if not self.storage.file_exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
return self.storage.process_file(
file_path,
lambda local_path, **kwargs: open(local_path, "rb").read(),
)
def _clean_messages_google(self, messages):
"""
Convert OpenAI format messages to Google AI format and collect system prompts.
@@ -215,7 +259,7 @@ class GoogleLLM(BaseLLM):
except (_json.JSONDecodeError, TypeError):
args = {}
cleaned_args = self._remove_null_values(args)
thought_sig = tc.get("thought_signature")
thought_sig = _decode_thought_signature(tc.get("thought_signature"))
if thought_sig:
parts.append(
types.Part(
@@ -279,7 +323,9 @@ class GoogleLLM(BaseLLM):
name=item["function_call"]["name"],
args=cleaned_args,
),
thoughtSignature=item["thought_signature"],
thoughtSignature=_decode_thought_signature(
item["thought_signature"]
),
)
)
else:
@@ -298,12 +344,24 @@ class GoogleLLM(BaseLLM):
)
elif "files" in item:
for file_data in item["files"]:
parts.append(
types.Part.from_uri(
file_uri=file_data["file_uri"],
mime_type=file_data["mime_type"],
if "file_bytes" in file_data:
parts.append(
types.Part.from_bytes(
data=file_data["file_bytes"],
mime_type=file_data["mime_type"],
)
)
elif file_data.get("file_uri"):
parts.append(
types.Part.from_uri(
file_uri=file_data["file_uri"],
mime_type=file_data["mime_type"],
)
)
else:
logging.warning(
"GoogleLLM: dropping file part with empty URI and no bytes"
)
)
else:
raise ValueError(
f"Unexpected content dictionary format:{item}"
@@ -541,22 +599,6 @@ class GoogleLLM(BaseLLM):
config.response_mime_type = "application/json"
# Check if we have both tools and file attachments
has_attachments = False
for message in messages:
for part in message.parts:
if hasattr(part, "file_data") and part.file_data is not None:
has_attachments = True
break
if has_attachments:
break
messages_summary = self._summarize_messages_for_log(messages)
logging.info(
"GoogleLLM: Starting stream generation. Model: %s, Messages: %s, Has attachments: %s",
model,
messages_summary,
has_attachments,
)
response = client.models.generate_content_stream(
model=model,
contents=messages,

View File

@@ -5,6 +5,8 @@ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
class GroqLLM(OpenAILLM):
provider_name = "groq"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=api_key or settings.GROQ_API_KEY or settings.API_KEY,

View File

@@ -10,6 +10,18 @@ from application.logging import build_stack_data
logger = logging.getLogger(__name__)
# Cap the agent tool-call loop. Without this an LLM that keeps
# requesting more tool calls (preview models, sparse tool results,
# under-specified prompts) can chain searches indefinitely and the
# stream never finalises. 25 mirrors Dify's default.
MAX_TOOL_ITERATIONS = 25
_FINALIZE_INSTRUCTION = (
f"You have made {MAX_TOOL_ITERATIONS} tool calls. Provide a final "
"response to the user based on what you have, without making any "
"additional tool calls."
)
@dataclass
class ToolCall:
"""Represents a tool/function call from the LLM."""
@@ -280,7 +292,26 @@ class LLMHandler(ABC):
# Keep serialized function calls/responses so the compressor sees actions
parts_text.append(str(item))
elif "files" in item:
parts_text.append(str(item))
# Image attachments arrive with raw bytes / base64
# inline (see GoogleLLM.prepare_messages_with_attachments).
# ``str(item)`` would dump the whole byte/base64
# blob into the compression prompt and bust the
# compression LLM's input limit.
files = item.get("files") or []
descriptors = []
if isinstance(files, list):
for f in files:
if isinstance(f, dict):
descriptors.append(
f.get("mime_type") or "file"
)
elif isinstance(f, str):
descriptors.append(f)
if not descriptors:
descriptors = ["file"]
parts_text.append(
f"[attachment: {', '.join(descriptors)}]"
)
return "\n".join(parts_text)
return ""
@@ -470,10 +501,14 @@ class LLMHandler(ABC):
)
return self._perform_in_memory_compression(agent, messages)
# Use orchestrator to perform compression
# Use orchestrator to perform compression. ``model_user_id``
# keeps BYOM registry resolution scoped to the model owner
# (shared-agent dispatch) while ``user_id`` stays the caller
# for the conversation access check.
result = orchestrator.compress_mid_execution(
conversation_id=agent.conversation_id,
user_id=agent.initial_user_id,
model_user_id=getattr(agent, "model_user_id", None),
model_id=agent.model_id,
decoded_token=getattr(agent, "decoded_token", {}),
current_conversation=conversation,
@@ -577,7 +612,20 @@ class LLMHandler(ABC):
if settings.COMPRESSION_MODEL_OVERRIDE
else agent.model_id
)
provider = get_provider_from_model_id(compression_model)
agent_decoded = getattr(agent, "decoded_token", None)
caller_sub = (
agent_decoded.get("sub")
if isinstance(agent_decoded, dict)
else None
)
# Use model-owner scope (mirrors orchestrator path) so
# shared-agent owner-BYOM resolves under the owner's layer.
compression_user_id = (
getattr(agent, "model_user_id", None) or caller_sub
)
provider = get_provider_from_model_id(
compression_model, user_id=compression_user_id
)
api_key = get_api_key_for_provider(provider)
compression_llm = LLMCreator.create_llm(
provider,
@@ -586,7 +634,12 @@ class LLMHandler(ABC):
getattr(agent, "decoded_token", None),
model_id=compression_model,
agent_id=getattr(agent, "agent_id", None),
model_user_id=compression_user_id,
)
# Side-channel LLM tag — see ``orchestrator.py`` for rationale.
compression_llm._token_usage_source = "compression"
compression_llm._request_id = getattr(agent, "_request_id", None) \
or getattr(getattr(agent, "llm", None), "_request_id", None)
# Create service without DB persistence capability
compression_service = CompressionService(
@@ -897,7 +950,9 @@ class LLMHandler(ABC):
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
iteration = 0
while parsed.requires_tool_call:
iteration += 1
tool_handler_gen = self.handle_tool_calls(
agent, parsed.tool_calls, tools_dict, messages
)
@@ -921,15 +976,46 @@ class LLMHandler(ABC):
}
return ""
# Cap reached: force one final tool-less call so the stream
# always ends with content rather than cutting off.
if iteration >= MAX_TOOL_ITERATIONS:
logger.warning(
"agent tool loop hit cap (%d); forcing finalize",
MAX_TOOL_ITERATIONS,
)
messages.append(
{"role": "system", "content": _FINALIZE_INSTRUCTION},
)
response = agent.llm.gen(
model=getattr(agent.llm, "model_id", None) or agent.model_id,
messages=messages,
tools=None,
)
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
break
# ``agent.model_id`` is the registry id (a UUID for BYOM
# records). Use the LLM's own model_id, which LLMCreator
# already resolved to the upstream model name. Built-ins:
# the two are equal; BYOM: the upstream name like
# "mistral-large-latest" instead of the UUID.
response = agent.llm.gen(
model=agent.model_id, messages=messages, tools=agent.tools
model=getattr(agent.llm, "model_id", None) or agent.model_id,
messages=messages,
tools=agent.tools,
)
parsed = self.parse_response(response)
self.llm_calls.append(build_stack_data(agent.llm))
return parsed.content
def handle_streaming(
self, agent, response: Any, tools_dict: Dict, messages: List[Dict]
self,
agent,
response: Any,
tools_dict: Dict,
messages: List[Dict],
_iteration: int = 0,
) -> Generator:
"""
Handle streaming response flow.
@@ -998,6 +1084,9 @@ class LLMHandler(ABC):
}
return
next_iteration = _iteration + 1
cap_reached = next_iteration >= MAX_TOOL_ITERATIONS
# Check if context limit was reached during tool execution
if hasattr(agent, 'context_limit_reached') and agent.context_limit_reached:
# Add system message warning about context limit
@@ -1010,13 +1099,32 @@ class LLMHandler(ABC):
)
})
logger.info("Context limit reached - instructing agent to wrap up")
elif cap_reached:
logger.warning(
"agent tool loop hit cap (%d); forcing finalize",
MAX_TOOL_ITERATIONS,
)
messages.append(
{"role": "system", "content": _FINALIZE_INSTRUCTION},
)
# See note above on agent.model_id vs llm.model_id.
response = agent.llm.gen_stream(
model=agent.model_id, messages=messages, tools=agent.tools if not agent.context_limit_reached else None
model=getattr(agent.llm, "model_id", None) or agent.model_id,
messages=messages,
tools=(
None
if cap_reached
or getattr(agent, "context_limit_reached", False)
else agent.tools
),
)
self.llm_calls.append(build_stack_data(agent.llm))
yield from self.handle_streaming(agent, response, tools_dict, messages)
yield from self.handle_streaming(
agent, response, tools_dict, messages,
_iteration=next_iteration,
)
return
if parsed.content:
buffer += parsed.content

View File

@@ -1,9 +1,35 @@
import base64
import binascii
import uuid
from typing import Any, Dict, Generator
from typing import Any, Dict, Generator, Optional, Union
from application.llm.handlers.base import LLMHandler, LLMResponse, ToolCall
def _encode_thought_signature(sig: Optional[Union[bytes, str]]) -> Optional[str]:
# Gemini's Python SDK returns thought_signature as raw bytes, but the
# field is typed Optional[str] downstream and gets json.dumps'd into
# SSE events. Encode once at ingress so callers only ever see a str.
if isinstance(sig, bytes):
return base64.b64encode(sig).decode("ascii")
return sig
def _decode_thought_signature(
sig: Optional[Union[bytes, str]],
) -> Optional[Union[bytes, str]]:
# Reverse of _encode_thought_signature — Gemini's SDK expects bytes
# back when we replay a tool call. ``validate=True`` keeps ASCII
# strings that happen to be loosely decodable from being silently
# turned into bytes; non-base64 inputs pass through unchanged.
if isinstance(sig, str):
try:
return base64.b64decode(sig.encode("ascii"), validate=True)
except (binascii.Error, ValueError):
return sig
return sig
class GoogleLLMHandler(LLMHandler):
"""Handler for Google's GenAI API."""
@@ -23,7 +49,7 @@ class GoogleLLMHandler(LLMHandler):
for idx, part in enumerate(parts):
if hasattr(part, "function_call") and part.function_call is not None:
has_sig = hasattr(part, "thought_signature") and part.thought_signature is not None
thought_sig = part.thought_signature if has_sig else None
thought_sig = _encode_thought_signature(part.thought_signature) if has_sig else None
tool_calls.append(
ToolCall(
id=str(uuid.uuid4()),
@@ -50,7 +76,7 @@ class GoogleLLMHandler(LLMHandler):
tool_calls = []
if hasattr(response, "function_call") and response.function_call is not None:
has_sig = hasattr(response, "thought_signature") and response.thought_signature is not None
thought_sig = response.thought_signature if has_sig else None
thought_sig = _encode_thought_signature(response.thought_signature) if has_sig else None
tool_calls.append(
ToolCall(
id=str(uuid.uuid4()),
@@ -70,8 +96,15 @@ class GoogleLLMHandler(LLMHandler):
"""Create a tool result message in the standard internal format."""
import json as _json
from application.storage.db.serialization import PGNativeJSONEncoder
# PostgresTool results commonly include PG-native types
# (datetime / UUID / Decimal / bytea) when SELECT touches
# timestamptz / numeric / uuid / bytea columns. The shared
# encoder handles all five — bytes get base64 (lossless) instead
# of the ``str(b'...')`` repr that ``default=str`` would emit.
content = (
_json.dumps(result)
_json.dumps(result, cls=PGNativeJSONEncoder)
if not isinstance(result, str)
else result
)

View File

@@ -40,8 +40,15 @@ class OpenAILLMHandler(LLMHandler):
"""Create a tool result message in the standard internal format."""
import json as _json
from application.storage.db.serialization import PGNativeJSONEncoder
# PostgresTool results commonly include PG-native types
# (datetime / UUID / Decimal / bytea) when SELECT touches
# timestamptz / numeric / uuid / bytea columns. The shared
# encoder handles all five — bytes get base64 (lossless) instead
# of the ``str(b'...')`` repr that ``default=str`` would emit.
content = (
_json.dumps(result)
_json.dumps(result, cls=PGNativeJSONEncoder)
if not isinstance(result, str)
else result
)

View File

@@ -26,6 +26,8 @@ class LlamaSingleton:
class LlamaCpp(BaseLLM):
provider_name = "llama_cpp"
def __init__(
self,
api_key=None,

View File

@@ -16,37 +16,111 @@ class LLMCreator:
model_id=None,
agent_id=None,
backup_models=None,
model_user_id=None,
*args,
**kwargs,
):
"""Construct an LLM for the given provider ``type``.
``model_user_id`` is the BYOM-resolution scope. Defaults to
``decoded_token['sub']`` (the caller). Pass it explicitly when
the model record belongs to a *different* user — most notably
for shared-agent dispatch, where the agent's stored
``default_model_id`` is the owner's BYOM UUID but
``decoded_token`` represents the caller.
"""
from application.core.model_registry import ModelRegistry
from application.security.safe_url import (
UnsafeUserUrlError,
pinned_httpx_client,
validate_user_base_url,
)
plugin = PROVIDERS_BY_NAME.get(type.lower())
if plugin is None or plugin.llm_class is None:
raise ValueError(f"No LLM class found for type {type}")
# Prefer per-model endpoint config from the registry. This is what
# makes openai_compatible (and the future end-user BYOM phase)
# work without changing every call site: if the registered
# AvailableModel carries its own api_key / base_url, they win
# over whatever the caller resolved via the provider plugin.
# makes openai_compatible AND end-user BYOM work without changing
# every call site: if the registered AvailableModel carries its
# own api_key / base_url, they win over whatever the caller
# resolved via the provider plugin.
#
# End-user BYOM lookups need the user_id from decoded_token to
# find the user's per-user models layer (built-in models resolve
# without it, so this stays back-compat).
base_url = None
upstream_model_id = model_id
capabilities = None
if model_id:
model = ModelRegistry.get_instance().get_model(model_id)
user_id = model_user_id
if user_id is None:
user_id = (
(decoded_token or {}).get("sub") if decoded_token else None
)
model = ModelRegistry.get_instance().get_model(model_id, user_id=user_id)
if model is not None:
# Forward registry caps so the LLM enforces them at
# dispatch (built-in classes hard-code True otherwise).
capabilities = getattr(model, "capabilities", None)
# SECURITY: refuse user-source dispatch without its own
# api_key (would leak settings.API_KEY to base_url).
if (
getattr(model, "source", "builtin") == "user"
and not model.api_key
):
raise ValueError(
f"Custom model {model_id!r} has no usable API key "
"(decryption may have failed). Re-save the model "
"in settings to dispatch it."
)
if model.api_key:
api_key = model.api_key
if model.base_url:
base_url = model.base_url
# For BYOM the registry id is a UUID; the upstream API
# call needs the user's typed model name instead.
if model.upstream_model_id:
upstream_model_id = model.upstream_model_id
# SECURITY: re-validate at dispatch (defense in depth
# for pre-guard rows / YAML-supplied entries). The
# pinned httpx.Client below is what actually closes the
# DNS-rebinding TOCTOU window.
if base_url and getattr(model, "source", "builtin") == "user":
try:
validate_user_base_url(base_url)
except UnsafeUserUrlError as e:
raise ValueError(
f"Refusing to dispatch model {model_id!r}: {e}"
) from e
# Pinned httpx.Client: resolves once, validates, and
# binds the SDK's outbound socket to the validated IP
# (preserves Host / SNI). Future BYOM providers must
# opt in explicitly — only openai_compatible takes
# http_client today.
if plugin.name == "openai_compatible":
try:
kwargs["http_client"] = pinned_httpx_client(
base_url
)
except UnsafeUserUrlError as e:
raise ValueError(
f"Refusing to dispatch model {model_id!r}: {e}"
) from e
# Forward model_user_id so backup/fallback resolves under the
# owner's scope on shared-agent dispatch.
return plugin.llm_class(
api_key,
user_api_key,
decoded_token=decoded_token,
model_id=model_id,
model_id=upstream_model_id,
agent_id=agent_id,
base_url=base_url,
backup_models=backup_models,
model_user_id=model_user_id,
capabilities=capabilities,
*args,
**kwargs,
)

View File

@@ -5,6 +5,8 @@ NOVITA_BASE_URL = "https://api.novita.ai/openai"
class NovitaLLM(OpenAILLM):
provider_name = "novita"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=api_key or settings.NOVITA_API_KEY or settings.API_KEY,

View File

@@ -5,6 +5,8 @@ OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"
class OpenRouterLLM(OpenAILLM):
provider_name = "openrouter"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=api_key or settings.OPEN_ROUTER_API_KEY or settings.API_KEY,

View File

@@ -61,8 +61,17 @@ def _truncate_base64_for_logging(messages):
class OpenAILLM(BaseLLM):
provider_name = "openai"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
def __init__(
self,
api_key=None,
user_api_key=None,
base_url=None,
http_client=None,
*args,
**kwargs,
):
super().__init__(*args, **kwargs)
self.api_key = api_key or settings.OPENAI_API_KEY or settings.API_KEY
@@ -80,7 +89,18 @@ class OpenAILLM(BaseLLM):
else:
effective_base_url = "https://api.openai.com/v1"
self.client = OpenAI(api_key=self.api_key, base_url=effective_base_url)
# http_client (set by LLMCreator for BYOM) is a DNS-rebinding-safe
# httpx.Client; without it the SDK re-resolves DNS per request.
if http_client is not None:
self.client = OpenAI(
api_key=self.api_key,
base_url=effective_base_url,
http_client=http_client,
)
else:
self.client = OpenAI(
api_key=self.api_key, base_url=effective_base_url
)
self.storage = StorageCreator.get_storage()
def _clean_messages_openai(self, messages):
@@ -243,6 +263,13 @@ class OpenAILLM(BaseLLM):
if "max_tokens" in kwargs:
kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
# Defense-in-depth: drop tools / response_format if the
# registry's capability flags deny them.
if tools and not self._supports_tools():
tools = None
if response_format and not self._supports_structured_output():
response_format = None
request_params = {
"model": model,
"messages": messages,
@@ -279,6 +306,13 @@ class OpenAILLM(BaseLLM):
if "max_tokens" in kwargs:
kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
# See _raw_gen for rationale — drop tools/response_format when the
# registry-provided capabilities say the model doesn't support them.
if tools and not self._supports_tools():
tools = None
if response_format and not self._supports_structured_output():
response_format = None
request_params = {
"model": model,
"messages": messages,
@@ -320,9 +354,17 @@ class OpenAILLM(BaseLLM):
response.close()
def _supports_tools(self):
# When the LLM was constructed via LLMCreator with a registered
# AvailableModel, ``self.capabilities`` is the per-model record.
# BYOM users can disable tool support; respect that. Otherwise
# OpenAI's API supports tools by default.
if self.capabilities is not None:
return bool(self.capabilities.supports_tools)
return True
def _supports_structured_output(self):
if self.capabilities is not None:
return bool(self.capabilities.supports_structured_output)
return True
def prepare_structured_output_format(self, json_schema):
@@ -389,6 +431,12 @@ class OpenAILLM(BaseLLM):
Returns:
list: List of supported MIME types
"""
# Per-model caps from the registry win when present — a BYOM
# endpoint that doesn't accept images would otherwise still be
# sent base64 image parts because the OpenAI default below
# advertises the image alias unconditionally.
if self.capabilities is not None:
return list(self.capabilities.supported_attachment_types or [])
from application.core.model_yaml import resolve_attachment_alias
return resolve_attachment_alias("image")

View File

@@ -3,6 +3,7 @@ from application.core.settings import settings
class PremAILLM(BaseLLM):
provider_name = "premai"
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
from premai import Prem

View File

@@ -59,6 +59,7 @@ class LineIterator:
class SagemakerAPILLM(BaseLLM):
provider_name = "sagemaker"
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
import boto3

View File

@@ -1,11 +1,13 @@
import datetime
import functools
import inspect
import time
import logging
import uuid
from typing import Any, Callable, Dict, Generator, List
from application.core import log_context
from application.storage.db.repositories.stack_logs import StackLogsRepository
from application.storage.db.session import db_session
@@ -22,6 +24,15 @@ class LogContext:
self.api_key = api_key
self.query = query
self.stacks = []
# Per-activity response aggregates populated by ``_consume_and_log``
# while it forwards stream items, then flushed onto the
# ``activity_finished`` event so every Flask request gets the
# same summary that ``run_agent_logic`` used to log only for the
# Celery webhook path.
self.answer_length = 0
self.thought_length = 0
self.source_count = 0
self.tool_call_count = 0
def build_stack_data(
@@ -78,25 +89,125 @@ def log_activity() -> Callable:
user = data.get("user", "local")
api_key = data.get("user_api_key", "")
query = kwargs.get("query", getattr(args[0], "query", ""))
agent_id = getattr(args[0], "agent_id", None) or kwargs.get("agent_id")
conversation_id = (
kwargs.get("conversation_id")
or getattr(args[0], "conversation_id", None)
)
model = getattr(args[0], "gpt_model", None) or getattr(args[0], "model", None)
# Capture the surrounding activity_id before overlaying ours,
# so nested activities record the parent → child link.
parent_activity_id = log_context.snapshot().get("activity_id")
context = LogContext(endpoint, activity_id, user, api_key, query)
kwargs["log_context"] = context
logging.info(
f"Starting activity: {endpoint} - {activity_id} - User: {user}"
ctx_token = log_context.bind(
activity_id=activity_id,
parent_activity_id=parent_activity_id,
user_id=user,
agent_id=agent_id,
conversation_id=conversation_id,
endpoint=endpoint,
model=model,
)
generator = func(*args, **kwargs)
yield from _consume_and_log(generator, context)
started_at = time.monotonic()
logging.info(
"activity_started",
extra={
"activity_id": activity_id,
"parent_activity_id": parent_activity_id,
"user_id": user,
"agent_id": agent_id,
"conversation_id": conversation_id,
"endpoint": endpoint,
"model": model,
},
)
error: BaseException | None = None
try:
generator = func(*args, **kwargs)
yield from _consume_and_log(generator, context)
except Exception as exc:
# Only ``Exception`` counts as an activity error; ``GeneratorExit``
# (consumer disconnected mid-stream) and ``KeyboardInterrupt``
# flow through the finally as ``status="ok"``, matching
# ``_consume_and_log``.
error = exc
raise
finally:
_emit_activity_finished(
context=context,
parent_activity_id=parent_activity_id,
started_at=started_at,
error=error,
)
log_context.reset(ctx_token)
return wrapper
return decorator
def _emit_activity_finished(
*,
context: "LogContext",
parent_activity_id: str | None,
started_at: float,
error: BaseException | None,
) -> None:
"""Emit the paired ``activity_finished`` event with duration, outcome,
and per-activity response aggregates accumulated in ``_consume_and_log``.
"""
duration_ms = int((time.monotonic() - started_at) * 1000)
logging.info(
"activity_finished",
extra={
"activity_id": context.activity_id,
"parent_activity_id": parent_activity_id,
"user_id": context.user,
"endpoint": context.endpoint,
"duration_ms": duration_ms,
"status": "error" if error is not None else "ok",
"error_class": type(error).__name__ if error is not None else None,
"answer_length": context.answer_length,
"thought_length": context.thought_length,
"source_count": context.source_count,
"tool_call_count": context.tool_call_count,
},
)
def _accumulate_response_summary(item: Any, context: "LogContext") -> None:
"""Mirror the per-line aggregation that ``run_agent_logic`` did for the
Celery webhook path, but at the generator-consumption layer so every
``Agent.gen`` activity (Flask streaming, sub-agents, workflow agents)
gets the same summary.
"""
if not isinstance(item, dict):
return
if "answer" in item:
context.answer_length += len(str(item["answer"]))
return
if "thought" in item:
context.thought_length += len(str(item["thought"]))
return
sources = item.get("sources") if "sources" in item else None
if isinstance(sources, list):
context.source_count += len(sources)
return
tool_calls = item.get("tool_calls") if "tool_calls" in item else None
if isinstance(tool_calls, list):
context.tool_call_count += len(tool_calls)
def _consume_and_log(generator: Generator, context: "LogContext"):
    """Drain ``generator``, forwarding each item to the caller while folding
    response aggregates into ``context`` via ``_accumulate_response_summary``.

    An exception raised by the wrapped generator is logged (with traceback)
    under the activity's endpoint/id and then swallowed — from the
    consumer's perspective the stream simply ends early.
    """
    try:
        for chunk in generator:
            _accumulate_response_summary(chunk, context)
            yield chunk
    except Exception as e:
        logging.exception(f"Error in {context.endpoint} - {context.activity_id}: {e}")

View File

@@ -1,12 +1,27 @@
import os
import logging
from typing import List, Any
from typing import Any, List, Optional
from retry import retry
from tqdm import tqdm
from application.core.settings import settings
from application.storage.db.repositories.ingest_chunk_progress import (
IngestChunkProgressRepository,
)
from application.storage.db.session import db_session
from application.vectorstore.vector_creator import VectorCreator
class EmbeddingPipelineError(Exception):
    """Signals that the per-chunk embed loop left a partial index behind.

    Propagates into Celery's ``autoretry_for`` so a transient cause (rate
    limit, network blip) gets another attempt; the chunk-progress checkpoint
    keeps retries cheap by re-running only the failed chunk and everything
    after it. Once ``MAX_TASK_ATTEMPTS`` is exhausted, the poison-loop guard
    in ``with_idempotency`` finalises the row as ``failed``.
    """
def sanitize_content(content: str) -> str:
"""
Remove NUL characters that can cause vector store ingestion to fail.
@@ -22,7 +37,11 @@ def sanitize_content(content: str) -> str:
return content.replace('\x00', '')
@retry(tries=10, delay=60)
# Per-chunk inline retry. Aggressive defaults (tries=10, delay=60) blocked
# the loop for up to 9 min per chunk and wedged the heartbeat: lower the
# tail so a transient failure fails-fast and the chunk-progress checkpoint
# resumes cleanly on next dispatch.
@retry(tries=3, delay=5, backoff=2)
def add_text_to_store_with_retry(store: Any, doc: Any, source_id: str) -> None:
"""Add a document's text and metadata to the vector store with retry logic.
@@ -45,21 +64,119 @@ def add_text_to_store_with_retry(store: Any, doc: Any, source_id: str) -> None:
raise
def embed_and_store_documents(docs: List[Any], folder_name: str, source_id: str, task_status: Any) -> None:
def _init_progress_and_resume_index(
source_id: str, total_chunks: int, attempt_id: Optional[str],
) -> int:
"""Upsert the progress row and return the next chunk index to embed.
The repository's upsert preserves ``last_index`` only when the
incoming ``attempt_id`` matches the stored one (a Celery autoretry
of the same task). On a fresh attempt — including any caller that
doesn't pass an ``attempt_id``, e.g. legacy code or tests — the
row's checkpoint is reset so the loop starts from chunk 0. This
is what prevents a completed checkpoint from any prior run
silently no-op'ing the next sync/reingest.
Best-effort: a DB outage falls back to ``0`` (fresh run from
chunk 0). The embed loop's own re-raise still ensures partial
runs don't get cached as complete.
"""
try:
with db_session() as conn:
progress = IngestChunkProgressRepository(conn).init_progress(
source_id, total_chunks, attempt_id,
)
except Exception as e:
logging.warning(
f"Could not init ingest progress for {source_id}: {e}",
exc_info=True,
)
return 0
if not progress:
return 0
last_index = progress.get("last_index", -1)
if last_index is None or last_index < 0:
return 0
return int(last_index) + 1
def _record_progress(source_id: str, last_index: int, embedded_chunks: int) -> None:
"""Best-effort checkpoint after each chunk; logged but never raised."""
try:
with db_session() as conn:
IngestChunkProgressRepository(conn).record_chunk(
source_id, last_index=last_index, embedded_chunks=embedded_chunks
)
except Exception as e:
logging.warning(
f"Could not record ingest progress for {source_id}: {e}", exc_info=True
)
def assert_index_complete(source_id: str) -> None:
    """Tripwire: raise ``EmbeddingPipelineError`` if ``ingest_chunk_progress``
    records a partial embed for ``source_id``.

    Workers run this after ``embed_and_store_documents`` as defense in depth
    against any future swallow path that bypasses the function's own
    re-raise — the chunk-progress row is the authoritative count of how
    many chunks actually landed.

    No-op when no row exists (zero-doc validation raised before init, or
    the progress repo was unreachable) and when the lookup itself fails.
    """
    try:
        with db_session() as conn:
            repo = IngestChunkProgressRepository(conn)
            progress = repo.get_progress(source_id)
    except Exception as e:
        logging.warning(
            f"assert_index_complete: progress lookup failed for "
            f"{source_id}: {e}",
            exc_info=True,
        )
        return
    if not progress:
        return
    embedded = int(progress.get("embedded_chunks") or 0)
    total = int(progress.get("total_chunks") or 0)
    if embedded >= total:
        return
    raise EmbeddingPipelineError(
        f"partial index for source {source_id}: "
        f"{embedded}/{total} chunks embedded"
    )
def embed_and_store_documents(
docs: List[Any],
folder_name: str,
source_id: str,
task_status: Any,
*,
attempt_id: Optional[str] = None,
) -> None:
"""Embeds documents and stores them in a vector store.
Resumable across Celery autoretries of the *same* task: when
``attempt_id`` matches the stored checkpoint's ``attempt_id``,
the loop resumes from ``last_index + 1``. A different
``attempt_id`` (a fresh sync / reingest invocation) resets the
checkpoint so the index is rebuilt from chunk 0 — this is what
keeps a completed checkpoint from poisoning the next sync.
Args:
docs: List of documents to be embedded and stored.
folder_name: Directory to save the vector store.
source_id: Unique identifier for the source.
task_status: Task state manager for progress updates.
attempt_id: Stable id of the current task invocation,
typically ``self.request.id`` from the Celery task body.
``None`` is treated as a fresh attempt every time.
Returns:
None
Raises:
OSError: If unable to create folder or save vector store.
Exception: If vector store creation or document embedding fails.
EmbeddingPipelineError: If a chunk fails after retries.
"""
# Ensure the folder exists
if not os.path.exists(folder_name):
@@ -69,33 +186,77 @@ def embed_and_store_documents(docs: List[Any], folder_name: str, source_id: str,
if not docs:
raise ValueError("No documents to embed - check file format and extension")
total_docs = len(docs)
# Atomic upsert that preserves checkpoint state on attempt-id match
# (autoretry of same task) and resets it on mismatch (fresh sync /
# reingest). Returns the new resume index — 0 means "start fresh".
resume_index = _init_progress_and_resume_index(
source_id, total_docs, attempt_id,
)
is_resume = resume_index > 0
# Initialize vector store
if settings.VECTOR_STORE == "faiss":
docs_init = [docs.pop(0)]
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
docs_init=docs_init,
source_id=source_id,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
if is_resume:
# Load the existing FAISS index from storage so chunks
# already embedded by the prior attempt survive the
# save_local rewrite at the end of this run.
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
source_id=source_id,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
loop_start = resume_index
else:
# FAISS requires at least one doc to construct the store;
# seed with ``docs[0]`` and let the loop pick up at index 1.
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
docs_init=[docs[0]],
source_id=source_id,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
# Record the seeded chunk so single-doc ingests don't fail
# ``assert_index_complete`` — the loop never runs for
# ``total_docs == 1`` and would otherwise leave
# ``embedded_chunks`` at 0 / ``last_index`` at -1. The loop
# body's per-iteration ``_record_progress`` overshoots
# correctly for multi-chunk runs (counts seed + iterations),
# so writing this checkpoint up-front is a no-op for those.
_record_progress(source_id, last_index=0, embedded_chunks=1)
loop_start = 1
else:
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
source_id=source_id,
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
store.delete_index()
# Only wipe the index on a fresh run — a resume must keep the
# chunks that earlier attempts already embedded.
if not is_resume:
store.delete_index()
loop_start = resume_index
total_docs = len(docs)
if is_resume and loop_start >= total_docs:
# Nothing left to do; the loop runs zero iterations and
# downstream finalize logic still executes. This is only
# reachable on a same-attempt retry of a task whose previous
# attempt finished — typically a Celery acks_late redelivery
# after the task already returned. The ``assert_index_complete``
# tripwire still validates ``embedded == total`` afterwards.
loop_start = total_docs
# Process and embed documents
for idx, doc in tqdm(
enumerate(docs),
chunk_error: Exception | None = None
failed_idx: int | None = None
for idx in tqdm(
range(loop_start, total_docs),
desc="Embedding 🦖",
unit="docs",
total=total_docs,
total=total_docs - loop_start,
bar_format="{l_bar}{bar}| Time Left: {remaining}",
):
doc = docs[idx]
try:
# Update task status for progress tracking
progress = int(((idx + 1) / total_docs) * 100)
@@ -103,7 +264,10 @@ def embed_and_store_documents(docs: List[Any], folder_name: str, source_id: str,
# Add document to vector store
add_text_to_store_with_retry(store, doc, source_id)
_record_progress(source_id, last_index=idx, embedded_chunks=idx + 1)
except Exception as e:
chunk_error = e
failed_idx = idx
logging.error(f"Error embedding document {idx}: {e}", exc_info=True)
logging.info(f"Saving progress at document {idx} out of {total_docs}")
try:
@@ -124,3 +288,16 @@ def embed_and_store_documents(docs: List[Any], folder_name: str, source_id: str,
raise OSError(f"Unable to save vector store to {folder_name}: {e}") from e
else:
logging.info("Vector store saved successfully.")
# Re-raise after the partial save: the chunks that *did* embed are
# flushed to disk and recorded in ``ingest_chunk_progress``, so a
# Celery autoretry resumes via ``_read_resume_index`` and only
# re-runs the failed-and-after chunks. Without the raise, the
# task body returns success and ``with_idempotency`` finalises
# ``task_dedup`` as ``completed`` for a partial index — poisoning
# the cache for 24h.
if chunk_error is not None:
raise EmbeddingPipelineError(
f"embed failure at chunk {failed_idx}/{total_docs} "
f"for source {source_id}"
) from chunk_error

View File

@@ -22,6 +22,7 @@ class ClassicRAG(BaseRetriever):
llm_name=settings.LLM_PROVIDER,
api_key=settings.API_KEY,
decoded_token=None,
model_user_id=None,
):
self.original_question = source.get("question", "")
self.chat_history = chat_history if chat_history is not None else []
@@ -42,18 +43,26 @@ class ClassicRAG(BaseRetriever):
f"sources={'active_docs' in source and source['active_docs'] is not None}"
)
self.model_id = model_id
self.model_user_id = model_user_id
self.doc_token_limit = doc_token_limit
self.user_api_key = user_api_key
self.agent_id = agent_id
self.llm_name = llm_name
self.api_key = api_key
# Forward model_id + model_user_id so LLMCreator resolves BYOM
# base_url / api_key / upstream id for the rephrase client.
self.llm = LLMCreator.create_llm(
self.llm_name,
api_key=self.api_key,
user_api_key=self.user_api_key,
decoded_token=decoded_token,
model_id=self.model_id,
agent_id=self.agent_id,
model_user_id=self.model_user_id,
)
# Query-rephrase LLM is a side channel — tag it so its rows
# land as ``source='rag_condense'`` in cost-attribution.
self.llm._token_usage_source = "rag_condense"
if "active_docs" in source and source["active_docs"] is not None:
if isinstance(source["active_docs"], list):
@@ -103,7 +112,11 @@ class ClassicRAG(BaseRetriever):
]
try:
rephrased_query = self.llm.gen(model=self.model_id, messages=messages)
# Send upstream id (resolved by LLMCreator), not registry UUID.
rephrased_query = self.llm.gen(
model=getattr(self.llm, "model_id", None) or self.model_id,
messages=messages,
)
print(f"Rephrased query: {rephrased_query}")
return rephrased_query if rephrased_query else self.original_question
except Exception as e:

View File

@@ -0,0 +1,464 @@
"""SSRF protection for user-supplied OpenAI-compatible base URLs.
This module is the single chokepoint for validating any URL that a user
provides as an OpenAI-compatible ``base_url`` ("Bring Your Own Model").
The backend will later issue outbound HTTP requests to that URL on the
user's behalf, so we must reject anything that could be used to reach
internal-network resources (cloud metadata services, RFC 1918 ranges,
loopback, link-local, etc.).
Three entry points:
* :func:`validate_user_base_url` — called at create/update time on REST
routes that persist the URL, to give the user immediate feedback.
* :func:`pinned_post` — called at dispatch time when the caller drives
``requests`` directly (e.g. the ``/api/models/test`` endpoint).
Resolves once, dials the IP literal, preserves the original hostname
in the ``Host`` header and via SNI / cert verification for HTTPS.
* :func:`pinned_httpx_client` — called at dispatch time when the caller
hands an ``httpx.Client`` to a third-party SDK (e.g. the OpenAI
Python SDK via ``OpenAI(http_client=...)``). Same DNS-rebinding
closure on the httpx transport layer.
Why all three: the OpenAI / httpx ecosystem performs its own DNS lookup
inside ``socket.getaddrinfo`` when a connection opens, so a hostile DNS
server can hand a public IP to the validator and a loopback / link-local
address to the HTTP client. Validate-then-construct-SDK is unsafe; the
pinned variants close that TOCTOU window by resolving exactly once and
dialing the chosen IP literal directly.
"""
from __future__ import annotations
import ipaddress
import socket
from typing import Any, Iterable
from urllib.parse import urlsplit, urlunsplit
import httpx
import requests
from requests.adapters import HTTPAdapter
# Allowed URL schemes. Anything else (file, gopher, ftp, data, ...) is
# rejected outright because it either bypasses HTTP entirely or enables
# protocol smuggling against the proxy stack.
_ALLOWED_SCHEMES: frozenset[str] = frozenset({"http", "https"})
# Hostnames that resolve to a loopback / metadata / unspecified address
# but which we want to reject *by name* as well, so the rejection
# message is unambiguous and so we never accidentally call DNS on them.
# NOTE(review): matching appears to be by exact string — confirm callers
# lowercase/normalize the host before comparing against this set.
_BLOCKED_HOSTNAMES: frozenset[str] = frozenset(
    {
        "localhost",
        "localhost.localdomain",
        "0.0.0.0",
        "::",
        "::1",
        "ip6-localhost",
        "ip6-loopback",
        # GCP metadata service. AWS/Azure use 169.254.169.254 which the
        # IP-range check below already covers via the link-local range,
        # but Google's hostname does not always resolve to a link-local
        # IP from every VPC, so we hard-deny the string too.
        "metadata.google.internal",
    }
)
# Carrier-grade NAT (RFC 6598). Python's ``ipaddress`` module does NOT
# classify this range as ``is_private``, so we must check it explicitly.
_CGNAT_NETWORK_V4: ipaddress.IPv4Network = ipaddress.IPv4Network("100.64.0.0/10")
class UnsafeUserUrlError(ValueError):
    """A user-supplied URL failed SSRF validation.

    Derives from :class:`ValueError` so existing call sites that already
    map invalid input to a 400-class error keep working unchanged. The
    message always names the concrete rejection reason (scheme, hostname,
    resolved IP, DNS failure, ...) so it can be shown to the user verbatim.
    """
def _strip_ipv6_brackets(host: str) -> str:
"""Return ``host`` with surrounding ``[`` / ``]`` removed if present."""
if host.startswith("[") and host.endswith("]"):
return host[1:-1]
return host
def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
"""Return ``True`` if ``ip`` falls in any range we refuse to dial.
This is the single source of truth for the IP-range policy:
* loopback (``127.0.0.0/8``, ``::1``)
* private (RFC 1918, ULA ``fc00::/7``)
* link-local (``169.254.0.0/16``, ``fe80::/10``)
* multicast (``224.0.0.0/4``, ``ff00::/8``)
* unspecified (``0.0.0.0``, ``::``)
* reserved (``240.0.0.0/4``, etc.)
* carrier-grade NAT (``100.64.0.0/10``) — not covered by ``is_private``
"""
if (
ip.is_loopback
or ip.is_private
or ip.is_link_local
or ip.is_multicast
or ip.is_unspecified
or ip.is_reserved
):
return True
if isinstance(ip, ipaddress.IPv4Address) and ip in _CGNAT_NETWORK_V4:
return True
return False
def _resolve(host: str) -> Iterable[ipaddress.IPv4Address | ipaddress.IPv6Address]:
"""Resolve ``host`` to every A/AAAA record returned by the system.
Returning *all* addresses (rather than the first one) is critical:
a hostile DNS server can return a public IP first followed by a
private IP, and the underlying HTTP client may fail over to the
private one on connect. We treat the set as unsafe if any element
is unsafe.
"""
try:
results = socket.getaddrinfo(host, None)
except socket.gaierror as exc: # noqa: PERF203 — re-raise as our own type
raise UnsafeUserUrlError(f"could not resolve hostname {host!r}: {exc}") from exc
addresses: list[ipaddress.IPv4Address | ipaddress.IPv6Address] = []
for entry in results:
sockaddr = entry[4]
# IPv4 sockaddr: (host, port). IPv6 sockaddr: (host, port, flowinfo, scope_id).
ip_str = sockaddr[0]
# Strip IPv6 zone-id ("fe80::1%lo0") before parsing.
if "%" in ip_str:
ip_str = ip_str.split("%", 1)[0]
try:
addresses.append(ipaddress.ip_address(ip_str))
except ValueError:
# An entry we can't parse is itself suspicious; treat as unsafe.
raise UnsafeUserUrlError(
f"hostname {host!r} resolved to unparseable address {ip_str!r}"
) from None
return addresses
def _validate_and_pick_ip(
    url: str,
) -> tuple[str, ipaddress.IPv4Address | ipaddress.IPv6Address, SplitResult]:
    """Run the SSRF guard and return the data needed to dial safely.

    Performs every check :func:`validate_user_base_url` performs, but
    additionally returns ``(hostname, ip, parts)`` where ``ip`` is one
    of the validated addresses (the first record returned by the
    resolver, or the literal itself if the URL already used an IP) and
    ``parts`` is the :func:`urllib.parse.urlsplit` result so callers do
    not have to re-parse the URL.

    Raises :class:`UnsafeUserUrlError` on the same conditions as
    :func:`validate_user_base_url`, plus when the URL carries a
    malformed or out-of-range port — ``SplitResult.port`` validates
    lazily, so without the explicit check here a URL like
    ``http://host:99999/`` would pass validation and blow up with a
    bare ``ValueError`` at dispatch time.
    """
    if not isinstance(url, str) or not url.strip():
        raise UnsafeUserUrlError("url must be a non-empty string")
    try:
        parts = urlsplit(url)
    except ValueError as exc:
        raise UnsafeUserUrlError(f"could not parse url {url!r}: {exc}") from exc
    scheme = parts.scheme.lower()
    if scheme not in _ALLOWED_SCHEMES:
        raise UnsafeUserUrlError(
            f"scheme {scheme!r} is not allowed; only http and https are permitted"
        )
    # ``SplitResult.port`` defers validation until first access; force
    # it now so malformed ports surface as our 400-class error.
    try:
        parts.port
    except ValueError as exc:
        raise UnsafeUserUrlError(f"could not parse url {url!r}: {exc}") from exc
    # ``urlsplit`` returns the bracketed form for IPv6 in ``netloc`` but
    # the bare form in ``hostname``. Normalize via lower() because
    # hostnames are case-insensitive and we compare against a lowercase
    # blocklist.
    raw_host = parts.hostname
    if not raw_host:
        raise UnsafeUserUrlError(f"url {url!r} has no hostname")
    host = raw_host.lower()
    # Check the literal-string blocklist first. urlsplit().hostname strips
    # IPv6 brackets, so we also test the bracketed form for completeness
    # (matches the public-spec note about ``[::]``).
    bracketed = f"[{host}]"
    if host in _BLOCKED_HOSTNAMES or bracketed in _BLOCKED_HOSTNAMES:
        raise UnsafeUserUrlError(
            f"hostname {raw_host!r} is not allowed (matches internal-only name)"
        )
    # If the host is already an IP literal (with or without IPv6 brackets),
    # check it directly without going to DNS — DNS for an IP literal is a
    # no-op but it's clearer to short-circuit and gives a better message.
    candidate = _strip_ipv6_brackets(host)
    try:
        literal = ipaddress.ip_address(candidate)
    except ValueError:
        literal = None
    if literal is not None:
        if _is_blocked_ip(literal):
            raise UnsafeUserUrlError(
                f"hostname {raw_host!r} resolves to blocked address {literal} "
                f"(loopback/private/link-local/multicast/reserved/CGNAT)"
            )
        return host, literal, parts
    # Hostname (not an IP literal) — resolve and validate every record.
    addresses = list(_resolve(host))
    if not addresses:
        # ``getaddrinfo`` would normally raise instead of returning an
        # empty list, but treat the degenerate case as unsafe too — we
        # have nothing to bind a connection to.
        raise UnsafeUserUrlError(
            f"hostname {raw_host!r} returned no addresses from DNS"
        )
    for ip in addresses:
        if _is_blocked_ip(ip):
            raise UnsafeUserUrlError(
                f"hostname {raw_host!r} resolves to blocked address {ip} "
                f"(loopback/private/link-local/multicast/reserved/CGNAT)"
            )
    return host, addresses[0], parts
def validate_user_base_url(url: str) -> None:
    """Check that ``url`` is a safe outbound base URL; raise if not.

    The hostname is resolved to one or more IPs, and the URL is
    rejected when any resolved IP is private/loopback/link-local/
    multicast/reserved, when the scheme is not http(s), or when the
    hostname is one of the known dangerous strings (``localhost``,
    ``0.0.0.0``, ``[::]``).

    Returns ``None`` on success; raises :class:`UnsafeUserUrlError`
    otherwise.

    This is the create/update-time check only. At dispatch time prefer
    :func:`pinned_post`: it repeats this validation *and* pins the
    outbound connection to the validated IP, so a DNS rebinder cannot
    flip the resolution between check and connect.

    Args:
        url: The user-supplied URL to validate. Expected to be an
            absolute URL with an ``http`` or ``https`` scheme.

    Raises:
        UnsafeUserUrlError: If the URL fails to parse, uses a forbidden
            scheme, has an empty/blocklisted hostname, fails DNS
            resolution, or resolves to any IP in a blocked range.
    """
    _validate_and_pick_ip(url)
class _PinnedHostAdapter(HTTPAdapter):
    """HTTPS adapter that performs SNI and cert verification against a
    fixed hostname even when the URL connects to an IP literal.

    Used by :func:`pinned_post` so that resolving the user-supplied
    hostname once and dialing the resolved IP doesn't break TLS.
    Without this, ``urllib3`` would default ``server_hostname`` /
    ``assert_hostname`` to the connect host (the IP) and either send the
    wrong SNI or fail cert verification — the cert is for the original
    hostname, not the IP literal.
    """

    def __init__(self, server_hostname: str, *args: Any, **kwargs: Any) -> None:
        # The hostname that TLS must verify against; stored before the
        # base-class __init__ because HTTPAdapter.__init__ builds the
        # initial pool manager, which calls init_poolmanager below.
        self._server_hostname = server_hostname
        super().__init__(*args, **kwargs)

    def init_poolmanager(self, *args: Any, **kwargs: Any) -> None:
        # Deliberately overwrite (not setdefault) so the validated
        # hostname always wins over whatever urllib3 would derive from
        # the connect host (the IP literal in the pinned URL).
        kwargs["server_hostname"] = self._server_hostname
        kwargs["assert_hostname"] = self._server_hostname
        super().init_poolmanager(*args, **kwargs)
def _ip_to_url_host(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> str:
"""Return ``ip`` formatted for use in a URL netloc (brackets for v6)."""
if isinstance(ip, ipaddress.IPv6Address):
return f"[{ip}]"
return str(ip)
def pinned_post(
    url: str,
    *,
    json: Any = None,
    headers: dict[str, str] | None = None,
    timeout: float = 5.0,
    allow_redirects: bool = False,
) -> requests.Response:
    """POST to ``url`` with the outbound connection pinned to a single
    validated IP, closing the DNS-rebinding TOCTOU window left by the
    naive validate-then-``requests.post`` pattern.

    The URL's hostname is resolved exactly once. Every returned address
    must pass the same SSRF guard as :func:`validate_user_base_url`. The
    outbound request is issued against the chosen IP literal (so
    ``urllib3`` cannot ask the resolver again and receive a different
    answer); the original hostname is preserved in the ``Host`` header
    and, for HTTPS, via :class:`_PinnedHostAdapter` for SNI and cert
    verification.

    Args:
        url: Absolute http(s) URL to POST to.
        json: JSON-serializable payload — passed through to ``requests``.
        headers: Caller-supplied headers. Any caller-supplied ``Host``
            entry is overwritten so the in-flight request matches what
            was validated.
        timeout: Per-request timeout (seconds).
        allow_redirects: Forwarded to ``requests``. Defaults to
            ``False`` because the SSRF guard only inspects the supplied
            URL — following redirects would let a hostile upstream
            bounce the request to an internal address.

    Raises:
        UnsafeUserUrlError: If the URL fails the SSRF guard, or its
            port is malformed/out of range (``SplitResult.port``
            validates lazily, so this must be caught explicitly rather
            than leaking as a bare ``ValueError``).
        requests.RequestException: For network-level failures.
    """
    host, ip, parts = _validate_and_pick_ip(url)
    try:
        port = parts.port
    except ValueError as exc:
        # ``SplitResult.port`` raises only on first access; normalize a
        # malformed port (e.g. ":99999" or ":abc") into the guard's own
        # error type instead of an unhandled ValueError at dispatch.
        raise UnsafeUserUrlError(f"could not parse url {url!r}: {exc}") from exc
    netloc = _ip_to_url_host(ip)
    if port is not None:
        netloc = f"{netloc}:{port}"
    pinned_url = urlunsplit(
        (parts.scheme, netloc, parts.path, parts.query, parts.fragment)
    )
    request_headers = dict(headers or {})
    # Preserve the validated hostname on the wire even though TCP dials
    # the IP literal; overwrite any caller-supplied Host on purpose.
    host_header = host if port is None else f"{host}:{port}"
    request_headers["Host"] = host_header
    session = requests.Session()
    if parts.scheme == "https":
        # SNI / cert verification must target the hostname, not the IP.
        session.mount("https://", _PinnedHostAdapter(host))
    try:
        return session.post(
            pinned_url,
            json=json,
            headers=request_headers,
            timeout=timeout,
            allow_redirects=allow_redirects,
        )
    finally:
        session.close()
class _PinnedHTTPSTransport(httpx.HTTPTransport):
    """``httpx`` transport pinned to a single validated IP literal.

    Closes the DNS-rebinding TOCTOU window that
    :func:`validate_user_base_url` cannot close on its own: the OpenAI
    Python SDK (and any other ``httpx``-based SDK) re-resolves the
    hostname via ``socket.getaddrinfo`` at request time, so a hostile
    DNS server can answer with a public IP during validation and a
    private IP during the actual request. This transport rewrites each
    outgoing request's URL host to the validated IP literal, so
    ``httpcore`` dials that IP without a fresh lookup.

    The original hostname survives in two places:

    1. ``Host`` header — ``httpx.Request._prepare`` derived it from the
       URL netloc *before* this transport runs, so it already carries
       the hostname rather than the IP literal; headers are left alone
       here.
    2. TLS SNI / cert verification — supplied through the
       ``request.extensions["sni_hostname"]`` extension, which
       ``httpcore`` passes to ``start_tls`` as ``server_hostname``.
       Otherwise the IP literal would be used as SNI and cert
       verification would fail (the cert names the hostname, not the
       IP).
    """

    def __init__(
        self,
        validated_host: str,
        validated_ip: ipaddress.IPv4Address | ipaddress.IPv6Address,
        **kwargs: Any,
    ) -> None:
        # Keep http2 off (httpx's default) as defense in depth against
        # HTTP/2 connection coalescing (RFC 7540 §9.1.1), where a client
        # may reuse one TCP connection for any host the cert covers.
        # Per-IP pinning never shares connections across hosts, but
        # being explicit beats relying on the default.
        kwargs.setdefault("http2", False)
        super().__init__(**kwargs)
        self._host = validated_host
        self._ip_netloc = _ip_to_url_host(validated_ip)

    def handle_request(self, request: httpx.Request) -> httpx.Response:
        # Defense in depth: refuse any request whose URL host is not the
        # one this transport was validated for. Catches future SDK
        # regressions that rewrite the URL between Request construction
        # and dial, and the (shouldn't-happen) case of our pinned client
        # being reused for a different host.
        url_host = request.url.host
        if url_host != self._host:
            raise UnsafeUserUrlError(
                f"pinned transport bound to {self._host!r}, refused "
                f"request for {url_host!r}"
            )
        # Install the SNI/server_hostname extension before rewriting the
        # URL host, so TLS verification keeps using the original
        # hostname even though TCP dials the IP literal. httpcore reads
        # this extension in _sync/connection.py and forwards it to
        # start_tls's server_hostname argument.
        updated = dict(request.extensions)
        updated["sni_hostname"] = self._host.encode("ascii")
        request.extensions = updated
        request.url = request.url.copy_with(host=self._ip_netloc)
        return super().handle_request(request)
def pinned_httpx_client(
    base_url: str,
    *,
    timeout: float = 600.0,
) -> httpx.Client:
    """Build an :class:`httpx.Client` pinned to one validated IP.

    Closes the DNS-rebinding TOCTOU window that the naive
    ``OpenAI(base_url=...)`` flow leaves open: the hostname in
    ``base_url`` is resolved exactly once, every returned address must
    clear :func:`_validate_and_pick_ip`'s SSRF guard (loopback,
    RFC 1918, link-local, multicast, reserved, CGNAT, cloud metadata
    names), and the chosen IP is substituted as the URL host on every
    outgoing request so ``httpcore`` never consults the resolver again.

    Hand the result to any SDK that accepts an ``httpx.Client`` —
    e.g. ``OpenAI(http_client=pinned_httpx_client(base_url))`` — to
    make BYOM dispatch immune to DNS-rebinding TOCTOU.

    Args:
        base_url: User-supplied http(s) URL. Validated through the same
            SSRF guard as :func:`validate_user_base_url`.
        timeout: Per-request timeout (seconds). Defaults to 600 to
            match the OpenAI SDK's default; callers should override
            for non-LLM workloads.

    Raises:
        UnsafeUserUrlError: If ``base_url`` fails the SSRF guard.
    """
    host, ip, _parts = _validate_and_pick_ip(base_url)
    # follow_redirects=False: the SSRF guard only inspects the supplied
    # URL, so honoring 3xx would let a hostile upstream bounce the
    # in-network request to an internal address (cloud metadata,
    # RFC 1918, loopback) carrying whatever credentials the SDK adds.
    return httpx.Client(
        transport=_PinnedHTTPSTransport(host, ip),
        timeout=timeout,
        follow_redirects=False,
    )

View File

@@ -11,6 +11,8 @@ import re
from typing import Any, Mapping
from uuid import UUID
from application.storage.db.serialization import coerce_pg_native
_UUID_RE = re.compile(
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
@@ -34,12 +36,17 @@ def looks_like_uuid(value: Any) -> bool:
def row_to_dict(row: Any) -> dict:
"""Convert a SQLAlchemy ``Row`` to a plain dict with Mongo-compatible ids.
"""Convert a SQLAlchemy ``Row`` to a plain JSON-safe dict.
During the migration window, API responses and downstream code still
expect a string ``_id`` field (matching the Mongo shape). This helper
normalizes UUID columns to strings and emits both ``id`` and ``_id`` so
existing serializers keep working unchanged.
Normalises PG-native types at the SELECT boundary: UUID, datetime,
date, Decimal, and bytes are coerced to JSON-safe forms via
:func:`coerce_pg_native`. Downstream serialisation (SSE events,
JSONB writes, API responses) becomes safe by default — repository
consumers no longer need to know that PG returns a different type
set than Mongo did.
Also emits ``_id`` alongside ``id`` for the duration of the Mongo→PG
cutover so legacy serializers expecting Mongo's shape keep working.
Args:
row: A SQLAlchemy ``Row`` object, or ``None``.
@@ -52,10 +59,9 @@ def row_to_dict(row: Any) -> dict:
# Row has a ``._mapping`` attribute exposing a MappingProxy view.
mapping: Mapping[str, Any] = row._mapping # type: ignore[attr-defined]
out = dict(mapping)
out = coerce_pg_native(dict(mapping))
if "id" in out and out["id"] is not None:
out["id"] = str(out["id"]) if isinstance(out["id"], UUID) else out["id"]
out["_id"] = out["id"]
return out

View File

@@ -91,6 +91,16 @@ token_usage_table = Table(
Column("prompt_tokens", Integer, nullable=False, server_default="0"),
Column("generated_tokens", Integer, nullable=False, server_default="0"),
Column("timestamp", DateTime(timezone=True), nullable=False, server_default=func.now()),
# Added in ``0004_durability_foundation``. Distinguishes
# ``agent_stream`` (primary completion) from side-channel inserts
# (``title`` / ``compression`` / ``rag_condense`` / ``fallback``)
# so cost attribution dashboards can group by call source.
Column("source", Text, nullable=False, server_default="agent_stream"),
# Added in ``0005_token_usage_request_id``. Stream-scoped UUID stamped
# on the agent's primary LLM so multi-call agent runs (which produce
# N rows) count as a single request via DISTINCT in the repository
# query. NULL on side-channel sources by design.
Column("request_id", Text),
)
user_logs_table = Table(
@@ -203,6 +213,24 @@ agents_table = Table(
Column("legacy_mongo_id", Text),
)
user_custom_models_table = Table(
"user_custom_models",
metadata,
Column("id", UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()),
Column("user_id", Text, nullable=False),
Column("upstream_model_id", Text, nullable=False),
Column("display_name", Text, nullable=False),
Column("description", Text, nullable=False, server_default=""),
Column("base_url", Text, nullable=False),
# AES-CBC ciphertext (base64) keyed via per-user PBKDF2 in
# application.security.encryption.encrypt_credentials.
Column("api_key_encrypted", Text, nullable=False),
Column("capabilities", JSONB, nullable=False, server_default="{}"),
Column("enabled", Boolean, nullable=False, server_default="true"),
Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
)
attachments_table = Table(
"attachments",
metadata,
@@ -327,6 +355,11 @@ conversation_messages_table = Table(
Column("feedback", JSONB),
Column("timestamp", DateTime(timezone=True), nullable=False, server_default=func.now()),
Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
# Added in 0004_durability_foundation. ``status`` is the WAL state
# machine (pending|streaming|complete|failed); ``request_id`` ties a
# row to a specific HTTP request for log correlation.
Column("status", Text, nullable=False, server_default="complete"),
Column("request_id", Text),
UniqueConstraint("conversation_id", "position", name="conversation_messages_conv_pos_uidx"),
)
@@ -359,9 +392,101 @@ pending_tool_state_table = Table(
Column("client_tools", JSONB),
Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
Column("expires_at", DateTime(timezone=True), nullable=False),
# Added in ``0004_durability_foundation``. ``status`` is the
# ``pending|resuming`` claim flag for the resumed-run path;
# ``resumed_at`` stamps when ``mark_resuming`` flipped the row so
# the cleanup janitor can revert stale claims after the grace
# window.
Column("status", Text, nullable=False, server_default="pending"),
Column("resumed_at", DateTime(timezone=True)),
UniqueConstraint("conversation_id", "user_id", name="pending_tool_state_conv_user_uidx"),
)
# --- Tier 1 durability foundation (migration 0004) --------------------------
# CHECK constraints (status enums) and partial indexes are intentionally
# omitted from these declarations — the DB is the authority. Repositories
# use raw ``text(...)`` SQL against these tables, not the Core objects.
task_dedup_table = Table(
"task_dedup",
metadata,
Column("idempotency_key", Text, primary_key=True),
Column("task_name", Text, nullable=False),
Column("task_id", Text, nullable=False),
Column("result_json", JSONB),
# CHECK (status IN ('pending', 'completed', 'failed')) lives in 0004.
Column("status", Text, nullable=False),
# Bumped each time the per-Celery-task wrapper re-enters; the
# poison-loop guard (``MAX_TASK_ATTEMPTS=5``) refuses to run fn once
# this exceeds the threshold.
Column("attempt_count", Integer, nullable=False, server_default="0"),
Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
# Added in ``0006_idempotency_lease``. Per-invocation random id
# written by the wrapper at lease claim; refreshed every 30 s by a
# heartbeat thread. Other workers seeing a fresh lease (NOT NULL
# AND ``lease_expires_at > now()``) refuse to run the task body.
Column("lease_owner_id", Text),
Column("lease_expires_at", DateTime(timezone=True)),
)
webhook_dedup_table = Table(
"webhook_dedup",
metadata,
Column("idempotency_key", Text, primary_key=True),
Column("agent_id", UUID(as_uuid=True), nullable=False),
Column("task_id", Text, nullable=False),
Column("response_json", JSONB),
Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
)
# Three-phase tool-call journal: ``proposed → executed → confirmed``
# (terminal: ``failed``; ``compensated`` is grandfathered in the CHECK
# from migration 0004 but no code writes it). The reconciler sweeps
# stuck rows via the partial ``tool_call_attempts_pending_ts_idx``.
tool_call_attempts_table = Table(
"tool_call_attempts",
metadata,
Column("call_id", Text, primary_key=True),
# ON DELETE SET NULL preserves the journal even after the parent
# message is deleted — useful for cost-attribution / compliance.
Column(
"message_id",
UUID(as_uuid=True),
ForeignKey("conversation_messages.id", ondelete="SET NULL"),
),
Column("tool_id", UUID(as_uuid=True)),
Column("tool_name", Text, nullable=False),
Column("action_name", Text, nullable=False),
Column("arguments", JSONB, nullable=False),
Column("result", JSONB),
Column("error", Text),
# CHECK (status IN ('proposed', 'executed', 'confirmed',
# 'compensated', 'failed')) lives in 0004.
Column("status", Text, nullable=False),
Column("attempted_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
)
# Per-source ingest checkpoint. Heartbeat thread bumps ``last_updated``
# every 30s while a worker embeds; the reconciler escalates when it
# stops ticking.
ingest_chunk_progress_table = Table(
"ingest_chunk_progress",
metadata,
Column("source_id", UUID(as_uuid=True), primary_key=True),
Column("total_chunks", Integer, nullable=False),
Column("embedded_chunks", Integer, nullable=False, server_default="0"),
Column("last_index", Integer, nullable=False, server_default="-1"),
Column("last_updated", DateTime(timezone=True), nullable=False, server_default=func.now()),
# Added in ``0005_ingest_attempt_id``. Stamped from
# ``self.request.id`` (Celery's stable task id) so a retry of the
# same task resumes from the checkpoint, but a separate invocation
# (manual reingest, scheduled sync) resets to a clean re-index.
Column("attempt_id", Text),
)
workflows_table = Table(
"workflows",
metadata,

View File

@@ -17,6 +17,21 @@ _UPDATABLE_SCALARS = {
_UPDATABLE_JSONB = {"metadata"}
def _attachment_to_dict(row: Any) -> dict:
"""row_to_dict + ``upload_path``→``path`` alias.
Pre-Postgres, the Mongo attachment shape used ``path``. The PG column
is ``upload_path``; LLM provider code (google_ai/openai/anthropic and
handlers/base) still reads ``attachment.get("path")``. This mirrors the
``id``/``_id`` dual-emit in row_to_dict, so consumers don't need to
know which storage backend produced the dict.
"""
out = row_to_dict(row)
if "upload_path" in out and out.get("path") is None:
out["path"] = out["upload_path"]
return out
class AttachmentsRepository:
def __init__(self, conn: Connection) -> None:
self._conn = conn
@@ -66,7 +81,7 @@ class AttachmentsRepository:
"legacy_mongo_id": legacy_mongo_id,
},
)
return row_to_dict(result.fetchone())
return _attachment_to_dict(result.fetchone())
def get(self, attachment_id: str, user_id: str) -> Optional[dict]:
result = self._conn.execute(
@@ -76,7 +91,7 @@ class AttachmentsRepository:
{"id": attachment_id, "user_id": user_id},
)
row = result.fetchone()
return row_to_dict(row) if row is not None else None
return _attachment_to_dict(row) if row is not None else None
def get_any(self, attachment_id: str, user_id: str) -> Optional[dict]:
"""Resolve an attachment by either PG UUID or legacy Mongo ObjectId string."""
@@ -155,14 +170,14 @@ class AttachmentsRepository:
params["user_id"] = user_id
result = self._conn.execute(text(sql), params)
row = result.fetchone()
return row_to_dict(row) if row is not None else None
return _attachment_to_dict(row) if row is not None else None
def list_for_user(self, user_id: str) -> list[dict]:
result = self._conn.execute(
text("SELECT * FROM attachments WHERE user_id = :user_id ORDER BY created_at DESC"),
{"user_id": user_id},
)
return [row_to_dict(r) for r in result.fetchall()]
return [_attachment_to_dict(r) for r in result.fetchall()]
def update(self, attachment_id: str, user_id: str, fields: dict) -> bool:
"""Partial update. Used by the LLM providers to cache their

View File

@@ -25,6 +25,7 @@ from typing import Any, Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
from application.storage.db.serialization import PGNativeJSONEncoder
_UPDATABLE_SCALARS = {
@@ -36,7 +37,7 @@ _UPDATABLE_JSONB = {"session_data", "token_info"}
def _jsonb(value: Any) -> Any:
if value is None:
return None
return json.dumps(value, default=str)
return json.dumps(value, cls=PGNativeJSONEncoder)
class ConnectorSessionsRepository:

View File

@@ -22,6 +22,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert
from application.storage.db.base_repository import looks_like_uuid, row_to_dict
from application.storage.db.models import conversations_table, conversation_messages_table
from application.storage.db.serialization import PGNativeJSONEncoder
def _message_row_to_dict(row) -> dict:
@@ -452,7 +453,7 @@ class ConversationsRepository:
),
{
"id": conversation_id,
"point": json.dumps(point, default=str),
"point": json.dumps(point, cls=PGNativeJSONEncoder),
"max_points": int(max_points),
},
)
@@ -632,6 +633,200 @@ class ConversationsRepository:
result = self._conn.execute(text(sql), params)
return result.rowcount > 0
def reserve_message(
self,
conversation_id: str,
*,
prompt: str,
placeholder_response: str,
request_id: str | None = None,
status: str = "pending",
attachments: list[str] | None = None,
model_id: str | None = None,
metadata: dict | None = None,
) -> dict:
"""Pre-persist a placeholder assistant message before the LLM call."""
self._conn.execute(
text(
"SELECT id FROM conversations "
"WHERE id = CAST(:conv_id AS uuid) FOR UPDATE"
),
{"conv_id": conversation_id},
)
next_pos = self._conn.execute(
text(
"SELECT COALESCE(MAX(position), -1) + 1 AS next_pos "
"FROM conversation_messages "
"WHERE conversation_id = CAST(:conv_id AS uuid)"
),
{"conv_id": conversation_id},
).scalar()
values = {
"conversation_id": conversation_id,
"position": next_pos,
"prompt": prompt,
"response": placeholder_response,
"status": status,
"request_id": request_id,
"model_id": model_id,
"message_metadata": metadata or {},
}
if attachments:
resolved = self._resolve_attachment_refs(
[str(a) for a in attachments],
)
if resolved:
values["attachments"] = resolved
stmt = (
pg_insert(conversation_messages_table)
.values(**values)
.returning(conversation_messages_table)
)
result = self._conn.execute(stmt)
self._conn.execute(
text(
"UPDATE conversations SET updated_at = now() "
"WHERE id = CAST(:id AS uuid)"
),
{"id": conversation_id},
)
return _message_row_to_dict(result.fetchone())
def update_message_by_id(
self, message_id: str, fields: dict,
*, only_if_non_terminal: bool = False,
) -> bool:
"""Update specific fields on a message identified by its UUID.
``metadata`` is merged into the existing JSONB rather than
overwritten, so a reconciler-set ``reconcile_attempts`` survives
a successful late finalize. When ``only_if_non_terminal`` is
True, the update is gated so a late finalize cannot retract a
reconciler-set ``failed`` (or a prior ``complete``).
"""
if not looks_like_uuid(message_id):
return False
allowed = {
"prompt", "response", "thought", "sources", "tool_calls",
"attachments", "model_id", "metadata", "timestamp", "status",
"request_id", "feedback", "feedback_timestamp",
}
filtered = {k: v for k, v in fields.items() if k in allowed}
if not filtered:
return False
api_to_col = {"metadata": "message_metadata"}
set_parts = []
params: dict = {"id": message_id}
for key, val in filtered.items():
col = api_to_col.get(key, key)
if key == "metadata":
if val is None:
set_parts.append(f"{col} = NULL")
else:
set_parts.append(
f"{col} = COALESCE({col}, '{{}}'::jsonb) "
f"|| CAST(:{col} AS jsonb)"
)
params[col] = (
json.dumps(val) if not isinstance(val, str) else val
)
elif key in ("sources", "tool_calls", "feedback"):
set_parts.append(f"{col} = CAST(:{col} AS jsonb)")
if val is None:
params[col] = None
else:
params[col] = (
json.dumps(val) if not isinstance(val, str) else val
)
elif key == "attachments":
set_parts.append(f"{col} = CAST(:{col} AS uuid[])")
params[col] = self._resolve_attachment_refs(
[str(a) for a in val] if val else [],
)
else:
set_parts.append(f"{col} = :{col}")
params[col] = val
set_parts.append("updated_at = now()")
where_clauses = ["id = CAST(:id AS uuid)"]
if only_if_non_terminal:
where_clauses.append("status NOT IN ('complete', 'failed')")
sql = (
f"UPDATE conversation_messages SET {', '.join(set_parts)} "
f"WHERE {' AND '.join(where_clauses)}"
)
result = self._conn.execute(text(sql), params)
return result.rowcount > 0
def update_message_status(
self, message_id: str, status: str,
) -> bool:
"""Cheap status-only transition (e.g. pending → streaming).
Only flips non-terminal rows: a reconciler-set ``failed`` row
stays put so the late streaming chunk doesn't silently retract
the alert.
"""
if not looks_like_uuid(message_id):
return False
result = self._conn.execute(
text(
"UPDATE conversation_messages SET status = :status, "
"updated_at = now() "
"WHERE id = CAST(:id AS uuid) "
"AND status NOT IN ('complete', 'failed')"
),
{"id": message_id, "status": status},
)
return result.rowcount > 0
def heartbeat_message(self, message_id: str) -> bool:
"""Stamp ``message_metadata.last_heartbeat_at`` with ``clock_timestamp()``.
The reconciler's staleness check uses ``GREATEST(timestamp,
last_heartbeat_at)``, so this call extends a long-running
stream's effective freshness without touching ``timestamp`` (the
creation time, used for history sort) or ``status`` (the WAL
marker). Skips terminal rows so a late heartbeat can't silently
retract a reconciler-set ``failed``.
"""
if not looks_like_uuid(message_id):
return False
result = self._conn.execute(
text(
"""
UPDATE conversation_messages
SET message_metadata = jsonb_set(
COALESCE(message_metadata, '{}'::jsonb),
'{last_heartbeat_at}',
to_jsonb(clock_timestamp())
)
WHERE id = CAST(:id AS uuid)
AND status NOT IN ('complete', 'failed')
"""
),
{"id": message_id},
)
return result.rowcount > 0
def confirm_executed_tool_calls(self, message_id: str) -> int:
"""Flip ``tool_call_attempts.status`` from ``'executed'`` to ``'confirmed'`` for the message."""
if not looks_like_uuid(message_id):
return 0
result = self._conn.execute(
text(
"UPDATE tool_call_attempts SET status = 'confirmed', "
"updated_at = now() "
"WHERE message_id = CAST(:mid AS uuid) AND status = 'executed'"
),
{"mid": message_id},
)
return result.rowcount or 0
def truncate_after(self, conversation_id: str, keep_up_to: int) -> int:
"""Delete messages with position > keep_up_to.

View File

@@ -0,0 +1,346 @@
"""Repository for ``webhook_dedup`` and ``task_dedup``; 24h TTL enforced at read."""
from __future__ import annotations
import json
from typing import Any, Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
from application.storage.db.serialization import PGNativeJSONEncoder
# 24h TTL is the contract surfaced in the upload/webhook docstrings; the
# read filters and the stale-row replacement predicate must agree, or the
# upsert can fall into a window where the row is "fresh" to the writer
# but "expired" to the reader (or vice versa). Keep one constant so any
# future change moves both directions in lockstep.
DEDUP_TTL_INTERVAL = "24 hours"
def _jsonb(value: Any) -> Any:
if value is None:
return None
return json.dumps(value, cls=PGNativeJSONEncoder)
class IdempotencyRepository:
    """Data access for the ``webhook_dedup`` and ``task_dedup`` tables.

    The 24h TTL (:data:`DEDUP_TTL_INTERVAL`) is applied at read time and
    inside the upsert predicates; rows are physically removed only by
    :meth:`cleanup_expired`. Every method runs on the injected
    ``Connection`` — transaction control (commit/rollback) stays with
    the caller.
    """

    def __init__(self, conn: Connection) -> None:
        # Borrowed connection: opened, committed and closed by the caller.
        self._conn = conn

    # --- webhook_dedup -----------------------------------------------------
    def get_webhook(self, key: str) -> Optional[dict]:
        """Return the cached webhook row for ``key`` if still within the 24h window."""
        row = self._conn.execute(
            text(
                """
                SELECT * FROM webhook_dedup
                WHERE idempotency_key = :key
                  AND created_at > now() - CAST(:ttl AS interval)
                """
            ),
            {"key": key, "ttl": DEDUP_TTL_INTERVAL},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def record_webhook(
        self,
        key: str,
        agent_id: str,
        task_id: str,
        response_json: dict,
    ) -> Optional[dict]:
        """Insert a webhook dedup row; return None if another writer raced and won.

        ``ON CONFLICT`` replaces an existing row only when its ``created_at``
        is past TTL — atomic stale-row recycling under the row lock. A
        within-TTL conflict yields no row; the caller resolves it via
        :meth:`get_webhook`.
        """
        result = self._conn.execute(
            text(
                """
                INSERT INTO webhook_dedup (
                    idempotency_key, agent_id, task_id, response_json
                )
                VALUES (
                    :key, CAST(:agent_id AS uuid), :task_id,
                    CAST(:response_json AS jsonb)
                )
                ON CONFLICT (idempotency_key) DO UPDATE
                SET agent_id = EXCLUDED.agent_id,
                    task_id = EXCLUDED.task_id,
                    response_json = EXCLUDED.response_json,
                    created_at = now()
                WHERE webhook_dedup.created_at
                      <= now() - CAST(:ttl AS interval)
                RETURNING *
                """
            ),
            {
                "key": key,
                "agent_id": agent_id,
                "task_id": task_id,
                "response_json": _jsonb(response_json),
                "ttl": DEDUP_TTL_INTERVAL,
            },
        )
        # RETURNING only fires when the INSERT (or the conditional UPDATE)
        # actually wrote; a no-row result means a fresh row already exists.
        row = result.fetchone()
        return row_to_dict(row) if row is not None else None

    # --- task_dedup --------------------------------------------------------
    def get_task(self, key: str) -> Optional[dict]:
        """Return the cached task row for ``key`` if still within the 24h window."""
        row = self._conn.execute(
            text(
                """
                SELECT * FROM task_dedup
                WHERE idempotency_key = :key
                  AND created_at > now() - CAST(:ttl AS interval)
                """
            ),
            {"key": key, "ttl": DEDUP_TTL_INTERVAL},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def claim_task(
        self,
        key: str,
        task_name: str,
        task_id: str,
    ) -> Optional[dict]:
        """Claim ``key`` for this task. Returns the inserted row, or None if
        another writer raced and won. The HTTP entry must call this *before*
        ``.delay()`` so only the winner enqueues the Celery task.

        ``ON CONFLICT`` replaces an existing row in two cases:

        - **status='failed'**: the worker's poison-loop guard or the
          reconciler's stuck-pending sweep finalised the prior attempt
          as failed. Both explicitly intend a same-key retry to re-run
          (see ``run_reconciliation`` Q5 docstring) — letting the row
          block for 24 h would silently undo that intent.
        - **created_at past TTL**: a stale claim from any status no
          longer represents a meaningful dedup signal.

        ``status='completed'`` rows still block within TTL — that's the
        cached-success contract callers rely on. ``status='pending'``
        rows still block within TTL so concurrent same-key requests
        collapse onto the in-flight task. Result/attempt fields are
        reset to their fresh-claim defaults during replacement.
        """
        result = self._conn.execute(
            text(
                """
                INSERT INTO task_dedup (
                    idempotency_key, task_name, task_id, result_json, status
                )
                VALUES (
                    :key, :task_name, :task_id, NULL, 'pending'
                )
                ON CONFLICT (idempotency_key) DO UPDATE
                SET task_name = EXCLUDED.task_name,
                    task_id = EXCLUDED.task_id,
                    result_json = NULL,
                    status = 'pending',
                    attempt_count = 0,
                    created_at = now()
                WHERE task_dedup.status = 'failed'
                   OR task_dedup.created_at
                      <= now() - CAST(:ttl AS interval)
                RETURNING *
                """
            ),
            {
                "key": key,
                "task_name": task_name,
                "task_id": task_id,
                "ttl": DEDUP_TTL_INTERVAL,
            },
        )
        # No row back => an in-TTL pending/completed row blocked the claim.
        row = result.fetchone()
        return row_to_dict(row) if row is not None else None

    def try_claim_lease(
        self,
        key: str,
        task_name: str,
        task_id: str,
        owner_id: str,
        ttl_seconds: int = 60,
    ) -> Optional[int]:
        """Atomically claim the running lease for ``key``.

        Returns the new ``attempt_count`` if this caller now owns the
        lease (fresh insert OR existing row whose lease was empty/expired),
        or ``None`` if a different worker holds a live lease.

        The conflict path also bumps ``attempt_count`` so the
        poison-loop guard in :func:`with_idempotency` can fire after
        :data:`MAX_TASK_ATTEMPTS` reclaims. ``status='completed'`` rows
        are deliberately untouched — :func:`_lookup_completed` is the
        cache short-circuit and runs before this. Uses
        ``clock_timestamp()`` so a same-transaction refresh actually
        moves the expiry forward (``now()`` is frozen at txn start).
        """
        result = self._conn.execute(
            text(
                """
                INSERT INTO task_dedup (
                    idempotency_key, task_name, task_id, status, attempt_count,
                    lease_owner_id, lease_expires_at
                ) VALUES (
                    :key, :task_name, :task_id, 'pending', 1,
                    :owner,
                    clock_timestamp() + make_interval(secs => :ttl)
                )
                ON CONFLICT (idempotency_key) DO UPDATE
                SET attempt_count = task_dedup.attempt_count + 1,
                    task_name = EXCLUDED.task_name,
                    lease_owner_id = EXCLUDED.lease_owner_id,
                    lease_expires_at = EXCLUDED.lease_expires_at
                WHERE task_dedup.status <> 'completed'
                  AND (task_dedup.lease_expires_at IS NULL
                       OR task_dedup.lease_expires_at <= clock_timestamp())
                RETURNING attempt_count
                """
            ),
            {
                "key": key,
                "task_name": task_name,
                "task_id": task_id,
                "owner": owner_id,
                # Normalised to int so the make_interval bind is numeric.
                "ttl": int(ttl_seconds),
            },
        )
        row = result.fetchone()
        return int(row[0]) if row is not None else None

    def refresh_lease(
        self,
        key: str,
        owner_id: str,
        ttl_seconds: int = 60,
    ) -> bool:
        """Bump ``lease_expires_at`` if this caller still owns the lease.

        Returns False when ownership was lost (lease stolen by another
        worker after expiry, or row finalised). The heartbeat thread
        logs that as a warning but doesn't try to abort the running
        task — at-most-one-worker is bounded by ``ttl_seconds``, the
        damage from a brief overlap window is unavoidable in this case.
        """
        result = self._conn.execute(
            text(
                """
                UPDATE task_dedup
                SET lease_expires_at =
                        clock_timestamp() + make_interval(secs => :ttl)
                WHERE idempotency_key = :key
                  AND lease_owner_id = :owner
                  AND status = 'pending'
                """
            ),
            {
                "key": key,
                "owner": owner_id,
                "ttl": int(ttl_seconds),
            },
        )
        return result.rowcount > 0

    def release_lease(self, key: str, owner_id: str) -> bool:
        """Clear ``lease_owner_id`` / ``lease_expires_at`` on the
        wrapper's exception path so Celery's autoretry_for doesn't have
        to wait the full ``ttl_seconds`` before the next worker can
        re-claim. No-op if a different worker has since taken over the
        lease — that case is benign (we'd just be acknowledging we
        weren't the owner anymore).
        """
        result = self._conn.execute(
            text(
                """
                UPDATE task_dedup
                SET lease_owner_id = NULL,
                    lease_expires_at = NULL
                WHERE idempotency_key = :key
                  AND lease_owner_id = :owner
                  AND status = 'pending'
                """
            ),
            {"key": key, "owner": owner_id},
        )
        return result.rowcount > 0

    def finalize_task(
        self,
        key: str,
        *,
        result_json: Optional[dict],
        status: str,
    ) -> bool:
        """Promote ``status='pending'`` → ``completed|failed`` with the
        recorded result. Also clears the lease columns so a stale
        ``lease_expires_at`` doesn't show up in operator dashboards.
        No-op if the row is already terminal — preserves the first
        writer's outcome on a crash + retry.

        Raises ``ValueError`` for any status other than the two terminal
        states, keeping the DB free of invalid status strings.
        """
        if status not in ("completed", "failed"):
            raise ValueError(f"finalize_task: invalid status {status!r}")
        result = self._conn.execute(
            text(
                """
                UPDATE task_dedup
                SET status = :status,
                    result_json = CAST(:result_json AS jsonb),
                    lease_owner_id = NULL,
                    lease_expires_at = NULL
                WHERE idempotency_key = :key
                  AND status = 'pending'
                """
            ),
            {
                "key": key,
                "status": status,
                "result_json": _jsonb(result_json),
            },
        )
        return result.rowcount > 0

    # --- housekeeping ------------------------------------------------------
    def cleanup_expired(self) -> dict:
        """Delete rows past TTL from both dedup tables; return per-table counts.

        The TTL-aware upserts already prevent stale rows from blocking new
        work, so this is purely housekeeping — bounds table growth and
        keeps test isolation cheap. Safe to run concurrently with other
        writers: a same-key INSERT racing the DELETE will either find no
        row (acts as a fresh insert) or find a fresh row (re-created
        between DELETE and conflict-check), neither of which is wrong.
        """
        task_deleted = self._conn.execute(
            text(
                """
                DELETE FROM task_dedup
                WHERE created_at <= now() - CAST(:ttl AS interval)
                """
            ),
            {"ttl": DEDUP_TTL_INTERVAL},
        ).rowcount
        webhook_deleted = self._conn.execute(
            text(
                """
                DELETE FROM webhook_dedup
                WHERE created_at <= now() - CAST(:ttl AS interval)
                """
            ),
            {"ttl": DEDUP_TTL_INTERVAL},
        ).rowcount
        # rowcount can be None on some drivers; coerce to a stable int shape.
        return {
            "task_dedup_deleted": int(task_deleted or 0),
            "webhook_dedup_deleted": int(webhook_deleted or 0),
        }

View File

@@ -0,0 +1,127 @@
"""Repository for ``ingest_chunk_progress``; per-source resume + heartbeat."""
from __future__ import annotations
from typing import Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
class IngestChunkProgressRepository:
    """Read/write helpers for ``ingest_chunk_progress``.

    One row per source; methods run on the caller's ``Connection`` and
    never commit — transaction control stays with the caller.
    """

    def __init__(self, conn: Connection) -> None:
        # Borrowed connection; lifetime managed by the caller.
        self._conn = conn

    def init_progress(
        self,
        source_id: str,
        total_chunks: int,
        attempt_id: Optional[str] = None,
    ) -> dict:
        """Upsert the progress row, scoped by ``attempt_id``.

        On conflict the upsert distinguishes two cases:

        - **Same attempt** (``attempt_id`` matches the stored value):
          this is a Celery autoretry of the same task — preserve
          ``last_index`` / ``embedded_chunks`` so the embed loop resumes
          from the checkpoint. Only ``total_chunks`` and
          ``last_updated`` get refreshed.
        - **Different attempt** (a fresh invocation: manual reingest,
          scheduled sync, or any caller that didn't pass an
          ``attempt_id``): reset ``last_index`` to ``-1`` and
          ``embedded_chunks`` to ``0`` so the loop starts from chunk 0.
          This prevents a completed checkpoint from any prior run
          poisoning the index.

        ``IS NOT DISTINCT FROM`` treats two NULLs as equal — so legacy
        rows with NULL ``attempt_id`` resume against another NULL
        caller (e.g. test fixtures), but get reset the moment a real
        ``attempt_id`` arrives.

        Returns the row as written (post-upsert values).
        """
        result = self._conn.execute(
            text(
                """
                INSERT INTO ingest_chunk_progress (
                    source_id, total_chunks, embedded_chunks, last_index,
                    attempt_id, last_updated
                )
                VALUES (
                    CAST(:source_id AS uuid), :total_chunks, 0, -1,
                    :attempt_id, now()
                )
                ON CONFLICT (source_id) DO UPDATE SET
                    total_chunks = EXCLUDED.total_chunks,
                    last_updated = now(),
                    last_index = CASE
                        WHEN ingest_chunk_progress.attempt_id
                             IS NOT DISTINCT FROM EXCLUDED.attempt_id
                        THEN ingest_chunk_progress.last_index
                        ELSE -1
                    END,
                    embedded_chunks = CASE
                        WHEN ingest_chunk_progress.attempt_id
                             IS NOT DISTINCT FROM EXCLUDED.attempt_id
                        THEN ingest_chunk_progress.embedded_chunks
                        ELSE 0
                    END,
                    attempt_id = EXCLUDED.attempt_id
                RETURNING *
                """
            ),
            {
                # Defensive coercion: callers may pass UUID objects / numerics.
                "source_id": str(source_id),
                "total_chunks": int(total_chunks),
                "attempt_id": attempt_id,
            },
        )
        return row_to_dict(result.fetchone())

    def record_chunk(
        self, source_id: str, last_index: int, embedded_chunks: int
    ) -> None:
        """Persist progress after a chunk is embedded."""
        self._conn.execute(
            text(
                """
                UPDATE ingest_chunk_progress
                SET last_index = :last_index,
                    embedded_chunks = :embedded_chunks,
                    last_updated = now()
                WHERE source_id = CAST(:source_id AS uuid)
                """
            ),
            {
                "source_id": str(source_id),
                "last_index": int(last_index),
                "embedded_chunks": int(embedded_chunks),
            },
        )

    def get_progress(self, source_id: str) -> Optional[dict]:
        """Return the progress row for ``source_id`` if it exists."""
        result = self._conn.execute(
            text(
                "SELECT * FROM ingest_chunk_progress "
                "WHERE source_id = CAST(:source_id AS uuid)"
            ),
            {"source_id": str(source_id)},
        )
        row = result.fetchone()
        return row_to_dict(row) if row is not None else None

    def bump_heartbeat(self, source_id: str) -> None:
        """Refresh ``last_updated`` so the row looks alive to the reconciler."""
        self._conn.execute(
            text(
                """
                UPDATE ingest_chunk_progress
                SET last_updated = now()
                WHERE source_id = CAST(:source_id AS uuid)
                """
            ),
            {"source_id": str(source_id)},
        )

View File

@@ -7,6 +7,11 @@ Mirrors the continuation service's three operations on
- load_state → find_one by (conversation_id, user_id)
- delete_state → delete_one by (conversation_id, user_id)
Adds ``mark_resuming`` so a resumed run can claim a row without
deleting it; a separate ``revert_stale_resuming`` flips abandoned
``resuming`` rows back to ``pending`` so a crashed worker doesn't
strand the user.
Plus a cleanup method for the Celery beat task that replaces Mongo's
TTL index.
"""
@@ -20,6 +25,7 @@ from typing import Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
from application.storage.db.serialization import PGNativeJSONEncoder
PENDING_STATE_TTL_SECONDS = 30 * 60 # 1800 seconds
@@ -71,19 +77,24 @@ class PendingToolStateRepository:
agent_config = EXCLUDED.agent_config,
client_tools = EXCLUDED.client_tools,
created_at = EXCLUDED.created_at,
expires_at = EXCLUDED.expires_at
expires_at = EXCLUDED.expires_at,
status = 'pending',
resumed_at = NULL
RETURNING *
"""
),
{
"conv_id": conversation_id,
"user_id": user_id,
"messages": json.dumps(messages),
"pending": json.dumps(pending_tool_calls),
"tools_dict": json.dumps(tools_dict),
"schemas": json.dumps(tool_schemas),
"agent_config": json.dumps(agent_config),
"client_tools": json.dumps(client_tools) if client_tools is not None else None,
"messages": json.dumps(messages, cls=PGNativeJSONEncoder),
"pending": json.dumps(pending_tool_calls, cls=PGNativeJSONEncoder),
"tools_dict": json.dumps(tools_dict, cls=PGNativeJSONEncoder),
"schemas": json.dumps(tool_schemas, cls=PGNativeJSONEncoder),
"agent_config": json.dumps(agent_config, cls=PGNativeJSONEncoder),
"client_tools": (
json.dumps(client_tools, cls=PGNativeJSONEncoder)
if client_tools is not None else None
),
"created_at": now,
"expires_at": expires,
},
@@ -113,6 +124,45 @@ class PendingToolStateRepository:
)
return result.rowcount > 0
def mark_resuming(self, conversation_id: str, user_id: str) -> bool:
    """Claim a ``pending`` row for resumption.

    Sets ``status='resuming'`` and stamps ``resumed_at`` with
    ``clock_timestamp()``. Returns True only when a pending row for
    (conversation_id, user_id) existed and was claimed.
    """
    stmt = text(
        """
        UPDATE pending_tool_state
        SET status = 'resuming', resumed_at = clock_timestamp()
        WHERE conversation_id = CAST(:conv_id AS uuid)
          AND user_id = :user_id
          AND status = 'pending'
        """
    )
    bind = {"conv_id": conversation_id, "user_id": user_id}
    claimed = self._conn.execute(stmt, bind)
    return claimed.rowcount > 0
def revert_stale_resuming(
    self,
    grace_seconds: int = 600,
    ttl_extension_seconds: int = PENDING_STATE_TTL_SECONDS,
) -> int:
    """Flip ``resuming`` rows whose ``resumed_at`` is older than
    ``grace_seconds`` back to ``pending`` and push ``expires_at``
    forward by ``ttl_extension_seconds``.

    Returns the number of rows reverted.
    """
    stmt = text(
        """
        UPDATE pending_tool_state
        SET status = 'pending',
            resumed_at = NULL,
            expires_at = clock_timestamp()
                + make_interval(secs => :ttl)
        WHERE status = 'resuming'
          AND resumed_at
              < clock_timestamp() - make_interval(secs => :grace)
        """
    )
    bind = {"grace": grace_seconds, "ttl": ttl_extension_seconds}
    reverted = self._conn.execute(stmt, bind)
    return reverted.rowcount
def cleanup_expired(self) -> int:
"""Delete rows where ``expires_at < now()``.

View File

@@ -0,0 +1,273 @@
"""Repository for reconciliation sweeps over stuck durability rows."""
from __future__ import annotations
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
class ReconciliationRepository:
    """Sweeps and terminal writes for the reconciler beat task.

    The ``find_and_lock_*`` methods use ``FOR UPDATE SKIP LOCKED`` so
    concurrent reconciler ticks never block on (or double-process) the
    same rows. Transaction control stays with the caller.
    """

    def __init__(self, conn: Connection) -> None:
        # Borrowed connection; caller owns commit/rollback.
        self._conn = conn

    def find_and_lock_stuck_messages(
        self, *, age_minutes: int = 5, limit: int = 100,
    ) -> list[dict]:
        """Lock stuck pending/streaming messages skipping live resumes.

        Staleness rides on the **later of** ``cm.timestamp`` (creation)
        and ``message_metadata.last_heartbeat_at`` (route heartbeat). An
        in-flight stream that re-stamps the heartbeat each minute stays
        out of the sweep; reconciler-side writes deliberately don't
        touch either column so the per-row attempts counter advances
        across ticks. Liveness exemption covers both ``pending`` (paused
        waiting for resume) and ``resuming`` (actively executing)
        ``pending_tool_state`` rows so a paused message survives until
        the PT row's own TTL retires it.
        """
        result = self._conn.execute(
            text(
                """
                SELECT cm.id, cm.conversation_id, cm.user_id, cm.timestamp,
                       cm.message_metadata
                FROM conversation_messages cm
                WHERE cm.status IN ('pending', 'streaming')
                  AND cm.timestamp < now() - make_interval(mins => :age)
                  AND COALESCE(
                        (cm.message_metadata->>'last_heartbeat_at')::timestamptz,
                        cm.timestamp
                      ) < now() - make_interval(mins => :age)
                  AND NOT EXISTS (
                        SELECT 1
                        FROM pending_tool_state pts
                        WHERE pts.conversation_id = cm.conversation_id
                          AND (
                                (pts.status = 'pending'
                                 AND pts.expires_at > now())
                                OR
                                (pts.status = 'resuming'
                                 AND pts.resumed_at
                                     > now() - interval '10 minutes')
                              )
                      )
                ORDER BY cm.timestamp ASC
                LIMIT :limit
                FOR UPDATE OF cm SKIP LOCKED
                """
            ),
            {"age": age_minutes, "limit": limit},
        )
        return [row_to_dict(r) for r in result.fetchall()]

    def find_and_lock_proposed_tool_calls(
        self, *, age_minutes: int = 5, limit: int = 100,
    ) -> list[dict]:
        """Lock tool_call_attempts that never advanced past ``proposed``."""
        result = self._conn.execute(
            text(
                """
                SELECT call_id, message_id, tool_id, tool_name, action_name,
                       arguments, attempted_at, updated_at
                FROM tool_call_attempts
                WHERE status = 'proposed'
                  AND attempted_at < now() - make_interval(mins => :age)
                ORDER BY attempted_at ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {"age": age_minutes, "limit": limit},
        )
        return [row_to_dict(r) for r in result.fetchall()]

    def find_and_lock_executed_tool_calls(
        self, *, age_minutes: int = 15, limit: int = 100,
    ) -> list[dict]:
        """Lock tool_call_attempts stuck in ``executed`` past confirm window.

        Staleness here is measured on ``updated_at`` (the transition into
        ``executed``), not ``attempted_at``.
        """
        result = self._conn.execute(
            text(
                """
                SELECT call_id, message_id, tool_id, tool_name, action_name,
                       arguments, result, attempted_at, updated_at
                FROM tool_call_attempts
                WHERE status = 'executed'
                  AND updated_at < now() - make_interval(mins => :age)
                ORDER BY updated_at ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {"age": age_minutes, "limit": limit},
        )
        return [row_to_dict(r) for r in result.fetchall()]

    def find_and_lock_stalled_ingests(
        self, *, age_minutes: int = 30, limit: int = 100,
    ) -> list[dict]:
        """Lock ingest checkpoints whose heartbeat hasn't ticked recently.

        Only incomplete rows (``embedded_chunks < total_chunks``) qualify.
        """
        result = self._conn.execute(
            text(
                """
                SELECT source_id, total_chunks, embedded_chunks,
                       last_index, last_updated
                FROM ingest_chunk_progress
                WHERE last_updated < now() - make_interval(mins => :age)
                  AND embedded_chunks < total_chunks
                ORDER BY last_updated ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {"age": age_minutes, "limit": limit},
        )
        return [row_to_dict(r) for r in result.fetchall()]

    def touch_ingest_progress(self, source_id: str) -> bool:
        """Bump ``last_updated`` so a once-stalled ingest re-enters the watch window."""
        result = self._conn.execute(
            text(
                "UPDATE ingest_chunk_progress SET last_updated = now() "
                "WHERE source_id = CAST(:sid AS uuid)"
            ),
            {"sid": str(source_id)},
        )
        return result.rowcount > 0

    def increment_message_reconcile_attempts(self, message_id: str) -> int:
        """Bump ``message_metadata.reconcile_attempts`` and return the new count.

        ``jsonb_set`` over ``COALESCE(..., '{}')`` handles both a NULL
        metadata column and a missing key; UPDATE ... RETURNING yields the
        post-update value. Returns 0 when no row matched ``message_id``.
        """
        result = self._conn.execute(
            text(
                """
                UPDATE conversation_messages
                SET message_metadata = jsonb_set(
                        COALESCE(message_metadata, '{}'::jsonb),
                        '{reconcile_attempts}',
                        to_jsonb(
                            COALESCE(
                                (message_metadata->>'reconcile_attempts')::int,
                                0
                            ) + 1
                        )
                    )
                WHERE id = CAST(:message_id AS uuid)
                RETURNING (message_metadata->>'reconcile_attempts')::int
                          AS new_count
                """
            ),
            {"message_id": message_id},
        )
        row = result.fetchone()
        return int(row[0]) if row is not None else 0

    def mark_message_failed(self, message_id: str, *, error: str) -> bool:
        """Flip a message to ``status='failed'`` and stash ``error`` in metadata."""
        result = self._conn.execute(
            text(
                """
                UPDATE conversation_messages
                SET status = 'failed',
                    message_metadata = jsonb_set(
                        COALESCE(message_metadata, '{}'::jsonb),
                        '{error}',
                        to_jsonb(CAST(:error AS text))
                    )
                WHERE id = CAST(:message_id AS uuid)
                """
            ),
            {"message_id": message_id, "error": error},
        )
        return result.rowcount > 0

    def mark_tool_call_failed(self, call_id: str, *, error: str) -> bool:
        """Flip a tool_call_attempts row to ``failed`` with ``error``."""
        result = self._conn.execute(
            text(
                "UPDATE tool_call_attempts SET status = 'failed', "
                "error = :error WHERE call_id = :call_id"
            ),
            {"call_id": call_id, "error": error},
        )
        return result.rowcount > 0

    def find_stuck_idempotency_pending(
        self,
        *,
        max_attempts: int,
        lease_grace_seconds: int = 60,
        limit: int = 100,
    ) -> list[dict]:
        """Lock ``task_dedup`` rows abandoned past the lease + retry budget.

        A row is "stuck" when:

        - ``status='pending'`` (lease was claimed but never finalised)
        - ``lease_expires_at`` is past by at least ``lease_grace_seconds``
          (the heartbeat thread is gone — the lease isn't going to come
          back)
        - ``attempt_count >= max_attempts`` (the poison-loop guard
          should already have escalated this; if it hasn't, the wrapper
          died before getting there)

        These rows would otherwise sit in ``pending`` until the 24 h
        TTL aged them out, blocking same-key retries via
        ``_lookup_completed`` returning None for the whole window.
        """
        result = self._conn.execute(
            text(
                """
                SELECT idempotency_key, task_name, task_id, attempt_count,
                       lease_owner_id, lease_expires_at, created_at
                FROM task_dedup
                WHERE status = 'pending'
                  AND lease_expires_at IS NOT NULL
                  AND lease_expires_at
                      < now() - make_interval(secs => :grace)
                  AND attempt_count >= :max_attempts
                ORDER BY created_at ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {
                "max_attempts": int(max_attempts),
                "grace": int(lease_grace_seconds),
                "limit": int(limit),
            },
        )
        return [row_to_dict(r) for r in result.fetchall()]

    def mark_idempotency_pending_failed(
        self, key: str, *, error: str,
    ) -> bool:
        """Promote a stuck pending ``task_dedup`` row to ``failed``.

        Writes a structured ``result_json`` payload (with
        ``reconciled: True`` as the audit marker) and clears the lease
        columns. No-op when the row has already left ``pending``.
        """
        # Function-local imports keep this module importable even if the
        # serialization helper pulls in heavier dependencies.
        from application.storage.db.serialization import PGNativeJSONEncoder
        import json

        result = self._conn.execute(
            text(
                """
                UPDATE task_dedup
                SET status = 'failed',
                    result_json = CAST(:result AS jsonb),
                    lease_owner_id = NULL,
                    lease_expires_at = NULL
                WHERE idempotency_key = :key
                  AND status = 'pending'
                """
            ),
            {
                "key": key,
                "result": json.dumps(
                    {
                        "success": False,
                        "error": error,
                        "reconciled": True,
                    },
                    cls=PGNativeJSONEncoder,
                ),
            },
        )
        return result.rowcount > 0

View File

@@ -13,6 +13,8 @@ import json
from datetime import datetime
from typing import Optional
from application.storage.db.serialization import PGNativeJSONEncoder
from sqlalchemy import Connection, text
@@ -52,7 +54,7 @@ class StackLogsRepository:
"user_id": user_id,
"api_key": api_key,
"query": query,
"stacks": json.dumps(stacks or []),
"stacks": json.dumps(stacks or [], cls=PGNativeJSONEncoder),
"timestamp": timestamp,
},
)

View File

@@ -31,6 +31,8 @@ class TokenUsageRepository:
agent_id: Optional[str] = None,
prompt_tokens: int = 0,
generated_tokens: int = 0,
source: str = "agent_stream",
request_id: Optional[str] = None,
timestamp: Optional[datetime] = None,
) -> None:
# Attribution guard: the ``token_usage_attribution_chk`` CHECK
@@ -54,12 +56,16 @@ class TokenUsageRepository:
self._conn.execute(
text(
"""
INSERT INTO token_usage (user_id, api_key, agent_id, prompt_tokens, generated_tokens, timestamp)
INSERT INTO token_usage (
user_id, api_key, agent_id,
prompt_tokens, generated_tokens,
source, request_id, timestamp
)
VALUES (
:user_id, :api_key,
CAST(:agent_id AS uuid),
:prompt_tokens, :generated_tokens,
COALESCE(:timestamp, now())
:source, :request_id, COALESCE(:timestamp, now())
)
"""
),
@@ -69,6 +75,8 @@ class TokenUsageRepository:
"agent_id": agent_id_uuid,
"prompt_tokens": prompt_tokens,
"generated_tokens": generated_tokens,
"source": source,
"request_id": request_id,
"timestamp": timestamp,
},
)
@@ -173,8 +181,22 @@ class TokenUsageRepository:
user_id: Optional[str] = None,
api_key: Optional[str] = None,
) -> int:
"""Count of token_usage rows in the given time range (for request limiting)."""
clauses = ["timestamp >= :start", "timestamp <= :end"]
"""Count user-initiated requests in the given time range.
A request = one ``agent_stream`` invocation. Multi-tool agent
runs produce multiple rows (one per LLM call) tagged with the
same ``request_id``; we DISTINCT on that to count the request
once. Pre-migration rows have ``request_id=NULL`` and are
counted one-per-row via the second branch (back-compat).
Side-channel sources (``title`` / ``compression`` /
``rag_condense`` / ``fallback``) are excluded — they aren't
user-initiated and shouldn't tick the request limit.
"""
clauses = [
"timestamp >= :start",
"timestamp <= :end",
"source = 'agent_stream'",
]
params: dict = {"start": start, "end": end}
if user_id is not None:
clauses.append("user_id = :user_id")
@@ -184,7 +206,15 @@ class TokenUsageRepository:
params["api_key"] = api_key
where = " AND ".join(clauses)
result = self._conn.execute(
text(f"SELECT COUNT(*) FROM token_usage WHERE {where}"),
text(
f"""
SELECT
COUNT(DISTINCT request_id) FILTER (WHERE request_id IS NOT NULL)
+ COUNT(*) FILTER (WHERE request_id IS NULL)
FROM token_usage
WHERE {where}
"""
),
params,
)
return result.scalar()

View File

@@ -0,0 +1,144 @@
"""Repository for ``tool_call_attempts``; executor's proposed/executed/failed writes."""
from __future__ import annotations
import json
from typing import Any, Optional
from sqlalchemy import Connection, text
from application.storage.db.serialization import PGNativeJSONEncoder
class ToolCallAttemptsRepository:
    """Executor-side writes to ``tool_call_attempts``
    (``proposed`` → ``executed``/``failed`` lifecycle).

    Runs on the caller's ``Connection``; transaction control stays with
    the caller.
    """

    def __init__(self, conn: Connection) -> None:
        # Borrowed connection; never committed or closed here.
        self._conn = conn

    def record_proposed(
        self,
        call_id: str,
        tool_name: str,
        action_name: str,
        arguments: Any,
        *,
        tool_id: Optional[str] = None,
    ) -> bool:
        """Insert a ``proposed`` row before the tool executes.

        Returns True if a new row was created. ``ON CONFLICT DO NOTHING``
        guards against the LLM emitting a duplicate ``call_id``: the
        existing row stays put rather than a re-insert raising
        ``IntegrityError``.
        """
        result = self._conn.execute(
            text(
                """
                INSERT INTO tool_call_attempts
                    (call_id, tool_id, tool_name, action_name, arguments, status)
                VALUES
                    (:call_id, CAST(:tool_id AS uuid), :tool_name,
                     :action_name, CAST(:arguments AS jsonb), 'proposed')
                ON CONFLICT (call_id) DO NOTHING
                """
            ),
            {
                "call_id": call_id,
                "tool_id": tool_id,
                "tool_name": tool_name,
                "action_name": action_name,
                # None arguments become '{}' so the jsonb column is never NULL.
                "arguments": json.dumps(arguments if arguments is not None else {}, cls=PGNativeJSONEncoder),
            },
        )
        return result.rowcount > 0

    def upsert_executed(
        self,
        call_id: str,
        tool_name: str,
        action_name: str,
        arguments: Any,
        result: Any,
        *,
        tool_id: Optional[str] = None,
        message_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
    ) -> None:
        """Insert OR upgrade a row to ``executed``.

        Used as a fallback when ``record_proposed`` failed (DB outage)
        and the tool ran anyway — preserves the journal so the
        reconciler can still see the attempt.
        """
        # Result JSONB wraps the raw tool result; artifact_id (when set)
        # rides alongside it as audit data.
        result_payload: dict = {"result": result}
        if artifact_id:
            result_payload["artifact_id"] = artifact_id
        self._conn.execute(
            text(
                """
                INSERT INTO tool_call_attempts
                    (call_id, tool_id, tool_name, action_name, arguments,
                     result, message_id, status)
                VALUES
                    (:call_id, CAST(:tool_id AS uuid), :tool_name,
                     :action_name, CAST(:arguments AS jsonb),
                     CAST(:result AS jsonb), CAST(:message_id AS uuid),
                     'executed')
                ON CONFLICT (call_id) DO UPDATE
                SET status = 'executed',
                    result = EXCLUDED.result,
                    message_id = COALESCE(EXCLUDED.message_id, tool_call_attempts.message_id)
                """
            ),
            {
                "call_id": call_id,
                "tool_id": tool_id,
                "tool_name": tool_name,
                "action_name": action_name,
                "arguments": json.dumps(arguments if arguments is not None else {}, cls=PGNativeJSONEncoder),
                "result": json.dumps(result_payload, cls=PGNativeJSONEncoder),
                "message_id": message_id,
            },
        )

    def mark_executed(
        self,
        call_id: str,
        result: Any,
        *,
        message_id: Optional[str] = None,
        artifact_id: Optional[str] = None,
    ) -> bool:
        """Flip ``proposed`` → ``executed`` with the tool result.

        ``artifact_id`` (when present) is stored alongside ``result`` in
        the JSONB as audit data — the reconciler reads it for diagnostic
        alerts when escalating stuck rows to ``failed``.
        """
        result_payload: dict = {"result": result}
        if artifact_id:
            result_payload["artifact_id"] = artifact_id
        # SQL assembled only from fixed fragments; all values travel as
        # bound parameters, so no injection surface.
        sql = (
            "UPDATE tool_call_attempts SET "
            "status = 'executed', result = CAST(:result AS jsonb)"
        )
        params: dict[str, Any] = {
            "call_id": call_id,
            "result": json.dumps(result_payload, cls=PGNativeJSONEncoder),
        }
        if message_id is not None:
            sql += ", message_id = CAST(:message_id AS uuid)"
            params["message_id"] = message_id
        sql += " WHERE call_id = :call_id"
        result_proxy = self._conn.execute(text(sql), params)
        return result_proxy.rowcount > 0

    def mark_failed(self, call_id: str, error: str) -> bool:
        """Flip ``proposed`` → ``failed`` with the exception text."""
        result = self._conn.execute(
            text(
                "UPDATE tool_call_attempts SET status = 'failed', error = :error "
                "WHERE call_id = :call_id"
            ),
            {"call_id": call_id, "error": error},
        )
        return result.rowcount > 0

View File

@@ -0,0 +1,199 @@
"""Repository for the ``user_custom_models`` table.
Backs the end-user "Bring Your Own Model" feature. Each row is one
user-supplied OpenAI-compatible endpoint (Mistral, Together, vLLM, ...).
The ``id`` UUID is the internal DocsGPT identifier (what agents store
in ``default_model_id``); ``upstream_model_id`` is what we send verbatim
to the provider's API.
API key handling: callers pass plaintext via ``api_key_plaintext``;
this module wraps the existing ``application.security.encryption``
helper (AES-CBC + per-user PBKDF2 salt) and writes the base64 ciphertext
to the ``api_key_encrypted`` column. Decryption is the caller's
responsibility (they hold the ``user_id``).
"""
from __future__ import annotations
from typing import Any, Optional
from sqlalchemy import Connection, func, text
from application.security.encryption import (
decrypt_credentials,
encrypt_credentials,
)
from application.storage.db.base_repository import row_to_dict
from application.storage.db.models import user_custom_models_table
_ALLOWED_CAPABILITY_KEYS = frozenset(
{
"supports_tools",
"supports_structured_output",
"supports_streaming",
"attachments",
"context_window",
}
)
class UserCustomModelsRepository:
def __init__(self, conn: Connection) -> None:
self._conn = conn
# ------------------------------------------------------------------ #
# Encryption wrappers
# ------------------------------------------------------------------ #
@staticmethod
def _encrypt_api_key(api_key_plaintext: str, user_id: str) -> str:
"""Encrypt ``api_key_plaintext`` with the per-user PBKDF2 scheme."""
return encrypt_credentials({"api_key": api_key_plaintext}, user_id)
@staticmethod
def _decrypt_api_key(api_key_encrypted: str, user_id: str) -> Optional[str]:
"""Decrypt the API key. Returns None on failure (which the caller
should surface as a configuration error rather than silently
proceeding with the upstream call)."""
if not api_key_encrypted:
return None
creds = decrypt_credentials(api_key_encrypted, user_id)
return creds.get("api_key") if creds else None
@staticmethod
def _normalize_capabilities(caps: Optional[dict]) -> dict:
"""Drop unknown keys; nothing else is forced. Callers (the route
layer) are responsible for value validation (numeric ranges,
attachment alias resolution)."""
if not caps:
return {}
return {k: v for k, v in caps.items() if k in _ALLOWED_CAPABILITY_KEYS}
# ------------------------------------------------------------------ #
# CRUD
# ------------------------------------------------------------------ #
def create(
self,
user_id: str,
upstream_model_id: str,
display_name: str,
base_url: str,
api_key_plaintext: str,
description: str = "",
capabilities: Optional[dict] = None,
enabled: bool = True,
) -> dict:
values = {
"user_id": user_id,
"upstream_model_id": upstream_model_id,
"display_name": display_name,
"description": description or "",
"base_url": base_url,
"api_key_encrypted": self._encrypt_api_key(api_key_plaintext, user_id),
"capabilities": self._normalize_capabilities(capabilities),
"enabled": bool(enabled),
}
from sqlalchemy.dialects.postgresql import insert as pg_insert
stmt = (
pg_insert(user_custom_models_table)
.values(**values)
.returning(user_custom_models_table)
)
result = self._conn.execute(stmt)
return row_to_dict(result.fetchone())
def get(self, model_id: str, user_id: str) -> Optional[dict]:
result = self._conn.execute(
text(
"SELECT * FROM user_custom_models "
"WHERE id = CAST(:id AS uuid) AND user_id = :user_id"
),
{"id": str(model_id), "user_id": user_id},
)
row = result.fetchone()
return row_to_dict(row) if row is not None else None
def list_for_user(self, user_id: str) -> list[dict]:
result = self._conn.execute(
text(
"SELECT * FROM user_custom_models "
"WHERE user_id = :user_id ORDER BY created_at DESC"
),
{"user_id": user_id},
)
return [row_to_dict(r) for r in result.fetchall()]
def update(self, model_id: str, user_id: str, fields: dict) -> bool:
    """Apply a partial update; ``True`` when a row actually changed.

    Unknown keys and ``None`` values are ignored, so callers may pass
    a PATCH payload as-is. ``api_key_plaintext`` is special-cased:
    when non-empty it is encrypted and stored in
    ``api_key_encrypted``; when absent or blank the existing
    ciphertext is kept untouched (the UI sends a blank password field
    when the operator wants to keep the existing key).
    """
    updatable = (
        "upstream_model_id",
        "display_name",
        "description",
        "base_url",
        "capabilities",
        "enabled",
    )
    changes: dict[str, Any] = {}
    for name in updatable:
        value = fields.get(name)
        if value is None:
            # Absent or explicit null — either way, leave the column alone.
            continue
        if name == "capabilities":
            changes[name] = self._normalize_capabilities(value)
        elif name == "enabled":
            changes[name] = bool(value)
        else:
            changes[name] = value
    plaintext = fields.get("api_key_plaintext")
    if plaintext:
        changes["api_key_encrypted"] = self._encrypt_api_key(
            plaintext, user_id
        )
    if not changes:
        return False
    changes["updated_at"] = func.now()
    table = user_custom_models_table
    stmt = (
        table.update()
        .where(table.c.id == str(model_id))
        .where(table.c.user_id == user_id)
        .values(**changes)
    )
    return self._conn.execute(stmt).rowcount > 0
def delete(self, model_id: str, user_id: str) -> bool:
    """Remove the row; ``True`` when something was actually deleted."""
    deleted = self._conn.execute(
        text(
            "DELETE FROM user_custom_models "
            "WHERE id = CAST(:id AS uuid) AND user_id = :user_id"
        ),
        {"id": str(model_id), "user_id": user_id},
    ).rowcount
    return deleted > 0
# ------------------------------------------------------------------ #
# Decryption helpers exposed to the registry layer
# ------------------------------------------------------------------ #
def get_decrypted_api_key(
    self, model_id: str, user_id: str
) -> Optional[str]:
    """Fetch the row and hand back its decrypted API key.

    Returns ``None`` when the row is missing or decryption fails.
    """
    record = self.get(model_id, user_id)
    if record is None:
        return None
    ciphertext = record.get("api_key_encrypted", "")
    return self._decrypt_api_key(ciphertext, user_id)

View File

@@ -20,6 +20,7 @@ from typing import Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
from application.storage.db.serialization import PGNativeJSONEncoder
class UserLogsRepository:
@@ -46,7 +47,7 @@ class UserLogsRepository:
{
"user_id": user_id,
"endpoint": endpoint,
"data": json.dumps(data, default=str) if data is not None else None,
"data": json.dumps(data, cls=PGNativeJSONEncoder) if data is not None else None,
"timestamp": timestamp,
},
)

View File

@@ -0,0 +1,93 @@
"""JSON-safe coercion for PG-native Python types.
Postgres (via psycopg) returns native Python types — ``uuid.UUID``,
``datetime.datetime``/``datetime.date``, ``decimal.Decimal``, ``bytes``
— that ``json.dumps`` rejects. This module is the single place those
coercion rules live; everywhere else should call into it.
Two interfaces with identical coverage:
* :func:`coerce_pg_native` — recursive walk returning a JSON-safe copy.
Use when you need to inspect the dict yourself or pass it to a
serializer that doesn't accept a custom encoder (e.g. SQLAlchemy
parameter binding for a JSONB column).
* :class:`PGNativeJSONEncoder` — ``JSONEncoder`` subclass. Use as
``json.dumps(obj, cls=PGNativeJSONEncoder)`` for serialise-once flows
where the extra recursive walk is wasted work.
Coercion rules:
* ``UUID`` → canonical hex string.
* ``datetime`` / ``date`` → ISO 8601 string.
* ``Decimal`` → numeric string (preserves precision; ``float()`` would not).
* ``bytes`` → base64 string. Lossless and universally JSON-safe;
prior code used UTF-8 with ``errors="replace"`` which silently
corrupted binary payloads (e.g. Gemini's ``thought_signature``).
"""
from __future__ import annotations
import base64
import binascii
import json
from datetime import date, datetime
from decimal import Decimal
from typing import Any
from uuid import UUID
def _coerce_scalar(obj: Any) -> Any:
if isinstance(obj, UUID):
return str(obj)
if isinstance(obj, (datetime, date)):
return obj.isoformat()
if isinstance(obj, Decimal):
return str(obj)
if isinstance(obj, bytes):
return base64.b64encode(obj).decode("ascii")
return obj
def coerce_pg_native(obj: Any) -> Any:
    """Return a JSON-safe copy of ``obj``, recursing into containers.

    ``dict`` keys are stringified (matching the prior helper's
    behavior) and values recursed; ``list``/``tuple`` recurse
    element-wise, tuples flattening to lists since JSON has no tuple
    type. Anything else goes through the scalar coercion rules.
    """
    if isinstance(obj, dict):
        safe: dict = {}
        for key, value in obj.items():
            safe[str(key)] = coerce_pg_native(value)
        return safe
    if isinstance(obj, (list, tuple)):
        return [coerce_pg_native(item) for item in obj]
    return _coerce_scalar(obj)
def decode_base64_bytes(value: Any) -> Any:
    """Best-effort inverse of ``coerce_pg_native``'s bytes→base64 step.

    Strings that strictly validate as base64 come back as ``bytes``;
    anything else — non-strings, strings with characters outside the
    base64 alphabet, bad padding, or non-ASCII content — is returned
    unchanged. Useful at egress points that need the original bytes
    back (e.g. sending Gemini's ``thought_signature`` to the SDK on
    resume).

    NOTE(review): ``validate=True`` only rejects non-alphabet
    characters and broken padding — a plain string that happens to be
    well-formed base64 (e.g. ``"abcd"``) WILL decode to bytes, so
    this is a heuristic; callers must tolerate false positives.
    """
    if not isinstance(value, str):
        return value
    try:
        return base64.b64decode(value.encode("ascii"), validate=True)
    except (binascii.Error, ValueError):
        # Not valid base64 (or not ASCII) — pass the original through.
        return value
class PGNativeJSONEncoder(json.JSONEncoder):
    """``JSONEncoder`` that understands PG-native scalar types.

    Delegates to :func:`_coerce_scalar`, so coverage (UUID,
    datetime/date, Decimal, bytes) stays in lockstep with
    :func:`coerce_pg_native`. Use ``json.dumps(obj,
    cls=PGNativeJSONEncoder)`` in serialise-once flows where an eager
    recursive copy would be wasted work.
    """

    def default(self, obj: Any) -> Any:
        coerced = _coerce_scalar(obj)
        if coerced is not obj:
            return coerced
        # No coercion rule applied — let the base class raise TypeError.
        return super().default(obj)

View File

@@ -1,6 +1,5 @@
import sys
import logging
from datetime import datetime
import time
from application.storage.db.repositories.token_usage import TokenUsageRepository
from application.storage.db.session import db_session
@@ -20,6 +19,15 @@ def _serialize_for_token_count(value):
if value is None:
return ""
# Raw binary payloads (image/file attachments arrive as ``bytes`` from
# ``GoogleLLM.prepare_messages_with_attachments``) — without this
# branch they fall through to ``str(value)`` below, which produces a
# multi-megabyte ``"b'\\x89PNG...'"`` repr-string and inflates
# ``prompt_tokens`` by orders of magnitude. Same intent as the
# data-URL skip above.
if isinstance(value, (bytes, bytearray, memoryview)):
return ""
if isinstance(value, list):
return [_serialize_for_token_count(item) for item in value]
@@ -83,33 +91,62 @@ def _count_prompt_tokens(messages, tools=None, usage_attachments=None, **kwargs)
return prompt_tokens
def update_token_usage(decoded_token, user_api_key, token_usage, agent_id=None):
if "pytest" in sys.modules:
return
user_id = decoded_token.get("sub") if isinstance(decoded_token, dict) else None
normalized_agent_id = str(agent_id) if agent_id else None
def _persist_call_usage(llm, call_usage):
"""Write one ``token_usage`` row per LLM call. Always-on; no flag.
if not user_id and not user_api_key and not normalized_agent_id:
Source defaults to ``agent_stream`` and can be overridden per
instance via ``_token_usage_source`` (set on side-channel LLMs:
title / compression / rag_condense / fallback). A ``_request_id``
stamped on the LLM lets ``count_in_range`` deduplicate the multiple
rows produced by a single multi-tool agent run.
"""
if call_usage["prompt_tokens"] == 0 and call_usage["generated_tokens"] == 0:
return
decoded_token = getattr(llm, "decoded_token", None)
user_id = (
decoded_token.get("sub") if isinstance(decoded_token, dict) else None
)
user_api_key = getattr(llm, "user_api_key", None)
agent_id = getattr(llm, "agent_id", None)
if not user_id and not user_api_key:
# Repository would raise on the attribution check — log instead
# so operators see the gap rather than crashing the stream.
logger.warning(
"Skipping token usage insert: missing user_id, api_key, and agent_id"
"token_usage skip: no user_id/api_key on LLM instance",
extra={
"source": getattr(llm, "_token_usage_source", "agent_stream"),
},
)
return
try:
with db_session() as conn:
# ``timestamp`` is omitted so Postgres ``server_default
# = func.now()`` populates a tz-aware UTC value; passing
# naive ``datetime.now()`` would silently shift on
# non-UTC servers.
TokenUsageRepository(conn).insert(
user_id=user_id,
api_key=user_api_key,
agent_id=normalized_agent_id,
prompt_tokens=token_usage["prompt_tokens"],
generated_tokens=token_usage["generated_tokens"],
timestamp=datetime.now(),
agent_id=str(agent_id) if agent_id else None,
prompt_tokens=call_usage["prompt_tokens"],
generated_tokens=call_usage["generated_tokens"],
source=(
getattr(llm, "_token_usage_source", None) or "agent_stream"
),
request_id=getattr(llm, "_request_id", None),
)
except Exception as e:
logger.error(f"Failed to record token usage: {e}", exc_info=True)
except Exception:
logger.exception("token_usage persist failed")
def gen_token_usage(func):
"""Accumulate per-call token counts and write a ``token_usage`` row.
The accumulator on ``self.token_usage`` stays in place for code
paths that introspect it (e.g., logging, response payloads). DB
persistence happens here for every call so primary streams,
side-channel LLMs, and no-save flows all produce rows uniformly.
"""
def wrapper(self, model, messages, stream, tools, **kwargs):
usage_attachments = kwargs.pop("_usage_attachments", None)
call_usage = {"prompt_tokens": 0, "generated_tokens": 0}
@@ -123,18 +160,14 @@ def gen_token_usage(func):
call_usage["generated_tokens"] += _count_tokens(result)
self.token_usage["prompt_tokens"] += call_usage["prompt_tokens"]
self.token_usage["generated_tokens"] += call_usage["generated_tokens"]
update_token_usage(
self.decoded_token,
self.user_api_key,
call_usage,
getattr(self, "agent_id", None),
)
_persist_call_usage(self, call_usage)
return result
return wrapper
def stream_token_usage(func):
"""Stream variant of ``gen_token_usage``. Same persistence contract."""
def wrapper(self, model, messages, stream, tools, **kwargs):
usage_attachments = kwargs.pop("_usage_attachments", None)
call_usage = {"prompt_tokens": 0, "generated_tokens": 0}
@@ -145,19 +178,36 @@ def stream_token_usage(func):
**kwargs,
)
batch = []
result = func(self, model, messages, stream, tools, **kwargs)
for r in result:
batch.append(r)
yield r
for line in batch:
call_usage["generated_tokens"] += _count_tokens(line)
self.token_usage["prompt_tokens"] += call_usage["prompt_tokens"]
self.token_usage["generated_tokens"] += call_usage["generated_tokens"]
update_token_usage(
self.decoded_token,
self.user_api_key,
call_usage,
getattr(self, "agent_id", None),
)
started_at = time.monotonic()
error: BaseException | None = None
try:
result = func(self, model, messages, stream, tools, **kwargs)
for r in result:
batch.append(r)
yield r
except Exception as exc:
# ``GeneratorExit`` (consumer disconnected) and KeyboardInterrupt
# flow through as ``status="ok"`` — same convention as
# ``application.logging._consume_and_log``.
error = exc
raise
finally:
for line in batch:
call_usage["generated_tokens"] += _count_tokens(line)
self.token_usage["prompt_tokens"] += call_usage["prompt_tokens"]
self.token_usage["generated_tokens"] += call_usage["generated_tokens"]
_persist_call_usage(self, call_usage)
emit = getattr(self, "_emit_stream_finished_log", None)
if callable(emit):
try:
emit(
model,
prompt_tokens=call_usage["prompt_tokens"],
completion_tokens=call_usage["generated_tokens"],
latency_ms=int((time.monotonic() - started_at) * 1000),
error=error,
)
except Exception:
logger.exception("Failed to emit llm_stream_finished")
return wrapper

View File

@@ -83,9 +83,9 @@ def count_tokens_docs(docs):
def calculate_doc_token_budget(
model_id: str = "gpt-4o"
model_id: str = "gpt-4o", user_id: str | None = None
) -> int:
total_context = get_token_limit(model_id)
total_context = get_token_limit(model_id, user_id=user_id)
reserved = sum(settings.RESERVED_TOKENS.values())
doc_budget = total_context - reserved
return max(doc_budget, 1000)
@@ -150,9 +150,11 @@ def get_hash(data):
return hashlib.md5(data.encode(), usedforsecurity=False).hexdigest()
def limit_chat_history(history, max_token_limit=None, model_id="docsgpt-local"):
def limit_chat_history(
history, max_token_limit=None, model_id="docsgpt-local", user_id=None
):
"""Limit chat history to fit within token limit."""
model_token_limit = get_token_limit(model_id)
model_token_limit = get_token_limit(model_id, user_id=user_id)
max_token_limit = (
max_token_limit
if max_token_limit and max_token_limit < model_token_limit
@@ -204,7 +206,9 @@ def generate_image_url(image_path):
def calculate_compression_threshold(
model_id: str, threshold_percentage: float = 0.8
model_id: str,
threshold_percentage: float = 0.8,
user_id: str | None = None,
) -> int:
"""
Calculate token threshold for triggering compression.
@@ -212,11 +216,13 @@ def calculate_compression_threshold(
Args:
model_id: Model identifier
threshold_percentage: Percentage of context window (default 80%)
user_id: When set, BYOM custom-model records (UUID-keyed) resolve
for context-window lookup.
Returns:
Token count threshold
"""
total_context = get_token_limit(model_id)
total_context = get_token_limit(model_id, user_id=user_id)
threshold = int(total_context * threshold_percentage)
return threshold

View File

@@ -6,6 +6,7 @@ import os
import shutil
import string
import tempfile
import threading
from typing import Any, Dict
import zipfile
@@ -22,7 +23,10 @@ from application.cache import get_redis_instance
from application.core.settings import settings
from application.parser.chunking import Chunker
from application.parser.connectors.connector_creator import ConnectorCreator
from application.parser.embedding_pipeline import embed_and_store_documents
from application.parser.embedding_pipeline import (
assert_index_complete,
embed_and_store_documents,
)
from application.parser.file.bulk import SimpleDirectoryReader, get_default_file_extractor
from application.parser.file.constants import SUPPORTED_SOURCE_EXTENSIONS
from application.parser.remote.remote_creator import RemoteCreator
@@ -32,6 +36,9 @@ from application.retriever.retriever_creator import RetrieverCreator
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.agents import AgentsRepository
from application.storage.db.repositories.attachments import AttachmentsRepository
from application.storage.db.repositories.ingest_chunk_progress import (
IngestChunkProgressRepository,
)
from application.storage.db.repositories.sources import SourcesRepository
from application.storage.db.session import db_readonly, db_session
from application.storage.storage_creator import StorageCreator
@@ -43,6 +50,53 @@ from application.utils import count_tokens_docs, num_tokens_from_string, safe_fi
MIN_TOKENS = 150
MAX_TOKENS = 1250
RECURSION_DEPTH = 2
INGEST_HEARTBEAT_INTERVAL_SECONDS = 30
# Stable namespace for deterministic source IDs derived from idempotency keys.
# Pinned literal — do not change. Re-rolling this would mint different
# source_ids for the same idempotency_keys across deploys, defeating the
# retry-resume contract.
DOCSGPT_INGEST_NAMESPACE = uuid.UUID("fa25d5d1-398b-46df-ac89-8d1c360b9bea")
def _derive_source_id(idempotency_key):
"""``uuid5(NS, key)`` when a key is supplied; ``uuid4()`` otherwise."""
if isinstance(idempotency_key, str) and idempotency_key:
return uuid.uuid5(DOCSGPT_INGEST_NAMESPACE, idempotency_key)
return uuid.uuid4()
def _ingest_heartbeat_loop(source_id, stop_event, interval=INGEST_HEARTBEAT_INTERVAL_SECONDS):
    """Bump ``ingest_chunk_progress.last_updated`` until ``stop_event`` is set.

    ``stop_event.wait(interval)`` doubles as the sleep: it returns
    ``True`` (exit) as soon as the event is set, ``False`` on timeout
    (do another bump). A failed bump is logged and the loop continues.
    """
    while True:
        if stop_event.wait(interval):
            break
        try:
            with db_session() as conn:
                IngestChunkProgressRepository(conn).bump_heartbeat(source_id)
        except Exception as e:
            logging.warning(
                f"Heartbeat failed for {source_id}: {e}", exc_info=True
            )
def _start_ingest_heartbeat(source_id):
    """Spawn the heartbeat daemon and return ``(thread, stop_event)``.

    The thread is a daemon so a crashed worker process never hangs on
    it; callers pair this with ``_stop_ingest_heartbeat`` in a
    ``finally`` block for orderly shutdown.
    """
    stop_event = threading.Event()
    worker = threading.Thread(
        target=_ingest_heartbeat_loop,
        args=(str(source_id), stop_event),
        daemon=True,
        name=f"ingest-heartbeat-{source_id}",
    )
    worker.start()
    return worker, stop_event
def _stop_ingest_heartbeat(thread, stop_event):
"""Signal the heartbeat daemon to exit and wait briefly for it."""
if stop_event is not None:
stop_event.set()
if thread is not None:
thread.join(timeout=5)
# Define a function to extract metadata from a given filename.
@@ -344,7 +398,9 @@ def run_agent_logic(agent_config, input_data):
# Determine model_id: check agent's default_model_id, fallback to system default
agent_default_model = agent_config.get("default_model_id", "")
if agent_default_model and validate_model_id(agent_default_model):
if agent_default_model and validate_model_id(
agent_default_model, user_id=owner
):
model_id = agent_default_model
else:
model_id = get_default_model_id()
@@ -360,12 +416,16 @@ def run_agent_logic(agent_config, input_data):
)
# Get provider and API key for the selected model
provider = get_provider_from_model_id(model_id) if model_id else settings.LLM_PROVIDER
provider = (
get_provider_from_model_id(model_id, user_id=owner)
if model_id
else settings.LLM_PROVIDER
)
system_api_key = get_api_key_for_provider(provider or settings.LLM_PROVIDER)
# Calculate proper doc_token_limit based on model's context window
doc_token_limit = calculate_doc_token_budget(
model_id=model_id
model_id=model_id, user_id=owner
)
retriever = RetrieverCreator.create_retriever(
@@ -426,7 +486,10 @@ def run_agent_logic(agent_config, input_data):
"tool_calls": tool_calls,
"thought": thought,
}
logging.info(f"Agent response: {result}")
# Per-activity summary fields (answer_length, thought_length,
# source_count, tool_call_count) now ride on the inner
# ``activity_finished`` event emitted by ``log_activity`` around
# ``Agent.gen`` above; no separate ``agent_response`` log needed.
return result
except Exception as e:
logging.error(f"Error in run_agent_logic: {e}", exc_info=True)
@@ -446,6 +509,7 @@ def ingest_worker(
user,
retriever="classic",
file_name_map=None,
idempotency_key=None,
):
"""
Ingest and process documents.
@@ -460,6 +524,9 @@ def ingest_worker(
user (str): Identifier for the user initiating the ingestion (original, unsanitized).
retriever (str): Type of retriever to use for processing the documents.
file_name_map (dict|str|None): Optional mapping of safe relative paths to original filenames.
idempotency_key (str|None): When provided, the ``source_id`` is derived
deterministically from the key so a retried task reuses the same
source row instead of duplicating it.
Returns:
dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
@@ -566,12 +633,23 @@ def ingest_worker(
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
id = uuid.uuid4()
id = _derive_source_id(idempotency_key)
vector_store_path = os.path.join(temp_dir, "vector_store")
os.makedirs(vector_store_path, exist_ok=True)
embed_and_store_documents(docs, vector_store_path, id, self)
heartbeat_thread, heartbeat_stop = _start_ingest_heartbeat(id)
try:
embed_and_store_documents(
docs, vector_store_path, id, self,
attempt_id=getattr(self.request, "id", None),
)
finally:
_stop_ingest_heartbeat(heartbeat_thread, heartbeat_stop)
# Defense-in-depth: chunk-progress is the authoritative
# record of how many chunks landed; mismatch raises so the
# task fails loud rather than caching a partial index.
assert_index_complete(id)
tokens = count_tokens_docs(docs)
@@ -934,6 +1012,7 @@ def remote_worker(
sync_frequency="never",
operation_mode="upload",
doc_id=None,
idempotency_key=None,
):
safe_user = safe_filename(user)
full_path = os.path.join(directory, safe_user, uuid.uuid4().hex)
@@ -1026,14 +1105,22 @@ def remote_worker(
)
if operation_mode == "upload":
id = uuid.uuid4()
embed_and_store_documents(docs, full_path, id, self)
id = _derive_source_id(idempotency_key)
embed_and_store_documents(
docs, full_path, id, self,
attempt_id=getattr(self.request, "id", None),
)
assert_index_complete(id)
elif operation_mode == "sync":
if not doc_id:
logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
raise ValueError("doc_id must be provided for sync operation.")
id = str(doc_id)
embed_and_store_documents(docs, full_path, id, self)
embed_and_store_documents(
docs, full_path, id, self,
attempt_id=getattr(self.request, "id", None),
)
assert_index_complete(id)
self.update_state(state="PROGRESS", meta={"current": 100})
# Serialize remote_data as JSON if it's a dict (for S3, Reddit, etc.)
@@ -1239,16 +1326,10 @@ def attachment_worker(self, file_info, user):
def agent_webhook_worker(self, agent_id, payload):
"""
Process the webhook payload for an agent.
"""Process the webhook payload for an agent.
Args:
self: Reference to the instance of the task.
agent_id (str): Unique identifier for the agent.
payload (dict): The payload data from the webhook.
Returns:
dict: Information about the processed webhook.
Raises on failure: Celery treats a returned dict as success and
would skip retries, leaving the caller with a stale 200.
"""
self.update_state(state="PROGRESS", meta={"current": 1})
try:
@@ -1274,13 +1355,13 @@ def agent_webhook_worker(self, agent_id, payload):
input_data = json.dumps(payload)
except Exception as e:
logging.error(f"Error processing agent webhook: {e}", exc_info=True)
return {"status": "error", "error": str(e)}
raise
self.update_state(state="PROGRESS", meta={"current": 50})
try:
result = run_agent_logic(agent_config, input_data)
except Exception as e:
logging.error(f"Error running agent logic: {e}", exc_info=True)
return {"status": "error"}
raise
else:
logging.info(
f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
@@ -1303,6 +1384,7 @@ def ingest_connector(
operation_mode: str = "upload",
doc_id=None,
sync_frequency: str = "never",
idempotency_key=None,
) -> Dict[str, Any]:
"""
Ingestion for internal knowledge bases (GoogleDrive, etc.).
@@ -1319,6 +1401,8 @@ def ingest_connector(
operation_mode: "upload" for initial ingestion, "sync" for incremental sync
doc_id: Document ID for sync operations (required when operation_mode="sync")
sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")
idempotency_key: When provided, the ``source_id`` is derived
deterministically so a retried upload reuses the same source row.
"""
logging.info(
f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}"
@@ -1414,7 +1498,7 @@ def ingest_connector(
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
if operation_mode == "upload":
id = uuid.uuid4()
id = _derive_source_id(idempotency_key)
elif operation_mode == "sync":
if not doc_id:
logging.error(
@@ -1431,7 +1515,11 @@ def ingest_connector(
self.update_state(
state="PROGRESS", meta={"current": 80, "status": "Storing documents"}
)
embed_and_store_documents(docs, vector_store_path, id, self)
embed_and_store_documents(
docs, vector_store_path, id, self,
attempt_id=getattr(self.request, "id", None),
)
assert_index_complete(id)
tokens = count_tokens_docs(docs)

15
docs/package-lock.json generated
View File

@@ -4448,10 +4448,9 @@
}
},
"node_modules/@xmldom/xmldom": {
"version": "0.9.9",
"resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.9.9.tgz",
"integrity": "sha512-qycIHAucxy/LXAYIjmLmtQ8q9GPnMbnjG1KXhWm9o5sCr6pOYDATkMPiTNa6/v8eELyqOQ2FsEqeoFYmgv/gJg==",
"deprecated": "this version has critical issues, please update to the latest version",
"version": "0.9.10",
"resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.9.10.tgz",
"integrity": "sha512-A9gOqLdi6cV4ibazAjcQufGj0B1y/vDqYrcuP6d/6x8P27gRS8643Dj9o1dEKtB6O7fwxb2FgBmJS2mX7gpvdw==",
"license": "MIT",
"engines": {
"node": ">=14.6"
@@ -11835,12 +11834,12 @@
}
},
"node_modules/speech-rule-engine": {
"version": "4.1.3",
"resolved": "https://registry.npmjs.org/speech-rule-engine/-/speech-rule-engine-4.1.3.tgz",
"integrity": "sha512-SBMgkuJYvP4F62daRfBNwYC2nXTEhNXAfsBZ/BB7Ly85/KnbnjmKM7/45ZrFbH6jIMiAliDUDPSZFUuXDvcg6A==",
"version": "4.1.4",
"resolved": "https://registry.npmjs.org/speech-rule-engine/-/speech-rule-engine-4.1.4.tgz",
"integrity": "sha512-i/VCLG1fvRc95pMHRqG4aQNscv+9aIsqA2oI7ZQS51sTdUcDHYX6cpT8/tqZ+enjs1tKVwbRBWgxut9SWn+f9g==",
"license": "Apache-2.0",
"dependencies": {
"@xmldom/xmldom": "0.9.9",
"@xmldom/xmldom": "0.9.10",
"commander": "13.1.0",
"wicked-good-xpath": "1.3.0"
},

View File

@@ -19,7 +19,7 @@
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"dompurify": "^3.1.5",
"flow-bin": "^0.309.0",
"flow-bin": "^0.311.0",
"markdown-it": "^14.1.0",
"react": "^19.2.5",
"react-dom": "^19.2.5",
@@ -44,7 +44,7 @@
"eslint-plugin-prettier": "^5.5.5",
"eslint-plugin-react": "^7.37.5",
"eslint-plugin-unused-imports": "^4.4.1",
"globals": "^15.15.0",
"globals": "^17.5.0",
"parcel": "^2.16.4",
"prettier": "^3.8.1",
"process": "^0.11.10",
@@ -546,12 +546,13 @@
}
},
"node_modules/@babel/plugin-syntax-jsx": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.24.6.tgz",
"integrity": "sha512-lWfvAIFNWMlCsU0DRUun2GpFwZdGTukLaHJqRh1JRb80NdAP5Sb1HDHB5X9P9OtgZHQl089UzQkpYlBq2VTPRw==",
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.28.6.tgz",
"integrity": "sha512-wgEmr06G6sIpqr8YDwA2dSRTE3bJ+V0IfpzfSY3Lfgd7YWOaAdlykvJi13ZKBt8cZHfgH1IXN+CL656W3uUa4w==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-plugin-utils": "^7.24.6"
"@babel/helper-plugin-utils": "^7.28.6"
},
"engines": {
"node": ">=6.9.0"
@@ -1252,12 +1253,13 @@
}
},
"node_modules/@babel/plugin-transform-react-display-name": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.24.6.tgz",
"integrity": "sha512-/3iiEEHDsJuj9QU09gbyWGSUxDboFcD7Nj6dnHIlboWSodxXAoaY/zlNMHeYAC0WsERMqgO9a7UaM77CsYgWcg==",
"version": "7.28.0",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-display-name/-/plugin-transform-react-display-name-7.28.0.tgz",
"integrity": "sha512-D6Eujc2zMxKjfa4Zxl4GHMsmhKKZ9VpcqIchJLvwTxad9zWIYulwYItBovpDOoNLISpcZSXoDJ5gaGbQUDqViA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-plugin-utils": "^7.24.6"
"@babel/helper-plugin-utils": "^7.27.1"
},
"engines": {
"node": ">=6.9.0"
@@ -1267,16 +1269,17 @@
}
},
"node_modules/@babel/plugin-transform-react-jsx": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.24.6.tgz",
"integrity": "sha512-pCtPHhpRZHfwdA5G1Gpk5mIzMA99hv0R8S/Ket50Rw+S+8hkt3wBWqdqHaPw0CuUYxdshUgsPiLQ5fAs4ASMhw==",
"version": "7.28.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.28.6.tgz",
"integrity": "sha512-61bxqhiRfAACulXSLd/GxqmAedUSrRZIu/cbaT18T1CetkTmtDN15it7i80ru4DVqRK1WMxQhXs+Lf9kajm5Ow==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-annotate-as-pure": "^7.24.6",
"@babel/helper-module-imports": "^7.24.6",
"@babel/helper-plugin-utils": "^7.24.6",
"@babel/plugin-syntax-jsx": "^7.24.6",
"@babel/types": "^7.24.6"
"@babel/helper-annotate-as-pure": "^7.27.3",
"@babel/helper-module-imports": "^7.28.6",
"@babel/helper-plugin-utils": "^7.28.6",
"@babel/plugin-syntax-jsx": "^7.28.6",
"@babel/types": "^7.28.6"
},
"engines": {
"node": ">=6.9.0"
@@ -1286,12 +1289,13 @@
}
},
"node_modules/@babel/plugin-transform-react-jsx-development": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.24.6.tgz",
"integrity": "sha512-F7EsNp5StNDouSSdYyDSxh4J+xvj/JqG+Cb6s2fA+jCyHOzigG5vTwgH8tU2U8Voyiu5zCG9bAK49wTr/wPH0w==",
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx-development/-/plugin-transform-react-jsx-development-7.27.1.tgz",
"integrity": "sha512-ykDdF5yI4f1WrAolLqeF3hmYU12j9ntLQl/AOG1HAS21jxyg1Q0/J/tpREuYLfatGdGmXp/3yS0ZA76kOlVq9Q==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/plugin-transform-react-jsx": "^7.24.6"
"@babel/plugin-transform-react-jsx": "^7.27.1"
},
"engines": {
"node": ">=6.9.0"
@@ -1301,13 +1305,14 @@
}
},
"node_modules/@babel/plugin-transform-react-pure-annotations": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.24.6.tgz",
"integrity": "sha512-0HoDQlFJJkXRyV2N+xOpUETbKHcouSwijRQbKWVtxsPoq5bbB30qZag9/pSc5xcWVYjTHlLsBsY+hZDnzQTPNw==",
"version": "7.27.1",
"resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-pure-annotations/-/plugin-transform-react-pure-annotations-7.27.1.tgz",
"integrity": "sha512-JfuinvDOsD9FVMTHpzA/pBLisxpv1aSf+OIV8lgH3MuWrks19R27e6a6DipIg4aX1Zm9Wpb04p8wljfKrVSnPA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-annotate-as-pure": "^7.24.6",
"@babel/helper-plugin-utils": "^7.24.6"
"@babel/helper-annotate-as-pure": "^7.27.1",
"@babel/helper-plugin-utils": "^7.27.1"
},
"engines": {
"node": ">=6.9.0"
@@ -1613,17 +1618,18 @@
}
},
"node_modules/@babel/preset-react": {
"version": "7.24.6",
"resolved": "https://registry.npmjs.org/@babel/preset-react/-/preset-react-7.24.6.tgz",
"integrity": "sha512-8mpzh1bWvmINmwM3xpz6ahu57mNaWavMm+wBNjQ4AFu1nghKBiIRET7l/Wmj4drXany/BBGjJZngICcD98F1iw==",
"version": "7.28.5",
"resolved": "https://registry.npmjs.org/@babel/preset-react/-/preset-react-7.28.5.tgz",
"integrity": "sha512-Z3J8vhRq7CeLjdC58jLv4lnZ5RKFUJWqH5emvxmv9Hv3BD1T9R/Im713R4MTKwvFaV74ejZ3sM01LyEKk4ugNQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"@babel/helper-plugin-utils": "^7.24.6",
"@babel/helper-validator-option": "^7.24.6",
"@babel/plugin-transform-react-display-name": "^7.24.6",
"@babel/plugin-transform-react-jsx": "^7.24.6",
"@babel/plugin-transform-react-jsx-development": "^7.24.6",
"@babel/plugin-transform-react-pure-annotations": "^7.24.6"
"@babel/helper-plugin-utils": "^7.27.1",
"@babel/helper-validator-option": "^7.27.1",
"@babel/plugin-transform-react-display-name": "^7.28.0",
"@babel/plugin-transform-react-jsx": "^7.27.1",
"@babel/plugin-transform-react-jsx-development": "^7.27.1",
"@babel/plugin-transform-react-pure-annotations": "^7.27.1"
},
"engines": {
"node": ">=6.9.0"
@@ -4540,17 +4546,17 @@
"devOptional": true
},
"node_modules/@typescript-eslint/eslint-plugin": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.0.tgz",
"integrity": "sha512-HyAZtpdkgZwpq8Sz3FSUvCR4c+ScbuWa9AksK2Jweub7w4M3yTz4O11AqVJzLYjy/B9ZWPyc81I+mOdJU/bDQw==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.1.tgz",
"integrity": "sha512-BOziFIfE+6osHO9FoJG4zjoHUcvI7fTNBSpdAwrNH0/TLvzjsk2oo8XSSOT2HhqUyhZPfHv4UOffoJ9oEEQ7Ag==",
"dev": true,
"license": "MIT",
"dependencies": {
"@eslint-community/regexpp": "^4.12.2",
"@typescript-eslint/scope-manager": "8.59.0",
"@typescript-eslint/type-utils": "8.59.0",
"@typescript-eslint/utils": "8.59.0",
"@typescript-eslint/visitor-keys": "8.59.0",
"@typescript-eslint/scope-manager": "8.59.1",
"@typescript-eslint/type-utils": "8.59.1",
"@typescript-eslint/utils": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1",
"ignore": "^7.0.5",
"natural-compare": "^1.4.0",
"ts-api-utils": "^2.5.0"
@@ -4563,22 +4569,22 @@
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"@typescript-eslint/parser": "^8.59.0",
"@typescript-eslint/parser": "^8.59.1",
"eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
"typescript": ">=4.8.4 <6.1.0"
}
},
"node_modules/@typescript-eslint/parser": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.0.tgz",
"integrity": "sha512-TI1XGwKbDpo9tRW8UDIXCOeLk55qe9ZFGs8MTKU6/M08HWTw52DD/IYhfQtOEhEdPhLMT26Ka/x7p70nd3dzDg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.1.tgz",
"integrity": "sha512-HDQH9O/47Dxi1ceDhBXdaldtf/WV9yRYMjbjCuNk3qnaTD564qwv61Y7+gTxwxRKzSrgO5uhtw584igXVuuZkA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/scope-manager": "8.59.0",
"@typescript-eslint/types": "8.59.0",
"@typescript-eslint/typescript-estree": "8.59.0",
"@typescript-eslint/visitor-keys": "8.59.0",
"@typescript-eslint/scope-manager": "8.59.1",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/typescript-estree": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1",
"debug": "^4.4.3"
},
"engines": {
@@ -4594,14 +4600,14 @@
}
},
"node_modules/@typescript-eslint/project-service": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.0.tgz",
"integrity": "sha512-Lw5ITrR5s5TbC19YSvlr63ZfLaJoU6vtKTHyB0GQOpX0W7d5/Ir6vUahWi/8Sps/nOukZQ0IB3SmlxZnjaKVnw==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.1.tgz",
"integrity": "sha512-+MuHQlHiEr00Of/IQbE/MmEoi44znZHbR/Pz7Opq4HryUOlRi+/44dro9Ycy8Fyo+/024IWtw8m4JUMCGTYxDg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/tsconfig-utils": "^8.59.0",
"@typescript-eslint/types": "^8.59.0",
"@typescript-eslint/tsconfig-utils": "^8.59.1",
"@typescript-eslint/types": "^8.59.1",
"debug": "^4.4.3"
},
"engines": {
@@ -4616,14 +4622,14 @@
}
},
"node_modules/@typescript-eslint/scope-manager": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.0.tgz",
"integrity": "sha512-UzR16Ut8IpA3Mc4DbgAShlPPkVm8xXMWafXxB0BocaVRHs8ZGakAxGRskF7FId3sdk9lgGD73GSFaWmWFDE4dg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.1.tgz",
"integrity": "sha512-LwuHQI4pDOYVKvmH2dkaJo6YZCSgouVgnS/z7yBPKBMvgtBvyLqiLy9Z6b7+m/TRcX1NFYUqZetI5Y+aT4GEfg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/types": "8.59.0",
"@typescript-eslint/visitor-keys": "8.59.0"
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1"
},
"engines": {
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -4634,9 +4640,9 @@
}
},
"node_modules/@typescript-eslint/tsconfig-utils": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.0.tgz",
"integrity": "sha512-91Sbl3s4Kb3SybliIY6muFBmHVv+pYXfybC4Oolp3dvk8BvIE3wOPc+403CWIT7mJNkfQRGtdqghzs2+Z91Tqg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.1.tgz",
"integrity": "sha512-/0nEyPbX7gRsk0Uwfe4ALwwgxuA66d/l2mhRDNlAvaj4U3juhUtJNq0DsY8M2AYwwb9rEq2hrC3IcIcEt++iJA==",
"dev": true,
"license": "MIT",
"engines": {
@@ -4651,15 +4657,15 @@
}
},
"node_modules/@typescript-eslint/type-utils": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.0.tgz",
"integrity": "sha512-3TRiZaQSltGqGeNrJzzr1+8YcEobKH9rHnqIp/1psfKFmhRQDNMGP5hBufanYTGznwShzVLs3Mz+gDN7HkWfXg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.1.tgz",
"integrity": "sha512-klWPBR2ciQHS3f++ug/mVnWKPjBUo7icEL3FAO1lhAR1Z1i5NQYZ1EannMSRYcq5qCv5wNALlXr6fksRHyYl7w==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/types": "8.59.0",
"@typescript-eslint/typescript-estree": "8.59.0",
"@typescript-eslint/utils": "8.59.0",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/typescript-estree": "8.59.1",
"@typescript-eslint/utils": "8.59.1",
"debug": "^4.4.3",
"ts-api-utils": "^2.5.0"
},
@@ -4676,9 +4682,9 @@
}
},
"node_modules/@typescript-eslint/types": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.0.tgz",
"integrity": "sha512-nLzdsT1gdOgFxxxwrlNVUBzSNBEEHJ86bblmk4QAS6stfig7rcJzWKqCyxFy3YRRHXDWEkb2NralA1nOYkkm/A==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.1.tgz",
"integrity": "sha512-ZDCjgccSdYPw5Bxh+my4Z0lJU96ZDN7jbBzvmEn0FZx3RtU1C7VWl6NbDx94bwY3V5YsgwRzJPOgeY2Q/nLG8A==",
"dev": true,
"license": "MIT",
"engines": {
@@ -4690,16 +4696,16 @@
}
},
"node_modules/@typescript-eslint/typescript-estree": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.0.tgz",
"integrity": "sha512-O9Re9P1BmBLFJyikRbQpLku/QA3/AueZNO9WePLBwQrvkixTmDe8u76B6CYUAITRl/rHawggEqUGn5QIkVRLMw==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.1.tgz",
"integrity": "sha512-OUd+vJS05sSkOip+BkZ/2NS8RMxrAAJemsC6vU3kmfLyeaJT0TftHkV9mcx2107MmsBVXXexhVu4F0TZXyMl4g==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/project-service": "8.59.0",
"@typescript-eslint/tsconfig-utils": "8.59.0",
"@typescript-eslint/types": "8.59.0",
"@typescript-eslint/visitor-keys": "8.59.0",
"@typescript-eslint/project-service": "8.59.1",
"@typescript-eslint/tsconfig-utils": "8.59.1",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1",
"debug": "^4.4.3",
"minimatch": "^10.2.2",
"semver": "^7.7.3",
@@ -4731,16 +4737,16 @@
}
},
"node_modules/@typescript-eslint/utils": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.0.tgz",
"integrity": "sha512-I1R/K7V07XsMJ12Oaxg/O9GfrysGTmCRhvZJBv0RE0NcULMzjqVpR5kRRQjHsz3J/bElU7HwCO7zkqL+MSUz+g==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.1.tgz",
"integrity": "sha512-3pIeoXhCeYH9FSCBI8P3iNwJlGuzPlYKkTlen2O9T1DSeeg8UG8jstq6BLk+Mda0qup7mgk4z4XL4OzRaxZ8LA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@eslint-community/eslint-utils": "^4.9.1",
"@typescript-eslint/scope-manager": "8.59.0",
"@typescript-eslint/types": "8.59.0",
"@typescript-eslint/typescript-estree": "8.59.0"
"@typescript-eslint/scope-manager": "8.59.1",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/typescript-estree": "8.59.1"
},
"engines": {
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -4755,13 +4761,13 @@
}
},
"node_modules/@typescript-eslint/visitor-keys": {
"version": "8.59.0",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.0.tgz",
"integrity": "sha512-/uejZt4dSere1bx12WLlPfv8GktzcaDtuJ7s42/HEZ5zGj9oxRaD4bj7qwSunXkf+pbAhFt2zjpHYUiT5lHf0Q==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.1.tgz",
"integrity": "sha512-LdDNl6C5iJExcM0Yh0PwAIBb9PrSiCsWamF/JyEZawm3kFDnRoaq3LGE4bpyRao/fWeGKKyw7icx0YxrLFC5Cg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/types": "8.59.0",
"@typescript-eslint/types": "8.59.1",
"eslint-visitor-keys": "^5.0.0"
},
"engines": {
@@ -6486,9 +6492,9 @@
"license": "ISC"
},
"node_modules/flow-bin": {
"version": "0.309.0",
"resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.309.0.tgz",
"integrity": "sha512-/RH68gcCY8OHzcdSVTUCw+fhDSEYmNHoovfK0EcbB4rs1Xbc5HhxhHTvr7U+h55De4bDRlE52ghH23MRP625cQ==",
"version": "0.311.0",
"resolved": "https://registry.npmjs.org/flow-bin/-/flow-bin-0.311.0.tgz",
"integrity": "sha512-4lXxjhPdmkeizju3F0HDCMYGkoL7hiq0W9bAW4pQpQTi56op+QZrVyMENjbCGZc+KlFBLwWkur+EkyfPTsa6xw==",
"license": "MIT",
"bin": {
"flow": "cli.js"
@@ -6651,9 +6657,9 @@
}
},
"node_modules/globals": {
"version": "15.15.0",
"resolved": "https://registry.npmjs.org/globals/-/globals-15.15.0.tgz",
"integrity": "sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==",
"version": "17.5.0",
"resolved": "https://registry.npmjs.org/globals/-/globals-17.5.0.tgz",
"integrity": "sha512-qoV+HK2yFl/366t2/Cb3+xxPUo5BuMynomoDmiaZBIdbs+0pYbjfZU+twLhGKp4uCZ/+NbtpVepH5bGCxRyy2g==",
"dev": true,
"license": "MIT",
"engines": {
@@ -8970,9 +8976,9 @@
}
},
"node_modules/styled-components": {
"version": "6.4.0",
"resolved": "https://registry.npmjs.org/styled-components/-/styled-components-6.4.0.tgz",
"integrity": "sha512-BL1EDFpt+q10eAeZB0q9ps6pSlPejaBQWBkiuM16pyoVTG4NhZrPrZK0cqNbrozxSsYwUsJ9SQYN6NyeKJYX9A==",
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/styled-components/-/styled-components-6.4.1.tgz",
"integrity": "sha512-ADu2dF53esUzzM4I0ewxhxFtsDd6v4V6dNkg3vG0iFKhnt06sJneTZnRvujAosZwW0XD58IKgGMQoqri4wHRqg==",
"license": "MIT",
"dependencies": {
"@emotion/is-prop-valid": "1.4.0",

View File

@@ -52,7 +52,7 @@
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"dompurify": "^3.1.5",
"flow-bin": "^0.309.0",
"flow-bin": "^0.311.0",
"markdown-it": "^14.1.0",
"react": "^19.2.5",
"react-dom": "^19.2.5",
@@ -77,7 +77,7 @@
"eslint-plugin-prettier": "^5.5.5",
"eslint-plugin-react": "^7.37.5",
"eslint-plugin-unused-imports": "^4.4.1",
"globals": "^15.15.0",
"globals": "^17.5.0",
"parcel": "^2.16.4",
"prettier": "^3.8.1",
"process": "^0.11.10",

View File

@@ -28,12 +28,12 @@
"react": "^19.1.0",
"react-chartjs-2": "^5.3.0",
"react-dom": "^19.2.5",
"react-dropzone": "^14.3.8",
"react-dropzone": "^15.0.0",
"react-google-drive-picker": "^1.2.2",
"react-i18next": "^17.0.2",
"react-i18next": "^17.0.6",
"react-markdown": "^9.0.1",
"react-redux": "^9.2.0",
"react-router-dom": "^7.14.1",
"react-router-dom": "^7.14.2",
"react-syntax-highlighter": "^16.1.1",
"reactflow": "^11.11.4",
"rehype-katex": "^7.0.1",
@@ -47,7 +47,7 @@
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^8.58.2",
"@typescript-eslint/eslint-plugin": "^8.59.1",
"@typescript-eslint/parser": "^8.46.3",
"@vitejs/plugin-react": "^6.0.1",
"eslint": "^9.39.1",
@@ -60,13 +60,13 @@
"eslint-plugin-unused-imports": "^4.1.4",
"husky": "^9.1.7",
"lint-staged": "^16.4.0",
"postcss": "^8.4.49",
"postcss": "^8.5.12",
"prettier": "^3.5.3",
"prettier-plugin-tailwindcss": "^0.7.2",
"tailwindcss": "^4.2.2",
"tw-animate-css": "^1.4.0",
"typescript": "^5.8.3",
"vite": "^8.0.8",
"typescript": "^6.0.3",
"vite": "^8.0.10",
"vite-plugin-svgr": "^4.3.0"
}
},
@@ -418,9 +418,9 @@
"license": "Apache-2.0"
},
"node_modules/@emnapi/core": {
"version": "1.9.2",
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.9.2.tgz",
"integrity": "sha512-UC+ZhH3XtczQYfOlu3lNEkdW/p4dsJ1r/bP7H8+rhao3TTTMO1ATq/4DdIi23XuGoFY+Cz0JmCbdVl0hz9jZcA==",
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/@emnapi/core/-/core-1.10.0.tgz",
"integrity": "sha512-yq6OkJ4p82CAfPl0u9mQebQHKPJkY7WrIuk205cTYnYe+k2Z8YBh11FrbRG/H6ihirqcacOgl2BIO8oyMQLeXw==",
"dev": true,
"license": "MIT",
"optional": true,
@@ -430,9 +430,9 @@
}
},
"node_modules/@emnapi/runtime": {
"version": "1.9.2",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.9.2.tgz",
"integrity": "sha512-3U4+MIWHImeyu1wnmVygh5WlgfYDtyf0k8AbLhMFxOipihf6nrWC4syIm/SwEeec0mNSafiiNnMJwbza/Is6Lw==",
"version": "1.10.0",
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.10.0.tgz",
"integrity": "sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==",
"dev": true,
"license": "MIT",
"optional": true,
@@ -791,9 +791,9 @@
}
},
"node_modules/@oxc-project/types": {
"version": "0.124.0",
"resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.124.0.tgz",
"integrity": "sha512-VBFWMTBvHxS11Z5Lvlr3IWgrwhMTXV+Md+EQF0Xf60+wAdsGFTBx7X7K/hP4pi8N7dcm1RvcHwDxZ16Qx8keUg==",
"version": "0.127.0",
"resolved": "https://registry.npmjs.org/@oxc-project/types/-/types-0.127.0.tgz",
"integrity": "sha512-aIYXQBo4lCbO4z0R3FHeucQHpF46l2LbMdxRvqvuRuW2OxdnSkcng5B8+K12spgLDj93rtN3+J2Vac/TIO+ciQ==",
"dev": true,
"license": "MIT",
"funding": {
@@ -2584,9 +2584,9 @@
}
},
"node_modules/@rolldown/binding-android-arm64": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.15.tgz",
"integrity": "sha512-YYe6aWruPZDtHNpwu7+qAHEMbQ/yRl6atqb/AhznLTnD3UY99Q1jE7ihLSahNWkF4EqRPVC4SiR4O0UkLK02tA==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-android-arm64/-/binding-android-arm64-1.0.0-rc.17.tgz",
"integrity": "sha512-s70pVGhw4zqGeFnXWvAzJDlvxhlRollagdCCKRgOsgUOH3N1l0LIxf83AtGzmb5SiVM4Hjl5HyarMRfdfj3DaQ==",
"cpu": [
"arm64"
],
@@ -2601,9 +2601,9 @@
}
},
"node_modules/@rolldown/binding-darwin-arm64": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.15.tgz",
"integrity": "sha512-oArR/ig8wNTPYsXL+Mzhs0oxhxfuHRfG7Ikw7jXsw8mYOtk71W0OkF2VEVh699pdmzjPQsTjlD1JIOoHkLP1Fg==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-arm64/-/binding-darwin-arm64-1.0.0-rc.17.tgz",
"integrity": "sha512-4ksWc9n0mhlZpZ9PMZgTGjeOPRu8MB1Z3Tz0Mo02eWfWCHMW1zN82Qz/pL/rC+yQa+8ZnutMF0JjJe7PjwasYw==",
"cpu": [
"arm64"
],
@@ -2618,9 +2618,9 @@
}
},
"node_modules/@rolldown/binding-darwin-x64": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.15.tgz",
"integrity": "sha512-YzeVqOqjPYvUbJSWJ4EDL8ahbmsIXQpgL3JVipmN+MX0XnXMeWomLN3Fb+nwCmP/jfyqte5I3XRSm7OfQrbyxw==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-darwin-x64/-/binding-darwin-x64-1.0.0-rc.17.tgz",
"integrity": "sha512-SUSDOI6WwUVNcWxd02QEBjLdY1VPHvlEkw6T/8nYG322iYWCTxRb1vzk4E+mWWYehTp7ERibq54LSJGjmouOsw==",
"cpu": [
"x64"
],
@@ -2635,9 +2635,9 @@
}
},
"node_modules/@rolldown/binding-freebsd-x64": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.15.tgz",
"integrity": "sha512-9Erhx956jeQ0nNTyif1+QWAXDRD38ZNjr//bSHrt6wDwB+QkAfl2q6Mn1k6OBPerznjRmbM10lgRb1Pli4xZPw==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-freebsd-x64/-/binding-freebsd-x64-1.0.0-rc.17.tgz",
"integrity": "sha512-hwnz3nw9dbJ05EDO/PvcjaaewqqDy7Y1rn1UO81l8iIK1GjenME75dl16ajbvSSMfv66WXSRCYKIqfgq2KCfxw==",
"cpu": [
"x64"
],
@@ -2652,9 +2652,9 @@
}
},
"node_modules/@rolldown/binding-linux-arm-gnueabihf": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.15.tgz",
"integrity": "sha512-cVwk0w8QbZJGTnP/AHQBs5yNwmpgGYStL88t4UIaqcvYJWBfS0s3oqVLZPwsPU6M0zlW4GqjP0Zq5MnAGwFeGA==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm-gnueabihf/-/binding-linux-arm-gnueabihf-1.0.0-rc.17.tgz",
"integrity": "sha512-IS+W7epTcwANmFSQFrS1SivEXHtl1JtuQA9wlxrZTcNi6mx+FDOYrakGevvvTwgj2JvWiK8B29/qD9BELZPyXQ==",
"cpu": [
"arm"
],
@@ -2669,9 +2669,9 @@
}
},
"node_modules/@rolldown/binding-linux-arm64-gnu": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.15.tgz",
"integrity": "sha512-eBZ/u8iAK9SoHGanqe/jrPnY0JvBN6iXbVOsbO38mbz+ZJsaobExAm1Iu+rxa4S1l2FjG0qEZn4Rc6X8n+9M+w==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-gnu/-/binding-linux-arm64-gnu-1.0.0-rc.17.tgz",
"integrity": "sha512-e6usGaHKW5BMNZOymS1UcEYGowQMWcgZ71Z17Sl/h2+ZziNJ1a9n3Zvcz6LdRyIW5572wBCTH/Z+bKuZouGk9Q==",
"cpu": [
"arm64"
],
@@ -2686,9 +2686,9 @@
}
},
"node_modules/@rolldown/binding-linux-arm64-musl": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.15.tgz",
"integrity": "sha512-ZvRYMGrAklV9PEkgt4LQM6MjQX2P58HPAuecwYObY2DhS2t35R0I810bKi0wmaYORt6m/2Sm+Z+nFgb0WhXNcQ==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-arm64-musl/-/binding-linux-arm64-musl-1.0.0-rc.17.tgz",
"integrity": "sha512-b/CgbwAJpmrRLp02RPfhbudf5tZnN9nsPWK82znefso832etkem8H7FSZwxrOI9djcdTP7U6YfNhbRnh7djErg==",
"cpu": [
"arm64"
],
@@ -2703,9 +2703,9 @@
}
},
"node_modules/@rolldown/binding-linux-ppc64-gnu": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.15.tgz",
"integrity": "sha512-VDpgGBzgfg5hLg+uBpCLoFG5kVvEyafmfxGUV0UHLcL5irxAK7PKNeC2MwClgk6ZAiNhmo9FLhRYgvMmedLtnQ==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-ppc64-gnu/-/binding-linux-ppc64-gnu-1.0.0-rc.17.tgz",
"integrity": "sha512-4EII1iNGRUN5WwGbF/kOh/EIkoDN9HsupgLQoXfY+D1oyJm7/F4t5PYU5n8SWZgG0FEwakyM8pGgwcBYruGTlA==",
"cpu": [
"ppc64"
],
@@ -2720,9 +2720,9 @@
}
},
"node_modules/@rolldown/binding-linux-s390x-gnu": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.15.tgz",
"integrity": "sha512-y1uXY3qQWCzcPgRJATPSOUP4tCemh4uBdY7e3EZbVwCJTY3gLJWnQABgeUetvED+bt1FQ01OeZwvhLS2bpNrAQ==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-s390x-gnu/-/binding-linux-s390x-gnu-1.0.0-rc.17.tgz",
"integrity": "sha512-AH8oq3XqQo4IibpVXvPeLDI5pzkpYn0WiZAfT05kFzoJ6tQNzwRdDYQ45M8I/gslbodRZwW8uxLhbSBbkv96rA==",
"cpu": [
"s390x"
],
@@ -2737,9 +2737,9 @@
}
},
"node_modules/@rolldown/binding-linux-x64-gnu": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.15.tgz",
"integrity": "sha512-023bTPBod7J3Y/4fzAN6QtpkSABR0rigtrwaP+qSEabUh5zf6ELr9Nc7GujaROuPY3uwdSIXWrvhn1KxOvurWA==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-gnu/-/binding-linux-x64-gnu-1.0.0-rc.17.tgz",
"integrity": "sha512-cLnjV3xfo7KslbU41Z7z8BH/E1y5mzUYzAqih1d1MDaIGZRCMqTijqLv76/P7fyHuvUcfGsIpqCdddbxLLK9rA==",
"cpu": [
"x64"
],
@@ -2754,9 +2754,9 @@
}
},
"node_modules/@rolldown/binding-linux-x64-musl": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.15.tgz",
"integrity": "sha512-witB2O0/hU4CgfOOKUoeFgQ4GktPi1eEbAhaLAIpgD6+ZnhcPkUtPsoKKHRzmOoWPZue46IThdSgdo4XneOLYw==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-linux-x64-musl/-/binding-linux-x64-musl-1.0.0-rc.17.tgz",
"integrity": "sha512-0phclDw1spsL7dUB37sIARuis2tAgomCJXAHZlpt8PXZ4Ba0dRP1e+66lsRqrfhISeN9bEGNjQs+T/Fbd7oYGw==",
"cpu": [
"x64"
],
@@ -2771,9 +2771,9 @@
}
},
"node_modules/@rolldown/binding-openharmony-arm64": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.15.tgz",
"integrity": "sha512-UCL68NJ0Ud5zRipXZE9dF5PmirzJE4E4BCIOOssEnM7wLDsxjc6Qb0sGDxTNRTP53I6MZpygyCpY8Aa8sPfKPg==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-openharmony-arm64/-/binding-openharmony-arm64-1.0.0-rc.17.tgz",
"integrity": "sha512-0ag/hEgXOwgw4t8QyQvUCxvEg+V0KBcA6YuOx9g0r02MprutRF5dyljgm3EmR02O292UX7UeS6HzWHAl6KgyhA==",
"cpu": [
"arm64"
],
@@ -2788,9 +2788,9 @@
}
},
"node_modules/@rolldown/binding-wasm32-wasi": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.15.tgz",
"integrity": "sha512-ApLruZq/ig+nhaE7OJm4lDjayUnOHVUa77zGeqnqZ9pn0ovdVbbNPerVibLXDmWeUZXjIYIT8V3xkT58Rm9u5Q==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-wasm32-wasi/-/binding-wasm32-wasi-1.0.0-rc.17.tgz",
"integrity": "sha512-LEXei6vo0E5wTGwpkJ4KoT3OZJRnglwldt5ziLzOlc6qqb55z4tWNq2A+PFqCJuvWWdP53CVhG1Z9NtToDPJrA==",
"cpu": [
"wasm32"
],
@@ -2798,18 +2798,18 @@
"license": "MIT",
"optional": true,
"dependencies": {
"@emnapi/core": "1.9.2",
"@emnapi/runtime": "1.9.2",
"@napi-rs/wasm-runtime": "^1.1.3"
"@emnapi/core": "1.10.0",
"@emnapi/runtime": "1.10.0",
"@napi-rs/wasm-runtime": "^1.1.4"
},
"engines": {
"node": ">=14.0.0"
"node": "^20.19.0 || >=22.12.0"
}
},
"node_modules/@rolldown/binding-win32-arm64-msvc": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.15.tgz",
"integrity": "sha512-KmoUoU7HnN+Si5YWJigfTws1jz1bKBYDQKdbLspz0UaqjjFkddHsqorgiW1mxcAj88lYUE6NC/zJNwT+SloqtA==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-arm64-msvc/-/binding-win32-arm64-msvc-1.0.0-rc.17.tgz",
"integrity": "sha512-gUmyzBl3SPMa6hrqFUth9sVfcLBlYsbMzBx5PlexMroZStgzGqlZ26pYG89rBb45Mnia+oil6YAIFeEWGWhoZA==",
"cpu": [
"arm64"
],
@@ -2824,9 +2824,9 @@
}
},
"node_modules/@rolldown/binding-win32-x64-msvc": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.15.tgz",
"integrity": "sha512-3P2A8L+x75qavWLe/Dll3EYBJLQmtkJN8rfh+U/eR3MqMgL/h98PhYI+JFfXuDPgPeCB7iZAKiqii5vqOvnA0g==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/binding-win32-x64-msvc/-/binding-win32-x64-msvc-1.0.0-rc.17.tgz",
"integrity": "sha512-3hkiolcUAvPB9FLb3UZdfjVVNWherN1f/skkGWJP/fgSQhYUZpSIRr0/I8ZK9TkF3F7kxvJAk0+IcKvPHk9qQg==",
"cpu": [
"x64"
],
@@ -3862,17 +3862,17 @@
"license": "MIT"
},
"node_modules/@typescript-eslint/eslint-plugin": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.58.2.tgz",
"integrity": "sha512-aC2qc5thQahutKjP+cl8cgN9DWe3ZUqVko30CMSZHnFEHyhOYoZSzkGtAI2mcwZ38xeImDucI4dnqsHiOYuuCw==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-8.59.1.tgz",
"integrity": "sha512-BOziFIfE+6osHO9FoJG4zjoHUcvI7fTNBSpdAwrNH0/TLvzjsk2oo8XSSOT2HhqUyhZPfHv4UOffoJ9oEEQ7Ag==",
"dev": true,
"license": "MIT",
"dependencies": {
"@eslint-community/regexpp": "^4.12.2",
"@typescript-eslint/scope-manager": "8.58.2",
"@typescript-eslint/type-utils": "8.58.2",
"@typescript-eslint/utils": "8.58.2",
"@typescript-eslint/visitor-keys": "8.58.2",
"@typescript-eslint/scope-manager": "8.59.1",
"@typescript-eslint/type-utils": "8.59.1",
"@typescript-eslint/utils": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1",
"ignore": "^7.0.5",
"natural-compare": "^1.4.0",
"ts-api-utils": "^2.5.0"
@@ -3885,7 +3885,7 @@
"url": "https://opencollective.com/typescript-eslint"
},
"peerDependencies": {
"@typescript-eslint/parser": "^8.58.2",
"@typescript-eslint/parser": "^8.59.1",
"eslint": "^8.57.0 || ^9.0.0 || ^10.0.0",
"typescript": ">=4.8.4 <6.1.0"
}
@@ -3901,16 +3901,16 @@
}
},
"node_modules/@typescript-eslint/parser": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.58.2.tgz",
"integrity": "sha512-/Zb/xaIDfxeJnvishjGdcR4jmr7S+bda8PKNhRGdljDM+elXhlvN0FyPSsMnLmJUrVG9aPO6dof80wjMawsASg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-8.59.1.tgz",
"integrity": "sha512-HDQH9O/47Dxi1ceDhBXdaldtf/WV9yRYMjbjCuNk3qnaTD564qwv61Y7+gTxwxRKzSrgO5uhtw584igXVuuZkA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/scope-manager": "8.58.2",
"@typescript-eslint/types": "8.58.2",
"@typescript-eslint/typescript-estree": "8.58.2",
"@typescript-eslint/visitor-keys": "8.58.2",
"@typescript-eslint/scope-manager": "8.59.1",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/typescript-estree": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1",
"debug": "^4.4.3"
},
"engines": {
@@ -3926,14 +3926,14 @@
}
},
"node_modules/@typescript-eslint/project-service": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.58.2.tgz",
"integrity": "sha512-Cq6UfpZZk15+r87BkIh5rDpi38W4b+Sjnb8wQCPPDDweS/LRCFjCyViEbzHk5Ck3f2QDfgmlxqSa7S7clDtlfg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.59.1.tgz",
"integrity": "sha512-+MuHQlHiEr00Of/IQbE/MmEoi44znZHbR/Pz7Opq4HryUOlRi+/44dro9Ycy8Fyo+/024IWtw8m4JUMCGTYxDg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/tsconfig-utils": "^8.58.2",
"@typescript-eslint/types": "^8.58.2",
"@typescript-eslint/tsconfig-utils": "^8.59.1",
"@typescript-eslint/types": "^8.59.1",
"debug": "^4.4.3"
},
"engines": {
@@ -3948,14 +3948,14 @@
}
},
"node_modules/@typescript-eslint/scope-manager": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.58.2.tgz",
"integrity": "sha512-SgmyvDPexWETQek+qzZnrG6844IaO02UVyOLhI4wpo82dpZJY9+6YZCKAMFzXb7qhx37mFK1QcPQ18tud+vo6Q==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.59.1.tgz",
"integrity": "sha512-LwuHQI4pDOYVKvmH2dkaJo6YZCSgouVgnS/z7yBPKBMvgtBvyLqiLy9Z6b7+m/TRcX1NFYUqZetI5Y+aT4GEfg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/types": "8.58.2",
"@typescript-eslint/visitor-keys": "8.58.2"
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1"
},
"engines": {
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -3966,9 +3966,9 @@
}
},
"node_modules/@typescript-eslint/tsconfig-utils": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.58.2.tgz",
"integrity": "sha512-3SR+RukipDvkkKp/d0jP0dyzuls3DbGmwDpVEc5wqk5f38KFThakqAAO0XMirWAE+kT00oTauTbzMFGPoAzB0A==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.59.1.tgz",
"integrity": "sha512-/0nEyPbX7gRsk0Uwfe4ALwwgxuA66d/l2mhRDNlAvaj4U3juhUtJNq0DsY8M2AYwwb9rEq2hrC3IcIcEt++iJA==",
"dev": true,
"license": "MIT",
"engines": {
@@ -3983,15 +3983,15 @@
}
},
"node_modules/@typescript-eslint/type-utils": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.58.2.tgz",
"integrity": "sha512-Z7EloNR/B389FvabdGeTo2XMs4W9TjtPiO9DAsmT0yom0bwlPyRjkJ1uCdW1DvrrrYP50AJZ9Xc3sByZA9+dcg==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-8.59.1.tgz",
"integrity": "sha512-klWPBR2ciQHS3f++ug/mVnWKPjBUo7icEL3FAO1lhAR1Z1i5NQYZ1EannMSRYcq5qCv5wNALlXr6fksRHyYl7w==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/types": "8.58.2",
"@typescript-eslint/typescript-estree": "8.58.2",
"@typescript-eslint/utils": "8.58.2",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/typescript-estree": "8.59.1",
"@typescript-eslint/utils": "8.59.1",
"debug": "^4.4.3",
"ts-api-utils": "^2.5.0"
},
@@ -4008,9 +4008,9 @@
}
},
"node_modules/@typescript-eslint/types": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.58.2.tgz",
"integrity": "sha512-9TukXyATBQf/Jq9AMQXfvurk+G5R2MwfqQGDR2GzGz28HvY/lXNKGhkY+6IOubwcquikWk5cjlgPvD2uAA7htQ==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.59.1.tgz",
"integrity": "sha512-ZDCjgccSdYPw5Bxh+my4Z0lJU96ZDN7jbBzvmEn0FZx3RtU1C7VWl6NbDx94bwY3V5YsgwRzJPOgeY2Q/nLG8A==",
"dev": true,
"license": "MIT",
"engines": {
@@ -4022,16 +4022,16 @@
}
},
"node_modules/@typescript-eslint/typescript-estree": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.58.2.tgz",
"integrity": "sha512-ELGuoofuhhoCvNbQjFFiobFcGgcDCEm0ThWdmO4Z0UzLqPXS3KFvnEZ+SHewwOYHjM09tkzOWXNTv9u6Gqtyuw==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.59.1.tgz",
"integrity": "sha512-OUd+vJS05sSkOip+BkZ/2NS8RMxrAAJemsC6vU3kmfLyeaJT0TftHkV9mcx2107MmsBVXXexhVu4F0TZXyMl4g==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/project-service": "8.58.2",
"@typescript-eslint/tsconfig-utils": "8.58.2",
"@typescript-eslint/types": "8.58.2",
"@typescript-eslint/visitor-keys": "8.58.2",
"@typescript-eslint/project-service": "8.59.1",
"@typescript-eslint/tsconfig-utils": "8.59.1",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/visitor-keys": "8.59.1",
"debug": "^4.4.3",
"minimatch": "^10.2.2",
"semver": "^7.7.3",
@@ -4066,16 +4066,16 @@
}
},
"node_modules/@typescript-eslint/utils": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.58.2.tgz",
"integrity": "sha512-QZfjHNEzPY8+l0+fIXMvuQ2sJlplB4zgDZvA+NmvZsZv3EQwOcc1DuIU1VJUTWZ/RKouBMhDyNaBMx4sWvrzRA==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.59.1.tgz",
"integrity": "sha512-3pIeoXhCeYH9FSCBI8P3iNwJlGuzPlYKkTlen2O9T1DSeeg8UG8jstq6BLk+Mda0qup7mgk4z4XL4OzRaxZ8LA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@eslint-community/eslint-utils": "^4.9.1",
"@typescript-eslint/scope-manager": "8.58.2",
"@typescript-eslint/types": "8.58.2",
"@typescript-eslint/typescript-estree": "8.58.2"
"@typescript-eslint/scope-manager": "8.59.1",
"@typescript-eslint/types": "8.59.1",
"@typescript-eslint/typescript-estree": "8.59.1"
},
"engines": {
"node": "^18.18.0 || ^20.9.0 || >=21.1.0"
@@ -4090,13 +4090,13 @@
}
},
"node_modules/@typescript-eslint/visitor-keys": {
"version": "8.58.2",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.58.2.tgz",
"integrity": "sha512-f1WO2Lx8a9t8DARmcWAUPJbu0G20bJlj8L4z72K00TMeJAoyLr/tHhI/pzYBLrR4dXWkcxO1cWYZEOX8DKHTqA==",
"version": "8.59.1",
"resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.59.1.tgz",
"integrity": "sha512-LdDNl6C5iJExcM0Yh0PwAIBb9PrSiCsWamF/JyEZawm3kFDnRoaq3LGE4bpyRao/fWeGKKyw7icx0YxrLFC5Cg==",
"dev": true,
"license": "MIT",
"dependencies": {
"@typescript-eslint/types": "8.58.2",
"@typescript-eslint/types": "8.59.1",
"eslint-visitor-keys": "^5.0.0"
},
"engines": {
@@ -8356,9 +8356,9 @@
"license": "MIT"
},
"node_modules/lodash-es": {
"version": "4.17.23",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.17.23.tgz",
"integrity": "sha512-kVI48u3PZr38HdYz98UmfPnXl2DXrpdctLrFLCd3kOx1xUkOmpFPx7gCWWM5MPkL/fD8zb+Ph0QzjGFs4+hHWg==",
"version": "4.18.1",
"resolved": "https://registry.npmjs.org/lodash-es/-/lodash-es-4.18.1.tgz",
"integrity": "sha512-J8xewKD/Gk22OZbhpOVSwcs60zhd95ESDwezOFuA3/099925PdHJ7OFHNTGtajL3AlZkykD32HykiMo+BIBI8A==",
"license": "MIT"
},
"node_modules/lodash.merge": {
@@ -9951,9 +9951,9 @@
}
},
"node_modules/postcss": {
"version": "8.5.8",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.8.tgz",
"integrity": "sha512-OW/rX8O/jXnm82Ey1k44pObPtdblfiuWnrd8X7GJ7emImCOstunGbXUpp7HdBrFQX6rJzn3sPT397Wp5aCwCHg==",
"version": "8.5.12",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.12.tgz",
"integrity": "sha512-W62t/Se6rA0Az3DfCL0AqJwXuKwBeYg6nOaIgzP+xZ7N5BFCI7DYi1qs6ygUYT6rvfi6t9k65UMLJC+PHZpDAA==",
"dev": true,
"funding": [
{
@@ -10280,9 +10280,9 @@
}
},
"node_modules/react-dropzone": {
"version": "14.3.8",
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-14.3.8.tgz",
"integrity": "sha512-sBgODnq+lcA4P296DY4wacOZz3JFpD99fp+hb//iBO2HHnyeZU3FwWyXJ6salNpqQdsZrgMrotuko/BdJMV8Ug==",
"version": "15.0.0",
"resolved": "https://registry.npmjs.org/react-dropzone/-/react-dropzone-15.0.0.tgz",
"integrity": "sha512-lGjYV/EoqEjEWPnmiSvH4v5IoIAwQM2W4Z1C0Q/Pw2xD0eVzKPS359BQTUMum+1fa0kH2nrKjuavmTPOGhpLPg==",
"license": "MIT",
"dependencies": {
"attr-accept": "^2.2.4",
@@ -10307,9 +10307,9 @@
}
},
"node_modules/react-i18next": {
"version": "17.0.2",
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-17.0.2.tgz",
"integrity": "sha512-shBftH2vaTWK2Bsp7FiL+cevx3xFJlvFxmsDFQSrJc+6twHkP0tv/bGa01VVWzpreUVVwU+3Hev5iFqRg65RwA==",
"version": "17.0.6",
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-17.0.6.tgz",
"integrity": "sha512-WzJ6SMKF+GTD7JZZqxSR1AKKmXjaSu39sClUrNlwxS4Tl7a99O+ltFy6yhPMO+wgZuxpQjJ2PZkfrQKmAqrLhw==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.29.2",
@@ -10437,9 +10437,9 @@
}
},
"node_modules/react-router": {
"version": "7.14.1",
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.1.tgz",
"integrity": "sha512-5BCvFskyAAVumqhEKh/iPhLOIkfxcEUz8WqFIARCkMg8hZZzDYX9CtwxXA0e+qT8zAxmMC0x3Ckb9iMONwc5jg==",
"version": "7.14.2",
"resolved": "https://registry.npmjs.org/react-router/-/react-router-7.14.2.tgz",
"integrity": "sha512-yCqNne6I8IB6rVCH7XUvlBK7/QKyqypBFGv+8dj4QBFJiiRX+FG7/nkdAvGElyvVZ/HQP5N19wzteuTARXi5Gw==",
"license": "MIT",
"dependencies": {
"cookie": "^1.0.1",
@@ -10459,12 +10459,12 @@
}
},
"node_modules/react-router-dom": {
"version": "7.14.1",
"resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.1.tgz",
"integrity": "sha512-ZkrQuwwhGibjQLqH1eCdyiZyLWglPxzxdl5tgwgKEyCSGC76vmAjleGocRe3J/MLfzMUIKwaFJWpFVJhK3d2xA==",
"version": "7.14.2",
"resolved": "https://registry.npmjs.org/react-router-dom/-/react-router-dom-7.14.2.tgz",
"integrity": "sha512-YZcM5ES8jJSM+KrJ9BdvHHqlnGTg5tH3sC5ChFRj4inosKctdyzBDhOyyHdGk597q2OT6NTrCA1OvB/YDwfekQ==",
"license": "MIT",
"dependencies": {
"react-router": "7.14.1"
"react-router": "7.14.2"
},
"engines": {
"node": ">=20.0.0"
@@ -10788,14 +10788,14 @@
"license": "Unlicense"
},
"node_modules/rolldown": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.15.tgz",
"integrity": "sha512-Ff31guA5zT6WjnGp0SXw76X6hzGRk/OQq2hE+1lcDe+lJdHSgnSX6nK3erbONHyCbpSj9a9E+uX/OvytZoWp2g==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/rolldown/-/rolldown-1.0.0-rc.17.tgz",
"integrity": "sha512-ZrT53oAKrtA4+YtBWPQbtPOxIbVDbxT0orcYERKd63VJTF13zPcgXTvD4843L8pcsI7M6MErt8QtON6lrB9tyA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@oxc-project/types": "=0.124.0",
"@rolldown/pluginutils": "1.0.0-rc.15"
"@oxc-project/types": "=0.127.0",
"@rolldown/pluginutils": "1.0.0-rc.17"
},
"bin": {
"rolldown": "bin/cli.mjs"
@@ -10804,27 +10804,27 @@
"node": "^20.19.0 || >=22.12.0"
},
"optionalDependencies": {
"@rolldown/binding-android-arm64": "1.0.0-rc.15",
"@rolldown/binding-darwin-arm64": "1.0.0-rc.15",
"@rolldown/binding-darwin-x64": "1.0.0-rc.15",
"@rolldown/binding-freebsd-x64": "1.0.0-rc.15",
"@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.15",
"@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.15",
"@rolldown/binding-linux-arm64-musl": "1.0.0-rc.15",
"@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.15",
"@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.15",
"@rolldown/binding-linux-x64-gnu": "1.0.0-rc.15",
"@rolldown/binding-linux-x64-musl": "1.0.0-rc.15",
"@rolldown/binding-openharmony-arm64": "1.0.0-rc.15",
"@rolldown/binding-wasm32-wasi": "1.0.0-rc.15",
"@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.15",
"@rolldown/binding-win32-x64-msvc": "1.0.0-rc.15"
"@rolldown/binding-android-arm64": "1.0.0-rc.17",
"@rolldown/binding-darwin-arm64": "1.0.0-rc.17",
"@rolldown/binding-darwin-x64": "1.0.0-rc.17",
"@rolldown/binding-freebsd-x64": "1.0.0-rc.17",
"@rolldown/binding-linux-arm-gnueabihf": "1.0.0-rc.17",
"@rolldown/binding-linux-arm64-gnu": "1.0.0-rc.17",
"@rolldown/binding-linux-arm64-musl": "1.0.0-rc.17",
"@rolldown/binding-linux-ppc64-gnu": "1.0.0-rc.17",
"@rolldown/binding-linux-s390x-gnu": "1.0.0-rc.17",
"@rolldown/binding-linux-x64-gnu": "1.0.0-rc.17",
"@rolldown/binding-linux-x64-musl": "1.0.0-rc.17",
"@rolldown/binding-openharmony-arm64": "1.0.0-rc.17",
"@rolldown/binding-wasm32-wasi": "1.0.0-rc.17",
"@rolldown/binding-win32-arm64-msvc": "1.0.0-rc.17",
"@rolldown/binding-win32-x64-msvc": "1.0.0-rc.17"
}
},
"node_modules/rolldown/node_modules/@rolldown/pluginutils": {
"version": "1.0.0-rc.15",
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.15.tgz",
"integrity": "sha512-UromN0peaE53IaBRe9W7CjrZgXl90fqGpK+mIZbA3qSTeYqg3pqpROBdIPvOG3F5ereDHNwoHBI2e50n1BDr1g==",
"version": "1.0.0-rc.17",
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-rc.17.tgz",
"integrity": "sha512-n8iosDOt6Ig1UhJ2AYqoIhHWh/isz0xpicHTzpKBeotdVsTEcxsSA/i3EVM7gQAj0rU27OLAxCjzlj15IWY7bg==",
"dev": true,
"license": "MIT"
},
@@ -11460,14 +11460,14 @@
}
},
"node_modules/tinyglobby": {
"version": "0.2.15",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz",
"integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==",
"version": "0.2.16",
"resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.16.tgz",
"integrity": "sha512-pn99VhoACYR8nFHhxqix+uvsbXineAasWm5ojXoN8xEwK5Kd3/TrhNn1wByuD52UxWRLy8pu+kRMniEi6Eq9Zg==",
"dev": true,
"license": "MIT",
"dependencies": {
"fdir": "^6.5.0",
"picomatch": "^4.0.3"
"picomatch": "^4.0.4"
},
"engines": {
"node": ">=12.0.0"
@@ -11699,9 +11699,9 @@
}
},
"node_modules/typescript": {
"version": "5.9.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz",
"integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-6.0.3.tgz",
"integrity": "sha512-y2TvuxSZPDyQakkFRPZHKFm+KKVqIisdg9/CZwm9ftvKXLP8NRWj38/ODjNbr43SsoXqNuAisEf1GdCxqWcdBw==",
"devOptional": true,
"license": "Apache-2.0",
"bin": {
@@ -12001,17 +12001,17 @@
}
},
"node_modules/vite": {
"version": "8.0.8",
"resolved": "https://registry.npmjs.org/vite/-/vite-8.0.8.tgz",
"integrity": "sha512-dbU7/iLVa8KZALJyLOBOQ88nOXtNG8vxKuOT4I2mD+Ya70KPceF4IAmDsmU0h1Qsn5bPrvsY9HJstCRh3hG6Uw==",
"version": "8.0.10",
"resolved": "https://registry.npmjs.org/vite/-/vite-8.0.10.tgz",
"integrity": "sha512-rZuUu9j6J5uotLDs+cAA4O5H4K1SfPliUlQwqa6YEwSrWDZzP4rhm00oJR5snMewjxF5V/K3D4kctsUTsIU9Mw==",
"dev": true,
"license": "MIT",
"dependencies": {
"lightningcss": "^1.32.0",
"picomatch": "^4.0.4",
"postcss": "^8.5.8",
"rolldown": "1.0.0-rc.15",
"tinyglobby": "^0.2.15"
"postcss": "^8.5.10",
"rolldown": "1.0.0-rc.17",
"tinyglobby": "^0.2.16"
},
"bin": {
"vite": "bin/vite.js"

View File

@@ -39,12 +39,12 @@
"react": "^19.1.0",
"react-chartjs-2": "^5.3.0",
"react-dom": "^19.2.5",
"react-dropzone": "^14.3.8",
"react-dropzone": "^15.0.0",
"react-google-drive-picker": "^1.2.2",
"react-i18next": "^17.0.2",
"react-i18next": "^17.0.6",
"react-markdown": "^9.0.1",
"react-redux": "^9.2.0",
"react-router-dom": "^7.14.1",
"react-router-dom": "^7.14.2",
"react-syntax-highlighter": "^16.1.1",
"reactflow": "^11.11.4",
"rehype-katex": "^7.0.1",
@@ -58,7 +58,7 @@
"@types/react": "^19.2.14",
"@types/react-dom": "^19.2.3",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^8.58.2",
"@typescript-eslint/eslint-plugin": "^8.59.1",
"@typescript-eslint/parser": "^8.46.3",
"@vitejs/plugin-react": "^6.0.1",
"eslint": "^9.39.1",
@@ -71,13 +71,13 @@
"eslint-plugin-unused-imports": "^4.1.4",
"husky": "^9.1.7",
"lint-staged": "^16.4.0",
"postcss": "^8.4.49",
"postcss": "^8.5.12",
"prettier": "^3.5.3",
"prettier-plugin-tailwindcss": "^0.7.2",
"tailwindcss": "^4.2.2",
"tw-animate-css": "^1.4.0",
"typescript": "^5.8.3",
"vite": "^8.0.8",
"typescript": "^6.0.3",
"vite": "^8.0.10",
"vite-plugin-svgr": "^4.3.0"
}
}

View File

@@ -85,6 +85,13 @@ export default function App() {
}
>
<Route index element={<Conversation />} />
{/* One dynamic route (accepting "new" or a UUID) so the
/c/new → /c/<id> replace doesn't remount Conversation. */}
<Route path="/c/:conversationId" element={<Conversation />} />
<Route
path="/agents/:agentId/c/:conversationId"
element={<Conversation />}
/>
<Route path="/settings/*" element={<Setting />} />
<Route path="/agents/*" element={<Agents />} />
</Route>

View File

@@ -25,6 +25,7 @@ import UnPin from './assets/unpin.svg';
import Help from './components/Help';
import {
handleAbort,
loadConversation,
selectQueries,
setConversation,
updateConversationId,
@@ -50,6 +51,7 @@ import {
setSelectedAgent,
setSharedAgents,
} from './preferences/preferenceSlice';
import { AppDispatch } from './store';
import Upload from './upload/Upload';
interface NavigationProps {
@@ -58,7 +60,7 @@ interface NavigationProps {
}
export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
const dispatch = useDispatch();
const dispatch = useDispatch<AppDispatch>();
const navigate = useNavigate();
const { t } = useTranslation();
@@ -182,7 +184,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
resetConversation();
dispatch(setSelectedAgent(agent));
if (isMobile || isTablet) setNavOpen(!navOpen);
navigate('/');
navigate(agent.id ? `/agents/${agent.id}/c/new` : '/c/new');
};
const handleTogglePin = (agent: Agent) => {
@@ -200,20 +202,21 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
try {
dispatch(setSelectedAgent(null));
const response = await conversationService.getConversation(index, token);
if (!response.ok) {
navigate('/');
// Pre-fetch to choose the route shape (owned-agent / shared / none).
const result = await dispatch(
loadConversation({ id: index, force: true }),
).unwrap();
// Stale: a newer load has already updated Redux; the URL is
// wherever that newer flow lands, leave it alone.
if (result.stale) return;
const data = result.data;
if (!data) {
navigate('/c/new');
return;
}
const data = await response.json();
if (!data) return;
dispatch(setConversation(data.queries));
dispatch(updateConversationId({ query: { conversationId: index } }));
if (!data.agent_id) {
navigate('/');
navigate(`/c/${index}`);
return;
}
@@ -224,7 +227,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
token,
);
if (!sharedResponse.ok) {
navigate('/');
navigate(`/c/${index}`);
return;
}
agent = await sharedResponse.json();
@@ -232,7 +235,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
} else {
const agentResponse = await userService.getAgent(data.agent_id, token);
if (!agentResponse.ok) {
navigate('/');
navigate(`/c/${index}`);
return;
}
agent = await agentResponse.json();
@@ -240,12 +243,12 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
navigate(`/agents/shared/${agent.shared_token}`);
} else {
await Promise.resolve(dispatch(setSelectedAgent(agent)));
navigate('/');
navigate(`/agents/${data.agent_id}/c/${index}`);
}
}
} catch (error) {
console.error('Error handling conversation click:', error);
navigate('/');
navigate('/c/new');
}
};
@@ -264,6 +267,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
if (queries && queries?.length > 0) {
resetConversation();
}
navigate('/c/new');
};
async function updateConversationName(updatedConversation: {
@@ -275,7 +279,6 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
.then((response) => response.json())
.then((data) => {
if (data) {
navigate('/');
fetchConversations();
}
})
@@ -370,7 +373,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
</button>
</div>
<NavLink
to={'/'}
to={'/c/new'}
onClick={() => {
if (isMobile || isTablet) {
setNavOpen(!navOpen);

View File

@@ -174,7 +174,7 @@ export default function AgentCard({
if (section === 'user') {
if (agent.status === 'published') {
dispatch(setSelectedAgent(agent));
navigate(`/`);
navigate(agent.id ? `/agents/${agent.id}/c/new` : '/c/new');
}
}
if (section === 'shared') {

View File

@@ -448,7 +448,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
setUserTools(tools);
};
const getModels = async () => {
const response = await modelService.getModels(null);
const response = await modelService.getModels(token);
if (!response.ok) throw new Error('Failed to fetch models');
const data = await response.json();
const transformed = modelService.transformModels(data.models || []);
@@ -565,8 +565,22 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
setJsonSchemaText(jsonText);
setJsonSchemaValid(true);
}
setAgent(data);
initialAgentRef.current = data;
// Backfill required fields so older agents (created before
// agent_type / prompt_id / models existed) don't fail
// ``isPublishable()`` and leave Save permanently disabled.
const normalized = {
...data,
agent_type: data.agent_type || 'classic',
prompt_id: data.prompt_id || 'default',
retriever: data.retriever || 'classic',
chunks: data.chunks || '2',
tools: data.tools || [],
sources: data.sources || [],
models: data.models || [],
default_model_id: data.default_model_id || '',
};
setAgent(normalized);
initialAgentRef.current = normalized;
};
getAgent();
}
@@ -1041,10 +1055,24 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
isOpen={isModelsPopupOpen}
onClose={() => setIsModelsPopupOpen(false)}
anchorRef={modelAnchorButtonRef}
options={availableModels.map((model) => ({
id: model.id,
label: model.display_name,
}))}
options={(() => {
const builtinLabel = t(
'settings.customModels.modelsGroup.builtin',
);
const userLabel = t('settings.customModels.modelsGroup.user');
const builtin: OptionType[] = [];
const user: OptionType[] = [];
availableModels.forEach((model) => {
const opt: OptionType = {
id: model.id,
label: model.display_name,
group: model.source === 'user' ? userLabel : builtinLabel,
};
if (model.source === 'user') user.push(opt);
else builtin.push(opt);
});
return [...builtin, ...user];
})()}
selectedIds={selectedModelIds}
onSelectionChange={(newSelectedIds: Set<string | number>) =>
setSelectedModelIds(

View File

@@ -1,8 +1,18 @@
import { useTranslation } from 'react-i18next';
import EditIcon from '../assets/edit.svg';
import AgentImage from '../components/AgentImage';
import { getToolDisplayName } from '../utils/toolUtils';
import { Agent } from './types';
export default function SharedAgentCard({ agent }: { agent: Agent }) {
export default function SharedAgentCard({
agent,
onEdit,
}: {
agent: Agent;
onEdit?: () => void;
}) {
const { t } = useTranslation();
// Check if shared metadata exists and has properties (type is 'any' so we validate it's a non-empty object)
const hasSharedMetadata =
agent.shared_metadata &&
@@ -11,14 +21,14 @@ export default function SharedAgentCard({ agent }: { agent: Agent }) {
Object.keys(agent.shared_metadata).length > 0;
return (
<div className="border-border dark:border-border flex w-full max-w-[720px] flex-col rounded-3xl border p-6 shadow-xs sm:w-fit sm:min-w-[480px]">
<div className="flex items-center gap-3">
<div className="flex items-start gap-3">
<div className="flex h-12 w-12 items-center justify-center overflow-hidden rounded-full p-1">
<AgentImage
src={agent.image}
className="h-full w-full rounded-full object-contain"
/>
</div>
<div className="flex max-h-[92px] w-[80%] flex-col gap-px">
<div className="flex max-h-[92px] flex-1 flex-col gap-px">
<h2 className="text-foreground text-base font-semibold sm:text-lg">
{agent.name}
</h2>
@@ -26,6 +36,17 @@ export default function SharedAgentCard({ agent }: { agent: Agent }) {
{agent.description}
</p>
</div>
{onEdit && (
<button
type="button"
onClick={onEdit}
className="border-border hover:bg-accent text-foreground flex shrink-0 items-center gap-1.5 rounded-full border px-3 py-1.5 text-sm font-medium transition-colors"
aria-label={t('agents.edit')}
>
<img src={EditIcon} alt="" className="h-3.5 w-3.5" />
{t('agents.edit')}
</button>
)}
</div>
{hasSharedMetadata && (
<div className="mt-4 flex items-center gap-8">

View File

@@ -42,7 +42,9 @@ import { MultiSelect } from '@/components/ui/multi-select';
import {
Select,
SelectContent,
SelectGroup,
SelectItem,
SelectLabel,
SelectTrigger,
SelectValue,
} from '@/components/ui/select';
@@ -706,7 +708,7 @@ function WorkflowBuilderInner() {
useEffect(() => {
const loadModelsAndTools = async () => {
try {
const modelsResponse = await modelService.getModels(null);
const modelsResponse = await modelService.getModels(token);
if (modelsResponse.ok) {
const modelsData = await modelsResponse.json();
const transformedModels = modelService.transformModels(
@@ -732,7 +734,7 @@ function WorkflowBuilderInner() {
}
};
loadModelsAndTools();
}, []);
}, [token]);
useEffect(() => {
if (!selectedNode || selectedNode.type !== 'agent') return;
@@ -1847,15 +1849,54 @@ function WorkflowBuilderInner() {
<SelectValue placeholder="Select a model" />
</SelectTrigger>
<SelectContent>
{availableModels.map((model) => (
<SelectItem
key={model.id}
value={model.id}
>
{model.display_name} ·{' '}
{model.provider}
</SelectItem>
))}
{(() => {
const builtin = availableModels.filter(
(m) => m.source !== 'user',
);
const user = availableModels.filter(
(m) => m.source === 'user',
);
return (
<>
{builtin.length > 0 && (
<SelectGroup>
<SelectLabel>
{t(
'settings.customModels.modelsGroup.builtin',
)}
</SelectLabel>
{builtin.map((model) => (
<SelectItem
key={model.id}
value={model.id}
>
{model.display_name} ·{' '}
{model.provider}
</SelectItem>
))}
</SelectGroup>
)}
{user.length > 0 && (
<SelectGroup>
<SelectLabel>
{t(
'settings.customModels.modelsGroup.user',
)}
</SelectLabel>
{user.map((model) => (
<SelectItem
key={model.id}
value={model.id}
>
{model.display_name} ·{' '}
{model.provider}
</SelectItem>
))}
</SelectGroup>
)}
</>
);
})()}
</SelectContent>
</Select>
</div>

View File

@@ -80,6 +80,22 @@ const apiClient = {
return response;
}),
patch: (
url: string,
data: any,
token: string | null,
headers = {},
signal?: AbortSignal,
): Promise<any> =>
fetch(`${baseURL}${url}`, {
method: 'PATCH',
headers: getHeaders(token, headers),
body: JSON.stringify(data),
signal,
}).then((response) => {
return response;
}),
putFormData: (
url: string,
formData: FormData,

View File

@@ -76,6 +76,10 @@ const endpoints = {
GET_ARTIFACT: (artifactId: string) => `/api/artifact/${artifactId}`,
WORKFLOWS: '/api/workflows',
WORKFLOW: (id: string) => `/api/workflows/${id}`,
CUSTOM_MODELS: '/api/user/models',
CUSTOM_MODEL: (id: string) => `/api/user/models/${id}`,
CUSTOM_MODEL_TEST: (id: string) => `/api/user/models/${id}/test`,
CUSTOM_MODEL_TEST_PAYLOAD: '/api/user/models/test',
},
V1: {
CHAT_COMPLETIONS: '/v1/chat/completions',
@@ -88,6 +92,7 @@ const endpoints = {
FEEDBACK: '/api/feedback',
CONVERSATION: (id: string) => `/api/get_single_conversation?id=${id}`,
CONVERSATIONS: '/api/get_conversations',
MESSAGE_TAIL: (messageId: string) => `/api/messages/${messageId}/tail`,
SHARE_CONVERSATION: (isPromptable: boolean) =>
`/api/share?isPromptable=${isPromptable}`,
SHARED_CONVERSATION: (identifier: string) =>

View File

@@ -6,18 +6,20 @@ const conversationService = {
data: any,
token: string | null,
signal: AbortSignal,
headers: Record<string, string> = {},
): Promise<any> =>
apiClient.post(endpoints.CONVERSATION.ANSWER, data, token, {}, signal),
apiClient.post(endpoints.CONVERSATION.ANSWER, data, token, headers, signal),
answerStream: (
data: any,
token: string | null,
signal: AbortSignal,
headers: Record<string, string> = {},
): Promise<any> =>
apiClient.post(
endpoints.CONVERSATION.ANSWER_STREAMING,
data,
token,
{},
headers,
signal,
),
search: (data: any, token: string | null): Promise<any> =>
@@ -26,6 +28,8 @@ const conversationService = {
apiClient.post(endpoints.CONVERSATION.FEEDBACK, data, token, {}),
getConversation: (id: string, token: string | null): Promise<any> =>
apiClient.get(endpoints.CONVERSATION.CONVERSATION(id), token, {}),
tailMessage: (messageId: string, token: string | null): Promise<any> =>
apiClient.get(endpoints.CONVERSATION.MESSAGE_TAIL(messageId), token, {}),
getConversations: (token: string | null): Promise<any> =>
apiClient.get(endpoints.CONVERSATION.CONVERSATIONS, token, {}),
shareConversation: (

View File

@@ -0,0 +1,162 @@
import apiClient from '../client';
import endpoints from '../endpoints';
import type {
CreateCustomModelPayload,
CustomModel,
CustomModelTestResult,
} from '../../models/types';
/**
 * Read a fetch Response body as JSON, throwing on non-2xx statuses.
 *
 * The body is parsed leniently: an empty or malformed payload yields
 * `null` rather than a parse exception. On a failed response an
 * `Error` is thrown whose message prefers the server-provided
 * `error`/`message` field; the HTTP `status` and the parsed `payload`
 * are attached for callers that want to inspect them.
 */
const parseJsonOrError = async (response: Response): Promise<any> => {
  const raw = await response.text();

  let parsed: any = null;
  try {
    parsed = raw ? JSON.parse(raw) : null;
  } catch {
    parsed = null;
  }

  if (response.ok) {
    return parsed;
  }

  const detail =
    (parsed && (parsed.error || parsed.message)) ||
    `Request failed with status ${response.status}`;
  const failure = new Error(detail) as Error & {
    status?: number;
    payload?: unknown;
  };
  failure.status = response.status;
  failure.payload = parsed;
  throw failure;
};
/**
 * Convert a test-endpoint Response into a CustomModelTestResult.
 *
 * Test endpoints never throw from this service: a non-2xx status is
 * reported as `{ ok: false, error }`, preferring a server-supplied
 * `error`/`message` field. A 2xx response that already carries a
 * boolean `ok` is passed through unchanged; any other 2xx body is
 * treated as success.
 *
 * Extracted because `testCustomModelPayload` and `testCustomModel`
 * previously duplicated this parse/shape logic line-for-line.
 */
const readTestResult = async (
  response: Response,
): Promise<CustomModelTestResult> => {
  const text = await response.text();
  let body: any = null;
  if (text) {
    try {
      body = JSON.parse(text);
    } catch {
      body = null;
    }
  }
  if (!response.ok) {
    return {
      ok: false,
      error:
        (body && (body.error || body.message)) ||
        `Test failed with status ${response.status}`,
    };
  }
  if (body && typeof body.ok === 'boolean') {
    return body as CustomModelTestResult;
  }
  return { ok: true };
};

/** CRUD + connectivity-test client for user-defined custom models. */
const customModelsService = {
  /**
   * List the caller's custom models. Tolerates both a bare array and a
   * `{ models: [...] }` envelope; anything else yields an empty list.
   * Throws (via parseJsonOrError) on a non-2xx response.
   */
  listCustomModels: async (token: string | null): Promise<CustomModel[]> => {
    const response = await apiClient.get(endpoints.USER.CUSTOM_MODELS, token);
    const data = await parseJsonOrError(response);
    if (Array.isArray(data)) return data as CustomModel[];
    if (data && Array.isArray(data.models)) return data.models as CustomModel[];
    return [];
  },

  /** Create a custom model; throws on a non-2xx response. */
  createCustomModel: async (
    payload: CreateCustomModelPayload,
    token: string | null,
  ): Promise<CustomModel> => {
    const response = await apiClient.post(
      endpoints.USER.CUSTOM_MODELS,
      payload,
      token,
    );
    return (await parseJsonOrError(response)) as CustomModel;
  },

  /** Partially update a custom model; throws on a non-2xx response. */
  updateCustomModel: async (
    id: string,
    payload: Partial<CreateCustomModelPayload>,
    token: string | null,
  ): Promise<CustomModel> => {
    const response = await apiClient.patch(
      endpoints.USER.CUSTOM_MODEL(id),
      payload,
      token,
    );
    return (await parseJsonOrError(response)) as CustomModel;
  },

  /** Delete a custom model; throws on a non-2xx response. */
  deleteCustomModel: async (
    id: string,
    token: string | null,
  ): Promise<void> => {
    const response = await apiClient.delete(
      endpoints.USER.CUSTOM_MODEL(id),
      token,
    );
    if (!response.ok) {
      // parseJsonOrError throws for non-ok responses; reuse it so the
      // error shape (message/status/payload) matches the other methods.
      await parseJsonOrError(response);
    }
  },

  /**
   * Dry-run connectivity test for an unsaved model configuration.
   * Never throws; failures are reported in the returned result.
   */
  testCustomModelPayload: async (
    payload: {
      base_url: string;
      api_key: string;
      upstream_model_id: string;
    },
    token: string | null,
  ): Promise<CustomModelTestResult> => {
    const response = await apiClient.post(
      endpoints.USER.CUSTOM_MODEL_TEST_PAYLOAD,
      payload,
      token,
    );
    return readTestResult(response);
  },

  /**
   * Connectivity test for a stored model. Only non-empty overrides are
   * sent; the server falls back to stored values for omitted fields.
   * Never throws; failures are reported in the returned result.
   */
  testCustomModel: async (
    id: string,
    token: string | null,
    overrides: {
      base_url?: string;
      api_key?: string;
      upstream_model_id?: string;
    } = {},
  ): Promise<CustomModelTestResult> => {
    const requestBody: Record<string, string> = {};
    if (overrides.base_url) requestBody.base_url = overrides.base_url;
    if (overrides.api_key) requestBody.api_key = overrides.api_key;
    if (overrides.upstream_model_id)
      requestBody.upstream_model_id = overrides.upstream_model_id;

    const response = await apiClient.post(
      endpoints.USER.CUSTOM_MODEL_TEST(id),
      requestBody,
      token,
    );
    return readTestResult(response);
  },
};

export default customModelsService;

View File

@@ -19,6 +19,7 @@ const modelService = {
supports_tools: model.supports_tools,
supports_structured_output: model.supports_structured_output,
supports_streaming: model.supports_streaming,
source: model.source,
})),
};

View File

@@ -40,7 +40,7 @@ export default function ActionButtons({
query: { conversationId: null },
}),
);
navigate('/');
navigate('/c/new');
};
return (
<div

View File

@@ -7,6 +7,7 @@ import RoundedTick from '../assets/rounded-tick.svg';
import {
selectAvailableModels,
selectSelectedModel,
selectToken,
setAvailableModels,
setModelsLoading,
setSelectedModel,
@@ -18,17 +19,26 @@ export default function DropdownModel() {
const dispatch = useDispatch();
const selectedModel = useSelector(selectSelectedModel);
const availableModels = useSelector(selectAvailableModels);
const token = useSelector(selectToken);
const dropdownRef = React.useRef<HTMLDivElement>(null);
// Tracks which token the cached availableModels were loaded for.
// Without this, the early-return below pins the anonymous/built-in
// list forever once it's populated — login/logout never refetches
// and a user's BYOM models stay invisible.
const lastLoadedTokenRef = React.useRef<string | null | undefined>(undefined);
const [isOpen, setIsOpen] = React.useState(false);
useEffect(() => {
const loadModels = async () => {
if ((availableModels?.length ?? 0) > 0) {
if (
(availableModels?.length ?? 0) > 0 &&
lastLoadedTokenRef.current === token
) {
return;
}
dispatch(setModelsLoading(true));
try {
const response = await modelService.getModels(null);
const response = await modelService.getModels(token);
if (!response.ok) {
throw new Error(`API error: ${response.status}`);
}
@@ -37,6 +47,7 @@ export default function DropdownModel() {
const transformed = modelService.transformModels(models);
dispatch(setAvailableModels(transformed));
lastLoadedTokenRef.current = token;
if (!selectedModel && transformed.length > 0) {
const defaultModel =
transformed.find((m) => m.id === data.default_model_id) ||
@@ -59,7 +70,7 @@ export default function DropdownModel() {
};
loadModels();
}, [availableModels?.length, dispatch, selectedModel]);
}, [availableModels?.length, dispatch, selectedModel, token]);
const handleClickOutside = (event: MouseEvent) => {
if (

View File

@@ -11,6 +11,7 @@ export type OptionType = {
id: string | number;
label: string;
icon?: string | React.ReactNode;
group?: string;
[key: string]: any;
};
@@ -227,43 +228,75 @@ export default function MultiSelectPopup({
</p>
</div>
) : (
filteredOptions.map((option) => {
const isSelected = selectedIds.has(option.id);
return (
<div
key={option.id}
onClick={() => handleOptionClick(option.id)}
className="dark:border-border dark:hover:bg-accent hover:bg-accent flex cursor-pointer items-center justify-between border-b border-[#D9D9D9] p-3 last:border-b-0"
role="option"
aria-selected={isSelected}
>
<div className="mr-3 flex grow items-center overflow-hidden">
{option.icon && renderIcon(option.icon)}
<p
className="overflow-hidden text-sm font-medium text-ellipsis whitespace-nowrap text-gray-900 dark:text-white"
title={option.label}
>
{option.label}
</p>
</div>
<div className="shrink-0">
<div
className={`border-border bg-card flex h-4 w-4 items-center justify-center rounded-xs border-2`}
aria-hidden="true"
>
{isSelected && (
<img
src={CheckmarkIcon}
alt="checkmark"
width={10}
height={10}
/>
)}
(() => {
const hasGroups = filteredOptions.some((o) => !!o.group);
const renderOption = (option: OptionType) => {
const isSelected = selectedIds.has(option.id);
return (
<div
key={option.id}
onClick={() => handleOptionClick(option.id)}
className="dark:border-border dark:hover:bg-accent hover:bg-accent flex cursor-pointer items-center justify-between border-b border-[#D9D9D9] p-3 last:border-b-0"
role="option"
aria-selected={isSelected}
>
<div className="mr-3 flex grow items-center overflow-hidden">
{option.icon && renderIcon(option.icon)}
<p
className="overflow-hidden text-sm font-medium text-ellipsis whitespace-nowrap text-gray-900 dark:text-white"
title={option.label}
>
{option.label}
</p>
</div>
<div className="shrink-0">
<div
className={`border-border bg-card flex h-4 w-4 items-center justify-center rounded-xs border-2`}
aria-hidden="true"
>
{isSelected && (
<img
src={CheckmarkIcon}
alt="checkmark"
width={10}
height={10}
/>
)}
</div>
</div>
</div>
);
};
if (!hasGroups) {
return filteredOptions.map(renderOption);
}
const groupOrder: string[] = [];
const groupMap = new Map<string, OptionType[]>();
filteredOptions.forEach((opt) => {
const key = opt.group || '';
if (!groupMap.has(key)) {
groupOrder.push(key);
groupMap.set(key, []);
}
groupMap.get(key)!.push(opt);
});
return groupOrder.map((groupKey) => (
<div key={`group-${groupKey || 'ungrouped'}`}>
{groupKey && (
<div
className="bg-muted/50 dark:bg-card text-muted-foreground sticky top-0 z-10 border-b border-[#D9D9D9] px-3 py-1.5 text-xs font-semibold uppercase dark:border-[#2E2E2E]"
role="presentation"
>
{groupKey}
</div>
)}
{(groupMap.get(groupKey) || []).map(renderOption)}
</div>
);
})
));
})()
)}
</div>
)}

Some files were not shown because too many files have changed in this diff Show More