Compare commits

..

8 Commits

Author SHA1 Message Date
Alex
764a23d641 feat scheduler 2026-05-21 23:28:31 +01:00
Alex
0bbcbf4539 fix: tests 2026-05-20 11:20:30 +01:00
Alex
d041db77e1 feat: default tools 2026-05-20 10:40:15 +01:00
Alex
1de82ca040 fix: batch limits and failed task requeue limit (#2484) 2026-05-18 22:22:43 +01:00
Alex
8f7742c937 fix: better source upload status and fix reconciliation issue (#2482)
* fix: better source upload status and fix reconciliation issue

* fix: mini issues

* chore: locale coverage
2026-05-18 14:22:03 +01:00
Manish Madan
e3bf6a5471 (fix)tool_calls/ui: overscrolling on opening dialog (#2477) 2026-05-18 00:37:57 +01:00
Alex
e167cf8247 fix: broken syncs (#2480)
* fix: broken syncs

* fix: mini fixes
2026-05-17 23:58:28 +01:00
Alex
c06646519e fix: marking executed tool calls on webhooks (#2479) 2026-05-17 22:15:07 +01:00
123 changed files with 13984 additions and 440 deletions

View File

@@ -98,6 +98,7 @@ class BaseAgent(ABC):
user_api_key=user_api_key,
user=self.user,
decoded_token=decoded_token,
agent_id=agent_id,
)
self.attachments = attachments or []

View File

@@ -0,0 +1,356 @@
"""Default chat tools — config-free tools on by default in chats."""
from __future__ import annotations
import importlib
import inspect
import logging
import uuid
from typing import Any, Dict, List, Optional
from application.core.settings import settings
logger = logging.getLogger(__name__)
# Fixed namespace — never regenerate; produced ids are persisted.
_DEFAULT_TOOL_NAMESPACE = uuid.UUID("6b1d3f2a-9c84-4d17-bf6e-2a0c5e8d4471")
# Tool names whose storage tables FK ``tool_id`` to ``user_tools.id``;
# a synthetic id has no row, so a write would FK-violate. Schema-rot
# guard: ``tests.agents.test_default_tools.TestFkBoundToolsIsInSync``.
_FK_BOUND_TOOLS = frozenset({"notes", "todo_list"})
# Tools that should NEVER appear in a headless run (scheduled or webhook).
# ``scheduler`` only makes sense from an interactive chat — letting an LLM
# call ``schedule_task`` from a scheduled run chains new schedules each fire,
# bounded only by ``SCHEDULE_MAX_PER_USER`` (cost foot-gun, confusing UX).
_HEADLESS_EXCLUDED_TOOLS = frozenset({"scheduler"})
# Agent-selectable builtins: hidden from the Add-Tool catalog (internal=True)
# and exposed to the agent picker via the same synthetic-id machinery as
# default tools. Names may overlap with DEFAULT_CHAT_TOOLS (e.g. ``scheduler``)
# — both registries share ``_DEFAULT_TOOL_NAMESPACE`` so the same uuid5
# resolves either way (the dual-flag row carries ``default`` AND ``builtin``).
BUILTIN_AGENT_TOOLS: tuple = ("scheduler",)
# Memoization caches (process-lifetime, never evicted).
# ``_tool_cache`` maps a tool name to its metadata-only instance; a ``None``
# value records a name whose module was missing or whose class failed to
# instantiate, so the import is not retried.
_tool_cache: Dict[str, Optional[Any]] = {}
# The remaining caches are keyed by the tuple of configured names, so a
# different configuration tuple gets its own entry rather than a stale one.
_ids_cache: Dict[tuple, Dict[str, str]] = {}
_loaded_cache: Dict[tuple, List[str]] = {}
_builtin_ids_cache: Dict[tuple, Dict[str, str]] = {}
_builtin_loaded_cache: Dict[tuple, List[str]] = {}
def _load_tool(tool_name: str) -> Optional[Any]:
    """Return a metadata-only instance of a tool, or None if it has no class.

    The result (including ``None``) is memoized in ``_tool_cache`` so each
    tool module is imported and instantiated at most once per process.

    Args:
        tool_name: Module name under ``application.agents.tools``.

    Returns:
        An instance of the first ``Tool`` subclass found in the module, built
        with an empty config, or ``None`` when the module does not exist, has
        no ``Tool`` subclass, or the class fails to instantiate.
    """
    # Imports just the named module (not the whole package) — avoids the
    # circular import via ``mcp_tool`` → ``application.api.user``.
    if tool_name in _tool_cache:
        return _tool_cache[tool_name]

    from application.agents.tools.base import Tool

    try:
        module = importlib.import_module(f"application.agents.tools.{tool_name}")
    except ModuleNotFoundError:
        _tool_cache[tool_name] = None
        return None

    instance: Optional[Any] = None
    for _, obj in inspect.getmembers(module, inspect.isclass):
        if not issubclass(obj, Tool) or obj is Tool:
            continue
        try:
            instance = obj({})
        except Exception:
            # Best-effort: a broken tool must not take the registry down, but
            # keep the traceback so the failure can be diagnosed.
            logger.warning(
                "DEFAULT_CHAT_TOOLS entry %r failed to instantiate; skipping.",
                tool_name,
                exc_info=True,
            )
            instance = None
        # Only the first Tool subclass in the module is considered.
        break
    _tool_cache[tool_name] = instance
    return instance
def default_tool_id(tool_name: str) -> str:
    """Derive the stable synthetic id (uuid5 in the fixed namespace) for a tool name."""
    derived = uuid.uuid5(_DEFAULT_TOOL_NAMESPACE, tool_name)
    return str(derived)
def default_tool_ids() -> Dict[str, str]:
    """Return ``{name: synthetic id}`` for the configured default tools (memoized)."""
    configured = tuple(settings.DEFAULT_CHAT_TOOLS)
    mapping = _ids_cache.get(configured)
    if mapping is not None:
        return mapping
    # First request for this configuration tuple: build and remember it.
    mapping = {}
    for tool_name in configured:
        mapping[tool_name] = default_tool_id(tool_name)
    _ids_cache[configured] = mapping
    return mapping
def is_default_tool_id(tool_id: Any) -> bool:
    """Tell whether ``tool_id`` is one of the synthetic default-tool ids."""
    # Falsy ids (None, "", 0) short-circuit before the registry is consulted.
    return bool(tool_id) and str(tool_id) in default_tool_ids().values()
def default_tool_name_for_id(tool_id: Any) -> Optional[str]:
    """Look up the default-tool name behind a synthetic id; None when unknown."""
    wanted = str(tool_id) if tool_id else ""
    matches = (
        tool_name
        for tool_name, synthetic_id in default_tool_ids().items()
        if synthetic_id == wanted
    )
    return next(matches, None)
def builtin_agent_tool_ids() -> Dict[str, str]:
    """Return ``{name: synthetic id}`` for the agent-selectable builtins (memoized)."""
    registered = tuple(BUILTIN_AGENT_TOOLS)
    mapping = _builtin_ids_cache.get(registered)
    if mapping is not None:
        return mapping
    # Builtins share the default-tool namespace, hence the same id helper.
    mapping = {}
    for tool_name in registered:
        mapping[tool_name] = default_tool_id(tool_name)
    _builtin_ids_cache[registered] = mapping
    return mapping
def is_builtin_agent_tool_id(tool_id: Any) -> bool:
    """Tell whether ``tool_id`` is the synthetic id of an agent-selectable builtin."""
    # Falsy ids short-circuit before the registry is consulted.
    return bool(tool_id) and str(tool_id) in builtin_agent_tool_ids().values()
def builtin_agent_tool_name_for_id(tool_id: Any) -> Optional[str]:
    """Look up the builtin tool name behind a synthetic id; None when unknown."""
    wanted = str(tool_id) if tool_id else ""
    matches = (
        tool_name
        for tool_name, synthetic_id in builtin_agent_tool_ids().items()
        if synthetic_id == wanted
    )
    return next(matches, None)
def synthesized_tool_name_for_id(tool_id: Any) -> Optional[str]:
    """Resolve a synthetic id to its tool name, trying defaults before builtins."""
    default_name = default_tool_name_for_id(tool_id)
    if default_name:
        return default_name
    return builtin_agent_tool_name_for_id(tool_id)
def is_synthesized_tool_id(tool_id: Any) -> bool:
    """Tell whether ``tool_id`` is synthetic, i.e. a default chat tool or an agent builtin."""
    if is_default_tool_id(tool_id):
        return True
    return is_builtin_agent_tool_id(tool_id)
def loaded_default_tools() -> List[str]:
    """List the configured default-tool names whose tool actually loads."""
    # Silent + memoized — runs per request; the one-time skip notice
    # for unimplemented names lives in ``validate_default_chat_tools``.
    configured = tuple(settings.DEFAULT_CHAT_TOOLS)
    loaded = _loaded_cache.get(configured)
    if loaded is not None:
        return loaded
    loaded = []
    for tool_name in configured:
        if _load_tool(tool_name) is not None:
            loaded.append(tool_name)
    _loaded_cache[configured] = loaded
    return loaded
def loaded_builtin_agent_tools() -> List[str]:
    """List the builtin agent-tool names whose tool actually loads (memoized)."""
    registered = tuple(BUILTIN_AGENT_TOOLS)
    loaded = _builtin_loaded_cache.get(registered)
    if loaded is not None:
        return loaded
    loaded = []
    for tool_name in registered:
        if _load_tool(tool_name) is not None:
            loaded.append(tool_name)
    _builtin_loaded_cache[registered] = loaded
    return loaded
def validate_default_chat_tools() -> List[str]:
    """Check ``DEFAULT_CHAT_TOOLS`` at startup and return the names that can be used.

    Names with no loadable tool are only reported at debug level. A usable
    name is rejected with ``ValueError`` when it is FK-bound to ``user_tools``
    or when its tool declares any required config field.
    """
    missing: List[str] = []
    for configured in settings.DEFAULT_CHAT_TOOLS:
        if _load_tool(configured) is None:
            missing.append(configured)
    if missing:
        logger.debug(
            "DEFAULT_CHAT_TOOLS entries with no loaded tool, skipped: %s. "
            "Each activates automatically once its tool exists.",
            ", ".join(missing),
        )

    usable = loaded_default_tools()
    for name in usable:
        # A synthetic id has no ``user_tools`` row to satisfy the foreign key.
        if name in _FK_BOUND_TOOLS:
            raise ValueError(
                f"DEFAULT_CHAT_TOOLS entry {name!r} has a storage table "
                f"that foreign-keys tool_id to user_tools; a default tool "
                f"has a synthetic id with no user_tools row, so it would "
                f"fail at write time. It cannot be defaulted on."
            )
        requirements = _load_tool(name).get_config_requirements() or {}
        required = []
        for field, spec in requirements.items():
            if isinstance(spec, dict) and spec.get("required"):
                required.append(field)
        if required:
            raise ValueError(
                f"DEFAULT_CHAT_TOOLS entry {name!r} requires config "
                f"fields {required}; only config-free tools may be "
                "defaulted on."
            )

    if usable:
        logger.info("Default chat tools active: %s", ", ".join(usable))
    return usable
def _tool_display(tool_name: str) -> str:
    """Use the first docstring line of the tool as its display name, else the tool name."""
    tool = _load_tool(tool_name)
    if not tool:
        return tool_name
    docstring = (tool.__doc__ or "").strip()
    headline, _, _ = docstring.partition("\n")
    return headline.strip() or tool_name
def _tool_description(tool_name: str) -> str:
    """Use everything after the first docstring line as the tool description ("" if none)."""
    tool = _load_tool(tool_name)
    if not tool:
        return ""
    docstring = (tool.__doc__ or "").strip()
    _, _, remainder = docstring.partition("\n")
    return remainder.strip()
def synthesize_default_tool(tool_name: str) -> Optional[Dict[str, Any]]:
    """Create the in-memory ``user_tools``-shaped row for a default tool.

    Returns None when the tool cannot be loaded. The row is never persisted;
    ``id`` and ``_id`` both carry the deterministic synthetic id.
    """
    tool = _load_tool(tool_name)
    if tool is None:
        return None
    synthetic_id = default_tool_id(tool_name)
    display_name = _tool_display(tool_name)
    description = _tool_description(tool_name)
    actions = tool.get_actions_metadata() or []
    return {
        "id": synthetic_id,
        "_id": synthetic_id,
        "name": tool_name,
        "display_name": display_name,
        "custom_name": "",
        "description": description,
        "config": {},
        "config_requirements": {},
        "actions": actions,
        "status": True,
        "default": True,
    }
def synthesize_builtin_agent_tool(tool_name: str) -> Optional[Dict[str, Any]]:
    """Create the in-memory ``user_tools``-shaped row for a builtin agent tool.

    Returns None when the tool cannot be loaded. The row is flagged
    ``builtin=True`` / ``default=False`` and uses the same synthetic id
    scheme as default tools.
    """
    tool = _load_tool(tool_name)
    if tool is None:
        return None
    synthetic_id = default_tool_id(tool_name)
    display_name = _tool_display(tool_name)
    description = _tool_description(tool_name)
    actions = tool.get_actions_metadata() or []
    return {
        "id": synthetic_id,
        "_id": synthetic_id,
        "name": tool_name,
        "display_name": display_name,
        "custom_name": "",
        "description": description,
        "config": {},
        "config_requirements": {},
        "actions": actions,
        "status": True,
        "default": False,
        "builtin": True,
    }
def synthesize_tool_by_name(tool_name: str) -> Optional[Dict[str, Any]]:
    """Build the synthetic row for a tool name, preferring the builtin shape."""
    # Names registered as agent builtins win over the default-tool shape.
    builder = (
        synthesize_builtin_agent_tool
        if tool_name in BUILTIN_AGENT_TOOLS
        else synthesize_default_tool
    )
    return builder(tool_name)
def disabled_default_tools(user_doc: Optional[Dict[str, Any]]) -> List[str]:
    """Extract the opt-out list stored under ``tool_preferences.disabled_default_tools``.

    Any missing or wrongly-typed level (user doc, preferences, list) yields
    an empty list; entries are coerced to ``str``.
    """
    if not isinstance(user_doc, dict):
        return []
    preferences = user_doc.get("tool_preferences")
    if not preferences or not isinstance(preferences, dict):
        return []
    opted_out = preferences.get("disabled_default_tools")
    if not opted_out or not isinstance(opted_out, list):
        return []
    return list(map(str, opted_out))
def synthesized_default_tools(
    user_doc: Optional[Dict[str, Any]] = None,
    *,
    headless: bool = False,
) -> List[Dict[str, Any]]:
    """Build the default-tool rows that an agentless chat should see."""
    # Agent-bound chats must NOT call this — they resolve exactly
    # ``agents.tools``. Disabled defaults are dropped. ``headless=True``
    # additionally drops chat-only tools (e.g. ``scheduler``) so a scheduled
    # / webhook LLM can't re-schedule itself.
    opted_out = set(disabled_default_tools(user_doc))
    rows: List[Dict[str, Any]] = []
    for name in loaded_default_tools():
        hidden = name in opted_out or (headless and name in _HEADLESS_EXCLUDED_TOOLS)
        if hidden:
            continue
        row = synthesize_default_tool(name)
        if row is None:
            continue
        rows.append(row)
    return rows
def is_headless_excluded_tool(tool_name: Optional[str]) -> bool:
    """Tell whether the named tool must be withheld from headless runs."""
    if not tool_name:
        return False
    return tool_name in _HEADLESS_EXCLUDED_TOOLS
def default_tools_for_management(
    user_doc: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """List all loaded default tools, each with ``status`` reflecting the user's toggle."""
    # Unlike ``synthesized_default_tools`` (chat toolset), this keeps
    # disabled tools so the management UI can render their toggle.
    opted_out = set(disabled_default_tools(user_doc))
    rows: List[Dict[str, Any]] = []
    for name in loaded_default_tools():
        row = synthesize_default_tool(name)
        if row is not None:
            row["status"] = name not in opted_out
            rows.append(row)
    return rows
def builtin_agent_tools_for_management() -> List[Dict[str, Any]]:
    """List the rows of all loaded agent-builtin tools for the agent picker (no per-user state)."""
    candidates = (
        synthesize_builtin_agent_tool(name) for name in loaded_builtin_agent_tools()
    )
    return [row for row in candidates if row is not None]
def resolve_tool_by_id(
    tool_id: Any,
    user: Optional[str],
    *,
    user_tools_repo: Any = None,
) -> Optional[Dict[str, Any]]:
    """Resolve a tool by id: default/builtin synthetic id, else user_tools row.

    Dual-registered tools (e.g. ``scheduler``) get both flags on the resolved
    row so callers can branch on either path without losing the discriminator.

    Args:
        tool_id: Synthetic id or ``user_tools`` row id.
        user: Owner used for the ``user_tools`` lookup; required for that path.
        user_tools_repo: Repository exposing ``get_any(id, user)``; when
            omitted only synthetic ids can resolve.

    Returns:
        The tool row, or ``None`` when the id is unknown, the synthetic tool
        cannot be loaded, or no repository/user was supplied for a stored id.
    """
    default_name = default_tool_name_for_id(tool_id)
    builtin_name = builtin_agent_tool_name_for_id(tool_id)

    if default_name is not None:
        row = synthesize_default_tool(default_name)
        # Only decorate a real row: when the tool fails to load there is
        # nothing to flag, and returning a ``{"builtin": True}`` stub would
        # break callers that index ``row["id"]``.
        if row is not None and builtin_name is not None:
            row["builtin"] = True
        return row

    if builtin_name is not None:
        return synthesize_builtin_agent_tool(builtin_name)

    if user_tools_repo is None or not user:
        return None
    return user_tools_repo.get_any(str(tool_id), user)

View File

@@ -0,0 +1,173 @@
"""Shared headless agent runner used by webhooks and scheduled runs."""
from __future__ import annotations
import logging
from typing import Any, Dict, Iterable, List, Optional
from application.agents.agent_creator import AgentCreator
from application.agents.tool_executor import ToolExecutor
from application.api.answer.services.stream_processor import get_prompt
from application.core.settings import settings
from application.retriever.retriever_creator import RetrieverCreator
from application.storage.db.repositories.sources import SourcesRepository
from application.storage.db.session import db_readonly
logger = logging.getLogger(__name__)
def _resolve_owner(agent_config: Dict[str, Any]) -> Optional[str]:
return agent_config.get("user_id") or agent_config.get("user")
def _resolve_agent_id(agent_config: Dict[str, Any]) -> Optional[str]:
raw = agent_config.get("id") or agent_config.get("_id")
return str(raw) if raw else None
def run_agent_headless(
    agent_config: Dict[str, Any],
    query: str,
    *,
    tool_allowlist: Optional[Iterable[str]] = None,
    model_id_override: Optional[str] = None,
    endpoint: str = "headless",
    chat_history: Optional[List[Dict[str, Any]]] = None,
    conversation_id: Optional[str] = None,
) -> Dict[str, Any]:
    """Execute an agent without a connected client and summarize the run.

    Resolves the owner, source, prompt and model from ``agent_config``,
    performs a best-effort retrieval, then drains the agent's event stream.

    Returns a dict with ``answer``, ``thought``, ``sources``, ``tool_calls``,
    ``prompt_tokens``, ``generated_tokens``, ``denied``, ``error_type`` and
    ``model_id``.

    Raises:
        ValueError: when ``agent_config`` names no owner.
    """
    from application.core.model_utils import (
        get_api_key_for_provider,
        get_default_model_id,
        get_provider_from_model_id,
        validate_model_id,
    )
    from application.utils import calculate_doc_token_budget

    owner = _resolve_owner(agent_config)
    if not owner:
        raise ValueError("Agent config is missing user_id; cannot run headless.")
    decoded_token = {"sub": owner}

    # --- source / retriever selection ------------------------------------
    retriever_kind = agent_config.get("retriever", "classic")
    source_id = agent_config.get("source_id") or agent_config.get("source")
    active_docs: Any = {}
    if source_id:
        with db_readonly() as conn:
            source_row = SourcesRepository(conn).get(str(source_id), owner)
        if source_row:
            active_docs = str(source_row["id"])
            # The source row may override the agent-level retriever.
            retriever_kind = source_row.get("retriever", retriever_kind)
    source = {"active_docs": active_docs}

    # --- plain agent settings --------------------------------------------
    chunks = int(agent_config.get("chunks", 2) or 2)
    prompt_id = agent_config.get("prompt_id", "default")
    user_api_key = agent_config.get("key")
    agent_id = _resolve_agent_id(agent_config)
    agent_type = agent_config.get("agent_type", "classic")
    json_schema = agent_config.get("json_schema")
    prompt = get_prompt(prompt_id)

    # --- model / provider resolution -------------------------------------
    requested_model = model_id_override or agent_config.get("default_model_id") or ""
    if requested_model and validate_model_id(requested_model, user_id=owner):
        model_id = requested_model
    else:
        model_id = get_default_model_id()
        if requested_model:
            logger.warning(
                "Agent %s references unknown model_id %r; falling back to %r",
                agent_id, requested_model, model_id,
            )
    if model_id:
        provider = get_provider_from_model_id(model_id, user_id=owner)
    else:
        provider = settings.LLM_PROVIDER
    llm_name = provider or settings.LLM_PROVIDER
    system_api_key = get_api_key_for_provider(llm_name)
    doc_token_limit = calculate_doc_token_budget(model_id=model_id, user_id=owner)

    # --- retrieval (best effort: a failure leaves the doc list empty) -----
    retriever = RetrieverCreator.create_retriever(
        retriever_kind,
        source=source,
        chat_history=chat_history or [],
        prompt=prompt,
        chunks=chunks,
        doc_token_limit=doc_token_limit,
        model_id=model_id,
        user_api_key=user_api_key,
        agent_id=agent_id,
        decoded_token=decoded_token,
    )
    retrieved_docs: List[Dict[str, Any]] = []
    try:
        found = retriever.search(query)
        if found:
            retrieved_docs = found
    except Exception as exc:
        logger.warning("Headless retrieve failed: %s", exc)

    # --- executor and agent wiring ---------------------------------------
    tool_executor = ToolExecutor(
        user_api_key=user_api_key,
        user=owner,
        decoded_token=decoded_token,
        agent_id=agent_id,
        headless=True,
        tool_allowlist=list(tool_allowlist or []),
    )
    if conversation_id:
        tool_executor.conversation_id = str(conversation_id)
    agent = AgentCreator.create_agent(
        agent_type,
        endpoint=endpoint,
        llm_name=llm_name,
        model_id=model_id,
        api_key=system_api_key,
        agent_id=agent_id,
        user_api_key=user_api_key,
        prompt=prompt,
        chat_history=chat_history or [],
        retrieved_docs=retrieved_docs,
        decoded_token=decoded_token,
        attachments=[],
        json_schema=json_schema,
        tool_executor=tool_executor,
    )
    if conversation_id:
        agent.conversation_id = str(conversation_id)

    # --- drain the event stream ------------------------------------------
    answer_parts: List[str] = []
    thought_parts: List[str] = []
    sources_log: List[Dict[str, Any]] = []
    tool_calls: List[Dict[str, Any]] = []
    for event in agent.gen(query=query):
        if not isinstance(event, dict):
            continue
        # Each event is classified by the first matching key, in this order.
        if "answer" in event:
            answer_parts.append(str(event["answer"]))
        elif "sources" in event:
            sources_log.extend(event["sources"])
        elif "tool_calls" in event:
            tool_calls.extend(event["tool_calls"])
        elif "thought" in event:
            thought_parts.append(str(event["thought"]))
    answer_full = "".join(answer_parts)
    thought = "".join(thought_parts)

    denied = list(getattr(tool_executor, "headless_denials", []))
    # Denials only count as an error when the run produced no answer text.
    error_type = None
    if denied and not answer_full.strip():
        error_type = "tool_not_allowed"

    # Use the LLM accumulator (gen_token_usage / stream_token_usage decorators);
    # current_token_count is a context-size sentinel, not a usage tally.
    llm = getattr(agent, "llm", None)
    llm_usage = getattr(llm, "token_usage", None) or {}
    prompt_tokens = int(llm_usage.get("prompt_tokens", 0) or 0)
    generated_tokens = int(llm_usage.get("generated_tokens", 0) or 0)

    return {
        "answer": answer_full,
        "thought": thought,
        "sources": sources_log,
        "tool_calls": tool_calls,
        "prompt_tokens": prompt_tokens,
        "generated_tokens": generated_tokens,
        "denied": denied,
        "error_type": error_type,
        "model_id": model_id,
    }

View File

@@ -0,0 +1,131 @@
"""Cron/tz computations for the scheduler (shared by dispatcher, routes, and tool)."""
from __future__ import annotations
import re
from datetime import datetime, timedelta, timezone
from typing import Optional
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from croniter import croniter
# Duration grammar: an unsigned integer followed by a single unit letter
# (s, m, h or d, case-insensitive), with optional surrounding whitespace.
_DELAY_RE = re.compile(r"^\s*(\d+)\s*(s|m|h|d)\s*$", re.IGNORECASE)
# Seconds per unit letter; keys are lowercase.
_DELAY_MULTIPLIERS = {"s": 1, "m": 60, "h": 3600, "d": 86_400}
class ScheduleValidationError(ValueError):
    """Signals an invalid schedule input: timezone, cron expression, run_at, or delay."""
def resolve_timezone(tz_name: Optional[str]) -> ZoneInfo:
    """Return a ``ZoneInfo`` for ``tz_name`` (default UTC).

    Args:
        tz_name: IANA timezone name; ``None`` or a blank string means UTC.

    Raises:
        ScheduleValidationError: when ``tz_name`` is not a string, is not a
            known timezone, or is a malformed key.
    """
    if tz_name is not None and not isinstance(tz_name, str):
        raise ScheduleValidationError("Timezone must be an IANA name string.")
    name = (tz_name or "UTC").strip() or "UTC"
    try:
        return ZoneInfo(name)
    except (ZoneInfoNotFoundError, ValueError, OSError) as exc:
        # ZoneInfo raises plain ValueError for malformed keys (absolute or
        # non-normalized paths); OSError is caught defensively. Report all
        # of them as a validation failure rather than letting them escape.
        raise ScheduleValidationError(f"Unknown timezone: {name}") from exc
def parse_cron(expression: str) -> None:
    """Check that ``expression`` is a usable 5-field cron string; raise otherwise."""
    if not (expression and isinstance(expression, str)):
        raise ScheduleValidationError("Cron expression is required.")
    field_count = len(expression.strip().split())
    if field_count != 5:
        raise ScheduleValidationError("Cron expression must have 5 fields.")
    # croniter defers some malformed inputs until get_next, so force one here.
    try:
        croniter(expression, datetime.now(timezone.utc)).get_next(datetime)
    except (ValueError, KeyError) as exc:
        raise ScheduleValidationError(f"Invalid cron expression: {exc}") from exc
# Number of consecutive cron ticks sampled by ``cron_interval_seconds``.
_CRON_INTERVAL_WINDOW = 64
def cron_interval_seconds(expression: str, tz_name: Optional[str]) -> int:
    """Measure the shortest positive gap between upcoming ticks (enforces SCHEDULE_MIN_INTERVAL).

    A whole window of _CRON_INTERVAL_WINDOW ticks is inspected because bursty
    expressions such as ``* 9 * * *`` mix tiny within-burst gaps with huge
    between-burst gaps; two adjacent ticks alone could miss the small one.
    Returns 0 when no positive gap is observed.
    """
    parse_cron(expression)
    tz = resolve_timezone(tz_name)
    start_local = datetime.now(timezone.utc).astimezone(tz)
    ticker = croniter(expression, start_local)
    ticks = [ticker.get_next(datetime) for _ in range(_CRON_INTERVAL_WINDOW)]
    gaps = (
        int((later - earlier).total_seconds())
        for earlier, later in zip(ticks, ticks[1:])
    )
    positive_gaps = [gap for gap in gaps if gap > 0]
    return min(positive_gaps) if positive_gaps else 0
def next_cron_run(
    expression: str,
    tz_name: Optional[str],
    after: Optional[datetime] = None,
) -> datetime:
    """Compute the next fire time strictly after ``after`` (UTC, tz-aware).

    The cadence is evaluated in the schedule's IANA tz so DST boundaries land
    on the intended local clock-time (e.g. 9 AM Warsaw stays 9 AM across the
    jump). ``after`` defaults to now; a naive ``after`` is treated as UTC.
    """
    parse_cron(expression)
    tz = resolve_timezone(tz_name)
    if after is None:
        anchor_utc = datetime.now(timezone.utc)
    else:
        anchor_utc = after
    if anchor_utc.tzinfo is None:
        anchor_utc = anchor_utc.replace(tzinfo=timezone.utc)
    local_anchor = anchor_utc.astimezone(tz)
    next_local = croniter(expression, local_anchor).get_next(datetime)
    return next_local.astimezone(timezone.utc)
def parse_delay(delay: str) -> timedelta:
    """Parse a duration like ``30m`` / ``2h`` / ``1d`` into a timedelta.

    Args:
        delay: An unsigned integer followed by ``s``, ``m``, ``h`` or ``d``.

    Raises:
        ScheduleValidationError: when ``delay`` is not a string, does not
            match the grammar, is zero, or is too large to represent.
    """
    if not isinstance(delay, str):
        raise ScheduleValidationError("delay must be a string like '30m' or '2h'.")
    match = _DELAY_RE.match(delay)
    if not match:
        raise ScheduleValidationError(
            "delay must look like '30s', '15m', '2h', or '1d'."
        )
    unit = match.group(2).lower()
    try:
        amount = int(match.group(1))
        delta = timedelta(seconds=amount * _DELAY_MULTIPLIERS[unit])
    except (OverflowError, ValueError) as exc:
        # Absurdly long digit runs overflow timedelta (or hit the int-parsing
        # digit limit); report them as invalid input instead of crashing.
        raise ScheduleValidationError("delay is too large.") from exc
    if amount <= 0:
        raise ScheduleValidationError("delay must be positive.")
    return delta
def parse_run_at(run_at: str, tz_name: Optional[str] = None) -> datetime:
    """Parse an ISO 8601 timestamp; naive values resolve in ``tz_name``.

    Naive values inside the DST "fall back" hour resolve to the earlier instance
    (zoneinfo default fold=0); pass an explicit offset to select the later one.

    Args:
        run_at: ISO 8601 timestamp; a ``Z`` suffix is accepted as UTC.
        tz_name: IANA timezone applied to naive timestamps (default UTC).

    Returns:
        The timestamp converted to a tz-aware UTC ``datetime``.

    Raises:
        ScheduleValidationError: when ``run_at`` is not a non-empty string,
            cannot be parsed, or falls outside the representable date range
            once converted to UTC.
    """
    if not isinstance(run_at, str) or not run_at.strip():
        raise ScheduleValidationError("run_at must be an ISO 8601 string.")
    try:
        parsed = datetime.fromisoformat(run_at.strip().replace("Z", "+00:00"))
    except ValueError as exc:
        raise ScheduleValidationError(f"Invalid run_at: {exc}") from exc
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=resolve_timezone(tz_name))
    try:
        return parsed.astimezone(timezone.utc)
    except (OverflowError, ValueError) as exc:
        # Timestamps at the edge of datetime's range (year 1 / 9999) can
        # overflow when the UTC offset is applied.
        raise ScheduleValidationError(
            "run_at is outside the supported date range."
        ) from exc
def clamp_once_horizon(run_at: datetime, max_horizon_seconds: int) -> None:
    """Reject a ``run_at`` that is not in the future or lies past the once-task horizon.

    A non-positive ``max_horizon_seconds`` disables the upper bound.
    """
    now = datetime.now(timezone.utc)
    if not run_at > now:
        raise ScheduleValidationError("run_at is in the past.")
    if not max_horizon_seconds > 0:
        return
    lead_time = run_at - now
    if lead_time > timedelta(seconds=max_horizon_seconds):
        raise ScheduleValidationError(
            "run_at is beyond the maximum allowed scheduling horizon."
        )

View File

@@ -3,6 +3,11 @@ import uuid
from collections import Counter
from typing import Any, Dict, List, Optional, Tuple
from application.agents.default_tools import (
is_headless_excluded_tool,
resolve_tool_by_id,
synthesized_default_tools,
)
from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager
from application.security.encryption import decrypt_credentials
@@ -12,6 +17,7 @@ from application.storage.db.repositories.tool_call_attempts import (
ToolCallAttemptsRepository,
)
from application.storage.db.repositories.user_tools import UserToolsRepository
from application.storage.db.repositories.users import UsersRepository
from application.storage.db.session import db_readonly, db_session
logger = logging.getLogger(__name__)
@@ -113,10 +119,22 @@ class ToolExecutor:
user_api_key: Optional[str] = None,
user: Optional[str] = None,
decoded_token: Optional[Dict] = None,
agent_id: Optional[str] = None,
*,
headless: bool = False,
tool_allowlist: Optional[List[str]] = None,
):
self.user_api_key = user_api_key
self.user = user
self.decoded_token = decoded_token
self.agent_id = agent_id
# Headless mode (scheduled / webhook): no human to resolve a pause,
# so check_pause returns headless_denied sentinels instead.
self.headless = bool(headless)
# Tool-instance ids pre-authorized for headless approval-gated execution.
self.tool_allowlist: set = (
{str(x) for x in tool_allowlist} if tool_allowlist else set()
)
self.tool_calls: List[Dict] = []
self._loaded_tools: Dict[str, object] = {}
self.conversation_id: Optional[str] = None
@@ -124,6 +142,8 @@ class ToolExecutor:
self.client_tools: Optional[List[Dict]] = None
self._name_to_tool: Dict[str, Tuple[str, str]] = {}
self._tool_to_name: Dict[Tuple[str, str], str] = {}
# Filled by the LLMHandler.handle_tool_calls headless loop.
self.headless_denials: List[Dict] = []
def get_tools(self) -> Dict[str, Dict]:
"""Load tool configs from DB based on user context.
@@ -140,29 +160,54 @@ class ToolExecutor:
return tools
def _get_tools_by_api_key(self, api_key: str) -> Dict[str, Dict]:
"""Resolve an agent's toolset — exactly ``agents.tools``, no defaults."""
# Per-operation session: the answer pipeline spans a long-lived
# generator; wrapping it in a single connection would pin a PG
# conn for the whole stream. Open, fetch, close.
with db_readonly() as conn:
agent_data = AgentsRepository(conn).find_by_key(api_key)
tool_ids = agent_data.get("tools", []) if agent_data else []
if not tool_ids:
return {}
tools_repo = UserToolsRepository(conn)
owner = (
(agent_data.get("user_id") or agent_data.get("user"))
if agent_data
else None
)
tools: List[Dict] = []
owner = (agent_data.get("user_id") or agent_data.get("user")) if agent_data else None
for tid in tool_ids:
row = None
if owner:
row = tools_repo.get_any(str(tid), owner)
if row is not None:
tools.append(row)
return {str(tool["id"]): tool for tool in tools} if tools else {}
row = resolve_tool_by_id(tid, owner, user_tools_repo=tools_repo)
if row is None:
continue
# Headless runs (scheduled / webhook) drop chat-only tools
# like ``scheduler`` so a fire-time LLM can't chain schedules.
if self.headless and is_headless_excluded_tool(row.get("name")):
continue
tools.append(row)
return {str(tool["id"]): tool for tool in tools}
def _get_user_tools(self, user: str = "local") -> Dict[str, Dict]:
"""Resolve an agentless chat's toolset: explicit user tools plus defaults."""
with db_readonly() as conn:
user_tools = UserToolsRepository(conn).list_active_for_user(user)
return {str(i): tool for i, tool in enumerate(user_tools)}
user_doc = (
UsersRepository(conn).get(user) if self.agent_id is None else None
)
# Headless agentless runs (e.g. scheduled fire) drop chat-only
# tools (``scheduler``) from explicit user_tools too.
filtered_user_tools = [
t for t in user_tools
if not (self.headless and is_headless_excluded_tool(t.get("name")))
]
# Index keys (ints) and synthetic uuid5 keys can't collide.
tools: Dict[str, Dict] = {
str(i): tool for i, tool in enumerate(filtered_user_tools)
}
if self.agent_id is None:
for default_row in synthesized_default_tools(
user_doc, headless=self.headless,
):
tools[str(default_row["id"])] = default_row
return tools
def merge_client_tools(
self, tools_dict: Dict, client_tools: List[Dict]
@@ -300,9 +345,11 @@ class ToolExecutor:
def check_pause(
self, tools_dict: Dict, call, llm_class_name: str
) -> Optional[Dict]:
"""Check if a tool call requires pausing for approval or client execution.
"""Return a pending-action dict (approval / client / headless_denied) or None.
Returns a dict describing the pending action if pause is needed, None otherwise.
In headless mode the dict's pause_type is ``headless_denied`` so the
upstream loop synthesizes a tool result instead of pausing (nothing can
resume a scheduled / webhook run).
"""
parser = ToolActionParser(llm_class_name, name_mapping=self._name_to_tool)
tool_id, action_name, call_args = parser.parse_args(call)
@@ -313,9 +360,26 @@ class ToolExecutor:
return None # Will be handled as error by execute()
tool_data = tools_dict[tool_id]
arguments = call_args if isinstance(call_args, dict) else {}
# Client-side tools
if tool_data.get("client_side"):
if self.headless:
return {
"call_id": call_id,
"name": llm_name,
"tool_name": tool_data.get("name", "unknown"),
"tool_id": tool_id,
"action_name": action_name,
"llm_name": llm_name,
"arguments": arguments,
"pause_type": "headless_denied",
"deny_reason": (
"Client-side tools cannot run in headless / scheduled runs."
),
"error_type": "tool_not_allowed",
"thought_signature": getattr(call, "thought_signature", None),
}
return {
"call_id": call_id,
"name": llm_name,
@@ -323,7 +387,7 @@ class ToolExecutor:
"tool_id": tool_id,
"action_name": action_name,
"llm_name": llm_name,
"arguments": call_args if isinstance(call_args, dict) else {},
"arguments": arguments,
"pause_type": "requires_client_execution",
"thought_signature": getattr(call, "thought_signature", None),
}
@@ -340,6 +404,27 @@ class ToolExecutor:
)
if action_data.get("require_approval"):
if self.headless:
tool_row_id = str(tool_data.get("id") or tool_id)
if tool_row_id in self.tool_allowlist:
# Pre-authorized for headless execution — fall through.
return None
return {
"call_id": call_id,
"name": llm_name,
"tool_name": tool_data.get("name", "unknown"),
"tool_id": tool_id,
"action_name": action_name,
"llm_name": llm_name,
"arguments": arguments,
"pause_type": "headless_denied",
"deny_reason": (
"This tool requires approval and is not in the run's "
"tool_allowlist."
),
"error_type": "tool_not_allowed",
"thought_signature": getattr(call, "thought_signature", None),
}
return {
"call_id": call_id,
"name": llm_name,
@@ -347,7 +432,7 @@ class ToolExecutor:
"tool_id": tool_id,
"action_name": action_name,
"llm_name": llm_name,
"arguments": call_args if isinstance(call_args, dict) else {},
"arguments": arguments,
"pause_type": "awaiting_approval",
"thought_signature": getattr(call, "thought_signature", None),
}
@@ -623,6 +708,13 @@ class ToolExecutor:
tool_config["tool_id"] = str(row_id)
if self.conversation_id:
tool_config["conversation_id"] = self.conversation_id
if tool_data["name"] == "scheduler":
# Agent-bound: stamp schedules.agent_id. Agentless: the tool
# falls back to ``origin_conversation_id`` as the schedule's
# conversation home.
tool_config["agent_id"] = (
str(self.agent_id) if self.agent_id else None
)
if tool_data["name"] == "mcp_tool":
tool_config["query_mode"] = True

View File

@@ -0,0 +1,339 @@
"""Scheduler tool: one-time agent tasks in agent-bound or agentless chats."""
from __future__ import annotations
import json
import logging
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from application.agents.scheduler_utils import (
ScheduleValidationError,
clamp_once_horizon,
parse_delay,
parse_run_at,
)
from application.core.settings import settings
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.schedules import SchedulesRepository
from application.storage.db.session import db_readonly, db_session
from .base import Tool
logger = logging.getLogger(__name__)
class SchedulerTool(Tool):
    """Scheduling"""

    # Exposes three LLM-callable actions for one-time tasks:
    # ``schedule_task``, ``list_scheduled_tasks`` and
    # ``cancel_scheduled_task``. Every action returns a string: a JSON
    # document on success, or a plain ``Error: ...`` message.
    # NOTE(review): the class docstring above is kept verbatim in case it is
    # surfaced as the tool's display name elsewhere — confirm before editing.

    # internal=True keeps scheduler out of /api/available_tools and the
    # agentless Add-Tool modal; tool_manager.load_tool still lazy-loads it
    # per-user at execute time (same as memory/notes/todo_list).
    internal: bool = True

    def __init__(
        self,
        tool_config: Optional[Dict[str, Any]] = None,
        user_id: Optional[str] = None,
    ) -> None:
        """Capture the identity that every action is scoped by.

        Args:
            tool_config: Optional mapping; ``agent_id`` and
                ``conversation_id`` are read from it when present.
            user_id: Owner of the schedules this instance reads and writes.
        """
        cfg = tool_config or {}
        self.user_id: Optional[str] = user_id
        self.agent_id: Optional[str] = cfg.get("agent_id")
        self.conversation_id: Optional[str] = cfg.get("conversation_id")

    def execute_action(self, action_name: str, **kwargs: Any) -> str:
        """Dispatch on the LLM-supplied action name.

        Args:
            action_name: One of ``schedule_task``, ``list_scheduled_tasks``
                or ``cancel_scheduled_task``.
            **kwargs: Action arguments as supplied by the LLM.

        Returns:
            The action's result string, an ``Error: ...`` message when the
            instance lacks a usable identity, or ``Unknown action: ...``.
        """
        if not self.user_id:
            return "Error: SchedulerTool requires a valid user_id."
        # Agent-bound: agent_id must look like a UUID. Agentless: agent_id is
        # absent; an originating conversation is then mandatory (the schedule's
        # conversation home, used for history + output append).
        if self.agent_id and not looks_like_uuid(str(self.agent_id)):
            return "Error: SchedulerTool received an invalid agent_id."
        if not self.agent_id and not self.conversation_id:
            return (
                "Error: SchedulerTool requires an agent_id or a "
                "conversation_id (no conversation home)."
            )

        if action_name == "schedule_task":
            return self._schedule_task(
                instruction=kwargs.get("instruction", ""),
                delay=kwargs.get("delay"),
                run_at=kwargs.get("run_at"),
                tz=kwargs.get("timezone"),
            )
        if action_name == "list_scheduled_tasks":
            return self._list_scheduled_tasks()
        if action_name == "cancel_scheduled_task":
            return self._cancel_scheduled_task(kwargs.get("task_id", ""))
        return f"Unknown action: {action_name}"

    def get_actions_metadata(self) -> List[Dict[str, Any]]:
        """Action schemas for the LLM tool catalogue."""
        return [
            {
                "name": "schedule_task",
                "description": (
                    "Schedule a one-time task. Provide either a `delay` "
                    "(e.g. '30m', '2h', '1d') from now, or a `run_at` ISO-8601 "
                    "absolute time. Optionally pass an IANA `timezone` to resolve "
                    "naive run_at values. The instruction is the task that will "
                    "execute at fire time (including delivery, e.g. 'send to my "
                    "Telegram'). For recurring schedules in an agent chat, point "
                    "the user to the agent's Schedules tab."
                ),
                "parameters": {
                    "type": "object",
                    "properties": {
                        "instruction": {
                            "type": "string",
                            "description": "What the agent should do at fire time.",
                        },
                        "delay": {
                            "type": "string",
                            "description": "Duration like '30m', '2h', '1d'.",
                        },
                        "run_at": {
                            "type": "string",
                            "description": "Absolute ISO 8601 timestamp.",
                        },
                        "timezone": {
                            "type": "string",
                            "description": (
                                "IANA timezone (e.g. Europe/Warsaw) for naive run_at."
                            ),
                        },
                    },
                    "required": ["instruction"],
                },
            },
            {
                "name": "list_scheduled_tasks",
                "description": (
                    "List pending one-time tasks for the current chat. "
                    "Agent-bound chats scope to user+agent; agentless chats "
                    "scope to user+originating conversation."
                ),
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "cancel_scheduled_task",
                "description": "Cancel a pending one-time task by its task_id.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "task_id": {
                            "type": "string",
                            "description": "The schedule id returned by schedule_task.",
                        },
                    },
                    "required": ["task_id"],
                },
            },
        ]

    def get_config_requirements(self) -> Dict[str, Any]:
        """Return the user-supplied config this tool needs: none."""
        return {}

    def _schedule_task(
        self,
        instruction: str,
        delay: Optional[str],
        run_at: Optional[str],
        tz: Optional[str],
    ) -> str:
        """Validate the request, enforce the per-user cap, and persist a task.

        Args:
            instruction: What should run at fire time; must contain
                non-whitespace text.
            delay: Relative duration from now; mutually exclusive with
                ``run_at``.
            run_at: Absolute timestamp; mutually exclusive with ``delay``.
            tz: Optional timezone name, passed to ``parse_run_at`` and stored
                on the schedule (``"UTC"`` when omitted).

        Returns:
            JSON describing the created task, or an ``Error: ...`` message.
        """
        # Validate the stripped text: a whitespace-only instruction would
        # otherwise be persisted as an empty instruction.
        if not isinstance(instruction, str) or not instruction.strip():
            return "Error: instruction is required."
        instruction = instruction.strip()
        if not delay and not run_at:
            return "Error: provide either `delay` or `run_at`."
        if delay and run_at:
            return "Error: provide only one of `delay` or `run_at`."

        try:
            if delay:
                fire = datetime.now(timezone.utc) + parse_delay(delay)
            else:
                fire = parse_run_at(run_at, tz)
            # Return value is unused; presumably this raises
            # ScheduleValidationError when out of range — confirm.
            clamp_once_horizon(fire, settings.SCHEDULE_ONCE_MAX_HORIZON)
        except ScheduleValidationError as exc:
            return f"Error: {exc}"
        # NOTE(review): on the ``delay`` path ``tz`` is stored without going
        # through ``parse_run_at`` — confirm it is validated elsewhere.

        with db_readonly() as conn:
            count = SchedulesRepository(conn).count_active_for_user(self.user_id)
        # A non-positive SCHEDULE_MAX_PER_USER disables the cap.
        if (
            settings.SCHEDULE_MAX_PER_USER > 0
            and count >= settings.SCHEDULE_MAX_PER_USER
        ):
            return (
                "Error: you have reached the maximum number of active schedules."
            )

        # Chat-created tasks default to the user's non-approval tools (for the
        # agent's toolset when agent-bound, or the user's defaults+user_tools
        # when agentless).
        allowlist = _safe_default_allowlist(self.agent_id, self.user_id)
        auto_name = _name_from_instruction(instruction)
        try:
            with db_session() as conn:
                created = SchedulesRepository(conn).create(
                    user_id=self.user_id,
                    agent_id=self.agent_id,
                    trigger_type="once",
                    instruction=instruction,
                    name=auto_name,
                    run_at=fire,
                    next_run_at=fire,
                    timezone=tz or "UTC",
                    tool_allowlist=allowlist,
                    origin_conversation_id=self.conversation_id,
                    created_via="chat",
                )
        except Exception as exc:
            # Tool boundary: log the failure and return a generic error string.
            logger.exception("schedule_task create failed: %s", exc)
            return "Error: failed to create scheduled task."

        return json.dumps(
            {
                "task_id": str(created["id"]),
                "resolved_run_at": _iso_utc(fire),
                "timezone": tz or "UTC",
                "instruction": instruction,
                "name": auto_name,
            }
        )

    def _list_scheduled_tasks(self) -> str:
        """Pending one-time tasks for this user, oldest fire first.

        Agent-bound chats scope to user+agent. Agentless chats scope to user+
        origin_conversation_id so a user only sees tasks created from this chat.

        Returns:
            JSON of the form ``{"tasks": [...]}``.
        """
        with db_readonly() as conn:
            repo = SchedulesRepository(conn)
            if self.agent_id:
                rows = repo.list_for_agent(
                    self.agent_id,
                    self.user_id,
                    statuses=["active"],
                    trigger_type="once",
                )
            else:
                rows = repo.list_for_conversation(
                    self.user_id,
                    self.conversation_id,
                    statuses=["active"],
                    trigger_type="once",
                )
        # Values arrive as ISO strings (coerce_pg_native); string sentinel keeps types uniform.
        rows.sort(key=lambda r: r.get("next_run_at") or "9999-12-31T23:59:59Z")
        items = [
            {
                "task_id": str(r["id"]),
                "instruction": r.get("instruction"),
                "name": r.get("name"),
                "resolved_run_at": _iso_utc(r.get("next_run_at")),
                "timezone": r.get("timezone"),
                "status": r.get("status"),
            }
            for r in rows
        ]
        return json.dumps({"tasks": items})

    def _cancel_scheduled_task(self, task_id: str) -> str:
        """Cancel one of the user's tasks by id.

        Args:
            task_id: Schedule id; must look like a UUID.

        Returns:
            JSON ``{"task_id": ..., "status": "cancelled"}`` or an
            ``Error: ...`` message.
        """
        if not task_id or not looks_like_uuid(str(task_id)):
            return "Error: task_id must be a valid id."
        with db_session() as conn:
            repo = SchedulesRepository(conn)
            # Agentless: scope cancel to user + originating conversation so a
            # user can only cancel tasks they created in the current chat.
            if not self.agent_id:
                row = repo.get(task_id, self.user_id)
                if row is None or row.get("agent_id") is not None or (
                    str(row.get("origin_conversation_id") or "")
                    != str(self.conversation_id or "")
                ):
                    return (
                        "Error: scheduled task not found or already terminal."
                    )
            # NOTE(review): the agent-bound path passes only task_id + user_id
            # here, while listing is scoped to user+agent — confirm that
            # cancelling another agent's schedule is intended.
            ok = repo.cancel(task_id, self.user_id)
        if not ok:
            return "Error: scheduled task not found or already terminal."
        return json.dumps({"task_id": str(task_id), "status": "cancelled"})
def _name_from_instruction(instruction: str, *, max_len: int = 80) -> str:
"""Compact display name derived from the instruction's first line."""
first_line = instruction.strip().split("\n", 1)[0]
if len(first_line) <= max_len:
return first_line
return first_line[: max_len - 1] + ""
def _iso_utc(value: Any) -> Optional[str]:
"""Render a datetime (or ISO string) as RFC3339 UTC; ``None`` passes through."""
if value is None:
return None
if isinstance(value, str):
try:
value = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return value
if value.tzinfo is None:
value = value.replace(tzinfo=timezone.utc)
return value.astimezone(timezone.utc).isoformat().replace("+00:00", "Z")
def _safe_default_allowlist(
    agent_id: Optional[str], user_id: str,
) -> List[str]:
    """Collect ids of available tools that have no approval-gated action.

    Agent-bound: candidates are the agent's ``tools`` entries, each resolved
    through ``resolve_tool_by_id``.
    Agentless: candidates are the user's active ``user_tools`` rows followed
    by the rows from ``synthesized_default_tools`` for that user.

    Any exception while building the list is logged and yields ``[]``.
    """
    from application.agents.default_tools import (
        resolve_tool_by_id,
        synthesized_default_tools,
    )
    from application.storage.db.repositories.agents import AgentsRepository
    from application.storage.db.repositories.user_tools import UserToolsRepository
    from application.storage.db.repositories.users import UsersRepository

    def _needs_approval(tool_row: Dict[str, Any]) -> bool:
        # True as soon as one action carries a truthy ``require_approval``.
        return any(
            action.get("require_approval")
            for action in (tool_row.get("actions") or [])
        )

    allowed: List[str] = []
    try:
        with db_readonly() as conn:
            user_tools = UserToolsRepository(conn)
            if agent_id:
                agent_row = AgentsRepository(conn).get(agent_id, user_id) or {}
                for candidate in agent_row.get("tools") or []:
                    candidate_id = str(candidate)
                    resolved = resolve_tool_by_id(
                        candidate_id, user_id, user_tools_repo=user_tools,
                    )
                    if resolved and not _needs_approval(resolved):
                        allowed.append(candidate_id)
            else:
                # Agentless: explicit rows first, then the synthesized
                # defaults derived from the user's document.
                user_doc = UsersRepository(conn).get(user_id)
                allowed.extend(
                    str(explicit["id"])
                    for explicit in user_tools.list_active_for_user(user_id)
                    if not _needs_approval(explicit)
                )
                allowed.extend(
                    str(builtin["id"])
                    for builtin in synthesized_default_tools(user_doc)
                    if not _needs_approval(builtin)
                )
    except Exception:  # pragma: no cover — best-effort fallback
        logger.exception("scheduler: default allowlist build failed")
        return []
    return allowed

View File

@@ -28,7 +28,10 @@ class ToolManager:
module = importlib.import_module(f"application.agents.tools.{tool_name}")
for member_name, obj in inspect.getmembers(module, inspect.isclass):
if issubclass(obj, Tool) and obj is not Tool:
if tool_name in {"mcp_tool", "notes", "memory", "todo_list"} and user_id:
if (
tool_name in {"mcp_tool", "notes", "memory", "todo_list", "scheduler"}
and user_id
):
return obj(tool_config, user_id)
else:
return obj(tool_config)
@@ -36,7 +39,10 @@ class ToolManager:
def execute_action(self, tool_name, action_name, user_id=None, **kwargs):
if tool_name not in self.tools:
raise ValueError(f"Tool '{tool_name}' not loaded")
if tool_name in {"mcp_tool", "memory", "todo_list", "notes"} and user_id:
if (
tool_name in {"mcp_tool", "memory", "todo_list", "notes", "scheduler"}
and user_id
):
tool_config = self.config.get(tool_name, {})
tool = self.load_tool(tool_name, tool_config, user_id)
return tool.execute_action(action_name, **kwargs)

View File

@@ -0,0 +1,44 @@
"""0008 ingest_chunk_progress.status — terminal flag for stalled ingests.
The reconciler's stalled-ingest sweep had no terminal write, so a dead
ingest re-alerted every ~30 min forever. ``status`` lets it escalate a
stalled checkpoint to ``'stalled'`` once and stop re-selecting it;
``init_progress`` resets it to ``'active'`` on reingest.
Revision ID: 0008_ingest_progress_status
Revises: 0007_message_events
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0008_ingest_progress_status"
down_revision: Union[str, None] = "0007_message_events"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add ``ingest_chunk_progress.status`` and a partial index on active rows.

    The column is ``NOT NULL`` with default ``'active'`` and is restricted by
    a CHECK constraint to ``'active'`` or ``'stalled'``.
    """
    # Constant DEFAULT — metadata-only ADD COLUMN, no table rewrite.
    op.execute(
        """
ALTER TABLE ingest_chunk_progress
ADD COLUMN status TEXT NOT NULL DEFAULT 'active'
CHECK (status IN ('active', 'stalled'));
"""
    )
    # Partial index for the reconciler's stalled-ingest sweep.
    op.execute(
        "CREATE INDEX ingest_chunk_progress_active_idx "
        "ON ingest_chunk_progress (last_updated) "
        "WHERE status = 'active';"
    )
def downgrade() -> None:
    """Drop the partial index first, then the ``status`` column."""
    statements = (
        "DROP INDEX IF EXISTS ingest_chunk_progress_active_idx;",
        "ALTER TABLE ingest_chunk_progress DROP COLUMN IF EXISTS status;",
    )
    for statement in statements:
        op.execute(statement)

View File

@@ -0,0 +1,83 @@
"""0009 default chat tools — users.tool_preferences + memories.tool_id.
Adds ``users.tool_preferences`` JSONB and drops the
``memories.tool_id`` FK to ``user_tools`` (synthetic default-tool ids
have no ``user_tools`` row). Delete-cascade for real tools is kept via
an AFTER DELETE trigger on ``user_tools``. Idempotent both ways.
Revision ID: 0009_tool_preferences
Revises: 0008_ingest_progress_status
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0009_tool_preferences"
down_revision: Union[str, None] = "0008_ingest_progress_status"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Add ``users.tool_preferences`` and replace the memories FK with a trigger.

    Steps, in order: add the JSONB column, drop ``memories_tool_id_fkey``,
    (re)create the ``cleanup_tool_memories`` function, then (re)create the
    ``user_tools_cleanup_memories`` trigger that calls it.
    """
    # IF NOT EXISTS keeps a re-run from failing on an existing column.
    op.execute(
        """
ALTER TABLE users
ADD COLUMN IF NOT EXISTS tool_preferences JSONB
NOT NULL DEFAULT '{}'::jsonb;
"""
    )
    # With the FK gone, memories.tool_id may hold ids with no user_tools row.
    op.execute(
        "ALTER TABLE memories DROP CONSTRAINT IF EXISTS memories_tool_id_fkey;"
    )
    # Trigger function: delete the memories that belong to the deleted tool row.
    op.execute(
        """
CREATE OR REPLACE FUNCTION cleanup_tool_memories() RETURNS trigger
LANGUAGE plpgsql AS $$
BEGIN
DELETE FROM memories WHERE tool_id = OLD.id;
RETURN OLD;
END;
$$;
"""
    )
    # DROP-then-CREATE — no CREATE OR REPLACE TRIGGER for this signature.
    op.execute(
        "DROP TRIGGER IF EXISTS user_tools_cleanup_memories ON user_tools;"
    )
    op.execute(
        "CREATE TRIGGER user_tools_cleanup_memories "
        "AFTER DELETE ON user_tools "
        "FOR EACH ROW EXECUTE FUNCTION cleanup_tool_memories();"
    )
def downgrade() -> None:
    """Remove the cleanup trigger, restore the memories FK, drop the column.

    Destructive: memories whose ``tool_id`` has no matching ``user_tools``
    row are deleted before the foreign key is re-created.
    """
    # The trigger must go before the function it executes.
    op.execute(
        "DROP TRIGGER IF EXISTS user_tools_cleanup_memories ON user_tools;"
    )
    op.execute("DROP FUNCTION IF EXISTS cleanup_tool_memories();")
    # DESTRUCTIVE: restoring the FK requires every memories.tool_id to
    # reference a real user_tools row. Any memory written by a built-in
    # default tool (synthetic uuid5 id, no user_tools row) is permanently
    # DELETED here so the constraint can be re-created. Downgrading 0009
    # therefore loses all built-in-memory-tool data — by necessity, since
    # the restored schema cannot represent it.
    op.execute(
        """
DELETE FROM memories
WHERE tool_id IS NOT NULL
AND tool_id NOT IN (SELECT id FROM user_tools);
"""
    )
    # Re-create the FK with the ON DELETE CASCADE that the trigger stood in for.
    op.execute(
        """
ALTER TABLE memories
ADD CONSTRAINT memories_tool_id_fkey
FOREIGN KEY (tool_id) REFERENCES user_tools(id) ON DELETE CASCADE;
"""
    )
    op.execute("ALTER TABLE users DROP COLUMN IF EXISTS tool_preferences;")

View File

@@ -0,0 +1,147 @@
"""0010 scheduler — schedules + schedule_runs tables.
Revision ID: 0010_schedules
Revises: 0009_tool_preferences
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0010_schedules"
down_revision: Union[str, None] = "0009_tool_preferences"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create ``schedules`` and ``schedule_runs`` with indexes and triggers.

    ``schedules`` is created first because ``schedule_runs.schedule_id``
    references it. Both tables get a ``BEFORE UPDATE`` trigger that calls
    ``set_updated_at()``.
    """
    # CHECK constraints pin the allowed trigger_type / status / created_via
    # values and require ``cron`` for recurring rows and ``run_at`` for
    # one-time rows.
    op.execute(
        """
CREATE TABLE schedules (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id TEXT NOT NULL,
agent_id UUID NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
trigger_type TEXT NOT NULL,
name TEXT,
instruction TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'active',
cron TEXT,
run_at TIMESTAMPTZ,
timezone TEXT NOT NULL DEFAULT 'UTC',
next_run_at TIMESTAMPTZ,
last_run_at TIMESTAMPTZ,
end_at TIMESTAMPTZ,
tool_allowlist JSONB NOT NULL DEFAULT '[]'::jsonb,
model_id TEXT,
token_budget INTEGER,
origin_conversation_id UUID REFERENCES conversations(id) ON DELETE SET NULL,
created_via TEXT NOT NULL DEFAULT 'ui',
consecutive_failure_count INTEGER NOT NULL DEFAULT 0,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT schedules_trigger_type_chk
CHECK (trigger_type IN ('once', 'recurring')),
CONSTRAINT schedules_status_chk
CHECK (status IN ('active', 'paused', 'completed', 'cancelled')),
CONSTRAINT schedules_created_via_chk
CHECK (created_via IN ('chat', 'ui')),
CONSTRAINT schedules_recurring_cron_chk
CHECK (trigger_type <> 'recurring' OR cron IS NOT NULL),
CONSTRAINT schedules_once_run_at_chk
CHECK (trigger_type <> 'once' OR run_at IS NOT NULL)
);
"""
    )
    op.execute(
        "CREATE INDEX schedules_user_idx ON schedules (user_id);"
    )
    op.execute(
        "CREATE INDEX schedules_agent_idx ON schedules (agent_id);"
    )
    # Dispatcher hot path: status='active' AND next_run_at <= now().
    op.execute(
        "CREATE INDEX schedules_due_idx "
        "ON schedules (status, next_run_at) "
        "WHERE status = 'active';"
    )
    # NOTE(review): assumes set_updated_at() already exists from an earlier
    # migration — it is not created in this file.
    op.execute(
        "CREATE TRIGGER schedules_set_updated_at "
        "BEFORE UPDATE ON schedules "
        "FOR EACH ROW EXECUTE FUNCTION set_updated_at();"
    )
    # One row per run of a schedule; conversation_id / message_id carry no
    # foreign key here.
    op.execute(
        """
CREATE TABLE schedule_runs (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
schedule_id UUID NOT NULL REFERENCES schedules(id) ON DELETE CASCADE,
user_id TEXT NOT NULL,
agent_id UUID NOT NULL REFERENCES agents(id) ON DELETE CASCADE,
status TEXT NOT NULL DEFAULT 'pending',
scheduled_for TIMESTAMPTZ NOT NULL,
trigger_source TEXT NOT NULL DEFAULT 'cron',
started_at TIMESTAMPTZ,
finished_at TIMESTAMPTZ,
output TEXT,
output_truncated BOOLEAN NOT NULL DEFAULT false,
error TEXT,
error_type TEXT,
prompt_tokens INTEGER NOT NULL DEFAULT 0,
generated_tokens INTEGER NOT NULL DEFAULT 0,
conversation_id UUID,
message_id UUID,
celery_task_id TEXT,
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
CONSTRAINT schedule_runs_status_chk
CHECK (status IN (
'pending', 'running', 'success', 'failed', 'skipped', 'timeout'
)),
CONSTRAINT schedule_runs_trigger_source_chk
CHECK (trigger_source IN ('cron', 'manual')),
CONSTRAINT schedule_runs_error_type_chk
CHECK (error_type IS NULL OR error_type IN (
'auth_expired', 'tool_not_allowed', 'budget_exceeded',
'timeout', 'agent_error', 'internal', 'missed', 'overlap'
))
);
"""
    )
    # Dedup primitive: racing dispatchers hit ON CONFLICT on this index.
    op.execute(
        "CREATE UNIQUE INDEX schedule_runs_dedup_uidx "
        "ON schedule_runs (schedule_id, scheduled_for);"
    )
    # Same columns as the unique index, but with scheduled_for descending.
    op.execute(
        "CREATE INDEX schedule_runs_schedule_recent_idx "
        "ON schedule_runs (schedule_id, scheduled_for DESC);"
    )
    op.execute(
        "CREATE INDEX schedule_runs_user_idx ON schedule_runs (user_id);"
    )
    # Partial index restricted to rows currently in the 'running' state.
    op.execute(
        "CREATE INDEX schedule_runs_running_idx "
        "ON schedule_runs (status, started_at) "
        "WHERE status = 'running';"
    )
    op.execute(
        "CREATE TRIGGER schedule_runs_set_updated_at "
        "BEFORE UPDATE ON schedule_runs "
        "FOR EACH ROW EXECUTE FUNCTION set_updated_at();"
    )
def downgrade() -> None:
    """Remove ``schedule_runs`` and then ``schedules``, trigger before table."""
    # Triggers are dropped by name (easy to grep for) ahead of each
    # CASCADE table drop; the child table goes first.
    teardown = (
        "DROP TRIGGER IF EXISTS schedule_runs_set_updated_at ON schedule_runs;",
        "DROP TABLE IF EXISTS schedule_runs CASCADE;",
        "DROP TRIGGER IF EXISTS schedules_set_updated_at ON schedules;",
        "DROP TABLE IF EXISTS schedules CASCADE;",
    )
    for statement in teardown:
        op.execute(statement)

View File

@@ -0,0 +1,53 @@
"""0011 scheduler — make schedules.agent_id / schedule_runs.agent_id nullable.
Agentless schedules (created from agentless chats via the dual-registered
``scheduler`` default chat tool) carry ``agent_id IS NULL``. Existing FK +
``ON DELETE CASCADE`` semantics on ``agents(id)`` are unaffected — Postgres
only cascades when the parent row is deleted, NULL rows aren't matched.
Revision ID: 0011_schedules_nullable_agent
Revises: 0010_schedules
"""
from typing import Sequence, Union
from alembic import op
revision: str = "0011_schedules_nullable_agent"
down_revision: Union[str, None] = "0010_schedules"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Drop the NOT NULL constraint on ``agent_id`` in both scheduler tables."""
    for table in ("schedules", "schedule_runs"):
        op.execute(f"ALTER TABLE {table} ALTER COLUMN agent_id DROP NOT NULL;")
def downgrade() -> None:
    """Restore NOT NULL on ``agent_id``, refusing if any NULL rows exist.

    A PL/pgSQL block counts NULL ``agent_id`` rows in both tables and raises
    an exception when either count is non-zero, so no data is removed here.
    """
    # Destructive otherwise: agentless rows have agent_id IS NULL by design,
    # so restoring NOT NULL must fail loudly if any exist.
    op.execute(
        """
DO $$
DECLARE
sched_nulls INTEGER;
run_nulls INTEGER;
BEGIN
SELECT count(*) INTO sched_nulls
FROM schedules WHERE agent_id IS NULL;
SELECT count(*) INTO run_nulls
FROM schedule_runs WHERE agent_id IS NULL;
IF sched_nulls > 0 OR run_nulls > 0 THEN
RAISE EXCEPTION
'Cannot downgrade 0011: agentless rows present '
'(schedules=%, schedule_runs=%). '
'Delete or reassign them before retrying.',
sched_nulls, run_nulls;
END IF;
END$$;
"""
    )
    # Reached only when the guard above found no NULL agent_id rows.
    op.execute("ALTER TABLE schedule_runs ALTER COLUMN agent_id SET NOT NULL;")
    op.execute("ALTER TABLE schedules ALTER COLUMN agent_id SET NOT NULL;")

View File

@@ -325,6 +325,17 @@ class BaseAnswerResource:
"Could not set tool_executor.message_id; tool-call correlation will be missing for message_id=%s",
reserved_message_id,
)
# The reservation above may create the conversation row (first turn in
# a new chat). Propagate that fresh id to the tool_executor so tools
# that need a conversation home (e.g. ``scheduler`` in agentless chats)
# see it on the very first call instead of waiting for the next turn.
if conversation_id and getattr(agent, "tool_executor", None):
try:
agent.tool_executor.conversation_id = str(conversation_id)
except Exception:
logger.debug(
"Could not set tool_executor.conversation_id post-reserve",
)
# Per-stream monotonic SSE event id. Allocated by ``_emit`` and
# threaded through both the wire format (``id: <seq>\\n``) and

View File

@@ -6,6 +6,7 @@ from pathlib import Path
from typing import Any, Dict, Optional, Set
from application.agents.agent_creator import AgentCreator
from application.agents.default_tools import synthesized_default_tools
from application.api.answer.services.compression import CompressionOrchestrator
from application.api.answer.services.compression.token_counter import TokenCounter
from application.api.answer.services.conversation_service import ConversationService
@@ -25,6 +26,7 @@ from application.storage.db.repositories.attachments import AttachmentsRepositor
from application.storage.db.repositories.prompts import PromptsRepository
from application.storage.db.repositories.sources import SourcesRepository
from application.storage.db.repositories.user_tools import UserToolsRepository
from application.storage.db.repositories.users import UsersRepository
from application.storage.db.session import db_readonly, db_session
from application.retriever.retriever_creator import RetrieverCreator
from application.utils import (
@@ -293,7 +295,7 @@ class StreamProcessor:
return attachments
def _validate_and_set_model(self):
"""Validate and set model_id from request"""
"""Pick model_id with agent authority on agent-bound chats."""
from application.core.model_settings import ModelRegistry
requested_model = self.data.get("model_id")
@@ -302,6 +304,20 @@ class StreamProcessor:
caller_user_id = self.initial_user_id
owner_user_id = self.agent_config.get("user_id") or caller_user_id
# Agent-bound: agent's default_model_id wins, body's model_id is dropped.
agent_bound = self._agent_data is not None
if agent_bound:
agent_default_model = self.agent_config.get("default_model_id", "")
if agent_default_model and validate_model_id(
agent_default_model, user_id=owner_user_id
):
self.model_id = agent_default_model
self.model_user_id = owner_user_id
else:
self.model_id = get_default_model_id()
self.model_user_id = None
return
if requested_model:
if not validate_model_id(requested_model, user_id=caller_user_id):
registry = ModelRegistry.get_instance()
@@ -321,15 +337,8 @@ class StreamProcessor:
self.model_id = requested_model
self.model_user_id = caller_user_id
else:
agent_default_model = self.agent_config.get("default_model_id", "")
if agent_default_model and validate_model_id(
agent_default_model, user_id=owner_user_id
):
self.model_id = agent_default_model
self.model_user_id = owner_user_id
else:
self.model_id = get_default_model_id()
self.model_user_id = None
self.model_id = get_default_model_id()
self.model_user_id = None
def _get_agent_key(self, agent_id: Optional[str], user_id: Optional[str]) -> tuple:
"""Get API key for agent with access control."""
@@ -385,6 +394,7 @@ class StreamProcessor:
raise
def _get_data_from_api_key(self, api_key: str) -> Dict[str, Any]:
"""Resolve agent metadata + the unioned source set for the given key."""
with db_readonly() as conn:
agent = AgentsRepository(conn).find_by_key(api_key)
if not agent:
@@ -395,36 +405,66 @@ class StreamProcessor:
data: Dict[str, Any] = dict(agent)
data["user"] = agent.get("user_id")
# Resolve the primary source row (if any) for retriever/chunks.
source_id = agent.get("source_id")
if source_id:
source_doc = sources_repo.get(str(source_id), agent.get("user_id"))
# Active sources = primary + extras, primary first, deduplicated.
# ``_configure_source`` ignores an empty ``data["sources"]``,
# so the primary must appear in the union too — not only in
# the legacy ``data["source"]`` slot.
sources_list: list = []
seen: set = set()
owner = agent.get("user_id")
primary_id = agent.get("source_id")
# ``sources`` row may have NULL ``retriever``/``chunks`` —
# fall back to the agent's value (``dict.get`` returns None
# even when the key exists with value None).
if primary_id:
source_doc = sources_repo.get(str(primary_id), owner)
if source_doc:
data["source"] = str(source_doc["id"])
data["retriever"] = source_doc.get(
"retriever", data.get("retriever")
sid = str(source_doc["id"])
data["source"] = sid
src_retriever = source_doc.get("retriever")
if src_retriever:
data["retriever"] = src_retriever
src_chunks = source_doc.get("chunks")
if src_chunks is not None:
data["chunks"] = src_chunks
sources_list.append(
{
"id": sid,
"retriever": src_retriever or "classic",
"chunks": (
src_chunks if src_chunks is not None
else data.get("chunks", "2")
),
}
)
data["chunks"] = source_doc.get("chunks", data.get("chunks"))
seen.add(sid)
else:
data["source"] = None
else:
data["source"] = None
sources_list = []
extra = agent.get("extra_source_ids") or []
if extra:
for sid in extra:
source_doc = sources_repo.get(str(sid), agent.get("user_id"))
if source_doc:
sources_list.append(
{
"id": str(source_doc["id"]),
"retriever": source_doc.get("retriever", "classic"),
"chunks": source_doc.get(
"chunks", data.get("chunks", "2")
),
}
)
for sid_raw in agent.get("extra_source_ids") or []:
if not sid_raw:
continue
source_doc = sources_repo.get(str(sid_raw), owner)
if not source_doc:
continue
sid = str(source_doc["id"])
if sid in seen:
continue
src_retriever = source_doc.get("retriever")
src_chunks = source_doc.get("chunks")
sources_list.append(
{
"id": sid,
"retriever": src_retriever or "classic",
"chunks": (
src_chunks if src_chunks is not None
else data.get("chunks", "2")
),
}
)
seen.add(sid)
data["sources"] = sources_list
data["default_model_id"] = data.get("default_model_id", "")
return data
@@ -589,7 +629,7 @@ class StreamProcessor:
)
def _configure_retriever(self):
"""Assemble retriever config with precedence: request > agent > default."""
"""Assemble retriever config; agent's values are authoritative when bound."""
# BYOM scope: owner for shared-agent BYOM, caller for own BYOM,
# None for built-ins. Without ``user_id`` here, the doc budget
# falls back to settings.DEFAULT_LLM_TOKEN_LIMIT and overfills
@@ -598,12 +638,11 @@ class StreamProcessor:
model_id=self.model_id, user_id=self.model_user_id
)
# Start with defaults
retriever_name = "classic"
chunks = 2
# Layer agent-level config (if present)
if self._agent_data:
if self._agent_data is not None:
# Agent-bound: agent wins, body's retriever/chunks are dropped.
if self._agent_data.get("retriever"):
retriever_name = self._agent_data["retriever"]
if self._agent_data.get("chunks") is not None:
@@ -614,18 +653,17 @@ class StreamProcessor:
f"Invalid agent chunks value: {self._agent_data['chunks']}, "
"using default value 2"
)
# Explicit request values win over agent config
if "retriever" in self.data:
retriever_name = self.data["retriever"]
if "chunks" in self.data:
try:
chunks = int(self.data["chunks"])
except (ValueError, TypeError):
logger.warning(
f"Invalid request chunks value: {self.data['chunks']}, "
"using default value 2"
)
else:
if "retriever" in self.data:
retriever_name = self.data["retriever"]
if "chunks" in self.data:
try:
chunks = int(self.data["chunks"])
except (ValueError, TypeError):
logger.warning(
f"Invalid request chunks value: {self.data['chunks']}, "
"using default value 2"
)
self.retriever_config = {
"retriever_name": retriever_name,
@@ -633,7 +671,7 @@ class StreamProcessor:
"doc_token_limit": doc_token_limit,
}
# isNoneDoc without an API key forces no retrieval
# isNoneDoc without an API key forces no retrieval (agentless only)
api_key = self.data.get("api_key") or self.agent_key
if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
self.retriever_config["chunks"] = 0
@@ -708,17 +746,26 @@ class StreamProcessor:
try:
user_id = self.initial_user_id or "local"
agentless = self.agent_id is None
with db_readonly() as conn:
user_tools = UserToolsRepository(conn).list_active_for_user(user_id)
user_doc = (
UsersRepository(conn).get(user_id) if agentless else None
)
if not user_tools:
default_docs = (
synthesized_default_tools(user_doc) if agentless else []
)
tool_docs = list(user_tools) + default_docs
if not tool_docs:
return None
tools_data = {}
for tool_doc in user_tools:
for tool_doc in tool_docs:
tool_name = tool_doc.get("name")
tool_id = str(tool_doc.get("_id"))
tool_id = str(tool_doc.get("_id") or tool_doc.get("id"))
is_default = bool(tool_doc.get("default"))
if filtering_enabled:
required_actions_by_name = required_tool_actions.get(
@@ -731,11 +778,18 @@ class StreamProcessor:
if not required_actions:
continue
else:
# No template names a default tool, so running its
# actions blind would only inject noise.
if is_default:
continue
required_actions = None
tool_data = self._fetch_tool_data(tool_doc, required_actions)
if tool_data:
tools_data[tool_name] = tool_data
# Defaults reachable by synthetic id only — the name
# key stays bound to an explicit row of the same name.
if not is_default:
tools_data[tool_name] = tool_data
tools_data[tool_id] = tool_data
return tools_data if tools_data else None
@@ -982,6 +1036,7 @@ class StreamProcessor:
user_api_key=user_api_key,
user=self.initial_user_id,
decoded_token=self.decoded_token,
agent_id=agent_id,
)
tool_executor.conversation_id = conversation_id
# Restore client tools so they stay available for subsequent LLM calls
@@ -1130,6 +1185,7 @@ class StreamProcessor:
user_api_key=self.agent_config["user_api_key"],
user=user,
decoded_token=self.decoded_token,
agent_id=self.agent_id,
)
tool_executor.conversation_id = self.conversation_id
# Pass client-side tools so they get merged in get_tools()
@@ -1137,7 +1193,6 @@ class StreamProcessor:
if client_tools:
tool_executor.client_tools = client_tools
# Base agent kwargs
agent_kwargs = {
"endpoint": "stream",
"llm_name": provider or settings.LLM_PROVIDER,

View File

@@ -83,13 +83,15 @@ def resolve_tool_details(tool_ids):
"""
Resolve tool IDs to their display details.
Accepts either Postgres UUIDs or legacy Mongo ObjectId strings (mixed
lists are supported — each id is looked up via ``get_any``, which
resolves to whichever column matches). Unknown ids are silently
Accepts Postgres UUIDs, legacy Mongo ObjectId strings, or the
synthetic ids of default chat tools / agent-selectable builtins
(mixed lists are supported). Synthetic ids are resolved in memory;
real ids are looked up via ``get_any``. Unknown ids are silently
skipped.
Args:
tool_ids: List of tool IDs (UUIDs or legacy Mongo ObjectId strings).
tool_ids: List of tool IDs (UUIDs, legacy ObjectId strings, or
synthetic default-tool / builtin ids).
Returns:
List of tool details with ``id``, ``name``, and ``display_name``.
@@ -97,19 +99,37 @@ def resolve_tool_details(tool_ids):
if not tool_ids:
return []
from application.agents.default_tools import (
is_synthesized_tool_id,
synthesize_tool_by_name,
synthesized_tool_name_for_id,
)
uuid_ids: list[str] = []
legacy_ids: list[str] = []
default_details: list[dict] = []
for tid in tool_ids:
if not tid:
continue
tid_str = str(tid)
if is_synthesized_tool_id(tid_str):
synth = synthesize_tool_by_name(synthesized_tool_name_for_id(tid_str))
if synth is not None:
default_details.append(
{
"id": tid_str,
"name": synth.get("name", ""),
"display_name": synth.get("display_name", ""),
}
)
continue
if looks_like_uuid(tid_str):
uuid_ids.append(tid_str)
else:
legacy_ids.append(tid_str)
if not uuid_ids and not legacy_ids:
return []
return default_details
rows: list[dict] = []
with db_readonly() as conn:
@@ -132,7 +152,7 @@ def resolve_tool_details(tool_ids):
)
rows.extend(row_to_dict(r) for r in result.fetchall())
return [
return default_details + [
{
"id": str(tool.get("id") or tool.get("legacy_mongo_id") or ""),
"name": tool.get("name", "") or "",

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import functools
import inspect
import logging
import threading
import uuid
@@ -26,13 +27,20 @@ LEASE_HEARTBEAT_INTERVAL = 30
LEASE_RETRY_MAX = 10
def with_idempotency(task_name: str) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
def with_idempotency(
task_name: str,
*,
on_poison: Optional[Callable[[str, dict], None]] = None,
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
"""Short-circuit on completed key; gate concurrent runs via a lease.
The guard key is the caller's ``idempotency_key``, or one synthesized
from ``source_id`` so a keyless dispatch is still poison-guarded.
Entry short-circuits:
- completed row → return cached result
- live lease held → retry(countdown=LEASE_TTL_SECONDS)
- attempt_count > MAX_TASK_ATTEMPTS → poison-loop alert
- attempt_count > MAX_TASK_ATTEMPTS → poison alert; ``on_poison`` fires
Success writes ``completed``; exceptions leave ``pending`` for
autoretry until the poison-loop guard trips.
"""
@@ -40,7 +48,14 @@ def with_idempotency(task_name: str) -> Callable[[Callable[..., Any]], Callable[
def decorator(fn: Callable[..., Any]) -> Callable[..., Any]:
@functools.wraps(fn)
def wrapper(self, *args: Any, idempotency_key: Any = None, **kwargs: Any) -> Any:
key = idempotency_key if isinstance(idempotency_key, str) and idempotency_key else None
explicit_key = (
idempotency_key
if isinstance(idempotency_key, str) and idempotency_key
else None
)
# A keyless dispatch still gets the guard via a synthesized key;
# None means no anchor exists — run unguarded, as before.
key = explicit_key or _synthesize_guard_key(task_name, kwargs)
if key is None:
return fn(self, *args, idempotency_key=idempotency_key, **kwargs)
@@ -88,6 +103,9 @@ def with_idempotency(task_name: str) -> Callable[[Callable[..., Any]], Callable[
"attempts": attempt,
}
_finalize(key, poisoned, status="failed")
_run_poison_hook(
on_poison, task_name, fn, self, args, kwargs, idempotency_key,
)
return poisoned
heartbeat_thread, heartbeat_stop = _start_lease_heartbeat(
@@ -109,6 +127,45 @@ def with_idempotency(task_name: str) -> Callable[[Callable[..., Any]], Callable[
return decorator
def _synthesize_guard_key(task_name: str, kwargs: dict) -> Optional[str]:
"""Derive a deterministic guard key from ``source_id`` for a keyless dispatch.
``source_id`` is stable across broker redeliveries and unique per
upload, so the poison-loop counter survives an OOM SIGKILL. Returns
``None`` when absent — the dispatch then runs unguarded as before.
"""
source_id = kwargs.get("source_id")
if source_id:
return f"auto:{task_name}:{source_id}"
return None
def _run_poison_hook(
on_poison: Optional[Callable[[str, dict], None]],
task_name: str,
fn: Callable[..., Any],
task_self: Any,
args: tuple,
kwargs: dict,
idempotency_key: Any,
) -> None:
"""Invoke a task's poison-path hook with named call args; swallow failures.
A hook failure must never change the poison-guard outcome.
"""
if on_poison is None:
return
try:
bound = inspect.signature(fn).bind_partial(
task_self, *args, idempotency_key=idempotency_key, **kwargs,
)
on_poison(task_name, dict(bound.arguments))
except Exception:
logger.exception(
"idempotency: poison hook failed for task=%s", task_name,
)
def _lookup_completed(key: str) -> Any:
"""Return cached ``result_json`` if a completed row exists for ``key``, else None."""
with db_readonly() as conn:

View File

@@ -4,7 +4,8 @@ from __future__ import annotations
import logging
import uuid
from typing import Any, Dict, Optional
from datetime import datetime, timezone
from typing import Any, Dict, Optional, TYPE_CHECKING
from sqlalchemy import Connection
@@ -16,6 +17,9 @@ from application.storage.db.repositories.reconciliation import (
)
from application.storage.db.repositories.stack_logs import StackLogsRepository
if TYPE_CHECKING:
from application.storage.db.repositories.schedules import SchedulesRepository
logger = logging.getLogger(__name__)
@@ -46,6 +50,7 @@ def run_reconciliation() -> Dict[str, Any]:
"tool_calls_failed": 0,
"ingests_stalled": 0,
"idempotency_pending_failed": 0,
"schedule_runs_failed": 0,
}
with engine.begin() as conn:
@@ -114,11 +119,11 @@ def run_reconciliation() -> Dict[str, Any]:
},
)
# Q4: ingest checkpoints whose heartbeat has gone silent. The
# reconciler only escalates (alerts) — it doesn't kill the worker
# or roll back the partial embed. The next dispatch resumes from
# ``last_index`` thanks to the per-chunk checkpoint, so this is an
# observability sweep, not a recovery action.
# Q4: ingest checkpoints whose heartbeat has gone silent. Each is
# escalated to terminal ``status='stalled'`` and alerted once — no
# worker kill, no rollback of the partial embed. The 'stalled' flag
# ends the re-alert loop and drives the "indexing failed" badge the
# sources list derives from this row.
with engine.begin() as conn:
repo = ReconciliationRepository(conn)
for row in repo.find_and_lock_stalled_ingests():
@@ -134,8 +139,7 @@ def run_reconciliation() -> Dict[str, Any]:
"last_updated": str(row.get("last_updated")),
},
)
# Bump the heartbeat so we don't re-alert every tick.
repo.touch_ingest_progress(str(row["source_id"]))
repo.mark_ingest_stalled(str(row["source_id"]))
# Q5: idempotency rows whose lease expired with attempts exhausted.
# The wrapper's poison-loop guard normally finalises these, but if
@@ -170,9 +174,101 @@ def run_reconciliation() -> Dict[str, Any]:
},
)
# Q6: scheduler runs stuck in 'running' past the soft-time-limit window.
from application.storage.db.repositories.schedule_runs import (
ScheduleRunsRepository,
)
from application.storage.db.repositories.schedules import SchedulesRepository
from application.core.settings import settings as _settings
stuck_age = max(
15, int(_settings.SCHEDULE_RUN_TIMEOUT // 60) + 5,
)
with engine.begin() as conn:
runs_repo = ScheduleRunsRepository(conn)
schedules_repo = SchedulesRepository(conn)
for run in runs_repo.list_stuck_running(age_minutes=stuck_age):
runs_repo.update(
run["id"],
{
"status": "timeout",
"finished_at": datetime.now(timezone.utc),
"error_type": "timeout",
"error": (
"reconciler: schedule_run stuck in 'running' past "
f"{stuck_age} min"
),
},
)
schedules_repo.bump_failure_count(str(run["schedule_id"]))
_terminal_flip_once_schedule(
schedules_repo, str(run["schedule_id"]),
)
summary["schedule_runs_failed"] += 1
_emit_alert(
conn,
name="reconciler_schedule_run_timeout",
user_id=run.get("user_id"),
detail={
"run_id": str(run["id"]),
"schedule_id": str(run["schedule_id"]),
},
)
# Q7: scheduler runs orphaned in 'pending' — dispatcher committed but
# apply_async failed (broker outage / crash mid-dispatch).
with engine.begin() as conn:
runs_repo = ScheduleRunsRepository(conn)
schedules_repo = SchedulesRepository(conn)
for run in runs_repo.list_stuck_pending(age_minutes=stuck_age):
runs_repo.update(
run["id"],
{
"status": "failed",
"finished_at": datetime.now(timezone.utc),
"error_type": "internal",
"error": (
"reconciler: schedule_run stuck in 'pending' past "
f"{stuck_age} min (worker_never_started)"
),
},
)
schedules_repo.bump_failure_count(str(run["schedule_id"]))
_terminal_flip_once_schedule(
schedules_repo, str(run["schedule_id"]),
)
summary["schedule_runs_failed"] += 1
_emit_alert(
conn,
name="reconciler_schedule_run_pending",
user_id=run.get("user_id"),
detail={
"run_id": str(run["id"]),
"schedule_id": str(run["schedule_id"]),
},
)
return summary
def _terminal_flip_once_schedule(
schedules_repo: "SchedulesRepository", schedule_id: str,
) -> None:
"""Flip a once-schedule to 'completed' after its run terminates.
Recurring schedules keep firing; once-schedules would otherwise read
'active forever' since next_run_at is already NULL.
"""
schedule = schedules_repo.get_internal(schedule_id)
if schedule is None or schedule.get("trigger_type") != "once":
return
if schedule.get("status") in {"completed", "cancelled"}:
return
schedules_repo.update_internal(
schedule_id, {"status": "completed", "next_run_at": None},
)
def _emit_alert(
conn: Connection,
*,

View File

@@ -11,6 +11,7 @@ from .attachments import attachments_ns
from .conversations import conversations_ns
from .models import models_ns
from .prompts import prompts_ns
from .schedules import schedules_ns
from .sharing import sharing_ns
from .sources import sources_chunks_ns, sources_ns, sources_upload_ns
from .tools import tools_mcp_ns, tools_ns
@@ -40,6 +41,9 @@ api.add_namespace(agents_folders_ns)
# Prompts
api.add_namespace(prompts_ns)
# Schedules
api.add_namespace(schedules_ns)
# Sharing
api.add_namespace(sharing_ns)

View File

@@ -0,0 +1,186 @@
"""Schedule dispatcher: poll Postgres, claim due rows under FOR UPDATE SKIP LOCKED,
advance next_run_at atomically with the run claim, then enqueue.
Each schedule carries its own IANA timezone (croniter + zoneinfo) instead of
relying on Celery's app-wide tz, and dedup is done natively in Postgres to
avoid Redis visibility_timeout double-fires.
"""
from __future__ import annotations
import logging
from datetime import datetime, timedelta, timezone
from typing import Any, Dict, List, Optional
from application.agents.scheduler_utils import next_cron_run
from application.core.settings import settings
from application.storage.db.engine import get_engine
from application.storage.db.repositories.schedule_runs import (
ScheduleRunsRepository,
)
from application.storage.db.repositories.schedules import SchedulesRepository
logger = logging.getLogger(__name__)
def _normalize_dt(value: Any) -> Optional[datetime]:
"""Accept a datetime / ISO string / None and return a tz-aware UTC dt."""
if value is None:
return None
if isinstance(value, datetime):
return value.astimezone(timezone.utc) if value.tzinfo else (
value.replace(tzinfo=timezone.utc)
)
if isinstance(value, str):
try:
parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
except ValueError:
return None
return parsed.astimezone(timezone.utc) if parsed.tzinfo else (
parsed.replace(tzinfo=timezone.utc)
)
return None
def _compute_next(
schedule: Dict[str, Any],
*,
after: datetime,
) -> Optional[datetime]:
"""Next next_run_at for a recurring schedule, or None when past end_at."""
cron = schedule.get("cron")
if not cron:
return None
end_at = _normalize_dt(schedule.get("end_at"))
candidate = next_cron_run(cron, schedule.get("timezone"), after=after)
if end_at is not None and candidate > end_at:
return None
return candidate
def _advance_recurring(
    schedules_repo: "SchedulesRepository",
    schedule: Dict[str, Any],
    *,
    after: datetime,
    now: datetime,
) -> None:
    """Advance a recurring schedule to its next tick, or complete it.

    When no further tick exists (no cron, or the next tick is past
    ``end_at``) the schedule is flipped to 'completed' so it never reads
    as active with a NULL ``next_run_at``.

    Args:
        schedules_repo: Repository bound to the dispatcher transaction.
        schedule: The due schedule row.
        after: Instant the next cron tick is computed from.
        now: Dispatcher tick time, stored as ``last_run_at``.
    """
    nxt = _compute_next(schedule, after=after)
    if nxt is None:
        # NOTE(review): execute_scheduled_run_body skips cron-fired runs
        # whose schedule is already 'completed' — confirm the final tick
        # before end_at is still expected to execute.
        fields: Dict[str, Any] = {
            "status": "completed", "next_run_at": None, "last_run_at": now,
        }
    else:
        fields = {"next_run_at": nxt, "last_run_at": now}
    schedules_repo.update_internal(str(schedule["id"]), fields)


def dispatch_due_runs() -> Dict[str, int]:
    """Run one dispatcher tick and enqueue every due schedule run.

    Inside a single transaction each due schedule is either skipped
    (misfire grace exceeded, or a previous run still active) or claimed
    by inserting a pending run row, and its ``next_run_at`` is advanced.
    Celery tasks are enqueued only after the transaction commits.

    Returns:
        Counters ``enqueued`` / ``skipped`` / ``advanced`` for logging.
        All zero when ``POSTGRES_URI`` is not configured.
    """
    if not settings.POSTGRES_URI:
        return {"enqueued": 0, "skipped": 0, "advanced": 0}

    from application.api.user.tasks import execute_scheduled_run

    now = datetime.now(timezone.utc)
    grace = timedelta(seconds=max(0, settings.SCHEDULE_MISFIRE_GRACE))
    engine = get_engine()
    counts = {"enqueued": 0, "skipped": 0, "advanced": 0}
    enqueue_args: List[str] = []

    with engine.begin() as conn:
        schedules_repo = SchedulesRepository(conn)
        runs_repo = ScheduleRunsRepository(conn)
        for schedule in schedules_repo.list_due():
            scheduled_for = _normalize_dt(schedule.get("next_run_at"))
            if scheduled_for is None:
                continue
            schedule_id = str(schedule["id"])
            trigger_type = schedule.get("trigger_type")
            agent_id_raw = schedule.get("agent_id")
            agent_id = str(agent_id_raw) if agent_id_raw else None

            # Misfire grace applies to recurring only — once-tasks fire late, not vanish.
            if (
                trigger_type == "recurring"
                and grace > timedelta(0)
                and (now - scheduled_for) > grace
            ):
                runs_repo.record_skipped(
                    schedule_id,
                    schedule["user_id"],
                    agent_id,
                    scheduled_for,
                    error_type="missed",
                    error="misfire grace exceeded",
                )
                counts["skipped"] += 1
                # Resume from "now": the missed ticks are not replayed.
                _advance_recurring(schedules_repo, schedule, after=now, now=now)
                counts["advanced"] += 1
                continue

            # Overlap guard: never enqueue while a previous run is active.
            if runs_repo.has_active_run(schedule_id):
                runs_repo.record_skipped(
                    schedule_id,
                    schedule["user_id"],
                    agent_id,
                    scheduled_for,
                    error_type="overlap",
                    error="previous run still active",
                )
                counts["skipped"] += 1
                if trigger_type == "recurring":
                    # Also completes the schedule when no tick remains;
                    # previously it stayed 'active' with next_run_at NULL.
                    _advance_recurring(
                        schedules_repo, schedule, after=scheduled_for, now=now,
                    )
                else:
                    # Once: null next_run_at so we don't re-pick; the in-flight
                    # run will terminal-flip the schedule when it finishes.
                    schedules_repo.update_internal(
                        schedule_id,
                        {"next_run_at": None, "last_run_at": now},
                    )
                continue

            # Dedup primitive: two racing dispatchers see exactly one row.
            run = runs_repo.record_pending(
                schedule_id,
                schedule["user_id"],
                agent_id,
                scheduled_for,
                trigger_source="cron",
            )
            if run is None:
                counts["skipped"] += 1
            else:
                enqueue_args.append(str(run["id"]))
                counts["enqueued"] += 1

            # Advance: recurring picks next tick, once nulls next_run_at
            # (worker terminal-flips status on completion).
            if trigger_type == "recurring":
                _advance_recurring(
                    schedules_repo, schedule, after=scheduled_for, now=now,
                )
            else:
                schedules_repo.update_internal(
                    schedule_id,
                    {"next_run_at": None, "last_run_at": now},
                )
            counts["advanced"] += 1

    # Enqueue after commit so the worker sees the schedule_runs row on pick-up.
    for run_id in enqueue_args:
        try:
            execute_scheduled_run.apply_async(args=[run_id], queue="docsgpt")
        except Exception:
            # Best effort: the run row stays 'pending' for later inspection.
            logger.exception(
                "dispatcher: failed to enqueue execute_scheduled_run for %s",
                run_id,
            )
    return counts

View File

@@ -0,0 +1,433 @@
"""Body of ``execute_scheduled_run`` — runs a single agent execution.
Not a DURABLE_TASK: agent runs have side effects (messages, CRM writes)
and blind auto-retry would double them. Failures after agent.gen starts
are terminal and recorded; only the pre-start load is retry-safe.
"""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from celery.exceptions import SoftTimeLimitExceeded
from sqlalchemy import text as sql_text
from application.agents.headless_runner import run_agent_headless
from application.core.settings import settings
from application.events.publisher import publish_user_event
from application.storage.db.base_repository import row_to_dict
from application.storage.db.engine import get_engine
from application.storage.db.repositories.conversations import (
ConversationsRepository,
)
from application.storage.db.repositories.schedule_runs import (
ScheduleRunsRepository,
)
from application.storage.db.repositories.schedules import SchedulesRepository
from application.storage.db.repositories.token_usage import TokenUsageRepository
logger = logging.getLogger(__name__)
# Cap output verbatim in the run log; beyond the cap we keep the head and stamp output_truncated.
_OUTPUT_CAP_CHARS = 24_000
def _agent_config_for_schedule(schedule: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Return the agent configuration a scheduled run should execute with.

    Agent-bound schedules load the ``agents`` row by id. Agentless
    schedules (no ``agent_id``) get an in-memory, agent-shaped config
    built by ``_ephemeral_agent_for_agentless``.

    Args:
        schedule: Schedule row; ``agent_id`` selects the path.

    Returns:
        The agent dict, or ``None`` when the referenced agent row does not
        exist (or the ephemeral config cannot be built).
    """
    agent_id = schedule.get("agent_id")
    if not agent_id:
        return _ephemeral_agent_for_agentless(schedule)

    query = sql_text("SELECT * FROM agents WHERE id = CAST(:id AS uuid)")
    with get_engine().connect() as conn:
        row = conn.execute(query, {"id": str(agent_id)}).fetchone()
    if row is None:
        return None
    return row_to_dict(row)
def _ephemeral_agent_for_agentless(
schedule: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Build an agent-shaped config for a schedule with no parent agent."""
# ``agent_config["tools"]`` is intentionally omitted: ``run_agent_headless``
# never reads it. The runtime toolset is rebuilt by
# ``ToolExecutor._get_user_tools(owner)`` at fire time — same dereference
# the agent-bound path uses, so a tool added/disabled after creation is
# reflected. Headless mode there filters chat-only tools (``scheduler``).
user_id = schedule.get("user_id")
if not user_id:
return None
return {
"id": None,
"user_id": user_id,
"agent_type": "classic",
"retriever": "classic",
"chunks": 2,
"prompt_id": "default",
"source_id": None,
"default_model_id": schedule.get("model_id") or "",
}
def _load_chat_history(schedule: Dict[str, Any]) -> list:
"""Originating conversation history (one-time only; recurring has none)."""
origin = schedule.get("origin_conversation_id")
if not origin or schedule.get("trigger_type") != "once":
return []
user_id = schedule.get("user_id")
if not user_id:
return []
try:
engine = get_engine()
with engine.connect() as conn:
conv = ConversationsRepository(conn).get_any(str(origin), user_id)
if conv is None:
return []
messages = ConversationsRepository(conn).get_messages(str(conv["id"]))
except Exception:
logger.exception("scheduler: failed loading chat history")
return []
history: list = []
for msg in messages:
if msg.get("prompt") and msg.get("response"):
history.append({
"prompt": msg["prompt"],
"response": msg["response"],
})
return history
def _publish_run_event(
event_type: str, run: Dict[str, Any], schedule: Dict[str, Any], **extra: Any
) -> None:
"""Best-effort SSE publish for a scheduler run state transition."""
user_id = run.get("user_id") or schedule.get("user_id")
if not user_id:
return
agent_id_raw = schedule.get("agent_id")
payload = {
"run_id": str(run["id"]),
"schedule_id": str(schedule["id"]),
"agent_id": str(agent_id_raw) if agent_id_raw else None,
"trigger_type": schedule.get("trigger_type"),
"status": run.get("status"),
**extra,
}
try:
publish_user_event(
user_id,
event_type,
payload,
scope={"kind": "schedule", "id": str(schedule["id"])},
)
except Exception:
logger.exception(
"scheduler: SSE publish failed event=%s run=%s",
event_type, run.get("id"),
)
def _publish_message_appended(
    user_id: str,
    conversation_id: str,
    message: Dict[str, Any],
    schedule_id: str,
    run_id: str,
) -> None:
    """Publish ``schedule.message.appended`` for a one-time run's chat turn.

    Best effort: building the payload and publishing both happen inside
    the ``try``, so any failure is logged and swallowed.

    Args:
        user_id: Recipient of the event.
        conversation_id: Conversation the message was appended to.
        message: Appended message row; reads ``id`` and ``position``.
        schedule_id: Schedule that produced the message.
        run_id: Run that produced the message.
    """
    try:
        conversation_key = str(conversation_id)
        payload = {
            "conversation_id": conversation_key,
            "message_id": str(message["id"]),
            "schedule_id": str(schedule_id),
            "run_id": str(run_id),
            "position": int(message.get("position", 0)),
        }
        publish_user_event(
            user_id,
            "schedule.message.appended",
            payload,
            scope={"kind": "conversation", "id": str(conversation_id)},
        )
    except Exception:
        logger.exception(
            "scheduler: message.appended publish failed run=%s", run_id,
        )
def _append_one_time_turn(
schedule: Dict[str, Any],
run: Dict[str, Any],
outcome: Dict[str, Any],
) -> Optional[Dict[str, Any]]:
"""Insert an assistant turn in the originating conversation (once only)."""
origin = schedule.get("origin_conversation_id")
if not origin:
return None
engine = get_engine()
user_id = schedule.get("user_id")
metadata = {
"scheduled": True,
"schedule_id": str(schedule["id"]),
"run_id": str(run["id"]),
"scheduled_run_at": (
run.get("scheduled_for")
if isinstance(run.get("scheduled_for"), str)
else None
),
}
with engine.begin() as conn:
conv = ConversationsRepository(conn).get_any(str(origin), user_id)
if conv is None:
return None
message = ConversationsRepository(conn).append_message(
str(conv["id"]),
{
"prompt": schedule.get("instruction") or "",
"response": outcome.get("answer") or "",
"thought": outcome.get("thought") or "",
"sources": outcome.get("sources") or [],
"tool_calls": outcome.get("tool_calls") or [],
"model_id": outcome.get("model_id"),
"metadata": metadata,
},
)
return message
def _flip_once_schedule_if_done(
    schedules_repo: "SchedulesRepository",
    schedule: Dict[str, Any],
    run: Dict[str, Any],
) -> None:
    """Mark an active once-schedule 'completed' after a cron-fired run ends."""
    # Manual runs on a still-active once-schedule leave the future fire intact.
    if (
        schedule.get("trigger_type") == "once"
        and run.get("trigger_source") != "manual"
        and schedule.get("status") == "active"
    ):
        schedules_repo.update_internal(
            str(schedule["id"]),
            {"status": "completed", "next_run_at": None},
        )


def execute_scheduled_run_body(run_id: str, celery_task_id: Optional[str]) -> Dict[str, Any]:
    """Execute one scheduled run by id and record its outcome.

    Steps: load the run and its schedule, refuse non-pending runs and
    (for non-manual runs) cancelled/completed schedules, resolve the agent
    config, mark the run as running, execute the agent headlessly, then
    persist status/output/token usage, update the schedule's failure
    counter (auto-pausing recurring schedules at the configured
    threshold), append the result to the origin conversation for
    successful one-time runs, and publish SSE events.

    Args:
        run_id: Id of the ``schedule_runs`` row to execute.
        celery_task_id: Id of the executing Celery task, stored on the run.

    Returns:
        A tracing dict: ``{"status", "run_id", "error_type"}`` for executed
        runs, or ``{"status", "reason"}`` when the run was skipped or failed
        before starting.
    """
    if not settings.POSTGRES_URI:
        return {"status": "skipped", "reason": "POSTGRES_URI not set"}
    engine = get_engine()
    with engine.connect() as conn:
        run = ScheduleRunsRepository(conn).get_internal(run_id)
        if run is None:
            return {"status": "skipped", "reason": "run not found"}
        schedule = SchedulesRepository(conn).get_internal(str(run["schedule_id"]))
        if schedule is None:
            return {"status": "skipped", "reason": "schedule not found"}

    # Refuse non-runnable terminal states; manual run-now bypasses.
    if run.get("status") != "pending":
        return {"status": "skipped", "reason": f"run status={run.get('status')}"}
    if schedule.get("status") in {"cancelled", "completed"} and run.get(
        "trigger_source"
    ) != "manual":
        with engine.begin() as conn:
            ScheduleRunsRepository(conn).update(
                run_id,
                {
                    "status": "skipped",
                    "finished_at": datetime.now(timezone.utc),
                    "error_type": "internal",
                    "error": "schedule no longer active",
                },
            )
        return {"status": "skipped", "reason": "schedule terminal"}

    agent_config = _agent_config_for_schedule(schedule)
    if agent_config is None:
        with engine.begin() as conn:
            updated = ScheduleRunsRepository(conn).update(
                run_id,
                {
                    "status": "failed",
                    "finished_at": datetime.now(timezone.utc),
                    "error_type": "internal",
                    "error": "agent missing",
                },
            )
            schedules_repo = SchedulesRepository(conn)
            schedules_repo.bump_failure_count(str(schedule["id"]))
            # The run is terminal: a once-schedule must not stay 'active'
            # with no next_run_at left to fire.
            _flip_once_schedule_if_done(schedules_repo, schedule, run)
        _publish_run_event("schedule.run.failed", updated or run, schedule,
                           error="agent missing")
        return {"status": "failed", "reason": "agent missing"}

    with engine.begin() as conn:
        if not ScheduleRunsRepository(conn).mark_running(run_id, celery_task_id):
            return {"status": "skipped", "reason": "lost race to mark_running"}

    started = datetime.now(timezone.utc)
    instruction = schedule.get("instruction") or ""
    allowlist = schedule.get("tool_allowlist") or []
    chat_history = _load_chat_history(schedule)

    outcome: Dict[str, Any]
    error_type: Optional[str] = None
    error_text: Optional[str] = None
    timed_out = False
    try:
        outcome = run_agent_headless(
            agent_config,
            instruction,
            tool_allowlist=allowlist,
            model_id_override=schedule.get("model_id"),
            endpoint="schedule",
            conversation_id=schedule.get("origin_conversation_id"),
            chat_history=chat_history,
        )
    except SoftTimeLimitExceeded:
        timed_out = True
        outcome = {"answer": "", "tool_calls": [], "sources": [], "thought": ""}
        error_type = "timeout"
        error_text = "run exceeded soft time limit"
    except Exception as exc:
        # Failures after the agent starts are terminal and recorded, never retried.
        outcome = {"answer": "", "tool_calls": [], "sources": [], "thought": ""}
        error_type = "agent_error"
        error_text = str(exc)
        logger.exception("scheduler: agent run failed run=%s", run_id)
    finished = datetime.now(timezone.utc)

    # Headless denial with no usable output → tool_not_allowed.
    if (
        error_type is None
        and (outcome.get("denied") or [])
        and not (outcome.get("answer") or "").strip()
    ):
        error_type = "tool_not_allowed"
        error_text = "headless allowlist blocked required tool"

    prompt_tokens = int(outcome.get("prompt_tokens", 0) or 0)
    generated_tokens = int(outcome.get("generated_tokens", 0) or 0)
    used_tokens = prompt_tokens + generated_tokens
    # The budget is checked after the fact; a budget of 0/None means "no budget".
    if (
        schedule.get("token_budget") is not None
        and int(schedule["token_budget"]) > 0
        and used_tokens > int(schedule["token_budget"])
    ):
        error_type = "budget_exceeded"
        error_text = (
            f"used {used_tokens} tokens exceeds budget "
            f"{schedule['token_budget']}"
        )

    answer = outcome.get("answer") or ""
    truncated = False
    if len(answer) > _OUTPUT_CAP_CHARS:
        answer = answer[:_OUTPUT_CAP_CHARS]
        truncated = True

    new_status = (
        "timeout" if timed_out else ("failed" if error_type else "success")
    )

    with engine.begin() as conn:
        update_fields: Dict[str, Any] = {
            "status": new_status,
            "started_at": started,
            "finished_at": finished,
            "output": answer or None,
            "output_truncated": truncated,
            "prompt_tokens": prompt_tokens,
            "generated_tokens": generated_tokens,
        }
        if error_type:
            update_fields["error_type"] = error_type
            update_fields["error"] = error_text
        updated_run = ScheduleRunsRepository(conn).update(run_id, update_fields)

        if used_tokens > 0:
            agent_id_raw = schedule.get("agent_id")
            try:
                # SAVEPOINT: on Postgres a failed statement aborts the whole
                # transaction, so without it the "best effort" insert would
                # also break the run/schedule updates around it.
                with conn.begin_nested():
                    TokenUsageRepository(conn).insert(
                        user_id=schedule.get("user_id"),
                        api_key=None,
                        prompt_tokens=prompt_tokens,
                        generated_tokens=generated_tokens,
                        timestamp=finished,
                        agent_id=str(agent_id_raw) if agent_id_raw else None,
                        source="schedule",
                        request_id=str(run_id),
                    )
            except Exception:
                logger.exception(
                    "scheduler: token_usage insert failed run=%s", run_id,
                )

        schedules_repo = SchedulesRepository(conn)
        autopaused = False
        if new_status == "success":
            schedules_repo.reset_failure_count(str(schedule["id"]))
        elif new_status in ("failed", "timeout"):
            count = schedules_repo.bump_failure_count(str(schedule["id"]))
            if (
                settings.SCHEDULE_AUTOPAUSE_FAILURES > 0
                and count >= settings.SCHEDULE_AUTOPAUSE_FAILURES
                and schedule.get("trigger_type") == "recurring"
            ):
                autopaused = schedules_repo.autopause(str(schedule["id"]))

        # Once: terminal-flip on cron-fired runs only; manual runs on a
        # still-active once-schedule leave the future cadence intact.
        _flip_once_schedule_if_done(schedules_repo, schedule, run)

    appended: Optional[Dict[str, Any]] = None
    if (
        schedule.get("trigger_type") == "once"
        and new_status == "success"
        and schedule.get("origin_conversation_id")
    ):
        try:
            appended = _append_one_time_turn(schedule, updated_run or run, outcome)
        except Exception:
            logger.exception(
                "scheduler: append turn failed run=%s", run_id,
            )
        if appended is not None:
            # Link the run to the chat turn it produced.
            with engine.begin() as conn:
                ScheduleRunsRepository(conn).update(
                    run_id,
                    {
                        "conversation_id": str(appended["conversation_id"]),
                        "message_id": str(appended["id"]),
                    },
                )
            _publish_message_appended(
                schedule.get("user_id"),
                str(appended["conversation_id"]),
                appended,
                str(schedule["id"]),
                run_id,
            )

    if new_status == "success":
        _publish_run_event("schedule.run.completed", updated_run or run, schedule)
    else:
        _publish_run_event(
            "schedule.run.failed",
            updated_run or run,
            schedule,
            error_type=error_type,
            error=error_text,
        )
    if autopaused:
        _publish_run_event(
            "schedule.autopaused",
            updated_run or run,
            schedule,
            consecutive_failure_count=settings.SCHEDULE_AUTOPAUSE_FAILURES,
        )

    return {
        "status": new_status,
        "run_id": run_id,
        "error_type": error_type,
    }

View File

@@ -0,0 +1,5 @@
"""Schedules module."""
from .routes import schedules_ns
__all__ = ["schedules_ns"]

View File

@@ -0,0 +1,550 @@
"""Schedules REST API (owner-scoped via request.decoded_token)."""
from __future__ import annotations
import logging
from datetime import datetime, timezone
from typing import Any, Dict, Optional
from flask import current_app, jsonify, make_response, request
from flask_restx import Namespace, Resource, fields
from application.agents.scheduler_utils import (
ScheduleValidationError,
clamp_once_horizon,
cron_interval_seconds,
next_cron_run,
parse_cron,
parse_run_at,
resolve_timezone,
)
from application.api import api
from application.core.settings import settings
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.agents import AgentsRepository
from application.storage.db.repositories.schedule_runs import (
ScheduleRunsRepository,
)
from application.storage.db.repositories.schedules import SchedulesRepository
from application.storage.db.session import db_readonly, db_session
logger = logging.getLogger(__name__)
schedules_ns = Namespace(
"schedules", description="Agent schedule management", path="/api",
)
def _ok(data: Any, status: int = 200):
    """Wrap ``data`` as a JSON response with the given HTTP status."""
    body = jsonify(data)
    return make_response(body, status)
def _err(message: str, status: int = 400):
    """Build a JSON error response of the form ``{"success": False, "message": ...}``."""
    body = jsonify({"success": False, "message": message})
    return make_response(body, status)
def _format_schedule(row: Dict[str, Any]) -> Dict[str, Any]:
"""Render a schedule row for the API (id-as-string + ISO timestamps)."""
if not row:
return {}
out = dict(row)
for key in (
"id", "agent_id", "origin_conversation_id",
):
if out.get(key) is not None:
out[key] = str(out[key])
out.pop("_id", None) # drop dual-id legacy mirror
return out
def _format_run(row: Dict[str, Any]) -> Dict[str, Any]:
"""Render a schedule_run row for the API."""
if not row:
return {}
out = dict(row)
for key in (
"id", "schedule_id", "agent_id", "conversation_id", "message_id",
):
if out.get(key) is not None:
out[key] = str(out[key])
out.pop("_id", None)
return out
def _agent_owned(agent_id: str, user_id: str) -> Optional[Dict[str, Any]]:
    """Look up an agent by id for the given user.

    Args:
        agent_id: Agent id from the URL; must look like a UUID.
        user_id: Id of the requesting user, passed to the repository lookup.

    Returns:
        Whatever ``AgentsRepository.get_any`` returns for the pair, or
        ``None`` without touching the database when ``agent_id`` is not
        UUID-shaped.
    """
    if looks_like_uuid(str(agent_id)):
        with db_readonly() as conn:
            return AgentsRepository(conn).get_any(agent_id, user_id)
    return None
def _user_id() -> Optional[str]:
    """Return the ``sub`` claim of the request's decoded token, if any.

    Returns:
        The user id, or ``None`` when the request carries no (or an empty)
        ``decoded_token``.
    """
    decoded = getattr(request, "decoded_token", None)
    return decoded.get("sub") if decoded else None
@schedules_ns.route("/agents/<string:agent_id>/schedules")
class AgentSchedules(Resource):
    """List and create schedules attached to one of the caller's agents."""

    @api.doc(description="List schedules for an agent (recurring + one-time).")
    def get(self, agent_id):
        """Return every schedule of ``agent_id`` for the authenticated user.

        Responds 401 without a user, 404 when the agent lookup finds
        nothing for this user, 500 on repository errors.
        """
        user_id = _user_id()
        if not user_id:
            return _err("unauthorized", 401)
        agent = _agent_owned(agent_id, user_id)
        if agent is None:
            return _err("agent not found", 404)
        try:
            with db_readonly() as conn:
                rows = SchedulesRepository(conn).list_for_agent(
                    str(agent["id"]), user_id,
                )
        except Exception as exc:
            current_app.logger.error("list schedules failed: %s", exc, exc_info=True)
            return _err("internal error", 500)
        return _ok({"schedules": [_format_schedule(r) for r in rows]})

    create_model = api.model(
        "ScheduleCreate",
        {
            "instruction": fields.String(required=True),
            "trigger_type": fields.String(
                required=False,
                description="'recurring' (default) or 'once'",
            ),
            "cron": fields.String(
                required=False,
                description="Required when trigger_type == 'recurring'",
            ),
            "run_at": fields.String(
                required=False,
                description="ISO 8601 — required when trigger_type == 'once'",
            ),
            "timezone": fields.String(required=False),
            "name": fields.String(required=False),
            "end_at": fields.String(required=False, description="ISO 8601"),
            "tool_allowlist": fields.List(fields.String, required=False),
            "model_id": fields.String(required=False),
            "token_budget": fields.Integer(required=False),
        },
    )

    @api.expect(create_model)
    @api.doc(description="Create a schedule (recurring or one-time) for an agent.")
    def post(self, agent_id):
        """Validate the payload and create a recurring or one-time schedule.

        Responds 201 with the created schedule, 400 on validation errors,
        401/404 for auth/ownership failures, 429 when the per-user schedule
        cap is reached and 500 on repository errors.
        """
        user_id = _user_id()
        if not user_id:
            return _err("unauthorized", 401)
        agent = _agent_owned(agent_id, user_id)
        if agent is None:
            return _err("agent not found", 404)

        data = request.get_json(silent=True) or {}
        instruction = (data.get("instruction") or "").strip()
        tz_name = (data.get("timezone") or "UTC").strip() or "UTC"
        trigger_type = (data.get("trigger_type") or "recurring").strip().lower()
        if trigger_type not in ("recurring", "once"):
            return _err("trigger_type must be 'recurring' or 'once'")
        if not instruction:
            return _err("instruction is required")
        try:
            resolve_timezone(tz_name)
        except ScheduleValidationError as exc:
            return _err(str(exc))

        token_budget = data.get("token_budget")
        if token_budget is not None:
            try:
                token_budget = int(token_budget)
                if token_budget < 0:
                    raise ValueError
            except (TypeError, ValueError):
                return _err("token_budget must be a non-negative integer")

        with db_readonly() as conn:
            count = SchedulesRepository(conn).count_active_for_user(user_id)
        # A non-positive SCHEDULE_MAX_PER_USER disables the cap.
        if (
            settings.SCHEDULE_MAX_PER_USER > 0
            and count >= settings.SCHEDULE_MAX_PER_USER
        ):
            return _err("max schedules per user reached", 429)

        if trigger_type == "once":
            run_at_raw = (data.get("run_at") or "").strip()
            if not run_at_raw:
                return _err("run_at is required for trigger_type 'once'")
            try:
                fire = parse_run_at(run_at_raw, tz_name)
                clamp_once_horizon(
                    fire, settings.SCHEDULE_ONCE_MAX_HORIZON,
                )
            except ScheduleValidationError as exc:
                return _err(str(exc))
            try:
                with db_session() as conn:
                    created = SchedulesRepository(conn).create(
                        user_id=user_id,
                        agent_id=str(agent["id"]),
                        trigger_type="once",
                        instruction=instruction,
                        run_at=fire,
                        next_run_at=fire,
                        timezone=tz_name,
                        name=(data.get("name") or "").strip() or None,
                        tool_allowlist=data.get("tool_allowlist") or [],
                        model_id=(data.get("model_id") or None),
                        token_budget=token_budget,
                        created_via="ui",
                    )
            except Exception as exc:
                current_app.logger.error(
                    "create one-time schedule failed: %s", exc, exc_info=True,
                )
                return _err("internal error", 500)
            return _ok({"schedule": _format_schedule(created)}, status=201)

        cron = (data.get("cron") or "").strip()
        if not cron:
            return _err("cron is required")
        try:
            parse_cron(cron)
        except ScheduleValidationError as exc:
            return _err(str(exc))

        # A non-positive SCHEDULE_MIN_INTERVAL disables the cadence floor.
        min_interval = max(0, int(settings.SCHEDULE_MIN_INTERVAL))
        if min_interval > 0:
            try:
                cadence = cron_interval_seconds(cron, tz_name)
            except ScheduleValidationError as exc:
                return _err(str(exc))
            if cadence < min_interval:
                return _err(
                    "cadence below minimum interval "
                    f"({cadence}s < {min_interval}s)",
                )

        end_at = None
        if data.get("end_at"):
            try:
                end_at = datetime.fromisoformat(
                    str(data["end_at"]).replace("Z", "+00:00"),
                )
            except ValueError:
                return _err("invalid end_at")
            if end_at.tzinfo is None:
                # An offset-less end_at is read as UTC. Left naive, the
                # comparison with the next cron tick below (computed from a
                # tz-aware ``now``) raises TypeError and surfaces as a 500.
                end_at = end_at.replace(tzinfo=timezone.utc)

        try:
            next_run = next_cron_run(cron, tz_name, after=datetime.now(timezone.utc))
        except ScheduleValidationError as exc:
            return _err(str(exc))
        if end_at is not None and next_run > end_at:
            return _err("end_at is before the first cron tick")

        try:
            with db_session() as conn:
                created = SchedulesRepository(conn).create(
                    user_id=user_id,
                    agent_id=str(agent["id"]),
                    trigger_type="recurring",
                    instruction=instruction,
                    cron=cron,
                    timezone=tz_name,
                    next_run_at=next_run,
                    end_at=end_at,
                    name=(data.get("name") or "").strip() or None,
                    tool_allowlist=data.get("tool_allowlist") or [],
                    model_id=(data.get("model_id") or None),
                    token_budget=token_budget,
                    created_via="ui",
                )
        except Exception as exc:
            current_app.logger.error(
                "create schedule failed: %s", exc, exc_info=True,
            )
            return _err("internal error", 500)
        return _ok({"schedule": _format_schedule(created)}, status=201)
@schedules_ns.route("/schedules/<string:schedule_id>")
class ScheduleResource(Resource):
    # Read, edit, pause/resume and delete a single schedule.
    #
    # Every handler resolves the caller with ``_user_id()`` (401 when absent)
    # and rejects ids that fail ``looks_like_uuid`` (400) before opening a
    # database session. Repository lookups receive ``user_id`` together with
    # the schedule id; a missing row is reported as 404.

    @staticmethod
    def _ensure_aware(moment):
        """Return ``moment`` as a timezone-aware datetime.

        A naive datetime cannot be compared with ``datetime.now(timezone.utc)``
        (Python raises ``TypeError``), so offset-less values are tagged as UTC.
        Non-datetime values are returned unchanged.
        """
        # NOTE(review): offset-less timestamps are interpreted as UTC here;
        # confirm whether the schedule's own timezone should apply instead.
        if isinstance(moment, datetime) and (
            moment.tzinfo is None or moment.utcoffset() is None
        ):
            return moment.replace(tzinfo=timezone.utc)
        return moment

    @staticmethod
    def _parse_iso_datetime(value):
        """Parse an ISO-8601 value into a timezone-aware datetime.

        A ``Z`` suffix is rewritten to ``+00:00`` before parsing, as the
        handlers did previously.

        Raises:
            ValueError: when ``value`` is not a valid ISO-8601 timestamp.
        """
        parsed = datetime.fromisoformat(str(value).replace("Z", "+00:00"))
        return ScheduleResource._ensure_aware(parsed)

    @api.doc(description="Get schedule by id.")
    def get(self, schedule_id):
        # Return the formatted schedule, or 404 when the lookup finds nothing.
        user_id = _user_id()
        if not user_id:
            return _err("unauthorized", 401)
        if not looks_like_uuid(schedule_id):
            return _err("invalid schedule id", 400)
        with db_readonly() as conn:
            row = SchedulesRepository(conn).get(schedule_id, user_id)
        if row is None:
            return _err("schedule not found", 404)
        return _ok({"schedule": _format_schedule(row)})

    @api.doc(description="Edit a schedule's editable fields.")
    def put(self, schedule_id):
        # Validate each supplied field into ``fields_in`` first; only keys
        # present in the JSON body are touched. The database is opened only
        # once the payload has passed validation.
        user_id = _user_id()
        if not user_id:
            return _err("unauthorized", 401)
        if not looks_like_uuid(schedule_id):
            return _err("invalid schedule id", 400)
        data = request.get_json(silent=True) or {}
        fields_in: Dict[str, Any] = {}
        if "instruction" in data:
            inst = (data["instruction"] or "").strip()
            if not inst:
                return _err("instruction must not be empty")
            fields_in["instruction"] = inst
        if "cron" in data:
            cron = (data["cron"] or "").strip()
            try:
                parse_cron(cron)
            except ScheduleValidationError as exc:
                return _err(str(exc))
            fields_in["cron"] = cron
        if "timezone" in data:
            tz_name = (data["timezone"] or "UTC").strip() or "UTC"
            try:
                resolve_timezone(tz_name)
            except ScheduleValidationError as exc:
                return _err(str(exc))
            fields_in["timezone"] = tz_name
        if "tool_allowlist" in data:
            fields_in["tool_allowlist"] = data["tool_allowlist"] or []
        if "name" in data:
            fields_in["name"] = (data["name"] or "").strip() or None
        if "model_id" in data:
            fields_in["model_id"] = (data["model_id"] or None)
        if "token_budget" in data:
            tb = data["token_budget"]
            if tb is not None:
                try:
                    tb = int(tb)
                    if tb < 0:
                        raise ValueError
                except (TypeError, ValueError):
                    return _err("token_budget must be a non-negative integer")
            # ``None`` is stored as-is (an explicit null in the payload).
            fields_in["token_budget"] = tb
        if "end_at" in data:
            if data["end_at"]:
                try:
                    # Parsed as timezone-aware so later comparisons with
                    # aware datetimes cannot raise TypeError.
                    fields_in["end_at"] = self._parse_iso_datetime(
                        data["end_at"],
                    )
                except ValueError:
                    return _err("invalid end_at")
            else:
                fields_in["end_at"] = None
        # Recompute next_run_at when cron/tz changes.
        with db_session() as conn:
            existing = SchedulesRepository(conn).get(schedule_id, user_id)
            if existing is None:
                return _err("schedule not found", 404)
            if (
                ("cron" in fields_in or "timezone" in fields_in)
                and existing.get("trigger_type") == "recurring"
            ):
                # Effective values: the edited field when supplied, otherwise
                # whatever the stored row already holds.
                cron_eff = fields_in.get("cron") or existing.get("cron")
                tz_eff = fields_in.get("timezone") or existing.get("timezone")
                if cron_eff:
                    min_interval = max(0, int(settings.SCHEDULE_MIN_INTERVAL))
                    if min_interval > 0:
                        try:
                            cadence = cron_interval_seconds(cron_eff, tz_eff)
                        except ScheduleValidationError as exc:
                            return _err(str(exc))
                        if cadence < min_interval:
                            return _err(
                                "cadence below minimum interval "
                                f"({cadence}s < {min_interval}s)",
                            )
                    try:
                        fields_in["next_run_at"] = next_cron_run(
                            cron_eff, tz_eff, after=datetime.now(timezone.utc),
                        )
                    except ScheduleValidationError as exc:
                        return _err(str(exc))
            updated = SchedulesRepository(conn).update(
                schedule_id, user_id, fields_in,
            )
        return _ok({"schedule": _format_schedule(updated or {})})

    @api.doc(description="Pause / resume a schedule.")
    def patch(self, schedule_id):
        # ``action`` must be "pause" or "resume". Schedules whose status is
        # "cancelled" or "completed" are rejected with 409.
        user_id = _user_id()
        if not user_id:
            return _err("unauthorized", 401)
        if not looks_like_uuid(schedule_id):
            return _err("invalid schedule id", 400)
        data = request.get_json(silent=True) or {}
        action = (data.get("action") or "").lower().strip()
        if action not in {"pause", "resume"}:
            return _err("action must be 'pause' or 'resume'")
        with db_session() as conn:
            existing = SchedulesRepository(conn).get(schedule_id, user_id)
            if existing is None:
                return _err("schedule not found", 404)
            if existing.get("status") in ("cancelled", "completed"):
                return _err("schedule is terminal", 409)
            if action == "pause":
                fields_in: Dict[str, Any] = {"status": "paused", "next_run_at": None}
            else:
                # Resume: recurring recomputes from now; once honours run_at if still future.
                fields_in = {"status": "active"}
                now_utc = datetime.now(timezone.utc)
                if existing.get("trigger_type") == "recurring":
                    try:
                        fields_in["next_run_at"] = next_cron_run(
                            existing["cron"],
                            existing["timezone"],
                            after=now_utc,
                        )
                    except ScheduleValidationError as exc:
                        return _err(str(exc))
                else:
                    new_run_at = data.get("run_at")
                    if new_run_at:
                        try:
                            # Aware parse: a ``run_at`` without an offset used
                            # to make the comparison below raise TypeError.
                            run_at_dt = self._parse_iso_datetime(new_run_at)
                        except ValueError:
                            return _err("invalid run_at")
                        if run_at_dt <= now_utc:
                            return _err(
                                "run_at must be in the future to resume", 409,
                            )
                        fields_in["next_run_at"] = run_at_dt
                        fields_in["run_at"] = run_at_dt
                    else:
                        run_at = existing.get("run_at")
                        if run_at:
                            if isinstance(run_at, str):
                                try:
                                    run_at_dt = self._parse_iso_datetime(run_at)
                                except ValueError:
                                    return _err("schedule run_at is invalid")
                            else:
                                run_at_dt = self._ensure_aware(run_at)
                            if run_at_dt <= now_utc:
                                return _err(
                                    "the once schedule has elapsed; recreate "
                                    "it or supply a new run_at",
                                    409,
                                )
                            fields_in["next_run_at"] = run_at_dt
            updated = SchedulesRepository(conn).update(
                schedule_id, user_id, fields_in,
            )
            if action == "resume":
                SchedulesRepository(conn).reset_failure_count(schedule_id)
        return _ok({"schedule": _format_schedule(updated or {})})

    @api.doc(description="Cancel / delete a schedule.")
    def delete(self, schedule_id):
        # A falsy result from the repository's ``delete`` is reported as 404.
        user_id = _user_id()
        if not user_id:
            return _err("unauthorized", 401)
        if not looks_like_uuid(schedule_id):
            return _err("invalid schedule id", 400)
        with db_session() as conn:
            ok = SchedulesRepository(conn).delete(schedule_id, user_id)
        if not ok:
            return _err("schedule not found", 404)
        return _ok({"success": True})
@schedules_ns.route("/schedules/<string:schedule_id>/run")
class ScheduleRunNow(Resource):
    # Manual "Run now": records a pending run row under a row lock, then
    # enqueues the Celery task that executes it.

    @api.doc(description="Run a schedule immediately (trigger_source='manual').")
    def post(self, schedule_id):
        caller = _user_id()
        if not caller:
            return _err("unauthorized", 401)
        if not looks_like_uuid(schedule_id):
            return _err("invalid schedule id", 400)

        # FOR UPDATE serializes concurrent Run-Now POSTs (timestamp-unique
        # scheduled_for values would otherwise sneak past the unique index).
        with db_session() as conn:
            schedule = SchedulesRepository(conn).get_for_update(
                schedule_id, caller,
            )
            if schedule is None:
                return _err("schedule not found", 404)
            if schedule.get("status") == "cancelled":
                return _err("schedule is cancelled", 409)

            runs_repo = ScheduleRunsRepository(conn)
            if runs_repo.has_active_run(schedule_id):
                return _err("a run is already in flight", 409)

            fire_time = datetime.now(timezone.utc)
            raw_agent_id = schedule.get("agent_id")
            agent_id = str(raw_agent_id) if raw_agent_id else None
            run = runs_repo.record_pending(
                schedule_id,
                caller,
                agent_id,
                fire_time,
                trigger_source="manual",
            )
            if run is None:
                return _err("could not claim run (concurrent dispatch)", 409)

        # Import inside the handler to avoid a circular tasks <-> routes import.
        # NOTE(review): when the enqueue below fails, the pending run recorded
        # above is left as-is; presumably it keeps counting as "active" until
        # something else reconciles it — confirm.
        try:
            from application.api.user.tasks import execute_scheduled_run

            run_id = str(run["id"])
            execute_scheduled_run.apply_async(args=[run_id], queue="docsgpt")
        except Exception as exc:
            current_app.logger.error(
                "run-now enqueue failed: %s", exc, exc_info=True,
            )
            return _err("enqueue failed", 500)
        return _ok({"run": _format_run(run)}, status=202)
@schedules_ns.route("/schedules/<string:schedule_id>/runs")
class ScheduleRunList(Resource):
    # Paginated run history for one schedule.

    @api.doc(
        description="Paginated run log for a schedule.",
        params={"limit": "Page size (default 50)", "offset": "Page offset"},
    )
    def get(self, schedule_id):
        caller = _user_id()
        if not caller:
            return _err("unauthorized", 401)
        if not looks_like_uuid(schedule_id):
            return _err("invalid schedule id", 400)

        # Page size is clamped to 1..200; an unparsable value falls back to 50.
        try:
            requested_limit = int(request.args.get("limit", 50))
            limit = max(1, min(requested_limit, 200))
        except (TypeError, ValueError):
            limit = 50

        # Offset is floored at 0; an unparsable value falls back to 0.
        try:
            requested_offset = int(request.args.get("offset", 0))
            offset = max(0, requested_offset)
        except (TypeError, ValueError):
            offset = 0

        with db_readonly() as conn:
            # The schedule lookup is keyed by the caller, so a miss yields 404
            # before any runs are listed.
            if SchedulesRepository(conn).get(schedule_id, caller) is None:
                return _err("schedule not found", 404)
            run_rows = ScheduleRunsRepository(conn).list_runs(
                schedule_id, caller, limit=limit, offset=offset,
            )

        payload = {
            "runs": [_format_run(entry) for entry in run_rows],
            "limit": limit,
            "offset": offset,
        }
        return _ok(payload)
@schedules_ns.route("/schedules/<string:schedule_id>/runs/<string:run_id>")
class ScheduleRunDetail(Resource):
    # Single-run detail, addressed by schedule id and run id.

    @api.doc(description="Full output / error for a single run.")
    def get(self, schedule_id, run_id):
        caller = _user_id()
        if not caller:
            return _err("unauthorized", 401)
        if not (looks_like_uuid(schedule_id) and looks_like_uuid(run_id)):
            return _err("invalid id", 400)

        with db_readonly() as conn:
            schedule = SchedulesRepository(conn).get(schedule_id, caller)
            if schedule is None:
                return _err("schedule not found", 404)
            run = ScheduleRunsRepository(conn).get(run_id, caller)

        # The run must exist AND belong to the schedule named in the URL;
        # ids are compared as strings.
        belongs_to_schedule = run is not None and (
            str(run.get("schedule_id")) == str(schedule["id"])
        )
        if not belongs_to_schedule:
            return _err("run not found", 404)
        return _ok({"run": _format_run(run)})

View File

@@ -7,8 +7,12 @@ from flask import current_app, jsonify, make_response, redirect, request
from flask_restx import fields, Namespace, Resource
from application.api import api
from application.api.user.tasks import sync_source
from application.api.user.tasks import reingest_source_task, sync_source
from application.core.settings import settings
from application.parser.remote.remote_creator import normalize_remote_data
from application.storage.db.repositories.ingest_chunk_progress import (
IngestChunkProgressRepository,
)
from application.storage.db.repositories.sources import SourcesRepository
from application.storage.db.session import db_readonly, db_session
from application.storage.storage_creator import StorageCreator
@@ -139,6 +143,8 @@ class PaginatedSources(Resource):
"provider": provider,
"isNested": bool(doc.get("directory_structure")),
"type": doc.get("type", "file"),
# Derived in SourcesRepository.list_for_user.
"ingestStatus": doc.get("ingest_status"),
}
)
response = {
@@ -322,7 +328,7 @@ class SyncSource(Resource):
),
400,
)
source_data = doc.get("remote_data")
source_data = normalize_remote_data(source_type, doc.get("remote_data"))
if not source_data:
return make_response(
jsonify({"success": False, "message": "Source is not syncable"}), 400
@@ -346,6 +352,70 @@ class SyncSource(Resource):
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@sources_ns.route("/sources/reingest")
class ReingestSource(Resource):
    # Endpoint that re-queues ingestion for an existing source.

    reingest_source_model = api.model(
        "ReingestSourceModel",
        {"source_id": fields.String(required=True, description="Source ID")},
    )

    @api.expect(reingest_source_model)
    @api.doc(
        description="Re-run ingestion for a source — e.g. to recover a "
        "stalled embed flagged by the reconciler."
    )
    def post(self):
        token = request.decoded_token
        if not token:
            return make_response(jsonify({"success": False}), 401)
        owner = token.get("sub")

        payload = request.get_json() or {}
        missing = check_required_fields(payload, ["source_id"])
        if missing:
            return missing
        source_id = payload["source_id"]

        # Any failure during lookup is reported to the client as an invalid id.
        try:
            with db_readonly() as conn:
                source_row = SourcesRepository(conn).get_any(source_id, owner)
        except Exception as err:
            current_app.logger.error(
                f"Error looking up source: {err}", exc_info=True
            )
            return make_response(
                jsonify({"success": False, "message": "Invalid source ID"}), 400
            )
        if not source_row:
            return make_response(
                jsonify({"success": False, "message": "Source not found"}), 404
            )

        resolved_source_id = str(source_row["id"])

        # Drop the stale chunk-progress row so the sources list stops
        # deriving a 'failed' status; reingest never rewrites it itself.
        # Best-effort: a failure here is logged and the reingest still starts.
        try:
            with db_session() as conn:
                IngestChunkProgressRepository(conn).delete(resolved_source_id)
        except Exception as err:
            current_app.logger.warning(
                f"Could not clear ingest progress for {resolved_source_id}: {err}",
                exc_info=True,
            )

        # Scoped key so repeated clicks collapse onto one reingest.
        dedupe_key = f"reingest-source:{owner}:{resolved_source_id}"
        try:
            task = reingest_source_task.delay(
                source_id=resolved_source_id,
                user=owner,
                idempotency_key=dedupe_key,
            )
        except Exception as err:
            current_app.logger.error(
                f"Error starting reingest for source {source_id}: {err}",
                exc_info=True,
            )
            return make_response(jsonify({"success": False}), 400)

        return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@sources_ns.route("/directory_structure")
class DirectoryStructure(Resource):
@api.doc(

View File

@@ -27,8 +27,42 @@ DURABLE_TASK = dict(
)
# operation tag for the poison-path source.ingest.failed event, per task.
_INGEST_POISON_OPERATION = {
"ingest": "upload",
"ingest_remote": "upload",
"ingest_connector_task": "upload",
"reingest_source_task": "reingest",
}
def _emit_ingest_poison_event(task_name, bound):
"""Publish a terminal ``source.ingest.failed`` when the poison-guard trips.
The guard returns before the worker runs, so the worker's own failed
event never fires — without this the upload toast spins on "training".
"""
user = bound.get("user")
source_id = bound.get("source_id")
if not user or not source_id:
return
from application.events.publisher import publish_user_event
publish_user_event(
user,
"source.ingest.failed",
{
"source_id": str(source_id),
"filename": bound.get("filename") or "",
"operation": _INGEST_POISON_OPERATION.get(task_name, "upload"),
"error": "Ingestion stopped after repeated failures.",
},
scope={"kind": "source", "id": str(source_id)},
)
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="ingest")
@with_idempotency(task_name="ingest", on_poison=_emit_ingest_poison_event)
def ingest(
self,
directory,
@@ -57,7 +91,7 @@ def ingest(
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="ingest_remote")
@with_idempotency(task_name="ingest_remote", on_poison=_emit_ingest_poison_event)
def ingest_remote(
self, source_data, job_name, user, loader,
idempotency_key=None, source_id=None,
@@ -71,7 +105,9 @@ def ingest_remote(
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="reingest_source_task")
@with_idempotency(
task_name="reingest_source_task", on_poison=_emit_ingest_poison_event,
)
def reingest_source_task(self, source_id, user, idempotency_key=None):
from application.worker import reingest_source_worker
@@ -128,7 +164,9 @@ def process_agent_webhook(self, agent_id, payload, idempotency_key=None):
@celery.task(**DURABLE_TASK)
@with_idempotency(task_name="ingest_connector_task")
@with_idempotency(
task_name="ingest_connector_task", on_poison=_emit_ingest_poison_event,
)
def ingest_connector_task(
self,
job_name,
@@ -166,8 +204,64 @@ def ingest_connector_task(
return resp
@celery.task(bind=True, acks_late=False)
def dispatch_scheduled_runs(self):
    """Scheduler poller task; delegates to ``dispatch_due_runs``.

    The body lives in ``scheduler_dispatcher`` and is imported at call time.

    Returns:
        Whatever ``dispatch_due_runs()`` returns.
    """
    from application.api.user.scheduler_dispatcher import (
        dispatch_due_runs as poll_due_runs,
    )

    outcome = poll_due_runs()
    return outcome
@celery.task(
    bind=True,
    acks_late=True,
    # Not DURABLE_TASK: agent runs have side effects; blind retry would double them.
    autoretry_for=(),
    max_retries=0,
)
def execute_scheduled_run(self, run_id):
    """Execute one scheduled run; soft-time-limit honors SCHEDULE_RUN_TIMEOUT.

    Args:
        run_id: Identifier of the run to execute.

    Returns:
        Whatever ``execute_scheduled_run_body`` returns.
    """
    from application.api.user.scheduler_worker import (
        execute_scheduled_run_body as run_body,
    )

    # The request may not expose an id; pass ``None`` in that case.
    celery_task_id = getattr(self.request, "id", None)
    return run_body(run_id, celery_task_id)
# Bind runtime soft-time-limit so the prefork worker can raise mid-agent.
# The soft limit is SCHEDULE_RUN_TIMEOUT floored at 30 seconds; the hard limit
# sits 60 seconds above it.
try:
    from application.core.settings import settings as _scheduler_settings

    execute_scheduled_run.soft_time_limit = max(
        30, int(_scheduler_settings.SCHEDULE_RUN_TIMEOUT),
    )
    execute_scheduled_run.time_limit = (
        execute_scheduled_run.soft_time_limit + 60
    )
except Exception:
    # Still best-effort (module import must not fail over this), but no longer
    # silent: a bad SCHEDULE_RUN_TIMEOUT would otherwise leave the task
    # without these limits and nobody would notice.
    import logging as _scheduler_logging

    _scheduler_logging.getLogger(__name__).warning(
        "could not bind execute_scheduled_run time limits",
        exc_info=True,
    )
@celery.task(bind=True, acks_late=False)
def cleanup_schedule_runs(self):
    """Trim ``schedule_runs`` per ``SCHEDULE_RUN_OUTPUT_RETENTION_DAYS``.

    Returns:
        ``{"deleted": 0, "skipped": ...}`` when ``POSTGRES_URI`` is unset;
        otherwise ``{"deleted": <result of cleanup_older_than>, "ttl_days": ...}``.
    """
    from application.core.settings import settings

    # Without a database URI there is nothing to trim.
    if not settings.POSTGRES_URI:
        return {"deleted": 0, "skipped": "POSTGRES_URI not set"}

    from application.storage.db.engine import get_engine
    from application.storage.db.repositories.schedule_runs import (
        ScheduleRunsRepository,
    )

    retention_days = settings.SCHEDULE_RUN_OUTPUT_RETENTION_DAYS
    with get_engine().begin() as conn:
        removed = ScheduleRunsRepository(conn).cleanup_older_than(retention_days)
    return {"deleted": removed, "ttl_days": retention_days}
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
from application.core.settings import settings
sender.add_periodic_task(
timedelta(days=1),
schedule_syncs.s("daily"),
@@ -213,6 +307,22 @@ def setup_periodic_tasks(sender, **kwargs):
cleanup_message_events.s(),
name="cleanup-message-events",
)
sender.add_periodic_task(
timedelta(hours=24),
cleanup_orphan_memories.s(),
name="cleanup-orphan-memories",
)
# Scheduler dispatcher and run-log trim.
sender.add_periodic_task(
timedelta(seconds=max(15, settings.SCHEDULE_DISPATCHER_INTERVAL)),
dispatch_scheduled_runs.s(),
name="dispatch-scheduled-runs",
)
sender.add_periodic_task(
timedelta(hours=24),
cleanup_schedule_runs.s(),
name="cleanup-schedule-runs",
)
@celery.task(bind=True)
@@ -301,6 +411,29 @@ def cleanup_message_events(self):
return {"deleted": deleted, "ttl_days": ttl_days}
@celery.task(bind=True, acks_late=False)
def cleanup_orphan_memories(self):
    """Sweep orphan memories left by the 0009 FK-to-trigger orphan window.

    A ``memories`` INSERT for a real ``tool_id`` racing a ``user_tools``
    DELETE leaves a permanent orphan the dropped FK would have rejected.
    Default-tool synthetic ids are preserved (legitimate built-in data).

    Returns:
        ``{"deleted": 0, "skipped": ...}`` when ``POSTGRES_URI`` is unset;
        otherwise ``{"deleted": <result of delete_orphans>}``.
    """
    from application.core.settings import settings

    if not settings.POSTGRES_URI:
        return {"deleted": 0, "skipped": "POSTGRES_URI not set"}

    from application.agents.default_tools import default_tool_ids
    from application.storage.db.engine import get_engine
    from application.storage.db.repositories.memories import MemoriesRepository

    # Ids handed to ``delete_orphans`` as the set to keep.
    protected_ids = list(default_tool_ids().values())
    with get_engine().begin() as conn:
        removed = MemoriesRepository(conn).delete_orphans(protected_ids)
    return {"deleted": removed}
@celery.task(bind=True, acks_late=False)
def version_check_task(self):
"""Periodic anonymous version check.

View File

@@ -3,6 +3,15 @@
from flask import current_app, jsonify, make_response, request
from flask_restx import fields, Namespace, Resource
from application.agents.default_tools import (
builtin_agent_tools_for_management,
BUILTIN_AGENT_TOOLS,
default_tool_name_for_id,
default_tools_for_management,
is_builtin_agent_tool_id,
is_default_tool_id,
is_synthesized_tool_id,
)
from application.agents.tools.spec_parser import parse_spec
from application.agents.tools.tool_manager import ToolManager
from application.api import api
@@ -11,6 +20,7 @@ from application.security.encryption import decrypt_credentials, encrypt_credent
from application.storage.db.repositories.notes import NotesRepository
from application.storage.db.repositories.todos import TodosRepository
from application.storage.db.repositories.user_tools import UserToolsRepository
from application.storage.db.repositories.users import UsersRepository
from application.storage.db.session import db_readonly, db_session
from application.utils import check_required_fields, validate_function_name
@@ -208,6 +218,7 @@ class GetTools(Resource):
user = decoded_token.get("sub")
with db_readonly() as conn:
rows = UserToolsRepository(conn).list_for_user(user)
user_doc = UsersRepository(conn).get(user)
user_tools = []
for row in rows:
tool_copy = _row_to_api(row)
@@ -227,6 +238,29 @@ class GetTools(Resource):
tool_copy["config"].pop("encrypted_credentials", None)
user_tools.append(tool_copy)
# ``scheduler`` is dual-registered (default chat tool + agent-
# selectable builtin) and resolves to the same synthetic uuid5 id.
# Surface a single row with both flags so the frontend can show it
# in the management page (toggle) and the agent picker.
seen_ids: set = set()
for default_row in default_tools_for_management(user_doc):
default_copy = _row_to_api(default_row)
default_copy["default"] = True
if default_copy.get("name") in BUILTIN_AGENT_TOOLS:
default_copy["builtin"] = True
seen_ids.add(str(default_copy["id"]))
user_tools.append(default_copy)
# Builtins (e.g. scheduler) hidden from Add-Tool catalog, visible
# to the agent picker. Skip ones already added via the default
# path — both registries share ``_DEFAULT_TOOL_NAMESPACE``.
for builtin_row in builtin_agent_tools_for_management():
builtin_copy = _row_to_api(builtin_row)
if str(builtin_copy["id"]) in seen_ids:
continue
builtin_copy["builtin"] = True
builtin_copy["default"] = False
user_tools.append(builtin_copy)
except Exception as err:
current_app.logger.error(f"Error getting user tools: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
@@ -367,6 +401,46 @@ class UpdateTool(Resource):
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
# Default-tool branch first: a dual-registered tool (e.g. ``scheduler``)
# matches BOTH ``is_default_tool_id`` and ``is_builtin_agent_tool_id``.
# The toggle in Tools settings is the per-user opt-out for the
# agentless default — it must reach the ``set_default_tool_enabled``
# path, not the builtin "not editable" reject.
if is_default_tool_id(data["id"]):
if "status" not in data:
return make_response(
jsonify(
{
"success": False,
"message": "Default tools are not editable; "
"only their on/off status can be changed.",
}
),
400,
)
tool_name = default_tool_name_for_id(data["id"])
try:
with db_session() as conn:
UsersRepository(conn).set_default_tool_enabled(
user, tool_name, bool(data["status"])
)
except Exception as err:
current_app.logger.error(
f"Error updating default tool: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
if is_builtin_agent_tool_id(data["id"]):
return make_response(
jsonify(
{
"success": False,
"message": "Built-in agent tools are not editable; "
"add them to an agent via the agent picker.",
}
),
400,
)
try:
update_data: dict = {}
for key in ("name", "displayName", "customName", "description", "actions"):
@@ -471,6 +545,17 @@ class UpdateToolConfig(Resource):
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
if is_synthesized_tool_id(data["id"]):
return make_response(
jsonify(
{
"success": False,
"message": "Default and built-in tools are config-free "
"and cannot be configured.",
}
),
400,
)
try:
with db_session() as conn:
repo = UserToolsRepository(conn)
@@ -550,6 +635,16 @@ class UpdateToolActions(Resource):
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
if is_synthesized_tool_id(data["id"]):
return make_response(
jsonify(
{
"success": False,
"message": "Default and built-in tools' actions are not editable.",
}
),
400,
)
try:
with db_session() as conn:
repo = UserToolsRepository(conn)
@@ -595,6 +690,27 @@ class UpdateToolStatus(Resource):
if missing_fields:
return missing_fields
try:
# Default branch first so a dual-registered id (e.g. ``scheduler``)
# writes the per-user opt-out instead of being rejected as a
# not-editable builtin (both predicates match the same uuid5).
if is_default_tool_id(data["id"]):
tool_name = default_tool_name_for_id(data["id"])
with db_session() as conn:
UsersRepository(conn).set_default_tool_enabled(
user, tool_name, bool(data["status"])
)
return make_response(jsonify({"success": True}), 200)
if is_builtin_agent_tool_id(data["id"]):
return make_response(
jsonify(
{
"success": False,
"message": "Built-in agent tools have no per-user "
"toggle; add them to an agent via the agent picker.",
}
),
400,
)
with db_session() as conn:
repo = UserToolsRepository(conn)
tool_doc = repo.get_any(data["id"], user)
@@ -633,6 +749,16 @@ class DeleteTool(Resource):
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
if is_synthesized_tool_id(data["id"]):
return make_response(
jsonify(
{
"success": False,
"message": "Built-in tools cannot be deleted; disable them instead.",
}
),
400,
)
try:
with db_session() as conn:
repo = UserToolsRepository(conn)

View File

@@ -48,6 +48,12 @@ ensure_database_ready(
logger=logging.getLogger("application.app"),
)
from application.agents.default_tools import ( # noqa: E402
validate_default_chat_tools,
)
validate_default_chat_tools()
app = Flask(__name__)
app.register_blueprint(user)
app.register_blueprint(answer)

View File

@@ -66,6 +66,9 @@ class Settings(BaseSettings):
PARSE_IMAGE_REMOTE: bool = False
DOCLING_OCR_ENABLED: bool = False # Enable OCR for docling parsers (PDF, images)
DOCLING_OCR_ATTACHMENTS_ENABLED: bool = False # Enable OCR for docling when parsing attachments
# Pages docling's threaded pipeline buffers in flight; the library
# default (100) drives worker RSS to ~3 GB on a mid-size PDF.
DOCLING_PIPELINE_QUEUE_MAX_SIZE: int = 2
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb" or "pgvector"
RETRIEVERS_ENABLED: list = ["classic_rag"]
AGENT_NAME: str = "classic"
@@ -186,6 +189,11 @@ class Settings(BaseSettings):
# Tool pre-fetch settings
ENABLE_TOOL_PREFETCH: bool = True
# Config-free tools on by default in agentless chats. ``scheduler`` is
# dual-registered (also in ``BUILTIN_AGENT_TOOLS``) so the same synthetic id
# resolves whether reached via defaults or the agent picker.
DEFAULT_CHAT_TOOLS: list = ["memory", "read_webpage", "scheduler"]
# Conversation Compression Settings
ENABLE_CONVERSATION_COMPRESSION: bool = True
COMPRESSION_THRESHOLD_PERCENTAGE: float = 0.8 # Trigger at 80% of context
@@ -229,6 +237,16 @@ class Settings(BaseSettings):
# flows without unbounded table growth.
MESSAGE_EVENTS_RETENTION_DAYS: int = 14
# Scheduler (see scheduler.md).
SCHEDULE_DISPATCHER_INTERVAL: int = 30
SCHEDULE_MIN_INTERVAL: int = 900
SCHEDULE_MAX_PER_USER: int = 50
SCHEDULE_RUN_TIMEOUT: int = 600
SCHEDULE_MISFIRE_GRACE: int = 60
SCHEDULE_AUTOPAUSE_FAILURES: int = 3
SCHEDULE_ONCE_MAX_HORIZON: int = 31_536_000
SCHEDULE_RUN_OUTPUT_RETENTION_DAYS: int = 90
@field_validator("POSTGRES_URI", mode="before")
@classmethod
def _normalize_postgres_uri_validator(cls, v):

View File

@@ -850,6 +850,79 @@ class LLMHandler(ABC):
tools_dict, call, llm_class
)
if pause_info:
# Headless (scheduled / webhook): synthesize a denial tool message
# so the LLM finishes gracefully instead of stalling on a pause
# nobody will resolve, then journal so the reconciler sees it.
if pause_info.get("pause_type") == "headless_denied":
deny_reason = pause_info.get(
"deny_reason", "Tool blocked in headless mode."
)
args_str = (
json.dumps(call.arguments)
if isinstance(call.arguments, dict)
else (call.arguments or "{}")
)
tool_call_obj = {
"id": pause_info["call_id"],
"type": "function",
"function": {
"name": call.name,
"arguments": args_str,
},
}
if getattr(call, "thought_signature", None):
tool_call_obj["thought_signature"] = call.thought_signature
updated_messages.append({
"role": "assistant",
"content": None,
"tool_calls": [tool_call_obj],
})
denial_call = ToolCall(
id=pause_info["call_id"],
name=call.name,
arguments=call.arguments,
)
updated_messages.append(
self.create_tool_message(
denial_call,
f"Tool denied (headless): {deny_reason}",
)
)
if hasattr(agent.tool_executor, "headless_denials"):
agent.tool_executor.headless_denials.append(pause_info)
from application.agents.tool_executor import (
_mark_failed,
_record_proposed,
)
_record_proposed(
pause_info["call_id"],
pause_info["tool_name"],
pause_info["action_name"],
pause_info.get("arguments") or {},
tool_id=pause_info.get("tool_id"),
)
_mark_failed(
pause_info["call_id"],
f"headless: {deny_reason}",
)
yield {
"type": "tool_call",
"data": {
"tool_name": pause_info["tool_name"],
"call_id": pause_info["call_id"],
"action_name": pause_info.get(
"llm_name", pause_info["name"]
),
"arguments": pause_info["arguments"],
"status": "denied",
"error": deny_reason,
"error_type": pause_info.get(
"error_type", "tool_not_allowed"
),
},
}
continue
# Yield pause event so the client knows this tool is waiting
yield {
"type": "tool_call",

View File

@@ -154,6 +154,8 @@ def embed_and_store_documents(
*,
attempt_id: Optional[str] = None,
user_id: Optional[str] = None,
progress_start: int = 0,
progress_end: int = 100,
) -> None:
"""Embeds documents and stores them in a vector store.
@@ -176,6 +178,11 @@ def embed_and_store_documents(
published to ``user:{user_id}`` for the in-app upload toast.
``None`` is the safe default — workers without a user
context (e.g. background syncs) skip the publish.
progress_start: Percent the reported progress maps to at chunk 0.
Lets a caller reserve the lower band for an earlier stage
(e.g. parsing). Defaults to ``0`` (embed owns the whole bar).
progress_end: Percent the reported progress maps to at the final
chunk. Defaults to ``100``.
Returns:
None
@@ -257,6 +264,7 @@ def embed_and_store_documents(
failed_idx: int | None = None
last_published_pct = -1
source_id_str = str(source_id)
progress_span = progress_end - progress_start
for idx in tqdm(
range(loop_start, total_docs),
desc="Embedding 🦖",
@@ -266,8 +274,10 @@ def embed_and_store_documents(
):
doc = docs[idx]
try:
# Update task status for progress tracking
progress = int(((idx + 1) / total_docs) * 100)
# Map the embed loop into [progress_start, progress_end].
progress = progress_start + int(
((idx + 1) / total_docs) * progress_span
)
task_status.update_state(state="PROGRESS", meta={"current": progress})
# SSE push for sub-second upload-toast updates. Throttled to one

View File

@@ -211,13 +211,22 @@ class SimpleDirectoryReader(BaseReader):
return new_input_files
def load_data(self, concatenate: bool = False) -> List[Document]:
def load_data(
self,
concatenate: bool = False,
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> List[Document]:
"""Load data from the input directory.
Args:
concatenate (bool): whether to concatenate all files into one document.
If set to True, file metadata is ignored.
False by default.
progress_callback (Optional[Callable[[int, int], None]]): Called
after each file is parsed with ``(files_done, total_files)``.
Lets callers surface parse/OCR progress before embedding
begins. Exceptions raised by the callback are swallowed so
progress reporting can never fail ingestion.
Returns:
List[Document]: A list of documents.
@@ -226,8 +235,9 @@ class SimpleDirectoryReader(BaseReader):
data_list: List[str] = []
metadata_list = []
self.file_token_counts = {}
for input_file in self.input_files:
total_files = len(self.input_files)
for file_index, input_file in enumerate(self.input_files):
suffix_lower = input_file.suffix.lower()
parser_metadata = {}
if suffix_lower in self.file_extractor:
@@ -277,7 +287,15 @@ class SimpleDirectoryReader(BaseReader):
else:
data_list.append(str(data))
metadata_list.append(base_metadata)
if progress_callback is not None:
try:
progress_callback(file_index + 1, total_files)
except Exception:
logging.warning(
"load_data progress callback failed", exc_info=True
)
# Build directory structure if input_dir is provided
if hasattr(self, 'input_dir'):
self.directory_structure = self.build_directory_structure(self.input_dir)

View File

@@ -16,6 +16,29 @@ from application.parser.file.base_parser import BaseParser
logger = logging.getLogger(__name__)
# Batch size applied to every stage of docling's threaded pipeline; 1 holds
# the concurrent working set to a single page (see _apply_pipeline_caps).
_PIPELINE_BATCH_SIZE = 1


def _apply_pipeline_caps(pipeline_options) -> None:
    """Cap docling's threaded-pipeline queue depth and batch sizes in place.

    Sets ``queue_max_size`` (from ``DOCLING_PIPELINE_QUEUE_MAX_SIZE``, floored
    at 1) and the layout/table/OCR batch sizes on ``pipeline_options``.
    Attributes the object does not already have are left alone, so docling
    builds without these knobs are unaffected.

    Args:
        pipeline_options: Options object mutated in place.
    """
    from application.core.settings import settings

    queue_depth = max(1, settings.DOCLING_PIPELINE_QUEUE_MAX_SIZE)
    overrides = (
        ("queue_max_size", queue_depth),
        ("layout_batch_size", _PIPELINE_BATCH_SIZE),
        ("table_batch_size", _PIPELINE_BATCH_SIZE),
        ("ocr_batch_size", _PIPELINE_BATCH_SIZE),
    )
    for attr_name, cap in overrides:
        if not hasattr(pipeline_options, attr_name):
            continue
        setattr(pipeline_options, attr_name, cap)
class DoclingParser(BaseParser):
"""Parser using docling for advanced document processing.
@@ -86,6 +109,7 @@ class DoclingParser(BaseParser):
do_ocr=self.ocr_enabled,
do_table_structure=self.table_structure,
)
_apply_pipeline_caps(pipeline_options)
if self.ocr_enabled:
ocr_options = self._get_ocr_options()

View File

@@ -1,3 +1,5 @@
import json
from application.parser.remote.sitemap_loader import SitemapLoader
from application.parser.remote.crawler_loader import CrawlerLoader
from application.parser.remote.web_loader import WebLoader
@@ -32,3 +34,59 @@ class RemoteCreator:
if not loader_class:
raise ValueError(f"No loader class found for type {type}")
return loader_class(*args, **kwargs)
# Loader types whose load_data expects a URL string, not a config dict.
_URL_LOADER_TYPES = {"url", "crawler", "sitemap", "github"}

# Keys a remote_data dict may hold the URL under (``raw`` is the legacy shape).
_URL_DATA_KEYS = ("url", "urls", "repo_url", "raw")


def normalize_remote_data(source_type, remote_data):
    """Translate a stored ``sources.remote_data`` value into loader input.

    Args:
        source_type: The ``sources.type`` value (the loader name); matched
            case-insensitively.
        remote_data: The stored ``remote_data`` (dict, list, str, or None).

    Returns:
        For url/crawler/sitemap/github: the first truthy value found under
        one of ``_URL_DATA_KEYS`` when the data is a dict (``None`` if no
        such key is set), otherwise the data as-is. For reddit: a JSON
        string when the data is a dict or list, otherwise the data as-is.
        For every other type (e.g. s3): the data unchanged. ``None`` when
        ``remote_data`` itself is ``None``.
    """
    if remote_data is None:
        return None

    payload = remote_data
    # Some legacy rows stored the JSON document itself as a string; decode it
    # when it looks like an object/array and fall back to the raw string if
    # it turns out not to be valid JSON.
    if isinstance(payload, str):
        candidate = payload.strip()
        if candidate.startswith(("{", "[")):
            try:
                payload = json.loads(candidate)
            except json.JSONDecodeError:
                pass

    kind = (source_type or "").lower()

    if kind == "reddit":
        # reddit's loader runs json.loads() on its input, so it needs a string.
        if isinstance(payload, (dict, list)):
            return json.dumps(payload)
        return payload

    # Non-URL loaders (s3, ...) and non-dict payloads pass straight through.
    if kind not in _URL_LOADER_TYPES or not isinstance(payload, dict):
        return payload

    # A dict without any usable URL key yields None, which keeps the loader
    # off the dict-crash path.
    return next(
        (payload[key] for key in _URL_DATA_KEYS if payload.get(key)),
        None,
    )

View File

@@ -7,6 +7,7 @@ beautifulsoup4==4.14.3
cel-python==0.5.0
celery==5.6.3
celery-redbeat==2.3.3
croniter==6.2.2
cryptography==46.0.7
dataclasses-json==0.6.7
defusedxml==0.7.1

View File

@@ -47,6 +47,7 @@ users_table = Table(
nullable=False,
server_default='{"pinned": [], "shared_with_me": []}',
),
Column("tool_preferences", JSONB, nullable=False, server_default="{}"),
Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
)
@@ -254,7 +255,8 @@ memories_table = Table(
metadata,
Column("id", UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()),
Column("user_id", Text, nullable=False),
Column("tool_id", UUID(as_uuid=True), ForeignKey("user_tools.id", ondelete="CASCADE")),
# No FK since 0009 — delete-cascade preserved by trigger.
Column("tool_id", UUID(as_uuid=True)),
Column("path", Text, nullable=False),
Column("content", Text, nullable=False),
Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
@@ -514,6 +516,9 @@ ingest_chunk_progress_table = Table(
# same task resumes from the checkpoint, but a separate invocation
# (manual reingest, scheduled sync) resets to a clean re-index.
Column("attempt_id", Text),
# Added in ``0008_ingest_progress_status``. The reconciler flips
# this to 'stalled'; ``init_progress`` resets it to 'active'.
Column("status", Text, nullable=False, server_default="active"),
)
@@ -595,3 +600,74 @@ workflow_runs_table = Table(
Column("ended_at", DateTime(timezone=True)),
Column("legacy_mongo_id", Text),
)
# --- Scheduler (migration 0010) ---------------------------------------------
# One row per user-owned schedule: what to run (``instruction``), when to run
# it (``cron`` / ``run_at`` / ``next_run_at``) and its lifecycle ``status``.
schedules_table = Table(
    "schedules",
    metadata,
    Column("id", UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()),
    Column("user_id", Text, nullable=False),
    # Nullable as of 0011: agentless chats create one-time schedules whose
    # run is built ephemerally at fire time from system defaults.
    # Deleting the referenced agent also deletes its schedules (CASCADE).
    Column(
        "agent_id",
        UUID(as_uuid=True),
        ForeignKey("agents.id", ondelete="CASCADE"),
    ),
    # NOTE(review): presumably distinguishes recurring (``cron``) from
    # one-time (``run_at``) schedules — confirm against the scheduler service.
    Column("trigger_type", Text, nullable=False),
    Column("name", Text),
    Column("instruction", Text, nullable=False),
    # New rows default to 'active'.
    Column("status", Text, nullable=False, server_default="active"),
    Column("cron", Text),
    Column("run_at", DateTime(timezone=True)),
    Column("timezone", Text, nullable=False, server_default="UTC"),
    Column("next_run_at", DateTime(timezone=True)),
    Column("last_run_at", DateTime(timezone=True)),
    Column("end_at", DateTime(timezone=True)),
    # JSON array; an empty array is the default.
    Column("tool_allowlist", JSONB, nullable=False, server_default="[]"),
    Column("model_id", Text),
    Column("token_budget", Integer),
    # The link is cleared (SET NULL), not cascaded, when the conversation
    # is deleted, so the schedule itself survives.
    Column(
        "origin_conversation_id",
        UUID(as_uuid=True),
        ForeignKey("conversations.id", ondelete="SET NULL"),
    ),
    Column("created_via", Text, nullable=False, server_default="ui"),
    Column("consecutive_failure_count", Integer, nullable=False, server_default="0"),
    Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
    Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
)
# One row per execution attempt of a schedule: its timing, outcome
# (``output`` / ``error``), token usage and the Celery task that ran it.
schedule_runs_table = Table(
    "schedule_runs",
    metadata,
    Column("id", UUID(as_uuid=True), primary_key=True, server_default=func.gen_random_uuid()),
    # Runs are removed together with their parent schedule (CASCADE).
    Column(
        "schedule_id",
        UUID(as_uuid=True),
        ForeignKey("schedules.id", ondelete="CASCADE"),
        nullable=False,
    ),
    Column("user_id", Text, nullable=False),
    # Nullable as of 0011 (mirrors ``schedules.agent_id``).
    # Unlike ``schedules.agent_id`` this column declares no foreign key.
    Column("agent_id", UUID(as_uuid=True)),
    # New rows default to 'pending'.
    Column("status", Text, nullable=False, server_default="pending"),
    Column("scheduled_for", DateTime(timezone=True), nullable=False),
    Column("trigger_source", Text, nullable=False, server_default="cron"),
    Column("started_at", DateTime(timezone=True)),
    Column("finished_at", DateTime(timezone=True)),
    Column("output", Text),
    Column("output_truncated", Boolean, nullable=False, server_default="false"),
    Column("error", Text),
    Column("error_type", Text),
    Column("prompt_tokens", Integer, nullable=False, server_default="0"),
    Column("generated_tokens", Integer, nullable=False, server_default="0"),
    # Plain UUID columns: no foreign keys are declared for these two.
    Column("conversation_id", UUID(as_uuid=True)),
    Column("message_id", UUID(as_uuid=True)),
    Column("celery_task_id", Text),
    Column("created_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
    Column("updated_at", DateTime(timezone=True), nullable=False, server_default=func.now()),
    # At most one run per (schedule, fire time); this is the dedup key.
    UniqueConstraint("schedule_id", "scheduled_for", name="schedule_runs_dedup_uidx"),
)

View File

@@ -41,6 +41,9 @@ class IngestChunkProgressRepository:
rows with NULL ``attempt_id`` resume against another NULL
caller (e.g. test fixtures), but get reset the moment a real
``attempt_id`` arrives.
Both branches also reset ``status`` to ``'active'``, clearing a
prior reconciler ``'stalled'`` escalation.
"""
result = self._conn.execute(
text(
@@ -68,7 +71,8 @@ class IngestChunkProgressRepository:
THEN ingest_chunk_progress.embedded_chunks
ELSE 0
END,
attempt_id = EXCLUDED.attempt_id
attempt_id = EXCLUDED.attempt_id,
status = 'active'
RETURNING *
"""
),
@@ -113,6 +117,23 @@ class IngestChunkProgressRepository:
row = result.fetchone()
return row_to_dict(row) if row is not None else None
def delete(self, source_id: str) -> bool:
    """Remove the progress row belonging to ``source_id``.

    A manual reingest supersedes any earlier ingest state, including a
    reconciler ``'stalled'`` escalation, so dropping the row also clears
    the derived ``failed`` ingest status shown in the sources list.

    Returns:
        ``True`` when a row was deleted, ``False`` when none matched.
    """
    statement = text(
        "DELETE FROM ingest_chunk_progress "
        "WHERE source_id = CAST(:source_id AS uuid)"
    )
    outcome = self._conn.execute(statement, {"source_id": str(source_id)})
    removed = outcome.rowcount
    return removed > 0
def bump_heartbeat(self, source_id: str) -> None:
"""Refresh ``last_updated`` so the row looks alive to the reconciler."""
self._conn.execute(

View File

@@ -86,6 +86,22 @@ class MemoriesRepository:
)
return result.rowcount
def delete_orphans(self, keep_tool_ids: Optional[list[str]] = None) -> int:
    """Purge memories whose ``tool_id`` matches no ``user_tools`` row.

    Rows with a NULL ``tool_id`` are never touched, and any id listed in
    ``keep_tool_ids`` is spared even though it has no ``user_tools`` row.

    Returns:
        The number of rows deleted, as reported by the driver.
    """
    protected = list(map(str, keep_tool_ids or []))
    statement = text(
        """
        DELETE FROM memories
        WHERE tool_id IS NOT NULL
        AND tool_id NOT IN (SELECT id FROM user_tools)
        AND NOT (tool_id = ANY(CAST(:keep AS uuid[])))
        """
    )
    return self._conn.execute(statement, {"keep": protected}).rowcount
def update_path(self, user_id: str, tool_id: str, old_path: str, new_path: str) -> bool:
result = self._conn.execute(
text(

View File

@@ -107,7 +107,11 @@ class ReconciliationRepository:
def find_and_lock_stalled_ingests(
self, *, age_minutes: int = 30, limit: int = 100,
) -> list[dict]:
"""Lock ingest checkpoints whose heartbeat hasn't ticked recently."""
"""Lock still-active ingest checkpoints with a silent heartbeat.
The ``status = 'active'`` filter skips rows already escalated to
``'stalled'``, so a dead ingest is alerted once, not every tick.
"""
result = self._conn.execute(
text(
"""
@@ -116,6 +120,7 @@ class ReconciliationRepository:
FROM ingest_chunk_progress
WHERE last_updated < now() - make_interval(mins => :age)
AND embedded_chunks < total_chunks
AND status = 'active'
ORDER BY last_updated ASC
LIMIT :limit
FOR UPDATE SKIP LOCKED
@@ -125,11 +130,15 @@ class ReconciliationRepository:
)
return [row_to_dict(r) for r in result.fetchall()]
def touch_ingest_progress(self, source_id: str) -> bool:
"""Bump ``last_updated`` so a once-stalled ingest re-enters the watch window."""
def mark_ingest_stalled(self, source_id: str) -> bool:
"""Escalate a stalled checkpoint to terminal ``status='stalled'``.
Drops the row out of the sweep so the reconciler alerts once;
``init_progress`` flips it back to ``'active'`` on reingest.
"""
result = self._conn.execute(
text(
"UPDATE ingest_chunk_progress SET last_updated = now() "
"UPDATE ingest_chunk_progress SET status = 'stalled' "
"WHERE source_id = CAST(:sid AS uuid)"
),
{"sid": str(source_id)},

View File

@@ -0,0 +1,278 @@
"""Repository for ``schedule_runs`` (record_pending is the dedup primitive)."""
from __future__ import annotations
from datetime import datetime
from typing import Any, Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
# Columns ``ScheduleRunsRepository.update`` is allowed to write; every other
# key passed to it is silently dropped.
_ALLOWED_UPDATES = frozenset(
    {
        "status", "started_at", "finished_at", "output", "output_truncated",
        "error", "error_type", "prompt_tokens", "generated_tokens",
        "conversation_id", "message_id", "celery_task_id",
    }
)


class ScheduleRunsRepository:
    """CRUD + dedup insert + reconciliation sweep for ``schedule_runs``."""

    def __init__(self, conn: Connection) -> None:
        """Bind the repository to an open connection (caller owns the txn)."""
        self._conn = conn

    def record_pending(
        self,
        schedule_id: str,
        user_id: str,
        agent_id: Optional[str],
        scheduled_for: datetime,
        *,
        trigger_source: str = "cron",
    ) -> Optional[dict]:
        """Insert a ``pending`` row; ``None`` on conflict (already claimed).

        The insert uses ``ON CONFLICT (schedule_id, scheduled_for) DO
        NOTHING``, so a second claim of the same fire time inserts nothing
        and returns ``None``.

        Args:
            schedule_id: Parent schedule id.
            user_id: Owner of the schedule.
            agent_id: Agent id, or ``None``/empty for an agentless schedule.
            scheduled_for: The fire time this run represents.
            trigger_source: Stored verbatim in ``trigger_source``.

        Returns:
            The inserted row as a dict, or ``None`` when the slot was taken.
        """
        row = self._conn.execute(
            text(
                """
                INSERT INTO schedule_runs (
                    schedule_id, user_id, agent_id, scheduled_for,
                    trigger_source, status
                ) VALUES (
                    CAST(:schedule_id AS uuid),
                    :user_id,
                    CAST(:agent_id AS uuid),
                    :scheduled_for,
                    :trigger_source,
                    'pending'
                )
                ON CONFLICT (schedule_id, scheduled_for) DO NOTHING
                RETURNING *
                """
            ),
            {
                "schedule_id": str(schedule_id),
                "user_id": user_id,
                "agent_id": str(agent_id) if agent_id else None,
                "scheduled_for": scheduled_for,
                "trigger_source": trigger_source,
            },
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def record_skipped(
        self,
        schedule_id: str,
        user_id: str,
        agent_id: Optional[str],
        scheduled_for: datetime,
        *,
        error_type: str,
        error: Optional[str] = None,
        trigger_source: str = "cron",
    ) -> Optional[dict]:
        """Write a terminal ``skipped`` row; returns ``None`` on conflict.

        ``started_at`` and ``finished_at`` are both stamped with ``now()``.
        Shares the ``(schedule_id, scheduled_for)`` dedup key with
        ``record_pending``.

        Args:
            schedule_id: Parent schedule id.
            user_id: Owner of the schedule.
            agent_id: Agent id, or ``None``/empty for an agentless schedule.
            scheduled_for: The fire time that was skipped.
            error_type: Stored in ``error_type``.
            error: Optional text stored in ``error``.
            trigger_source: Stored verbatim in ``trigger_source``. Defaults
                to ``"cron"``, the value this method previously hard-coded,
                so existing callers are unaffected.

        Returns:
            The inserted row as a dict, or ``None`` when the slot was taken.
        """
        row = self._conn.execute(
            text(
                """
                INSERT INTO schedule_runs (
                    schedule_id, user_id, agent_id, scheduled_for,
                    trigger_source, status, started_at, finished_at,
                    error, error_type
                ) VALUES (
                    CAST(:schedule_id AS uuid),
                    :user_id,
                    CAST(:agent_id AS uuid),
                    :scheduled_for,
                    :trigger_source,
                    'skipped',
                    now(),
                    now(),
                    :error,
                    :error_type
                )
                ON CONFLICT (schedule_id, scheduled_for) DO NOTHING
                RETURNING *
                """
            ),
            {
                "schedule_id": str(schedule_id),
                "user_id": user_id,
                "agent_id": str(agent_id) if agent_id else None,
                "scheduled_for": scheduled_for,
                "trigger_source": trigger_source,
                "error": error,
                "error_type": error_type,
            },
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def get(self, run_id: str, user_id: str) -> Optional[dict]:
        """Fetch an owned run row (``None`` if missing or not the user's)."""
        row = self._conn.execute(
            text(
                "SELECT * FROM schedule_runs "
                "WHERE id = CAST(:id AS uuid) AND user_id = :user_id"
            ),
            {"id": str(run_id), "user_id": user_id},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def get_internal(self, run_id: str) -> Optional[dict]:
        """Fetch a run row with no ownership scoping (worker-only)."""
        row = self._conn.execute(
            text("SELECT * FROM schedule_runs WHERE id = CAST(:id AS uuid)"),
            {"id": str(run_id)},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def has_active_run(self, schedule_id: str) -> bool:
        """True iff a ``pending``/``running`` run exists for the schedule."""
        row = self._conn.execute(
            text(
                "SELECT 1 FROM schedule_runs "
                "WHERE schedule_id = CAST(:id AS uuid) "
                "AND status IN ('pending', 'running') "
                "LIMIT 1"
            ),
            {"id": str(schedule_id)},
        ).first()
        return row is not None

    def list_runs(
        self,
        schedule_id: str,
        user_id: str,
        *,
        limit: int = 50,
        offset: int = 0,
    ) -> list[dict]:
        """Paginated newest-first run log for an owned schedule.

        Rows are ordered by ``scheduled_for`` descending and filtered on
        both ``schedule_id`` and ``user_id``.
        """
        rows = self._conn.execute(
            text(
                """
                SELECT * FROM schedule_runs
                WHERE schedule_id = CAST(:id AS uuid) AND user_id = :user_id
                ORDER BY scheduled_for DESC
                LIMIT :limit OFFSET :offset
                """
            ),
            {
                "id": str(schedule_id),
                "user_id": user_id,
                "limit": int(limit),
                "offset": int(offset),
            },
        ).fetchall()
        return [row_to_dict(r) for r in rows]

    def update(self, run_id: str, fields: dict) -> Optional[dict]:
        """Apply a whitelisted partial update to a run row.

        Keys outside ``_ALLOWED_UPDATES`` are ignored. When nothing
        survives the filter no UPDATE is issued and the current row is
        returned instead.

        Returns:
            The updated row as a dict, or ``None`` when no row has this id.
        """
        filtered = {k: v for k, v in fields.items() if k in _ALLOWED_UPDATES}
        if not filtered:
            return self.get_internal(run_id)
        set_parts: list[str] = []
        params: dict[str, Any] = {"id": str(run_id)}
        for key, val in filtered.items():
            if key in ("conversation_id", "message_id"):
                # UUID columns: bind as text and cast; falsy values become NULL.
                set_parts.append(f"{key} = CAST(:{key} AS uuid)")
                params[key] = str(val) if val else None
            else:
                set_parts.append(f"{key} = :{key}")
                params[key] = val
        # Column names are interpolated, but only after the whitelist filter
        # above; all values travel as bound parameters.
        sql = (
            "UPDATE schedule_runs SET " + ", ".join(set_parts) +
            " WHERE id = CAST(:id AS uuid) RETURNING *"
        )
        row = self._conn.execute(text(sql), params).fetchone()
        return row_to_dict(row) if row is not None else None

    def mark_running(self, run_id: str, celery_task_id: Optional[str]) -> bool:
        """Flip ``pending`` → ``running`` and stamp ``started_at``.

        The ``status = 'pending'`` guard makes the transition a
        compare-and-set: a row in any other state is left alone.

        Returns:
            ``True`` when the row was transitioned, ``False`` otherwise.
        """
        result = self._conn.execute(
            text(
                """
                UPDATE schedule_runs
                SET status = 'running',
                    started_at = now(),
                    celery_task_id = :celery_task_id
                WHERE id = CAST(:id AS uuid)
                  AND status = 'pending'
                """
            ),
            {"id": str(run_id), "celery_task_id": celery_task_id},
        )
        return (result.rowcount or 0) > 0

    def list_stuck_running(
        self, *, age_minutes: int = 15, limit: int = 50,
    ) -> list[dict]:
        """Lock ``running`` rows past the soft-time-limit envelope.

        Selects rows whose ``started_at`` is older than ``age_minutes``,
        oldest first, with ``FOR UPDATE SKIP LOCKED`` so rows already
        locked by another transaction are skipped rather than waited on.
        """
        rows = self._conn.execute(
            text(
                """
                SELECT * FROM schedule_runs
                WHERE status = 'running'
                  AND started_at IS NOT NULL
                  AND started_at < now() - make_interval(mins => :age)
                ORDER BY started_at ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {"age": int(age_minutes), "limit": int(limit)},
        ).fetchall()
        return [row_to_dict(r) for r in rows]

    def list_stuck_pending(
        self, *, age_minutes: int = 15, limit: int = 50,
    ) -> list[dict]:
        """Lock 'pending' rows whose worker never picked them up.

        Age is measured from ``created_at`` because such rows have no
        ``started_at``. Uses ``FOR UPDATE SKIP LOCKED`` like
        ``list_stuck_running``.
        """
        rows = self._conn.execute(
            text(
                """
                SELECT * FROM schedule_runs
                WHERE status = 'pending'
                  AND started_at IS NULL
                  AND created_at < now() - make_interval(mins => :age)
                ORDER BY created_at ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {"age": int(age_minutes), "limit": int(limit)},
        ).fetchall()
        return [row_to_dict(r) for r in rows]

    def cleanup_older_than(
        self,
        ttl_days: int,
        *,
        keep_recent_per_schedule: int = 50,
    ) -> int:
        """Trim run rows older than ``ttl_days``, keeping the recent log slice.

        A row is deleted only when BOTH hold: it falls outside the newest
        ``keep_recent_per_schedule`` runs of its schedule (ranked by
        ``scheduled_for``), and its ``created_at`` is more than
        ``ttl_days`` days old.

        Raises:
            ValueError: If ``ttl_days`` is not positive.

        Returns:
            The number of rows deleted.
        """
        if ttl_days <= 0:
            raise ValueError("ttl_days must be positive")
        result = self._conn.execute(
            text(
                """
                DELETE FROM schedule_runs
                WHERE id IN (
                    SELECT id FROM (
                        SELECT id,
                               ROW_NUMBER() OVER (
                                   PARTITION BY schedule_id
                                   ORDER BY scheduled_for DESC
                               ) AS rn,
                               created_at
                        FROM schedule_runs
                    ) ranked
                    WHERE ranked.rn > :keep
                      AND ranked.created_at < now() - make_interval(days => :ttl)
                )
                """
            ),
            {"keep": int(keep_recent_per_schedule), "ttl": int(ttl_days)},
        )
        return int(result.rowcount or 0)

View File

@@ -0,0 +1,352 @@
"""Repository for the ``schedules`` table (CRUD + dispatcher claim query)."""
from __future__ import annotations
import json
from datetime import datetime
from typing import Any, Iterable, Optional
from sqlalchemy import Connection, text
from application.storage.db.base_repository import row_to_dict
# Columns the ``update`` / ``update_internal`` methods are allowed to write;
# every other key passed to them is silently dropped.
_ALLOWED_UPDATES = frozenset(
    {
        "name", "instruction", "status", "cron", "run_at", "timezone",
        "next_run_at", "last_run_at", "end_at", "tool_allowlist",
        "model_id", "token_budget", "consecutive_failure_count",
        "origin_conversation_id",
    }
)


class SchedulesRepository:
    """CRUD + dispatcher hot path for ``schedules``."""

    def __init__(self, conn: Connection) -> None:
        """Bind the repository to an open connection (caller owns the txn)."""
        self._conn = conn

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _build_assignments(fields: dict) -> tuple[list[str], dict[str, Any]]:
        """Turn whitelisted ``fields`` into SET fragments and bind params.

        Keys outside ``_ALLOWED_UPDATES`` are ignored. Column names are
        interpolated into the SQL only after that whitelist check; values
        always travel as bound parameters.
        """
        set_parts: list[str] = []
        params: dict[str, Any] = {}
        for key, val in fields.items():
            if key not in _ALLOWED_UPDATES:
                continue
            if key == "tool_allowlist":
                # JSONB column: serialise to a JSON array (None -> []).
                set_parts.append("tool_allowlist = CAST(:tool_allowlist AS jsonb)")
                params["tool_allowlist"] = json.dumps(list(val or []))
            elif key == "origin_conversation_id":
                # UUID column: bind as text and cast; falsy becomes NULL.
                set_parts.append(
                    "origin_conversation_id = CAST(:origin_conversation_id AS uuid)"
                )
                params["origin_conversation_id"] = str(val) if val else None
            else:
                set_parts.append(f"{key} = :{key}")
                params[key] = val
        return set_parts, params

    def _list_filtered(
        self,
        sql: str,
        params: dict[str, Any],
        statuses: Optional[Iterable[str]],
        trigger_type: Optional[str],
    ) -> list[dict]:
        """Append the optional status/trigger filters, run, newest first.

        ``statuses=None`` means "no status filter"; an empty iterable can
        match nothing, so it short-circuits to ``[]`` without a query.
        """
        if statuses is not None:
            status_list = [str(s) for s in statuses]
            if not status_list:
                return []
            placeholders = ", ".join(f":s{i}" for i, _ in enumerate(status_list))
            sql += f" AND status IN ({placeholders})"
            for i, s in enumerate(status_list):
                params[f"s{i}"] = s
        if trigger_type:
            sql += " AND trigger_type = :trigger_type"
            params["trigger_type"] = trigger_type
        sql += " ORDER BY created_at DESC"
        rows = self._conn.execute(text(sql), params).fetchall()
        return [row_to_dict(r) for r in rows]

    # ------------------------------------------------------------------
    # CRUD
    # ------------------------------------------------------------------

    def create(
        self,
        user_id: str,
        agent_id: Optional[str],
        trigger_type: str,
        instruction: str,
        *,
        cron: Optional[str] = None,
        run_at: Optional[datetime] = None,
        timezone: str = "UTC",
        next_run_at: Optional[datetime] = None,
        end_at: Optional[datetime] = None,
        name: Optional[str] = None,
        tool_allowlist: Optional[Iterable[str]] = None,
        model_id: Optional[str] = None,
        token_budget: Optional[int] = None,
        origin_conversation_id: Optional[str] = None,
        created_via: str = "ui",
        status: str = "active",
    ) -> dict:
        """Insert a new schedule and return the populated row.

        ``agent_id`` and ``origin_conversation_id`` are stored as NULL when
        falsy; ``tool_allowlist`` is stored as a JSON array (``[]`` when
        omitted); ``token_budget`` is coerced to ``int`` when given.
        """
        params = {
            "user_id": user_id,
            "agent_id": str(agent_id) if agent_id else None,
            "trigger_type": trigger_type,
            "instruction": instruction,
            "cron": cron,
            "run_at": run_at,
            "tz": timezone,
            "next_run_at": next_run_at,
            "end_at": end_at,
            "name": name,
            "allowlist": json.dumps(list(tool_allowlist or [])),
            "model_id": model_id,
            "token_budget": int(token_budget) if token_budget is not None else None,
            "origin_conversation_id": (
                str(origin_conversation_id) if origin_conversation_id else None
            ),
            "created_via": created_via,
            "status": status,
        }
        row = self._conn.execute(
            text(
                """
                INSERT INTO schedules (
                    user_id, agent_id, trigger_type, instruction, status,
                    cron, run_at, timezone, next_run_at, end_at, name,
                    tool_allowlist, model_id, token_budget,
                    origin_conversation_id, created_via
                ) VALUES (
                    :user_id,
                    CAST(:agent_id AS uuid),
                    :trigger_type,
                    :instruction,
                    :status,
                    :cron,
                    :run_at,
                    :tz,
                    :next_run_at,
                    :end_at,
                    :name,
                    CAST(:allowlist AS jsonb),
                    :model_id,
                    :token_budget,
                    CAST(:origin_conversation_id AS uuid),
                    :created_via
                ) RETURNING *
                """
            ),
            params,
        ).fetchone()
        return row_to_dict(row)

    def get(self, schedule_id: str, user_id: str) -> Optional[dict]:
        """Fetch an owned schedule (None when missing or owned by another)."""
        row = self._conn.execute(
            text(
                "SELECT * FROM schedules "
                "WHERE id = CAST(:id AS uuid) AND user_id = :user_id"
            ),
            {"id": str(schedule_id), "user_id": user_id},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def get_internal(self, schedule_id: str) -> Optional[dict]:
        """Fetch a schedule with no ownership scoping (worker-only)."""
        row = self._conn.execute(
            text("SELECT * FROM schedules WHERE id = CAST(:id AS uuid)"),
            {"id": str(schedule_id)},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def get_for_update(
        self, schedule_id: str, user_id: str,
    ) -> Optional[dict]:
        """Owned fetch with FOR UPDATE; closes the Run-Now TOCTOU.

        The row lock is held until the surrounding transaction ends.
        """
        row = self._conn.execute(
            text(
                "SELECT * FROM schedules "
                "WHERE id = CAST(:id AS uuid) AND user_id = :user_id "
                "FOR UPDATE"
            ),
            {"id": str(schedule_id), "user_id": user_id},
        ).fetchone()
        return row_to_dict(row) if row is not None else None

    def list_for_agent(
        self,
        agent_id: str,
        user_id: str,
        *,
        statuses: Optional[Iterable[str]] = None,
        trigger_type: Optional[str] = None,
    ) -> list[dict]:
        """Owned schedules for an agent, newest-created first.

        Args:
            agent_id: Agent whose schedules to list.
            user_id: Owner; rows of other users are never returned.
            statuses: Optional status filter; an empty iterable yields ``[]``.
            trigger_type: Optional exact-match filter on ``trigger_type``.
        """
        sql = (
            "SELECT * FROM schedules "
            "WHERE agent_id = CAST(:agent_id AS uuid) AND user_id = :user_id"
        )
        params: dict[str, Any] = {"agent_id": str(agent_id), "user_id": user_id}
        return self._list_filtered(sql, params, statuses, trigger_type)

    def list_for_conversation(
        self,
        user_id: str,
        origin_conversation_id: str,
        *,
        statuses: Optional[Iterable[str]] = None,
        trigger_type: Optional[str] = None,
    ) -> list[dict]:
        """Owned agentless schedules anchored to an originating conversation.

        Only rows with ``agent_id IS NULL`` are returned, newest-created
        first. ``statuses`` and ``trigger_type`` behave as in
        ``list_for_agent``.
        """
        sql = (
            "SELECT * FROM schedules "
            "WHERE user_id = :user_id "
            "AND agent_id IS NULL "
            "AND origin_conversation_id = CAST(:conv AS uuid)"
        )
        params: dict[str, Any] = {
            "user_id": user_id,
            "conv": str(origin_conversation_id),
        }
        return self._list_filtered(sql, params, statuses, trigger_type)

    def list_for_user(self, user_id: str, *, limit: int = 200) -> list[dict]:
        """Owned schedules across all agents — admin / debugging path."""
        rows = self._conn.execute(
            text(
                "SELECT * FROM schedules WHERE user_id = :user_id "
                "ORDER BY created_at DESC LIMIT :limit"
            ),
            {"user_id": user_id, "limit": int(limit)},
        ).fetchall()
        return [row_to_dict(r) for r in rows]

    def count_active_for_user(self, user_id: str) -> int:
        """Active+paused schedules for quota enforcement."""
        scalar = self._conn.execute(
            text(
                "SELECT COUNT(*) FROM schedules "
                "WHERE user_id = :user_id AND status IN ('active', 'paused')"
            ),
            {"user_id": user_id},
        ).scalar()
        return int(scalar or 0)

    def list_due(self, *, limit: int = 100) -> list[dict]:
        """Lock and return schedules with ``next_run_at <= now()``.

        Only ``active`` rows whose ``next_run_at`` does not exceed
        ``end_at`` (when set) qualify. Rows come back earliest-due first
        and are taken ``FOR UPDATE SKIP LOCKED``, so rows already locked
        by another transaction are skipped rather than waited on.
        """
        rows = self._conn.execute(
            text(
                """
                SELECT * FROM schedules
                WHERE status = 'active'
                  AND next_run_at IS NOT NULL
                  AND next_run_at <= now()
                  AND (end_at IS NULL OR next_run_at <= end_at)
                ORDER BY next_run_at ASC
                LIMIT :limit
                FOR UPDATE SKIP LOCKED
                """
            ),
            {"limit": int(limit)},
        ).fetchall()
        return [row_to_dict(r) for r in rows]

    def update(
        self,
        schedule_id: str,
        user_id: str,
        fields: dict,
    ) -> Optional[dict]:
        """Apply a whitelisted partial update; return the new row or None.

        When no key in ``fields`` is whitelisted, no UPDATE is issued and
        the current owned row is returned. ``None`` means the schedule
        does not exist or belongs to another user.
        """
        set_parts, params = self._build_assignments(fields)
        if not set_parts:
            return self.get(schedule_id, user_id)
        # "id"/"user_id" are not whitelisted columns, so they cannot clash
        # with a field parameter.
        params["id"] = str(schedule_id)
        params["user_id"] = user_id
        sql = (
            "UPDATE schedules SET " + ", ".join(set_parts) +
            " WHERE id = CAST(:id AS uuid) AND user_id = :user_id "
            "RETURNING *"
        )
        row = self._conn.execute(text(sql), params).fetchone()
        return row_to_dict(row) if row is not None else None

    def update_internal(self, schedule_id: str, fields: dict) -> None:
        """Apply a whitelisted partial update from a worker context.

        Same whitelist as ``update`` but with no ownership scoping and no
        return value; a no-op when nothing in ``fields`` is whitelisted.
        """
        set_parts, params = self._build_assignments(fields)
        if not set_parts:
            return
        params["id"] = str(schedule_id)
        sql = (
            "UPDATE schedules SET " + ", ".join(set_parts) +
            " WHERE id = CAST(:id AS uuid)"
        )
        self._conn.execute(text(sql), params)

    def cancel(self, schedule_id: str, user_id: str) -> bool:
        """Soft-cancel — flips ``status`` to ``cancelled`` and clears ``next_run_at``.

        Rows already ``cancelled`` or ``completed`` are left untouched.

        Returns:
            ``True`` when a row was changed.
        """
        result = self._conn.execute(
            text(
                "UPDATE schedules SET status = 'cancelled', next_run_at = NULL "
                "WHERE id = CAST(:id AS uuid) AND user_id = :user_id "
                "AND status NOT IN ('cancelled', 'completed')"
            ),
            {"id": str(schedule_id), "user_id": user_id},
        )
        return (result.rowcount or 0) > 0

    def delete(self, schedule_id: str, user_id: str) -> bool:
        """Hard-delete an owned schedule and its runs (FK cascade)."""
        result = self._conn.execute(
            text(
                "DELETE FROM schedules "
                "WHERE id = CAST(:id AS uuid) AND user_id = :user_id"
            ),
            {"id": str(schedule_id), "user_id": user_id},
        )
        return (result.rowcount or 0) > 0

    # ------------------------------------------------------------------
    # Failure tracking
    # ------------------------------------------------------------------

    def bump_failure_count(self, schedule_id: str) -> int:
        """Increment ``consecutive_failure_count`` and return the new value.

        Returns ``0`` when no schedule with this id exists.
        """
        row = self._conn.execute(
            text(
                "UPDATE schedules "
                "SET consecutive_failure_count = consecutive_failure_count + 1 "
                "WHERE id = CAST(:id AS uuid) "
                "RETURNING consecutive_failure_count"
            ),
            {"id": str(schedule_id)},
        ).fetchone()
        return int(row[0]) if row is not None else 0

    def reset_failure_count(self, schedule_id: str) -> None:
        """Reset the failure counter to 0 after a successful run."""
        self._conn.execute(
            text(
                "UPDATE schedules SET consecutive_failure_count = 0 "
                "WHERE id = CAST(:id AS uuid)"
            ),
            {"id": str(schedule_id)},
        )

    def autopause(self, schedule_id: str) -> bool:
        """Flip an active schedule to ``paused`` after repeated failures.

        Also clears ``next_run_at``. Only rows currently ``active`` are
        affected.

        Returns:
            ``True`` when the schedule was paused by this call.
        """
        result = self._conn.execute(
            text(
                "UPDATE schedules SET status = 'paused', next_run_at = NULL "
                "WHERE id = CAST(:id AS uuid) AND status = 'active'"
            ),
            {"id": str(schedule_id)},
        )
        return (result.rowcount or 0) > 0

View File

@@ -5,10 +5,10 @@ from __future__ import annotations
import json
from typing import Any, Optional
from sqlalchemy import Connection, func, select, text
from sqlalchemy import case, Connection, func, select, text
from application.storage.db.base_repository import looks_like_uuid, row_to_dict
from application.storage.db.models import sources_table
from application.storage.db.models import ingest_chunk_progress_table, sources_table
_SCALAR_COLUMNS = {
@@ -61,6 +61,21 @@ def _coerce_jsonb(value: Any) -> Any:
return value
def _ingest_status_case():
    """Build the labelled ``ingest_status`` CASE over the progress join.

    Branches are evaluated in order:
    no joined progress row -> ``None``; ``status == 'stalled'`` ->
    ``failed``; fewer embedded than total chunks -> ``processing``;
    anything else (embed completed) -> ``None``.
    """
    progress = ingest_chunk_progress_table.c
    no_progress_row = (progress.source_id.is_(None), None)
    escalated_stall = (progress.status == "stalled", "failed")
    still_embedding = (
        progress.embedded_chunks < progress.total_chunks,
        "processing",
    )
    derived = case(
        no_progress_row,
        escalated_stall,
        still_embedding,
        else_=None,
    )
    return derived.label("ingest_status")
class SourcesRepository:
def __init__(self, conn: Connection) -> None:
self._conn = conn
@@ -192,13 +207,25 @@ class SourcesRepository:
as ``"desc"``.
Returns:
A list of source rows as plain dicts (via ``row_to_dict``).
A list of source rows as plain dicts (via ``row_to_dict``),
each carrying a derived ``ingest_status`` (``failed`` /
``processing`` / ``None``) from the joined progress row.
"""
column_name = sort_field if sort_field in _SORTABLE_COLUMNS else "date"
sort_column = sources_table.c[column_name]
ascending = sort_order.lower() == "asc"
stmt = select(sources_table).where(sources_table.c.user_id == user_id)
stmt = (
select(sources_table, _ingest_status_case())
.select_from(
sources_table.outerjoin(
ingest_chunk_progress_table,
ingest_chunk_progress_table.c.source_id
== sources_table.c.id,
)
)
.where(sources_table.c.user_id == user_id)
)
if search_term:
stmt = stmt.where(
sources_table.c.name.ilike(

View File

@@ -175,6 +175,67 @@ class UsersRepository:
{"user_id": user_id, "agent_id": agent_id},
)
def set_default_tool_enabled(
    self, user_id: str, tool_name: str, enabled: bool
) -> None:
    """Enable or disable one default chat tool for a user (idempotent).

    The preference is stored as a deny-list under
    ``tool_preferences.disabled_default_tools``: enabling a tool filters
    its name out of that array, disabling appends the name unless it is
    already present. The user row is upserted first, and ``updated_at``
    is refreshed in both cases.
    """
    self.upsert(user_id)
    bind = {"user_id": user_id, "tool_name": tool_name}
    if enabled:
        # Rebuild the array without ``tool_name``; an empty result falls
        # back to '[]' because jsonb_agg over zero rows yields NULL.
        statement = """
            UPDATE users
            SET tool_preferences = jsonb_set(
                COALESCE(tool_preferences, '{}'::jsonb),
                '{disabled_default_tools}',
                COALESCE(
                    (
                        SELECT jsonb_agg(elem)
                        FROM jsonb_array_elements(
                            COALESCE(
                                tool_preferences->'disabled_default_tools',
                                '[]'::jsonb
                            )
                        ) AS elem
                        WHERE (elem #>> '{}') != :tool_name
                    ),
                    '[]'::jsonb
                )
            ),
            updated_at = now()
            WHERE user_id = :user_id
            """
    else:
        # Append ``tool_name`` only when the array does not contain it yet.
        statement = """
            UPDATE users
            SET tool_preferences = jsonb_set(
                COALESCE(tool_preferences, '{}'::jsonb),
                '{disabled_default_tools}',
                CASE
                    WHEN COALESCE(
                        tool_preferences->'disabled_default_tools',
                        '[]'::jsonb
                    ) @> to_jsonb(CAST(:tool_name AS text))
                    THEN tool_preferences->'disabled_default_tools'
                    ELSE
                        COALESCE(
                            tool_preferences->'disabled_default_tools',
                            '[]'::jsonb
                        ) || to_jsonb(CAST(:tool_name AS text))
                END
            ),
            updated_at = now()
            WHERE user_id = :user_id
            """
    self._conn.execute(text(statement), bind)
# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------

View File

@@ -16,9 +16,6 @@ from urllib.parse import urljoin
import requests
from application.agents.agent_creator import AgentCreator
from application.api.answer.services.stream_processor import get_prompt
from application.core.settings import settings
from application.events.publisher import publish_user_event
from application.parser.chunking import Chunker
@@ -29,9 +26,11 @@ from application.parser.embedding_pipeline import (
)
from application.parser.file.bulk import SimpleDirectoryReader, get_default_file_extractor
from application.parser.file.constants import SUPPORTED_SOURCE_EXTENSIONS
from application.parser.remote.remote_creator import RemoteCreator
from application.parser.remote.remote_creator import (
RemoteCreator,
normalize_remote_data,
)
from application.parser.schema.base import Document
from application.retriever.retriever_creator import RetrieverCreator
from application.storage.db.base_repository import looks_like_uuid
from application.storage.db.repositories.agents import AgentsRepository
@@ -97,6 +96,40 @@ def _stop_ingest_heartbeat(thread, stop_event):
thread.join(timeout=5)
def _make_parse_progress_callback(task, user, source_id, start_pct, end_pct):
"""Build a ``load_data`` callback mapping parse progress to
``[start_pct, end_pct]`` via ``update_state`` + a throttled
``stage='parsing'`` SSE event.
"""
span = end_pct - start_pct
source_id_str = str(source_id)
state = {"last_pct": -1}
def _callback(files_done, total_files):
if not total_files:
return
pct = start_pct + int((files_done / total_files) * span)
task.update_state(
state="PROGRESS",
meta={"current": pct, "status": "Parsing files"},
)
if user and pct > state["last_pct"]:
publish_user_event(
user,
"source.ingest.progress",
{
"current": pct,
"total": total_files,
"files_done": files_done,
"stage": "parsing",
},
scope={"kind": "source", "id": source_id_str},
)
state["last_pct"] = pct
return _callback
# Define a function to extract metadata from a given filename.
@@ -354,146 +387,6 @@ def upload_index(full_path, file_data):
file.close()
def run_agent_logic(agent_config, input_data):
# Run one agent invocation (created with endpoint="webhook") and return the
# aggregated result.
#
# Args:
#   agent_config: mapping describing the agent. ``agent_config["key"]`` is
#     required; the other keys read below are optional and have fallbacks.
#   input_data: query passed to both ``retriever.search`` and ``agent.gen``.
#
# Returns:
#   dict with "answer", "sources", "tool_calls" and "thought".
#
# Raises:
#   Any exception raised below is logged with its traceback and re-raised.
try:
# Imported inside the function rather than at module level.
from application.core.model_utils import (
get_api_key_for_provider,
get_default_model_id,
get_provider_from_model_id,
validate_model_id,
)
from application.utils import calculate_doc_token_budget
retriever = agent_config.get("retriever", "classic")
# agent_config is a PG row dict: ``source_id`` is a UUID, and the
# retriever/chunks live on the source row. Resolve source row for
# its retriever/chunks if the agent points at one.
source_id = agent_config.get("source_id") or agent_config.get("source")
# Stays an empty dict when no source row is resolved; becomes the source
# id string otherwise.
source_active = {}
if source_id:
with db_readonly() as conn:
src_row = SourcesRepository(conn).get(
str(source_id),
agent_config.get("user_id") or agent_config.get("user"),
)
if src_row:
source_active = str(src_row["id"])
retriever = src_row.get("retriever", retriever)
source = {"active_docs": source_active}
# ``or 2`` also covers a stored falsy value (None / 0 / ""), not just a
# missing key.
chunks = int(agent_config.get("chunks", 2) or 2)
prompt_id = agent_config.get("prompt_id", "default")
# Direct indexing: a missing "key" raises KeyError, handled by the outer
# except (logged, then re-raised).
user_api_key = agent_config["key"]
# Prefer "id", fall back to "_id"; None when neither is set.
agent_id = (
str(agent_config.get("id"))
if agent_config.get("id")
else (str(agent_config.get("_id")) if agent_config.get("_id") else None)
)
agent_type = agent_config.get("agent_type", "classic")
owner = agent_config.get("user_id") or agent_config.get("user")
# Minimal token carrying only the owner as ``sub``.
decoded_token = {"sub": owner}
json_schema = agent_config.get("json_schema")
prompt = get_prompt(prompt_id)
# Determine model_id: check agent's default_model_id, fallback to system default
agent_default_model = agent_config.get("default_model_id", "")
if agent_default_model and validate_model_id(
agent_default_model, user_id=owner
):
model_id = agent_default_model
else:
model_id = get_default_model_id()
if agent_default_model:
# Stored model_id no longer resolves in the registry. Log so
# operators can detect bad YAML edits before users complain;
# behavior matches the historical silent fallback.
logging.warning(
"Agent %s references unknown model_id %r; falling back to %r",
agent_id,
agent_default_model,
model_id,
)
# Get provider and API key for the selected model
provider = (
get_provider_from_model_id(model_id, user_id=owner)
if model_id
else settings.LLM_PROVIDER
)
system_api_key = get_api_key_for_provider(provider or settings.LLM_PROVIDER)
# Calculate proper doc_token_limit based on model's context window
doc_token_limit = calculate_doc_token_budget(
model_id=model_id, user_id=owner
)
# NOTE: ``retriever`` is rebound here from the retriever *name* to the
# retriever object returned by the factory.
retriever = RetrieverCreator.create_retriever(
retriever,
source=source,
chat_history=[],
prompt=prompt,
chunks=chunks,
doc_token_limit=doc_token_limit,
model_id=model_id,
user_api_key=user_api_key,
agent_id=agent_id,
decoded_token=decoded_token,
)
# Pre-fetch documents using the retriever
retrieved_docs = []
try:
docs = retriever.search(input_data)
if docs:
retrieved_docs = docs
except Exception as e:
# Best effort: a retrieval failure is only logged and the agent still
# runs, with an empty document list.
logging.warning(f"Failed to retrieve documents: {e}")
agent = AgentCreator.create_agent(
agent_type,
endpoint="webhook",
llm_name=provider or settings.LLM_PROVIDER,
model_id=model_id,
api_key=system_api_key,
agent_id=agent_id,
user_api_key=user_api_key,
prompt=prompt,
chat_history=[],
retrieved_docs=retrieved_docs,
decoded_token=decoded_token,
attachments=[],
json_schema=json_schema,
)
answer = agent.gen(query=input_data)
# Accumulators for the items yielded by ``agent.gen``.
response_full = ""
thought = ""
source_log_docs = []
tool_calls = []
# Each yielded item is classified by the first matching key, checked in
# this order: "answer", "sources", "tool_calls", "thought".
for line in answer:
if "answer" in line:
response_full += str(line["answer"])
elif "sources" in line:
source_log_docs.extend(line["sources"])
elif "tool_calls" in line:
tool_calls.extend(line["tool_calls"])
elif "thought" in line:
thought += line["thought"]
result = {
"answer": response_full,
"sources": source_log_docs,
"tool_calls": tool_calls,
"thought": thought,
}
# Per-activity summary fields (answer_length, thought_length,
# source_count, tool_call_count) now ride on the inner
# ``activity_finished`` event emitted by ``log_activity`` around
# ``Agent.gen`` above; no separate ``agent_response`` log needed.
return result
except Exception as e:
# Log with traceback, then propagate unchanged to the caller.
logging.error(f"Error in run_agent_logic: {e}", exc_info=True)
raise
# Define the main function for ingesting and processing documents.
@@ -637,7 +530,12 @@ def ingest_worker(
exclude_hidden=exclude,
file_metadata=metadata_from_filename,
)
raw_docs = reader.load_data()
# Parsing/OCR owns 1-50% of the bar; embedding takes 50-100%.
raw_docs = reader.load_data(
progress_callback=_make_parse_progress_callback(
self, user, source_uuid, start_pct=1, end_pct=50,
)
)
directory_structure = getattr(reader, "directory_structure", {})
logging.info(f"Directory structure from reader: {directory_structure}")
@@ -677,6 +575,7 @@ def ingest_worker(
docs, vector_store_path, source_uuid, self,
attempt_id=getattr(self.request, "id", None),
user_id=user,
progress_start=50, progress_end=100,
)
finally:
_stop_ingest_heartbeat(heartbeat_thread, heartbeat_stop)
@@ -807,6 +706,8 @@ def reingest_source_worker(self, source_id, user):
{
"source_id": source_id,
"name": source_name,
# ``filename`` labels the upload toast on auto-create.
"filename": source_name,
"operation": "reingest",
},
scope={"kind": "source", "id": source_id},
@@ -914,6 +815,7 @@ def reingest_source_worker(self, source_id, user):
{
"source_id": source_id,
"name": source_name,
"filename": source_name,
"operation": "reingest",
"no_changes": True,
"chunks_added": 0,
@@ -1101,6 +1003,7 @@ def reingest_source_worker(self, source_id, user):
completed_payload: dict = {
"source_id": source_id,
"name": source_name,
"filename": source_name,
"operation": "reingest",
"chunks_added": added,
"chunks_deleted": deleted,
@@ -1140,6 +1043,7 @@ def reingest_source_worker(self, source_id, user):
{
"source_id": str(source_id),
"name": source_name,
"filename": source_name,
"operation": "reingest",
"error": str(e)[:1024],
},
@@ -1431,19 +1335,35 @@ def sync_worker(self, frequency):
name = doc.get("name")
user = doc.get("user_id")
source_type = doc.get("type")
source_data = doc.get("remote_data")
retriever = doc.get("retriever")
doc_id = str(doc.get("id"))
sync_counts["total_sync_count"] += 1
# Connector sources have no RemoteCreator loader and need an OAuth
# token to sync, which a scheduled task lacks — skip them.
if source_type and source_type.startswith("connector"):
sync_counts["sync_skipped"] += 1
continue
source_data = normalize_remote_data(source_type, doc.get("remote_data"))
if not source_data:
# No syncable URL/config — skip instead of dispatching a sync
# that can only fail (and emit a spurious failed event).
sync_counts["sync_skipped"] += 1
continue
resp = sync(
self, source_data, name, user, source_type, frequency, retriever, doc_id
)
sync_counts["total_sync_count"] += 1
sync_counts[
"sync_success" if resp["status"] == "success" else "sync_failure"
] += 1
return {
key: sync_counts[key]
for key in ["total_sync_count", "sync_success", "sync_failure"]
for key in [
"total_sync_count", "sync_success", "sync_failure", "sync_skipped",
]
}
@@ -1621,7 +1541,21 @@ def agent_webhook_worker(self, agent_id, payload):
raise
self.update_state(state="PROGRESS", meta={"current": 50})
try:
result = run_agent_logic(agent_config, input_data)
# Shared headless path with the scheduler; approval-gated tools auto-deny.
from application.agents.headless_runner import run_agent_headless
outcome = run_agent_headless(
agent_config,
input_data,
tool_allowlist=_webhook_tool_allowlist(agent_config),
endpoint="webhook",
)
result = {
"answer": outcome.get("answer", ""),
"sources": outcome.get("sources", []),
"tool_calls": outcome.get("tool_calls", []),
"thought": outcome.get("thought", ""),
}
except Exception as e:
logging.error(f"Error running agent logic: {e}", exc_info=True)
raise
@@ -1634,6 +1568,11 @@ def agent_webhook_worker(self, agent_id, payload):
self.update_state(state="PROGRESS", meta={"current": 100})
def _webhook_tool_allowlist(agent_config):
    """Return the tool allowlist used for webhook-triggered agent runs.

    The result is always an empty list: approval-gated tools are denied
    across the board for webhooks (per-agent opt-in is TBD), so
    ``agent_config`` is accepted but not consulted yet.
    """
    allowlist = list()
    return allowlist
def ingest_connector(
self,
job_name: str,
@@ -1785,14 +1724,15 @@ def ingest_connector(
exclude_hidden=True,
file_metadata=metadata_from_filename,
)
raw_docs = reader.load_data()
# Parsing/OCR fills 40-60% of the bar; embedding takes 60-100%.
raw_docs = reader.load_data(
progress_callback=_make_parse_progress_callback(
self, user, source_uuid, start_pct=40, end_pct=60,
)
)
directory_structure = getattr(reader, "directory_structure", {})
# Step 4: Process documents (chunking, embedding, etc.)
self.update_state(
state="PROGRESS", meta={"current": 60, "status": "Processing documents"}
)
chunker = Chunker(
chunking_strategy="classic_chunk",
max_tokens=MAX_TOKENS,
@@ -1829,12 +1769,13 @@ def ingest_connector(
os.makedirs(vector_store_path, exist_ok=True)
self.update_state(
state="PROGRESS", meta={"current": 80, "status": "Storing documents"}
state="PROGRESS", meta={"current": 60, "status": "Storing documents"}
)
embed_and_store_documents(
docs, vector_store_path, source_uuid, self,
attempt_id=getattr(self.request, "id", None),
user_id=user,
progress_start=60, progress_end=100,
)
assert_index_complete(source_uuid)

View File

@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 -960 960 960" width="24px" fill="#e3e3e3"><path d="m612-292 56-56-148-148v-184h-80v216l172 172ZM480-80q-83 0-156-31.5T197-197q-54-54-85.5-127T80-480q0-83 31.5-156T197-763q54-54 127-85.5T480-880q83 0 156 31.5T763-763q54 54 85.5 127T880-480q0 83-31.5 156T763-197q-54 54-127 85.5T480-80Zm0-80q134 0 227-93t93-227q0-134-93-227t-227-93q-134 0-227 93t-93 227q0 134 93 227t227 93Zm0-320Z"/></svg>

After

Width:  |  Height:  |  Size: 455 B

View File

@@ -37,6 +37,7 @@ export default function AgentLogs() {
useEffect(() => {
if (agentId) fetchAgent(agentId);
}, [agentId, token]);
return (
<div className="p-4 md:p-12">
<div className="flex items-center gap-3 px-4">
@@ -78,7 +79,6 @@ export default function AgentLogs() {
)}
{loadingAgent ? (
<div className="flex h-[55vh] w-full items-center justify-center">
{' '}
<Spinner />
</div>
) : (

View File

@@ -439,12 +439,29 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
const response = await userService.getUserTools(token);
if (!response.ok) throw new Error('Failed to fetch tools');
const data = await response.json();
// Group ordering: builtins -> defaults -> user tools (sorted via the
// MultiSelectPopup first-appearance grouping).
const groupFor = (tool: UserToolType): string => {
if (tool.builtin) return t('agents.form.toolsPopup.groupBuiltin');
if (tool.default) return t('agents.form.toolsPopup.groupDefault');
return t('agents.form.toolsPopup.groupCustom');
};
const tools: OptionType[] = data.tools.map((tool: UserToolType) => ({
id: tool.id,
label: getToolDisplayName(tool),
icon: `/toolIcons/tool_${tool.name}.svg`,
name: tool.name,
group: groupFor(tool),
}));
const groupOrder = [
t('agents.form.toolsPopup.groupBuiltin'),
t('agents.form.toolsPopup.groupDefault'),
t('agents.form.toolsPopup.groupCustom'),
];
tools.sort(
(a, b) =>
groupOrder.indexOf(a.group || '') - groupOrder.indexOf(b.group || ''),
);
setUserTools(tools);
};
const getModels = async () => {
@@ -762,6 +779,15 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
{t('agents.form.buttons.logs')}
</button>
)}
{modeConfig[effectiveMode].showAccessDetails && (
<button
className="group border-primary text-primary hover:bg-primary/90 flex items-center gap-2 rounded-3xl border border-solid px-5 py-2 text-sm font-medium transition-colors hover:text-white"
onClick={() => navigate(`/agents/schedules/${agent.id}`)}
>
<span className="block h-5 w-5 bg-[url('/src/assets/clock-purple.svg')] bg-contain bg-center bg-no-repeat transition-all group-hover:bg-[url('/src/assets/clock-white.svg')]" />
{t('agents.form.buttons.schedules')}
</button>
)}
{modeConfig[effectiveMode].showAccessDetails && (
<button
className="border-primary text-primary hover:bg-primary/90 rounded-3xl border border-solid px-5 py-2 text-sm font-medium transition-colors hover:text-white"

View File

@@ -3,6 +3,7 @@ import { Route, Routes } from 'react-router-dom';
import AgentLogs from './AgentLogs';
import AgentsList from './AgentsList';
import NewAgent from './NewAgent';
import SchedulesView from './schedules/SchedulesView';
import SharedAgent from './SharedAgent';
import WorkflowBuilder from './workflow/WorkflowBuilder';
@@ -13,6 +14,7 @@ export default function Agents() {
<Route path="/new" element={<NewAgent mode="new" />} />
<Route path="/edit/:agentId" element={<NewAgent mode="edit" />} />
<Route path="/logs/:agentId" element={<AgentLogs />} />
<Route path="/schedules/:agentId" element={<SchedulesView />} />
<Route path="/shared/:agentId" element={<SharedAgent />} />
<Route path="/workflow/new" element={<WorkflowBuilder />} />
<Route path="/workflow/edit/:agentId" element={<WorkflowBuilder />} />

View File

@@ -0,0 +1,80 @@
import type { ScheduleRun } from '../types/schedule';
export type RunDetailDrawerProps = {
run: ScheduleRun | null;
onClose: () => void;
};
/**
 * Format a timestamp string for display in the viewer's locale.
 * A missing/empty value becomes an em dash; a value that `Date` cannot
 * parse is returned unchanged.
 */
const formatTimestamp = (value?: string | null): string => {
  if (!value) {
    return '—';
  }
  const parsed = new Date(value);
  return Number.isNaN(parsed.getTime()) ? value : parsed.toLocaleString();
};
/**
 * Fixed right-hand panel describing a single schedule run: status,
 * timestamps, token counts and trigger source, followed by the error and/or
 * output sections when the run carries them. Renders nothing when `run` is
 * falsy. `onClose` is wired to the Close button.
 */
export default function RunDetailDrawer(props: RunDetailDrawerProps) {
  const { run, onClose } = props;
  if (!run) {
    return null;
  }

  // Only rendered when the run has a (truthy) error message.
  const errorSection = run.error && (
    <section className="mb-4">
      <h3 className="text-destructive text-sm font-semibold">
        Error{run.error_type ? ` (${run.error_type})` : ''}
      </h3>
      <pre className="bg-background mt-1 max-h-48 overflow-auto rounded-md p-3 font-mono text-xs">
        {run.error}
      </pre>
    </section>
  );

  // Only rendered when the run has (truthy) output.
  const outputSection = run.output && (
    <section className="flex-1 overflow-hidden">
      <h3 className="text-sm font-semibold">
        Output
        {run.output_truncated && (
          <span className="text-muted-foreground ml-1 text-xs">
            (truncated)
          </span>
        )}
      </h3>
      <pre className="bg-background mt-1 h-full overflow-auto rounded-md p-3 font-mono text-xs whitespace-pre-wrap">
        {run.output}
      </pre>
    </section>
  );

  const header = (
    <div className="mb-4 flex items-center justify-between">
      <h2 className="text-lg font-semibold">Run details</h2>
      <button
        type="button"
        onClick={onClose}
        className="border-border text-muted-foreground rounded-md border px-3 py-1 text-sm"
      >
        Close
      </button>
    </div>
  );

  return (
    <aside
      className="border-border bg-card fixed top-0 right-0 z-30 flex h-full w-full max-w-xl flex-col border-l p-6 shadow-lg"
      role="dialog"
      aria-label="Schedule run details"
    >
      {header}
      <dl className="mb-4 grid grid-cols-2 gap-2 text-sm">
        <dt className="text-muted-foreground">Status</dt>
        <dd>{run.status}</dd>
        <dt className="text-muted-foreground">Scheduled for</dt>
        <dd>{formatTimestamp(run.scheduled_for)}</dd>
        <dt className="text-muted-foreground">Started</dt>
        <dd>{formatTimestamp(run.started_at)}</dd>
        <dt className="text-muted-foreground">Finished</dt>
        <dd>{formatTimestamp(run.finished_at)}</dd>
        <dt className="text-muted-foreground">Tokens</dt>
        <dd>
          {run.prompt_tokens} prompt · {run.generated_tokens} generated
        </dd>
        <dt className="text-muted-foreground">Trigger</dt>
        <dd>{run.trigger_source}</dd>
      </dl>
      {errorSection}
      {outputSection}
    </aside>
  );
}

View File

@@ -0,0 +1,88 @@
import { useEffect } from 'react';
import { useDispatch, useSelector } from 'react-redux';
import { selectToken } from '../../preferences/preferenceSlice';
import type { AppDispatch, RootState } from '../../store';
import type { ScheduleRun } from '../types/schedule';
import { loadRunsForSchedule, selectRunsForSchedule } from './schedulesSlice';
export type RunLogProps = {
scheduleId: string;
onSelect?: (run: ScheduleRun) => void;
};
const STATUS_STYLES: Record<string, string> = {
success: 'text-green-600',
failed: 'text-destructive',
timeout: 'text-amber-600',
skipped: 'text-muted-foreground',
running: 'text-blue-600',
pending: 'text-muted-foreground',
};
/**
 * Turn a timestamp string into locale-formatted text for the run table.
 * Falsy input yields an em dash; input that does not parse as a date is
 * echoed back unchanged.
 */
const formatTimestamp = (value?: string | null): string => {
  if (!value) return '—';
  const asDate = new Date(value);
  const isValid = !Number.isNaN(asDate.getTime());
  if (isValid) {
    return asDate.toLocaleString();
  }
  return value;
};
/**
 * Run log table for one schedule.
 *
 * Dispatches `loadRunsForSchedule` whenever `scheduleId` or the auth token
 * changes and renders the runs selected from the store. When `onSelect` is
 * provided, each row gets a "Details" button that passes the run to it.
 */
export default function RunLog({ scheduleId, onSelect }: RunLogProps) {
  const dispatch = useDispatch<AppDispatch>();
  const token = useSelector(selectToken);
  const runs = useSelector((state: RootState) =>
    selectRunsForSchedule(state, scheduleId),
  );

  useEffect(() => {
    // An empty id means there is nothing to load.
    if (scheduleId) {
      dispatch(loadRunsForSchedule({ id: scheduleId, token }));
    }
  }, [dispatch, scheduleId, token]);

  const hasRuns = runs.length !== 0;
  if (!hasRuns) {
    return (
      <p className="text-muted-foreground py-3 text-sm">
        No runs recorded for this schedule yet.
      </p>
    );
  }

  const headerRow = (
    <tr>
      <th className="py-2">When</th>
      <th className="py-2">Status</th>
      <th className="py-2">Tokens</th>
      <th className="py-2">Trigger</th>
      <th className="py-2"></th>
    </tr>
  );

  return (
    <table className="w-full text-left text-sm">
      <thead className="text-muted-foreground text-xs uppercase">
        {headerRow}
      </thead>
      <tbody>
        {runs.map((run) => {
          // Unknown statuses fall back to no extra colour class.
          const statusClass = STATUS_STYLES[run.status] ?? '';
          const errorSuffix = run.error_type ? ` (${run.error_type})` : '';
          return (
            <tr key={run.id} className="border-border border-t">
              <td className="py-2">{formatTimestamp(run.scheduled_for)}</td>
              <td className={`py-2 ${statusClass}`}>
                {run.status}
                {errorSuffix}
              </td>
              <td className="py-2">
                {run.prompt_tokens + run.generated_tokens}
              </td>
              <td className="py-2">{run.trigger_source}</td>
              <td className="py-2">
                {onSelect && (
                  <button
                    type="button"
                    onClick={() => onSelect(run)}
                    className="text-primary text-xs underline"
                  >
                    Details
                  </button>
                )}
              </td>
            </tr>
          );
        })}
      </tbody>
    </table>
  );
}

View File

@@ -0,0 +1,410 @@
import { useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import WrapperModal from '../../modals/WrapperModal';
import type { Schedule, ScheduleCreatePayload } from '../types/schedule';
import {
browserTimezone,
buildCron,
buildRunAtUtc,
parseScheduleToFormValues,
type ScheduleFormValues,
type ScheduleFrequency,
todayDate,
} from './cronBuilder';
export type ScheduleFormModalProps = {
open: boolean;
initial?: Schedule | null;
agentToolIds: string[];
onClose: () => void;
onSubmit: (payload: ScheduleCreatePayload) => Promise<void> | void;
submitting?: boolean;
};
const FREQUENCIES: ScheduleFrequency[] = [
'once',
'daily',
'weekly',
'monthly',
'yearly',
];
// 0=Sun ... 6=Sat (matches POSIX cron's dow field).
const DAY_OPTIONS = [
{ value: 1, key: 'mon' },
{ value: 2, key: 'tue' },
{ value: 3, key: 'wed' },
{ value: 4, key: 'thu' },
{ value: 5, key: 'fri' },
{ value: 6, key: 'sat' },
{ value: 0, key: 'sun' },
] as const;
const MONTH_KEYS = [
'jan',
'feb',
'mar',
'apr',
'may',
'jun',
'jul',
'aug',
'sep',
'oct',
'nov',
'dec',
] as const;
/**
 * Create/edit a Schedule via a modal dialog.
 *
 * Collects a name, an instruction and a frequency-specific "when", then
 * builds a ScheduleCreatePayload and hands it to `onSubmit`. The payload
 * always carries the browser timezone and `agentToolIds` as `tool_allowlist`.
 * Returns null (renders nothing) while `open` is false.
 */
export default function ScheduleFormModal({
open,
initial,
agentToolIds,
onClose,
onSubmit,
submitting,
}: ScheduleFormModalProps) {
const { t } = useTranslation();
// Resolved once per mount (empty dependency list).
const timezone = useMemo<string>(() => browserTimezone(), []);
// Editing: derive the form values from the existing schedule.
// Creating: default to a daily run at 09:00.
const defaults: ScheduleFormValues = useMemo(
() =>
initial
? parseScheduleToFormValues(initial, timezone)
: {
frequency: 'daily',
date: todayDate(timezone),
time: '09:00',
dayOfWeek: 1,
dayOfMonth: 1,
month: 1,
},
[initial, timezone],
);
// NOTE(review): useState only reads its argument on the first render, so if
// the parent keeps this component mounted and later passes a different
// `initial`, these fields presumably keep their previous values — confirm
// the parent remounts the modal (e.g. via `key` or conditional rendering).
const [name, setName] = useState<string>(initial?.name ?? '');
const [instruction, setInstruction] = useState<string>(
initial?.instruction ?? '',
);
const [values, setValues] = useState<ScheduleFormValues>(defaults);
const [error, setError] = useState<string | null>(null);
// Early return sits after every hook call so hook order stays stable.
if (!open) return null;
const setFrequency = (frequency: ScheduleFrequency) =>
setValues((current) => ({ ...current, frequency }));
// Validate the form, build the payload and forward it to `onSubmit`.
// Each validation failure sets `error` and returns without submitting.
const submit = async () => {
if (!instruction.trim()) {
setError(t('agents.schedules.modal.errors.instructionRequired'));
return;
}
const payload: ScheduleCreatePayload = {
instruction: instruction.trim(),
timezone,
// A blank name is sent as undefined rather than an empty string.
name: name.trim() || undefined,
tool_allowlist: agentToolIds,
};
if (values.frequency === 'once') {
let runAt: string;
try {
runAt = buildRunAtUtc(values.date, values.time, timezone);
} catch {
// Any failure to build the timestamp is reported with the same
// message as a past date.
setError(t('agents.schedules.modal.errors.runAtInPast'));
return;
}
// Reject run times that are not strictly in the future.
if (new Date(runAt).getTime() <= Date.now()) {
setError(t('agents.schedules.modal.errors.runAtInPast'));
return;
}
payload.trigger_type = 'once';
payload.run_at = runAt;
} else {
const cron = buildCron(values.frequency, values);
if (!cron) {
// NOTE(review): this reuses the "instruction required" message for a
// cron-building failure — looks like a placeholder; confirm whether a
// dedicated message key should be used here.
setError(t('agents.schedules.modal.errors.instructionRequired'));
return;
}
payload.trigger_type = 'recurring';
payload.cron = cron;
}
setError(null);
await onSubmit(payload);
};
// An `initial` with an id means an existing schedule is being edited; this
// only switches the submit button label between save and create.
const isEdit = Boolean(initial?.id);
return (
<WrapperModal
className="w-[min(560px,92vw)] sm:p-6"
contentClassName="max-h-[80vh]"
close={onClose}
isPerformingTask={submitting}
>
<div className="flex flex-col gap-5">
<div className="flex items-start gap-3 pr-6">
<input
type="text"
value={name}
onChange={(e) => setName(e.target.value)}
placeholder={t('agents.schedules.modal.namePlaceholder')}
className="text-foreground placeholder:text-muted-foreground w-full bg-transparent text-xl font-semibold outline-none"
aria-label={t('agents.schedules.modal.namePlaceholder')}
/>
</div>
<FrequencyTabs
frequency={values.frequency}
onChange={setFrequency}
labels={{
once: t('agents.schedules.modal.frequency.once'),
daily: t('agents.schedules.modal.frequency.daily'),
weekly: t('agents.schedules.modal.frequency.weekly'),
monthly: t('agents.schedules.modal.frequency.monthly'),
yearly: t('agents.schedules.modal.frequency.yearly'),
}}
/>
<OnPicker
values={values}
onChange={setValues}
tDay={(key) => t(`agents.schedules.modal.days.${key}`)}
tMonth={(key) => t(`agents.schedules.modal.months.${key}`)}
labels={{
on: t('agents.schedules.modal.on'),
at: t('agents.schedules.modal.at'),
}}
/>
<label className="flex flex-col gap-2">
<span className="text-foreground text-sm font-medium">
{t('agents.schedules.modal.instructionsLabel')}
</span>
<textarea
value={instruction}
onChange={(e) => setInstruction(e.target.value)}
placeholder={t('agents.schedules.modal.instructionsPlaceholder')}
rows={5}
className="border-border bg-background text-foreground placeholder:text-muted-foreground focus:border-ring focus:ring-ring/40 rounded-md border px-3 py-2 text-sm outline-none focus:ring-2"
/>
</label>
{error && <p className="text-destructive text-sm">{error}</p>}
<div className="flex justify-end">
<button
type="button"
disabled={submitting}
onClick={submit}
className="bg-primary text-primary-foreground hover:bg-primary/90 rounded-full px-5 py-2 text-sm font-semibold disabled:opacity-60"
>
{submitting
? '…'
: isEdit
? t('agents.schedules.modal.save')
: t('agents.schedules.modal.create')}
</button>
</div>
</div>
</WrapperModal>
);
}
type FrequencyTabsProps = {
frequency: ScheduleFrequency;
onChange: (f: ScheduleFrequency) => void;
labels: Record<ScheduleFrequency, string>;
};
/**
 * Pill-style toggle row with one button per entry in FREQUENCIES.
 * The button matching `frequency` is styled as active and exposes
 * `aria-pressed`; clicking any button calls `onChange` with its frequency.
 */
function FrequencyTabs({ frequency, onChange, labels }: FrequencyTabsProps) {
  const baseClass =
    'flex-1 rounded-full px-3 py-1.5 text-xs font-medium transition-colors';
  const activeClass = 'bg-card text-foreground shadow-sm';
  const idleClass = 'text-muted-foreground hover:text-foreground';

  const tabs = FREQUENCIES.map((option) => {
    const isActive = option === frequency;
    return (
      <button
        key={option}
        type="button"
        onClick={() => onChange(option)}
        className={`${baseClass} ${isActive ? activeClass : idleClass}`}
        aria-pressed={isActive}
      >
        {labels[option]}
      </button>
    );
  });

  return (
    <div className="bg-muted/60 dark:bg-muted/40 inline-flex w-full gap-1 rounded-full p-1">
      {tabs}
    </div>
  );
}
type OnPickerProps = {
values: ScheduleFormValues;
onChange: (next: ScheduleFormValues) => void;
tDay: (key: string) => string;
tMonth: (key: string) => string;
labels: { on: string; at: string };
};
/**
 * Frequency-specific "when" controls for the schedule form.
 *
 * Exactly one group is shown, chosen by `values.frequency`:
 * once → date + time; daily → time; weekly → day-of-week pills + time;
 * monthly → day-of-month + time; yearly → month + day-of-month + time.
 * Every edit calls `onChange` with a copy of `values` carrying the change.
 */
function OnPicker({ values, onChange, tDay, tMonth, labels }: OnPickerProps) {
  const patch = (changes: Partial<ScheduleFormValues>) =>
    onChange({ ...values, ...changes });

  const inputClass =
    'border-border bg-background text-foreground rounded-md border px-2 py-1 text-sm outline-none focus:border-ring focus:ring-ring/40 focus:ring-2';
  const groupClass = 'flex items-center gap-2';
  const daysOfMonth = Array.from({ length: 31 }, (_, i) => i + 1);

  // Time-of-day input shared by every frequency.
  const timeInput = (
    <input
      type="time"
      value={values.time}
      onChange={(e) => patch({ time: e.target.value })}
      className={inputClass}
      aria-label={labels.at}
    />
  );

  // Day-of-month (1-31) select shared by the monthly and yearly groups.
  const dayOfMonthSelect = (
    <select
      value={values.dayOfMonth}
      onChange={(e) => patch({ dayOfMonth: Number(e.target.value) })}
      className={inputClass}
      aria-label={labels.on}
    >
      {daysOfMonth.map((d) => (
        <option key={d} value={d}>
          {d}
        </option>
      ))}
    </select>
  );

  // One "label on the left, control on the right" row.
  const labelledRow = (label: string, control: typeof timeInput) => (
    <div className="flex items-center justify-between gap-2">
      <span className="text-foreground text-sm font-medium">{label}</span>
      {control}
    </div>
  );

  const { frequency } = values;

  return (
    <div className="border-border flex flex-col gap-3 rounded-md border p-3">
      {frequency === 'once' &&
        labelledRow(
          labels.on,
          <div className={groupClass}>
            <input
              type="date"
              value={values.date}
              onChange={(e) => patch({ date: e.target.value })}
              className={inputClass}
              aria-label={labels.on}
            />
            {timeInput}
          </div>,
        )}
      {frequency === 'daily' && labelledRow(labels.at, timeInput)}
      {frequency === 'weekly' && (
        <div className="flex flex-col gap-2">
          <div className="flex flex-wrap gap-1">
            {DAY_OPTIONS.map((day) => {
              const isActive = day.value === values.dayOfWeek;
              const stateClass = isActive
                ? 'bg-primary text-primary-foreground'
                : 'border-border text-muted-foreground hover:bg-accent border';
              return (
                <button
                  key={day.key}
                  type="button"
                  onClick={() => patch({ dayOfWeek: day.value })}
                  className={`rounded-full px-3 py-1 text-xs font-medium transition-colors ${stateClass}`}
                  aria-pressed={isActive}
                >
                  {tDay(day.key)}
                </button>
              );
            })}
          </div>
          {labelledRow(labels.at, timeInput)}
        </div>
      )}
      {frequency === 'monthly' &&
        labelledRow(
          labels.on,
          <div className={groupClass}>
            {dayOfMonthSelect}
            {timeInput}
          </div>,
        )}
      {frequency === 'yearly' &&
        labelledRow(
          labels.on,
          <div className={groupClass}>
            <select
              value={values.month}
              onChange={(e) => patch({ month: Number(e.target.value) })}
              className={inputClass}
              aria-label={labels.on}
            >
              {MONTH_KEYS.map((monthKey, index) => (
                <option key={monthKey} value={index + 1}>
                  {tMonth(monthKey)}
                </option>
              ))}
            </select>
            {dayOfMonthSelect}
            {timeInput}
          </div>,
        )}
    </div>
  );
}

View File

@@ -0,0 +1,37 @@
import { describe, expect, it } from 'vitest';
import { extractToolError } from './SchedulerToolCallCard';
// Regression for the iter-6 issue where ``cancel_scheduled_task`` returning
// a plain ``"Error: …"`` string still rendered "Scheduled task cancelled."
// The fix is to extract the error message so the card can branch on it.
// Contract under test: extractToolError returns the text after an "Error:"
// prefix for string results, and null for every other input.
describe('extractToolError', () => {
// Failure path: the prefix is removed and only the message is returned.
it('returns the message for an Error: prefixed string', () => {
expect(
extractToolError('Error: scheduled task not found or already terminal.'),
).toBe('scheduled task not found or already terminal.');
});
// Surrounding whitespace is ignored on both sides of the message.
it('trims leading whitespace before the prefix', () => {
expect(extractToolError(' Error: foo ')).toBe('foo');
});
// A serialized JSON payload is a string but does not start with "Error:".
it('returns null for JSON success payloads', () => {
expect(
extractToolError(JSON.stringify({ task_id: 'x', status: 'cancelled' })),
).toBeNull();
});
it('returns null for plain non-error strings', () => {
expect(extractToolError('done')).toBeNull();
});
// Non-string inputs are never treated as errors.
it('returns null for object results', () => {
expect(extractToolError({ task_id: 'x' })).toBeNull();
});
it('returns null for undefined / null', () => {
expect(extractToolError(undefined)).toBeNull();
expect(extractToolError(null)).toBeNull();
});
});

View File

@@ -0,0 +1,176 @@
import { useEffect, useState } from 'react';
import { useDispatch, useSelector } from 'react-redux';
import { selectToken } from '../../preferences/preferenceSlice';
import type { AppDispatch } from '../../store';
import { deleteSchedule, loadSchedulesForAgent } from './schedulesSlice';
export type SchedulerToolCallCardProps = {
/** Outcome JSON the scheduler tool returned (action result). */
result?: unknown;
/** Action name dispatched by the LLM. */
actionName: string;
/** Status of this tool call (pending → completed). */
status?: string;
/** Agent id, for live-refresh of the cancel action. */
agentId?: string;
};
/**
 * Locale-format a timestamp string for the tool-call card.
 * Returns an em dash for a missing value and the raw input when it cannot
 * be parsed as a date.
 */
const formatTimestamp = (value?: string | null): string => {
  if (!value) {
    return '—';
  }
  const when = new Date(value);
  if (Number.isNaN(when.getTime())) {
    return value;
  }
  return when.toLocaleString();
};
/**
 * Normalize a tool result into a record for field lookups.
 * Strings are parsed as JSON (null when parsing fails), object values are
 * passed through, and everything else, including falsy input, yields null.
 */
const parseResult = (result: unknown): Record<string, unknown> | null => {
  if (typeof result === 'string') {
    try {
      return JSON.parse(result) as Record<string, unknown>;
    } catch {
      // Not JSON (this includes the empty string).
      return null;
    }
  }
  return result && typeof result === 'object'
    ? (result as Record<string, unknown>)
    : null;
};
/**
 * Pull the message out of a plain "Error: …" string result.
 * Returns the trimmed text after the prefix, or null when `result` is not a
 * string or does not start with "Error:" once surrounding whitespace is
 * stripped.
 */
export const extractToolError = (result: unknown): string | null => {
  if (typeof result !== 'string') return null;
  const prefix = 'Error:';
  const text = result.trim();
  return text.startsWith(prefix) ? text.slice(prefix.length).trim() : null;
};
/**
 * In-chat card for scheduler tool calls.
 *
 * Branches on `actionName`:
 * - `cancel_scheduled_task…`: confirmation, or the failure reason.
 * - `list_scheduled_tasks…`: the returned tasks, or the failure reason.
 * - anything else (e.g. schedule_task): the scheduled task with a one-click
 *   Cancel button, or the failure reason.
 *
 * A tool failure can arrive as JSON (`{"error": "…"}`) or as a plain
 * `"Error: …"` string; both shapes are checked in every branch so the card
 * never reports success for a failed call.
 */
export default function SchedulerToolCallCard({
  result,
  actionName,
  status,
  agentId,
}: SchedulerToolCallCardProps) {
  const dispatch = useDispatch<AppDispatch>();
  const token = useSelector(selectToken);
  const [cancelled, setCancelled] = useState<boolean>(false);

  const parsed = parseResult(result);
  const taskId =
    parsed && typeof parsed.task_id === 'string' ? parsed.task_id : null;
  const runAt =
    parsed && typeof parsed.resolved_run_at === 'string'
      ? parsed.resolved_run_at
      : null;
  const instruction =
    parsed && typeof parsed.instruction === 'string'
      ? parsed.instruction
      : null;
  const jsonError =
    parsed && typeof parsed.error === 'string' ? parsed.error : null;
  // Single source of truth for "did the tool call fail?", shared by all
  // branches below.
  const toolError = jsonError || extractToolError(result);

  // Agent-bound chats prime the Schedules tab cache; agentless chats have
  // no per-agent listing, so skip the fetch.
  useEffect(() => {
    if (agentId) dispatch(loadSchedulesForAgent({ agentId, token }));
  }, [dispatch, agentId, token]);

  // Optimistically flip to "Cancelled." and roll back if the delete fails.
  const cancel = async () => {
    if (!taskId) return;
    setCancelled(true);
    try {
      await dispatch(deleteSchedule({ id: taskId, token })).unwrap();
    } catch (err) {
      setCancelled(false);
      console.error(err);
    }
  };

  // Shared failure card; `prefix` names the action that failed.
  const errorCard = (prefix: string) => (
    <div className="border-border bg-card rounded-2xl border p-4 text-sm">
      <p className="text-destructive font-semibold">
        {prefix}: {toolError}
      </p>
    </div>
  );

  if (actionName.startsWith('cancel_scheduled_task')) {
    // Cancel can fail (not found, already terminal, invalid id). Don't
    // claim success in that case.
    if (toolError) return errorCard('Cancel failed');
    return (
      <div className="border-border bg-card rounded-2xl border p-4 text-sm">
        <p className="font-semibold">Scheduled task cancelled.</p>
      </div>
    );
  }

  if (actionName.startsWith('list_scheduled_tasks')) {
    // Without this check a failed listing would render as
    // "0 pending scheduled tasks".
    if (toolError) return errorCard('Listing failed');
    const tasks = Array.isArray(parsed?.tasks)
      ? (parsed?.tasks as Array<Record<string, unknown>>)
      : [];
    return (
      <div className="border-border bg-card rounded-2xl border p-4 text-sm">
        <p className="font-semibold">
          {tasks.length} pending scheduled task{tasks.length === 1 ? '' : 's'}
        </p>
        <ul className="mt-2 flex flex-col gap-1">
          {tasks.map((task) => (
            <li key={String(task.task_id)}>
              {formatTimestamp(task.resolved_run_at as string)} {' '}
              {String(task.instruction || task.name || task.task_id)}
            </li>
          ))}
        </ul>
      </div>
    );
  }

  if (toolError) return errorCard('Scheduling failed');

  return (
    <div className="border-border bg-card rounded-2xl border p-4 text-sm">
      <div className="flex items-center justify-between">
        <p className="font-semibold">
          {status === 'pending' ? '⏰ Scheduling…' : '⏰ Scheduled task'}
        </p>
        {runAt && (
          <span className="text-muted-foreground text-xs">
            {formatTimestamp(runAt)}
          </span>
        )}
      </div>
      {instruction && (
        <p className="text-muted-foreground mt-2 text-sm italic">
          {instruction}
        </p>
      )}
      {taskId && !cancelled && (
        <button
          type="button"
          onClick={cancel}
          className="text-destructive border-border mt-2 rounded-md border px-3 py-1 text-xs"
        >
          Cancel
        </button>
      )}
      {cancelled && (
        <p className="text-muted-foreground mt-2 text-xs">Cancelled.</p>
      )}
    </div>
  );
}

View File

@@ -0,0 +1,360 @@
import { useEffect, useMemo, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { useDispatch, useSelector } from 'react-redux';
import { useNavigate, useParams } from 'react-router-dom';
import userService from '../../api/services/userService';
import ArrowLeft from '../../assets/arrow-left.svg';
import Spinner from '../../components/Spinner';
import { selectToken } from '../../preferences/preferenceSlice';
import type { AppDispatch, RootState } from '../../store';
import type { Agent } from '../types';
import type {
Schedule,
ScheduleCreatePayload,
ScheduleRun,
} from '../types/schedule';
import RunDetailDrawer from './RunDetailDrawer';
import RunLog from './RunLog';
import ScheduleFormModal from './ScheduleFormModal';
import {
createSchedule,
deleteSchedule,
loadSchedulesForAgent,
runScheduleNow,
selectSchedulesForAgent,
setSchedulePaused,
updateSchedule,
} from './schedulesSlice';
/**
 * Render a timestamp string in the viewer's locale.
 *
 * Missing or empty values render as an em dash; strings that `Date` cannot
 * parse are echoed back unchanged.
 */
const formatTimestamp = (value?: string | null): string => {
  if (!value) {
    return '—';
  }
  const parsed = new Date(value);
  const isValid = !Number.isNaN(parsed.getTime());
  return isValid ? parsed.toLocaleString() : value;
};
/**
 * Standalone Schedules page for an agent: list, create, edit, pause, run, delete.
 *
 * Reads `agentId` from the route, fetches the agent record for the header and
 * for the tool ids handed to the form modal, and loads that agent's schedules
 * into the `schedules` slice. Schedules are shown in two sections split by
 * `trigger_type` ('recurring' vs 'once').
 */
export default function SchedulesView() {
  const { t } = useTranslation();
  const navigate = useNavigate();
  const { agentId } = useParams();
  const dispatch = useDispatch<AppDispatch>();
  const token = useSelector(selectToken);
  const [agent, setAgent] = useState<Agent | undefined>();
  const [loadingAgent, setLoadingAgent] = useState<boolean>(true);
  const [modalOpen, setModalOpen] = useState<boolean>(false);
  // Schedule currently being edited; null means the modal is in create mode.
  const [editing, setEditing] = useState<Schedule | null>(null);
  const [submitting, setSubmitting] = useState<boolean>(false);
  // Id of the schedule whose run log is expanded (at most one at a time).
  const [expanded, setExpanded] = useState<string | null>(null);
  const [activeRun, setActiveRun] = useState<ScheduleRun | null>(null);
  const schedules = useSelector((state: RootState) =>
    selectSchedulesForAgent(state, agentId ?? ''),
  );

  useEffect(() => {
    if (!agentId) {
      // Without an id there is nothing to fetch; clear the initial loading
      // flag so the page does not show the spinner forever.
      setLoadingAgent(false);
      return;
    }
    // Set by the cleanup so a response that resolves after the agent id or
    // token changed (or after unmount) cannot overwrite newer state.
    let ignore = false;
    const fetchAgent = async () => {
      setLoadingAgent(true);
      try {
        const response = await userService.getAgent(agentId, token);
        if (!response.ok) throw new Error('Failed to fetch agent');
        const data = await response.json();
        if (!ignore) setAgent(data);
      } catch (error) {
        console.error(error);
      } finally {
        if (!ignore) setLoadingAgent(false);
      }
    };
    fetchAgent();
    return () => {
      ignore = true;
    };
  }, [agentId, token]);

  useEffect(() => {
    if (!agentId) return;
    dispatch(loadSchedulesForAgent({ agentId, token }));
  }, [dispatch, agentId, token]);

  // Prefer ids from `tool_details`; fall back to the raw `tools` list when
  // no details are present.
  const agentToolIds = useMemo<string[]>(() => {
    if (!agent) return [];
    const fromDetails = (agent.tool_details ?? []).map((d) => d.id);
    if (fromDetails.length > 0) return fromDetails;
    return agent.tools ?? [];
  }, [agent]);

  const recurring = useMemo(
    () => schedules.filter((s) => s.trigger_type === 'recurring'),
    [schedules],
  );
  const oneTime = useMemo(
    () => schedules.filter((s) => s.trigger_type === 'once'),
    [schedules],
  );

  const openCreate = () => {
    setEditing(null);
    setModalOpen(true);
  };
  const openEdit = (schedule: Schedule) => {
    setEditing(schedule);
    setModalOpen(true);
  };
  const closeModal = () => {
    // Keep the modal open while a save is in flight.
    if (submitting) return;
    setModalOpen(false);
    setEditing(null);
  };

  // Create or update depending on whether a schedule is being edited. The
  // modal closes only on success; failures are logged and leave it open.
  const handleSubmit = async (payload: ScheduleCreatePayload) => {
    if (!agentId) return;
    setSubmitting(true);
    try {
      if (editing?.id) {
        await dispatch(
          updateSchedule({ id: editing.id, payload, token }),
        ).unwrap();
      } else {
        await dispatch(createSchedule({ agentId, payload, token })).unwrap();
      }
      setModalOpen(false);
      setEditing(null);
    } catch (err) {
      console.error(err);
    } finally {
      setSubmitting(false);
    }
  };

  return (
    <div className="p-4 md:p-12">
      <div className="flex items-center gap-3 px-4">
        <button
          type="button"
          className="border-border text-muted-foreground hover:bg-accent rounded-full border p-3 text-sm"
          onClick={() => navigate('/agents')}
        >
          <img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
        </button>
        <p className="text-foreground dark:text-foreground mt-px text-sm font-semibold">
          {t('agents.backToAll')}
        </p>
      </div>
      <div className="mt-5 flex w-full flex-wrap items-center justify-between gap-2 px-4">
        <h1 className="text-foreground m-0 text-[32px] font-bold md:text-[40px] dark:text-white">
          {t('agents.schedules.title')}
        </h1>
      </div>
      <div className="mt-6 flex flex-col gap-3 px-4">
        {agent && (
          <div className="flex flex-col gap-1">
            <p className="text-foreground">{agent.name}</p>
            <p className="text-muted-foreground text-xs">
              {agent.last_used_at
                ? t('agents.logs.lastUsedAt') +
                  ' ' +
                  new Date(agent.last_used_at).toLocaleString()
                : t('agents.logs.noUsageHistory')}
            </p>
          </div>
        )}
      </div>
      {loadingAgent ? (
        <div className="flex h-[55vh] w-full items-center justify-center">
          <Spinner />
        </div>
      ) : (
        agent && (
          <div className="flex flex-col gap-4 p-4">
            <header className="flex items-center justify-between">
              <h2 className="text-lg font-semibold">
                {t('agents.schedules.heading')}
              </h2>
              <button
                type="button"
                onClick={openCreate}
                className="bg-primary text-primary-foreground hover:bg-primary/90 rounded-md px-3 py-1 text-sm"
              >
                {t('agents.schedules.newRecurring')}
              </button>
            </header>
            <section>
              <h3 className="text-muted-foreground mb-2 text-sm font-semibold uppercase">
                {t('agents.schedules.recurring')} ({recurring.length})
              </h3>
              {recurring.length === 0 ? (
                <p className="text-muted-foreground text-sm">
                  {t('agents.schedules.noRecurring')}
                </p>
              ) : (
                <ul className="flex flex-col gap-3">
                  {recurring.map((schedule) => (
                    <li
                      key={schedule.id}
                      className="border-border bg-card rounded-lg border p-3"
                    >
                      <div className="flex items-start justify-between">
                        <div>
                          <p className="font-semibold">
                            {schedule.name || schedule.instruction.slice(0, 80)}
                          </p>
                          <p className="text-muted-foreground text-xs">
                            cron:{' '}
                            <span className="font-mono">{schedule.cron}</span> ·
                            tz: {schedule.timezone} · status: {schedule.status}{' '}
                            · next: {formatTimestamp(schedule.next_run_at)}
                          </p>
                        </div>
                        <div className="flex gap-1">
                          <button
                            type="button"
                            onClick={() => openEdit(schedule)}
                            className="border-border rounded-md border px-2 py-1 text-xs"
                          >
                            {t('agents.schedules.edit')}
                          </button>
                          <button
                            type="button"
                            onClick={() =>
                              dispatch(
                                setSchedulePaused({
                                  id: schedule.id,
                                  action:
                                    schedule.status === 'active'
                                      ? 'pause'
                                      : 'resume',
                                  token,
                                }),
                              )
                            }
                            className="border-border rounded-md border px-2 py-1 text-xs"
                          >
                            {schedule.status === 'active'
                              ? t('agents.schedules.pause')
                              : t('agents.schedules.resume')}
                          </button>
                          <button
                            type="button"
                            onClick={() =>
                              dispatch(
                                runScheduleNow({ id: schedule.id, token }),
                              )
                            }
                            className="border-border rounded-md border px-2 py-1 text-xs"
                          >
                            {t('agents.schedules.runNow')}
                          </button>
                          <button
                            type="button"
                            onClick={() =>
                              dispatch(
                                deleteSchedule({ id: schedule.id, token }),
                              )
                            }
                            className="text-destructive border-border rounded-md border px-2 py-1 text-xs"
                          >
                            {t('agents.schedules.delete')}
                          </button>
                        </div>
                      </div>
                      <button
                        type="button"
                        onClick={() =>
                          setExpanded(
                            expanded === schedule.id ? null : schedule.id,
                          )
                        }
                        className="text-primary mt-2 text-xs underline"
                      >
                        {expanded === schedule.id
                          ? t('agents.schedules.hideRuns')
                          : t('agents.schedules.showRuns')}
                      </button>
                      {expanded === schedule.id && (
                        <div className="mt-2">
                          <RunLog
                            scheduleId={schedule.id}
                            onSelect={(run) => setActiveRun(run)}
                          />
                        </div>
                      )}
                    </li>
                  ))}
                </ul>
              )}
            </section>
            <section>
              <h3 className="text-muted-foreground mb-2 text-sm font-semibold uppercase">
                {t('agents.schedules.oneTime')} ({oneTime.length})
              </h3>
              {oneTime.length === 0 ? (
                <p className="text-muted-foreground text-sm">
                  {t('agents.schedules.noOneTime')}
                </p>
              ) : (
                <ul className="flex flex-col gap-2">
                  {oneTime.map((schedule) => (
                    <li
                      key={schedule.id}
                      className="border-border bg-card rounded-lg border p-3 text-sm"
                    >
                      <div className="flex items-start justify-between">
                        <div>
                          <p className="font-semibold">
                            {schedule.name || schedule.instruction.slice(0, 80)}
                          </p>
                          <p className="text-muted-foreground text-xs">
                            runs at {formatTimestamp(schedule.run_at)} · status:{' '}
                            {schedule.status}
                          </p>
                        </div>
                        <div className="flex gap-1">
                          {schedule.status === 'active' && (
                            <button
                              type="button"
                              onClick={() => openEdit(schedule)}
                              className="border-border rounded-md border px-2 py-1 text-xs"
                            >
                              {t('agents.schedules.edit')}
                            </button>
                          )}
                          {schedule.status === 'active' && (
                            <button
                              type="button"
                              onClick={() =>
                                dispatch(
                                  deleteSchedule({ id: schedule.id, token }),
                                )
                              }
                              className="text-destructive border-border rounded-md border px-2 py-1 text-xs"
                            >
                              {t('agents.schedules.cancel')}
                            </button>
                          )}
                        </div>
                      </div>
                    </li>
                  ))}
                </ul>
              )}
            </section>
            <RunDetailDrawer
              run={activeRun}
              onClose={() => setActiveRun(null)}
            />
            <ScheduleFormModal
              open={modalOpen}
              initial={editing}
              agentToolIds={agentToolIds}
              onClose={closeModal}
              onSubmit={handleSubmit}
              submitting={submitting}
            />
          </div>
        )
      )}
    </div>
  );
}

View File

@@ -0,0 +1,208 @@
import { describe, expect, it } from 'vitest';
import type { Schedule } from '../types/schedule';
import {
browserTimezone,
buildCron,
buildRunAtUtc,
parseCron,
parseScheduleToFormValues,
parseTime,
} from './cronBuilder';
// Shared form values for the buildCron cases; each test spreads this and
// overrides only the fields it exercises.
const baseValues = {
time: '09:00',
dayOfWeek: 1,
dayOfMonth: 1,
month: 1,
};
// buildCron: one case per frequency, plus the clamping of out-of-range
// time, day-of-month and month inputs.
describe('buildCron', () => {
it('Daily 22:30 → "30 22 * * *"', () => {
expect(buildCron('daily', { ...baseValues, time: '22:30' })).toBe(
'30 22 * * *',
);
});
it('Weekly Mon 09:00 → "0 9 * * 1"', () => {
expect(
buildCron('weekly', { ...baseValues, time: '09:00', dayOfWeek: 1 }),
).toBe('0 9 * * 1');
});
it('Monthly day-15 10:00 → "0 10 15 * *"', () => {
expect(
buildCron('monthly', { ...baseValues, time: '10:00', dayOfMonth: 15 }),
).toBe('0 10 15 * *');
});
it('Yearly March 15 08:00 → "0 8 15 3 *"', () => {
expect(
buildCron('yearly', {
...baseValues,
time: '08:00',
dayOfMonth: 15,
month: 3,
}),
).toBe('0 8 15 3 *');
});
// 'once' schedules carry no cron expression.
it('Once returns null cron', () => {
expect(buildCron('once', baseValues)).toBeNull();
});
// '99:99' is clamped to 23:59 rather than rejected.
it('clamps out-of-range time inputs', () => {
expect(buildCron('daily', { ...baseValues, time: '99:99' })).toBe(
'59 23 * * *',
);
});
// Day 99 clamps down to 31 and month 0 clamps up to 1.
it('clamps day-of-month and month for yearly', () => {
expect(
buildCron('yearly', {
...baseValues,
time: '00:00',
dayOfMonth: 99,
month: 0,
}),
).toBe('0 0 31 1 *');
});
});
// parseTime: well-formed "HH:MM" is parsed; unparseable input yields the
// 09:00 default instead of throwing.
describe('parseTime', () => {
it('parses "HH:MM"', () => {
expect(parseTime('07:05')).toEqual({ hour: 7, minute: 5 });
});
it('falls back to 09:00 on bad input', () => {
expect(parseTime('garbage')).toEqual({ hour: 9, minute: 0 });
});
});
// buildRunAtUtc: a wall-clock date/time in an IANA zone is converted to a UTC
// ISO string. Cases cover UTC, a zone east of UTC in both summer and winter,
// a zone west of UTC, and a malformed date.
describe('buildRunAtUtc', () => {
it('UTC noon → UTC noon (no offset)', () => {
expect(buildRunAtUtc('2026-06-15', '12:00', 'UTC')).toBe(
'2026-06-15T12:00:00.000Z',
);
});
it('Europe/Warsaw 12:00 in summer (CEST, UTC+2) → 10:00Z', () => {
expect(buildRunAtUtc('2026-06-15', '12:00', 'Europe/Warsaw')).toBe(
'2026-06-15T10:00:00.000Z',
);
});
it('Europe/Warsaw 12:00 in winter (CET, UTC+1) → 11:00Z', () => {
expect(buildRunAtUtc('2026-12-15', '12:00', 'Europe/Warsaw')).toBe(
'2026-12-15T11:00:00.000Z',
);
});
it('America/Los_Angeles 09:00 in summer (PDT, UTC-7) → 16:00Z', () => {
expect(buildRunAtUtc('2026-07-04', '09:00', 'America/Los_Angeles')).toBe(
'2026-07-04T16:00:00.000Z',
);
});
it('throws on invalid date', () => {
expect(() => buildRunAtUtc('not-a-date', '12:00', 'UTC')).toThrow();
});
});
// parseCron: each cron shape from the buildCron cases above is recognised
// again; shapes outside that set (ranges, wrong field count) return null.
describe('parseCron', () => {
it('round-trips daily cron', () => {
expect(parseCron('30 22 * * *')).toMatchObject({
frequency: 'daily',
minute: 30,
hour: 22,
});
});
it('round-trips weekly cron', () => {
expect(parseCron('0 9 * * 1')).toMatchObject({
frequency: 'weekly',
minute: 0,
hour: 9,
dow: 1,
});
});
it('round-trips monthly cron', () => {
expect(parseCron('0 10 15 * *')).toMatchObject({
frequency: 'monthly',
minute: 0,
hour: 10,
dom: 15,
});
});
it('round-trips yearly cron', () => {
expect(parseCron('0 8 15 3 *')).toMatchObject({
frequency: 'yearly',
minute: 0,
hour: 8,
dom: 15,
mon: 3,
});
});
it('returns null for unsupported shapes (weekday range)', () => {
expect(parseCron('0 9 * * 1-5')).toBeNull();
});
it('returns null for non-5-field input', () => {
expect(parseCron('* * *')).toBeNull();
});
});
// browserTimezone: the concrete zone depends on the test host, so only the
// type and non-emptiness of the result are asserted.
describe('browserTimezone', () => {
it('returns a non-empty IANA-looking string', () => {
const tz = browserTimezone();
expect(typeof tz).toBe('string');
expect(tz.length).toBeGreaterThan(0);
});
});
// parseScheduleToFormValues: edit-mode form values are derived from a stored
// schedule (cron for recurring, run_at for one-time), with a daily fallback
// when the cron is not a recognised shape.
describe('parseScheduleToFormValues', () => {
// Minimal recurring schedule; each case overrides just the fields it needs.
const makeSchedule = (overrides: Partial<Schedule>): Schedule => ({
id: 's',
user_id: 'u',
agent_id: 'a',
trigger_type: 'recurring',
instruction: 'do thing',
status: 'active',
timezone: 'UTC',
tool_allowlist: [],
created_via: 'ui',
consecutive_failure_count: 0,
created_at: '2026-05-19T12:00:00Z',
updated_at: '2026-05-19T12:00:00Z',
...overrides,
});
it('reconstructs weekly from a cron schedule', () => {
const s = makeSchedule({ cron: '0 9 * * 1' });
const v = parseScheduleToFormValues(s, 'UTC');
expect(v.frequency).toBe('weekly');
expect(v.time).toBe('09:00');
expect(v.dayOfWeek).toBe(1);
});
it('reconstructs once from run_at', () => {
const s = makeSchedule({
trigger_type: 'once',
cron: null,
run_at: '2026-06-15T12:00:00Z',
});
const v = parseScheduleToFormValues(s, 'UTC');
expect(v.frequency).toBe('once');
expect(v.date).toBe('2026-06-15');
expect(v.time).toBe('12:00');
});
// A weekday range is not a shape parseCron recognises, so the form falls
// back to its daily default.
it('falls back to daily 09:00 when cron is unrecognized', () => {
const s = makeSchedule({ cron: '0 9 * * 1-5' });
const v = parseScheduleToFormValues(s, 'UTC');
expect(v.frequency).toBe('daily');
});
});

View File

@@ -0,0 +1,242 @@
import type { Schedule } from '../types/schedule';
/** Frequencies offered by the schedule form; 'once' is the only non-cron option. */
export type ScheduleFrequency =
| 'once'
| 'daily'
| 'weekly'
| 'monthly'
| 'yearly';
/**
 * Flat value bag backing the schedule form. Every field is always present;
 * which ones are actually used depends on `frequency` (see per-field notes).
 */
export type ScheduleFormValues = {
frequency: ScheduleFrequency;
date: string; // YYYY-MM-DD (used by 'once')
time: string; // HH:MM (24h)
dayOfWeek: number; // 0=Sun … 6=Sat (used by 'weekly')
dayOfMonth: number; // 1..31 (used by 'monthly' / 'yearly')
month: number; // 1..12 (used by 'yearly')
};
/**
 * Floor `n` and constrain it to the inclusive range [lo, hi].
 *
 * NaN maps to `lo`: `Math.floor`/`Math.min`/`Math.max` all propagate NaN, so
 * without this guard an unparseable numeric field would surface as the
 * literal text "NaN" inside a generated cron expression.
 */
const clamp = (n: number, lo: number, hi: number): number => {
  if (Number.isNaN(n)) return lo;
  return Math.max(lo, Math.min(hi, Math.floor(n)));
};
/** Left-pad a number with zeros to at least two characters. */
const pad2 = (n: number): string => `${n}`.padStart(2, '0');
/**
 * Parse an "H:M" / "HH:MM" string into `{ hour, minute }`.
 *
 * Surrounding whitespace is ignored. Input that does not match yields the
 * default `{ hour: 9, minute: 0 }`; matched values above 23 / 59 are capped.
 */
export function parseTime(time: string): { hour: number; minute: number } {
  const match = (time?.trim() ?? '').match(/^(\d{1,2}):(\d{1,2})$/);
  if (match === null) {
    return { hour: 9, minute: 0 };
  }
  const [, rawHour, rawMinute] = match;
  // The pattern only admits unsigned digits, so the values are non-negative
  // integers already and capping the upper bound is the whole clamp.
  return {
    hour: Math.min(23, Number(rawHour)),
    minute: Math.min(59, Number(rawMinute)),
  };
}
/**
 * Report the IANA timezone the runtime resolves for the default locale
 * (e.g. ``Europe/Warsaw``), or ``'UTC'`` when none is reported or the Intl
 * lookup throws.
 */
export function browserTimezone(): string {
  let detected: string | undefined;
  try {
    detected = Intl.DateTimeFormat().resolvedOptions().timeZone;
  } catch {
    detected = undefined;
  }
  return detected || 'UTC';
}
/**
 * Build the 5-field cron expression (minute hour day-of-month month
 * day-of-week) for a recurring frequency.
 *
 * Returns ``null`` for 'once', which has no cron form. The time comes from
 * `parseTime`; day-of-week, day-of-month and month are clamped to 0..6,
 * 1..31 and 1..12 respectively.
 */
export function buildCron(
  frequency: ScheduleFrequency,
  values: Pick<
    ScheduleFormValues,
    'time' | 'dayOfWeek' | 'dayOfMonth' | 'month'
  >,
): string | null {
  if (frequency === 'once') return null;
  const { hour, minute } = parseTime(values.time);
  const timeFields = `${minute} ${hour}`;
  if (frequency === 'daily') {
    return `${timeFields} * * *`;
  }
  if (frequency === 'weekly') {
    return `${timeFields} * * ${clamp(values.dayOfWeek, 0, 6)}`;
  }
  if (frequency === 'monthly') {
    return `${timeFields} ${clamp(values.dayOfMonth, 1, 31)} * *`;
  }
  if (frequency === 'yearly') {
    const dayField = clamp(values.dayOfMonth, 1, 31);
    const monthField = clamp(values.month, 1, 12);
    return `${timeFields} ${dayField} ${monthField} *`;
  }
  return null;
}
/**
 * Convert a local date/time + IANA tz to a UTC ISO 8601 string.
 *
 * @param date     ``YYYY-MM-DD`` wall-clock date in `timezone`.
 * @param time     ``HH:MM`` wall-clock time (parsed by `parseTime`).
 * @param timezone IANA zone name the wall-clock values are expressed in.
 * @returns The matching instant as ``Date#toISOString()`` output.
 * @throws Error('invalid date') when `date` is not ``YYYY-MM-DD`` shaped.
 *
 * Month and day are clamped to 1..12 / 1..31; a day past the end of the month
 * is rolled into the following month by ``Date.UTC``.
 */
export function buildRunAtUtc(
  date: string,
  time: string,
  timezone: string,
): string {
  const { hour, minute } = parseTime(time);
  const dm = /^(\d{4})-(\d{1,2})-(\d{1,2})$/.exec(date?.trim() ?? '');
  if (!dm) throw new Error('invalid date');
  const year = Number(dm[1]);
  const month = clamp(Number(dm[2]), 1, 12);
  const day = clamp(Number(dm[3]), 1, 31);
  // The wall-clock fields interpreted as if they were already UTC.
  const wallAsUtc = Date.UTC(year, month - 1, day, hour, minute, 0);
  // Offset (ms, positive east of UTC) that `timezone` observes at an instant:
  // format the instant in the zone and read the fields back as UTC.
  const offsetAt = (utcMs: number): number => {
    const p = formatInTimeZone(utcMs, timezone);
    return Date.UTC(p.year, p.month - 1, p.day, p.hour, p.minute, 0) - utcMs;
  };
  // First pass samples the offset at `wallAsUtc`, which is up to a UTC offset
  // away from the real instant. Around a DST transition that sample can come
  // from the wrong side of the switch, so re-sample at the candidate instant
  // and prefer that offset when the two disagree.
  const firstOffset = offsetAt(wallAsUtc);
  let resultMs = wallAsUtc - firstOffset;
  const secondOffset = offsetAt(resultMs);
  if (secondOffset !== firstOffset) {
    resultMs = wallAsUtc - secondOffset;
  }
  return new Date(resultMs).toISOString();
}
/** Calendar fields of an instant as observed in some timezone. */
type TzParts = {
  year: number;
  month: number;
  day: number;
  hour: number;
  minute: number;
};

/**
 * Break a UTC epoch-millisecond instant into the calendar fields it shows in
 * `timezone`, using ``Intl.DateTimeFormat#formatToParts``.
 */
const formatInTimeZone = (utcMs: number, timezone: string): TzParts => {
  const formatter = new Intl.DateTimeFormat('en-US', {
    timeZone: timezone,
    year: 'numeric',
    month: '2-digit',
    day: '2-digit',
    hour: '2-digit',
    minute: '2-digit',
    hour12: false,
  });
  // Collect every non-literal part by its type name.
  const fields = formatter
    .formatToParts(new Date(utcMs))
    .reduce<Record<string, string>>((acc, part) => {
      if (part.type !== 'literal') acc[part.type] = part.value;
      return acc;
    }, {});
  // Intl returns "24" at midnight in some engines; normalize to 0.
  const hour = Number(fields.hour) % 24;
  return {
    year: Number(fields.year),
    month: Number(fields.month),
    day: Number(fields.day),
    hour,
    minute: Number(fields.minute),
  };
};
/**
 * Derive form initial values from an existing schedule (edit mode).
 *
 * - A 'once' schedule with a parseable `run_at` yields frequency 'once' with
 *   the date/time that instant shows in `timezone`.
 * - Otherwise the cron expression is parsed with `parseCron`.
 * - Anything else (no cron, unrecognised cron, missing or unparseable
 *   `run_at`) yields the fallback: daily at 09:00, today's date in `timezone`.
 */
export function parseScheduleToFormValues(
  schedule: Schedule,
  timezone: string,
): ScheduleFormValues {
  const fallback: ScheduleFormValues = {
    frequency: 'daily',
    date: todayDate(timezone),
    time: '09:00',
    dayOfWeek: 1,
    dayOfMonth: 1,
    month: 1,
  };
  if (schedule.trigger_type === 'once' && schedule.run_at) {
    const runAtMs = new Date(schedule.run_at).getTime();
    // Intl throws RangeError when asked to format an invalid Date, so an
    // unparseable `run_at` is treated like a missing one instead of crashing
    // the edit form.
    if (Number.isFinite(runAtMs)) {
      const parts = formatInTimeZone(runAtMs, timezone);
      return {
        ...fallback,
        frequency: 'once',
        date: `${parts.year}-${pad2(parts.month)}-${pad2(parts.day)}`,
        time: `${pad2(parts.hour)}:${pad2(parts.minute)}`,
      };
    }
  }
  if (!schedule.cron) return fallback;
  const parsed = parseCron(schedule.cron);
  if (!parsed) return fallback;
  const { frequency, minute, hour, dom, mon, dow } = parsed;
  return {
    frequency,
    date: fallback.date,
    time: `${pad2(hour)}:${pad2(minute)}`,
    // Fields the parsed shape does not carry keep the form defaults.
    dayOfWeek: dow ?? 1,
    dayOfMonth: dom ?? 1,
    month: mon ?? 1,
  };
}
/** Result of recognising one of the simple cron shapes the form can edit. */
type ParsedCron = {
  frequency: Exclude<ScheduleFrequency, 'once'>;
  minute: number;
  hour: number;
  dom: number | null;
  mon: number | null;
  dow: number | null;
};

/**
 * Parse a single cron field as a plain base-10 integer within [lo, hi].
 * Anything else (ranges, lists, steps, names, decimals, hex, signs) is null.
 */
const parseCronField = (
  field: string,
  lo: number,
  hi: number,
): number | null => {
  // `Number()` alone would accept "1.5", "0x10", "1e1" and "-1".
  if (!/^\d+$/.test(field)) return null;
  const value = Number(field);
  return value >= lo && value <= hi ? value : null;
};

/**
 * Recognize the cron shapes ``buildCron`` produces; otherwise ``null``.
 *
 * Supported shapes (minute and hour are always single integers):
 * - daily:   ``m h * * *``
 * - weekly:  ``m h * * dow``
 * - monthly: ``m h dom * *``
 * - yearly:  ``m h dom mon *``
 *
 * Every numeric field must be in its cron range (minute 0-59, hour 0-23,
 * day-of-month 1-31, month 1-12, day-of-week 0-7). Day-of-week 7 is
 * normalised to 0 so Sunday always lands in the form's 0..6 range.
 */
export function parseCron(expression: string): ParsedCron | null {
  const parts = expression.trim().split(/\s+/);
  if (parts.length !== 5) return null;
  const [mn, hr, dom, mon, dow] = parts;
  const minute = parseCronField(mn, 0, 59);
  const hour = parseCronField(hr, 0, 23);
  if (minute === null || hour === null) return null;
  const hasDom = dom !== '*';
  const hasMon = mon !== '*';
  const hasDow = dow !== '*';
  // yearly: explicit dom + explicit mon
  if (hasDom && hasMon && !hasDow) {
    const d = parseCronField(dom, 1, 31);
    const mm = parseCronField(mon, 1, 12);
    if (d === null || mm === null) return null;
    return { frequency: 'yearly', minute, hour, dom: d, mon: mm, dow: null };
  }
  // monthly: explicit dom, * mon, * dow
  if (hasDom && !hasMon && !hasDow) {
    const d = parseCronField(dom, 1, 31);
    if (d === null) return null;
    return { frequency: 'monthly', minute, hour, dom: d, mon: null, dow: null };
  }
  // weekly: * dom, * mon, explicit dow (single value)
  if (!hasDom && !hasMon && hasDow) {
    const d = parseCronField(dow, 0, 7);
    if (d === null) return null;
    // Both 0 and 7 mean Sunday in cron; the form only knows 0.
    return {
      frequency: 'weekly',
      minute,
      hour,
      dom: null,
      mon: null,
      dow: d % 7,
    };
  }
  // daily: * dom, * mon, * dow
  if (!hasDom && !hasMon && !hasDow) {
    return { frequency: 'daily', minute, hour, dom: null, mon: null, dow: null };
  }
  return null;
}
/**
 * Current calendar date as seen in the given IANA timezone, formatted
 * ``YYYY-MM-DD`` with zero-padded month and day.
 */
export function todayDate(timezone: string): string {
  const { year, month, day } = formatInTimeZone(Date.now(), timezone);
  return [year, pad2(month), pad2(day)].join('-');
}

View File

@@ -0,0 +1,222 @@
import { describe, expect, it } from 'vitest';
import {
sseEventReceived,
type SSEEvent,
} from '../../notifications/notificationsSlice';
import type { Schedule, ScheduleRun } from '../types/schedule';
import reducer, {
applyEvent,
selectRunsForSchedule,
selectSchedulesForAgent,
type SchedulesState,
} from './schedulesSlice';
// Fixture: an active recurring schedule 'sched-1' owned by 'agent-1'.
// `overrides` are spread last, so they win over every default.
const sampleSchedule = (overrides: Partial<Schedule> = {}): Schedule => ({
id: 'sched-1',
user_id: 'alice',
agent_id: 'agent-1',
trigger_type: 'recurring',
instruction: 'do it',
status: 'active',
timezone: 'UTC',
tool_allowlist: [],
created_via: 'ui',
consecutive_failure_count: 0,
created_at: '2026-05-19T12:00:00Z',
updated_at: '2026-05-19T12:00:00Z',
...overrides,
});
// Fixture: a pending cron-triggered run 'run-1' of 'sched-1' with zero token
// counts. `overrides` are spread last, so they win over every default.
const sampleRun = (overrides: Partial<ScheduleRun> = {}): ScheduleRun => ({
id: 'run-1',
schedule_id: 'sched-1',
user_id: 'alice',
agent_id: 'agent-1',
status: 'pending',
scheduled_for: '2026-05-19T12:00:00Z',
trigger_source: 'cron',
output_truncated: false,
prompt_tokens: 0,
generated_tokens: 0,
created_at: '2026-05-19T12:00:00Z',
updated_at: '2026-05-19T12:00:00Z',
...overrides,
});
// Slice state as the reducer produces it for an undefined state and an
// action it does not handle.
const seedState = () => reducer(undefined, { type: '@@INIT' });
// State with one known run and one known schedule: dispatches `applyEvent`
// with the sample run under 'sched-1', then overrides `byAgent` directly so
// 'agent-1' owns the sample schedule.
const seedWithSchedule = (): SchedulesState => {
let state = seedState();
state = reducer(
state,
applyEvent({ type: 'noop', scheduleId: 'sched-1', run: sampleRun() }),
);
return {
...state,
byAgent: { 'agent-1': [sampleSchedule()] } as Record<string, Schedule[]>,
};
};
// Exercises the slice's handling of `sseEventReceived` envelopes: run
// completion/failure, auto-pause, ignored envelopes, and the stub run row
// created when an event arrives for a run that is not in the store yet.
describe('schedulesSlice SSE event handling', () => {
it('schedule.run.completed upserts run + bumps last_run_at', () => {
let state = seedWithSchedule();
const envelope: SSEEvent = {
id: 'evt-1',
ts: '2026-05-19T12:05:00Z',
type: 'schedule.run.completed',
payload: {
run_id: 'run-1',
schedule_id: 'sched-1',
status: 'success',
},
};
state = reducer(state, sseEventReceived(envelope));
const runs = selectRunsForSchedule({ schedules: state }, 'sched-1');
expect(runs[0].status).toBe('success');
const schedules = selectSchedulesForAgent({ schedules: state }, 'agent-1');
// The envelope timestamp becomes the schedule's last_run_at.
expect(schedules[0].last_run_at).toBe('2026-05-19T12:05:00Z');
});
it('schedule.run.failed marks the run as failed and carries error_type', () => {
let state = seedWithSchedule();
const envelope: SSEEvent = {
id: 'evt-2',
ts: '2026-05-19T12:06:00Z',
type: 'schedule.run.failed',
payload: {
run_id: 'run-1',
schedule_id: 'sched-1',
error_type: 'agent_error',
error: 'LLM exploded',
},
};
state = reducer(state, sseEventReceived(envelope));
const runs = selectRunsForSchedule({ schedules: state }, 'sched-1');
expect(runs[0].status).toBe('failed');
expect(runs[0].error_type).toBe('agent_error');
expect(runs[0].error).toBe('LLM exploded');
});
it('schedule.autopaused flips the schedule status to paused', () => {
let state = seedWithSchedule();
const envelope: SSEEvent = {
id: 'evt-3',
ts: '2026-05-19T12:07:00Z',
type: 'schedule.autopaused',
payload: { schedule_id: 'sched-1' },
};
state = reducer(state, sseEventReceived(envelope));
const schedules = selectSchedulesForAgent({ schedules: state }, 'agent-1');
expect(schedules[0].status).toBe('paused');
});
it('schedule.message.appended is acknowledged without mutating run state', () => {
let state = seedWithSchedule();
const envelope: SSEEvent = {
id: 'evt-4',
ts: '2026-05-19T12:08:00Z',
type: 'schedule.message.appended',
payload: {
schedule_id: 'sched-1',
run_id: 'run-1',
conversation_id: 'conv-1',
message_id: 'msg-1',
},
};
// Serialised snapshots are compared, so any state change would fail this.
const before = JSON.stringify(state);
state = reducer(state, sseEventReceived(envelope));
expect(JSON.stringify(state)).toBe(before);
});
it('ignores envelopes without a schedule_id payload', () => {
let state = seedWithSchedule();
const envelope: SSEEvent = {
id: 'evt-5',
type: 'schedule.run.completed',
payload: { run_id: 'run-1' },
};
const before = JSON.stringify(state);
state = reducer(state, sseEventReceived(envelope));
expect(JSON.stringify(state)).toBe(before);
});
it('inserts a stub run row when the envelope arrives before the run log is loaded', () => {
// Start without any runs in the store; only the schedule is known.
let state = seedState();
state = {
...state,
byAgent: { 'agent-1': [sampleSchedule()] } as Record<string, Schedule[]>,
};
const envelope: SSEEvent = {
id: 'evt-6',
ts: '2026-05-19T12:09:00Z',
type: 'schedule.run.completed',
payload: {
run_id: 'run-new',
schedule_id: 'sched-1',
},
};
state = reducer(state, sseEventReceived(envelope));
const runs = selectRunsForSchedule({ schedules: state }, 'sched-1');
expect(runs[0].id).toBe('run-new');
expect(runs[0].status).toBe('success');
});
it('seeds stub-insert run rows with safe defaults so RunLog never renders NaN', () => {
let state = seedState();
state = {
...state,
byAgent: { 'agent-1': [sampleSchedule()] } as Record<string, Schedule[]>,
};
const envelope: SSEEvent = {
id: 'evt-7',
ts: '2026-05-19T12:10:00Z',
type: 'schedule.run.completed',
payload: { run_id: 'run-stub', schedule_id: 'sched-1' },
};
state = reducer(state, sseEventReceived(envelope));
const stub = selectRunsForSchedule({ schedules: state }, 'sched-1')[0];
// Token counters must be real numbers so their sum is never NaN.
expect(stub.prompt_tokens).toBe(0);
expect(stub.generated_tokens).toBe(0);
expect(stub.prompt_tokens + stub.generated_tokens).toBe(0);
expect(Number.isNaN(stub.prompt_tokens + stub.generated_tokens)).toBe(
false,
);
expect(stub.trigger_source).toBe('cron');
expect(stub.output_truncated).toBe(false);
// Every timestamp on the stub is taken from the envelope's `ts`.
expect(stub.scheduled_for).toBe('2026-05-19T12:10:00Z');
expect(stub.started_at).toBe('2026-05-19T12:10:00Z');
expect(stub.finished_at).toBe('2026-05-19T12:10:00Z');
expect(stub.status).toBe('success');
expect(stub.error).toBeNull();
expect(stub.error_type).toBeNull();
});
it('stub-insert seeds defaults for failed runs too', () => {
let state = seedState();
state = {
...state,
byAgent: { 'agent-1': [sampleSchedule()] } as Record<string, Schedule[]>,
};
const envelope: SSEEvent = {
id: 'evt-8',
ts: '2026-05-19T12:11:00Z',
type: 'schedule.run.failed',
payload: {
run_id: 'run-stub-failed',
schedule_id: 'sched-1',
error_type: 'agent_error',
error: 'boom',
},
};
state = reducer(state, sseEventReceived(envelope));
const stub = selectRunsForSchedule({ schedules: state }, 'sched-1')[0];
expect(stub.status).toBe('failed');
expect(stub.error).toBe('boom');
expect(stub.error_type).toBe('agent_error');
expect(stub.prompt_tokens).toBe(0);
expect(stub.generated_tokens).toBe(0);
expect(stub.trigger_source).toBe('cron');
});
});

View File

@@ -0,0 +1,319 @@
import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
import schedulesService from '../../api/services/schedulesService';
import {
sseEventReceived,
type SSEEvent,
} from '../../notifications/notificationsSlice';
import type {
Schedule,
ScheduleCreatePayload,
ScheduleRun,
ScheduleUpdatePayload,
} from '../types/schedule';
/** Shape of the `schedules` slice. */
export type SchedulesState = {
// Schedules keyed by the id of the agent that owns them.
byAgent: Record<string, Schedule[]>;
// Runs keyed by schedule id.
runsBySchedule: Record<string, ScheduleRun[]>;
// Set while a schedule list load is pending.
loading: boolean;
// Message from the last failed schedule list load, if any.
error: string | null;
};
// Empty caches, nothing loading, no error.
const initialState: SchedulesState = {
byAgent: {},
runsBySchedule: {},
loading: false,
error: null,
};
/**
 * Fetch every schedule for one agent. Resolves with the agent id alongside
 * the list so the result can be stored per agent.
 */
export const loadSchedulesForAgent = createAsyncThunk<
  { agentId: string; schedules: Schedule[] },
  { agentId: string; token: string | null }
>('schedules/loadForAgent', async (args) => {
  const { schedules } = await schedulesService.listForAgent(
    args.agentId,
    args.token,
  );
  return { agentId: args.agentId, schedules };
});
/** Create a schedule for an agent; resolves with the schedule the service returns. */
export const createSchedule = createAsyncThunk<
  Schedule,
  {
    agentId: string;
    payload: ScheduleCreatePayload;
    token: string | null;
  }
>('schedules/create', async (args) => {
  const { schedule } = await schedulesService.create(
    args.agentId,
    args.payload,
    args.token,
  );
  return schedule;
});
/** Update an existing schedule by id; resolves with the schedule the service returns. */
export const updateSchedule = createAsyncThunk<
  Schedule,
  {
    id: string;
    payload: ScheduleUpdatePayload;
    token: string | null;
  }
>('schedules/update', async (args) => {
  const { schedule } = await schedulesService.update(
    args.id,
    args.payload,
    args.token,
  );
  return schedule;
});
/** Pause or resume a schedule; resolves with the schedule the service returns. */
export const setSchedulePaused = createAsyncThunk<
  Schedule,
  { id: string; action: 'pause' | 'resume'; token: string | null }
>('schedules/setPaused', async (args) => {
  const { schedule } = await schedulesService.setPaused(
    args.id,
    args.action,
    args.token,
  );
  return schedule;
});
/** Remove a schedule via the service; resolves with the removed id. */
export const deleteSchedule = createAsyncThunk<
  string,
  { id: string; token: string | null }
>('schedules/delete', async (args) => {
  await schedulesService.remove(args.id, args.token);
  return args.id;
});
/** Trigger an immediate run of a schedule; resolves with the run the service returns. */
export const runScheduleNow = createAsyncThunk<
  { scheduleId: string; run: ScheduleRun },
  { id: string; token: string | null }
>('schedules/runNow', async (args) => {
  const { run } = await schedulesService.runNow(args.id, args.token);
  return { scheduleId: args.id, run };
});
/**
 * Fetch runs for one schedule, forwarding the optional `limit` / `offset`
 * to the service unchanged.
 */
export const loadRunsForSchedule = createAsyncThunk<
  { scheduleId: string; runs: ScheduleRun[] },
  {
    id: string;
    limit?: number;
    offset?: number;
    token: string | null;
  }
>('schedules/loadRuns', async (args) => {
  const { runs } = await schedulesService.listRuns(
    args.id,
    args.limit,
    args.offset,
    args.token,
  );
  return { scheduleId: args.id, runs };
});
/**
 * Return a new list with `next` in it: replacing the first entry that shares
 * its id, or prepended when no entry does. The input list is not modified.
 */
const upsert = (list: Schedule[], next: Schedule): Schedule[] => {
  const position = list.findIndex((entry) => entry.id === next.id);
  if (position < 0) {
    return [next, ...list];
  }
  return [...list.slice(0, position), next, ...list.slice(position + 1)];
};
/** Return a new list without any schedule whose id equals `id`. */
const removeFrom = (list: Schedule[], id: string): Schedule[] => {
  const isOther = (entry: Schedule): boolean => entry.id !== id;
  return list.filter(isOther);
};
// An SSE envelope carries only part of a schedule_run. These defaults fill in
// everything except `id` and `status` so a stub row is complete. All
// timestamps share one value: the envelope's `ts` when given, else the
// current time.
const stubRunDefaults = (
  scheduleId: string,
  ts: string | undefined,
): Omit<ScheduleRun, 'id' | 'status'> => {
  const stamp = ts != null ? ts : new Date().toISOString();
  const defaults: Omit<ScheduleRun, 'id' | 'status'> = {
    schedule_id: scheduleId,
    user_id: '',
    agent_id: '',
    scheduled_for: stamp,
    trigger_source: 'cron',
    started_at: stamp,
    finished_at: stamp,
    output: null,
    output_truncated: false,
    error: null,
    error_type: null,
    prompt_tokens: 0,
    generated_tokens: 0,
    conversation_id: null,
    message_id: null,
    celery_task_id: null,
    created_at: stamp,
    updated_at: stamp,
  };
  return defaults;
};
/**
 * Merge a partial run into `state.runsBySchedule[scheduleId]`, mutating
 * `state` in place.
 *
 * A run already in the list (matched by id) has `delta` shallow-merged over
 * it. Otherwise a stub built from `stubRunDefaults(scheduleId, ts)` plus
 * `delta` is placed at the front of the list.
 */
const upsertRunDelta = (
  state: SchedulesState,
  scheduleId: string,
  delta: Partial<ScheduleRun> & { id: string; status: ScheduleRun['status'] },
  ts: string | undefined,
): void => {
  const runs = state.runsBySchedule[scheduleId] ?? [];
  const position = runs.findIndex((run) => run.id === delta.id);
  if (position >= 0) {
    runs[position] = { ...runs[position], ...delta };
    return;
  }
  const stub: ScheduleRun = { ...stubRunDefaults(scheduleId, ts), ...delta };
  state.runsBySchedule[scheduleId] = [stub, ...runs];
};
/**
 * Search every agent's schedule list for `scheduleId`.
 *
 * Returns the owning agent id together with the schedule, or `null` when no
 * list contains it.
 */
const findAgentForSchedule = (
  state: SchedulesState,
  scheduleId: string,
): { agentId: string; schedule: Schedule } | null => {
  for (const [agentId, schedules] of Object.entries(state.byAgent)) {
    const schedule = schedules.find((entry) => entry.id === scheduleId);
    if (schedule !== undefined) {
      return { agentId, schedule };
    }
  }
  return null;
};
/**
 * Redux slice caching schedules per agent (`byAgent`) and runs per schedule
 * (`runsBySchedule`). It is kept current by the schedule thunks and by SSE
 * envelopes dispatched through `sseEventReceived`.
 */
const schedulesSlice = createSlice({
  name: 'schedules',
  initialState,
  reducers: {
    // Insert-or-replace a full run row for `scheduleId`. Payloads without a
    // `run` are ignored; `type` is accepted but not read here.
    applyEvent: (
      state,
      action: PayloadAction<{
        type: string;
        scheduleId: string;
        run?: ScheduleRun;
      }>,
    ) => {
      const { scheduleId, run } = action.payload;
      if (run) {
        const existing = state.runsBySchedule[scheduleId] ?? [];
        const idx = existing.findIndex((r) => r.id === run.id);
        if (idx === -1) {
          state.runsBySchedule[scheduleId] = [run, ...existing];
        } else {
          existing[idx] = run;
        }
      }
    },
    // Drop every cached schedule and run.
    resetSchedules: () => initialState,
  },
  extraReducers: (builder) => {
    builder
      .addCase(loadSchedulesForAgent.pending, (state) => {
        state.loading = true;
        state.error = null;
      })
      .addCase(loadSchedulesForAgent.fulfilled, (state, action) => {
        state.byAgent[action.payload.agentId] = action.payload.schedules;
        state.loading = false;
      })
      .addCase(loadSchedulesForAgent.rejected, (state, action) => {
        state.loading = false;
        state.error = action.error.message ?? 'failed to load schedules';
      })
      // Agentless schedules (``agent_id === null``) skip the byAgent cache —
      // they have no Schedules tab home. The inline ⏰ card is the only UI.
      .addCase(createSchedule.fulfilled, (state, action) => {
        const next = action.payload;
        if (!next.agent_id) return;
        const list = state.byAgent[next.agent_id] ?? [];
        state.byAgent[next.agent_id] = upsert(list, next);
      })
      .addCase(updateSchedule.fulfilled, (state, action) => {
        const next = action.payload;
        if (!next.agent_id) return;
        const list = state.byAgent[next.agent_id] ?? [];
        state.byAgent[next.agent_id] = upsert(list, next);
      })
      .addCase(setSchedulePaused.fulfilled, (state, action) => {
        const next = action.payload;
        if (!next.agent_id) return;
        const list = state.byAgent[next.agent_id] ?? [];
        state.byAgent[next.agent_id] = upsert(list, next);
      })
      .addCase(deleteSchedule.fulfilled, (state, action) => {
        const id = action.payload;
        // The owning agent is not part of the payload, so sweep every bucket.
        Object.keys(state.byAgent).forEach((agentId) => {
          state.byAgent[agentId] = removeFrom(state.byAgent[agentId], id);
        });
        delete state.runsBySchedule[id];
      })
      .addCase(runScheduleNow.fulfilled, (state, action) => {
        const { scheduleId, run } = action.payload;
        const list = state.runsBySchedule[scheduleId] ?? [];
        // The SSE matcher below (or a runs reload) can cache this run id
        // before the HTTP response resolves. Prepending again would list the
        // same run twice, so keep the entry that is already cached.
        if (list.some((r) => r.id === run.id)) return;
        state.runsBySchedule[scheduleId] = [run, ...list];
      })
      .addCase(loadRunsForSchedule.fulfilled, (state, action) => {
        const { scheduleId, runs } = action.payload;
        state.runsBySchedule[scheduleId] = runs;
      })
      // SSE envelopes from scheduler_worker.py; unknown shapes are no-ops.
      .addMatcher(
        (action) => action.type === sseEventReceived.type,
        (state, action: PayloadAction<SSEEvent>) => {
          const envelope = action.payload;
          const payload = (envelope.payload || {}) as Record<string, unknown>;
          const scheduleId = (payload.schedule_id as string | undefined) || '';
          if (!scheduleId) return;
          switch (envelope.type) {
            case 'schedule.run.completed':
            case 'schedule.run.failed': {
              const runId = (payload.run_id as string | undefined) || '';
              if (runId) {
                const status =
                  envelope.type === 'schedule.run.completed'
                    ? 'success'
                    : 'failed';
                upsertRunDelta(
                  state,
                  scheduleId,
                  {
                    id: runId,
                    schedule_id: scheduleId,
                    status: status as ScheduleRun['status'],
                    error_type:
                      (payload.error_type as ScheduleRun['error_type']) ?? null,
                    error: (payload.error as string | undefined) ?? null,
                    finished_at: envelope.ts ?? null,
                  },
                  envelope.ts,
                );
              }
              // Mirror the envelope timestamp onto the cached schedule, if
              // the schedule is cached and the envelope carries a timestamp.
              const found = findAgentForSchedule(state, scheduleId);
              if (found && envelope.ts) {
                const next: Schedule = {
                  ...found.schedule,
                  last_run_at: envelope.ts,
                };
                state.byAgent[found.agentId] = upsert(
                  state.byAgent[found.agentId],
                  next,
                );
              }
              break;
            }
            case 'schedule.autopaused': {
              const found = findAgentForSchedule(state, scheduleId);
              if (found) {
                const next: Schedule = { ...found.schedule, status: 'paused' };
                state.byAgent[found.agentId] = upsert(
                  state.byAgent[found.agentId],
                  next,
                );
              }
              break;
            }
            case 'schedule.message.appended':
              // Handled by conversationSlice; nothing to mutate here.
              break;
            default:
              break;
          }
        },
      );
  },
});
export const { applyEvent, resetSchedules } = schedulesSlice.actions;
export default schedulesSlice.reducer;
// Shared fallbacks so a cache miss always yields the same array reference.
// A fresh `[]` per call looks like a changed result to `useSelector`, which
// then re-renders the subscriber on every store update. Treat as read-only.
const EMPTY_SCHEDULE_LIST: Schedule[] = [];
const EMPTY_RUN_LIST: ScheduleRun[] = [];

/**
 * Select the cached schedules of `agentId`.
 *
 * @returns the cached array, or a shared empty array when nothing is cached
 */
export const selectSchedulesForAgent = (
  state: { schedules: SchedulesState },
  agentId: string,
): Schedule[] => state.schedules.byAgent[agentId] ?? EMPTY_SCHEDULE_LIST;

/**
 * Select the cached runs of `scheduleId`.
 *
 * @returns the cached array, or a shared empty array when nothing is cached
 */
export const selectRunsForSchedule = (
  state: { schedules: SchedulesState },
  scheduleId: string,
): ScheduleRun[] =>
  state.schedules.runsBySchedule[scheduleId] ?? EMPTY_RUN_LIST;

View File

@@ -47,4 +47,5 @@ export type AgentFolder = {
updated_at?: string;
};
export * from './schedule';
export * from './workflow';

View File

@@ -0,0 +1,94 @@
// How a schedule fires; stored on `Schedule.trigger_type`.
export type ScheduleTriggerType = 'once' | 'recurring';
// Lifecycle state of a schedule; stored on `Schedule.status`.
export type ScheduleStatus = 'active' | 'paused' | 'completed' | 'cancelled';
// State of a single run; stored on `ScheduleRun.status`.
export type ScheduleRunStatus =
| 'pending'
| 'running'
| 'success'
| 'failed'
| 'skipped'
| 'timeout';
// Classification stored on `ScheduleRun.error_type`.
// NOTE(review): which run statuses pair with which error types is not
// visible here — confirm against the backend before relying on it.
export type ScheduleRunErrorType =
| 'auth_expired'
| 'tool_not_allowed'
| 'budget_exceeded'
| 'timeout'
| 'agent_error'
| 'internal'
| 'missed'
| 'overlap';
// A schedule as carried by `ScheduleResponse` / `ScheduleListResponse`.
// Fields use the wire's snake_case names.
export type Schedule = {
id: string;
user_id: string;
// Null for agentless one-time tasks (migration 0011).
agent_id: string | null;
trigger_type: ScheduleTriggerType;
name?: string | null;
instruction: string;
status: ScheduleStatus;
// NOTE(review): presumably `cron` applies to 'recurring' schedules and
// `run_at` to 'once' schedules — confirm against the backend.
cron?: string | null;
run_at?: string | null;
timezone: string;
next_run_at?: string | null;
last_run_at?: string | null;
end_at?: string | null;
tool_allowlist: string[];
model_id?: string | null;
token_budget?: number | null;
origin_conversation_id?: string | null;
created_via: 'chat' | 'ui';
consecutive_failure_count: number;
created_at: string;
updated_at: string;
};
// One execution of a schedule as carried by `ScheduleRunResponse` /
// `ScheduleRunListResponse`. Fields use the wire's snake_case names.
export type ScheduleRun = {
id: string;
schedule_id: string;
user_id: string;
// Null for runs of agentless schedules (migration 0011).
agent_id: string | null;
status: ScheduleRunStatus;
scheduled_for: string;
// Whether the run was fired by the cron trigger or started manually.
trigger_source: 'cron' | 'manual';
started_at?: string | null;
finished_at?: string | null;
output?: string | null;
// NOTE(review): presumably true when `output` was cut short — confirm.
output_truncated: boolean;
error?: string | null;
error_type?: ScheduleRunErrorType | null;
prompt_tokens: number;
generated_tokens: number;
conversation_id?: string | null;
message_id?: string | null;
celery_task_id?: string | null;
created_at: string;
updated_at: string;
};
// Response envelopes of the schedule endpoints.
export type ScheduleListResponse = { schedules: Schedule[] };
export type ScheduleResponse = { schedule: Schedule };
// NOTE(review): `limit` / `offset` presumably echo the paging window that
// was requested — confirm against the backend.
export type ScheduleRunListResponse = {
runs: ScheduleRun[];
limit: number;
offset: number;
};
export type ScheduleRunResponse = { run: ScheduleRun };
// Request body for creating a schedule; only `instruction` is required.
export type ScheduleCreatePayload = {
instruction: string;
trigger_type?: ScheduleTriggerType;
cron?: string;
run_at?: string; // ISO 8601 UTC; set for trigger_type === 'once'
timezone?: string;
name?: string;
end_at?: string;
tool_allowlist?: string[];
model_id?: string;
token_budget?: number;
};
// Request body for updating a schedule: any subset of the create fields.
export type ScheduleUpdatePayload = Partial<ScheduleCreatePayload>;

View File

@@ -3,6 +3,7 @@ import 'reactflow/dist/style.css';
import {
AlertCircle,
Bot,
CalendarClock,
ChartColumn,
Database,
Flag,
@@ -1533,6 +1534,17 @@ function WorkflowBuilderInner() {
Logs
</button>
)}
{canManageAgent && (
<button
onClick={() =>
navigate(`/agents/schedules/${effectiveAgentId}`)
}
className="border-border bg-card hover:bg-accent flex items-center gap-2 rounded-full border px-4 py-2 text-sm font-medium text-gray-700 transition-colors dark:text-gray-200"
>
<CalendarClock size={16} />
Schedules
</button>
)}
{canManageAgent && (
<button
onClick={() => setAgentDetails('ACTIVE')}

View File

@@ -34,6 +34,7 @@ const endpoints = {
LOGS: `/api/get_user_logs`,
MANAGE_SYNC: '/api/manage_sync',
SYNC_SOURCE: '/api/sync_source',
REINGEST_SOURCE: '/api/sources/reingest',
GET_AVAILABLE_TOOLS: '/api/available_tools',
GET_USER_TOOLS: '/api/get_tools',
CREATE_TOOL: '/api/create_tool',
@@ -83,6 +84,13 @@ const endpoints = {
CUSTOM_MODEL: (id: string) => `/api/user/models/${id}`,
CUSTOM_MODEL_TEST: (id: string) => `/api/user/models/${id}/test`,
CUSTOM_MODEL_TEST_PAYLOAD: '/api/user/models/test',
AGENT_SCHEDULES: (agentId: string) => `/api/agents/${agentId}/schedules`,
SCHEDULE: (id: string) => `/api/schedules/${id}`,
SCHEDULE_RUN_NOW: (id: string) => `/api/schedules/${id}/run`,
SCHEDULE_RUNS: (id: string, limit?: number, offset?: number) =>
`/api/schedules/${id}/runs?limit=${limit ?? 50}&offset=${offset ?? 0}`,
SCHEDULE_RUN: (id: string, runId: string) =>
`/api/schedules/${id}/runs/${runId}`,
},
V1: {
CHAT_COMPLETIONS: '/v1/chat/completions',

View File

@@ -0,0 +1,116 @@
import apiClient from '../client';
import endpoints from '../endpoints';
import type {
ScheduleCreatePayload,
ScheduleListResponse,
ScheduleResponse,
ScheduleRunListResponse,
ScheduleRunResponse,
ScheduleUpdatePayload,
} from '../../agents/types/schedule';
/**
 * Decode the result of an `apiClient` call.
 *
 * Values exposing a callable `json` member (e.g. a fetch `Response`) are
 * decoded through it; anything else is passed through untouched. No HTTP
 * status check happens here.
 *
 * @param response value resolved by the API client
 * @returns the decoded body, or `response` itself when it cannot be decoded
 */
const json = async (response: Response | unknown) => {
  // The previous `'json' in r` probe throws a TypeError for null, undefined
  // and primitives, so only object-like values are inspected.
  const isObjectLike =
    response !== null &&
    (typeof response === 'object' || typeof response === 'function');
  if (!isObjectLike) return response;
  const r = response as Response;
  if (typeof r.json !== 'function') return r as unknown;
  return r.json();
};
/**
 * Typed wrappers around the schedule endpoints.
 *
 * Every method awaits the `apiClient` call, decodes the result with `json`
 * and casts it to the declared response type. No status check is performed
 * here.
 * NOTE(review): error payloads presumably surface to the caller cast as the
 * success type — confirm how `apiClient` reports failures.
 */
const schedulesService = {
  /** List the schedules that belong to `agentId`. */
  async listForAgent(
    agentId: string,
    token: string | null,
  ): Promise<ScheduleListResponse> {
    const url = endpoints.USER.AGENT_SCHEDULES(agentId);
    return (await json(await apiClient.get(url, token))) as ScheduleListResponse;
  },

  /** Create a schedule under `agentId`. */
  async create(
    agentId: string,
    payload: ScheduleCreatePayload,
    token: string | null,
  ): Promise<ScheduleResponse> {
    const url = endpoints.USER.AGENT_SCHEDULES(agentId);
    return (await json(
      await apiClient.post(url, payload, token),
    )) as ScheduleResponse;
  },

  /** Fetch a single schedule by id. */
  async get(id: string, token: string | null): Promise<ScheduleResponse> {
    const url = endpoints.USER.SCHEDULE(id);
    return (await json(await apiClient.get(url, token))) as ScheduleResponse;
  },

  /** Send an update for schedule `id` with the given fields (PUT). */
  async update(
    id: string,
    payload: ScheduleUpdatePayload,
    token: string | null,
  ): Promise<ScheduleResponse> {
    const url = endpoints.USER.SCHEDULE(id);
    return (await json(
      await apiClient.put(url, payload, token),
    )) as ScheduleResponse;
  },

  /** Pause or resume schedule `id` (PATCH with `{ action }`). */
  async setPaused(
    id: string,
    action: 'pause' | 'resume',
    token: string | null,
  ): Promise<ScheduleResponse> {
    const url = endpoints.USER.SCHEDULE(id);
    return (await json(
      await apiClient.patch(url, { action }, token),
    )) as ScheduleResponse;
  },

  /** Delete schedule `id`. */
  async remove(
    id: string,
    token: string | null,
  ): Promise<{ success: boolean }> {
    const url = endpoints.USER.SCHEDULE(id);
    return (await json(await apiClient.delete(url, token))) as {
      success: boolean;
    };
  },

  /** Trigger a run of schedule `id` (POST with an empty body). */
  async runNow(
    id: string,
    token: string | null,
  ): Promise<ScheduleRunResponse> {
    const url = endpoints.USER.SCHEDULE_RUN_NOW(id);
    return (await json(
      await apiClient.post(url, {}, token),
    )) as ScheduleRunResponse;
  },

  /** List runs of schedule `id`; `limit` / `offset` go to the URL builder. */
  async listRuns(
    id: string,
    limit: number | undefined,
    offset: number | undefined,
    token: string | null,
  ): Promise<ScheduleRunListResponse> {
    const url = endpoints.USER.SCHEDULE_RUNS(id, limit, offset);
    return (await json(
      await apiClient.get(url, token),
    )) as ScheduleRunListResponse;
  },

  /** Fetch run `runId` of schedule `id`. */
  async getRun(
    id: string,
    runId: string,
    token: string | null,
  ): Promise<ScheduleRunResponse> {
    const url = endpoints.USER.SCHEDULE_RUN(id, runId);
    return (await json(await apiClient.get(url, token))) as ScheduleRunResponse;
  },
};
export default schedulesService;

View File

@@ -73,6 +73,8 @@ const userService = {
apiClient.post(endpoints.USER.MANAGE_SYNC, data, token),
syncSource: (data: any, token: string | null): Promise<any> =>
apiClient.post(endpoints.USER.SYNC_SOURCE, data, token),
reingestSource: (data: any, token: string | null): Promise<any> =>
apiClient.post(endpoints.USER.REINGEST_SOURCE, data, token),
getAvailableTools: (token: string | null): Promise<any> =>
apiClient.get(endpoints.USER.GET_AVAILABLE_TOOLS, token),
getUserTools: (token: string | null): Promise<any> =>

View File

@@ -0,0 +1,3 @@
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M12 3.5C7.30558 3.5 3.5 7.30558 3.5 12C3.5 16.6944 7.30558 20.5 12 20.5C16.6944 20.5 20.5 16.6944 20.5 12C20.5 7.30558 16.6944 3.5 12 3.5ZM12 5C15.866 5 19 8.134 19 12C19 15.866 15.866 19 12 19C8.134 19 5 15.866 5 12C5 8.134 8.134 5 12 5ZM11.25 7C11.0511 7 10.8603 7.07902 10.7197 7.21967C10.579 7.36032 10.5 7.55109 10.5 7.75V12C10.5 12.1989 10.579 12.3897 10.7197 12.5303L13.4697 15.2803C13.6103 15.421 13.8011 15.5 14 15.5C14.1989 15.5 14.3897 15.421 14.5303 15.2803C14.671 15.1397 14.75 14.9489 14.75 14.75C14.75 14.5511 14.671 14.3603 14.5303 14.2197L12 11.6893V7.75C12 7.55109 11.921 7.36032 11.7803 7.21967C11.6397 7.07902 11.4489 7 11.25 7Z" fill="#7D54D1" stroke="#7D54D1" stroke-width="0.3"/>
</svg>

After

Width:  |  Height:  |  Size: 815 B

View File

@@ -0,0 +1,3 @@
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M12 3.5C7.30558 3.5 3.5 7.30558 3.5 12C3.5 16.6944 7.30558 20.5 12 20.5C16.6944 20.5 20.5 16.6944 20.5 12C20.5 7.30558 16.6944 3.5 12 3.5ZM12 5C15.866 5 19 8.134 19 12C19 15.866 15.866 19 12 19C8.134 19 5 15.866 5 12C5 8.134 8.134 5 12 5ZM11.25 7C11.0511 7 10.8603 7.07902 10.7197 7.21967C10.579 7.36032 10.5 7.55109 10.5 7.75V12C10.5 12.1989 10.579 12.3897 10.7197 12.5303L13.4697 15.2803C13.6103 15.421 13.8011 15.5 14 15.5C14.1989 15.5 14.3897 15.421 14.5303 15.2803C14.671 15.1397 14.75 14.9489 14.75 14.75C14.75 14.5511 14.671 14.3603 14.5303 14.2197L12 11.6893V7.75C12 7.55109 11.921 7.36032 11.7803 7.21967C11.6397 7.07902 11.4489 7 11.25 7Z" fill="#FFFFFF" stroke="#FFFFFF" stroke-width="0.3"/>
</svg>

After

Width:  |  Height:  |  Size: 815 B

View File

@@ -0,0 +1,27 @@
import { describe, expect, it } from 'vitest';
import { isChatToolVisible } from './ToolsPopup';
// Regression suite for the filter drift that appeared once ``scheduler``
// became dual-registered (``default: true`` and ``builtin: true``): the chat
// popup used to filter on ``!tool.builtin`` alone and dropped scheduler.
describe('isChatToolVisible', () => {
  type ToolFlags = { default?: boolean; builtin?: boolean };

  // Assert the same visibility verdict for every flag combination given.
  const expectVisibility = (tools: ToolFlags[], visible: boolean) => {
    for (const tool of tools) {
      expect(isChatToolVisible(tool)).toBe(visible);
    }
  };

  it('keeps dual-registered tools (default + builtin, e.g. scheduler)', () => {
    expectVisibility([{ default: true, builtin: true }], true);
  });

  it('keeps default-only chat tools (memory, read_webpage before dual-reg)', () => {
    expectVisibility([{ default: true, builtin: false }, { default: true }], true);
  });

  it('keeps regular user_tools (neither flag set)', () => {
    expectVisibility([{}, { default: false, builtin: false }], true);
  });

  it('drops pure builtins (agent-only, e.g. a future builtin without default)', () => {
    expectVisibility([{ builtin: true }, { default: false, builtin: true }], false);
  });
});

View File

@@ -12,6 +12,15 @@ import NoFilesDarkIcon from '../assets/no-files-dark.svg';
import CheckmarkIcon from '../assets/checkmark.svg';
import { useDarkTheme } from '../hooks';
/**
 * Decide whether a tool is listed in the chat tools popup.
 *
 * Defaults are always listed, so users can toggle the agentless chat tools.
 * Everything else is listed unless it is a builtin, which hides pure
 * (agent-only) builtins. Dual-registered tools such as ``scheduler`` carry
 * both flags and stay visible through the ``default`` check.
 */
export const isChatToolVisible = (tool: {
  default?: boolean;
  builtin?: boolean;
}): boolean => {
  if (tool.default) return true;
  return !tool.builtin;
};
interface ToolsPopupProps {
isOpen: boolean;
onClose: () => void;
@@ -104,7 +113,8 @@ export default function ToolsPopup({
return res.json();
})
.then((data) => {
setUserTools(data.tools);
const filtered = (data.tools || []).filter(isChatToolVisible);
setUserTools(filtered);
setLoading(false);
})
.catch((error) => {

View File

@@ -165,12 +165,19 @@ function UploadRow({
return (
<li className="border-border/50 border-b last:border-b-0">
<div className="flex items-center justify-between px-5 py-3">
<p
className="font-inter dark:text-muted-foreground max-w-[200px] truncate text-[13px] leading-[16.5px] font-normal text-black"
title={task.fileName}
>
{task.fileName}
</p>
<div className="flex min-w-0 flex-col">
<p
className="font-inter dark:text-muted-foreground max-w-[200px] truncate text-[13px] leading-[16.5px] font-normal text-black"
title={task.fileName}
>
{task.fileName}
</p>
{task.status === 'training' && task.stage && (
<span className="font-inter text-muted-foreground mt-0.5 text-[11px] leading-[14px]">
{t(`modals.uploadDoc.progress.${task.stage}`)}
</span>
)}
</div>
<div className="flex items-center gap-2">
{showProgress && (

View File

@@ -333,6 +333,7 @@ export default function Conversation() {
onOpenArtifact={handleOpenArtifact}
onToolAction={handleToolAction}
isSplitView={isSplitArtifactOpen}
agentId={selectedAgent?.id}
headerContent={
selectedAgent ? (
<div className="flex w-full items-center justify-center py-4">

View File

@@ -13,6 +13,7 @@ import rehypeKatex from 'rehype-katex';
import remarkGfm from 'remark-gfm';
import remarkMath from 'remark-math';
import SchedulerToolCallCard from '../agents/schedules/SchedulerToolCallCard';
import ChevronDown from '../assets/chevron-down.svg';
import Cloud from '../assets/cloud.svg';
import DocsGPT3 from '../assets/cute_docsgpt3.svg';
@@ -70,6 +71,8 @@ const ConversationBubble = forwardRef<
decision: 'approved' | 'denied',
comment?: string,
) => void;
/** Active agent id; refreshes the Schedules tab from SchedulerToolCallCard. */
agentId?: string;
}
>(function ConversationBubble(
{
@@ -89,6 +92,7 @@ const ConversationBubble = forwardRef<
filesAttached,
onOpenArtifact,
onToolAction,
agentId,
},
ref,
) {
@@ -154,10 +158,10 @@ const ConversationBubble = forwardRef<
<img
src={DocumentationDark}
alt="Attachment"
className="h-[15px] w-[15px] object-fill"
className="h-3.75 w-3.75 object-fill"
/>
</div>
<span className="max-w-[150px] truncate font-normal">
<span className="max-w-37.5 truncate font-normal">
{file.fileName}
</span>
</div>
@@ -328,7 +332,7 @@ const ConversationBubble = forwardRef<
<div className="mb-4 flex flex-col flex-wrap items-start self-start lg:flex-nowrap">
<div className="my-2 flex flex-row items-center justify-center gap-3">
<Avatar
className="h-[26px] w-[30px] text-xl"
className="h-6.5 w-7.5 text-xl"
avatar={
<img
src={Sources}
@@ -376,7 +380,7 @@ const ConversationBubble = forwardRef<
<img
src={Document}
alt="Document"
className="h-[17px] w-[17px] object-fill"
className="h-4.25 w-4.25 object-fill"
/>
<p
className="mt-0.5 truncate text-xs"
@@ -394,11 +398,11 @@ const ConversationBubble = forwardRef<
</div>
{activeTooltip === index && (
<div
className={`dark:bg-card dark:text-foreground absolute left-1/2 z-50 max-h-48 w-40 translate-x-[-50%] translate-y-[3px] rounded-xl bg-[#FBFBFB] p-4 text-black shadow-xl sm:w-56`}
className={`dark:bg-card dark:text-foreground absolute left-1/2 z-50 max-h-48 w-40 translate-x-[-50%] translate-y-0.75 rounded-xl bg-[#FBFBFB] p-4 text-black shadow-xl sm:w-56`}
onMouseOver={() => setActiveTooltip(index)}
onMouseOut={() => setActiveTooltip(null)}
>
<p className="line-clamp-6 max-h-[164px] overflow-hidden rounded-md text-sm wrap-break-word text-ellipsis">
<p className="line-clamp-6 max-h-41 overflow-hidden rounded-md text-sm wrap-break-word text-ellipsis">
{source.text}
</p>
</div>
@@ -423,7 +427,11 @@ const ConversationBubble = forwardRef<
)}
{research && <ResearchProgress research={research} />}
{toolCalls && toolCalls.length > 0 && (
<ToolCalls toolCalls={toolCalls} onToolAction={onToolAction} />
<ToolCalls
toolCalls={toolCalls}
onToolAction={onToolAction}
agentId={agentId}
/>
)}
{!message && primaryArtifactCall?.artifact_id && onOpenArtifact && (
<div className="my-2 ml-2 flex justify-start">
@@ -471,7 +479,7 @@ const ConversationBubble = forwardRef<
<div className="flex max-w-full flex-col flex-wrap items-start self-start lg:flex-nowrap">
<div className="my-2 flex flex-row items-center justify-center gap-3">
<Avatar
className="h-[34px] w-[34px] text-2xl"
className="h-8.5 w-8.5 text-2xl"
avatar={
<img
src={DocsGPT3}
@@ -1005,6 +1013,7 @@ function ToolCallApprovalBar({
function ToolCalls({
toolCalls,
onToolAction,
agentId,
}: {
toolCalls: ToolCallsType[];
onToolAction?: (
@@ -1012,6 +1021,7 @@ function ToolCalls({
decision: 'approved' | 'denied',
comment?: string,
) => void;
agentId?: string;
}) {
const [isToolCallsOpen, setIsToolCallsOpen] = useState(false);
@@ -1023,7 +1033,7 @@ function ToolCalls({
);
return (
<div className="mb-4 flex w-full flex-col flex-wrap items-start self-start lg:flex-nowrap">
<div className="relative mb-4 flex w-full flex-col flex-wrap items-start self-start lg:flex-nowrap">
{/* Approval bars — always visible, compact inline */}
{awaitingCalls.length > 0 && (
<div className="fade-in mt-4 ml-3 w-[90vw] md:w-[70vw] lg:w-full">
@@ -1042,7 +1052,7 @@ function ToolCalls({
<>
<div className="my-2 flex flex-row items-center justify-center gap-3">
<Avatar
className="h-[26px] w-[30px] text-xl"
className="h-6.5 w-7.5 text-xl"
avatar={
<img
src={Sources}
@@ -1066,88 +1076,101 @@ function ToolCalls({
{isToolCallsOpen && (
<div className="fade-in mr-5 ml-3 w-[90vw] md:w-[70vw] lg:w-full">
<div className="grid grid-cols-1 gap-2">
{resolvedCalls.map((toolCall, index) => (
<Accordion
key={`tool-call-${index}`}
title={`${toolCall.tool_name} - ${toolCall.action_name.substring(0, toolCall.action_name.lastIndexOf('_'))}`}
className="bg-muted dark:bg-answer-bubble w-full rounded-4xl"
titleClassName="px-6 py-2 text-sm font-semibold"
>
<div className="flex flex-col gap-1">
<div className="border-border flex flex-col rounded-2xl border">
<p className="dark:bg-background flex flex-row items-center justify-between rounded-t-2xl bg-black/10 px-2 py-1 text-sm font-semibold wrap-break-word">
<span style={{ fontFamily: 'IBMPlexMono-Medium' }}>
Arguments
</span>{' '}
<CopyButton
textToCopy={JSON.stringify(
toolCall.arguments,
null,
2,
)}
/>
</p>
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="dark:text-muted-foreground leading-[23px] text-black"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
{JSON.stringify(toolCall.arguments, null, 2)}
</span>
</p>
{resolvedCalls.map((toolCall, index) => {
if (toolCall.tool_name === 'scheduler') {
return (
<SchedulerToolCallCard
key={`scheduler-${toolCall.call_id ?? index}`}
result={toolCall.result}
actionName={toolCall.action_name}
status={toolCall.status}
agentId={agentId}
/>
);
}
return (
<Accordion
key={`tool-call-${index}`}
title={`${toolCall.tool_name} - ${toolCall.action_name.substring(0, toolCall.action_name.lastIndexOf('_'))}`}
className="bg-muted dark:bg-answer-bubble w-full rounded-4xl"
titleClassName="px-6 py-2 text-sm font-semibold"
>
<div className="flex flex-col gap-1">
<div className="border-border flex flex-col rounded-2xl border">
<p className="dark:bg-background flex flex-row items-center justify-between rounded-t-2xl bg-black/10 px-2 py-1 text-sm font-semibold wrap-break-word">
<span style={{ fontFamily: 'IBMPlexMono-Medium' }}>
Arguments
</span>{' '}
<CopyButton
textToCopy={JSON.stringify(
toolCall.arguments,
null,
2,
)}
/>
</p>
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="dark:text-muted-foreground leading-5.75 text-black"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
{JSON.stringify(toolCall.arguments, null, 2)}
</span>
</p>
</div>
<div className="border-border flex flex-col rounded-2xl border">
<p className="dark:bg-background flex flex-row items-center justify-between rounded-t-2xl bg-black/10 px-2 py-1 text-sm font-semibold wrap-break-word">
<span style={{ fontFamily: 'IBMPlexMono-Medium' }}>
Response
</span>{' '}
<CopyButton
textToCopy={
toolCall.status === 'error'
? toolCall.error || 'Unknown error'
: JSON.stringify(toolCall.result, null, 2)
}
/>
</p>
{toolCall.status === 'pending' && (
<span className="dark:bg-card flex w-full items-center justify-center rounded-b-2xl p-2">
<Spinner size="small" />
</span>
)}
{toolCall.status === 'completed' && (
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="dark:text-muted-foreground leading-5.75 text-black"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
{JSON.stringify(toolCall.result, null, 2)}
</span>
</p>
)}
{toolCall.status === 'error' && (
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="text-destructive leading-5.75"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
{toolCall.error}
</span>
</p>
)}
{toolCall.status === 'denied' && (
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="text-muted-foreground leading-5.75"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
Denied by user
</span>
</p>
)}
</div>
</div>
<div className="border-border flex flex-col rounded-2xl border">
<p className="dark:bg-background flex flex-row items-center justify-between rounded-t-2xl bg-black/10 px-2 py-1 text-sm font-semibold wrap-break-word">
<span style={{ fontFamily: 'IBMPlexMono-Medium' }}>
Response
</span>{' '}
<CopyButton
textToCopy={
toolCall.status === 'error'
? toolCall.error || 'Unknown error'
: JSON.stringify(toolCall.result, null, 2)
}
/>
</p>
{toolCall.status === 'pending' && (
<span className="dark:bg-card flex w-full items-center justify-center rounded-b-2xl p-2">
<Spinner size="small" />
</span>
)}
{toolCall.status === 'completed' && (
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="dark:text-muted-foreground leading-[23px] text-black"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
{JSON.stringify(toolCall.result, null, 2)}
</span>
</p>
)}
{toolCall.status === 'error' && (
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="text-destructive leading-[23px]"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
{toolCall.error}
</span>
</p>
)}
{toolCall.status === 'denied' && (
<p className="dark:bg-card rounded-b-2xl p-2 font-mono text-sm wrap-break-word">
<span
className="text-muted-foreground leading-[23px]"
style={{ fontFamily: 'IBMPlexMono-Medium' }}
>
Denied by user
</span>
</p>
)}
</div>
</div>
</Accordion>
))}
</Accordion>
);
})}
</div>
</div>
)}
@@ -1172,7 +1195,7 @@ function Thought({
<div className="mb-4 flex w-full flex-col flex-wrap items-start self-start lg:flex-nowrap">
<div className="my-2 flex flex-row items-center justify-center gap-3">
<Avatar
className="h-[26px] w-[30px] text-xl"
className="h-6.5 w-7.5 text-xl"
avatar={
<img
src={Cloud}
@@ -1197,7 +1220,7 @@ function Thought({
</div>
{isThoughtOpen && (
<div className="fade-in mr-5 ml-2 max-w-[90vw] md:max-w-[70vw] lg:max-w-[50vw]">
<div className="bg-muted dark:bg-answer-bubble rounded-[28px] px-7 py-[18px]">
<div className="bg-muted dark:bg-answer-bubble rounded-[28px] px-7 py-4.5">
<ReactMarkdown
className="fade-in leading-normal wrap-break-word whitespace-pre-wrap"
remarkPlugins={[remarkGfm, remarkMath]}

View File

@@ -44,6 +44,8 @@ type ConversationMessagesProps = {
comment?: string,
) => void;
isSplitView?: boolean;
/** Active agent id; threaded into SchedulerToolCallCard. */
agentId?: string;
};
export default function ConversationMessages({
@@ -57,6 +59,7 @@ export default function ConversationMessages({
onOpenArtifact,
onToolAction,
isSplitView = false,
agentId,
}: ConversationMessagesProps) {
const [isDarkTheme] = useDarkTheme();
const { t } = useTranslation();
@@ -302,6 +305,7 @@ export default function ConversationMessages({
onToolAction={onToolAction}
feedback={query.feedback}
isStreaming={isCurrentlyStreaming}
agentId={agentId}
handleFeedback={
handleFeedback
? (feedback) => handleFeedback(query, feedback, index)

View File

@@ -0,0 +1,216 @@
import { configureStore } from '@reduxjs/toolkit';
import {
afterEach,
beforeEach,
describe,
expect,
it,
vi,
type Mock,
} from 'vitest';
import conversationService from '../api/services/conversationService';
import {
sseEventReceived,
type SSEEvent,
} from '../notifications/notificationsSlice';
import * as preferenceApi from '../preferences/preferenceApi';
import { type Preference, prefSlice } from '../preferences/preferenceSlice';
import { type ConversationState } from './conversationModels';
import {
conversationListenerMiddleware,
conversationSlice,
setConversation,
} from './conversationSlice';
// Replace the conversation service's default export with bare mock
// functions; each test configures the ones it needs.
vi.mock('../api/services/conversationService', () => ({
default: {
getConversation: vi.fn(),
tailMessage: vi.fn(),
getConversations: vi.fn(),
answer: vi.fn(),
answerStream: vi.fn(),
search: vi.fn(),
feedback: vi.fn(),
shareConversation: vi.fn(),
},
}));
// Keep the real preferenceApi exports and swap only `getConversations` for a
// mock function.
vi.mock('../preferences/preferenceApi', async () => {
const actual = await vi.importActual<typeof preferenceApi>(
'../preferences/preferenceApi',
);
return { ...actual, getConversations: vi.fn() };
});
/**
 * Build a `schedule.message.appended` SSE envelope for the tests.
 * `overrides` replace top-level fields wholesale (no deep merge of `payload`).
 */
const ENVELOPE = (overrides: Partial<SSEEvent> = {}): SSEEvent => {
  const base: SSEEvent = {
    id: 'evt-msg-1',
    ts: '2026-05-19T12:34:56Z',
    type: 'schedule.message.appended',
    payload: {
      conversation_id: 'conv-1',
      message_id: 'msg-1',
      schedule_id: 'sched-1',
      run_id: 'run-1',
    },
  };
  return { ...base, ...overrides };
};
/**
 * Create a store wired with the preference and conversation reducers plus
 * the conversation listener middleware.
 *
 * @param initialConversationId conversation treated as currently open
 * @param initialStatus         starting status of the conversation slice
 */
const makeStore = (
  initialConversationId: string | null = null,
  initialStatus: ConversationState['status'] = 'idle',
) => {
  const conversation: ConversationState = {
    queries: [],
    status: initialStatus,
    conversationId: initialConversationId,
  };
  const preference: Preference = {
    apiKey: '',
    prompt: { name: 'default', id: 'default', type: 'public' },
    prompts: [],
    chunks: '2',
    selectedDocs: [],
    sourceDocs: null,
    conversations: { data: null, loading: false },
    token: 'tok-1',
    modalState: 'INACTIVE',
    paginatedDocuments: null,
    templateAgents: null,
    agents: null,
    sharedAgents: null,
    selectedAgent: null,
    selectedModel: null,
    availableModels: [],
    modelsLoading: false,
    agentFolders: null,
  };
  const reducer = {
    preference: prefSlice.reducer,
    conversation: conversationSlice.reducer,
  };
  return configureStore({
    reducer,
    preloadedState: { preference, conversation },
    middleware: (buildDefaults) =>
      buildDefaults().concat(conversationListenerMiddleware.middleware),
  });
};
describe('conversation listener — schedule.message.appended', () => {
  // One macrotask turn; awaited once or twice to let the listener's async
  // effect and the dispatches it triggers settle.
  const nextTick = () => new Promise((resolve) => setTimeout(resolve, 0));

  beforeEach(() => {
    const fetchConversation =
      conversationService.getConversation as unknown as Mock;
    const fetchConversationList =
      preferenceApi.getConversations as unknown as Mock;
    fetchConversation.mockReset();
    fetchConversationList.mockReset();
    fetchConversation.mockResolvedValue({
      ok: true,
      json: async () => ({
        queries: [
          { prompt: 'hi', response: 'hello', status: 'complete' },
          {
            prompt: '',
            response: 'scheduled run output',
            status: 'complete',
          },
        ],
      }),
    });
    fetchConversationList.mockResolvedValue({
      data: [{ id: 'conv-1', name: 'Scheduled chat', agent_id: 'agent-1' }],
      loading: false,
    });
  });

  afterEach(() => {
    vi.restoreAllMocks();
  });

  it('refetches the open conversation when the appended message lands on it', async () => {
    const store = makeStore('conv-1');
    store.dispatch(sseEventReceived(ENVELOPE()));
    await nextTick();
    await nextTick();
    expect(conversationService.getConversation).toHaveBeenCalledWith(
      'conv-1',
      'tok-1',
    );
    const { conversation } = store.getState();
    expect(conversation.queries).toHaveLength(2);
    expect(conversation.queries[1].response).toBe('scheduled run output');
    expect(conversation.conversationId).toBe('conv-1');
  });

  it('refreshes the conversations sidebar list so the bumped chat reorders', async () => {
    const store = makeStore('conv-other');
    store.dispatch(sseEventReceived(ENVELOPE()));
    await nextTick();
    await nextTick();
    expect(preferenceApi.getConversations).toHaveBeenCalledWith('tok-1');
    const sidebar = store.getState().preference.conversations;
    expect(sidebar.data).toEqual([
      { id: 'conv-1', name: 'Scheduled chat', agent_id: 'agent-1' },
    ]);
  });

  it('does not refetch the open conversation when the appended message targets a different chat', async () => {
    const store = makeStore('conv-other');
    store.dispatch(sseEventReceived(ENVELOPE()));
    await nextTick();
    await nextTick();
    expect(conversationService.getConversation).not.toHaveBeenCalled();
    expect(preferenceApi.getConversations).toHaveBeenCalledTimes(1);
  });

  it('ignores envelopes without a conversation_id', async () => {
    const store = makeStore('conv-1');
    const withoutConversation = ENVELOPE({
      payload: { schedule_id: 'sched-1', run_id: 'run-1' },
    });
    store.dispatch(sseEventReceived(withoutConversation));
    await nextTick();
    expect(conversationService.getConversation).not.toHaveBeenCalled();
    expect(preferenceApi.getConversations).not.toHaveBeenCalled();
  });

  it('skips refetching the open conversation while a live stream is in flight', async () => {
    // Mid-stream: refetching would flip status to 'idle' and the next chunk
    // would die on the updateStreamingQuery guard.
    const store = makeStore('conv-1', 'loading');
    store.dispatch(sseEventReceived(ENVELOPE()));
    await nextTick();
    await nextTick();
    expect(conversationService.getConversation).not.toHaveBeenCalled();
    expect(store.getState().conversation.status).toBe('loading');
    expect(preferenceApi.getConversations).toHaveBeenCalledTimes(1);
  });

  it('ignores non-scheduler SSE envelopes', async () => {
    const store = makeStore('conv-1');
    const unrelated = {
      id: 'evt-2',
      type: 'source.ingest.progress',
      payload: { conversation_id: 'conv-1' },
    };
    store.dispatch(sseEventReceived(unrelated));
    await nextTick();
    expect(conversationService.getConversation).not.toHaveBeenCalled();
    expect(preferenceApi.getConversations).not.toHaveBeenCalled();
  });
});
// Guards the module's public surface: the store wires the listener
// middleware, and other modules import the slice actions.
describe('listener middleware export hygiene', () => {
  it('exports the listener middleware so the store can wire it', () => {
    expect(conversationListenerMiddleware).toBeDefined();
    const middlewareKind = typeof conversationListenerMiddleware.middleware;
    expect(middlewareKind).toBe('function');
  });

  it('still exports the slice actions consumers rely on', () => {
    const actionKind = typeof setConversation;
    expect(actionKind).toBe('function');
  });
});

View File

@@ -1,6 +1,15 @@
import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
import {
createAsyncThunk,
createListenerMiddleware,
createSlice,
PayloadAction,
} from '@reduxjs/toolkit';
import conversationService from '../api/services/conversationService';
import {
sseEventReceived,
type SSEEvent,
} from '../notifications/notificationsSlice';
import { getConversations } from '../preferences/preferenceApi';
import { setConversations } from '../preferences/preferenceSlice';
import store from '../store';
@@ -1052,3 +1061,45 @@ export const {
updateMessageMeta,
} = conversationSlice.actions;
export default conversationSlice.reducer;
// Implemented as a listener rather than a reducer: reacting to a scheduled
// message appended to the open chat needs to dispatch further work
// (loadConversation + sidebar refresh).
export const conversationListenerMiddleware = createListenerMiddleware();

conversationListenerMiddleware.startListening({
  actionCreator: sseEventReceived,
  effect: async (action: PayloadAction<SSEEvent>, listenerApi) => {
    const { type, payload: rawPayload } = action.payload;
    if (type !== 'schedule.message.appended') return;

    const fields = (rawPayload || {}) as Record<string, unknown>;
    const targetId = (fields.conversation_id as string | undefined) || '';
    if (!targetId) return;

    const { preference, conversation } = listenerApi.getState() as RootState;
    const isOpenChat = conversation.conversationId === targetId;
    const isStreaming = conversation.status === 'loading';

    // Skip mid-stream: loadConversation -> updateConversationId flips status
    // to 'idle', and the next SSE chunk dies on the 'idle' guard in
    // updateStreamingQuery. Defer the refresh to the user's next navigation.
    if (isOpenChat && !isStreaming) {
      listenerApi.dispatch(loadConversation({ id: targetId, force: true }));
    }

    // Refresh sidebar; server reorders by updated_at which just bumped.
    try {
      const refreshed = await getConversations(preference.token);
      listenerApi.dispatch(setConversations(refreshed));
    } catch (error) {
      console.error(
        'schedule.message.appended: conversations refresh failed',
        error,
      );
    }
  },
});

View File

@@ -46,4 +46,15 @@ describe('dispatchSSEEvent', () => {
'mystery.event',
]);
});
// Each scheduler envelope type must be recognised, i.e. dispatching it does
// not hit the debug logger.
it.each([
  'schedule.run.completed',
  'schedule.run.failed',
  'schedule.autopaused',
  'schedule.message.appended',
])('treats %s as a known envelope (no debug noise)', (type) => {
  const stubDispatch = vi.fn() as unknown as AppDispatch;
  const envelope = { id: `e-${type}`, type };

  dispatchSSEEvent(envelope, stubDispatch);

  expect(debugSpy).not.toHaveBeenCalled();
});
});

View File

@@ -24,6 +24,11 @@ const KNOWN_TYPES: ReadonlySet<string> = new Set([
'mcp.oauth.completed',
'mcp.oauth.failed',
'tool.approval.required',
// Scheduler envelopes (scheduler_worker.py); consumed by schedulesSlice.
// 'schedule.message.appended' is also handled by the conversationSlice
// listener middleware.
'schedule.run.completed',
'schedule.run.failed',
'schedule.autopaused',
'schedule.message.appended',
]);
/**

View File

@@ -1,10 +1,4 @@
import {
useCallback,
useEffect,
useRef,
useState,
RefObject,
} from 'react';
import { useCallback, useEffect, useRef, useState, RefObject } from 'react';
export function useOutsideAlerter<T extends HTMLElement>(
ref: RefObject<T | null>,

View File

@@ -70,6 +70,9 @@
"sync": "Synchronisieren",
"syncNow": "Jetzt synchronisieren",
"syncing": "Synchronisiere...",
"reingest": "Erneut indexieren",
"ingestFailed": "Indexierung fehlgeschlagen",
"ingestProcessing": "Indexierung...",
"syncConfirmation": "Bist du sicher, dass du \"{{sourceName}}\" synchronisieren möchtest? Dies aktualisiert den Inhalt mit deinem Cloud-Speicher und kann Änderungen an einzelnen Chunks überschreiben.",
"syncFrequency": {
"never": "Nie",
@@ -158,6 +161,7 @@
"manageTools": "Zu den Werkzeugen",
"edit": "Bearbeiten",
"delete": "Löschen",
"builtIn": "Integriert",
"deleteWarning": "Bist du sicher, dass du das Werkzeug \"{{toolName}}\" löschen möchtest?",
"unsavedChanges": "Du hast ungespeicherte Änderungen, die verloren gehen, wenn du ohne Speichern verlässt.",
"leaveWithoutSaving": "Ohne Speichern verlassen",
@@ -353,6 +357,8 @@
"failed": "Upload fehlgeschlagen",
"wait": "Dies kann einige Minuten dauern",
"preparing": "Upload wird vorbereitet",
"parsing": "Dateien werden verarbeitet...",
"embedding": "Einbettung...",
"tokenLimit": "Token-Limit überschritten, bitte lade ein kleineres Dokument hoch",
"expandDetails": "Upload-Details erweitern",
"collapseDetails": "Upload-Details einklappen",

View File

@@ -70,6 +70,9 @@
"sync": "Sync",
"syncNow": "Sync now",
"syncing": "Syncing...",
"reingest": "Reingest",
"ingestFailed": "Indexing failed",
"ingestProcessing": "Indexing…",
"syncConfirmation": "Are you sure you want to sync \"{{sourceName}}\"? This will update the content with your cloud storage and may override any edits you made to individual chunks.",
"syncFrequency": {
"never": "Never",
@@ -159,6 +162,7 @@
"edit": "Edit",
"delete": "Delete",
"reconnect": "Reconnect",
"builtIn": "Built-in",
"authStatus": {
"connected": "Connected",
"needsAuth": "Needs Auth",
@@ -365,6 +369,8 @@
"failed": "Upload failed",
"wait": "This may take several minutes",
"preparing": "Preparing upload",
"parsing": "Parsing files…",
"embedding": "Embedding…",
"tokenLimit": "Over the token limit, please consider uploading smaller document",
"expandDetails": "Expand upload details",
"collapseDetails": "Collapse upload details",
@@ -664,6 +670,7 @@
"cancel": "Cancel",
"delete": "Delete",
"logs": "Logs",
"schedules": "Schedules",
"accessDetails": "Access Details",
"add": "Add"
},
@@ -697,7 +704,10 @@
"toolsPopup": {
"title": "Select Tools",
"searchPlaceholder": "Search tools...",
"noOptionsMessage": "No tools available"
"noOptionsMessage": "No tools available",
"groupBuiltin": "Built-in",
"groupDefault": "Default",
"groupCustom": "Custom"
},
"modelsPopup": {
"title": "Select Models",
@@ -738,6 +748,69 @@
"noUsageHistory": "No usage history",
"tableHeader": "Agent endpoint logs"
},
"schedules": {
"title": "Agent Schedules",
"heading": "Schedules",
"newRecurring": "New schedule",
"closeForm": "Close form",
"edit": "Edit",
"recurring": "Recurring",
"oneTime": "One-time tasks",
"noRecurring": "No recurring schedules yet.",
"noOneTime": "No one-time tasks yet.",
"pause": "Pause",
"resume": "Resume",
"runNow": "Run now",
"delete": "Delete",
"cancel": "Cancel",
"showRuns": "Show runs",
"hideRuns": "Hide runs",
"modal": {
"titleCreate": "New schedule",
"titleEdit": "Edit schedule",
"namePlaceholder": "Name of task",
"frequency": {
"once": "Once",
"daily": "Daily",
"weekly": "Weekly",
"monthly": "Monthly",
"yearly": "Yearly"
},
"on": "On",
"at": "At",
"instructionsLabel": "Instructions",
"instructionsPlaceholder": "Enter prompt here.",
"create": "Create task",
"save": "Save changes",
"errors": {
"instructionRequired": "Instructions are required.",
"runAtInPast": "Pick a date/time in the future."
},
"days": {
"mon": "Mon",
"tue": "Tue",
"wed": "Wed",
"thu": "Thu",
"fri": "Fri",
"sat": "Sat",
"sun": "Sun"
},
"months": {
"jan": "Jan",
"feb": "Feb",
"mar": "Mar",
"apr": "Apr",
"may": "May",
"jun": "Jun",
"jul": "Jul",
"aug": "Aug",
"sep": "Sep",
"oct": "Oct",
"nov": "Nov",
"dec": "Dec"
}
}
},
"shared": {
"notFound": "No agent found. Please ensure the agent is shared."
},

View File

@@ -70,6 +70,9 @@
"sync": "Sincronizar",
"syncNow": "Sincronizar ahora",
"syncing": "Sincronizando...",
"reingest": "Reindexar",
"ingestFailed": "Error de indexación",
"ingestProcessing": "Indexando...",
"syncConfirmation": "¿Estás seguro de que deseas sincronizar \"{{sourceName}}\"? Esto actualizará el contenido con tu almacenamiento en la nube y puede anular cualquier edición que hayas realizado en fragmentos individuales.",
"syncFrequency": {
"never": "Nunca",
@@ -158,6 +161,7 @@
"manageTools": "Ir a Herramientas",
"edit": "Editar",
"delete": "Eliminar",
"builtIn": "Integrada",
"deleteWarning": "¿Estás seguro de que deseas eliminar la herramienta \"{{toolName}}\"?",
"unsavedChanges": "Tienes cambios sin guardar que se perderán si sales sin guardar.",
"leaveWithoutSaving": "Salir sin Guardar",
@@ -353,6 +357,8 @@
"failed": "Error al subir",
"wait": "Esto puede tardar varios minutos",
"preparing": "Preparando subida",
"parsing": "Analizando archivos...",
"embedding": "Generando incrustaciones...",
"tokenLimit": "Excede el límite de tokens, considere cargar un documento más pequeño",
"expandDetails": "Expandir detalles de subida",
"collapseDetails": "Contraer detalles de subida",

View File

@@ -70,6 +70,9 @@
"sync": "同期",
"syncNow": "今すぐ同期",
"syncing": "同期中...",
"reingest": "再インデックス",
"ingestFailed": "インデックス作成に失敗しました",
"ingestProcessing": "インデックス作成中...",
"syncConfirmation": "\"{{sourceName}}\"を同期してもよろしいですか?これにより、コンテンツがクラウドストレージで更新され、個々のチャンクに加えた編集が上書きされる可能性があります。",
"syncFrequency": {
"never": "なし",
@@ -158,6 +161,7 @@
"manageTools": "ツールへ移動",
"edit": "編集",
"delete": "削除",
"builtIn": "ビルトイン",
"deleteWarning": "ツール \"{{toolName}}\" を削除してもよろしいですか?",
"unsavedChanges": "保存されていない変更があります。保存せずに離れると失われます。",
"leaveWithoutSaving": "保存せずに離れる",
@@ -353,6 +357,8 @@
"failed": "アップロード失敗",
"wait": "数分かかる場合があります",
"preparing": "アップロードを準備中",
"parsing": "ファイルを解析中...",
"embedding": "埋め込み処理中...",
"tokenLimit": "トークン制限を超えています。より小さいドキュメントをアップロードしてください",
"expandDetails": "アップロードの詳細を展開",
"collapseDetails": "アップロードの詳細を折りたたむ",

View File

@@ -70,6 +70,9 @@
"sync": "Синхронизация",
"syncNow": "Синхронизировать сейчас",
"syncing": "Синхронизация...",
"reingest": "Переиндексировать",
"ingestFailed": "Ошибка индексации",
"ingestProcessing": "Индексация...",
"syncConfirmation": "Вы уверены, что хотите синхронизировать \"{{sourceName}}\"? Это обновит содержимое с вашим облачным хранилищем и может перезаписать любые изменения, внесенные вами в отдельные фрагменты.",
"syncFrequency": {
"never": "Никогда",
@@ -158,6 +161,7 @@
"manageTools": "Перейти к инструментам",
"edit": "Редактировать",
"delete": "Удалить",
"builtIn": "Встроенный",
"deleteWarning": "Вы уверены, что хотите удалить инструмент \"{{toolName}}\"?",
"unsavedChanges": "У вас есть несохраненные изменения, которые будут потеряны, если вы уйдете без сохранения.",
"leaveWithoutSaving": "Уйти без сохранения",
@@ -353,6 +357,8 @@
"failed": "Ошибка загрузки",
"wait": "Это может занять несколько минут",
"preparing": "Подготовка загрузки",
"parsing": "Обработка файлов...",
"embedding": "Создание эмбеддингов...",
"tokenLimit": "Превышен лимит токенов, рассмотрите возможность загрузки документа меньшего размера",
"expandDetails": "Развернуть детали загрузки",
"collapseDetails": "Свернуть детали загрузки",

View File

@@ -70,6 +70,9 @@
"sync": "同步",
"syncNow": "立即同步",
"syncing": "同步中...",
"reingest": "重新索引",
"ingestFailed": "索引失敗",
"ingestProcessing": "索引中...",
"syncConfirmation": "您確定要同步 \"{{sourceName}}\" 嗎?這將使用您的雲端儲存更新內容,並可能覆蓋您對個別文本塊所做的任何編輯。",
"syncFrequency": {
"never": "從不",
@@ -158,6 +161,7 @@
"manageTools": "前往工具",
"edit": "編輯",
"delete": "刪除",
"builtIn": "內建",
"deleteWarning": "您確定要刪除工具 \"{{toolName}}\" 嗎?",
"unsavedChanges": "您有未儲存的變更,如果不儲存就離開將會遺失。",
"leaveWithoutSaving": "不儲存離開",
@@ -353,6 +357,8 @@
"failed": "上傳失敗",
"wait": "這可能需要幾分鐘",
"preparing": "準備上傳",
"parsing": "正在解析檔案...",
"embedding": "正在生成嵌入...",
"tokenLimit": "超出令牌限制,請考慮上傳較小的文檔",
"expandDetails": "展開上傳詳情",
"collapseDetails": "摺疊上傳詳情",

View File

@@ -70,6 +70,9 @@
"sync": "同步",
"syncNow": "立即同步",
"syncing": "同步中...",
"reingest": "重新索引",
"ingestFailed": "索引失败",
"ingestProcessing": "索引中...",
"syncConfirmation": "您确定要同步 \"{{sourceName}}\" 吗?这将使用您的云存储更新内容,并可能覆盖您对单个文本块所做的任何编辑。",
"syncFrequency": {
"never": "从不",
@@ -158,6 +161,7 @@
"manageTools": "前往工具",
"edit": "编辑",
"delete": "删除",
"builtIn": "内置",
"deleteWarning": "您确定要删除工具 \"{{toolName}}\" 吗?",
"unsavedChanges": "您有未保存的更改,如果不保存就离开将会丢失。",
"leaveWithoutSaving": "不保存离开",
@@ -353,6 +357,8 @@
"failed": "上传失败",
"wait": "这可能需要几分钟",
"preparing": "准备上传",
"parsing": "正在解析文件...",
"embedding": "正在生成嵌入...",
"tokenLimit": "超出令牌限制,请考虑上传较小的文档",
"expandDetails": "展开上传详情",
"collapseDetails": "折叠上传详情",

View File

@@ -14,6 +14,8 @@ export type Doc = {
syncFrequency?: string;
isNested?: boolean;
provider?: string;
// Derived server-side from ingest_chunk_progress (sources API).
ingestStatus?: 'processing' | 'failed';
};
export type GetDocsResponse = {

View File

@@ -27,6 +27,12 @@ import {
setSourceDocs,
} from '../preferences/preferenceSlice';
import Upload from '../upload/Upload';
import {
addUploadTask,
removeUploadTask,
selectUploadTasks,
updateUploadTask,
} from '../upload/uploadSlice';
import { formatDate } from '../utils/dateTimeUtils';
import FileTree from '../components/FileTree';
import ConnectorTree from '../components/ConnectorTree';
@@ -56,6 +62,7 @@ export default function Sources({
const [isDarkTheme] = useDarkTheme();
const dispatch = useDispatch();
const token = useSelector(selectToken);
const uploadTasks = useSelector(selectUploadTasks);
const [searchTerm, setSearchTerm] = useState<string>('');
const debouncedSearchTerm = useDebouncedValue(searchTerm, 500);
@@ -249,6 +256,57 @@ export default function Sources({
}
};
// Re-run ingestion for a source and track it as a fresh upload-toast row.
// A failure (unsuccessful response or thrown error) marks that row 'failed'.
const handleReingest = async (doc: Doc) => {
  const sourceId = doc.id;
  if (!sourceId) {
    return;
  }

  // Drop stale toast rows for this source (a finished/dismissed task
  // would swallow the reingest's SSE events), then open a fresh one.
  const staleTasks = uploadTasks.filter((task) => task.sourceId === sourceId);
  for (const staleTask of staleTasks) {
    dispatch(removeUploadTask(staleTask.id));
  }

  const reingestTaskId = `reingest-${sourceId}-${Date.now()}`;
  dispatch(
    addUploadTask({
      id: reingestTaskId,
      fileName: doc.name || sourceId,
      progress: 0,
      status: 'training',
      sourceId,
    }),
  );

  try {
    const response = await userService.reingestSource(
      { source_id: sourceId },
      token,
    );
    const result = await response.json();

    if (result.success) {
      // Accepted: reload the current page of the sources list.
      refreshDocs(undefined, currentPage, rowsPerPage);
      return;
    }

    console.error('Reingest failed:', result.error || result.message);
    dispatch(
      updateUploadTask({
        id: reingestTaskId,
        updates: {
          status: 'failed',
          errorMessage: result.error || result.message,
        },
      }),
    );
  } catch (error) {
    // Request or JSON parsing threw: fail the row without a message.
    console.error('Error reingesting source:', error);
    dispatch(
      updateUploadTask({
        id: reingestTaskId,
        updates: { status: 'failed' },
      }),
    );
  }
};
const [documentToDelete, setDocumentToDelete] = useState<{
index: number;
document: Doc;
@@ -283,6 +341,19 @@ export default function Sources({
},
];
if (document.ingestStatus === 'failed') {
actions.push({
icon: SyncIcon,
label: t('settings.sources.reingest'),
onClick: () => {
handleReingest(document);
},
iconWidth: 14,
iconHeight: 14,
variant: 'primary',
});
}
if (document.syncFrequency) {
actions.push({
icon: SyncIcon,
@@ -483,6 +554,16 @@ export default function Sources({
</div>
<div className="flex flex-col items-start justify-start gap-1">
{document.ingestStatus === 'failed' && (
<span className="rounded-full bg-red-100 px-2 py-0.5 text-[11px] leading-[16px] font-medium text-red-700 dark:bg-red-900/30 dark:text-red-400">
{t('settings.sources.ingestFailed')}
</span>
)}
{document.ingestStatus === 'processing' && (
<span className="bg-muted-foreground/10 text-muted-foreground rounded-full px-2 py-0.5 text-[11px] leading-[16px] font-medium">
{t('settings.sources.ingestProcessing')}
</span>
)}
<div className="flex items-center gap-2">
<img
src={CalendarIcon}

View File

@@ -144,7 +144,15 @@ export default function Tools() {
return res.json();
})
.then((data) => {
setUserTools(data.tools);
// Pure builtins (agent-only, e.g. a future builtin without an
// agentless path) carry no per-user state and only apply when
// added to an agent — hide them from the management page. Dual-
// registered tools (``scheduler``: builtin + default) stay visible
// here so the user can toggle the default off in agentless chats.
const filtered = (data.tools || []).filter(
(tool: UserToolType) => tool.default || !tool.builtin,
);
setUserTools(filtered);
setLoading(false);
})
.catch((error) => {
@@ -282,32 +290,34 @@ export default function Tools() {
key={index}
className="bg-muted hover:bg-accent relative flex h-52 w-[300px] flex-col justify-between overflow-hidden rounded-2xl p-6"
>
<div
ref={menuRefs.current[tool.id]}
onClick={(e) => {
e.stopPropagation();
setActiveMenuId(
activeMenuId === tool.id ? null : tool.id,
);
}}
className="absolute top-4 right-4 z-10 cursor-pointer"
>
<img
src={ThreeDotsIcon}
alt={t('settings.tools.settingsIconAlt')}
className="h-[19px] w-[19px]"
/>
<ContextMenu
isOpen={activeMenuId === tool.id}
setIsOpen={(isOpen) => {
setActiveMenuId(isOpen ? tool.id : null);
{!tool.default && (
<div
ref={menuRefs.current[tool.id]}
onClick={(e) => {
e.stopPropagation();
setActiveMenuId(
activeMenuId === tool.id ? null : tool.id,
);
}}
options={getMenuOptions(tool)}
anchorRef={menuRefs.current[tool.id]}
position="bottom-right"
offset={{ x: 0, y: 0 }}
/>
</div>
className="absolute top-4 right-4 z-10 cursor-pointer"
>
<img
src={ThreeDotsIcon}
alt={t('settings.tools.settingsIconAlt')}
className="h-[19px] w-[19px]"
/>
<ContextMenu
isOpen={activeMenuId === tool.id}
setIsOpen={(isOpen) => {
setActiveMenuId(isOpen ? tool.id : null);
}}
options={getMenuOptions(tool)}
anchorRef={menuRefs.current[tool.id]}
position="bottom-right"
offset={{ x: 0, y: 0 }}
/>
</div>
)}
<div className="w-full">
<div className="flex w-full items-center gap-2 px-1">
<img
@@ -315,6 +325,11 @@ export default function Tools() {
alt={`${tool.displayName} icon`}
className="h-6 w-6"
/>
{tool.default && (
<span className="inline-flex items-center rounded-full bg-gray-100 px-2 py-0.5 text-[10px] leading-none font-medium text-gray-600 dark:bg-gray-700/40 dark:text-gray-300">
{t('settings.tools.builtIn')}
</span>
)}
{tool.name === 'mcp_tool' &&
mcpStatuses[tool.id] && (
<span

View File

@@ -47,6 +47,14 @@ export type UserToolType = {
customName?: string;
description: string;
status: boolean;
// True for built-in default chat tools — managed via the opt-out list,
// not a user_tools row; not deletable. ``scheduler`` is dual-registered
// (both ``default`` and ``builtin``).
default?: boolean;
// True for agent-selectable builtins (e.g. ``scheduler``) — hidden
// from the Add-Tool modal; surfaced to the agent picker. May coexist
// with ``default`` for dual-registered tools.
builtin?: boolean;
config: {
[key: string]: any;
};

View File

@@ -1,8 +1,12 @@
import { configureStore } from '@reduxjs/toolkit';
import agentPreviewReducer from './agents/agentPreviewSlice';
import schedulesReducer from './agents/schedules/schedulesSlice';
import workflowPreviewReducer from './agents/workflow/workflowPreviewSlice';
import { conversationSlice } from './conversation/conversationSlice';
import {
conversationListenerMiddleware,
conversationSlice,
} from './conversation/conversationSlice';
import { sharedConversationSlice } from './conversation/sharedConversationSlice';
import notificationsReducer from './notifications/notificationsSlice';
import { getStoredRecentDocs } from './preferences/preferenceApi';
@@ -69,9 +73,13 @@ const store = configureStore({
agentPreview: agentPreviewReducer,
workflowPreview: workflowPreviewReducer,
notifications: notificationsReducer,
schedules: schedulesReducer,
},
middleware: (getDefaultMiddleware) =>
getDefaultMiddleware().concat(prefListenerMiddleware.middleware),
getDefaultMiddleware().concat(
prefListenerMiddleware.middleware,
conversationListenerMiddleware.middleware,
),
});
export type RootState = ReturnType<typeof store.getState>;

View File

@@ -286,6 +286,26 @@ describe('source.ingest.progress', () => {
state = reducer(state, ingest('source.ingest.progress', { current: -10 }));
expect(state.tasks[0].progress).toBe(100);
});
it('records the ingest stage from the payload', () => {
  const initial = stateWithTask(makeTask({ status: 'training' }));

  const afterParsing = reducer(
    initial,
    ingest('source.ingest.progress', { current: 20, stage: 'parsing' }),
  );
  expect(afterParsing.tasks[0].stage).toBe('parsing');

  const afterEmbedding = reducer(
    afterParsing,
    ingest('source.ingest.progress', { current: 70, stage: 'embedding' }),
  );
  expect(afterEmbedding.tasks[0].stage).toBe('embedding');

  // An unknown/absent stage leaves the last known value intact.
  const afterBogus = reducer(
    afterEmbedding,
    ingest('source.ingest.progress', { current: 80, stage: 'bogus' }),
  );
  expect(afterBogus.tasks[0].stage).toBe('embedding');
});
});
describe('source.ingest.completed', () => {

View File

@@ -66,6 +66,12 @@ export interface UploadTask {
sourceId?: string;
errorMessage?: string;
dismissed?: boolean;
/**
* Ingest phase from the latest ``source.ingest.progress`` event:
* ``parsing`` (parse/OCR, lower band of the bar) or ``embedding``
* (upper band). Drives the phase label in ``UploadToast``.
*/
stage?: 'parsing' | 'embedding';
/**
* Flipped when ``source.ingest.completed`` carries
* ``payload.limited === true`` (the worker hit a token cap during
@@ -334,6 +340,9 @@ export const uploadSlice = createSlice({
if (task.status === 'completed' || task.status === 'failed') break;
task.status = 'training';
if (clamped > task.progress) task.progress = clamped;
if (payload.stage === 'parsing' || payload.stage === 'embedding') {
task.stage = payload.stage;
}
break;
}
case 'source.ingest.completed':

View File

@@ -239,9 +239,14 @@ class TestBaseAgentTools:
agent = ClassicAgent(**agent_base_params)
tools = agent._get_user_tools("test_user")
assert len(tools) == 2
from application.agents.default_tools import loaded_default_tools
assert len(tools) == 2 + len(loaded_default_tools())
assert "0" in tools
assert "1" in tools
names = {t["name"] for t in tools.values()}
assert {"tool1", "tool2"}.issubset(names)
assert set(loaded_default_tools()).issubset(names)
def test_get_user_tools_filters_by_status(
self,
@@ -268,7 +273,12 @@ class TestBaseAgentTools:
agent = ClassicAgent(**agent_base_params)
tools = agent._get_user_tools("test_user")
assert len(tools) == 1
from application.agents.default_tools import loaded_default_tools
assert len(tools) == 1 + len(loaded_default_tools())
names = {t["name"] for t in tools.values()}
assert "tool1" in names
assert "tool2" not in names
def test_get_tools_by_api_key(
self,
@@ -305,7 +315,13 @@ class TestBaseAgentTools:
agent = ClassicAgent(**agent_base_params)
tools = agent._get_tools("api_key_123")
assert tool_id in tools
from application.agents.default_tools import loaded_default_tools
# Agent-bound: exactly agents.tools, no defaults.
assert set(tools) == {tool_id}
names = {t["name"] for t in tools.values()}
assert names == {"api_tool"}
assert not (set(loaded_default_tools()) & names)
def test_build_tool_parameters(
self, agent_base_params, mock_llm_creator, mock_llm_handler_creator

View File

@@ -0,0 +1,398 @@
"""Tests for application.agents.default_tools — the default chat tools."""
from __future__ import annotations
import uuid
import pytest
from application.agents import default_tools
@pytest.fixture(autouse=True)
def _reset_tool_cache():
    """Drop the module caches so settings overrides take effect.

    Autouse: the caches are cleared both before and after every test, so a
    ``DEFAULT_CHAT_TOOLS`` override applied in one test cannot leak into
    the next through a memoized value.
    """

    def _clear():
        default_tools._tool_cache.clear()
        default_tools._ids_cache.clear()
        default_tools._loaded_cache.clear()
        default_tools._builtin_ids_cache.clear()
        default_tools._builtin_loaded_cache.clear()

    _clear()
    yield
    _clear()
# ---------------------------------------------------------------------------
# Synthetic ids
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestSyntheticIds:
    """Synthetic default-tool ids: UUID-shaped, stable per name, reversible."""

    def test_default_tool_id_is_a_valid_uuid(self):
        tool_id = default_tools.default_tool_id("memory")
        # Round-tripping through uuid.UUID also pins the canonical string form.
        assert str(uuid.UUID(tool_id)) == tool_id

    def test_default_tool_id_is_deterministic(self):
        assert default_tools.default_tool_id("memory") == default_tools.default_tool_id(
            "memory"
        )

    def test_distinct_names_get_distinct_ids(self):
        assert default_tools.default_tool_id("memory") != default_tools.default_tool_id(
            "read_webpage"
        )

    def test_default_tool_ids_covers_configured_set(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "scheduler"]
        )
        ids = default_tools.default_tool_ids()
        assert set(ids) == {"memory", "scheduler"}

    def test_default_tool_ids_is_memoized(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "scheduler"]
        )
        first = default_tools.default_tool_ids()
        # Identity, not equality: the second call must return the cached object.
        assert default_tools.default_tool_ids() is first

    def test_default_tool_ids_rebuilds_when_setting_changes(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory"]
        )
        assert set(default_tools.default_tool_ids()) == {"memory"}
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "read_webpage"]
        )
        assert set(default_tools.default_tool_ids()) == {"memory", "read_webpage"}

    def test_is_default_tool_id_recognises_synthetic_ids(self):
        assert default_tools.is_default_tool_id(
            default_tools.default_tool_id("memory")
        )

    def test_is_default_tool_id_rejects_random_uuid(self):
        assert not default_tools.is_default_tool_id(str(uuid.uuid4()))

    def test_is_default_tool_id_rejects_empty(self):
        assert not default_tools.is_default_tool_id(None)
        assert not default_tools.is_default_tool_id("")

    def test_name_for_id_round_trip(self):
        tool_id = default_tools.default_tool_id("read_webpage")
        assert default_tools.default_tool_name_for_id(tool_id) == "read_webpage"

    def test_name_for_id_unknown_returns_none(self):
        assert default_tools.default_tool_name_for_id(str(uuid.uuid4())) is None
# ---------------------------------------------------------------------------
# Startup validation
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestValidation:
    """Which ``DEFAULT_CHAT_TOOLS`` names are accepted, skipped or rejected."""

    def test_unimplemented_tool_is_skipped_not_an_error(self, monkeypatch, caplog):
        monkeypatch.setattr(
            default_tools.settings,
            "DEFAULT_CHAT_TOOLS",
            ["memory", "read_webpage", "future_tool_x"],
        )
        with caplog.at_level("DEBUG", logger="application.agents.default_tools"):
            usable = default_tools.validate_default_chat_tools()
        assert "future_tool_x" not in usable
        assert "memory" in usable and "read_webpage" in usable
        # The skip must be reported at DEBUG, and nothing at WARNING.
        assert any(
            "future_tool_x" in rec.message and rec.levelname == "DEBUG"
            for rec in caplog.records
        )
        assert not any(rec.levelname == "WARNING" for rec in caplog.records)

    def test_loaded_default_tools_is_silent(self, monkeypatch, caplog):
        # Runs per request — must never log.
        monkeypatch.setattr(
            default_tools.settings,
            "DEFAULT_CHAT_TOOLS",
            ["memory", "read_webpage", "future_tool_x"],
        )
        with caplog.at_level("DEBUG", logger="application.agents.default_tools"):
            default_tools.loaded_default_tools()
        assert caplog.records == []

    def test_fk_bound_tool_is_rejected(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "notes"]
        )
        with pytest.raises(ValueError, match="notes"):
            default_tools.validate_default_chat_tools()

    def test_fk_bound_todo_list_is_rejected(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "todo_list"]
        )
        with pytest.raises(ValueError, match="todo_list"):
            default_tools.validate_default_chat_tools()

    def test_fully_unknown_name_is_skipped(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings,
            "DEFAULT_CHAT_TOOLS",
            ["memory", "definitely_not_a_real_tool"],
        )
        usable = default_tools.validate_default_chat_tools()
        assert usable == ["memory"]

    def test_config_free_tools_pass(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "read_webpage"]
        )
        assert default_tools.validate_default_chat_tools() == [
            "memory",
            "read_webpage",
        ]

    def test_scheduler_is_config_free(self, monkeypatch):
        # Dual-registration only works if scheduler passes the config-free
        # assertion — otherwise startup would reject DEFAULT_CHAT_TOOLS.
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["scheduler"]
        )
        assert default_tools.validate_default_chat_tools() == ["scheduler"]

    def test_tool_with_required_config_is_rejected(self, monkeypatch):
        # ``brave`` needs an API key.
        monkeypatch.setattr(
            default_tools.settings, "DEFAULT_CHAT_TOOLS", ["memory", "brave"]
        )
        with pytest.raises(ValueError, match="brave"):
            default_tools.validate_default_chat_tools()

    def test_loaded_default_tools_filters_unimplemented(self, monkeypatch):
        monkeypatch.setattr(
            default_tools.settings,
            "DEFAULT_CHAT_TOOLS",
            ["memory", "read_webpage", "future_tool_x"],
        )
        assert default_tools.loaded_default_tools() == ["memory", "read_webpage"]
# ---------------------------------------------------------------------------
# Synthesized rows
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestSynthesize:
    """Shape of the row that ``synthesize_default_tool`` builds for a tool name."""

    def test_synthesize_returns_row_shaped_entry(self):
        row = default_tools.synthesize_default_tool("memory")
        assert row is not None
        assert row["name"] == "memory"
        # Both id keys carry the same synthetic id.
        assert row["id"] == default_tools.default_tool_id("memory")
        assert row["id"] == row["_id"]
        assert row["config"] == {}
        assert row["config_requirements"] == {}
        assert row["status"] is True
        assert row["default"] is True
        assert isinstance(row["actions"], list) and row["actions"]

    def test_synthesize_unknown_tool_returns_none(self):
        assert default_tools.synthesize_default_tool("future_tool_x") is None
        assert default_tools.synthesize_default_tool("nope") is None

    def test_synthesize_includes_display_name(self):
        row = default_tools.synthesize_default_tool("read_webpage")
        # Fail as an assertion, not a TypeError, if no row is synthesized.
        assert row is not None
        assert row["display_name"]
        assert isinstance(row["description"], str)
# ---------------------------------------------------------------------------
# Opt-out list
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestDisabledList:
    """``disabled_default_tools`` reads the opt-out list and tolerates bad input."""

    def test_none_user_doc_yields_empty(self):
        assert default_tools.disabled_default_tools(None) == []

    def test_missing_preferences_yields_empty(self):
        assert default_tools.disabled_default_tools({"user_id": "u"}) == []

    def test_reads_disabled_list(self):
        doc = {"tool_preferences": {"disabled_default_tools": ["read_webpage"]}}
        assert default_tools.disabled_default_tools(doc) == ["read_webpage"]

    def test_malformed_preferences_yields_empty(self):
        # Wrong type at either nesting level yields an empty list.
        assert default_tools.disabled_default_tools(
            {"tool_preferences": "not-a-dict"}
        ) == []
        assert default_tools.disabled_default_tools(
            {"tool_preferences": {"disabled_default_tools": "x"}}
        ) == []
# ---------------------------------------------------------------------------
# Chat resolver — synthesized defaults
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestSynthesizedDefaults:
    """``synthesized_default_tools`` returns the loaded defaults minus opt-outs."""

    def test_all_defaults_present_when_nothing_disabled(self):
        rows = default_tools.synthesized_default_tools(None)
        names = {r["name"] for r in rows}
        assert names == set(default_tools.loaded_default_tools())

    def test_opt_out_removes_a_tool(self):
        doc = {"tool_preferences": {"disabled_default_tools": ["read_webpage"]}}
        rows = default_tools.synthesized_default_tools(doc)
        names = {r["name"] for r in rows}
        # Only the opted-out tool disappears; other defaults remain.
        assert "read_webpage" not in names
        assert "memory" in names
# ---------------------------------------------------------------------------
# default_tools_for_management — the tool-management listing
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestDefaultToolsForManagement:
    """The management listing shows every loaded default, enabled or not."""

    def test_lists_every_loaded_default(self):
        rows = default_tools.default_tools_for_management(None)
        assert {r["name"] for r in rows} == set(
            default_tools.loaded_default_tools()
        )

    def test_all_enabled_when_nothing_disabled(self):
        rows = default_tools.default_tools_for_management(None)
        assert all(r["status"] is True for r in rows)

    def test_disabled_default_still_listed_with_status_false(self):
        doc = {"tool_preferences": {"disabled_default_tools": ["read_webpage"]}}
        rows = default_tools.default_tools_for_management(doc)
        by_name = {r["name"]: r for r in rows}
        # An opted-out default stays in the listing, flagged via ``status``.
        assert "read_webpage" in by_name
        assert by_name["read_webpage"]["status"] is False
        assert by_name["memory"]["status"] is True
# ---------------------------------------------------------------------------
# resolve_tool_by_id
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestResolveToolById:
    """Checks on ``default_tools.resolve_tool_by_id``."""

    def test_synthetic_id_resolves_in_memory(self):
        """A default tool's synthetic id resolves without any repository."""
        memory_id = default_tools.default_tool_id("memory")
        resolved = default_tools.resolve_tool_by_id(memory_id, "user-x")
        assert resolved is not None
        assert resolved["name"] == "memory"
        assert resolved["id"] == memory_id

    def test_non_default_id_delegates_to_repo(self):
        """A random uuid is looked up through the supplied repository."""
        expected_row = {"id": "real", "name": "brave"}

        class _StubRepo:
            def get_any(self, tool_id, user):
                assert user == "user-x"
                return expected_row

        random_id = str(uuid.uuid4())
        resolved = default_tools.resolve_tool_by_id(
            random_id, "user-x", user_tools_repo=_StubRepo()
        )
        assert resolved is expected_row

    def test_non_default_id_without_repo_returns_none(self):
        """A random uuid with no repository supplied resolves to ``None``."""
        random_id = str(uuid.uuid4())
        assert default_tools.resolve_tool_by_id(random_id, "user-x") is None

    def test_builtin_agent_tool_id_resolves_in_memory(self):
        """The scheduler id resolves to a row with both ``default`` and
        ``builtin`` set to ``True``."""
        scheduler_id = default_tools.default_tool_id("scheduler")
        resolved = default_tools.resolve_tool_by_id(scheduler_id, "user-x")
        assert resolved is not None
        assert resolved["name"] == "scheduler"
        assert resolved["builtin"] is True
        assert resolved["default"] is True
# ---------------------------------------------------------------------------
# Agent-selectable builtins (scheduler) — synthesized like defaults but
# hidden from agentless-chat synthesis and from /api/available_tools.
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestBuiltinAgentTools:
    """Checks on the builtin agent-tool registry, using ``scheduler``."""

    def test_scheduler_is_a_builtin(self):
        """``scheduler`` is a member of ``BUILTIN_AGENT_TOOLS``."""
        builtins = default_tools.BUILTIN_AGENT_TOOLS
        assert "scheduler" in builtins

    def test_scheduler_dual_registered_in_default_chat_tools(self):
        # Revised decision #8: scheduler lives in two registries at once, as a
        # default chat tool (auto-on in agentless chats) and as a builtin
        # agent tool (opt-in via the agent picker). Both share
        # ``_DEFAULT_TOOL_NAMESPACE`` so the synthetic id is one stable uuid5.
        default_chat_tools = default_tools.settings.DEFAULT_CHAT_TOOLS
        assert "scheduler" in default_chat_tools

    def test_dual_registration_produces_one_synthetic_id(self):
        # One uuid5 namespace, so the id is recognised by both predicates.
        scheduler_id = default_tools.default_tool_id("scheduler")
        assert default_tools.is_default_tool_id(scheduler_id)
        assert default_tools.is_builtin_agent_tool_id(scheduler_id)

    def test_builtin_id_is_recognised(self):
        """The scheduler id maps back to the ``scheduler`` builtin name."""
        scheduler_id = default_tools.default_tool_id("scheduler")
        assert default_tools.is_builtin_agent_tool_id(scheduler_id)
        resolved_name = default_tools.builtin_agent_tool_name_for_id(scheduler_id)
        assert resolved_name == "scheduler"

    def test_synthesize_builtin_marks_flags_correctly(self):
        """The synthesized builtin row is ``builtin`` but not ``default``."""
        synthesized = default_tools.synthesize_builtin_agent_tool("scheduler")
        assert synthesized is not None
        assert synthesized["name"] == "scheduler"
        assert synthesized["default"] is False
        assert synthesized["builtin"] is True
        assert isinstance(synthesized["actions"], list)
        assert synthesized["actions"]

    def test_builtin_agent_tools_for_management_lists_scheduler(self):
        """The management listing has scheduler; rows are builtin-only."""
        listing = default_tools.builtin_agent_tools_for_management()
        assert "scheduler" in {entry["name"] for entry in listing}
        for entry in listing:
            assert entry["builtin"] is True
            assert entry["default"] is False

    def test_synthesized_default_chat_now_includes_scheduler(self):
        # Revised decision #8: because scheduler is dual-registered, it shows
        # up in ``synthesized_default_tools`` so agentless chats can use it.
        synthesized = default_tools.synthesized_default_tools(None)
        synthesized_names = {entry["name"] for entry in synthesized}
        assert "scheduler" in synthesized_names
# ---------------------------------------------------------------------------
# _FK_BOUND_TOOLS — schema introspection guard against rot
# ---------------------------------------------------------------------------
@pytest.mark.unit
class TestFkBoundToolsIsInSync:
    """Schema-introspection guard keeping ``_FK_BOUND_TOOLS`` current."""

    # Table name -> tool module name (``application/agents/tools/<name>``).
    _TABLE_TO_TOOL = {
        "notes": "notes",
        "todos": "todo_list",
    }

    def test_fk_bound_tools_matches_metadata(self):
        """Tables whose ``tool_id`` references ``user_tools.id`` must all be
        mapped here, and the mapped tool names must equal ``_FK_BOUND_TOOLS``.
        """
        from application.storage.db.models import metadata

        def _references_user_tools(table):
            # True when the table has a ``tool_id`` column with a foreign key
            # pointing at ``user_tools.id``.
            column = table.columns.get("tool_id")
            if column is None:
                return False
            return any(
                fk.target_fullname == "user_tools.id"
                for fk in column.foreign_keys
            )

        fk_bound_tables = {
            table.name
            for table in metadata.tables.values()
            if _references_user_tools(table)
        }

        unmapped = fk_bound_tables - set(self._TABLE_TO_TOOL)
        assert not unmapped, (
            f"New FK-bound table(s) without a tool mapping: {sorted(unmapped)}. "
            "Add an entry to _TABLE_TO_TOOL here AND to "
            "application.agents.default_tools._FK_BOUND_TOOLS."
        )

        derived_names = {
            self._TABLE_TO_TOOL[table_name] for table_name in fk_bound_tables
        }
        declared_names = set(default_tools._FK_BOUND_TOOLS)
        assert derived_names == declared_names, (
            "_FK_BOUND_TOOLS is out of sync with schema-derived names: "
            f"derived={sorted(derived_names)} "
            f"declared={sorted(default_tools._FK_BOUND_TOOLS)}"
        )

View File

@@ -0,0 +1,187 @@
"""Regression: scheduler stays out of the Add-Tool catalog but reaches the
agent picker, the LLM tool schema, and the schedules table on execute."""
from __future__ import annotations
import json
import uuid
from contextlib import contextmanager
from unittest.mock import patch
import pytest
from sqlalchemy import text
# Pre-import to stabilise the ToolManager.load_tools walk's import order.
import application.api.user.tools.mcp # noqa: F401
from application.agents.default_tools import ( # noqa: E402
BUILTIN_AGENT_TOOLS,
builtin_agent_tools_for_management,
default_tool_id,
resolve_tool_by_id,
)
from application.agents.tool_executor import ToolExecutor # noqa: E402
from application.agents.tools.tool_manager import ToolManager # noqa: E402
from application.storage.db.repositories.schedules import ( # noqa: E402
SchedulesRepository,
)
@pytest.fixture
def patch_scheduler_sessions(pg_conn):
    """Point the scheduler tool's ``db_session`` / ``db_readonly`` at ``pg_conn``.

    Both helpers are replaced, for the duration of the test, with a context
    manager that yields the fixture connection.
    """

    @contextmanager
    def _yield_pg_conn():
        yield pg_conn

    scheduler_module = "application.agents.tools.scheduler"
    with patch(f"{scheduler_module}.db_session", _yield_pg_conn):
        with patch(f"{scheduler_module}.db_readonly", _yield_pg_conn):
            yield
def _make_agent(conn, *, user_id="alice", agent_tools=None) -> dict:
    """Insert an ``agents`` row and return it as a plain dict.

    The row gets a fresh ``sk-<uuid4>`` key, and ``agent_tools`` (or an empty
    list) is stored as the JSONB ``tools`` column.
    """
    statement = text(
        "\n"
        "INSERT INTO agents (user_id, name, status, key, tools)\n"
        "VALUES (:u, 'sched-agent', 'active', :k, CAST(:t AS jsonb))\n"
        "RETURNING *\n"
    )
    params = {
        "u": user_id,
        "k": f"sk-{uuid.uuid4()}",
        "t": json.dumps(list(agent_tools or [])),
    }
    inserted = conn.execute(statement, params).fetchone()
    return dict(inserted._mapping)
@pytest.mark.unit
class TestAddToolCatalogHidesScheduler:
    """The ``ToolManager`` catalog must not contain ``scheduler``."""

    def test_tool_manager_walks_skip_internal_scheduler(self):
        """A ``ToolManager`` built with empty config has no ``scheduler``."""
        manager = ToolManager(config={})
        loaded = manager.tools
        assert "scheduler" not in loaded
@pytest.mark.unit
class TestAgentPickerExposesScheduler:
    """``scheduler`` is reachable through the builtin agent-tool listing."""

    def test_scheduler_is_listed_in_builtin_agent_tools(self):
        """Scheduler is in both the listing and ``BUILTIN_AGENT_TOOLS``."""
        listing = builtin_agent_tools_for_management()
        listed_names = [entry["name"] for entry in listing]
        assert "scheduler" in listed_names
        assert "scheduler" in BUILTIN_AGENT_TOOLS

    def test_scheduler_row_is_flagged_builtin_not_default(self):
        """The scheduler listing row is ``builtin`` and not ``default``."""
        candidates = (
            entry
            for entry in builtin_agent_tools_for_management()
            if entry["name"] == "scheduler"
        )
        scheduler_row = next(candidates)
        assert scheduler_row["builtin"] is True
        assert scheduler_row["default"] is False

    def test_synthetic_id_resolves_to_row_with_schedule_task_action(self):
        """Resolving the synthetic id gives a row with ``schedule_task``."""
        scheduler_id = default_tool_id("scheduler")
        resolved = resolve_tool_by_id(scheduler_id, "alice")
        assert resolved is not None
        assert resolved["name"] == "scheduler"
        actions = resolved.get("actions") or []
        assert "schedule_task" in {action["name"] for action in actions}
@pytest.mark.unit
class TestDualRegistration:
    """``scheduler`` sits in both registries and resolves to one uuid5."""

    def test_scheduler_in_both_registries(self):
        """Scheduler is in the builtin set and in ``DEFAULT_CHAT_TOOLS``."""
        from application.agents.default_tools import settings

        assert "scheduler" in BUILTIN_AGENT_TOOLS
        assert "scheduler" in settings.DEFAULT_CHAT_TOOLS

    def test_same_synthetic_id_in_both_paths(self):
        """Both id maps give the same non-``None`` id for scheduler."""
        from application.agents.default_tools import (
            builtin_agent_tool_ids,
            default_tool_ids,
        )

        id_from_defaults = default_tool_ids().get("scheduler")
        id_from_builtins = builtin_agent_tool_ids().get("scheduler")
        assert id_from_defaults == id_from_builtins
        assert id_from_defaults is not None

    def test_synthesized_default_tools_includes_scheduler(self):
        """Agentless chats see scheduler in the default-tools synthesis."""
        from application.agents.default_tools import synthesized_default_tools

        synthesized_names = {
            entry["name"] for entry in synthesized_default_tools(None)
        }
        assert "scheduler" in synthesized_names

    def test_synthesized_builtin_agent_tools_includes_scheduler(self):
        """Agent picker still sees scheduler via the builtin registry."""
        listed_names = {
            entry["name"] for entry in builtin_agent_tools_for_management()
        }
        assert "scheduler" in listed_names
@pytest.mark.unit
class TestEndToEndAgentPickerToLLMSchema:
    """Scheduler flows from ``agents.tools`` to the LLM schema and the DB."""

    def test_agent_with_scheduler_in_tools_exposes_schedule_task_to_llm(
        self, pg_conn,
    ):
        """An agent listing the scheduler id exposes ``schedule_task``."""
        scheduler_id = default_tool_id("scheduler")
        agent_row = _make_agent(pg_conn, agent_tools=[scheduler_id])

        @contextmanager
        def _yield_pg_conn():
            yield pg_conn

        # The executor's read-only session is redirected to the fixture
        # connection for the whole resolution + schema-building sequence.
        with patch(
            "application.agents.tool_executor.db_readonly", _yield_pg_conn
        ):
            executor = ToolExecutor(
                user_api_key=agent_row["key"],
                user="alice",
                agent_id=str(agent_row["id"]),
            )
            resolved_tools = executor.get_tools()
            assert scheduler_id in resolved_tools
            scheduler_row = resolved_tools[scheduler_id]
            assert scheduler_row["name"] == "scheduler"

            llm_schema = executor.prepare_tools_for_llm(resolved_tools)
            exposed_functions = {
                entry["function"]["name"] for entry in llm_schema
            }
            assert "schedule_task" in exposed_functions

    def test_executing_schedule_task_creates_one_time_schedule(
        self, pg_conn, patch_scheduler_sessions,
    ):
        """Running ``schedule_task`` with a delay stores an active
        ``once`` schedule whose ``created_via`` is ``chat``."""
        agent_row = _make_agent(pg_conn)
        agent_id = str(agent_row["id"])
        user_id = "alice"

        scheduler_tool = ToolManager(config={}).load_tool(
            "scheduler",
            tool_config={"agent_id": agent_id, "conversation_id": None},
            user_id=user_id,
        )
        raw_output = scheduler_tool.execute_action(
            "schedule_task", instruction="ping me later", delay="1h",
        )
        payload = json.loads(raw_output)
        assert "task_id" in payload

        stored = SchedulesRepository(pg_conn).get(payload["task_id"], user_id)
        assert stored is not None
        assert stored["trigger_type"] == "once"
        assert stored["status"] == "active"
        assert stored["created_via"] == "chat"

View File

@@ -0,0 +1,146 @@
"""Tests for scheduler_utils (cron / DST / delay / horizon)."""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from zoneinfo import ZoneInfo
import pytest
from application.agents.scheduler_utils import (
ScheduleValidationError,
clamp_once_horizon,
cron_interval_seconds,
next_cron_run,
parse_cron,
parse_delay,
parse_run_at,
resolve_timezone,
)
class TestParseCron:
    """Checks on ``parse_cron`` accepting and rejecting expressions."""

    def test_valid(self):
        """A five-field expression parses without raising."""
        expression = "0 9 * * 1"
        parse_cron(expression)

    def test_invalid(self):
        """Free text raises ``ScheduleValidationError``."""
        expression = "not a cron"
        with pytest.raises(ScheduleValidationError):
            parse_cron(expression)

    def test_wrong_field_count(self):
        """A four-field expression raises ``ScheduleValidationError``."""
        expression = "0 9 * *"
        with pytest.raises(ScheduleValidationError):
            parse_cron(expression)
class TestNextCronRunDST:
    """Checks on ``next_cron_run`` across the 2026 Warsaw clock changes."""

    def test_daily_9am_warsaw_across_spring_forward(self):
        """From 28 Mar 09:30 local, the next 09:00 run is 29 Mar local."""
        warsaw = ZoneInfo("Europe/Warsaw")
        anchor = datetime(2026, 3, 28, 9, 30, tzinfo=warsaw)
        upcoming = next_cron_run("0 9 * * *", "Europe/Warsaw", after=anchor)
        expected_local = datetime(2026, 3, 29, 9, 0, tzinfo=warsaw)
        assert upcoming.astimezone(warsaw) == expected_local

    def test_daily_9am_warsaw_across_fall_back(self):
        """From 24 Oct 09:30 local, the next 09:00 run is 25 Oct local."""
        warsaw = ZoneInfo("Europe/Warsaw")
        anchor = datetime(2026, 10, 24, 9, 30, tzinfo=warsaw)
        upcoming = next_cron_run("0 9 * * *", "Europe/Warsaw", after=anchor)
        expected_local = datetime(2026, 10, 25, 9, 0, tzinfo=warsaw)
        assert upcoming.astimezone(warsaw) == expected_local

    def test_utc_default(self):
        """With no timezone, the result is aware and after the anchor."""
        anchor = datetime(2026, 5, 19, 12, 0, tzinfo=timezone.utc)
        upcoming = next_cron_run("0 * * * *", None, after=anchor)
        assert upcoming > anchor
        assert upcoming.tzinfo is not None

    def test_returned_value_is_utc(self):
        """With a Warsaw cron timezone, the result has a zero UTC offset."""
        anchor = datetime(2026, 5, 19, 12, 0, tzinfo=timezone.utc)
        upcoming = next_cron_run("0 9 * * *", "Europe/Warsaw", after=anchor)
        assert upcoming.tzinfo is not None
        assert upcoming.utcoffset() == timedelta(0)
class TestResolveTimezone:
    """Checks on ``resolve_timezone`` for unknown and blank names."""

    def test_unknown(self):
        """An unrecognised zone name raises ``ScheduleValidationError``."""
        bogus_zone = "Atlantis/Nowhere"
        with pytest.raises(ScheduleValidationError):
            resolve_timezone(bogus_zone)

    def test_blank_defaults_utc(self):
        """An empty string and ``None`` both resolve to a ``UTC`` key."""
        for blank in ("", None):
            assert resolve_timezone(blank).key == "UTC"
class TestParseDelay:
    """Checks on ``parse_delay`` for unit suffixes and bad input."""

    @pytest.mark.parametrize(
        "raw,seconds",
        [("30s", 30), ("15m", 900), ("2h", 7200), ("1d", 86_400)],
    )
    def test_units(self, raw, seconds):
        """Each of the ``s`` / ``m`` / ``h`` / ``d`` suffixes converts."""
        parsed = parse_delay(raw)
        assert parsed.total_seconds() == seconds

    def test_uppercase(self):
        """An uppercase unit suffix is accepted."""
        parsed = parse_delay("2H")
        assert parsed.total_seconds() == 7200

    def test_zero_rejected(self):
        """A zero-length delay raises ``ScheduleValidationError``."""
        zero_delay = "0m"
        with pytest.raises(ScheduleValidationError):
            parse_delay(zero_delay)

    def test_garbage(self):
        """Spelled-out text raises ``ScheduleValidationError``."""
        spelled_out = "two hours"
        with pytest.raises(ScheduleValidationError):
            parse_delay(spelled_out)
class TestParseRunAt:
    """Checks on ``parse_run_at`` for ISO timestamps."""

    # The instant every successful case below is expected to resolve to.
    _NOON_UTC = datetime(2026, 5, 19, 12, 0, tzinfo=timezone.utc)

    def test_iso_utc(self):
        """A ``Z``-suffixed timestamp parses to an aware UTC datetime."""
        result = parse_run_at("2026-05-19T12:00:00Z")
        assert result.tzinfo is not None
        assert result == self._NOON_UTC

    def test_iso_with_offset(self):
        """A ``+02:00`` timestamp compares equal to the same UTC instant."""
        result = parse_run_at("2026-05-19T14:00:00+02:00")
        assert result == self._NOON_UTC

    def test_naive_uses_tz(self):
        """A naive timestamp is read in the supplied timezone."""
        result = parse_run_at("2026-05-19T14:00:00", "Europe/Warsaw")
        assert result == self._NOON_UTC

    def test_invalid(self):
        """Non-date text raises ``ScheduleValidationError``."""
        not_a_date = "not a date"
        with pytest.raises(ScheduleValidationError):
            parse_run_at(not_a_date)
class TestCronIntervalSeconds:
    """Checks on ``cron_interval_seconds`` for steady and bursty crons."""

    def test_every_minute_returns_60s(self):
        """An every-minute cron has a 60 second interval."""
        interval = cron_interval_seconds("* * * * *", None)
        assert interval == 60

    def test_hourly_returns_3600s(self):
        """An hourly cron has a 3600 second interval."""
        interval = cron_interval_seconds("0 * * * *", None)
        assert interval == 3600

    def test_bursty_cron_returns_smallest_gap(self):
        # '* 9 * * *' fires every 60s during the 9 AM hour only; sampling two
        # adjacent ticks at random can miss that burst, so the rolling window
        # has to surface the 60.
        interval = cron_interval_seconds("* 9 * * *", None)
        assert interval == 60

    def test_bursty_cron_rejected_when_floor_above_burst(self):
        """The bursty cron's cadence must sit below the configured floor.

        This reads the ambient ``SCHEDULE_MIN_INTERVAL`` setting, so it only
        passes when that floor is larger than the cron's cadence.
        """
        from application.core.settings import settings as app_settings

        burst = "* 9 * * *"
        cadence = cron_interval_seconds(burst, None)
        configured = int(app_settings.SCHEDULE_MIN_INTERVAL)
        floor = max(0, configured)
        assert cadence < floor, (
            f"bursty cron {burst!r} cadence {cadence}s must be below the "
            f"configured SCHEDULE_MIN_INTERVAL floor ({floor}s)"
        )
class TestClampOnceHorizon:
    """Checks on ``clamp_once_horizon`` bounds."""

    def test_rejects_past(self):
        """A time one minute ago raises ``ScheduleValidationError``."""
        now = datetime.now(timezone.utc)
        one_minute_ago = now - timedelta(minutes=1)
        with pytest.raises(ScheduleValidationError):
            clamp_once_horizon(one_minute_ago, max_horizon_seconds=3600)

    def test_rejects_beyond_horizon(self):
        """A time 400 days out exceeds a 365-day horizon and raises."""
        now = datetime.now(timezone.utc)
        too_far = now + timedelta(days=400)
        with pytest.raises(ScheduleValidationError):
            clamp_once_horizon(too_far, max_horizon_seconds=365 * 86_400)

    def test_accepts_in_range(self):
        """A time one hour out passes under a one-day horizon."""
        now = datetime.now(timezone.utc)
        in_an_hour = now + timedelta(hours=1)
        clamp_once_horizon(in_an_hour, max_horizon_seconds=86_400)

View File

@@ -13,16 +13,21 @@ class TestToolExecutorInit:
executor = ToolExecutor()
assert executor.user_api_key is None
assert executor.user is None
assert executor.agent_id is None
assert executor.tool_calls == []
assert executor._loaded_tools == {}
assert executor.conversation_id is None
def test_init_with_params(self):
executor = ToolExecutor(
user_api_key="key", user="alice", decoded_token={"sub": "alice"}
user_api_key="key",
user="alice",
decoded_token={"sub": "alice"},
agent_id="agent-1",
)
assert executor.user_api_key == "key"
assert executor.user == "alice"
assert executor.agent_id == "agent-1"
@pytest.mark.unit
@@ -61,7 +66,8 @@ class TestToolExecutorGetTools:
assert str(tool["id"]) in tools
assert tools[str(tool["id"])]["id"] == tool["id"]
def test_get_tools_uses_user_when_no_api_key(self, pg_conn, monkeypatch):
def test_agentless_chat_synthesizes_defaults(self, pg_conn, monkeypatch):
from application.agents.default_tools import loaded_default_tools
from application.storage.db.repositories.user_tools import UserToolsRepository
UserToolsRepository(pg_conn).create(
@@ -72,15 +78,148 @@ class TestToolExecutorGetTools:
executor = ToolExecutor(user="alice")
tools = executor.get_tools()
assert isinstance(tools, dict)
assert len(tools) == 1
assert len(tools) == 1 + len(loaded_default_tools())
names = {t["name"] for t in tools.values()}
assert "tool1" in names
assert "memory" in names
def test_agent_bound_chat_via_user_path_excludes_defaults(
    self, pg_conn, monkeypatch
):
    """With ``agent_id`` set on the user path, no default tool is resolved."""
    from application.agents.default_tools import loaded_default_tools
    from application.storage.db.repositories.user_tools import UserToolsRepository

    user_tools = UserToolsRepository(pg_conn)
    user_tools.create(user_id="alice", name="tool1", status=True)
    self._patch_conn(monkeypatch, pg_conn)

    resolved = ToolExecutor(user="alice", agent_id="agent-x").get_tools()
    resolved_names = {entry["name"] for entry in resolved.values()}
    assert "tool1" in resolved_names
    assert set(loaded_default_tools()).isdisjoint(resolved_names)
def test_get_tools_defaults_to_local(self, pg_conn, monkeypatch):
from application.agents.default_tools import loaded_default_tools
self._patch_conn(monkeypatch, pg_conn)
executor = ToolExecutor()
tools = executor.get_tools()
assert isinstance(tools, dict)
assert tools == {}
assert len(tools) == len(loaded_default_tools())
assert {t["name"] for t in tools.values()} == set(loaded_default_tools())
def test_api_key_path_excludes_defaults(self, pg_conn, monkeypatch):
    """Resolving by API key returns only the agent's own ``tools`` entries."""
    from application.agents.default_tools import loaded_default_tools
    from application.storage.db.repositories.agents import AgentsRepository
    from application.storage.db.repositories.user_tools import UserToolsRepository

    explicit_tool = UserToolsRepository(pg_conn).create(
        user_id="alice", name="tool1"
    )
    agent_fields = {
        "user_id": "alice",
        "name": "a",
        "status": "active",
        "key": "key-agentbound",
        "tools": [str(explicit_tool["id"])],
    }
    AgentsRepository(pg_conn).create(**agent_fields)
    self._patch_conn(monkeypatch, pg_conn)

    executor = ToolExecutor(user_api_key="key-agentbound", user="alice")
    resolved_names = {entry["name"] for entry in executor.get_tools().values()}
    assert resolved_names == {"tool1"}
    assert set(loaded_default_tools()).isdisjoint(resolved_names)
def test_api_key_path_empty_agent_tools_gets_nothing(
    self, pg_conn, monkeypatch
):
    """An agent with an empty ``tools`` list resolves to an empty dict."""
    from application.storage.db.repositories.agents import AgentsRepository

    agent_fields = {
        "user_id": "bob",
        "name": "a",
        "status": "active",
        "key": "key-empty",
        "tools": [],
    }
    AgentsRepository(pg_conn).create(**agent_fields)
    self._patch_conn(monkeypatch, pg_conn)

    executor = ToolExecutor(user_api_key="key-empty", user="bob")
    resolved = executor.get_tools()
    assert resolved == {}
def test_api_key_path_only_synthesizes_author_added_defaults(
    self, pg_conn, monkeypatch
):
    """An agent listing only ``read_webpage`` resolves to that one tool;
    ``memory`` is not added alongside it."""
    from application.agents.default_tools import default_tool_id
    from application.storage.db.repositories.agents import AgentsRepository

    read_webpage_id = default_tool_id("read_webpage")
    memory_id = default_tool_id("memory")
    agent_fields = {
        "user_id": "erin",
        "name": "a",
        "status": "active",
        "key": "key-only-read",
        "tools": [read_webpage_id],
    }
    AgentsRepository(pg_conn).create(**agent_fields)
    self._patch_conn(monkeypatch, pg_conn)

    executor = ToolExecutor(
        user_api_key="key-only-read", user="erin", agent_id="erin-agent"
    )
    resolved = executor.get_tools()
    assert set(resolved) == {read_webpage_id}
    assert resolved[read_webpage_id]["name"] == "read_webpage"
    assert memory_id not in resolved
    resolved_names = {entry["name"] for entry in resolved.values()}
    assert "memory" not in resolved_names
def test_explicit_default_on_agent_resolves(
    self, pg_conn, monkeypatch
):
    """A default tool id listed in ``agents.tools`` resolves via API key."""
    from application.agents.default_tools import default_tool_id
    from application.storage.db.repositories.agents import AgentsRepository

    memory_id = default_tool_id("memory")
    agent_fields = {
        "user_id": "erin",
        "name": "a",
        "status": "active",
        "key": "key-explicit-default",
        "tools": [memory_id],
    }
    AgentsRepository(pg_conn).create(**agent_fields)
    self._patch_conn(monkeypatch, pg_conn)

    executor = ToolExecutor(user_api_key="key-explicit-default", user="erin")
    resolved = executor.get_tools()
    assert set(resolved) == {memory_id}
    assert resolved[memory_id]["name"] == "memory"
def test_no_dedup_between_explicit_and_default_memory(
    self, pg_conn, monkeypatch
):
    """An explicit ``memory`` row and the default one are both returned,
    under two distinct ids."""
    from application.storage.db.repositories.user_tools import UserToolsRepository

    # Explicit ``memory`` row and the default ``memory`` coexist (separate stores).
    user_tools = UserToolsRepository(pg_conn)
    user_tools.create(user_id="dave", name="memory", status=True)
    self._patch_conn(monkeypatch, pg_conn)

    resolved = ToolExecutor(user="dave").get_tools()
    memory_rows = [
        entry for entry in resolved.values() if entry["name"] == "memory"
    ]
    assert len(memory_rows) == 2
    distinct_ids = {entry["id"] for entry in memory_rows}
    assert len(distinct_ids) == 2
@pytest.mark.unit

View File

@@ -0,0 +1,205 @@
"""Headless mode + tool allowlist enforcement on ToolExecutor.check_pause."""
from __future__ import annotations
from types import SimpleNamespace
from application.agents.tool_executor import ToolExecutor
def _call(name: str, args: dict | None = None, call_id: str = "c1"):
import json
return SimpleNamespace(
id=call_id,
name=name,
arguments=json.dumps(args or {}),
thought_signature=None,
)
def _executor(*, headless=False, allowlist=None):
    """Create a ``ToolExecutor`` with a fixed name-to-tool routing table.

    ``allowlist`` falls back to an empty list; ``_name_to_tool`` maps the
    three action names used by these tests to their ``(tool_id, action)``.
    """
    effective_allowlist = allowlist or []
    executor = ToolExecutor(
        headless=headless, tool_allowlist=effective_allowlist
    )
    routing = {
        "send": ("tool-a", "send"),
        "freecall": ("tool-b", "freecall"),
        "client_only": ("ct0", "client_only"),
    }
    executor._name_to_tool = routing
    return executor
def _tools_dict():
return {
"tool-a": {
"id": "tool-a",
"name": "telegram",
"actions": [
{"name": "send", "require_approval": True},
],
},
"tool-b": {
"id": "tool-b",
"name": "noop",
"actions": [
{"name": "freecall", "require_approval": False},
],
},
"ct0": {
"name": "client_only",
"client_side": True,
"actions": [
{"name": "client_only"},
],
},
}
class TestHeadlessApproval:
    """Approval-gated actions under headless mode."""

    def test_denied_when_not_in_allowlist(self):
        """An approval-gated call off the allowlist is headless-denied."""
        executor = _executor(headless=True, allowlist=[])
        outcome = executor.check_pause(_tools_dict(), _call("send"), "MockLLM")
        assert outcome is not None
        assert outcome["pause_type"] == "headless_denied"
        assert outcome["error_type"] == "tool_not_allowed"

    def test_allowed_when_in_allowlist(self):
        """An allowlisted tool's gated action yields no pause."""
        executor = _executor(headless=True, allowlist=["tool-a"])
        outcome = executor.check_pause(_tools_dict(), _call("send"), "MockLLM")
        assert outcome is None

    def test_non_approval_tool_runs_freely(self):
        """An ungated action yields no pause even with an empty allowlist."""
        executor = _executor(headless=True, allowlist=[])
        outcome = executor.check_pause(
            _tools_dict(), _call("freecall"), "MockLLM"
        )
        assert outcome is None
class TestHeadlessClientSide:
    """Client-side tools under headless mode."""

    def test_client_side_always_denied_in_headless(self):
        # Client-side ignores the allowlist; no headless answer is possible.
        executor = _executor(headless=True, allowlist=["ct0"])
        outcome = executor.check_pause(
            _tools_dict(), _call("client_only"), "MockLLM"
        )
        assert outcome is not None
        assert outcome["pause_type"] == "headless_denied"
class TestNormalModeUnchanged:
    """Outside headless mode, ``check_pause`` still returns pause records."""

    def test_approval_still_pauses_without_headless(self):
        """An approval-gated action pauses with ``awaiting_approval``."""
        ex = _executor(headless=False)
        result = ex.check_pause(_tools_dict(), _call("send"), "MockLLM")
        # Guard first so a missing pause fails as an assertion rather than a
        # ``TypeError`` from subscripting ``None``.
        assert result is not None
        assert result["pause_type"] == "awaiting_approval"

    def test_client_side_still_pauses_without_headless(self):
        """A client-side tool pauses with ``requires_client_execution``."""
        ex = _executor(headless=False)
        result = ex.check_pause(_tools_dict(), _call("client_only"), "MockLLM")
        # Same guard as above: report "no pause" explicitly.
        assert result is not None
        assert result["pause_type"] == "requires_client_execution"
# ---------------------------------------------------------------------------
# Scheduler exclusion in headless runs — chat-only tool must not appear in
# the toolset when a scheduled / webhook LLM runs, else it could re-schedule.
# ---------------------------------------------------------------------------
class TestHeadlessSchedulerExclusion:
    """``scheduler`` is dropped from every tool-resolution path when headless."""

    def test_synthesized_default_tools_drops_scheduler_in_headless(self):
        """``synthesized_default_tools(..., headless=True)`` omits scheduler."""
        from application.agents.default_tools import (
            loaded_default_tools,
            synthesized_default_tools,
        )

        # Sanity: scheduler is on for normal chats…
        chat_names = {row["name"] for row in synthesized_default_tools(None)}
        if "scheduler" in loaded_default_tools():
            assert "scheduler" in chat_names

        # …and silently absent for headless runs.
        headless_rows = synthesized_default_tools(None, headless=True)
        headless_names = {row["name"] for row in headless_rows}
        assert "scheduler" not in headless_names

    def test_get_user_tools_filters_scheduler_when_headless(
        self, monkeypatch,
    ):
        """``_get_user_tools`` has scheduler in chat mode, not headless."""
        from application.agents import tool_executor as te_module
        from application.agents.default_tools import (
            default_tool_id,
            loaded_default_tools,
        )

        if "scheduler" not in loaded_default_tools():
            import pytest as _pytest  # local alias to keep top-of-module noise low

            _pytest.skip("scheduler not loaded in this env")

        # Stub the DB layer: with no explicit user_tools rows, the synthesized
        # defaults are the only place ``scheduler`` can come from, which is
        # the path this test pins.
        from contextlib import contextmanager

        @contextmanager
        def _fake_readonly():
            yield object()

        class _NoUserTools:
            def list_active_for_user(self, _user):
                return []

        class _NoUserDoc:
            def get(self, _user):
                return None

        monkeypatch.setattr(te_module, "db_readonly", _fake_readonly)
        monkeypatch.setattr(
            te_module, "UserToolsRepository", lambda _c: _NoUserTools(),
        )
        monkeypatch.setattr(
            te_module, "UsersRepository", lambda _c: _NoUserDoc(),
        )

        scheduler_id = default_tool_id("scheduler")

        chat_executor = te_module.ToolExecutor(headless=False)
        chat_tools = chat_executor._get_user_tools("u-test")
        assert scheduler_id in chat_tools

        headless_executor = te_module.ToolExecutor(headless=True)
        headless_tools = headless_executor._get_user_tools("u-test")
        assert scheduler_id not in headless_tools

    def test_get_tools_by_api_key_drops_scheduler_when_headless(
        self, monkeypatch,
    ):
        """An agent-bound headless run (e.g. webhook) skips scheduler even if
        the author added the synthetic id to ``agents.tools``."""
        from application.agents import tool_executor as te_module
        from application.agents.default_tools import default_tool_id

        scheduler_id = default_tool_id("scheduler")

        from contextlib import contextmanager

        @contextmanager
        def _fake_readonly():
            yield object()

        class _StubAgents:
            def __init__(self, _conn):
                pass

            def find_by_key(self, _key):
                return {"user_id": "u1", "tools": [scheduler_id]}

        class _StubUserTools:
            def __init__(self, _conn):
                pass

            def get_any(self, _tool_id, _user):
                return None

        monkeypatch.setattr(te_module, "db_readonly", _fake_readonly)
        monkeypatch.setattr(te_module, "AgentsRepository", _StubAgents)
        monkeypatch.setattr(te_module, "UserToolsRepository", _StubUserTools)

        normal_executor = te_module.ToolExecutor(
            user_api_key="k", headless=False, agent_id="a",
        )
        normal_tools = normal_executor._get_tools_by_api_key("k")
        assert scheduler_id in normal_tools

        headless_executor = te_module.ToolExecutor(
            user_api_key="k", headless=True, agent_id="a",
        )
        headless_tools = headless_executor._get_tools_by_api_key("k")
        assert scheduler_id not in headless_tools

View File

@@ -250,3 +250,39 @@ class TestRepository:
row = _select_attempt(pg_conn, "c-y")
assert row["status"] == "failed"
assert row["error"] == "kaboom"
@pytest.mark.unit
class TestDefaultToolJournaling:
    """A default tool's synthetic id round-trips through execute/journal."""

    def test_synthetic_tool_id_is_journaled(
        self, pg_conn, mock_tool_manager, monkeypatch
    ):
        """Executing the synthesized ``memory`` tool records a confirmed
        attempt whose ``tool_id`` is the synthetic id."""
        from application.agents.default_tools import synthesize_default_tool

        memory_row = synthesize_default_tool("memory")
        assert memory_row is not None
        memory_id = memory_row["id"]
        tools_dict = {memory_id: memory_row}
        executor = ToolExecutor(user="u")

        def _fake_parser(_cls, **kw):
            # Stand-in parser that always routes to the memory tool's
            # ``view`` action with a root path.
            parsed = (memory_row["id"], "view", {"path": "/"})
            return Mock(parse_args=Mock(return_value=parsed))

        monkeypatch.setattr(
            "application.agents.tool_executor.ToolActionParser", _fake_parser,
        )
        _patch_db(monkeypatch, pg_conn)

        tool_call = _make_call(call_id="c-def")
        _events, result = _drain(
            executor.execute(tools_dict, tool_call, "MockLLM")
        )
        assert result[0] == "Tool result"

        attempt = _select_attempt(pg_conn, "c-def")
        assert attempt is not None
        assert attempt["status"] == "confirmed"
        assert attempt["tool_name"] == "memory"
        assert str(attempt["tool_id"]) == memory_row["id"]

View File

@@ -0,0 +1,442 @@
"""Tests for the SchedulerTool."""
from __future__ import annotations
import json
import uuid
from datetime import datetime, timedelta, timezone
from unittest.mock import patch
import pytest
from sqlalchemy import text
# Pre-import to stabilise the ToolManager.load_tools walk's import order
# (avoids the mcp_tool ↔ application.api.user circular when ToolManager
# instantiation is the first reachable importer in a test process).
import application.api.user.tools.mcp # noqa: F401
from application.agents.tools.scheduler import SchedulerTool # noqa: E402
from application.core.settings import settings # noqa: E402
from application.storage.db.repositories.schedules import SchedulesRepository # noqa: E402
@pytest.fixture
def patch_sessions(pg_conn):
    """Replace the scheduler tool's ``db_session`` and ``db_readonly`` with
    a context manager that yields ``pg_conn``."""
    from contextlib import contextmanager

    @contextmanager
    def _yield_pg_conn():
        yield pg_conn

    scheduler_module = "application.agents.tools.scheduler"
    with patch(f"{scheduler_module}.db_session", _yield_pg_conn):
        with patch(f"{scheduler_module}.db_readonly", _yield_pg_conn):
            yield
def _make_agent(conn, user_id: str = "u1") -> str:
    """Insert a draft agent named ``a`` for ``user_id``; return its id as str."""
    statement = text(
        "INSERT INTO agents (user_id, name, status) "
        "VALUES (:u, 'a', 'draft') RETURNING id"
    )
    inserted = conn.execute(statement, {"u": user_id}).fetchone()
    agent_id = inserted[0]
    return str(agent_id)
def _make_tool(name="scheduler", *, user_id="u1", agent_id=None, conversation_id=None):
    """Build a ``SchedulerTool`` bound to the given user / agent / conversation.

    ``name`` is accepted but not used when constructing the tool.
    """
    config = {
        "agent_id": agent_id,
        "conversation_id": conversation_id,
    }
    return SchedulerTool(tool_config=config, user_id=user_id)
class TestGuards:
    """Precondition errors returned by ``schedule_task``."""

    def test_requires_user_id(self):
        """Without a ``user_id`` the output mentions ``user_id``."""
        config = {"agent_id": str(uuid.uuid4())}
        tool = SchedulerTool(tool_config=config)
        output = tool.execute_action("schedule_task", instruction="x")
        assert "user_id" in output

    def test_rejects_invalid_agent_id(self):
        """A non-uuid ``agent_id`` is reported as ``invalid agent_id``."""
        tool = _make_tool(user_id="u1", agent_id="not-a-uuid")
        output = tool.execute_action("schedule_task", instruction="x")
        assert "invalid agent_id" in output

    def test_requires_agent_or_conversation(self):
        # With neither agent_id nor conversation_id (e.g. a webhook caller
        # outside any chat) this is a hard error: the scheduler cannot
        # operate without a conversation home.
        tool = _make_tool(user_id="u1", agent_id=None, conversation_id=None)
        output = tool.execute_action("schedule_task", instruction="x")
        assert "conversation_id" in output or "conversation home" in output
class TestScheduleTask:
    """``schedule_task`` creation paths and validation errors."""

    def test_creates_with_delay(self, pg_conn, patch_sessions):
        """A ``2h`` delay stores a ``once`` schedule about two hours out."""
        agent_id = _make_agent(pg_conn)
        tool = _make_tool(user_id="u1", agent_id=agent_id, conversation_id=None)
        raw_output = tool.execute_action(
            "schedule_task", instruction="say hi", delay="2h",
        )
        payload = json.loads(raw_output)
        assert "task_id" in payload
        assert "resolved_run_at" in payload

        stored = SchedulesRepository(pg_conn).get(payload["task_id"], "u1")
        assert stored is not None
        assert stored["trigger_type"] == "once"
        assert stored["created_via"] == "chat"

        # Normalise a trailing ``Z`` so ``fromisoformat`` accepts the value.
        run_at_iso = payload["resolved_run_at"].replace("Z", "+00:00")
        fire_time = datetime.fromisoformat(run_at_iso)
        remaining = fire_time - datetime.now(timezone.utc)
        assert timedelta(minutes=119) <= remaining <= timedelta(minutes=121)

    def test_creates_with_run_at(self, pg_conn, patch_sessions):
        """An explicit ``run_at`` three hours ahead yields a ``task_id``."""
        agent_id = _make_agent(pg_conn)
        tool = _make_tool(user_id="u1", agent_id=agent_id)
        three_hours_out = datetime.now(timezone.utc) + timedelta(hours=3)
        raw_output = tool.execute_action(
            "schedule_task", instruction="x", run_at=three_hours_out.isoformat(),
        )
        payload = json.loads(raw_output)
        assert "task_id" in payload

    def test_rejects_both_delay_and_run_at(self, pg_conn, patch_sessions):
        """Passing ``delay`` and ``run_at`` together gives ``only one``."""
        agent_id = _make_agent(pg_conn)
        tool = _make_tool(user_id="u1", agent_id=agent_id)
        raw_output = tool.execute_action(
            "schedule_task",
            instruction="x",
            delay="30m",
            run_at="2030-01-01T00:00:00Z",
        )
        assert "only one" in raw_output

    def test_rejects_past_run_at(self, pg_conn, patch_sessions):
        """A ``run_at`` one hour ago produces output containing ``past``."""
        agent_id = _make_agent(pg_conn)
        tool = _make_tool(user_id="u1", agent_id=agent_id)
        an_hour_ago = datetime.now(timezone.utc) - timedelta(hours=1)
        raw_output = tool.execute_action(
            "schedule_task", instruction="x", run_at=an_hour_ago.isoformat()
        )
        assert "past" in raw_output

    def test_rejects_beyond_horizon(
        self, pg_conn, patch_sessions, monkeypatch
    ):
        """With a one-hour horizon, a ten-hour ``run_at`` gives ``horizon``."""
        monkeypatch.setattr(settings, "SCHEDULE_ONCE_MAX_HORIZON", 3600)
        agent_id = _make_agent(pg_conn)
        tool = _make_tool(user_id="u1", agent_id=agent_id)
        ten_hours_out = datetime.now(timezone.utc) + timedelta(hours=10)
        raw_output = tool.execute_action(
            "schedule_task", instruction="x", run_at=ten_hours_out.isoformat()
        )
        assert "horizon" in raw_output
class TestQuota:
    """Per-user schedule quota enforcement."""

    def test_quota_enforced(self, pg_conn, patch_sessions, monkeypatch):
        """With a cap of two, the third ``schedule_task`` reports ``maximum``."""
        monkeypatch.setattr(settings, "SCHEDULE_MAX_PER_USER", 2)
        agent_id = _make_agent(pg_conn)
        tool = _make_tool(user_id="u1", agent_id=agent_id)

        def _schedule_once():
            return tool.execute_action(
                "schedule_task", instruction="x", delay="1h",
            )

        # The first two calls fit under the cap and each yields a task id.
        for _ in range(2):
            accepted = _schedule_once()
            assert "task_id" in accepted

        rejected = _schedule_once()
        assert "maximum" in rejected
class TestListAndCancel:
    """Listing and cancelling schedules through the tool's actions."""

    def test_list_returns_pending(self, pg_conn, patch_sessions):
        """Three created schedules are listed, each with status ``active``."""
        tool = _make_tool(user_id="u1", agent_id=_make_agent(pg_conn))
        for _ in range(3):
            tool.execute_action("schedule_task", instruction="x", delay="1h")

        tasks = json.loads(tool.execute_action("list_scheduled_tasks"))["tasks"]
        assert len(tasks) == 3
        assert not [t for t in tasks if t["status"] != "active"]

    def test_cancel_flips_status(self, pg_conn, patch_sessions):
        """Cancelling an own task replies "cancelled" and updates the stored row."""
        tool = _make_tool(user_id="u1", agent_id=_make_agent(pg_conn))
        raw_created = tool.execute_action(
            "schedule_task", instruction="x", delay="1h",
        )
        task_id = json.loads(raw_created)["task_id"]

        reply = tool.execute_action("cancel_scheduled_task", task_id=task_id)
        assert "cancelled" in reply

        stored = SchedulesRepository(pg_conn).get(task_id, "u1")
        assert stored["status"] == "cancelled"

    def test_cancel_unknown_id_rejected(self, pg_conn, patch_sessions):
        """A ``task_id`` that is not a UUID gets a "valid id" reply."""
        tool = _make_tool(user_id="u1", agent_id=_make_agent(pg_conn))
        reply = tool.execute_action(
            "cancel_scheduled_task", task_id="not-a-uuid",
        )
        assert "valid id" in reply
class TestActionsMetadata:
    """The action names ``SchedulerTool`` advertises."""

    def test_actions_listed(self):
        """Exactly the schedule, list and cancel actions are exposed."""
        expected = {
            "schedule_task",
            "list_scheduled_tasks",
            "cancel_scheduled_task",
        }
        advertised = {
            action["name"] for action in SchedulerTool().get_actions_metadata()
        }
        assert advertised == expected
class TestAgentlessInvocation:
    """Scheduler behaviour when the tool is built with ``agent_id=None``."""

    def test_agentless_creates_schedule_with_null_agent_id(
        self, pg_conn, patch_sessions,
    ):
        """Scheduling from an agentless chat stores a row with NULL ``agent_id``."""
        origin_id = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'origin') RETURNING id"
            )
        ).scalar()
        tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(origin_id),
        )

        payload = json.loads(
            tool.execute_action(
                "schedule_task", instruction="ping me later", delay="1h",
            )
        )
        assert "task_id" in payload

        stored = SchedulesRepository(pg_conn).get(payload["task_id"], "u1")
        assert stored is not None
        assert stored["agent_id"] is None
        assert stored["trigger_type"] == "once"
        assert stored["created_via"] == "chat"
        assert str(stored["origin_conversation_id"]) == str(origin_id)

    def test_agentless_list_scoped_to_conversation(
        self, pg_conn, patch_sessions,
    ):
        """Each agentless chat lists only the tasks it created itself."""
        first_conv = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'a') RETURNING id"
            )
        ).scalar()
        second_conv = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'b') RETURNING id"
            )
        ).scalar()
        first_tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(first_conv),
        )
        second_tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(second_conv),
        )

        # Two tasks in the first chat, one in the second.
        for instruction, delay in (("in-a", "1h"), ("in-a-2", "2h")):
            first_tool.execute_action(
                "schedule_task", instruction=instruction, delay=delay,
            )
        second_tool.execute_action(
            "schedule_task", instruction="in-b", delay="3h",
        )

        first_tasks = json.loads(
            first_tool.execute_action("list_scheduled_tasks")
        )["tasks"]
        second_tasks = json.loads(
            second_tool.execute_action("list_scheduled_tasks")
        )["tasks"]
        assert len(first_tasks) == 2
        assert len(second_tasks) == 1
        assert not [t for t in first_tasks if t["status"] != "active"]

    def test_agentless_cancel_blocked_for_other_conversation(
        self, pg_conn, patch_sessions,
    ):
        """Cancelling from a different agentless chat replies "not found"."""
        owner_conv = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'a') RETURNING id"
            )
        ).scalar()
        other_conv = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'b') RETURNING id"
            )
        ).scalar()
        owner_tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(owner_conv),
        )
        other_tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(other_conv),
        )

        task_id = json.loads(
            owner_tool.execute_action(
                "schedule_task", instruction="x", delay="1h",
            )
        )["task_id"]
        reply = other_tool.execute_action(
            "cancel_scheduled_task", task_id=task_id,
        )
        assert "not found" in reply

    def test_agentless_cancel_succeeds_in_own_conversation(
        self, pg_conn, patch_sessions,
    ):
        """The chat that created a task can cancel it."""
        conv_id = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'a') RETURNING id"
            )
        ).scalar()
        tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(conv_id),
        )

        task_id = json.loads(
            tool.execute_action("schedule_task", instruction="x", delay="1h")
        )["task_id"]
        reply = tool.execute_action("cancel_scheduled_task", task_id=task_id)
        assert "cancelled" in reply

    def test_agentless_snapshot_allowlist_lists_user_tools(
        self, pg_conn, patch_sessions,
    ):
        """An agentless schedule's ``tool_allowlist`` contains the user's
        non-approval tool, and ``_safe_default_allowlist`` agrees."""
        from application.agents.tools.scheduler import _safe_default_allowlist
        from application.storage.db.repositories.user_tools import (
            UserToolsRepository,
        )

        # Seed one explicit tool whose only action needs no approval.
        seeded_tool = UserToolsRepository(pg_conn).create(
            "u1",
            "read_webpage",
            config={},
            actions=[
                {"name": "fetch", "active": True, "require_approval": False},
            ],
            status=True,
        )
        seeded_id = str(seeded_tool["id"])

        conv_id = pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'a') RETURNING id"
            )
        ).scalar()
        tool = _make_tool(
            user_id="u1", agent_id=None, conversation_id=str(conv_id),
        )
        payload = json.loads(
            tool.execute_action("schedule_task", instruction="x", delay="1h")
        )
        stored = SchedulesRepository(pg_conn).get(payload["task_id"], "u1")

        # The stored snapshot includes the explicit user_tools row ...
        assert seeded_id in (stored["tool_allowlist"] or [])
        # ... and so does a direct call to the allowlist helper.
        assert seeded_id in _safe_default_allowlist(None, "u1")
class TestAllowlistSnapshotSemantics:
    """The schedule's ``tool_allowlist`` is a **pre-auth snapshot**, not a
    visibility cap. The LLM sees the user's *current* tools at fire time
    (via ``ToolExecutor._get_user_tools``); the snapshot only governs
    whether an approval-gated tool can run unattended."""

    def test_tool_added_after_creation_is_visible_at_fire_time(
        self, pg_conn, patch_sessions,
    ):
        """A tool added after the allowlist was computed is absent from that
        earlier snapshot but present in ``list_active_for_user``; being
        approval-gated, it also stays out of a recomputed safe allowlist."""
        from application.agents.tools.scheduler import _safe_default_allowlist
        from application.storage.db.repositories.user_tools import (
            UserToolsRepository,
        )

        # Seed a conversation row; its id is not needed below.
        pg_conn.execute(
            text(
                "INSERT INTO conversations (user_id, name) "
                "VALUES ('u1', 'snap-add') RETURNING id"
            )
        ).fetchone()

        # Snapshot the allowlist BEFORE adding the new tool.
        snapshot_before = _safe_default_allowlist(None, "u1")

        # User adds an approval-gated tool afterwards.
        added = UserToolsRepository(pg_conn).create(
            "u1",
            "telegram",
            config={},
            actions=[
                {"name": "send", "active": True, "require_approval": True},
            ],
            status=True,
        )
        added_id = str(added["id"])

        # The earlier snapshot does NOT include the later tool.
        assert added_id not in snapshot_before

        snapshot_after = _safe_default_allowlist(None, "u1")
        # The tool IS in ``list_active_for_user`` (the current state) ...
        ids_now = {
            str(r["id"])
            for r in UserToolsRepository(pg_conn).list_active_for_user("u1")
        }
        assert added_id in ids_now
        # ... while the approval gate keeps it out of the safe allowlist.
        assert added_id not in snapshot_after

    def test_tool_deleted_between_creation_and_fire_is_invisible(
        self, pg_conn, patch_sessions,
    ):
        """A deleted tool disappears from both ``list_active_for_user`` and a
        freshly recomputed allowlist."""
        from application.agents.tools.scheduler import _safe_default_allowlist
        from application.storage.db.repositories.user_tools import (
            UserToolsRepository,
        )

        repo = UserToolsRepository(pg_conn)
        existing = repo.create(
            "u1",
            "read_webpage",
            config={},
            actions=[
                {"name": "fetch", "active": True, "require_approval": False},
            ],
            status=True,
        )
        existing_id = str(existing["id"])

        # The allowlist computed now includes it (non-approval).
        snapshot = _safe_default_allowlist(None, "u1")
        assert existing_id in snapshot

        # User deletes it; the active listing no longer surfaces it.
        repo.delete(existing_id, "u1")
        # Stringify the listed ids: comparing a ``str`` against raw id
        # objects would make the ``not in`` check below pass vacuously.
        ids_now = {str(r["id"]) for r in repo.list_active_for_user("u1")}
        assert existing_id not in ids_now

        # And the freshly recomputed allowlist drops it too.
        snapshot_after = _safe_default_allowlist(None, "u1")
        assert existing_id not in snapshot_after
class TestInternalFlag:
    """``SchedulerTool`` is flagged internal yet loadable on request."""

    def test_internal_true(self):
        """The class attribute ``internal`` is exactly ``True``."""
        assert SchedulerTool.internal is True

    def test_not_in_tool_manager_auto_load(self):
        """A ``ToolManager`` built with empty config has no ``scheduler`` entry."""
        from application.agents.tools.tool_manager import ToolManager

        manager = ToolManager(config={})
        assert "scheduler" not in manager.tools

    def test_load_tool_special_case_still_works(self):
        """``load_tool("scheduler", ...)`` returns a ``SchedulerTool`` bound to the user."""
        from application.agents.tools.tool_manager import ToolManager

        manager = ToolManager(config={})
        agent_config = {"agent_id": str(uuid.uuid4())}
        loaded = manager.load_tool(
            "scheduler", tool_config=agent_config, user_id="u1",
        )
        assert isinstance(loaded, SchedulerTool)
        assert loaded.user_id == "u1"

View File

@@ -246,6 +246,68 @@ class TestCompleteStreamMethod:
mock_reserve.assert_called_once()
mock_finalize.assert_called_once()
def test_tool_executor_conversation_id_set_after_reserve(
    self, mock_mongo_db, flask_app,
):
    """Regression: the reserved conversation id reaches the tool executor.

    ``save_user_question`` may mint a fresh ``conversation_id`` on the
    first turn. That id MUST be copied onto
    ``agent.tool_executor.conversation_id`` BEFORE ``agent.gen`` runs, so
    tools that need a conversation home (``scheduler`` in an agentless
    chat) see it on their very first call.
    """
    from application.api.answer.routes.base import BaseAnswerResource

    with flask_app.app_context():
        resource = BaseAnswerResource()
        reserved_id = str(uuid.uuid4())
        observed: dict = {}

        agent = MagicMock()
        executor = MagicMock()
        # Start with no conversation id; the propagation must set it.
        executor.conversation_id = None
        agent.tool_executor = executor

        def _gen(**_kwargs):
            # Record the executor's id at the moment ``gen`` starts running,
            # which is what tools called from the agent loop would see.
            observed["value"] = agent.tool_executor.conversation_id
            yield {"answer": "ok"}

        agent.gen.side_effect = _gen
        agent.gen.return_value = None  # side_effect drives the call

        service = resource.conversation_service
        with patch.object(service, "save_user_question") as mock_reserve, \
                patch.object(service, "finalize_message", return_value=True):
            mock_reserve.return_value = {
                "conversation_id": reserved_id,
                "message_id": str(uuid.uuid4()),
                "request_id": "req-prop",
            }
            stream = resource.complete_stream(
                question="schedule something",
                agent=agent,
                conversation_id=None,  # caller had no conversation yet
                user_api_key=None,
                decoded_token={"sub": "user-prop"},
                should_save_conversation=True,
            )
            # Drain the generator so the whole stream body executes.
            list(stream)

        # The id reserved by save_user_question was visible inside gen ...
        assert observed["value"] == reserved_id
        # ... and is still set on the executor afterwards.
        assert executor.conversation_id == reserved_id
@pytest.mark.unit

View File

@@ -429,7 +429,7 @@ class TestConfigureRetriever:
assert sp.retriever_config["retriever_name"] == "hybrid_search"
assert sp.retriever_config["chunks"] == 5
def test_request_overrides_agent(self):
def test_agent_wins_over_request_on_agent_bound(self):
from application.api.answer.services.stream_processor import (
StreamProcessor,
)
@@ -438,9 +438,33 @@ class TestConfigureRetriever:
)
sp._agent_data = {"retriever": "hybrid_search", "chunks": 5}
sp._configure_retriever()
assert sp.retriever_config["retriever_name"] == "hybrid_search"
assert sp.retriever_config["chunks"] == 5
def test_body_wins_on_agentless(self):
    """With no ``_agent_data`` assigned, the body's retriever and chunks apply."""
    from application.api.answer.services.stream_processor import (
        StreamProcessor,
    )

    request_body = {"retriever": "duckdb", "chunks": 7}
    processor = StreamProcessor(request_body, {"sub": "u"})
    processor._configure_retriever()

    assert processor.retriever_config["retriever_name"] == "duckdb"
    assert processor.retriever_config["chunks"] == 7
def test_agent_bound_drops_body_chunks_and_retriever(self):
    """An agent with no retriever/chunks of its own gets ``classic`` and 2,
    not the values sent in the request body."""
    from application.api.answer.services.stream_processor import (
        StreamProcessor,
    )

    request_body = {"retriever": "duckdb", "chunks": 7}
    processor = StreamProcessor(request_body, {"sub": "u"})
    # Empty agent data: agent-bound, but nothing configured on the agent.
    processor._agent_data = {}
    processor._configure_retriever()

    assert processor.retriever_config["retriever_name"] == "classic"
    assert processor.retriever_config["chunks"] == 2
def test_invalid_agent_chunks_falls_back(self):
from application.api.answer.services.stream_processor import (
StreamProcessor,
@@ -569,10 +593,11 @@ class TestPreFetchTools:
got = sp.pre_fetch_tools()
assert got is None
def test_no_user_tools_returns_none(self, pg_conn):
def test_no_template_skips_default_tool_prefetch(self, pg_conn):
from application.api.answer.services.stream_processor import (
StreamProcessor,
)
sp = StreamProcessor({}, {"sub": "no-tools-user"})
with _patch_db(pg_conn), patch(
"application.api.answer.services.stream_processor.settings.ENABLE_TOOL_PREFETCH",
@@ -580,3 +605,347 @@ class TestPreFetchTools:
):
got = sp.pre_fetch_tools()
assert got is None
def test_no_template_skips_only_default_rows_not_explicit(self, pg_conn):
    """Without required template actions, an explicit user tool is still
    prefetched while no document flagged ``default`` is fetched."""
    from application.api.answer.services.stream_processor import (
        StreamProcessor,
    )
    from application.storage.db.repositories.user_tools import (
        UserToolsRepository,
    )

    UserToolsRepository(pg_conn).create(
        user_id="u-explicit-prefetch", name="read_webpage", status=True
    )
    processor = StreamProcessor({}, {"sub": "u-explicit-prefetch"})
    seen_docs = []

    def _fake_fetch(tool_doc, required_actions):
        # Record every document the prefetch asks for.
        seen_docs.append(tool_doc)
        return {"ok": True}

    with _patch_db(pg_conn), patch(
        "application.api.answer.services.stream_processor.settings.ENABLE_TOOL_PREFETCH",
        True,
    ), patch.object(processor, "_fetch_tool_data", _fake_fetch):
        prefetched = processor.pre_fetch_tools()

    assert prefetched is not None
    assert "read_webpage" in prefetched
    assert not [doc for doc in seen_docs if doc.get("default")]
    assert any(doc.get("name") == "read_webpage" for doc in seen_docs)
def test_default_tool_prefetched_when_template_references_it(self, pg_conn):
    """When required actions name ``read_webpage``, its default document is
    fetched and exposed under the synthetic default id."""
    from application.agents.default_tools import default_tool_id
    from application.api.answer.services.stream_processor import (
        StreamProcessor,
    )

    processor = StreamProcessor({}, {"sub": "u-tpl-default"})
    processor._required_tool_actions = {"read_webpage": {None}}
    seen_docs = []

    def _fake_fetch(tool_doc, required_actions):
        seen_docs.append(tool_doc)
        return {"ok": True}

    with _patch_db(pg_conn), patch(
        "application.api.answer.services.stream_processor.settings.ENABLE_TOOL_PREFETCH",
        True,
    ), patch.object(processor, "_fetch_tool_data", _fake_fetch):
        prefetched = processor.pre_fetch_tools()

    assert prefetched is not None
    default_docs = [
        doc for doc in seen_docs
        if doc.get("name") == "read_webpage" and doc.get("default")
    ]
    assert default_docs
    # Defaults are reachable by synthetic id only, not by name.
    assert default_tool_id("read_webpage") in prefetched
def test_agent_bound_invocation_omits_default_tool_prefetch(self, pg_conn):
    """With an ``agent_id`` in the body, ``pre_fetch_tools`` returns ``None``
    even though the required actions name ``read_webpage``."""
    from application.api.answer.services.stream_processor import (
        StreamProcessor,
    )

    processor = StreamProcessor({"agent_id": "agent-xyz"}, {"sub": "u-ag"})
    processor._required_tool_actions = {"read_webpage": {None}}

    with _patch_db(pg_conn), patch(
        "application.api.answer.services.stream_processor.settings.ENABLE_TOOL_PREFETCH",
        True,
    ):
        prefetched = processor.pre_fetch_tools()

    assert prefetched is None
def test_template_name_key_favors_explicit_over_default(self, pg_conn):
    """An explicit row and the synthesized default share a tool name.

    The name key must resolve to the explicit row; the default stays
    reachable through its synthetic id only.
    """
    from application.agents.default_tools import default_tool_id
    from application.api.answer.services.stream_processor import (
        StreamProcessor,
    )
    from application.storage.db.repositories.user_tools import (
        UserToolsRepository,
    )

    user = "u-collision"
    explicit_row = UserToolsRepository(pg_conn).create(
        user_id=user, name="read_webpage", status=True,
    )
    explicit_id = str(explicit_row["id"])
    default_id = default_tool_id("read_webpage")

    processor = StreamProcessor({}, {"sub": user})
    processor._required_tool_actions = {"read_webpage": {None}}

    def _fake_fetch(tool_doc, required_actions):
        # Echo back which document was fetched so the keys can be checked.
        return {
            "is_default": bool(tool_doc.get("default")),
            "id": str(tool_doc.get("_id") or tool_doc.get("id")),
        }

    with _patch_db(pg_conn), patch(
        "application.api.answer.services.stream_processor.settings.ENABLE_TOOL_PREFETCH",
        True,
    ), patch.object(processor, "_fetch_tool_data", _fake_fetch):
        prefetched = processor.pre_fetch_tools()

    assert prefetched is not None
    by_name = prefetched["read_webpage"]
    assert by_name["is_default"] is False
    assert by_name["id"] == explicit_id
    assert prefetched[explicit_id]["is_default"] is False
    assert prefetched[default_id]["is_default"] is True
class TestValidateAndSetModelAgentAuthority:
    """Agent-bound chats: the agent's ``default_model_id`` is authoritative."""

    def test_agent_bound_ignores_body_model_id(self):
        """The agent's model wins over the body's and is validated as the owner."""
        from application.api.answer.services.stream_processor import (
            StreamProcessor,
        )

        processor = StreamProcessor({"model_id": "body-model"}, {"sub": "caller"})
        processor._agent_data = {"user": "owner"}
        processor.agent_config = {
            "default_model_id": "agent-model",
            "user_id": "owner",
        }
        validate_calls: list = []

        def _fake_validate(model_id, user_id=None):
            validate_calls.append((model_id, user_id))
            return True

        with patch(
            "application.api.answer.services.stream_processor.validate_model_id",
            side_effect=_fake_validate,
        ), patch(
            "application.api.answer.services.stream_processor.get_default_model_id",
            return_value="global-default",
        ):
            processor._validate_and_set_model()

        assert processor.model_id == "agent-model"
        # Resolved under the agent owner, not the caller.
        assert processor.model_user_id == "owner"
        assert ("agent-model", "owner") in validate_calls

    def test_agent_bound_no_default_falls_back_to_system(self):
        """An agent with an empty ``default_model_id`` falls back to the
        system default and leaves ``model_user_id`` unset."""
        from application.api.answer.services.stream_processor import (
            StreamProcessor,
        )

        processor = StreamProcessor({"model_id": "body-model"}, {"sub": "u"})
        processor._agent_data = {"user": "u"}
        processor.agent_config = {"default_model_id": "", "user_id": "u"}

        with patch(
            "application.api.answer.services.stream_processor.validate_model_id",
            return_value=False,
        ), patch(
            "application.api.answer.services.stream_processor.get_default_model_id",
            return_value="global-default",
        ):
            processor._validate_and_set_model()

        assert processor.model_id == "global-default"
        assert processor.model_user_id is None

    def test_agentless_body_model_still_wins(self):
        """With ``_agent_data`` set to ``None`` the body's model is used."""
        from application.api.answer.services.stream_processor import (
            StreamProcessor,
        )

        processor = StreamProcessor({"model_id": "body-model"}, {"sub": "u"})
        processor._agent_data = None

        with patch(
            "application.api.answer.services.stream_processor.validate_model_id",
            return_value=True,
        ):
            processor._validate_and_set_model()

        assert processor.model_id == "body-model"
        assert processor.model_user_id == "u"
class TestGetDataFromApiKeySourceUnion:
    """``_get_data_from_api_key`` merges the primary source with the extras:
    primary first, duplicates removed."""

    def _make_sp(self):
        """Build a ``StreamProcessor`` with an empty body for user ``u``."""
        from application.api.answer.services.stream_processor import (
            StreamProcessor,
        )

        return StreamProcessor({}, {"sub": "u"})

    def test_union_primary_and_extras(self, pg_conn):
        """Primary plus two extras come back in that order."""
        from application.storage.db.repositories.agents import AgentsRepository
        from application.storage.db.repositories.sources import SourcesRepository

        owner = "u-merge-both"
        repo = SourcesRepository(pg_conn)
        primary, extra1, extra2 = (
            repo.create(name=label, user_id=owner)
            for label in ("primary", "extra1", "extra2")
        )
        agent = AgentsRepository(pg_conn).create(
            owner,
            "agent-merge",
            "published",
            key="merge-key",
            source_id=str(primary["id"]),
            extra_source_ids=[str(extra1["id"]), str(extra2["id"])],
            retriever="hybrid",
            chunks=5,
        )
        assert agent is not None

        processor = self._make_sp()
        with _patch_db(pg_conn):
            data = processor._get_data_from_api_key("merge-key")

        expected_order = [str(src["id"]) for src in (primary, extra1, extra2)]
        assert [s["id"] for s in data["sources"]] == expected_order
        assert data["source"] == str(primary["id"])

    def test_only_primary(self, pg_conn):
        """With no extras, the list holds just the primary source."""
        from application.storage.db.repositories.agents import AgentsRepository
        from application.storage.db.repositories.sources import SourcesRepository

        owner = "u-merge-primary-only"
        primary = SourcesRepository(pg_conn).create(
            name="primary", user_id=owner,
        )
        primary_id = str(primary["id"])
        AgentsRepository(pg_conn).create(
            owner,
            "primary-only",
            "published",
            key="primary-only-key",
            source_id=primary_id,
            extra_source_ids=[],
        )

        processor = self._make_sp()
        with _patch_db(pg_conn):
            data = processor._get_data_from_api_key("primary-only-key")

        assert [s["id"] for s in data["sources"]] == [primary_id]
        assert data["source"] == primary_id

    def test_only_extras(self, pg_conn):
        """With no primary, the extras are listed and ``source`` is ``None``."""
        from application.storage.db.repositories.agents import AgentsRepository
        from application.storage.db.repositories.sources import SourcesRepository

        owner = "u-merge-extras-only"
        first = SourcesRepository(pg_conn).create(name="e1", user_id=owner)
        second = SourcesRepository(pg_conn).create(name="e2", user_id=owner)
        extra_ids = [str(first["id"]), str(second["id"])]
        AgentsRepository(pg_conn).create(
            owner,
            "extras-only",
            "published",
            key="extras-only-key",
            extra_source_ids=[str(first["id"]), str(second["id"])],
        )

        processor = self._make_sp()
        with _patch_db(pg_conn):
            data = processor._get_data_from_api_key("extras-only-key")

        assert [s["id"] for s in data["sources"]] == extra_ids
        assert data["source"] is None

    def test_dedupe_primary_repeated_in_extras(self, pg_conn):
        """A primary id repeated among the extras appears only once."""
        from application.storage.db.repositories.agents import AgentsRepository
        from application.storage.db.repositories.sources import SourcesRepository

        owner = "u-merge-dedupe"
        primary = SourcesRepository(pg_conn).create(
            name="dup-primary", user_id=owner,
        )
        extra = SourcesRepository(pg_conn).create(
            name="dup-extra", user_id=owner,
        )
        primary_id, extra_id = str(primary["id"]), str(extra["id"])
        AgentsRepository(pg_conn).create(
            owner,
            "dedupe",
            "published",
            key="dedupe-key",
            source_id=primary_id,
            extra_source_ids=[primary_id, extra_id],
        )

        processor = self._make_sp()
        with _patch_db(pg_conn):
            data = processor._get_data_from_api_key("dedupe-key")

        assert [s["id"] for s in data["sources"]] == [primary_id, extra_id]
class TestAgentBoundFieldsAuthoritative:
    """End-to-end regression: the agent's source, model, chunks and
    retriever win over whatever the request body sends."""

    def test_agent_values_win_over_body(self, pg_conn):
        """Every body field that conflicts with the agent is ignored."""
        from application.api.answer.services.stream_processor import (
            StreamProcessor,
        )
        from application.storage.db.repositories.agents import AgentsRepository
        from application.storage.db.repositories.sources import SourcesRepository

        owner = "u-regr-agent-authority"
        primary = SourcesRepository(pg_conn).create(
            name="primary", user_id=owner,
        )
        extra = SourcesRepository(pg_conn).create(
            name="extra", user_id=owner,
        )
        primary_id, extra_id = str(primary["id"]), str(extra["id"])
        AgentsRepository(pg_conn).create(
            owner,
            "authoritative",
            "published",
            key="auth-key",
            source_id=primary_id,
            extra_source_ids=[extra_id],
            default_model_id="model-A",
            retriever="hybrid",
            chunks=5,
        )

        # The body disagrees with the agent on every field.
        conflicting_body = {
            "api_key": "auth-key",
            "model_id": "body-model-Z",
            "retriever": "duckdb",
            "chunks": 99,
            "active_docs": "body-source-id",
        }
        processor = StreamProcessor(conflicting_body, {"sub": owner})

        with _patch_db(pg_conn), patch(
            "application.api.answer.services.stream_processor.validate_model_id",
            return_value=True,
        ), patch(
            "application.api.answer.services.stream_processor.get_default_model_id",
            return_value="system-default",
        ):
            processor._configure_agent()
            processor._validate_and_set_model()
            processor._configure_source()
            processor._configure_retriever()

        assert processor.model_id == "model-A"
        assert processor.model_user_id == owner
        assert processor.agent_config["default_model_id"] == "model-A"
        assert processor.retriever_config["chunks"] == 5
        assert processor.retriever_config["retriever_name"] == "hybrid"
        assert processor.source == {"active_docs": [primary_id, extra_id]}

View File

@@ -6,6 +6,7 @@ from unittest.mock import MagicMock, patch
import pytest
from flask import Flask
from sqlalchemy import text
@pytest.fixture
@@ -256,9 +257,39 @@ class TestPaginatedSources:
for key in (
"id", "name", "date", "model", "location", "tokens",
"retriever", "syncFrequency", "provider", "isNested", "type",
"ingestStatus",
):
assert key in row
def test_exposes_stalled_ingest_status(self, app, pg_conn):
    """A source whose chunk-progress row has status 'stalled' is listed
    with ``ingestStatus == "failed"``, so the UI can badge it.
    """
    from application.api.user.sources.routes import PaginatedSources

    user = "u-ingest-status"
    source = _seed_source(pg_conn, user, name="stalled-doc", type="file")
    insert_stalled_progress = text(
        """
        INSERT INTO ingest_chunk_progress (
            source_id, total_chunks, embedded_chunks, last_index,
            status
        )
        VALUES (CAST(:sid AS uuid), 907, 9, 8, 'stalled')
        """
    )
    pg_conn.execute(insert_stalled_progress, {"sid": str(source["id"])})

    with _patch_db(pg_conn), app.test_request_context(
        "/api/sources/paginated?page=1&rows=10"
    ):
        from flask import request

        request.decoded_token = {"sub": user}
        response = PaginatedSources().get()

    first_row = response.json["paginated"][0]
    assert first_row["ingestStatus"] == "failed"
class TestDeleteOldIndexes:
def test_returns_401_unauthenticated(self, app):
@@ -553,6 +584,35 @@ class TestSyncSource:
assert response.status_code == 200
assert response.json["task_id"] == "task-123"
def test_normalizes_dict_remote_data_before_dispatch(self, app, pg_conn):
    """JSON-dict ``remote_data`` is reduced to its URL string (with the
    ``crawler`` loader) before the sync task is dispatched."""
    from application.api.user.sources.routes import SyncSource

    user = "u-normalize"
    remote_payload = {"url": "https://example.com", "provider": "crawler"}
    source = _seed_source(
        pg_conn,
        user,
        name="crawl-src",
        type="crawler",
        remote_data=json.dumps(remote_payload),
    )
    queued_task = MagicMock(id="task-norm")

    with _patch_db(pg_conn), patch(
        "application.api.user.sources.routes.sync_source.delay",
        return_value=queued_task,
    ) as mock_delay, app.test_request_context(
        "/api/sync_source",
        method="POST",
        json={"source_id": str(source["id"])},
    ):
        from flask import request

        request.decoded_token = {"sub": user}
        response = SyncSource().post()

    assert response.status_code == 200
    dispatched = mock_delay.call_args.kwargs
    assert dispatched["source_data"] == "https://example.com"
    assert dispatched["loader"] == "crawler"
def test_sync_task_raises_returns_400(self, app, pg_conn):
from application.api.user.sources.routes import SyncSource
@@ -576,6 +636,135 @@ class TestSyncSource:
assert response.status_code == 400
class TestReingestSource:
    """``ReingestSource.post``: auth, validation, dispatch and cleanup."""

    @staticmethod
    def _post(app, resource_cls, payload, token):
        """POST ``payload`` to the reingest route as ``token`` and return the response."""
        with app.test_request_context(
            "/api/sources/reingest", method="POST", json=payload
        ):
            from flask import request

            request.decoded_token = token
            return resource_cls().post()

    def test_returns_401_unauthenticated(self, app):
        """No decoded token → 401."""
        from application.api.user.sources.routes import ReingestSource

        response = self._post(app, ReingestSource, {"source_id": "x"}, None)
        assert response.status_code == 401

    def test_returns_400_missing_id(self, app):
        """A body without ``source_id`` → 400."""
        from application.api.user.sources.routes import ReingestSource

        response = self._post(app, ReingestSource, {}, {"sub": "u"})
        assert response.status_code == 400

    def test_returns_404_missing_source(self, app, pg_conn):
        """A well-formed id that matches no source → 404."""
        from application.api.user.sources.routes import ReingestSource

        payload = {"source_id": "00000000-0000-0000-0000-000000000000"}
        with _patch_db(pg_conn):
            response = self._post(app, ReingestSource, payload, {"sub": "u"})
        assert response.status_code == 404

    def test_triggers_reingest_task(self, app, pg_conn):
        """A valid request enqueues the reingest task and returns its id."""
        from application.api.user.sources.routes import ReingestSource

        user = "u-reingest"
        source = _seed_source(pg_conn, user, name="stalled-src", type="file")
        queued_task = MagicMock(id="reingest-task-1")

        with _patch_db(pg_conn), patch(
            "application.api.user.sources.routes.reingest_source_task.delay",
            return_value=queued_task,
        ) as mock_delay:
            response = self._post(
                app, ReingestSource, {"source_id": str(source["id"])},
                {"sub": user},
            )

        assert response.status_code == 200
        assert response.json["task_id"] == "reingest-task-1"
        dispatched = mock_delay.call_args.kwargs
        assert dispatched["source_id"] == str(source["id"])
        assert dispatched["user"] == user
        # Scoped idempotency key engages the task's lease so repeated
        # clicks collapse onto one reingest instead of racing.
        assert dispatched["idempotency_key"] == (
            f"reingest-source:{user}:{source['id']}"
        )

    def test_clears_stalled_ingest_progress_row(self, app, pg_conn):
        """Reingest removes the stale chunk-progress row, so the sources
        list stops deriving a 'failed' ingest status for the source.
        """
        from application.api.user.sources.routes import ReingestSource

        user = "u-reingest-clear"
        source = _seed_source(pg_conn, user, name="stalled-doc", type="file")
        source_param = {"sid": str(source["id"])}
        pg_conn.execute(
            text(
                """
                INSERT INTO ingest_chunk_progress (
                    source_id, total_chunks, embedded_chunks, last_index,
                    status
                )
                VALUES (CAST(:sid AS uuid), 100, 9, 8, 'stalled')
                """
            ),
            source_param,
        )
        queued_task = MagicMock(id="reingest-task-2")

        with _patch_db(pg_conn), patch(
            "application.api.user.sources.routes.reingest_source_task.delay",
            return_value=queued_task,
        ):
            response = self._post(
                app, ReingestSource, {"source_id": str(source["id"])},
                {"sub": user},
            )

        assert response.status_code == 200
        leftover_rows = pg_conn.execute(
            text(
                "SELECT count(*) FROM ingest_chunk_progress "
                "WHERE source_id = CAST(:sid AS uuid)"
            ),
            source_param,
        ).scalar()
        assert leftover_rows == 0

    def test_reingest_task_raises_returns_400(self, app, pg_conn):
        """A failure while enqueueing the task surfaces as 400."""
        from application.api.user.sources.routes import ReingestSource

        user = "u-reingest-fail"
        source = _seed_source(pg_conn, user, name="fail-src", type="file")

        with _patch_db(pg_conn), patch(
            "application.api.user.sources.routes.reingest_source_task.delay",
            side_effect=RuntimeError("boom"),
        ):
            response = self._post(
                app, ReingestSource, {"source_id": str(source["id"])},
                {"sub": user},
            )

        assert response.status_code == 400
class TestDirectoryStructure:
def test_returns_401_unauthenticated(self, app):
from application.api.user.sources.routes import DirectoryStructure

View File

@@ -417,3 +417,181 @@ class TestSuccessfulRunClearsLease:
assert row[0] == "completed"
assert row[1] is None
assert row[2] is None
@pytest.mark.unit
class TestSynthesizedKeyGuardsKeylessDispatch:
    """A dispatch with no explicit key but with a ``source_id`` is still
    poison-guarded: the wrapper derives a deterministic ``auto:`` key from
    the task name and ``source_id``.
    """

    def test_keyless_with_source_id_records_dedup_row(self, pg_conn):
        """A keyless run writes a completed row under ``auto:ingest:<source_id>``."""
        from application.api.user.idempotency import with_idempotency

        @with_idempotency(task_name="ingest")
        def task(self, idempotency_key=None, source_id=None):
            return {"ran": True}

        with _patch_decorator_db(pg_conn):
            outcome = task(_fake_celery_self(), source_id="src-abc")

        assert outcome == {"ran": True}
        dedup_row = _row_for(pg_conn, "auto:ingest:src-abc")
        assert dedup_row is not None
        assert dedup_row[0] == "ingest"
        assert dedup_row[2] == "completed"

    def test_synthesized_key_stable_across_redeliveries(self, pg_conn):
        """Same ``source_id`` → same key, so a redelivery returns the
        cached result instead of running the body again.
        """
        from application.api.user.idempotency import with_idempotency

        calls = {"count": 0}

        @with_idempotency(task_name="ingest")
        def task(self, idempotency_key=None, source_id=None):
            calls["count"] += 1
            return {"n": calls["count"]}

        with _patch_decorator_db(pg_conn):
            delivered = task(_fake_celery_self(), source_id="src-1")
            redelivered = task(_fake_celery_self(), source_id="src-1")

        assert delivered == redelivered == {"n": 1}
        assert calls["count"] == 1

    def test_poison_guard_trips_for_keyless_dispatch(self, pg_conn):
        """The core fix: a keyless, always-failing dispatch is bounded. The
        guard trips after ``MAX_TASK_ATTEMPTS`` without any explicit key.
        """
        from application.api.user.idempotency import (
            MAX_TASK_ATTEMPTS, with_idempotency,
        )

        calls = {"count": 0}

        @with_idempotency(task_name="ingest")
        def task(self, idempotency_key=None, source_id=None):
            calls["count"] += 1
            raise RuntimeError("OOM-style failure")

        with _patch_decorator_db(pg_conn):
            # Use up every allowed attempt; each one re-raises the failure.
            for _ in range(MAX_TASK_ATTEMPTS):
                with pytest.raises(RuntimeError):
                    task(_fake_celery_self(), source_id="src-poison")
            # One more delivery is answered by the guard, not the body.
            guarded = task(_fake_celery_self(), source_id="src-poison")

        assert calls["count"] == MAX_TASK_ATTEMPTS
        assert guarded["success"] is False
        assert "poison-loop" in guarded["error"]
        assert _row_for(pg_conn, "auto:ingest:src-poison")[2] == "failed"

    def test_no_source_id_no_key_runs_unguarded(self, pg_conn):
        """With neither an explicit key nor a ``source_id``, the task runs
        straight through and neither DB session helper is called.
        """
        from application.api.user.idempotency import with_idempotency

        @with_idempotency(task_name="store_attachment")
        def task(self, idempotency_key=None):
            return {"ran": True}

        with patch(
            "application.api.user.idempotency.db_session"
        ) as mock_session, patch(
            "application.api.user.idempotency.db_readonly"
        ) as mock_readonly:
            outcome = task(_fake_celery_self())

        assert outcome == {"ran": True}
        assert mock_session.call_count == 0
        assert mock_readonly.call_count == 0

    def test_explicit_key_takes_precedence_over_source_id(self, pg_conn):
        """An explicit key wins; no row is written under the ``auto:`` key."""
        from application.api.user.idempotency import with_idempotency

        @with_idempotency(task_name="ingest")
        def task(self, idempotency_key=None, source_id=None):
            return {"ran": True}

        with _patch_decorator_db(pg_conn):
            task(
                _fake_celery_self(),
                idempotency_key="explicit-k",
                source_id="src-x",
            )

        assert _row_for(pg_conn, "explicit-k") is not None
        assert _row_for(pg_conn, "auto:ingest:src-x") is None
@pytest.mark.unit
class TestPoisonHook:
    """``on_poison`` is called when the poison guard trips, receiving the
    task name and the task's bound arguments; it stays silent on success.
    """

    def test_hook_invoked_with_bound_args_on_poison(self, pg_conn):
        """The hook fires exactly once, with the name and bound args."""
        from application.api.user.idempotency import (
            MAX_TASK_ATTEMPTS,
            with_idempotency,
        )

        seen = []

        def _record(task_name, bound):
            seen.append((task_name, bound))

        @with_idempotency(task_name="ingest", on_poison=_record)
        def task(self, idempotency_key=None, source_id=None):
            raise RuntimeError("never converges")

        with _patch_decorator_db(pg_conn):
            # Use up every allowed attempt; each one re-raises the failure.
            for _attempt in range(MAX_TASK_ATTEMPTS):
                with pytest.raises(RuntimeError):
                    task(_fake_celery_self(), source_id="src-h")
            # One more dispatch lands on the poison-guard branch.
            task(_fake_celery_self(), source_id="src-h")

        assert len(seen) == 1
        reported_name, reported_args = seen[0]
        assert reported_name == "ingest"
        assert reported_args["source_id"] == "src-h"

    def test_hook_not_invoked_on_success(self, pg_conn):
        """A task that succeeds never triggers the hook."""
        from application.api.user.idempotency import with_idempotency

        invocations = []

        def _collect(*a):
            invocations.append(a)

        @with_idempotency(task_name="ingest", on_poison=_collect)
        def task(self, idempotency_key=None, source_id=None):
            return {"ok": True}

        with _patch_decorator_db(pg_conn):
            task(_fake_celery_self(), source_id="src-ok")

        assert invocations == []

    def test_hook_failure_does_not_break_poison_return(self, pg_conn):
        """An exception raised inside the hook leaves the poison result intact."""
        from application.api.user.idempotency import (
            MAX_TASK_ATTEMPTS,
            with_idempotency,
        )

        def _exploding_hook(task_name, bound):
            raise ValueError("hook blew up")

        @with_idempotency(task_name="ingest", on_poison=_exploding_hook)
        def task(self, idempotency_key=None, source_id=None):
            raise RuntimeError("never converges")

        with _patch_decorator_db(pg_conn):
            for _attempt in range(MAX_TASK_ATTEMPTS):
                with pytest.raises(RuntimeError):
                    task(_fake_celery_self(), source_id="src-bad")
            outcome = task(_fake_celery_self(), source_id="src-bad")

        assert outcome["success"] is False
        assert "poison-loop" in outcome["error"]

Some files were not shown because too many files have changed in this diff Show More