feat: better logging

This commit is contained in:
Alex
2026-04-28 00:14:43 +01:00
parent 318de18d43
commit 82c71be819
21 changed files with 725 additions and 29 deletions

View File

@@ -274,7 +274,14 @@ class ToolExecutor:
if tool_id is None or action_name is None:
error_message = f"Error: Failed to parse LLM tool call. Tool name: {llm_name}"
logger.error(error_message)
logger.error(
"tool_call_parse_failed",
extra={
"llm_class_name": llm_class_name,
"llm_tool_name": llm_name,
"call_id": call_id,
},
)
tool_call_data = {
"tool_name": "unknown",
@@ -289,7 +296,15 @@ class ToolExecutor:
if tool_id not in tools_dict:
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
logger.error(error_message)
logger.error(
"tool_id_not_found",
extra={
"tool_id": tool_id,
"llm_tool_name": llm_name,
"call_id": call_id,
"available_tool_count": len(tools_dict),
},
)
tool_call_data = {
"tool_name": "unknown",
@@ -356,7 +371,15 @@ class ToolExecutor:
f"Failed to load tool '{tool_data.get('name')}' (tool_id key={tool_id}): "
"missing 'id' on tool row."
)
logger.error(error_message)
logger.error(
"tool_load_failed",
extra={
"tool_name": tool_data.get("name"),
"tool_id": tool_id,
"action_name": action_name,
"call_id": call_id,
},
)
tool_call_data["result"] = error_message
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
@@ -451,10 +474,12 @@ class ToolExecutor:
row_id = tool_data.get("id")
if not row_id:
logger.error(
"Tool data missing 'id' for tool name=%s (enumerate-key tool_id=%s); "
"skipping load to avoid binding a non-UUID downstream.",
tool_data.get("name"),
tool_id,
"tool_missing_row_id",
extra={
"tool_name": tool_data.get("name"),
"tool_id": tool_id,
"action_name": action_name,
},
)
return None
tool_config["tool_id"] = str(row_id)

View File

@@ -9,6 +9,7 @@ from jose import jwt
from application.auth import handle_auth
from application.core import log_context
from application.core.logging_config import setup_logging
setup_logging()
@@ -112,6 +113,38 @@ def generate_token():
return jsonify({"error": "Token generation not allowed in current auth mode"}), 400
_LOG_CTX_TOKEN_ATTR = "_log_ctx_token"
@app.before_request
def _bind_log_context():
    """Bind ``activity_id`` + ``endpoint`` for the duration of this request.

    Runs before ``authenticate_request``; ``user_id`` is overlaid in a
    follow-up handler once the JWT has been decoded.
    """
    # CORS preflight requests carry no auth and do no work worth correlating.
    if request.method == "OPTIONS":
        return None

    new_activity_id = str(uuid.uuid4())
    request.activity_id = new_activity_id
    reset_token = log_context.bind(
        activity_id=new_activity_id,
        endpoint=request.endpoint,
    )
    # Stash the token on the request so teardown can unwind exactly this bind.
    setattr(request, _LOG_CTX_TOKEN_ATTR, reset_token)
    return None
@app.teardown_request
def _reset_log_context(_exc):
    """Unwind the request-level bind made in the before_request handler."""
    # SSE streams keep yielding after teardown fires, but a2wsgi runs each
    # request inside ``copy_context().run(...)``, so this reset doesn't
    # leak into the stream's view of the context.
    token = getattr(request, _LOG_CTX_TOKEN_ATTR, None)
    if token is None:
        return
    log_context.reset(token)
@app.before_request
def enforce_stt_request_size_limits():
if request.method == "OPTIONS":
@@ -148,6 +181,21 @@ def authenticate_request():
request.decoded_token = decoded_token
@app.before_request
def _bind_user_id_to_log_context():
    """Overlay ``user_id`` once the JWT has been decoded upstream."""
    # Registered after ``authenticate_request`` (Flask runs before_request
    # handlers in registration order), so ``request.decoded_token`` is
    # populated by the time we read it. ``teardown_request`` unwinds the
    # whole request-level bind, so no separate reset token is needed here.
    if request.method == "OPTIONS":
        return None
    decoded = getattr(request, "decoded_token", None)
    if not isinstance(decoded, dict):
        return None
    subject = decoded.get("sub")
    if subject:
        log_context.bind(user_id=subject)
    return None
@app.after_request
def after_request(response: Response) -> Response:
"""Add CORS headers for the pure Flask development entrypoint."""

View File

@@ -1,8 +1,17 @@
import inspect
import logging
import threading
from celery import Celery
from application.core import log_context
from application.core.settings import settings
from celery.signals import setup_logging, worker_process_init, worker_ready
from celery.signals import (
setup_logging,
task_postrun,
task_prerun,
worker_process_init,
worker_ready,
)
def make_celery(app_name=__name__):
@@ -41,6 +50,54 @@ def _dispose_db_engine_on_fork(*args, **kwargs):
dispose_engine()
# Most tasks in this repo accept ``user`` where the log context wants
# ``user_id``; map task parameter names to context keys explicitly.
_TASK_PARAM_TO_CTX_KEY: dict[str, str] = {
    "user": "user_id",
    "user_id": "user_id",
    "agent_id": "agent_id",
    "conversation_id": "conversation_id",
}

# ``log_context.bind`` tokens keyed by task_id so ``task_postrun`` can
# reset exactly the bind made in ``task_prerun`` for the same task.
_task_log_tokens: dict[str, object] = {}
@task_prerun.connect
def _bind_task_log_context(task_id, task, args, kwargs, **_):
    """Bind per-task log context derived from the task's call arguments."""
    # Resolve task args by parameter name — nearly every task in this repo
    # is called positionally, so ``kwargs.get('user')`` would bind nothing.
    try:
        arguments = inspect.signature(task.run).bind_partial(*args, **kwargs).arguments
    except (TypeError, ValueError):
        arguments = dict(kwargs)

    ctx = {"activity_id": task_id}
    for param_name, value in arguments.items():
        mapped_key = _TASK_PARAM_TO_CTX_KEY.get(param_name)
        if mapped_key and value:
            ctx[mapped_key] = value
    _task_log_tokens[task_id] = log_context.bind(**ctx)
@task_postrun.connect
def _unbind_task_log_context(task_id, **_):
    """Reset the bind made by ``_bind_task_log_context`` when the task ends."""
    # ``task_postrun`` fires on both success and failure. Required for
    # Celery: unlike the Flask path, tasks aren't isolated in their own
    # ``copy_context().run(...)``, so a missing reset would leak the
    # bind onto the next task on the same worker.
    token = _task_log_tokens.pop(task_id, None)
    if token is not None:
        try:
            log_context.reset(token)
        except ValueError:
            # task_prerun and task_postrun ran on different threads (non-default
            # Celery pool); the token isn't valid in this context. Drop it.
            logging.getLogger(__name__).debug(
                "log_context reset skipped for task %s", task_id
            )
@worker_ready.connect
def _run_version_check(*args, **kwargs):
"""Kick off the anonymous version check on worker startup.

View File

@@ -0,0 +1,57 @@
"""Per-activity logging context backed by ``contextvars``.
The ``_ContextFilter`` installed by ``logging_config.setup_logging`` stamps
every ``LogRecord`` emitted inside a ``bind`` block with the bound keys, so
they land as first-class attributes on the OTLP log export rather than being
buried inside formatted message bodies.
A single ``ContextVar`` holds a dict so nested binds reset atomically (LIFO)
via the token returned by ``bind``.
"""
from __future__ import annotations
from contextvars import ContextVar, Token
from typing import Mapping
_CTX_KEYS: frozenset[str] = frozenset(
    {
        "activity_id",
        "parent_activity_id",
        "user_id",
        "agent_id",
        "conversation_id",
        "endpoint",
        "model",
    }
)

_ctx: ContextVar[Mapping[str, str]] = ContextVar("log_ctx", default={})


def bind(**kwargs: object) -> Token:
    """Layer *kwargs* over the current context; return a token for ``reset``.

    Only keys in :data:`_CTX_KEYS` are accepted — anything else is silently
    ignored so a misspelled key can't stamp a stray field onto every record.
    ``None`` values are likewise skipped (a missing attribute is more useful
    than the literal string ``"None"``). Values are coerced with ``str``.
    """
    merged = dict(_ctx.get())
    for key, value in kwargs.items():
        if key not in _CTX_KEYS or value is None:
            continue
        merged[key] = str(value)
    return _ctx.set(merged)


def reset(token: Token) -> None:
    """Unwind to the snapshot captured by the ``bind`` that produced *token*."""
    _ctx.reset(token)


def snapshot() -> Mapping[str, str]:
    """Return the live context dict. Treat as read-only; mutate via :func:`bind`."""
    return _ctx.get()

View File

@@ -2,6 +2,36 @@ import logging
import os
from logging.config import dictConfig
from application.core.log_context import snapshot as _ctx_snapshot
# Loggers with ``propagate=False`` don't share root's handlers, so the
# context filter has to be installed on their handlers directly.
# NOTE(review): list assumed to match the servers/workers this app runs
# under (uvicorn / gunicorn / celery) — extend if another is added.
_NON_PROPAGATING_LOGGERS: tuple[str, ...] = (
    "uvicorn",
    "uvicorn.access",
    "uvicorn.error",
    "celery.app.trace",
    "celery.worker.strategy",
    "gunicorn.error",
    "gunicorn.access",
)
class _ContextFilter(logging.Filter):
    """Stamp the current ``log_context`` snapshot onto every ``LogRecord``.

    Must be installed on **handlers**, not loggers: Python skips logger-level
    filters when a child logger's record propagates up. The ``hasattr`` guard
    keeps an explicit ``logger.info(..., extra={...})`` from being overwritten.
    """

    def filter(self, record: logging.LogRecord) -> bool:
        context = _ctx_snapshot()
        for field, stamped in context.items():
            if hasattr(record, field):
                # ``extra={...}`` already set this attribute — leave it alone.
                continue
            setattr(record, field, stamped)
        return True
def _otlp_logs_enabled() -> bool:
"""Return True when the user has opted in to OTLP log export.
@@ -60,3 +90,23 @@ def setup_logging() -> None:
for handler in preserved_handlers:
if handler not in root.handlers:
root.addHandler(handler)
_install_context_filter()
def _install_context_filter() -> None:
    """Attach :class:`_ContextFilter` to root's handlers plus every handler
    on the known non-propagating loggers. Handlers that already carry one
    are skipped so repeat ``setup_logging`` calls don't stack filters.
    """
    targets = [logging.getLogger()] + [
        logging.getLogger(name) for name in _NON_PROPAGATING_LOGGERS
    ]
    for target in targets:
        for handler in target.handlers:
            if any(isinstance(f, _ContextFilter) for f in handler.filters):
                continue
            handler.addFilter(_ContextFilter())

View File

@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
class AnthropicLLM(BaseLLM):
provider_name = "anthropic"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):

View File

@@ -1,5 +1,6 @@
import logging
from abc import ABC, abstractmethod
from typing import ClassVar
from application.cache import gen_cache, stream_cache
@@ -10,6 +11,10 @@ logger = logging.getLogger(__name__)
class BaseLLM(ABC):
# Stamped onto the ``llm_stream_start`` event so dashboards can group
# calls by vendor. Subclasses override.
provider_name: ClassVar[str] = "unknown"
def __init__(
self,
decoded_token=None,
@@ -206,6 +211,21 @@ class BaseLLM(ABC):
)
def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
# Attachments arrive as ``_usage_attachments`` from ``Agent._llm_gen``;
# the ``stream_token_usage`` decorator pops that key, but the log
# fires before the decorator runs so it's still in ``kwargs`` here.
logging.info(
"llm_stream_start",
extra={
"model": model,
"provider": self.provider_name,
"message_count": len(messages) if messages is not None else 0,
"has_attachments": bool(
kwargs.get("_usage_attachments") or kwargs.get("attachments")
),
"has_tools": bool(tools),
},
)
decorators = [stream_cache, stream_token_usage]
return self._execute_with_fallback(
"_raw_gen_stream",

View File

@@ -6,6 +6,8 @@ DOCSGPT_BASE_URL = "https://oai.arc53.com"
DOCSGPT_MODEL = "docsgpt"
class DocsGPTAPILLM(OpenAILLM):
provider_name = "docsgpt"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=DOCSGPT_API_KEY,

View File

@@ -10,6 +10,8 @@ from application.storage.storage_creator import StorageCreator
class GoogleLLM(BaseLLM):
provider_name = "google"
def __init__(
self, api_key=None, user_api_key=None, decoded_token=None, *args, **kwargs
):
@@ -541,22 +543,6 @@ class GoogleLLM(BaseLLM):
config.response_mime_type = "application/json"
# Check if we have both tools and file attachments
has_attachments = False
for message in messages:
for part in message.parts:
if hasattr(part, "file_data") and part.file_data is not None:
has_attachments = True
break
if has_attachments:
break
messages_summary = self._summarize_messages_for_log(messages)
logging.info(
"GoogleLLM: Starting stream generation. Model: %s, Messages: %s, Has attachments: %s",
model,
messages_summary,
has_attachments,
)
response = client.models.generate_content_stream(
model=model,
contents=messages,

View File

@@ -5,6 +5,8 @@ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
class GroqLLM(OpenAILLM):
provider_name = "groq"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=api_key or settings.GROQ_API_KEY or settings.API_KEY,

View File

@@ -26,6 +26,8 @@ class LlamaSingleton:
class LlamaCpp(BaseLLM):
provider_name = "llama_cpp"
def __init__(
self,
api_key=None,

View File

@@ -5,6 +5,8 @@ NOVITA_BASE_URL = "https://api.novita.ai/openai"
class NovitaLLM(OpenAILLM):
provider_name = "novita"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=api_key or settings.NOVITA_API_KEY or settings.API_KEY,

View File

@@ -5,6 +5,8 @@ OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"
class OpenRouterLLM(OpenAILLM):
provider_name = "openrouter"
def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
super().__init__(
api_key=api_key or settings.OPEN_ROUTER_API_KEY or settings.API_KEY,

View File

@@ -61,6 +61,7 @@ def _truncate_base64_for_logging(messages):
class OpenAILLM(BaseLLM):
provider_name = "openai"
def __init__(
self,

View File

@@ -3,6 +3,7 @@ from application.core.settings import settings
class PremAILLM(BaseLLM):
provider_name = "premai"
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
from premai import Prem

View File

@@ -59,6 +59,7 @@ class LineIterator:
class SagemakerAPILLM(BaseLLM):
provider_name = "sagemaker"
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
import boto3

View File

@@ -1,11 +1,13 @@
import datetime
import functools
import inspect
import time
import logging
import uuid
from typing import Any, Callable, Dict, Generator, List
from application.core import log_context
from application.storage.db.repositories.stack_logs import StackLogsRepository
from application.storage.db.session import db_session
@@ -78,22 +80,96 @@ def log_activity() -> Callable:
user = data.get("user", "local")
api_key = data.get("user_api_key", "")
query = kwargs.get("query", getattr(args[0], "query", ""))
agent_id = getattr(args[0], "agent_id", None) or kwargs.get("agent_id")
conversation_id = (
kwargs.get("conversation_id")
or getattr(args[0], "conversation_id", None)
)
model = getattr(args[0], "gpt_model", None) or getattr(args[0], "model", None)
# Capture the surrounding activity_id before overlaying ours,
# so nested activities record the parent → child link.
parent_activity_id = log_context.snapshot().get("activity_id")
context = LogContext(endpoint, activity_id, user, api_key, query)
kwargs["log_context"] = context
logging.info(
f"Starting activity: {endpoint} - {activity_id} - User: {user}"
ctx_token = log_context.bind(
activity_id=activity_id,
parent_activity_id=parent_activity_id,
user_id=user,
agent_id=agent_id,
conversation_id=conversation_id,
endpoint=endpoint,
model=model,
)
generator = func(*args, **kwargs)
yield from _consume_and_log(generator, context)
started_at = time.monotonic()
logging.info(
"activity_started",
extra={
"activity_id": activity_id,
"parent_activity_id": parent_activity_id,
"user_id": user,
"agent_id": agent_id,
"conversation_id": conversation_id,
"endpoint": endpoint,
"model": model,
},
)
error: BaseException | None = None
try:
generator = func(*args, **kwargs)
yield from _consume_and_log(generator, context)
except Exception as exc:
# Only ``Exception`` counts as an activity error; ``GeneratorExit``
# (consumer disconnected mid-stream) and ``KeyboardInterrupt``
# flow through the finally as ``status="ok"``, matching
# ``_consume_and_log``.
error = exc
raise
finally:
_emit_activity_finished(
activity_id=activity_id,
parent_activity_id=parent_activity_id,
user=user,
endpoint=endpoint,
started_at=started_at,
error=error,
)
log_context.reset(ctx_token)
return wrapper
return decorator
def _emit_activity_finished(
*,
activity_id: str,
parent_activity_id: str | None,
user: str,
endpoint: str,
started_at: float,
error: BaseException | None,
) -> None:
"""Emit the paired ``activity_finished`` event with duration and outcome."""
duration_ms = int((time.monotonic() - started_at) * 1000)
logging.info(
"activity_finished",
extra={
"activity_id": activity_id,
"parent_activity_id": parent_activity_id,
"user_id": user,
"endpoint": endpoint,
"duration_ms": duration_ms,
"status": "error" if error is not None else "ok",
"error_class": type(error).__name__ if error is not None else None,
},
)
def _consume_and_log(generator: Generator, context: "LogContext"):
try:
for item in generator:

View File

@@ -432,7 +432,15 @@ def run_agent_logic(agent_config, input_data):
"tool_calls": tool_calls,
"thought": thought,
}
logging.info(f"Agent response: {result}")
logging.info(
"agent_response",
extra={
"answer_length": len(response_full),
"source_count": len(source_log_docs),
"tool_call_count": len(tool_calls),
"thought_length": len(thought),
},
)
return result
except Exception as e:
logging.error(f"Error in run_agent_logic: {e}", exc_info=True)

View File

@@ -0,0 +1,161 @@
"""Unit tests for ``log_context`` + ``_ContextFilter``."""
from __future__ import annotations
import logging
import pytest
from application.core import log_context
from application.core.logging_config import _ContextFilter
@pytest.fixture(autouse=True)
def _clean_log_ctx():
    """Isolate every test: snapshot the log context on entry, restore on exit."""
    # The contextvar is module-scoped; snapshot at entry, restore at exit
    # to keep tests from leaking state into each other. A no-arg bind()
    # changes nothing but still yields a token for the enclosing state.
    token = log_context.bind()
    try:
        yield
    finally:
        log_context.reset(token)
@pytest.mark.unit
class TestBindAndSnapshot:
    """Contract tests for ``log_context.bind`` / ``reset`` / ``snapshot``."""

    def test_bind_returns_token_and_snapshot_reflects_overlay(self):
        token = log_context.bind(activity_id="a1", user_id="u1")
        assert log_context.snapshot() == {"activity_id": "a1", "user_id": "u1"}
        log_context.reset(token)
        assert log_context.snapshot() == {}

    def test_bind_drops_unknown_keys(self):
        # A typo'd key must never become a stamped record attribute.
        token = log_context.bind(activity_id="a1", not_a_real_key="boom")
        try:
            assert log_context.snapshot() == {"activity_id": "a1"}
        finally:
            log_context.reset(token)

    def test_bind_drops_none_values(self):
        # ``None`` is skipped so records don't carry the literal string "None".
        token = log_context.bind(activity_id="a1", agent_id=None)
        try:
            assert "agent_id" not in log_context.snapshot()
        finally:
            log_context.reset(token)

    def test_bind_coerces_values_to_str(self):
        token = log_context.bind(activity_id=42)
        try:
            assert log_context.snapshot()["activity_id"] == "42"
        finally:
            log_context.reset(token)

    def test_nested_bind_overlays_and_resets_lifo(self):
        outer = log_context.bind(activity_id="outer", user_id="u1")
        inner = log_context.bind(activity_id="inner", agent_id="agent-1")
        # Inner overrides activity_id, keeps user_id from outer, adds agent_id.
        assert log_context.snapshot() == {
            "activity_id": "inner",
            "user_id": "u1",
            "agent_id": "agent-1",
        }
        log_context.reset(inner)
        assert log_context.snapshot() == {"activity_id": "outer", "user_id": "u1"}
        log_context.reset(outer)
        assert log_context.snapshot() == {}

    def test_parent_activity_id_pattern(self):
        # Mirrors how ``log_activity`` links a nested activity to its parent.
        outer = log_context.bind(activity_id="parent-1")
        parent = log_context.snapshot().get("activity_id")
        inner = log_context.bind(activity_id="child-1", parent_activity_id=parent)
        try:
            snap = log_context.snapshot()
            assert snap["activity_id"] == "child-1"
            assert snap["parent_activity_id"] == "parent-1"
        finally:
            log_context.reset(inner)
            log_context.reset(outer)
@pytest.mark.unit
class TestContextFilter:
    """``_ContextFilter`` must stamp context keys without clobbering extras."""

    def _make_record(self, **extra) -> logging.LogRecord:
        # Build a bare LogRecord, optionally pre-populated the way
        # ``extra={...}`` would populate it before filters run.
        record = logging.LogRecord(
            name="test",
            level=logging.INFO,
            pathname=__file__,
            lineno=1,
            msg="hello",
            args=(),
            exc_info=None,
        )
        for k, v in extra.items():
            setattr(record, k, v)
        return record

    def test_stamps_record_with_context(self):
        token = log_context.bind(activity_id="a1", user_id="u1")
        try:
            record = self._make_record()
            # filter() must return True so the record is still emitted.
            assert _ContextFilter().filter(record) is True
            assert record.activity_id == "a1"
            assert record.user_id == "u1"
        finally:
            log_context.reset(token)

    def test_explicit_extra_wins_over_context(self):
        # extra={} sets attributes on the record before the filter runs;
        # the filter must not overwrite them.
        token = log_context.bind(activity_id="from-ctx")
        try:
            record = self._make_record(activity_id="from-extra")
            _ContextFilter().filter(record)
            assert record.activity_id == "from-extra"
        finally:
            log_context.reset(token)

    def test_no_op_when_context_empty(self):
        record = self._make_record()
        assert _ContextFilter().filter(record) is True
        assert not hasattr(record, "activity_id")
@pytest.mark.unit
class TestFilterWiringEndToEnd:
    """Regression guard: the filter must be installed on handlers, not on
    loggers — Python skips logger-level filters during propagation.
    """

    def test_propagated_record_gets_stamped(self):
        from application.core.logging_config import _install_context_filter

        captured: list[logging.LogRecord] = []

        class _Capture(logging.Handler):
            def emit(self, record):
                captured.append(record)

        root = logging.getLogger()
        # Save and restore root's handlers/level so this test doesn't
        # disturb global logging for the rest of the suite.
        saved_handlers = list(root.handlers)
        saved_level = root.level
        try:
            root.handlers = [_Capture()]
            root.setLevel(logging.DEBUG)
            _install_context_filter()
            child = logging.getLogger("test_log_context.propagation")
            child.setLevel(logging.DEBUG)
            token = log_context.bind(activity_id="propagated-id")
            try:
                child.info("from a child logger")
            finally:
                log_context.reset(token)
            assert captured, "Capture handler should have received the record"
            assert captured[0].activity_id == "propagated-id"
        finally:
            root.handlers = saved_handlers
            root.setLevel(saved_level)

View File

@@ -95,6 +95,96 @@ class TestGenMethods:
)
assert result == ["a", "b"]
@patch("application.llm.base.stream_cache", lambda f: f)
@patch("application.llm.base.stream_token_usage", lambda f: f)
def test_gen_stream_emits_llm_stream_start_event(self, caplog):
    """gen_stream must emit exactly one structured ``llm_stream_start`` record."""
    import logging as _logging

    class FakeProvider(StubLLM):
        # Overrides BaseLLM's "unknown" default so the provider field is pinned.
        provider_name = "fake-provider"

    llm = FakeProvider(raw_gen_stream_items=["x"])
    with caplog.at_level(_logging.INFO, logger="root"):
        list(
            llm.gen_stream(
                model="m1",
                messages=[{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hey"}],
                tools=[{"name": "t"}],
                _usage_attachments=[{"path": "/tmp/a.png"}],
            )
        )
    starts = [r for r in caplog.records if r.message == "llm_stream_start"]
    assert len(starts) == 1
    evt = starts[0]
    assert evt.model == "m1"
    assert evt.provider == "fake-provider"
    assert evt.message_count == 2
    # ``_usage_attachments`` is what ``Agent._llm_gen`` actually passes;
    # the alias check below covers the bare ``attachments=`` form.
    assert evt.has_attachments is True
    assert evt.has_tools is True
@patch("application.llm.base.stream_cache", lambda f: f)
@patch("application.llm.base.stream_token_usage", lambda f: f)
def test_gen_stream_recognises_attachments_kwarg_alias(self, caplog):
    """A bare ``attachments=`` kwarg must also flip ``has_attachments``."""
    import logging as _logging

    llm = StubLLM(raw_gen_stream_items=["x"])
    with caplog.at_level(_logging.INFO, logger="root"):
        list(
            llm.gen_stream(
                model="m1", messages=[], attachments=[{"path": "/tmp/a"}]
            )
        )
    evt = next(r for r in caplog.records if r.message == "llm_stream_start")
    assert evt.has_attachments is True
@patch("application.llm.base.stream_cache", lambda f: f)
@patch("application.llm.base.stream_token_usage", lambda f: f)
def test_gen_stream_emits_event_without_attachments_or_tools(self, caplog):
    """No messages / tools / attachments yields falsy event fields."""
    import logging as _logging

    llm = StubLLM(raw_gen_stream_items=["x"])
    with caplog.at_level(_logging.INFO, logger="root"):
        list(llm.gen_stream(model="m1", messages=[]))
    evt = next(r for r in caplog.records if r.message == "llm_stream_start")
    assert evt.message_count == 0
    assert evt.has_attachments is False
    assert evt.has_tools is False
    # BaseLLM default — concrete providers always override.
    assert evt.provider == "unknown"
@pytest.mark.unit
class TestProviderNameRegistry:
    """A new provider without ``provider_name`` would silently report
    ``provider="unknown"`` in telemetry. Pin the expected values here."""

    def test_provider_names_match_expectations(self):
        # Imports live inside the test so one broken provider module doesn't
        # take down collection of the whole file.
        from application.llm.anthropic import AnthropicLLM
        from application.llm.docsgpt_provider import DocsGPTAPILLM
        from application.llm.google_ai import GoogleLLM
        from application.llm.groq import GroqLLM
        from application.llm.llama_cpp import LlamaCpp
        from application.llm.novita import NovitaLLM
        from application.llm.open_router import OpenRouterLLM
        from application.llm.openai import OpenAILLM
        from application.llm.premai import PremAILLM
        from application.llm.sagemaker import SagemakerAPILLM

        assert OpenAILLM.provider_name == "openai"
        assert GoogleLLM.provider_name == "google"
        assert AnthropicLLM.provider_name == "anthropic"
        assert GroqLLM.provider_name == "groq"
        assert NovitaLLM.provider_name == "novita"
        assert OpenRouterLLM.provider_name == "openrouter"
        assert DocsGPTAPILLM.provider_name == "docsgpt"
        assert PremAILLM.provider_name == "premai"
        assert LlamaCpp.provider_name == "llama_cpp"
        assert SagemakerAPILLM.provider_name == "sagemaker"
@patch("application.llm.base.gen_cache", lambda f: f)
@patch("application.llm.base.gen_token_usage", lambda f: f)
def test_gen_passes_tools(self):

View File

@@ -153,4 +153,108 @@ class TestLogActivity:
):
list(failing_gen(FakeAgent()))
def test_log_activity_emits_lifecycle_events(self, caplog):
    """Decorated generators emit paired start/finish events sharing one id."""
    import logging as _logging

    from application.logging import log_activity

    class FakeAgent:
        endpoint = "test"
        user = "user1"
        user_api_key = "k"
        query = "q"
        agent_id = "agent-7"
        conversation_id = "conv-3"

    @log_activity()
    def gen(agent, log_context=None):
        yield "x"

    # Patch out the DB write; only the structured log records matter here.
    with patch("application.logging._log_activity_to_db"), \
            caplog.at_level(_logging.INFO, logger="root"):
        list(gen(FakeAgent()))

    messages = [r.message for r in caplog.records]
    assert "activity_started" in messages
    assert "activity_finished" in messages
    started = next(r for r in caplog.records if r.message == "activity_started")
    finished = next(r for r in caplog.records if r.message == "activity_finished")
    assert started.endpoint == "test"
    assert started.user_id == "user1"
    assert started.agent_id == "agent-7"
    assert started.conversation_id == "conv-3"
    assert started.parent_activity_id is None  # top-level activity
    assert finished.activity_id == started.activity_id
    assert finished.status == "ok"
    assert isinstance(finished.duration_ms, int)
    assert finished.duration_ms >= 0
    assert finished.error_class is None
def test_log_activity_records_parent_activity_id_when_nested(self, caplog):
    # Sub-agents / workflow_agents wrap an outer @log_activity gen;
    # the inner activity_started event must link to the outer's id.
    import logging as _logging

    from application.logging import log_activity

    class FakeAgent:
        endpoint = "outer"
        user = "user1"
        user_api_key = ""
        query = ""

    class InnerAgent:
        endpoint = "inner"
        user = "user1"
        user_api_key = ""
        query = ""

    @log_activity()
    def inner_gen(agent, log_context=None):
        yield "i"

    @log_activity()
    def outer_gen(agent, log_context=None):
        yield from inner_gen(InnerAgent())

    with patch("application.logging._log_activity_to_db"), \
            caplog.at_level(_logging.INFO, logger="root"):
        list(outer_gen(FakeAgent()))

    starts = [r for r in caplog.records if r.message == "activity_started"]
    assert len(starts) == 2
    # Records arrive in emission order: the outer activity starts first.
    outer_start, inner_start = starts
    assert outer_start.endpoint == "outer"
    assert outer_start.parent_activity_id is None
    assert inner_start.endpoint == "inner"
    assert inner_start.parent_activity_id == outer_start.activity_id
def test_log_activity_records_error_status_on_failure(self, caplog):
    """A mid-stream exception must still emit ``activity_finished`` with
    error status and the raising exception's class name."""
    import logging as _logging

    from application.logging import log_activity

    class FakeAgent:
        endpoint = "boom"
        user = "user1"
        user_api_key = ""
        query = ""

    @log_activity()
    def failing(agent, log_context=None):
        yield "before"
        raise ValueError("bad thing")

    with patch("application.logging._log_activity_to_db"), \
            caplog.at_level(_logging.INFO, logger="root"), \
            pytest.raises(ValueError):
        list(failing(FakeAgent()))

    finished = next(r for r in caplog.records if r.message == "activity_finished")
    assert finished.status == "error"
    assert finished.error_class == "ValueError"