Mirror of https://github.com/arc53/DocsGPT.git
feat: better logging
@@ -274,7 +274,14 @@ class ToolExecutor:
 
         if tool_id is None or action_name is None:
             error_message = f"Error: Failed to parse LLM tool call. Tool name: {llm_name}"
-            logger.error(error_message)
+            logger.error(
+                "tool_call_parse_failed",
+                extra={
+                    "llm_class_name": llm_class_name,
+                    "llm_tool_name": llm_name,
+                    "call_id": call_id,
+                },
+            )
 
             tool_call_data = {
                 "tool_name": "unknown",
@@ -289,7 +296,15 @@ class ToolExecutor:
 
         if tool_id not in tools_dict:
             error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
-            logger.error(error_message)
+            logger.error(
+                "tool_id_not_found",
+                extra={
+                    "tool_id": tool_id,
+                    "llm_tool_name": llm_name,
+                    "call_id": call_id,
+                    "available_tool_count": len(tools_dict),
+                },
+            )
 
             tool_call_data = {
                 "tool_name": "unknown",
@@ -356,7 +371,15 @@ class ToolExecutor:
                 f"Failed to load tool '{tool_data.get('name')}' (tool_id key={tool_id}): "
                 "missing 'id' on tool row."
             )
-            logger.error(error_message)
+            logger.error(
+                "tool_load_failed",
+                extra={
+                    "tool_name": tool_data.get("name"),
+                    "tool_id": tool_id,
+                    "action_name": action_name,
+                    "call_id": call_id,
+                },
+            )
             tool_call_data["result"] = error_message
             yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
             self.tool_calls.append(tool_call_data)
@@ -451,10 +474,12 @@ class ToolExecutor:
         row_id = tool_data.get("id")
         if not row_id:
             logger.error(
-                "Tool data missing 'id' for tool name=%s (enumerate-key tool_id=%s); "
-                "skipping load to avoid binding a non-UUID downstream.",
-                tool_data.get("name"),
-                tool_id,
+                "tool_missing_row_id",
+                extra={
+                    "tool_name": tool_data.get("name"),
+                    "tool_id": tool_id,
+                    "action_name": action_name,
+                },
             )
             return None
         tool_config["tool_id"] = str(row_id)
@@ -9,6 +9,7 @@ from jose import jwt
 
 from application.auth import handle_auth
 
+from application.core import log_context
 from application.core.logging_config import setup_logging
 
 setup_logging()
@@ -112,6 +113,38 @@ def generate_token():
     return jsonify({"error": "Token generation not allowed in current auth mode"}), 400
 
 
+_LOG_CTX_TOKEN_ATTR = "_log_ctx_token"
+
+
+@app.before_request
+def _bind_log_context():
+    """Bind activity_id + endpoint for the duration of this request.
+
+    Runs before ``authenticate_request``; ``user_id`` is overlaid in a
+    follow-up handler once the JWT has been decoded.
+    """
+    if request.method == "OPTIONS":
+        return None
+    activity_id = str(uuid.uuid4())
+    request.activity_id = activity_id
+    token = log_context.bind(
+        activity_id=activity_id,
+        endpoint=request.endpoint,
+    )
+    setattr(request, _LOG_CTX_TOKEN_ATTR, token)
+    return None
+
+
+@app.teardown_request
+def _reset_log_context(_exc):
+    # SSE streams keep yielding after teardown fires, but a2wsgi runs each
+    # request inside ``copy_context().run(...)``, so this reset doesn't
+    # leak into the stream's view of the context.
+    token = getattr(request, _LOG_CTX_TOKEN_ATTR, None)
+    if token is not None:
+        log_context.reset(token)
+
+
 @app.before_request
 def enforce_stt_request_size_limits():
     if request.method == "OPTIONS":
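
A minimal standalone sketch of the ``copy_context()`` isolation the teardown comment above relies on; the bare ``ContextVar`` here stands in for the one inside ``application.core.log_context``:

import contextvars

var = contextvars.ContextVar("log_ctx", default={})

def handle_request():
    token = var.set({"activity_id": "req-1"})  # what _bind_log_context does
    # ... request handlers log here with activity_id in scope ...
    var.reset(token)                           # what _reset_log_context does

# a2wsgi-style dispatch: the whole request runs in a copied context, so
# neither the set nor the reset is visible to the outer context.
contextvars.copy_context().run(handle_request)
print(var.get())  # {}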
@@ -148,6 +181,21 @@ def authenticate_request():
     request.decoded_token = decoded_token
 
 
+@app.before_request
+def _bind_user_id_to_log_context():
+    # Registered after ``authenticate_request`` (Flask runs before_request
+    # handlers in registration order), so ``request.decoded_token`` is
+    # populated by the time we read it. ``teardown_request`` unwinds the
+    # whole request-level bind, so no separate reset token is needed here.
+    if request.method == "OPTIONS":
+        return None
+    decoded_token = getattr(request, "decoded_token", None)
+    user_id = decoded_token.get("sub") if isinstance(decoded_token, dict) else None
+    if user_id:
+        log_context.bind(user_id=user_id)
+    return None
+
+
 @app.after_request
 def after_request(response: Response) -> Response:
     """Add CORS headers for the pure Flask development entrypoint."""
@@ -1,8 +1,17 @@
+import inspect
+import logging
 import threading
 
 from celery import Celery
+from application.core import log_context
 from application.core.settings import settings
-from celery.signals import setup_logging, worker_process_init, worker_ready
+from celery.signals import (
+    setup_logging,
+    task_postrun,
+    task_prerun,
+    worker_process_init,
+    worker_ready,
+)
 
 
 def make_celery(app_name=__name__):
@@ -41,6 +50,54 @@ def _dispose_db_engine_on_fork(*args, **kwargs):
     dispose_engine()
 
 
+# Most tasks in this repo accept ``user`` where the log context wants
+# ``user_id``; map task parameter names to context keys explicitly.
+_TASK_PARAM_TO_CTX_KEY: dict[str, str] = {
+    "user": "user_id",
+    "user_id": "user_id",
+    "agent_id": "agent_id",
+    "conversation_id": "conversation_id",
+}
+
+_task_log_tokens: dict[str, object] = {}
+
+
+@task_prerun.connect
+def _bind_task_log_context(task_id, task, args, kwargs, **_):
+    # Resolve task args by parameter name — nearly every task in this repo
+    # is called positionally, so ``kwargs.get('user')`` would bind nothing.
+    ctx = {"activity_id": task_id}
+    try:
+        sig = inspect.signature(task.run)
+        bound = sig.bind_partial(*args, **kwargs).arguments
+    except (TypeError, ValueError):
+        bound = dict(kwargs)
+    for param_name, value in bound.items():
+        ctx_key = _TASK_PARAM_TO_CTX_KEY.get(param_name)
+        if ctx_key and value:
+            ctx[ctx_key] = value
+    _task_log_tokens[task_id] = log_context.bind(**ctx)
+
+
+@task_postrun.connect
+def _unbind_task_log_context(task_id, **_):
+    # ``task_postrun`` fires on both success and failure. Required for
+    # Celery: unlike the Flask path, tasks aren't isolated in their own
+    # ``copy_context().run(...)``, so a missing reset would leak the
+    # bind onto the next task on the same worker.
+    token = _task_log_tokens.pop(task_id, None)
+    if token is None:
+        return
+    try:
+        log_context.reset(token)
+    except ValueError:
+        # task_prerun and task_postrun ran on different threads (non-default
+        # Celery pool); the token isn't valid in this context. Drop it.
+        logging.getLogger(__name__).debug(
+            "log_context reset skipped for task %s", task_id
+        )
+
+
 @worker_ready.connect
 def _run_version_check(*args, **kwargs):
     """Kick off the anonymous version check on worker startup.
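
The ``bind_partial`` trick above is easiest to see in isolation. A standalone sketch with a hypothetical task signature (``ingest_source`` is not a real task name, just an illustration):

import inspect

def ingest_source(user, source_id, conversation_id=None):
    pass  # hypothetical task body

# Celery hands task_prerun the raw args/kwargs; the task was called positionally.
args, kwargs = ("alice", "s-1"), {"conversation_id": "c-9"}
bound = inspect.signature(ingest_source).bind_partial(*args, **kwargs).arguments
print(bound)  # {'user': 'alice', 'source_id': 's-1', 'conversation_id': 'c-9'}
# 'user' can now be remapped to 'user_id' via _TASK_PARAM_TO_CTX_KEY, which a
# plain kwargs.get('user') could never recover from a positional call.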
application/core/log_context.py (new file, 57 lines)
@@ -0,0 +1,57 @@
+"""Per-activity logging context backed by ``contextvars``.
+
+The ``_ContextFilter`` installed by ``logging_config.setup_logging`` stamps
+every ``LogRecord`` emitted inside a ``bind`` block with the bound keys, so
+they land as first-class attributes on the OTLP log export rather than being
+buried inside formatted message bodies.
+
+A single ``ContextVar`` holds a dict so nested binds reset atomically (LIFO)
+via the token returned by ``bind``.
+"""
+
+from __future__ import annotations
+
+from contextvars import ContextVar, Token
+from typing import Mapping
+
+
+_CTX_KEYS: frozenset[str] = frozenset(
+    {
+        "activity_id",
+        "parent_activity_id",
+        "user_id",
+        "agent_id",
+        "conversation_id",
+        "endpoint",
+        "model",
+    }
+)
+
+_ctx: ContextVar[Mapping[str, str]] = ContextVar("log_ctx", default={})
+
+
+def bind(**kwargs: object) -> Token:
+    """Overlay the given keys onto the current context.
+
+    Returns a ``Token`` so the caller can ``reset`` in a ``finally`` block.
+    Keys outside :data:`_CTX_KEYS` are silently dropped (so a typo can't
+    stamp a stray field name onto every record), as are ``None`` values
+    (a missing attribute is more useful than the literal string ``"None"``).
+    """
+    overlay = {
+        k: str(v)
+        for k, v in kwargs.items()
+        if k in _CTX_KEYS and v is not None
+    }
+    new = {**_ctx.get(), **overlay}
+    return _ctx.set(new)
+
+
+def reset(token: Token) -> None:
+    """Restore the context to the snapshot captured by the matching ``bind``."""
+    _ctx.reset(token)
+
+
+def snapshot() -> Mapping[str, str]:
+    """Return the current context dict. Treat as read-only; use :func:`bind`."""
+    return _ctx.get()
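
Typical call pattern for the module above, as a minimal sketch of the token discipline its docstrings describe:

from application.core import log_context

token = log_context.bind(activity_id="a-123", user_id="u-7")
try:
    # every LogRecord emitted in here is stamped with both keys
    inner = log_context.bind(conversation_id="c-1")  # overlay; outer keys kept
    log_context.reset(inner)
finally:
    log_context.reset(token)  # LIFO reset restores the pre-bind snapshot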
@@ -2,6 +2,36 @@ import logging
 import os
 from logging.config import dictConfig
 
+from application.core.log_context import snapshot as _ctx_snapshot
+
+
+# Loggers with ``propagate=False`` don't share root's handlers, so the
+# context filter has to be installed on their handlers directly.
+_NON_PROPAGATING_LOGGERS: tuple[str, ...] = (
+    "uvicorn",
+    "uvicorn.access",
+    "uvicorn.error",
+    "celery.app.trace",
+    "celery.worker.strategy",
+    "gunicorn.error",
+    "gunicorn.access",
+)
+
+
+class _ContextFilter(logging.Filter):
+    """Stamp the current ``log_context`` snapshot onto every ``LogRecord``.
+
+    Must be installed on **handlers**, not loggers: Python skips logger-level
+    filters when a child logger's record propagates up. The ``hasattr`` guard
+    keeps an explicit ``logger.info(..., extra={...})`` from being overwritten.
+    """
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        for key, value in _ctx_snapshot().items():
+            if not hasattr(record, key):
+                setattr(record, key, value)
+        return True
+
+
 def _otlp_logs_enabled() -> bool:
     """Return True when the user has opted in to OTLP log export.
@@ -60,3 +90,23 @@ def setup_logging() -> None:
     for handler in preserved_handlers:
         if handler not in root.handlers:
             root.addHandler(handler)
+
+    _install_context_filter()
+
+
+def _install_context_filter() -> None:
+    """Attach :class:`_ContextFilter` to root's handlers + every handler on
+    the known non-propagating loggers. Skipping handlers that already carry
+    one keeps repeat ``setup_logging`` calls from stacking filters.
+    """
+
+    def _has_ctx_filter(handler: logging.Handler) -> bool:
+        return any(isinstance(f, _ContextFilter) for f in handler.filters)
+
+    for handler in logging.getLogger().handlers:
+        if not _has_ctx_filter(handler):
+            handler.addFilter(_ContextFilter())
+    for name in _NON_PROPAGATING_LOGGERS:
+        for handler in logging.getLogger(name).handlers:
+            if not _has_ctx_filter(handler):
+                handler.addFilter(_ContextFilter())
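
The handlers-not-loggers rule these docstrings call out can be checked with a few lines of standard-library code (standalone sketch, names hypothetical):

import logging

class Stamp(logging.Filter):
    def filter(self, record):
        record.stamped = True
        return True

seen = []

class Capture(logging.Handler):
    def emit(self, record):
        seen.append(hasattr(record, "stamped"))

root = logging.getLogger()
capture = Capture()
root.addHandler(capture)

root.addFilter(Stamp())                        # logger-level filter on root
logging.getLogger("app.child").warning("one")  # propagated; root's filter skipped

capture.addFilter(Stamp())                     # handler-level filter
logging.getLogger("app.child").warning("two")  # handler filter does run

print(seen)  # [False, True]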
@@ -11,6 +11,7 @@ logger = logging.getLogger(__name__)
 
 
 class AnthropicLLM(BaseLLM):
+    provider_name = "anthropic"
 
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
 
@@ -1,5 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
+from typing import ClassVar
 
 from application.cache import gen_cache, stream_cache
 
@@ -10,6 +11,10 @@ logger = logging.getLogger(__name__)
 
 
 class BaseLLM(ABC):
+    # Stamped onto the ``llm_stream_start`` event so dashboards can group
+    # calls by vendor. Subclasses override.
+    provider_name: ClassVar[str] = "unknown"
+
     def __init__(
         self,
         decoded_token=None,
@@ -206,6 +211,21 @@ class BaseLLM(ABC):
         )
 
     def gen_stream(self, model, messages, stream=True, tools=None, *args, **kwargs):
+        # Attachments arrive as ``_usage_attachments`` from ``Agent._llm_gen``;
+        # the ``stream_token_usage`` decorator pops that key, but the log
+        # fires before the decorator runs so it's still in ``kwargs`` here.
+        logging.info(
+            "llm_stream_start",
+            extra={
+                "model": model,
+                "provider": self.provider_name,
+                "message_count": len(messages) if messages is not None else 0,
+                "has_attachments": bool(
+                    kwargs.get("_usage_attachments") or kwargs.get("attachments")
+                ),
+                "has_tools": bool(tools),
+            },
+        )
         decorators = [stream_cache, stream_token_usage]
         return self._execute_with_fallback(
             "_raw_gen_stream",
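
Why the event-name-plus-``extra`` shape matters: ``extra`` keys become attributes on the ``LogRecord`` itself, which is what the context filter, the OTLP export, and the tests further down read. Standalone sketch:

import logging

records = []

class Capture(logging.Handler):
    def emit(self, record):
        records.append(record)

logging.getLogger().addHandler(Capture())
logging.getLogger().setLevel(logging.INFO)

logging.info("llm_stream_start", extra={"model": "m1", "provider": "openai"})
evt = records[-1]
print(evt.getMessage(), evt.model, evt.provider)
# llm_stream_start m1 openai -- structured fields, not text to regex out later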
@@ -6,6 +6,8 @@ DOCSGPT_BASE_URL = "https://oai.arc53.com"
 DOCSGPT_MODEL = "docsgpt"
 
 class DocsGPTAPILLM(OpenAILLM):
+    provider_name = "docsgpt"
+
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
         super().__init__(
             api_key=DOCSGPT_API_KEY,
@@ -10,6 +10,8 @@ from application.storage.storage_creator import StorageCreator
 
 
 class GoogleLLM(BaseLLM):
+    provider_name = "google"
+
     def __init__(
         self, api_key=None, user_api_key=None, decoded_token=None, *args, **kwargs
     ):
@@ -541,22 +543,6 @@ class GoogleLLM(BaseLLM):
             config.response_mime_type = "application/json"
         # Check if we have both tools and file attachments
 
-        has_attachments = False
-        for message in messages:
-            for part in message.parts:
-                if hasattr(part, "file_data") and part.file_data is not None:
-                    has_attachments = True
-                    break
-            if has_attachments:
-                break
-        messages_summary = self._summarize_messages_for_log(messages)
-        logging.info(
-            "GoogleLLM: Starting stream generation. Model: %s, Messages: %s, Has attachments: %s",
-            model,
-            messages_summary,
-            has_attachments,
-        )
-
         response = client.models.generate_content_stream(
             model=model,
             contents=messages,
@@ -5,6 +5,8 @@ GROQ_BASE_URL = "https://api.groq.com/openai/v1"
 
 
 class GroqLLM(OpenAILLM):
+    provider_name = "groq"
+
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
         super().__init__(
             api_key=api_key or settings.GROQ_API_KEY or settings.API_KEY,
@@ -26,6 +26,8 @@ class LlamaSingleton:
 
 
 class LlamaCpp(BaseLLM):
+    provider_name = "llama_cpp"
+
     def __init__(
         self,
         api_key=None,
@@ -5,6 +5,8 @@ NOVITA_BASE_URL = "https://api.novita.ai/openai"
 
 
 class NovitaLLM(OpenAILLM):
+    provider_name = "novita"
+
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
         super().__init__(
             api_key=api_key or settings.NOVITA_API_KEY or settings.API_KEY,
@@ -5,6 +5,8 @@ OPEN_ROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 
 
 class OpenRouterLLM(OpenAILLM):
+    provider_name = "openrouter"
+
     def __init__(self, api_key=None, user_api_key=None, base_url=None, *args, **kwargs):
         super().__init__(
             api_key=api_key or settings.OPEN_ROUTER_API_KEY or settings.API_KEY,
@@ -61,6 +61,7 @@ def _truncate_base64_for_logging(messages):
 
 
 class OpenAILLM(BaseLLM):
+    provider_name = "openai"
 
     def __init__(
         self,
@@ -3,6 +3,7 @@ from application.core.settings import settings
 
 
 class PremAILLM(BaseLLM):
+    provider_name = "premai"
 
     def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
         from premai import Prem
@@ -59,6 +59,7 @@ class LineIterator:
 
 
 class SagemakerAPILLM(BaseLLM):
+    provider_name = "sagemaker"
 
     def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
         import boto3
@@ -1,11 +1,13 @@
 import datetime
 import functools
 import inspect
+import time
 
 import logging
 import uuid
 from typing import Any, Callable, Dict, Generator, List
 
+from application.core import log_context
 from application.storage.db.repositories.stack_logs import StackLogsRepository
 from application.storage.db.session import db_session
@@ -78,22 +80,96 @@ def log_activity() -> Callable:
             user = data.get("user", "local")
             api_key = data.get("user_api_key", "")
             query = kwargs.get("query", getattr(args[0], "query", ""))
+            agent_id = getattr(args[0], "agent_id", None) or kwargs.get("agent_id")
+            conversation_id = (
+                kwargs.get("conversation_id")
+                or getattr(args[0], "conversation_id", None)
+            )
+            model = getattr(args[0], "gpt_model", None) or getattr(args[0], "model", None)
+
+            # Capture the surrounding activity_id before overlaying ours,
+            # so nested activities record the parent → child link.
+            parent_activity_id = log_context.snapshot().get("activity_id")
 
             context = LogContext(endpoint, activity_id, user, api_key, query)
             kwargs["log_context"] = context
 
-            logging.info(
-                f"Starting activity: {endpoint} - {activity_id} - User: {user}"
+            ctx_token = log_context.bind(
+                activity_id=activity_id,
+                parent_activity_id=parent_activity_id,
+                user_id=user,
+                agent_id=agent_id,
+                conversation_id=conversation_id,
+                endpoint=endpoint,
+                model=model,
             )
 
-            generator = func(*args, **kwargs)
-            yield from _consume_and_log(generator, context)
+            started_at = time.monotonic()
+            logging.info(
+                "activity_started",
+                extra={
+                    "activity_id": activity_id,
+                    "parent_activity_id": parent_activity_id,
+                    "user_id": user,
+                    "agent_id": agent_id,
+                    "conversation_id": conversation_id,
+                    "endpoint": endpoint,
+                    "model": model,
+                },
+            )
+
+            error: BaseException | None = None
+            try:
+                generator = func(*args, **kwargs)
+                yield from _consume_and_log(generator, context)
+            except Exception as exc:
+                # Only ``Exception`` counts as an activity error; ``GeneratorExit``
+                # (consumer disconnected mid-stream) and ``KeyboardInterrupt``
+                # flow through the finally as ``status="ok"``, matching
+                # ``_consume_and_log``.
+                error = exc
+                raise
+            finally:
+                _emit_activity_finished(
+                    activity_id=activity_id,
+                    parent_activity_id=parent_activity_id,
+                    user=user,
+                    endpoint=endpoint,
+                    started_at=started_at,
+                    error=error,
+                )
+                log_context.reset(ctx_token)
 
         return wrapper
 
     return decorator
 
 
+def _emit_activity_finished(
+    *,
+    activity_id: str,
+    parent_activity_id: str | None,
+    user: str,
+    endpoint: str,
+    started_at: float,
+    error: BaseException | None,
+) -> None:
+    """Emit the paired ``activity_finished`` event with duration and outcome."""
+    duration_ms = int((time.monotonic() - started_at) * 1000)
+    logging.info(
+        "activity_finished",
+        extra={
+            "activity_id": activity_id,
+            "parent_activity_id": parent_activity_id,
+            "user_id": user,
+            "endpoint": endpoint,
+            "duration_ms": duration_ms,
+            "status": "error" if error is not None else "ok",
+            "error_class": type(error).__name__ if error is not None else None,
+        },
+    )
+
+
 def _consume_and_log(generator: Generator, context: "LogContext"):
     try:
         for item in generator:
@@ -432,7 +432,15 @@ def run_agent_logic(agent_config, input_data):
             "tool_calls": tool_calls,
             "thought": thought,
         }
-        logging.info(f"Agent response: {result}")
+        logging.info(
+            "agent_response",
+            extra={
+                "answer_length": len(response_full),
+                "source_count": len(source_log_docs),
+                "tool_call_count": len(tool_calls),
+                "thought_length": len(thought),
+            },
+        )
         return result
     except Exception as e:
         logging.error(f"Error in run_agent_logic: {e}", exc_info=True)
tests/core/test_log_context.py (new file, 161 lines)
@@ -0,0 +1,161 @@
+"""Unit tests for ``log_context`` + ``_ContextFilter``."""
+
+from __future__ import annotations
+
+import logging
+
+import pytest
+
+from application.core import log_context
+from application.core.logging_config import _ContextFilter
+
+
+@pytest.fixture(autouse=True)
+def _clean_log_ctx():
+    # The contextvar is module-scoped; snapshot at entry, restore at exit
+    # to keep tests from leaking state into each other.
+    token = log_context.bind()
+    try:
+        yield
+    finally:
+        log_context.reset(token)
+
+
+@pytest.mark.unit
+class TestBindAndSnapshot:
+
+    def test_bind_returns_token_and_snapshot_reflects_overlay(self):
+        token = log_context.bind(activity_id="a1", user_id="u1")
+        assert log_context.snapshot() == {"activity_id": "a1", "user_id": "u1"}
+        log_context.reset(token)
+        assert log_context.snapshot() == {}
+
+    def test_bind_drops_unknown_keys(self):
+        token = log_context.bind(activity_id="a1", not_a_real_key="boom")
+        try:
+            assert log_context.snapshot() == {"activity_id": "a1"}
+        finally:
+            log_context.reset(token)
+
+    def test_bind_drops_none_values(self):
+        token = log_context.bind(activity_id="a1", agent_id=None)
+        try:
+            assert "agent_id" not in log_context.snapshot()
+        finally:
+            log_context.reset(token)
+
+    def test_bind_coerces_values_to_str(self):
+        token = log_context.bind(activity_id=42)
+        try:
+            assert log_context.snapshot()["activity_id"] == "42"
+        finally:
+            log_context.reset(token)
+
+    def test_nested_bind_overlays_and_resets_lifo(self):
+        outer = log_context.bind(activity_id="outer", user_id="u1")
+        inner = log_context.bind(activity_id="inner", agent_id="agent-1")
+        # Inner overrides activity_id, keeps user_id from outer, adds agent_id.
+        assert log_context.snapshot() == {
+            "activity_id": "inner",
+            "user_id": "u1",
+            "agent_id": "agent-1",
+        }
+        log_context.reset(inner)
+        assert log_context.snapshot() == {"activity_id": "outer", "user_id": "u1"}
+        log_context.reset(outer)
+        assert log_context.snapshot() == {}
+
+    def test_parent_activity_id_pattern(self):
+        outer = log_context.bind(activity_id="parent-1")
+        parent = log_context.snapshot().get("activity_id")
+        inner = log_context.bind(activity_id="child-1", parent_activity_id=parent)
+        try:
+            snap = log_context.snapshot()
+            assert snap["activity_id"] == "child-1"
+            assert snap["parent_activity_id"] == "parent-1"
+        finally:
+            log_context.reset(inner)
+            log_context.reset(outer)
+
+
+@pytest.mark.unit
+class TestContextFilter:
+
+    def _make_record(self, **extra) -> logging.LogRecord:
+        record = logging.LogRecord(
+            name="test",
+            level=logging.INFO,
+            pathname=__file__,
+            lineno=1,
+            msg="hello",
+            args=(),
+            exc_info=None,
+        )
+        for k, v in extra.items():
+            setattr(record, k, v)
+        return record
+
+    def test_stamps_record_with_context(self):
+        token = log_context.bind(activity_id="a1", user_id="u1")
+        try:
+            record = self._make_record()
+            assert _ContextFilter().filter(record) is True
+            assert record.activity_id == "a1"
+            assert record.user_id == "u1"
+        finally:
+            log_context.reset(token)
+
+    def test_explicit_extra_wins_over_context(self):
+        # extra={} sets attributes on the record before the filter runs;
+        # the filter must not overwrite them.
+        token = log_context.bind(activity_id="from-ctx")
+        try:
+            record = self._make_record(activity_id="from-extra")
+            _ContextFilter().filter(record)
+            assert record.activity_id == "from-extra"
+        finally:
+            log_context.reset(token)
+
+    def test_no_op_when_context_empty(self):
+        record = self._make_record()
+        assert _ContextFilter().filter(record) is True
+        assert not hasattr(record, "activity_id")
+
+
+@pytest.mark.unit
+class TestFilterWiringEndToEnd:
+    """Regression guard: the filter must be installed on handlers, not on
+    loggers — Python skips logger-level filters during propagation.
+    """
+
+    def test_propagated_record_gets_stamped(self):
+        from application.core.logging_config import _install_context_filter
+
+        captured: list[logging.LogRecord] = []
+
+        class _Capture(logging.Handler):
+            def emit(self, record):
+                captured.append(record)
+
+        root = logging.getLogger()
+        saved_handlers = list(root.handlers)
+        saved_level = root.level
+        try:
+            root.handlers = [_Capture()]
+            root.setLevel(logging.DEBUG)
+            _install_context_filter()
+
+            child = logging.getLogger("test_log_context.propagation")
+            child.setLevel(logging.DEBUG)
+
+            token = log_context.bind(activity_id="propagated-id")
+            try:
+                child.info("from a child logger")
+            finally:
+                log_context.reset(token)
+
+            assert captured, "Capture handler should have received the record"
+            assert captured[0].activity_id == "propagated-id"
+        finally:
+            root.handlers = saved_handlers
+            root.setLevel(saved_level)
@@ -95,6 +95,96 @@ class TestGenMethods:
         )
         assert result == ["a", "b"]
 
+    @patch("application.llm.base.stream_cache", lambda f: f)
+    @patch("application.llm.base.stream_token_usage", lambda f: f)
+    def test_gen_stream_emits_llm_stream_start_event(self, caplog):
+        import logging as _logging
+
+        class FakeProvider(StubLLM):
+            provider_name = "fake-provider"
+
+        llm = FakeProvider(raw_gen_stream_items=["x"])
+        with caplog.at_level(_logging.INFO, logger="root"):
+            list(
+                llm.gen_stream(
+                    model="m1",
+                    messages=[{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hey"}],
+                    tools=[{"name": "t"}],
+                    _usage_attachments=[{"path": "/tmp/a.png"}],
+                )
+            )
+
+        starts = [r for r in caplog.records if r.message == "llm_stream_start"]
+        assert len(starts) == 1
+        evt = starts[0]
+        assert evt.model == "m1"
+        assert evt.provider == "fake-provider"
+        assert evt.message_count == 2
+        # ``_usage_attachments`` is what ``Agent._llm_gen`` actually passes;
+        # the alias check below covers the bare ``attachments=`` form.
+        assert evt.has_attachments is True
+        assert evt.has_tools is True
+
+    @patch("application.llm.base.stream_cache", lambda f: f)
+    @patch("application.llm.base.stream_token_usage", lambda f: f)
+    def test_gen_stream_recognises_attachments_kwarg_alias(self, caplog):
+        import logging as _logging
+
+        llm = StubLLM(raw_gen_stream_items=["x"])
+        with caplog.at_level(_logging.INFO, logger="root"):
+            list(
+                llm.gen_stream(
+                    model="m1", messages=[], attachments=[{"path": "/tmp/a"}]
+                )
+            )
+        evt = next(r for r in caplog.records if r.message == "llm_stream_start")
+        assert evt.has_attachments is True
+
+    @patch("application.llm.base.stream_cache", lambda f: f)
+    @patch("application.llm.base.stream_token_usage", lambda f: f)
+    def test_gen_stream_emits_event_without_attachments_or_tools(self, caplog):
+        import logging as _logging
+
+        llm = StubLLM(raw_gen_stream_items=["x"])
+        with caplog.at_level(_logging.INFO, logger="root"):
+            list(llm.gen_stream(model="m1", messages=[]))
+
+        evt = next(r for r in caplog.records if r.message == "llm_stream_start")
+        assert evt.message_count == 0
+        assert evt.has_attachments is False
+        assert evt.has_tools is False
+        # BaseLLM default — concrete providers always override.
+        assert evt.provider == "unknown"
+
+
+@pytest.mark.unit
+class TestProviderNameRegistry:
+    """A new provider without ``provider_name`` would silently report
+    ``provider="unknown"`` in telemetry. Pin the expected values here."""
+
+    def test_provider_names_match_expectations(self):
+        from application.llm.anthropic import AnthropicLLM
+        from application.llm.docsgpt_provider import DocsGPTAPILLM
+        from application.llm.google_ai import GoogleLLM
+        from application.llm.groq import GroqLLM
+        from application.llm.llama_cpp import LlamaCpp
+        from application.llm.novita import NovitaLLM
+        from application.llm.open_router import OpenRouterLLM
+        from application.llm.openai import OpenAILLM
+        from application.llm.premai import PremAILLM
+        from application.llm.sagemaker import SagemakerAPILLM
+
+        assert OpenAILLM.provider_name == "openai"
+        assert GoogleLLM.provider_name == "google"
+        assert AnthropicLLM.provider_name == "anthropic"
+        assert GroqLLM.provider_name == "groq"
+        assert NovitaLLM.provider_name == "novita"
+        assert OpenRouterLLM.provider_name == "openrouter"
+        assert DocsGPTAPILLM.provider_name == "docsgpt"
+        assert PremAILLM.provider_name == "premai"
+        assert LlamaCpp.provider_name == "llama_cpp"
+        assert SagemakerAPILLM.provider_name == "sagemaker"
+
+
     @patch("application.llm.base.gen_cache", lambda f: f)
     @patch("application.llm.base.gen_token_usage", lambda f: f)
     def test_gen_passes_tools(self):
@@ -153,4 +153,108 @@ class TestLogActivity:
         ):
             list(failing_gen(FakeAgent()))
 
+    def test_log_activity_emits_lifecycle_events(self, caplog):
+        import logging as _logging
+
+        from application.logging import log_activity
+
+        class FakeAgent:
+            endpoint = "test"
+            user = "user1"
+            user_api_key = "k"
+            query = "q"
+            agent_id = "agent-7"
+            conversation_id = "conv-3"
+
+        @log_activity()
+        def gen(agent, log_context=None):
+            yield "x"
+
+        with patch("application.logging._log_activity_to_db"), \
+                caplog.at_level(_logging.INFO, logger="root"):
+            list(gen(FakeAgent()))
+
+        messages = [r.message for r in caplog.records]
+        assert "activity_started" in messages
+        assert "activity_finished" in messages
+
+        started = next(r for r in caplog.records if r.message == "activity_started")
+        finished = next(r for r in caplog.records if r.message == "activity_finished")
+
+        assert started.endpoint == "test"
+        assert started.user_id == "user1"
+        assert started.agent_id == "agent-7"
+        assert started.conversation_id == "conv-3"
+        assert started.parent_activity_id is None  # top-level activity
+
+        assert finished.activity_id == started.activity_id
+        assert finished.status == "ok"
+        assert isinstance(finished.duration_ms, int)
+        assert finished.duration_ms >= 0
+        assert finished.error_class is None
+
+    def test_log_activity_records_parent_activity_id_when_nested(self, caplog):
+        # Sub-agents / workflow_agents wrap an outer @log_activity gen;
+        # the inner activity_started event must link to the outer's id.
+        import logging as _logging
+
+        from application.logging import log_activity
+
+        class FakeAgent:
+            endpoint = "outer"
+            user = "user1"
+            user_api_key = ""
+            query = ""
+
+        class InnerAgent:
+            endpoint = "inner"
+            user = "user1"
+            user_api_key = ""
+            query = ""
+
+        @log_activity()
+        def inner_gen(agent, log_context=None):
+            yield "i"
+
+        @log_activity()
+        def outer_gen(agent, log_context=None):
+            yield from inner_gen(InnerAgent())
+
+        with patch("application.logging._log_activity_to_db"), \
+                caplog.at_level(_logging.INFO, logger="root"):
+            list(outer_gen(FakeAgent()))
+
+        starts = [r for r in caplog.records if r.message == "activity_started"]
+        assert len(starts) == 2
+        outer_start, inner_start = starts
+        assert outer_start.endpoint == "outer"
+        assert outer_start.parent_activity_id is None
+        assert inner_start.endpoint == "inner"
+        assert inner_start.parent_activity_id == outer_start.activity_id
+
+    def test_log_activity_records_error_status_on_failure(self, caplog):
+        import logging as _logging
+
+        from application.logging import log_activity
+
+        class FakeAgent:
+            endpoint = "boom"
+            user = "user1"
+            user_api_key = ""
+            query = ""
+
+        @log_activity()
+        def failing(agent, log_context=None):
+            yield "before"
+            raise ValueError("bad thing")
+
+        with patch("application.logging._log_activity_to_db"), \
+                caplog.at_level(_logging.INFO, logger="root"), \
+                pytest.raises(ValueError):
+            list(failing(FakeAgent()))
+
+        finished = next(r for r in caplog.records if r.message == "activity_finished")
+        assert finished.status == "error"
+        assert finished.error_class == "ValueError"