# Files
# docling-serve/examples/prometheus-scrape.yaml
# 2026-01-12 13:17:07 +01:00
#
# 107 lines
# 3.2 KiB
# YAML

# Prometheus Scrape Configuration for Docling Serve
# Add this to your Prometheus configuration file (prometheus.yml)
scrape_configs:
  # One scrape job covering every docling-serve instance listed under
  # static_configs below.
  - job_name: 'docling-serve'

    # How often targets are scraped, and how long a single scrape may take.
    # Prometheus requires scrape_timeout to be no larger than scrape_interval.
    scrape_interval: 15s
    scrape_timeout: 10s

    # Metrics path (default is /metrics)
    metrics_path: /metrics

    # Static targets configuration
    static_configs:
      - targets:
          # Replace with your docling-serve instance(s), as 'host:port'
          - 'localhost:5001'
          - 'docling-serve-1.example.com:5001'
          - 'docling-serve-2.example.com:5001'

        # Optional labels attached to every series scraped from the
        # targets listed above
        labels:
          environment: 'production'
          service: 'docling-serve'

    # Optional: Add authentication if API key is required
    # NOTE(review): basic_auth sends the value as the HTTP Basic password;
    # confirm this matches how your deployment protects /metrics.
    # basic_auth:
    #   username: ''
    #   password: 'your-api-key'

    # Optional: TLS configuration
    # tls_config:
    #   ca_file: /path/to/ca.crt
    #   cert_file: /path/to/client.crt
    #   key_file: /path/to/client.key
    #   insecure_skip_verify: false
---
# VictoriaMetrics Scrape Configuration
# VictoriaMetrics accepts Prometheus-compatible scrape configuration, so the
# scrape_configs shown in this file can be used with vmagent as well.
# vmagent configuration example:
#   vmagent \
#     -promscrape.config=prometheus-scrape.yaml \
#     -remoteWrite.url=http://victoriametrics:8428/api/v1/write
---
# Kubernetes Service Discovery Example
# For auto-discovery of docling-serve pods in Kubernetes
scrape_configs:
  - job_name: 'docling-serve-k8s'

    # Discover pods, limited to the namespaces listed under `names`.
    kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
            - default
            - docling-serve

    # Rules are applied in order to every discovered target.
    relabel_configs:
      # Only scrape pods with label app=docling-serve
      - source_labels: [__meta_kubernetes_pod_label_app]
        action: keep
        regex: docling-serve

      # Copy the pod name into the `pod` label
      - source_labels: [__meta_kubernetes_pod_name]
        target_label: pod

      # Copy the namespace into the `namespace` label
      - source_labels: [__meta_kubernetes_namespace]
        target_label: namespace

      # Set metrics path
      - target_label: __metrics_path__
        replacement: /metrics

      # Set port to scrape: keep the host part of the discovered address,
      # drop any port that is present (the `(?::\d+)?` group is optional),
      # and append :5001. Quoted so YAML treats the pattern and the `$1`
      # back-reference as plain strings.
      - source_labels: [__address__]
        action: replace
        regex: '([^:]+)(?::\d+)?'
        replacement: '$1:5001'
        target_label: __address__
---
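# Available Metrics from Docling Serve:
#
# NOTE: OpenTelemetry metrics below are listed under their dotted names.
# When exposed to Prometheus they typically appear with underscores instead
# of dots (and possibly a unit suffix); check the /metrics output of your
# instance for the exact series names.
#
# FastAPI/HTTP Metrics (from OpenTelemetry):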
# - http.server.request.duration - HTTP request duration histogram
# - http.server.active_requests - Active HTTP requests gauge
# - http.server.request.size - HTTP request size histogram
# - http.server.response.size - HTTP response size histogram
#
# RQ Metrics (when using RQ engine):
# - rq_workers - Number of RQ workers by state
# - rq_workers_success - Successful job count per worker
# - rq_workers_failed - Failed job count per worker
# - rq_workers_working_time - Total working time per worker
# - rq_jobs - Job counts by queue and status
# - rq_request_processing_seconds - Time spent collecting RQ metrics
#
# System Metrics (via Python OpenTelemetry):
# - process.runtime.cpython.cpu.utilization - CPU utilization
# - process.runtime.cpython.memory - Memory usage
# - process.runtime.cpython.gc.count - Garbage collection count