# docling-serve/examples/prometheus-scrape.yaml

# Prometheus Scrape Configuration for Docling Serve
# Merge the job below into the `scrape_configs` section of your Prometheus
# configuration file (prometheus.yml).

scrape_configs:
  - job_name: docling-serve

    # Endpoint that serves the metrics (/metrics is also the default)
    metrics_path: /metrics

    # Scrape frequency, and the per-scrape timeout
    scrape_interval: 15s
    scrape_timeout: 10s

    # Fixed list of instances to scrape
    static_configs:
      - labels:
          # Optional: extra labels attached to every metric scraped from the
          # targets listed below
          service: docling-serve
          environment: production

        targets:
          # Substitute the host:port of your own docling-serve instance(s)
          - 'localhost:5001'
          - 'docling-serve-1.example.com:5001'
          - 'docling-serve-2.example.com:5001'

    # Optional: uncomment to send credentials when an API key is required
    # basic_auth:
    #   username: ''
    #   password: 'your-api-key'

    # Optional: uncomment to configure TLS for the scrape connection
    # tls_config:
    #   ca_file: /path/to/ca.crt
    #   cert_file: /path/to/client.crt
    #   key_file: /path/to/client.key
    #   insecure_skip_verify: false

---
# VictoriaMetrics Scrape Configuration
# The Prometheus scrape configuration above is compatible with VictoriaMetrics
# (vmagent) and can be passed to it directly.

# vmagent configuration example:
# vmagent \
#   -promscrape.config=prometheus-scrape.yaml \
#   -remoteWrite.url=http://victoriametrics:8428/api/v1/write

---
# Kubernetes Service Discovery Example
# For auto-discovery of docling-serve pods in Kubernetes

scrape_configs:
  - job_name: 'docling-serve-k8s'

    # Discover pods in the listed namespaces; the relabel rules below narrow
    # the discovered set down to docling-serve pods and rewrite their address.
    kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
            - default
            - docling-serve

    # Rules are applied in order to every discovered target.
    relabel_configs:
      # Only scrape pods with label app=docling-serve
      - source_labels: [__meta_kubernetes_pod_label_app]
        action: keep
        regex: docling-serve

      # Skip pods that have already terminated; they can no longer serve
      # /metrics and would otherwise show up as permanently failing targets.
      - source_labels: [__meta_kubernetes_pod_phase]
        action: drop
        regex: 'Succeeded|Failed'

      # Expose the pod name as the `pod` label
      - source_labels: [__meta_kubernetes_pod_name]
        target_label: pod

      # Expose the pod's namespace as the `namespace` label
      - source_labels: [__meta_kubernetes_namespace]
        target_label: namespace

      # Set metrics path
      - target_label: __metrics_path__
        replacement: /metrics

      # Force the scrape port to 5001: keep the host part of the discovered
      # address and replace (or append) the port.
      # NOTE: the pattern is anchored and its host part cannot contain ':',
      # so bracketed IPv6 addresses do not match and are left unchanged.
      - source_labels: [__address__]
        action: replace
        regex: '([^:]+)(?::\d+)?'
        replacement: '$1:5001'
        target_label: __address__

---
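# Available Metrics from Docling Serve:
#
# NOTE: OpenTelemetry metrics are listed by their OpenTelemetry names. In the
# Prometheus exposition format, dots are typically replaced with underscores
# and a unit suffix may be appended; check the /metrics output for the exact
# series names.
#
# FastAPI/HTTP Metrics (from OpenTelemetry):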
# - http.server.request.duration - HTTP request duration histogram
# - http.server.active_requests - Active HTTP requests gauge
# - http.server.request.size - HTTP request size histogram
# - http.server.response.size - HTTP response size histogram
#
# RQ Metrics (when using RQ engine):
# - rq_workers - Number of RQ workers by state
# - rq_workers_success - Successful job count per worker
# - rq_workers_failed - Failed job count per worker
# - rq_workers_working_time - Total working time per worker
# - rq_jobs - Job counts by queue and status
# - rq_request_processing_seconds - Time spent collecting RQ metrics
#
# System Metrics (via Python OpenTelemetry):
# - process.runtime.cpython.cpu.utilization - CPU utilization
# - process.runtime.cpython.memory - Memory usage
# - process.runtime.cpython.gc.count - Garbage collection count