# This example deployment configures Docling Serve with a Service and a CUDA
# image. It contains two documents:
#   1. a Service that exposes the API pods on port 5001, and
#   2. a Deployment that runs a single GPU-backed replica of the API container.
---
apiVersion: v1
kind: Service
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  ports:
    - name: http
      port: 5001
      # Refers to the container port named "http" in the Deployment below, so
      # the port number only has to be changed in one place.
      targetPort: http
  # Must match the pod template labels of the Deployment below.
  selector:
    app: docling-serve
    component: docling-serve-api
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  replicas: 1
  # Must match spec.template.metadata.labels, otherwise the Deployment is
  # rejected by the API server.
  selector:
    matchLabels:
      app: docling-serve
      component: docling-serve-api
  template:
    metadata:
      labels:
        app: docling-serve
        component: docling-serve-api
    spec:
      restartPolicy: Always
      containers:
        - name: api
          resources:
            limits:
              cpu: 1
              memory: 4Gi
              nvidia.com/gpu: 1  # Limit to one GPU
            requests:
              cpu: 250m
              memory: 1Gi
              # Extended resources such as GPUs cannot be overcommitted: when
              # both are set, the request must equal the limit.
              nvidia.com/gpu: 1
          env:
            # Env values must be strings, hence the quotes around the boolean.
            # Presumably turns on the bundled web UI - confirm against the
            # Docling Serve configuration reference.
            - name: DOCLING_SERVE_ENABLE_UI
              value: 'true'
          ports:
            - name: http
              containerPort: 5001
              protocol: TCP
          # No tag is given on the image, so it resolves to ":latest" and is
          # re-pulled on every pod start. Pin a tag or digest for reproducible
          # rollouts.
          imagePullPolicy: Always
          image: 'ghcr.io/docling-project/docling-serve-cu124'