diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml
index 8ea0195..719b13c 100644
--- a/.markdownlint-cli2.yaml
+++ b/.markdownlint-cli2.yaml
@@ -3,7 +3,7 @@ config:
no-emphasis-as-header: false
first-line-heading: false
MD033:
- allowed_elements: ["details", "summary", "br", "a", "p", "img"]
+ allowed_elements: ["details", "summary", "br", "a", "b", "p", "img"]
MD024:
siblings_only: true
globs:
diff --git a/docs/deploy-examples/compose-gpu.yaml b/docs/deploy-examples/compose-gpu.yaml
new file mode 100644
index 0000000..0a5e08d
--- /dev/null
+++ b/docs/deploy-examples/compose-gpu.yaml
@@ -0,0 +1,15 @@
+services:
+ docling:
+ image: ghcr.io/docling-project/docling-serve-cu124
+ container_name: docling-serve
+ ports:
+      - "5001:5001"
+ environment:
+ - DOCLING_SERVE_ENABLE_UI=true
+ deploy:
+ resources:
+ reservations:
+ devices:
+ - driver: nvidia
+              count: all  # use all GPUs; run `nvidia-smi` to list devices and pin a specific one
+ capabilities: [gpu]
diff --git a/docs/deploy-examples/docling-serve-simple.yaml b/docs/deploy-examples/docling-serve-simple.yaml
new file mode 100644
index 0000000..47390f4
--- /dev/null
+++ b/docs/deploy-examples/docling-serve-simple.yaml
@@ -0,0 +1,58 @@
+# This example deploys Docling Serve as a Deployment plus Service, using the CUDA image
+---
+apiVersion: v1
+kind: Service
+metadata:
+ name: docling-serve
+ labels:
+ app: docling-serve
+ component: docling-serve-api
+spec:
+ ports:
+ - name: http
+ port: 5001
+ targetPort: http
+ selector:
+ app: docling-serve
+ component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+ name: docling-serve
+ labels:
+ app: docling-serve
+ component: docling-serve-api
+spec:
+ replicas: 1
+ selector:
+ matchLabels:
+ app: docling-serve
+ component: docling-serve-api
+ template:
+ metadata:
+ labels:
+ app: docling-serve
+ component: docling-serve-api
+ spec:
+ restartPolicy: Always
+ containers:
+ - name: api
+ resources:
+ limits:
+ cpu: 500m
+ memory: 2Gi
+ nvidia.com/gpu: 1 # Limit to one GPU
+ requests:
+ cpu: 250m
+ memory: 1Gi
+              nvidia.com/gpu: 1 # Request one GPU
+ env:
+ - name: DOCLING_SERVE_ENABLE_UI
+ value: 'true'
+ ports:
+ - name: http
+ containerPort: 5001
+ protocol: TCP
+ imagePullPolicy: Always
+ image: 'ghcr.io/docling-project/docling-serve-cu124'
diff --git a/docs/deployment.md b/docs/deployment.md
index 168b5e9..659e4d2 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -1,7 +1,161 @@
-# Deployment
+# Deployment Examples
+
+This document provides deployment examples for running the application in different environments.
+
+Choose the deployment option that best fits your setup.
+
+- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
+- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.
+
+---
+
+## Local GPU
+
+### Docker Compose
+
+Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
+
+This deployment has the following features:
+
+- NVIDIA CUDA enabled
+
+Install the app with:
+
+```sh
+docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+```
+
+For using the API:
+
+```sh
+# Make a test query
+curl -X 'POST' \
+ "localhost:5001/v1alpha/convert/source/async" \
+ -H "accept: application/json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+ }'
+```
+
+
+Requirements:
+
+- debian/ubuntu/rhel/fedora/opensuse
+- docker
+- nvidia drivers >=550.54.14
+- nvidia-container-toolkit
+
+Docs:
+
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/supported-platforms.html)
+- [CUDA Toolkit Release Notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6)
+
+
+
+
+Steps:
+
+1. Check the driver version and which GPU(s) you want to use (pick a device index such as 0/1/2 and update the [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file, or keep `count: all`)
+
+ ```sh
+ nvidia-smi
+ ```
+
+2. Check if the NVIDIA Container Toolkit is installed/updated
+
+ ```sh
+ # debian
+ dpkg -l | grep nvidia-container-toolkit
+ ```
+
+ ```sh
+ # rhel
+ rpm -q nvidia-container-toolkit
+ ```
+
+ NVIDIA Container Toolkit install steps can be found here:
+
+    <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
+
+3. Check which runtime is being used by Docker
+
+ ```sh
+ # docker
+ docker info | grep -i runtime
+ ```
+
+4. (Optional) If the default Docker runtime reverts from 'nvidia' to 'default' after restarting the Docker service, pin it in the daemon configuration:
+
+ Backup the daemon.json file:
+
+ ```sh
+ sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.bak
+ ```
+
+ Update the daemon.json file:
+
+ ```sh
+ echo '{
+ "runtimes": {
+ "nvidia": {
+ "path": "nvidia-container-runtime"
+ }
+ },
+ "default-runtime": "nvidia"
+ }' | sudo tee /etc/docker/daemon.json > /dev/null
+ ```
+
+ Restart the Docker service:
+
+ ```sh
+ sudo systemctl restart docker
+ ```
+
+ Confirm 'nvidia' is the default runtime used by Docker by repeating step 3.
+
+5. Run the container:
+
+ ```sh
+ docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+ ```
+
+
## OpenShift
+### Simple deployment
+
+Manifest example: [docling-serve-simple.yaml](./deploy-examples/docling-serve-simple.yaml)
+
+This deployment example has the following features:
+
+- Deployment configuration
+- Service configuration
+- NVIDIA CUDA enabled
+
+Install the app with:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-simple.yaml
+```
+
+For using the API:
+
+```sh
+# Port-forward the service
+oc port-forward svc/docling-serve 5001:5001
+
+# Make a test query
+curl -X 'POST' \
+ "localhost:5001/v1alpha/convert/source/async" \
+ -H "accept: application/json" \
+ -H "Content-Type: application/json" \
+ -d '{
+ "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+ }'
+```
+
### Secure deployment with `oauth-proxy`
Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml)
@@ -15,7 +169,7 @@ This deployment has the following features:
Install the app with:
```sh
-kubectl apply -f docs/deploy-examples/docling-serve-oauth.yaml
+oc apply -f docs/deploy-examples/docling-serve-oauth.yaml
```
For using the API: