From 525a43ff6f04b7cc80f9dd6a0e653a8d8c4ab317 Mon Sep 17 00:00:00 2001 From: Rui Dias Gomes <66125272+rmdg88@users.noreply.github.com> Date: Thu, 17 Apr 2025 13:29:34 +0100 Subject: [PATCH] docs: update deployment examples (#135) Signed-off-by: rmdg88 Signed-off-by: Rui Dias Gomes <66125272+rmdg88@users.noreply.github.com> --- .markdownlint-cli2.yaml | 2 +- docs/deploy-examples/compose-gpu.yaml | 15 ++ .../deploy-examples/docling-serve-simple.yaml | 58 +++++++ docs/deployment.md | 158 +++++++++++++++++- 4 files changed, 230 insertions(+), 3 deletions(-) create mode 100644 docs/deploy-examples/compose-gpu.yaml create mode 100644 docs/deploy-examples/docling-serve-simple.yaml diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml index 8ea0195..719b13c 100644 --- a/.markdownlint-cli2.yaml +++ b/.markdownlint-cli2.yaml @@ -3,7 +3,7 @@ config: no-emphasis-as-header: false first-line-heading: false MD033: - allowed_elements: ["details", "summary", "br", "a", "p", "img"] + allowed_elements: ["details", "summary", "br", "a", "b", "p", "img"] MD024: siblings_only: true globs: diff --git a/docs/deploy-examples/compose-gpu.yaml b/docs/deploy-examples/compose-gpu.yaml new file mode 100644 index 0000000..0a5e08d --- /dev/null +++ b/docs/deploy-examples/compose-gpu.yaml @@ -0,0 +1,15 @@ +services: + docling: + image: ghcr.io/docling-project/docling-serve-cu124 + container_name: docling-serve + ports: + - 5001:5001 + environment: + - DOCLING_SERVE_ENABLE_UI=true + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all # nvidia-smi + capabilities: [gpu] diff --git a/docs/deploy-examples/docling-serve-simple.yaml b/docs/deploy-examples/docling-serve-simple.yaml new file mode 100644 index 0000000..47390f4 --- /dev/null +++ b/docs/deploy-examples/docling-serve-simple.yaml @@ -0,0 +1,58 @@ +# This example deployment configures Docling Serve with a Service and cuda image +--- +apiVersion: v1 +kind: Service +metadata: + name: docling-serve + labels: + 
app: docling-serve + component: docling-serve-api +spec: + ports: + - name: http + port: 5001 + targetPort: http + selector: + app: docling-serve + component: docling-serve-api +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: docling-serve + labels: + app: docling-serve + component: docling-serve-api +spec: + replicas: 1 + selector: + matchLabels: + app: docling-serve + component: docling-serve-api + template: + metadata: + labels: + app: docling-serve + component: docling-serve-api + spec: + restartPolicy: Always + containers: + - name: api + resources: + limits: + cpu: 500m + memory: 2Gi + nvidia.com/gpu: 1 # Limit to one GPU + requests: + cpu: 250m + memory: 1Gi + nvidia.com/gpu: 1 # Limit to one GPU + env: + - name: DOCLING_SERVE_ENABLE_UI + value: 'true' + ports: + - name: http + containerPort: 5001 + protocol: TCP + imagePullPolicy: Always + image: 'ghcr.io/docling-project/docling-serve-cu124' diff --git a/docs/deployment.md b/docs/deployment.md index 168b5e9..659e4d2 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -1,7 +1,161 @@ -# Deployment +# Deployment Examples + +This document provides deployment examples for running the application in different environments. + +Choose the deployment option that best fits your setup. + +- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with an NVIDIA GPU (using Docker Compose). +- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments. 
+ +--- + +## Local GPU + +### Docker Compose + +Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) + +This deployment has the following features: + +- NVIDIA CUDA enabled + +Install the app with: + +```sh +docker compose -f docs/deploy-examples/compose-gpu.yaml up -d +``` + +For using the API: + +```sh +# Make a test query +curl -X 'POST' \ + "localhost:5001/v1alpha/convert/source/async" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d '{ + "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}] + }' +``` + +<details>
+<summary><b>Requirements</b></summary> + +- debian/ubuntu/rhel/fedora/opensuse +- docker +- nvidia drivers >=550.54.14 +- nvidia-container-toolkit + +Docs: + +- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/supported-platforms.html) +- [CUDA Toolkit Release Notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6) + +</details>
+ +<details>
+<summary><b>Steps</b></summary> + +1. Check the driver version and decide which GPU(s) you want to use (0/1/2/3...), then update the [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file accordingly, or keep `count: all` + + ```sh + nvidia-smi + ``` + +2. Check if the NVIDIA Container Toolkit is installed/updated + + ```sh + # debian + dpkg -l | grep nvidia-container-toolkit + ``` + + ```sh + # rhel + rpm -q nvidia-container-toolkit + ``` + + NVIDIA Container Toolkit install steps can be found here: + + <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html> + +3. Check which runtime is being used by Docker + + ```sh + # docker + docker info | grep -i runtime + ``` + +4. If the default Docker runtime changes back from 'nvidia' to 'default' after restarting the Docker service (optional): + + Backup the daemon.json file: + + ```sh + sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.bak + ``` + + Update the daemon.json file: + + ```sh + echo '{ + "runtimes": { + "nvidia": { + "path": "nvidia-container-runtime" + } + }, + "default-runtime": "nvidia" + }' | sudo tee /etc/docker/daemon.json > /dev/null + ``` + + Restart the Docker service: + + ```sh + sudo systemctl restart docker + ``` + + Confirm 'nvidia' is the default runtime used by Docker by repeating step 3. + +5. Run the container: + + ```sh + docker compose -f docs/deploy-examples/compose-gpu.yaml up -d + ``` + +</details>
## OpenShift +### Simple deployment + +Manifest example: [docling-serve-simple.yaml](./deploy-examples/docling-serve-simple.yaml) + +This deployment example has the following features: + +- Deployment configuration +- Service configuration +- NVIDIA CUDA enabled + +Install the app with: + +```sh +oc apply -f docs/deploy-examples/docling-serve-simple.yaml +``` + +For using the API: + +```sh +# Port-forward the service +oc port-forward svc/docling-serve 5001:5001 + +# Make a test query +curl -X 'POST' \ + "localhost:5001/v1alpha/convert/source/async" \ + -H "accept: application/json" \ + -H "Content-Type: application/json" \ + -d '{ + "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}] + }' +``` + ### Secure deployment with `oauth-proxy` Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml) @@ -15,7 +169,7 @@ This deployment has the following features: Install the app with: ```sh -kubectl apply -f docs/deploy-examples/docling-serve-oauth.yaml +oc apply -f docs/deploy-examples/docling-serve-oauth.yaml ``` For using the API: