feat(docling): add VLM pipeline configuration and usage instructions

Add environment variables for VLM pipeline support (remote services,
model loading, device selection) and a persistent cache volume. Improve
the final report with a detailed VLM pipeline usage example showing how
to use Ollama with the granite3.2-vision model for document conversion.
This commit is contained in:
Yury Kossakovsky
2025-11-26 17:32:09 -07:00
parent 761200adbc
commit ac13c400f5
3 changed files with 42 additions and 7 deletions

View File

@@ -506,7 +506,15 @@ RAGFLOW_ELASTICSEARCH_PASSWORD=
# - ghcr.io/docling-project/docling-serve-cu126 (10.0 GB, NVIDIA GPU with CUDA 12.6)
# - ghcr.io/docling-project/docling-serve-cu128 (11.4 GB, NVIDIA GPU with CUDA 12.8)
# Note: Web UI is always enabled on /ui
#
# VLM Pipeline Configuration:
# DOCLING_SERVE_ENABLE_REMOTE_SERVICES: Required for VLM via external APIs (Ollama, vLLM)
# DOCLING_SERVE_LOAD_MODELS_AT_BOOT: Pre-load standard models at startup
# DOCLING_DEVICE: Device for model inference (cpu, cuda, mps)
############
DOCLING_IMAGE=ghcr.io/docling-project/docling-serve-cpu
DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true
DOCLING_SERVE_LOAD_MODELS_AT_BOOT=false
DOCLING_DEVICE=cpu
##########################################################################################

View File

@@ -2,6 +2,7 @@ volumes:
caddy-config:
caddy-data:
comfyui_data:
docling_cache:
flowise:
grafana:
langfuse_clickhouse_data:
@@ -10,9 +11,9 @@ volumes:
langfuse_postgres_data:
letta_data:
libretranslate_api_keys:
libretranslate_models:
lightrag_data:
lightrag_inputs:
n8n_storage:
ollama_storage:
open-webui:
@@ -1059,6 +1060,11 @@ services:
restart: unless-stopped
environment:
- DOCLING_SERVE_ENABLE_UI=1
- DOCLING_SERVE_ENABLE_REMOTE_SERVICES=${DOCLING_SERVE_ENABLE_REMOTE_SERVICES:-true}
- DOCLING_SERVE_LOAD_MODELS_AT_BOOT=${DOCLING_SERVE_LOAD_MODELS_AT_BOOT:-false}
- DOCLING_DEVICE=${DOCLING_DEVICE:-cpu}
volumes:
- docling_cache:/opt/app-root/src/.cache
shm_size: 1g
healthcheck:
test:

View File

@@ -227,14 +227,35 @@ if is_profile_active "docling"; then
echo
echo "Web UI: https://${DOCLING_HOSTNAME:-<hostname_not_set>}/ui"
echo "API Docs: https://${DOCLING_HOSTNAME:-<hostname_not_set>}/docs"
echo
echo "Credentials (Caddy Basic Auth):"
echo "User: ${DOCLING_USERNAME:-<not_set_in_env>}"
echo "Password: ${DOCLING_PASSWORD:-<not_set_in_env>}"
echo
echo "API Endpoints:"
echo "External (via Caddy): https://${DOCLING_HOSTNAME:-<hostname_not_set>}"
echo "Internal (from n8n): http://docling:5001"
echo
echo "VLM Pipeline (Vision Language Model):"
echo " 1. Load VLM model in Ollama via Open WebUI -> Settings -> Models"
echo " Example: granite3.2-vision:2b"
echo
echo " 2. API request with VLM pipeline:"
echo ' curl -X POST "https://'"${DOCLING_HOSTNAME:-<hostname_not_set>}"'/v1/convert/source" \'
echo ' -H "Content-Type: application/json" \'
echo ' -u "'"${DOCLING_USERNAME:-<not_set_in_env>}"':'"${DOCLING_PASSWORD:-<not_set_in_env>}"'" \'
echo " -d '{"
echo ' "source": "https://arxiv.org/pdf/2501.17887",'
echo ' "options": {'
echo ' "pipeline": "vlm",'
echo ' "vlm_pipeline_model_api": {'
echo ' "url": "http://ollama:11434/v1/chat/completions",'
echo ' "params": {"model": "granite3.2-vision:2b"},'
echo ' "prompt": "Convert this page to docling.",'
echo ' "timeout": 300'
echo " }"
echo " }"
echo " }'"
fi
if is_profile_active "gotenberg"; then