From ac13c400f51e83544a1a8d8092138dc3bcac71dd Mon Sep 17 00:00:00 2001
From: Yury Kossakovsky <kossakovsky93@gmail.com>
Date: Wed, 26 Nov 2025 17:32:09 -0700
Subject: [PATCH] feat(docling): add vlm pipeline configuration and usage
 instructions

Add environment variables for VLM pipeline support (remote services,
model loading, device selection) and a persistent cache volume. Improve
the final report with a detailed VLM pipeline usage example showing how
to use Ollama with the granite3.2-vision model for document conversion.
---
 .env.example               |  8 ++++++++
 docker-compose.yml         |  8 +++++++-
 scripts/07_final_report.sh | 33 +++++++++++++++++++++++++++------
 3 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/.env.example b/.env.example
index 9d148a6..b3e62bc 100644
--- a/.env.example
+++ b/.env.example
@@ -506,7 +506,15 @@ RAGFLOW_ELASTICSEARCH_PASSWORD=
 #   - ghcr.io/docling-project/docling-serve-cu126 (10.0 GB, NVIDIA GPU with CUDA 12.6)
 #   - ghcr.io/docling-project/docling-serve-cu128 (11.4 GB, NVIDIA GPU with CUDA 12.8)
 # Note: Web UI is always enabled on /ui
+#
+# VLM Pipeline Configuration:
+# DOCLING_SERVE_ENABLE_REMOTE_SERVICES: Required for VLM via external APIs (Ollama, vLLM)
+# DOCLING_SERVE_LOAD_MODELS_AT_BOOT: Pre-load standard models at startup
+# DOCLING_DEVICE: Device for model inference (cpu, cuda, mps)
 ############
 DOCLING_IMAGE=ghcr.io/docling-project/docling-serve-cpu
+DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true
+DOCLING_SERVE_LOAD_MODELS_AT_BOOT=false
+DOCLING_DEVICE=cpu
 
 ##########################################################################################
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 35aa54a..50c7810 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,6 +2,7 @@ volumes:
   caddy-config:
   caddy-data:
   comfyui_data:
+  docling_cache:
   flowise:
   grafana:
   langfuse_clickhouse_data:
@@ -10,9 +11,9 @@ volumes:
   langfuse_postgres_data:
   letta_data:
   libretranslate_api_keys:
+  libretranslate_models:
   lightrag_data:
   lightrag_inputs:
-  libretranslate_models:
   n8n_storage:
   ollama_storage:
   open-webui:
@@ -1059,6 +1060,11 @@ services:
     restart: unless-stopped
     environment:
       - DOCLING_SERVE_ENABLE_UI=1
+      - DOCLING_SERVE_ENABLE_REMOTE_SERVICES=${DOCLING_SERVE_ENABLE_REMOTE_SERVICES:-true}
+      - DOCLING_SERVE_LOAD_MODELS_AT_BOOT=${DOCLING_SERVE_LOAD_MODELS_AT_BOOT:-false}
+      - DOCLING_DEVICE=${DOCLING_DEVICE:-cpu}
+    volumes:
+      - docling_cache:/opt/app-root/src/.cache
     shm_size: 1g
     healthcheck:
       test:
diff --git a/scripts/07_final_report.sh b/scripts/07_final_report.sh
index ac94a82..d221b59 100644
--- a/scripts/07_final_report.sh
+++ b/scripts/07_final_report.sh
@@ -227,14 +227,35 @@ if is_profile_active "docling"; then
   echo
   echo "Web UI: https://${DOCLING_HOSTNAME:-<hostname_not_set>}/ui"
   echo "API Docs: https://${DOCLING_HOSTNAME:-<hostname_not_set>}/docs"
-  echo ""
-  echo ""
+  echo
+  echo "Credentials (Caddy Basic Auth):"
   echo "User: ${DOCLING_USERNAME:-<not_set_in_env>}"
   echo "Password: ${DOCLING_PASSWORD:-<not_set_in_env>}"
-  echo ""
-  echo ""
-  echo "API (external via Caddy): https://${DOCLING_HOSTNAME:-<hostname_not_set>}"
-  echo "API (internal): http://docling:5001"
+  echo
+  echo "API Endpoints:"
+  echo "External (via Caddy): https://${DOCLING_HOSTNAME:-<hostname_not_set>}"
+  echo "Internal (from n8n):  http://docling:5001"
+  echo
+  echo "VLM Pipeline (Vision Language Model):"
+  echo "  1. Load VLM model in Ollama via Open WebUI -> Settings -> Models"
+  echo "     Example: granite3.2-vision:2b"
+  echo
+  echo "  2. API request with VLM pipeline:"
+  echo '     curl -X POST "https://'"${DOCLING_HOSTNAME:-<hostname_not_set>}"'/v1/convert/source" \'
+  echo '       -H "Content-Type: application/json" \'
+  echo '       -u "'"${DOCLING_USERNAME:-<not_set_in_env>}"':'"${DOCLING_PASSWORD:-<not_set_in_env>}"'" \'
+  echo "       -d '{"
+  echo '         "source": "https://arxiv.org/pdf/2501.17887",'
+  echo '         "options": {'
+  echo '           "pipeline": "vlm",'
+  echo '           "vlm_pipeline_model_api": {'
+  echo '             "url": "http://ollama:11434/v1/chat/completions",'
+  echo '             "params": {"model": "granite3.2-vision:2b"},'
+  echo '             "prompt": "Convert this page to docling.",'
+  echo '             "timeout": 300'
+  echo "           }"
+  echo "         }"
+  echo "       }'"
 fi
 
 if is_profile_active "gotenberg"; then