From ac13c400f51e83544a1a8d8092138dc3bcac71dd Mon Sep 17 00:00:00 2001 From: Yury Kossakovsky Date: Wed, 26 Nov 2025 17:32:09 -0700 Subject: [PATCH] feat(docling): add vlm pipeline configuration and usage instructions add environment variables for vlm pipeline support (remote services, model loading, device selection) and persistent cache volume. improve final report with detailed vlm pipeline usage example showing how to use ollama with granite3.2-vision model for document conversion. --- .env.example | 8 ++++++++ docker-compose.yml | 8 +++++++- scripts/07_final_report.sh | 33 +++++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/.env.example b/.env.example index 9d148a6..b3e62bc 100644 --- a/.env.example +++ b/.env.example @@ -506,7 +506,15 @@ RAGFLOW_ELASTICSEARCH_PASSWORD= # - ghcr.io/docling-project/docling-serve-cu126 (10.0 GB, NVIDIA GPU with CUDA 12.6) # - ghcr.io/docling-project/docling-serve-cu128 (11.4 GB, NVIDIA GPU with CUDA 12.8) # Note: Web UI is always enabled on /ui +# +# VLM Pipeline Configuration: +# DOCLING_SERVE_ENABLE_REMOTE_SERVICES: Required for VLM via external APIs (Ollama, vLLM) +# DOCLING_SERVE_LOAD_MODELS_AT_BOOT: Pre-load standard models at startup +# DOCLING_DEVICE: Device for model inference (cpu, cuda, mps) ############ DOCLING_IMAGE=ghcr.io/docling-project/docling-serve-cpu +DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true +DOCLING_SERVE_LOAD_MODELS_AT_BOOT=false +DOCLING_DEVICE=cpu ########################################################################################## \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 35aa54a..50c7810 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,6 +2,7 @@ volumes: caddy-config: caddy-data: comfyui_data: + docling_cache: flowise: grafana: langfuse_clickhouse_data: @@ -10,9 +11,9 @@ volumes: langfuse_postgres_data: letta_data: libretranslate_api_keys: + libretranslate_models: lightrag_data: lightrag_inputs: - libretranslate_models: n8n_storage: ollama_storage: open-webui: @@ -1059,6 +1060,11 @@ services: restart: unless-stopped environment: - DOCLING_SERVE_ENABLE_UI=1 + - DOCLING_SERVE_ENABLE_REMOTE_SERVICES=${DOCLING_SERVE_ENABLE_REMOTE_SERVICES:-true} + - DOCLING_SERVE_LOAD_MODELS_AT_BOOT=${DOCLING_SERVE_LOAD_MODELS_AT_BOOT:-false} + - DOCLING_DEVICE=${DOCLING_DEVICE:-cpu} + volumes: + - docling_cache:/opt/app-root/src/.cache shm_size: 1g healthcheck: test: diff --git a/scripts/07_final_report.sh b/scripts/07_final_report.sh index ac94a82..d221b59 100644 --- a/scripts/07_final_report.sh +++ b/scripts/07_final_report.sh @@ -227,14 +227,35 @@ if is_profile_active "docling"; then echo echo "Web UI: https://${DOCLING_HOSTNAME:-}/ui" echo "API Docs: https://${DOCLING_HOSTNAME:-}/docs" - echo "" - echo "" + echo + echo "Credentials (Caddy Basic Auth):" echo "User: ${DOCLING_USERNAME:-}" echo "Password: ${DOCLING_PASSWORD:-}" - echo "" - echo "" - echo "API (external via Caddy): https://${DOCLING_HOSTNAME:-}" - echo "API (internal): http://docling:5001" + echo + echo "API Endpoints:" + echo "External (via Caddy): https://${DOCLING_HOSTNAME:-}" + echo "Internal (from n8n): http://docling:5001" + echo + echo "VLM Pipeline (Vision Language Model):" + echo " 1. Load VLM model in Ollama via Open WebUI -> Settings -> Models" + echo " Example: granite3.2-vision:2b" + echo + echo " 2. API request with VLM pipeline:" + echo ' curl -X POST "https://'"${DOCLING_HOSTNAME:-}"'/v1/convert/source" \' + echo ' -H "Content-Type: application/json" \' + echo ' -u "'"${DOCLING_USERNAME:-}"':'"${DOCLING_PASSWORD:-}"'" \' + echo " -d '{" + echo ' "source": "https://arxiv.org/pdf/2501.17887",' + echo ' "options": {' + echo ' "pipeline": "vlm",' + echo ' "vlm_pipeline_model_api": {' + echo ' "url": "http://ollama:11434/v1/chat/completions",' + echo ' "params": {"model": "granite3.2-vision:2b"},' + echo ' "prompt": "Convert this page to docling.",' + echo ' "timeout": 300' + echo " }" + echo " }" + echo " }'" fi if is_profile_active "gotenberg"; then