feat(docling): add VLM pipeline configuration and usage instructions

Add environment variables for VLM pipeline support (remote services,
model loading, device selection) and a persistent cache volume. Improve
the final report with a detailed VLM pipeline usage example showing how
to use Ollama with the granite3.2-vision model for document conversion.
This commit is contained in:
Yury Kossakovsky
2025-11-26 17:32:09 -07:00
parent 761200adbc
commit ac13c400f5
3 changed files with 42 additions and 7 deletions

View File

@@ -506,7 +506,15 @@ RAGFLOW_ELASTICSEARCH_PASSWORD=
# - ghcr.io/docling-project/docling-serve-cu126 (10.0 GB, NVIDIA GPU with CUDA 12.6)
# - ghcr.io/docling-project/docling-serve-cu128 (11.4 GB, NVIDIA GPU with CUDA 12.8)
# Note: Web UI is always enabled on /ui
#
# VLM Pipeline Configuration:
# DOCLING_SERVE_ENABLE_REMOTE_SERVICES: Required for VLM via external APIs (Ollama, vLLM)
# DOCLING_SERVE_LOAD_MODELS_AT_BOOT: Pre-load standard models at startup
# DOCLING_DEVICE: Device for model inference (cpu, cuda, mps)
############
DOCLING_IMAGE=ghcr.io/docling-project/docling-serve-cpu
DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true
DOCLING_SERVE_LOAD_MODELS_AT_BOOT=false
DOCLING_DEVICE=cpu
##########################################################################################

View File

@@ -2,6 +2,7 @@ volumes:
caddy-config:
caddy-data:
comfyui_data:
docling_cache:
flowise:
grafana:
langfuse_clickhouse_data:
@@ -10,9 +11,9 @@ volumes:
langfuse_postgres_data:
letta_data:
libretranslate_api_keys:
libretranslate_models:
lightrag_data:
lightrag_inputs:
n8n_storage:
ollama_storage:
open-webui:
@@ -1059,6 +1060,11 @@ services:
restart: unless-stopped
environment:
- DOCLING_SERVE_ENABLE_UI=1
- DOCLING_SERVE_ENABLE_REMOTE_SERVICES=${DOCLING_SERVE_ENABLE_REMOTE_SERVICES:-true}
- DOCLING_SERVE_LOAD_MODELS_AT_BOOT=${DOCLING_SERVE_LOAD_MODELS_AT_BOOT:-false}
- DOCLING_DEVICE=${DOCLING_DEVICE:-cpu}
volumes:
- docling_cache:/opt/app-root/src/.cache
shm_size: 1g
healthcheck:
test:

View File

@@ -227,14 +227,35 @@ if is_profile_active "docling"; then
echo
echo "Web UI: https://${DOCLING_HOSTNAME:-<hostname_not_set>}/ui"
echo "API Docs: https://${DOCLING_HOSTNAME:-<hostname_not_set>}/docs"
echo
echo "Credentials (Caddy Basic Auth):"
echo "User: ${DOCLING_USERNAME:-<not_set_in_env>}"
echo "Password: ${DOCLING_PASSWORD:-<not_set_in_env>}"
echo
echo "API Endpoints:"
echo "External (via Caddy): https://${DOCLING_HOSTNAME:-<hostname_not_set>}"
echo "Internal (from n8n): http://docling:5001"
echo
echo "VLM Pipeline (Vision Language Model):"
echo " 1. Load VLM model in Ollama via Open WebUI -> Settings -> Models"
echo " Example: granite3.2-vision:2b"
echo
echo " 2. API request with VLM pipeline:"
echo ' curl -X POST "https://'"${DOCLING_HOSTNAME:-<hostname_not_set>}"'/v1/convert/source" \'
echo ' -H "Content-Type: application/json" \'
echo ' -u "'"${DOCLING_USERNAME:-<not_set_in_env>}"':'"${DOCLING_PASSWORD:-<not_set_in_env>}"'" \'
echo " -d '{"
echo ' "source": "https://arxiv.org/pdf/2501.17887",'
echo ' "options": {'
echo ' "pipeline": "vlm",'
echo ' "vlm_pipeline_model_api": {'
echo ' "url": "http://ollama:11434/v1/chat/completions",'
echo ' "params": {"model": "granite3.2-vision:2b"},'
echo ' "prompt": "Convert this page to docling.",'
echo ' "timeout": 300'
echo " }"
echo " }"
echo " }'"
fi
if is_profile_active "gotenberg"; then