diff --git a/.env.example b/.env.example
index 9d148a6..b3e62bc 100644
--- a/.env.example
+++ b/.env.example
@@ -506,7 +506,15 @@ RAGFLOW_ELASTICSEARCH_PASSWORD=
 # - ghcr.io/docling-project/docling-serve-cu126 (10.0 GB, NVIDIA GPU with CUDA 12.6)
 # - ghcr.io/docling-project/docling-serve-cu128 (11.4 GB, NVIDIA GPU with CUDA 12.8)
 # Note: Web UI is always enabled on /ui
+#
+# VLM Pipeline Configuration:
+# DOCLING_SERVE_ENABLE_REMOTE_SERVICES: Required for VLM via external APIs (Ollama, vLLM)
+# DOCLING_SERVE_LOAD_MODELS_AT_BOOT: Pre-load standard models at startup
+# DOCLING_DEVICE: Device for model inference (cpu, cuda, mps)
 
 ############
 DOCLING_IMAGE=ghcr.io/docling-project/docling-serve-cpu
+DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true
+DOCLING_SERVE_LOAD_MODELS_AT_BOOT=false
+DOCLING_DEVICE=cpu
 ##########################################################################################
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 35aa54a..50c7810 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,6 +2,7 @@ volumes:
   caddy-config:
   caddy-data:
   comfyui_data:
+  docling_cache:
   flowise:
   grafana:
   langfuse_clickhouse_data:
@@ -10,9 +11,9 @@
   langfuse_postgres_data:
   letta_data:
   libretranslate_api_keys:
+  libretranslate_models:
   lightrag_data:
   lightrag_inputs:
-  libretranslate_models:
   n8n_storage:
   ollama_storage:
   open-webui:
@@ -1059,6 +1060,11 @@ services:
     restart: unless-stopped
     environment:
       - DOCLING_SERVE_ENABLE_UI=1
+      - DOCLING_SERVE_ENABLE_REMOTE_SERVICES=${DOCLING_SERVE_ENABLE_REMOTE_SERVICES:-true}
+      - DOCLING_SERVE_LOAD_MODELS_AT_BOOT=${DOCLING_SERVE_LOAD_MODELS_AT_BOOT:-false}
+      - DOCLING_DEVICE=${DOCLING_DEVICE:-cpu}
+    volumes:
+      - docling_cache:/opt/app-root/src/.cache
     shm_size: 1g
     healthcheck:
       test:
diff --git a/scripts/07_final_report.sh b/scripts/07_final_report.sh
index ac94a82..d221b59 100644
--- a/scripts/07_final_report.sh
+++ b/scripts/07_final_report.sh
@@ -227,14 +227,35 @@ if is_profile_active "docling"; then
     echo
     echo "Web UI: https://${DOCLING_HOSTNAME:-}/ui"
     echo "API Docs: https://${DOCLING_HOSTNAME:-}/docs"
-    echo ""
-    echo ""
+    echo
+    echo "Credentials (Caddy Basic Auth):"
     echo "User: ${DOCLING_USERNAME:-}"
     echo "Password: ${DOCLING_PASSWORD:-}"
-    echo ""
-    echo ""
-    echo "API (external via Caddy): https://${DOCLING_HOSTNAME:-}"
-    echo "API (internal): http://docling:5001"
+    echo
+    echo "API Endpoints:"
+    echo "External (via Caddy): https://${DOCLING_HOSTNAME:-}"
+    echo "Internal (from n8n): http://docling:5001"
+    echo
+    echo "VLM Pipeline (Vision Language Model):"
+    echo " 1. Load VLM model in Ollama via Open WebUI -> Settings -> Models"
+    echo " Example: granite3.2-vision:2b"
+    echo
+    echo " 2. API request with VLM pipeline:"
+    echo ' curl -X POST "https://'"${DOCLING_HOSTNAME:-}"'/v1/convert/source" \'
+    echo ' -H "Content-Type: application/json" \'
+    echo ' -u "'"${DOCLING_USERNAME:-}"':'"${DOCLING_PASSWORD:-}"'" \'
+    echo " -d '{"
+    echo ' "source": "https://arxiv.org/pdf/2501.17887",'
+    echo ' "options": {'
+    echo ' "pipeline": "vlm",'
+    echo ' "vlm_pipeline_model_api": {'
+    echo ' "url": "http://ollama:11434/v1/chat/completions",'
+    echo ' "params": {"model": "granite3.2-vision:2b"},'
+    echo ' "prompt": "Convert this page to docling.",'
+    echo ' "timeout": 300'
+    echo " }"
+    echo " }"
+    echo " }'"
 fi
 
 if is_profile_active "gotenberg"; then