From c846f2ae3150fa9c130608f04a295b7dccdabc99 Mon Sep 17 00:00:00 2001
From: Yury Kossakovsky
Date: Wed, 27 Aug 2025 13:46:02 -0600
Subject: [PATCH] Add PaddleOCR service configuration

- Added PaddleOCR hostname to .env.example for environment variable setup.
- Updated Caddyfile to include reverse proxy configuration for PaddleOCR.
- Modified docker-compose.yml to define PaddleOCR service with necessary commands and health checks.
- Enhanced service selection wizard to include PaddleOCR in the options.
- Updated final report script to display PaddleOCR service details if active.
---
Reviewer notes (text in this area is not part of the commit message):
- Amended: the paddleocr healthcheck gains start_period, because the container
  command runs apt-get/pip installs before the server listens on port 8080.
- Line breaks and indentation were restored from a whitespace-collapsed copy;
  if context lines do not match, apply with `git apply --ignore-whitespace`.
- Blob ids on the docker-compose.yml index line predate the amendment.

 .env.example               |  1 +
 Caddyfile                  |  5 +++++
 docker-compose.yml         | 21 +++++++++++++++++++++
 scripts/04_wizard.sh       |  3 ++-
 scripts/06_final_report.sh |  9 +++++++++
 5 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/.env.example b/.env.example
index 068c510..970bd8f 100644
--- a/.env.example
+++ b/.env.example
@@ -147,6 +147,7 @@ PORTAINER_HOSTNAME=portainer.yourdomain.com
 POSTIZ_HOSTNAME=postiz.yourdomain.com
 LETTA_HOSTNAME=letta.yourdomain.com
 QDRANT_HOSTNAME=qdrant.yourdomain.com
+PADDLEOCR_HOSTNAME=paddleocr.yourdomain.com
 COMFYUI_HOSTNAME=comfyui.yourdomain.com
 RAGAPP_HOSTNAME=ragapp.yourdomain.com
 LETSENCRYPT_EMAIL=
diff --git a/Caddyfile b/Caddyfile
index 68b11e4..da48f67 100644
--- a/Caddyfile
+++ b/Caddyfile
@@ -104,6 +104,11 @@ https://{$NEO4J_HOSTNAME}:7687 {
     reverse_proxy neo4j:7687
 }
 
+# PaddleOCR (PaddleX Basic Serving)
+{$PADDLEOCR_HOSTNAME} {
+    reverse_proxy paddleocr:8080
+}
+
 import /etc/caddy/addons/*.conf
 
 # # SearXNG
diff --git a/docker-compose.yml b/docker-compose.yml
index 705dbdc..4ff7d0e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -217,6 +217,7 @@ services:
       - PROMETHEUS_PASSWORD_HASH=${PROMETHEUS_PASSWORD_HASH}
       - PROMETHEUS_USERNAME=${PROMETHEUS_USERNAME}
       - QDRANT_HOSTNAME=${QDRANT_HOSTNAME}
+      - PADDLEOCR_HOSTNAME=${PADDLEOCR_HOSTNAME}
       - RAGAPP_HOSTNAME=${RAGAPP_HOSTNAME}
       - RAGAPP_PASSWORD_HASH=${RAGAPP_PASSWORD_HASH}
       - RAGAPP_USERNAME=${RAGAPP_USERNAME}
@@ -697,3 +698,23 @@ services:
     command: /bin/sh -c 'if [ -f /app/requirements.txt ]; then python -m pip install --no-cache-dir -r /app/requirements.txt; fi; python /app/main.py'
     volumes:
       - ./python-runner:/app
+
+  paddleocr:
+    image: paddlepaddle/paddle:latest
+    container_name: paddleocr
+    profiles: ["paddleocr"]
+    restart: unless-stopped
+    command: /bin/sh -c "set -e; \
+      apt-get update && apt-get install -y --no-install-recommends git wget && rm -rf /var/lib/apt/lists/*; \
+      python -m pip install --upgrade pip; \
+      pip install paddlex; \
+      paddlex --install serving; \
+      exec paddlex --serve --pipeline OCR --device cpu --port 8080"
+    healthcheck:
+      test: ["CMD-SHELL", "wget -qO- http://localhost:8080 > /dev/null || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 600s  # first boot runs apt/pip installs before the server listens
+    expose:
+      - "8080"
diff --git a/scripts/04_wizard.sh b/scripts/04_wizard.sh
index a802141..cced2f8 100755
--- a/scripts/04_wizard.sh
+++ b/scripts/04_wizard.sh
@@ -64,6 +64,7 @@ base_services_data=(
     "neo4j" "Neo4j (Graph Database)"
     "letta" "Letta (Agent Server & SDK)"
     "gotenberg" "Gotenberg (Document Conversion API)"
+    "paddleocr" "PaddleOCR (OCR API Server)"
     "crawl4ai" "Crawl4ai (Web Crawler for AI)"
     "ragapp" "RAGApp (Open-source RAG UI + API)"
     "open-webui" "Open WebUI (ChatGPT-like Interface)"
@@ -105,7 +106,7 @@ done
 
 # Use whiptail to display the checklist
 CHOICES=$(whiptail --title "Service Selection Wizard" --checklist \
-  "Choose the services you want to deploy.\nUse ARROW KEYS to navigate, SPACEBAR to select/deselect, ENTER to confirm." 32 90 21 \
+  "Choose the services you want to deploy.\nUse ARROW KEYS to navigate, SPACEBAR to select/deselect, ENTER to confirm." 32 90 22 \
   "${services[@]}" \
   3>&1 1>&2 2>&3)
 
diff --git a/scripts/06_final_report.sh b/scripts/06_final_report.sh
index bc05644..4df5943 100755
--- a/scripts/06_final_report.sh
+++ b/scripts/06_final_report.sh
@@ -195,6 +195,15 @@ if is_profile_active "gotenberg"; then
   echo "  Office to PDF: POST /forms/libreoffice/convert"
 fi
 
+if is_profile_active "paddleocr"; then
+  echo
+  echo "================================= PaddleOCR ==========================="
+  echo
+  echo "Host: ${PADDLEOCR_HOSTNAME:-}"
+  echo "Internal Access (HTTP): http://paddleocr:8080"
+  echo "Notes: PaddleX Basic Serving (CPU), pipeline=OCR"
+fi
+
 if is_profile_active "python-runner"; then
   echo
   echo "================================= Python Runner ========================"