From fba8f96b81600dad3e0ec7c95fa31fc92efcefe5 Mon Sep 17 00:00:00 2001
From: Yury Kossakovsky <kossakovsky93@gmail.com>
Date: Sat, 1 Nov 2025 11:57:24 -0600
Subject: [PATCH] Add LightRAG service configuration and credentials

- Introduced LightRAG hostname and credentials in .env.example for environment setup.
- Updated Caddyfile to include reverse proxy configuration for LightRAG service.
- Added LightRAG service definition in docker-compose.yml with necessary environment variables and volume mappings.
- Enhanced README.md to document LightRAG integration and access details.
- Updated scripts to generate LightRAG credentials and include them in the final report for user visibility.
---
 .env.example                   | 12 +++++
 Caddyfile                      |  5 ++
 README.md                      |  3 ++
 docker-compose.yml             | 93 ++++++++++++++++++++++++++++++++++
 scripts/03_generate_secrets.sh |  9 +++-
 scripts/04_wizard.sh           |  1 +
 scripts/07_final_report.sh     | 26 ++++++++++
 7 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/.env.example b/.env.example
index 609f9ad..d6045fa 100644
--- a/.env.example
+++ b/.env.example
@@ -147,6 +147,7 @@ FLOWISE_HOSTNAME=flowise.yourdomain.com
 GRAFANA_HOSTNAME=grafana.yourdomain.com
 LANGFUSE_HOSTNAME=langfuse.yourdomain.com
 LETTA_HOSTNAME=letta.yourdomain.com
+LIGHTRAG_HOSTNAME=lightrag.yourdomain.com
 LT_HOSTNAME=translate.yourdomain.com
 N8N_HOSTNAME=n8n.yourdomain.com
 NEO4J_HOSTNAME=neo4j.yourdomain.com
@@ -206,6 +207,17 @@ PADDLEOCR_PASSWORD_HASH=
 RAGAPP_USERNAME=
 RAGAPP_PASSWORD=
 
+############
+# [required]
+# LightRAG credentials (for built-in authentication)
+# Username and password for web interface login
+# API key for programmatic access to the API
+############
+
+LIGHTRAG_USERNAME=
+LIGHTRAG_PASSWORD=
+LIGHTRAG_API_KEY=
+
    #
    #
 #######
diff --git a/Caddyfile b/Caddyfile
index 6e8d3ea..4ba201c 100644
--- a/Caddyfile
+++ b/Caddyfile
@@ -91,6 +91,11 @@
     reverse_proxy letta:8283
 }
 
+# LightRAG (Graph-based RAG with Knowledge Extraction)
+{$LIGHTRAG_HOSTNAME} {
+    reverse_proxy lightrag:9621
+}
+
 # Weaviate
 {$WEAVIATE_HOSTNAME} {
     reverse_proxy weaviate:8080
diff --git a/README.md b/README.md
index 1c614db..a1863c8 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,8 @@ The installer also makes the following powerful open-source tools **available fo
 
 ✅ [**Letta**](https://docs.letta.com/) - An open-source agent server and SDK that can be connected to various LLM API backends (OpenAI, Anthropic, Ollama, etc.), enabling you to build and manage AI agents.
 
+✅ [**LightRAG**](https://github.com/HKUDS/LightRAG) - A simple and fast graph-based Retrieval-Augmented Generation system with automatic knowledge graph extraction, dual-level retrieval mechanisms, and incremental updates. Supports multiple storage backends (PostgreSQL, Neo4j, JSON) and embedding models.
+
 ✅ [**LibreTranslate**](https://docs.libretranslate.com/) - Self-hosted translation API (50+ languages).
 
 ✅ [**Neo4j**](https://neo4j.com/) - A graph database management system that allows you to model, store, and query data as a network of nodes and relationships.
@@ -143,6 +145,7 @@ After successful installation, your services are up and running! Here's how to g
     - **Flowise:** `flowise.yourdomain.com` (Log in with the email address you provided during installation and the initial password from the summary report.)
     - **Grafana:** `grafana.yourdomain.com`
     - **Langfuse:** `langfuse.yourdomain.com`
+    - **LightRAG:** `lightrag.yourdomain.com`
     - **Letta:** `letta.yourdomain.com`
     - **LibreTranslate:** `translate.yourdomain.com`
     - **Neo4j:** `neo4j.yourdomain.com`
diff --git a/docker-compose.yml b/docker-compose.yml
index b0e8bb9..ed3b8b8 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -10,6 +10,8 @@ volumes:
   langfuse_postgres_data:
   letta_data:
   libretranslate_api_keys:
+  lightrag_data:
+  lightrag_inputs:
   libretranslate_models:
   n8n_storage:
   ollama_storage:
@@ -233,6 +235,7 @@ services:
       - LANGFUSE_HOSTNAME=${LANGFUSE_HOSTNAME}
       - LETSENCRYPT_EMAIL=${LETSENCRYPT_EMAIL:-internal}
       - LETTA_HOSTNAME=${LETTA_HOSTNAME}
+      - LIGHTRAG_HOSTNAME=${LIGHTRAG_HOSTNAME}
       - LT_HOSTNAME=${LT_HOSTNAME}
       - LT_USERNAME=${LT_USERNAME}
       - LT_PASSWORD_HASH=${LT_PASSWORD_HASH}
@@ -921,3 +924,93 @@ services:
       nofile:
         soft: 500000
         hard: 500000
+
+  lightrag:
+    image: ghcr.io/hkuds/lightrag:latest
+    container_name: lightrag
+    profiles: ["lightrag"]
+    restart: unless-stopped
+    environment:
+      # Server configuration: bind on all interfaces; 9621 is the port the Caddy reverse proxy and the healthcheck below both target
+      - HOST=0.0.0.0
+      - PORT=9621
+      - WEBUI_TITLE=LightRAG Knowledge Graph
+      - WEBUI_DESCRIPTION=Graph-based RAG with Knowledge Extraction
+
+      # Authentication (built-in): a single "user:password" account plus an API key. NOTE(review): TOKEN_SECRET is not set here - confirm LightRAG does not fall back to a well-known default JWT signing secret
+      - AUTH_ACCOUNTS=${LIGHTRAG_USERNAME}:${LIGHTRAG_PASSWORD}
+      - LIGHTRAG_API_KEY=${LIGHTRAG_API_KEY}
+
+      # LLM configuration: hard-coded model, requested from Ollama at http://ollama:11434
+      - LLM_BINDING=ollama
+      - LLM_MODEL=qwen2.5:32b
+      - LLM_BINDING_HOST=http://ollama:11434
+      - OLLAMA_LLM_NUM_CTX=32768
+      - MAX_ASYNC=4
+
+      # Embedding configuration: same Ollama host; EMBEDDING_DIM presumably has to match the chosen embedding model - verify before swapping models
+      - EMBEDDING_BINDING=ollama
+      - EMBEDDING_MODEL=bge-m3:latest
+      - EMBEDDING_DIM=1024
+      - EMBEDDING_BINDING_HOST=http://ollama:11434
+      - OLLAMA_EMBEDDING_NUM_CTX=8192
+      - EMBEDDING_FUNC_MAX_ASYNC=16
+      - EMBEDDING_BATCH_NUM=32
+
+      # Query configuration (retrieval limits and token budgets)
+      - ENABLE_LLM_CACHE=true
+      - TOP_K=60
+      - CHUNK_TOP_K=20
+      - COSINE_THRESHOLD=0.2
+      - MAX_ENTITY_TOKENS=6000
+      - MAX_RELATION_TOKENS=8000
+      - MAX_TOTAL_TOKENS=30000
+
+      # Document processing (chunking and summary settings)
+      - ENABLE_LLM_CACHE_FOR_EXTRACT=true
+      - SUMMARY_LANGUAGE=English
+      - CHUNK_SIZE=1200
+      - CHUNK_OVERLAP_SIZE=100
+      - SUMMARY_MAX_TOKENS=500
+      - SUMMARY_CONTEXT_SIZE=10000
+
+      # Storage configuration: KV and doc-status are fixed to JSON; graph and vector backends default to NetworkX / NanoVectorDB and change only when LIGHTRAG_GRAPH_STORAGE / LIGHTRAG_VECTOR_STORAGE are set (nothing here detects PostgreSQL or Neo4j)
+      - LIGHTRAG_KV_STORAGE=JsonKVStorage
+      - LIGHTRAG_DOC_STATUS_STORAGE=JsonDocStatusStorage
+      - LIGHTRAG_GRAPH_STORAGE=${LIGHTRAG_GRAPH_STORAGE:-NetworkXStorage}
+      - LIGHTRAG_VECTOR_STORAGE=${LIGHTRAG_VECTOR_STORAGE:-NanoVectorDBStorage}
+
+      # PostgreSQL connection settings (always passed; presumably only consulted when a PostgreSQL storage backend is selected above)
+      - POSTGRES_HOST=postgres
+      - POSTGRES_PORT=5432
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
+      - POSTGRES_DATABASE=postgres
+      - POSTGRES_MAX_CONNECTIONS=12
+      - POSTGRES_VECTOR_INDEX_TYPE=HNSW
+      - POSTGRES_HNSW_M=16
+      - POSTGRES_HNSW_EF=200
+
+      # Neo4j connection settings (always passed; presumably only consulted when a Neo4j graph backend is selected above)
+      - NEO4J_URI=bolt://neo4j:7687
+      - NEO4J_USERNAME=${NEO4J_AUTH_USERNAME:-neo4j}
+      - NEO4J_PASSWORD=${NEO4J_AUTH_PASSWORD}
+      - NEO4J_DATABASE=neo4j
+
+      # Directories: each path is backed by a named volume in the volumes section below
+      - INPUT_DIR=/app/data/inputs
+      - WORKING_DIR=/app/data/rag_storage
+
+    volumes:
+      - lightrag_data:/app/data/rag_storage
+      - lightrag_inputs:/app/data/inputs
+
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+    healthcheck: # NOTE(review): assumes wget is present in the lightrag image - TODO confirm, otherwise the container is always reported unhealthy
+      test: ["CMD-SHELL", "wget -qO- http://localhost:9621/health > /dev/null 2>&1 || exit 1"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
diff --git a/scripts/03_generate_secrets.sh b/scripts/03_generate_secrets.sh
index c6a7583..e6b09d6 100644
--- a/scripts/03_generate_secrets.sh
+++ b/scripts/03_generate_secrets.sh
@@ -62,6 +62,9 @@ declare -A VARS_TO_GENERATE=(
     ["RAGFLOW_MYSQL_ROOT_PASSWORD"]="password:32"
     ["RAGFLOW_MINIO_ROOT_PASSWORD"]="password:32"
     ["RAGFLOW_REDIS_PASSWORD"]="password:32"
+    # LightRAG credentials
+    ["LIGHTRAG_PASSWORD"]="password:32"
+    ["LIGHTRAG_API_KEY"]="secret:48"
 )
 
 # Initialize existing_env_vars and attempt to read .env if it exists
@@ -267,6 +270,7 @@ generated_values["COMFYUI_USERNAME"]="$USER_EMAIL" # Set ComfyUI username for Ca
 generated_values["RAGAPP_USERNAME"]="$USER_EMAIL" # Set RAGApp username for Caddy
 generated_values["PADDLEOCR_USERNAME"]="$USER_EMAIL" # Set PaddleOCR username for Caddy
 generated_values["LT_USERNAME"]="$USER_EMAIL" # Set LibreTranslate username for Caddy
+generated_values["LIGHTRAG_USERNAME"]="$USER_EMAIL" # Set LightRAG username for built-in auth
 generated_values["WAHA_DASHBOARD_USERNAME"]="$USER_EMAIL" # WAHA dashboard username default
 generated_values["WHATSAPP_SWAGGER_USERNAME"]="$USER_EMAIL" # WAHA swagger username default
 
@@ -293,6 +297,7 @@ found_vars["COMFYUI_USERNAME"]=0
 found_vars["RAGAPP_USERNAME"]=0
 found_vars["PADDLEOCR_USERNAME"]=0
 found_vars["LT_USERNAME"]=0
+found_vars["LIGHTRAG_USERNAME"]=0
 found_vars["WAHA_DASHBOARD_USERNAME"]=0
 found_vars["WHATSAPP_SWAGGER_USERNAME"]=0
 
@@ -341,7 +346,7 @@ while IFS= read -r line || [[ -n "$line" ]]; do
             # This 'else' block is for lines from template not covered by existing values or VARS_TO_GENERATE.
             # Check if it is one of the user input vars - these are handled by found_vars later if not in template.
             is_user_input_var=0 # Reset for each line
-    user_input_vars=("FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "OPENAI_API_KEY" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "WEAVIATE_USERNAME" "NEO4J_AUTH_USERNAME" "COMFYUI_USERNAME" "RAGAPP_USERNAME" "PADDLEOCR_USERNAME" "LT_USERNAME" "WAHA_DASHBOARD_USERNAME" "WHATSAPP_SWAGGER_USERNAME")
+    user_input_vars=("FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "OPENAI_API_KEY" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "WEAVIATE_USERNAME" "NEO4J_AUTH_USERNAME" "COMFYUI_USERNAME" "RAGAPP_USERNAME" "PADDLEOCR_USERNAME" "LT_USERNAME" "LIGHTRAG_USERNAME" "WAHA_DASHBOARD_USERNAME" "WHATSAPP_SWAGGER_USERNAME")
             for uivar in "${user_input_vars[@]}"; do
                 if [[ "$varName" == "$uivar" ]]; then
                     is_user_input_var=1
@@ -423,7 +428,7 @@ if [[ -z "${generated_values[SERVICE_ROLE_KEY]}" ]]; then
 fi
 
 # Add any custom variables that weren't found in the template
-for var in "FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "OPENAI_API_KEY" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "WEAVIATE_USERNAME" "NEO4J_AUTH_USERNAME" "COMFYUI_USERNAME" "RAGAPP_USERNAME" "PADDLEOCR_USERNAME" "LT_USERNAME" "WAHA_DASHBOARD_USERNAME" "WHATSAPP_SWAGGER_USERNAME"; do
+for var in "FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "OPENAI_API_KEY" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "WEAVIATE_USERNAME" "NEO4J_AUTH_USERNAME" "COMFYUI_USERNAME" "RAGAPP_USERNAME" "PADDLEOCR_USERNAME" "LT_USERNAME" "LIGHTRAG_USERNAME" "WAHA_DASHBOARD_USERNAME" "WHATSAPP_SWAGGER_USERNAME"; do
     if [[ ${found_vars["$var"]} -eq 0 && -v generated_values["$var"] ]]; then
         # Before appending, check if it's already in TMP_ENV_FILE to avoid duplicates
         if ! grep -q -E "^${var}=" "$TMP_ENV_FILE"; then
diff --git a/scripts/04_wizard.sh b/scripts/04_wizard.sh
index eba5019..9414101 100755
--- a/scripts/04_wizard.sh
+++ b/scripts/04_wizard.sh
@@ -58,6 +58,7 @@ base_services_data=(
     "gotenberg" "Gotenberg (Document Conversion API)"
     "langfuse" "Langfuse Suite (AI Observability - includes Clickhouse, Minio)"
     "letta" "Letta (Agent Server & SDK)"
+    "lightrag" "LightRAG (Graph-based RAG with knowledge graphs and incremental updates)"
     "libretranslate" "LibreTranslate (Self-hosted translation API - 50+ languages)"
     "monitoring" "Monitoring Suite (Prometheus, Grafana, cAdvisor, Node-Exporter)"
     "n8n" "n8n, n8n-worker, n8n-import (Workflow Automation)"
diff --git a/scripts/07_final_report.sh b/scripts/07_final_report.sh
index c4eb614..d428195 100644
--- a/scripts/07_final_report.sh
+++ b/scripts/07_final_report.sh
@@ -292,6 +292,32 @@ if is_profile_active "letta"; then
   echo "Authorization: Bearer ${LETTA_SERVER_PASSWORD}"
 fi
 
+if is_profile_active "lightrag"; then # LightRAG report section; unset hostname/credentials fall back to the placeholders below
+  echo
+  echo "================================= LightRAG ============================="
+  echo
+  echo "Host: ${LIGHTRAG_HOSTNAME:-<hostname_not_set>}"
+  echo "Web UI: https://${LIGHTRAG_HOSTNAME:-<hostname_not_set>}"
+  echo "Internal Access (e.g., from n8n): http://lightrag:9621"
+  echo ""
+  echo "Authentication (Web UI):"
+  echo "  User: ${LIGHTRAG_USERNAME:-<not_set_in_env>}"
+  echo "  Password: ${LIGHTRAG_PASSWORD:-<not_set_in_env>}"
+  echo ""
+  echo "API Access:"
+  echo "  API Key: ${LIGHTRAG_API_KEY:-<not_set_in_env>}"
+  echo "  API Docs: https://${LIGHTRAG_HOSTNAME:-<hostname_not_set>}/docs"
+  echo "  Ollama-compatible: https://${LIGHTRAG_HOSTNAME:-<hostname_not_set>}/api/chat" # LightRAG emulates Ollama's /api/* routes, not OpenAI's /v1/chat/completions
+  echo ""
+  echo "Configuration:"
+  echo "  LLM: Ollama (qwen2.5:32b) at http://ollama:11434"
+  echo "  Embeddings: Ollama (bge-m3:latest) at http://ollama:11434"
+  echo "  Storage: JSON/NetworkX/NanoVectorDB by default (override via LIGHTRAG_GRAPH_STORAGE / LIGHTRAG_VECTOR_STORAGE)"
+  echo ""
+  echo "Note: Requires Ollama to be installed and running for LLM and embeddings."
+  echo "      Upload documents via /app/data/inputs volume or Web UI."
+fi
+
 if is_profile_active "cpu" || is_profile_active "gpu-nvidia" || is_profile_active "gpu-amd"; then
   echo
   echo "================================= Ollama =============================="