From c343d2c1e9188d45ebe2c77a0368ed1013d59a64 Mon Sep 17 00:00:00 2001
From: Yury Kossakovsky <kossakovsky93@gmail.com>
Date: Sat, 24 May 2025 09:41:14 -0600
Subject: [PATCH] Add Weaviate service configuration and update documentation

- Introduced Weaviate service in docker-compose.yml with necessary environment variables and health checks.
- Updated Caddyfile to include reverse proxy settings for Weaviate with basic authentication.
- Enhanced README.md to document Weaviate's integration and access details.
- Modified scripts to generate secrets for Weaviate, including username and password handling.
- Updated final report script to display Weaviate configuration details for user clarity.
---
 Caddyfile                      |  8 ++++++++
 README.md                      |  5 ++++-
 docker-compose.yml             | 34 ++++++++++++++++++++++++++++++++++
 scripts/03_generate_secrets.sh | 23 +++++++++++++++++++++--
 scripts/04_wizard.sh           |  5 +++--
 scripts/06_final_report.sh     | 11 +++++++++++
 6 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/Caddyfile b/Caddyfile
index f7d0fe2..dc8b221 100644
--- a/Caddyfile
+++ b/Caddyfile
@@ -40,6 +40,14 @@
     reverse_proxy letta:8283
 }
 
+# Weaviate — NOTE(review): basic_auth and Weaviate's Bearer API key both use the Authorization header; confirm API clients can get through this proxy
+{$WEAVIATE_HOSTNAME} {
+    basic_auth {
+        {$WEAVIATE_USERNAME} {$WEAVIATE_PASSWORD_HASH}
+    }
+    reverse_proxy weaviate:8080
+}
+
 # Prometheus
 {$PROMETHEUS_HOSTNAME} {
     basic_auth { 
diff --git a/README.md b/README.md
index cd8f6f3..e6d478d 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,8 @@ The installer also makes the following powerful open-source tools **available fo
 
 ✅ [**Letta**](https://docs.letta.com/) - An open-source agent server and SDK that can be connected to various LLM API backends (OpenAI, Anthropic, Ollama, etc.), enabling you to build and manage AI agents.
 
+✅ [**Weaviate**](https://weaviate.io/) - An open-source AI-native vector database with a focus on scalability and ease of use. It can be used for RAG, hybrid search, and more.
+
 ✅ [**Ollama**](https://ollama.com/) - Run Llama 3, Mistral, Gemma, and other large language models locally.
 
 ✅ [**Prometheus**](https://prometheus.io/) - An open-source monitoring and alerting toolkit to keep an eye on system health.
@@ -114,11 +116,12 @@ The services will be available at the following addresses (replace `yourdomain.c
 - **Supabase (Dashboard):** `supabase.yourdomain.com`
 - **Langfuse:** `langfuse.yourdomain.com`
 - **Letta:** `letta.yourdomain.com`
+- **Weaviate:** `weaviate.yourdomain.com`
 - **Grafana:** `grafana.yourdomain.com`
 - **SearXNG:** `searxng.yourdomain.com`
 - **Prometheus:** `prometheus.yourdomain.com`
 
-With your n8n instance, you'll have access to over 400 integrations and powerful AI tools to build automated workflows. You can connect n8n to Qdrant or Supabase to store and retrieve information for your AI tasks. If you wish to use large language models (LLMs), you can easily configure them within n8n, assuming you have access to an LLM service.
+With your n8n instance, you'll have access to over 400 integrations and powerful AI tools to build automated workflows. You can connect n8n to Qdrant, Supabase, or Weaviate to store and retrieve information for your AI tasks. If you wish to use large language models (LLMs), you can easily configure them within n8n, assuming you have access to an LLM service.
 
 ## Upgrading
 
diff --git a/docker-compose.yml b/docker-compose.yml
index b189a85..dcd0b4c 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -14,6 +14,7 @@ volumes:
   grafana:
   prometheus_data:
   letta_data:
+  weaviate_data:
 
 x-n8n: &service-n8n
   image: n8nio/n8n:latest
@@ -487,3 +488,36 @@ services:
       LETTA_SERVER_PASSWORD: ${LETTA_SERVER_PASSWORD:-}
     extra_hosts:
       - "host.docker.internal:host-gateway"
+
+  weaviate:
+    image: cr.weaviate.io/semitechnologies/weaviate:latest
+    container_name: weaviate
+    profiles: ["weaviate"]
+    restart: unless-stopped
+    ports:
+      - "8088:8080" # Host 8088 -> container 8080 (avoids clashing with other host services on 8080); published on the host directly, not via Caddy
+      - "50059:50051" # Host 50059 -> container gRPC port 50051
+    volumes:
+      - weaviate_data:/var/lib/weaviate
+    environment:
+      QUERY_DEFAULTS_LIMIT: 25
+      AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: "false"
+      AUTHENTICATION_APIKEY_ENABLED: "true"
+      AUTHENTICATION_APIKEY_ALLOWED_KEYS: ${WEAVIATE_API_KEY}
+      AUTHENTICATION_APIKEY_USERS: ${WEAVIATE_USERNAME} # Identity attached to the API key; presumably any label works (e.g. 'admin') - confirm against Weaviate docs
+      AUTHORIZATION_ADMINLIST_ENABLED: "true" # Explicitly enable Admin List
+      AUTHORIZATION_ADMINLIST_USERS: ${WEAVIATE_USERNAME} # Grant admin rights to the API key user (same value as AUTHENTICATION_APIKEY_USERS)
+      PERSISTENCE_DATA_PATH: "/var/lib/weaviate" # Same path the weaviate_data volume is mounted at
+      ENABLE_API_BASED_MODULES: "true" # To allow integrations like OpenAI, Cohere etc.
+      CLUSTER_HOSTNAME: "node1" # Recommended for single node to avoid issues if hostname changes
+      DEFAULT_VECTORIZER_MODULE: "none" # Explicitly set to none, can be changed by user later
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "wget -q --spider http://localhost:8080/v1/.well-known/ready || exit 1",
+        ]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 10s
diff --git a/scripts/03_generate_secrets.sh b/scripts/03_generate_secrets.sh
index c91f6bd..1fefec9 100755
--- a/scripts/03_generate_secrets.sh
+++ b/scripts/03_generate_secrets.sh
@@ -45,6 +45,8 @@ declare -A VARS_TO_GENERATE=(
     ["LANGFUSE_INIT_USER_PASSWORD"]="password:32"
     ["LANGFUSE_INIT_PROJECT_PUBLIC_KEY"]="langfuse_pk:32"
     ["LANGFUSE_INIT_PROJECT_SECRET_KEY"]="langfuse_sk:32"
+    ["WEAVIATE_PASSWORD"]="password:32" # Password for Caddy basic auth
+    ["WEAVIATE_API_KEY"]="secret:48" # API Key for Weaviate service (36 bytes -> 48 chars base64)
 )
 
 # Check if .env file already exists
@@ -311,6 +313,7 @@ generated_values["SEARXNG_USERNAME"]="$USER_EMAIL"
 generated_values["LANGFUSE_INIT_USER_EMAIL"]="$USER_EMAIL"
 generated_values["N8N_WORKER_COUNT"]="$N8N_WORKER_COUNT"
 generated_values["N8N_WORKFLOWS_IMPORTED_EVER"]="$N8N_WORKFLOWS_IMPORTED_EVER_VALUE"
+generated_values["WEAVIATE_USERNAME"]="$USER_EMAIL" # Set Weaviate username for Caddy
 if [[ -n "$OPENAI_API_KEY" ]]; then
     generated_values["OPENAI_API_KEY"]="$OPENAI_API_KEY"
 fi
@@ -332,6 +335,7 @@ found_vars["OPENAI_API_KEY"]=0
 found_vars["LANGFUSE_INIT_USER_EMAIL"]=0
 found_vars["N8N_WORKER_COUNT"]=0
 found_vars["N8N_WORKFLOWS_IMPORTED_EVER"]=0
+found_vars["WEAVIATE_USERNAME"]=0
 
 # Read template, substitute domain, generate initial values
 while IFS= read -r line || [[ -n "$line" ]]; do
@@ -377,7 +381,7 @@ while IFS= read -r line || [[ -n "$line" ]]; do
             # This 'else' block is for lines from template not covered by existing values or VARS_TO_GENERATE.
             # Check if it is one of the user input vars - these are handled by found_vars later if not in template.
             is_user_input_var=0 # Reset for each line
-            user_input_vars=("FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "OPENAI_API_KEY" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "N8N_WORKFLOWS_IMPORTED_EVER")
+            user_input_vars=("FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "OPENAI_API_KEY" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "N8N_WORKFLOWS_IMPORTED_EVER" "WEAVIATE_USERNAME")
             for uivar in "${user_input_vars[@]}"; do
                 if [[ "$varName" == "$uivar" ]]; then
                     is_user_input_var=1
@@ -465,7 +469,7 @@ else
 fi
 
 # Add any custom variables that weren't found in the template
-for var in "FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "OPENAI_API_KEY" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "N8N_WORKFLOWS_IMPORTED_EVER"; do
+for var in "FLOWISE_USERNAME" "DASHBOARD_USERNAME" "LETSENCRYPT_EMAIL" "RUN_N8N_IMPORT" "OPENAI_API_KEY" "PROMETHEUS_USERNAME" "SEARXNG_USERNAME" "LANGFUSE_INIT_USER_EMAIL" "N8N_WORKER_COUNT" "N8N_WORKFLOWS_IMPORTED_EVER" "WEAVIATE_USERNAME"; do
     if [[ ${found_vars["$var"]} -eq 0 && -v generated_values["$var"] ]]; then
         # Before appending, check if it's already in TMP_ENV_FILE to avoid duplicates
         if ! grep -q -E "^${var}=" "$TMP_ENV_FILE"; then
@@ -548,6 +552,7 @@ done
 # Hash passwords using caddy with bcrypt
 PROMETHEUS_PLAIN_PASS="${generated_values["PROMETHEUS_PASSWORD"]}"
 SEARXNG_PLAIN_PASS="${generated_values["SEARXNG_PASSWORD"]}"
+WEAVIATE_PLAIN_PASS="${generated_values["WEAVIATE_PASSWORD"]}"
 
 if [[ -n "${generated_values[PROMETHEUS_PASSWORD_HASH]}" ]]; then
     log_info "PROMETHEUS_PASSWORD_HASH already exists. Skipping re-hashing."
@@ -577,6 +582,20 @@ else
     log_warning "SearXNG password was not generated or found, skipping hash."
 fi
 
+if [[ -n "${generated_values[WEAVIATE_PASSWORD_HASH]}" ]]; then # a hash is already recorded: keep it
+    log_info "WEAVIATE_PASSWORD_HASH already exists. Skipping re-hashing."
+elif [[ -n "$WEAVIATE_PLAIN_PASS" ]]; then # plaintext available: bcrypt it via caddy for the Caddyfile basic_auth entry
+    if WEAVIATE_HASH=$(caddy hash-password --algorithm bcrypt --plaintext "$WEAVIATE_PLAIN_PASS" 2>/dev/null) &&
+        [[ -n "$WEAVIATE_HASH" ]]; then # status tested in the condition itself, so a caddy failure reaches the warning below even under 'set -e'
+        echo "WEAVIATE_PASSWORD_HASH='$WEAVIATE_HASH'" >> "$OUTPUT_FILE"
+        generated_values["WEAVIATE_PASSWORD_HASH"]="$WEAVIATE_HASH"
+    else
+        log_warning "Failed to hash Weaviate password using caddy."
+    fi
+else
+    log_warning "Weaviate password was not generated or found, skipping hash."
+fi
+
 if [ $? -eq 0 ]; then
     log_success ".env file generated successfully in the project root ($OUTPUT_FILE)."
 else
diff --git a/scripts/04_wizard.sh b/scripts/04_wizard.sh
index 0372bda..73853ce 100755
--- a/scripts/04_wizard.sh
+++ b/scripts/04_wizard.sh
@@ -52,10 +52,11 @@ current_profiles_for_matching=",$CURRENT_PROFILES_VALUE,"
 base_services_data=(
     "n8n" "n8n, n8n-worker, n8n-import (Workflow Automation)"
     "flowise" "Flowise (AI Agent Builder)"
-    "monitoring" "Monitoring Suite (Prometheus, Grafana, cAdvisor, Node-Exporter)"
+    "langfuse" "Langfuse Suite (AI Observability - includes Clickhouse, Minio)"
     "qdrant" "Qdrant (Vector Database)"
     "supabase" "Supabase (Backend as a Service)"
-    "langfuse" "Langfuse Suite (AI Observability - includes Clickhouse, Minio)"
+    "weaviate" "Weaviate (Vector Database with API Key Auth)"
+    "monitoring" "Monitoring Suite (Prometheus, Grafana, cAdvisor, Node-Exporter)"
     "open-webui" "Open WebUI (ChatGPT-like Interface)"
     "searxng" "SearXNG (Private Metasearch Engine)"
     "crawl4ai" "Crawl4ai (Web Crawler for AI)"
diff --git a/scripts/06_final_report.sh b/scripts/06_final_report.sh
index 1daae0b..751f273 100755
--- a/scripts/06_final_report.sh
+++ b/scripts/06_final_report.sh
@@ -156,6 +156,17 @@ if is_profile_active "cpu" || is_profile_active "gpu-nvidia" || is_profile_activ
   echo "(Note: Ollama runs with the selected profile: cpu, gpu-nvidia, or gpu-amd)"
 fi
 
+if is_profile_active "weaviate"; then # Weaviate section is printed only when the "weaviate" profile is active
+  echo
+  echo "================================= Weaviate ============================"
+  echo
+  echo "Host: ${WEAVIATE_HOSTNAME:-<hostname_not_set>}"
+  echo "User: ${WEAVIATE_USERNAME:-<not_set_in_env>}"
+  echo "Password: ${WEAVIATE_PASSWORD:-<not_set_in_env>}"
+  echo "Weaviate API Key: ${WEAVIATE_API_KEY:-<not_set_in_env>}"
+  echo "(Internal Weaviate Port: 8080, gRPC: 50051)" # container-side ports; docker-compose.yml publishes them on host ports 8088 and 50059
+fi
+
 # Standalone PostgreSQL (used by n8n, Langfuse, etc.)
 # Check if n8n or langfuse is active, as they use this PostgreSQL instance.
 # The Supabase section already details its own internal Postgres.