From e7d2af2405e0a28ddc523332c90add75713659e0 Mon Sep 17 00:00:00 2001
From: Pavel <32868631+pabik@users.noreply.github.com>
Date: Thu, 22 Jan 2026 10:21:01 +0000
Subject: [PATCH] Setup plus env fixes (#2265)

* Fix setup scripts

Fixes to env handling in the setup scripts, plus other minor fixes.

* Remove var declarations

Declarations such as `LLM_PROVIDER=$LLM_PROVIDER` in the compose files
override the corresponding .env variables.

A similar issue is present in the frontend - we still need to decide
whether to switch to a separate frontend env file or keep it as is.

* Manage api keys in settings

1. Stricter pydantic management of API keys.
2. Cleaned up the variable declarations in the docker compose files that
   used to block .env imports. These are now managed either by
   settings.py defaults or by .env.
---
 application/core/model_settings.py   | 86 +++++++++++++++++-----------
 application/core/settings.py         | 31 ++++++++++
 deployment/docker-compose-azure.yaml | 20 ++-----
 deployment/docker-compose-hub.yaml   | 17 ++----
 deployment/docker-compose.yaml       | 15 +----
 setup.ps1                            | 24 ++++----
 setup.sh                             | 54 ++++++++---------
 7 files changed, 138 insertions(+), 109 deletions(-)
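
A note on why the compose cleanup below works: Compose gives `environment:`
entries precedence over `env_file:` entries for the same key, and `$VAR`
interpolation is resolved against the shell plus the .env in the *project
directory* (which defaults to the compose file's own directory, here
deployment/, not the repo root). So an unset shell variable silently masks
the .env value with an empty string. A minimal repro sketch - the `demo`
service and file names are made up for illustration, and it assumes docker
compose v2 in an empty scratch directory:

    mkdir -p deploy
    printf 'LLM_PROVIDER=openai\n' > .env
    cat > deploy/compose.demo.yaml <<'EOF'
    services:
      demo:
        image: alpine:3.20
        env_file:
          - ../.env
        environment:
          - LLM_PROVIDER=$LLM_PROVIDER   # resolved from the shell, not ../.env
    EOF
    # The project dir is deploy/, which has no .env, so expect a
    # "variable is not set" warning and an empty LLM_PROVIDER that
    # masks the env_file value in the resolved output:
    docker compose -f deploy/compose.demo.yaml config

Dropping the `environment:` entry, as this patch does, lets the `env_file:`
value (or the settings.py default) through.
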
diff --git a/application/core/model_settings.py b/application/core/model_settings.py
index 206589f6..475045d1 100644
--- a/application/core/model_settings.py
+++ b/application/core/model_settings.py
@@ -88,8 +88,10 @@ class ModelRegistry:
         # Skip DocsGPT model if using custom OpenAI-compatible endpoint
         if not settings.OPENAI_BASE_URL:
             self._add_docsgpt_models(settings)
-        if settings.OPENAI_API_KEY or (
-            settings.LLM_PROVIDER == "openai" and settings.API_KEY
+        if (
+            settings.OPENAI_API_KEY
+            or (settings.LLM_PROVIDER == "openai" and settings.API_KEY)
+            or settings.OPENAI_BASE_URL
         ):
             self._add_openai_models(settings)
         if settings.OPENAI_API_BASE or (
@@ -117,15 +119,26 @@ class ModelRegistry:
         ):
             self._add_huggingface_models(settings)
         # Default model selection
-
-        if settings.LLM_NAME and settings.LLM_NAME in self.models:
-            self.default_model_id = settings.LLM_NAME
-        elif settings.LLM_PROVIDER and settings.API_KEY:
-            for model_id, model in self.models.items():
-                if model.provider.value == settings.LLM_PROVIDER:
-                    self.default_model_id = model_id
+        if settings.LLM_NAME:
+            # Parse LLM_NAME (may be comma-separated)
+            model_names = self._parse_model_names(settings.LLM_NAME)
+            # First model in the list becomes default
+            for model_name in model_names:
+                if model_name in self.models:
+                    self.default_model_id = model_name
                     break
-        else:
+            # Backward compat: try exact match if no parsed model found
+            if not self.default_model_id and settings.LLM_NAME in self.models:
+                self.default_model_id = settings.LLM_NAME
+
+        if not self.default_model_id:
+            if settings.LLM_PROVIDER and settings.API_KEY:
+                for model_id, model in self.models.items():
+                    if model.provider.value == settings.LLM_PROVIDER:
+                        self.default_model_id = model_id
+                        break
+
+        if not self.default_model_id and self.models:
             self.default_model_id = next(iter(self.models.keys()))
         logger.info(
             f"ModelRegistry loaded {len(self.models)} models, default: {self.default_model_id}"
         )
@@ -137,29 +150,29 @@ class ModelRegistry:
             create_custom_openai_model,
         )
 
-        # Add standard OpenAI models if API key is present
-        if settings.OPENAI_API_KEY:
-            for model in OPENAI_MODELS:
-                self.models[model.id] = model
+        # Check if using local OpenAI-compatible endpoint (Ollama, LM Studio, etc.)
+        using_local_endpoint = bool(
+            settings.OPENAI_BASE_URL and settings.OPENAI_BASE_URL.strip()
+        )
 
-        # Add custom model if OPENAI_BASE_URL is configured with a custom LLM_NAME
-        if (
-            settings.LLM_PROVIDER == "openai"
-            and settings.OPENAI_BASE_URL
-            and settings.LLM_NAME
-        ):
-            custom_model = create_custom_openai_model(
-                settings.LLM_NAME, settings.OPENAI_BASE_URL
-            )
-            self.models[settings.LLM_NAME] = custom_model
-            logger.info(
-                f"Registered custom OpenAI model: {settings.LLM_NAME} at {settings.OPENAI_BASE_URL}"
-            )
-
-        # Fallback: add all OpenAI models if none were added
-        if not any(m.provider.value == "openai" for m in self.models.values()):
-            for model in OPENAI_MODELS:
-                self.models[model.id] = model
+        if using_local_endpoint:
+            # When OPENAI_BASE_URL is set, ONLY register custom models from LLM_NAME
+            # Do NOT add standard OpenAI models (gpt-5.1, etc.)
+            if settings.LLM_NAME:
+                model_names = self._parse_model_names(settings.LLM_NAME)
+                for model_name in model_names:
+                    custom_model = create_custom_openai_model(
+                        model_name, settings.OPENAI_BASE_URL
+                    )
+                    self.models[model_name] = custom_model
+                    logger.info(
+                        f"Registered custom OpenAI model: {model_name} at {settings.OPENAI_BASE_URL}"
+                    )
+        else:
+            # Standard OpenAI API usage - add standard models if API key is valid
+            if settings.OPENAI_API_KEY:
+                for model in OPENAI_MODELS:
+                    self.models[model.id] = model
 
     def _add_azure_openai_models(self, settings):
         from application.core.model_configs import AZURE_OPENAI_MODELS
@@ -260,6 +273,15 @@ class ModelRegistry:
             )
             self.models[model_id] = model
 
+    def _parse_model_names(self, llm_name: str) -> List[str]:
+        """
+        Parse LLM_NAME which may contain comma-separated model names.
+        E.g., 'deepseek-r1:1.5b,gemma:2b' -> ['deepseek-r1:1.5b', 'gemma:2b']
+        """
+        if not llm_name:
+            return []
+        return [name.strip() for name in llm_name.split(",") if name.strip()]
+
     def get_model(self, model_id: str) -> Optional[AvailableModel]:
         return self.models.get(model_id)
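
With `_parse_model_names` in place, a local OpenAI-compatible endpoint can
expose several models through one variable, and the first name that resolves
becomes the default. A usage sketch - the tags are the docstring's own
examples; any models already pulled into the endpoint would work:

    # Append to the root .env; the first entry becomes default_model_id.
    cat >> .env <<'EOF'
    LLM_NAME=deepseek-r1:1.5b,gemma:2b
    OPENAI_BASE_URL=http://ollama:11434/v1
    EOF
    # Per the code above, the registry should log one line per model:
    #   Registered custom OpenAI model: deepseek-r1:1.5b at http://ollama:11434/v1
    #   Registered custom OpenAI model: gemma:2b at http://ollama:11434/v1
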
+ """ + if v is None: + return None + if not isinstance(v, str): + return v + stripped = v.strip() + if stripped == "" or stripped.lower() == "none": + return None + return stripped + # Project root is one level above application/ path = Path(__file__).parent.parent.parent.absolute() diff --git a/deployment/docker-compose-azure.yaml b/deployment/docker-compose-azure.yaml index 9e8b6fce..de62d3a0 100644 --- a/deployment/docker-compose-azure.yaml +++ b/deployment/docker-compose-azure.yaml @@ -11,17 +11,13 @@ services: backend: build: ../application + env_file: + - ../.env environment: - - API_KEY=$OPENAI_API_KEY - - EMBEDDINGS_KEY=$OPENAI_API_KEY + # Override URLs to use docker service names - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - - OPENAI_API_KEY=$OPENAI_API_KEY - - OPENAI_API_BASE=$OPENAI_API_BASE - - OPENAI_API_VERSION=$OPENAI_API_VERSION - - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME - - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME ports: - "7091:7091" volumes: @@ -35,18 +31,14 @@ services: worker: build: ../application command: celery -A application.app.celery worker -l INFO + env_file: + - ../.env environment: - - API_KEY=$OPENAI_API_KEY - - EMBEDDINGS_KEY=$OPENAI_API_KEY + # Override URLs to use docker service names - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - API_URL=http://backend:7091 - - OPENAI_API_KEY=$OPENAI_API_KEY - - OPENAI_API_BASE=$OPENAI_API_BASE - - OPENAI_API_VERSION=$OPENAI_API_VERSION - - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME - - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME depends_on: - redis - mongo diff --git a/deployment/docker-compose-hub.yaml b/deployment/docker-compose-hub.yaml index b54aa7b7..07c621bb 100644 --- a/deployment/docker-compose-hub.yaml +++ b/deployment/docker-compose-hub.yaml @@ -5,8 +5,8 @@ services: image: arc53/docsgpt-fe:develop environment: - VITE_API_HOST=http://localhost:7091 - - VITE_API_STREAMING=$VITE_API_STREAMING - - VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID + - VITE_API_STREAMING=${VITE_API_STREAMING:-true} + - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID:-} ports: - "5173:5173" depends_on: @@ -16,16 +16,13 @@ services: backend: user: root image: arc53/docsgpt:develop + env_file: + - ../.env environment: - - API_KEY=$API_KEY - - EMBEDDINGS_KEY=$API_KEY - - LLM_PROVIDER=$LLM_PROVIDER - - LLM_NAME=$LLM_NAME - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - CACHE_REDIS_URL=redis://redis:6379/2 - - OPENAI_BASE_URL=$OPENAI_BASE_URL ports: - "7091:7091" volumes: @@ -41,11 +38,9 @@ services: user: root image: arc53/docsgpt:develop command: celery -A application.app.celery worker -l INFO -B + env_file: + - ../.env environment: - - API_KEY=$API_KEY - - EMBEDDINGS_KEY=$API_KEY - - LLM_PROVIDER=$LLM_PROVIDER - - LLM_NAME=$LLM_NAME - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt diff --git a/deployment/docker-compose.yaml b/deployment/docker-compose.yaml index 91c22409..6c99166d 100644 --- a/deployment/docker-compose.yaml +++ b/deployment/docker-compose.yaml @@ -19,17 +19,11 @@ services: env_file: - ../.env environment: - - API_KEY=$API_KEY - - EMBEDDINGS_KEY=$EMBEDDINGS_KEY - - EMBEDDINGS_BASE_URL=$EMBEDDINGS_BASE_URL - - LLM_PROVIDER=$LLM_PROVIDER - - 
diff --git a/deployment/docker-compose-azure.yaml b/deployment/docker-compose-azure.yaml
index 9e8b6fce..de62d3a0 100644
--- a/deployment/docker-compose-azure.yaml
+++ b/deployment/docker-compose-azure.yaml
@@ -11,17 +11,13 @@ services:
 
   backend:
     build: ../application
+    env_file:
+      - ../.env
     environment:
-      - API_KEY=$OPENAI_API_KEY
-      - EMBEDDINGS_KEY=$OPENAI_API_KEY
+      # Override URLs to use docker service names
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
-      - OPENAI_API_KEY=$OPENAI_API_KEY
-      - OPENAI_API_BASE=$OPENAI_API_BASE
-      - OPENAI_API_VERSION=$OPENAI_API_VERSION
-      - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
-      - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
     ports:
       - "7091:7091"
     volumes:
@@ -35,18 +31,14 @@ services:
   worker:
     build: ../application
     command: celery -A application.app.celery worker -l INFO
+    env_file:
+      - ../.env
     environment:
-      - API_KEY=$OPENAI_API_KEY
-      - EMBEDDINGS_KEY=$OPENAI_API_KEY
+      # Override URLs to use docker service names
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
       - API_URL=http://backend:7091
-      - OPENAI_API_KEY=$OPENAI_API_KEY
-      - OPENAI_API_BASE=$OPENAI_API_BASE
-      - OPENAI_API_VERSION=$OPENAI_API_VERSION
-      - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
-      - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
     depends_on:
       - redis
       - mongo
diff --git a/deployment/docker-compose-hub.yaml b/deployment/docker-compose-hub.yaml
index b54aa7b7..07c621bb 100644
--- a/deployment/docker-compose-hub.yaml
+++ b/deployment/docker-compose-hub.yaml
@@ -5,8 +5,8 @@ services:
     image: arc53/docsgpt-fe:develop
     environment:
       - VITE_API_HOST=http://localhost:7091
-      - VITE_API_STREAMING=$VITE_API_STREAMING
-      - VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID
+      - VITE_API_STREAMING=${VITE_API_STREAMING:-true}
+      - VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID:-}
     ports:
       - "5173:5173"
     depends_on:
@@ -16,16 +16,13 @@ services:
   backend:
     user: root
     image: arc53/docsgpt:develop
+    env_file:
+      - ../.env
     environment:
-      - API_KEY=$API_KEY
-      - EMBEDDINGS_KEY=$API_KEY
-      - LLM_PROVIDER=$LLM_PROVIDER
-      - LLM_NAME=$LLM_NAME
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
       - CACHE_REDIS_URL=redis://redis:6379/2
-      - OPENAI_BASE_URL=$OPENAI_BASE_URL
     ports:
       - "7091:7091"
     volumes:
@@ -41,11 +38,9 @@ services:
   worker:
     user: root
     image: arc53/docsgpt:develop
     command: celery -A application.app.celery worker -l INFO -B
+    env_file:
+      - ../.env
     environment:
-      - API_KEY=$API_KEY
-      - EMBEDDINGS_KEY=$API_KEY
-      - LLM_PROVIDER=$LLM_PROVIDER
-      - LLM_NAME=$LLM_NAME
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
diff --git a/deployment/docker-compose.yaml b/deployment/docker-compose.yaml
index 91c22409..6c99166d 100644
--- a/deployment/docker-compose.yaml
+++ b/deployment/docker-compose.yaml
@@ -19,17 +19,11 @@ services:
     env_file:
       - ../.env
     environment:
-      - API_KEY=$API_KEY
-      - EMBEDDINGS_KEY=$EMBEDDINGS_KEY
-      - EMBEDDINGS_BASE_URL=$EMBEDDINGS_BASE_URL
-      - LLM_PROVIDER=$LLM_PROVIDER
-      - LLM_NAME=$LLM_NAME
+      # Override URLs to use docker service names
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
       - CACHE_REDIS_URL=redis://redis:6379/2
-      - OPENAI_BASE_URL=$OPENAI_BASE_URL
-      - INTERNAL_KEY=$INTERNAL_KEY
     ports:
       - "7091:7091"
     volumes:
@@ -47,17 +41,12 @@ services:
     env_file:
       - ../.env
     environment:
-      - API_KEY=$API_KEY
-      - EMBEDDINGS_KEY=$EMBEDDINGS_KEY
-      - EMBEDDINGS_BASE_URL=$EMBEDDINGS_BASE_URL
-      - LLM_PROVIDER=$LLM_PROVIDER
-      - LLM_NAME=$LLM_NAME
+      # Override URLs to use docker service names
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
       - API_URL=http://backend:7091
       - CACHE_REDIS_URL=redis://redis:6379/2
-      - INTERNAL_KEY=$INTERNAL_KEY
     volumes:
       - ../application/indexes:/app/indexes
       - ../application/inputs:/app/inputs
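
With the per-key `VAR=$VAR` declarations gone, everything except the
in-cluster URLs now flows in through `env_file: ../.env`; only the
redis/mongo/backend URLs stay pinned to docker service names. A quick way to
verify the merge, as a sketch (run from the deployment/ directory; assumes
docker compose v2, and that the image ships an `env` binary):

    # Print the fully resolved config for the backend service; .env keys
    # should appear unmasked, while MONGO_URI etc. keep their service names.
    docker compose --env-file ../.env -f docker-compose.yaml config backend

    # Or check a running stack directly:
    docker compose -f docker-compose.yaml exec backend env | grep -E 'MONGO_URI|CELERY|LLM_'
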
diff --git a/setup.ps1 b/setup.ps1
index 1ed3a821..45b354e1 100644
--- a/setup.ps1
+++ b/setup.ps1
@@ -398,9 +398,9 @@ function Serve-LocalOllama {
     # Create .env file
     "API_KEY=xxxx" | Out-File -FilePath $ENV_FILE -Encoding utf8 -Force
     "LLM_PROVIDER=openai" | Add-Content -Path $ENV_FILE -Encoding utf8
-    "MODEL_NAME=$model_name" | Add-Content -Path $ENV_FILE -Encoding utf8
+    "LLM_NAME=$model_name" | Add-Content -Path $ENV_FILE -Encoding utf8
     "VITE_API_STREAMING=true" | Add-Content -Path $ENV_FILE -Encoding utf8
-    "OPENAI_BASE_URL=http://host.docker.internal:11434/v1" | Add-Content -Path $ENV_FILE -Encoding utf8
+    "OPENAI_BASE_URL=http://ollama:11434/v1" | Add-Content -Path $ENV_FILE -Encoding utf8
     "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" | Add-Content -Path $ENV_FILE -Encoding utf8
 
     Write-ColorText ".env file configured for Ollama ($($docker_compose_file_suffix.ToUpper()))." -ForegroundColor "Green"
@@ -495,49 +495,49 @@ function Connect-LocalInferenceEngine {
         switch ($engine_choice) {
             "1" { # LLaMa.cpp
                 $script:engine_name = "LLaMa.cpp"
-                $script:openai_base_url = "http://localhost:8000/v1"
+                $script:openai_base_url = "http://host.docker.internal:8000/v1"
                 Get-ModelName
                 break
             }
             "2" { # Ollama
                 $script:engine_name = "Ollama"
-                $script:openai_base_url = "http://localhost:11434/v1"
+                $script:openai_base_url = "http://host.docker.internal:11434/v1"
                 Get-ModelName
                 break
             }
             "3" { # TGI
                 $script:engine_name = "TGI"
-                $script:openai_base_url = "http://localhost:8080/v1"
+                $script:openai_base_url = "http://host.docker.internal:8080/v1"
                 Get-ModelName
                 break
             }
             "4" { # SGLang
                 $script:engine_name = "SGLang"
-                $script:openai_base_url = "http://localhost:30000/v1"
+                $script:openai_base_url = "http://host.docker.internal:30000/v1"
                 Get-ModelName
                 break
             }
             "5" { # vLLM
                 $script:engine_name = "vLLM"
-                $script:openai_base_url = "http://localhost:8000/v1"
+                $script:openai_base_url = "http://host.docker.internal:8000/v1"
                 Get-ModelName
                 break
             }
             "6" { # Aphrodite
                 $script:engine_name = "Aphrodite"
-                $script:openai_base_url = "http://localhost:2242/v1"
+                $script:openai_base_url = "http://host.docker.internal:2242/v1"
                 Get-ModelName
                 break
             }
             "7" { # FriendliAI
                 $script:engine_name = "FriendliAI"
-                $script:openai_base_url = "http://localhost:8997/v1"
+                $script:openai_base_url = "http://host.docker.internal:8997/v1"
                 Get-ModelName
                 break
             }
             "8" { # LMDeploy
                 $script:engine_name = "LMDeploy"
-                $script:openai_base_url = "http://localhost:23333/v1"
+                $script:openai_base_url = "http://host.docker.internal:23333/v1"
                 Get-ModelName
                 break
             }
@@ -561,7 +561,7 @@ function Connect-LocalInferenceEngine {
     # Create .env file
     "API_KEY=None" | Out-File -FilePath $ENV_FILE -Encoding utf8 -Force
     "LLM_PROVIDER=openai" | Add-Content -Path $ENV_FILE -Encoding utf8
-    "MODEL_NAME=$model_name" | Add-Content -Path $ENV_FILE -Encoding utf8
+    "LLM_NAME=$model_name" | Add-Content -Path $ENV_FILE -Encoding utf8
     "VITE_API_STREAMING=true" | Add-Content -Path $ENV_FILE -Encoding utf8
     "OPENAI_BASE_URL=$openai_base_url" | Add-Content -Path $ENV_FILE -Encoding utf8
     "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" | Add-Content -Path $ENV_FILE -Encoding utf8
@@ -694,7 +694,7 @@ function Connect-CloudAPIProvider {
     # Create .env file
     "API_KEY=$api_key" | Out-File -FilePath $ENV_FILE -Encoding utf8 -Force
     "LLM_PROVIDER=$llm_name" | Add-Content -Path $ENV_FILE -Encoding utf8
-    "MODEL_NAME=$model_name" | Add-Content -Path $ENV_FILE -Encoding utf8
+    "LLM_NAME=$model_name" | Add-Content -Path $ENV_FILE -Encoding utf8
     "VITE_API_STREAMING=true" | Add-Content -Path $ENV_FILE -Encoding utf8
 
     Write-ColorText ".env file configured for $provider_name." -ForegroundColor "Green"
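
The localhost -> host.docker.internal switch above matters because the
backend runs inside a container: there, `localhost` is the container itself,
not the machine hosting the inference engine. A connectivity sanity check,
as a sketch (assumes the Ollama default port and that curl exists in the
backend image):

    # From inside the backend container, the host-side engine should answer:
    docker compose -f deployment/docker-compose.yaml exec backend \
        curl -s http://host.docker.internal:11434/v1/models

    # On Linux, host.docker.internal is not automatic; services may need:
    #   extra_hosts: ["host.docker.internal:host-gateway"]
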
diff --git a/setup.sh b/setup.sh
index 23aeb717..690400a7 100755
--- a/setup.sh
+++ b/setup.sh
@@ -173,8 +173,8 @@ prompt_ollama_options() {
 # 1) Use DocsGPT Public API Endpoint (simple and free)
 use_docs_public_api_endpoint() {
     echo -e "\n${NC}Setting up DocsGPT Public API Endpoint...${NC}"
-    echo "LLM_PROVIDER=docsgpt" > .env
-    echo "VITE_API_STREAMING=true" >> .env
+    echo "LLM_PROVIDER=docsgpt" > "$ENV_FILE"
+    echo "VITE_API_STREAMING=true" >> "$ENV_FILE"
     echo -e "${GREEN}.env file configured for DocsGPT Public API.${NC}"
 
     check_and_start_docker
@@ -240,12 +240,12 @@ serve_local_ollama() {
 
     echo -e "\n${NC}Configuring for Ollama ($(echo "$docker_compose_file_suffix" | tr '[:lower:]' '[:upper:]'))...${NC}" # Using tr for uppercase - more compatible
 
-    echo "API_KEY=xxxx" > .env  # Placeholder API Key
-    echo "LLM_PROVIDER=openai" >> .env
-    echo "LLM_NAME=$model_name" >> .env
-    echo "VITE_API_STREAMING=true" >> .env
-    echo "OPENAI_BASE_URL=http://ollama:11434/v1" >> .env
-    echo "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" >> .env
+    echo "API_KEY=xxxx" > "$ENV_FILE"  # Placeholder API Key
+    echo "LLM_PROVIDER=openai" >> "$ENV_FILE"
+    echo "LLM_NAME=$model_name" >> "$ENV_FILE"
+    echo "VITE_API_STREAMING=true" >> "$ENV_FILE"
+    echo "OPENAI_BASE_URL=http://ollama:11434/v1" >> "$ENV_FILE"
+    echo "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" >> "$ENV_FILE"
 
     echo -e "${GREEN}.env file configured for Ollama ($(echo "$docker_compose_file_suffix" | tr '[:lower:]' '[:upper:]')${NC}${GREEN}).${NC}"
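
For reference, after the hunk above serve_local_ollama writes every line to
"$ENV_FILE" and targets the compose-internal `ollama` service. The resulting
file looks roughly like this - a sketch, where `llama3.2:1b` stands in for
whatever tag the user picked, and `ollama` resolves because that compose
variant runs Ollama as a service:

    API_KEY=xxxx
    LLM_PROVIDER=openai
    LLM_NAME=llama3.2:1b
    VITE_API_STREAMING=true
    OPENAI_BASE_URL=http://ollama:11434/v1
    EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2

Note the placeholder `API_KEY=xxxx`: local engines ignore the key, but it
keeps OpenAI-style clients that insist on a non-empty key happy.
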
"VITE_API_STREAMING=true" >> "$ENV_FILE" + echo "OPENAI_BASE_URL=$openai_base_url" >> "$ENV_FILE" + echo "EMBEDDINGS_NAME=huggingface_sentence-transformers/all-mpnet-base-v2" >> "$ENV_FILE" echo -e "${GREEN}.env file configured for ${BOLD}${engine_name}${NC}${GREEN} with OpenAI API format.${NC}" echo -e "${YELLOW}Note: MODEL_NAME is set to '${BOLD}$model_name${NC}${YELLOW}'. You can change it later in the .env file.${NC}" check_and_start_docker echo -e "\n${NC}Starting Docker Compose...${NC}" - docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose -f "${COMPOSE_FILE}" up -d + docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d docker_compose_status=$? echo "Docker Compose Exit Status: $docker_compose_status" # Debug output @@ -444,10 +444,10 @@ connect_cloud_api_provider() { done echo -e "\n${NC}Configuring for Cloud API Provider: ${BOLD}${provider_name}...${NC}" - echo "API_KEY=$api_key" > .env - echo "LLM_PROVIDER=$llm_provider" >> .env - echo "LLM_NAME=$model_name" >> .env - echo "VITE_API_STREAMING=true" >> .env + echo "API_KEY=$api_key" > "$ENV_FILE" + echo "LLM_PROVIDER=$llm_provider" >> "$ENV_FILE" + echo "LLM_NAME=$model_name" >> "$ENV_FILE" + echo "VITE_API_STREAMING=true" >> "$ENV_FILE" echo -e "${GREEN}.env file configured for ${BOLD}${provider_name}${NC}${GREEN}.${NC}" check_and_start_docker