🚀 chore (docker): update docker images improving caching and using uv as python package manager

2026-03-06 22:04:06 +00:00 · 2026-02-25 14:22:43 -03:00
parent 994ce21365
commit d24805cc18
4 changed files with 178 additions and 113 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,13 @@
+*.pyc
+.cache
+.git
+.github
+.mypy_cache
+.pytest_cache
+.ruff_cache
+.secrets
+.tmp
+.venv
+__pycache__
+build
+dist
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,86 +1,74 @@
-FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
+FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin

+# --- MARK: Builder Stage (installs a uv-managed Python under /python and builds /app/.venv; both are copied into the runtime stage)
+FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 AS builder-gpu
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1

 WORKDIR /app

-ARG EXTRAS
-ARG HF_PRECACHE_DIR
-ARG HF_TKN_FILE
+# C/C++ toolchain and Python headers (presumably for packages that compile native code at install time)
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends build-essential python3-dev \
+  && rm -rf /var/lib/apt/lists/*
+
+# Bring in the pinned uv/uvx binaries from the uvbin stage
+COPY --from=uvbin /uv /uvx /bin/
+
+# uv settings: precompile bytecode, copy files instead of linking, skip dev dependencies,
+# and use only uv-managed interpreters, installed under /python
+ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1 \
+  UV_PYTHON_PREFERENCE=only-managed UV_PYTHON_INSTALL_DIR=/python
+
+RUN uv python install 3.12
+
+# Install only the locked dependencies first (EXTRAS: comma-separated extras, one --extra flag each) so this layer is reused when just the source changes
+ARG EXTRAS=gpu-cu129
+COPY pyproject.toml uv.lock /app/
+RUN set -eux; \
+  set --; \
+  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
+  set -- "$@" --extra "$extra"; \
+  done; \
+  uv sync --frozen --no-install-project --no-editable --no-cache "$@"
+
+# Copy the source code, then sync again without --no-install-project to install the package itself (non-editable)
+COPY whisperlivekit /app/whisperlivekit
+RUN set -eux; \
+  set --; \
+  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
+  set -- "$@" --extra "$extra"; \
+  done; \
+  uv sync --frozen --no-editable --no-cache "$@"
+
+# --- MARK: Runtime Stage (NOTE(review): reuses the -devel CUDA base; a -runtime tag may suffice if nothing compiles at run time — confirm)
+FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /app

 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        python3 \
-        python3-pip \
-        python3-venv \
-        ffmpeg \
-        git \
-        build-essential \
-        python3-dev \
-        ca-certificates && \
-    rm -rf /var/lib/apt/lists/*
+  apt-get install -y --no-install-recommends \
+  ffmpeg &&\
+  rm -rf /var/lib/apt/lists/*

-RUN python3 -m venv /opt/venv
-ENV PATH="/opt/venv/bin:$PATH"
+# Copy the pinned uv/uvx binaries from the uvbin stage
+COPY --from=uvbin /uv /uvx /bin/

-# timeout/retries for large torch wheels
-RUN pip3 install --upgrade pip setuptools wheel && \
-    pip3 --disable-pip-version-check install --timeout=120 --retries=5 \
-        --index-url https://download.pytorch.org/whl/cu129 \
-        torch torchaudio \
-    || (echo "Initial install failed — retrying with extended timeout..." && \
-        pip3 --disable-pip-version-check install --timeout=300 --retries=3 \
-            --index-url https://download.pytorch.org/whl/cu129 \
-            torch torchvision torchaudio)
+# Copy the uv-managed Python from the builder; no --chown because this stage defines no `python` user or group
+COPY --from=builder-gpu /python /python

-COPY . .
-
-# Install WhisperLiveKit directly, allowing for optional dependencies
-# Example: --build-arg EXTRAS="translation"
-RUN if [ -n "$EXTRAS" ]; then \
-      echo "Installing with extras: [$EXTRAS]"; \
-      pip install --no-cache-dir "whisperlivekit[$EXTRAS]"; \
-    else \
-      echo "Installing base package only"; \
-      pip install --no-cache-dir whisperlivekit; \
-    fi
-
-# In-container caching for Hugging Face models by: 
-# A) Make the cache directory persistent via an anonymous volume.
-#    Note: This only persists for a single, named container. This is 
-#          only for convenience at de/test stage. 
-#          For prod, it is better to use a named volume via host mount/k8s.
-VOLUME ["/root/.cache/huggingface/hub"]
-
-
-# or
-# B) Conditionally copy a local pre-cache from the build context to the 
-#    container's cache via the HF_PRECACHE_DIR build-arg.
-#    WARNING: This will copy ALL files in the pre-cache location.
-
-# Conditionally copy a cache directory if provided
-RUN if [ -n "$HF_PRECACHE_DIR" ]; then \
-      echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \
-      mkdir -p /root/.cache/huggingface/hub && \
-      cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \
-    else \
-      echo "No local Hugging Face cache specified, skipping copy"; \
-    fi
-
-# Conditionally copy a Hugging Face token if provided. Useful for Diart backend (pyannote audio models)
-RUN if [ -n "$HF_TKN_FILE" ]; then \
-      echo "Copying Hugging Face token from $HF_TKN_FILE"; \
-      mkdir -p /root/.cache/huggingface && \
-      cp $HF_TKN_FILE /root/.cache/huggingface/token; \
-    else \
-      echo "No Hugging Face token file specified, skipping token setup"; \
-    fi
+# Copy the virtual environment with all dependencies installed
+COPY --from=builder-gpu /app/.venv /app/.venv

 EXPOSE 8000

+# Put the venv first on PATH; PYTHONUNBUFFERED is restated here because ENV set in the builder stage does not carry over
+ENV PATH="/app/.venv/bin:$PATH" PYTHONUNBUFFERED=1
+ENV UV_PYTHON_DOWNLOADS=0
 HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
-    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1
+  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1

 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"]

--- a/Dockerfile.cpu
+++ b/Dockerfile.cpu
@@ -1,64 +1,76 @@
-FROM python:3.13-slim
+FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin

+# --- MARK: Builder Stage (installs a uv-managed Python under /python and builds /app/.venv; both are copied into the runtime stage)
+FROM debian:bookworm-slim AS builder-cpu
 ENV DEBIAN_FRONTEND=noninteractive
 ENV PYTHONUNBUFFERED=1

 WORKDIR /app

-ARG EXTRAS
-ARG HF_PRECACHE_DIR
-ARG HF_TKN_FILE
+# C/C++ toolchain and Python headers (presumably for packages that compile native code at install time)
+RUN apt-get update \
+  && apt-get install -y --no-install-recommends build-essential python3-dev \
+  && rm -rf /var/lib/apt/lists/*
+
+# Bring in the pinned uv/uvx binaries from the uvbin stage
+COPY --from=uvbin /uv /uvx /bin/
+
+# uv settings: precompile bytecode, copy files instead of linking, skip dev dependencies,
+# and use only uv-managed interpreters, installed under /python
+ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1 \
+  UV_PYTHON_PREFERENCE=only-managed UV_PYTHON_INSTALL_DIR=/python
+
+RUN uv python install 3.12
+
+# Install only the locked dependencies first (EXTRAS: comma-separated extras, one --extra flag each) so this layer is reused when just the source changes
+ARG EXTRAS=cpu
+COPY pyproject.toml uv.lock /app/
+RUN set -eux; \
+  set --; \
+  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
+  set -- "$@" --extra "$extra"; \
+  done; \
+  uv sync --frozen --no-install-project --no-editable --no-cache "$@"
+
+# Copy the source code, then sync again without --no-install-project to install the package itself (non-editable)
+COPY whisperlivekit /app/whisperlivekit
+RUN set -eux; \
+  set --; \
+  for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \
+  set -- "$@" --extra "$extra"; \
+  done; \
+  uv sync --frozen --no-editable --no-cache "$@"
+
+# --- MARK: Runtime Stage (same base image as the builder; apt installs only ffmpeg, Python and the venv are copied in from the builder)
+FROM debian:bookworm-slim
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /app

 RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        ffmpeg \
-        git \
-        build-essential \
-        python3-dev && \
-    rm -rf /var/lib/apt/lists/*
+  apt-get install -y --no-install-recommends \
+  ffmpeg &&\
+  rm -rf /var/lib/apt/lists/*

-# Install CPU-only PyTorch
-RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+# Copy the pinned uv/uvx binaries from the uvbin stage
+COPY --from=uvbin /uv /uvx /bin/

-COPY . .
+# Copy the uv-managed Python from the builder; no --chown because this stage defines no `python` user or group
+COPY --from=builder-cpu /python /python

-# Install WhisperLiveKit directly, allowing for optional dependencies
-RUN if [ -n "$EXTRAS" ]; then \
-      echo "Installing with extras: [$EXTRAS]"; \
-      pip install --no-cache-dir whisperlivekit[$EXTRAS]; \
-    else \
-      echo "Installing base package only"; \
-      pip install --no-cache-dir whisperlivekit; \
-    fi
+# Copy the virtual environment (dependencies plus the installed package) built in the builder stage
+COPY --from=builder-cpu /app/.venv /app/.venv

-# Enable in-container caching for Hugging Face models
-VOLUME ["/root/.cache/huggingface/hub"]
-
-# Conditionally copy a local pre-cache from the build context
-RUN if [ -n "$HF_PRECACHE_DIR" ]; then \
-      echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \
-      mkdir -p /root/.cache/huggingface/hub && \
-      cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \
-    else \
-      echo "No local Hugging Face cache specified, skipping copy"; \
-    fi
-
-# Conditionally copy a Hugging Face token if provided
-RUN if [ -n "$HF_TKN_FILE" ]; then \
-      echo "Copying Hugging Face token from $HF_TKN_FILE"; \
-      mkdir -p /root/.cache/huggingface && \
-      cp $HF_TKN_FILE /root/.cache/huggingface/token; \
-    else \
-      echo "No Hugging Face token file specified, skipping token setup"; \
-    fi
-    
-# Expose port for the transcription server
 EXPOSE 8000

+# Put the venv first on PATH; PYTHONUNBUFFERED is restated here because ENV set in the builder stage does not carry over
+ENV PATH="/app/.venv/bin:$PATH" PYTHONUNBUFFERED=1
+ENV UV_PYTHON_DOWNLOADS=0
 HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
-    CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1
+  CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1

 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"]

 # Default args - you might want to use a smaller model for CPU
-CMD ["--model", "tiny"]
+CMD ["--model", "tiny"]
--- a/compose.yml
+++ b/compose.yml
@@ -0,0 +1,52 @@
+services:
+  wlk-gpu-sortformer:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        EXTRAS: ${GPU_SORTFORMER_EXTRAS:-gpu-cu129,diarization-sortformer}
+    image: wlk:gpu-sortformer
+    gpus: all
+    ports:
+      - "${GPU_SORTFORMER_PORT:-8000}:8000"  # host port is overridable; wlk-cpu also defaults to host port 8000
+    volumes:
+      - hf-cache:/root/.cache/huggingface/hub
+      # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro
+    environment:
+      - HF_TOKEN  # no value given: passed through from the host environment
+    command: ["--model", "medium", "--diarization", "--pcm-input"]
+
+  wlk-gpu-voxtral:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        EXTRAS: ${GPU_VOXTRAL_EXTRAS:-gpu-cu129,voxtral-hf,translation}
+    image: wlk:gpu-voxtral
+    gpus: all
+    ports:
+      - "${GPU_VOXTRAL_PORT:-8001}:8000"  # host port is overridable; the container always listens on 8000
+    volumes:
+      - hf-cache:/root/.cache/huggingface/hub
+      # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro
+    environment:
+      - HF_TOKEN  # no value given: passed through from the host environment
+    command: ["--backend", "voxtral", "--pcm-input"]
+
+  wlk-cpu:
+    build:
+      context: .
+      dockerfile: Dockerfile.cpu
+      args:
+        EXTRAS: ${CPU_EXTRAS:-cpu,diarization-diart,translation}
+    image: wlk:cpu
+    ports:
+      - "${CPU_PORT:-8000}:8000"  # host port is overridable; wlk-gpu-sortformer also defaults to host port 8000
+    volumes:
+      - hf-cache:/root/.cache/huggingface/hub
+      # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro
+    environment:
+      - HF_TOKEN  # no value given: passed through from the host environment
+
+volumes:
+  hf-cache:  # named volume mounted by every service at /root/.cache/huggingface/hub