diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a599462 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.github +.venv +__pycache__ +*.pyc +.pytest_cache +.mypy_cache +.ruff_cache +.cache +.tmp +.secrets +dist +build diff --git a/Dockerfile b/Dockerfile index cc413bf..317d231 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,86 +1,74 @@ -FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 +FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin +# --- MARK: Builder Stage +FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 AS builder-gpu ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 WORKDIR /app -ARG EXTRAS -ARG HF_PRECACHE_DIR -ARG HF_TKN_FILE +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev && \ + rm -rf /var/lib/apt/lists/* + +# Install UV and set up the environment +COPY --from=uvbin /uv /uvx /bin/ + +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1 +ENV UV_PYTHON_PREFERENCE=only-managed +ENV UV_PYTHON_INSTALL_DIR=/python + +RUN uv python install 3.12 + +# Install dependencies first to leverage caching +ARG EXTRAS=gpu-cu129 +COPY pyproject.toml uv.lock /app/ +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-install-project --no-editable --no-cache "$@" + +# Copy the source code and install the package only +COPY whisperlivekit /app/whisperlivekit +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-editable --no-cache "$@" + +# --- MARK: Runtime Stage +FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - python3 \ - python3-pip \ - python3-venv \ - ffmpeg \ - git \ - build-essential \ - python3-dev \ - ca-certificates && \ - rm -rf /var/lib/apt/lists/* 
+ apt-get install -y --no-install-recommends \ + ffmpeg &&\ + rm -rf /var/lib/apt/lists/* -RUN python3 -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +# Copy UV binaries +COPY --from=uvbin /uv /uvx /bin/ -# timeout/retries for large torch wheels -RUN pip3 install --upgrade pip setuptools wheel && \ - pip3 --disable-pip-version-check install --timeout=120 --retries=5 \ - --index-url https://download.pytorch.org/whl/cu129 \ - torch torchaudio \ - || (echo "Initial install failed — retrying with extended timeout..." && \ - pip3 --disable-pip-version-check install --timeout=300 --retries=3 \ - --index-url https://download.pytorch.org/whl/cu129 \ - torch torchvision torchaudio) +# Copy the Python version +COPY --from=builder-gpu /python /python -COPY . . - -# Install WhisperLiveKit directly, allowing for optional dependencies -# Example: --build-arg EXTRAS="translation" -RUN if [ -n "$EXTRAS" ]; then \ - echo "Installing with extras: [$EXTRAS]"; \ - pip install --no-cache-dir "whisperlivekit[$EXTRAS]"; \ - else \ - echo "Installing base package only"; \ - pip install --no-cache-dir whisperlivekit; \ - fi - -# In-container caching for Hugging Face models by: -# A) Make the cache directory persistent via an anonymous volume. -# Note: This only persists for a single, named container. This is -# only for convenience at de/test stage. -# For prod, it is better to use a named volume via host mount/k8s. -VOLUME ["/root/.cache/huggingface/hub"] - - -# or -# B) Conditionally copy a local pre-cache from the build context to the -# container's cache via the HF_PRECACHE_DIR build-arg. -# WARNING: This will copy ALL files in the pre-cache location. 
- -# Conditionally copy a cache directory if provided -RUN if [ -n "$HF_PRECACHE_DIR" ]; then \ - echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \ - mkdir -p /root/.cache/huggingface/hub && \ - cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \ - else \ - echo "No local Hugging Face cache specified, skipping copy"; \ - fi - -# Conditionally copy a Hugging Face token if provided. Useful for Diart backend (pyannote audio models) -RUN if [ -n "$HF_TKN_FILE" ]; then \ - echo "Copying Hugging Face token from $HF_TKN_FILE"; \ - mkdir -p /root/.cache/huggingface && \ - cp $HF_TKN_FILE /root/.cache/huggingface/token; \ - else \ - echo "No Hugging Face token file specified, skipping token setup"; \ - fi +# Copy the virtual environment with all dependencies installed +COPY --from=builder-gpu /app/.venv /app/.venv EXPOSE 8000 +ENV PATH="/app/.venv/bin:$PATH" +ENV UV_PYTHON_DOWNLOADS=0 + HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \ - CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"] diff --git a/Dockerfile.cpu b/Dockerfile.cpu index b5a3b2c..8edd3b1 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -1,64 +1,76 @@ -FROM python:3.13-slim +FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin +# --- MARK: Builder Stage +FROM debian:bookworm-slim AS builder-cpu ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 WORKDIR /app -ARG EXTRAS -ARG HF_PRECACHE_DIR -ARG HF_TKN_FILE +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev && \ + rm -rf /var/lib/apt/lists/* + +# Install UV and set up the environment +COPY --from=uvbin /uv /uvx /bin/ + +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1 +ENV UV_PYTHON_PREFERENCE=only-managed +ENV UV_PYTHON_INSTALL_DIR=/python + +RUN uv python 
install 3.12 + +# Install dependencies first to leverage caching +ARG EXTRAS=cpu +COPY pyproject.toml uv.lock /app/ +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-install-project --no-editable --no-cache "$@" + +# Copy the source code and install the package only +COPY whisperlivekit /app/whisperlivekit +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-editable --no-cache "$@" + +# --- MARK: Runtime Stage +FROM debian:bookworm-slim + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ffmpeg \ - git \ - build-essential \ - python3-dev && \ - rm -rf /var/lib/apt/lists/* + apt-get install -y --no-install-recommends \ + ffmpeg &&\ + rm -rf /var/lib/apt/lists/* -# Install CPU-only PyTorch -RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +# Copy UV binaries +COPY --from=uvbin /uv /uvx /bin/ -COPY . . 
+# Copy the Python version +COPY --from=builder-cpu /python /python -# Install WhisperLiveKit directly, allowing for optional dependencies -RUN if [ -n "$EXTRAS" ]; then \ - echo "Installing with extras: [$EXTRAS]"; \ - pip install --no-cache-dir whisperlivekit[$EXTRAS]; \ - else \ - echo "Installing base package only"; \ - pip install --no-cache-dir whisperlivekit; \ - fi +# Copy the virtual environment with all dependencies installed +COPY --from=builder-cpu /app/.venv /app/.venv -# Enable in-container caching for Hugging Face models -VOLUME ["/root/.cache/huggingface/hub"] - -# Conditionally copy a local pre-cache from the build context -RUN if [ -n "$HF_PRECACHE_DIR" ]; then \ - echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \ - mkdir -p /root/.cache/huggingface/hub && \ - cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \ - else \ - echo "No local Hugging Face cache specified, skipping copy"; \ - fi - -# Conditionally copy a Hugging Face token if provided -RUN if [ -n "$HF_TKN_FILE" ]; then \ - echo "Copying Hugging Face token from $HF_TKN_FILE"; \ - mkdir -p /root/.cache/huggingface && \ - cp $HF_TKN_FILE /root/.cache/huggingface/token; \ - else \ - echo "No Hugging Face token file specified, skipping token setup"; \ - fi - -# Expose port for the transcription server EXPOSE 8000 +ENV PATH="/app/.venv/bin:$PATH" +ENV UV_PYTHON_DOWNLOADS=0 + HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \ - CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"] # Default args - you might want to use a smaller model for CPU -CMD ["--model", "tiny"] \ No newline at end of file +CMD ["--model", "tiny"] diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..da3696e --- /dev/null +++ b/compose.yml @@ 
-0,0 +1,52 @@ +services: + wlk-gpu-sortformer: + build: + context: . + dockerfile: Dockerfile + args: + EXTRAS: ${GPU_SORTFORMER_EXTRAS:-gpu-cu129,diarization-sortformer} + image: wlk:gpu-sortformer + gpus: all + ports: + - "8000:8000" + volumes: + - hf-cache:/root/.cache/huggingface/hub + # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro + environment: + - HF_TOKEN + command: ["--model", "medium", "--diarization", "--pcm-input"] + + wlk-gpu-voxtral: + build: + context: . + dockerfile: Dockerfile + args: + EXTRAS: ${GPU_VOXTRAL_EXTRAS:-gpu-cu129,voxtral-hf,translation} + image: wlk:gpu-voxtral + gpus: all + ports: + - "8001:8000" + volumes: + - hf-cache:/root/.cache/huggingface/hub + # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro + environment: + - HF_TOKEN + command: ["--backend", "voxtral", "--pcm-input"] + + wlk-cpu: + build: + context: . + dockerfile: Dockerfile.cpu + args: + EXTRAS: ${CPU_EXTRAS:-cpu,diarization-diart,translation} + image: wlk:cpu + ports: + - "8002:8000" + volumes: + - hf-cache:/root/.cache/huggingface/hub + # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro + environment: + - HF_TOKEN + +volumes: + hf-cache: