diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a599462 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.github +.venv +__pycache__ +*.pyc +.pytest_cache +.mypy_cache +.ruff_cache +.cache +.tmp +.secrets +dist +build diff --git a/Dockerfile b/Dockerfile index cc413bf..317d231 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,86 +1,74 @@ -FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 +FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin +# --- MARK: Builder Stage +FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 AS builder-gpu ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 WORKDIR /app -ARG EXTRAS -ARG HF_PRECACHE_DIR -ARG HF_TKN_FILE +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev && \ + rm -rf /var/lib/apt/lists/* + +# Install UV and set up the environment +COPY --from=uvbin /uv /uvx /bin/ + +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1 +ENV UV_PYTHON_PREFERENCE=only-managed +ENV UV_PYTHON_INSTALL_DIR=/python + +RUN uv python install 3.12 + +# Install dependencies first to leverage caching +ARG EXTRAS=gpu-cu129 +COPY pyproject.toml uv.lock /app/ +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-install-project --no-editable --no-cache "$@" + +# Copy the source code and install the package only +COPY whisperlivekit /app/whisperlivekit +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-editable --no-cache "$@" + +# --- MARK: Runtime Stage +FROM nvidia/cuda:12.9.1-cudnn-devel-ubuntu24.04 + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - python3 \ - python3-pip \ - python3-venv \ - ffmpeg \ - git \ - build-essential \ - python3-dev \ - ca-certificates && \ - rm -rf /var/lib/apt/lists/* 
+ apt-get install -y --no-install-recommends \ + ffmpeg &&\ + rm -rf /var/lib/apt/lists/* -RUN python3 -m venv /opt/venv -ENV PATH="/opt/venv/bin:$PATH" +# Copy UV binaries +COPY --from=uvbin /uv /uvx /bin/ -# timeout/retries for large torch wheels -RUN pip3 install --upgrade pip setuptools wheel && \ - pip3 --disable-pip-version-check install --timeout=120 --retries=5 \ - --index-url https://download.pytorch.org/whl/cu129 \ - torch torchaudio \ - || (echo "Initial install failed — retrying with extended timeout..." && \ - pip3 --disable-pip-version-check install --timeout=300 --retries=3 \ - --index-url https://download.pytorch.org/whl/cu129 \ - torch torchvision torchaudio) +# Copy the Python version +COPY --from=builder-gpu /python /python -COPY . . - -# Install WhisperLiveKit directly, allowing for optional dependencies -# Example: --build-arg EXTRAS="translation" -RUN if [ -n "$EXTRAS" ]; then \ - echo "Installing with extras: [$EXTRAS]"; \ - pip install --no-cache-dir "whisperlivekit[$EXTRAS]"; \ - else \ - echo "Installing base package only"; \ - pip install --no-cache-dir whisperlivekit; \ - fi - -# In-container caching for Hugging Face models by: -# A) Make the cache directory persistent via an anonymous volume. -# Note: This only persists for a single, named container. This is -# only for convenience at de/test stage. -# For prod, it is better to use a named volume via host mount/k8s. -VOLUME ["/root/.cache/huggingface/hub"] - - -# or -# B) Conditionally copy a local pre-cache from the build context to the -# container's cache via the HF_PRECACHE_DIR build-arg. -# WARNING: This will copy ALL files in the pre-cache location. 
- -# Conditionally copy a cache directory if provided -RUN if [ -n "$HF_PRECACHE_DIR" ]; then \ - echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \ - mkdir -p /root/.cache/huggingface/hub && \ - cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \ - else \ - echo "No local Hugging Face cache specified, skipping copy"; \ - fi - -# Conditionally copy a Hugging Face token if provided. Useful for Diart backend (pyannote audio models) -RUN if [ -n "$HF_TKN_FILE" ]; then \ - echo "Copying Hugging Face token from $HF_TKN_FILE"; \ - mkdir -p /root/.cache/huggingface && \ - cp $HF_TKN_FILE /root/.cache/huggingface/token; \ - else \ - echo "No Hugging Face token file specified, skipping token setup"; \ - fi +# Copy the virtual environment with all dependencies installed +COPY --from=builder-gpu /app/.venv /app/.venv EXPOSE 8000 +ENV PATH="/app/.venv/bin:$PATH" +ENV UV_PYTHON_DOWNLOADS=0 + HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \ - CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"] diff --git a/Dockerfile.cpu b/Dockerfile.cpu index b5a3b2c..8edd3b1 100644 --- a/Dockerfile.cpu +++ b/Dockerfile.cpu @@ -1,64 +1,76 @@ -FROM python:3.13-slim +FROM ghcr.io/astral-sh/uv:0.10.4 AS uvbin +# --- MARK: Builder Stage +FROM debian:bookworm-slim AS builder-cpu ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 WORKDIR /app -ARG EXTRAS -ARG HF_PRECACHE_DIR -ARG HF_TKN_FILE +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + python3-dev && \ + rm -rf /var/lib/apt/lists/* + +# Install UV and set up the environment +COPY --from=uvbin /uv /uvx /bin/ + +ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy UV_NO_DEV=1 +ENV UV_PYTHON_PREFERENCE=only-managed +ENV UV_PYTHON_INSTALL_DIR=/python + +RUN uv python 
install 3.12 + +# Install dependencies first to leverage caching +ARG EXTRAS=cpu +COPY pyproject.toml uv.lock /app/ +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-install-project --no-editable --no-cache "$@" + +# Copy the source code and install the package only +COPY whisperlivekit /app/whisperlivekit +RUN set -eux; \ + set --; \ + for extra in $(echo "${EXTRAS:-}" | tr ',' ' '); do \ + set -- "$@" --extra "$extra"; \ + done; \ + uv sync --frozen --no-editable --no-cache "$@" + +# --- MARK: Runtime Stage +FROM debian:bookworm-slim + +ENV DEBIAN_FRONTEND=noninteractive + +WORKDIR /app RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - ffmpeg \ - git \ - build-essential \ - python3-dev && \ - rm -rf /var/lib/apt/lists/* + apt-get install -y --no-install-recommends \ + ffmpeg &&\ + rm -rf /var/lib/apt/lists/* -# Install CPU-only PyTorch -RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +# Copy UV binaries +COPY --from=uvbin /uv /uvx /bin/ -COPY . . 
+# Copy the Python version +COPY --from=builder-cpu /python /python -# Install WhisperLiveKit directly, allowing for optional dependencies -RUN if [ -n "$EXTRAS" ]; then \ - echo "Installing with extras: [$EXTRAS]"; \ - pip install --no-cache-dir whisperlivekit[$EXTRAS]; \ - else \ - echo "Installing base package only"; \ - pip install --no-cache-dir whisperlivekit; \ - fi +# Copy the virtual environment with all dependencies installed +COPY --from=builder-cpu /app/.venv /app/.venv -# Enable in-container caching for Hugging Face models -VOLUME ["/root/.cache/huggingface/hub"] - -# Conditionally copy a local pre-cache from the build context -RUN if [ -n "$HF_PRECACHE_DIR" ]; then \ - echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \ - mkdir -p /root/.cache/huggingface/hub && \ - cp -r $HF_PRECACHE_DIR/* /root/.cache/huggingface/hub; \ - else \ - echo "No local Hugging Face cache specified, skipping copy"; \ - fi - -# Conditionally copy a Hugging Face token if provided -RUN if [ -n "$HF_TKN_FILE" ]; then \ - echo "Copying Hugging Face token from $HF_TKN_FILE"; \ - mkdir -p /root/.cache/huggingface && \ - cp $HF_TKN_FILE /root/.cache/huggingface/token; \ - else \ - echo "No Hugging Face token file specified, skipping token setup"; \ - fi - -# Expose port for the transcription server EXPOSE 8000 +ENV PATH="/app/.venv/bin:$PATH" +ENV UV_PYTHON_DOWNLOADS=0 + HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \ - CMD python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/')" || exit 1 ENTRYPOINT ["whisperlivekit-server", "--host", "0.0.0.0"] # Default args - you might want to use a smaller model for CPU -CMD ["--model", "tiny"] \ No newline at end of file +CMD ["--model", "tiny"] diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..da3696e --- /dev/null +++ b/compose.yml @@ 
-0,0 +1,52 @@ +services: + wlk-gpu-sortformer: + build: + context: . + dockerfile: Dockerfile + args: + EXTRAS: ${GPU_SORTFORMER_EXTRAS:-gpu-cu129,diarization-sortformer} + image: wlk:gpu-sortformer + gpus: all + ports: + - "8000:8000" + volumes: + - hf-cache:/root/.cache/huggingface/hub + # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro + environment: + - HF_TOKEN + command: ["--model", "medium", "--diarization", "--pcm-input"] + + wlk-gpu-voxtral: + build: + context: . + dockerfile: Dockerfile + args: + EXTRAS: ${GPU_VOXTRAL_EXTRAS:-gpu-cu129,voxtral-hf,translation} + image: wlk:gpu-voxtral + gpus: all + ports: + - "8001:8000" + volumes: + - hf-cache:/root/.cache/huggingface/hub + # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro + environment: + - HF_TOKEN + command: ["--backend", "voxtral", "--pcm-input"] + + wlk-cpu: + build: + context: . + dockerfile: Dockerfile.cpu + args: + EXTRAS: ${CPU_EXTRAS:-cpu,diarization-diart,translation} + image: wlk:cpu + ports: + - "8002:8000" + volumes: + - hf-cache:/root/.cache/huggingface/hub + # - ${HF_TKN_FILE:-./token}:/root/.cache/huggingface/token:ro + environment: + - HF_TOKEN + +volumes: + hf-cache: