From 1adac671557d818caf4ce3f97c0ff521afea595e Mon Sep 17 00:00:00 2001 From: Quentin Fuxa <38427957+QuentinFuxa@users.noreply.github.com> Date: Fri, 29 Aug 2025 21:27:08 +0200 Subject: [PATCH] explanations about model persistency in containers --- Dockerfile | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index f01384d..534f99d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -24,11 +24,11 @@ RUN apt-get update && \ RUN python3 -m venv /opt/venv ENV PATH="/opt/venv/bin:$PATH" -# upgrade pip + setuptools + wheel and increase timeout/retries for large torch wheels +# timeout/retries for large torch wheels RUN pip3 install --upgrade pip setuptools wheel && \ pip3 --disable-pip-version-check install --timeout=120 --retries=5 \ --index-url https://download.pytorch.org/whl/cu129 \ - torch torchvision torchaudio \ + torch torchaudio \ || (echo "Initial install failed — retrying with extended timeout..." && \ pip3 --disable-pip-version-check install --timeout=300 --retries=3 \ --index-url https://download.pytorch.org/whl/cu129 \ @@ -45,8 +45,20 @@ RUN if [ -n "$EXTRAS" ]; then \ pip install --no-cache-dir whisperlivekit; \ fi +# In-container caching for Hugging Face models by: +# A) Make the cache directory persistent via an anonymous volume. +# Note: This only persists for a single, named container. This is +# only for convenience at dev/test stage. +# For prod, it is better to use a named volume via host mount/k8s. VOLUME ["/root/.cache/huggingface/hub"] + +# or +# B) Conditionally copy a local pre-cache from the build context to the +# container's cache via the HF_PRECACHE_DIR build-arg. +# WARNING: This will copy ALL files in the pre-cache location. 
+ +# Conditionally copy a cache directory if provided RUN if [ -n "$HF_PRECACHE_DIR" ]; then \ echo "Copying Hugging Face cache from $HF_PRECACHE_DIR"; \ mkdir -p /root/.cache/huggingface/hub && \ @@ -55,6 +67,7 @@ RUN if [ -n "$HF_PRECACHE_DIR" ]; then \ echo "No local Hugging Face cache specified, skipping copy"; \ fi +# Conditionally copy a Hugging Face token if provided. Useful for Diart backend (pyannote audio models) RUN if [ -n "$HF_TKN_FILE" ]; then \ echo "Copying Hugging Face token from $HF_TKN_FILE"; \ mkdir -p /root/.cache/huggingface && \