feat: distribute linux arm64 images and update cuda versions (#496)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2026-03-07 14:23:22 +00:00 · 2026-02-16 15:42:49 +01:00
parent 19f659cb30
commit c590cb42e1
7 changed files with 1936 additions and 1431 deletions
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -17,22 +17,32 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
            platforms: linux/amd64, linux/arm64
+            tag_latest: true
          - name: docling-project/docling-serve-cpu
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
+            tag_latest: true
          # - name: docling-project/docling-serve-cu124
          #   build_args: |
          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
          #   platforms: linux/amd64
-          - name: docling-project/docling-serve-cu126
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
-            platforms: linux/amd64
+          #   tag_latest: false
+          # - name: docling-project/docling-serve-cu126
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
+          #   platforms: linux/amd64
+          #   tag_latest: false
          - name: docling-project/docling-serve-cu128
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
-            platforms: linux/amd64
+            platforms: linux/amd64, linux/arm64
+            tag_latest: false
+          - name: docling-project/docling-serve-cu130
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130
+            platforms: linux/amd64, linux/arm64
+            tag_latest: false
          # - name: docling-project/docling-serve-rocm
          #   build_args: |
          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
@@ -51,3 +61,4 @@ jobs:
      ghcr_image_name: ${{ matrix.spec.name }}
      quay_image_name: ""
      platforms: ${{ matrix.spec.platforms }}
+      tag_latest: ${{ matrix.spec.tag_latest }}
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -21,22 +21,32 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
            platforms: linux/amd64, linux/arm64
+            tag_latest: true
          - name: docling-project/docling-serve-cpu
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
+            tag_latest: true
          # - name: docling-project/docling-serve-cu124
          #   build_args: |
          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
          #   platforms: linux/amd64
-          - name: docling-project/docling-serve-cu126
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
-            platforms: linux/amd64
+          #   tag_latest: false
+          # - name: docling-project/docling-serve-cu126
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
+          #   platforms: linux/amd64
+          #   tag_latest: false
          - name: docling-project/docling-serve-cu128
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
            platforms: linux/amd64
+            tag_latest: false
+          - name: docling-project/docling-serve-cu130
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130
+            platforms: linux/amd64, linux/arm64
+            tag_latest: false
          # - name: docling-project/docling-serve-rocm
          #   build_args: |
          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
@@ -56,3 +66,4 @@ jobs:
      ghcr_image_name: ${{ matrix.spec.name }}
      quay_image_name: ${{ matrix.spec.name }}
      platforms: ${{ matrix.spec.platforms }}
+      tag_latest: ${{ matrix.spec.tag_latest }}
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -21,6 +21,10 @@ on:
        type: boolean
        description: "If true, the images will be published."
        default: false
+      tag_latest:
+        type: boolean
+        description: "If true, the 'latest' tag will be applied to the image."
+        default: true
      environment:
        type: string
        description: "GH Action environment"
@@ -87,6 +91,8 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
+          # Map the boolean to metadata-action's policy: 'auto' keeps the default
+          # behaviour (latest only for release-style tags), whereas a literal 'true'
+          # would force 'latest' onto every build, including branch and PR refs.
+          flavor: |
+            latest=${{ inputs.tag_latest && 'auto' || 'false' }}

      # # Local test
      # - name: Set metadata outputs for local testing ## comment out Free up space, Log in to cr, Cache Docker, Extract metadata, and quay blocks and run act
@@ -209,6 +215,8 @@ jobs:
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.QUAY_REGISTRY }}/${{ inputs.quay_image_name }}
+          # 'auto' (not 'true') so that 'latest' is only emitted for release-style
+          # tags; 'true' would also move 'latest' on every branch build pushed to quay.io.
+          flavor: |
+            latest=${{ inputs.tag_latest && 'auto' || 'false' }}

      - name: Build and push image to quay.io
        if: ${{ inputs.publish }}
--- a/14
+++ b/14
@@ -63,6 +63,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
 	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
 	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)

+# Build the CUDA 13.0 image for linux/amd64 from the Containerfile, swapping the
+# "pypi" dependency group for "cu130" through the UV_SYNC_EXTRA_ARGS build arg.
+# The resulting $(TAG) image is then re-tagged as $(BRANCH_TAG) under both the
+# ghcr.io and quay.io repository names.
+.PHONY: docling-serve-cu130-image
+docling-serve-cu130-image: Containerfile ## Build docling-serve container image with CUDA 13.0 support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 13.0]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu130:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu130:$(TAG) ghcr.io/docling-project/docling-serve-cu130:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu130:$(TAG) quay.io/docling-project/docling-serve-cu130:$(BRANCH_TAG)
+
 .PHONY: docling-serve-rocm-image
 docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
@@ -132,6 +139,13 @@ run-docling-cu128: ## Run the docling-serve container with GPU support and assig
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
 	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main

+# Start an interactive (-it) container named docling-serve-cu130 from the ":main"
+# tag of the CUDA 13.0 image, publishing port 5001. Any existing container with
+# that name is force-removed first; a failing removal is deliberately ignored.
+# NOTE(review): no GPU device flag is passed to `run` here -- confirm whether
+# the container runtime needs one for the advertised GPU support.
+.PHONY: run-docling-cu130
+run-docling-cu130: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cu130 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 13.0]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu130 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu130:main
+
 .PHONY: run-docling-rocm
 run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
 	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
--- a/README.md
+++ b/README.md
@@ -61,8 +61,26 @@ The following container images are available for running **Docling Serve** with
 |-------|-------------|----------------|------|
 | [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
 | [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
-| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
 | [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu130`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu130) <br> [`quay.io/docling-project/docling-serve-cu130`](https://quay.io/repository/docling-project/docling-serve-cu130) | CUDA 13.0 build with `torch` from the cu130 index. | `linux/amd64`, `linux/arm64` | TBD |
+
+> [!IMPORTANT]
+> **CUDA Image Tagging Policy**
+>
+> CUDA-specific images (`-cu128`, `-cu130`) follow PyTorch's CUDA version support lifecycle and are tagged differently from base images:
+>
+> - **Base images** (`docling-serve`, `docling-serve-cpu`): Tagged with explicit versions (e.g., `1.12.0`), plus `latest` and `main` for convenience
+> - **CUDA images** (`docling-serve-cu*`): **Only tagged with explicit versions** (e.g., `1.12.0`) and `main`
+>
+> **Why?** CUDA versions are deprecated over time as PyTorch adds support for newer CUDA releases. To avoid accidentally pulling deprecated CUDA versions, CUDA images intentionally exclude the `latest` tag. Always use explicit version tags like:
+>
+> ```bash
+> # ✅ Recommended: Explicit version
+> docker pull quay.io/docling-project/docling-serve-cu130:1.12.0
+>
+> # ❌ Not available for CUDA images
+> docker pull quay.io/docling-project/docling-serve-cu130:latest
+> ```

 #### 🚫 Not Distributed

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -65,11 +65,11 @@ easyocr = [
    "easyocr>=1.7",
 ]
 rapidocr = [
-    "rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
+    "rapidocr (>=3.3,<4.0.0)",
    "onnxruntime (>=1.7.0,<2.0.0)",
 ]
 flash-attn = [
-  "flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
+  "flash-attn~=2.8.2; sys_platform == 'linux'"
 ]

 [dependency-groups]
@@ -116,6 +116,10 @@ cu128 = [
  "torchvision>=0.22.1",
 ]

+# CUDA 13.0 variant; on Linux, torch and torchvision for this group are sourced
+# from the "pytorch-cu130" index.
+cu130 = [
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+]
 rocm = [
  "torch>=2.7.1",
  "torchvision>=0.22.1",
@@ -132,6 +136,7 @@ conflicts = [
    # { group = "cu124" },
    { group = "cu126" },
    { group = "cu128" },
+    { group = "cu130" },
    { group = "rocm" },
  ],
 ]
@@ -148,6 +153,7 @@ torch = [
  # { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu130", group = "cu130", marker = "sys_platform == 'linux'" },
  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]

@@ -157,6 +163,7 @@ torchvision = [
  # { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu130", group = "cu130", marker = "sys_platform == 'linux'" },
  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]

@@ -192,6 +199,11 @@ name = "pytorch-cu128"
 url = "https://download.pytorch.org/whl/cu128"
 explicit = true

+# PyTorch wheel index for CUDA 13.0 builds.
+# NOTE(review): "explicit = true" presumably restricts this index to packages
+# that reference it by name -- confirm against the uv index documentation.
+[[tool.uv.index]]
+name = "pytorch-cu130"
+url = "https://download.pytorch.org/whl/cu130"
+explicit = true
+
 [[tool.uv.index]]
 name = "pytorch-rocm"
 url = "https://download.pytorch.org/whl/rocm6.3"
--- a/uv.lock
+++ b/uv.lock