feat: distribute linux arm64 images and update cuda versions (#496)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2026-02-16 15:42:49 +01:00
committed by GitHub
parent 19f659cb30
commit c590cb42e1
7 changed files with 1936 additions and 1431 deletions

View File

@@ -17,22 +17,32 @@ jobs:
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
platforms: linux/amd64, linux/arm64
tag_latest: true
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
tag_latest: true
# - name: docling-project/docling-serve-cu124
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
# platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
platforms: linux/amd64
# tag_latest: false
# - name: docling-project/docling-serve-cu126
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
# platforms: linux/amd64
# tag_latest: false
- name: docling-project/docling-serve-cu128
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
platforms: linux/amd64, linux/arm64
tag_latest: false
- name: docling-project/docling-serve-cu130
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130
platforms: linux/amd64, linux/arm64
tag_latest: false
# - name: docling-project/docling-serve-rocm
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
@@ -51,3 +61,4 @@ jobs:
ghcr_image_name: ${{ matrix.spec.name }}
quay_image_name: ""
platforms: ${{ matrix.spec.platforms }}
tag_latest: ${{ matrix.spec.tag_latest }}

View File

@@ -21,22 +21,32 @@ jobs:
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
platforms: linux/amd64, linux/arm64
tag_latest: true
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
tag_latest: true
# - name: docling-project/docling-serve-cu124
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
# platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
platforms: linux/amd64
# tag_latest: false
# - name: docling-project/docling-serve-cu126
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
# platforms: linux/amd64
# tag_latest: false
- name: docling-project/docling-serve-cu128
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
tag_latest: false
- name: docling-project/docling-serve-cu130
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130
platforms: linux/amd64, linux/arm64
tag_latest: false
# - name: docling-project/docling-serve-rocm
# build_args: |
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
@@ -56,3 +66,4 @@ jobs:
ghcr_image_name: ${{ matrix.spec.name }}
quay_image_name: ${{ matrix.spec.name }}
platforms: ${{ matrix.spec.platforms }}
tag_latest: ${{ matrix.spec.tag_latest }}

View File

@@ -21,6 +21,10 @@ on:
type: boolean
description: "If true, the images will be published."
default: false
tag_latest:
type: boolean
description: "If true, the 'latest' tag will be applied to the image."
default: true
environment:
type: string
description: "GH Action environment"
@@ -87,6 +91,8 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
flavor: |
latest=${{ inputs.tag_latest }}
# # Local test
# - name: Set metadata outputs for local testing ## comment out Free up space, Log in to cr, Cache Docker, Extract metadata, and quay blocks and run act
@@ -209,6 +215,8 @@ jobs:
uses: docker/metadata-action@v5
with:
images: ${{ env.QUAY_REGISTRY }}/${{ inputs.quay_image_name }}
flavor: |
latest=${{ inputs.tag_latest }}
- name: Build and push image to quay.io
if: ${{ inputs.publish }}

View File

@@ -63,6 +63,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
# Build the CUDA 13.0 flavour of the docling-serve image from Containerfile.
#
# The build passes UV_SYNC_EXTRA_ARGS="--no-group pypi --group cu130" as a
# build argument (presumably forwarded to `uv sync` inside the Containerfile so
# the cu130 dependency group is installed instead of the pypi one -- verify
# against the Containerfile).
#
# Only linux/amd64 is built by this rule, and --load is passed to the build
# command. The image is tagged ghcr.io/...-cu130:$(TAG) and then re-tagged
# with $(BRANCH_TAG) under both the ghcr.io and quay.io names.
# Nothing is pushed by this rule.
.PHONY: docling-serve-cu130-image
docling-serve-cu130-image: Containerfile ## Build docling-serve container image with CUDA 13.0 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 13.0]"
$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu130:$(TAG) .
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu130:$(TAG) ghcr.io/docling-project/docling-serve-cu130:$(BRANCH_TAG)
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu130:$(TAG) quay.io/docling-project/docling-serve-cu130:$(BRANCH_TAG)
.PHONY: docling-serve-rocm-image
docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
@@ -132,6 +139,13 @@ run-docling-cu128: ## Run the docling-serve container with GPU support and assig
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
# Run the CUDA 13.0 image interactively (-it) as a container named
# `docling-serve-cu130`, publishing container port 5001 on host port 5001.
#
# Any existing container with that name is force-removed first. The removal's
# stderr is discarded and its exit status is masked with `|| true`, so a
# missing container does not fail the target.
#
# NOTE(review): the image tag is hard-coded to `:main` rather than
# $(BRANCH_TAG), and no GPU device flag is passed to `run` even though the
# help text says "with GPU support" -- confirm both are intentional.
.PHONY: run-docling-cu130
run-docling-cu130: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cu130 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 13.0]"
$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu130 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu130:main
.PHONY: run-docling-rocm
run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"

View File

@@ -61,8 +61,26 @@ The following container images are available for running **Docling Serve** with
|-------|-------------|----------------|------|
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
| [`ghcr.io/docling-project/docling-serve-cu130`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu130) <br> [`quay.io/docling-project/docling-serve-cu130`](https://quay.io/repository/docling-project/docling-serve-cu130) | CUDA 13.0 build with `torch` from the cu130 index. | `linux/amd64`, `linux/arm64` | TBD |
> [!IMPORTANT]
> **CUDA Image Tagging Policy**
>
> CUDA-specific images (`-cu128`, `-cu130`) follow PyTorch's CUDA version support lifecycle and are tagged differently from base images:
>
> - **Base images** (`docling-serve`, `docling-serve-cpu`): Tagged with `latest` and `main` for convenience
> - **CUDA images** (`docling-serve-cu*`): **Only tagged with explicit versions** (e.g., `1.12.0`) and `main`
>
> **Why?** CUDA versions are deprecated over time as PyTorch adds support for newer CUDA releases. To avoid accidentally pulling a deprecated CUDA version, CUDA images intentionally exclude the `latest` tag. Always pull them with an explicit version tag, for example:
>
> ```bash
> # ✅ Recommended: Explicit version
> docker pull quay.io/docling-project/docling-serve-cu130:1.12.0
>
> # ❌ Not available for CUDA images
> docker pull quay.io/docling-project/docling-serve-cu130:latest
> ```
#### 🚫 Not Distributed

View File

@@ -65,11 +65,11 @@ easyocr = [
"easyocr>=1.7",
]
rapidocr = [
"rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
"rapidocr (>=3.3,<4.0.0)",
"onnxruntime (>=1.7.0,<2.0.0)",
]
flash-attn = [
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
"flash-attn~=2.8.2; sys_platform == 'linux'"
]
[dependency-groups]
@@ -116,6 +116,10 @@ cu128 = [
"torchvision>=0.22.1",
]
cu130 = [
"torch>=2.7.1",
"torchvision>=0.22.1",
]
rocm = [
"torch>=2.7.1",
"torchvision>=0.22.1",
@@ -132,6 +136,7 @@ conflicts = [
# { group = "cu124" },
{ group = "cu126" },
{ group = "cu128" },
{ group = "cu130" },
{ group = "rocm" },
],
]
@@ -148,6 +153,7 @@ torch = [
# { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu130", group = "cu130", marker = "sys_platform == 'linux'" },
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
]
@@ -157,6 +163,7 @@ torchvision = [
# { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
{ index = "pytorch-cu130", group = "cu130", marker = "sys_platform == 'linux'" },
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
]
@@ -192,6 +199,11 @@ name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu130"
url = "https://download.pytorch.org/whl/cu130"
explicit = true
[[tool.uv.index]]
name = "pytorch-rocm"
url = "https://download.pytorch.org/whl/rocm6.3"

3269
uv.lock generated

File diff suppressed because one or more lines are too long