mirror of
https://github.com/docling-project/docling-serve.git
synced 2026-03-07 14:23:22 +00:00
feat: distribute linux arm64 images and update cuda versions (#496)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
21
.github/workflows/ci-images-dryrun.yml
vendored
21
.github/workflows/ci-images-dryrun.yml
vendored
@@ -17,22 +17,32 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: true
|
||||
- name: docling-project/docling-serve-cpu
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: true
|
||||
# - name: docling-project/docling-serve-cu124
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
# platforms: linux/amd64
|
||||
- name: docling-project/docling-serve-cu126
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
platforms: linux/amd64
|
||||
# tag_latest: false
|
||||
# - name: docling-project/docling-serve-cu126
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
# platforms: linux/amd64
|
||||
# tag_latest: false
|
||||
- name: docling-project/docling-serve-cu128
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
|
||||
platforms: linux/amd64
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: false
|
||||
- name: docling-project/docling-serve-cu130
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: false
|
||||
# - name: docling-project/docling-serve-rocm
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
|
||||
@@ -51,3 +61,4 @@ jobs:
|
||||
ghcr_image_name: ${{ matrix.spec.name }}
|
||||
quay_image_name: ""
|
||||
platforms: ${{ matrix.spec.platforms }}
|
||||
tag_latest: ${{ matrix.spec.tag_latest }}
|
||||
|
||||
19
.github/workflows/images.yml
vendored
19
.github/workflows/images.yml
vendored
@@ -21,22 +21,32 @@ jobs:
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: true
|
||||
- name: docling-project/docling-serve-cpu
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: true
|
||||
# - name: docling-project/docling-serve-cu124
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
|
||||
# platforms: linux/amd64
|
||||
- name: docling-project/docling-serve-cu126
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
platforms: linux/amd64
|
||||
# tag_latest: false
|
||||
# - name: docling-project/docling-serve-cu126
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
|
||||
# platforms: linux/amd64
|
||||
# tag_latest: false
|
||||
- name: docling-project/docling-serve-cu128
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
|
||||
platforms: linux/amd64
|
||||
tag_latest: false
|
||||
- name: docling-project/docling-serve-cu130
|
||||
build_args: |
|
||||
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130
|
||||
platforms: linux/amd64, linux/arm64
|
||||
tag_latest: false
|
||||
# - name: docling-project/docling-serve-rocm
|
||||
# build_args: |
|
||||
# UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
|
||||
@@ -56,3 +66,4 @@ jobs:
|
||||
ghcr_image_name: ${{ matrix.spec.name }}
|
||||
quay_image_name: ${{ matrix.spec.name }}
|
||||
platforms: ${{ matrix.spec.platforms }}
|
||||
tag_latest: ${{ matrix.spec.tag_latest }}
|
||||
|
||||
8
.github/workflows/job-image.yml
vendored
8
.github/workflows/job-image.yml
vendored
@@ -21,6 +21,10 @@ on:
|
||||
type: boolean
|
||||
description: "If true, the images will be published."
|
||||
default: false
|
||||
tag_latest:
|
||||
type: boolean
|
||||
description: "If true, the 'latest' tag will be applied to the image."
|
||||
default: true
|
||||
environment:
|
||||
type: string
|
||||
description: "GH Action environment"
|
||||
@@ -87,6 +91,8 @@ jobs:
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
|
||||
flavor: |
|
||||
latest=${{ inputs.tag_latest }}
|
||||
|
||||
# # Local test
|
||||
# - name: Set metadata outputs for local testing ## comment out Free up space, Log in to cr, Cache Docker, Extract metadata, and quay blocks and run act
|
||||
@@ -209,6 +215,8 @@ jobs:
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.QUAY_REGISTRY }}/${{ inputs.quay_image_name }}
|
||||
flavor: |
|
||||
latest=${{ inputs.tag_latest }}
|
||||
|
||||
- name: Build and push image to quay.io
|
||||
if: ${{ inputs.publish }}
|
||||
|
||||
14
Makefile
14
Makefile
@@ -63,6 +63,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: docling-serve-cu130-image
|
||||
docling-serve-cu130-image: Containerfile ## Build docling-serve container image with CUDA 13.0 support
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 13.0]"
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu130" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu130:$(TAG) .
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu130:$(TAG) ghcr.io/docling-project/docling-serve-cu130:$(BRANCH_TAG)
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu130:$(TAG) quay.io/docling-project/docling-serve-cu130:$(BRANCH_TAG)
|
||||
|
||||
.PHONY: docling-serve-rocm-image
|
||||
docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
|
||||
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
|
||||
@@ -132,6 +139,13 @@ run-docling-cu128: ## Run the docling-serve container with GPU support and assig
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
|
||||
|
||||
.PHONY: run-docling-cu130
|
||||
run-docling-cu130: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cu130 2>/dev/null || true
|
||||
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 13.0]"
|
||||
$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu130 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu130:main
|
||||
|
||||
.PHONY: run-docling-rocm
|
||||
run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
|
||||
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
|
||||
|
||||
20
README.md
20
README.md
@@ -61,8 +61,26 @@ The following container images are available for running **Docling Serve** with
|
||||
|-------|-------------|----------------|------|
|
||||
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
|
||||
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
|
||||
| [`ghcr.io/docling-project/docling-serve-cu130`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu130) <br> [`quay.io/docling-project/docling-serve-cu130`](https://quay.io/repository/docling-project/docling-serve-cu130) | CUDA 13.0 build with `torch` from the cu130 index. | `linux/amd64`, `linux/arm64` | TBD |
|
||||
|
||||
> [!IMPORTANT]
|
||||
> **CUDA Image Tagging Policy**
|
||||
>
|
||||
> CUDA-specific images (`-cu128`, `-cu130`) follow PyTorch's CUDA version support lifecycle and are tagged differently from base images:
|
||||
>
|
||||
> - **Base images** (`docling-serve`, `docling-serve-cpu`): Tagged with `latest` and `main` for convenience
|
||||
> - **CUDA images** (`docling-serve-cu*`): **Only tagged with explicit versions** (e.g., `1.12.0`) and `main`
|
||||
>
|
||||
> **Why?** CUDA versions are deprecated over time as PyTorch adds support for newer CUDA releases. To avoid accidentally pulling deprecated CUDA versions, CUDA images intentionally exclude the `latest` tag. Always use explicit version tags like:
|
||||
>
|
||||
> ```bash
|
||||
> # ✅ Recommended: Explicit version
|
||||
> docker pull quay.io/docling-project/docling-serve-cu130:1.12.0
|
||||
>
|
||||
> # ❌ Not available for CUDA images
|
||||
> docker pull quay.io/docling-project/docling-serve-cu130:latest
|
||||
> ```
|
||||
|
||||
#### 🚫 Not Distributed
|
||||
|
||||
|
||||
@@ -65,11 +65,11 @@ easyocr = [
|
||||
"easyocr>=1.7",
|
||||
]
|
||||
rapidocr = [
|
||||
"rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
|
||||
"rapidocr (>=3.3,<4.0.0)",
|
||||
"onnxruntime (>=1.7.0,<2.0.0)",
|
||||
]
|
||||
flash-attn = [
|
||||
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||
"flash-attn~=2.8.2; sys_platform == 'linux'"
|
||||
]
|
||||
|
||||
[dependency-groups]
|
||||
@@ -116,6 +116,10 @@ cu128 = [
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
|
||||
cu130 = [
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
]
|
||||
rocm = [
|
||||
"torch>=2.7.1",
|
||||
"torchvision>=0.22.1",
|
||||
@@ -132,6 +136,7 @@ conflicts = [
|
||||
# { group = "cu124" },
|
||||
{ group = "cu126" },
|
||||
{ group = "cu128" },
|
||||
{ group = "cu130" },
|
||||
{ group = "rocm" },
|
||||
],
|
||||
]
|
||||
@@ -148,6 +153,7 @@ torch = [
|
||||
# { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu130", group = "cu130", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
@@ -157,6 +163,7 @@ torchvision = [
|
||||
# { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-cu130", group = "cu130", marker = "sys_platform == 'linux'" },
|
||||
{ index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
|
||||
]
|
||||
|
||||
@@ -192,6 +199,11 @@ name = "pytorch-cu128"
|
||||
url = "https://download.pytorch.org/whl/cu128"
|
||||
explicit = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-cu130"
|
||||
url = "https://download.pytorch.org/whl/cu130"
|
||||
explicit = true
|
||||
|
||||
[[tool.uv.index]]
|
||||
name = "pytorch-rocm"
|
||||
url = "https://download.pytorch.org/whl/rocm6.3"
|
||||
|
||||
Reference in New Issue
Block a user