feat: package updates and more cuda images (#229)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-06-24 09:59:05 -05:00
committed by GitHub
parent 717fb3a8d8
commit 30aca92298
13 changed files with 2151 additions and 1489 deletions

View File

@@ -15,15 +15,23 @@ jobs:
spec:
- name: docling-project/docling-serve
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
platforms: linux/amd64
- name: docling-project/docling-serve-cu128
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
permissions:

View File

@@ -19,15 +19,23 @@ jobs:
spec:
- name: docling-project/docling-serve
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cpu
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
platforms: linux/amd64, linux/arm64
- name: docling-project/docling-serve-cu124
build_args: |
UV_SYNC_EXTRA_ARGS=--no-extra cpu
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
platforms: linux/amd64
- name: docling-project/docling-serve-cu126
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
platforms: linux/amd64
- name: docling-project/docling-serve-cu128
build_args: |
UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
platforms: linux/amd64
permissions:

View File

@@ -17,7 +17,7 @@ jobs:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Install dependencies
run: uv sync --all-extras --no-extra cu124 --no-extra flash-attn
run: uv sync --all-extras --no-extra flash-attn
- name: Build package
run: uv build
- name: Check content of wheel

View File

@@ -25,7 +25,7 @@ jobs:
key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
- name: Install dependencies
run: uv sync --frozen --all-extras --no-extra cu124 --no-extra flash-attn
run: uv sync --frozen --all-extras --no-extra flash-attn
- name: Run styling check
run: pre-commit run --all-files

View File

@@ -23,6 +23,6 @@ repos:
files: '\.py$'
- repo: https://github.com/astral-sh/uv-pre-commit
# uv version.
rev: 0.6.1
rev: 0.7.13
hooks:
- id: uv-lock

View File

@@ -42,7 +42,7 @@ ENV \
ARG UV_SYNC_EXTRA_ARGS=""
RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.13,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
@@ -61,7 +61,7 @@ RUN echo "Downloading models..." && \
chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
COPY --chown=1001:0 ./docling_serve ./docling_serve
RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
RUN --mount=from=ghcr.io/astral-sh/uv:0.7.13,source=/uv,target=/bin/uv \
--mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \

View File

@@ -26,26 +26,40 @@ md-lint-file:
$(CMD_PREFIX) touch .markdown-lint
.PHONY: docling-serve-image
docling-serve-image: Containerfile
docling-serve-image: Containerfile ## Build docling-serve container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu" -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
$(CMD_PREFIX) docker build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)
.PHONY: docling-serve-cpu-image
docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
.PHONY: docling-serve-cu124-image
docling-serve-cu124-image: Containerfile ## Build docling-serve container image with GPU support
docling-serve-cu124-image: Containerfile ## Build docling-serve container image with CUDA 12.4 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
.PHONY: docling-serve-cu126-image
docling-serve-cu126-image: Containerfile ## Build docling-serve container image with CUDA 12.6 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.6]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
.PHONY: docling-serve-cu128-image
docling-serve-cu128-image: Containerfile ## Build docling-serve container image with CUDA 12.8 support
$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.8]"
$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
.PHONY: action-lint
action-lint: .action-lint ## Lint GitHub Action workflows
.action-lint: $(shell find .github -type f) | action-lint-file
@@ -87,9 +101,9 @@ run-docling-cpu: ## Run the docling-serve container with CPU support and assign
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with CPU support on port 5001...\n" "[RUN CPU]"
$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main
.PHONY: run-docling-gpu
run-docling-gpu: ## Run the docling-serve container with GPU support and assign a container name
.PHONY: run-docling-cu124
run-docling-cu124: ## Run the docling-serve container with GPU support and assign a container name
$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
$(CMD_PREFIX) docker rm -f docling-serve-gpu 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN GPU]"
$(CMD_PREFIX) docker run -it --name docling-serve-gpu -p 5001:5001 ghcr.io/docling-project/docling-serve:main
$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main

View File

@@ -45,9 +45,11 @@ Available container images:
| Name | Description | Arch | Size |
| -----|-------------|------|------|
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.

View File

@@ -8,8 +8,8 @@ from docling.datamodel.base_models import InputFormat, OutputFormat
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
PdfBackend,
PdfPipeline,
PictureDescriptionBaseOptions,
ProcessingPipeline,
TableFormerMode,
TableStructureOptions,
)
@@ -227,9 +227,9 @@ class ConvertDocumentsOptions(BaseModel):
] = TableStructureOptions().mode
pipeline: Annotated[
PdfPipeline,
ProcessingPipeline,
Field(description="Choose the pipeline to process PDF or image files."),
] = PdfPipeline.STANDARD
] = ProcessingPipeline.STANDARD
page_range: Annotated[
PageRange,

View File

@@ -19,10 +19,10 @@ from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
OcrOptions,
PdfBackend,
PdfPipeline,
PdfPipelineOptions,
PictureDescriptionApiOptions,
PictureDescriptionVlmOptions,
ProcessingPipeline,
TableFormerMode,
VlmPipelineOptions,
smoldocling_vlm_conversion_options,
@@ -217,7 +217,7 @@ def get_pdf_pipeline_opts(
)
pipeline_options: Union[PdfPipelineOptions, VlmPipelineOptions]
if request.pipeline == PdfPipeline.STANDARD:
if request.pipeline == ProcessingPipeline.STANDARD:
pipeline_options = _parse_standard_pdf_opts(request, artifacts_path)
backend = _parse_backend(request)
pdf_format_option = PdfFormatOption(
@@ -225,7 +225,7 @@ def get_pdf_pipeline_opts(
backend=backend,
)
elif request.pipeline == PdfPipeline.VLM:
elif request.pipeline == ProcessingPipeline.VLM:
pipeline_options = _parse_vlm_pdf_opts(request, artifacts_path)
pdf_format_option = PdfFormatOption(
pipeline_cls=VlmPipeline, pipeline_options=pipeline_options

View File

@@ -16,7 +16,7 @@ import httpx
from docling.datamodel.base_models import FormatToExtensions
from docling.datamodel.pipeline_options import (
PdfBackend,
PdfPipeline,
ProcessingPipeline,
TableFormerMode,
TableStructureOptions,
)
@@ -587,9 +587,9 @@ with gr.Blocks(
with gr.Row():
with gr.Column(scale=1, min_width=200):
pipeline = gr.Radio(
[(v.value.capitalize(), v.value) for v in PdfPipeline],
[(v.value.capitalize(), v.value) for v in ProcessingPipeline],
label="Pipeline type",
value=PdfPipeline.STANDARD.value,
value=ProcessingPipeline.STANDARD.value,
)
with gr.Row():
with gr.Column(scale=1, min_width=200):

View File

@@ -30,7 +30,7 @@ classifiers = [
]
requires-python = ">=3.10"
dependencies = [
"docling[vlm]~=2.28",
"docling[vlm]~=2.38",
"docling-core>=2.32.0",
"mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
"fastapi[standard]~=0.115",
@@ -57,14 +57,6 @@ rapidocr = [
"rapidocr-onnxruntime~=1.4; python_version<'3.13'",
"onnxruntime~=1.7",
]
cpu = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu124 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
flash-attn = [
"flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
]
@@ -80,18 +72,39 @@ dev = [
"python-semantic-release~=7.32",
"ruff>=0.9.6",
]
pypi = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cpu = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu124 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu126 = [
"torch>=2.6.0",
"torchvision>=0.21.0",
]
cu128 = [
"torch>=2.7.0",
"torchvision>=0.22.0",
]
[tool.uv]
package = true
default-groups = ["dev", "pypi"]
conflicts = [
[
{ extra = "cpu" },
{ extra = "cu124" },
{ group = "pypi" },
{ group = "cpu" },
{ group = "cu124" },
{ group = "cu126" },
{ group = "cu128" },
],
[
{ extra = "cpu" },
{ extra = "flash-attn" },
],]
]
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
override-dependencies = [
"urllib3~=2.0"
@@ -99,14 +112,25 @@ override-dependencies = [
[tool.uv.sources]
torch = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu124", extra = "cu124" },
{ index = "pytorch-pypi", group = "pypi" },
{ index = "pytorch-cpu", group = "cpu" },
{ index = "pytorch-cu124", group = "cu124" },
{ index = "pytorch-cu126", group = "cu126" },
{ index = "pytorch-cu128", group = "cu128" },
]
torchvision = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu124", extra = "cu124" },
{ index = "pytorch-pypi", group = "pypi" },
{ index = "pytorch-cpu", group = "cpu" },
{ index = "pytorch-cu124", group = "cu124" },
{ index = "pytorch-cu126", group = "cu126" },
{ index = "pytorch-cu128", group = "cu128" },
]
[[tool.uv.index]]
name = "pytorch-pypi"
url = "https://pypi.org/simple"
explicit = true
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
@@ -117,6 +141,16 @@ name = "pytorch-cu124"
url = "https://download.pytorch.org/whl/cu124"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu126"
url = "https://download.pytorch.org/whl/cu126"
explicit = true
[[tool.uv.index]]
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true
[tool.setuptools.packages.find]
include = ["docling_serve*"]
namespaces = true

3474
uv.lock generated

File diff suppressed because one or more lines are too long