chore: bump version to 1.2.1 [skip ci]

fix: handling of vlm model options and update deps (#314)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 08:33:50 +00:00 · 2025-08-13 07:37:55 +00:00 · 2025-08-13 09:32:21 +02:00 · 2025-08-08 12:32:19 +02:00 · 2025-08-08 09:02:29 +02:00 · 2025-08-08 08:47:59 +02:00
33 changed files with 3454 additions and 2346 deletions
--- a/.github/styles/config/vocabularies/Docling/accept.txt
+++ b/.github/styles/config/vocabularies/Docling/accept.txt
@@ -0,0 +1,36 @@
+[Dd]ocling
+precommit
+asgi
+async
+(?i)urls
+uvicorn
+[Ww]ebserver
+keyfile
+[Ww]ebsocket(s?)
+[Kk]ubernetes
+UI
+(?i)vllm
+APIs
+[Ss]ubprocesses
+(?i)api
+Kubeflow
+(?i)Jobkit
+(?i)cpu
+(?i)PyTorch
+(?i)CUDA
+(?i)NVIDIA
+(?i)ROCm
+(?i)env
+Gradio
+bool
+Ollama
+inbody
+LGTMs
+Dolfi
+Lysak
+Nikos
+Nassar
+Panos
+Vagenas
+Staar
+Livathinos
--- a/.github/vale.ini
+++ b/.github/vale.ini
@@ -0,0 +1,11 @@
+StylesPath = styles
+MinAlertLevel = suggestion
+; Packages = write-good, proselint
+
+Vocab = Docling
+
+[*.md]
+BasedOnStyles = Vale
+
+[CHANGELOG.md]
+BasedOnStyles = 
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -15,7 +15,7 @@ jobs:
        with:
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
@@ -45,7 +45,7 @@ jobs:
          token: ${{ steps.app-token.outputs.token }}
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -21,10 +21,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
-            platforms: linux/amd64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
          - name: docling-project/docling-serve-cu126
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
@@ -33,6 +33,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64

    permissions:
      packages: write
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -25,10 +25,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
-            platforms: linux/amd64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
          - name: docling-project/docling-serve-cu126
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
@@ -37,7 +37,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
            platforms: linux/amd64
-
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
    permissions:
      packages: write
      contents: read
--- a/.github/workflows/job-build.yml
+++ b/.github/workflows/job-build.yml
@@ -12,7 +12,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
--- a/.github/workflows/job-checks.yml
+++ b/.github/workflows/job-checks.yml
@@ -12,7 +12,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
@@ -28,7 +28,7 @@ jobs:
        run: uv sync --frozen --all-extras --no-extra flash-attn

      - name: Run styling check
-        run: pre-commit run --all-files
+        run: uv run pre-commit run --all-files

  build-package:
    uses: ./.github/workflows/job-build.yml
@@ -47,14 +47,16 @@ jobs:
          name: python-package-distributions
          path: dist/
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
+      - name: Create virtual environment
+        run: uv venv
      - name: Install package
        run: uv pip install dist/*.whl
      - name: Create the server
-        run: python -c 'from docling_serve.app import create_app; create_app()'
+        run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'

  markdown-lint:
    runs-on: ubuntu-latest
@@ -64,4 +66,3 @@ jobs:
        uses: DavidAnson/markdownlint-cli2-action@v16
        with:
          globs: "**/*.md"
-
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,8 +21,19 @@ repos:
        pass_filenames: false
        language: system
        files: '\.py$'
+  - repo: https://github.com/errata-ai/vale
+    rev: v3.12.0  # Use latest stable version
+    hooks:
+      - id: vale
+        name: vale sync
+        pass_filenames: false
+        args: [sync, "--config=.github/vale.ini"]
+      - id: vale
+        name: Spell and Style Check with Vale
+        args: ["--config=.github/vale.ini"]
+        files: \.md$
  - repo: https://github.com/astral-sh/uv-pre-commit
-    # uv version.
-    rev: 0.7.13
+    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
+    rev: 0.8.3
    hooks:
      - id: uv-lock
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,45 @@
+## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
+
+### Fix
+
+* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
+* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
+
+### Documentation
+
+* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
+* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
+* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
+
+## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
+
+### Feature
+
+* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
+* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))
+
+## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30
+
+### Feature
+
+* Add docling-mcp in the distribution ([#290](https://github.com/docling-project/docling-serve/issues/290)) ([`ecb1874`](https://github.com/docling-project/docling-serve/commit/ecb1874a507bef83d102e0e031e49fed34298637))
+* Add 3.0 openapi endpoint ([#287](https://github.com/docling-project/docling-serve/issues/287)) ([`ec594d8`](https://github.com/docling-project/docling-serve/commit/ec594d84fe36df23e7d010a2fcf769856c43600b))
+* Add new source and target ([#270](https://github.com/docling-project/docling-serve/issues/270)) ([`3771c1b`](https://github.com/docling-project/docling-serve/commit/3771c1b55403bd51966d07d8f760d5c4fbcc1760))
+
+### Fix
+
+* Referenced paths relative to zip root ([#289](https://github.com/docling-project/docling-serve/issues/289)) ([`1333f71`](https://github.com/docling-project/docling-serve/commit/1333f71c9c6495342b2169d574e921f828446f15))
+
+## [v1.0.1](https://github.com/docling-project/docling-serve/releases/tag/v1.0.1) - 2025-07-21
+
+### Fix
+
+* Docling update v2.42.0 ([#277](https://github.com/docling-project/docling-serve/issues/277)) ([`8706706`](https://github.com/docling-project/docling-serve/commit/8706706e8797b0a06ec4baa7cf87988311be68b6))
+
+### Documentation
+
+* Typo in README ([#276](https://github.com/docling-project/docling-serve/issues/276)) ([`766adb2`](https://github.com/docling-project/docling-serve/commit/766adb248113c7bd5144d14b3c82929a2ad29f8e))
+
 ## [v1.0.0](https://github.com/docling-project/docling-serve/releases/tag/v1.0.0) - 2025-07-14

 ### Feature
--- a/20
+++ b/20
@@ -1,13 +1,17 @@
 ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

-FROM ${BASE_IMAGE}
+ARG UV_VERSION=0.8.3

-USER 0
+ARG UV_SYNC_EXTRA_ARGS=""
+
+FROM ${BASE_IMAGE} AS docling-base

 ###################################################################################################
 # OS Layer                                                                                        #
 ###################################################################################################

+USER 0
+
 RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
    dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
    dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
@@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache

 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
+
 ###################################################################################################
 # Docling layer                                                                                   #
 ###################################################################################################

+FROM docling-base
+
 USER 1001

 WORKDIR /opt/app-root/src

 ENV \
-    # On container environments, always set a thread budget to avoid undesired thread congestion.
    OMP_NUM_THREADS=4 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
@@ -40,9 +47,9 @@ ENV \
    UV_PROJECT_ENVIRONMENT=/opt/app-root \
    DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models

-ARG UV_SYNC_EXTRA_ARGS=""
+ARG UV_SYNC_EXTRA_ARGS

-RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
@@ -61,7 +68,8 @@ RUN echo "Downloading models..." && \
    chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}

 COPY --chown=1001:0 ./docling_serve ./docling_serve
-RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -1,11 +1,11 @@
 # MAINTAINERS

- Christoph Auer - [@cau-git](https://github.com/cau-git)
- Michele Dolfi - [@dolfim-ibm](https://github.com/dolfim-ibm)
- Maxim Lysak - [@maxmnemonic](https://github.com/maxmnemonic)
- Nikos Livathinos - [@nikos-livathinos](https://github.com/nikos-livathinos)
- Ahmed Nassar - [@nassarofficial](https://github.com/nassarofficial)
- Panos Vagenas - [@vagenas](https://github.com/vagenas)
- Peter Staar - [@PeterStaar-IBM](https://github.com/PeterStaar-IBM)
+- Christoph Auer - [`@cau-git`](https://github.com/cau-git)
+- Michele Dolfi - [`@dolfim-ibm`](https://github.com/dolfim-ibm)
+- Maxim Lysak - [`@maxmnemonic`](https://github.com/maxmnemonic)
+- Nikos Livathinos - [`@nikos-livathinos`](https://github.com/nikos-livathinos)
+- Ahmed Nassar - [`@nassarofficial`](https://github.com/nassarofficial)
+- Panos Vagenas - [`@vagenas`](https://github.com/vagenas)
+- Peter Staar - [`@PeterStaar-IBM`](https://github.com/PeterStaar-IBM)

 Maintainers can be contacted at [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com).
--- a/28
+++ b/28
@@ -60,6 +60,13 @@ docling-serve-cu128-image: Containerfile ## Build docling-serve container image
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)

+.PHONY: docling-serve-rocm-image
+docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+
 .PHONY: action-lint
 action-lint: .action-lint ##      Lint GitHub Action workflows
 .action-lint: $(shell find .github -type f) | action-lint-file
@@ -107,3 +114,24 @@ run-docling-cu124: ## Run the docling-serve container with GPU support and assig
 	$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
 	$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+
+.PHONY: run-docling-cu126
+run-docling-cu126: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
+
+.PHONY: run-docling-cu128
+run-docling-cu128: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
+
+# NOTE(review): this target runs the `:main` tag of docling-serve-rocm, while
+# the README states the ROCm image is not published. Presumably it only works
+# after a local `make docling-serve-rocm-image` whose BRANCH_TAG is `main` --
+# confirm. No GPU device flags are passed to `docker run` here.
+.PHONY: run-docling-rocm
+run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
--- a/README.md
+++ b/README.md
@@ -12,11 +12,11 @@ Running [Docling](https://github.com/docling-project/docling) as an API service.

 - Learning how to [configure the webserver](./docs/configuration.md)
 - Get to know all [runtime options](./docs/usage.md) of the API
- Explore usefule [deployment examples](./docs/deployment.md)
+- Explore useful [deployment examples](./docs/deployment.md)
 - And more

- > [!NOTE] Migration to the `v1` API
-> Docling Serve now has a stable v1 API. Read more on the [migration to v1](./docs/v1_migration.md).
+> [!NOTE]
+> **Migration to the `v1` API.** Docling Serve now has a stable v1 API. Read more on the [migration to v1](./docs/v1_migration.md).

 ## Getting started

@@ -36,7 +36,8 @@ The server is available at
 - API <http://127.0.0.1:5001>
 - API documentation <http://127.0.0.1:5001/docs>
 - UI playground <http://127.0.0.1:5001/ui>
-  ![swagger.png](img/swagger.png)
+
+![API documentation](img/fastapi-ui.png)

 Try it out with a simple conversion:

@@ -46,21 +47,36 @@ curl -X 'POST' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

-### Container images
+### Container Images

-Available container images:
+The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:

-| Name | Description | Arch | Size |
-| -----|-------------|------|------|
-| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
-| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
-| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
-| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
-| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
+#### 📦 Distributed Images
+
+| Image | Description | Architectures | Size |
+|-------|-------------|----------------|------|
+| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
+| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
+| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
+
+#### 🚫 Not Distributed
+
+An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
+
+To build it locally:
+
+```bash
+git clone --branch main git@github.com:docling-project/docling-serve.git
+cd docling-serve/
+make docling-serve-rocm-image
+```
+
+For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).

 Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.

@@ -68,9 +84,9 @@ Coming soon: `docling-serve-slim` images will reduce the size by skipping the mo

 An easy to use UI is available at the `/ui` endpoint.

-![ui-input.png](img/ui-input.png)
+![Input controllers in the UI](img/ui-input.png)

-![ui-output.png](img/ui-output.png)
+![Output visualization in the UI](img/ui-output.png)

 ## Get help and support

--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -1,4 +1,5 @@
 import asyncio
+import copy
 import importlib.metadata
 import logging
 import shutil
@@ -24,7 +25,7 @@ from fastapi.openapi.docs import (
    get_swagger_ui_html,
    get_swagger_ui_oauth2_redirect_html,
 )
-from fastapi.responses import RedirectResponse
+from fastapi.responses import JSONResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from scalar_fastapi import get_scalar_api_reference

@@ -34,8 +35,13 @@ from docling_jobkit.datamodel.callback import (
    ProgressCallbackResponse,
 )
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
 from docling_jobkit.datamodel.task import Task, TaskSource
-from docling_jobkit.datamodel.task_targets import InBodyTarget, TaskTarget, ZipTarget
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    TaskTarget,
+    ZipTarget,
+)
 from docling_jobkit.orchestrators.base_orchestrator import (
    BaseOrchestrator,
    ProgressInvalid,
@@ -47,6 +53,7 @@ from docling_serve.datamodel.requests import (
    ConvertDocumentsRequest,
    FileSourceRequest,
    HttpSourceRequest,
+    S3SourceRequest,
    TargetName,
 )
 from docling_serve.datamodel.responses import (
@@ -54,6 +61,7 @@ from docling_serve.datamodel.responses import (
    ConvertDocumentResponse,
    HealthCheckResponse,
    MessageKind,
+    PresignedUrlConvertDocumentResponse,
    TaskStatusResponse,
    WebsocketMessage,
 )
@@ -62,7 +70,7 @@ from docling_serve.orchestrator_factory import get_async_orchestrator
 from docling_serve.response_preparation import prepare_response
 from docling_serve.settings import docling_serve_settings
 from docling_serve.storage import get_scratch
-from docling_serve.websocker_notifier import WebsocketNotifier
+from docling_serve.websocket_notifier import WebsocketNotifier


 # Set up custom logging as we'll be intermixed with FastAPI/Uvicorn's logging
@@ -246,6 +254,8 @@ def create_app():  # noqa: C901
                sources.append(FileSource.model_validate(s))
            elif isinstance(s, HttpSourceRequest):
                sources.append(HttpSource.model_validate(s))
+            elif isinstance(s, S3SourceRequest):
+                sources.append(S3Coordinates.model_validate(s))

        task = await orchestrator.enqueue(
            sources=sources,
@@ -286,10 +296,79 @@ def create_app():  # noqa: C901
            if elapsed_time > docling_serve_settings.max_sync_wait:
                return False

+    ##########################################
+    # Downgrade openapi 3.1 to 3.0.x helpers #
+    ##########################################
+
+    def ensure_array_items(schema):
+        """Guarantee that an array schema declares typed ``items``.
+
+        The schema is modified in place. Schemas whose "type" is not "array"
+        are ignored. Missing or ``None`` items become ``{"type": "string"}``;
+        a dict of items lacking "type" gets "type": "string" added. Items of
+        any other shape are left alone.
+        """
+        if "type" not in schema or schema["type"] != "array":
+            return
+
+        items = schema["items"] if "items" in schema else None
+        if items is None:
+            schema["items"] = {"type": "string"}
+        elif isinstance(items, dict) and "type" not in items:
+            items["type"] = "string"
+
+    def handle_discriminators(schema):
+        """List the discriminator property among the schema's required fields.
+
+        Acts in place, and only when the schema has a discriminator with a
+        "propertyName" that is also declared under "properties".
+        """
+        if "discriminator" not in schema:
+            return
+        discriminator = schema["discriminator"]
+        if "propertyName" not in discriminator:
+            return
+
+        name = discriminator["propertyName"]
+        if "properties" not in schema or name not in schema["properties"]:
+            return
+
+        required = schema.setdefault("required", [])
+        if name not in required:
+            required.append(name)
+
+    def handle_properties(schema):
+        """Mark the "kind" property as required when the schema declares it.
+
+        Acts in place; schemas without a "kind" entry under "properties" are
+        left unchanged.
+        """
+        if "properties" not in schema or "kind" not in schema["properties"]:
+            return
+
+        required = schema.setdefault("required", [])
+        if "kind" not in required:
+            required.append("kind")
+
+    # Downgrade openapi 3.1 to 3.0.x
+    def downgrade_openapi31_to_30(spec):
+        """Return a copy of ``spec`` reshaped for OpenAPI 3.0.x consumers.
+
+        The input is never modified: all adjustments are made on a deep copy,
+        so a spec object that the caller keeps using (e.g. the document
+        returned by ``app.openapi()``) stays exactly as it was.
+
+        Steps:
+        - every component schema that declares a "kind" property gets "kind"
+          added to its "required" list;
+        - the keys "const", "examples" and "prefixItems" are removed at every
+          nesting level;
+        - discriminator properties are made required and array schemas get
+          typed "items", including in direct "oneOf"/"anyOf" members.
+
+        The "openapi" version field itself is not touched here.
+        """
+
+        def strip_unsupported(obj):
+            """Recursively rebuild ``obj`` without the unsupported keys."""
+            if isinstance(obj, dict):
+                obj = {
+                    k: strip_unsupported(v)
+                    for k, v in obj.items()
+                    if k not in ("const", "examples", "prefixItems")
+                }
+
+                handle_discriminators(obj)
+                ensure_array_items(obj)
+
+                # Check for oneOf and anyOf to handle nested schemas
+                for key in ["oneOf", "anyOf"]:
+                    if key in obj:
+                        for sub in obj[key]:
+                            handle_discriminators(sub)
+                            ensure_array_items(sub)
+
+                return obj
+            elif isinstance(obj, list):
+                return [strip_unsupported(i) for i in obj]
+            return obj
+
+        # Copy first: handle_properties mutates the schemas it is given, and
+        # running it on the caller's object would leak the 3.0-only changes
+        # into the original spec.
+        downgraded = copy.deepcopy(spec)
+
+        if "components" in downgraded and "schemas" in downgraded["components"]:
+            for schema in downgraded["components"]["schemas"].values():
+                handle_properties(schema)
+
+        return strip_unsupported(downgraded)
+
    #############################
    # API Endpoints definitions #
    #############################

+    # Serves the application's OpenAPI document converted by
+    # downgrade_openapi31_to_30 and labelled as version 3.0.3.
+    @app.get("/openapi-3.0.json")
+    def openapi_30():
+        spec_30 = downgrade_openapi31_to_30(app.openapi())
+        # The helper leaves the version field alone, so stamp it here.
+        spec_30["openapi"] = "3.0.3"
+        return JSONResponse(spec_30)
+
    # Favicon
    @app.get("/favicon.ico", include_in_schema=False)
    async def favicon():
@@ -311,7 +390,7 @@ def create_app():  # noqa: C901
    # Convert a document from URL(s)
    @app.post(
        "/v1/convert/source",
-        response_model=ConvertDocumentResponse,
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -347,7 +426,7 @@ def create_app():  # noqa: C901
    # Convert a document from file(s)
    @app.post(
        "/v1/convert/file",
-        response_model=ConvertDocumentResponse,
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -443,7 +522,8 @@ def create_app():  # noqa: C901
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
        wait: Annotated[
-            float, Query(help="Number of seconds to wait for a completed status.")
+            float,
+            Query(description="Number of seconds to wait for a completed status."),
        ] = 0.0,
    ):
        try:
@@ -525,7 +605,7 @@ def create_app():  # noqa: C901
    # Task result
    @app.get(
        "/v1/result/{task_id}",
-        response_model=ConvertDocumentResponse,
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -1,12 +1,21 @@
 import enum
 from typing import Annotated, Literal

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
+from pydantic_core import PydanticCustomError
+from typing_extensions import Self

 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
-from docling_jobkit.datamodel.task_targets import InBodyTarget, TaskTarget, ZipTarget
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    S3Target,
+    TaskTarget,
+    ZipTarget,
+)

 from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+from docling_serve.settings import AsyncEngine, docling_serve_settings

 ## Sources

@@ -19,6 +28,10 @@ class HttpSourceRequest(HttpSource):
    kind: Literal["http"] = "http"


+# Request-side S3 source: S3Coordinates plus the fixed "kind" tag ("s3") that
+# the SourceRequestItem union discriminates on.
+class S3SourceRequest(S3Coordinates):
+    kind: Literal["s3"] = "s3"
+
+
 ## Multipart targets
 class TargetName(str, enum.Enum):
    INBODY = InBodyTarget().kind
@@ -27,7 +40,7 @@ class TargetName(str, enum.Enum):

 ## Aliases
 SourceRequestItem = Annotated[
-    FileSourceRequest | HttpSourceRequest, Field(discriminator="kind")
+    FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
 ]


@@ -36,3 +49,24 @@ class ConvertDocumentsRequest(BaseModel):
    options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
    sources: list[SourceRequestItem]
    target: TaskTarget = InBodyTarget()
+
+    @model_validator(mode="after")
+    def validate_s3_source_and_target(self) -> Self:
+        """Enforce that S3 sources and S3 targets are only used together.
+
+        Raises:
+            PydanticCustomError: if an "s3" source is submitted while the
+                configured engine is not KFP, if an "s3" source is paired with
+                a non-"s3" target, or if an "s3" target has no "s3" source.
+        """
+        found_s3_source = False
+        for item in self.sources:
+            if not isinstance(item, S3SourceRequest):
+                continue
+            if docling_serve_settings.eng_kind != AsyncEngine.KFP:
+                raise PydanticCustomError(
+                    "error source", 'source kind "s3" requires engine kind "KFP"'
+                )
+            if self.target.kind != "s3":
+                raise PydanticCustomError(
+                    "error source", 'source kind "s3" requires target kind "s3"'
+                )
+            found_s3_source = True
+
+        # An S3 target is only meaningful when at least one source is S3.
+        if isinstance(self.target, S3Target) and not found_s3_source:
+            raise PydanticCustomError(
+                "error target", 'target kind "s3" requires source kind "s3"'
+            )
+        return self
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -35,6 +35,11 @@ class ConvertDocumentResponse(BaseModel):
    timings: dict[str, ProfilingItem] = {}


+class PresignedUrlConvertDocumentResponse(BaseModel):
+    status: ConversionStatus
+    processing_time: float
+
+
 class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus

--- a/docling_serve/orchestrator_factory.py
+++ b/docling_serve/orchestrator_factory.py
@@ -19,6 +19,7 @@ def get_async_orchestrator() -> BaseOrchestrator:

        local_config = LocalOrchestratorConfig(
            num_workers=docling_serve_settings.eng_loc_num_workers,
+            shared_models=docling_serve_settings.eng_loc_share_models,
        )

        cm_config = DoclingConverterManagerConfig(
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -7,6 +7,7 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import Union

+import httpx
 from fastapi import BackgroundTasks, HTTPException
 from fastapi.responses import FileResponse

@@ -15,12 +16,16 @@ from docling.datamodel.document import ConversionResult, ConversionStatus
 from docling_core.types.doc import ImageRefMode
 from docling_jobkit.datamodel.convert import ConvertDocumentsOptions
 from docling_jobkit.datamodel.task import Task
-from docling_jobkit.datamodel.task_targets import InBodyTarget, TaskTarget
+from docling_jobkit.datamodel.task_targets import InBodyTarget, PutTarget, TaskTarget
 from docling_jobkit.orchestrators.base_orchestrator import (
    BaseOrchestrator,
 )

-from docling_serve.datamodel.responses import ConvertDocumentResponse, DocumentResponse
+from docling_serve.datamodel.responses import (
+    ConvertDocumentResponse,
+    DocumentResponse,
+    PresignedUrlConvertDocumentResponse,
+)
 from docling_serve.settings import docling_serve_settings
 from docling_serve.storage import get_scratch

@@ -40,7 +45,9 @@ def _export_document_as_content(
    document = DocumentResponse(filename=conv_res.input.file.name)

    if conv_res.status == ConversionStatus.SUCCESS:
-        new_doc = conv_res.document._make_copy_with_refmode(Path(), image_mode)
+        new_doc = conv_res.document._make_copy_with_refmode(
+            Path(), image_mode, page_no=None
+        )

        # Create the different formats
        if export_json:
@@ -77,11 +84,17 @@ def _export_documents_as_files(
    export_doctags: bool,
    image_export_mode: ImageRefMode,
    md_page_break_placeholder: str,
-):
+) -> ConversionStatus:
    success_count = 0
    failure_count = 0

+    # Default failure in case results is empty
+    conv_result = ConversionStatus.FAILURE
+
+    artifacts_dir = Path("artifacts/")  # will be relative to the fname
+
    for conv_res in conv_results:
+        conv_result = conv_res.status
        if conv_res.status == ConversionStatus.SUCCESS:
            success_count += 1
            doc_filename = conv_res.input.file.stem
@@ -91,7 +104,9 @@ def _export_documents_as_files(
                fname = output_dir / f"{doc_filename}.json"
                _log.info(f"writing JSON output to {fname}")
                conv_res.document.save_as_json(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname,
+                    image_mode=image_export_mode,
+                    artifacts_dir=artifacts_dir,
                )

            # Export HTML format:
@@ -99,7 +114,9 @@ def _export_documents_as_files(
                fname = output_dir / f"{doc_filename}.html"
                _log.info(f"writing HTML output to {fname}")
                conv_res.document.save_as_html(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname,
+                    image_mode=image_export_mode,
+                    artifacts_dir=artifacts_dir,
                )

            # Export Text format:
@@ -118,6 +135,7 @@ def _export_documents_as_files(
                _log.info(f"writing Markdown output to {fname}")
                conv_res.document.save_as_markdown(
                    filename=fname,
+                    artifacts_dir=artifacts_dir,
                    image_mode=image_export_mode,
                    page_break_placeholder=md_page_break_placeholder or None,
                )
@@ -136,6 +154,7 @@ def _export_documents_as_files(
        f"Processed {success_count + failure_count} docs, "
        f"of which {failure_count} failed"
    )
+    return conv_result


 def process_results(
@@ -143,7 +162,7 @@ def process_results(
    target: TaskTarget,
    conv_results: Iterable[ConversionResult],
    work_dir: Path,
-) -> Union[ConvertDocumentResponse, FileResponse]:
+) -> Union[ConvertDocumentResponse, FileResponse, PresignedUrlConvertDocumentResponse]:
    # Let's start by processing the documents
    try:
        start_time = time.monotonic()
@@ -167,7 +186,9 @@ def process_results(
        )

    # We have some results, let's prepare the response
-    response: Union[FileResponse, ConvertDocumentResponse]
+    response: Union[
+        FileResponse, ConvertDocumentResponse, PresignedUrlConvertDocumentResponse
+    ]

    # Booleans to know what to export
    export_json = OutputFormat.JSON in conversion_options.to_formats
@@ -207,7 +228,7 @@ def process_results(
        os.getpid()

        # Export the documents
-        _export_documents_as_files(
+        conv_result = _export_documents_as_files(
            conv_results=conv_results,
            output_dir=output_dir,
            export_json=export_json,
@@ -234,9 +255,24 @@ def process_results(
        # Output directory
        # background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)

-        response = FileResponse(
-            file_path, filename=file_path.name, media_type="application/zip"
-        )
+        if isinstance(target, PutTarget):
+            try:
+                with open(file_path, "rb") as file_data:
+                    r = httpx.put(str(target.url), files={"file": file_data})
+                    r.raise_for_status()
+                response = PresignedUrlConvertDocumentResponse(
+                    status=conv_result,
+                    processing_time=processing_time,
+                )
+            except Exception as exc:
+                _log.error("An error occour while uploading zip to s3", exc_info=exc)
+                raise HTTPException(
+                    status_code=500, detail="An error occour while uploading zip to s3."
+                )
+        else:
+            response = FileResponse(
+                file_path, filename=file_path.name, media_type="application/zip"
+            )

    return response

--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -63,6 +63,7 @@ class DoclingServeSettings(BaseSettings):
    eng_kind: AsyncEngine = AsyncEngine.LOCAL
    # Local engine
    eng_loc_num_workers: int = 2
+    eng_loc_share_models: bool = False
    # KFP engine
    eng_kfp_endpoint: Optional[AnyUrl] = None
    eng_kfp_token: Optional[str] = None
--- a/docling_serve/websocket_notifier.py
+++ b/docling_serve/websocket_notifier.py
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -7,7 +7,7 @@ server and the actual app-specific configurations.

 > [!WARNING]
 > When the server is running with `reload` or with multiple `workers`, uvicorn
-> will spawn multiple subprocessed. This invalidates all the values configured
+> will spawn multiple subprocesses. This invalidates all the values configured
 > via the CLI command line options. Please use environment variables in this
 > type of deployments.

@@ -36,7 +36,7 @@ THe following table describes the options to configure the Docling Serve app.
 | CLI option | ENV | Default | Description |
 | -----------|-----|---------|-------------|
 | `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path |
-|  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and ui will be loaded from this path |
+|  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path |
 |  | `DOCLING_SERVE_SCRATCH_PATH` |  | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary directory is created for this purpose. |
 | `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
 |  | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
@@ -66,6 +66,7 @@ The following table describes the options to configure the Docling Serve local e
 | ENV | Default | Description |
 |-----|---------|-------------|
 | `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
+| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |

 #### KFP engine

--- a/docs/deploy-examples/compose-amd.yaml
+++ b/docs/deploy-examples/compose-amd.yaml
@@ -0,0 +1,21 @@
+# AMD ROCm deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-rocm:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
+      ## This section is for compatibility with older cards
+      # HSA_OVERRIDE_GFX_VERSION: "11.0.0"
+      # HSA_ENABLE_SDMA: "0"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - 44    # video group GID from host
+      - 992   # render group GID from host
+    restart: always
--- a/docs/deploy-examples/compose-gpu.yaml
+++ b/docs/deploy-examples/compose-gpu.yaml
@@ -1,15 +0,0 @@
-services:
-  docling:
-    image: ghcr.io/docling-project/docling-serve-cu124
-    container_name: docling-serve
-    ports:
-      - 5001:5001
-    environment:
-      - DOCLING_SERVE_ENABLE_UI=true
-    deploy:
-      resources:
-        reservations:
-          devices:
-          - driver: nvidia
-            count: all # nvidia-smi 
-            capabilities: [gpu]
--- a/docs/deploy-examples/compose-nvidia.yaml
+++ b/docs/deploy-examples/compose-nvidia.yaml
@@ -0,0 +1,20 @@
+# NVIDIA CUDA deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu126:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -4,16 +4,17 @@ This document provides deployment examples for running the application in differ

 Choose the deployment option that best fits your setup.

- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
+- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
+- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
 - **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.

 ---

-## Local GPU
+## Local GPU NVIDIA

 ### Docker compose

-Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
+Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)

 This deployment has the following features:

@@ -22,7 +23,7 @@ This deployment has the following features:
 Install the app with:

 ```sh
-docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
 ```

 For using the API:
@@ -34,7 +35,7 @@ curl -X 'POST' \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -56,7 +57,7 @@ Docs:
 <details>
 <summary><b>Steps</b></summary>

-1. Check driver version and which GPU you want to use (0/1/2/3.. and update [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file or use `count: all`)
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file or use `count: all`)

    ```sh
    nvidia-smi
@@ -117,7 +118,75 @@ Docs:
 5. Run the container:

    ```sh
-    docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+    docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
+    ```
+
+</details>
+
+## Local GPU AMD
+
+### Docker compose
+
+Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)
+
+This deployment has the following features:
+
+- AMD ROCm enabled
+
+Install the app with:
+
+```sh
+docker compose -f docs/deploy-examples/compose-amd.yaml up -d
+```
+
+For using the API:
+
+```sh
+# Make a test query
+curl -X 'POST' \
+  "localhost:5001/v1/convert/source/async" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+<details>
+<summary><b>Requirements</b></summary>
+
+- debian/ubuntu/rhel/fedora/opensuse
+- docker
+- AMDGPU driver >=6.3
+- AMD ROCm >=6.3
+
+Docs:
+
+- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
+
+</details>
+
+<details>
+<summary><b>Steps</b></summary>
+
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    rocm-smi --showdriverversion
+    rocminfo | grep -i "ROCm version"
+    ```
+
+2. Find both video group GID and render group GID from host (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    getent group video
+    getent group render
+    ```
+
+3. Build the image locally (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    make docling-serve-rocm-image
    ```

 </details>
@@ -152,7 +221,7 @@ curl -X 'POST' \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -189,7 +258,7 @@ curl -X 'POST' \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -222,7 +291,7 @@ task_id=$(curl -s -X 'POST' \
    -H "accept: application/json" \
    -H "Content-Type: application/json" \
    -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+      "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
    }' \
    -c cookies.txt | grep -oP '"task_id":"\K[^"]+')
 ```
--- a/docs/pre-loading-models.md
+++ b/docs/pre-loading-models.md
@@ -74,7 +74,7 @@ This document provides examples for pre-loading docling models to a persistent v
    Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)

 3. Now we can mount volume in the docling-serve deployment and set env `DOCLING_SERVE_ARTIFACTS_PATH` to point to it.
-    Following additions to deploymeny should be made:
+    Following additions to deployment should be made:

    ```yaml
    spec:
@@ -98,6 +98,6 @@ This document provides examples for pre-loading docling models to a persistent v

    Make sure that value of `DOCLING_SERVE_ARTIFACTS_PATH` is the same as where models were downloaded and where volume is mounted.

-    Now when docling-serve is executing tasks, the underlying docling installation will load model weights from mouted volume.
+    Now when docling-serve is executing tasks, the underlying docling installation will load model weights from mounted volume.

    Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -9,7 +9,7 @@ On top of the source of file (see below), both endpoints support the same parame
 - `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
 - `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
 - `pipeline` (str). The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
- `page_range` (tuple). If speficied, only convert a range of pages. The page number starts at 1.
+- `page_range` (tuple). If specified, only convert a range of pages. The page number starts at 1.
 - `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
 - `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
 - `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
@@ -25,8 +25,8 @@ On top of the source of file (see below), both endpoints support the same parame
 - `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
 - `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
 - `picture_description_area_threshold` (float): Minimum percentage of the area for a picture to be processed with the models. Defaults to 0.05.
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api.
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local.
+- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `picture_description_api`.
+- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with `picture_description_local`.
 - `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
 - `images_scale` (float): Scale factor for images. Defaults to 2.0.

@@ -307,7 +307,7 @@ Example URLs are:
    }
    ```

- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `picture_description_api`:
+- `http://localhost:11434/v1/chat/completions` for the local Ollama API, with example `picture_description_api`:
  - the `granite3.2-vision:2b` model

    ```json
@@ -355,7 +355,7 @@ The response can be a JSON Document or a File.

 Both `/v1/convert/source` and `/v1/convert/file` endpoints are available as asynchronous variants.
 The advantage of the asynchronous endpoints is the possibility to interrupt the connection, check for progress updates and fetch the result.
-This approach is more resilient against network stabilities and allows the client application logic to easily interleave conversion with other tasks.
+This approach is more resilient against network instabilities and allows the client application logic to easily interleave conversion with other tasks.

 Launch an asynchronous conversion with:

@@ -402,7 +402,7 @@ while task["task_status"] not in ("success", "failure"):
 ### Subscribe with websockets

 Using websockets, the client application can be notified about updates of the conversion task.
-To start the websocker connection, use the endpoint:
+To start the websocket connection, use the endpoint:

 - `/v1/status/ws/{task_id}`

@@ -417,7 +417,7 @@ Websocket messages are JSON object with the following structure:
 ```

 <details>
-<summary>Example websocker usage:</summary>
+<summary>Example websocket usage:</summary>

 ```python
 from websockets.sync.client import connect
--- a/docs/v1_migration.md
+++ b/docs/v1_migration.md
@@ -37,7 +37,7 @@ New version:
    "options": {},  // conversion options
    "sources": [
        // input document provided as base64-encoded string
-        {"kind": "kind", "base64_string": "abc123...", "filename": "file.pdf"},
+        {"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
        // input document provided as http urls
        {"kind": "http", "url": "https://..."},
    ]
--- a/img/fastapi-ui.png
+++ b/img/fastapi-ui.png
--- a/img/swagger.png
+++ b/img/swagger.png
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "1.0.0"  # DO NOT EDIT, updated automatically
+version = "1.2.1"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -8,7 +8,6 @@ authors = [
    {name="Guillaume Moutier", email="gmoutier@redhat.com"},
    {name="Anil Vishnoi", email="avishnoi@redhat.com"},
    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
-    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
    {name="Christoph Auer", email="cau@zurich.ibm.com"},
    {name="Peter Staar", email="taa@zurich.ibm.com"},
 ]
@@ -35,8 +34,8 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
    "docling~=2.38",
-    "docling-core>=2.32.0",
-    "docling-jobkit[kfp,vlm]~=1.1",
+    "docling-core>=2.44.1",
+    "docling-jobkit[kfp,vlm]>=1.3.1,<2.0.0",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
    "pydantic~=2.10",
@@ -46,6 +45,7 @@ dependencies = [
    "uvicorn[standard]>=0.29.0,<1.0.0",
    "websockets~=14.0",
    "scalar-fastapi>=1.0.3",
+    "docling-mcp>=1.0.0",
 ]

 [project.optional-dependencies]
@@ -61,7 +61,7 @@ rapidocr = [
    "onnxruntime~=1.7",
 ]
 flash-attn = [
-  "flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
+  "flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
 ]

 [dependency-groups]
@@ -75,25 +75,36 @@ dev = [
    "python-semantic-release~=7.32",
    "ruff>=0.9.6",
 ]
+
 pypi = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
 ]
+
 cpu = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
 ]
+
 cu124 = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.6.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.21.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
 ]
+
 cu126 = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
 ]
+
 cu128 = [
-  "torch>=2.7.0",
-  "torchvision>=0.22.0",
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+rocm = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
 ]

 [tool.uv]
@@ -106,6 +117,7 @@ conflicts = [
    { group = "cu124" },
    { group = "cu126" },
    { group = "cu128" },
+    { group = "rocm" },
  ],
 ]
 environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
@@ -117,18 +129,26 @@ override-dependencies = [
 torch = [
  { index = "pytorch-pypi", group = "pypi" },
  { index = "pytorch-cpu", group = "cpu" },
-  { index = "pytorch-cu124", group = "cu124" },
-  { index = "pytorch-cu126", group = "cu126" },
-  { index = "pytorch-cu128", group = "cu128" },
+  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]
+
 torchvision = [
  { index = "pytorch-pypi", group = "pypi" },
  { index = "pytorch-cpu", group = "cpu" },
-  { index = "pytorch-cu124", group = "cu124" },
-  { index = "pytorch-cu126", group = "cu126" },
-  { index = "pytorch-cu128", group = "cu128" },
+  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]
-# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "refactor" }
+
+pytorch-triton-rocm = [
+  { index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
+]
+
+# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
 # docling-jobkit = { path = "../docling-jobkit", editable = true }

 [[tool.uv.index]]
@@ -156,6 +176,11 @@ name = "pytorch-cu128"
 url = "https://download.pytorch.org/whl/cu128"
 explicit = true

+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+explicit = true
+
 [tool.setuptools.packages.find]
 include = ["docling_serve*"]
 namespaces = true
--- a/tests/test_fastapi_endpoints.py
+++ b/tests/test_fastapi_endpoints.py
@@ -1,6 +1,8 @@
 import asyncio
+import io
 import json
 import os
+import zipfile

 import pytest
 import pytest_asyncio
@@ -8,6 +10,8 @@ from asgi_lifespan import LifespanManager
 from httpx import ASGITransport, AsyncClient
 from pytest_check import check

+from docling_core.types.doc import DoclingDocument, PictureItem
+
 from docling_serve.app import create_app


@@ -153,3 +157,37 @@ async def test_convert_file(client: AsyncClient):
            data["document"]["doctags_content"],
            msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
        )
+
+
+@pytest.mark.asyncio
+async def test_referenced_artifacts(client: AsyncClient):
+    """Test that paths in the zip file are relative to the zip file root."""
+
+    endpoint = "/v1/convert/file"
+    options = {
+        "to_formats": ["json"],
+        "image_export_mode": "referenced",
+        "target_type": "zip",
+        "ocr": False,
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    files = {
+        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
+    }
+
+    response = await client.post(endpoint, files=files, data=options)
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
+        namelist = zip_file.namelist()
+        for file in namelist:
+            if file.endswith(".json"):
+                doc = DoclingDocument.model_validate(json.loads(zip_file.read(file)))
+                for item, _level in doc.iterate_items():
+                    if isinstance(item, PictureItem):
+                        assert item.image is not None
+                        print(f"{item.image.uri}=")
+                        assert str(item.image.uri) in namelist
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
github-actions[bot]	3bd7828570	chore: bump version to 1.2.1 [skip ci]	2025-08-13 07:37:55 +00:00
Michele Dolfi	8b470cba8e	fix: handling of vlm model options and update deps (#314 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-13 09:32:21 +02:00
Tiago Santana	8048f4589a	fix: add missing response type in sync endpoints (#309 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-08-08 12:32:19 +02:00
Thomas Vitale	b3058e91e0	docs: Update readme to use v1 (#306 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 09:02:29 +02:00
Thomas Vitale	63da9eedeb	docs: Update deployment examples to use v1 API (#308 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 08:47:59 +02:00
Thomas Vitale	b15dc2529f	docs: Fix typo in v1 migration instructions (#307 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 08:44:09 +02:00
github-actions[bot]	4c7207be00	chore: bump version to 1.2.0 [skip ci]	2025-08-07 09:20:10 +00:00
Michele Dolfi	db3fdb5bc1	feat: workers without shared models and convert params (#304 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-07 11:16:06 +02:00
Rui Dias Gomes	fd1b987e8d	feat: add rocm image build support and fix cuda (#292 ) Signed-off-by: rmdg88 <rmdg88@gmail.com> Signed-off-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com> Co-authored-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com>	2025-07-31 14:22:42 +02:00
github-actions[bot]	ce15e0302b	chore: bump version to 1.1.0 [skip ci]	2025-07-30 15:53:01 +00:00
Michele Dolfi	ecb1874a50	feat: Add docling-mcp in the distribution (#290 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-30 15:39:11 +02:00
Michele Dolfi	1333f71c9c	fix: referenced paths relative to zip root (#289 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-30 14:49:26 +02:00
Tiago Santana	ec594d84fe	feat: add 3.0 openapi endpoint (#287 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-07-30 14:08:59 +02:00
Tiago Santana	3771c1b554	feat: add new source and target (#270 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-07-29 14:44:49 +02:00
github-actions[bot]	24db461b14	chore: bump version to 1.0.1 [skip ci]	2025-07-21 07:34:14 +00:00
Michele Dolfi	8706706e87	fix: docling update v2.42.0 (#277 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-21 08:47:40 +02:00
Michele Dolfi	766adb2481	docs: typo in README (#276 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-18 14:37:54 +02:00
Michele Dolfi	8222cf8955	ci: add spellchecker with custom vocabulary and fix typos (#268 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-15 14:17:35 +02:00