Cleanup. Text formatting. Fallback picture annotation.

Revamp UI to SSR.
Signed-off-by: DKL <dkl@zurich.ibm.com>
2025-11-29 08:33:50 +00:00 · 2025-11-24 15:17:39 +01:00 · 2025-11-21 16:15:36 +01:00 · 2025-11-21 10:31:56 +01:00 · 2025-11-20 17:57:10 +01:00 · 2025-11-17 08:31:44 +01:00
69 changed files with 14032 additions and 5573 deletions
--- a/.github/scripts/release.sh
+++ b/.github/scripts/release.sh
@@ -3,32 +3,68 @@
 set -e  # trigger failure on error - do not remove!
 set -x  # display command on output

+## debug
+# TARGET_VERSION="1.2.x"
+
 if [ -z "${TARGET_VERSION}" ]; then
    >&2 echo "No TARGET_VERSION specified"
    exit 1
 fi
 CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"

-# update package version
+# Update package version
 uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
 uv lock --upgrade-package docling-serve

-# collect release notes
+# Extract all docling packages and versions from uv.lock
+DOCVERSIONS=$(uvx --with toml python3 - <<'PY'
+import toml
+data = toml.load("uv.lock")
+for pkg in data.get("package", []):
+    if pkg["name"].startswith("docling"):
+        print(f"{pkg['name']} {pkg['version']}")
+PY
+)
+
+# Format docling versions list without trailing newline
+DOCLING_VERSIONS="### Docling libraries included in this release:"
+while IFS= read -r line; do
+  DOCLING_VERSIONS+="
+- $line"
+done <<< "$DOCVERSIONS"
+
+# Collect release notes
 REL_NOTES=$(mktemp)
 uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"

-# update changelog
+# Strip trailing blank lines from release notes and append docling versions
+{
+  sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "${REL_NOTES}"
+  printf "\n"
+  printf "%s" "${DOCLING_VERSIONS}"
+  printf "\n"
+} > "${REL_NOTES}.tmp" && mv "${REL_NOTES}.tmp" "${REL_NOTES}"
+
+# Update changelog
 TMP_CHGLOG=$(mktemp)
 TARGET_TAG_NAME="v${TARGET_VERSION}"
 RELEASE_URL="$(gh repo view --json url -q ".url")/releases/tag/${TARGET_TAG_NAME}"
-printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n" >> "${TMP_CHGLOG}"
-cat "${REL_NOTES}" >> "${TMP_CHGLOG}"
-if [ -f "${CHGLOG_FILE}" ]; then
-    printf "\n" | cat - "${CHGLOG_FILE}" >> "${TMP_CHGLOG}"
-fi
+## debug
+#RELEASE_URL="myrepo/releases/tag/${TARGET_TAG_NAME}"
+
+# Strip trailing blank lines from the existing changelog (this sed one-liner trims the end of the file, not the start)
+EXISTING_CL=$(sed -e :a -e '/^\n*$/{$d;N;};/\n$/ba' "${CHGLOG_FILE}")
+
+{
+  printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n"
+  cat "${REL_NOTES}"
+  printf "\n"
+  printf "%s\n" "${EXISTING_CL}"
+} >> "${TMP_CHGLOG}"
+
 mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"

-# push changes
+# Push changes
 git config --global user.name 'github-actions[bot]'
 git config --global user.email 'github-actions[bot]@users.noreply.github.com'
 git add pyproject.toml uv.lock "${CHGLOG_FILE}"
@@ -36,5 +72,5 @@ COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
 git commit -m "${COMMIT_MSG}"
 git push origin main

-# create GitHub release (incl. Git tag)
+# Create GitHub release (incl. Git tag)
 gh release create "${TARGET_TAG_NAME}" -F "${REL_NOTES}"
--- a/.github/styles/config/vocabularies/Docling/accept.txt
+++ b/.github/styles/config/vocabularies/Docling/accept.txt
@@ -4,7 +4,10 @@ asgi
 async
 (?i)urls
 uvicorn
+Config
 [Ww]ebserver
+RQ
+(?i)url
 keyfile
 [Ww]ebsocket(s?)
 [Kk]ubernetes
@@ -19,8 +22,10 @@ Kubeflow
 (?i)PyTorch
 (?i)CUDA
 (?i)NVIDIA
+(?i)ROCm
 (?i)env
 Gradio
+Podman
 bool
 Ollama
 inbody
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -13,7 +13,7 @@ jobs:
  actionlint:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Download actionlint
        id: get_actionlint
        run: bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash)
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -11,11 +11,11 @@ jobs:
    outputs:
      TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }}
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
        with:
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
@@ -40,12 +40,12 @@ jobs:
        with:
          app-id: ${{ vars.CI_APP_ID }}
          private-key: ${{ secrets.CI_PRIVATE_KEY }}
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
        with:
          token: ${{ steps.app-token.outputs.token }}
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -21,10 +21,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
-            platforms: linux/amd64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
          - name: docling-project/docling-serve-cu126
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
@@ -33,6 +33,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64

    permissions:
      packages: write
--- a/.github/workflows/discord-release.yml
+++ b/.github/workflows/discord-release.yml
@@ -0,0 +1,42 @@
+# .github/workflows/discord-release.yml
+name: Notify Discord on Release
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  discord:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Send release info to Discord
+        env:
+          DISCORD_WEBHOOK: ${{ secrets.RELEASES_DISCORD_WEBHOOK }}
+        run: |
+          REPO_NAME=${{ github.repository }}
+          RELEASE_TAG=${{ github.event.release.tag_name }}
+          RELEASE_NAME="${{ github.event.release.name }}"
+          RELEASE_URL=${{ github.event.release.html_url }}
+
+          # Capture the body via a quoted heredoc (no shell expansion of backticks, $, ", etc.); NOTE: a body line that is exactly EOF would still end the heredoc
+          RELEASE_BODY=$(cat <<'EOF'
+            ${{ github.event.release.body }}
+          EOF
+          )
+
+          # Fallback if release name is empty
+          if [ -z "$RELEASE_NAME" ]; then
+            RELEASE_NAME=$RELEASE_TAG
+          fi
+
+          PAYLOAD=$(jq -n \
+          --arg title "🚀 New Release: $RELEASE_NAME" \
+          --arg url "$RELEASE_URL" \
+          --arg desc "$RELEASE_BODY" \
+          --arg author_name "$REPO_NAME" \
+          --arg author_icon "https://github.com/docling-project.png" \
+          '{embeds: [{title: $title, url: $url, description: $desc, color: 5814783, author: {name: $author_name, icon_url: $author_icon}}]}')
+
+          curl -H "Content-Type: application/json" \
+               -d "$PAYLOAD" \
+               "$DISCORD_WEBHOOK"
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -25,10 +25,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
-            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
-            platforms: linux/amd64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
          - name: docling-project/docling-serve-cu126
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
@@ -37,7 +37,10 @@ jobs:
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
            platforms: linux/amd64
-
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
    permissions:
      packages: write
      contents: read
--- a/.github/workflows/job-build.yml
+++ b/.github/workflows/job-build.yml
@@ -10,9 +10,9 @@ jobs:
      matrix:
        python-version: ['3.12']
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
--- a/.github/workflows/job-checks.yml
+++ b/.github/workflows/job-checks.yml
@@ -10,9 +10,9 @@ jobs:
      matrix:
        python-version: ['3.12']
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
@@ -28,7 +28,7 @@ jobs:
        run: uv sync --frozen --all-extras --no-extra flash-attn

      - name: Run styling check
-        run: pre-commit run --all-files
+        run: uv run pre-commit run --all-files

  build-package:
    uses: ./.github/workflows/job-build.yml
@@ -47,21 +47,22 @@ jobs:
          name: python-package-distributions
          path: dist/
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
+      - name: Create virtual environment
+        run: uv venv
      - name: Install package
        run: uv pip install dist/*.whl
      - name: Create the server
-        run: python -c 'from docling_serve.app import create_app; create_app()'
-
-  markdown-lint:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: markdownlint-cli2-action
-        uses: DavidAnson/markdownlint-cli2-action@v16
-        with:
-          globs: "**/*.md"
+        run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'

+  # markdown-lint:
+  #   runs-on: ubuntu-latest
+  #   steps:
+  #     - uses: actions/checkout@v5
+  #     - name: markdownlint-cli2-action
+  #       uses: DavidAnson/markdownlint-cli2-action@v16
+  #       with:
+  #         globs: "**/*.md"
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -53,7 +53,7 @@ jobs:
            df -h

      - name: Check out the repo
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5

      - name: Log in to the GHCR container image registry
        if: ${{ inputs.publish }}
@@ -88,19 +88,115 @@ jobs:
        with:
          images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}

+      # # Local test
+      # - name: Set metadata outputs for local testing ## comment out Free up space, Log in to cr, Cache Docker, Extract metadata, and quay blocks and run act
+      #   id: ghcr_meta
+      #   run: |
+      #     echo "tags=ghcr.io/docling-project/docling-serve:pr-123" >> $GITHUB_OUTPUT
+      #     echo "labels=org.opencontainers.image.source=https://github.com/docling-project/docling-serve" >> $GITHUB_OUTPUT
+
      - name: Build and push image to ghcr.io
        id: ghcr_push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
-          push: ${{ inputs.publish }}
+          push: ${{ inputs.publish }} # set 'false' for local test
          tags: ${{ steps.ghcr_meta.outputs.tags }}
          labels: ${{ steps.ghcr_meta.outputs.labels }}
-          platforms: ${{ inputs.platforms}}
+          platforms: ${{ inputs.platforms }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
+          pull: true
+      ##
+      ## This stage runs after the build, so it leverages all build cache
+      ## 
+      - name: Export built image for testing
+        id: ghcr_export_built_image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          load: true
+          tags: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test
+          labels: |
+            org.opencontainers.image.title=docling-serve
+            org.opencontainers.image.test=true
+          platforms: linux/amd64 # when 'load' is true, we can't use a list ${{ inputs.platforms }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          build-args: ${{ inputs.build_args }}
+
+      - name: Test image
+        if: steps.ghcr_export_built_image.outcome == 'success'
+        run: |
+          set -e
+
+          IMAGE_TAG="${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test"
+          echo "Testing local image: $IMAGE_TAG"
+
+          # Remove existing container if any
+          docker rm -f docling-serve-test-container 2>/dev/null || true
+
+          echo "Starting container..."
+          docker run -d -p 5001:5001 --name docling-serve-test-container "$IMAGE_TAG"
+
+          echo "Waiting 15s for container to boot..."
+          sleep 15
+
+          # Health check
+          echo "Checking service health..."
+          for i in {1..20}; do
+            HEALTH_RESPONSE=$(curl -s http://localhost:5001/health || true)
+            echo "Health check response [$i]: $HEALTH_RESPONSE"
+
+            if echo "$HEALTH_RESPONSE" | grep -q '"status":"ok"'; then
+              echo "Service is healthy!"
+
+              # Install pytest and dependencies
+              echo "Installing pytest and dependencies..."
+              pip install uv
+              uv venv --allow-existing
+              source .venv/bin/activate
+              uv sync --all-extras --no-extra flash-attn
+
+              # Run pytest tests
+              echo "Running tests..."
+              # Test import
+              python -c 'from docling_serve.app import create_app; create_app()'
+
+              # Run pytest and check result directly
+              if ! pytest -sv -k "test_convert_url" tests/test_1-url-async.py \
+                --disable-warnings; then
+                echo "Tests failed!"
+                docker logs docling-serve-test-container
+                docker rm -f docling-serve-test-container
+                exit 1
+              fi
+
+              echo "Tests passed successfully!"
+              break
+            else
+              echo "Waiting for service... [$i/20]"
+              sleep 3
+            fi
+          done
+
+          # Final health check if service didn't pass earlier
+          if ! echo "$HEALTH_RESPONSE" | grep -q '"status":"ok"'; then
+            echo "Service did not become healthy in time."
+            docker logs docling-serve-test-container
+            docker rm -f docling-serve-test-container
+            exit 1
+          fi
+
+          # Cleanup
+          echo "Cleaning up test container..."
+          docker rm -f docling-serve-test-container
+          echo "Cleaning up test image..."
+          docker rmi "$IMAGE_TAG"

      - name: Generate artifact attestation
        if: ${{ inputs.publish }}
@@ -120,7 +216,7 @@ jobs:
      - name: Build and push image to quay.io
        if: ${{ inputs.publish }}
        # id: push-serve-cpu-quay
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: .
          push: ${{ inputs.publish }}
@@ -131,11 +227,8 @@ jobs:
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
-      
-      # - name: Inspect the image details
-      #   run: |
-      #     echo "${{ steps.ghcr_push.outputs.metadata }}"
+          pull: true

-      - name: Remove Local Docker Images
+      - name: Remove local Docker images
        run: |
          docker image prune -af
--- a/.gitignore
+++ b/.gitignore
@@ -445,4 +445,7 @@ pip-selfcheck.json
 .action-lint
 .markdown-lint

-cookies.txt
+cookies.txt
+
+# Examples
+/examples/splitted_pdf/*
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,12 +7,12 @@ repos:
      - id: ruff-format
        name: "Ruff formatter"
        args: [--config=pyproject.toml]
-        files: '^(docling_serve|tests).*\.(py|ipynb)$'
+        files: '^(docling_serve|tests|examples|scripts).*\.(py|ipynb)$'
      # Run the Ruff linter.
      - id: ruff
        name: "Ruff linter"
        args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
-        files: '^(docling_serve|tests).*\.(py|ipynb)$'
+        files: '^(docling_serve|tests|examples|scripts).*\.(py|ipynb)$'
  - repo: local
    hooks:
      - id: system
@@ -21,6 +21,15 @@ repos:
        pass_filenames: false
        language: system
        files: '\.py$'
+  - repo: local
+    hooks:
+      - id: update-docs-common-parameters
+        name: Update Documentation File
+        entry: uv run scripts/update_doc_usage.py
+        language: python
+        pass_filenames: false
+        # Run the hook in a single process; a non-zero exit from the script is what fails the commit
+        require_serial: true
  - repo: https://github.com/errata-ai/vale
    rev: v3.12.0  # Use latest stable version
    hooks:
@@ -33,7 +42,7 @@ repos:
        args: ["--config=.github/vale.ini"]
        files: \.md$
  - repo: https://github.com/astral-sh/uv-pre-commit
-    # uv version.
-    rev: 0.7.13
+    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
+    rev: 0.8.19
    hooks:
      - id: uv-lock
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,230 @@
+## [v1.8.0](https://github.com/docling-project/docling-serve/releases/tag/v1.8.0) - 2025-10-31
+
+### Feature
+
+* Docling with new standard pipeline with threading ([#428](https://github.com/docling-project/docling-serve/issues/428)) ([`bf132a3`](https://github.com/docling-project/docling-serve/commit/bf132a3c3e615ddbe624841ea5b3a98593c00654))
+
+### Documentation
+
+* Expand automatic docs to nested objects. More complete usage docs. ([#426](https://github.com/docling-project/docling-serve/issues/426)) ([`35319b0`](https://github.com/docling-project/docling-serve/commit/35319b0da793a2a1a434fd2b60b7632e10ecced3))
+* Add docs for docling parameters like performance and debug ([#424](https://github.com/docling-project/docling-serve/issues/424)) ([`f3957ae`](https://github.com/docling-project/docling-serve/commit/f3957aeb577097121fe9d0d21f75a50643f03369))
+
+### Docling libraries included in this release:
+- docling 2.60.0
+- docling-core 2.50.0
+- docling-ibm-models 3.10.2
+- docling-jobkit 1.8.0
+- docling-mcp 1.3.2
+- docling-parse 4.7.0
+- docling-serve 1.8.0
+
+## [v1.7.2](https://github.com/docling-project/docling-serve/releases/tag/v1.7.2) - 2025-10-30
+
+### Fix
+
+* Update locked dependencies. Docling fixes, Expose temperature parameter for vlm models ([#423](https://github.com/docling-project/docling-serve/issues/423)) ([`e9b4140`](https://github.com/docling-project/docling-serve/commit/e9b41406c4116ff79a212877ff6484a1151e144d))
+* Temporarily constrain fastapi version ([#418](https://github.com/docling-project/docling-serve/issues/418)) ([`7bf2e7b`](https://github.com/docling-project/docling-serve/commit/7bf2e7b366470e0cf1c4900df7c84becd6a96991))
+
+### Docling libraries included in this release:
+- docling 2.59.0
+- docling-core 2.50.0
+- docling-ibm-models 3.10.2
+- docling-jobkit 1.7.1
+- docling-mcp 1.3.2
+- docling-parse 4.7.0
+- docling-serve 1.7.2
+
+## [v1.7.1](https://github.com/docling-project/docling-serve/releases/tag/v1.7.1) - 2025-10-22
+
+### Fix
+
+* Upgrade dependencies ([#417](https://github.com/docling-project/docling-serve/issues/417)) ([`97613a1`](https://github.com/docling-project/docling-serve/commit/97613a19748e8c152db4a0f62b5a57fca807a33a))
+* Makes task status shared across multiple instances in RQ mode, resolves #378 ([#415](https://github.com/docling-project/docling-serve/issues/415)) ([`0961f2c`](https://github.com/docling-project/docling-serve/commit/0961f2c57425859c76130da3ea8a871d65df4b26))
+* `DOCLING_SERVE_SYNC_POLL_INTERVAL` controls the synchronous polling time ([#413](https://github.com/docling-project/docling-serve/issues/413)) ([`0f274ab`](https://github.com/docling-project/docling-serve/commit/0f274ab135a9bb41accd05db3c12a9dcce220ad9))
+
+### Documentation
+
+* Generate usage.md automatically ([#340](https://github.com/docling-project/docling-serve/issues/340)) ([`9672f31`](https://github.com/docling-project/docling-serve/commit/9672f310b1bb7030af8a276f14691e46f7da0e9e))
+
+### Docling libraries included in this release:
+- docling 2.58.0
+- docling-core 2.49.0
+- docling-ibm-models 3.10.1
+- docling-jobkit 1.7.0
+- docling-mcp 1.3.2
+- docling-parse 4.7.0
+- docling-serve 1.7.1
+
+## [v1.7.0](https://github.com/docling-project/docling-serve/releases/tag/v1.7.0) - 2025-10-17
+
+### Feature
+
+* **UI:** Add auto and ocrmac options in demo UI ([#408](https://github.com/docling-project/docling-serve/issues/408)) ([`f5af71e`](https://github.com/docling-project/docling-serve/commit/f5af71e8f6de00d7dd702471a3eea2e94d882410))
+* Docling with auto-ocr ([#403](https://github.com/docling-project/docling-serve/issues/403)) ([`d95ea94`](https://github.com/docling-project/docling-serve/commit/d95ea940870af0d8df689061baa50f6026efce28))
+
+### Fix
+
+* Run docling ui behind a reverse proxy using a context path ([#396](https://github.com/docling-project/docling-serve/issues/396)) ([`5344505`](https://github.com/docling-project/docling-serve/commit/53445057184aa731ee7456b33b70bc0ecf82f2a6))
+
+### Docling libraries included in this release:
+- docling 2.57.0
+- docling-core 2.48.4
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.6.0
+- docling-mcp 1.3.2
+- docling-parse 4.5.0
+- docling-serve 1.7.0
+
+## [v1.6.0](https://github.com/docling-project/docling-serve/releases/tag/v1.6.0) - 2025-10-03
+
+### Feature
+
+* Pin new version of jobkit with granite-docling and connectors ([#391](https://github.com/docling-project/docling-serve/issues/391)) ([`0595d31`](https://github.com/docling-project/docling-serve/commit/0595d31d5b357553426215ca6771796a47e41324))
+
+### Fix
+
+* Update locked dependencies ([#392](https://github.com/docling-project/docling-serve/issues/392)) ([`45f0f3c`](https://github.com/docling-project/docling-serve/commit/45f0f3c8f95d418ac30e3744d27d02a63f9e4490))
+* **UI:** Allow both lowercase and uppercase extensions ([#386](https://github.com/docling-project/docling-serve/issues/386)) ([`8b22a39`](https://github.com/docling-project/docling-serve/commit/8b22a391418d22c1a4d706f880341f28702057b5))
+* Correctly raise HTTPException for Gateway Timeout ([#382](https://github.com/docling-project/docling-serve/issues/382)) ([`d4eac05`](https://github.com/docling-project/docling-serve/commit/d4eac053f9ce0a60f9070127335bdd56e193d7fa))
+* Pinning of higher version of dependencies to fix potential security issues ([#363](https://github.com/docling-project/docling-serve/issues/363)) ([`ba61af2`](https://github.com/docling-project/docling-serve/commit/ba61af23591eff200481aa2e532cf7d0701f0ea4))
+
+### Documentation
+
+* Fix docs for websocket breaking condition ([#390](https://github.com/docling-project/docling-serve/issues/390)) ([`f6b5f0e`](https://github.com/docling-project/docling-serve/commit/f6b5f0e06354d2db7d03d274b114499e3407dccf))
+
+### Docling libraries included in this release:
+- docling 2.55.1
+- docling-core 2.48.4
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.6.0
+- docling-mcp 1.3.2
+- docling-parse 4.5.0
+- docling-serve 1.6.0
+
+## [v1.5.1](https://github.com/docling-project/docling-serve/releases/tag/v1.5.1) - 2025-09-17
+
+### Fix
+
+* Remove old dependencies, fixes in docling-parse and more minor dependencies upgrade ([#362](https://github.com/docling-project/docling-serve/issues/362)) ([`513ae0c`](https://github.com/docling-project/docling-serve/commit/513ae0c119b66d3b17cf9a5d371a0f7971f43be7))
+* Updates rapidocr deps ([#361](https://github.com/docling-project/docling-serve/issues/361)) ([`bde0406`](https://github.com/docling-project/docling-serve/commit/bde040661fb65c67699326cd6281c0e6232e26f2))
+
+### Docling libraries included in this release:
+- docling 2.52.0
+- docling-core 2.48.1
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.5.0
+- docling-mcp 1.2.0
+- docling-parse 4.5.0
+- docling-serve 1.5.1
+
+## [v1.5.0](https://github.com/docling-project/docling-serve/releases/tag/v1.5.0) - 2025-09-09
+
+### Feature
+
+* Add chunking endpoints ([#353](https://github.com/docling-project/docling-serve/issues/353)) ([`9d6def0`](https://github.com/docling-project/docling-serve/commit/9d6def0ec8b1804ad31aa71defa17658d73d29a1))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.5.0
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.5.0
+
+## [v1.4.1](https://github.com/docling-project/docling-serve/releases/tag/v1.4.1) - 2025-09-08
+
+### Fix
+
+* Trigger fix after ci fixes ([#355](https://github.com/docling-project/docling-serve/issues/355)) ([`b0360d7`](https://github.com/docling-project/docling-serve/commit/b0360d723bff202dcf44a25a3173ec1995945fc2))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.4.1
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.4.1
+
+## [v1.4.0](https://github.com/docling-project/docling-serve/releases/tag/v1.4.0) - 2025-09-05
+
+### Feature
+
+* **docling:** Performance improvements in parsing, new layout model, fixes in html processing ([#352](https://github.com/docling-project/docling-serve/issues/352)) ([`d64a2a9`](https://github.com/docling-project/docling-serve/commit/d64a2a974a276c7ae3b105c448fd79f77a653d20))
+
+### Fix
+
+* Upgrade to latest docling version with fixes ([#335](https://github.com/docling-project/docling-serve/issues/335)) ([`e544947`](https://github.com/docling-project/docling-serve/commit/e5449472b2a3e71796f41c8a58c251d8229305c1))
+
+### Documentation
+
+* Add split processing example ([#303](https://github.com/docling-project/docling-serve/issues/303)) ([`0d4545a`](https://github.com/docling-project/docling-serve/commit/0d4545a65a5a941fc1fdefda57e39cfb1ea106ab))
+* Document DOCLING_NUM_THREADS environment variable ([#341](https://github.com/docling-project/docling-serve/issues/341)) ([`27fdd7b`](https://github.com/docling-project/docling-serve/commit/27fdd7b85ab18b3eece428366f46dc5cf0995e38))
+* Fix parameters typo ([#333](https://github.com/docling-project/docling-serve/issues/333)) ([`81f0a8d`](https://github.com/docling-project/docling-serve/commit/81f0a8ddf80a532042d550ae4568f891458b45e7))
+* Describe how to use Docling MCP ([#332](https://github.com/docling-project/docling-serve/issues/332)) ([`a69cc86`](https://github.com/docling-project/docling-serve/commit/a69cc867f5a3fb76648803ca866d65cc3a75c6b8))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.4.1
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.4.0
+
+## [v1.3.1](https://github.com/docling-project/docling-serve/releases/tag/v1.3.1) - 2025-08-21
+
+### Fix
+
+* Configuration and performance fixes via upgrade of packages ([#328](https://github.com/docling-project/docling-serve/issues/328)) ([`f02dbc0`](https://github.com/docling-project/docling-serve/commit/f02dbc01449fe1caf3fb4a73c0a5f4adf8265faf))
+
+### Documentation
+
+* Fix parameter in api key docs ([#323](https://github.com/docling-project/docling-serve/issues/323)) ([`37fe022`](https://github.com/docling-project/docling-serve/commit/37fe02277b3e2358eced28e15b4360e7c82d3b43))
+
+## [v1.3.0](https://github.com/docling-project/docling-serve/releases/tag/v1.3.0) - 2025-08-14
+
+### Feature
+
+* Add configuration option for apikey security ([#322](https://github.com/docling-project/docling-serve/issues/322)) ([`9a64410`](https://github.com/docling-project/docling-serve/commit/9a644105523d312431993ded8dd88e064550a5db))
+* Add RQ engine ([#315](https://github.com/docling-project/docling-serve/issues/315)) ([`885f319`](https://github.com/docling-project/docling-serve/commit/885f319d3a3488a4090869560447437a4104f14e))
+
+### Documentation
+
+* Example of docling-serve deployment in the RQ engine mode ([#321](https://github.com/docling-project/docling-serve/issues/321)) ([`71edf41`](https://github.com/docling-project/docling-serve/commit/71edf4184960d8664ef9da20617e2d0f91793d36))
+* Handling models in docling-serve ([#319](https://github.com/docling-project/docling-serve/issues/319)) ([`6e9aa8c`](https://github.com/docling-project/docling-serve/commit/6e9aa8c759220458281c7fe4c87443ac41023eee))
+* Add Gradio cache usage ([#312](https://github.com/docling-project/docling-serve/issues/312)) ([`d584895`](https://github.com/docling-project/docling-serve/commit/d584895e1108d71a0f45deadcd3c669eb0a58133))
+
+## [v1.2.2](https://github.com/docling-project/docling-serve/releases/tag/v1.2.2) - 2025-08-13
+
+### Fix
+
+* Update of transformers module to 4.55.1 ([#316](https://github.com/docling-project/docling-serve/issues/316)) ([`7692eb2`](https://github.com/docling-project/docling-serve/commit/7692eb26006fd4deaa021180c99e23a1b65de506))
+
+## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
+
+### Fix
+
+* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
+* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
+
+### Documentation
+
+* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
+* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
+* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
+
+## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
+
+### Feature
+
+* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
+* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))
+
 ## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30

 ### Feature
--- a/22
+++ b/22
@@ -1,13 +1,17 @@
 ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

-FROM ${BASE_IMAGE}
+ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.8.19

-USER 0
+ARG UV_SYNC_EXTRA_ARGS=""
+
+FROM ${BASE_IMAGE} AS docling-base

 ###################################################################################################
 # OS Layer                                                                                        #
 ###################################################################################################

+USER 0
+
 RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
    dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
    dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
@@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache

 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

+FROM ${UV_IMAGE} AS uv_stage
+
 ###################################################################################################
 # Docling layer                                                                                   #
 ###################################################################################################

+FROM docling-base
+
 USER 1001

 WORKDIR /opt/app-root/src

 ENV \
-    # On container environments, always set a thread budget to avoid undesired thread congestion.
    OMP_NUM_THREADS=4 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
@@ -40,9 +47,9 @@ ENV \
    UV_PROJECT_ENVIRONMENT=/opt/app-root \
    DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models

-ARG UV_SYNC_EXTRA_ARGS=""
+ARG UV_SYNC_EXTRA_ARGS

-RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
@@ -51,7 +58,7 @@ RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
    uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
    FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn

-ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
+ARG MODELS_LIST="layout tableformer picture_classifier rapidocr easyocr"

 RUN echo "Downloading models..." && \
    HF_HUB_DOWNLOAD_TIMEOUT="90" \
@@ -61,7 +68,8 @@ RUN echo "Downloading models..." && \
    chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}

 COPY --chown=1001:0 ./docling_serve ./docling_serve
-RUN --mount=from=ghcr.io/astral-sh/uv:0.7.19,source=/uv,target=/bin/uv \
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--- a/71
+++ b/71
@@ -16,6 +16,9 @@ else
    PIPE_DEV_NULL=
 endif

+# Container runtime - can be overridden on the command line: make CONTAINER_RUNTIME=podman <target>
+CONTAINER_RUNTIME ?= docker
+
 TAG=$(shell git rev-parse HEAD)
 BRANCH_TAG=$(shell git rev-parse --abbrev-ref HEAD)

@@ -28,37 +31,44 @@ md-lint-file:
 .PHONY: docling-serve-image
 docling-serve-image: Containerfile ## Build docling-serve container image
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve]"
-	$(CMD_PREFIX) docker build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)

 .PHONY: docling-serve-cpu-image
 docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)

 .PHONY: docling-serve-cu124-image
 docling-serve-cu124-image: Containerfile ## Build docling-serve container image with CUDA 12.4 support
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)

 .PHONY: docling-serve-cu126-image
 docling-serve-cu126-image: Containerfile ## Build docling-serve container image with CUDA 12.6 support
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.6]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)

 .PHONY: docling-serve-cu128-image
 docling-serve-cu128-image: Containerfile ## Build docling-serve container image with CUDA 12.8 support
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.8]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
-	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+
+.PHONY: docling-serve-rocm-image
+docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)

 .PHONY: action-lint
 action-lint: .action-lint ##      Lint GitHub Action workflows
@@ -81,7 +91,7 @@ action-lint: .action-lint ##      Lint GitHub Action workflows
 md-lint: .md-lint ##      Lint markdown files
 .md-lint: $(wildcard */**/*.md) | md-lint-file
 	$(ECHO_PREFIX) printf "  %-12s ./...\n" "[MD LINT]"
-	$(CMD_PREFIX) docker run --rm -v $$(pwd):/workdir davidanson/markdownlint-cli2:v0.16.0 "**/*.md" "#.venv"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run --rm -v $$(pwd):/workdir davidanson/markdownlint-cli2:v0.16.0 "**/*.md" "#.venv"
 	$(CMD_PREFIX) touch $@

 .PHONY: py-Lint
@@ -97,13 +107,34 @@ py-lint: ##      Lint Python files
 .PHONY: run-docling-cpu
 run-docling-cpu: ## Run the docling-serve container with CPU support and assign a container name
 	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
-	$(CMD_PREFIX) docker rm -f docling-serve-cpu 2>/dev/null || true
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cpu 2>/dev/null || true
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with CPU support on port 5001...\n" "[RUN CPU]"
-	$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main

 .PHONY: run-docling-cu124
 run-docling-cu124: ## Run the docling-serve container with GPU support and assign a container name
 	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
-	$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cu124 2>/dev/null || true
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
-	$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+
+.PHONY: run-docling-cu126
+run-docling-cu126: ## Run the docling-serve container with CUDA 12.6 GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cu126 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
+
+.PHONY: run-docling-cu128
+run-docling-cu128: ## Run the docling-serve container with CUDA 12.8 GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-cu128 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
+
+.PHONY: run-docling-rocm
+run-docling-rocm: ## Run the docling-serve container with ROCm GPU support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) rm -f docling-serve-rocm 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
+	$(CMD_PREFIX) $(CONTAINER_RUNTIME) run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
--- a/README.md
+++ b/README.md
@@ -36,7 +36,8 @@ The server is available at
 - API <http://127.0.0.1:5001>
 - API documentation <http://127.0.0.1:5001/docs>
 - UI playground <http://127.0.0.1:5001/ui>
-  ![swagger.png](img/swagger.png)
+
+![API documentation](img/fastapi-ui.png)

 Try it out with a simple conversion:

@@ -46,21 +47,36 @@ curl -X 'POST' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

-### Container images
+### Container Images

-Available container images:
+The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:

-| Name | Description | Arch | Size |
-| -----|-------------|------|------|
-| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB (arm64) <br /> 8.7 GB (amd64) |
-| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
-| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
-| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br /> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | Cuda 12.6 image which installs `torch` from the pytorch cu126 index. | `linux/amd64` | 8.7 GB |
-| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br /> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | Cuda 12.8 image which installs `torch` from the pytorch cu128 index. | `linux/amd64` | 8.7 GB |
+#### 📦 Distributed Images
+
+| Image | Description | Architectures | Size |
+|-------|-------------|----------------|------|
+| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
+| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
+| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
+
+#### 🚫 Not Distributed
+
+An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
+
+To build it locally:
+
+```bash
+git clone --branch main https://github.com/docling-project/docling-serve.git
+cd docling-serve/
+make docling-serve-rocm-image
+```
+
+For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).

 Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.

--- a/docling_serve/main.py
+++ b/docling_serve/main.py
@@ -11,6 +11,7 @@ import uvicorn
 from rich.console import Console

 from docling_serve.settings import docling_serve_settings, uvicorn_settings
+from docling_serve.storage import get_scratch

 warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
 warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
@@ -29,7 +30,7 @@ logger = logging.getLogger(__name__)

 def version_callback(value: bool) -> None:
    if value:
-        docling_serve_version = importlib.metadata.version("docling_serve")
+        docling_serve_version = importlib.metadata.version("docling-serve")
        docling_jobkit_version = importlib.metadata.version("docling-jobkit")
        docling_version = importlib.metadata.version("docling")
        docling_core_version = importlib.metadata.version("docling-core")
@@ -361,6 +362,42 @@ def run(
    )


+@app.command()
+def rq_worker() -> Any:
+    """
+    Run the [bold]Docling JobKit[/bold] RQ worker.
+    """
+    from docling_jobkit.convert.manager import DoclingConverterManagerConfig
+    from docling_jobkit.orchestrators.rq.orchestrator import RQOrchestratorConfig
+    from docling_jobkit.orchestrators.rq.worker import run_worker
+
+    rq_config = RQOrchestratorConfig(
+        redis_url=docling_serve_settings.eng_rq_redis_url,
+        results_prefix=docling_serve_settings.eng_rq_results_prefix,
+        sub_channel=docling_serve_settings.eng_rq_sub_channel,
+        scratch_dir=get_scratch(),
+    )
+
+    cm_config = DoclingConverterManagerConfig(
+        artifacts_path=docling_serve_settings.artifacts_path,
+        options_cache_size=docling_serve_settings.options_cache_size,
+        enable_remote_services=docling_serve_settings.enable_remote_services,
+        allow_external_plugins=docling_serve_settings.allow_external_plugins,
+        max_num_pages=docling_serve_settings.max_num_pages,
+        max_file_size=docling_serve_settings.max_file_size,
+        queue_max_size=docling_serve_settings.queue_max_size,
+        ocr_batch_size=docling_serve_settings.ocr_batch_size,
+        layout_batch_size=docling_serve_settings.layout_batch_size,
+        table_batch_size=docling_serve_settings.table_batch_size,
+        batch_polling_interval_seconds=docling_serve_settings.batch_polling_interval_seconds,
+    )
+
+    run_worker(
+        rq_config=rq_config,
+        cm_config=cm_config,
+    )
+
+
 def main() -> None:
    app()

--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -18,6 +18,7 @@ from fastapi import (
    UploadFile,
    WebSocket,
    WebSocketDisconnect,
+    status,
 )
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.openapi.docs import (
@@ -34,12 +35,17 @@ from docling_jobkit.datamodel.callback import (
    ProgressCallbackRequest,
    ProgressCallbackResponse,
 )
+from docling_jobkit.datamodel.chunking import (
+    BaseChunkerOptions,
+    ChunkingExportOptions,
+    HierarchicalChunkerOptions,
+    HybridChunkerOptions,
+)
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
 from docling_jobkit.datamodel.s3_coords import S3Coordinates
-from docling_jobkit.datamodel.task import Task, TaskSource
+from docling_jobkit.datamodel.task import Task, TaskSource, TaskType
 from docling_jobkit.datamodel.task_targets import (
    InBodyTarget,
-    TaskTarget,
    ZipTarget,
 )
 from docling_jobkit.orchestrators.base_orchestrator import (
@@ -48,15 +54,20 @@ from docling_jobkit.orchestrators.base_orchestrator import (
    TaskNotFoundError,
 )

+from docling_serve.auth import APIKeyHeaderAuth, AuthenticationResult
 from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
 from docling_serve.datamodel.requests import (
    ConvertDocumentsRequest,
    FileSourceRequest,
+    GenericChunkDocumentsRequest,
    HttpSourceRequest,
    S3SourceRequest,
    TargetName,
+    TargetRequest,
+    make_request_model,
 )
 from docling_serve.datamodel.responses import (
+    ChunkDocumentResponse,
    ClearResponse,
    ConvertDocumentResponse,
    HealthCheckResponse,
@@ -65,7 +76,7 @@ from docling_serve.datamodel.responses import (
    TaskStatusResponse,
    WebsocketMessage,
 )
-from docling_serve.helper_functions import FormDepends
+from docling_serve.helper_functions import DOCLING_VERSIONS, FormDepends
 from docling_serve.orchestrator_factory import get_async_orchestrator
 from docling_serve.response_preparation import prepare_response
 from docling_serve.settings import docling_serve_settings
@@ -156,6 +167,7 @@ def create_app():  # noqa: C901
        offline_docs_assets = True
        _log.info("Found static assets.")

+    require_auth = APIKeyHeaderAuth(docling_serve_settings.api_key)
    app = FastAPI(
        title="Docling Serve",
        docs_url=None if offline_docs_assets else "/swagger",
@@ -176,30 +188,6 @@ def create_app():  # noqa: C901
        allow_headers=headers,
    )

-    # Mount the Gradio app
-    if docling_serve_settings.enable_ui:
-        try:
-            import gradio as gr
-
-            from docling_serve.gradio_ui import ui as gradio_ui
-
-            tmp_output_dir = get_scratch() / "gradio"
-            tmp_output_dir.mkdir(exist_ok=True, parents=True)
-            gradio_ui.gradio_output_dir = tmp_output_dir
-            app = gr.mount_gradio_app(
-                app,
-                gradio_ui,
-                path="/ui",
-                allowed_paths=["./logo.png", tmp_output_dir],
-                root_path="/ui",
-            )
-        except ImportError:
-            _log.warning(
-                "Docling Serve enable_ui is activated, but gradio is not installed. "
-                "Install it with `pip install docling-serve[ui]` "
-                "or `pip install gradio`"
-            )
-
    #############################
    # Offline assets definition #
    #############################
@@ -246,10 +234,11 @@ def create_app():  # noqa: C901
    ########################

    async def _enque_source(
-        orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
+        orchestrator: BaseOrchestrator,
+        request: ConvertDocumentsRequest | GenericChunkDocumentsRequest,
    ) -> Task:
        sources: list[TaskSource] = []
-        for s in conversion_request.sources:
+        for s in request.sources:
            if isinstance(s, FileSourceRequest):
                sources.append(FileSource.model_validate(s))
            elif isinstance(s, HttpSourceRequest):
@@ -257,18 +246,41 @@ def create_app():  # noqa: C901
            elif isinstance(s, S3SourceRequest):
                sources.append(S3Coordinates.model_validate(s))

+        convert_options: ConvertDocumentsRequestOptions
+        chunking_options: BaseChunkerOptions | None = None
+        chunking_export_options = ChunkingExportOptions()
+        task_type: TaskType
+        if isinstance(request, ConvertDocumentsRequest):
+            task_type = TaskType.CONVERT
+            convert_options = request.options
+        elif isinstance(request, GenericChunkDocumentsRequest):
+            task_type = TaskType.CHUNK
+            convert_options = request.convert_options
+            chunking_options = request.chunking_options
+            chunking_export_options.include_converted_doc = (
+                request.include_converted_doc
+            )
+        else:
+            raise RuntimeError("Unknown request type.")
+
        task = await orchestrator.enqueue(
+            task_type=task_type,
            sources=sources,
-            options=conversion_request.options,
-            target=conversion_request.target,
+            convert_options=convert_options,
+            chunking_options=chunking_options,
+            chunking_export_options=chunking_export_options,
+            target=request.target,
        )
        return task

    async def _enque_file(
        orchestrator: BaseOrchestrator,
        files: list[UploadFile],
-        options: ConvertDocumentsRequestOptions,
-        target: TaskTarget,
+        task_type: TaskType,
+        convert_options: ConvertDocumentsRequestOptions,
+        chunking_options: BaseChunkerOptions | None,
+        chunking_export_options: ChunkingExportOptions | None,
+        target: TargetRequest,
    ) -> Task:
        _log.info(f"Received {len(files)} files for processing.")

@@ -281,7 +293,12 @@ def create_app():  # noqa: C901
            file_sources.append(DocumentStream(name=name, stream=buf))

        task = await orchestrator.enqueue(
-            sources=file_sources, options=options, target=target
+            task_type=task_type,
+            sources=file_sources,
+            convert_options=convert_options,
+            chunking_options=chunking_options,
+            chunking_export_options=chunking_export_options,
+            target=target,
        )
        return task

@@ -291,7 +308,7 @@ def create_app():  # noqa: C901
            task = await orchestrator.task_status(task_id=task_id)
            if task.is_completed():
                return True
-            await asyncio.sleep(5)
+            await asyncio.sleep(docling_serve_settings.sync_poll_interval)
            elapsed_time = time.monotonic() - start_time
            if elapsed_time > docling_serve_settings.max_sync_wait:
                return False
@@ -378,7 +395,7 @@ def create_app():  # noqa: C901
        response = RedirectResponse(url=logo_url)
        return response

-    @app.get("/health")
+    @app.get("/health", tags=["health"])
    def health() -> HealthCheckResponse:
        return HealthCheckResponse()

@@ -387,10 +404,21 @@ def create_app():  # noqa: C901
    def api_check() -> HealthCheckResponse:
        return HealthCheckResponse()

+    # Docling versions
+    @app.get("/version", tags=["health"])
+    def version_info() -> dict:
+        if not docling_serve_settings.show_version_info:
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Forbidden. The server is configured for not showing version details.",
+            )
+        return DOCLING_VERSIONS
+
    # Convert a document from URL(s)
    @app.post(
        "/v1/convert/source",
-        response_model=ConvertDocumentResponse,
+        tags=["convert"],
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -400,11 +428,12 @@ def create_app():  # noqa: C901
    )
    async def process_url(
        background_tasks: BackgroundTasks,
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
-            orchestrator=orchestrator, conversion_request=conversion_request
+            orchestrator=orchestrator, request=conversion_request
        )
        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
@@ -412,21 +441,30 @@ def create_app():  # noqa: C901

        if not completed:
            # TODO: abort task!
-            return HTTPException(
+            raise HTTPException(
                status_code=504,
                detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
            )

-        task = await orchestrator.get_raw_task(task_id=task.task_id)
+        task_result = await orchestrator.task_result(task_id=task.task_id)
+        if task_result is None:
+            raise HTTPException(
+                status_code=404,
+                detail="Task result not found. Please wait for a completion status.",
+            )
        response = await prepare_response(
-            task=task, orchestrator=orchestrator, background_tasks=background_tasks
+            task_id=task.task_id,
+            task_result=task_result,
+            orchestrator=orchestrator,
+            background_tasks=background_tasks,
        )
        return response

    # Convert a document from file(s)
    @app.post(
        "/v1/convert/file",
-        response_model=ConvertDocumentResponse,
+        tags=["convert"],
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -435,6 +473,7 @@ def create_app():  # noqa: C901
    )
    async def process_file(
        background_tasks: BackgroundTasks,
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        files: list[UploadFile],
        options: Annotated[
@@ -444,7 +483,13 @@ def create_app():  # noqa: C901
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options, target=target
+            task_type=TaskType.CONVERT,
+            orchestrator=orchestrator,
+            files=files,
+            convert_options=options,
+            chunking_options=None,
+            chunking_export_options=None,
+            target=target,
        )
        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
@@ -452,34 +497,45 @@ def create_app():  # noqa: C901

        if not completed:
            # TODO: abort task!
-            return HTTPException(
+            raise HTTPException(
                status_code=504,
                detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
            )

-        task = await orchestrator.get_raw_task(task_id=task.task_id)
+        task_result = await orchestrator.task_result(task_id=task.task_id)
+        if task_result is None:
+            raise HTTPException(
+                status_code=404,
+                detail="Task result not found. Please wait for a completion status.",
+            )
        response = await prepare_response(
-            task=task, orchestrator=orchestrator, background_tasks=background_tasks
+            task_id=task.task_id,
+            task_result=task_result,
+            orchestrator=orchestrator,
+            background_tasks=background_tasks,
        )
        return response

    # Convert a document from URL(s) using the async api
    @app.post(
        "/v1/convert/source/async",
+        tags=["convert"],
        response_model=TaskStatusResponse,
    )
    async def process_url_async(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
-            orchestrator=orchestrator, conversion_request=conversion_request
+            orchestrator=orchestrator, request=conversion_request
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
        )
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
@@ -488,9 +544,11 @@ def create_app():  # noqa: C901
    # Convert a document from file(s) using the async api
    @app.post(
        "/v1/convert/file/async",
+        tags=["convert"],
        response_model=TaskStatusResponse,
    )
    async def process_file_async(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        background_tasks: BackgroundTasks,
        files: list[UploadFile],
@@ -501,24 +559,253 @@ def create_app():  # noqa: C901
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options, target=target
+            task_type=TaskType.CONVERT,
+            orchestrator=orchestrator,
+            files=files,
+            convert_options=options,
+            chunking_options=None,
+            chunking_export_options=None,
+            target=target,
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
        )
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
        )

+    # Chunking endpoints
+    for display_name, path_name, opt_cls in (
+        ("HybridChunker", "hybrid", HybridChunkerOptions),
+        ("HierarchicalChunker", "hierarchical", HierarchicalChunkerOptions),
+    ):
+        req_cls = make_request_model(opt_cls)
+
+        @app.post(
+            f"/v1/chunk/{path_name}/source/async",
+            name=f"Chunk sources with {display_name} as async task",
+            tags=["chunk"],
+            response_model=TaskStatusResponse,
+        )
+        async def chunk_source_async(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            request: req_cls,
+        ):
+            task = await _enque_source(orchestrator=orchestrator, request=request)
+            task_queue_position = await orchestrator.get_queue_position(
+                task_id=task.task_id
+            )
+            return TaskStatusResponse(
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+
+        @app.post(
+            f"/v1/chunk/{path_name}/file/async",
+            name=f"Chunk files with {display_name} as async task",
+            tags=["chunk"],
+            response_model=TaskStatusResponse,
+        )
+        async def chunk_file_async(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            files: list[UploadFile],
+            convert_options: Annotated[
+                ConvertDocumentsRequestOptions,
+                FormDepends(
+                    ConvertDocumentsRequestOptions,
+                    prefix="convert_",
+                    excluded_fields=[
+                        "to_formats",
+                    ],
+                ),
+            ],
+            chunking_options: Annotated[
+                opt_cls,
+                FormDepends(
+                    opt_cls,  # form model must match this endpoint's chunker options
+                    prefix="chunking_",
+                    excluded_fields=["chunker"],
+                ),
+            ],
+            include_converted_doc: Annotated[
+                bool,
+                Form(
+                    description="If true, the output will include both the chunks and the converted document."
+                ),
+            ] = False,
+            target_type: Annotated[
+                TargetName,
+                Form(description="Specification for the type of output target."),
+            ] = TargetName.INBODY,
+        ):
+            target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+            task = await _enque_file(
+                task_type=TaskType.CHUNK,
+                orchestrator=orchestrator,
+                files=files,
+                convert_options=convert_options,
+                chunking_options=chunking_options,
+                chunking_export_options=ChunkingExportOptions(
+                    include_converted_doc=include_converted_doc
+                ),
+                target=target,
+            )
+            task_queue_position = await orchestrator.get_queue_position(
+                task_id=task.task_id
+            )
+            return TaskStatusResponse(
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+
+        @app.post(
+            f"/v1/chunk/{path_name}/source",
+            name=f"Chunk sources with {display_name}",
+            tags=["chunk"],
+            response_model=ChunkDocumentResponse,
+            responses={
+                200: {
+                    "content": {"application/zip": {}},
+                    # "description": "Return the JSON item or an image.",
+                }
+            },
+        )
+        async def chunk_source(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            request: req_cls,
+        ):
+            task = await _enque_source(orchestrator=orchestrator, request=request)
+            completed = await _wait_task_complete(
+                orchestrator=orchestrator, task_id=task.task_id
+            )
+
+            if not completed:
+                # TODO: abort task!
+                raise HTTPException(
+                    status_code=504,
+                    detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+                )
+
+            task_result = await orchestrator.task_result(task_id=task.task_id)
+            if task_result is None:
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task.task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+
+        @app.post(
+            f"/v1/chunk/{path_name}/file",
+            name=f"Chunk files with {display_name}",
+            tags=["chunk"],
+            response_model=ChunkDocumentResponse,
+            responses={
+                200: {
+                    "content": {"application/zip": {}},
+                }
+            },
+        )
+        async def chunk_file(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            files: list[UploadFile],
+            convert_options: Annotated[
+                ConvertDocumentsRequestOptions,
+                FormDepends(
+                    ConvertDocumentsRequestOptions,
+                    prefix="convert_",
+                    excluded_fields=[
+                        "to_formats",
+                    ],
+                ),
+            ],
+            chunking_options: Annotated[
+                opt_cls,
+                FormDepends(
+                    opt_cls,  # form model must match this endpoint's chunker options
+                    prefix="chunking_",
+                    excluded_fields=["chunker"],
+                ),
+            ],
+            include_converted_doc: Annotated[
+                bool,
+                Form(
+                    description="If true, the output will include both the chunks and the converted document."
+                ),
+            ] = False,
+            target_type: Annotated[
+                TargetName,
+                Form(description="Specification for the type of output target."),
+            ] = TargetName.INBODY,
+        ):
+            target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+            task = await _enque_file(
+                task_type=TaskType.CHUNK,
+                orchestrator=orchestrator,
+                files=files,
+                convert_options=convert_options,
+                chunking_options=chunking_options,
+                chunking_export_options=ChunkingExportOptions(
+                    include_converted_doc=include_converted_doc
+                ),
+                target=target,
+            )
+            completed = await _wait_task_complete(
+                orchestrator=orchestrator, task_id=task.task_id
+            )
+
+            if not completed:
+                # TODO: abort task!
+                raise HTTPException(
+                    status_code=504,
+                    detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+                )
+
+            task_result = await orchestrator.task_result(task_id=task.task_id)
+            if task_result is None:
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task.task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+
    # Task status poll
    @app.get(
        "/v1/status/poll/{task_id}",
+        tags=["tasks"],
        response_model=TaskStatusResponse,
    )
    async def task_status_poll(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
        wait: Annotated[
@@ -533,6 +820,7 @@ def create_app():  # noqa: C901
            raise HTTPException(status_code=404, detail="Task not found.")
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
@@ -546,11 +834,22 @@ def create_app():  # noqa: C901
        websocket: WebSocket,
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
+        api_key: Annotated[str, Query()] = "",
    ):
+        if docling_serve_settings.api_key:
+            if api_key != docling_serve_settings.api_key:
+                raise HTTPException(
+                    status_code=status.HTTP_401_UNAUTHORIZED,
+                    detail="Api key is required as ?api_key=SECRET.",
+                )
+
        assert isinstance(orchestrator.notifier, WebsocketNotifier)
        await websocket.accept()

-        if task_id not in orchestrator.tasks:
+        try:
+            # Get task status from Redis or RQ directly instead of checking in-memory registry
+            task = await orchestrator.task_status(task_id=task_id)
+        except TaskNotFoundError:
            await websocket.send_text(
                WebsocketMessage(
                    message=MessageKind.ERROR, error="Task not found."
@@ -559,8 +858,6 @@ def create_app():  # noqa: C901
            await websocket.close()
            return

-        task = orchestrator.tasks[task_id]
-
        # Track active WebSocket connections for this job
        orchestrator.notifier.task_subscribers[task_id].add(websocket)

@@ -568,6 +865,7 @@ def create_app():  # noqa: C901
            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
            task_response = TaskStatusResponse(
                task_id=task.task_id,
+                task_type=task.task_type,
                task_status=task.task_status,
                task_position=task_queue_position,
                task_meta=task.processing_meta,
@@ -583,6 +881,7 @@ def create_app():  # noqa: C901
                )
                task_response = TaskStatusResponse(
                    task_id=task.task_id,
+                    task_type=task.task_type,
                    task_status=task.task_status,
                    task_position=task_queue_position,
                    task_meta=task.processing_meta,
@@ -605,7 +904,10 @@ def create_app():  # noqa: C901
    # Task result
    @app.get(
        "/v1/result/{task_id}",
-        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        tags=["tasks"],
+        response_model=ConvertDocumentResponse
+        | PresignedUrlConvertDocumentResponse
+        | ChunkDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -613,14 +915,23 @@ def create_app():  # noqa: C901
        },
    )
    async def task_result(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        background_tasks: BackgroundTasks,
        task_id: str,
    ):
        try:
-            task = await orchestrator.get_raw_task(task_id=task_id)
+            task_result = await orchestrator.task_result(task_id=task_id)
+            if task_result is None:
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
            response = await prepare_response(
-                task=task, orchestrator=orchestrator, background_tasks=background_tasks
+                task_id=task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
            )
            return response
        except TaskNotFoundError:
@@ -629,9 +940,12 @@ def create_app():  # noqa: C901
    # Update task progress
    @app.post(
        "/v1/callback/task/progress",
+        tags=["internal"],
+        include_in_schema=False,
        response_model=ProgressCallbackResponse,
    )
    async def callback_task_progress(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        request: ProgressCallbackRequest,
    ):
@@ -650,9 +964,11 @@ def create_app():  # noqa: C901
    # Offload models
    @app.get(
        "/v1/clear/converters",
+        tags=["clear"],
        response_model=ClearResponse,
    )
    async def clear_converters(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    ):
        await orchestrator.clear_converters()
@@ -661,13 +977,31 @@ def create_app():  # noqa: C901
    # Clean results
    @app.get(
        "/v1/clear/results",
+        tags=["clear"],
        response_model=ClearResponse,
    )
    async def clear_results(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        older_then: float = 3600,
    ):
        await orchestrator.clear_results(older_than=older_then)
        return ClearResponse()

+    # Optional UI, mounted under /ui; a failed import is logged, not raised
+    if docling_serve_settings.enable_ui:
+        try:
+            from docling_serve.ui.app import create_ui_app
+
+            ui_app = create_ui_app(
+                process_file_async, process_url_async, task_result, task_status_poll
+            )
+            app.mount("/ui", app=ui_app, name="ui")
+        except ImportError as ex:
+            _log.error(ex)
+            _log.warning(
+                "Docling Serve enable_ui is activated, but its dependencies are not installed."
+                " Install it with `uv sync --extra ui`"
+            )
+
    return app
--- a/docling_serve/auth.py
+++ b/docling_serve/auth.py
@@ -0,0 +1,116 @@
+"""API-key authentication helpers exposed as FastAPI security dependencies."""
+
+import secrets
+from typing import Any
+
+from fastapi import HTTPException, Request, Response, status
+from fastapi.security import APIKeyCookie, APIKeyHeader
+from pydantic import BaseModel
+
+
+class AuthenticationResult(BaseModel):
+    """Outcome of validating a candidate API key."""
+
+    # True when the candidate key was accepted.
+    valid: bool
+    # Human-readable reasons for a rejection; empty on success.
+    errors: list[str] = []
+    # On success this currently carries the accepted key itself.
+    detail: Any | None = None
+
+
+class KeyValidator:
+    """Compare a candidate key against the expected ``api_key``.
+
+    When ``api_key`` is empty, any supplied candidate is accepted and
+    failures are returned as an invalid result instead of being raised.
+    """
+
+    def __init__(
+        self,
+        api_key: str,
+        field_name: str = "X-Api-Key",
+        fail_on_unauthorized: bool = True,
+    ) -> None:
+        self.api_key = api_key
+        self.field_name = field_name
+        self.fail_on_unauthorized = fail_on_unauthorized
+
+    async def __call__(self, candidate_key: str | None) -> AuthenticationResult:
+        """Validate ``candidate_key`` (surrounding whitespace is ignored).
+
+        Raises:
+            HTTPException: 401, when the key is missing or wrong, ``api_key``
+                is non-empty and ``fail_on_unauthorized`` is set.
+        """
+        if candidate_key is None:
+            return self._error(f"Missing field {self.field_name}.")
+
+        candidate_key = candidate_key.strip()
+
+        # Compare as bytes in constant time so that response timing does not
+        # leak how much of the key matched; bytes also allow non-ASCII keys.
+        if self.api_key == "" or secrets.compare_digest(
+            candidate_key.encode("utf-8"), self.api_key.encode("utf-8")
+        ):
+            return AuthenticationResult(
+                valid=True,
+                detail=candidate_key,  # Remove?
+            )
+        return self._error("The provided API Key is invalid.")
+
+    def _error(self, error: str) -> AuthenticationResult:
+        """Raise 401 if enforcement applies, else return an invalid result."""
+        if self.fail_on_unauthorized and self.api_key:
+            raise HTTPException(status.HTTP_401_UNAUTHORIZED, error)
+        return AuthenticationResult(
+            valid=False,
+            errors=[error],
+        )
+
+
+class APIKeyHeaderAuth(APIKeyHeader):
+    """
+    FastAPI dependency which validates an API key sent in a request header.
+    """
+
+    def __init__(self, validator: str | KeyValidator) -> None:
+        # A plain string is taken as the expected key, with default settings.
+        self.validator = (
+            KeyValidator(validator) if isinstance(validator, str) else validator
+        )
+        # auto_error=False: a missing header reaches the validator as None.
+        super().__init__(name=self.validator.field_name, auto_error=False)
+
+    async def __call__(self, request: Request) -> AuthenticationResult:  # type: ignore
+        key = await super().__call__(request=request)
+        return await self.validator(key)
+
+
+class APIKeyCookieAuth(APIKeyCookie):
+    """
+    FastAPI dependency which validates an API key sent in a cookie.
+    """
+
+    def __init__(self, validator: str | KeyValidator) -> None:
+        # A plain string is taken as the expected key, with default settings.
+        self.validator = (
+            KeyValidator(validator) if isinstance(validator, str) else validator
+        )
+        # auto_error=False: a missing cookie reaches the validator as None.
+        super().__init__(name=self.validator.field_name, auto_error=False)
+
+    async def __call__(self, request: Request) -> AuthenticationResult:  # type: ignore
+        api_key = await super().__call__(request=request)
+        return await self.validator(api_key)
+
+    def _set_api_key(self, response: Response, api_key: str, expires=24 * 3600):
+        """Store ``api_key`` in a Secure, HttpOnly, SameSite=Strict cookie."""
+        response.set_cookie(
+            key=self.validator.field_name,
+            value=api_key,
+            expires=expires,
+            secure=True,
+            httponly=True,
+            samesite="strict",
+        )
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -1,16 +1,20 @@
 import enum
-from typing import Annotated, Literal
+from functools import cache
+from typing import Annotated, Generic, Literal

 from pydantic import BaseModel, Field, model_validator
 from pydantic_core import PydanticCustomError
-from typing_extensions import Self
+from typing_extensions import Self, TypeVar

+from docling_jobkit.datamodel.chunking import (
+    BaseChunkerOptions,
+)
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
 from docling_jobkit.datamodel.s3_coords import S3Coordinates
 from docling_jobkit.datamodel.task_targets import (
    InBodyTarget,
+    PutTarget,
    S3Target,
-    TaskTarget,
    ZipTarget,
 )

@@ -43,12 +47,17 @@ SourceRequestItem = Annotated[
    FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
 ]

+TargetRequest = Annotated[
+    InBodyTarget | ZipTarget | S3Target | PutTarget,
+    Field(discriminator="kind"),
+]
+

 ## Complete Source request
 class ConvertDocumentsRequest(BaseModel):
    options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
    sources: list[SourceRequestItem]
-    target: TaskTarget = InBodyTarget()
+    target: TargetRequest = InBodyTarget()

    @model_validator(mode="after")
    def validate_s3_source_and_target(self) -> Self:
@@ -70,3 +79,52 @@ class ConvertDocumentsRequest(BaseModel):
                "error target", 'target kind "s3" requires source kind "s3"'
            )
        return self
+
+
+## Source chunking requests
+
+
+class BaseChunkDocumentsRequest(BaseModel):
+    convert_options: Annotated[
+        ConvertDocumentsRequestOptions, Field(description="Conversion options.")
+    ] = ConvertDocumentsRequestOptions()
+    sources: Annotated[
+        list[SourceRequestItem],
+        Field(description="List of input document sources to process."),
+    ]
+    include_converted_doc: Annotated[
+        bool,
+        Field(
+            description="If true, the output will include both the chunks and the converted document."
+        ),
+    ] = False
+    target: Annotated[
+        TargetRequest, Field(description="Specification for the type of output target.")
+    ] = InBodyTarget()
+
+
+ChunkingOptT = TypeVar("ChunkingOptT", bound=BaseChunkerOptions)
+
+
+class GenericChunkDocumentsRequest(BaseChunkDocumentsRequest, Generic[ChunkingOptT]):
+    chunking_options: ChunkingOptT
+
+
+@cache
+def make_request_model(
+    opt_type: type[ChunkingOptT],
+) -> type[GenericChunkDocumentsRequest[ChunkingOptT]]:
+    """
+    Dynamically create (and cache) a subclass of GenericChunkDocumentsRequest[opt_type]
+    with chunking_options having a default factory.
+    """
+    return type(
+        f"{opt_type.__name__}DocumentsRequest",
+        (GenericChunkDocumentsRequest[opt_type],),  # type: ignore[valid-type]
+        {
+            "__annotations__": {"chunking_options": opt_type},
+            "chunking_options": Field(
+                default_factory=opt_type, description="Options specific to the chunker."
+            ),
+        },
+    )
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -5,8 +5,12 @@ from pydantic import BaseModel

 from docling.datamodel.document import ConversionStatus, ErrorItem
 from docling.utils.profiling import ProfilingItem
-from docling_core.types.doc import DoclingDocument
-from docling_jobkit.datamodel.task_meta import TaskProcessingMeta
+from docling_jobkit.datamodel.result import (
+    ChunkedDocumentResultItem,
+    ExportDocumentResponse,
+    ExportResult,
+)
+from docling_jobkit.datamodel.task_meta import TaskProcessingMeta, TaskType


 # Status
@@ -18,17 +22,8 @@ class ClearResponse(BaseModel):
    status: str = "ok"


-class DocumentResponse(BaseModel):
-    filename: str
-    md_content: Optional[str] = None
-    json_content: Optional[DoclingDocument] = None
-    html_content: Optional[str] = None
-    text_content: Optional[str] = None
-    doctags_content: Optional[str] = None
-
-
 class ConvertDocumentResponse(BaseModel):
-    document: DocumentResponse
+    document: ExportDocumentResponse
    status: ConversionStatus
    errors: list[ErrorItem] = []
    processing_time: float
@@ -36,16 +31,25 @@ class ConvertDocumentResponse(BaseModel):


 class PresignedUrlConvertDocumentResponse(BaseModel):
-    status: ConversionStatus
    processing_time: float
+    num_converted: int
+    num_succeeded: int
+    num_failed: int


 class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus


+class ChunkDocumentResponse(BaseModel):
+    chunks: list[ChunkedDocumentResultItem]
+    documents: list[ExportResult]
+    processing_time: float
+
+
 class TaskStatusResponse(BaseModel):
    task_id: str
+    task_type: TaskType
    task_status: str
    task_position: Optional[int] = None
    task_meta: Optional[TaskProcessingMeta] = None
--- a/docling_serve/gradio_ui.py
+++ b/docling_serve/gradio_ui.py
@@ -1,873 +0,0 @@
-import base64
-import importlib
-import itertools
-import json
-import logging
-import ssl
-import tempfile
-import time
-from pathlib import Path
-from typing import Optional
-
-import certifi
-import gradio as gr
-import httpx
-
-from docling.datamodel.base_models import FormatToExtensions
-from docling.datamodel.pipeline_options import (
-    PdfBackend,
-    ProcessingPipeline,
-    TableFormerMode,
-    TableStructureOptions,
-)
-
-from docling_serve.helper_functions import _to_list_of_strings
-from docling_serve.settings import docling_serve_settings, uvicorn_settings
-
-logger = logging.getLogger(__name__)
-
-############################
-# Path of static artifacts #
-############################
-
-logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
-js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7"
-if (
-    docling_serve_settings.static_path is not None
-    and docling_serve_settings.static_path.is_dir()
-):
-    logo_path = str(docling_serve_settings.static_path / "logo.svg")
-    js_components_url = "/static/docling-components.js"
-
-
-##############################
-# Head JS for web components #
-##############################
-head = f"""
-    <script src="{js_components_url}" type="module"></script>
-"""
-
-#################
-# CSS and theme #
-#################
-
-css = """
-#logo {
-    border-style: none;
-    background: none;
-    box-shadow: none;
-    min-width: 80px;
-}
-#dark_mode_column {
-    display: flex;
-    align-content: flex-end;
-}
-#title {
-    text-align: left;
-    display:block;
-    height: auto;
-    padding-top: 5px;
-    line-height: 0;
-}
-.title-text h1 > p, .title-text p {
-    margin-top: 0px !important;
-    margin-bottom: 2px !important;
-}
-#custom-container {
-    border: 0.909091px solid;
-    padding: 10px;
-    border-radius: 4px;
-}
-#custom-container h4 {
-    font-size: 14px;
-}
-#file_input_zone {
-    height: 140px;
-}
-
-docling-img {
-    gap: 1rem;
-}
-
-docling-img::part(page) {
-    box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
-}
-"""
-
-theme = gr.themes.Default(
-    text_size="md",
-    spacing_size="md",
-    font=[
-        gr.themes.GoogleFont("Red Hat Display"),
-        "ui-sans-serif",
-        "system-ui",
-        "sans-serif",
-    ],
-    font_mono=[
-        gr.themes.GoogleFont("Red Hat Mono"),
-        "ui-monospace",
-        "Consolas",
-        "monospace",
-    ],
-)
-
-#############
-# Variables #
-#############
-
-gradio_output_dir = None  # Will be set by FastAPI when mounted
-file_output_path = None  # Will be set when a new file is generated
-
-#############
-# Functions #
-#############
-
-
-def get_api_endpoint() -> str:
-    protocol = "http"
-    if uvicorn_settings.ssl_keyfile is not None:
-        protocol = "https"
-    return f"{protocol}://{docling_serve_settings.api_host}:{uvicorn_settings.port}"
-
-
-def get_ssl_context() -> ssl.SSLContext:
-    ctx = ssl.create_default_context(cafile=certifi.where())
-    kube_sa_ca_cert_path = Path(
-        "/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
-    )
-    if (
-        uvicorn_settings.ssl_keyfile is not None
-        and ".svc." in docling_serve_settings.api_host
-        and kube_sa_ca_cert_path.exists()
-    ):
-        ctx.load_verify_locations(cafile=kube_sa_ca_cert_path)
-    return ctx
-
-
-def health_check():
-    response = httpx.get(f"{get_api_endpoint()}/health")
-    if response.status_code == 200:
-        return "Healthy"
-    return "Unhealthy"
-
-
-def set_options_visibility(x):
-    return gr.Accordion("Options", open=x)
-
-
-def set_outputs_visibility_direct(x, y):
-    content = gr.Row(visible=x)
-    file = gr.Row(visible=y)
-    return content, file
-
-
-def set_task_id_visibility(x):
-    task_id_row = gr.Row(visible=x)
-    return task_id_row
-
-
-def set_outputs_visibility_process(x):
-    content = gr.Row(visible=not x)
-    file = gr.Row(visible=x)
-    return content, file
-
-
-def set_download_button_label(label_text: gr.State):
-    return gr.DownloadButton(label=str(label_text), scale=1)
-
-
-def clear_outputs():
-    task_id_rendered = ""
-    markdown_content = ""
-    json_content = ""
-    json_rendered_content = ""
-    html_content = ""
-    text_content = ""
-    doctags_content = ""
-
-    return (
-        task_id_rendered,
-        markdown_content,
-        markdown_content,
-        json_content,
-        json_rendered_content,
-        html_content,
-        html_content,
-        text_content,
-        doctags_content,
-    )
-
-
-def clear_url_input():
-    return ""
-
-
-def clear_file_input():
-    return None
-
-
-def auto_set_return_as_file(
-    url_input_value: str,
-    file_input_value: Optional[list[str]],
-    image_export_mode_value: str,
-):
-    # If more than one input source is provided, return as file
-    if (
-        (len(url_input_value.split(",")) > 1)
-        or (file_input_value and len(file_input_value) > 1)
-        or (image_export_mode_value == "referenced")
-    ):
-        return True
-    else:
-        return False
-
-
-def change_ocr_lang(ocr_engine):
-    if ocr_engine == "easyocr":
-        return "en,fr,de,es"
-    elif ocr_engine == "tesseract_cli":
-        return "eng,fra,deu,spa"
-    elif ocr_engine == "tesseract":
-        return "eng,fra,deu,spa"
-    elif ocr_engine == "rapidocr":
-        return "english,chinese"
-
-
-def wait_task_finish(task_id: str, return_as_file: bool):
-    conversion_sucess = False
-    task_finished = False
-    task_status = ""
-    ssl_ctx = get_ssl_context()
-    while not task_finished:
-        try:
-            response = httpx.get(
-                f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
-                verify=ssl_ctx,
-                timeout=15,
-            )
-            task_status = response.json()["task_status"]
-            if task_status == "success":
-                conversion_sucess = True
-                task_finished = True
-
-            if task_status in ("failure", "revoked"):
-                conversion_sucess = False
-                task_finished = True
-                raise RuntimeError(f"Task failed with status {task_status!r}")
-            time.sleep(5)
-        except Exception as e:
-            logger.error(f"Error processing file(s): {e}")
-            conversion_sucess = False
-            task_finished = True
-            raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
-
-    if conversion_sucess:
-        try:
-            response = httpx.get(
-                f"{get_api_endpoint()}/v1/result/{task_id}",
-                timeout=15,
-                verify=ssl_ctx,
-            )
-            output = response_to_output(response, return_as_file)
-            return output
-        except Exception as e:
-            logger.error(f"Error getting task result: {e}")
-
-    raise gr.Error(
-        f"Error getting task result, conversion finished with status: {task_status}"
-    )
-
-
-def process_url(
-    input_sources,
-    to_formats,
-    image_export_mode,
-    pipeline,
-    ocr,
-    force_ocr,
-    ocr_engine,
-    ocr_lang,
-    pdf_backend,
-    table_mode,
-    abort_on_error,
-    return_as_file,
-    do_code_enrichment,
-    do_formula_enrichment,
-    do_picture_classification,
-    do_picture_description,
-):
-    target = {"kind": "zip" if return_as_file else "inbody"}
-    parameters = {
-        "sources": [
-            {"kind": "http", "url": source} for source in input_sources.split(",")
-        ],
-        "options": {
-            "to_formats": to_formats,
-            "image_export_mode": image_export_mode,
-            "pipeline": pipeline,
-            "ocr": ocr,
-            "force_ocr": force_ocr,
-            "ocr_engine": ocr_engine,
-            "ocr_lang": _to_list_of_strings(ocr_lang),
-            "pdf_backend": pdf_backend,
-            "table_mode": table_mode,
-            "abort_on_error": abort_on_error,
-            "do_code_enrichment": do_code_enrichment,
-            "do_formula_enrichment": do_formula_enrichment,
-            "do_picture_classification": do_picture_classification,
-            "do_picture_description": do_picture_description,
-        },
-        "target": target,
-    }
-    if (
-        not parameters["sources"]
-        or len(parameters["sources"]) == 0
-        or parameters["sources"][0]["url"] == ""
-    ):
-        logger.error("No input sources provided.")
-        raise gr.Error("No input sources provided.", print_exception=False)
-    try:
-        ssl_ctx = get_ssl_context()
-        response = httpx.post(
-            f"{get_api_endpoint()}/v1/convert/source/async",
-            json=parameters,
-            verify=ssl_ctx,
-            timeout=60,
-        )
-    except Exception as e:
-        logger.error(f"Error processing URL: {e}")
-        raise gr.Error(f"Error processing URL: {e}", print_exception=False)
-    if response.status_code != 200:
-        data = response.json()
-        error_message = data.get("detail", "An unknown error occurred.")
-        logger.error(f"Error processing file: {error_message}")
-        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
-
-    task_id_rendered = response.json()["task_id"]
-    return task_id_rendered
-
-
-def file_to_base64(file):
-    with open(file.name, "rb") as f:
-        encoded_string = base64.b64encode(f.read()).decode("utf-8")
-    return encoded_string
-
-
-def process_file(
-    files,
-    to_formats,
-    image_export_mode,
-    pipeline,
-    ocr,
-    force_ocr,
-    ocr_engine,
-    ocr_lang,
-    pdf_backend,
-    table_mode,
-    abort_on_error,
-    return_as_file,
-    do_code_enrichment,
-    do_formula_enrichment,
-    do_picture_classification,
-    do_picture_description,
-):
-    if not files or len(files) == 0:
-        logger.error("No files provided.")
-        raise gr.Error("No files provided.", print_exception=False)
-    files_data = [
-        {"kind": "file", "base64_string": file_to_base64(file), "filename": file.name}
-        for file in files
-    ]
-    target = {"kind": "zip" if return_as_file else "inbody"}
-
-    parameters = {
-        "sources": files_data,
-        "options": {
-            "to_formats": to_formats,
-            "image_export_mode": image_export_mode,
-            "pipeline": pipeline,
-            "ocr": ocr,
-            "force_ocr": force_ocr,
-            "ocr_engine": ocr_engine,
-            "ocr_lang": _to_list_of_strings(ocr_lang),
-            "pdf_backend": pdf_backend,
-            "table_mode": table_mode,
-            "abort_on_error": abort_on_error,
-            "return_as_file": return_as_file,
-            "do_code_enrichment": do_code_enrichment,
-            "do_formula_enrichment": do_formula_enrichment,
-            "do_picture_classification": do_picture_classification,
-            "do_picture_description": do_picture_description,
-        },
-        "target": target,
-    }
-
-    try:
-        ssl_ctx = get_ssl_context()
-        response = httpx.post(
-            f"{get_api_endpoint()}/v1/convert/source/async",
-            json=parameters,
-            verify=ssl_ctx,
-            timeout=60,
-        )
-    except Exception as e:
-        logger.error(f"Error processing file(s): {e}")
-        raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
-    if response.status_code != 200:
-        data = response.json()
-        error_message = data.get("detail", "An unknown error occurred.")
-        logger.error(f"Error processing file: {error_message}")
-        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
-
-    task_id_rendered = response.json()["task_id"]
-    return task_id_rendered
-
-
-def response_to_output(response, return_as_file):
-    markdown_content = ""
-    json_content = ""
-    json_rendered_content = ""
-    html_content = ""
-    text_content = ""
-    doctags_content = ""
-    download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1)
-    if return_as_file:
-        filename = (
-            response.headers.get("Content-Disposition").split("filename=")[1].strip('"')
-        )
-        tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_"))
-        file_output_path = f"{tmp_output_dir}/{filename}"
-        # logger.info(f"Saving file to: {file_output_path}")
-        with open(file_output_path, "wb") as f:
-            f.write(response.content)
-        download_button = gr.DownloadButton(
-            visible=True, label=f"Download {filename}", scale=1, value=file_output_path
-        )
-    else:
-        full_content = response.json()
-        markdown_content = full_content.get("document").get("md_content")
-        json_content = json.dumps(
-            full_content.get("document").get("json_content"), indent=2
-        )
-        # Embed document JSON and trigger load at client via an image.
-        json_rendered_content = f"""
-            <docling-img id="dclimg" pagenumbers><docling-tooltip></docling-tooltip></docling-img>
-            <script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{json_content}</script>
-            <img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
-            """
-        html_content = full_content.get("document").get("html_content")
-        text_content = full_content.get("document").get("text_content")
-        doctags_content = full_content.get("document").get("doctags_content")
-    return (
-        markdown_content,
-        markdown_content,
-        json_content,
-        json_rendered_content,
-        html_content,
-        html_content,
-        text_content,
-        doctags_content,
-        download_button,
-    )
-
-
-############
-# UI Setup #
-############
-
-with gr.Blocks(
-    head=head,
-    css=css,
-    theme=theme,
-    title="Docling Serve",
-    delete_cache=(3600, 3600),  # Delete all files older than 1 hour every hour
-) as ui:
-    # Constants stored in states to be able to pass them as inputs to functions
-    processing_text = gr.State("Processing your document(s), please wait...")
-    true_bool = gr.State(True)
-    false_bool = gr.State(False)
-
-    # Banner
-    with gr.Row(elem_id="check_health"):
-        # Logo
-        with gr.Column(scale=1, min_width=90):
-            try:
-                gr.Image(
-                    logo_path,
-                    height=80,
-                    width=80,
-                    show_download_button=False,
-                    show_label=False,
-                    show_fullscreen_button=False,
-                    container=False,
-                    elem_id="logo",
-                    scale=0,
-                )
-            except Exception:
-                logger.warning("Logo not found.")
-
-        # Title
-        with gr.Column(scale=1, min_width=200):
-            gr.Markdown(
-                f"# Docling Serve \n(docling version: "
-                f"{importlib.metadata.version('docling')})",
-                elem_id="title",
-                elem_classes=["title-text"],
-            )
-        # Dark mode button
-        with gr.Column(scale=16, elem_id="dark_mode_column"):
-            dark_mode_btn = gr.Button("Dark/Light Mode", scale=0)
-            dark_mode_btn.click(
-                None,
-                None,
-                None,
-                js="""() => {
-                    if (document.querySelectorAll('.dark').length) {
-                        document.querySelectorAll('.dark').forEach(
-                        el => el.classList.remove('dark')
-                        );
-                    } else {
-                        document.querySelector('body').classList.add('dark');
-                    }
-                }""",
-                show_api=False,
-            )
-
-    # URL Processing Tab
-    with gr.Tab("Convert URL"):
-        with gr.Row():
-            with gr.Column(scale=4):
-                url_input = gr.Textbox(
-                    label="URL Input Source",
-                    placeholder="https://arxiv.org/pdf/2501.17887",
-                )
-            with gr.Column(scale=1):
-                url_process_btn = gr.Button("Process URL", scale=1)
-                url_reset_btn = gr.Button("Reset", scale=1)
-
-    # File Processing Tab
-    with gr.Tab("Convert File"):
-        with gr.Row():
-            with gr.Column(scale=4):
-                file_input = gr.File(
-                    elem_id="file_input_zone",
-                    label="Upload File",
-                    file_types=[
-                        f".{v}"
-                        for v in itertools.chain.from_iterable(
-                            FormatToExtensions.values()
-                        )
-                    ],
-                    file_count="multiple",
-                    scale=4,
-                )
-            with gr.Column(scale=1):
-                file_process_btn = gr.Button("Process File", scale=1)
-                file_reset_btn = gr.Button("Reset", scale=1)
-
-    # Options
-    with gr.Accordion("Options") as options:
-        with gr.Row():
-            with gr.Column(scale=1):
-                to_formats = gr.CheckboxGroup(
-                    [
-                        ("Docling (JSON)", "json"),
-                        ("Markdown", "md"),
-                        ("HTML", "html"),
-                        ("Plain Text", "text"),
-                        ("Doc Tags", "doctags"),
-                    ],
-                    label="To Formats",
-                    value=["json", "md"],
-                )
-            with gr.Column(scale=1):
-                image_export_mode = gr.Radio(
-                    [
-                        ("Embedded", "embedded"),
-                        ("Placeholder", "placeholder"),
-                        ("Referenced", "referenced"),
-                    ],
-                    label="Image Export Mode",
-                    value="embedded",
-                )
-        with gr.Row():
-            with gr.Column(scale=1, min_width=200):
-                pipeline = gr.Radio(
-                    [(v.value.capitalize(), v.value) for v in ProcessingPipeline],
-                    label="Pipeline type",
-                    value=ProcessingPipeline.STANDARD.value,
-                )
-        with gr.Row():
-            with gr.Column(scale=1, min_width=200):
-                ocr = gr.Checkbox(label="Enable OCR", value=True)
-                force_ocr = gr.Checkbox(label="Force OCR", value=False)
-            with gr.Column(scale=1):
-                ocr_engine = gr.Radio(
-                    [
-                        ("EasyOCR", "easyocr"),
-                        ("Tesseract", "tesseract"),
-                        ("RapidOCR", "rapidocr"),
-                    ],
-                    label="OCR Engine",
-                    value="easyocr",
-                )
-            with gr.Column(scale=1, min_width=200):
-                ocr_lang = gr.Textbox(
-                    label="OCR Language (beware of the format)", value="en,fr,de,es"
-                )
-            ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
-        with gr.Row():
-            with gr.Column(scale=4):
-                pdf_backend = gr.Radio(
-                    [v.value for v in PdfBackend],
-                    label="PDF Backend",
-                    value=PdfBackend.DLPARSE_V4.value,
-                )
-            with gr.Column(scale=2):
-                table_mode = gr.Radio(
-                    [(v.value.capitalize(), v.value) for v in TableFormerMode],
-                    label="Table Mode",
-                    value=TableStructureOptions().mode.value,
-                )
-            with gr.Column(scale=1):
-                abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
-                return_as_file = gr.Checkbox(label="Return as File", value=False)
-        with gr.Row():
-            with gr.Column():
-                do_code_enrichment = gr.Checkbox(
-                    label="Enable code enrichment", value=False
-                )
-                do_formula_enrichment = gr.Checkbox(
-                    label="Enable formula enrichment", value=False
-                )
-            with gr.Column():
-                do_picture_classification = gr.Checkbox(
-                    label="Enable picture classification", value=False
-                )
-                do_picture_description = gr.Checkbox(
-                    label="Enable picture description", value=False
-                )
-
-    # Task id output
-    with gr.Row(visible=False) as task_id_output:
-        task_id_rendered = gr.Textbox(label="Task id", interactive=False)
-
-    # Document output
-    with gr.Row(visible=False) as content_output:
-        with gr.Tab("Docling (JSON)"):
-            output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
-        with gr.Tab("Docling-Rendered"):
-            output_json_rendered = gr.HTML(label="Response")
-        with gr.Tab("Markdown"):
-            output_markdown = gr.Code(
-                language="markdown", wrap_lines=True, show_label=False
-            )
-        with gr.Tab("Markdown-Rendered"):
-            output_markdown_rendered = gr.Markdown(label="Response")
-        with gr.Tab("HTML"):
-            output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
-        with gr.Tab("HTML-Rendered"):
-            output_html_rendered = gr.HTML(label="Response")
-        with gr.Tab("Text"):
-            output_text = gr.Code(wrap_lines=True, show_label=False)
-        with gr.Tab("DocTags"):
-            output_doctags = gr.Code(wrap_lines=True, show_label=False)
-
-    # File download output
-    with gr.Row(visible=False) as file_output:
-        download_file_btn = gr.DownloadButton(label="Placeholder", scale=1)
-
-    ##############
-    # UI Actions #
-    ##############
-
-    # Handle Return as File
-    url_input.change(
-        auto_set_return_as_file,
-        inputs=[url_input, file_input, image_export_mode],
-        outputs=[return_as_file],
-    )
-    file_input.change(
-        auto_set_return_as_file,
-        inputs=[url_input, file_input, image_export_mode],
-        outputs=[return_as_file],
-    )
-    image_export_mode.change(
-        auto_set_return_as_file,
-        inputs=[url_input, file_input, image_export_mode],
-        outputs=[return_as_file],
-    )
-
-    # URL processing
-    url_process_btn.click(
-        set_options_visibility, inputs=[false_bool], outputs=[options]
-    ).then(
-        set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
-    ).then(
-        clear_outputs,
-        inputs=None,
-        outputs=[
-            task_id_rendered,
-            output_markdown,
-            output_markdown_rendered,
-            output_json,
-            output_json_rendered,
-            output_html,
-            output_html_rendered,
-            output_text,
-            output_doctags,
-        ],
-    ).then(
-        set_task_id_visibility,
-        inputs=[true_bool],
-        outputs=[task_id_output],
-    ).then(
-        process_url,
-        inputs=[
-            url_input,
-            to_formats,
-            image_export_mode,
-            pipeline,
-            ocr,
-            force_ocr,
-            ocr_engine,
-            ocr_lang,
-            pdf_backend,
-            table_mode,
-            abort_on_error,
-            return_as_file,
-            do_code_enrichment,
-            do_formula_enrichment,
-            do_picture_classification,
-            do_picture_description,
-        ],
-        outputs=[
-            task_id_rendered,
-        ],
-    ).then(
-        set_outputs_visibility_process,
-        inputs=[return_as_file],
-        outputs=[content_output, file_output],
-    ).then(
-        wait_task_finish,
-        inputs=[task_id_rendered, return_as_file],
-        outputs=[
-            output_markdown,
-            output_markdown_rendered,
-            output_json,
-            output_json_rendered,
-            output_html,
-            output_html_rendered,
-            output_text,
-            output_doctags,
-            download_file_btn,
-        ],
-    )
-
-    url_reset_btn.click(
-        clear_outputs,
-        inputs=None,
-        outputs=[
-            output_markdown,
-            output_markdown_rendered,
-            output_json,
-            output_json_rendered,
-            output_html,
-            output_html_rendered,
-            output_text,
-            output_doctags,
-        ],
-    ).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
-        set_outputs_visibility_direct,
-        inputs=[false_bool, false_bool],
-        outputs=[content_output, file_output],
-    ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
-        clear_url_input, inputs=None, outputs=[url_input]
-    )
-
-    # File processing
-    file_process_btn.click(
-        set_options_visibility, inputs=[false_bool], outputs=[options]
-    ).then(
-        set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
-    ).then(
-        clear_outputs,
-        inputs=None,
-        outputs=[
-            task_id_rendered,
-            output_markdown,
-            output_markdown_rendered,
-            output_json,
-            output_json_rendered,
-            output_html,
-            output_html_rendered,
-            output_text,
-            output_doctags,
-        ],
-    ).then(
-        set_task_id_visibility,
-        inputs=[true_bool],
-        outputs=[task_id_output],
-    ).then(
-        process_file,
-        inputs=[
-            file_input,
-            to_formats,
-            image_export_mode,
-            pipeline,
-            ocr,
-            force_ocr,
-            ocr_engine,
-            ocr_lang,
-            pdf_backend,
-            table_mode,
-            abort_on_error,
-            return_as_file,
-            do_code_enrichment,
-            do_formula_enrichment,
-            do_picture_classification,
-            do_picture_description,
-        ],
-        outputs=[
-            task_id_rendered,
-        ],
-    ).then(
-        set_outputs_visibility_process,
-        inputs=[return_as_file],
-        outputs=[content_output, file_output],
-    ).then(
-        wait_task_finish,
-        inputs=[task_id_rendered, return_as_file],
-        outputs=[
-            output_markdown,
-            output_markdown_rendered,
-            output_json,
-            output_json_rendered,
-            output_html,
-            output_html_rendered,
-            output_text,
-            output_doctags,
-            download_file_btn,
-        ],
-    )
-
-    file_reset_btn.click(
-        clear_outputs,
-        inputs=None,
-        outputs=[
-            output_markdown,
-            output_markdown_rendered,
-            output_json,
-            output_json_rendered,
-            output_html,
-            output_html_rendered,
-            output_text,
-            output_doctags,
-        ],
-    ).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
-        set_outputs_visibility_direct,
-        inputs=[false_bool, false_bool],
-        outputs=[content_output, file_output],
-    ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
-        clear_file_input, inputs=None, outputs=[file_input]
-    )
--- a/docling_serve/helper_functions.py
+++ b/docling_serve/helper_functions.py
@@ -1,11 +1,25 @@
+import importlib.metadata
 import inspect
 import json
+import platform
 import re
+import sys
 from typing import Union, get_args, get_origin

 from fastapi import Depends, Form
 from pydantic import BaseModel, TypeAdapter

+DOCLING_VERSIONS = {
+    "docling-serve": importlib.metadata.version("docling-serve"),
+    "docling-jobkit": importlib.metadata.version("docling-jobkit"),
+    "docling": importlib.metadata.version("docling"),
+    "docling-core": importlib.metadata.version("docling-core"),
+    "docling-ibm-models": importlib.metadata.version("docling-ibm-models"),
+    "docling-parse": importlib.metadata.version("docling-parse"),
+    "python": f"{sys.implementation.cache_tag} ({platform.python_version()})",
+    "plaform": platform.platform(),
+}
+

 def is_pydantic_model(type_):
    try:
@@ -29,10 +43,15 @@ def is_pydantic_model(type_):

 # Adapted from
 # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
-def FormDepends(cls: type[BaseModel]):
+def FormDepends(
+    cls: type[BaseModel], prefix: str = "", excluded_fields: list[str] = []
+):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
+        if field_name in excluded_fields:
+            continue
+
        annotation = model_field.annotation
        description = model_field.description
        default = (
@@ -63,7 +82,7 @@ def FormDepends(cls: type[BaseModel]):

        new_parameters.append(
            inspect.Parameter(
-                name=field_name,
+                name=f"{prefix}{field_name}",
                kind=inspect.Parameter.POSITIONAL_ONLY,
                default=default,
                annotation=annotation,
@@ -71,19 +90,23 @@ def FormDepends(cls: type[BaseModel]):
        )

    async def as_form_func(**data):
+        newdata = {}
        for field_name, model_field in cls.model_fields.items():
-            value = data.get(field_name)
+            if field_name in excluded_fields:
+                continue
+            value = data.get(f"{prefix}{field_name}")
+            newdata[field_name] = value
            annotation = model_field.annotation

            # Parse nested models from JSON string
            if value is not None and is_pydantic_model(annotation):
                try:
                    validator = TypeAdapter(annotation)
-                    data[field_name] = validator.validate_json(value)
+                    newdata[field_name] = validator.validate_json(value)
                except Exception as e:
                    raise ValueError(f"Invalid JSON for field '{field_name}': {e}")

-        return cls(**data)
+        return cls(**newdata)

    sig = inspect.signature(as_form_func)
    sig = sig.replace(parameters=new_parameters)
--- a/docling_serve/orchestrator_factory.py
+++ b/docling_serve/orchestrator_factory.py
@@ -1,8 +1,266 @@
+import json
+import logging
 from functools import lru_cache
+from typing import Any, Optional

-from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator
+import redis.asyncio as redis
+
+from docling_jobkit.datamodel.task import Task
+from docling_jobkit.datamodel.task_meta import TaskStatus
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+    TaskNotFoundError,
+)

 from docling_serve.settings import AsyncEngine, docling_serve_settings
+from docling_serve.storage import get_scratch
+
+_log = logging.getLogger(__name__)
+
+
+class RedisTaskStatusMixin:
+    tasks: dict[str, Task]
+    _task_result_keys: dict[str, str]
+    config: Any
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.redis_prefix = "docling:tasks:"
+        self._redis_pool = redis.ConnectionPool.from_url(
+            self.config.redis_url,
+            max_connections=10,
+            socket_timeout=2.0,
+        )
+
+    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
+        """
+        Get task status from RQ first, then the Redis cache, then the parent class.
+
+        When Redis shows PENDING or STARTED but RQ reports a different status,
+        we update Redis and return the RQ status for cross-instance consistency.
+        """
+        _log.info(f"Task {task_id} status check")
+
+        # Always check RQ directly first - this is the most reliable source
+        rq_task = await self._get_task_from_rq_direct(task_id)
+        if rq_task:
+            _log.info(f"Task {task_id} in RQ: {rq_task.task_status}")
+
+            # Update memory registry
+            self.tasks[task_id] = rq_task
+
+            # Store/update in Redis for other instances
+            await self._store_task_in_redis(rq_task)
+            return rq_task
+
+        # If not in RQ, check Redis (maybe it's cached from another instance)
+        task = await self._get_task_from_redis(task_id)
+        if task:
+            _log.info(f"Task {task_id} in Redis: {task.task_status}")
+
+            # Check whether the status cached in Redis might be stale:
+            # PENDING/STARTED tasks might have progressed since they were cached
+            if task.task_status in [TaskStatus.PENDING, TaskStatus.STARTED]:
+                _log.debug(f"Task {task_id} verifying stale status")
+
+                # Try to get fresh status from RQ
+                fresh_rq_task = await self._get_task_from_rq_direct(task_id)
+                if fresh_rq_task and fresh_rq_task.task_status != task.task_status:
+                    _log.info(
+                        f"Task {task_id} status updated: {fresh_rq_task.task_status}"
+                    )
+
+                    # Update memory and Redis with fresh status
+                    self.tasks[task_id] = fresh_rq_task
+                    await self._store_task_in_redis(fresh_rq_task)
+                    return fresh_rq_task
+                else:
+                    _log.debug(f"Task {task_id} status consistent")
+
+            return task
+
+        # Fall back to parent implementation
+        try:
+            parent_task = await super().task_status(task_id, wait)  # type: ignore[misc]
+            _log.debug(f"Task {task_id} from parent: {parent_task.task_status}")
+
+            # Store in Redis for other instances to find
+            await self._store_task_in_redis(parent_task)
+            return parent_task
+        except TaskNotFoundError:
+            _log.warning(f"Task {task_id} not found")
+            raise
+
+    async def _get_task_from_redis(self, task_id: str) -> Optional[Task]:
+        try:
+            async with redis.Redis(connection_pool=self._redis_pool) as r:
+                task_data = await r.get(f"{self.redis_prefix}{task_id}:metadata")
+                if not task_data:
+                    return None
+
+                data: dict[str, Any] = json.loads(task_data)
+                meta = data.get("processing_meta") or {}
+                meta.setdefault("num_docs", 0)
+                meta.setdefault("num_processed", 0)
+                meta.setdefault("num_succeeded", 0)
+                meta.setdefault("num_failed", 0)
+
+                return Task(
+                    task_id=data["task_id"],
+                    task_type=data["task_type"],
+                    task_status=TaskStatus(data["task_status"]),
+                    processing_meta=meta,
+                )
+        except Exception as e:
+            _log.error(f"Redis get task {task_id}: {e}")
+            return None
+
+    async def _get_task_from_rq_direct(self, task_id: str) -> Optional[Task]:
+        """Ask the parent class's RQ update for the state of ``task_id``.
+
+        A placeholder PENDING task is put into ``self.tasks`` under ``task_id``,
+        the parent class's ``_update_task_from_rq`` is awaited, and the entry
+        found afterwards is inspected. If that entry is no longer PENDING it
+        is returned; when a result key is known for the task, the key is also
+        written to Redis with ``ex=86400``.
+
+        Returns:
+            The updated task, or ``None`` when the entry is still PENDING or
+            any exception occurred (exceptions are logged, not raised).
+        """
+        try:
+            _log.debug(f"Checking RQ for task {task_id}")
+
+            # Placeholder entry. Presumably the parent update needs an existing
+            # self.tasks entry to work on -- TODO confirm against the parent.
+            temp_task = Task(
+                task_id=task_id,
+                task_type="convert",
+                task_status=TaskStatus.PENDING,
+                processing_meta={
+                    "num_docs": 0,
+                    "num_processed": 0,
+                    "num_succeeded": 0,
+                    "num_failed": 0,
+                },
+            )
+
+            # Remember whatever was registered before so it can be put back.
+            original_task = self.tasks.get(task_id)
+            self.tasks[task_id] = temp_task
+
+            try:
+                # Calls the parent implementation directly, so the status
+                # mirroring done by this class's own override is not triggered.
+                await super()._update_task_from_rq(task_id)  # type: ignore[misc]
+
+                updated_task = self.tasks.get(task_id)
+                if updated_task and updated_task.task_status != TaskStatus.PENDING:
+                    _log.debug(f"RQ task {task_id}: {updated_task.task_status}")
+
+                    # Store result key if available
+                    if task_id in self._task_result_keys:
+                        try:
+                            async with redis.Redis(
+                                connection_pool=self._redis_pool
+                            ) as r:
+                                await r.set(
+                                    f"{self.redis_prefix}{task_id}:result_key",
+                                    self._task_result_keys[task_id],
+                                    ex=86400,
+                                )
+                                _log.debug(f"Stored result key for {task_id}")
+                        except Exception as e:
+                            # Failing to persist the key does not prevent
+                            # returning the updated task.
+                            _log.error(f"Store result key {task_id}: {e}")
+
+                    return updated_task
+                return None
+
+            finally:
+                # Restore original task state
+                if original_task:
+                    self.tasks[task_id] = original_task
+                elif task_id in self.tasks and self.tasks[task_id] == temp_task:
+                    # Only remove if it's still our temp task
+                    # NOTE(review): this is an equality check, not an identity
+                    # check -- confirm how Task defines ``==``.
+                    del self.tasks[task_id]
+
+        except Exception as e:
+            _log.error(f"RQ check {task_id}: {e}")
+            return None
+
+    async def get_raw_task(self, task_id: str) -> Task:
+        """Look a task up in memory, then in Redis, then via the parent class.
+
+        A task recovered from Redis is cached in ``self.tasks``. A task obtained
+        from the parent is written to Redis before being returned. A
+        ``TaskNotFoundError`` raised by the parent propagates to the caller.
+        """
+        if task_id in self.tasks:
+            return self.tasks[task_id]
+
+        restored = await self._get_task_from_redis(task_id)
+        if not restored:
+            # Nothing usable in Redis: defer to the parent implementation.
+            fetched = await super().get_raw_task(task_id)  # type: ignore[misc]
+            await self._store_task_in_redis(fetched)
+            return fetched
+
+        self.tasks[task_id] = restored
+        return restored
+
+    async def _store_task_in_redis(self, task: Task) -> None:
+        """Write the task's id, type, status and processing meta to Redis.
+
+        The record is stored as JSON under ``<redis_prefix><task_id>:metadata``
+        with ``ex=86400``. Any exception is logged and swallowed.
+        """
+        try:
+            raw_meta: Any = task.processing_meta
+            if hasattr(raw_meta, "model_dump"):
+                serial_meta = raw_meta.model_dump()
+            elif isinstance(raw_meta, dict):
+                serial_meta = raw_meta
+            else:
+                # Neither a model nor a dict: fall back to zeroed counters.
+                serial_meta = {
+                    "num_docs": 0,
+                    "num_processed": 0,
+                    "num_succeeded": 0,
+                    "num_failed": 0,
+                }
+
+            kind = task.task_type
+            kind_text = kind.value if hasattr(kind, "value") else str(kind)
+
+            record: dict[str, Any] = {
+                "task_id": task.task_id,
+                "task_type": kind_text,
+                "task_status": task.task_status.value,
+                "processing_meta": serial_meta,
+            }
+            redis_key = f"{self.redis_prefix}{task.task_id}:metadata"
+            payload = json.dumps(record)
+
+            async with redis.Redis(connection_pool=self._redis_pool) as conn:
+                await conn.set(redis_key, payload, ex=86400)
+        except Exception as e:
+            _log.error(f"Store task {task.task_id}: {e}")
+
+    async def enqueue(self, **kwargs):  # type: ignore[override]
+        """Enqueue through the parent class, then mirror the new task to Redis."""
+        created = await super().enqueue(**kwargs)  # type: ignore[misc]
+        await self._store_task_in_redis(created)
+        return created
+
+    async def task_result(self, task_id: str):  # type: ignore[override]
+        """Return the task result, recovering its result key from Redis if needed.
+
+        The parent lookup is tried first. If it yields ``None``, the result key
+        stored under ``<redis_prefix><task_id>:result_key`` is read, registered
+        in ``self._task_result_keys`` and the parent lookup is repeated.
+        Errors during the Redis lookup are logged and produce ``None``.
+        """
+        direct = await super().task_result(task_id)  # type: ignore[misc]
+        if direct is not None:
+            return direct
+
+        try:
+            async with redis.Redis(connection_pool=self._redis_pool) as conn:
+                stored_key = await conn.get(f"{self.redis_prefix}{task_id}:result_key")
+                if not stored_key:
+                    return None
+                self._task_result_keys[task_id] = stored_key.decode("utf-8")
+                return await super().task_result(task_id)  # type: ignore[misc]
+        except Exception as e:
+            _log.error(f"Redis result key {task_id}: {e}")
+
+        return None
+
+    async def _update_task_from_rq(self, task_id: str) -> None:
+        """Run the parent RQ update and mirror the outcome to Redis.
+
+        The task metadata is re-stored when its status changed during the
+        update. If a result key is known for the task, it is written under
+        ``<redis_prefix><task_id>:result_key`` with ``ex=86400``; failures of
+        that write are logged only.
+        """
+        status_before = None
+        if task_id in self.tasks:
+            status_before = self.tasks[task_id].task_status
+
+        await super()._update_task_from_rq(task_id)  # type: ignore[misc]
+
+        if task_id in self.tasks:
+            refreshed = self.tasks[task_id]
+            status_after = refreshed.task_status
+            if status_before != status_after:
+                _log.debug(f"Task {task_id} status: {status_before} -> {status_after}")
+                await self._store_task_in_redis(refreshed)
+
+        if task_id not in self._task_result_keys:
+            return
+
+        try:
+            async with redis.Redis(connection_pool=self._redis_pool) as conn:
+                await conn.set(
+                    f"{self.redis_prefix}{task_id}:result_key",
+                    self._task_result_keys[task_id],
+                    ex=86400,
+                )
+        except Exception as e:
+            _log.error(f"Store result key {task_id}: {e}")


@lru_cache
@@ -19,6 +277,8 @@ def get_async_orchestrator() -> BaseOrchestrator:

        local_config = LocalOrchestratorConfig(
            num_workers=docling_serve_settings.eng_loc_num_workers,
+            shared_models=docling_serve_settings.eng_loc_share_models,
+            scratch_dir=get_scratch(),
        )

        cm_config = DoclingConverterManagerConfig(
@@ -28,10 +288,34 @@ def get_async_orchestrator() -> BaseOrchestrator:
            allow_external_plugins=docling_serve_settings.allow_external_plugins,
            max_num_pages=docling_serve_settings.max_num_pages,
            max_file_size=docling_serve_settings.max_file_size,
+            queue_max_size=docling_serve_settings.queue_max_size,
+            ocr_batch_size=docling_serve_settings.ocr_batch_size,
+            layout_batch_size=docling_serve_settings.layout_batch_size,
+            table_batch_size=docling_serve_settings.table_batch_size,
+            batch_polling_interval_seconds=docling_serve_settings.batch_polling_interval_seconds,
        )
        cm = DoclingConverterManager(config=cm_config)

        return LocalOrchestrator(config=local_config, converter_manager=cm)
+
+    elif docling_serve_settings.eng_kind == AsyncEngine.RQ:
+        from docling_jobkit.orchestrators.rq.orchestrator import (
+            RQOrchestrator,
+            RQOrchestratorConfig,
+        )
+
+        class RedisAwareRQOrchestrator(RedisTaskStatusMixin, RQOrchestrator):  # type: ignore[misc]
+            pass
+
+        rq_config = RQOrchestratorConfig(
+            redis_url=docling_serve_settings.eng_rq_redis_url,
+            results_prefix=docling_serve_settings.eng_rq_results_prefix,
+            sub_channel=docling_serve_settings.eng_rq_sub_channel,
+            scratch_dir=get_scratch(),
+        )
+
+        return RedisAwareRQOrchestrator(config=rq_config)
+
    elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
        from docling_jobkit.orchestrators.kfp.orchestrator import (
            KfpOrchestrator,
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -1,317 +1,78 @@
 import asyncio
 import logging
-import os
-import shutil
-import time
-from collections.abc import Iterable
-from pathlib import Path
-from typing import Union

-import httpx
-from fastapi import BackgroundTasks, HTTPException
-from fastapi.responses import FileResponse
+from fastapi import BackgroundTasks, Response

-from docling.datamodel.base_models import OutputFormat
-from docling.datamodel.document import ConversionResult, ConversionStatus
-from docling_core.types.doc import ImageRefMode
-from docling_jobkit.datamodel.convert import ConvertDocumentsOptions
-from docling_jobkit.datamodel.task import Task
-from docling_jobkit.datamodel.task_targets import InBodyTarget, PutTarget, TaskTarget
+from docling_jobkit.datamodel.result import (
+    ChunkedDocumentResult,
+    DoclingTaskResult,
+    ExportResult,
+    RemoteTargetResult,
+    ZipArchiveResult,
+)
 from docling_jobkit.orchestrators.base_orchestrator import (
    BaseOrchestrator,
 )

 from docling_serve.datamodel.responses import (
+    ChunkDocumentResponse,
    ConvertDocumentResponse,
-    DocumentResponse,
    PresignedUrlConvertDocumentResponse,
 )
 from docling_serve.settings import docling_serve_settings
-from docling_serve.storage import get_scratch

 _log = logging.getLogger(__name__)


-def _export_document_as_content(
-    conv_res: ConversionResult,
-    export_json: bool,
-    export_html: bool,
-    export_md: bool,
-    export_txt: bool,
-    export_doctags: bool,
-    image_mode: ImageRefMode,
-    md_page_break_placeholder: str,
-):
-    document = DocumentResponse(filename=conv_res.input.file.name)
-
-    if conv_res.status == ConversionStatus.SUCCESS:
-        new_doc = conv_res.document._make_copy_with_refmode(
-            Path(), image_mode, page_no=None
-        )
-
-        # Create the different formats
-        if export_json:
-            document.json_content = new_doc
-        if export_html:
-            document.html_content = new_doc.export_to_html(image_mode=image_mode)
-        if export_txt:
-            document.text_content = new_doc.export_to_markdown(
-                strict_text=True,
-                image_mode=image_mode,
-            )
-        if export_md:
-            document.md_content = new_doc.export_to_markdown(
-                image_mode=image_mode,
-                page_break_placeholder=md_page_break_placeholder or None,
-            )
-        if export_doctags:
-            document.doctags_content = new_doc.export_to_doctags()
-    elif conv_res.status == ConversionStatus.SKIPPED:
-        raise HTTPException(status_code=400, detail=conv_res.errors)
-    else:
-        raise HTTPException(status_code=500, detail=conv_res.errors)
-
-    return document
-
-
-def _export_documents_as_files(
-    conv_results: Iterable[ConversionResult],
-    output_dir: Path,
-    export_json: bool,
-    export_html: bool,
-    export_md: bool,
-    export_txt: bool,
-    export_doctags: bool,
-    image_export_mode: ImageRefMode,
-    md_page_break_placeholder: str,
-) -> ConversionStatus:
-    success_count = 0
-    failure_count = 0
-
-    # Default failure in case results is empty
-    conv_result = ConversionStatus.FAILURE
-
-    artifacts_dir = Path("artifacts/")  # will be relative to the fname
-
-    for conv_res in conv_results:
-        conv_result = conv_res.status
-        if conv_res.status == ConversionStatus.SUCCESS:
-            success_count += 1
-            doc_filename = conv_res.input.file.stem
-
-            # Export JSON format:
-            if export_json:
-                fname = output_dir / f"{doc_filename}.json"
-                _log.info(f"writing JSON output to {fname}")
-                conv_res.document.save_as_json(
-                    filename=fname,
-                    image_mode=image_export_mode,
-                    artifacts_dir=artifacts_dir,
-                )
-
-            # Export HTML format:
-            if export_html:
-                fname = output_dir / f"{doc_filename}.html"
-                _log.info(f"writing HTML output to {fname}")
-                conv_res.document.save_as_html(
-                    filename=fname,
-                    image_mode=image_export_mode,
-                    artifacts_dir=artifacts_dir,
-                )
-
-            # Export Text format:
-            if export_txt:
-                fname = output_dir / f"{doc_filename}.txt"
-                _log.info(f"writing TXT output to {fname}")
-                conv_res.document.save_as_markdown(
-                    filename=fname,
-                    strict_text=True,
-                    image_mode=ImageRefMode.PLACEHOLDER,
-                )
-
-            # Export Markdown format:
-            if export_md:
-                fname = output_dir / f"{doc_filename}.md"
-                _log.info(f"writing Markdown output to {fname}")
-                conv_res.document.save_as_markdown(
-                    filename=fname,
-                    artifacts_dir=artifacts_dir,
-                    image_mode=image_export_mode,
-                    page_break_placeholder=md_page_break_placeholder or None,
-                )
-
-            # Export Document Tags format:
-            if export_doctags:
-                fname = output_dir / f"{doc_filename}.doctags"
-                _log.info(f"writing Doc Tags output to {fname}")
-                conv_res.document.save_as_doctags(filename=fname)
-
-        else:
-            _log.warning(f"Document {conv_res.input.file} failed to convert.")
-            failure_count += 1
-
-    _log.info(
-        f"Processed {success_count + failure_count} docs, "
-        f"of which {failure_count} failed"
-    )
-    return conv_result
-
-
-def process_results(
-    conversion_options: ConvertDocumentsOptions,
-    target: TaskTarget,
-    conv_results: Iterable[ConversionResult],
-    work_dir: Path,
-) -> Union[ConvertDocumentResponse, FileResponse, PresignedUrlConvertDocumentResponse]:
-    # Let's start by processing the documents
-    try:
-        start_time = time.monotonic()
-
-        # Convert the iterator to a list to count the number of results and get timings
-        # As it's an iterator (lazy evaluation), it will also start the conversion
-        conv_results = list(conv_results)
-
-        processing_time = time.monotonic() - start_time
-
-        _log.info(
-            f"Processed {len(conv_results)} docs in {processing_time:.2f} seconds."
-        )
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-
-    if len(conv_results) == 0:
-        raise HTTPException(
-            status_code=500, detail="No documents were generated by Docling."
-        )
-
-    # We have some results, let's prepare the response
-    response: Union[
-        FileResponse, ConvertDocumentResponse, PresignedUrlConvertDocumentResponse
-    ]
-
-    # Booleans to know what to export
-    export_json = OutputFormat.JSON in conversion_options.to_formats
-    export_html = OutputFormat.HTML in conversion_options.to_formats
-    export_md = OutputFormat.MARKDOWN in conversion_options.to_formats
-    export_txt = OutputFormat.TEXT in conversion_options.to_formats
-    export_doctags = OutputFormat.DOCTAGS in conversion_options.to_formats
-
-    # Only 1 document was processed, and we are not returning it as a file
-    if len(conv_results) == 1 and isinstance(target, InBodyTarget):
-        conv_res = conv_results[0]
-        document = _export_document_as_content(
-            conv_res,
-            export_json=export_json,
-            export_html=export_html,
-            export_md=export_md,
-            export_txt=export_txt,
-            export_doctags=export_doctags,
-            image_mode=conversion_options.image_export_mode,
-            md_page_break_placeholder=conversion_options.md_page_break_placeholder,
-        )
-
-        response = ConvertDocumentResponse(
-            document=document,
-            status=conv_res.status,
-            processing_time=processing_time,
-            timings=conv_res.timings,
-        )
-
-    # Multiple documents were processed, or we are forced returning as a file
-    else:
-        # Temporary directory to store the outputs
-        output_dir = work_dir / "output"
-        output_dir.mkdir(parents=True, exist_ok=True)
-
-        # Worker pid to use in archive identification as we may have multiple workers
-        os.getpid()
-
-        # Export the documents
-        conv_result = _export_documents_as_files(
-            conv_results=conv_results,
-            output_dir=output_dir,
-            export_json=export_json,
-            export_html=export_html,
-            export_md=export_md,
-            export_txt=export_txt,
-            export_doctags=export_doctags,
-            image_export_mode=conversion_options.image_export_mode,
-            md_page_break_placeholder=conversion_options.md_page_break_placeholder,
-        )
-
-        files = os.listdir(output_dir)
-        if len(files) == 0:
-            raise HTTPException(status_code=500, detail="No documents were exported.")
-
-        file_path = work_dir / "converted_docs.zip"
-        shutil.make_archive(
-            base_name=str(file_path.with_suffix("")),
-            format="zip",
-            root_dir=output_dir,
-        )
-
-        # Other cleanups after the response is sent
-        # Output directory
-        # background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)
-
-        if isinstance(target, PutTarget):
-            try:
-                with open(file_path, "rb") as file_data:
-                    r = httpx.put(str(target.url), files={"file": file_data})
-                    r.raise_for_status()
-                response = PresignedUrlConvertDocumentResponse(
-                    status=conv_result,
-                    processing_time=processing_time,
-                )
-            except Exception as exc:
-                _log.error("An error occour while uploading zip to s3", exc_info=exc)
-                raise HTTPException(
-                    status_code=500, detail="An error occour while uploading zip to s3."
-                )
-        else:
-            response = FileResponse(
-                file_path, filename=file_path.name, media_type="application/zip"
-            )
-
-    return response
-
-
 async def prepare_response(
-    task: Task, orchestrator: BaseOrchestrator, background_tasks: BackgroundTasks
+    task_id: str,
+    task_result: DoclingTaskResult,
+    orchestrator: BaseOrchestrator,
+    background_tasks: BackgroundTasks,
 ):
-    if task.results is None:
-        raise HTTPException(
-            status_code=404,
-            detail="Task result not found. Please wait for a completion status.",
-        )
-    assert task.options is not None
-
-    work_dir = get_scratch() / task.task_id
-    response = process_results(
-        conversion_options=task.options,
-        target=task.target,
-        conv_results=task.results,
-        work_dir=work_dir,
+    response: (
+        Response
+        | ConvertDocumentResponse
+        | PresignedUrlConvertDocumentResponse
+        | ChunkDocumentResponse
    )
-
-    if work_dir.exists():
-        task.scratch_dir = work_dir
-        if not isinstance(response, FileResponse):
-            _log.warning(
-                f"Task {task.task_id=} produced content in {work_dir=} but the response is not a file."
-            )
-            shutil.rmtree(work_dir, ignore_errors=True)
+    if isinstance(task_result.result, ExportResult):
+        response = ConvertDocumentResponse(
+            document=task_result.result.content,
+            status=task_result.result.status,
+            processing_time=task_result.processing_time,
+            timings=task_result.result.timings,
+            errors=task_result.result.errors,
+        )
+    elif isinstance(task_result.result, ZipArchiveResult):
+        response = Response(
+            content=task_result.result.content,
+            media_type="application/zip",
+            headers={
+                "Content-Disposition": 'attachment; filename="converted_docs.zip"'
+            },
+        )
+    elif isinstance(task_result.result, RemoteTargetResult):
+        response = PresignedUrlConvertDocumentResponse(
+            processing_time=task_result.processing_time,
+            num_converted=task_result.num_converted,
+            num_succeeded=task_result.num_succeeded,
+            num_failed=task_result.num_failed,
+        )
+    elif isinstance(task_result.result, ChunkedDocumentResult):
+        response = ChunkDocumentResponse(
+            chunks=task_result.result.chunks,
+            documents=task_result.result.documents,
+            processing_time=task_result.processing_time,
+        )
+    else:
+        raise ValueError("Unknown result type")

    if docling_serve_settings.single_use_results:
-        if task.scratch_dir is not None:
-            background_tasks.add_task(
-                shutil.rmtree, task.scratch_dir, ignore_errors=True
-            )

        async def _remove_task_impl():
            await asyncio.sleep(docling_serve_settings.result_removal_delay)
-            await orchestrator.delete_task(task_id=task.task_id)
+            await orchestrator.delete_task(task_id=task_id)

        async def _remove_task():
            asyncio.create_task(_remove_task_impl())  # noqa: RUF006
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -28,6 +28,7 @@ class UvicornSettings(BaseSettings):
 class AsyncEngine(str, enum.Enum):
    LOCAL = "local"
    KFP = "kfp"
+    RQ = "rq"


 class DoclingServeSettings(BaseSettings):
@@ -49,11 +50,22 @@ class DoclingServeSettings(BaseSettings):
    options_cache_size: int = 2
    enable_remote_services: bool = False
    allow_external_plugins: bool = False
+    show_version_info: bool = True
+
+    api_key: str = ""

    max_document_timeout: float = 3_600 * 24 * 7  # 7 days
    max_num_pages: int = sys.maxsize
    max_file_size: int = sys.maxsize

+    # Threading pipeline
+    queue_max_size: Optional[int] = None
+    ocr_batch_size: Optional[int] = None
+    layout_batch_size: Optional[int] = None
+    table_batch_size: Optional[int] = None
+    batch_polling_interval_seconds: Optional[float] = None
+
+    sync_poll_interval: int = 2  # seconds
    max_sync_wait: int = 120  # 2 minutes

    cors_origins: list[str] = ["*"]
@@ -63,6 +75,11 @@ class DoclingServeSettings(BaseSettings):
    eng_kind: AsyncEngine = AsyncEngine.LOCAL
    # Local engine
    eng_loc_num_workers: int = 2
+    eng_loc_share_models: bool = False
+    # RQ engine
+    eng_rq_redis_url: str = ""
+    eng_rq_results_prefix: str = "docling:results"
+    eng_rq_sub_channel: str = "docling:updates"
    # KFP engine
    eng_kfp_endpoint: Optional[AnyUrl] = None
    eng_kfp_token: Optional[str] = None
@@ -86,6 +103,10 @@ class DoclingServeSettings(BaseSettings):
                    "KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
                )

+        if self.eng_kind == AsyncEngine.RQ:
+            if not self.eng_rq_redis_url:
+                raise ValueError("RQ Redis url is required when using the RQ engine.")
+
        return self


--- a/docling_serve/ui/init.py
+++ b/docling_serve/ui/init.py
--- a/docling_serve/ui/app.py
+++ b/docling_serve/ui/app.py
@@ -0,0 +1,278 @@
+import io
+import logging
+from pathlib import Path
+from typing import Annotated
+
+from fastapi import (
+    BackgroundTasks,
+    Depends,
+    FastAPI,
+    Form,
+    HTTPException,
+    Request,
+    UploadFile,
+    status,
+)
+from fastapi.responses import HTMLResponse, RedirectResponse, Response
+from fastapi.staticfiles import StaticFiles
+from pydantic import AnyHttpUrl
+from pyjsx import auto_setup  # type: ignore
+from starlette.exceptions import HTTPException as StarletteHTTPException
+
+from docling.datamodel.base_models import OutputFormat
+from docling_core.types.doc.document import (
+    FloatingItem,
+    PageItem,
+    RefItem,
+)
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+)
+
+from docling_serve.auth import APIKeyCookieAuth, AuthenticationResult
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+from docling_serve.datamodel.requests import ConvertDocumentsRequest, HttpSourceRequest
+from docling_serve.helper_functions import FormDepends
+from docling_serve.orchestrator_factory import get_async_orchestrator
+from docling_serve.settings import docling_serve_settings
+
+from .convert import ConvertPage  # type: ignore
+from .pages import AuthPage, StatusPage, TaskPage, TasksPage  # type: ignore
+
+# Initialize JSX.
+auto_setup
+
+_log = logging.getLogger(__name__)
+
+
+# TODO: Isolate passed functions into a controller?
+def create_ui_app(process_file, process_url, task_result, task_status_poll) -> FastAPI:  # noqa: C901
+    ui_app = FastAPI()
+    require_auth = APIKeyCookieAuth(docling_serve_settings.api_key)
+
+    # Static files.
+    ui_app.mount(
+        "/static",
+        StaticFiles(directory=Path(__file__).parent.absolute() / "static"),
+        name="static",
+    )
+
+    # Convert page.
+    @ui_app.get("/")
+    async def get_root():
+        # The UI root has no page of its own; send visitors to the convert form.
+        target = "convert"
+        return RedirectResponse(url=target)
+
+    @ui_app.get("/convert", response_class=HTMLResponse)
+    async def get_convert(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+    ):
+        # Render the convert form with its default options.
+        page = ConvertPage()
+        return str(page)
+
+    @ui_app.post("/convert", response_class=HTMLResponse)
+    async def post_convert(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        options: Annotated[
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
+        ],
+        files: Annotated[list[UploadFile], Form()],
+        url: Annotated[str, Form()],
+        page_min: Annotated[str, Form()],
+        page_max: Annotated[str, Form()],
+    ):
+        # Handle the convert form: start a conversion from uploaded files or,
+        # failing that, from a URL, then redirect to the new task's page. With
+        # neither input, the form is re-rendered with validation messages.
+
+        # Refined model options and behavior.
+        # Page bounds arrive as free-text form fields; an empty field keeps the
+        # corresponding bound already present in options.page_range.
+        # NOTE(review): int() raises ValueError on non-numeric input -- confirm
+        # whether that should become a validation message instead.
+        if len(page_min) > 0:
+            options.page_range = (int(page_min), options.page_range[1])
+        if len(page_max) > 0:
+            options.page_range = (options.page_range[0], int(page_max))
+
+        # Split comma-separated language entries into individual, trimmed,
+        # non-empty codes.
+        options.ocr_lang = [
+            sub_lang.strip()
+            for lang in options.ocr_lang or []
+            for sub_lang in lang.split(",")
+            if len(sub_lang.strip()) > 0
+        ]
+
+        # Drop uploads whose size is falsy (0 or None).
+        files = [f for f in files if f.size]
+        if len(files) > 0:
+            # Directly uploaded documents.
+            response = await process_file(
+                auth=auth,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+                files=files,
+                options=options,
+            )
+        elif len(url.strip()) > 0:
+            # URLs of documents.
+            source = HttpSourceRequest(url=AnyHttpUrl(url))
+            request = ConvertDocumentsRequest(options=options, sources=[source])
+
+            response = await process_url(
+                auth=auth,
+                orchestrator=orchestrator,
+                conversion_request=request,
+            )
+        else:
+            # Nothing to convert: show the form again, keeping the submitted
+            # options, with a message on both input fields.
+            validation = {
+                "files": "Upload files or enter a URL",
+                "url": "Enter a URL or upload files",
+            }
+            return str(ConvertPage(options=options, validation=validation))
+
+        # 303 so the browser follows the redirect with a GET.
+        return RedirectResponse(f"tasks/{response.task_id}/", status.HTTP_303_SEE_OTHER)
+
+    # Task overview page.
+    @ui_app.get("/tasks/", response_class=HTMLResponse)
+    async def get_tasks(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+    ):
+        # List every task the orchestrator holds, ordered by creation time.
+        ordered = list(orchestrator.tasks.values())
+        ordered.sort(key=lambda task: task.created_at)
+
+        return str(TasksPage(ordered))
+
+    # Task specific page.
+    @ui_app.get("/tasks/{task_id}/", response_class=HTMLResponse)
+    async def get_task(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        task_id: str,
+    ):
+        # Render the page for one task: always its polled status, plus its
+        # result once the status is "success" or "failure".
+        poll = await task_status_poll(auth, orchestrator, task_id)
+
+        result = None
+        if poll.task_status in ["success", "failure"]:
+            try:
+                result = await task_result(
+                    auth, orchestrator, background_tasks, task_id
+                )
+            except Exception:
+                # Best effort: the page still renders with the status alone.
+                # Log through the module logger (not the root logger) and keep
+                # the traceback so the failure can be diagnosed.
+                _log.exception("Could not load result for task %s", task_id)
+
+        return str(TaskPage(poll, result))
+
+    # Poll task via HTTP status.
+    @ui_app.get("/tasks/{task_id}/poll", response_class=Response)
+    async def poll_task(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        task_id: str,
+    ):
+        # Report task progress purely through the HTTP status code:
+        # 202 while the task status is "started", 200 for any other status.
+        current = await task_status_poll(auth, orchestrator, task_id)
+        if current.task_status == "started":
+            code = status.HTTP_202_ACCEPTED
+        else:
+            code = status.HTTP_200_OK
+        return Response(status_code=code)
+
+    # Download the contents of zipped documents.
+    @ui_app.get("/tasks/{task_id}/documents.zip")
+    async def get_task_zip(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        task_id: str,
+    ):
+        # Hand back whatever the task result endpoint produces for this task.
+        archive = await task_result(auth, orchestrator, background_tasks, task_id)
+        return archive
+
+    # Get the output of a task, as a converted document in a specific format.
+    @ui_app.get("/tasks/{task_id}/document.{format}")
+    async def get_task_document_format(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        task_id: str,
+        format: str,
+    ):
+        # Serve one export of the task's converted document.
+        # Responds 404 when `format` is not an OutputFormat value or when the
+        # task result carries no content for that format.
+        import json  # local import: only this handler serializes JSON
+
+        if format not in [f.value for f in OutputFormat]:
+            raise HTTPException(status.HTTP_404_NOT_FOUND, "Output format not found.")
+
+        response = await task_result(auth, orchestrator, background_tasks, task_id)
+        document = response.document
+
+        # TODO: Make this compatible with base_models FormatToMimeType?
+        mimes = {
+            "html": "text/html",
+            "md": "text/markdown",
+            "json": "application/json",
+        }
+
+        if format == OutputFormat.JSON:
+            # Serialize with json.dumps: str() of a dict is a Python repr, which
+            # is not valid JSON even though it is served as application/json.
+            json_doc = document.json_content
+            content = (
+                None if json_doc is None else json.dumps(json_doc.export_to_dict())
+            )
+        else:
+            # Read the single field instead of dumping the whole document.
+            content = getattr(document, f"{format}_content", None)
+
+        if content is None:
+            # The format was not exported for this task; do not answer with the
+            # literal text "None".
+            raise HTTPException(
+                status.HTTP_404_NOT_FOUND,
+                "Output format not available for this task.",
+            )
+
+        return Response(
+            content=str(content),
+            media_type=mimes.get(format, "text/plain"),
+        )
+
+    @ui_app.get("/tasks/{task_id}/document/{cref:path}")
+    async def get_task_document_item(
+        request: Request,
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        task_id: str,
+        cref: str,
+    ):
+        # Resolve the reference "#/<cref>" inside the task's JSON document and
+        # return the item, or a PNG rendering of it when the client asks for an
+        # image and the item is a FloatingItem or PageItem.
+        response = await task_result(auth, orchestrator, background_tasks, task_id)
+        doc = response.document.json_content
+        item = RefItem(cref=f"#/{cref}").resolve(doc)  # type: ignore
+
+        # Only a literal "image/*" in the Accept header selects the image branch.
+        if "image/*" in (request.headers.get("Accept") or "") and isinstance(
+            item, FloatingItem | PageItem
+        ):
+            content = io.BytesIO()
+
+            if (
+                isinstance(item, PageItem)
+                and (img_ref := item.image)
+                and img_ref.pil_image
+            ):
+                img_ref.pil_image.save(content, format="PNG")
+            elif isinstance(item, FloatingItem) and (img := item.get_image(doc)):
+                img.save(content, format="PNG")
+
+            # NOTE(review): when neither branch above saved an image, the buffer
+            # is empty and a zero-byte image/png response is returned --
+            # confirm whether a 404 would be preferable.
+            return Response(content=content.getvalue(), media_type="image/png")
+        else:
+            # Presumably serialized by FastAPI's default response handling.
+            return item
+
+    # Page not found; catch all.
+    @ui_app.api_route("/{path_name:path}")
+    def no_page(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+    ):
+        # Catch-all route: any path not matched by an earlier route is a 404.
+        not_found = HTTPException(status.HTTP_404_NOT_FOUND, "Page not found.")
+        raise not_found
+
+    # Exception and auth pages.
+    @ui_app.exception_handler(StarletteHTTPException)
+    @ui_app.exception_handler(Exception)
+    async def exception_page(request: Request, ex: Exception):
+        # Single handler for HTTP exceptions and for any other exception.
+        # In order, it:
+        #   1. turns non-HTTP exceptions into a 500 HTTPException,
+        #   2. on POST requests carrying an "api_key" form field, redirects back
+        #      to the same URL after handing the key to the auth helper,
+        #   3. renders the API key dialog for 401,
+        #   4. renders a generic status page for everything else.
+        if not isinstance(ex, StarletteHTTPException):
+            # Internal error.
+            ex = HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR)
+
+        if request.method == "POST":
+            # Authorization required -> API key dialog.
+            # NOTE(review): this branch runs for any exception raised during a
+            # POST that has an "api_key" field, not only for 401 -- confirm.
+            form = await request.form()
+            form_api_key = form.get("api_key")
+            if isinstance(form_api_key, str):
+                response = RedirectResponse(request.url, status.HTTP_303_SEE_OTHER)
+                # Presumably stores the key as a cookie on the response --
+                # verify against APIKeyCookieAuth.
+                require_auth._set_api_key(response, form_api_key)
+                return response
+
+        if ex.status_code == status.HTTP_401_UNAUTHORIZED:
+            return HTMLResponse(str(AuthPage()), status.HTTP_401_UNAUTHORIZED)
+
+        # HTTP exception page; avoid referer loop.
+        referer = request.headers.get("Referer")
+        if referer == request.url:
+            referer = None
+
+        return HTMLResponse(str(StatusPage(ex, referer)), ex.status_code)
+
+    return ui_app
--- a/docling_serve/ui/convert.px
+++ b/docling_serve/ui/convert.px
@@ -0,0 +1,251 @@
+import json
+import sys
+
+from pyjsx import jsx, JSX
+
+from docling.datamodel.base_models import FormatToExtensions, OutputFormat
+from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode
+from docling_core.types.doc import ImageRefMode
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions, ocr_engines_enum
+
+from .forms import EnumCheckboxes, EnumRadios, EnumSelect, ocr_engine_languages, ValidatedInput
+from .pages import Header, Page
+
+# Form defaults: the stock request options with JSON added to the output formats.
+# This instance is shared; it is the default `options` argument of `ConvertPage`.
+base_convert_options = ConvertDocumentsRequestOptions()
+base_convert_options.to_formats.append(OutputFormat.JSON)
+
+
+def ConvertPage(
+  options: ConvertDocumentsRequestOptions = base_convert_options,
+  validation: None | dict[str, str] = None
+) -> JSX:
+    """Render the document conversion form page.
+
+    Builds a `multipart/form-data` POST form inside a `Page` titled "Convert".
+    The controls are pre-selected / pre-filled from `options`.
+
+    Args:
+        options: Option values shown in the form. Defaults to the shared
+            module-level `base_convert_options`, which is only read here.
+        validation: Handed to the `files` and `url` `ValidatedInput`s;
+            presumably maps field names to error messages (see `ValidatedInput`).
+
+    Returns:
+        The JSX tree of the page.
+
+    NOTE(review): `display-when`, `dep-on` and `dep-values` are custom
+    attributes, presumably interpreted by the client-side script; confirm there.
+    """
+    # Value of the file input's `accept` attribute: every known extension as ".ext".
+    file_accept = ",".join([f".{ext}" for exts in FormatToExtensions.values() for ext in exts])
+
+    return (
+        <Page title="Convert">
+            <main class="container">
+                <Header />
+
+                <form class="convert" method="post" enctype="multipart/form-data">
+                    <legend>
+                        <b>Documents</b>
+                    </legend>
+                    <fieldset class="grid">
+                        <ValidatedInput
+                            name="files"
+                            type="file"
+                            multiple
+                            accept={file_accept}
+                            validation={validation}
+                        />
+                        <ValidatedInput
+                            name="url"
+                            placeholder="or enter a URL: https://arxiv.org/pdf/2501.17887"
+                            validation={validation}
+                        />
+                    </fieldset>
+
+                    <fieldset class="grid">
+                        <EnumSelect
+                            enum={ProcessingPipeline}
+                            selected={options.pipeline}
+                            name="pipeline"
+                            title="Pipeline"
+                        />
+                        <EnumSelect
+                            enum={PdfBackend}
+                            selected={options.pdf_backend}
+                            name="pdf_backend"
+                            title="PDF Backend"
+                        />
+
+                        <div>
+                            <label>Pages</label>
+                            <div role="group">
+                                <input
+                                    type="number"
+                                    name="page_min"
+                                    min={1}
+                                    step={1}
+                                    placeholder="1"
+                                    value={None if options.page_range[0] <= 1 else options.page_range[0]}
+                                />
+                                <input
+                                    type="number"
+                                    name="page_max"
+                                    min={1}
+                                    step={1}
+                                    placeholder="max."
+                                    value={None if options.page_range[1] >= sys.maxsize else options.page_range[1]}
+                                />
+                            </div>
+                        </div>
+
+                        <div>
+                            <label>Timeout<small>in seconds</small></label>
+                            <input
+                                type="number"
+                                name="document_timeout"
+                                min={1}
+                                step={1}
+                                value={int(options.document_timeout)}
+                            />
+                        </div>
+                    </fieldset>
+
+                    <div class="grid">
+                        <EnumCheckboxes
+                            enum={OutputFormat}
+                            selected={options.to_formats}
+                            name="to_formats"
+                            title={<b>Output</b>}
+                        />
+
+                        <div>
+                            <fieldset>
+                                <label>
+                                    <input
+                                        type="checkbox"
+                                        name="do_ocr"
+                                        checked={options.do_ocr}
+                                    />
+                                    <b>OCR</b>
+                                </label>
+                                <label display-when="do_ocr">
+                                    <input
+                                        type="checkbox"
+                                        name="do_code_enrichment"
+                                        checked={options.do_code_enrichment}
+                                    />
+                                    Code
+                                </label>
+                                <label display-when="do_ocr">
+                                    <input
+                                        type="checkbox"
+                                        name="do_formula_enrichment"
+                                        checked={options.do_formula_enrichment}
+                                    />
+                                    Formulas
+                                </label>
+                            </fieldset>
+
+                            <EnumSelect
+                                display-when="do_ocr"
+                                enum={ocr_engines_enum}
+                                selected={options.ocr_engine}
+                                name="ocr_engine"
+                                title="Engine"
+                            />
+
+                            <label display-when="do_ocr">Language</label>
+                            <input
+                                display-when="do_ocr"
+                                name="ocr_lang"
+                                dep-on="ocr_engine"
+                                dep-values={json.dumps(ocr_engine_languages)}
+                                pattern="[\w+]*[,\w+]*"
+                                title="A comma separated list of language codes, of which the format depends on the selected engine."
+                            />
+
+                            <label display-when="do_ocr">
+                                <input
+                                    type="checkbox"
+                                    name="force_ocr"
+                                    checked={options.force_ocr}
+                                />
+                                Force
+                            </label>
+                        </div>
+
+                        <div>
+                            <fieldset>
+                                <label>
+                                    <input
+                                        type="checkbox"
+                                        name="include_images"
+                                        checked={options.include_images}
+                                    />
+                                    <b>Images</b>
+                                </label>
+                                <label display-when="include_images">
+                                    <input
+                                        type="checkbox"
+                                        name="do_picture_classification"
+                                        checked={options.do_picture_classification}
+                                    />
+                                    Classification
+                                </label>
+                                <label display-when="include_images">
+                                    <input
+                                        type="checkbox"
+                                        name="do_picture_description"
+                                        checked={options.do_picture_description}
+                                    />
+                                    Description
+                                </label>
+                                <label display-when="include_images,do_picture_description">Area threshold</label>
+                                <input
+                                    display-when="include_images,do_picture_description"
+                                    type="number"
+                                    name="picture_description_area_threshold"
+                                    min={0}
+                                    max={1}
+                                    step={0.01}
+                                    value={options.picture_description_area_threshold}
+                                />
+                            </fieldset>
+
+                            <EnumSelect
+                                display-when="include_images"
+                                enum={ImageRefMode}
+                                selected={options.image_export_mode}
+                                name="image_export_mode"
+                                title="Export"
+                            />
+                            <label display-when="include_images">Scale</label>
+                            <input
+                                display-when="include_images"
+                                type="number"
+                                name="images_scale"
+                                min={0}
+                                step={0.1}
+                                value={options.images_scale}
+                            />
+                        </div>
+
+                        <div>
+                            <fieldset>
+                                <label>
+                                    <input
+                                        type="checkbox"
+                                        name="do_table_structure"
+                                        checked={options.do_table_structure}
+                                    />
+                                    <b>Tables</b>
+                                </label>
+                                <label display-when="do_table_structure">
+                                    <input
+                                        type="checkbox"
+                                        name="table_cell_matching"
+                                        checked={options.table_cell_matching}
+                                    />
+                                    Cell matching
+                                </label>
+                            </fieldset>
+                            <EnumSelect
+                                display-when="do_table_structure"
+                                enum={TableFormerMode}
+                                selected={options.table_mode}
+                                name="table_mode"
+                                title="Mode"
+                            />
+                        </div>
+                    </div>
+
+                    <div class="sticky-footer">
+                        <input type="submit" value="Convert" />
+                    </div>
+                </form>
+            </main>
+        </Page>
+    )
--- a/docling_serve/ui/forms.px
+++ b/docling_serve/ui/forms.px
@@ -0,0 +1,127 @@
+from enum import Enum
+from typing import Type
+
+from pyjsx import jsx, JSX
+
+from docling.datamodel.pipeline_options import OcrOptions
+
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+
+
+ocr_engine_languages = {
+    SubOptions.kind: ",".join(SubOptions().lang)
+    for SubOptions in OcrOptions.__subclasses__()
+}
+
+
+def _format_label(label: str) -> str:
+    return label.replace("_", " ").lower()
+
+
+def option_example(field_name: str) -> str | None:
+    field = ConvertDocumentsRequestOptions.model_fields[field_name]
+    return (field.examples or [])[0]
+
+
+def ValidatedInput(validation: None | dict[str, str], name: str, **kwargs) -> JSX:
+    if validation:
+        invalid = "true" if name in validation else "false"
+        content = [<input name={name} aria-invalid={invalid} {...kwargs} />]
+
+        if name in validation:
+            content.append(<small>{validation[name]}</small>)
+
+        return <div>{content}</div>
+    else:
+        return <input name={name} {...kwargs} />
+
+
+def EnumCheckboxes(
+    children,
+    enum: Type[Enum],
+    selected: list[Enum],
+    name: str,
+    title: JSX = None,
+    **kwargs
+) -> JSX:
+    return (
+        <fieldset {...kwargs}>
+            {
+                <legend>{title}</legend>
+                if title
+                else None
+            }
+
+            {[
+                <label>
+                    <input
+                        type="checkbox"
+                        name={name}
+                        value={e.value}
+                        checked={e.value in selected}
+                    />
+                    {_format_label(e.name)}
+                </label>
+                for e in enum
+            ]}
+        </fieldset>
+    )
+
+
+def EnumRadios(
+    children,
+    enum: Type[Enum],
+    selected: Enum,
+    name: str,
+    title: JSX = None,
+    **kwargs
+) -> JSX:
+    return (
+        <fieldset {...kwargs}>
+            {
+                <legend>{title}</legend>
+                if title
+                else None
+            }
+
+            {[
+                <label>
+                    <input
+                        type="radio"
+                        name={name}
+                        value={e.value}
+                        checked={e.value == selected}
+                    />
+                    {_format_label(e.name)}
+                </label>
+                for e in enum
+            ]}
+        </fieldset>
+    )
+
+
+def EnumSelect(
+    children,
+    enum: Type[Enum],
+    selected: Enum,
+    name: str,
+    title: JSX = None,
+    **kwargs
+) -> JSX:
+    return (
+        <div {...kwargs}>
+            {
+                <label>{title}</label>
+                if title
+                else None
+            }
+            <select name={name}>
+                {[
+                    <option value={e.value} selected={e.value == selected}>
+                        {_format_label(e.name)}
+                    </option>
+                    for e in enum
+                ]}
+            </select>
+        </div>
+    )
--- a/docling_serve/ui/pages.px
+++ b/docling_serve/ui/pages.px
@@ -0,0 +1,220 @@
+from importlib import metadata
+
+from fastapi import FastAPI, HTTPException, Response
+from pyjsx import jsx, JSX
+
+from docling.datamodel.base_models import OutputFormat
+from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode
+from docling_jobkit.datamodel.task import Task
+from docling_serve.datamodel.responses import ConvertDocumentResponse
+
+from .preview import DocPreview
+
+
+def Header(children, classname: str = "") -> JSX:
+    return (
+        <header class={classname}>
+            <span class="title">
+                D<img src="/ui/static/logo.svg" />CLING SERVE
+            </span>
+
+            <span class="version" title="Docling version">
+                {metadata.version('docling')}
+            </span>
+
+            <nav>
+                <ul>
+                    <li><a href="/ui/convert">Convert</a></li>
+                    <li><a href="/ui/tasks/">Tasks</a></li>
+                </ul>
+            </nav>
+        </header>
+    )
+
+
+def Page(children, title: str, poll: bool = False) -> JSX:
+    return (
+        <html lang="en" id="root">
+            <head>
+                <title>{title}</title>
+
+                <meta charset="utf-8" />
+                <meta name="viewport" content="width=device-width, initial-scale=1" />
+
+                <link rel="stylesheet" href="/ui/static/style.css" />
+                <script src="/ui/static/main.js" />
+            </head>
+
+            <body onload={'setInterval(async () => { if ((await fetch("poll")).status == 200) location.reload(); }, 3000)' if poll else None}>
+                {children}
+            </body>
+        </html>
+    )
+
+
+def AuthPage():
+    return (
+        <Page title="Authenticate">
+            <form method="post">
+                <dialog open>
+                    <article>
+                        <header>
+                            <h4>Authenticate</h4>
+                        </header>
+                        <input
+                            type="password"
+                            name="api_key"
+                            placeholder="Enter API key"
+                            required autofocus
+                        />
+                        <footer>
+                            <input type="submit" value="Confirm" />
+                        </footer>
+                    </article>
+                </dialog>
+            </form>
+        </Page>
+    )
+
+
+def TasksPage(tasks: list[Task]) -> JSX:
+    return (
+        <Page title="Tasks">
+            <main class="container">
+                <Header />
+
+                {(
+                    <p>There are no active tasks. <a href="../convert">Convert</a> a document to create a new task.</p>
+                ) if len(tasks) == 0 else (
+                    <table>
+                        <thead>
+                            <tr>
+                                <th>Task</th>
+                                <th>Status</th>
+                                <th>ID</th>
+                                <th>Created</th>
+                            </tr>
+                        </thead>
+                        <tbody>
+                            {
+                                <tr>
+                                    <td>{task.task_type.name}</td>
+                                    <td>{task.task_status.name}</td>
+                                    <td>
+                                        <a href={f"{task.task_id}/"}>{task.task_id}</a>
+                                    </td>
+                                    <td>{task.created_at.strftime("%d-%m-%Y, %H:%M:%S")}</td>
+                                </tr>
+                                for task in tasks
+                            }
+                        </tbody>
+                    </table>
+                )}
+            </main>
+        </Page>
+    )
+
+
+def TaskPage(poll, task: ConvertDocumentResponse) -> JSX:
+    def PlainPage(children, poll = False) -> JSX:
+        return (
+            <Page title="Task" poll={poll}>
+                <main class="container">
+                    <Header classname={"loading" if poll else None} />
+                    {children}
+                </main>
+            </Page>
+        )
+
+    if isinstance(task, Response):
+        return (
+            <PlainPage>
+                <p>
+                    <ins>Converted multiple documents successfully</ins>
+                </p>
+                <a href="documents.zip">documents.zip</a>
+            </PlainPage>
+        )
+    else:
+        match poll.task_status:
+            case "success":
+                doc = task.document.dict()
+                doc_json = task.document.json_content
+
+                return (
+                    <Page title={task.document.filename}>
+                        <main class="preview">
+                            <Header />
+
+                            <div class="status">
+                                <div>
+                                    <span>Task</span>
+                                    <b>{poll.task_id}</b>
+                                </div>
+                                <div>
+                                    <span>converted</span>
+                                    <b>{task.document.filename}</b>
+                                </div>
+                                <div>
+                                    <span>in</span>
+                                    <b>{round(task.processing_time)} seconds</b>
+                                </div>
+                            </div>
+                            
+                            <div class="formats">
+                                {[
+                                    <a class="secondary" href={f"document.{f.value}"} target="_blank">
+                                        <button>{f.name}</button>
+                                    </a>
+                                    for f in OutputFormat
+                                    if doc.get(f"{f.value}_content")
+                                ]}
+                                <label class="configDarkImg">
+                                    <input type="checkbox" name="invert-images" persist="preview" />
+                                    Invert images
+                                </label>
+                            </div>
+
+                            {
+                                <DocPreview doc={doc_json} />
+                                if doc_json
+                                else (<p>No document preview because JSON is missing as an output format.</p>)
+                            }
+                        </main>
+                    </Page>
+                )
+            case "started":
+                return (
+                    <PlainPage poll>
+                        <p class="progress">Task <b>{poll.task_id}</b> is in progress...</p>
+                        <progress />
+                    </PlainPage>
+                )
+            case _:
+                return (
+                    <PlainPage>
+                        <p class="fail">
+                            Task <b>{poll.task_id}</b> failed.
+                        </p>
+                        <button onclick="history.back()">
+                            Go back
+                        </button>
+                    </PlainPage>
+                )
+
+
+def StatusPage(ex: HTTPException, referer: str | None) -> JSX:
+    return (
+        <Page title={ex.status_code}>
+            <main class="container">
+                <Header />
+                <h4>{ex.status_code}</h4>
+                <p>{ex.detail}</p>
+                <p>
+                    <a href={referer or ".."}>
+                        <button>Go back</button>
+                    </a>
+                </p>
+            </main>
+        </Page>
+    )
--- a/docling_serve/ui/preview.px
+++ b/docling_serve/ui/preview.px
@@ -0,0 +1,347 @@
+from collections import defaultdict
+from html import escape
+from typing import Type
+
+from docling_core.types.doc.document import (
+    BaseAnnotation,
+    CodeItem,
+    ContentLayer,
+    DescriptionAnnotation,
+    DoclingDocument,
+    DocItem,
+    FloatingItem,
+    Formatting,
+    FormulaItem,
+    GroupItem,
+    GroupLabel,
+    ListGroup,
+    ListItem,
+    NodeItem,
+    PictureClassificationData,
+    PictureItem,
+    ProvenanceItem,
+    RefItem,
+    Script,
+    SectionHeaderItem,
+    TableCell,
+    TableItem,
+    TextItem,
+    TitleItem
+)
+from pyjsx import jsx, JSX, JSXComponent
+
+from .svg import image, path, rect, text
+
+
+_node_components: dict[str, JSXComponent] = {}
+
+
+def component(*node_types: list[Type[BaseAnnotation | NodeItem]]):
+    def decorator(component):
+        for t in node_types:
+            _node_components[t.__name__] = component
+    return decorator
+
+
+def AnnotationComponent(children, annotation: BaseAnnotation):
+    Comp = _node_components.get(annotation.__class__.__name__)
+    element = Comp(annotation=annotation, children=[]) if Comp else (
+        <code>{escape(annotation.model_dump_json(indent=2))}</code>
+    )
+
+    element.props["class"] = element.props.get("class", "") + " annotation"
+    element.props["data-kind"] = annotation.kind
+
+    return element
+
+
+def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
+    # Specific component or fallback.
+    Comp = _node_components.get(node.__class__.__name__)
+    element = Comp(node=node, doc=doc, children=[]) if Comp else (
+        <span class="void"></span>
+    )
+
+    # Wrap item component with annotations, if any.
+    if isinstance(node, DocItem) and (anns := node.get_annotations()):
+        element = (
+            <div class="annotated">
+                {element}
+                {[<AnnotationComponent annotation={ann} /> for ann in anns]}
+            </div>
+        )
+
+    # Extend interaction and styling.
+    id = node.self_ref[2:]
+    element.props["id"] = id
+    element.props["onclick"] = "clickId(event)"
+
+    classes = ["item", node.content_layer.value]
+    element.props["class"] = f"{element.props.get("class", "")} {" ".join(classes)}"
+
+    return element
+
+
+def node_provs(node: NodeItem, doc: DoclingDocument) -> ProvenanceItem:
+    return node.prov if isinstance(node, DocItem) else [
+        p
+        for c in node.children
+        if isinstance(c.resolve(doc), DocItem)
+        for p in c.resolve(doc).prov
+    ]
+
+
+def DocPage(children, page_no: int, items: list[NodeItem], doc: DoclingDocument):
+    page = doc.pages[page_no]
+    exclusive_items = [
+        item
+        for item in items
+        if min([p.page_no for p in node_provs(item, doc)]) == page_no
+    ]
+
+    comps = []
+    for i in range(len(exclusive_items)):
+        item = exclusive_items[i]
+        id = item.self_ref[2:]
+        kind, *index = id.split("/")
+
+        parent_class = ""
+        if isinstance(item, GroupItem):
+            parent_class = "group"
+        else:
+            parent = item.parent.resolve(doc)
+            if isinstance(parent, GroupItem) and parent.label is not GroupLabel.UNSPECIFIED:
+                parent_class = "grouped"
+
+        comps.append(
+            <div class={f"item-markers {parent_class} {item.content_layer.value}"} data-id={id}>
+                <span>{"/".join(index)}</span>
+                <span>{item.label.replace("_", " ")}</span>
+                {
+                    <span>{item.content_layer.value.replace("_", " ")}</span>
+                    if item.content_layer is not ContentLayer.BODY
+                    else None
+                }
+                <a href={f"document/{id}"} target="_blank">{"{;}"}</a>
+            </div>
+        )
+        comps.append(<NodeComponent node={item} doc={doc} />)
+
+        pages = set([p.page_no for p in node_provs(item, doc)])
+        page_mark_class = "page-marker"
+        if i == 0 or len(pages) > 1:
+            page_mark_class += " border"
+        comps.append(<div class={page_mark_class}></div>)
+
+    
+    def ItemBox(children, item: DocItem, prov: ProvenanceItem):
+        item_id = item.self_ref[2:]
+        sub_items = [
+            (item_id, prov.bbox.to_top_left_origin(page.size.height))
+        ]
+
+        # Table cells.
+        if isinstance(item, TableItem):
+            for cell in item.data.table_cells:
+                sub_items.append(
+                    (f"{item_id}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}", cell.bbox)
+                )
+
+        return [
+            <rect
+                data-id={id}
+                x={bbox.l - 1}
+                y={bbox.t - 1}
+                width={bbox.width + 2}
+                height={bbox.height + 2}
+                vector-effect="non-scaling-stroke"
+                onclick="clickId(event)"
+            />
+            for id, bbox in sub_items
+        ]
+
+    # Span extra row to fill up excess space.
+    comps.append(
+        <svg
+            class="page-image"
+            style={{ "grid-row": f"span {len(exclusive_items) + 1}" }}
+            width="50vw"
+            viewBox={f"0 0 {page.size.width} {page.size.height}"}
+        >
+            <image
+                href={f"document/pages/{page_no}"}
+                width={page.size.width}
+                height={page.size.height}
+            />
+            {[
+                <ItemBox item={item} prov={prov} />
+                for item in items
+                if isinstance(item, DocItem)
+                for prov in item.prov
+                if prov.page_no == page_no
+            ]}
+
+            <text class="top-no" x={5} y={5}>{page_no}</text>
+            <text class="bottom-no" x={5} y={page.size.height - 5}>{page_no}</text>
+        </svg>
+    )
+
+    return <div class="page">{comps}</div>
+
+
+def DocPreview(children, doc: DoclingDocument):
+    page_items: dict[int, list[NodeItem]] = defaultdict(list)
+
+    for item, level in doc.iterate_items(
+        with_groups=True,
+        included_content_layers={*ContentLayer}
+    ):
+        if not isinstance(item, GroupItem) or item.label is not GroupLabel.UNSPECIFIED:
+            pages = set([p.page_no for p in node_provs(item, doc)])
+            for page in pages:
+                page_items[page].append(item)
+
+    return [
+        <DocPage page_no={page_no} items={page_items[page_no]} doc={doc} />
+        for page_no in sorted(page_items.keys())
+    ]
+
+
+def _text_classes(node: TextItem) -> str:
+    classes = [node.label]
+
+    if frmt := node.formatting:
+        formats = {
+            "bold": frmt.bold,
+            "italic": frmt.italic,
+            "underline": frmt.underline,
+            "strikethrough": frmt.strikethrough
+        }
+        classes.extend([cls for cls, active in formats.items() if active])
+        classes.append(frmt.script)
+
+    return " ".join(classes)
+
+
+@component(TextItem)
+def TextComponent(children, node: TextItem, doc: DoclingDocument):
+    return <p class={_text_classes(node)}>{escape(node.text)}</p>
+
+
+@component(TitleItem)
+def TitleComponent(children, node: TitleItem, doc: DoclingDocument):
+    return <h1 class={_text_classes(node)}>{escape(node.text)}</h1>
+
+
+@component(SectionHeaderItem)
+def SectionHeaderComponent(children, node: SectionHeaderItem, doc: DoclingDocument):
+    return <h4 class={_text_classes(node)}>{escape(node.text)}</h4>
+
+
+@component(ListItem)
+def ListComponent(children, node: ListItem, doc: DoclingDocument):
+    return (
+        <li>
+            <b>{node.marker}</b>
+            <span class={_text_classes(node)}>{escape(node.text)}</span>
+        </li>
+    )
+
+
+@component(CodeItem)
+def CodeComponent(children, node: CodeItem, doc: DoclingDocument):
+    return (
+        <figure>
+            <code class={_text_classes(node)}>
+                {escape(node.text or node.orig)}
+            </code>
+        </figure>
+    )
+
+
+@component(FormulaItem)
+def FormulaComponent(children, node: FormulaItem, doc: DoclingDocument):
+    return (
+        <figure>
+            <code class={_text_classes(node)}>
+                {escape(node.text or node.orig)}
+            </code>
+        </figure>
+    )
+
+
+@component(PictureItem)
+def PictureComponent(children, node: PictureItem, doc: DoclingDocument):
+    return <figure><img src={f"document/{node.self_ref[2:]}"} loading="lazy" /></figure>
+
+
+@component(PictureClassificationData)
+def PictureClassificationComponent(children, annotation: PictureClassificationData):
+    return (
+        <table>
+            <tbody>
+                {[
+                    <tr>
+                        <td>{cls.class_name.replace("_", " ")}</td>
+                        <td>{f"{cls.confidence:.2f}"}</td>
+                    </tr>
+                    for cls in annotation.predicted_classes
+                    if cls.confidence > 0.01
+                ]}
+            </tbody>
+        </table>
+    )
+
+
+@component(DescriptionAnnotation)
+def DescriptionAnnotation(children, annotation: DescriptionAnnotation):
+    return <span>{escape(annotation.text)}</span>
+
+
+@component(TableItem)
+def TableComponent(children, node: TableItem, doc: DoclingDocument):
+    covered_cells: set[(int, int)] = set()
+
+    def check_cover(cell: TableCell):
+        is_covered = (cell.start_col_offset_idx, cell.start_row_offset_idx) in covered_cells
+
+        if not is_covered:
+            for x in range(cell.start_col_offset_idx, cell.end_col_offset_idx):
+                for y in range(cell.start_row_offset_idx, cell.end_row_offset_idx):
+                    covered_cells.add((x, y))
+        
+        return is_covered
+
+    def Cell(children, cell: TableCell):
+        id = f"{node.self_ref[2:]}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}"
+
+        return (
+            <td
+                id={id}
+                class={"header" if cell.column_header or cell.row_header else None}
+                colspan={cell.col_span or 1}
+                rowspan={cell.row_span or 1}
+                onclick="clickId(event)"
+            >
+                {escape(cell.text)}
+            </td>
+        )
+
+    return (
+        <div class="table">
+            <table>
+                <tbody>
+                    {[
+                        <tr>
+                            {[
+                                <Cell cell={cell} />
+                                for cell in row
+                                if not check_cover(cell)
+                            ]}
+                        </tr>
+                        for row in node.data.grid
+                    ]}
+                </tbody>
+            </table>
+        </div>
+    )
--- a/docling_serve/ui/static/logo.svg
+++ b/docling_serve/ui/static/logo.svg
@@ -0,0 +1,116 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg width="100%" height="100%" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:1.5;">
+    <g id="Docling" transform="matrix(1.07666,0,0,1.07666,-35.9018,-84.1562)">
+        <g id="Outline" transform="matrix(1,0,0,1,-0.429741,55.0879)">
+            <path d="M394.709,69.09C417.34,35.077 467.97,30.178 478.031,55.609C486.35,55.043 494.726,54.701 503.158,54.589C533.157,45.238 560.496,47.419 584.65,60.732C800.941,96.66 966.069,284.814 966.069,511.232C966.069,763.284 761.435,967.918 509.383,967.918C433.692,967.918 362.277,949.464 299.385,916.808L242.3,931.993C203.092,943.242 187.715,928.369 208.575,891.871C208.935,891.24 216.518,879.37 223.997,867.677C119.604,783.975 52.698,655.355 52.698,511.232C52.698,298.778 198.086,120.013 394.709,69.09Z" style="fill:white;"/>
+        </g>
+        <g id="Color" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
+            <path d="M284.8,894.232L179.735,783.955L130.222,645.203L125.538,504.726L185.211,385.816C209.006,322.738 249.951,278.973 302.281,248.028L406.684,203.333L413.483,175.767L436.637,152.428L451.408,153.312L457.726,183.183L485.164,165.379L526.92,159.699L557.014,177.545L612.652,211.018C679.009,226.066 740.505,264.146 797.138,325.26L862.813,423.477L891.583,560.826L883.273,683.32L814.268,809.924L734.431,894.384L644.495,926.906L497.146,954.121L361.064,940.647L284.8,894.232Z" style="fill:url(#_Linear1);"/>
+            <path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z" style="fill:url(#_Linear2);"/>
+            <g transform="matrix(-0.765945,0,0,1,839.727,5.47434)">
+                <clipPath id="_clip3">
+                    <path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z"/>
+                </clipPath>
+                <g clip-path="url(#_clip3)">
+                    <g transform="matrix(-1.18516,0,0,0.907769,1039.04,88.3496)">
+                        <use xlink:href="#_Image4" x="223.969" y="674.21" width="152.098px" height="213.852px" transform="matrix(0.994105,0,0,0.999308,0,0)"/>
+                    </g>
+                </g>
+            </g>
+            <path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
+            <g transform="matrix(-1,0,0,1,1022.04,2.74442)">
+                <path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
+            </g>
+            <path d="M354.92,650.818L420.009,663.185L493.368,666.379L554.826,665.251L620.19,658.511L658.169,651.428L671.428,644.802L673.265,627.093L659.898,611.845L625.422,609.244L599.275,591.212L568.632,556.79L542.9,534.336L515.052,528.253L480.412,532.71L455.2,552.337L428.514,578.155L405.312,599.359L374.228,612.097L355.342,614.456L340.75,630.308L341.568,645.341L354.92,650.818Z" style="fill:url(#_Linear5);"/>
+            <path d="M257.168,949.32L317.434,876.747L364.928,810.6L384.1,743.934L378.759,714.719L376.844,685.849L374.836,659.954L448.734,664.2L511.462,667.602L571.339,665.091L632.796,658.836L648.232,656.882L649.937,697.808L608.105,717.702L598.45,738.594L592.286,761.642L604.743,796.309L639.595,825.803L649.872,840.757L558.219,895.152L502.124,907.569L425.781,923.496L333.29,931.298L286.269,936.907L257.168,949.32Z" style="fill:url(#_Linear6);"/>
+            <g transform="matrix(1,0,0,1.30081,-1.77636e-15,-196.488)">
+                <path d="M374.165,685.268C463.946,706.599 553.728,707.491 643.51,688.593L641.903,653.199C549.263,671.731 459.645,672.22 373.059,654.611L374.165,685.268Z" style="fill-opacity:0.18;"/>
+            </g>
+            <path d="M459.633,571.457C476.7,536.091 530.064,535.913 553.1,568.767C520.703,551.407 489.553,552.374 459.633,571.457Z" style="fill:white;"/>
+            <g transform="matrix(1,0,0,1,0.223468,-2.61949)">
+                <path d="M355.3,267.232C500.64,173.156 720.699,241.362 793.691,423.582C766.716,384.84 735.725,357.078 697.53,349.014L717.306,335.248C698.537,321.49 675.794,320.957 651.039,327.119C652.235,315.768 658.995,306.991 674.188,302.115C641.864,287.427 617.356,289.473 596.258,298.818C597.049,286.116 605.827,278.087 620.068,273.254C589.192,267.477 564.13,270.926 544.651,283.232C545.822,271.831 550.709,260.943 560.913,250.79C517.498,257.095 492.995,267.925 482.892,282.202C477.311,269.499 477.274,257.221 487.625,245.739C439.161,252.932 421.555,265.094 410.355,278.286C407.697,269.01 407.705,260.632 410.853,253.316C389.633,254.773 372.178,260.663 355.3,267.232Z" style="fill:rgb(255,213,95);"/>
+            </g>
+            <path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
+            <g transform="matrix(0.114323,-0.655229,0.82741,0.144365,224.632,497.317)">
+                <path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
+            </g>
+            <g transform="matrix(1.6739,1.15217e-16,-1.15217e-16,-0.733075,-341.46,1039.77)">
+                <path d="M447.449,560.911C468.179,536.963 546.237,539.305 565.638,560.831C533.166,555.541 477.296,553.494 447.449,560.911Z" style="fill:white;"/>
+            </g>
+            <path d="M348.201,622.341C395.549,653.534 622.351,660.854 661.936,616.729L677.568,633.834L667.044,650.308L557.802,667.518L498.074,670.562L446.718,666.416L391.404,658.406L348.154,652.501L340.161,637.119L348.201,622.341Z" style="fill:rgb(199,68,6);"/>
+        </g>
+        <g id="Black-outline" serif:id="Black outline" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
+            <path d="M373.389,657.919C376.285,676.334 377.04,695.016 375.326,714.008" style="fill:none;stroke:black;stroke-width:15.73px;"/>
+            <path d="M645.931,654.961C646.158,669.958 647.22,684.853 648.975,699.661" style="fill:none;stroke:black;stroke-width:15.73px;"/>
+            <path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
+            <g transform="matrix(0.94177,0,0,0.94909,28.8868,3.79501)">
+                <ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
+            </g>
+            <g transform="matrix(0.112099,0.0552506,-0.0673118,0.136571,455.367,509.409)">
+                <ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
+            </g>
+            <g transform="matrix(-0.112099,0.0552506,0.0673118,0.136571,560.529,509.492)">
+                <ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
+            </g>
+            <g transform="matrix(-1,0,0,1,1013.33,-1.15187)">
+                <path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
+            </g>
+            <g transform="matrix(-0.94177,0,0,0.94909,984.44,2.64314)">
+                <ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
+            </g>
+            <g transform="matrix(1,0,0,1,1.9047,-5.57346)">
+                <path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
+            </g>
+            <g transform="matrix(-1,0,0,1,1011.43,-5.7284)">
+                <path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
+            </g>
+            <g transform="matrix(0.973815,0,0,1.00246,4.71761,-0.508759)">
+                <path d="M407.22,206.891C107.655,339.384 134.447,630.03 314.615,708.305" style="fill:none;stroke:black;stroke-width:29.39px;"/>
+            </g>
+            <g transform="matrix(-0.973815,0,0,1.00246,1006.67,-1.31695)">
+                <path d="M461.559,196.756C119.768,256.762 111.059,642.544 320.305,711.486" style="fill:none;stroke:black;stroke-width:29.39px;"/>
+            </g>
+            <g id="vector-duck" serif:id="vector duck">
+                <path d="M240.912,850.71C248.043,740.231 325.609,685.992 371.268,715.193C386.487,724.926 392.506,757.72 358.575,816.753C327.005,871.68 300.465,894.596 288.329,903.447" style="fill:none;stroke:black;stroke-width:21.79px;"/>
+                <path d="M638.382,843.426C427.991,964.695 389.022,902.942 251.512,947.641L307.759,889.573" style="fill:none;stroke:black;stroke-width:15.73px;"/>
+                <path d="M770.991,853.754C779.364,764.998 730.67,727.923 666.385,704.966C629.568,691.819 580.483,723.886 595.974,772.596C606.285,805.016 650.54,839.029 707.786,886.778" style="fill:none;stroke:black;stroke-width:21.79px;"/>
+                <g transform="matrix(1,0,0,1,-1.87208,0.908099)">
+                    <path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
+                </g>
+                <g transform="matrix(-0.969851,0.2437,0.2437,0.969851,773.329,-138.212)">
+                    <path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
+                </g>
+                <path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
+                <g transform="matrix(-1,0,0,1,1014.44,-0.213451)">
+                    <path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
+                </g>
+            </g>
+            <g transform="matrix(2.4586,0,0,2.5497,-444.527,-690.434)">
+                <ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
+            </g>
+            <g transform="matrix(2.4586,0,0,2.5497,-127.75,-690.991)">
+                <ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
+            </g>
+            <path d="M505.738,698.061L578.879,713.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
+            <path d="M422.781,709.6L568.438,743.041" style="fill:none;stroke:black;stroke-width:12.1px;"/>
+            <path d="M419.941,738.409L565.688,772.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
+            <path d="M408.6,787.08L510.634,810.689" style="fill:none;stroke:black;stroke-width:12.1px;"/>
+            <path d="M397.571,815.956L500.93,840.219" style="fill:none;stroke:black;stroke-width:12.1px;"/>
+            <path d="M386.763,844.926L454.065,861.974" style="fill:none;stroke:black;stroke-width:12.1px;"/>
+            <path d="M459.169,919.169C512.194,898.262 539.171,867.298 535.241,824.402C568.052,818.31 598.499,817.058 625.84,822.165" style="fill:none;stroke:black;stroke-width:16.95px;"/>
+            <path d="M366.219,241.106C389.605,229.261 413.371,220.601 438.247,217.5C416.795,202.419 418.72,174.582 444.22,162.47C442.086,178.175 447.633,193.354 464.772,207.738C468.721,167.57 530.015,162.087 545.674,184.112C526.45,189.314 513.082,197.344 504.566,207.717C522.403,208.119 540.706,207.86 556.2,210.609L566.935,168.471C536.388,146.208 495.718,142.166 464.65,166.705C467.703,133.264 419.536,128.364 404.624,178.47L366.219,241.106Z"/>
+            <path d="M392.617,924.576C428.953,936.938 467.84,943.636 508.258,943.636C708.944,943.636 871.876,778.49 871.876,575.076C871.876,382.463 725.788,224.162 539.898,207.895L554.137,173.696L554.485,168.187C757.218,191.602 914.895,366.003 914.895,577.383C914.895,804.698 732.549,989.249 507.949,989.249C435.381,989.249 367.223,969.983 308.199,936.232L392.617,924.576ZM279.206,917.988C171.663,843.819 101.002,718.887 101.002,577.383C101.002,383.006 234.333,219.898 413.398,176.712L424.375,216.389C264.082,254.803 144.64,400.913 144.64,575.076C144.64,703.735 209.822,817.086 308.514,883.023L279.206,917.988Z"/>
+            <path d="M714.938,895.223L647.287,836.693L616.06,855.308L549.158,889.412L459.845,919.216L390.213,928.828L429.291,950.712L535.832,960.1L586.137,952.591L662.254,931.896L714.938,895.223Z"/>
+            <path d="M423.538,929.39C509.164,917.593 580.815,890.465 640.827,850.566C635.677,886.828 622.639,918.218 594.006,939.977C530.254,930.953 474.955,928.632 423.538,929.39Z" style="fill:url(#_Linear7);"/>
+        </g>
+    </g>
+    <defs>
+        <linearGradient id="_Linear1" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-52.3962,375.121,-375.121,-52.3962,471.134,384.463)"><stop offset="0" style="stop-color:rgb(255,176,44);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
+        <linearGradient id="_Linear2" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(28.6198,-84.8913,84.8913,28.6198,647.831,831.55)"><stop offset="0" style="stop-color:rgb(255,73,2);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,176,44);stop-opacity:1"/></linearGradient>
+        <image id="_Image4" width="153px" height="214px" xlink:href="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4ICA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/wAARCADWAJkDAREAAhEBAxEB/8QAGQABAQEBAQEAAAAAAAAAAAAAAwACBwYF/8QAGBABAQEBAQAAAAAAAAAAAAAAAgABEhH/xAAbAQADAQADAQAAAAAAAAAAAAAAAQMCBAUGB//EABYRAQEBAAAAAAAAAAAAAAAAAAABEf/aAAwDAQACEQMRAD8A63fAX1BQFAUBQFAUBQFAUBQFAZShqQSUNyBSmpIJK0pIJqakgUptyCampIx1DWPS0XSqAoCgKAoCgKAoCgKAwlDUgkobkE1aVkClNuQbU1JAtTUkElNSQTU25GOptY9Vcd0KgKAoCgKAoCgKAoDDUNSCSmpIJqakgUptyCampIJKakgWpqSCSm3IJKakjHU2sewuM86oCgKAoCgKAoCgMJQ1IJqakgWpqSCSmpIJqbcgUpqSCSmpIJqbcgmpqSCSmpIx1PGse1uK8yoCgKAoCgKAoA2obkGlNuQLU1JBJTUkElPG5AtTUkElNSQSU1JBNTbkClNSQSU1JGOptY93cR5VQFAUBQFAUAbUNyCam3IJKakgmpqSBampIJKbcgmpqSBampIJKakgmptyBampINKakjHU8ax0C4byKgKAoCgLd8gDShuQTU25AtTVkElNuQTU1JBNTUkElNuQLU1JBNTUkElNSQLU25BJWlJBJTUkY6hrHRrhPGqAoCgLd8gDahuQSU1JAtTUkE1NuQSU1JBNTUkClNSQSU25BNTUkC1NSQSVpSQSUNyCatKSBSmpIx1DWOmXBeJUBQFu+QBtQ3IFqakgkpqSCam3IFqakgkpqSCampIJqbcgUpqSCampIJq0pIJKakgWptyCampIJKakjHU2sdRuveFUBbvkASUNyCSmpIJqakgkpqSBam3IJqakgkpqSCam3IFqakgmpqSCampIFq0pIJKbcgkpqSBSmpIJKakjHUNY6vde8Ct3yAJKG5BNTUkE1NSQLU1JBJTUkE1NuQLU1JBJWlJBJQpIJq03IFKakgkp4pIJqakgmptyBSmpIJqakgkpqSMdQeOt7vl1z5/INKG5BNTbkClPFJBJTUkE1NSQKU1JBJTbkE1NSQLU1JBtTbkC1aUkE1NSQSU1JAtTUkElNuQSU1JBJTUkC1NSRjqbWOupXWPnsgmpqSBSmpIJqbcgkpqSBampIJK0pIJKbcgWoUkE1aUkElNSQTU25ApTUkElNSQSU25AtTUkElNSQTU1JApTUkZ6g8dcautfPpBJTUkE1NSQLU25BJTUkE1aUkC1NuQSU1JBNTUkClNSQSU25BJTUkE1NSQSU1JAtTbkE1NSQSU8UkClNSQe77NtQHWErrXgJBJTUkE1NuQSU8UkClNSQTVpuQSU1JBJTUkC1NSQTU1JBJTbkE1NSQKU1JBJTUkElNuQLU1JBJTUkHu+zbUBQHU2rrnhJBJWlJAtTbkElNSQTU1JBJTbkC1NSQSU1JBNTUkElNuQKU1JBJTUkE1NSQSU1JAtTbkElNSQe77NtQFAUB01q694iQSU1JBNWm5BNTUkClNSQTU25BJTUkElNSQKU25BNTVkElNuQTU1JApTUkElNSQSU25B7vs21AUBQFAdIauC8ZIJKeNyCampIFKakgmp4pIJKbcgWpqSCSnikgmpqSCSm3IFKakgkpqSCampIFKakjG77NpQFAUBQFAdCauE8fIJKakgkpq
SCampIFKakgkptyCSmpIFqakg0ptyBampIJqakgWpqSCSmpIxNpQFAUBQFAUB71q4bycgkpqSBampIJKakgmpqSCSm3IFqakgkpqSCSmpIJqbcgWrSkgkoUkYm0oCgKAoCgKAoD3CVxHl5AtTUkElNSQTU1JApTbkElNSQSU1JApTUkElNuQTU1JBJWlJGIaUBQFAUBQFAUBQHsmrivNyBSmpIJKakgkptyCatKyCSm3IFqFJBNWlJBJTUkElNuRiGlAUBQFAUBQFAUBQHrErjPPyCampIJKakgmpqSBatNyCShSQTU1JAtWlJBJQ3IzNpQFAUBQFAUBQFAUBQHp2rjujkElaUkClNSQTU25BJTUkElCkgWrSkgkpqSMwagKAoCgKAoCgKAoCgKA9ElQdPIFq0pIJKakgmobkC1aUkElNSQSU1JGYNQFAUBQFAUBQFAUBQFAUB9xqk6uQTU1JApTxSQTUNyBatKSDSmpIzBqAoCgKAoCgKAoCgKAoCgKA+u1TdfIFKcUkE1NuQTU1JBLZqSMwagKAoCgKAoCgKAoCgKAoCgKA/9k="/>
+        <linearGradient id="_Linear5" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-39.3403,137.423,-137.423,-39.3403,545.523,573.246)"><stop offset="0" style="stop-color:rgb(255,200,41);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
+        <linearGradient id="_Linear6" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(1.01113,-68.2054,68.2054,1.01113,482.996,741.463)"><stop offset="0" style="stop-color:white;stop-opacity:1"/><stop offset="1" style="stop-color:rgb(179,179,179);stop-opacity:1"/></linearGradient>
+        <linearGradient id="_Linear7" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-7.13599,-34.117,34.117,-7.13599,578.793,922.144)"><stop offset="0" style="stop-color:rgb(164,164,164);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(106,106,106);stop-opacity:1"/></linearGradient>
+    </defs>
+</svg>
--- a/docling_serve/ui/static/main.js
+++ b/docling_serve/ui/static/main.js
@@ -0,0 +1,115 @@
+// Propagate URL hash to CSS target class for elements with the same id or data-id.
+window.addEventListener("hashchange", function (event) {
+  [
+    ["remove", "oldURL"],
+    ["add", "newURL"],
+  ].forEach(([op, tense]) => {
+    const hash = new URL(event[tense]).hash.slice(1);
+    document
+      .querySelectorAll(`[data-id="${hash}"], [id="${hash}"]`)
+      .forEach((el) => el.classList[op]("target"));
+  });
+});
+
+// Navigate document items with cursor keys.
+document.addEventListener("keydown", function (event) {
+  const target = document.querySelector("*:target");
+  const tbounds = target?.getBoundingClientRect();
+  const filters = {
+    ArrowUp: (_x, y) => y < tbounds.top,
+    ArrowDown: (_x, y) => y > tbounds.bottom,
+    ArrowLeft: (x, _y) => x < tbounds.left,
+    ArrowRight: (x, _y) => x > tbounds.right,
+  };
+
+  if (target && filters[event.key]) {
+    const elements = [...document.querySelectorAll(".item[id], .item *[id]")];
+
+    let minEl, minDist;
+    for (const el of elements) {
+      const elBounds = el.getBoundingClientRect();
+
+      if (
+        filters[event.key](
+          (elBounds.left + elBounds.right) / 2,
+          (elBounds.top + elBounds.bottom) / 2
+        )
+      ) {
+        const elDist =
+          Math.abs(tbounds.x - elBounds.x) + Math.abs(tbounds.y - elBounds.y);
+
+        if (el != target && elDist < (minDist ?? Number.MAX_VALUE)) {
+          minEl = el;
+          minDist = elDist;
+        }
+      }
+    }
+
+    if (minEl) {
+      event.preventDefault();
+      location.href = `#${minEl.id}`;
+    }
+  }
+});
+
+// Navigate to item with id when it is clicked.
+function clickId(e) {
+  e.stopPropagation();
+  const id = e.currentTarget.getAttribute("data-id") ?? e.currentTarget.id;
+  location.href = `#${id}`;
+}
+
+window.onload = () => {
+  // (Re-)set the value of input[data-dep-on] to conform to a value of another input[name="data-dep-on"].
+  document.querySelectorAll("input[dep-on]").forEach((element) => {
+    const onName = element.getAttribute("dep-on");
+    const onElement = document.getElementsByName(onName)[0];
+    const depMap = JSON.parse(element.getAttribute("dep-values") ?? "{}");
+
+    if (onElement && depMap) {
+      // On load.
+      element.value = depMap[onElement.value] ?? "";
+
+      // On change.
+      onElement.addEventListener(
+        "change",
+        (event) => (element.value = depMap[event.currentTarget.value] ?? "")
+      );
+    }
+  });
+
+  // Toggle display of input[data-display-when] when it requires a different input[type=checkbox] to be checked.
+  document.querySelectorAll("*[display-when]").forEach((element) => {
+    const whenElements = element
+      .getAttribute("display-when")
+      .split(",")
+      .flatMap((whenName) => [...document.getElementsByName(whenName.trim())]);
+
+    function update() {
+      const allChecked = whenElements.every((el) => el.checked);
+      element.classList[allChecked ? "remove" : "add"]("hidden");
+    }
+
+    // On load.
+    update();
+
+    // On change.
+    whenElements.forEach((whenElement) =>
+      whenElement.addEventListener("change", update)
+    );
+  });
+
+  // Persist input value in local storage.
+  document
+    .querySelectorAll("input[type=checkbox][persist]")
+    .forEach((element) => {
+      const prefix = element.getAttribute("persist");
+      const name = element.getAttribute("name");
+      const key = `docling-serve-${prefix}-${name}`;
+
+      element.checked = localStorage.getItem(key) === "true";
+      element.addEventListener("change", (event) =>
+        localStorage.setItem(key, event.target.checked)
+      );
+    });
+};
--- a/docling_serve/ui/static/pico.css
+++ b/docling_serve/ui/static/pico.css
--- a/docling_serve/ui/static/style.css
+++ b/docling_serve/ui/static/style.css
@@ -0,0 +1,429 @@
+@import "pico.css";
+
+@view-transition {
+  navigation: auto;
+}
+
+:root {
+  --pico-font-size: 16px;
+
+  --highlight-factor: 0.8;
+  --target: hsl(240, 100%, 34%);
+  --mark: hsl(29, 100%, 35%);
+}
+
+@media (prefers-color-scheme: dark) {
+  :root {
+    --highlight-factor: 1.5;
+    --target: hsl(240, 100%, 70%);
+    --mark: hsl(29, 100%, 70%);
+  }
+}
+
+/* Utilities. */
+.hidden {
+  display: none;
+}
+
+.sticky-footer {
+  position: sticky;
+  bottom: 0;
+  padding-top: var(--pico-spacing);
+  background: var(--pico-background-color);
+}
+
+html {
+  scroll-behavior: smooth;
+}
+
+header {
+  position: relative;
+  display: flex;
+  gap: 5rem;
+  margin-bottom: 2rem;
+
+  > .title {
+    white-space: nowrap;
+    font-size: 2rem;
+    font-weight: 300;
+    line-height: 1.75;
+
+    img {
+      display: inline-block;
+      max-height: 0.8em;
+      margin: -0.2rem -0.2em 0.25rem -0.2em;
+    }
+  }
+
+  &.loading img {
+    animation: shake 0.5s ease-in-out alternate infinite;
+    scale: 1.5;
+    translate: 0 1.5rem;
+  }
+
+  > .version {
+    position: absolute;
+    left: 6.25rem;
+    bottom: -0.5rem;
+    padding: 0 0.25rem;
+    font-size: 0.65rem;
+    line-height: 1rem;
+    border: solid 1px var(--pico-color);
+    border-radius: 0.3rem;
+  }
+
+  @media (prefers-color-scheme: dark) {
+    --glow: hsl(29, 100%, 70%);
+
+    > .title {
+      text-shadow: 0 0 0.25rem white, 0 0 0.5rem var(--glow),
+        0 0 0.75rem var(--glow), 0 0 1rem var(--glow);
+      color: white;
+
+      img {
+        filter: drop-shadow(0 0 0.05rem white)
+          drop-shadow(0 0 0.1rem var(--glow))
+          drop-shadow(0 0 0.15rem var(--glow))
+          drop-shadow(0 0 0.2rem var(--glow));
+      }
+    }
+
+    > .version {
+      color: white;
+      border-color: white;
+      text-shadow: 0 0 0.05rem white, 0 0 0.1rem var(--glow),
+        0 0 0.15rem var(--glow), 0 0 0.2rem var(--glow);
+      box-shadow: 0 0 0.05rem white, 0 0 0.1rem var(--glow),
+        0 0 0.15rem var(--glow), 0 0 0.2rem var(--glow);
+    }
+  }
+}
+
+@keyframes shake {
+  50% {
+    transform: rotate(-20deg);
+  }
+  100% {
+    transform: rotate(20deg);
+  }
+}
+
+label > small {
+  margin-left: var(--pico-spacing);
+  opacity: 0.75;
+}
+
+/* Conversion results. */
+.progress,
+.fail {
+  margin-top: 3rem;
+}
+
+.fail {
+  color: var(--pico-del-color);
+}
+
+main.preview {
+  display: grid;
+  grid:
+    auto / 1fr 0.5rem minmax(20ch, 70ch) 0.5rem minmax(min-content, auto)
+    minmax(0.5rem, 1fr);
+  grid-auto-flow: dense;
+  align-content: start;
+}
+
+/* Header and task status. */
+main.preview {
+  > header {
+    grid-column: 3;
+    padding: 0 0.5rem;
+  }
+
+  > .status {
+    grid-row: 2;
+    grid-column: 3;
+    display: inline-block;
+    margin: 0 0.5rem 3rem 0.5rem;
+
+    span {
+      display: inline-block;
+      min-width: calc(5 * var(--pico-spacing));
+      padding-right: calc(0.5 * var(--pico-spacing));
+    }
+  }
+
+  > .formats {
+    grid-row: 2;
+    grid-column: 5;
+    margin-bottom: 3rem;
+    display: flex;
+    align-items: flex-end;
+    gap: 1rem;
+
+    > .configDarkImg {
+      display: none;
+      grid-row: 2;
+      grid-column: 6;
+      margin-left: auto;
+    }
+
+    @media (prefers-color-scheme: dark) {
+      > .configDarkImg {
+        display: block;
+      }
+    }
+  }
+}
+
+/* Invert images in dark mode (option). */
+@media (prefers-color-scheme: dark) {
+  main.preview:has(.configDarkImg > input:checked) {
+    --img-hover-border: white;
+
+    svg.page-image {
+      --mark: hsl(29, 100%, 70%)
+    }
+
+    image,
+    img {
+      filter: invert(1) hue-rotate(180deg) saturate(1.25);
+    }
+  }
+}
+
+/* Document contents. */
+main.preview {
+  --img-hover-border: black;
+
+  *[id] {
+    scroll-margin-top: 20vh;
+  }
+
+  > .page {
+    position: relative;
+    display: grid;
+    grid-template-columns: subgrid;
+    grid-auto-flow: dense;
+    grid-column: 1 / span 6;
+
+    > .item {
+      grid-column: 3;
+      width: 100%;
+      min-height: 3rem;
+      max-height: fit-content;
+      margin: 0;
+      padding: 0.5rem;
+      text-align: justify;
+      background-color: var(--pico-background-color);
+
+      cursor: pointer;
+
+      &:hover {
+        filter: brightness(var(--highlight-factor));
+      }
+
+      &.target {
+        outline: 2px solid var(--target);
+        z-index: 10;
+      }
+    }
+
+    > .item.void {
+      visibility: hidden;
+    }
+
+    > .item.annotated {
+      display: flex;
+      flex-direction: column;
+      align-items: stretch;
+      gap: 1rem;
+    }
+
+    /* Formatting. */
+    .bold {
+      font-weight: bold;
+    }
+    .italic {
+      font-style: italic;
+    }
+    .underline {
+      text-decoration: underline;
+    }
+    .strikethrough {
+      text-decoration: line-through;
+    }
+    .underline.strikethrough {
+      text-decoration: underline line-through;
+    }
+    .sub {
+      font-size: smaller;
+      vertical-align: sub;
+    }
+    .super {
+      font-size: smaller;
+      vertical-align: super;
+    }
+
+    /* Items out of content layer. */
+    > .item:not(.body),
+    > .item-markers:not(.body) {
+      opacity: 0.5;
+    }
+
+    > li.item {
+      list-style-type: none;
+    }
+
+    > .item.caption {
+      padding: 0.5rem 1.5rem;
+      font-size: 0.9rem;
+    }
+
+    > .item.table {
+      min-width: 0;
+      overflow-x: auto;
+
+      table {
+        font-size: 0.75rem;
+        border-collapse: collapse;
+
+        td {
+          vertical-align: top;
+        }
+
+        td.header {
+          font-weight: bold;
+          background-color: var(--pico-code-background-color);
+        }
+
+        td.target {
+          outline: solid 2px var(--target);
+        }
+      }
+    }
+
+    .annotation {
+      margin: 0;
+
+      &::before {
+        content: attr(data-kind);
+        opacity: 0.7;
+      }
+
+      &,
+      * {
+        font-size: 0.9rem;
+        color: var(--mark);
+      }
+    }
+
+    .annotation[data-kind="description"],
+    code.annotation {
+      white-space: pre-line;
+    }
+
+    .annotation[data-kind="classification"] {
+      width: fit-content;
+    }
+
+    > .item-markers {
+      position: relative;
+      grid-column: 2;
+      padding-top: 0.125rem;
+      padding-right: 0.5rem;
+
+      display: flex;
+      flex-direction: column;
+      align-items: flex-end;
+
+      font-family: monospace;
+      font-size: 0.675rem;
+      line-height: 1.25;
+      letter-spacing: 0;
+      color: var(--mark);
+      white-space: nowrap;
+      border-top: solid 1px var(--mark);
+
+      > * {
+        margin-right: 0.5rem;
+      }
+
+      > a {
+        padding: 0.125rem 0.25rem;
+        margin-top: 0.5rem;
+        border-radius: 0.125rem;
+        color: var(--pico-contrast-inverse);
+        background-color: var(--target);
+        text-decoration: none;
+      }
+
+      /* A custom property has to be read through var(); a bare
+         --highlight-factor is not a valid brightness() argument. */
+      > a:hover {
+        filter: brightness(var(--highlight-factor));
+      }
+
+      &:not(.target) > a {
+        display: none;
+      }
+
+      &.group,
+      &.grouped {
+        border-left: 1px dashed var(--mark);
+      }
+
+      &.group {
+        margin-top: 0.5rem;
+        border-top: none;
+      }
+    }
+
+    > .page-marker {
+      grid-column: 4;
+
+      &.border {
+        transform: translateY(-1px);
+        border-top: solid 1px var(--mark);
+      }
+    }
+
+    > svg.page-image {
+      --mark: hsl(29, 100%, 35%);
+
+      grid-column: 5;
+      position: sticky;
+      top: 0.5rem;
+      width: 100%;
+      max-height: calc(100vh - 1rem);
+      outline: 1px solid var(--mark);
+
+      rect {
+        stroke: var(--mark);
+        stroke-width: 1px;
+        fill: var(--target);
+        fill-opacity: 0.0001; /* To activate hover. */
+        cursor: pointer;
+
+        &:hover {
+          filter: brightness(0.8);
+          fill-opacity: 0.1;
+          stroke: var(--img-hover-border);
+          stroke-width: 3px;
+        }
+      }
+
+      rect.target {
+        stroke: var(--target);
+        stroke-width: 3px;
+        stroke-dasharray: none;
+      }
+
+      text {
+        font-size: 0.675rem;
+        color: var(--mark);
+
+        &.top-no {
+          alignment-baseline: hanging;
+        }
+      }
+    }
+  }
+}
--- a/docling_serve/ui/svg.py
+++ b/docling_serve/ui/svg.py
@@ -0,0 +1,20 @@
+from pyjsx import JSX  # type: ignore
+
+
+def _tag(name: str):
+    def factory(children, **args) -> JSX:
+        props = " ".join([f'{k}="{v}"' for k, v in args.items()])
+
+        if children:
+            child_renders = "".join([str(c) for c in children])
+            return f"<{name} {props}>{child_renders}</{name}>"
+        else:
+            return f"<{name} {props} />"
+
+    return factory
+
+
+# Component functions for the SVG elements of the same name, each built by
+# _tag().
+image = _tag("image")
+path = _tag("path")
+rect = _tag("rect")
+text = _tag("text")
--- a/docling_serve/websocket_notifier.py
+++ b/docling_serve/websocket_notifier.py
@@ -30,25 +30,47 @@ class WebsocketNotifier(BaseNotifier):
        if task_id not in self.task_subscribers:
            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")

-        task = await self.orchestrator.get_raw_task(task_id=task_id)
-        task_queue_position = await self.orchestrator.get_queue_position(task_id)
-        msg = TaskStatusResponse(
-            task_id=task.task_id,
-            task_status=task.task_status,
-            task_position=task_queue_position,
-            task_meta=task.processing_meta,
-        )
-        for websocket in self.task_subscribers[task_id]:
-            await websocket.send_text(
-                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
+        try:
+            # Get task status from Redis or RQ directly instead of in-memory registry
+            task = await self.orchestrator.task_status(task_id=task_id)
+            task_queue_position = await self.orchestrator.get_queue_position(task_id)
+            msg = TaskStatusResponse(
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
            )
-            if task.is_completed():
-                await websocket.close()
+            for websocket in self.task_subscribers[task_id]:
+                await websocket.send_text(
+                    WebsocketMessage(
+                        message=MessageKind.UPDATE, task=msg
+                    ).model_dump_json()
+                )
+                if task.is_completed():
+                    await websocket.close()
+        except Exception as e:
+            # Log the error but don't crash the notifier
+            import logging
+
+            _log = logging.getLogger(__name__)
+            _log.error(f"Error notifying subscribers for task {task_id}: {e}")

    async def notify_queue_positions(self):
+        """Notify all subscribers of pending tasks about queue position updates."""
        for task_id in self.task_subscribers.keys():
-            # notify only pending tasks
-            if self.orchestrator.tasks[task_id].task_status != TaskStatus.PENDING:
-                continue
+            try:
+                # Check task status directly from Redis or RQ
+                task = await self.orchestrator.task_status(task_id)

-            await self.notify_task_subscribers(task_id)
+                # Notify only pending tasks
+                if task.task_status == TaskStatus.PENDING:
+                    await self.notify_task_subscribers(task_id)
+            except Exception as e:
+                # Log the error but don't crash the notifier
+                import logging
+
+                _log = logging.getLogger(__name__)
+                _log.error(
+                    f"Error checking task {task_id} status for queue position notification: {e}"
+                )
--- a/docs/README.md
+++ b/docs/README.md
@@ -3,7 +3,9 @@
 This documentation pages explore the webserver configurations, runtime options, deployment examples as well as development best practices.

 - [Configuration](./configuration.md)
- [Advance usage](./usage.md)
+- [Handling models](./models.md)
+- [Usage](./usage.md)
 - [Deployment](./deployment.md)
+- [MCP](./mcp.md)
 - [Development](./development.md)
 - [`v1` migration](./v1_migration.md)
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -39,6 +39,7 @@ THe following table describes the options to configure the Docling Serve app.
 |  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path |
 |  | `DOCLING_SERVE_SCRATCH_PATH` |  | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary created is created for this purpose. |
 | `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
+|  | `DOCLING_SERVE_SHOW_VERSION_INFO` | `true` | If enabled, the `/version` endpoint will provide the Docling package versions, otherwise it will return a forbidden 403 error. |
 |  | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
 |  | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
 |  | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
@@ -46,13 +47,33 @@ THe following table describes the options to configure the Docling Serve app.
 |  | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
 |  | `DOCLING_SERVE_MAX_NUM_PAGES` |  | The maximum number of pages for a document to be processed. |
 |  | `DOCLING_SERVE_MAX_FILE_SIZE` |  | The maximum file size for a document to be processed. |
+|  | `DOCLING_SERVE_SYNC_POLL_INTERVAL` | `2` | Number of seconds to sleep between polling the task status in the sync endpoints. |
 |  | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
 |  | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
 |  | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
+|  | `DOCLING_SERVE_QUEUE_MAX_SIZE` | | Size of the pages queue, i.e. the maximum number of pages that can be open at the same time. |
+|  | `DOCLING_SERVE_OCR_BATCH_SIZE` | | Batch size for the OCR stage. |
+|  | `DOCLING_SERVE_LAYOUT_BATCH_SIZE` | | Batch size for the layout detection stage. |
+|  | `DOCLING_SERVE_TABLE_BATCH_SIZE` | | Batch size for the table structure stage. |
+|  | `DOCLING_SERVE_BATCH_POLLING_INTERVAL_SECONDS` | | Wait time for gathering pages before starting a stage processing. |
 |  | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
 |  | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
 |  | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
-|  | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
+|  | `DOCLING_SERVE_API_KEY` | | If specified, all the API requests must contain the header `X-Api-Key` with this value. |
+|  | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local`, `rq` and `kfp`. See below for more configurations of the engines. |
+
+### Docling configuration
+
+Some Docling settings, mostly about performance, are exposed as environment variables, which can also be used when running Docling Serve.
+
+| ENV | Default | Description |
+| ----|---------|-------------|
+| `DOCLING_NUM_THREADS` | `4` | Number of concurrent threads used for the `torch` CPU execution. |
+| `DOCLING_DEVICE` | | Device used for the model execution. Valid values are `cpu`, `cuda`, `mps`. When unset, the best device is chosen. For CUDA-enabled environments, you can choose which GPU using the syntax `cuda:0`, `cuda:1`, ... |
+| `DOCLING_PERF_PAGE_BATCH_SIZE` | `4` | Number of pages processed in the same batch. |
+| `DOCLING_PERF_ELEMENTS_BATCH_SIZE` | `8` | Number of document items/elements processed in the same batch during enrichment. |
+| `DOCLING_DEBUG_PROFILE_PIPELINE_TIMINGS` | `false` | When enabled, Docling will provide detailed timings information. |
+

 ### Compute engine

@@ -66,6 +87,17 @@ The following table describes the options to configure the Docling Serve local e
 | ENV | Default | Description |
 |-----|---------|-------------|
 | `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
+| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |
+
+#### RQ engine
+
+The following table describes the options to configure the Docling Serve RQ engine.
+
+| ENV | Default | Description |
+|-----|---------|-------------|
+| `DOCLING_SERVE_ENG_RQ_REDIS_URL` | (required) | The Redis connection URL, e.g. `redis://localhost:6373/` |
+| `DOCLING_SERVE_ENG_RQ_RESULTS_PREFIX` | `docling:results` | The prefix used for storing the results in Redis. |
+| `DOCLING_SERVE_ENG_RQ_SUB_CHANNEL` | `docling:updates` | The channel key name used for communicating updates between the workers and the orchestrator. |

 #### KFP engine

@@ -79,3 +111,10 @@ The following table describes the options to configure the Docling Serve KFP eng
 | `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` |  | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1/callback/task/progress`. |
 | `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` |  | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
 | `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` |  | The CA certificate for the progress callback. For cluster-inetrnal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
+
+#### Gradio UI
+
+When using the Gradio UI with the option to output the conversion as a file, Gradio uses a cache to prevent files from being overwritten ([more info here](https://www.gradio.app/guides/file-access#the-gradio-cache)). We configured the cache cleanup to run every hour and delete files older than 10 hours. If files older than 10 hours need to remain available for download from the UI, there are two options:
+
+- Increase the maximum age of the files to clean [here](https://github.com/docling-project/docling-serve/blob/main/docling_serve/gradio_ui.py#L483) to the desired age;
+- Or manage the cleanup manually by pointing the Gradio temporary directory to the same absolute path as `DOCLING_SERVE_SCRATCH_PATH`. This can be achieved by setting the environment variable `GRADIO_TEMP_DIR`, either via the command line with `export GRADIO_TEMP_DIR="<same_path_as_scratch>"` or in the `Dockerfile` with `ENV GRADIO_TEMP_DIR="<same_path_as_scratch>"`. After this, set the cache cleanup to `None` [here](https://github.com/docling-project/docling-serve/blob/main/docling_serve/gradio_ui.py#L483). Now, the cleanup of `DOCLING_SERVE_SCRATCH_PATH` will also clean the Gradio temporary directory. (If you use this option, remember to remove the environment variable `GRADIO_TEMP_DIR` when reverting these changes; otherwise files may not be available for download.)
--- a/docs/deploy-examples/compose-amd.yaml
+++ b/docs/deploy-examples/compose-amd.yaml
@@ -0,0 +1,21 @@
+# AMD ROCm deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-rocm:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
+      ## This section is for compatibility with older cards
+      # HSA_OVERRIDE_GFX_VERSION: "11.0.0"
+      # HSA_ENABLE_SDMA: "0"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - 44    # video group GID from host
+      - 992   # render group GID from host
+    restart: always
--- a/docs/deploy-examples/compose-gpu.yaml
+++ b/docs/deploy-examples/compose-gpu.yaml
@@ -1,15 +0,0 @@
-services:
-  docling:
-    image: ghcr.io/docling-project/docling-serve-cu124
-    container_name: docling-serve
-    ports:
-      - 5001:5001
-    environment:
-      - DOCLING_SERVE_ENABLE_UI=true
-    deploy:
-      resources:
-        reservations:
-          devices:
-          - driver: nvidia
-            count: all # nvidia-smi 
-            capabilities: [gpu]
--- a/docs/deploy-examples/compose-nvidia.yaml
+++ b/docs/deploy-examples/compose-nvidia.yaml
@@ -0,0 +1,20 @@
+# NVIDIA CUDA deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu126:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
--- a/docs/deploy-examples/docling-serve-rq-workers.yaml
+++ b/docs/deploy-examples/docling-serve-rq-workers.yaml
@@ -0,0 +1,192 @@
+# This example deployment configures Docling Serve with a Service and RQ workers
+
+# Create following secret
+# kubectl create secret generic docling-serve-rq-secrets --from-literal=REDIS_PASSWORD=myredispassword --from-literal=RQ_REDIS_URL=redis://:myredispassword@docling-serve-redis-service:6373/
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  ports:
+  - name: http
+    port: 5001
+    targetPort: http
+  selector:
+    app: docling-serve
+    component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 1
+              memory: 8Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+            - name: DOCLING_SERVE_ENG_KIND
+              value: 'rq'
+            - name: DOCLING_SERVE_ENG_RQ_REDIS_URL
+              valueFrom:
+                secretKeyRef:
+                  name: docling-serve-rq-secrets
+                  key: RQ_REDIS_URL
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu'
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve-rq-workers
+  labels:
+    app: docling-serve-rq-workers
+    component: docling-serve-rq-worker
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: docling-serve-rq-workers
+      component: docling-serve-rq-worker
+  template:
+    metadata:
+      labels:
+        app: docling-serve-rq-workers
+        component: docling-serve-rq-worker
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: worker
+          resources:
+            limits:
+              cpu: 1
+              memory: 4Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENG_KIND
+              value: 'rq'
+            - name: DOCLING_SERVE_ENG_RQ_REDIS_URL
+              valueFrom:
+                secretKeyRef:
+                  name: docling-serve-rq-secrets
+                  key: RQ_REDIS_URL
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu'
+          command: ["docling-serve"]
+          args: ["rq-worker"]
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docling-serve-redis
+  labels:
+    app: docling-serve-redis
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve-redis
+  template:
+    metadata:
+      labels:
+        app: docling-serve-redis
+    spec:
+      restartPolicy: Always
+      terminationGracePeriodSeconds: 30
+      containers:
+        - name: redis
+          resources:
+            limits:
+              cpu: 1
+              memory: 1Gi
+            requests:
+              cpu: 250m
+              memory: 100Mi
+          image: redis:latest
+          command: ["redis-server"]
+          args:
+            - "--port"
+            - "6373"
+            - "--dir"
+            - "/mnt/redis/data"
+            - "--appendonly"
+            - "yes"
+            - "--requirepass"
+            - "$(REDIS_PASSWORD)"
+          ports:
+            - containerPort: 6373
+          env:
+            - name: REDIS_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: docling-serve-rq-secrets
+                  key: REDIS_PASSWORD
+          volumeMounts:
+            - name: redis-data
+              mountPath: /mnt/redis/data
+          securityContext:
+            # fsGroup is only valid in the Pod-level securityContext, so it is not set here.
+            runAsNonRoot: true
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            seccompProfile:
+              type: RuntimeDefault
+      volumes:
+        - name: redis-data
+          emptyDir:
+            medium: Memory
+            sizeLimit: 2Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve-redis-service
+  labels:
+      app: docling-serve-redis
+spec:
+  type: NodePort
+  ports:
+    - name: redis-service
+      protocol: TCP
+      port: 6373
+      targetPort: 6373
+  selector:
+    app: docling-serve-redis
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -4,16 +4,17 @@ This document provides deployment examples for running the application in differ

 Choose the deployment option that best fits your setup.

- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
+- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
+- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
 - **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.

 ---

-## Local GPU
+## Local GPU NVIDIA

 ### Docker compose

-Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
+Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)

 This deployment has the following features:

@@ -22,7 +23,7 @@ This deployment has the following features:
 Install the app with:

 ```sh
-docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
 ```

 For using the API:
@@ -34,7 +35,7 @@ curl -X 'POST' \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -56,7 +57,7 @@ Docs:
 <details>
 <summary><b>Steps</b></summary>

-1. Check driver version and which GPU you want to use (0/1/2/3.. and update [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file or use `count: all`)
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file or use `count: all`)

    ```sh
    nvidia-smi
@@ -117,7 +118,75 @@ Docs:
 5. Run the container:

    ```sh
-    docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+    docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
+    ```
+
+</details>
+
+## Local GPU AMD
+
+### Docker compose
+
+Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)
+
+This deployment has the following features:
+
+- AMD ROCm enabled
+
+Install the app with:
+
+```sh
+docker compose -f docs/deploy-examples/compose-amd.yaml up -d
+```
+
+For using the API:
+
+```sh
+# Make a test query
+curl -X 'POST' \
+  "localhost:5001/v1/convert/source/async" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+<details>
+<summary><b>Requirements</b></summary>
+
+- debian/ubuntu/rhel/fedora/opensuse
+- docker
+- AMDGPU driver >=6.3
+- AMD ROCm >=6.3
+
+Docs:
+
+- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
+
+</details>
+
+<details>
+<summary><b>Steps</b></summary>
+
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    rocm-smi --showdriverversion
+    rocminfo | grep -i "ROCm version"
+    ```
+
+2. Find both video group GID and render group GID from host (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    getent group video
+    getent group render
+    ```
+
+3. Build the image locally (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    make docling-serve-rocm-image
    ```

 </details>
@@ -152,10 +221,35 @@ curl -X 'POST' \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

+### Multiple workers with RQ
+
+Manifest example: [`docling-serve-rq-workers.yaml`](./deploy-examples/docling-serve-rq-workers.yaml)
+
+This deployment example has the following features:
+
+- Deployment configuration
+- Service configuration
+- Redis deployment
+- Multiple (2 by default) worker Pods
+
+Install the app with:
+
+- create k8s secret:
+
+```sh
+kubectl create secret generic docling-serve-rq-secrets --from-literal=REDIS_PASSWORD=myredispassword --from-literal=RQ_REDIS_URL=redis://:myredispassword@docling-serve-redis-service:6373/
+```
+
+- apply deployment manifest:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-rq-workers.yaml
+```
+
 ### Secure deployment with `oauth-proxy`

 Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml)
@@ -189,7 +283,7 @@ curl -X 'POST' \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -222,7 +316,7 @@ task_id=$(curl -s -X 'POST' \
    -H "accept: application/json" \
    -H "Content-Type: application/json" \
    -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+      "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
    }' \
    -c cookies.txt | grep -oP '"task_id":"\K[^"]+')
 ```
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -0,0 +1,22 @@
+# Examples
+
+## Split processing
+
+The provided split-processing example demonstrates how to split a PDF into chunks of pages and send them for conversion. At the end, it concatenates all the split pages into a single conversion `JSON`.
+
+At the beginning of the file there are variables to be used (and modified), such as:
+| Variable | Description |
+| ---------|-------------|
+| `path_to_pdf`| Path to PDF file to be split |
+| `pages_per_file`| The number of pages per chunk to split PDF |
+| `base_url`| Base url of the `docling-serve` host |
+| `out_dir`| The output folder of each conversion `JSON` of split PDF and the final concatenated `JSON` |
+
+The example proceeds as follows:
+- Get the number of pages of the `PDF`
+- Based on the number of chunks of pages, send each chunk for conversion using the `page_range` parameter
+- Wait for all conversions to finish
+- Get all conversion results
+- Save each conversion `JSON` result into a `JSON` file
+- Concatenate all `JSONs` into a single `JSON` using `docling` concatenate method
+- Save concatenated `JSON` into a `JSON` file
--- a/docs/mcp.md
+++ b/docs/mcp.md
@@ -0,0 +1,39 @@
+# Docling MCP in Docling Serve
+
+The `docling-serve` container image includes all MCP (Model Context Protocol) features starting from version v1.1.0. To leverage these features, you simply need to use a different entrypoint—no custom image builds or additional installations are required. The image provides the `docling-mcp-server` executable, which enables MCP functionality out of the box as of version v1.1.0 ([changelog](https://github.com/docling-project/docling-serve/blob/624f65d41b734e8b39ff267bc8bf6e766c376d6d/CHANGELOG.md)).
+
+Read more on [Docling MCP](https://github.com/docling-project/docling-mcp) in its dedicated repository.
+
+## Launching the MCP Service
+
+By default, the container runs `docling-serve run` and exposes port 5001. To start the MCP service, override the entrypoint and specify your desired port mapping. For example:
+
+```sh
+podman run -p 8000:8000 quay.io/docling-project/docling-serve -- docling-mcp-server --transport streamable-http --port 8000 --host 0.0.0.0
+```
+
+This command starts the MCP server on port 8000, accessible at `http://localhost:8000/mcp`. Adjust the port and host as needed. Key arguments for `docling-mcp-server` include `--transport streamable-http` (HTTP transport for client connections), `--port <PORT>`, and `--host <HOST>` (use `0.0.0.0` to accept connections from any interface).
+
+## Configuring MCP Clients
+
+Most MCP-compatible clients, such as LM Studio and Claude Desktop, allow you to specify custom MCP server endpoints. The standard configuration uses a JSON block to define available MCP servers. For example, to connect to the Docling MCP server running on port 8000:
+
+```json
+{
+  "mcpServers": {
+    "docling": {
+      "url": "http://localhost:8000/mcp"
+    }
+  }
+}
+```
+
+Insert this configuration in your client's settings where MCP servers are defined. Update the URL if you use a different port.
+
+### LM Studio and Claude Desktop
+
+Both LM Studio and Claude Desktop support MCP endpoints via configuration files or UI settings. Paste the above JSON block into the appropriate configuration section. For Claude Desktop, add the MCP server in the "Custom Model" or "MCP Server" section. For LM Studio, refer to its documentation for the location of the MCP server configuration.
+
+### Other MCP Clients
+
+Other clients, such as Continue Coding Assistant, also support custom MCP endpoints. Use the same configuration pattern: provide the MCP server URL ending with `/mcp` and ensure the port matches your container setup. See the [Docling MCP docs](https://github.com/docling-project/docling-mcp/tree/main/docs/integrations) for more details.
--- a/docs/models.md
+++ b/docs/models.md
@@ -0,0 +1,175 @@
+# Handling Models in Docling Serve
+
+When enabling steps in Docling Serve that require extra models (such as picture classification, picture description, table detection, code recognition, formula extraction, or vision-language modules), you must ensure those models are available in the runtime environment. The standard container image includes only the default models. Any additional models must be downloaded and made available before use. If required models are missing, Docling Serve will raise runtime errors rather than downloading them automatically. This default is chosen to guarantee that the system does not call external services.
+
+## Model Storage Location
+
+Docling Serve loads models from the directory specified by the `DOCLING_SERVE_ARTIFACTS_PATH` environment variable. This path must be consistent across model download and runtime. When running with multiple workers or reload enabled, you must use the environment variable rather than the CLI argument for configuration [[source]](./configuration.md).
+
+## Approaches for Making Extra Models Available
+
+There are several ways to ensure required models are present:
+
+### 1. Disable Local Models (Trigger Auto-Download)
+
+You can configure the container to download all models at startup by clearing the artifacts path:
+
+```sh
+podman run -d -p 5001:5001 --name docling-serve \
+  -e DOCLING_SERVE_ARTIFACTS_PATH="" \
+  -e DOCLING_SERVE_ENABLE_UI=true \
+  quay.io/docling-project/docling-serve
+```
+
+This approach is simple for local development but not recommended for production, as it increases startup time and depends on network availability.
+
+### 2. Build a Custom Image with Pre-Downloaded Models
+
+You can create a new image that includes the required models:
+
+```Dockerfile
+FROM quay.io/docling-project/docling-serve
+RUN docling-tools models download smolvlm
+```
+
+This method is suitable for production, as it ensures all models are present in the image and avoids runtime downloads.
+
+### 3. Update the Entrypoint to Download Models Before Startup
+
+You can override the entrypoint to download models before starting the service:
+
+```sh
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true \
+  quay.io/docling-project/docling-serve \
+  -- sh -c 'docling-tools models download smolvlm && exec docling-serve run'  # exec only the last command, so the server starts after the download
+```
+
+This is useful for environments where you want to keep the base image unchanged but still automate model preparation.
+
+### 4. Mount a Volume with Pre-Downloaded Models
+
+Download models locally and mount them into the container:
+
+```sh
+# Download the models locally
+docling-tools models download --all -o models
+
+# Start the container with the local models folder
+podman run -p 5001:5001 \
+  -v $(pwd)/models:/opt/app-root/src/models \
+  -e DOCLING_SERVE_ARTIFACTS_PATH="/opt/app-root/src/models" \
+  -e DOCLING_SERVE_ENABLE_UI=true \
+  quay.io/docling-project/docling-serve
+```
+
+This approach is robust for both local and production deployments, especially when using persistent storage.
+
+## Kubernetes/Cluster Deployments
+
+For Kubernetes or OpenShift clusters, the recommended approach is to use a PersistentVolumeClaim (PVC) for model storage, a Kubernetes Job to download models, and mount the volume into the deployment. This ensures models persist across pod restarts and scale-out scenarios.
+
+### Example: PersistentVolumeClaim
+
+```yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: docling-model-cache-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 10Gi
+```
+
+If you don't want to use the default storage class, set a custom storage class as follows:
+
+```yaml
+spec:
+    ...
+    storageClassName: <Storage Class Name>
+```
+
+Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)
+
+### Example: Model Download Job
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: docling-model-cache-load
+spec:
+  template:
+    spec:
+      containers:
+        - name: loader
+          image: ghcr.io/docling-project/docling-serve-cpu:main
+          command:
+            - docling-tools
+            - models
+            - download
+            - '--output-dir=/modelcache'
+            - 'layout'
+            - 'tableformer'
+            - 'code_formula'
+            - 'picture_classifier'
+            - 'smolvlm'
+            - 'granite_vision'
+            - 'easyocr'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
+      restartPolicy: Never
+```
+
+The job will mount the previously created persistent volume and execute a command similar to the one used to load models locally:
+`docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`
+
+In the manifest, the desired models are listed individually; alternatively, the `--all` parameter can be used to download all models.
+
+Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)
+
+### Example: Deployment with Mounted Volume
+
+```yaml
+spec:
+  template:
+    spec:
+      containers:
+        - name: api
+          env:
+            - name: DOCLING_SERVE_ARTIFACTS_PATH
+              value: '/modelcache'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
+```
+
+The value of `DOCLING_SERVE_ARTIFACTS_PATH` must match the mount path where models are stored.
+
+Now, when docling-serve executes tasks, the underlying docling installation will load the model weights from the mounted volume.
+
+Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
+
+## Local Docker Execution
+
+For local Docker or Podman execution, you can use any of the approaches above. Mounting a local directory with pre-downloaded models is the most reliable for repeated runs and avoids network dependencies.
+
+## Troubleshooting and Best Practices
+
+- If a required model is missing from the artifacts path, Docling Serve will raise a runtime error.
+- Always ensure the value of `DOCLING_SERVE_ARTIFACTS_PATH` matches the directory where models are stored and mounted.
+- For production and cluster environments, prefer persistent storage and pre-loading models via a dedicated job.
+
+For more details and YAML manifest examples, see the [deployment documentation](./deployment.md).
--- a/docs/pre-loading-models.md
+++ b/docs/pre-loading-models.md
@@ -1,103 +0,0 @@
-# Pre-loading models for docling
-
-This document provides examples for pre-loading docling models to a persistent volume and re-using it for docling-serve deployments.
-
-1. We need to create a persistent volume that will store models weights:
-
-    ```yaml
-    apiVersion: v1
-    kind: PersistentVolumeClaim
-    metadata:
-      name: docling-model-cache-pvc
-    spec:
-      accessModes:
-        - ReadWriteOnce
-      volumeMode: Filesystem
-      resources:
-        requests:
-          storage: 10Gi
-    ```
-
-    If you don't want to use default storage class, set your custom storage class with following:
-
-    ```yaml
-    spec:
-      ...
-      storageClassName: <Storage Class Name>
-    ```
-
-    Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)
-
-2. In order to load model weights, we can use docling-toolkit to download them, as this is a one time operation we can use kubernetes job for this:
-
-    ```yaml
-    apiVersion: batch/v1
-    kind: Job
-    metadata:
-      name: docling-model-cache-load
-    spec:
-      selector: {}
-      template:
-        metadata:
-          name: docling-model-load
-        spec:
-          containers:
-            - name: loader
-              image: ghcr.io/docling-project/docling-serve-cpu:main
-              command:
-                - docling-tools
-                - models
-                - download
-                - '--output-dir=/modelcache'
-                - 'layout'
-                - 'tableformer'
-                - 'code_formula'
-                - 'picture_classifier'
-                - 'smolvlm'
-                - 'granite_vision'
-                - 'easyocr'
-              volumeMounts:
-                - name: docling-model-cache
-                  mountPath: /modelcache
-          volumes:
-            - name: docling-model-cache
-              persistentVolumeClaim:
-                claimName: docling-model-cache-pvc
-          restartPolicy: Never
-    ```
-
-    The job will mount previously created persistent volume and execute command similar to how we would load models locally:
-    `docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`
-
-    In manifest, we specify desired models individually, or we can use `--all` parameter to download all models.
-
-    Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)
-
-3. Now we can mount volume in the docling-serve deployment and set env `DOCLING_SERVE_ARTIFACTS_PATH` to point to it.
-    Following additions to deployment should be made:
-
-    ```yaml
-    spec:
-      template:
-        spec:
-          containers:
-            - name: api
-              env:
-              ...
-                - name: DOCLING_SERVE_ARTIFACTS_PATH
-                  value: '/modelcache'
-              volumeMounts:
-                - name: docling-model-cache
-                  mountPath: /modelcache
-          ...
-          volumes:
-            - name: docling-model-cache
-              persistentVolumeClaim:
-                claimName: docling-model-cache-pvc
-    ```
-
-    Make sure that value of `DOCLING_SERVE_ARTIFACTS_PATH` is the same as where models were downloaded and where volume is mounted.
-
-    Now when docling-serve is executing tasks, the underlying docling installation will load model weights from mounted volume.
-
-    Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -4,31 +4,93 @@ The API provides two endpoints: one for urls, one for files. This is necessary t

 ## Common parameters

-On top of the source of file (see below), both endpoints support the same parameters, which are almost the same as the Docling CLI.
+In addition to the source of the file (see below), both endpoints support the same parameters.

- `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
- `pipeline` (str). The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
- `page_range` (tuple). If specified, only convert a range of pages. The page number starts at 1.
- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
- `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesserocr`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`. To use the `tesserocr` engine, `tesserocr` must be installed where docling-serve is running: `pip install tesserocr`
- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Defaults to `dlparse_v4`.
- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
- `md_page_break_placeholder` (str): Add this placeholder between pages in the markdown output.
- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
- `picture_description_area_threshold` (float): Minimum percentage of the area for a picture to be processed with the models. Defaults to 0.05.
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `picture_description_api`.
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with `picture_description_local`.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
- `images_scale` (float): Scale factor for images. Defaults to 2.0.
+<!-- begin: parameters-docs -->
+<h4>ConvertDocumentsRequestOptions</h4>
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `from_formats` | List[InputFormat] | Input format(s) to convert from. String or list of strings. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`, `csv`, `xlsx`, `xml_uspto`, `xml_jats`, `mets_gbs`, `json_docling`, `audio`, `vtt`. Optional, defaults to all formats. |
+| `to_formats` | List[OutputFormat] | Output format(s) to convert to. String or list of strings. Allowed values: `md`, `json`, `html`, `html_split_page`, `text`, `doctags`. Optional, defaults to Markdown. |
+| `image_export_mode` | ImageRefMode | Image export mode for the document (in case of JSON, Markdown or HTML). Allowed values: `placeholder`, `embedded`, `referenced`. Optional, defaults to Embedded. |
+| `do_ocr` | bool | If enabled, the bitmap content will be processed using OCR. Boolean. Optional, defaults to true |
+| `force_ocr` | bool | If enabled, replace existing text with OCR-generated text over content. Boolean. Optional, defaults to false. |
+| `ocr_engine` | `ocr_engines_enum` | The OCR engine to use. String. Allowed values: `auto`, `easyocr`, `ocrmac`, `rapidocr`, `tesserocr`, `tesseract`. Optional, defaults to `easyocr`. |
+| `ocr_lang` | List[str] or NoneType | List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. String or list of strings. Optional, defaults to empty. |
+| `pdf_backend` | PdfBackend | The PDF backend to use. String. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Optional, defaults to `dlparse_v4`. |
+| `table_mode` | TableFormerMode | Mode to use for table structure, String. Allowed values: `fast`, `accurate`. Optional, defaults to accurate. |
+| `table_cell_matching` | bool | If true, matches table cells predictions back to PDF cells. Can break table output if PDF cells are merged across table columns. If false, let table structure model define the text cells, ignore PDF cells. |
+| `pipeline` | ProcessingPipeline | Choose the pipeline to process PDF or image files. |
+| `page_range` | Tuple | Only convert a range of pages. The page number starts at 1. |
+| `document_timeout` | float | The timeout for processing each document, in seconds. |
+| `abort_on_error` | bool | Abort on error if enabled. Boolean. Optional, defaults to false. |
+| `do_table_structure` | bool | If enabled, the table structure will be extracted. Boolean. Optional, defaults to true. |
+| `include_images` | bool | If enabled, images will be extracted from the document. Boolean. Optional, defaults to true. |
+| `images_scale` | float | Scale factor for images. Float. Optional, defaults to 2.0. |
+| `md_page_break_placeholder` | str | Add this placeholder between pages in the markdown output. |
+| `do_code_enrichment` | bool | If enabled, perform OCR code enrichment. Boolean. Optional, defaults to false. |
+| `do_formula_enrichment` | bool | If enabled, perform formula OCR, return LaTeX code. Boolean. Optional, defaults to false. |
+| `do_picture_classification` | bool | If enabled, classify pictures in documents. Boolean. Optional, defaults to false. |
+| `do_picture_description` | bool | If enabled, describe pictures in documents. Boolean. Optional, defaults to false. |
+| `picture_description_area_threshold` | float | Minimum percentage of the area for a picture to be processed with the models. |
+| `picture_description_local` | PictureDescriptionLocal or NoneType | Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `picture_description_api`. |
+| `picture_description_api` | PictureDescriptionApi or NoneType | API details for using a vision-language model in the picture description. This parameter is mutually exclusive with `picture_description_local`. |
+| `vlm_pipeline_model` | VlmModelType or NoneType | Preset of local and API models for the `vlm` pipeline. This parameter is mutually exclusive with `vlm_pipeline_model_local` and `vlm_pipeline_model_api`. Use the other options for more parameters. |
+| `vlm_pipeline_model_local` | VlmModelLocal or NoneType | Options for running a local vision-language model for the `vlm` pipeline. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `vlm_pipeline_model_api` and `vlm_pipeline_model`. |
+| `vlm_pipeline_model_api` | VlmModelApi or NoneType | API details for using a vision-language model for the `vlm` pipeline. This parameter is mutually exclusive with `vlm_pipeline_model_local` and `vlm_pipeline_model`. |
+
+<h4>VlmModelApi</h4>
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `url` | AnyUrl | Endpoint which accepts openai-api compatible requests. |
+| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
+| `params` | Dict[str, Any] | Model parameters. |
+| `timeout` | float | Timeout for the API request. |
+| `concurrency` | int | Maximum number of concurrent requests to the API. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+| `scale` | float | Scale factor of the images used. |
+| `response_format` | ResponseFormat | Type of response generated by the model. |
+| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
+
+<h4>VlmModelLocal</h4>
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `repo_id` | str | Repository id from the Hugging Face Hub. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+| `scale` | float | Scale factor of the images used. |
+| `response_format` | ResponseFormat | Type of response generated by the model. |
+| `inference_framework` | InferenceFramework | Inference framework to use. |
+| `transformers_model_type` | TransformersModelType | Type of transformers auto-model to use. |
+| `extra_generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
+| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
+
+<h4>PictureDescriptionApi</h4>
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `url` | AnyUrl | Endpoint which accepts openai-api compatible requests. |
+| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
+| `params` | Dict[str, Any] | Model parameters. |
+| `timeout` | float | Timeout for the API request. |
+| `concurrency` | int | Maximum number of concurrent requests to the API. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+
+<h4>PictureDescriptionLocal</h4>
+
+| Field Name | Type | Description |
+|------------|------|-------------|
+| `repo_id` | str | Repository id from the Hugging Face Hub. |
+| `prompt` | str | Prompt used when calling the vision-language model. |
+| `generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
+
+<!-- end: parameters-docs -->
+
+### Authentication
+
+When authentication is activated (see the parameter `DOCLING_SERVE_API_KEY` in [configuration.md](./configuration.md)), all the API requests **must** provide the header `X-Api-Key` with the correct secret key.

 ## Convert endpoints

@@ -429,7 +491,7 @@ with connect(uri) as websocket:
            payload = json.loads(message)
            if payload["message"] == "error":
                break
-            if payload["message"] == "error" and payload["task"]["task_status"] in ("success", "failure"):
+            if payload["message"] == "update" and payload["task"]["task_status"] in ("success", "failure"):
                break
        except:
          break
--- a/docs/v1_migration.md
+++ b/docs/v1_migration.md
@@ -37,7 +37,7 @@ New version:
    "options": {},  // conversion options
    "sources": [
        // input document provided as base64-encoded string
-        {"kind": "kind", "base64_string": "abc123...", "filename": "file.pdf"},
+        {"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
        // input document provided as http urls
        {"kind": "http", "url": "https://..."},
    ]
--- a/examples/split_processing.py
+++ b/examples/split_processing.py
@@ -0,0 +1,124 @@
+import json
+import time
+from pathlib import Path
+
+import httpx
+from pydantic import BaseModel
+from pypdf import PdfReader
+
+from docling_core.types.doc.document import DoclingDocument
+
+# Variables to use
+path_to_pdf = Path("./tests/2206.01062v1.pdf")
+pages_per_file = 4
+base_url = "http://localhost:5001/v1"
+out_dir = Path("examples/splitted_pdf/")
+
+
+class ConvertedSplittedPdf(BaseModel):  # tracks one submitted page-range task
+    task_id: str  # id returned by post_file for the async conversion
+    conversion_finished: bool = False  # set from check_task_status in main
+    result: dict | None = None  # JSON from get_task_result, filled in by main
+
+
+def get_task_result(task_id: str):
+    response = httpx.get(
+        f"{base_url}/result/{task_id}",
+        timeout=15,
+    )
+    return response.json()
+
+
+def check_task_status(task_id: str):
+    response = httpx.get(f"{base_url}/status/poll/{task_id}", timeout=15)
+    task = response.json()
+    task_status = task["task_status"]
+
+    task_finished = False
+    if task_status == "success":
+        task_finished = True
+
+    if task_status in ("failure", "revoked"):
+        raise RuntimeError("A conversion failed")
+
+    time.sleep(5)
+
+    return task_finished
+
+
+def post_file(file_path: Path, start_page: int, end_page: int):
+    payload = {
+        "to_formats": ["json"],
+        "image_export_mode": "placeholder",
+        "ocr": False,
+        "abort_on_error": False,
+        "page_range": [start_page, end_page],
+    }
+
+    files = {
+        "files": (file_path.name, file_path.open("rb"), "application/pdf"),
+    }
+    response = httpx.post(
+        f"{base_url}/convert/file/async",
+        files=files,
+        data=payload,
+        timeout=15,
+    )
+
+    task = response.json()
+
+    return task["task_id"]
+
+
+def main():
+    filename = path_to_pdf
+
+    splitted_pdfs: list[ConvertedSplittedPdf] = []
+
+    with open(filename, "rb") as input_pdf_file:
+        pdf_reader = PdfReader(input_pdf_file)
+        total_pages = len(pdf_reader.pages)
+
+        for start_page in range(0, total_pages, pages_per_file):
+            task_id = post_file(
+                filename, start_page + 1, min(start_page + pages_per_file, total_pages)
+            )
+            splitted_pdfs.append(ConvertedSplittedPdf(task_id=task_id))
+
+    all_files_converted = False
+    while not all_files_converted:
+        found_conversion_running = False
+        for splitted_pdf in splitted_pdfs:
+            if not splitted_pdf.conversion_finished:
+                found_conversion_running = True
+                print("checking conversion status...")
+                splitted_pdf.conversion_finished = check_task_status(
+                    splitted_pdf.task_id
+                )
+        if not found_conversion_running:
+            all_files_converted = True
+
+    for splitted_pdf in splitted_pdfs:
+        splitted_pdf.result = get_task_result(splitted_pdf.task_id)
+
+    files = []
+    for i, splitted_pdf in enumerate(splitted_pdfs):
+        json_content = json.dumps(
+            splitted_pdf.result.get("document").get("json_content"), indent=2
+        )
+        doc = DoclingDocument.model_validate_json(json_content)
+        filename = f"{out_dir}/splited_json_{i}.json"
+        doc.save_as_json(filename=filename)
+        files.append(filename)
+
+    docs = [DoclingDocument.load_from_json(filename=f) for f in files]
+    concate_doc = DoclingDocument.concatenate(docs=docs)
+
+    exp_json_file = Path(f"{out_dir}/concatenated.json")
+    concate_doc.save_as_json(exp_json_file)
+
+    print("Finished")
+
+
+if __name__ == "__main__":
+    main()
--- a/img/fastapi-ui.png
+++ b/img/fastapi-ui.png
--- a/img/swagger.png
+++ b/img/swagger.png
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "1.1.0"  # DO NOT EDIT, updated automatically
+version = "1.8.0"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -8,7 +8,6 @@ authors = [
    {name="Guillaume Moutier", email="gmoutier@redhat.com"},
    {name="Anil Vishnoi", email="avishnoi@redhat.com"},
    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
-    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
    {name="Christoph Auer", email="cau@zurich.ibm.com"},
    {name="Peter Staar", email="taa@zurich.ibm.com"},
 ]
@@ -35,9 +34,9 @@ classifiers = [
 requires-python = ">=3.10"
 dependencies = [
    "docling~=2.38",
-    "docling-core>=2.44.1",
-    "docling-jobkit[kfp,vlm]~=1.2",
-    "fastapi[standard]~=0.115",
+    "docling-core>=2.45.0",
+    "docling-jobkit[kfp,rq,vlm]>=1.8.0,<2.0.0",
+    "fastapi[standard]<0.119.0",  # ~=0.115
    "httpx~=0.28",
    "pydantic~=2.10",
    "pydantic-settings~=2.4",
@@ -51,18 +50,20 @@ dependencies = [

 [project.optional-dependencies]
 ui = [
-    "gradio~=5.9",
-    "pydantic<2.11.0",  # fix compatibility between gradio and new pydantic 2.11
+    "python-jsx>=0.2.0",
 ]
 tesserocr = [
    "tesserocr~=2.7"
 ]
+easyocr = [
+    "easyocr>=1.7",
+]
 rapidocr = [
-    "rapidocr-onnxruntime~=1.4; python_version<'3.13'",
-    "onnxruntime~=1.7",
+    "rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
+    "onnxruntime (>=1.7.0,<2.0.0)",
 ]
 flash-attn = [
-  "flash-attn~=2.7.0; sys_platform == 'linux' and platform_machine == 'x86_64'"
+  "flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
 ]

 [dependency-groups]
@@ -70,31 +71,43 @@ dev = [
    "asgi-lifespan~=2.0",
    "mypy~=1.11",
    "pre-commit-uv~=4.1",
+    "pypdf>=6.0.0",
    "pytest~=8.3",
    "pytest-asyncio~=0.24",
    "pytest-check~=2.4",
    "python-semantic-release~=7.32",
    "ruff>=0.9.6",
 ]
+
 pypi = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
 ]
+
 cpu = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
-]
-cu124 = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
 ]
+
+# cu124 = [
+#   "torch>=2.6.0",
+#   "torchvision>=0.21.0",
+# ]
+
 cu126 = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
 ]
+
 cu128 = [
-  "torch>=2.7.0",
-  "torchvision>=0.22.0",
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+]
+
+rocm = [
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+  "pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64'",
 ]

 [tool.uv]
@@ -104,31 +117,41 @@ conflicts = [
  [
    { group = "pypi" },
    { group = "cpu" },
-    { group = "cu124" },
+    # { group = "cu124" },
    { group = "cu126" },
    { group = "cu128" },
+    { group = "rocm" },
  ],
 ]
 environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
 override-dependencies = [
-  "urllib3~=2.0"
+  "urllib3~=2.0",
+  "xgrammar>=0.1.24"
 ]

 [tool.uv.sources]
 torch = [
  { index = "pytorch-pypi", group = "pypi" },
  { index = "pytorch-cpu", group = "cpu" },
-  { index = "pytorch-cu124", group = "cu124" },
-  { index = "pytorch-cu126", group = "cu126" },
-  { index = "pytorch-cu128", group = "cu128" },
+  # { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]
+
 torchvision = [
  { index = "pytorch-pypi", group = "pypi" },
  { index = "pytorch-cpu", group = "cpu" },
-  { index = "pytorch-cu124", group = "cu124" },
-  { index = "pytorch-cu126", group = "cu126" },
-  { index = "pytorch-cu128", group = "cu128" },
+  # { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]
+
+pytorch-triton-rocm = [
+  { index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
+]
+
 # docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
 # docling-jobkit = { path = "../docling-jobkit", editable = true }

@@ -142,10 +165,10 @@ name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true

-[[tool.uv.index]]
-name = "pytorch-cu124"
-url = "https://download.pytorch.org/whl/cu124"
-explicit = true
+# [[tool.uv.index]]
+# name = "pytorch-cu124"
+# url = "https://download.pytorch.org/whl/cu124"
+# explicit = true

 [[tool.uv.index]]
 name = "pytorch-cu126"
@@ -157,6 +180,11 @@ name = "pytorch-cu128"
 url = "https://download.pytorch.org/whl/cu128"
 explicit = true

+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+explicit = true
+
 [tool.setuptools.packages.find]
 include = ["docling_serve*"]
 namespaces = true
@@ -254,6 +282,7 @@ module = [
    "kfp.*",
    "kfp_server_api.*",
    "mlx_vlm.*",
+    "mlx.*",
    "scalar_fastapi.*",
 ]
 ignore_missing_imports = true
--- a/scripts/init.py
+++ b/scripts/init.py
--- a/scripts/update_doc_usage.py
+++ b/scripts/update_doc_usage.py
@@ -0,0 +1,199 @@
+import re
+from typing import Annotated, Any, Union, get_args, get_origin
+
+from pydantic import BaseModel
+
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+
+DOCS_FILE = "docs/usage.md"
+
+VARIABLE_WORDS: list[str] = [  # words format_variable_names wraps in backticks
+    "picture_description_local",
+    "vlm_pipeline_model",
+    "vlm",
+    "vlm_pipeline_model_api",
+    "ocr_engines_enum",
+    "easyocr",
+    "dlparse_v4",
+    "fast",
+    "picture_description_api",
+    "vlm_pipeline_model_local",
+]
+
+
+def format_variable_names(text: str) -> str:
+    """Format specific words in description to be code-formatted."""
+    sorted_words = sorted(VARIABLE_WORDS, key=len, reverse=True)
+
+    escaped_words = [re.escape(word) for word in sorted_words]
+
+    for word in escaped_words:
+        pattern = rf"(?<!`)\b{word}\b(?!`)"
+        text = re.sub(pattern, f"`{word}`", text)
+
+    return text
+
+
+def format_allowed_values_description(description: str) -> str:
+    """Format description to code-format allowed values."""
+    # Regex pattern to find text after "Allowed values:"
+    match = re.search(r"Allowed values:(.+?)(?:\.|$)", description, re.DOTALL)
+
+    if match:
+        # Extract the allowed values
+        values_str = match.group(1).strip()
+
+        # Split values, handling both comma and 'and' separators
+        values = re.split(r"\s*(?:,\s*|\s+and\s+)", values_str)
+
+        # Remove any remaining punctuation and whitespace
+        values = [value.strip("., ") for value in values]
+
+        # Create code-formatted values
+        formatted_values = ", ".join(f"`{value}`" for value in values)
+
+        # Replace the original allowed values with formatted version
+        formatted_description = re.sub(
+            r"(Allowed values:)(.+?)(?:\.|$)",
+            f"\\1 {formatted_values}.",
+            description,
+            flags=re.DOTALL,
+        )
+
+        return formatted_description
+
+    return description
+
+
+def _format_type(type_hint: Any) -> str:
+    """Format type ccrrectly, like Annotation or Union."""
+    if get_origin(type_hint) is Annotated:
+        base_type = get_args(type_hint)[0]
+        return _format_type(base_type)
+
+    if hasattr(type_hint, "__origin__"):
+        origin = type_hint.__origin__
+        args = get_args(type_hint)
+
+        if origin is list:
+            return f"List[{_format_type(args[0])}]"
+        elif origin is dict:
+            return f"Dict[{_format_type(args[0])}, {_format_type(args[1])}]"
+        elif str(origin).__contains__("Union") or str(origin).__contains__("Optional"):
+            return " or ".join(_format_type(arg) for arg in args)
+        elif origin is None:
+            return "null"
+
+    if hasattr(type_hint, "__name__"):
+        return type_hint.__name__
+
+    return str(type_hint)
+
+
+def _unroll_types(tp) -> list[type]:
+    """
+    Unrolls typing.Union and typing.Optional types into a flat list of types.
+    """
+    origin = get_origin(tp)
+    if origin is Union:
+        # Recursively unroll each type inside the Union
+        types = []
+        for arg in get_args(tp):
+            types.extend(_unroll_types(arg))
+        # Remove duplicates while preserving order
+        return list(dict.fromkeys(types))
+    else:
+        # If it's not a Union, just return it as a single-element list
+        return [tp]
+
+
+def generate_model_doc(model: type[BaseModel]) -> str:
+    """Generate documentation for a Pydantic model."""
+
+    models_stack = [model]
+
+    doc = ""
+    while models_stack:
+        current_model = models_stack.pop()
+
+        doc += f"<h4>{current_model.__name__}</h4>\n"
+
+        doc += "\n| Field Name | Type | Description |\n"
+        doc += "|------------|------|-------------|\n"
+
+        base_models = []
+        if hasattr(current_model, "__mro__"):
+            base_models = current_model.__mro__
+        else:
+            base_models = [current_model]
+
+        for base_model in base_models:
+            # Check if this is a Pydantic model
+            if hasattr(base_model, "model_fields"):
+                # Iterate through fields of this model
+                for field_name, field in base_model.model_fields.items():
+                    # Extract description from Annotated field if possible
+                    description = field.description or "No description provided."
+                    description = format_allowed_values_description(description)
+                    description = format_variable_names(description)
+
+                    # Handle Annotated types
+                    original_type = field.annotation
+                    if get_origin(original_type) is Annotated:
+                        # Extract base type and additional metadata
+                        type_args = get_args(original_type)
+                        base_type = type_args[0]
+                    else:
+                        base_type = original_type
+
+                    field_type = _format_type(base_type)
+                    field_type = format_variable_names(field_type)
+
+                    doc += f"| `{field_name}` | {field_type} | {description} |\n"
+
+                    for field_type in _unroll_types(base_type):
+                        if issubclass(field_type, BaseModel):
+                            models_stack.append(field_type)
+
+                # stop iterating the base classes
+                break
+
+        doc += "\n"
+    return doc
+
+
+def update_documentation():
+    """Update the documentation file with model information."""
+    doc_request = generate_model_doc(ConvertDocumentsRequestOptions)
+
+    with open(DOCS_FILE) as f:
+        content = f.readlines()
+
+    # Prepare to update the content
+    new_content = []
+    in_cp_section = False
+
+    for line in content:
+        if line.startswith("<!-- begin: parameters-docs -->"):
+            in_cp_section = True
+            new_content.append(line)
+            new_content.append(doc_request)
+            continue
+
+        if in_cp_section and line.strip() == "<!-- end: parameters-docs -->":
+            in_cp_section = False
+
+        if not in_cp_section:
+            new_content.append(line)
+
+    # Only write to the file if new_content is different from content
+    if "".join(new_content) != "".join(content):
+        with open(DOCS_FILE, "w") as f:
+            f.writelines(new_content)
+        print(f"Documentation updated in {DOCS_FILE}")
+    else:
+        print("No changes detected. Documentation file remains unchanged.")
+
+
+if __name__ == "__main__":
+    update_documentation()
--- a/tests/test_1-file-all-outputs.py
+++ b/tests/test_1-file-all-outputs.py
@@ -6,10 +6,15 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


--- a/tests/test_1-file-async.py
+++ b/tests/test_1-file-async.py
@@ -6,10 +6,15 @@ import httpx
 import pytest
 import pytest_asyncio

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


--- a/tests/test_1-url-all-outputs.py
+++ b/tests/test_1-url-all-outputs.py
@@ -5,10 +5,15 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


--- a/tests/test_1-url-async-ws.py
+++ b/tests/test_1-url-async-ws.py
@@ -6,16 +6,24 @@ import pytest
 import pytest_asyncio
 from websockets.sync.client import connect

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


@pytest.mark.asyncio
 async def test_convert_url(async_client: httpx.AsyncClient):
    """Test convert URL to all outputs"""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key

    doc_filename = Path("tests/2408.09869v5.pdf")
    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
@@ -57,7 +65,13 @@ async def test_convert_url(async_client: httpx.AsyncClient):

    task = response.json()

-    uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}"
+    uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}?api_key={docling_serve_settings.api_key}"
    with connect(uri) as websocket:
        for message in websocket:
            print(message)
+
+    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
+    assert result_resp.status_code == 200, "Response should be 200 OK"
+    result = result_resp.json()
+    print(f"{result['processing_time']=}")
+    assert result["processing_time"] > 1.0
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -6,10 +6,15 @@ import httpx
 import pytest
 import pytest_asyncio

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


@@ -57,3 +62,60 @@ async def test_convert_url(async_client):
        time.sleep(2)

    assert task["task_status"] == "success"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("include_converted_doc", [False, True])
+async def test_chunk_url(async_client, include_converted_doc: bool):
+    """Chunk a document given by URL through the async hybrid-chunk endpoint.
+
+    Submits the source, polls the task status until it reports success or
+    failure, then fetches the result and checks that chunks and documents are
+    present. The converted document's JSON content must be populated exactly
+    when ``include_converted_doc`` is true.
+    """
+    # Imported locally so the test does not rely on a module-level import.
+    import asyncio
+
+    example_docs = [
+        "https://arxiv.org/pdf/2311.18481",
+    ]
+
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
+        "include_converted_doc": include_converted_doc,
+    }
+
+    response = await async_client.post(
+        f"{base_url}/chunk/hybrid/source/async", json=payload
+    )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        # Yield to the event loop while waiting instead of blocking it.
+        await asyncio.sleep(2)
+
+    assert task["task_status"] == "success"
+
+    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
+    assert result_resp.status_code == 200, "Response should be 200 OK"
+    result = result_resp.json()
+    print("Got result.")
+
+    assert "chunks" in result
+    assert len(result["chunks"]) > 0
+
+    assert "documents" in result
+    assert len(result["documents"]) > 0
+    assert result["documents"][0]["status"] == "success"
+
+    if include_converted_doc:
+        assert result["documents"][0]["content"]["json_content"] is not None
+        assert (
+            result["documents"][0]["content"]["json_content"]["schema_name"]
+            == "DoclingDocument"
+        )
+    else:
+        assert result["documents"][0]["content"]["json_content"] is None
--- a/tests/test_2-files-all-outputs.py
+++ b/tests/test_2-files-all-outputs.py
@@ -5,10 +5,15 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


--- a/tests/test_2-urls-all-outputs.py
+++ b/tests/test_2-urls-all-outputs.py
@@ -3,10 +3,15 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


--- a/tests/test_2-urls-async-all-outputs.py
+++ b/tests/test_2-urls-async-all-outputs.py
@@ -6,10 +6,15 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


--- a/tests/test_fastapi_endpoints.py
+++ b/tests/test_fastapi_endpoints.py
@@ -13,6 +13,7 @@ from pytest_check import check
 from docling_core.types.doc import DoclingDocument, PictureItem

 from docling_serve.app import create_app
+from docling_serve.settings import docling_serve_settings


@pytest.fixture(scope="session")
@@ -20,6 +21,14 @@ def event_loop():
    return asyncio.get_event_loop()


+@pytest.fixture(scope="session")
+def auth_headers():
+    """Return the ``X-Api-Key`` header dict, or an empty dict if no key is set."""
+    api_key = docling_serve_settings.api_key
+    return {"X-Api-Key": api_key} if api_key else {}
+
+
@pytest_asyncio.fixture(scope="session")
 async def app():
    app = create_app()
@@ -46,7 +55,15 @@ async def test_health(client: AsyncClient):


@pytest.mark.asyncio
-async def test_convert_file(client: AsyncClient):
+async def test_openapijson(client: AsyncClient):
+    """The OpenAPI document is served and contains an ``openapi`` key."""
+    resp = await client.get("/openapi.json")
+    assert resp.status_code == 200
+    assert "openapi" in resp.json()
+
+
+@pytest.mark.asyncio
+async def test_convert_file(client: AsyncClient, auth_headers: dict):
    """Test convert single file to all outputs"""

    endpoint = "/v1/convert/file"
@@ -79,7 +96,9 @@ async def test_convert_file(client: AsyncClient):
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

-    response = await client.post(endpoint, files=files, data=options)
+    response = await client.post(
+        endpoint, files=files, data=options, headers=auth_headers
+    )
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()
@@ -160,7 +179,7 @@ async def test_convert_file(client: AsyncClient):


@pytest.mark.asyncio
-async def test_referenced_artifacts(client: AsyncClient):
+async def test_referenced_artifacts(client: AsyncClient, auth_headers: dict):
    """Test that paths in the zip file are relative to the zip file root."""

    endpoint = "/v1/convert/file"
@@ -178,7 +197,9 @@ async def test_referenced_artifacts(client: AsyncClient):
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

-    response = await client.post(endpoint, files=files, data=options)
+    response = await client.post(
+        endpoint, files=files, data=options, headers=auth_headers
+    )
    assert response.status_code == 200, "Response should be 200 OK"

    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
--- a/tests/test_file_opts.py
+++ b/tests/test_file_opts.py
@@ -11,6 +11,7 @@ from docling_core.types import DoclingDocument
 from docling_core.types.doc.document import PictureDescriptionData

 from docling_serve.app import create_app
+from docling_serve.settings import docling_serve_settings


@pytest.fixture(scope="session")
@@ -18,6 +19,14 @@ def event_loop():
    return asyncio.get_event_loop()


+@pytest.fixture(scope="session")
+def auth_headers():
+    """Build request headers carrying the configured API key, if there is one."""
+    configured_key = docling_serve_settings.api_key
+    if not configured_key:
+        return {}
+    return {"X-Api-Key": configured_key}
+
+
@pytest_asyncio.fixture(scope="session")
 async def app():
    app = create_app()
@@ -37,7 +46,7 @@ async def client(app):


@pytest.mark.asyncio
-async def test_convert_file(client: AsyncClient):
+async def test_convert_file(client: AsyncClient, auth_headers: dict):
    """Test convert single file to all outputs"""

    endpoint = "/v1/convert/file"
@@ -63,7 +72,9 @@ async def test_convert_file(client: AsyncClient):
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

-    response = await client.post(endpoint, files=files, data=options)
+    response = await client.post(
+        endpoint, files=files, data=options, headers=auth_headers
+    )
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()
--- a/tests/test_results_clear.py
+++ b/tests/test_results_clear.py
@@ -17,6 +17,14 @@ def event_loop():
    return asyncio.get_event_loop()


+@pytest.fixture(scope="session")
+def auth_headers():
+    """Headers for authenticated calls; empty when no API key is configured."""
+    key = docling_serve_settings.api_key
+    return dict([("X-Api-Key", key)]) if key else {}
+
+
@pytest_asyncio.fixture(scope="session")
 async def app():
    app = create_app()
@@ -35,7 +43,7 @@ async def client(app):
        yield client


-async def convert_file(client: AsyncClient):
+async def convert_file(client: AsyncClient, auth_headers: dict):
    doc_filename = Path("tests/2408.09869v5.pdf")
    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()

@@ -52,7 +60,9 @@ async def convert_file(client: AsyncClient):
        ],
    }

-    response = await client.post("/v1/convert/source/async", json=payload)
+    response = await client.post(
+        "/v1/convert/source/async", json=payload, headers=auth_headers
+    )
    assert response.status_code == 200, "Response should be 200 OK"

    task = response.json()
@@ -60,7 +70,9 @@ async def convert_file(client: AsyncClient):
    print(json.dumps(task, indent=2))

    while task["task_status"] not in ("success", "failure"):
-        response = await client.get(f"/v1/status/poll/{task['task_id']}")
+        response = await client.get(
+            f"/v1/status/poll/{task['task_id']}", headers=auth_headers
+        )
        assert response.status_code == 200, "Response should be 200 OK"
        task = response.json()
        print(f"{task['task_status']=}")
@@ -74,52 +86,62 @@ async def convert_file(client: AsyncClient):


@pytest.mark.asyncio
-async def test_clear_results(client: AsyncClient):
+async def test_clear_results(client: AsyncClient, auth_headers: dict):
    """Test removal of task."""

    # Set long delay deletion
    docling_serve_settings.result_removal_delay = 100

    # Convert and wait for completion
-    task = await convert_file(client)
+    task = await convert_file(client, auth_headers=auth_headers)

    # Get result once
-    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    result_response = await client.get(
+        f"/v1/result/{task['task_id']}", headers=auth_headers
+    )
    assert result_response.status_code == 200, "Response should be 200 OK"
    print("Result 1 ok.")
    result = result_response.json()
    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"

    # Get result twice
-    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    result_response = await client.get(
+        f"/v1/result/{task['task_id']}", headers=auth_headers
+    )
    assert result_response.status_code == 200, "Response should be 200 OK"
    print("Result 2 ok.")
    result = result_response.json()
    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"

    # Clear
-    clear_response = await client.get("/v1/clear/results?older_then=0")
+    clear_response = await client.get(
+        "/v1/clear/results?older_then=0", headers=auth_headers
+    )
    assert clear_response.status_code == 200, "Response should be 200 OK"
    print("Clear ok.")

    # Get deleted result
-    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    result_response = await client.get(
+        f"/v1/result/{task['task_id']}", headers=auth_headers
+    )
    assert result_response.status_code == 404, "Response should be removed"
    print("Result was no longer found.")


@pytest.mark.asyncio
-async def test_delay_remove(client: AsyncClient):
+async def test_delay_remove(client: AsyncClient, auth_headers: dict):
    """Test automatic removal of task with delay."""

    # Set short delay deletion
    docling_serve_settings.result_removal_delay = 5

    # Convert and wait for completion
-    task = await convert_file(client)
+    task = await convert_file(client, auth_headers=auth_headers)

    # Get result once
-    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    result_response = await client.get(
+        f"/v1/result/{task['task_id']}", headers=auth_headers
+    )
    assert result_response.status_code == 200, "Response should be 200 OK"
    print("Result ok.")
    result = result_response.json()
@@ -129,5 +151,7 @@ async def test_delay_remove(client: AsyncClient):
    await asyncio.sleep(10)

    # Get deleted result
-    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    result_response = await client.get(
+        f"/v1/result/{task['task_id']}", headers=auth_headers
+    )
    assert result_response.status_code == 404, "Response should be removed"
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
DKL	025c4c8942	Cleanup. Text formatting. Fallback picture annotation.	2025-11-24 15:17:39 +01:00
DKL	8d5892b176	Revamp UI to SSR. Signed-off-by: DKL <dkl@zurich.ibm.com>	2025-11-21 16:15:36 +01:00
Michele Dolfi	e437e830c9	fix: Dependencies updates – Docling 2.63.0 (#443 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-11-21 10:31:56 +01:00
Michele Dolfi	2c23f65507	feat: version endpoint (#442 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-11-20 17:57:10 +01:00
Burt Holzman	5dc942f25b	chore: docs typo (cude -> cuda) (#437 ) Signed-off-by: Burt Holzman <burt@fnal.gov>	2025-11-17 08:31:44 +01:00
github-actions[bot]	ff310f2b13	chore: bump version to 1.8.0 [skip ci]	2025-10-31 17:01:56 +00:00
Michele Dolfi	bf132a3c3e	feat: Docling with new standard pipeline with threading (#428 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-31 17:57:38 +01:00
Michele Dolfi	35319b0da7	docs: Expand automatic docs to nested objects. More complete usage docs. (#426 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-31 15:02:20 +01:00
Michele Dolfi	f3957aeb57	docs: add docs for docling parameters like performance and debug (#424 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-31 14:17:31 +01:00
github-actions[bot]	1ec44220f5	chore: bump version to 1.7.2 [skip ci]	2025-10-30 15:14:17 +00:00
Michele Dolfi	e9b41406c4	fix: Update locked dependencies. Docling fixes, Expose temperature parameter for vlm models (#423 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-30 16:09:21 +01:00
Michele Dolfi	a2e68d39ae	test: check that processing time is not skipped (#416 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-27 08:29:05 +01:00
Michele Dolfi	7bf2e7b366	fix: temporary constrain fastapi version (#418 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-24 11:22:05 +02:00
github-actions[bot]	462ceff9d1	chore: bump version to 1.7.1 [skip ci]	2025-10-22 14:01:58 +00:00
Michele Dolfi	97613a1974	fix: Upgrade dependencies (#417 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-22 15:42:59 +02:00
Paweł Rein	0961f2c574	fix: makes task status shared across multiple instances in RQ mode, resolves #378 (#415 ) Signed-off-by: Pawel Rein <pawel.rein@prezi.com>	2025-10-21 15:08:42 +02:00
Tiago Santana	9672f310b1	docs: Generate usage.md automatically (#340 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-21 14:27:01 +02:00
Michele Dolfi	56e8535a7a	chore: publish release notes on Discord (#409 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-20 14:15:58 +02:00
Michele Dolfi	0f274ab135	fix: `DOCLING_SERVE_SYNC_POLL_INTERVAL` controls the synchronous polling time (#413 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-20 14:14:00 +02:00
Michele Dolfi	0427f71ef4	chore: allow to change the container runtime (#412 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-20 14:13:51 +02:00
github-actions[bot]	b6eece7ef0	chore: bump version to 1.7.0 [skip ci]	2025-10-17 12:16:37 +00:00
Michele Dolfi	f5af71e8f6	feat(UI): add auto and orcmac options in demo UI (#408 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-17 12:23:57 +02:00
Michele Dolfi	d95ea94087	feat: Docling with auto-ocr (#403 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-15 21:15:29 +02:00
sahlex	5344505718	fix: run docling ui behind a reverse proxy using a context path (#396 ) Signed-off-by: Sahler.Alexander <Alexander.Sahler@m-net.de> Signed-off-by: sahlex <1122279+sahlex@users.noreply.github.com> Co-authored-by: Sahler.Alexander <Alexander.Sahler@m-net.de>	2025-10-09 16:07:02 +02:00
github-actions[bot]	5edc624fbf	chore: bump version to 1.6.0 [skip ci]	2025-10-03 13:39:59 +00:00
Michele Dolfi	45f0f3c8f9	fix: update locked dependencies (#392 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-03 15:33:45 +02:00
Michele Dolfi	0595d31d5b	feat: pin new version of jobkit with granite-docling and connectors (#391 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-03 14:24:51 +02:00
Michele Dolfi	f6b5f0e063	docs: fix docs for websocket breaking condition (#390 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-02 10:55:00 +02:00
Michele Dolfi	8b22a39141	fix(UI): allow both lowercase and uppercase extensions (#386 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-29 09:40:49 +02:00
erikmargaronis	d4eac053f9	fix: Correctly raise HTTPException for Gateway Timeout (#382 ) Signed-off-by: Erik Margaronis <erik.margaronis@gmail.com>	2025-09-29 08:06:21 +02:00
Rui Dias Gomes	fa1c5f04f3	ci: improve caching steps (#371 ) Signed-off-by: rmdg88 <rmdg88@gmail.com>	2025-09-23 18:15:12 +02:00
Viktor Kuropiatnyk	ba61af2359	fix: Pinning of higher version of dependencies to fix potential security issues (#363 ) Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>	2025-09-18 08:57:41 +02:00
github-actions[bot]	6b6dd8a0d0	chore: bump version to 1.5.1 [skip ci]	2025-09-17 13:45:40 +00:00
Michele Dolfi	513ae0c119	fix: remove old dependencies, fixes in docling-parse and more minor dependencies upgrade (#362 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-17 15:36:23 +02:00
Rui Dias Gomes	bde040661f	fix: updates rapidocr deps (#361 ) Signed-off-by: rmdg88 <rmdg88@gmail.com>	2025-09-16 14:00:21 +02:00
github-actions[bot]	496f7ec26b	chore: bump version to 1.5.0 [skip ci]	2025-09-09 08:46:36 +00:00
Michele Dolfi	9d6def0ec8	feat: add chunking endpoints (#353 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-09 08:38:54 +02:00
github-actions[bot]	a4fed2d965	chore: bump version to 1.4.1 [skip ci]	2025-09-08 10:28:12 +00:00
Michele Dolfi	b0360d723b	fix: trigger fix after ci fixes (#355 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-08 12:23:07 +02:00
Michele Dolfi	4adc0dfa79	ci: fix use simple tag for testing (#354 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-08 11:29:55 +02:00
github-actions[bot]	40c7f1bcd3	chore: bump version to 1.4.0 [skip ci]	2025-09-05 17:57:08 +00:00
Michele Dolfi	d64a2a974a	feat(docling): perfomance improvements in parsing, new layout model, fixes in html processing (#352 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-05 16:21:29 +02:00
Tiago Santana	0d4545a65a	docs: add split processing example (#303 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-04 10:42:11 +02:00
Rui Dias Gomes	fe98338239	ci: fix runner disk space issue (#350 ) Signed-off-by: Rui Dias Gomes <66125272+rmdg88@users.noreply.github.com>	2025-09-04 09:17:19 +02:00
Michele Dolfi	b844ce737e	ci: remove mdlint (#348 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-03 15:42:55 +02:00
Antonio Pisano	27fdd7b85a	docs: document DOCLING_NUM_THREADS environment variable (#341 ) Signed-off-by: Antonio Pisano <antonio.pisano@wu.ac.at> Co-authored-by: Antonio Pisano <antonio.pisano@wu.ac.at>	2025-09-03 11:00:28 +02:00
Rui Dias Gomes	1df62adf01	ci: workflow improvements (#310 ) Signed-off-by: rmdg88 <rmdg88@gmail.com> Signed-off-by: Rui Dias Gomes <66125272+rmdg88@users.noreply.github.com>	2025-09-03 10:06:30 +02:00
Michele Dolfi	e5449472b2	fix: upgrade to latest docling version with fixes (#335 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-25 10:55:43 +02:00
Michele Dolfi	81f0a8ddf8	docs: fix parameters typo (#333 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-22 14:59:12 +02:00
Michele Dolfi	a69cc867f5	docs: Describe how to use Docling MCP (#332 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-22 14:56:08 +02:00
github-actions[bot]	624f65d41b	chore: bump version to 1.3.1 [skip ci]	2025-08-21 07:01:51 +00:00
Michele Dolfi	f02dbc0144	fix: configuration and performance fixes via upgrade of packages (#328 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-20 20:40:52 +02:00
Michele Dolfi	37fe02277b	docs: fix parameter in api key docs (#323 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-15 11:00:05 +02:00
github-actions[bot]	783ada0580	chore: bump version to 1.3.0 [skip ci]	2025-08-14 14:26:57 +00:00
VIktor Kuropiantnyk	71edf41849	docs: example of docling-serve deployment in the RQ engine mode (#321 ) Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-14 16:10:39 +02:00
Michele Dolfi	9a64410552	feat: Add configuration option for apikey security (#322 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-14 15:25:53 +02:00
Michele Dolfi	6e9aa8c759	docs: handling models in docling-serve (#319 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-14 09:12:04 +02:00
Michele Dolfi	885f319d3a	feat: Add RQ engine (#315 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-14 08:48:31 +02:00
Tiago Santana	d584895e11	docs: add Gradio cache usage (#312 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-08-13 16:49:54 +02:00
github-actions[bot]	d26e6637d8	chore: bump version to 1.2.2 [skip ci]	2025-08-13 14:48:17 +00:00
VIktor Kuropiantnyk	7692eb2600	fix: update of transformers module to 4.55.1 (#316 ) Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>	2025-08-13 16:07:52 +02:00
github-actions[bot]	3bd7828570	chore: bump version to 1.2.1 [skip ci]	2025-08-13 07:37:55 +00:00
Michele Dolfi	8b470cba8e	fix: handling of vlm model options and update deps (#314 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-13 09:32:21 +02:00
Tiago Santana	8048f4589a	fix: add missing response type in sync endpoints (#309 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-08-08 12:32:19 +02:00
Thomas Vitale	b3058e91e0	docs: Update readme to use v1 (#306 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 09:02:29 +02:00
Thomas Vitale	63da9eedeb	docs: Update deployment examples to use v1 API (#308 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 08:47:59 +02:00
Thomas Vitale	b15dc2529f	docs: Fix typo in v1 migration instructions (#307 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 08:44:09 +02:00
github-actions[bot]	4c7207be00	chore: bump version to 1.2.0 [skip ci]	2025-08-07 09:20:10 +00:00
Michele Dolfi	db3fdb5bc1	feat: workers without shared models and convert params (#304 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-07 11:16:06 +02:00
Rui Dias Gomes	fd1b987e8d	feat: add rocm image build support and fix cuda (#292 ) Signed-off-by: rmdg88 <rmdg88@gmail.com> Signed-off-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com> Co-authored-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com>	2025-07-31 14:22:42 +02:00