Mirror of https://github.com/docling-project/docling-serve.git
Synced 2025-11-29 08:33:50 +00:00
Compare commits

130 Commits

| SHA1 |
|---|
| 3bd7828570 |
| 8b470cba8e |
| 8048f4589a |
| b3058e91e0 |
| 63da9eedeb |
| b15dc2529f |
| 4c7207be00 |
| db3fdb5bc1 |
| fd1b987e8d |
| ce15e0302b |
| ecb1874a50 |
| 1333f71c9c |
| ec594d84fe |
| 3771c1b554 |
| 24db461b14 |
| 8706706e87 |
| 766adb2481 |
| 8222cf8955 |
| b922824e5b |
| 56e328baf7 |
| daa924a77e |
| e63197e89e |
| 767ce0982b |
| bfde1a0991 |
| eb3892ee14 |
| 93b84712b2 |
| c45b937064 |
| 50e431f30f |
| 149a8cb1c0 |
| 5f9c20a985 |
| 80755a7d59 |
| 30aca92298 |
| 717fb3a8d8 |
| 873d05aefe |
| 196c5ce42a |
| b5c5f47892 |
| d5455b7f66 |
| 7a682494d6 |
| 524f6a8997 |
| 9ccf8e3b5e |
| ffea34732b |
| b299af002b |
| c4c41f16df |
| 7066f3520a |
| 6a8190c315 |
| 060ecd8b0e |
| 32b8a809f3 |
| de002dfcdc |
| abe5aa03f5 |
| 3f090b7d15 |
| 21c1791e42 |
| 00be428490 |
| 3ff1b2f983 |
| 8406fb9b59 |
| a2dcb0a20f |
| 36787bc061 |
| 509f4889f8 |
| 919cf5c041 |
| 35c2630c61 |
| 382d675631 |
| c65f3c654c |
| 829effec1a |
| 494d66f992 |
| 14bafb2628 |
| 37e2e1ad09 |
| 71c5fae505 |
| 91956cbf4e |
| 4c9571a052 |
| 41624af09f |
| 26bef5bec0 |
| 40bb21d347 |
| ee89ee4dae |
| 6b3d281f02 |
| b598872e5c |
| 087417e5c2 |
| 57f9073bc0 |
| 525a43ff6f |
| c1ce4719c9 |
| 5dfb75d3b9 |
| 420162e674 |
| ff75bab21b |
| 7a0fabae07 |
| 9ffe49a359 |
| 68772bb6f0 |
| 20ec87a63a |
| e30f458923 |
| 03e405638f |
| fd8e40a008 |
| 422c402bab |
| ea090288d3 |
| 07c48edd5d |
| a212547d28 |
| c76daac70c |
| 7994b19b9f |
| ec57b528ed |
| b92c5d8899 |
| 3c9825df30 |
| 8dd0e216fd |
| d406802f9d |
| a92ad48b28 |
| da2b26099d |
| 98b46eda50 |
| 7e75919ae8 |
| c95db36438 |
| 82f8900197 |
| ed851c95fe |
| 05df0735d3 |
| cad1053e36 |
| 7e6d9cdef3 |
| 343b985287 |
| c430d9b1a1 |
| 63141f1cc7 |
| d5557fad9f |
| 36967f7f61 |
| 3b54d9b6ef |
| 4877248368 |
| ec33a61faa |
| 663e03303a |
| c64a450bf9 |
| ae3b4906f1 |
| 7a351fcdea |
| 1615f977a2 |
| 1bf487b18e |
| be7e4162af |
| de42baf8dc |
| 4da28565a7 |
| 2a78142b96 |
| d0e8578931 |
| c6539c42de |
| ddf3144512 |
.dockerignore (new file)

@@ -0,0 +1,40 @@
# Ignore Python cache files
__pycache__/
**/__pycache__/
*.pyc
*.pyo
*.pyd

# Ignore virtual environments
env/
venv/

# Ignore development artifacts
*.log
*.db
*.sqlite3

# Ignore configuration and sensitive files
**/.env
*.env
*.ini
*.cfg

# Ignore IDE and editor settings
.vscode/
.idea/
*.swp
*.swo

# Ignore Git files
.git/
.gitignore

# Ignore Docker files themselves (optional if not needed in the image)
.dockerignore
Dockerfile*

# Ignore build artifacts (if applicable)
build/
dist/
*.egg-info
.env.example (new file)

@@ -0,0 +1,3 @@
TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
UVICORN_WORKERS=2
UVICORN_RELOAD=True
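These defaults are meant to be handed to the server at run time. A minimal sketch of using them with the container images built later in this diff (the image tag and port 5001 are assumptions based on the project's documentation, not part of this change):

```bash
# Hypothetical local run: copy the example settings and pass them
# to the container via --env-file.
cp .env.example .env
docker run --rm -p 5001:5001 --env-file .env \
  ghcr.io/docling-project/docling-serve
```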
.flake8 (deleted file)

@@ -1,7 +0,0 @@
[flake8]
max-line-length = 88
exclude = test/*
max-complexity = 18
docstring-convention = google
ignore = W503,E203
classmethod-decorators = classmethod,validator
.github/PULL_REQUEST_TEMPLATE.md (new file)

@@ -0,0 +1,12 @@
<!-- Thank you for contributing to Docling! -->

<!-- STEPS TO FOLLOW:
  1. Add a description of the changes (frequently the same as the commit description)
  2. Enter the issue number next to "Resolves #" below (if there is no tracking issue resolved, **remove that section**)
  3. Make sure the PR title follows the **Commit Message Formatting**: https://www.conventionalcommits.org/en/v1.0.0/#summary.
-->

<!-- Uncomment this section with the issue number if an issue is being resolved
**Issue resolved by this Pull Request:**
Resolves #
--->
.github/SECURITY.md (new file)

@@ -0,0 +1,23 @@
# Security and Disclosure Information Policy for the Docling Project

The Docling team and community take security bugs seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions.

## Reporting a Vulnerability

If you think you've identified a security issue in a Docling project repository, please DO NOT report the issue publicly via the GitHub issue tracker, etc.

Instead, send an email with as many details as possible to [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com). This is a private mailing list for the maintainers team.

Please do not create a public issue.

## Security Vulnerability Response

Each report is acknowledged and analyzed by the core maintainers within 3 working days.

Any vulnerability information shared with core maintainers stays within the Docling project and will not be disseminated to other projects unless it is necessary to get the issue fixed.

After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.

## Security Alerts

We will send announcements of security vulnerabilities and steps to remediate on the [Docling announcements](https://github.com/docling-project/docling/discussions/categories/announcements).
.github/actions/setup-poetry/action.yml (deleted file)

@@ -1,19 +0,0 @@
name: 'Set up Poetry and install'
description: 'Set up a specific version of Poetry and install dependencies using caching.'
inputs:
  python-version:
    description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
    default: '3.11'
runs:
  using: 'composite'
  steps:
    - name: Install poetry
      run: pipx install poetry==1.8.3
      shell: bash
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ inputs.python-version }}
        cache: 'poetry'
    - name: Install dependencies
      run: poetry install --all-extras
      shell: bash
.github/dco.yml (new file)

@@ -0,0 +1,2 @@
allowRemediationCommits:
  individual: true
.github/mergify.yml (new file)

@@ -0,0 +1,9 @@
merge_protections:
  - name: Enforce conventional commit
    description: Make sure that we follow https://www.conventionalcommits.org/en/v1.0.0/
    if:
      - base = main
    success_conditions:
      - "title ~=
        ^(fix|feat|docs|style|refactor|perf|test|build|ci|chore|revert)(?:\\(.+\
        \\))?(!)?:"
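For illustration, the protection accepts titles like `feat(api): add v1 endpoints` and rejects plain sentences. A quick local check against the same pattern (the unescaped ERE below is an assumption reconstructed from the Mergify condition above):

```bash
# Hypothetical local test of PR titles against the merge-protection pattern.
pattern='^(fix|feat|docs|style|refactor|perf|test|build|ci|chore|revert)(\(.+\))?(!)?:'
echo "feat(api): add v1 endpoints" | grep -qE "$pattern" && echo "accepted"
echo "Update readme"               | grep -qE "$pattern" || echo "rejected"
```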
.github/scripts/release.sh (new executable file)

@@ -0,0 +1,40 @@
#!/bin/bash

set -e # trigger failure on error - do not remove!
set -x # display command on output

if [ -z "${TARGET_VERSION}" ]; then
  >&2 echo "No TARGET_VERSION specified"
  exit 1
fi
CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"

# update package version
uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
uv lock --upgrade-package docling-serve

# collect release notes
REL_NOTES=$(mktemp)
uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"

# update changelog
TMP_CHGLOG=$(mktemp)
TARGET_TAG_NAME="v${TARGET_VERSION}"
RELEASE_URL="$(gh repo view --json url -q ".url")/releases/tag/${TARGET_TAG_NAME}"
printf "## [${TARGET_TAG_NAME}](${RELEASE_URL}) - $(date -Idate)\n\n" >> "${TMP_CHGLOG}"
cat "${REL_NOTES}" >> "${TMP_CHGLOG}"
if [ -f "${CHGLOG_FILE}" ]; then
  printf "\n" | cat - "${CHGLOG_FILE}" >> "${TMP_CHGLOG}"
fi
mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"

# push changes
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
git add pyproject.toml uv.lock "${CHGLOG_FILE}"
COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
git commit -m "${COMMIT_MSG}"
git push origin main

# create GitHub release (incl. Git tag)
gh release create "${TARGET_TAG_NAME}" -F "${REL_NOTES}"
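The script takes all of its inputs from the environment; in CI, cd.yml (later in this diff) exports them before calling it. A sketch of the same invocation (the version number is hypothetical, and outside CI you would also need a `gh` login and push rights to main):

```bash
# Hypothetical manual invocation; in CI these variables come from cd.yml.
export TARGET_VERSION="1.2.2"   # computed by semantic-release in the workflow
export CHGLOG_FILE="CHANGELOG.md"
./.github/scripts/release.sh
```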
.github/styles/config/vocabularies/Docling/accept.txt (new file)

@@ -0,0 +1,36 @@
[Dd]ocling
precommit
asgi
async
(?i)urls
uvicorn
[Ww]ebserver
keyfile
[Ww]ebsocket(s?)
[Kk]ubernetes
UI
(?i)vllm
APIs
[Ss]ubprocesses
(?i)api
Kubeflow
(?i)Jobkit
(?i)cpu
(?i)PyTorch
(?i)CUDA
(?i)NVIDIA
(?i)ROCm
(?i)env
Gradio
bool
Ollama
inbody
LGTMs
Dolfi
Lysak
Nikos
Nassar
Panos
Vagenas
Staar
Livathinos
.github/vale.ini (new file)

@@ -0,0 +1,11 @@
StylesPath = styles
MinAlertLevel = suggestion
; Packages = write-good, proselint

Vocab = Docling

[*.md]
BasedOnStyles = Vale

[CHANGELOG.md]
BasedOnStyles =
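This configuration drives the Vale pre-commit hooks added later in this diff; run by hand, those hooks reduce to the following (assuming the `vale` CLI is installed):

```bash
# Download the style packages, then lint a markdown file with this config.
vale sync --config=.github/vale.ini
vale --config=.github/vale.ini README.md
```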
.github/workflows/cd.yml (new file)

@@ -0,0 +1,59 @@
name: "Run CD"

on:
  workflow_dispatch:

jobs:
  code-checks:
    uses: ./.github/workflows/job-checks.yml
  pre-release-check:
    runs-on: ubuntu-latest
    outputs:
      TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
        run: uv sync --only-dev
      - name: Check version of potential release
        id: version_check
        run: |
          TRGT_VERSION=$(uv run --no-sync semantic-release print-version)
          echo "TRGT_VERSION=${TRGT_VERSION}" >> "$GITHUB_OUTPUT"
          echo "${TRGT_VERSION}"
      - name: Check notes of potential release
        run: uv run --no-sync semantic-release changelog --unreleased
  release:
    needs: [code-checks, pre-release-check]
    if: needs.pre-release-check.outputs.TARGET_TAG_V != ''
    environment: auto-release
    runs-on: ubuntu-latest
    concurrency: release
    steps:
      - uses: actions/create-github-app-token@v1
        id: app-token
        with:
          app-id: ${{ vars.CI_APP_ID }}
          private-key: ${{ secrets.CI_PRIVATE_KEY }}
      - uses: actions/checkout@v4
        with:
          token: ${{ steps.app-token.outputs.token }}
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
        run: uv sync --only-dev
      - name: Run release script
        env:
          GH_TOKEN: ${{ steps.app-token.outputs.token }}
          TARGET_VERSION: ${{ needs.pre-release-check.outputs.TARGET_TAG_V }}
          CHGLOG_FILE: CHANGELOG.md
        run: ./.github/scripts/release.sh
        shell: bash
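The pipeline only runs on manual dispatch; a sketch of kicking it off from the GitHub CLI (requires write access to the repository):

```bash
# Trigger the CD workflow manually on the default branch.
gh workflow run cd.yml --repo docling-project/docling-serve
```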
.github/workflows/checks.yml (deleted file)

@@ -1,34 +0,0 @@
name: Run linter checks

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  py-lint:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.11']
    steps:
      - uses: actions/checkout@v4
      - uses: ./.github/actions/setup-poetry
        with:
          python-version: ${{ matrix.python-version }}
      - name: Run styling check
        run: poetry run pre-commit run --all-files

  markdown-lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: markdownlint-cli2-action
        uses: DavidAnson/markdownlint-cli2-action@v16
        with:
          globs: "**/*.md"
.github/workflows/ci-images-dryrun.yml (new file)

@@ -0,0 +1,53 @@
name: Dry run docling-serve image building

on:
  workflow_call:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  build_image:
    name: Build ${{ matrix.spec.name }} container image
    strategy:
      matrix:
        spec:
          - name: docling-project/docling-serve
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
            platforms: linux/amd64, linux/arm64
          - name: docling-project/docling-serve-cpu
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
          # - name: docling-project/docling-serve-cu124
          #   build_args: |
          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
          #   platforms: linux/amd64
          - name: docling-project/docling-serve-cu126
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
            platforms: linux/amd64
          - name: docling-project/docling-serve-cu128
            build_args: |
              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
            platforms: linux/amd64
          # - name: docling-project/docling-serve-rocm
          #   build_args: |
          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
          #   platforms: linux/amd64

    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write

    uses: ./.github/workflows/job-image.yml
    with:
      publish: false
      build_args: ${{ matrix.spec.build_args }}
      ghcr_image_name: ${{ matrix.spec.name }}
      quay_image_name: ""
      platforms: ${{ matrix.spec.platforms }}
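Each matrix entry boils down to one container build; reproducing the CPU variant on a workstation looks roughly like this (the tag name is illustrative; the Containerfile path comes from job-image.yml below):

```bash
# Hypothetical local equivalent of the docling-serve-cpu matrix entry.
docker build -f Containerfile \
  --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" \
  -t docling-serve-cpu:dev .
```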
.github/workflows/ci.yml (new file)

@@ -0,0 +1,25 @@
name: "Run CI"

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]

jobs:
  code-checks:
    # if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'docling-project/docling-serve' && github.event.pull_request.head.repo.full_name != 'docling-project/docling-serve') }}
    uses: ./.github/workflows/job-checks.yml
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write

  build-images:
    uses: ./.github/workflows/ci-images-dryrun.yml
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write
.github/workflows/dco-advisor.yml (new file)

@@ -0,0 +1,192 @@
name: DCO Advisor Bot

on:
  pull_request_target:
    types: [opened, reopened, synchronize]

permissions:
  pull-requests: write
  issues: write

jobs:
  dco_advisor:
    runs-on: ubuntu-latest
    steps:
      - name: Handle DCO check result
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const pr = context.payload.pull_request || context.payload.check_run?.pull_requests?.[0];
            if (!pr) return;

            const prNumber = pr.number;
            const baseRef = pr.base.ref;
            const headSha =
              context.payload.check_run?.head_sha ||
              pr.head?.sha;
            const username = pr.user.login;

            console.log("HEAD SHA:", headSha);

            const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

            // Poll until DCO check has a conclusion (max 6 attempts, 30s)
            let dcoCheck = null;
            for (let attempt = 0; attempt < 6; attempt++) {
              const { data: checks } = await github.rest.checks.listForRef({
                owner: context.repo.owner,
                repo: context.repo.repo,
                ref: headSha
              });

              console.log("All check runs:");
              checks.check_runs.forEach(run => {
                console.log(`- ${run.name} (${run.status}/${run.conclusion}) @ ${run.head_sha}`);
              });

              dcoCheck = checks.check_runs.find(run =>
                run.name.toLowerCase().includes("dco") &&
                !run.name.toLowerCase().includes("dco_advisor") &&
                run.head_sha === headSha
              );

              if (dcoCheck?.conclusion) break;
              console.log(`Waiting for DCO check... (${attempt + 1})`);
              await sleep(5000); // wait 5 seconds
            }

            if (!dcoCheck || !dcoCheck.conclusion) {
              console.log("DCO check did not complete in time.");
              return;
            }

            const isFailure = ["failure", "action_required"].includes(dcoCheck.conclusion);
            console.log(`DCO check conclusion for ${headSha}: ${dcoCheck.conclusion} (treated as ${isFailure ? "failure" : "success"})`);

            // Parse DCO output for commit SHAs and author
            let badCommits = [];
            let authorName = "";
            let authorEmail = "";
            let moreInfo = `More info: [DCO check report](${dcoCheck?.html_url})`;

            if (isFailure) {
              const { data: commits } = await github.rest.pulls.listCommits({
                owner: context.repo.owner,
                repo: context.repo.repo,
                pull_number: prNumber,
              });

              for (const commit of commits) {
                const commitMessage = commit.commit.message;
                const signoffMatch = commitMessage.match(/^Signed-off-by:\s+.+<.+>$/m);
                if (!signoffMatch) {
                  console.log(`Bad commit found ${commit.sha}`)
                  badCommits.push({
                    sha: commit.sha,
                    authorName: commit.commit.author.name,
                    authorEmail: commit.commit.author.email,
                  });
                }
              }
            }

            // If multiple authors are present, you could adapt the message accordingly
            // For now, we'll just use the first one
            if (badCommits.length > 0) {
              authorName = badCommits[0].authorName;
              authorEmail = badCommits[0].authorEmail;
            }

            // Generate remediation commit message if needed
            let remediationSnippet = "";
            if (badCommits.length && authorEmail) {
              remediationSnippet = `git commit --allow-empty -s -m "DCO Remediation Commit for ${authorName} <${authorEmail}>\n\n` +
                badCommits.map(c => `I, ${c.authorName} <${c.authorEmail}>, hereby add my Signed-off-by to this commit: ${c.sha}`).join('\n') +
                `"`;
            } else {
              remediationSnippet = "# Unable to auto-generate remediation message. Please check the DCO check details.";
            }

            // Build comment
            const commentHeader = '<!-- dco-advice-bot -->';
            let body = "";

            if (isFailure) {
              body = [
                commentHeader,
                '❌ **DCO Check Failed**',
                '',
                `Hi @${username}, your pull request has failed the Developer Certificate of Origin (DCO) check.`,
                '',
                'This repository supports **remediation commits**, so you can fix this without rewriting history — but you must follow the required message format.',
                '',
                '---',
                '',
                '### 🛠 Quick Fix: Add a remediation commit',
                'Run this command:',
                '',
                '```bash',
                remediationSnippet,
                'git push',
                '```',
                '',
                '---',
                '',
                '<details>',
                '<summary>🔧 Advanced: Sign off each commit directly</summary>',
                '',
                '**For the latest commit:**',
                '```bash',
                'git commit --amend --signoff',
                'git push --force-with-lease',
                '```',
                '',
                '**For multiple commits:**',
                '```bash',
                `git rebase --signoff origin/${baseRef}`,
                'git push --force-with-lease',
                '```',
                '',
                '</details>',
                '',
                moreInfo
              ].join('\n');
            } else {
              body = [
                commentHeader,
                '✅ **DCO Check Passed**',
                '',
                `Thanks @${username}, all your commits are properly signed off. 🎉`
              ].join('\n');
            }

            // Get existing comments on the PR
            const { data: comments } = await github.rest.issues.listComments({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber
            });

            // Look for a previous bot comment
            const existingComment = comments.find(c =>
              c.body.includes("<!-- dco-advice-bot -->")
            );

            if (existingComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existingComment.id,
                body: body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: prNumber,
                body: body
              });
            }
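The check the bot polls for simply looks for the standard sign-off trailer matched by the regex in the script; committing with `-s` adds it automatically (name and email below are placeholders):

```bash
# A commit the DCO check accepts: `-s` appends the Signed-off-by trailer.
git commit -s -m "fix: handle empty source list"
# The message then ends with a line like:
#   Signed-off-by: Jane Doe <jane.doe@example.com>
```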
.github/workflows/images-dryrun.yml (deleted file)

@@ -1,105 +0,0 @@
name: Dry run docling-serve image building

on:
  pull_request:
    branches: ["main"]

env:
  GHCR_REGISTRY: ghcr.io
  GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
  GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve

jobs:
  build_cpu_image:
    name: Build docling-serve "CPU only" container image
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write

    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      - name: Extract metadata (tags, labels) for docling-serve (CPU only) ghcr image
        id: ghcr_serve_cpu_meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME }}

      - name: Build docling-serve-cpu image
        id: build-serve-cpu-ghcr
        uses: docker/build-push-action@v5
        with:
          context: .
          push: false
          tags: ${{ steps.ghcr_serve_cpu_meta.outputs.tags }}
          labels: ${{ steps.ghcr_serve_cpu_meta.outputs.labels }}
          platforms: linux/amd64, linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: |
            --build-arg CPU_ONLY=true

      - name: Remove Local Docker Images
        run: |
          docker image prune -af

  build_gpu_image:
    name: Build docling-serve (with GPU support) container image
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write

    steps:
      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      - name: Extract metadata (tags, labels) for docling-serve (GPU) ghcr image
        id: ghcr_serve_gpu_meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME }}

      - name: Build docling-serve (GPU) image
        id: build-serve-gpu-ghcr
        uses: docker/build-push-action@v5
        with:
          context: .
          push: false
          tags: ${{ steps.ghcr_serve_gpu_meta.outputs.tags }}
          labels: ${{ steps.ghcr_serve_gpu_meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: |
            --build-arg CPU_ONLY=false
.github/workflows/images.yml (modified)

@@ -4,193 +4,55 @@ on:
   push:
     branches:
       - main
-    tags:
-      - 'v*'
+  release:
+    types: [published]

-env:
-  GHCR_REGISTRY: ghcr.io
-  GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
-  QUAY_REGISTRY: quay.io
-  QUAY_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  QUAY_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true

 jobs:
-  build_and_publish_cpu_images:
-    name: Push docling-serve "CPU only" container image to GHCR and QUAY
-    runs-on: ubuntu-latest
-    environment: registry-creds
+  build_and_publish_images:
+    name: Build and push ${{ matrix.spec.name }} container image to GHCR and QUAY
+    strategy:
+      matrix:
+        spec:
+          - name: docling-project/docling-serve
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
+            platforms: linux/amd64, linux/arm64
+          - name: docling-project/docling-serve-cpu
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
+            platforms: linux/amd64, linux/arm64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
+          - name: docling-project/docling-serve-cu126
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
+            platforms: linux/amd64
+          - name: docling-project/docling-serve-cu128
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
+            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
     permissions:
       packages: write
       contents: read
       attestations: write
       id-token: write
+    secrets: inherit

-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Log in to the GHCR container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.GHCR_REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Log in to the Quay container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.QUAY_REGISTRY }}
-          username: ${{ secrets.QUAY_USERNAME }}
-          password: ${{ secrets.QUAY_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract metadata (tags, labels) for docling-serve (CPU only) ghcr image
-        id: ghcr_serve_cpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve-cpu image to ghcr.io
-        id: push-serve-cpu-ghcr
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.ghcr_serve_cpu_meta.outputs.tags }}
-          labels: ${{ steps.ghcr_serve_cpu_meta.outputs.labels }}
-          platforms: linux/amd64, linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=true
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v1
-        with:
-          subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME}}
-          subject-digest: ${{ steps.push-serve-cpu-ghcr.outputs.digest }}
-          push-to-registry: true
-
-      - name: Extract metadata (tags, labels) for docling-serve (CPU only) quay image
-        id: quay_serve_cpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.QUAY_REGISTRY }}/${{ env.QUAY_DOCLING_SERVE_CPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve-cpu image to quay.io
-        id: push-serve-cpu-quay
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.quay_serve_cpu_meta.outputs.tags }}
-          labels: ${{ steps.quay_serve_cpu_meta.outputs.labels }}
-          platforms: linux/amd64, linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=true
-      - name: Remove Local Docker Images
-        run: |
-          docker image prune -af
-
-  build_and_publish_gpu_images:
-    name: Push docling-serve (with GPU support) container image to GHCR and QUAY
-    runs-on: ubuntu-latest
-    environment: registry-creds
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
-
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Log in to the GHCR container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.GHCR_REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Log in to the Quay container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.QUAY_REGISTRY }}
-          username: ${{ secrets.QUAY_USERNAME }}
-          password: ${{ secrets.QUAY_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract metadata (tags, labels) for docling-serve (GPU) ghcr image
-        id: ghcr_serve_gpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve (GPU) image to ghcr.io
-        id: push-serve-gpu-ghcr
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.ghcr_serve_gpu_meta.outputs.tags }}
-          labels: ${{ steps.ghcr_serve_gpu_meta.outputs.labels }}
-          platforms: linux/amd64,linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=false
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v1
-        with:
-          subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME}}
-          subject-digest: ${{ steps.push-serve-gpu-ghcr.outputs.digest }}
-          push-to-registry: true
-
-      - name: Extract metadata (tags, labels) for docling-serve (GPU) quay image
-        id: quay_serve_gpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.QUAY_REGISTRY }}/${{ env.QUAY_DOCLING_SERVE_GPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve (GPU) image to quay.io
-        id: push-serve-gpu-quay
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.quay_serve_gpu_meta.outputs.tags }}
-          labels: ${{ steps.quay_serve_gpu_meta.outputs.labels }}
-          platforms: linux/amd64,linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=false
+    uses: ./.github/workflows/job-image.yml
+    with:
+      publish: true
+      environment: registry-creds
+      build_args: ${{ matrix.spec.build_args }}
+      ghcr_image_name: ${{ matrix.spec.name }}
+      quay_image_name: ${{ matrix.spec.name }}
+      platforms: ${{ matrix.spec.platforms }}
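With this change, publishing is keyed to GitHub releases rather than `v*` tags; release.sh above creates that release from CI, and doing it by hand would look roughly like this (version and notes file are placeholders):

```bash
# Hypothetical manual trigger: publishing a release starts the image builds.
gh release create v1.2.1 -F release_notes.md
```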
.github/workflows/job-build.yml (new file)

@@ -0,0 +1,29 @@
name: Run checks

on:
  workflow_call:

jobs:
  build-package:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.12']
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: Install dependencies
        run: uv sync --all-extras --no-extra flash-attn
      - name: Build package
        run: uv build
      - name: Check content of wheel
        run: unzip -l dist/*.whl
      - name: Store the distribution packages
        uses: actions/upload-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
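The same build can be reproduced locally with the commands the job runs:

```bash
# Local equivalent of the build-package job.
uv sync --all-extras --no-extra flash-attn
uv build              # writes the sdist and wheel into dist/
unzip -l dist/*.whl   # inspect the wheel contents
```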
.github/workflows/job-checks.yml (new file)

@@ -0,0 +1,68 @@
name: Run checks

on:
  workflow_call:

jobs:
  py-lint:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.12']
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true

      - name: pre-commit cache key
        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
      - uses: actions/cache@v4
        with:
          path: ~/.cache/pre-commit
          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Install dependencies
        run: uv sync --frozen --all-extras --no-extra flash-attn

      - name: Run styling check
        run: uv run pre-commit run --all-files

  build-package:
    uses: ./.github/workflows/job-build.yml

  test-package:
    needs:
      - build-package
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.12']
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Install uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: Create virtual environment
        run: uv venv
      - name: Install package
        run: uv pip install dist/*.whl
      - name: Create the server
        run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'

  markdown-lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: markdownlint-cli2-action
        uses: DavidAnson/markdownlint-cli2-action@v16
        with:
          globs: "**/*.md"
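The lint job mirrors what a contributor can run before pushing, with the same commands:

```bash
# Local equivalent of the py-lint job.
uv sync --frozen --all-extras --no-extra flash-attn
uv run pre-commit run --all-files
```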
.github/workflows/job-image.yml (new file)

@@ -0,0 +1,141 @@
name: Build docling-serve container image

on:
  workflow_call:
    inputs:
      build_args:
        type: string
        description: "Extra build arguments for the build."
        default: ""
      ghcr_image_name:
        type: string
        description: "Name of the image for GHCR."
      quay_image_name:
        type: string
        description: "Name of the image for Quay."
      platforms:
        type: string
        description: "Platform argument for building images."
        default: linux/amd64, linux/arm64
      publish:
        type: boolean
        description: "If true, the images will be published."
        default: false
      environment:
        type: string
        description: "GH Action environment"
        default: ""

env:
  GHCR_REGISTRY: ghcr.io
  QUAY_REGISTRY: quay.io

jobs:
  image:
    runs-on: ubuntu-latest
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write
    environment: ${{ inputs.environment }}

    steps:
      - name: Free up space in github runner
        # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
        run: |
          df -h
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
          # shellcheck disable=SC2046
          sudo docker rmi "$(docker image ls -aq)" >/dev/null 2>&1 || true
          df -h

      - name: Check out the repo
        uses: actions/checkout@v4

      - name: Log in to the GHCR container image registry
        if: ${{ inputs.publish }}
        uses: docker/login-action@v3
        with:
          registry: ${{ env.GHCR_REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Log in to the Quay container image registry
        if: ${{ inputs.publish }}
        uses: docker/login-action@v3
        with:
          registry: ${{ env.QUAY_REGISTRY }}
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      - name: Extract metadata (tags, labels) for docling-serve ghcr image
        id: ghcr_meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}

      - name: Build and push image to ghcr.io
        id: ghcr_push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ inputs.publish }}
          tags: ${{ steps.ghcr_meta.outputs.tags }}
          labels: ${{ steps.ghcr_meta.outputs.labels }}
          platforms: ${{ inputs.platforms }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}

      - name: Generate artifact attestation
        if: ${{ inputs.publish }}
        uses: actions/attest-build-provenance@v1
        with:
          subject-name: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
          subject-digest: ${{ steps.ghcr_push.outputs.digest }}
          push-to-registry: true

      - name: Extract metadata (tags, labels) for docling-serve quay image
        if: ${{ inputs.publish }}
        id: quay_meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.QUAY_REGISTRY }}/${{ inputs.quay_image_name }}

      - name: Build and push image to quay.io
        if: ${{ inputs.publish }}
        # id: push-serve-cpu-quay
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ inputs.publish }}
          tags: ${{ steps.quay_meta.outputs.tags }}
          labels: ${{ steps.quay_meta.outputs.labels }}
          platforms: ${{ inputs.platforms }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}

      # - name: Inspect the image details
      #   run: |
      #     echo "${{ steps.ghcr_push.outputs.metadata }}"

      - name: Remove Local Docker Images
        run: |
          docker image prune -af
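Published images land in both registries named above; a quick smoke test might look like the following sketch (the tag and the health endpoint are assumptions based on the project's documentation):

```bash
# Hypothetical smoke test of a published image.
docker run -d --rm -p 5001:5001 --name ds \
  quay.io/docling-project/docling-serve:latest
curl -s http://localhost:5001/health   # expect an "ok" status response
docker stop ds
```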
.github/workflows/pypi.yml (new file)

@@ -0,0 +1,34 @@
name: "Build and publish package"

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:

  build-package:
    uses: ./.github/workflows/job-build.yml

  build-and-publish:
    needs:
      - build-package
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/docling-serve  # Replace <package-name> with your PyPI project name
    permissions:
      id-token: write  # IMPORTANT: mandatory for trusted publishing
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          # currently not working with reusable workflows
          attestations: false
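Once a release is published, the package becomes installable from PyPI; a sketch of consuming it (the `run` subcommand is taken from the project's README and is an assumption here):

```bash
# Install the published package and start the server locally.
pip install docling-serve
docling-serve run
```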
.gitignore (modified)

@@ -1,5 +1,7 @@
 model_artifacts/
 scratch/
+.md-lint
+actionlint

 # Created by https://www.toptal.com/developers/gitignore/api/python,macos,virtualenv,pycharm,visualstudiocode,emacs,vim,jupyternotebooks
 # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,virtualenv,pycharm,visualstudiocode,emacs,vim,jupyternotebooks
@@ -442,3 +444,5 @@ pip-selfcheck.json
 # Makefile
 .action-lint
 .markdown-lint
+
+cookies.txt
Markdownlint configuration (modified)

@@ -2,5 +2,9 @@ config:
   line-length: false
   no-emphasis-as-header: false
   first-line-heading: false
+  MD033:
+    allowed_elements: ["details", "summary", "br", "a", "b", "p", "img"]
+  MD024:
+    siblings_only: true
 globs:
   - "**/*.md"
.pre-commit-config.yaml (modified)

@@ -1,41 +1,39 @@
 fail_fast: true
 repos:
-  - repo: local
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.6
     hooks:
-      - id: system
-        name: Black
-        entry: poetry run black docling_serve tests
-        pass_filenames: false
-        language: system
-        files: '\.py$'
-  - repo: local
-    hooks:
-      - id: system
-        name: isort
-        entry: poetry run isort docling_serve tests
-        pass_filenames: false
-        language: system
-        files: '\.py$'
-  - repo: local
-    hooks:
-      - id: system
-        name: flake8
-        entry: poetry run flake8 docling_serve
-        pass_filenames: false
-        language: system
-        files: '\.py$'
+      # Run the Ruff formatter.
+      - id: ruff-format
+        name: "Ruff formatter"
+        args: [--config=pyproject.toml]
+        files: '^(docling_serve|tests).*\.(py|ipynb)$'
+      # Run the Ruff linter.
+      - id: ruff
+        name: "Ruff linter"
+        args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
+        files: '^(docling_serve|tests).*\.(py|ipynb)$'
   - repo: local
     hooks:
       - id: system
         name: MyPy
-        entry: poetry run mypy docling_serve
+        entry: uv run --no-sync mypy docling_serve
         pass_filenames: false
         language: system
         files: '\.py$'
-  - repo: local
+  - repo: https://github.com/errata-ai/vale
+    rev: v3.12.0  # Use latest stable version
     hooks:
-      - id: system
-        name: Poetry check
-        entry: poetry check --lock
+      - id: vale
+        name: vale sync
         pass_filenames: false
-        language: system
+        args: [sync, "--config=.github/vale.ini"]
+      - id: vale
+        name: Spell and Style Check with Vale
+        args: ["--config=.github/vale.ini"]
+        files: \.md$
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
+    rev: 0.8.3
+    hooks:
+      - id: uv-lock
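The Ruff hooks above replace the old Black, isort, and flake8 hooks. Invoked directly, outside pre-commit, they correspond roughly to (illustrative invocation via uvx, which the release script already uses for other tools):

```bash
# Direct equivalents of the two Ruff hooks.
uvx ruff format --config=pyproject.toml docling_serve tests
uvx ruff check --fix --exit-non-zero-on-fix --config=pyproject.toml docling_serve tests
```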
.python-version (new file)

@@ -0,0 +1 @@
3.12
258
CHANGELOG.md
Normal file
258
CHANGELOG.md
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
|
||||||
|
|
||||||
|
### Fix
|
||||||
|
|
||||||
|
* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
|
||||||
|
* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
|
||||||
|
* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
|
||||||
|
* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
|
||||||
|
* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
|
||||||
|
|
||||||
|
## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
### Feature

* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))

## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30

### Feature

* Add docling-mcp in the distribution ([#290](https://github.com/docling-project/docling-serve/issues/290)) ([`ecb1874`](https://github.com/docling-project/docling-serve/commit/ecb1874a507bef83d102e0e031e49fed34298637))
* Add 3.0 openapi endpoint ([#287](https://github.com/docling-project/docling-serve/issues/287)) ([`ec594d8`](https://github.com/docling-project/docling-serve/commit/ec594d84fe36df23e7d010a2fcf769856c43600b))
* Add new source and target ([#270](https://github.com/docling-project/docling-serve/issues/270)) ([`3771c1b`](https://github.com/docling-project/docling-serve/commit/3771c1b55403bd51966d07d8f760d5c4fbcc1760))

### Fix

* Referenced paths relative to zip root ([#289](https://github.com/docling-project/docling-serve/issues/289)) ([`1333f71`](https://github.com/docling-project/docling-serve/commit/1333f71c9c6495342b2169d574e921f828446f15))

## [v1.0.1](https://github.com/docling-project/docling-serve/releases/tag/v1.0.1) - 2025-07-21

### Fix

* Docling update v2.42.0 ([#277](https://github.com/docling-project/docling-serve/issues/277)) ([`8706706`](https://github.com/docling-project/docling-serve/commit/8706706e8797b0a06ec4baa7cf87988311be68b6))

### Documentation

* Typo in README ([#276](https://github.com/docling-project/docling-serve/issues/276)) ([`766adb2`](https://github.com/docling-project/docling-serve/commit/766adb248113c7bd5144d14b3c82929a2ad29f8e))

## [v1.0.0](https://github.com/docling-project/docling-serve/releases/tag/v1.0.0) - 2025-07-14

### Feature

* V1 api with list of sources and target ([#249](https://github.com/docling-project/docling-serve/issues/249)) ([`56e328b`](https://github.com/docling-project/docling-serve/commit/56e328baf76b4bb0476fc6ca820b52034e4f97bf))
* Use orchestrators from jobkit ([#248](https://github.com/docling-project/docling-serve/issues/248)) ([`daa924a`](https://github.com/docling-project/docling-serve/commit/daa924a77e56d063ef17347dfd8a838872a70529))

### Breaking

* v1 api with list of sources and target ([#249](https://github.com/docling-project/docling-serve/issues/249)) ([`56e328b`](https://github.com/docling-project/docling-serve/commit/56e328baf76b4bb0476fc6ca820b52034e4f97bf))
* use orchestrators from jobkit ([#248](https://github.com/docling-project/docling-serve/issues/248)) ([`daa924a`](https://github.com/docling-project/docling-serve/commit/daa924a77e56d063ef17347dfd8a838872a70529))

## [v0.16.1](https://github.com/docling-project/docling-serve/releases/tag/v0.16.1) - 2025-07-07

### Fix

* Upgrade deps including docling v2.40.0 with locks in models init ([#264](https://github.com/docling-project/docling-serve/issues/264)) ([`bfde1a0`](https://github.com/docling-project/docling-serve/commit/bfde1a0991c2da53b72c4f131ff74fa10f6340de))
* Missing tesseract osd ([#263](https://github.com/docling-project/docling-serve/issues/263)) ([`eb3892e`](https://github.com/docling-project/docling-serve/commit/eb3892ee141eb2c941d580b095d8a266f2d2610c))
* Properly load models at boot ([#244](https://github.com/docling-project/docling-serve/issues/244)) ([`149a8cb`](https://github.com/docling-project/docling-serve/commit/149a8cb1c0a16c1e0b7d17f40b88b4d6e8f0109d))

### Documentation

* Fix typo ([#259](https://github.com/docling-project/docling-serve/issues/259)) ([`93b8471`](https://github.com/docling-project/docling-serve/commit/93b84712b2c6d180908a197847b52b217a7ff05f))
* Change the doc example ([#258](https://github.com/docling-project/docling-serve/issues/258)) ([`c45b937`](https://github.com/docling-project/docling-serve/commit/c45b93706466a073ab4a5c75aa8a267110873e26))
* Update typo ([#247](https://github.com/docling-project/docling-serve/issues/247)) ([`50e431f`](https://github.com/docling-project/docling-serve/commit/50e431f30fbffa33f43727417fe746d20cbb9d6b))

## [v0.16.0](https://github.com/docling-project/docling-serve/releases/tag/v0.16.0) - 2025-06-25

### Feature

* Package updates and more cuda images ([#229](https://github.com/docling-project/docling-serve/issues/229)) ([`30aca92`](https://github.com/docling-project/docling-serve/commit/30aca92298ab0d86bb4debcfcacb2dd8b9040a27))

### Documentation

* Update example resources and improve README ([#231](https://github.com/docling-project/docling-serve/issues/231)) ([`80755a7`](https://github.com/docling-project/docling-serve/commit/80755a7d5955f7d0c53df8e558fdd852dd1f5b75))

## [v0.15.0](https://github.com/docling-project/docling-serve/releases/tag/v0.15.0) - 2025-06-17

### Feature

* Use redocs and scalar as api docs ([#228](https://github.com/docling-project/docling-serve/issues/228)) ([`873d05a`](https://github.com/docling-project/docling-serve/commit/873d05aefe141c63b9c1cf53b23b4fa8c96de05d))

### Fix

* "tesserocr" instead of "tesseract_cli" in usage docs ([#223](https://github.com/docling-project/docling-serve/issues/223)) ([`196c5ce`](https://github.com/docling-project/docling-serve/commit/196c5ce42a04d77234a4212c3d9b9772d2c2073e))

## [v0.14.0](https://github.com/docling-project/docling-serve/releases/tag/v0.14.0) - 2025-06-17

### Feature

* Read supported file extensions from docling ([#214](https://github.com/docling-project/docling-serve/issues/214)) ([`524f6a8`](https://github.com/docling-project/docling-serve/commit/524f6a8997b86d2f869ca491ec8fb40585b42ca4))

### Fix

* Typo in Headline ([#220](https://github.com/docling-project/docling-serve/issues/220)) ([`d5455b7`](https://github.com/docling-project/docling-serve/commit/d5455b7f66de39ea1f8b8927b5968d2baa23ca88))

## [v0.13.0](https://github.com/docling-project/docling-serve/releases/tag/v0.13.0) - 2025-06-04

### Feature

* Upgrade docling to 2.36 ([#212](https://github.com/docling-project/docling-serve/issues/212)) ([`ffea347`](https://github.com/docling-project/docling-serve/commit/ffea34732b24fdd438fabd6df02d3d9ce66b4534))

## [v0.12.0](https://github.com/docling-project/docling-serve/releases/tag/v0.12.0) - 2025-06-03

### Feature

* Export annotations in markdown and html (Docling upgrade) ([#202](https://github.com/docling-project/docling-serve/issues/202)) ([`c4c41f1`](https://github.com/docling-project/docling-serve/commit/c4c41f16dff83c5d2a0b8a4c625b5de19b36b7c5))

### Fix

* Processing complex params in multipart-form ([#210](https://github.com/docling-project/docling-serve/issues/210)) ([`7066f35`](https://github.com/docling-project/docling-serve/commit/7066f3520a88c07df1c80a0cc6c4339eaac4d6a7))

### Documentation

* Add openshift replicasets examples ([#209](https://github.com/docling-project/docling-serve/issues/209)) ([`6a8190c`](https://github.com/docling-project/docling-serve/commit/6a8190c315792bd1e0e2b0af310656baaa5551e5))

## [v0.11.0](https://github.com/docling-project/docling-serve/releases/tag/v0.11.0) - 2025-05-23

### Feature

* Page break placeholder in markdown exports options ([#194](https://github.com/docling-project/docling-serve/issues/194)) ([`32b8a80`](https://github.com/docling-project/docling-serve/commit/32b8a809f348bf9fbde657f93589a56935d3749d))
* Clear results registry ([#192](https://github.com/docling-project/docling-serve/issues/192)) ([`de002df`](https://github.com/docling-project/docling-serve/commit/de002dfcdc111c942a08b156c84b7fa22b3fbaf3))
* Upgrade to Docling 2.33.0 ([#198](https://github.com/docling-project/docling-serve/issues/198)) ([`abe5aa0`](https://github.com/docling-project/docling-serve/commit/abe5aa03f54d44ecf5c6d76e3258028997a53e68))
* Api to trigger offloading the models ([#188](https://github.com/docling-project/docling-serve/issues/188)) ([`00be428`](https://github.com/docling-project/docling-serve/commit/00be4284904d55b78c75c5475578ef11c2ade94c))
* Figure annotations @ docling components 0.0.7 ([#181](https://github.com/docling-project/docling-serve/issues/181)) ([`3ff1b2f`](https://github.com/docling-project/docling-serve/commit/3ff1b2f9834aca37472a895a0e3da47560457d77))

### Fix

* Usage of hashlib for FIPS ([#171](https://github.com/docling-project/docling-serve/issues/171)) ([`8406fb9`](https://github.com/docling-project/docling-serve/commit/8406fb9b59d83247b8379974cabed497703dfc4d))

### Documentation

* Example and instructions on how to load model weights to persistent volume ([#197](https://github.com/docling-project/docling-serve/issues/197)) ([`3f090b7`](https://github.com/docling-project/docling-serve/commit/3f090b7d15eaf696611d89bbbba5b98569610828))
* Async api usage and fixes ([#195](https://github.com/docling-project/docling-serve/issues/195)) ([`21c1791`](https://github.com/docling-project/docling-serve/commit/21c1791e427f5b1946ed46c68dfda03c957dca8f))

## [v0.10.1](https://github.com/docling-project/docling-serve/releases/tag/v0.10.1) - 2025-04-30

### Fix

* Avoid missing specialized keys in the options hash ([#166](https://github.com/docling-project/docling-serve/issues/166)) ([`36787bc`](https://github.com/docling-project/docling-serve/commit/36787bc0616356a6199da618d8646de51636b34e))
* Allow users to set the area threshold for picture descriptions ([#165](https://github.com/docling-project/docling-serve/issues/165)) ([`509f488`](https://github.com/docling-project/docling-serve/commit/509f4889f8ed4c0f0ce25bec4126ef1f1199797c))
* Expose max wait time in sync endpoints ([#164](https://github.com/docling-project/docling-serve/issues/164)) ([`919cf5c`](https://github.com/docling-project/docling-serve/commit/919cf5c0414f2f11eb8012f451fed7a8f582b7ad))
* Add flash-attn for cuda images ([#161](https://github.com/docling-project/docling-serve/issues/161)) ([`35c2630`](https://github.com/docling-project/docling-serve/commit/35c2630c613cf229393fc67b6938152b063ff498))

## [v0.10.0](https://github.com/docling-project/docling-serve/releases/tag/v0.10.0) - 2025-04-28

### Feature

* Add support for file upload and return as file in async endpoints ([#152](https://github.com/docling-project/docling-serve/issues/152)) ([`c65f3c6`](https://github.com/docling-project/docling-serve/commit/c65f3c654c76c6b64b6aada1f0a153d74789d629))

### Documentation

* Fix new default pdf_backend ([#158](https://github.com/docling-project/docling-serve/issues/158)) ([`829effe`](https://github.com/docling-project/docling-serve/commit/829effec1a1b80320ccaf2c501be8015169b6fa3))
* Fixing small typo in docs ([#155](https://github.com/docling-project/docling-serve/issues/155)) ([`14bafb2`](https://github.com/docling-project/docling-serve/commit/14bafb26286b94f80b56846c50d6e9a6d99a9763))

## [v0.9.0](https://github.com/docling-project/docling-serve/releases/tag/v0.9.0) - 2025-04-25

### Feature

* Expose picture description options ([#148](https://github.com/docling-project/docling-serve/issues/148)) ([`4c9571a`](https://github.com/docling-project/docling-serve/commit/4c9571a052d5ec0044e49225bc5615e13cdb0a56))
* Add parameters for Kubeflow pipeline engine (WIP) ([#107](https://github.com/docling-project/docling-serve/issues/107)) ([`26bef5b`](https://github.com/docling-project/docling-serve/commit/26bef5bec060f0afd8d358816b68c3f2c0dd4bc2))

### Fix

* Produce image artifacts in referenced mode ([#151](https://github.com/docling-project/docling-serve/issues/151)) ([`71c5fae`](https://github.com/docling-project/docling-serve/commit/71c5fae505366459fd481d2ecdabc5ebed94d49c))

### Documentation

* Vlm and picture description options ([#149](https://github.com/docling-project/docling-serve/issues/149)) ([`91956cb`](https://github.com/docling-project/docling-serve/commit/91956cbf4e91cf82bb4d54ace397cdbbfaf594ba))

## [v0.8.0](https://github.com/docling-project/docling-serve/releases/tag/v0.8.0) - 2025-04-22

### Feature

* Add option for vlm pipeline ([#143](https://github.com/docling-project/docling-serve/issues/143)) ([`ee89ee4`](https://github.com/docling-project/docling-serve/commit/ee89ee4daee5e916bd6a3bdb452f78934cd03f60))
* Expose more conversion options ([#142](https://github.com/docling-project/docling-serve/issues/142)) ([`6b3d281`](https://github.com/docling-project/docling-serve/commit/6b3d281f02905c195ab75f25bb39f5c4d4e7b680))
* **UI:** Change UI to use async endpoints ([#131](https://github.com/docling-project/docling-serve/issues/131)) ([`b598872`](https://github.com/docling-project/docling-serve/commit/b598872e5c48928ac44417a11bb7acc0e5c3f0c6))

### Fix

* **UI:** Use https when calling the api ([#139](https://github.com/docling-project/docling-serve/issues/139)) ([`57f9073`](https://github.com/docling-project/docling-serve/commit/57f9073bc0daf72428b068ea28e2bec7cd76c37b))
* Fix permissions in docker image ([#136](https://github.com/docling-project/docling-serve/issues/136)) ([`c1ce471`](https://github.com/docling-project/docling-serve/commit/c1ce4719c933179ba3c59d73d0584853bbd6fa6a))
* Picture caption visuals ([#129](https://github.com/docling-project/docling-serve/issues/129)) ([`5dfb75d`](https://github.com/docling-project/docling-serve/commit/5dfb75d3b9a7022d1daad12edbb8ec7bbf9aa264))

### Documentation

* Fix required permissions for oauth2-proxy requests ([#141](https://github.com/docling-project/docling-serve/issues/141)) ([`087417e`](https://github.com/docling-project/docling-serve/commit/087417e5c2387d4ed95500222058f34d8a8702aa))
* Update deployment examples ([#135](https://github.com/docling-project/docling-serve/issues/135)) ([`525a43f`](https://github.com/docling-project/docling-serve/commit/525a43ff6f04b7cc80f9dd6a0e653a8d8c4ab317))
* Fix image tag ([#124](https://github.com/docling-project/docling-serve/issues/124)) ([`420162e`](https://github.com/docling-project/docling-serve/commit/420162e674cc38b4c3c13673ffbee4c20a1b15f1))

## [v0.7.0](https://github.com/docling-project/docling-serve/releases/tag/v0.7.0) - 2025-03-31

### Feature

* Expose TLS settings and example deploy with oauth-proxy ([#112](https://github.com/docling-project/docling-serve/issues/112)) ([`7a0faba`](https://github.com/docling-project/docling-serve/commit/7a0fabae07020c2659dbb22c3b0359909051a74c))
* Offline static files ([#109](https://github.com/docling-project/docling-serve/issues/109)) ([`68772bb`](https://github.com/docling-project/docling-serve/commit/68772bb6f0a87b71094a08ff851f5754c6ca6163))
* Update to Docling 2.28 ([#106](https://github.com/docling-project/docling-serve/issues/106)) ([`20ec87a`](https://github.com/docling-project/docling-serve/commit/20ec87a63a99145bc0ad7931549af8a0c30db641))

### Fix

* Move ARGs to prevent cache invalidation ([#104](https://github.com/docling-project/docling-serve/issues/104)) ([`e30f458`](https://github.com/docling-project/docling-serve/commit/e30f458923d34c169db7d5a5c296848716e8cac4))

## [v0.6.0](https://github.com/docling-project/docling-serve/releases/tag/v0.6.0) - 2025-03-17

### Feature

* Expose options for new features ([#92](https://github.com/docling-project/docling-serve/issues/92)) ([`ec57b52`](https://github.com/docling-project/docling-serve/commit/ec57b528ed3f8e7b9604ff4cdf06da3d52c714dd))

### Fix

* Allow changes in CORS settings ([#100](https://github.com/docling-project/docling-serve/issues/100)) ([`422c402`](https://github.com/docling-project/docling-serve/commit/422c402bab7f05e46274ede11f234a19a62e093e))
* Avoid exploding options cache using lru and expose size parameter ([#101](https://github.com/docling-project/docling-serve/issues/101)) ([`ea09028`](https://github.com/docling-project/docling-serve/commit/ea090288d3eec4ea8fbdcd32a6a497a99c89189d))
* Increase timeout_keep_alive and allow parameter changes ([#98](https://github.com/docling-project/docling-serve/issues/98)) ([`07c48ed`](https://github.com/docling-project/docling-serve/commit/07c48edd5d9437219d9623e3d05bc5166c5bb85a))
* Add warning when using incompatible parameters ([#99](https://github.com/docling-project/docling-serve/issues/99)) ([`a212547`](https://github.com/docling-project/docling-serve/commit/a212547d28d6588c65e52000dc7bc04f3f77e69e))
* **ui:** Use --port parameter and avoid failing when image is not found ([#97](https://github.com/docling-project/docling-serve/issues/97)) ([`c76daac`](https://github.com/docling-project/docling-serve/commit/c76daac70c87da412f791666881e48b74688b060))

### Documentation

* Simplify README and move details to docs ([#102](https://github.com/docling-project/docling-serve/issues/102)) ([`fd8e40a`](https://github.com/docling-project/docling-serve/commit/fd8e40a00849771263d9b75b9a56f6caeccb8517))

## [v0.5.1](https://github.com/docling-project/docling-serve/releases/tag/v0.5.1) - 2025-03-10

### Fix

* Submodules in wheels ([#85](https://github.com/docling-project/docling-serve/issues/85)) ([`a92ad48`](https://github.com/docling-project/docling-serve/commit/a92ad48b287bfcb134011dc0fc3f91ee04e067ee))

## [v0.5.0](https://github.com/docling-project/docling-serve/releases/tag/v0.5.0) - 2025-03-07

### Feature

* Async api ([#60](https://github.com/docling-project/docling-serve/issues/60)) ([`82f8900`](https://github.com/docling-project/docling-serve/commit/82f890019745859699c1b01f9ccfb64cb7e37906))
* Display version in fastapi docs ([#78](https://github.com/docling-project/docling-serve/issues/78)) ([`ed851c9`](https://github.com/docling-project/docling-serve/commit/ed851c95fee5f59305ddc3dcd5c09efce618470b))

### Fix

* Remove uv from image, merge ARG and ENV declarations ([#57](https://github.com/docling-project/docling-serve/issues/57)) ([`c95db36`](https://github.com/docling-project/docling-serve/commit/c95db3643807a4dfb96d93c8e10d6eb486c49a30))
* **docs:** Remove comma in convert/source curl example ([#73](https://github.com/docling-project/docling-serve/issues/73)) ([`05df073`](https://github.com/docling-project/docling-serve/commit/05df0735d35a589bdc2a11fcdd764a10f700cb6f))

## [v0.4.0](https://github.com/docling-project/docling-serve/releases/tag/v0.4.0) - 2025-02-26

### Feature

* New container images ([#68](https://github.com/docling-project/docling-serve/issues/68)) ([`7e6d9cd`](https://github.com/docling-project/docling-serve/commit/7e6d9cdef398df70a5b4d626aeb523c428c10d56))
* Render DoclingDocument with npm docling-components in the example UI ([#65](https://github.com/docling-project/docling-serve/issues/65)) ([`c430d9b`](https://github.com/docling-project/docling-serve/commit/c430d9b1a162ab29104d86ebaa1ac5a5488b1f09))

## [v0.3.0](https://github.com/docling-project/docling-serve/releases/tag/v0.3.0) - 2025-02-19

### Feature

* Add new docling-serve cli ([#50](https://github.com/docling-project/docling-serve/issues/50)) ([`ec33a61`](https://github.com/docling-project/docling-serve/commit/ec33a61faa7846b9b7998fbf557ebe39a3b800f6))

### Fix

* Set DOCLING_SERVE_ARTIFACTS_PATH in images ([#53](https://github.com/docling-project/docling-serve/issues/53)) ([`4877248`](https://github.com/docling-project/docling-serve/commit/487724836896576ca4f98e84abf15fd1c383bec8))
* Set root UI path when behind proxy ([#38](https://github.com/docling-project/docling-serve/issues/38)) ([`c64a450`](https://github.com/docling-project/docling-serve/commit/c64a450bf9ba9947ab180e92bef2763ff710b210))
* Support python 3.13 and docling updates and switch to uv ([#48](https://github.com/docling-project/docling-serve/issues/48)) ([`ae3b490`](https://github.com/docling-project/docling-serve/commit/ae3b4906f1c0829b1331ea491f3518741cabff71))
CONTRIBUTING.md

@@ -3,13 +3,13 @@
 Our project welcomes external contributions. If you have an itch, please feel
 free to scratch it.
 
-To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling-serve/pulls).
+To contribute code or documentation, please submit a [pull request](https://github.com/docling-project/docling-serve/pulls).
 
 A good way to familiarize yourself with the codebase and contribution process is
-to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/DS4SD/docling-serve/issues).
+to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/docling-project/docling-serve/issues).
 Before embarking on a more ambitious contribution, please quickly [get in touch](#communication) with us.
 
-For general questions or support requests, please refer to the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
+For general questions or support requests, please refer to the [discussion section](https://github.com/docling-project/docling-serve/discussions).
 
 **Note: We appreciate your effort, and want to avoid a situation where a contribution
 requires extensive rework (by you or by us), sits in backlog for a long time, or
@@ -17,14 +17,14 @@ cannot be accepted at all!**
 
 ### Proposing new features
 
-If you would like to implement a new feature, please [raise an issue](https://github.com/DS4SD/docling-serve/issues)
+If you would like to implement a new feature, please [raise an issue](https://github.com/docling-project/docling-serve/issues)
 before sending a pull request so the feature can be discussed. This is to avoid
 you wasting your valuable time working on a feature that the project developers
 are not interested in accepting into the code base.
 
 ### Fixing bugs
 
-If you would like to fix a bug, please [raise an issue](https://github.com/DS4SD/docling-serve/issues) before sending a
+If you would like to fix a bug, please [raise an issue](https://github.com/docling-project/docling-serve/issues) before sending a
 pull request so it can be tracked.
 
 ### Merge approval
@@ -73,7 +73,7 @@ git commit -s
 
 ## Communication
 
-Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
+Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling-serve/discussions).
 
 ## Developing
 
@@ -142,8 +142,7 @@ poetry add NAME
 
 We use the following tools to enforce code style:
 
-- iSort, to sort imports
-- Black, to format code
+- ruff, to sort imports and format code
 
 We run a series of checks on the code base on every commit, using `pre-commit`. To install the hooks, run:
 
@@ -157,4 +156,4 @@ To run the checks on-demand, run:
 pre-commit run --all-files
 ```
 
-Note: Checks like `Black` and `isort` will "fail" if they modify files. This is because `pre-commit` doesn't like to see files modified by their Hooks. In these cases, `git add` the modified files and `git commit` again.
+Note: Formatting checks like `ruff` will "fail" if they modify files. This is because `pre-commit` doesn't like to see files modified by their Hooks. In these cases, `git add` the modified files and `git commit` again.
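Taken together, the updated contribution tooling reduces to a short loop. A minimal sketch, assuming `uv` is already installed and reusing the same commands the Makefile's `py-lint` target runs further below:

```bash
# Create the project environment and install the pre-commit hooks
uv sync --extra ui
uv run pre-commit install

# Run every check on demand; if a formatting hook modifies files,
# `git add` the changes and commit again
uv run pre-commit run --all-files
```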
Containerfile

@@ -1,32 +1,80 @@
-FROM python:3.11-slim-bookworm
-
-ARG CPU_ONLY=false
-WORKDIR /docling-serve
-
-RUN apt-get update \
-    && apt-get install -y libgl1 libglib2.0-0 curl wget git \
-    && apt-get clean
-
-RUN pip install --no-cache-dir poetry
-
-COPY pyproject.toml poetry.lock README.md /docling-serve/
-
-RUN if [ "$CPU_ONLY" = "true" ]; then \
-    poetry install --no-root --with cpu; \
-    else \
-    poetry install --no-root; \
-    fi
-
-ENV HF_HOME=/tmp/
-ENV TORCH_HOME=/tmp/
-
-RUN poetry run python -c 'from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline; artifacts_path = StandardPdfPipeline.download_models_hf(force=True);'
-
-# On container environments, always set a thread budget to avoid undesired thread congestion.
-ENV OMP_NUM_THREADS=4
-
-COPY ./docling_serve /docling-serve/docling_serve
+ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s
+ARG UV_VERSION=0.8.3
+ARG UV_SYNC_EXTRA_ARGS=""
+
+FROM ${BASE_IMAGE} AS docling-base
+
+###################################################################################################
+#                                            OS Layer                                             #
+###################################################################################################
+
+USER 0
+
+RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
+    dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
+    dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
+    dnf config-manager --enable crb && \
+    dnf -y update && \
+    dnf install -y $(cat /tmp/os-packages.txt) && \
+    dnf -y clean all && \
+    rm -rf /var/cache/dnf
+
+RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
+
+ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
+
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
+
+###################################################################################################
+#                                          Docling layer                                          #
+###################################################################################################
+
+FROM docling-base
+
+USER 1001
+
+WORKDIR /opt/app-root/src
+
+ENV \
+    OMP_NUM_THREADS=4 \
+    LANG=en_US.UTF-8 \
+    LC_ALL=en_US.UTF-8 \
+    PYTHONIOENCODING=utf-8 \
+    UV_COMPILE_BYTECODE=1 \
+    UV_LINK_MODE=copy \
+    UV_PROJECT_ENVIRONMENT=/opt/app-root \
+    DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
+
+ARG UV_SYNC_EXTRA_ARGS
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
+    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    umask 002 && \
+    UV_SYNC_ARGS="--frozen --no-install-project --no-dev --all-extras" && \
+    uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
+    FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn
+
+ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
+
+RUN echo "Downloading models..." && \
+    HF_HUB_DOWNLOAD_TIMEOUT="90" \
+    HF_HUB_ETAG_TIMEOUT="90" \
+    docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
+    chown -R 1001:0 ${DOCLING_SERVE_ARTIFACTS_PATH} && \
+    chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}
+
+COPY --chown=1001:0 ./docling_serve ./docling_serve
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
+    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    umask 002 && uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
 
 EXPOSE 5001
 
-CMD ["poetry", "run", "uvicorn", "--port", "5001", "--host", "0.0.0.0", "docling_serve.app:app"]
+CMD ["docling-serve", "run"]
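For a local build of this Containerfile, the dependency flavor is selected through the `UV_SYNC_EXTRA_ARGS` build argument. A minimal sketch mirroring the CPU variant from the Makefile below (the `:local` tag is only an illustrative name):

```bash
docker build --load \
  --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" \
  -f Containerfile -t docling-serve-cpu:local .
```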
MAINTAINERS.md

@@ -1,11 +1,11 @@
 # MAINTAINERS
 
-- Christoph Auer - [@cau-git](https://github.com/cau-git)
-- Michele Dolfi - [@dolfim-ibm](https://github.com/dolfim-ibm)
-- Maxim Lysak - [@maxmnemonic](https://github.com/maxmnemonic)
-- Nikos Livathinos - [@nikos-livathinos](https://github.com/nikos-livathinos)
-- Ahmed Nassar - [@nassarofficial](https://github.com/nassarofficial)
-- Panos Vagenas - [@vagenas](https://github.com/vagenas)
-- Peter Staar - [@PeterStaar-IBM](https://github.com/PeterStaar-IBM)
+- Christoph Auer - [`@cau-git`](https://github.com/cau-git)
+- Michele Dolfi - [`@dolfim-ibm`](https://github.com/dolfim-ibm)
+- Maxim Lysak - [`@maxmnemonic`](https://github.com/maxmnemonic)
+- Nikos Livathinos - [`@nikos-livathinos`](https://github.com/nikos-livathinos)
+- Ahmed Nassar - [`@nassarofficial`](https://github.com/nassarofficial)
+- Panos Vagenas - [`@vagenas`](https://github.com/vagenas)
+- Peter Staar - [`@PeterStaar-IBM`](https://github.com/PeterStaar-IBM)
 
 Maintainers can be contacted at [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com).
Makefile

@@ -17,6 +17,7 @@ else
 endif
 
 TAG=$(shell git rev-parse HEAD)
+BRANCH_TAG=$(shell git rev-parse --abbrev-ref HEAD)
 
 action-lint-file:
 	$(CMD_PREFIX) touch .action-lint
@@ -24,19 +25,47 @@ action-lint-file:
 md-lint-file:
 	$(CMD_PREFIX) touch .markdown-lint
 
-.PHONY: docling-serve-cpu-image
-docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" continaer image
-	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU ONLY]"
-	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=true -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main
-
-.PHONY: docling-serve-gpu-image
-docling-serve-gpu-image: Containerfile ## Build docling-serve continaer image with GPU support
-	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with GPU]"
-	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=false -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) quay.io/ds4sd/docling-serve:main
+.PHONY: docling-serve-image
+docling-serve-image: Containerfile ## Build docling-serve container image
+	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve]"
+	$(CMD_PREFIX) docker build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cpu-image
+docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
+	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve CPU]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cu124-image
+docling-serve-cu124-image: Containerfile ## Build docling-serve container image with CUDA 12.4 support
+	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cu126-image
+docling-serve-cu126-image: Containerfile ## Build docling-serve container image with CUDA 12.6 support
+	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.6]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cu128-image
+docling-serve-cu128-image: Containerfile ## Build docling-serve container image with CUDA 12.8 support
+	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with Cuda 12.8]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+
+.PHONY: docling-serve-rocm-image
+docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
+	$(ECHO_PREFIX) printf " %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
 
 .PHONY: action-lint
 action-lint: .action-lint ## Lint GitHub Action workflows
@@ -59,17 +88,50 @@ action-lint: .action-lint ## Lint GitHub Action workflows
 md-lint: .md-lint ## Lint markdown files
 .md-lint: $(wildcard */**/*.md) | md-lint-file
 	$(ECHO_PREFIX) printf " %-12s ./...\n" "[MD LINT]"
-	$(CMD_PREFIX) docker run --rm -v $$(pwd):/workdir davidanson/markdownlint-cli2:v0.14.0 "**/*.md"
+	$(CMD_PREFIX) docker run --rm -v $$(pwd):/workdir davidanson/markdownlint-cli2:v0.16.0 "**/*.md" "#.venv"
 	$(CMD_PREFIX) touch $@
 
 .PHONY: py-Lint
 py-lint: ## Lint Python files
 	$(ECHO_PREFIX) printf " %-12s ./...\n" "[PY LINT]"
-	$(CMD_PREFIX) if ! which poetry $(PIPE_DEV_NULL) ; then \
-		echo "Please install poetry." ; \
-		echo "pip install poetry" ; \
+	$(CMD_PREFIX) if ! which uv $(PIPE_DEV_NULL) ; then \
+		echo "Please install uv." ; \
 		exit 1 ; \
 	fi
-	$(CMD_PREFIX) poetry install --all-extras
-	$(CMD_PREFIX) poetry run pre-commit run --all-files
+	$(CMD_PREFIX) uv sync --extra ui
+	$(CMD_PREFIX) uv run pre-commit run --all-files
+
+.PHONY: run-docling-cpu
+run-docling-cpu: ## Run the docling-serve container with CPU support and assign a container name
+	$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cpu 2>/dev/null || true
+	$(ECHO_PREFIX) printf " %-12s Running docling-serve container with CPU support on port 5001...\n" "[RUN CPU]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main
+
+.PHONY: run-docling-cu124
+run-docling-cu124: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
+	$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+
+.PHONY: run-docling-cu126
+run-docling-cu126: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
+	$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
+
+.PHONY: run-docling-cu128
+run-docling-cu128: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
+	$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
+
+.PHONY: run-docling-rocm
+run-docling-rocm: ## Run the docling-serve container with GPU support and assign a container name
+	$(ECHO_PREFIX) printf " %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
+	$(ECHO_PREFIX) printf " %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
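In practice the build and run targets pair up. A sketch, assuming you are on the `main` branch so that the `$(BRANCH_TAG)` tag produced by the build matches the `:main` tag the run target pulls:

```bash
make docling-serve-cpu-image   # build and tag the CPU-only image
make run-docling-cpu           # serve it on http://localhost:5001
```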
README.md

@@ -1,56 +1,122 @@
+<p align="center">
+  <a href="https://github.com/docling-project/docling-serve">
+    <img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling-serve/raw/main/docs/assets/docling-serve-pic.png" width="30%"/>
+  </a>
+</p>
+
 # Docling Serve
 
-Running [Docling](https://github.com/DS4SD/docling) as an API service.
+Running [Docling](https://github.com/docling-project/docling) as an API service.
 
-> [!NOTE]
-> This is an unstable draft implementation which will quickly evolve.
+📚 [Docling Serve documentation](./docs/README.md)
+
+- Learning how to [configure the webserver](./docs/configuration.md)
+- Get to know all [runtime options](./docs/usage.md) of the API
+- Explore useful [deployment examples](./docs/deployment.md)
+- And more
 
-## Development
+> [!NOTE]
+> **Migration to the `v1` API.** Docling Serve now has a stable v1 API. Read more on the [migration to v1](./docs/v1_migration.md).
 
-Install the dependencies
+## Getting started
 
-```sh
-# Install poetry if not already available
-curl -sSL https://install.python-poetry.org | python3 -
-
-# Install dependencies
-poetry install
-
-# Run the server
-poetry run uvicorn docling_serve.app:app --reload
-```
+Install the `docling-serve` package and run the server.
 
-Example payload (http source):
+```bash
+# Using the python package
+pip install "docling-serve[ui]"
+docling-serve run --enable-ui
+
+# Using container images, e.g. with Podman
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve
+```
 
-```sh
+The server is available at
+
+- API <http://127.0.0.1:5001>
+- API documentation <http://127.0.0.1:5001/docs>
+- UI playground <http://127.0.0.1:5001/ui>
+
+
+
+Try it out with a simple conversion:
+
+```bash
 curl -X 'POST' \
-  'http://127.0.0.1:8000/convert' \
+  'http://localhost:5001/v1/convert/source' \
   -H 'accept: application/json' \
   -H 'Content-Type: application/json' \
   -d '{
-  "http_source": {
-    "url": "https://arxiv.org/pdf/2206.01062"
-  }
-}'
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
 ```
 
-### Cuda GPU Support
+### Container Images
 
-For GPU support try the following:
+The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:
 
-```sh
-# Create a virtual env
-python3 -m venv venv
-
-# Activate the venv
-source venv/bin/active
-
-# Install torch with the special index
-pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
-
-# Install the package
-pip install -e .
-
-# Run the server
-poetry run uvicorn docling_serve.app:app --reload
-```
+#### 📦 Distributed Images
+
+| Image | Description | Architectures | Size |
+|-------|-------------|----------------|------|
+| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
+| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
+| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
+
+#### 🚫 Not Distributed
+
+An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
+
+To build it locally:
+
+```bash
+git clone --branch main git@github.com:docling-project/docling-serve.git
+cd docling-serve/
+make docling-serve-rocm-image
+```
+
+For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).
+
+Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.
+
+### Demonstration UI
+
+An easy to use UI is available at the `/ui` endpoint.
+
+
+
+
+
+## Get help and support
+
+Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).
+
+## Contributing
+
+Please read [Contributing to Docling Serve](https://github.com/docling-project/docling-serve/blob/main/CONTRIBUTING.md) for details.
+
+## References
+
+If you use Docling in your projects, please consider citing the following:
+
+```bib
+@techreport{Docling,
+  author = {Docling Contributors},
+  month = {1},
+  title = {Docling: An Efficient Open-Source Toolkit for AI-driven Document Conversion},
+  url = {https://arxiv.org/abs/2501.17887},
+  eprint = {2501.17887},
+  doi = {10.48550/arXiv.2501.17887},
+  version = {2.0.0},
+  year = {2025}
+}
+```
+
+## License
+
+The Docling Serve codebase is under MIT license.
+
+## IBM ❤️ Open Source AI
+
+Docling has been brought to you by IBM.
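As a sketch of how the CUDA variants from the table above are typically run with Podman (the `--device nvidia.com/gpu=all` flag assumes a CDI-enabled GPU setup and is not part of the README itself):

```bash
podman run --device nvidia.com/gpu=all -p 5001:5001 \
  -e DOCLING_SERVE_ENABLE_UI=1 \
  quay.io/docling-project/docling-serve-cu128
```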
docling_serve/__main__.py (new file)

@@ -0,0 +1,370 @@
import importlib.metadata
import logging
import platform
import sys
import warnings
from pathlib import Path
from typing import Annotated, Any, Optional, Union

import typer
import uvicorn
from rich.console import Console

from docling_serve.settings import docling_serve_settings, uvicorn_settings

warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")


err_console = Console(stderr=True)
console = Console()

app = typer.Typer(
    no_args_is_help=True,
    rich_markup_mode="rich",
)

logger = logging.getLogger(__name__)


def version_callback(value: bool) -> None:
    if value:
        docling_serve_version = importlib.metadata.version("docling_serve")
        docling_jobkit_version = importlib.metadata.version("docling-jobkit")
        docling_version = importlib.metadata.version("docling")
        docling_core_version = importlib.metadata.version("docling-core")
        docling_ibm_models_version = importlib.metadata.version("docling-ibm-models")
        docling_parse_version = importlib.metadata.version("docling-parse")
        platform_str = platform.platform()
        py_impl_version = sys.implementation.cache_tag
        py_lang_version = platform.python_version()
        console.print(f"Docling Serve version: {docling_serve_version}")
        console.print(f"Docling Jobkit version: {docling_jobkit_version}")
        console.print(f"Docling version: {docling_version}")
        console.print(f"Docling Core version: {docling_core_version}")
        console.print(f"Docling IBM Models version: {docling_ibm_models_version}")
        console.print(f"Docling Parse version: {docling_parse_version}")
        console.print(f"Python: {py_impl_version} ({py_lang_version})")
        console.print(f"Platform: {platform_str}")
        raise typer.Exit()


@app.callback()
def callback(
    version: Annotated[
        Union[bool, None],
        typer.Option(help="Show the version and exit.", callback=version_callback),
    ] = None,
    verbose: Annotated[
        int,
        typer.Option(
            "--verbose",
            "-v",
            count=True,
            help="Set the verbosity level. -v for info logging, -vv for debug logging.",
        ),
    ] = 0,
) -> None:
    if verbose == 0:
        logging.basicConfig(level=logging.WARNING)
    elif verbose == 1:
        logging.basicConfig(level=logging.INFO)
    elif verbose == 2:
        logging.basicConfig(level=logging.DEBUG)


def _run(
    *,
    command: str,
    # Docling serve parameters
    artifacts_path: Path | None,
    enable_ui: bool,
) -> None:
    server_type = "development" if command == "dev" else "production"

    console.print(f"Starting {server_type} server 🚀")

    run_subprocess = (
        uvicorn_settings.workers is not None and uvicorn_settings.workers > 1
    ) or uvicorn_settings.reload

    run_ssl = (
        uvicorn_settings.ssl_certfile is not None
        and uvicorn_settings.ssl_keyfile is not None
    )

    if run_subprocess and docling_serve_settings.artifacts_path != artifacts_path:
        err_console.print(
            "\n[yellow]:warning: The server will run with reload or multiple workers. \n"
            "The argument [bold]--artifacts-path[/bold] will be ignored, please set the value \n"
            "using the environment variable [bold]DOCLING_SERVE_ARTIFACTS_PATH[/bold].[/yellow]"
        )

    if run_subprocess and docling_serve_settings.enable_ui != enable_ui:
        err_console.print(
            "\n[yellow]:warning: The server will run with reload or multiple workers. \n"
            "The argument [bold]--enable-ui[/bold] will be ignored, please set the value \n"
            "using the environment variable [bold]DOCLING_SERVE_ENABLE_UI[/bold].[/yellow]"
        )

    # Propagate the settings to the app settings
    docling_serve_settings.artifacts_path = artifacts_path
    docling_serve_settings.enable_ui = enable_ui

    # Print documentation
    protocol = "https" if run_ssl else "http"
    url = f"{protocol}://{uvicorn_settings.host}:{uvicorn_settings.port}"
    url_docs = f"{url}/docs"
    url_scalar = f"{url}/scalar"
    url_ui = f"{url}/ui"

    console.print("")
    console.print(f"Server started at [link={url}]{url}[/]")
    console.print(f"Documentation at [link={url_docs}]{url_docs}[/]")
    console.print(f"Scalar docs at [link={url_docs}]{url_scalar}[/]")
    if docling_serve_settings.enable_ui:
        console.print(f"UI at [link={url_ui}]{url_ui}[/]")

    if command == "dev":
        console.print("")
        console.print(
            "Running in development mode, for production use: "
            "[bold]docling-serve run[/]",
        )

    console.print("")
    console.print("Logs:")

    # Launch the server
    uvicorn.run(
        app="docling_serve.app:create_app",
        factory=True,
        host=uvicorn_settings.host,
        port=uvicorn_settings.port,
        reload=uvicorn_settings.reload,
        workers=uvicorn_settings.workers,
        root_path=uvicorn_settings.root_path,
        proxy_headers=uvicorn_settings.proxy_headers,
        timeout_keep_alive=uvicorn_settings.timeout_keep_alive,
        ssl_certfile=uvicorn_settings.ssl_certfile,
        ssl_keyfile=uvicorn_settings.ssl_keyfile,
        ssl_keyfile_password=uvicorn_settings.ssl_keyfile_password,
    )


@app.command()
def dev(
    *,
    # uvicorn options
    host: Annotated[
        str,
        typer.Option(
            help=(
                "The host to serve on. For local development in localhost "
                "use [blue]127.0.0.1[/blue]. To enable public access, "
                "e.g. in a container, use all the IP addresses "
                "available with [blue]0.0.0.0[/blue]."
            )
        ),
    ] = "127.0.0.1",
    port: Annotated[
        int,
        typer.Option(help="The port to serve on."),
    ] = uvicorn_settings.port,
    reload: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable auto-reload of the server when (code) files change. "
                "This is [bold]resource intensive[/bold], "
                "use it only during development."
            )
        ),
    ] = True,
    root_path: Annotated[
        str,
        typer.Option(
            help=(
                "The root path is used to tell your app that it is being served "
                "to the outside world with some [bold]path prefix[/bold] "
                "set up in some termination proxy or similar."
            )
        ),
    ] = uvicorn_settings.root_path,
    proxy_headers: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
                "X-Forwarded-Port to populate remote address info."
            )
        ),
    ] = uvicorn_settings.proxy_headers,
    timeout_keep_alive: Annotated[
        int, typer.Option(help="Timeout for the server response.")
    ] = uvicorn_settings.timeout_keep_alive,
    ssl_certfile: Annotated[
        Optional[Path], typer.Option(help="SSL certificate file")
    ] = uvicorn_settings.ssl_certfile,
    ssl_keyfile: Annotated[
        Optional[Path], typer.Option(help="SSL key file")
    ] = uvicorn_settings.ssl_keyfile,
    ssl_keyfile_password: Annotated[
        Optional[str], typer.Option(help="SSL keyfile password")
    ] = uvicorn_settings.ssl_keyfile_password,
    # docling options
    artifacts_path: Annotated[
        Optional[Path],
        typer.Option(
            help=(
                "If set to a valid directory, "
                "the model weights will be loaded from this path."
            )
        ),
    ] = docling_serve_settings.artifacts_path,
    enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
) -> Any:
    """
    Run a [bold]Docling Serve[/bold] app in [yellow]development[/yellow] mode. 🧪

    This is equivalent to [bold]docling-serve run[/bold] but with [bold]reload[/bold]
    enabled and listening on the [blue]127.0.0.1[/blue] address.

    Options can be set also with the corresponding ENV variable, with the exception
    of --enable-ui, --host and --reload.
    """

    uvicorn_settings.host = host
    uvicorn_settings.port = port
    uvicorn_settings.reload = reload
    uvicorn_settings.root_path = root_path
    uvicorn_settings.proxy_headers = proxy_headers
    uvicorn_settings.timeout_keep_alive = timeout_keep_alive
    uvicorn_settings.ssl_certfile = ssl_certfile
    uvicorn_settings.ssl_keyfile = ssl_keyfile
    uvicorn_settings.ssl_keyfile_password = ssl_keyfile_password

    _run(
        command="dev",
        artifacts_path=artifacts_path,
        enable_ui=enable_ui,
    )


@app.command()
def run(
    *,
    host: Annotated[
        str,
        typer.Option(
            help=(
                "The host to serve on. For local development in localhost "
                "use [blue]127.0.0.1[/blue]. To enable public access, "
                "e.g. in a container, use all the IP addresses "
                "available with [blue]0.0.0.0[/blue]."
            )
        ),
    ] = uvicorn_settings.host,
    port: Annotated[
        int,
        typer.Option(help="The port to serve on."),
    ] = uvicorn_settings.port,
    reload: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable auto-reload of the server when (code) files change. "
                "This is [bold]resource intensive[/bold], "
                "use it only during development."
            )
        ),
    ] = uvicorn_settings.reload,
    workers: Annotated[
        Union[int, None],
        typer.Option(
            help=(
                "Use multiple worker processes. "
                "Mutually exclusive with the --reload flag."
            )
        ),
    ] = uvicorn_settings.workers,
    root_path: Annotated[
        str,
        typer.Option(
            help=(
                "The root path is used to tell your app that it is being served "
                "to the outside world with some [bold]path prefix[/bold] "
                "set up in some termination proxy or similar."
            )
        ),
    ] = uvicorn_settings.root_path,
    proxy_headers: Annotated[
        bool,
        typer.Option(
            help=(
                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
                "X-Forwarded-Port to populate remote address info."
            )
        ),
    ] = uvicorn_settings.proxy_headers,
    timeout_keep_alive: Annotated[
        int, typer.Option(help="Timeout for the server response.")
    ] = uvicorn_settings.timeout_keep_alive,
    ssl_certfile: Annotated[
        Optional[Path], typer.Option(help="SSL certificate file")
    ] = uvicorn_settings.ssl_certfile,
    ssl_keyfile: Annotated[
        Optional[Path], typer.Option(help="SSL key file")
    ] = uvicorn_settings.ssl_keyfile,
    ssl_keyfile_password: Annotated[
        Optional[str], typer.Option(help="SSL keyfile password")
    ] = uvicorn_settings.ssl_keyfile_password,
    # docling options
    artifacts_path: Annotated[
        Optional[Path],
        typer.Option(
            help=(
                "If set to a valid directory, "
                "the model weights will be loaded from this path."
            )
        ),
    ] = docling_serve_settings.artifacts_path,
    enable_ui: Annotated[
        bool, typer.Option(help="Enable the development UI.")
    ] = docling_serve_settings.enable_ui,
) -> Any:
    """
    Run a [bold]Docling Serve[/bold] app in [green]production[/green] mode. 🚀

    This is equivalent to [bold]docling-serve dev[/bold] but with [bold]reload[/bold]
    disabled and listening on the [blue]0.0.0.0[/blue] address.
|
Options can be set also with the corresponding ENV variable, e.g. UVICORN_PORT
|
||||||
|
or DOCLING_SERVE_ENABLE_UI.
|
||||||
|
"""
|
||||||
|
|
||||||
|
uvicorn_settings.host = host
|
||||||
|
uvicorn_settings.port = port
|
||||||
|
uvicorn_settings.reload = reload
|
||||||
|
uvicorn_settings.workers = workers
|
||||||
|
uvicorn_settings.root_path = root_path
|
||||||
|
uvicorn_settings.proxy_headers = proxy_headers
|
||||||
|
uvicorn_settings.timeout_keep_alive = timeout_keep_alive
|
||||||
|
uvicorn_settings.ssl_certfile = ssl_certfile
|
||||||
|
uvicorn_settings.ssl_keyfile = ssl_keyfile
|
||||||
|
uvicorn_settings.ssl_keyfile_password = ssl_keyfile_password
|
||||||
|
|
||||||
|
_run(
|
||||||
|
command="run",
|
||||||
|
artifacts_path=artifacts_path,
|
||||||
|
enable_ui=enable_ui,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
app()
|
||||||
|
|
||||||
|
|
||||||
|
# Launch the CLI when calling python -m docling_serve
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
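The two commands differ only in their defaults (reload behavior and bind address), so the quickest sanity check is to drive the Typer app through its test runner. A minimal sketch, not part of the diff, assuming the CLI object above is importable as docling_serve.__main__.app:

    from typer.testing import CliRunner

    from docling_serve.__main__ import app  # assumed import path

    runner = CliRunner()
    result = runner.invoke(app, ["run", "--help"])  # prints the production-mode options
    assert result.exit_code == 0
    print(result.output)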
@@ -1,279 +1,673 @@
-import base64
-import hashlib
-from contextlib import asynccontextmanager
-from enum import Enum
-from io import BytesIO
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-import httpx
-from docling.datamodel.base_models import (
-    ConversionStatus,
-    DocumentStream,
-    ErrorItem,
-    InputFormat,
-)
-from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import (
-    EasyOcrOptions,
-    OcrOptions,
-    PdfPipelineOptions,
-    RapidOcrOptions,
-    TesseractOcrOptions,
-)
-from docling.document_converter import DocumentConverter, PdfFormatOption
-from docling.utils.profiling import ProfilingItem
-from docling_core.types.doc import DoclingDocument, ImageRefMode
-from docling_core.utils.file import resolve_remote_filename
-from fastapi import FastAPI, HTTPException, Response
-from pydantic import AnyHttpUrl, BaseModel
+import asyncio
+import copy
+import importlib.metadata
+import logging
+import shutil
+import time
+from contextlib import asynccontextmanager
+from io import BytesIO
+from typing import Annotated
+
+from fastapi import (
+    BackgroundTasks,
+    Depends,
+    FastAPI,
+    Form,
+    HTTPException,
+    Query,
+    UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
+)
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.openapi.docs import (
+    get_redoc_html,
+    get_swagger_ui_html,
+    get_swagger_ui_oauth2_redirect_html,
+)
+from fastapi.responses import JSONResponse, RedirectResponse
+from fastapi.staticfiles import StaticFiles
+from scalar_fastapi import get_scalar_api_reference
+
+from docling.datamodel.base_models import DocumentStream
+from docling_jobkit.datamodel.callback import (
+    ProgressCallbackRequest,
+    ProgressCallbackResponse,
+)
+from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
+from docling_jobkit.datamodel.task import Task, TaskSource
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    TaskTarget,
+    ZipTarget,
+)
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+    ProgressInvalid,
+    TaskNotFoundError,
+)
+
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+from docling_serve.datamodel.requests import (
+    ConvertDocumentsRequest,
+    FileSourceRequest,
+    HttpSourceRequest,
+    S3SourceRequest,
+    TargetName,
+)
+from docling_serve.datamodel.responses import (
+    ClearResponse,
+    ConvertDocumentResponse,
+    HealthCheckResponse,
+    MessageKind,
+    PresignedUrlConvertDocumentResponse,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+from docling_serve.helper_functions import FormDepends
+from docling_serve.orchestrator_factory import get_async_orchestrator
+from docling_serve.response_preparation import prepare_response
+from docling_serve.settings import docling_serve_settings
+from docling_serve.storage import get_scratch
+from docling_serve.websocket_notifier import WebsocketNotifier
-# TODO: import enum from Docling, once it is exposed
-class OcrEngine(str, Enum):
-    EASYOCR = "easyocr"
-    TESSERACT = "tesseract"
-    RAPIDOCR = "rapidocr"
-
-
-class ConvertOptions(BaseModel):
-    output_docling_document: bool = True
-    output_markdown: bool = False
-    output_html: bool = False
-    do_ocr: bool = True
-    ocr_engine: OcrEngine = OcrEngine.EASYOCR
-    ocr_lang: Optional[List[str]] = None
-    force_ocr: bool = False
-    do_table_structure: bool = True
-    include_images: bool = True
-    images_scale: float = 2.0
+# Set up custom logging, as it will be intermixed with FastAPI/Uvicorn's logging
+class ColoredLogFormatter(logging.Formatter):
+    COLOR_CODES = {
+        logging.DEBUG: "\033[94m",  # Blue
+        logging.INFO: "\033[92m",  # Green
+        logging.WARNING: "\033[93m",  # Yellow
+        logging.ERROR: "\033[91m",  # Red
+        logging.CRITICAL: "\033[95m",  # Magenta
+    }
+    RESET_CODE = "\033[0m"
+
+    def format(self, record):
+        color = self.COLOR_CODES.get(record.levelno, "")
+        record.levelname = f"{color}{record.levelname}{self.RESET_CODE}"
+        return super().format(record)
+
+
+logging.basicConfig(
+    level=logging.INFO,  # Set the logging level
+    format="%(levelname)s:\t%(asctime)s - %(name)s - %(message)s",
+    datefmt="%H:%M:%S",
+)
+
+# Override the formatter with the custom ColoredLogFormatter
+root_logger = logging.getLogger()  # Get the root logger
+for handler in root_logger.handlers:  # Iterate through existing handlers
+    if handler.formatter:
+        handler.setFormatter(ColoredLogFormatter(handler.formatter._fmt))
+
+_log = logging.getLogger(__name__)
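As a quick illustration of the formatter added above (not part of the diff), attaching it to a throwaway logger shows the level name being wrapped in ANSI color codes:

    import logging

    handler = logging.StreamHandler()
    handler.setFormatter(
        ColoredLogFormatter("%(levelname)s:\t%(asctime)s - %(name)s - %(message)s")
    )
    demo_log = logging.getLogger("demo")
    demo_log.addHandler(handler)
    demo_log.warning("shown with a yellow WARNING label")  # level name is colorized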
-class DocumentConvertBase(BaseModel):
-    options: ConvertOptions = ConvertOptions()
-
-
-class HttpSource(BaseModel):
-    url: str
-    headers: Dict[str, Any] = {}
-
-
-class FileSource(BaseModel):
-    base64_string: str
-    filename: str
-
-
-class ConvertDocumentHttpSourceRequest(DocumentConvertBase):
-    http_source: HttpSource
-
-
-class ConvertDocumentFileSourceRequest(DocumentConvertBase):
-    file_source: FileSource
-
-
-class DocumentResponse(BaseModel):
-    markdown: Optional[str] = None
-    docling_document: Optional[DoclingDocument] = None
-    html: Optional[str] = None
-
-
-class ConvertDocumentResponse(BaseModel):
-    document: DocumentResponse
-    status: ConversionStatus
-    errors: List[ErrorItem] = []
-    timings: Dict[str, ProfilingItem] = {}
-
-
-class ConvertDocumentErrorResponse(BaseModel):
-    status: ConversionStatus
-    # errors: List[ErrorItem] = []
-
-
-ConvertDocumentRequest = Union[
-    ConvertDocumentFileSourceRequest, ConvertDocumentHttpSourceRequest
-]
-
-
-class MarkdownTextResponse(Response):
-    media_type = "text/markdown"
-
-
-class HealthCheckResponse(BaseModel):
-    status: str = "ok"
-
-
-def get_pdf_pipeline_opts(options: ConvertOptions) -> Tuple[PdfPipelineOptions, str]:
-    if options.ocr_engine == OcrEngine.EASYOCR:
-        try:
-            import easyocr  # noqa: F401
-        except ImportError:
-            raise HTTPException(
-                status_code=400,
-                detail="The requested OCR engine"
-                f" (ocr_engine={options.ocr_engine.value})"
-                " is not available on this system. Please choose another OCR engine "
-                "or contact your system administrator.",
-            )
-        ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=options.force_ocr)
-    elif options.ocr_engine == OcrEngine.TESSERACT:
-        try:
-            import tesserocr  # noqa: F401
-        except ImportError:
-            raise HTTPException(
-                status_code=400,
-                detail="The requested OCR engine"
-                f" (ocr_engine={options.ocr_engine.value})"
-                " is not available on this system. Please choose another OCR engine "
-                "or contact your system administrator.",
-            )
-        ocr_options = TesseractOcrOptions(force_full_page_ocr=options.force_ocr)
-    elif options.ocr_engine == OcrEngine.RAPIDOCR:
-        try:
-            from rapidocr_onnxruntime import RapidOCR  # noqa: F401
-        except ImportError:
-            raise HTTPException(
-                status_code=400,
-                detail="The requested OCR engine"
-                f" (ocr_engine={options.ocr_engine.value})"
-                " is not available on this system. Please choose another OCR engine "
-                "or contact your system administrator.",
-            )
-        ocr_options = RapidOcrOptions(force_full_page_ocr=options.force_ocr)
-    else:
-        raise RuntimeError(f"Unexpected OCR engine type {options.ocr_engine}")
-
-    if options.ocr_lang is not None:
-        ocr_options.lang = options.ocr_lang
-
-    pipeline_options = PdfPipelineOptions(
-        do_ocr=options.do_ocr,
-        ocr_options=ocr_options,
-        do_table_structure=options.do_table_structure,
-        generate_page_images=options.include_images,
-        generate_picture_images=options.include_images,
-        images_scale=options.images_scale,
-    )
-
-    options_hash = hashlib.sha1(pipeline_options.model_dump_json().encode()).hexdigest()
-
-    return pipeline_options, options_hash
-
-
-converters: Dict[str, DocumentConverter] = {}
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-    # settings = Settings()
-
-    # Converter with default options
-    pipeline_options, options_hash = get_pdf_pipeline_opts(ConvertOptions())
-    converters[options_hash] = DocumentConverter(
-        format_options={
-            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
-            InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options),
-        }
-    )
-
-    converters[options_hash].initialize_pipeline(InputFormat.PDF)
-
-    yield
-
-    converters.clear()
+# Context manager to initialize and clean up the lifespan of the FastAPI app
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    scratch_dir = get_scratch()
+
+    orchestrator = get_async_orchestrator()
+    notifier = WebsocketNotifier(orchestrator)
+    orchestrator.bind_notifier(notifier)
+
+    # Warm up processing cache
+    if docling_serve_settings.load_models_at_boot:
+        await orchestrator.warm_up_caches()
+
+    # Start the background queue processor
+    queue_task = asyncio.create_task(orchestrator.process_queue())
+
+    yield
+
+    # Cancel the background queue processor on shutdown
+    queue_task.cancel()
+    try:
+        await queue_task
+    except asyncio.CancelledError:
+        _log.info("Queue processor cancelled.")
+
+    # Remove scratch directory in case it was a tempfile
+    if docling_serve_settings.scratch_path is not None:
+        shutil.rmtree(scratch_dir, ignore_errors=True)
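The lifespan hook pairs startup work with its teardown around a single yield. A minimal, self-contained sketch of the same pattern (all names here are illustrative, not from the diff):

    import asyncio
    from contextlib import asynccontextmanager

    from fastapi import FastAPI


    @asynccontextmanager
    async def demo_lifespan(app: FastAPI):
        # startup: launch a background worker task
        worker = asyncio.create_task(asyncio.sleep(3600))  # stand-in for process_queue()
        yield
        # shutdown: cancel it and swallow the cancellation
        worker.cancel()
        try:
            await worker
        except asyncio.CancelledError:
            pass


    demo_app = FastAPI(lifespan=demo_lifespan)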
-app = FastAPI(
-    title="Docling Serve",
-    lifespan=lifespan,
-)
-
-
-@app.get("/health")
-def health() -> HealthCheckResponse:
-    return HealthCheckResponse()
-
-
-def _convert_document(
-    body: ConvertDocumentRequest,
-) -> ConversionResult:
-    filename: str
-    buf: BytesIO
-
-    if isinstance(body, ConvertDocumentFileSourceRequest):
-        buf = BytesIO(base64.b64decode(body.file_source.base64_string))
-        filename = body.file_source.filename
-    elif isinstance(body, ConvertDocumentHttpSourceRequest):
-        http_res = httpx.get(body.http_source.url, headers=body.http_source.headers)
-        buf = BytesIO(http_res.content)
-        filename = resolve_remote_filename(
-            http_url=AnyHttpUrl(body.http_source.url),
-            response_headers=dict(**http_res.headers),
-        )
-
-    doc_input = DocumentStream(name=filename, stream=buf)
-
-    pipeline_options, options_hash = get_pdf_pipeline_opts(body.options)
-    if options_hash not in converters:
-        converters[options_hash] = DocumentConverter(
-            format_options={
-                InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
-                InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options),
+##################################
+# App creation and configuration #
+##################################


+def create_app():  # noqa: C901
+    try:
+        version = importlib.metadata.version("docling_serve")
+    except importlib.metadata.PackageNotFoundError:
+        _log.warning("Unable to get docling_serve version, falling back to 0.0.0")
+        version = "0.0.0"
+
+    offline_docs_assets = False
+    if (
+        docling_serve_settings.static_path is not None
+        and (docling_serve_settings.static_path).is_dir()
+    ):
+        offline_docs_assets = True
+        _log.info("Found static assets.")
+
+    app = FastAPI(
+        title="Docling Serve",
+        docs_url=None if offline_docs_assets else "/swagger",
+        redoc_url=None if offline_docs_assets else "/docs",
+        lifespan=lifespan,
+        version=version,
+    )
+
+    origins = docling_serve_settings.cors_origins
+    methods = docling_serve_settings.cors_methods
+    headers = docling_serve_settings.cors_headers
+
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=origins,
+        allow_credentials=True,
+        allow_methods=methods,
+        allow_headers=headers,
+    )
+
+    # Mount the Gradio app
+    if docling_serve_settings.enable_ui:
+        try:
+            import gradio as gr
+
+            from docling_serve.gradio_ui import ui as gradio_ui
+
+            tmp_output_dir = get_scratch() / "gradio"
+            tmp_output_dir.mkdir(exist_ok=True, parents=True)
+            gradio_ui.gradio_output_dir = tmp_output_dir
+            app = gr.mount_gradio_app(
+                app,
+                gradio_ui,
+                path="/ui",
+                allowed_paths=["./logo.png", tmp_output_dir],
+                root_path="/ui",
+            )
+        except ImportError:
+            _log.warning(
+                "Docling Serve enable_ui is activated, but gradio is not installed. "
+                "Install it with `pip install docling-serve[ui]` "
+                "or `pip install gradio`"
+            )
+
+    #############################
+    # Offline assets definition #
+    #############################
+    if offline_docs_assets:
+        app.mount(
+            "/static",
+            StaticFiles(directory=docling_serve_settings.static_path),
+            name="static",
+        )
+
+    @app.get("/swagger", include_in_schema=False)
+    async def custom_swagger_ui_html():
+        return get_swagger_ui_html(
+            openapi_url=app.openapi_url,
+            title=app.title + " - Swagger UI",
+            oauth2_redirect_url=app.swagger_ui_oauth2_redirect_url,
+            swagger_js_url="/static/swagger-ui-bundle.js",
+            swagger_css_url="/static/swagger-ui.css",
+        )
+
+    @app.get(app.swagger_ui_oauth2_redirect_url, include_in_schema=False)
+    async def swagger_ui_redirect():
+        return get_swagger_ui_oauth2_redirect_html()
+
+    @app.get("/docs", include_in_schema=False)
+    async def redoc_html():
+        return get_redoc_html(
+            openapi_url=app.openapi_url,
+            title=app.title + " - ReDoc",
+            redoc_js_url="/static/redoc.standalone.js",
+        )
+
+    @app.get("/scalar", include_in_schema=False)
+    async def scalar_html():
+        return get_scalar_api_reference(
+            openapi_url=app.openapi_url,
+            title=app.title,
+            scalar_favicon_url="https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg",
+            # hide_client_button=True,  # not yet released but in main
+        )
+    ########################
+    # Async / Sync helpers #
+    ########################
+
+    async def _enque_source(
+        orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
+    ) -> Task:
+        sources: list[TaskSource] = []
+        for s in conversion_request.sources:
+            if isinstance(s, FileSourceRequest):
+                sources.append(FileSource.model_validate(s))
+            elif isinstance(s, HttpSourceRequest):
+                sources.append(HttpSource.model_validate(s))
+            elif isinstance(s, S3SourceRequest):
+                sources.append(S3Coordinates.model_validate(s))
+
+        task = await orchestrator.enqueue(
+            sources=sources,
+            options=conversion_request.options,
+            target=conversion_request.target,
+        )
+        return task
+
+    async def _enque_file(
+        orchestrator: BaseOrchestrator,
+        files: list[UploadFile],
+        options: ConvertDocumentsRequestOptions,
+        target: TaskTarget,
+    ) -> Task:
+        _log.info(f"Received {len(files)} files for processing.")
+
+        # Load the uploaded files to Docling DocumentStream
+        file_sources: list[TaskSource] = []
+        for i, file in enumerate(files):
+            buf = BytesIO(file.file.read())
+            suffix = "" if len(file_sources) == 1 else f"_{i}"
+            name = file.filename if file.filename else f"file{suffix}.pdf"
+            file_sources.append(DocumentStream(name=name, stream=buf))
+
+        task = await orchestrator.enqueue(
+            sources=file_sources, options=options, target=target
+        )
+        return task
+
+    async def _wait_task_complete(orchestrator: BaseOrchestrator, task_id: str) -> bool:
+        start_time = time.monotonic()
+        while True:
+            task = await orchestrator.task_status(task_id=task_id)
+            if task.is_completed():
+                return True
+            await asyncio.sleep(5)
+            elapsed_time = time.monotonic() - start_time
+            if elapsed_time > docling_serve_settings.max_sync_wait:
+                return False
+    ##########################################
+    # Downgrade openapi 3.1 to 3.0.x helpers #
+    ##########################################
+
+    def ensure_array_items(schema):
+        """Ensure that array items are defined."""
+        if "type" in schema and schema["type"] == "array":
+            if "items" not in schema or schema["items"] is None:
+                schema["items"] = {"type": "string"}
+            elif isinstance(schema["items"], dict):
+                if "type" not in schema["items"]:
+                    schema["items"]["type"] = "string"
+
+    def handle_discriminators(schema):
+        """Ensure that discriminator properties are included in required."""
+        if "discriminator" in schema and "propertyName" in schema["discriminator"]:
+            prop = schema["discriminator"]["propertyName"]
+            if "properties" in schema and prop in schema["properties"]:
+                if "required" not in schema:
+                    schema["required"] = []
+                if prop not in schema["required"]:
+                    schema["required"].append(prop)
+
+    def handle_properties(schema):
+        """Ensure that property 'kind' is included in required."""
+        if "properties" in schema and "kind" in schema["properties"]:
+            if "required" not in schema:
+                schema["required"] = []
+            if "kind" not in schema["required"]:
+                schema["required"].append("kind")
+
+    # Downgrade openapi 3.1 to 3.0.x
+    def downgrade_openapi31_to_30(spec):
+        def strip_unsupported(obj):
+            if isinstance(obj, dict):
+                obj = {
+                    k: strip_unsupported(v)
+                    for k, v in obj.items()
+                    if k not in ("const", "examples", "prefixItems")
+                }
+
+                handle_discriminators(obj)
+                ensure_array_items(obj)
+
+                # Check for oneOf and anyOf to handle nested schemas
+                for key in ["oneOf", "anyOf"]:
+                    if key in obj:
+                        for sub in obj[key]:
+                            handle_discriminators(sub)
+                            ensure_array_items(sub)
+
+                return obj
+            elif isinstance(obj, list):
+                return [strip_unsupported(i) for i in obj]
+            return obj
+
+        if "components" in spec and "schemas" in spec["components"]:
+            for schema_name, schema in spec["components"]["schemas"].items():
+                handle_properties(schema)
+
+        return strip_unsupported(copy.deepcopy(spec))
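To see what the downgrade actually does, one can feed a toy OpenAPI 3.1 fragment through the helper above. A sketch assuming downgrade_openapi31_to_30 is in scope (the input dict is invented for illustration):

    spec_31 = {
        "openapi": "3.1.0",
        "components": {
            "schemas": {
                "Item": {
                    "type": "object",
                    "properties": {"kind": {"const": "file"}},  # `const` is 3.1-only
                }
            }
        },
    }
    spec_30 = downgrade_openapi31_to_30(spec_31)
    # `const` is stripped and the `kind` property is forced into `required`
    assert "const" not in str(spec_30)
    assert spec_30["components"]["schemas"]["Item"]["required"] == ["kind"]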
-            }
-        )
-
-    result: ConversionResult = converters[options_hash].convert(doc_input)
-
-    if result is None or result.status == ConversionStatus.SKIPPED:
-        raise HTTPException(status_code=400, detail=result.errors)
+    #############################
+    # API Endpoints definitions #
+    #############################
+
+    @app.get("/openapi-3.0.json")
+    def openapi_30():
+        spec = app.openapi()
+        downgraded = downgrade_openapi31_to_30(spec)
+        downgraded["openapi"] = "3.0.3"
+        return JSONResponse(downgraded)
+
+    # Favicon
+    @app.get("/favicon.ico", include_in_schema=False)
+    async def favicon():
+        logo_url = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
+        if offline_docs_assets:
+            logo_url = "/static/logo.svg"
+        response = RedirectResponse(url=logo_url)
+        return response
+
+    @app.get("/health")
+    def health() -> HealthCheckResponse:
+        return HealthCheckResponse()
+
+    # API readiness compatibility for OpenShift AI Workbench
+    @app.get("/api", include_in_schema=False)
+    def api_check() -> HealthCheckResponse:
+        return HealthCheckResponse()
+
+    # Convert a document from URL(s)
+    @app.post(
+        "/v1/convert/source",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+                # "description": "Return the JSON item or an image.",
+            }
+        },
+    )
+    async def process_url(
+        background_tasks: BackgroundTasks,
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        conversion_request: ConvertDocumentsRequest,
+    ):
+        task = await _enque_source(
+            orchestrator=orchestrator, conversion_request=conversion_request
+        )
+        completed = await _wait_task_complete(
+            orchestrator=orchestrator, task_id=task.task_id
+        )
+
+        if not completed:
+            # TODO: abort task!
+            return HTTPException(
+                status_code=504,
+                detail=f"Conversion is taking too long. The maximum wait time is configured as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+            )
+
+        task = await orchestrator.get_raw_task(task_id=task.task_id)
+        response = await prepare_response(
+            task=task, orchestrator=orchestrator, background_tasks=background_tasks
+        )
+        return response
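For reference, the synchronous source endpoint can be exercised with a plain httpx call. A minimal sketch (host, port, and source URL are illustrative), mirroring the payload shape the Gradio UI builds further below:

    import httpx

    payload = {
        "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}],
        "options": {"to_formats": ["md"]},
        "target": {"kind": "inbody"},
    }
    resp = httpx.post(
        "http://127.0.0.1:5001/v1/convert/source", json=payload, timeout=300
    )
    resp.raise_for_status()
    print((resp.json()["document"]["md_content"] or "")[:200])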
-    if result is None or result.status not in {
-        ConversionStatus.SUCCESS,
-    }:
-        raise HTTPException(
-            status_code=500, detail={"errors": result.errors, "status": result.status}
-        )
-
-    return result
+    # Convert a document from file(s)
+    @app.post(
+        "/v1/convert/file",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
+    )
+    async def process_file(
+        background_tasks: BackgroundTasks,
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        files: list[UploadFile],
+        options: Annotated[
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
+        ],
+        target_type: Annotated[TargetName, Form()] = TargetName.INBODY,
+    ):
+        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+        task = await _enque_file(
+            orchestrator=orchestrator, files=files, options=options, target=target
+        )
+        completed = await _wait_task_complete(
+            orchestrator=orchestrator, task_id=task.task_id
+        )
+
+        if not completed:
+            # TODO: abort task!
+            return HTTPException(
+                status_code=504,
+                detail=f"Conversion is taking too long. The maximum wait time is configured as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+            )
+
+        task = await orchestrator.get_raw_task(task_id=task.task_id)
+        response = await prepare_response(
+            task=task, orchestrator=orchestrator, background_tasks=background_tasks
+        )
+        return response
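The file endpoint takes a multipart form. A minimal httpx sketch, assuming the flattened form field names produced by FormDepends match the option names (all values here are illustrative):

    import httpx

    with open("report.pdf", "rb") as fh:
        resp = httpx.post(
            "http://127.0.0.1:5001/v1/convert/file",
            files=[("files", ("report.pdf", fh, "application/pdf"))],
            data={"to_formats": "md", "target_type": "inbody"},  # assumed field names
            timeout=300,
        )
    resp.raise_for_status()
    print(resp.json()["status"])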
-@app.post(
-    "/convert",
-)
-def convert_document(
-    body: ConvertDocumentRequest,
-) -> ConvertDocumentResponse:
-    result = _convert_document(body=body)
-
-    image_mode = (
-        ImageRefMode.EMBEDDED
-        if body.options.include_images
-        else ImageRefMode.PLACEHOLDER
-    )
-    doc_resp = DocumentResponse()
-    if body.options.output_docling_document:
-        doc_resp.docling_document = result.document
-    if body.options.output_markdown:
-        doc_resp.markdown = result.document.export_to_markdown(image_mode=image_mode)
-    if body.options.output_html:
-        doc_resp.html = result.document.export_to_html(image_mode=image_mode)
-
-    return ConvertDocumentResponse(
-        document=doc_resp, status=result.status, timings=result.timings
-    )
+    # Convert a document from URL(s) using the async api
+    @app.post(
+        "/v1/convert/source/async",
+        response_model=TaskStatusResponse,
+    )
+    async def process_url_async(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        conversion_request: ConvertDocumentsRequest,
+    ):
+        task = await _enque_source(
+            orchestrator=orchestrator, conversion_request=conversion_request
+        )
+        task_queue_position = await orchestrator.get_queue_position(
+            task_id=task.task_id
+        )
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+
+    # Convert a document from file(s) using the async api
+    @app.post(
+        "/v1/convert/file/async",
+        response_model=TaskStatusResponse,
+    )
+    async def process_file_async(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        files: list[UploadFile],
+        options: Annotated[
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
+        ],
+        target_type: Annotated[TargetName, Form()] = TargetName.INBODY,
+    ):
+        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+        task = await _enque_file(
+            orchestrator=orchestrator, files=files, options=options, target=target
+        )
+        task_queue_position = await orchestrator.get_queue_position(
+            task_id=task.task_id
+        )
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+
+    # Task status poll
+    @app.get(
+        "/v1/status/poll/{task_id}",
+        response_model=TaskStatusResponse,
+    )
+    async def task_status_poll(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        task_id: str,
+        wait: Annotated[
+            float,
+            Query(description="Number of seconds to wait for a completed status."),
+        ] = 0.0,
+    ):
+        try:
+            task = await orchestrator.task_status(task_id=task_id, wait=wait)
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
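Putting the async pieces together from a client's perspective: submit, poll, then fetch the result. A minimal sketch (host and source URL are illustrative), the same loop the Gradio UI implements below:

    import time

    import httpx

    base = "http://127.0.0.1:5001"
    payload = {"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]}

    task = httpx.post(f"{base}/v1/convert/source/async", json=payload).json()
    while True:
        status = httpx.get(f"{base}/v1/status/poll/{task['task_id']}?wait=5").json()
        if status["task_status"] in ("success", "failure", "revoked"):
            break
        time.sleep(5)
    result = httpx.get(f"{base}/v1/result/{task['task_id']}")
    print(result.status_code)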
@app.post("/convert/markdown", response_class=MarkdownTextResponse)
|
# Task status websocket
|
||||||
def convert_document_md(
|
@app.websocket(
|
||||||
body: ConvertDocumentRequest,
|
"/v1/status/ws/{task_id}",
|
||||||
) -> MarkdownTextResponse:
|
|
||||||
result = _convert_document(body=body)
|
|
||||||
image_mode = (
|
|
||||||
ImageRefMode.EMBEDDED
|
|
||||||
if body.options.include_images
|
|
||||||
else ImageRefMode.PLACEHOLDER
|
|
||||||
)
|
)
|
||||||
return MarkdownTextResponse(
|
async def task_status_ws(
|
||||||
result.document.export_to_markdown(image_mode=image_mode)
|
websocket: WebSocket,
|
||||||
|
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||||
|
task_id: str,
|
||||||
|
):
|
||||||
|
assert isinstance(orchestrator.notifier, WebsocketNotifier)
|
||||||
|
await websocket.accept()
|
||||||
|
|
||||||
|
if task_id not in orchestrator.tasks:
|
||||||
|
await websocket.send_text(
|
||||||
|
WebsocketMessage(
|
||||||
|
message=MessageKind.ERROR, error="Task not found."
|
||||||
|
).model_dump_json()
|
||||||
|
)
|
||||||
|
await websocket.close()
|
||||||
|
return
|
||||||
|
|
||||||
|
task = orchestrator.tasks[task_id]
|
||||||
|
|
||||||
|
# Track active WebSocket connections for this job
|
||||||
|
orchestrator.notifier.task_subscribers[task_id].add(websocket)
|
||||||
|
|
||||||
|
try:
|
||||||
|
task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
|
||||||
|
task_response = TaskStatusResponse(
|
||||||
|
task_id=task.task_id,
|
||||||
|
task_status=task.task_status,
|
||||||
|
task_position=task_queue_position,
|
||||||
|
task_meta=task.processing_meta,
|
||||||
|
)
|
||||||
|
await websocket.send_text(
|
||||||
|
WebsocketMessage(
|
||||||
|
message=MessageKind.CONNECTION, task=task_response
|
||||||
|
).model_dump_json()
|
||||||
|
)
|
||||||
|
while True:
|
||||||
|
task_queue_position = await orchestrator.get_queue_position(
|
||||||
|
task_id=task_id
|
||||||
|
)
|
||||||
|
task_response = TaskStatusResponse(
|
||||||
|
task_id=task.task_id,
|
||||||
|
task_status=task.task_status,
|
||||||
|
task_position=task_queue_position,
|
||||||
|
task_meta=task.processing_meta,
|
||||||
|
)
|
||||||
|
await websocket.send_text(
|
||||||
|
WebsocketMessage(
|
||||||
|
message=MessageKind.UPDATE, task=task_response
|
||||||
|
).model_dump_json()
|
||||||
|
)
|
||||||
|
# each client message will be interpreted as a request for update
|
||||||
|
msg = await websocket.receive_text()
|
||||||
|
_log.debug(f"Received message: {msg}")
|
||||||
|
|
||||||
|
except WebSocketDisconnect:
|
||||||
|
_log.info(f"WebSocket disconnected for job {task_id}")
|
||||||
|
|
||||||
|
finally:
|
||||||
|
orchestrator.notifier.task_subscribers[task_id].remove(websocket)
|
||||||
|
|
||||||
|
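A client can watch the same task over the websocket instead of polling. A minimal sketch using the third-party websockets package (not used by this repo, purely illustrative), where any text sent back triggers a fresh status message:

    import asyncio
    import json

    import websockets  # pip install websockets


    async def watch(task_id: str):
        uri = f"ws://127.0.0.1:5001/v1/status/ws/{task_id}"
        async with websockets.connect(uri) as ws:
            while True:
                msg = json.loads(await ws.recv())
                print(msg["message"], msg.get("task"))
                if (msg.get("task") or {}).get("task_status") in ("success", "failure"):
                    break
                await ws.send("update")  # request the next status update


    # asyncio.run(watch("your-task-id"))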
+    # Task result
+    @app.get(
+        "/v1/result/{task_id}",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
+    )
+    async def task_result(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        task_id: str,
+    ):
+        try:
+            task = await orchestrator.get_raw_task(task_id=task_id)
+            response = await prepare_response(
+                task=task, orchestrator=orchestrator, background_tasks=background_tasks
+            )
+            return response
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+
+    # Update task progress
+    @app.post(
+        "/v1/callback/task/progress",
+        response_model=ProgressCallbackResponse,
+    )
+    async def callback_task_progress(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        request: ProgressCallbackRequest,
+    ):
+        try:
+            await orchestrator.receive_task_progress(request=request)
+            return ProgressCallbackResponse(status="ack")
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        except ProgressInvalid as err:
+            raise HTTPException(
+                status_code=400, detail=f"Invalid progress payload: {err}"
+            )
+
+    #### Clear requests
+
+    # Offload models
+    @app.get(
+        "/v1/clear/converters",
+        response_model=ClearResponse,
+    )
+    async def clear_converters(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+    ):
+        await orchestrator.clear_converters()
+        return ClearResponse()
+
+    # Clean results
+    @app.get(
+        "/v1/clear/results",
+        response_model=ClearResponse,
+    )
+    async def clear_results(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        older_then: float = 3600,
+    ):
+        await orchestrator.clear_results(older_than=older_then)
+        return ClearResponse()
+
+    return app
docling_serve/datamodel/__init__.py (new file, 0 lines)
docling_serve/datamodel/convert.py (new file, 40 lines)
@@ -0,0 +1,40 @@
# Define the input options for the API
from typing import Annotated

from pydantic import Field

from docling.datamodel.pipeline_options import (
    EasyOcrOptions,
)
from docling.models.factories import get_ocr_factory
from docling_jobkit.datamodel.convert import ConvertDocumentsOptions

from docling_serve.settings import docling_serve_settings

ocr_factory = get_ocr_factory(
    allow_external_plugins=docling_serve_settings.allow_external_plugins
)
ocr_engines_enum = ocr_factory.get_enum()


class ConvertDocumentsRequestOptions(ConvertDocumentsOptions):
    ocr_engine: Annotated[  # type: ignore
        ocr_engines_enum,
        Field(
            description=(
                "The OCR engine to use. String. "
                f"Allowed values: {', '.join([v.value for v in ocr_engines_enum])}. "
                "Optional, defaults to easyocr."
            ),
            examples=[EasyOcrOptions.kind],
        ),
    ] = ocr_engines_enum(EasyOcrOptions.kind)  # type: ignore

    document_timeout: Annotated[
        float,
        Field(
            description="The timeout for processing each document, in seconds.",
            gt=0,
            le=docling_serve_settings.max_document_timeout,
        ),
    ] = docling_serve_settings.max_document_timeout
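To see the resulting request options in practice; a minimal sketch, assuming docling-serve and its OCR plugins are installed so the dynamic engine enum can be built:

    from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions

    opts = ConvertDocumentsRequestOptions(document_timeout=60.0)
    print(opts.ocr_engine)  # defaults to the easyocr kind
    print(opts.model_dump_json()[:120])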
docling_serve/datamodel/requests.py (new file, 72 lines)
@@ -0,0 +1,72 @@
import enum
from typing import Annotated, Literal

from pydantic import BaseModel, Field, model_validator
from pydantic_core import PydanticCustomError
from typing_extensions import Self

from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
from docling_jobkit.datamodel.s3_coords import S3Coordinates
from docling_jobkit.datamodel.task_targets import (
    InBodyTarget,
    S3Target,
    TaskTarget,
    ZipTarget,
)

from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
from docling_serve.settings import AsyncEngine, docling_serve_settings

## Sources


class FileSourceRequest(FileSource):
    kind: Literal["file"] = "file"


class HttpSourceRequest(HttpSource):
    kind: Literal["http"] = "http"


class S3SourceRequest(S3Coordinates):
    kind: Literal["s3"] = "s3"


## Multipart targets
class TargetName(str, enum.Enum):
    INBODY = InBodyTarget().kind
    ZIP = ZipTarget().kind


## Aliases
SourceRequestItem = Annotated[
    FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
]


## Complete Source request
class ConvertDocumentsRequest(BaseModel):
    options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
    sources: list[SourceRequestItem]
    target: TaskTarget = InBodyTarget()

    @model_validator(mode="after")
    def validate_s3_source_and_target(self) -> Self:
        for source in self.sources:
            if isinstance(source, S3SourceRequest):
                if docling_serve_settings.eng_kind != AsyncEngine.KFP:
                    raise PydanticCustomError(
                        "error source", 'source kind "s3" requires engine kind "KFP"'
                    )
                if self.target.kind != "s3":
                    raise PydanticCustomError(
                        "error source", 'source kind "s3" requires target kind "s3"'
                    )
        if isinstance(self.target, S3Target):
            for source in self.sources:
                if isinstance(source, S3SourceRequest):
                    return self
            raise PydanticCustomError(
                "error target", 'target kind "s3" requires source kind "s3"'
            )
        return self
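The discriminated kind field means a plain JSON payload validates straight into the right source type. A small sketch using the models above (and assuming the jobkit FileSource keeps the base64_string/filename fields shown elsewhere in this diff):

    from docling_serve.datamodel.requests import ConvertDocumentsRequest

    payload = {
        "sources": [
            {"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"},
            {"kind": "file", "base64_string": "aGVsbG8=", "filename": "hello.txt"},
        ],
    }
    req = ConvertDocumentsRequest.model_validate(payload)
    print(type(req.sources[0]).__name__)  # HttpSourceRequest
    print(req.target.kind)                # defaults to "inbody"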
docling_serve/datamodel/responses.py (new file, 63 lines)
@@ -0,0 +1,63 @@
import enum
from typing import Optional

from pydantic import BaseModel

from docling.datamodel.document import ConversionStatus, ErrorItem
from docling.utils.profiling import ProfilingItem
from docling_core.types.doc import DoclingDocument
from docling_jobkit.datamodel.task_meta import TaskProcessingMeta


# Status
class HealthCheckResponse(BaseModel):
    status: str = "ok"


class ClearResponse(BaseModel):
    status: str = "ok"


class DocumentResponse(BaseModel):
    filename: str
    md_content: Optional[str] = None
    json_content: Optional[DoclingDocument] = None
    html_content: Optional[str] = None
    text_content: Optional[str] = None
    doctags_content: Optional[str] = None


class ConvertDocumentResponse(BaseModel):
    document: DocumentResponse
    status: ConversionStatus
    errors: list[ErrorItem] = []
    processing_time: float
    timings: dict[str, ProfilingItem] = {}


class PresignedUrlConvertDocumentResponse(BaseModel):
    status: ConversionStatus
    processing_time: float


class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus


class TaskStatusResponse(BaseModel):
    task_id: str
    task_status: str
    task_position: Optional[int] = None
    task_meta: Optional[TaskProcessingMeta] = None


class MessageKind(str, enum.Enum):
    CONNECTION = "connection"
    UPDATE = "update"
    ERROR = "error"


class WebsocketMessage(BaseModel):
    message: MessageKind
    task: Optional[TaskStatusResponse] = None
    error: Optional[str] = None
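Client code can lean on these same models to parse poll responses. A small sketch with an invented payload:

    from docling_serve.datamodel.responses import TaskStatusResponse

    raw = {"task_id": "abc123", "task_status": "started", "task_position": 0}
    status = TaskStatusResponse.model_validate(raw)
    print(status.task_status)  # "started"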
docling_serve/gradio_ui.py (new file, 873 lines)
@@ -0,0 +1,873 @@
|
|||||||
|
import base64
|
||||||
|
import importlib
|
||||||
|
import itertools
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import ssl
|
||||||
|
import tempfile
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import certifi
|
||||||
|
import gradio as gr
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from docling.datamodel.base_models import FormatToExtensions
|
||||||
|
from docling.datamodel.pipeline_options import (
|
||||||
|
PdfBackend,
|
||||||
|
ProcessingPipeline,
|
||||||
|
TableFormerMode,
|
||||||
|
TableStructureOptions,
|
||||||
|
)
|
||||||
|
|
||||||
|
from docling_serve.helper_functions import _to_list_of_strings
|
||||||
|
from docling_serve.settings import docling_serve_settings, uvicorn_settings
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
############################
|
||||||
|
# Path of static artifacts #
|
||||||
|
############################
|
||||||
|
|
||||||
|
logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
|
||||||
|
js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7"
|
||||||
|
if (
|
||||||
|
docling_serve_settings.static_path is not None
|
||||||
|
and docling_serve_settings.static_path.is_dir()
|
||||||
|
):
|
||||||
|
logo_path = str(docling_serve_settings.static_path / "logo.svg")
|
||||||
|
js_components_url = "/static/docling-components.js"
|
||||||
|
|
||||||
|
|
||||||
|
##############################
|
||||||
|
# Head JS for web components #
|
||||||
|
##############################
|
||||||
|
head = f"""
|
||||||
|
<script src="{js_components_url}" type="module"></script>
|
||||||
|
"""
|
||||||
|
|
||||||
|
#################
|
||||||
|
# CSS and theme #
|
||||||
|
#################
|
||||||
|
|
||||||
|
css = """
|
||||||
|
#logo {
|
||||||
|
border-style: none;
|
||||||
|
background: none;
|
||||||
|
box-shadow: none;
|
||||||
|
min-width: 80px;
|
||||||
|
}
|
||||||
|
#dark_mode_column {
|
||||||
|
display: flex;
|
||||||
|
align-content: flex-end;
|
||||||
|
}
|
||||||
|
#title {
|
||||||
|
text-align: left;
|
||||||
|
display:block;
|
||||||
|
height: auto;
|
||||||
|
padding-top: 5px;
|
||||||
|
line-height: 0;
|
||||||
|
}
|
||||||
|
.title-text h1 > p, .title-text p {
|
||||||
|
margin-top: 0px !important;
|
||||||
|
margin-bottom: 2px !important;
|
||||||
|
}
|
||||||
|
#custom-container {
|
||||||
|
border: 0.909091px solid;
|
||||||
|
padding: 10px;
|
||||||
|
border-radius: 4px;
|
||||||
|
}
|
||||||
|
#custom-container h4 {
|
||||||
|
font-size: 14px;
|
||||||
|
}
|
||||||
|
#file_input_zone {
|
||||||
|
height: 140px;
|
||||||
|
}
|
||||||
|
|
||||||
|
docling-img {
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
docling-img::part(page) {
|
||||||
|
box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
theme = gr.themes.Default(
|
||||||
|
text_size="md",
|
||||||
|
spacing_size="md",
|
||||||
|
font=[
|
||||||
|
gr.themes.GoogleFont("Red Hat Display"),
|
||||||
|
"ui-sans-serif",
|
||||||
|
"system-ui",
|
||||||
|
"sans-serif",
|
||||||
|
],
|
||||||
|
font_mono=[
|
||||||
|
gr.themes.GoogleFont("Red Hat Mono"),
|
||||||
|
"ui-monospace",
|
||||||
|
"Consolas",
|
||||||
|
"monospace",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
#############
|
||||||
|
# Variables #
|
||||||
|
#############
|
||||||
|
|
||||||
|
gradio_output_dir = None # Will be set by FastAPI when mounted
|
||||||
|
file_output_path = None # Will be set when a new file is generated
|
||||||
|
|
||||||
|
#############
|
||||||
|
# Functions #
|
||||||
|
#############
|
||||||
|
|
||||||
|
|
||||||
|
def get_api_endpoint() -> str:
|
||||||
|
protocol = "http"
|
||||||
|
if uvicorn_settings.ssl_keyfile is not None:
|
||||||
|
protocol = "https"
|
||||||
|
return f"{protocol}://{docling_serve_settings.api_host}:{uvicorn_settings.port}"
|
||||||
|
|
||||||
|
|
||||||
|
def get_ssl_context() -> ssl.SSLContext:
|
||||||
|
ctx = ssl.create_default_context(cafile=certifi.where())
|
||||||
|
kube_sa_ca_cert_path = Path(
|
||||||
|
"/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
uvicorn_settings.ssl_keyfile is not None
|
||||||
|
and ".svc." in docling_serve_settings.api_host
|
||||||
|
and kube_sa_ca_cert_path.exists()
|
||||||
|
):
|
||||||
|
ctx.load_verify_locations(cafile=kube_sa_ca_cert_path)
|
||||||
|
return ctx
|
||||||
|
|
||||||
|
|
||||||
|
def health_check():
|
||||||
|
response = httpx.get(f"{get_api_endpoint()}/health")
|
||||||
|
if response.status_code == 200:
|
||||||
|
return "Healthy"
|
||||||
|
return "Unhealthy"
|
||||||
|
|
||||||
|
|
||||||
|
def set_options_visibility(x):
|
||||||
|
return gr.Accordion("Options", open=x)
|
||||||
|
|
||||||
|
|
||||||
|
def set_outputs_visibility_direct(x, y):
|
||||||
|
content = gr.Row(visible=x)
|
||||||
|
file = gr.Row(visible=y)
|
||||||
|
return content, file
|
||||||
|
|
||||||
|
|
||||||
|
def set_task_id_visibility(x):
|
||||||
|
task_id_row = gr.Row(visible=x)
|
||||||
|
return task_id_row
|
||||||
|
|
||||||
|
|
||||||
|
def set_outputs_visibility_process(x):
|
||||||
|
content = gr.Row(visible=not x)
|
||||||
|
file = gr.Row(visible=x)
|
||||||
|
return content, file
|
||||||
|
|
||||||
|
|
||||||
|
def set_download_button_label(label_text: gr.State):
|
||||||
|
return gr.DownloadButton(label=str(label_text), scale=1)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_outputs():
|
||||||
|
task_id_rendered = ""
|
||||||
|
markdown_content = ""
|
||||||
|
json_content = ""
|
||||||
|
json_rendered_content = ""
|
||||||
|
html_content = ""
|
||||||
|
text_content = ""
|
||||||
|
doctags_content = ""
|
||||||
|
|
||||||
|
return (
|
||||||
|
task_id_rendered,
|
||||||
|
markdown_content,
|
||||||
|
markdown_content,
|
||||||
|
json_content,
|
||||||
|
json_rendered_content,
|
||||||
|
html_content,
|
||||||
|
html_content,
|
||||||
|
text_content,
|
||||||
|
doctags_content,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def clear_url_input():
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def clear_file_input():
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def auto_set_return_as_file(
|
||||||
|
url_input_value: str,
|
||||||
|
file_input_value: Optional[list[str]],
|
||||||
|
image_export_mode_value: str,
|
||||||
|
):
|
||||||
|
# If more than one input source is provided, return as file
|
||||||
|
if (
|
||||||
|
(len(url_input_value.split(",")) > 1)
|
||||||
|
or (file_input_value and len(file_input_value) > 1)
|
||||||
|
or (image_export_mode_value == "referenced")
|
||||||
|
):
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def change_ocr_lang(ocr_engine):
|
||||||
|
if ocr_engine == "easyocr":
|
||||||
|
return "en,fr,de,es"
|
||||||
|
elif ocr_engine == "tesseract_cli":
|
||||||
|
return "eng,fra,deu,spa"
|
||||||
|
elif ocr_engine == "tesseract":
|
||||||
|
return "eng,fra,deu,spa"
|
||||||
|
elif ocr_engine == "rapidocr":
|
||||||
|
return "english,chinese"
|
||||||
|
|
||||||
|
|
||||||
|
def wait_task_finish(task_id: str, return_as_file: bool):
|
||||||
|
conversion_sucess = False
|
||||||
|
task_finished = False
|
||||||
|
task_status = ""
|
||||||
|
ssl_ctx = get_ssl_context()
|
||||||
|
while not task_finished:
|
||||||
|
try:
|
||||||
|
response = httpx.get(
|
||||||
|
f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
|
||||||
|
verify=ssl_ctx,
|
||||||
|
timeout=15,
|
||||||
|
)
|
||||||
|
task_status = response.json()["task_status"]
|
||||||
|
if task_status == "success":
|
||||||
|
conversion_sucess = True
|
||||||
|
task_finished = True
|
||||||
|
|
||||||
|
if task_status in ("failure", "revoked"):
|
||||||
|
conversion_sucess = False
|
||||||
|
task_finished = True
|
||||||
|
raise RuntimeError(f"Task failed with status {task_status!r}")
|
||||||
|
time.sleep(5)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file(s): {e}")
|
||||||
|
conversion_sucess = False
|
||||||
|
task_finished = True
|
||||||
|
raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
|
||||||
|
|
||||||
|
if conversion_sucess:
|
||||||
|
try:
|
||||||
|
response = httpx.get(
|
||||||
|
f"{get_api_endpoint()}/v1/result/{task_id}",
|
||||||
|
timeout=15,
|
||||||
|
verify=ssl_ctx,
|
||||||
|
)
|
||||||
|
output = response_to_output(response, return_as_file)
|
||||||
|
return output
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting task result: {e}")
|
||||||
|
|
||||||
|
raise gr.Error(
|
||||||
|
f"Error getting task result, conversion finished with status: {task_status}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def process_url(
    input_sources,
    to_formats,
    image_export_mode,
    pipeline,
    ocr,
    force_ocr,
    ocr_engine,
    ocr_lang,
    pdf_backend,
    table_mode,
    abort_on_error,
    return_as_file,
    do_code_enrichment,
    do_formula_enrichment,
    do_picture_classification,
    do_picture_description,
):
    target = {"kind": "zip" if return_as_file else "inbody"}
    parameters = {
        "sources": [
            {"kind": "http", "url": source} for source in input_sources.split(",")
        ],
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
            "pipeline": pipeline,
            "ocr": ocr,
            "force_ocr": force_ocr,
            "ocr_engine": ocr_engine,
            "ocr_lang": _to_list_of_strings(ocr_lang),
            "pdf_backend": pdf_backend,
            "table_mode": table_mode,
            "abort_on_error": abort_on_error,
            "do_code_enrichment": do_code_enrichment,
            "do_formula_enrichment": do_formula_enrichment,
            "do_picture_classification": do_picture_classification,
            "do_picture_description": do_picture_description,
        },
        "target": target,
    }
    if (
        not parameters["sources"]
        or len(parameters["sources"]) == 0
        or parameters["sources"][0]["url"] == ""
    ):
        logger.error("No input sources provided.")
        raise gr.Error("No input sources provided.", print_exception=False)
    try:
        ssl_ctx = get_ssl_context()
        response = httpx.post(
            f"{get_api_endpoint()}/v1/convert/source/async",
            json=parameters,
            verify=ssl_ctx,
            timeout=60,
        )
    except Exception as e:
        logger.error(f"Error processing URL: {e}")
        raise gr.Error(f"Error processing URL: {e}", print_exception=False)
    if response.status_code != 200:
        data = response.json()
        error_message = data.get("detail", "An unknown error occurred.")
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)

    task_id_rendered = response.json()["task_id"]
    return task_id_rendered

def file_to_base64(file):
    with open(file.name, "rb") as f:
        encoded_string = base64.b64encode(f.read()).decode("utf-8")
    return encoded_string

def process_file(
    files,
    to_formats,
    image_export_mode,
    pipeline,
    ocr,
    force_ocr,
    ocr_engine,
    ocr_lang,
    pdf_backend,
    table_mode,
    abort_on_error,
    return_as_file,
    do_code_enrichment,
    do_formula_enrichment,
    do_picture_classification,
    do_picture_description,
):
    if not files or len(files) == 0:
        logger.error("No files provided.")
        raise gr.Error("No files provided.", print_exception=False)
    files_data = [
        {"kind": "file", "base64_string": file_to_base64(file), "filename": file.name}
        for file in files
    ]
    target = {"kind": "zip" if return_as_file else "inbody"}

    parameters = {
        "sources": files_data,
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
            "pipeline": pipeline,
            "ocr": ocr,
            "force_ocr": force_ocr,
            "ocr_engine": ocr_engine,
            "ocr_lang": _to_list_of_strings(ocr_lang),
            "pdf_backend": pdf_backend,
            "table_mode": table_mode,
            "abort_on_error": abort_on_error,
            "return_as_file": return_as_file,
            "do_code_enrichment": do_code_enrichment,
            "do_formula_enrichment": do_formula_enrichment,
            "do_picture_classification": do_picture_classification,
            "do_picture_description": do_picture_description,
        },
        "target": target,
    }

    try:
        ssl_ctx = get_ssl_context()
        response = httpx.post(
            f"{get_api_endpoint()}/v1/convert/source/async",
            json=parameters,
            verify=ssl_ctx,
            timeout=60,
        )
    except Exception as e:
        logger.error(f"Error processing file(s): {e}")
        raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
    if response.status_code != 200:
        data = response.json()
        error_message = data.get("detail", "An unknown error occurred.")
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)

    task_id_rendered = response.json()["task_id"]
    return task_id_rendered

def response_to_output(response, return_as_file):
    markdown_content = ""
    json_content = ""
    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""
    download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1)
    if return_as_file:
        filename = (
            response.headers.get("Content-Disposition").split("filename=")[1].strip('"')
        )
        tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_"))
        file_output_path = f"{tmp_output_dir}/{filename}"
        # logger.info(f"Saving file to: {file_output_path}")
        with open(file_output_path, "wb") as f:
            f.write(response.content)
        download_button = gr.DownloadButton(
            visible=True, label=f"Download {filename}", scale=1, value=file_output_path
        )
    else:
        full_content = response.json()
        markdown_content = full_content.get("document").get("md_content")
        json_content = json.dumps(
            full_content.get("document").get("json_content"), indent=2
        )
        # Embed document JSON and trigger load at client via an image.
        json_rendered_content = f"""
        <docling-img id="dclimg" pagenumbers><docling-tooltip></docling-tooltip></docling-img>
        <script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{json_content}</script>
        <img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
        """
        html_content = full_content.get("document").get("html_content")
        text_content = full_content.get("document").get("text_content")
        doctags_content = full_content.get("document").get("doctags_content")
    return (
        markdown_content,
        markdown_content,
        json_content,
        json_rendered_content,
        html_content,
        html_content,
        text_content,
        doctags_content,
        download_button,
    )

############
# UI Setup #
############

with gr.Blocks(
    head=head,
    css=css,
    theme=theme,
    title="Docling Serve",
    delete_cache=(3600, 3600),  # Delete all files older than 1 hour every hour
) as ui:
    # Constants stored in states to be able to pass them as inputs to functions
    processing_text = gr.State("Processing your document(s), please wait...")
    true_bool = gr.State(True)
    false_bool = gr.State(False)

    # Banner
    with gr.Row(elem_id="check_health"):
        # Logo
        with gr.Column(scale=1, min_width=90):
            try:
                gr.Image(
                    logo_path,
                    height=80,
                    width=80,
                    show_download_button=False,
                    show_label=False,
                    show_fullscreen_button=False,
                    container=False,
                    elem_id="logo",
                    scale=0,
                )
            except Exception:
                logger.warning("Logo not found.")

        # Title
        with gr.Column(scale=1, min_width=200):
            gr.Markdown(
                f"# Docling Serve \n(docling version: "
                f"{importlib.metadata.version('docling')})",
                elem_id="title",
                elem_classes=["title-text"],
            )
        # Dark mode button
        with gr.Column(scale=16, elem_id="dark_mode_column"):
            dark_mode_btn = gr.Button("Dark/Light Mode", scale=0)
            dark_mode_btn.click(
                None,
                None,
                None,
                js="""() => {
                    if (document.querySelectorAll('.dark').length) {
                        document.querySelectorAll('.dark').forEach(
                            el => el.classList.remove('dark')
                        );
                    } else {
                        document.querySelector('body').classList.add('dark');
                    }
                }""",
                show_api=False,
            )

    # URL Processing Tab
    with gr.Tab("Convert URL"):
        with gr.Row():
            with gr.Column(scale=4):
                url_input = gr.Textbox(
                    label="URL Input Source",
                    placeholder="https://arxiv.org/pdf/2501.17887",
                )
            with gr.Column(scale=1):
                url_process_btn = gr.Button("Process URL", scale=1)
                url_reset_btn = gr.Button("Reset", scale=1)

    # File Processing Tab
    with gr.Tab("Convert File"):
        with gr.Row():
            with gr.Column(scale=4):
                file_input = gr.File(
                    elem_id="file_input_zone",
                    label="Upload File",
                    file_types=[
                        f".{v}"
                        for v in itertools.chain.from_iterable(
                            FormatToExtensions.values()
                        )
                    ],
                    file_count="multiple",
                    scale=4,
                )
            with gr.Column(scale=1):
                file_process_btn = gr.Button("Process File", scale=1)
                file_reset_btn = gr.Button("Reset", scale=1)

    # Options
    with gr.Accordion("Options") as options:
        with gr.Row():
            with gr.Column(scale=1):
                to_formats = gr.CheckboxGroup(
                    [
                        ("Docling (JSON)", "json"),
                        ("Markdown", "md"),
                        ("HTML", "html"),
                        ("Plain Text", "text"),
                        ("Doc Tags", "doctags"),
                    ],
                    label="To Formats",
                    value=["json", "md"],
                )
            with gr.Column(scale=1):
                image_export_mode = gr.Radio(
                    [
                        ("Embedded", "embedded"),
                        ("Placeholder", "placeholder"),
                        ("Referenced", "referenced"),
                    ],
                    label="Image Export Mode",
                    value="embedded",
                )
        with gr.Row():
            with gr.Column(scale=1, min_width=200):
                pipeline = gr.Radio(
                    [(v.value.capitalize(), v.value) for v in ProcessingPipeline],
                    label="Pipeline type",
                    value=ProcessingPipeline.STANDARD.value,
                )
        with gr.Row():
            with gr.Column(scale=1, min_width=200):
                ocr = gr.Checkbox(label="Enable OCR", value=True)
                force_ocr = gr.Checkbox(label="Force OCR", value=False)
            with gr.Column(scale=1):
                ocr_engine = gr.Radio(
                    [
                        ("EasyOCR", "easyocr"),
                        ("Tesseract", "tesseract"),
                        ("RapidOCR", "rapidocr"),
                    ],
                    label="OCR Engine",
                    value="easyocr",
                )
            with gr.Column(scale=1, min_width=200):
                ocr_lang = gr.Textbox(
                    label="OCR Language (beware of the format)", value="en,fr,de,es"
                )
        ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
        with gr.Row():
            with gr.Column(scale=4):
                pdf_backend = gr.Radio(
                    [v.value for v in PdfBackend],
                    label="PDF Backend",
                    value=PdfBackend.DLPARSE_V4.value,
                )
            with gr.Column(scale=2):
                table_mode = gr.Radio(
                    [(v.value.capitalize(), v.value) for v in TableFormerMode],
                    label="Table Mode",
                    value=TableStructureOptions().mode.value,
                )
            with gr.Column(scale=1):
                abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
                return_as_file = gr.Checkbox(label="Return as File", value=False)
        with gr.Row():
            with gr.Column():
                do_code_enrichment = gr.Checkbox(
                    label="Enable code enrichment", value=False
                )
                do_formula_enrichment = gr.Checkbox(
                    label="Enable formula enrichment", value=False
                )
            with gr.Column():
                do_picture_classification = gr.Checkbox(
                    label="Enable picture classification", value=False
                )
                do_picture_description = gr.Checkbox(
                    label="Enable picture description", value=False
                )

    # Task id output
    with gr.Row(visible=False) as task_id_output:
        task_id_rendered = gr.Textbox(label="Task id", interactive=False)

    # Document output
    with gr.Row(visible=False) as content_output:
        with gr.Tab("Docling (JSON)"):
            output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
        with gr.Tab("Docling-Rendered"):
            output_json_rendered = gr.HTML(label="Response")
        with gr.Tab("Markdown"):
            output_markdown = gr.Code(
                language="markdown", wrap_lines=True, show_label=False
            )
        with gr.Tab("Markdown-Rendered"):
            output_markdown_rendered = gr.Markdown(label="Response")
        with gr.Tab("HTML"):
            output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
        with gr.Tab("HTML-Rendered"):
            output_html_rendered = gr.HTML(label="Response")
        with gr.Tab("Text"):
            output_text = gr.Code(wrap_lines=True, show_label=False)
        with gr.Tab("DocTags"):
            output_doctags = gr.Code(wrap_lines=True, show_label=False)

    # File download output
    with gr.Row(visible=False) as file_output:
        download_file_btn = gr.DownloadButton(label="Placeholder", scale=1)

    ##############
    # UI Actions #
    ##############

    # Handle Return as File
    url_input.change(
        auto_set_return_as_file,
        inputs=[url_input, file_input, image_export_mode],
        outputs=[return_as_file],
    )
    file_input.change(
        auto_set_return_as_file,
        inputs=[url_input, file_input, image_export_mode],
        outputs=[return_as_file],
    )
    image_export_mode.change(
        auto_set_return_as_file,
        inputs=[url_input, file_input, image_export_mode],
        outputs=[return_as_file],
    )

    # URL processing
    url_process_btn.click(
        set_options_visibility, inputs=[false_bool], outputs=[options]
    ).then(
        set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
    ).then(
        clear_outputs,
        inputs=None,
        outputs=[
            task_id_rendered,
            output_markdown,
            output_markdown_rendered,
            output_json,
            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
        ],
    ).then(
        set_task_id_visibility,
        inputs=[true_bool],
        outputs=[task_id_output],
    ).then(
        process_url,
        inputs=[
            url_input,
            to_formats,
            image_export_mode,
            pipeline,
            ocr,
            force_ocr,
            ocr_engine,
            ocr_lang,
            pdf_backend,
            table_mode,
            abort_on_error,
            return_as_file,
            do_code_enrichment,
            do_formula_enrichment,
            do_picture_classification,
            do_picture_description,
        ],
        outputs=[
            task_id_rendered,
        ],
    ).then(
        set_outputs_visibility_process,
        inputs=[return_as_file],
        outputs=[content_output, file_output],
    ).then(
        wait_task_finish,
        inputs=[task_id_rendered, return_as_file],
        outputs=[
            output_markdown,
            output_markdown_rendered,
            output_json,
            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
            download_file_btn,
        ],
    )

    url_reset_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[
            output_markdown,
            output_markdown_rendered,
            output_json,
            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
        ],
    ).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
        set_outputs_visibility_direct,
        inputs=[false_bool, false_bool],
        outputs=[content_output, file_output],
    ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
        clear_url_input, inputs=None, outputs=[url_input]
    )

    # File processing
    file_process_btn.click(
        set_options_visibility, inputs=[false_bool], outputs=[options]
    ).then(
        set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
    ).then(
        clear_outputs,
        inputs=None,
        outputs=[
            task_id_rendered,
            output_markdown,
            output_markdown_rendered,
            output_json,
            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
        ],
    ).then(
        set_task_id_visibility,
        inputs=[true_bool],
        outputs=[task_id_output],
    ).then(
        process_file,
        inputs=[
            file_input,
            to_formats,
            image_export_mode,
            pipeline,
            ocr,
            force_ocr,
            ocr_engine,
            ocr_lang,
            pdf_backend,
            table_mode,
            abort_on_error,
            return_as_file,
            do_code_enrichment,
            do_formula_enrichment,
            do_picture_classification,
            do_picture_description,
        ],
        outputs=[
            task_id_rendered,
        ],
    ).then(
        set_outputs_visibility_process,
        inputs=[return_as_file],
        outputs=[content_output, file_output],
    ).then(
        wait_task_finish,
        inputs=[task_id_rendered, return_as_file],
        outputs=[
            output_markdown,
            output_markdown_rendered,
            output_json,
            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
            download_file_btn,
        ],
    )

    file_reset_btn.click(
        clear_outputs,
        inputs=None,
        outputs=[
            output_markdown,
            output_markdown_rendered,
            output_json,
            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
        ],
    ).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
        set_outputs_visibility_direct,
        inputs=[false_bool, false_bool],
        outputs=[content_output, file_output],
    ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
        clear_file_input, inputs=None, outputs=[file_input]
    )
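The UI above drives the same public API that any client can call. As a standalone sketch (assumptions: a docling-serve instance reachable at http://localhost:5001, and the response shapes consumed by the UI code above), the full asynchronous round trip looks like this:

import time

import httpx

BASE = "http://localhost:5001"  # assumption: a local docling-serve instance

# 1. Submit a source for asynchronous conversion (same payload shape as process_url).
payload = {
    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}],
    "options": {"to_formats": ["md"]},
    "target": {"kind": "inbody"},
}
task = httpx.post(f"{BASE}/v1/convert/source/async", json=payload, timeout=60)
task_id = task.json()["task_id"]

# 2. Poll until the task reaches a terminal status (mirrors wait_task_finish).
while True:
    poll = httpx.get(f"{BASE}/v1/status/poll/{task_id}?wait=5", timeout=15)
    task_status = poll.json()["task_status"]
    if task_status == "success":
        break
    if task_status in ("failure", "revoked"):
        raise RuntimeError(f"Task failed with status {task_status!r}")
    time.sleep(5)

# 3. Fetch the result; with an "inbody" target the document comes back as JSON.
result = httpx.get(f"{BASE}/v1/result/{task_id}", timeout=15)
print(result.json()["document"]["md_content"])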
120
docling_serve/helper_functions.py
Normal file
@@ -0,0 +1,120 @@
import inspect
import json
import re
from typing import Union, get_args, get_origin

from fastapi import Depends, Form
from pydantic import BaseModel, TypeAdapter


def is_pydantic_model(type_):
    try:
        if inspect.isclass(type_) and issubclass(type_, BaseModel):
            return True

        origin = get_origin(type_)
        if origin is Union:
            args = get_args(type_)
            return any(
                inspect.isclass(arg) and issubclass(arg, BaseModel)
                for arg in args
                if arg is not type(None)
            )

    except Exception:
        pass

    return False


# Adapted from
# https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
def FormDepends(cls: type[BaseModel]):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
        annotation = model_field.annotation
        description = model_field.description
        default = (
            Form(..., description=description, examples=model_field.examples)
            if model_field.is_required()
            else Form(
                model_field.default,
                examples=model_field.examples,
                description=description,
            )
        )

        # Flatten nested Pydantic models by accepting them as JSON strings
        if is_pydantic_model(annotation):
            annotation = str
            default = Form(
                None
                if model_field.default is None
                else json.dumps(model_field.default.model_dump(mode="json")),
                description=description,
                examples=None
                if not model_field.examples
                else [
                    json.dumps(ex.model_dump(mode="json"))
                    for ex in model_field.examples
                ],
            )

        new_parameters.append(
            inspect.Parameter(
                name=field_name,
                kind=inspect.Parameter.POSITIONAL_ONLY,
                default=default,
                annotation=annotation,
            )
        )

    async def as_form_func(**data):
        for field_name, model_field in cls.model_fields.items():
            value = data.get(field_name)
            annotation = model_field.annotation

            # Parse nested models from JSON string
            if value is not None and is_pydantic_model(annotation):
                try:
                    validator = TypeAdapter(annotation)
                    data[field_name] = validator.validate_json(value)
                except Exception as e:
                    raise ValueError(f"Invalid JSON for field '{field_name}': {e}")

        return cls(**data)

    sig = inspect.signature(as_form_func)
    sig = sig.replace(parameters=new_parameters)
    as_form_func.__signature__ = sig  # type: ignore

    return Depends(as_form_func)


def _to_list_of_strings(input_value: Union[str, list[str]]) -> list[str]:
    def split_and_strip(value: str) -> list[str]:
        if re.search(r"[;,]", value):
            return [item.strip() for item in re.split(r"[;,]", value)]
        else:
            return [value.strip()]

    if isinstance(input_value, str):
        return split_and_strip(input_value)
    elif isinstance(input_value, list):
        result = []
        for item in input_value:
            result.extend(split_and_strip(str(item)))
        return result
    else:
        raise ValueError("Invalid input: must be a string or a list of strings.")


# Helper functions to parse inputs coming as Form objects
def _str_to_bool(value: Union[str, bool]) -> bool:
    if isinstance(value, bool):
        return value  # Already a boolean, return as-is
    if isinstance(value, str):
        value = value.strip().lower()  # Normalize input
        return value in ("true", "1", "yes")
    return False  # Default to False if none of the above matches
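For illustration, a minimal sketch of how FormDepends can be wired into an endpoint (the app, model, and endpoint names here are hypothetical, not part of this diff):

from fastapi import FastAPI
from pydantic import BaseModel

from docling_serve.helper_functions import FormDepends

app = FastAPI()


class ExampleOptions(BaseModel):  # hypothetical model, for illustration only
    name: str
    count: int = 1


@app.post("/example")
async def example_endpoint(options: ExampleOptions = FormDepends(ExampleOptions)):
    # Each model field is exposed as an individual form field; nested Pydantic
    # fields would arrive as JSON strings and be re-validated by as_form_func.
    return {"name": options.name, "count": options.count}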
53
docling_serve/orchestrator_factory.py
Normal file
@@ -0,0 +1,53 @@
from functools import lru_cache

from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator

from docling_serve.settings import AsyncEngine, docling_serve_settings


@lru_cache
def get_async_orchestrator() -> BaseOrchestrator:
    if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
        from docling_jobkit.convert.manager import (
            DoclingConverterManager,
            DoclingConverterManagerConfig,
        )
        from docling_jobkit.orchestrators.local.orchestrator import (
            LocalOrchestrator,
            LocalOrchestratorConfig,
        )

        local_config = LocalOrchestratorConfig(
            num_workers=docling_serve_settings.eng_loc_num_workers,
            shared_models=docling_serve_settings.eng_loc_share_models,
        )

        cm_config = DoclingConverterManagerConfig(
            artifacts_path=docling_serve_settings.artifacts_path,
            options_cache_size=docling_serve_settings.options_cache_size,
            enable_remote_services=docling_serve_settings.enable_remote_services,
            allow_external_plugins=docling_serve_settings.allow_external_plugins,
            max_num_pages=docling_serve_settings.max_num_pages,
            max_file_size=docling_serve_settings.max_file_size,
        )
        cm = DoclingConverterManager(config=cm_config)

        return LocalOrchestrator(config=local_config, converter_manager=cm)
    elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
        from docling_jobkit.orchestrators.kfp.orchestrator import (
            KfpOrchestrator,
            KfpOrchestratorConfig,
        )

        kfp_config = KfpOrchestratorConfig(
            endpoint=docling_serve_settings.eng_kfp_endpoint,
            token=docling_serve_settings.eng_kfp_token,
            ca_cert_path=docling_serve_settings.eng_kfp_ca_cert_path,
            self_callback_endpoint=docling_serve_settings.eng_kfp_self_callback_endpoint,
            self_callback_token_path=docling_serve_settings.eng_kfp_self_callback_token_path,
            self_callback_ca_cert_path=docling_serve_settings.eng_kfp_self_callback_ca_cert_path,
        )

        return KfpOrchestrator(config=kfp_config)

    raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")
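A short sketch of how the engine is selected at runtime (hedged; it assumes the environment is set before docling_serve.settings is first imported, since the settings object is created at import time):

import os

# Choose the engine via env vars; "local" is the default kind.
os.environ["DOCLING_SERVE_ENG_KIND"] = "local"
os.environ["DOCLING_SERVE_ENG_LOC_NUM_WORKERS"] = "4"

from docling_serve.orchestrator_factory import get_async_orchestrator

orchestrator = get_async_orchestrator()
# @lru_cache: subsequent calls return the same orchestrator instance.
assert orchestrator is get_async_orchestrator()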
321
docling_serve/response_preparation.py
Normal file
@@ -0,0 +1,321 @@
import asyncio
import logging
import os
import shutil
import time
from collections.abc import Iterable
from pathlib import Path
from typing import Union

import httpx
from fastapi import BackgroundTasks, HTTPException
from fastapi.responses import FileResponse

from docling.datamodel.base_models import OutputFormat
from docling.datamodel.document import ConversionResult, ConversionStatus
from docling_core.types.doc import ImageRefMode
from docling_jobkit.datamodel.convert import ConvertDocumentsOptions
from docling_jobkit.datamodel.task import Task
from docling_jobkit.datamodel.task_targets import InBodyTarget, PutTarget, TaskTarget
from docling_jobkit.orchestrators.base_orchestrator import (
    BaseOrchestrator,
)

from docling_serve.datamodel.responses import (
    ConvertDocumentResponse,
    DocumentResponse,
    PresignedUrlConvertDocumentResponse,
)
from docling_serve.settings import docling_serve_settings
from docling_serve.storage import get_scratch

_log = logging.getLogger(__name__)


def _export_document_as_content(
    conv_res: ConversionResult,
    export_json: bool,
    export_html: bool,
    export_md: bool,
    export_txt: bool,
    export_doctags: bool,
    image_mode: ImageRefMode,
    md_page_break_placeholder: str,
):
    document = DocumentResponse(filename=conv_res.input.file.name)

    if conv_res.status == ConversionStatus.SUCCESS:
        new_doc = conv_res.document._make_copy_with_refmode(
            Path(), image_mode, page_no=None
        )

        # Create the different formats
        if export_json:
            document.json_content = new_doc
        if export_html:
            document.html_content = new_doc.export_to_html(image_mode=image_mode)
        if export_txt:
            document.text_content = new_doc.export_to_markdown(
                strict_text=True,
                image_mode=image_mode,
            )
        if export_md:
            document.md_content = new_doc.export_to_markdown(
                image_mode=image_mode,
                page_break_placeholder=md_page_break_placeholder or None,
            )
        if export_doctags:
            document.doctags_content = new_doc.export_to_doctags()
    elif conv_res.status == ConversionStatus.SKIPPED:
        raise HTTPException(status_code=400, detail=conv_res.errors)
    else:
        raise HTTPException(status_code=500, detail=conv_res.errors)

    return document


def _export_documents_as_files(
    conv_results: Iterable[ConversionResult],
    output_dir: Path,
    export_json: bool,
    export_html: bool,
    export_md: bool,
    export_txt: bool,
    export_doctags: bool,
    image_export_mode: ImageRefMode,
    md_page_break_placeholder: str,
) -> ConversionStatus:
    success_count = 0
    failure_count = 0

    # Default failure in case results is empty
    conv_result = ConversionStatus.FAILURE

    artifacts_dir = Path("artifacts/")  # will be relative to the fname

    for conv_res in conv_results:
        conv_result = conv_res.status
        if conv_res.status == ConversionStatus.SUCCESS:
            success_count += 1
            doc_filename = conv_res.input.file.stem

            # Export JSON format:
            if export_json:
                fname = output_dir / f"{doc_filename}.json"
                _log.info(f"writing JSON output to {fname}")
                conv_res.document.save_as_json(
                    filename=fname,
                    image_mode=image_export_mode,
                    artifacts_dir=artifacts_dir,
                )

            # Export HTML format:
            if export_html:
                fname = output_dir / f"{doc_filename}.html"
                _log.info(f"writing HTML output to {fname}")
                conv_res.document.save_as_html(
                    filename=fname,
                    image_mode=image_export_mode,
                    artifacts_dir=artifacts_dir,
                )

            # Export Text format:
            if export_txt:
                fname = output_dir / f"{doc_filename}.txt"
                _log.info(f"writing TXT output to {fname}")
                conv_res.document.save_as_markdown(
                    filename=fname,
                    strict_text=True,
                    image_mode=ImageRefMode.PLACEHOLDER,
                )

            # Export Markdown format:
            if export_md:
                fname = output_dir / f"{doc_filename}.md"
                _log.info(f"writing Markdown output to {fname}")
                conv_res.document.save_as_markdown(
                    filename=fname,
                    artifacts_dir=artifacts_dir,
                    image_mode=image_export_mode,
                    page_break_placeholder=md_page_break_placeholder or None,
                )

            # Export Document Tags format:
            if export_doctags:
                fname = output_dir / f"{doc_filename}.doctags"
                _log.info(f"writing Doc Tags output to {fname}")
                conv_res.document.save_as_doctags(filename=fname)

        else:
            _log.warning(f"Document {conv_res.input.file} failed to convert.")
            failure_count += 1

    _log.info(
        f"Processed {success_count + failure_count} docs, "
        f"of which {failure_count} failed"
    )
    return conv_result


def process_results(
    conversion_options: ConvertDocumentsOptions,
    target: TaskTarget,
    conv_results: Iterable[ConversionResult],
    work_dir: Path,
) -> Union[ConvertDocumentResponse, FileResponse, PresignedUrlConvertDocumentResponse]:
    # Let's start by processing the documents
    try:
        start_time = time.monotonic()

        # Convert the iterator to a list to count the number of results and get timings
        # As it's an iterator (lazy evaluation), it will also start the conversion
        conv_results = list(conv_results)

        processing_time = time.monotonic() - start_time

        _log.info(
            f"Processed {len(conv_results)} docs in {processing_time:.2f} seconds."
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

    if len(conv_results) == 0:
        raise HTTPException(
            status_code=500, detail="No documents were generated by Docling."
        )

    # We have some results, let's prepare the response
    response: Union[
        FileResponse, ConvertDocumentResponse, PresignedUrlConvertDocumentResponse
    ]

    # Booleans to know what to export
    export_json = OutputFormat.JSON in conversion_options.to_formats
    export_html = OutputFormat.HTML in conversion_options.to_formats
    export_md = OutputFormat.MARKDOWN in conversion_options.to_formats
    export_txt = OutputFormat.TEXT in conversion_options.to_formats
    export_doctags = OutputFormat.DOCTAGS in conversion_options.to_formats

    # Only 1 document was processed, and we are not returning it as a file
    if len(conv_results) == 1 and isinstance(target, InBodyTarget):
        conv_res = conv_results[0]
        document = _export_document_as_content(
            conv_res,
            export_json=export_json,
            export_html=export_html,
            export_md=export_md,
            export_txt=export_txt,
            export_doctags=export_doctags,
            image_mode=conversion_options.image_export_mode,
            md_page_break_placeholder=conversion_options.md_page_break_placeholder,
        )

        response = ConvertDocumentResponse(
            document=document,
            status=conv_res.status,
            processing_time=processing_time,
            timings=conv_res.timings,
        )

    # Multiple documents were processed, or we are forced returning as a file
    else:
        # Temporary directory to store the outputs
        output_dir = work_dir / "output"
        output_dir.mkdir(parents=True, exist_ok=True)

        # Worker pid to use in archive identification as we may have multiple workers
        os.getpid()

        # Export the documents
        conv_result = _export_documents_as_files(
            conv_results=conv_results,
            output_dir=output_dir,
            export_json=export_json,
            export_html=export_html,
            export_md=export_md,
            export_txt=export_txt,
            export_doctags=export_doctags,
            image_export_mode=conversion_options.image_export_mode,
            md_page_break_placeholder=conversion_options.md_page_break_placeholder,
        )

        files = os.listdir(output_dir)
        if len(files) == 0:
            raise HTTPException(status_code=500, detail="No documents were exported.")

        file_path = work_dir / "converted_docs.zip"
        shutil.make_archive(
            base_name=str(file_path.with_suffix("")),
            format="zip",
            root_dir=output_dir,
        )

        # Other cleanups after the response is sent
        # Output directory
        # background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)

        if isinstance(target, PutTarget):
            try:
                with open(file_path, "rb") as file_data:
                    r = httpx.put(str(target.url), files={"file": file_data})
                    r.raise_for_status()
                response = PresignedUrlConvertDocumentResponse(
                    status=conv_result,
                    processing_time=processing_time,
                )
            except Exception as exc:
                _log.error("An error occurred while uploading the zip to S3", exc_info=exc)
                raise HTTPException(
                    status_code=500,
                    detail="An error occurred while uploading the zip to S3.",
                )
        else:
            response = FileResponse(
                file_path, filename=file_path.name, media_type="application/zip"
            )

    return response


async def prepare_response(
    task: Task, orchestrator: BaseOrchestrator, background_tasks: BackgroundTasks
):
    if task.results is None:
        raise HTTPException(
            status_code=404,
            detail="Task result not found. Please wait for a completion status.",
        )
    assert task.options is not None

    work_dir = get_scratch() / task.task_id
    response = process_results(
        conversion_options=task.options,
        target=task.target,
        conv_results=task.results,
        work_dir=work_dir,
    )

    if work_dir.exists():
        task.scratch_dir = work_dir
        if not isinstance(response, FileResponse):
            _log.warning(
                f"Task {task.task_id=} produced content in {work_dir=} but the response is not a file."
            )
            shutil.rmtree(work_dir, ignore_errors=True)

    if docling_serve_settings.single_use_results:
        if task.scratch_dir is not None:
            background_tasks.add_task(
                shutil.rmtree, task.scratch_dir, ignore_errors=True
            )

        async def _remove_task_impl():
            await asyncio.sleep(docling_serve_settings.result_removal_delay)
            await orchestrator.delete_task(task_id=task.task_id)

        async def _remove_task():
            asyncio.create_task(_remove_task_impl())  # noqa: RUF006

        background_tasks.add_task(_remove_task)

    return response
@@ -1,6 +1,94 @@
import enum
import sys
from pathlib import Path
from typing import Optional, Union

from pydantic import AnyUrl, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing_extensions import Self


class UvicornSettings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix="UVICORN_", env_file=".env", extra="allow"
    )

    host: str = "0.0.0.0"
    port: int = 5001
    reload: bool = False
    root_path: str = ""
    proxy_headers: bool = True
    timeout_keep_alive: int = 60
    ssl_certfile: Optional[Path] = None
    ssl_keyfile: Optional[Path] = None
    ssl_keyfile_password: Optional[str] = None
    workers: Union[int, None] = None


class AsyncEngine(str, enum.Enum):
    LOCAL = "local"
    KFP = "kfp"


class DoclingServeSettings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix="DOCLING_SERVE_",
        env_file=".env",
        env_parse_none_str="",
        extra="allow",
    )

    enable_ui: bool = False
    api_host: str = "localhost"
    artifacts_path: Optional[Path] = None
    static_path: Optional[Path] = None
    scratch_path: Optional[Path] = None
    single_use_results: bool = True
    result_removal_delay: float = 300  # 5 minutes
    load_models_at_boot: bool = True
    options_cache_size: int = 2
    enable_remote_services: bool = False
    allow_external_plugins: bool = False

    max_document_timeout: float = 3_600 * 24 * 7  # 7 days
    max_num_pages: int = sys.maxsize
    max_file_size: int = sys.maxsize

    max_sync_wait: int = 120  # 2 minutes

    cors_origins: list[str] = ["*"]
    cors_methods: list[str] = ["*"]
    cors_headers: list[str] = ["*"]

    eng_kind: AsyncEngine = AsyncEngine.LOCAL
    # Local engine
    eng_loc_num_workers: int = 2
    eng_loc_share_models: bool = False
    # KFP engine
    eng_kfp_endpoint: Optional[AnyUrl] = None
    eng_kfp_token: Optional[str] = None
    eng_kfp_ca_cert_path: Optional[str] = None
    eng_kfp_self_callback_endpoint: Optional[str] = None
    eng_kfp_self_callback_token_path: Optional[Path] = None
    eng_kfp_self_callback_ca_cert_path: Optional[Path] = None

    eng_kfp_experimental: bool = False

    @model_validator(mode="after")
    def engine_settings(self) -> Self:
        # Validate KFP engine settings
        if self.eng_kind == AsyncEngine.KFP:
            if self.eng_kfp_endpoint is None:
                raise ValueError("KFP endpoint is required when using the KFP engine.")

        if self.eng_kind == AsyncEngine.KFP:
            if not self.eng_kfp_experimental:
                raise ValueError(
                    "KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
                )

        return self


uvicorn_settings = UvicornSettings()
docling_serve_settings = DoclingServeSettings()
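As a hedged example of the validator above (constructor keyword arguments override environment values in pydantic-settings, so this works regardless of the process environment):

from docling_serve.settings import AsyncEngine, DoclingServeSettings

try:
    DoclingServeSettings(eng_kind=AsyncEngine.KFP, eng_kfp_experimental=True)
except Exception as err:
    # The model_validator rejects the KFP engine without an endpoint:
    # "KFP endpoint is required when using the KFP engine."
    print(err)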
16
docling_serve/storage.py
Normal file
@@ -0,0 +1,16 @@
import tempfile
from functools import lru_cache
from pathlib import Path

from docling_serve.settings import docling_serve_settings


@lru_cache
def get_scratch() -> Path:
    scratch_dir = (
        docling_serve_settings.scratch_path
        if docling_serve_settings.scratch_path is not None
        else Path(tempfile.mkdtemp(prefix="docling_"))
    )
    scratch_dir.mkdir(exist_ok=True, parents=True)
    return scratch_dir
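For example (hedged; the override path is illustrative, and the environment must be set before docling_serve.settings is first imported), the directory is resolved once and then memoized:

import os

os.environ["DOCLING_SERVE_SCRATCH_PATH"] = "/tmp/docling-scratch"  # illustrative

from docling_serve.storage import get_scratch

scratch = get_scratch()
assert scratch == get_scratch()  # @lru_cache: the same Path on every call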
54
docling_serve/websocket_notifier.py
Normal file
@@ -0,0 +1,54 @@
from fastapi import WebSocket

from docling_jobkit.datamodel.task_meta import TaskStatus
from docling_jobkit.orchestrators.base_notifier import BaseNotifier
from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator

from docling_serve.datamodel.responses import (
    MessageKind,
    TaskStatusResponse,
    WebsocketMessage,
)


class WebsocketNotifier(BaseNotifier):
    def __init__(self, orchestrator: BaseOrchestrator):
        super().__init__(orchestrator)
        self.task_subscribers: dict[str, set[WebSocket]] = {}

    async def add_task(self, task_id: str):
        self.task_subscribers[task_id] = set()

    async def remove_task(self, task_id: str):
        if task_id in self.task_subscribers:
            for websocket in self.task_subscribers[task_id]:
                await websocket.close()

            del self.task_subscribers[task_id]

    async def notify_task_subscribers(self, task_id: str):
        if task_id not in self.task_subscribers:
            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")

        task = await self.orchestrator.get_raw_task(task_id=task_id)
        task_queue_position = await self.orchestrator.get_queue_position(task_id)
        msg = TaskStatusResponse(
            task_id=task.task_id,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
        )
        for websocket in self.task_subscribers[task_id]:
            await websocket.send_text(
                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
            )
            if task.is_completed():
                await websocket.close()

    async def notify_queue_positions(self):
        for task_id in self.task_subscribers.keys():
            # notify only pending tasks
            if self.orchestrator.tasks[task_id].task_status != TaskStatus.PENDING:
                continue

            await self.notify_task_subscribers(task_id)
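A minimal sketch of the notifier lifecycle using only the methods above (assumptions: an async context, a BaseOrchestrator instance named orchestrator, and a connected fastapi.WebSocket named ws):

notifier = WebsocketNotifier(orchestrator)

await notifier.add_task(task_id)                 # create the subscriber set for a new task
notifier.task_subscribers[task_id].add(ws)       # register a connected client socket
await notifier.notify_task_subscribers(task_id)  # push a TaskStatusResponse to each socket
await notifier.remove_task(task_id)              # close remaining sockets and drop the entry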
9
docs/README.md
Normal file
@@ -0,0 +1,9 @@
# Docling Serve documentation

These documentation pages explore the webserver configuration, runtime options, and deployment examples, as well as development best practices.

- [Configuration](./configuration.md)
- [Advanced usage](./usage.md)
- [Deployment](./deployment.md)
- [Development](./development.md)
- [`v1` migration](./v1_migration.md)
BIN
docs/assets/docling-serve-pic.png
Normal file
Binary file not shown. (Size: 504 KiB)
82
docs/configuration.md
Normal file
@@ -0,0 +1,82 @@
# Configuration

The `docling-serve` executable allows configuring the server via command line
options as well as environment variables.
Configurations are divided between the settings used for the `uvicorn` ASGI
server and the actual app-specific configurations.

> [!WARNING]
> When the server is running with `reload` or with multiple `workers`, uvicorn
> will spawn multiple subprocesses. This invalidates all the values configured
> via the CLI command line options. Please use environment variables in this
> type of deployment.
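For example, a minimal sketch of driving the configuration purely from environment variables (assumptions: `docling-serve` is installed and on the `PATH`, using the `run` subcommand referenced in the table below):

```python
import os
import subprocess

# Set uvicorn and app options via the environment so they survive
# the subprocess spawning done by --reload / multiple workers.
env = {
    **os.environ,
    "UVICORN_PORT": "5001",
    "UVICORN_WORKERS": "2",
    "DOCLING_SERVE_ENABLE_UI": "true",
}
subprocess.run(["docling-serve", "run"], env=env, check=True)
```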
## Webserver configuration

The following table shows the options which are propagated directly to the
`uvicorn` webserver runtime.

| CLI option | ENV | Default | Description |
| -----------|-----|---------|-------------|
| `--host` | `UVICORN_HOST` | `0.0.0.0` for `run`, `localhost` for `dev` | The host to serve on. |
| `--port` | `UVICORN_PORT` | `5001` | The port to serve on. |
| `--reload` | `UVICORN_RELOAD` | `false` for `run`, `true` for `dev` | Enable auto-reload of the server when (code) files change. |
| `--workers` | `UVICORN_WORKERS` | `1` | Use multiple worker processes. |
| `--root-path` | `UVICORN_ROOT_PATH` | `""` | The root path used to tell your app that it is being served to the outside world behind a path prefix. |
| `--proxy-headers` | `UVICORN_PROXY_HEADERS` | `true` | Enable/Disable X-Forwarded-Proto, X-Forwarded-For, X-Forwarded-Port to populate remote address info. |
| `--timeout-keep-alive` | `UVICORN_TIMEOUT_KEEP_ALIVE` | `60` | Timeout for the server response. |
| `--ssl-certfile` | `UVICORN_SSL_CERTFILE` | | SSL certificate file. |
| `--ssl-keyfile` | `UVICORN_SSL_KEYFILE` | | SSL key file. |
| `--ssl-keyfile-password` | `UVICORN_SSL_KEYFILE_PASSWORD` | | SSL keyfile password. |
## Docling Serve configuration

The following table describes the options to configure the Docling Serve app.

| CLI option | ENV | Default | Description |
| -----------|-----|---------|-------------|
| `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path. |
| | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path. |
| | `DOCLING_SERVE_SCRATCH_PATH` | | If set, this directory will be used as the scratch workspace, e.g. for storing the results before they get requested. If unset, a temporary directory is created for this purpose. |
| `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
| | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
| | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
| | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
| | `DOCLING_SERVE_RESULT_REMOVAL_DELAY` | `300` | When `DOCLING_SERVE_SINGLE_USE_RESULTS` is active, this is the delay before results are removed from the task registry. |
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint waits for task completion. |
| | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConverter objects (including their loaded models) to keep in the cache. |
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
### Compute engine

Docling Serve can be deployed with several possible compute engines.
The selected compute engine will be running all the async jobs.

#### Local engine

The following table describes the options to configure the Docling Serve local engine.

| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |

#### KFP engine

The following table describes the options to configure the Docling Serve KFP engine.

| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_KFP_ENDPOINT` | | Must be set to the Kubeflow Pipelines endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888`. |
| `DOCLING_SERVE_ENG_KFP_TOKEN` | | The authentication token for KFP. For in-cluster deployment, the app will automatically load the token of the ServiceAccount. |
| `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` | | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will automatically load the internal CA. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` | | If set, it enables internal callbacks providing status updates of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1/callback/task/progress`. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` | | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` | | The CA certificate for the progress callback. For cluster-internal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
21
docs/deploy-examples/compose-amd.yaml
Normal file
@@ -0,0 +1,21 @@
# AMD ROCm deployment

services:
  docling-serve:
    image: ghcr.io/docling-project/docling-serve-rocm:main
    container_name: docling-serve
    ports:
      - "5001:5001"
    environment:
      DOCLING_SERVE_ENABLE_UI: "true"
      ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
      ## This section is for compatibility with older cards
      # HSA_OVERRIDE_GFX_VERSION: "11.0.0"
      # HSA_ENABLE_SDMA: "0"
    devices:
      - /dev/kfd:/dev/kfd
      - /dev/dri:/dev/dri
    group_add:
      - 44 # video group GID from host
      - 992 # render group GID from host
    restart: always
20
docs/deploy-examples/compose-nvidia.yaml
Normal file
@@ -0,0 +1,20 @@
# NVIDIA CUDA deployment

services:
  docling-serve:
    image: ghcr.io/docling-project/docling-serve-cu126:main
    container_name: docling-serve
    ports:
      - "5001:5001"
    environment:
      DOCLING_SERVE_ENABLE_UI: "true"
      NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
    # deploy: # This section is for compatibility with Swarm
    #   resources:
    #     reservations:
    #       devices:
    #         - driver: nvidia
    #           count: all
    #           capabilities: [gpu]
    runtime: nvidia
    restart: always
47
docs/deploy-examples/docling-model-cache-deployment.yaml
Normal file
@@ -0,0 +1,47 @@
kind: Deployment
apiVersion: apps/v1
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docling-serve
      component: docling-serve-api
  template:
    metadata:
      labels:
        app: docling-serve
        component: docling-serve-api
    spec:
      restartPolicy: Always
      containers:
        - name: api
          resources:
            limits:
              cpu: 2
              memory: 4Gi
            requests:
              cpu: 250m
              memory: 1Gi
          env:
            - name: DOCLING_SERVE_ENABLE_UI
              value: 'true'
            - name: DOCLING_SERVE_ARTIFACTS_PATH
              value: '/modelcache'
          ports:
            - name: http
              containerPort: 5001
              protocol: TCP
          imagePullPolicy: Always
          image: 'ghcr.io/docling-project/docling-serve-cpu'
          volumeMounts:
            - name: docling-model-cache
              mountPath: /modelcache
      volumes:
        - name: docling-model-cache
          persistentVolumeClaim:
            claimName: docling-model-cache-pvc
33
docs/deploy-examples/docling-model-cache-job.yaml
Normal file
@@ -0,0 +1,33 @@
apiVersion: batch/v1
kind: Job
metadata:
  name: docling-model-cache-load
spec:
  selector: {}
  template:
    metadata:
      name: docling-model-load
    spec:
      containers:
        - name: loader
          image: ghcr.io/docling-project/docling-serve-cpu:main
          command:
            - docling-tools
            - models
            - download
            - '--output-dir=/modelcache'
            - 'layout'
            - 'tableformer'
            - 'code_formula'
            - 'picture_classifier'
            - 'smolvlm'
            - 'granite_vision'
            - 'easyocr'
          volumeMounts:
            - name: docling-model-cache
              mountPath: /modelcache
      volumes:
        - name: docling-model-cache
          persistentVolumeClaim:
            claimName: docling-model-cache-pvc
      restartPolicy: Never
11
docs/deploy-examples/docling-model-cache-pvc.yaml
Normal file
@@ -0,0 +1,11 @@
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: docling-model-cache-pvc
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Filesystem
  resources:
    requests:
      storage: 10Gi
192
docs/deploy-examples/docling-serve-oauth.yaml
Normal file
@@ -0,0 +1,192 @@
# This example deployment configures Docling Serve with an OAuth-Proxy sidecar and TLS termination
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: docling-serve
  labels:
    app: docling-serve
  annotations:
    serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"docling-serve"}}'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: docling-serve-oauth
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
  - kind: ServiceAccount
    name: docling-serve
    namespace: docling
---
apiVersion: route.openshift.io/v1
kind: Route
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  to:
    kind: Service
    name: docling-serve
  port:
    targetPort: oauth
  tls:
    termination: Reencrypt
---
apiVersion: v1
kind: Service
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
  annotations:
    service.alpha.openshift.io/serving-cert-secret-name: docling-serve-tls
spec:
  ports:
    - name: oauth
      port: 8443
      targetPort: oauth
    - name: http
      port: 5001
      targetPort: http
  selector:
    app: docling-serve
    component: docling-serve-api
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docling-serve
      component: docling-serve-api
  template:
    metadata:
      labels:
        app: docling-serve
        component: docling-serve-api
    spec:
      restartPolicy: Always
      serviceAccountName: docling-serve
      containers:
        - name: api
          resources:
            limits:
              cpu: 2000m
              memory: 4Gi
            requests:
              cpu: 800m
              memory: 1Gi
          readinessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTPS
            initialDelaySeconds: 10
            timeoutSeconds: 2
            periodSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /health
              port: http
              scheme: HTTPS
            initialDelaySeconds: 3
            timeoutSeconds: 4
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 5
          env:
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: DOCLING_SERVE_ENABLE_UI
              value: 'true'
            - name: DOCLING_SERVE_API_HOST
              value: 'docling-serve.$(NAMESPACE).svc.cluster.local'
            - name: UVICORN_SSL_CERTFILE
              value: '/etc/tls/private/tls.crt'
            - name: UVICORN_SSL_KEYFILE
              value: '/etc/tls/private/tls.key'
          ports:
            - name: http
              containerPort: 5001
              protocol: TCP
          volumeMounts:
            - name: proxy-tls
              mountPath: /etc/tls/private
          imagePullPolicy: Always
          image: 'ghcr.io/docling-project/docling-serve-cpu:fix-ui-with-https'
        - name: oauth-proxy
          resources:
            limits:
              cpu: 100m
              memory: 256Mi
            requests:
              cpu: 100m
              memory: 256Mi
          readinessProbe:
            httpGet:
              path: /oauth/healthz
              port: oauth
              scheme: HTTPS
            initialDelaySeconds: 5
            timeoutSeconds: 1
            periodSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          livenessProbe:
            httpGet:
              path: /oauth/healthz
              port: oauth
              scheme: HTTPS
            initialDelaySeconds: 30
            timeoutSeconds: 1
            periodSeconds: 5
            successThreshold: 1
            failureThreshold: 3
          ports:
            - name: oauth
              containerPort: 8443
              protocol: TCP
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - name: proxy-tls
              mountPath: /etc/tls/private
          env:
            - name: NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
          image: 'registry.redhat.io/openshift4/ose-oauth-proxy:v4.13'
          args:
            - '--https-address=:8443'
            - '--provider=openshift'
            - '--openshift-service-account=docling-serve'
            - '--upstream=https://docling-serve.$(NAMESPACE).svc.cluster.local:5001'
            - '--upstream-ca=/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt'
            - '--tls-cert=/etc/tls/private/tls.crt'
            - '--tls-key=/etc/tls/private/tls.key'
            - '--cookie-secret=SECRET'
            - '--openshift-delegate-urls={"/": {"group":"route.openshift.io","resource":"routes","verb":"get","name":"docling-serve","namespace":"$(NAMESPACE)"}}'
            - '--openshift-sar={"namespace":"$(NAMESPACE)","resource":"routes","resourceName":"docling-serve","verb":"get","resourceAPIGroup":"route.openshift.io"}'
            - '--skip-auth-regex=''(^/health|^/docs)'''
      volumes:
        - name: proxy-tls
          secret:
            secretName: docling-serve-tls
            defaultMode: 420
76
docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
Normal file
@@ -0,0 +1,76 @@
# This example deployment configures Docling Serve with a Route + Sticky sessions, a Service and cpu image
---
kind: Route
apiVersion: route.openshift.io/v1
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
  annotations:
    haproxy.router.openshift.io/disable_cookies: "false" # this annotation enables the sticky sessions
spec:
  path: /
  to:
    kind: Service
    name: docling-serve
  port:
    targetPort: http
  tls:
    termination: edge
    insecureEdgeTerminationPolicy: Redirect
---
apiVersion: v1
kind: Service
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  ports:
    - name: http
      port: 5001
      targetPort: http
  selector:
    app: docling-serve
    component: docling-serve-api
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  replicas: 3
  selector:
    matchLabels:
      app: docling-serve
      component: docling-serve-api
  template:
    metadata:
      labels:
        app: docling-serve
        component: docling-serve-api
    spec:
      restartPolicy: Always
      containers:
        - name: api
          resources:
            limits:
              cpu: 1
              memory: 4Gi
            requests:
              cpu: 250m
              memory: 1Gi
          env:
            - name: DOCLING_SERVE_ENABLE_UI
              value: 'true'
          ports:
            - name: http
              containerPort: 5001
              protocol: TCP
          imagePullPolicy: Always
          image: 'ghcr.io/docling-project/docling-serve'
58
docs/deploy-examples/docling-serve-simple.yaml
Normal file
@@ -0,0 +1,58 @@
# This example deployment configures Docling Serve with a Service and cuda image
---
apiVersion: v1
kind: Service
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  ports:
    - name: http
      port: 5001
      targetPort: http
  selector:
    app: docling-serve
    component: docling-serve-api
---
kind: Deployment
apiVersion: apps/v1
metadata:
  name: docling-serve
  labels:
    app: docling-serve
    component: docling-serve-api
spec:
  replicas: 1
  selector:
    matchLabels:
      app: docling-serve
      component: docling-serve-api
  template:
    metadata:
      labels:
        app: docling-serve
        component: docling-serve-api
    spec:
      restartPolicy: Always
      containers:
        - name: api
          resources:
            limits:
              cpu: 1
              memory: 4Gi
              nvidia.com/gpu: 1 # Limit to one GPU
            requests:
              cpu: 250m
              memory: 1Gi
              nvidia.com/gpu: 1 # Limit to one GPU
          env:
            - name: DOCLING_SERVE_ENABLE_UI
              value: 'true'
          ports:
            - name: http
              containerPort: 5001
              protocol: TCP
          imagePullPolicy: Always
          image: 'ghcr.io/docling-project/docling-serve-cu124'
305
docs/deployment.md
Normal file
@@ -0,0 +1,305 @@
# Deployment Examples

This document provides deployment examples for running the application in different environments.

Choose the deployment option that best fits your setup.

- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.

---

## Local GPU NVIDIA

### Docker compose

Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)

This deployment has the following features:

- NVIDIA CUDA enabled

Install the app with:

```sh
docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
```

For using the API:

```sh
# Make a test query
curl -X 'POST' \
  "localhost:5001/v1/convert/source/async" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
```

<details>
<summary><b>Requirements</b></summary>

- debian/ubuntu/rhel/fedora/opensuse
- docker
- nvidia drivers >=550.54.14
- nvidia-container-toolkit

Docs:

- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/supported-platforms.html)
- [CUDA Toolkit Release Notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6)

</details>

<details>
<summary><b>Steps</b></summary>

1. Check the driver version and which GPU (0/1/2/n) you want to use, and update the [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file accordingly or use `count: all`:

   ```sh
   nvidia-smi
   ```

2. Check if the NVIDIA Container Toolkit is installed/updated:

   ```sh
   # debian
   dpkg -l | grep nvidia-container-toolkit
   ```

   ```sh
   # rhel
   rpm -q nvidia-container-toolkit
   ```

   NVIDIA Container Toolkit install steps can be found here:

   <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>

3. Check which runtime is being used by Docker:

   ```sh
   # docker
   docker info | grep -i runtime
   ```

4. If the default Docker runtime changes back from 'nvidia' to 'default' after restarting the Docker service (optional):

   Backup the daemon.json file:

   ```sh
   sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.bak
   ```

   Update the daemon.json file:

   ```sh
   echo '{
     "runtimes": {
       "nvidia": {
         "path": "nvidia-container-runtime"
       }
     },
     "default-runtime": "nvidia"
   }' | sudo tee /etc/docker/daemon.json > /dev/null
   ```

   Restart the Docker service:

   ```sh
   sudo systemctl restart docker
   ```

   Confirm 'nvidia' is the default runtime used by Docker by repeating step 3.

5. Run the container:

   ```sh
   docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
   ```

</details>

## Local GPU AMD

### Docker compose

Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)

This deployment has the following features:

- AMD ROCm enabled

Install the app with:

```sh
docker compose -f docs/deploy-examples/compose-amd.yaml up -d
```

For using the API:

```sh
# Make a test query
curl -X 'POST' \
  "localhost:5001/v1/convert/source/async" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
```

<details>
<summary><b>Requirements</b></summary>

- debian/ubuntu/rhel/fedora/opensuse
- docker
- AMDGPU driver >=6.3
- AMD ROCm >=6.3

Docs:

- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)

</details>

<details>
<summary><b>Steps</b></summary>

1. Check the driver version and which GPU (0/1/2/n) you want to use, and update the [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file accordingly:

   ```sh
   rocm-smi --showdriverversion
   rocminfo | grep -i "ROCm version"
   ```

2. Find both the video group GID and the render group GID on the host, and update the [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file accordingly:

   ```sh
   getent group video
   getent group render
   ```

3. Build the image locally, and update the [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file accordingly:

   ```sh
   make docling-serve-rocm-image
   ```

</details>

## OpenShift

### Simple deployment

Manifest example: [docling-serve-simple.yaml](./deploy-examples/docling-serve-simple.yaml)

This deployment example has the following features:

- Deployment configuration
- Service configuration
- NVIDIA CUDA enabled

Install the app with:

```sh
oc apply -f docs/deploy-examples/docling-serve-simple.yaml
```

For using the API:

```sh
# Port-forward the service
oc port-forward svc/docling-serve 5001:5001

# Make a test query
curl -X 'POST' \
  "localhost:5001/v1/convert/source/async" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
```

### Secure deployment with `oauth-proxy`

Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml)

This deployment has the following features:

- TLS encryption between all components (using the cluster-internal CA authority).
- Authentication via a secure `oauth-proxy` sidecar.
- Exposes the service using a secure OpenShift `Route`.

Install the app with:

```sh
oc apply -f docs/deploy-examples/docling-serve-oauth.yaml
```

For using the API:

```sh
# Retrieve the endpoint
DOCLING_NAME=docling-serve
DOCLING_ROUTE="https://$(oc get routes ${DOCLING_NAME} --template={{.spec.host}})"

# Retrieve the authentication token
OCP_AUTH_TOKEN=$(oc whoami --show-token)

# Make a test query
curl -X 'POST' \
  "${DOCLING_ROUTE}/v1/convert/source/async" \
  -H "Authorization: Bearer ${OCP_AUTH_TOKEN}" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
```
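
The same authenticated call can also be scripted. Below is a minimal Python sketch, not part of the manifest, that simply mirrors the shell steps above: it assumes the `oc` CLI is logged in and that `httpx` is installed.

```python
import subprocess

import httpx

# Resolve the route host and the token via `oc`, mirroring the shell steps above.
docling_route = "https://" + subprocess.run(
    ["oc", "get", "routes", "docling-serve", "--template={{.spec.host}}"],
    capture_output=True, text=True, check=True,
).stdout.strip()
ocp_auth_token = subprocess.run(
    ["oc", "whoami", "--show-token"],
    capture_output=True, text=True, check=True,
).stdout.strip()

# Make a test query with the Bearer token.
response = httpx.post(
    f"{docling_route}/v1/convert/source/async",
    headers={"Authorization": f"Bearer {ocp_auth_token}"},
    json={"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]},
)
print(response.json())
```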

### ReplicaSets with `sticky sessions`

Manifest example: [docling-serve-replicas-w-sticky-sessions.yaml](./deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml)

This deployment has the following features:

- Deployment configuration with 3 replicas
- Service configuration
- Exposes the service using an OpenShift `Route` and enables sticky sessions

Install the app with:

```sh
oc apply -f docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
```

For using the API:

```sh
# Retrieve the endpoint
DOCLING_NAME=docling-serve
DOCLING_ROUTE="https://$(oc get routes $DOCLING_NAME --template={{.spec.host}})"

# Make a test query, store the cookie and the task_id
task_id=$(curl -s -X 'POST' \
  "${DOCLING_ROUTE}/v1/convert/source/async" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }' \
  -c cookies.txt | grep -oP '"task_id":"\K[^"]+')
```

```sh
# Use the task_id and the cookie to check the task status
curl -v -X 'GET' \
  "${DOCLING_ROUTE}/v1/status/poll/$task_id?wait=0" \
  -H "accept: application/json" \
  -b "cookies.txt"
```
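
In client code, the same sticky-session flow amounts to reusing one HTTP client so the session cookie is kept between requests. A minimal Python sketch, assuming `httpx` and a hypothetical route host:

```python
import time

import httpx

# Hypothetical route host; resolve it via `oc get routes` as shown above.
base_url = "https://docling-serve.apps.example.com"

# Reusing a single client keeps the HAProxy session cookie between requests,
# so polling is routed to the same replica that accepted the task.
with httpx.Client(base_url=base_url) as client:
    response = client.post(
        "/v1/convert/source/async",
        json={"sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]},
    )
    task = response.json()

    while task["task_status"] not in ("success", "failure"):
        time.sleep(5)
        task = client.get(f"/v1/status/poll/{task['task_id']}?wait=0").json()
```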
57
docs/development.md
Normal file
@@ -0,0 +1,57 @@
# Development

## Install dependencies

### CPU only

```sh
# Install uv if not already available
curl -LsSf https://astral.sh/uv/install.sh | sh

# Install dependencies
uv sync --extra cpu
```

### Cuda GPU

For GPU support use the following command:

```sh
# Install dependencies
uv sync
```

### Gradio UI and different OCR backends

The `/ui` endpoint (based on `gradio`) and the different OCR backends can be enabled via package extras:

```sh
# Enable ui and rapidocr
uv sync --extra ui --extra rapidocr
```

```sh
# Enable tesserocr
uv sync --extra tesserocr
```

See the `[project.optional-dependencies]` section in `pyproject.toml` for the full list of options, and list the runtime options with `uv run docling-serve --help`.

### Run the server

The `docling-serve` executable is a convenient script for launching the webserver both in
development and production mode.

```sh
# Run the server in development mode
# - reload is enabled by default
# - listening on the 127.0.0.1 address
# - ui is enabled by default
docling-serve dev

# Run the server in production mode
# - reload is disabled by default
# - listening on the 0.0.0.0 address
# - ui is disabled by default
docling-serve run
```
103
docs/pre-loading-models.md
Normal file
@@ -0,0 +1,103 @@
# Pre-loading models for docling

This document provides examples for pre-loading docling models to a persistent volume and re-using it for docling-serve deployments.

1. We need to create a persistent volume that will store the model weights:

   ```yaml
   apiVersion: v1
   kind: PersistentVolumeClaim
   metadata:
     name: docling-model-cache-pvc
   spec:
     accessModes:
       - ReadWriteOnce
     volumeMode: Filesystem
     resources:
       requests:
         storage: 10Gi
   ```

   If you don't want to use the default storage class, set a custom storage class as follows:

   ```yaml
   spec:
     ...
     storageClassName: <Storage Class Name>
   ```

   Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)

2. In order to load the model weights, we can use `docling-tools` to download them. As this is a one-time operation, we can use a Kubernetes job for it:

   ```yaml
   apiVersion: batch/v1
   kind: Job
   metadata:
     name: docling-model-cache-load
   spec:
     selector: {}
     template:
       metadata:
         name: docling-model-load
       spec:
         containers:
           - name: loader
             image: ghcr.io/docling-project/docling-serve-cpu:main
             command:
               - docling-tools
               - models
               - download
               - '--output-dir=/modelcache'
               - 'layout'
               - 'tableformer'
               - 'code_formula'
               - 'picture_classifier'
               - 'smolvlm'
               - 'granite_vision'
               - 'easyocr'
             volumeMounts:
               - name: docling-model-cache
                 mountPath: /modelcache
         volumes:
           - name: docling-model-cache
             persistentVolumeClaim:
               claimName: docling-model-cache-pvc
         restartPolicy: Never
   ```

   The job will mount the previously created persistent volume and execute a command similar to how we would load models locally:
   `docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`

   In the manifest, we specify the desired models individually, or we can use the `--all` parameter to download all models.

   Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)

3. Now we can mount the volume in the docling-serve deployment and set the env variable `DOCLING_SERVE_ARTIFACTS_PATH` to point to it.
   The following additions to the deployment should be made:

   ```yaml
   spec:
     template:
       spec:
         containers:
           - name: api
             env:
               ...
               - name: DOCLING_SERVE_ARTIFACTS_PATH
                 value: '/modelcache'
             volumeMounts:
               - name: docling-model-cache
                 mountPath: /modelcache
         ...
         volumes:
           - name: docling-model-cache
             persistentVolumeClaim:
               claimName: docling-model-cache-pvc
   ```

   Make sure that the value of `DOCLING_SERVE_ARTIFACTS_PATH` is the same as the path where the models were downloaded and where the volume is mounted.

   Now, when docling-serve executes tasks, the underlying docling installation will load the model weights from the mounted volume.

   Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
444
docs/usage.md
Normal file
@@ -0,0 +1,444 @@
# Usage

The API provides two endpoints: one for URLs, one for files. This is necessary to send files directly in binary format instead of base64-encoded strings.

## Common parameters

On top of the source of the file (see below), both endpoints support the same parameters, which are almost the same as in the Docling CLI.

- `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
- `pipeline` (str): The choice of which pipeline to use. Allowed values: `standard`, `vlm`. Defaults to `standard`.
- `page_range` (tuple): If specified, only convert a range of pages. The page number starts at 1.
- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
- `image_export_mode` (str): Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: `embedded`, `placeholder`, `referenced`. Optional, defaults to `embedded`.
- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesserocr`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`. To use the `tesserocr` engine, `tesserocr` must be installed where docling-serve is running: `pip install tesserocr`.
- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Defaults to `dlparse_v4`.
- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
- `abort_on_error` (bool): If enabled, abort on error. Defaults to `False`.
- `md_page_break_placeholder` (str): Add this placeholder between pages in the Markdown output.
- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to `True`.
- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to `False`.
- `do_formula_enrichment` (bool): If enabled, perform formula OCR and return LaTeX code. Defaults to `False`.
- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to `False`.
- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to `False`.
- `picture_description_area_threshold` (float): Minimum percentage of the area for a picture to be processed with the models. Defaults to 0.05.
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `picture_description_api`.
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with `picture_description_local`.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to `False`.
- `images_scale` (float): Scale factor for images. Defaults to 2.0.

## Convert endpoints

### Source endpoint

The endpoint is `/v1/convert/source`, listening for POST requests of JSON payloads.

On top of the above parameters, you must send the URL(s) of the document(s) you want to process with either the `http_sources` or `file_sources` fields.
The first fetches URL(s) (optionally with extra headers), the second allows providing documents as base64-encoded strings.
The `options` object is not required; it can be partially or completely omitted.

Simple payload example:

```json
{
  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
}
```

<details>

<summary>Complete payload example:</summary>

```json
{
  "options": {
    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
    "to_formats": ["md", "json", "html", "text", "doctags"],
    "image_export_mode": "placeholder",
    "do_ocr": true,
    "force_ocr": false,
    "ocr_engine": "easyocr",
    "ocr_lang": ["en"],
    "pdf_backend": "dlparse_v2",
    "table_mode": "fast",
    "abort_on_error": false
  },
  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
}
```

</details>

<details>

<summary>CURL example:</summary>

```sh
curl -X 'POST' \
  'http://localhost:5001/v1/convert/source' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
  "options": {
    "from_formats": [
      "docx",
      "pptx",
      "html",
      "image",
      "pdf",
      "asciidoc",
      "md",
      "xlsx"
    ],
    "to_formats": ["md", "json", "html", "text", "doctags"],
    "image_export_mode": "placeholder",
    "do_ocr": true,
    "force_ocr": false,
    "ocr_engine": "easyocr",
    "ocr_lang": [
      "fr",
      "de",
      "es",
      "en"
    ],
    "pdf_backend": "dlparse_v2",
    "table_mode": "fast",
    "abort_on_error": false,
    "do_table_structure": true,
    "include_images": true,
    "images_scale": 2
  },
  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
}'
```

</details>

<details>
<summary>Python example:</summary>

```python
import httpx

async_client = httpx.AsyncClient(timeout=60.0)
url = "http://localhost:5001/v1/convert/source"
payload = {
    "options": {
        "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
        "to_formats": ["md", "json", "html", "text", "doctags"],
        "image_export_mode": "placeholder",
        "do_ocr": True,
        "force_ocr": False,
        "ocr_engine": "easyocr",
        "ocr_lang": ["en"],
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
    },
    "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
}

response = await async_client.post(url, json=payload)

data = response.json()
```

</details>

#### File as base64

The `file_sources` argument in the endpoint allows sending files as base64-encoded strings.
When your PDF or other file type is too large, encoding it and passing it inline to curl
can lead to an “Argument list too long” error on some systems. To avoid this, we write
the JSON request body to a file and have curl read from that file. A Python equivalent is sketched after the curl steps.

<details>
<summary>CURL steps:</summary>

```sh
# 1. Base64-encode the file
B64_DATA=$(base64 -w 0 /path/to/file/pdf-to-convert.pdf)

# 2. Build the JSON with your options
cat <<EOF > /tmp/request_body.json
{
  "options": {
  },
  "file_sources": [{
    "base64_string": "${B64_DATA}",
    "filename": "pdf-to-convert.pdf"
  }]
}
EOF

# 3. POST the request to the docling service
curl -X POST "localhost:5001/v1/convert/source" \
  -H "Content-Type: application/json" \
  -d @/tmp/request_body.json
```

</details>
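
For reference, here is a minimal Python sketch of the same base64 flow; it avoids the shell quoting and argument-length issues entirely. The file path is a placeholder.

```python
import base64
from pathlib import Path

import httpx

# Hypothetical input file; adjust the path to your document.
file_path = Path("/path/to/file/pdf-to-convert.pdf")

# 1. Base64-encode the file (the equivalent of `base64 -w 0`)
b64_data = base64.b64encode(file_path.read_bytes()).decode("utf-8")

# 2. Build the request body with your options
payload = {
    "options": {},
    "file_sources": [{"base64_string": b64_data, "filename": file_path.name}],
}

# 3. POST the request to the docling service
response = httpx.post("http://localhost:5001/v1/convert/source", json=payload, timeout=60.0)
print(response.json()["status"])
```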

### File endpoint

The endpoint is `/v1/convert/file`, listening for POST requests of form payloads (necessary as the files are sent as multipart/form data). You can send one or multiple files.

<details>
<summary>CURL example:</summary>

```sh
curl -X 'POST' \
  'http://127.0.0.1:5001/v1/convert/file' \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F 'ocr_engine=easyocr' \
  -F 'pdf_backend=dlparse_v2' \
  -F 'from_formats=pdf' \
  -F 'from_formats=docx' \
  -F 'force_ocr=false' \
  -F 'image_export_mode=embedded' \
  -F 'ocr_lang=en' \
  -F 'ocr_lang=pl' \
  -F 'table_mode=fast' \
  -F 'files=@2206.01062v1.pdf;type=application/pdf' \
  -F 'abort_on_error=false' \
  -F 'to_formats=md' \
  -F 'to_formats=text' \
  -F 'do_ocr=true'
```

</details>

<details>
<summary>Python example:</summary>

```python
import os

import httpx

async_client = httpx.AsyncClient(timeout=60.0)
url = "http://localhost:5001/v1/convert/file"
parameters = {
    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
    "to_formats": ["md", "json", "html", "text", "doctags"],
    "image_export_mode": "placeholder",
    "do_ocr": True,
    "force_ocr": False,
    "ocr_engine": "easyocr",
    "ocr_lang": ["en"],
    "pdf_backend": "dlparse_v2",
    "table_mode": "fast",
    "abort_on_error": False,
}

current_dir = os.path.dirname(__file__)
file_path = os.path.join(current_dir, '2206.01062v1.pdf')

files = {
    'files': ('2206.01062v1.pdf', open(file_path, 'rb'), 'application/pdf'),
}

response = await async_client.post(url, files=files, data=parameters)
assert response.status_code == 200, "Response should be 200 OK"

data = response.json()
```

</details>

### Picture description options

When the picture description enrichment is activated, users may specify which model and which execution mode to use for this task. There are two choices for the execution mode: _local_ will run the vision-language model directly, _api_ will invoke an external API endpoint.

The local option is specified with:

```jsonc
{
  "picture_description_local": {
    "repo_id": "", // Repository id from the Hugging Face Hub.
    "generation_config": {"max_new_tokens": 200, "do_sample": false}, // HF generation config.
    "prompt": "Describe this image in a few sentences. ", // Prompt used when calling the vision-language model.
  }
}
```

The possible values for `generation_config` are documented in the [Hugging Face text generation docs](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig).

The api option is specified with:

```jsonc
{
  "picture_description_api": {
    "url": "", // Endpoint which accepts openai-api compatible requests.
    "headers": {}, // Headers used for calling the API endpoint. For example, it could include authentication headers.
    "params": {}, // Model parameters.
    "timeout": 20, // Timeout for the API request.
    "prompt": "Describe this image in a few sentences. ", // Prompt used when calling the vision-language model.
  }
}
```

Example URLs are:

- `http://localhost:8000/v1/chat/completions` for the local vLLM API, with example `picture_description_api`:
  - the `HuggingFaceTB/SmolVLM-256M-Instruct` model

    ```json
    {
      "url": "http://localhost:8000/v1/chat/completions",
      "params": {
        "model": "HuggingFaceTB/SmolVLM-256M-Instruct",
        "max_completion_tokens": 200
      }
    }
    ```

  - the `ibm-granite/granite-vision-3.2-2b` model

    ```json
    {
      "url": "http://localhost:8000/v1/chat/completions",
      "params": {
        "model": "ibm-granite/granite-vision-3.2-2b",
        "max_completion_tokens": 200
      }
    }
    ```

- `http://localhost:11434/v1/chat/completions` for the local Ollama API, with example `picture_description_api`:
  - the `granite3.2-vision:2b` model

    ```json
    {
      "url": "http://localhost:11434/v1/chat/completions",
      "params": {
        "model": "granite3.2-vision:2b"
      }
    }
    ```

Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.
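
As an illustration, the sketch below wires the Ollama example above into a complete conversion request. The endpoint, model, and server addresses are the ones assumed in the examples; adjust them to your setup.

```python
import httpx

payload = {
    "options": {
        "do_picture_description": True,
        "picture_description_api": {
            "url": "http://localhost:11434/v1/chat/completions",
            "params": {"model": "granite3.2-vision:2b"},
            "timeout": 60,
            "prompt": "Describe this image in a few sentences. ",
        },
    },
    "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}],
}

# The server must run with DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true for this call.
response = httpx.post(
    "http://localhost:5001/v1/convert/source", json=payload, timeout=300.0
)
print(response.json()["status"])
```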

## Response format

The response can be a JSON document or a file.

- If you process only one file, the response will be a JSON document with the following format:

  ```jsonc
  {
    "document": {
      "md_content": "",
      "json_content": {},
      "html_content": "",
      "text_content": "",
      "doctags_content": ""
    },
    "status": "<success|partial_success|skipped|failure>",
    "processing_time": 0.0,
    "timings": {},
    "errors": []
  }
  ```

  Depending on the values you set in `to_formats`, the different items will be populated with their respective results or left empty.

  `processing_time` is the Docling processing time in seconds, and `timings` (when enabled in the backend) provides the detailed
  timing of all the internal Docling components.

- If you set the parameter `target` to the zip mode, the response will be a zip file.
- If multiple files are generated (multiple inputs, or one input but multiple outputs with the zip target mode), the response will be a zip file.

## Asynchronous API

Both the `/v1/convert/source` and `/v1/convert/file` endpoints are available as asynchronous variants.
The advantage of the asynchronous endpoints is the possibility to interrupt the connection, check for progress updates and fetch the result later.
This approach is more resilient against network instabilities and allows the client application logic to easily interleave conversion with other tasks.

Launch an asynchronous conversion with:

- `POST /v1/convert/source/async` when providing the input as sources.
- `POST /v1/convert/file/async` when providing the input as multipart-form files.

The response format is a task detail:

```jsonc
{
  "task_id": "<task_id>", // the task_id which can be used for the next operations
  "task_status": "pending|started|success|failure", // the task status
  "task_position": 1, // the position in the queue
  "task_meta": null, // metadata e.g. how many documents are in the total job and how many have been converted
}
```
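
For example, a submission against a local server could look like the following sketch; the returned task detail feeds the polling loop shown in the next section.

```python
import httpx

base_url = "http://localhost:5001"

# Submit the conversion asynchronously; the response is a task detail.
response = httpx.post(
    f"{base_url}/v1/convert/source/async",
    json={"http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]},
)
task = response.json()
print(task["task_id"], task["task_status"], task["task_position"])
```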

### Polling status

For checking the progress of the conversion task and waiting for its completion, use the endpoint:

- `GET /v1/status/poll/{task_id}`

<details>
<summary>Example waiting loop:</summary>

```python
import time

import httpx

# ...
# response from the async task submission
task = response.json()

while task["task_status"] not in ("success", "failure"):
    response = httpx.get(f"{base_url}/v1/status/poll/{task['task_id']}")
    task = response.json()

    time.sleep(5)
```

</details>

### Subscribe with websockets

Using websockets, the client application can be notified about updates of the conversion task.
To start the websocket connection, use the endpoint:

- `/v1/status/ws/{task_id}`

Websocket messages are JSON objects with the following structure:

```jsonc
{
  "message": "connection|update|error", // type of message being sent
  "task": {}, // the same content of the task description
  "error": "", // description of the error
}
```

<details>
<summary>Example websocket usage:</summary>

```python
import json

from websockets.sync.client import connect

uri = f"ws://{base_url}/v1/status/ws/{task['task_id']}"
with connect(uri) as websocket:
    for message in websocket:
        try:
            payload = json.loads(message)
            if payload["message"] == "error":
                break
            if payload["message"] == "update" and payload["task"]["task_status"] in ("success", "failure"):
                break
        except Exception:
            break
```

</details>

### Fetch results

When the task is completed, the result can be fetched with the endpoint:

- `GET /v1/result/{task_id}`
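
A minimal sketch of fetching the result once polling reports success, assuming the in-body JSON response (not the zip target) and the `task` object from the examples above:

```python
import httpx

base_url = "http://localhost:5001"

# `task` comes from the submission/polling examples above.
result = httpx.get(f"{base_url}/v1/result/{task['task_id']}")
document = result.json()["document"]
print(document["md_content"][:500])
```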
80
docs/v1_migration.md
Normal file
@@ -0,0 +1,80 @@
# Migration to the `v1` API

Docling Serve has moved from the initial prototype `v1alpha` API to the stable `v1` API.
This page provides simple instructions to upgrade your application to the new API.

## API changes

The breaking changes introduced in the `v1` release of Docling Serve are designed to provide a stable schema which
allows the project to offer new capabilities, such as new types of input sources and targets, as well as the definition of callbacks for event-driven applications.

### Endpoint names

All endpoints are renamed from `/v1alpha/` to `/v1/`.

### Sources

When using the `/v1/convert/source` endpoint, input documents have to be specified with the `sources: []` argument, which replaces the usage of `file_sources` and `http_sources`.

Old version:

```jsonc
{
  "options": {}, // conversion options
  "file_sources": [ // input documents provided as base64-encoded strings
    {"base64_string": "abc123...", "filename": "file.pdf"}
  ],
  "http_sources": [ // input documents provided as http urls
    {"url": "https://..."}
  ]
}
```

New version:

```jsonc
{
  "options": {}, // conversion options
  "sources": [
    // input document provided as base64-encoded string
    {"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
    // input document provided as http url
    {"kind": "http", "url": "https://..."},
  ]
}
```

### Targets

To switch between output formats, i.e. from the in-body JSON response to the zip archive response, users have to specify the `target` argument, which replaces the usage of `options.return_as_file`.

Old version:

```jsonc
{
  "options": {
    "return_as_file": true // <-- to be removed
  },
  // ...
}
```

New version:

```jsonc
{
  "options": {},
  "target": {"kind": "zip"}, // <-- add this
  // ...
}
```
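
Putting the two changes together, a complete `v1`-style request could look like the following Python sketch; the source URL is a placeholder and the zip response is written to a local file.

```python
import httpx

# A v1-style payload: the unified `sources` list plus an explicit zip `target`.
payload = {
    "options": {},
    "sources": [
        {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
    ],
    "target": {"kind": "zip"},
}

response = httpx.post("http://localhost:5001/v1/convert/source", json=payload, timeout=120.0)
with open("result.zip", "wb") as f:
    f.write(response.content)
```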

## Continue with the old API

If you are not able to apply the changes above to your application, please consider pinning the previous `v0.x` container images, e.g.

```sh
podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve:v0.16.1
```

_Note that the old prototype API will not be supported in new `v1.x` versions._
BIN
img/fastapi-ui.png
Normal file
Binary file not shown. (226 KiB)
BIN
img/ui-input.png
Normal file
Binary file not shown. (64 KiB)
BIN
img/ui-output.png
Normal file
Binary file not shown. (124 KiB)
7
os-packages.txt
Normal file
@@ -0,0 +1,7 @@
tesseract
tesseract-devel
tesseract-langpack-eng
tesseract-osd
leptonica-devel
libglvnd-glx
glib2
4430
poetry.lock
generated
File diff suppressed because it is too large
341
pyproject.toml
341
pyproject.toml
@@ -1,117 +1,266 @@
|
|||||||
[tool.poetry]
|
[project]
|
||||||
name = "docling-serve"
|
name = "docling-serve"
|
||||||
version = "0.1.0"
|
version = "1.2.1" # DO NOT EDIT, updated automatically
|
||||||
description = "Running Docling as a service"
|
description = "Running Docling as a service"
|
||||||
license = "MIT"
|
license = {text = "MIT"}
|
||||||
authors = [
|
authors = [
|
||||||
"Michele Dolfi <dol@zurich.ibm.com>",
|
{name="Michele Dolfi", email="dol@zurich.ibm.com"},
|
||||||
"Christoph Auer <cau@zurich.ibm.com>",
|
{name="Guillaume Moutier", email="gmoutier@redhat.com"},
|
||||||
"Panos Vagenas <pva@zurich.ibm.com>",
|
{name="Anil Vishnoi", email="avishnoi@redhat.com"},
|
||||||
"Cesar Berrospi Ramis <ceb@zurich.ibm.com>",
|
{name="Panos Vagenas", email="pva@zurich.ibm.com"},
|
||||||
"Peter Staar <taa@zurich.ibm.com>",
|
{name="Christoph Auer", email="cau@zurich.ibm.com"},
|
||||||
|
{name="Peter Staar", email="taa@zurich.ibm.com"},
|
||||||
]
|
]
|
||||||
maintainers = [
|
maintainers = [
|
||||||
"Peter Staar <taa@zurich.ibm.com>",
|
{name="Michele Dolfi", email="dol@zurich.ibm.com"},
|
||||||
"Christoph Auer <cau@zurich.ibm.com>",
|
{name="Anil Vishnoi", email="avishnoi@redhat.com"},
|
||||||
"Michele Dolfi <dol@zurich.ibm.com>",
|
{name="Panos Vagenas", email="pva@zurich.ibm.com"},
|
||||||
"Cesar Berrospi Ramis <ceb@zurich.ibm.com>",
|
{name="Christoph Auer", email="cau@zurich.ibm.com"},
|
||||||
"Panos Vagenas <pva@zurich.ibm.com>",
|
{name="Peter Staar", email="taa@zurich.ibm.com"},
|
||||||
]
|
]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
repository = "https://github.com/DS4SD/docling-serve"
|
|
||||||
homepage = "https://github.com/DS4SD/docling-serve"
|
|
||||||
classifiers = [
|
classifiers = [
|
||||||
"License :: OSI Approved :: MIT License",
|
"License :: OSI Approved :: MIT License",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
# "Development Status :: 5 - Production/Stable",
|
"Development Status :: 5 - Production/Stable",
|
||||||
"Intended Audience :: Developers",
|
"Intended Audience :: Developers",
|
||||||
"Typing :: Typed",
|
"Typing :: Typed",
|
||||||
"Programming Language :: Python :: 3"
|
"Programming Language :: Python :: 3",
|
||||||
|
"Programming Language :: Python :: 3.10",
|
||||||
|
"Programming Language :: Python :: 3.11",
|
||||||
|
"Programming Language :: Python :: 3.12",
|
||||||
|
"Programming Language :: Python :: 3.13",
|
||||||
|
]
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
dependencies = [
|
||||||
|
"docling~=2.38",
|
||||||
|
"docling-core>=2.44.1",
|
||||||
|
"docling-jobkit[kfp,vlm]>=1.3.1,<2.0.0",
|
||||||
|
"fastapi[standard]~=0.115",
|
||||||
|
"httpx~=0.28",
|
||||||
|
"pydantic~=2.10",
|
||||||
|
"pydantic-settings~=2.4",
|
||||||
|
"python-multipart>=0.0.14,<0.1.0",
|
||||||
|
"typer~=0.12",
|
||||||
|
"uvicorn[standard]>=0.29.0,<1.0.0",
|
||||||
|
"websockets~=14.0",
|
||||||
|
"scalar-fastapi>=1.0.3",
|
||||||
|
"docling-mcp>=1.0.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[project.optional-dependencies]
|
||||||
python = "^3.9"
|
ui = [
|
||||||
docling = "^2.10.0"
|
"gradio~=5.9",
|
||||||
fastapi = {version = "^0.115.6", extras = ["standard"]}
|
"pydantic<2.11.0", # fix compatibility between gradio and new pydantic 2.11
|
||||||
uvicorn = "^0.32.1"
|
]
|
||||||
pydantic-settings = "^2.4.0"
|
tesserocr = [
|
||||||
httpx = "^0.28.1"
|
"tesserocr~=2.7"
|
||||||
tesserocr = { version = "^2.7.1", optional = true }
|
]
|
||||||
rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
|
rapidocr = [
|
||||||
onnxruntime = [
|
"rapidocr-onnxruntime~=1.4; python_version<'3.13'",
|
||||||
# 1.19.2 is the last version with python3.9 support,
|
"onnxruntime~=1.7",
|
||||||
# see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
|
]
|
||||||
{ version = ">=1.7.0,<1.20.0", optional = true, markers = "python_version < '3.10'" },
|
flash-attn = [
|
||||||
{ version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" }
|
"flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[dependency-groups]
|
||||||
|
dev = [
|
||||||
|
"asgi-lifespan~=2.0",
|
||||||
|
"mypy~=1.11",
|
||||||
|
"pre-commit-uv~=4.1",
|
||||||
|
"pytest~=8.3",
|
||||||
|
"pytest-asyncio~=0.24",
|
||||||
|
"pytest-check~=2.4",
|
||||||
|
"python-semantic-release~=7.32",
|
||||||
|
"ruff>=0.9.6",
|
||||||
|
]
|
||||||
|
|
-[tool.poetry.extras]
-tesserocr = ["tesserocr"]
-rapidocr = ["rapidocr-onnxruntime", "onnxruntime"]

-[tool.poetry.group.pypi-torch]
-optional = false
-
-[tool.poetry.group.pypi-torch.dependencies]

+pypi = [
+    "torch>=2.7.1",
+    "torchvision>=0.22.1",
+]
+cpu = [
+    "torch>=2.7.1",
+    "torchvision>=0.22.1",
+]
+cu124 = [
+    "torch>=2.6.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+    "torchvision>=0.21.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+cu126 = [
+    "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+    "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+cu128 = [
+    "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+    "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+rocm = [
+    "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+    "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+    "pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+[tool.uv]
+package = true
+default-groups = ["dev", "pypi"]
+conflicts = [
+    [
+        { group = "pypi" },
+        { group = "cpu" },
+        { group = "cu124" },
+        { group = "cu126" },
+        { group = "cu128" },
+        { group = "rocm" },
+    ],
+]
+environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
+override-dependencies = [
+    "urllib3~=2.0"
+]
+[tool.uv.sources]
-torch = [
-    {version = "!=2.4.1+cpu" },
-]
+torch = [
+    { index = "pytorch-pypi", group = "pypi" },
+    { index = "pytorch-cpu", group = "cpu" },
+    { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+    { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+    { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+    { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
+]
-torchvision = [
-    {version = "!=0.19.1+cpu" },
-]
+torchvision = [
+    { index = "pytorch-pypi", group = "pypi" },
+    { index = "pytorch-cpu", group = "cpu" },
+    { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+    { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+    { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+    { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
+]
+pytorch-triton-rocm = [
+    { index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
+]
+# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
+# docling-jobkit = { path = "../docling-jobkit", editable = true }

-[tool.poetry.group.cpu]
-optional = true
-
-[tool.poetry.group.cpu.dependencies]
-torch = [
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
-]
-torchvision = [
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
-]
-
-[tool.poetry.group.constraints.dependencies]
-numpy = [
-    { version = "^2.1.0", markers = 'python_version >= "3.13"' },
-    { version = "^1.24.4", markers = 'python_version < "3.13"' },
-]
-
-[tool.poetry.group.dev.dependencies]
-black = "^24.8.0"
-isort = "^5.13.2"
-pre-commit = "^3.8.0"
-autoflake = "^2.3.1"
-flake8 = "^7.1.1"
-pytest = "^8.3.2"
-mypy = "^1.11.2"
-
-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"

+[[tool.uv.index]]
+name = "pytorch-pypi"
+url = "https://pypi.org/simple"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu124"
+url = "https://download.pytorch.org/whl/cu124"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu126"
+url = "https://download.pytorch.org/whl/cu126"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+explicit = true
+
+[tool.setuptools.packages.find]
+include = ["docling_serve*"]
+namespaces = true
+
+[project.scripts]
+docling-serve = "docling_serve.__main__:main"
+
+[project.urls]
+Homepage = "https://github.com/docling-project/docling-serve"
+# Documentation = "https://ds4sd.github.io/docling"
+Repository = "https://github.com/docling-project/docling-serve"
+Issues = "https://github.com/docling-project/docling-serve/issues"
+Changelog = "https://github.com/docling-project/docling-serve/blob/main/CHANGELOG.md"

-[tool.black]
-line-length = 88
-target-version = ["py310"]
-include = '\.pyi?$'
-
-[tool.isort]
-profile = "black"
-line_length = 88
-py_version=311
-
-[tool.autoflake]
-in-place = true
-remove-all-unused-imports = true
-remove-unused-variables = true
-expand-star-imports = true
-recursive = true

+[tool.ruff]
+target-version = "py310"
+line-length = 88
+respect-gitignore = true
+# extend-exclude = [
+#     "tests",
+# ]
+
+[tool.ruff.format]
+skip-magic-trailing-comma = false
+
+[tool.ruff.lint]
+select = [
+    # "B", # flake8-bugbear
+    "C", # flake8-comprehensions
+    "C9", # mccabe
+    # "D", # flake8-docstrings
+    "E", # pycodestyle errors (default)
+    "F", # pyflakes (default)
+    "I", # isort
+    "PD", # pandas-vet
+    "PIE", # pie
+    # "PTH", # pathlib
+    "Q", # flake8-quotes
+    # "RET", # return
+    "RUF", # Enable all ruff-specific checks
+    # "SIM", # simplify
+    "S307", # eval
+    # "T20", # (disallow print statements) keep debugging statements out of the codebase
+    "W", # pycodestyle warnings
+    "ASYNC", # async
+    "UP", # pyupgrade
+]
+
+ignore = [
+    "E501", # Line too long, handled by ruff formatter
+    "D107", # "Missing docstring in __init__",
+    "F811", # "redefinition of the same function"
+    "PL", # Pylint
+    "RUF012", # Mutable Class Attributes
+    "UP007", # Option and Union
+]
+
+#extend-select = []
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["E402", "F401"]
+"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.lint.isort.sections]
+"docling" = ["docling", "docling_core", "docling_jobkit"]
+
+[tool.ruff.lint.isort]
+combine-as-imports = true
+section-order = [
+    "future",
+    "standard-library",
+    "third-party",
+    "docling",
+    "first-party",
+    "local-folder",
+]

[tool.mypy]
pretty = true
@@ -125,5 +274,35 @@ module = [
    "easyocr.*",
    "tesserocr.*",
    "rapidocr_onnxruntime.*",
+    "requests.*",
+    "kfp.*",
+    "kfp_server_api.*",
+    "mlx_vlm.*",
+    "scalar_fastapi.*",
]
ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
+minversion = "8.2"
+testpaths = [
+    "tests",
+]
+addopts = "-rA --color=yes --tb=short --maxfail=5"
+markers = [
+    "asyncio",
+]
+
+[tool.semantic_release]
+# for default values check:
+# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
+
+version_source = "tag_only"
+branch = "main"
+
+# configure types which should trigger minor and patch version bumps respectively
+# (note that they must be a subset of the configured allowed types):
+parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
+parser_angular_minor_types = "feat"
+parser_angular_patch_types = "fix,perf"
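The pypi/cpu/cu124/cu126/cu128/rocm groups above are declared mutually exclusive under [tool.uv] conflicts, so only one PyTorch build can be synced at a time. A small sketch, not part of the diff, to verify which build was resolved in the active environment (assumes torch was installed from one of those groups):

import torch

# The local version suffix reveals which index the wheel came from,
# e.g. "2.7.1+cpu" for the cpu group or "2.7.1+cu126" for cu126.
print(torch.__version__)
print(torch.version.cuda)  # None on CPU-only builds
print(torch.cuda.is_available())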
BIN tests/2206.01062v1.pdf Normal file
Binary file not shown.
BIN tests/2408.09869v5.pdf Normal file
Binary file not shown.
121 tests/test_1-file-all-outputs.py Normal file
@@ -0,0 +1,121 @@
import json
import os

import httpx
import pytest
import pytest_asyncio
from pytest_check import check


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_file(async_client):
    """Test convert single file to all outputs"""
    url = "http://localhost:5001/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
            "pptx",
            "html",
            "image",
            "pdf",
            "asciidoc",
            "md",
            "xlsx",
        ],
        "to_formats": ["md", "json", "html", "text", "doctags"],
        "image_export_mode": "placeholder",
        "ocr": True,
        "force_ocr": False,
        "ocr_engine": "easyocr",
        "ocr_lang": ["en"],
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
    }

    current_dir = os.path.dirname(__file__)
    file_path = os.path.join(current_dir, "2206.01062v1.pdf")

    files = {
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

    response = await async_client.post(url, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()

    # Response content checks
    # Helper function to safely slice strings
    def safe_slice(value, length=100):
        if isinstance(value, str):
            return value[:length]
        return str(value)  # Convert non-string values to string for debug purposes

    # Document check
    check.is_in(
        "document",
        data,
        msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
    )
    # MD check
    check.is_in(
        "md_content",
        data.get("document", {}),
        msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("md_content") is not None:
        check.is_in(
            "## DocLayNet: ",
            data["document"]["md_content"],
            msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
        )
    # JSON check
    check.is_in(
        "json_content",
        data.get("document", {}),
        msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("json_content") is not None:
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
            msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
    if data.get("document", {}).get("html_content") is not None:
        check.is_in(
            "<!DOCTYPE html>\n<html>\n<head>",
            data["document"]["html_content"],
            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>'. Received: {safe_slice(data['document']['html_content'])}",
        )
    # Text check
    check.is_in(
        "text_content",
        data.get("document", {}),
        msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("text_content") is not None:
        check.is_in(
            "DocLayNet: A Large Human-Annotated Dataset",
            data["document"]["text_content"],
            msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
        )
    # DocTags check
    check.is_in(
        "doctags_content",
        data.get("document", {}),
        msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("doctags_content") is not None:
        check.is_in(
            "<doctag><page_header><loc",
            data["document"]["doctags_content"],
            msg=f"DocTags document should contain '<doctag><page_header><loc'. Received: {safe_slice(data['document']['doctags_content'])}",
        )
70 tests/test_1-file-async.py Normal file
@@ -0,0 +1,70 @@
import json
import time
from pathlib import Path

import httpx
import pytest
import pytest_asyncio


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_url(async_client):
    """Test convert URL to all outputs"""

    base_url = "http://localhost:5001/v1"
    payload = {
        "to_formats": ["md", "json", "html"],
        "image_export_mode": "placeholder",
        "ocr": False,
        "abort_on_error": False,
    }

    file_path = Path(__file__).parent / "2206.01062v1.pdf"
    files = {
        "files": (file_path.name, file_path.open("rb"), "application/pdf"),
    }

    for n in range(1):
        response = await async_client.post(
            f"{base_url}/convert/file/async", files=files, data=payload
        )
        assert response.status_code == 200, "Response should be 200 OK"

        task = response.json()

        print(json.dumps(task, indent=2))

    while task["task_status"] not in ("success", "failure"):
        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
        assert response.status_code == 200, "Response should be 200 OK"
        task = response.json()
        print(f"{task['task_status']=}")
        print(f"{task['task_position']=}")

        time.sleep(2)

    assert task["task_status"] == "success"
    print(f"Task completed with status {task['task_status']=}")

    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
    assert result_resp.status_code == 200, "Response should be 200 OK"
    result = result_resp.json()
    print("Got result.")

    assert "md_content" in result["document"]
    assert result["document"]["md_content"] is not None
    assert len(result["document"]["md_content"]) > 10

    assert "html_content" in result["document"]
    assert result["document"]["html_content"] is not None
    assert len(result["document"]["html_content"]) > 10

    assert "json_content" in result["document"]
    assert result["document"]["json_content"] is not None
    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
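One detail worth noting in the polling loop above: time.sleep(2) blocks the event loop while the async test waits. A non-blocking variant of the same loop, as a sketch; wait_for_task is a hypothetical helper, not part of the test suite:

import asyncio

import httpx


async def wait_for_task(client: httpx.AsyncClient, base_url: str, task: dict) -> dict:
    # Poll /status/poll/{task_id} until the task settles, yielding control
    # to the event loop between polls instead of blocking it.
    while task["task_status"] not in ("success", "failure"):
        response = await client.get(f"{base_url}/status/poll/{task['task_id']}")
        response.raise_for_status()
        task = response.json()
        await asyncio.sleep(2)
    return task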
122 tests/test_1-url-all-outputs.py Normal file
@@ -0,0 +1,122 @@
import json

import httpx
import pytest
import pytest_asyncio
from pytest_check import check


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
    url = "http://localhost:5001/v1/convert/source"
    payload = {
        "options": {
            "from_formats": [
                "docx",
                "pptx",
                "html",
                "image",
                "pdf",
                "asciidoc",
                "md",
                "xlsx",
            ],
            "to_formats": ["md", "json", "html", "text", "doctags"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "force_ocr": False,
            "ocr_engine": "easyocr",
            "ocr_lang": ["en"],
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
        },
        "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"}],
    }
    print(json.dumps(payload, indent=2))

    response = await async_client.post(url, json=payload)
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()

    # Response content checks
    # Helper function to safely slice strings
    def safe_slice(value, length=100):
        if isinstance(value, str):
            return value[:length]
        return str(value)  # Convert non-string values to string for debug purposes

    # Document check
    check.is_in(
        "document",
        data,
        msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
    )
    # MD check
    check.is_in(
        "md_content",
        data.get("document", {}),
        msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("md_content") is not None:
        check.is_in(
            "## DocLayNet: ",
            data["document"]["md_content"],
            msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
        )
    # JSON check
    check.is_in(
        "json_content",
        data.get("document", {}),
        msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("json_content") is not None:
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
            msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
    check.is_in(
        "html_content",
        data.get("document", {}),
        msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("html_content") is not None:
        check.is_in(
            "<!DOCTYPE html>\n<html>\n<head>",
            data["document"]["html_content"],
            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>'. Received: {safe_slice(data['document']['html_content'])}",
        )
    # Text check
    check.is_in(
        "text_content",
        data.get("document", {}),
        msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("text_content") is not None:
        check.is_in(
            "DocLayNet: A Large Human-Annotated Dataset",
            data["document"]["text_content"],
            msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
        )
    # DocTags check
    check.is_in(
        "doctags_content",
        data.get("document", {}),
        msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("doctags_content") is not None:
        check.is_in(
            "<doctag><page_header><loc",
            data["document"]["doctags_content"],
            msg=f"DocTags document should contain '<doctag><page_header><loc'. Received: {safe_slice(data['document']['doctags_content'])}",
        )
63 tests/test_1-url-async-ws.py Normal file
@@ -0,0 +1,63 @@
import base64
from pathlib import Path

import httpx
import pytest
import pytest_asyncio
from websockets.sync.client import connect


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_url(async_client: httpx.AsyncClient):
    """Test convert URL to all outputs"""

    doc_filename = Path("tests/2408.09869v5.pdf")
    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()

    base_url = "http://localhost:5001/v1"
    payload = {
        "options": {
            "to_formats": ["md", "json"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "abort_on_error": False,
            # "do_picture_description": True,
            # "picture_description_api": {
            #     "url": "http://localhost:11434/v1/chat/completions",
            #     "params": {
            #         "model": "granite3.2-vision:2b",
            #     }
            # },
            # "picture_description_local": {
            #     "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
            # },
        },
        # "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}],
        "sources": [
            {
                "kind": "file",
                "base64_string": encoded_doc,
                "filename": doc_filename.name,
            }
        ],
    }
    # print(json.dumps(payload, indent=2))

    for n in range(5):
        response = await async_client.post(
            f"{base_url}/convert/source/async", json=payload
        )
        assert response.status_code == 200, "Response should be 200 OK"

        task = response.json()

        uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}"
        with connect(uri) as websocket:
            for message in websocket:
                print(message)
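The test above drives the status websocket with the synchronous websockets client from inside an async test, which works but blocks the event loop per message. A sketch of the same loop on the asyncio client (my assumption: the websockets~=14.0 pin in pyproject.toml provides websockets.asyncio.client, introduced in websockets 13); watch_task is a hypothetical helper, not part of the suite:

from websockets.asyncio.client import connect


async def watch_task(task_id: str) -> None:
    uri = f"ws://localhost:5001/v1/status/ws/{task_id}"
    async with connect(uri) as websocket:
        # Iteration ends when the server closes the socket, which the
        # sync loop above relies on as well.
        async for message in websocket:
            print(message)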
59 tests/test_1-url-async.py Normal file
@@ -0,0 +1,59 @@
import json
import random
import time

import httpx
import pytest
import pytest_asyncio


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_url(async_client):
    """Test convert URL to all outputs"""

    example_docs = [
        "https://arxiv.org/pdf/2411.19710",
        "https://arxiv.org/pdf/2501.17887",
        "https://www.nature.com/articles/s41467-024-50779-y.pdf",
        "https://arxiv.org/pdf/2306.12802",
        "https://arxiv.org/pdf/2311.18481",
    ]

    base_url = "http://localhost:5001/v1"
    payload = {
        "options": {
            "to_formats": ["md", "json"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "abort_on_error": False,
        },
        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
    }
    print(json.dumps(payload, indent=2))

    for n in range(3):
        response = await async_client.post(
            f"{base_url}/convert/source/async", json=payload
        )
        assert response.status_code == 200, "Response should be 200 OK"

        task = response.json()

        print(json.dumps(task, indent=2))

    while task["task_status"] not in ("success", "failure"):
        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
        assert response.status_code == 200, "Response should be 200 OK"
        task = response.json()
        print(f"{task['task_status']=}")
        print(f"{task['task_position']=}")

        time.sleep(2)

    assert task["task_status"] == "success"
70 tests/test_2-files-all-outputs.py Normal file
@@ -0,0 +1,70 @@
import os

import httpx
import pytest
import pytest_asyncio
from pytest_check import check


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_file(async_client):
    """Test convert single file to all outputs"""
    url = "http://localhost:5001/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
            "pptx",
            "html",
            "image",
            "pdf",
            "asciidoc",
            "md",
            "xlsx",
        ],
        "to_formats": ["md", "json", "html", "text", "doctags"],
        "image_export_mode": "placeholder",
        "ocr": True,
        "force_ocr": False,
        "ocr_engine": "easyocr",
        "ocr_lang": ["en"],
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
    }

    current_dir = os.path.dirname(__file__)
    file_path = os.path.join(current_dir, "2206.01062v1.pdf")

    files = [
        ("files", ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf")),
        ("files", ("2408.09869v5.pdf", open(file_path, "rb"), "application/pdf")),
    ]

    response = await async_client.post(url, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    # Check for zip file attachment
    content_disposition = response.headers.get("content-disposition")

    with check:
        assert content_disposition is not None, (
            "Content-Disposition header should be present"
        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
        assert 'filename="converted_docs.zip"' in content_disposition, (
            "Attachment filename should be 'converted_docs.zip'"
        )

    content_type = response.headers.get("content-type")
    with check:
        assert content_type == "application/zip", (
            "Content-Type should be 'application/zip'"
        )
67 tests/test_2-urls-all-outputs.py Normal file
@@ -0,0 +1,67 @@
import httpx
import pytest
import pytest_asyncio
from pytest_check import check


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
    url = "http://localhost:5001/v1/convert/source"
    payload = {
        "options": {
            "from_formats": [
                "docx",
                "pptx",
                "html",
                "image",
                "pdf",
                "asciidoc",
                "md",
                "xlsx",
            ],
            "to_formats": ["md", "json", "html", "text", "doctags"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "force_ocr": False,
            "ocr_engine": "easyocr",
            "ocr_lang": ["en"],
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
        },
        "sources": [
            {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
            {"kind": "http", "url": "https://arxiv.org/pdf/2408.09869"},
        ],
        "target": {"kind": "zip"},
    }

    response = await async_client.post(url, json=payload)
    assert response.status_code == 200, "Response should be 200 OK"

    # Check for zip file attachment
    content_disposition = response.headers.get("content-disposition")

    with check:
        assert content_disposition is not None, (
            "Content-Disposition header should be present"
        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
        assert 'filename="converted_docs.zip"' in content_disposition, (
            "Attachment filename should be 'converted_docs.zip'"
        )

    content_type = response.headers.get("content-type")
    with check:
        assert content_type == "application/zip", (
            "Content-Type should be 'application/zip'"
        )
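For the zip-producing requests above, the archive arrives directly in the response body. A sketch of inspecting it in memory, mirroring what tests/test_fastapi_endpoints.py below does with the referenced-artifacts output; response stands for the zip response obtained from a request like the one above:

import io
import zipfile

with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
    # List the converted documents (and any referenced images) in the archive.
    for name in zip_file.namelist():
        print(name, zip_file.getinfo(name).file_size)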
88 tests/test_2-urls-async-all-outputs.py Normal file
@@ -0,0 +1,88 @@
import json
import time

import httpx
import pytest
import pytest_asyncio
from pytest_check import check


@pytest_asyncio.fixture
async def async_client():
    async with httpx.AsyncClient(timeout=60.0) as client:
        yield client


@pytest.mark.asyncio
async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
    base_url = "http://localhost:5001/v1"
    payload = {
        "options": {
            "from_formats": [
                "docx",
                "pptx",
                "html",
                "image",
                "pdf",
                "asciidoc",
                "md",
                "xlsx",
            ],
            "to_formats": ["md", "json", "html", "text", "doctags"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "force_ocr": False,
            "ocr_engine": "easyocr",
            "ocr_lang": ["en"],
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
        },
        "sources": [
            {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
            {"kind": "http", "url": "https://arxiv.org/pdf/2408.09869"},
        ],
        "target": {"kind": "zip"},
    }

    response = await async_client.post(f"{base_url}/convert/source/async", json=payload)
    assert response.status_code == 200, "Response should be 200 OK"

    task = response.json()

    print(json.dumps(task, indent=2))

    while task["task_status"] not in ("success", "failure"):
        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
        assert response.status_code == 200, "Response should be 200 OK"
        task = response.json()
        print(f"{task['task_status']=}")
        print(f"{task['task_position']=}")

        time.sleep(2)

    assert task["task_status"] == "success"

    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
    assert result_resp.status_code == 200, "Response should be 200 OK"

    # Check for zip file attachment
    content_disposition = result_resp.headers.get("content-disposition")

    with check:
        assert content_disposition is not None, (
            "Content-Disposition header should be present"
        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
        assert 'filename="converted_docs.zip"' in content_disposition, (
            "Attachment filename should be 'converted_docs.zip'"
        )

    content_type = result_resp.headers.get("content-type")
    with check:
        assert content_type == "application/zip", (
            "Content-Type should be 'application/zip'"
        )
193 tests/test_fastapi_endpoints.py Normal file
@@ -0,0 +1,193 @@
import asyncio
import io
import json
import os
import zipfile

import pytest
import pytest_asyncio
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient
from pytest_check import check

from docling_core.types.doc import DoclingDocument, PictureItem

from docling_serve.app import create_app


@pytest.fixture(scope="session")
def event_loop():
    return asyncio.get_event_loop()


@pytest_asyncio.fixture(scope="session")
async def app():
    app = create_app()

    async with LifespanManager(app) as manager:
        print("Launching lifespan of app.")
        yield manager.app


@pytest_asyncio.fixture(scope="session")
async def client(app):
    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://app.io"
    ) as client:
        print("Client is ready")
        yield client


@pytest.mark.asyncio
async def test_health(client: AsyncClient):
    response = await client.get("/health")
    assert response.status_code == 200
    assert response.json() == {"status": "ok"}


@pytest.mark.asyncio
async def test_convert_file(client: AsyncClient):
    """Test convert single file to all outputs"""

    endpoint = "/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
            "pptx",
            "html",
            "image",
            "pdf",
            "asciidoc",
            "md",
            "xlsx",
        ],
        "to_formats": ["md", "json", "html", "text", "doctags"],
        "image_export_mode": "placeholder",
        "ocr": True,
        "force_ocr": False,
        "ocr_engine": "easyocr",
        "ocr_lang": ["en"],
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
    }

    current_dir = os.path.dirname(__file__)
    file_path = os.path.join(current_dir, "2206.01062v1.pdf")

    files = {
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

    response = await client.post(endpoint, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()

    # Response content checks
    # Helper function to safely slice strings
    def safe_slice(value, length=100):
        if isinstance(value, str):
            return value[:length]
        return str(value)  # Convert non-string values to string for debug purposes

    # Document check
    check.is_in(
        "document",
        data,
        msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
    )
    # MD check
    check.is_in(
        "md_content",
        data.get("document", {}),
        msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("md_content") is not None:
        check.is_in(
            "## DocLayNet: ",
            data["document"]["md_content"],
            msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
        )
    # JSON check
    check.is_in(
        "json_content",
        data.get("document", {}),
        msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("json_content") is not None:
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
            msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
    check.is_in(
        "html_content",
        data.get("document", {}),
        msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("html_content") is not None:
        check.is_in(
            "<!DOCTYPE html>\n<html>\n<head>",
            data["document"]["html_content"],
            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>\\n<head>'. Received: {safe_slice(data['document']['html_content'])}",
        )
    # Text check
    check.is_in(
        "text_content",
        data.get("document", {}),
        msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("text_content") is not None:
        check.is_in(
            "DocLayNet: A Large Human-Annotated Dataset",
            data["document"]["text_content"],
            msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
        )
    # DocTags check
    check.is_in(
        "doctags_content",
        data.get("document", {}),
        msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
    )
    if data.get("document", {}).get("doctags_content") is not None:
        check.is_in(
            "<doctag><page_header>",
            data["document"]["doctags_content"],
            msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
        )


@pytest.mark.asyncio
async def test_referenced_artifacts(client: AsyncClient):
    """Test that paths in the zip file are relative to the zip file root."""

    endpoint = "/v1/convert/file"
    options = {
        "to_formats": ["json"],
        "image_export_mode": "referenced",
        "target_type": "zip",
        "ocr": False,
    }

    current_dir = os.path.dirname(__file__)
    file_path = os.path.join(current_dir, "2206.01062v1.pdf")

    files = {
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

    response = await client.post(endpoint, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        namelist = zip_file.namelist()
        for file in namelist:
            if file.endswith(".json"):
                doc = DoclingDocument.model_validate(json.loads(zip_file.read(file)))
                for item, _level in doc.iterate_items():
                    if isinstance(item, PictureItem):
                        assert item.image is not None
                        print(f"{item.image.uri=}")
                        assert str(item.image.uri) in namelist
77 tests/test_file_opts.py Normal file
@@ -0,0 +1,77 @@
import asyncio
import json
import os

import pytest
import pytest_asyncio
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient

from docling_core.types import DoclingDocument
from docling_core.types.doc.document import PictureDescriptionData

from docling_serve.app import create_app


@pytest.fixture(scope="session")
def event_loop():
    return asyncio.get_event_loop()


@pytest_asyncio.fixture(scope="session")
async def app():
    app = create_app()

    async with LifespanManager(app) as manager:
        print("Launching lifespan of app.")
        yield manager.app


@pytest_asyncio.fixture(scope="session")
async def client(app):
    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://app.io"
    ) as client:
        print("Client is ready")
        yield client


@pytest.mark.asyncio
async def test_convert_file(client: AsyncClient):
    """Test convert single file to all outputs"""

    endpoint = "/v1/convert/file"
    options = {
        "to_formats": ["md", "json"],
        "image_export_mode": "placeholder",
        "ocr": False,
        "do_picture_description": True,
        "picture_description_api": json.dumps(
            {
                "url": "http://localhost:11434/v1/chat/completions",  # ollama
                "params": {"model": "granite3.2-vision:2b"},
                "timeout": 60,
                "prompt": "Describe this image in a few sentences. ",
            }
        ),
    }

    current_dir = os.path.dirname(__file__)
    file_path = os.path.join(current_dir, "2206.01062v1.pdf")

    files = {
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

    response = await client.post(endpoint, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()

    doc = DoclingDocument.model_validate(data["document"]["json_content"])

    for pic in doc.pictures:
        for ann in pic.annotations:
            if isinstance(ann, PictureDescriptionData):
                print(f"{pic.self_ref}")
                print(ann.text)
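Continuing from the loop above, the picture annotations can just as easily be collected instead of printed. A sketch that assumes the doc object and the PictureDescriptionData import from the test above:

from docling_core.types.doc.document import PictureDescriptionData

# Map each picture's self reference to its generated description.
descriptions = {
    pic.self_ref: ann.text
    for pic in doc.pictures
    for ann in pic.annotations
    if isinstance(ann, PictureDescriptionData)
}
print(descriptions)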
133 tests/test_results_clear.py Normal file
@@ -0,0 +1,133 @@
import asyncio
import base64
import json
from pathlib import Path

import pytest
import pytest_asyncio
from asgi_lifespan import LifespanManager
from httpx import ASGITransport, AsyncClient

from docling_serve.app import create_app
from docling_serve.settings import docling_serve_settings


@pytest.fixture(scope="session")
def event_loop():
    return asyncio.get_event_loop()


@pytest_asyncio.fixture(scope="session")
async def app():
    app = create_app()

    async with LifespanManager(app) as manager:
        print("Launching lifespan of app.")
        yield manager.app


@pytest_asyncio.fixture(scope="session")
async def client(app):
    async with AsyncClient(
        transport=ASGITransport(app=app), base_url="http://app.io"
    ) as client:
        print("Client is ready")
        yield client


async def convert_file(client: AsyncClient):
    doc_filename = Path("tests/2408.09869v5.pdf")
    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()

    payload = {
        "options": {
            "to_formats": ["json"],
        },
        "sources": [
            {
                "kind": "file",
                "base64_string": encoded_doc,
                "filename": doc_filename.name,
            }
        ],
    }

    response = await client.post("/v1/convert/source/async", json=payload)
    assert response.status_code == 200, "Response should be 200 OK"

    task = response.json()

    print(json.dumps(task, indent=2))

    while task["task_status"] not in ("success", "failure"):
        response = await client.get(f"/v1/status/poll/{task['task_id']}")
        assert response.status_code == 200, "Response should be 200 OK"
        task = response.json()
        print(f"{task['task_status']=}")
        print(f"{task['task_position']=}")

        await asyncio.sleep(2)

    assert task["task_status"] == "success"

    return task


@pytest.mark.asyncio
async def test_clear_results(client: AsyncClient):
    """Test removal of task."""

    # Set long delay deletion
    docling_serve_settings.result_removal_delay = 100

    # Convert and wait for completion
    task = await convert_file(client)

    # Get result once
    result_response = await client.get(f"/v1/result/{task['task_id']}")
    assert result_response.status_code == 200, "Response should be 200 OK"
    print("Result 1 ok.")
    result = result_response.json()
    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"

    # Get result twice
    result_response = await client.get(f"/v1/result/{task['task_id']}")
    assert result_response.status_code == 200, "Response should be 200 OK"
    print("Result 2 ok.")
    result = result_response.json()
    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"

    # Clear
    clear_response = await client.get("/v1/clear/results?older_then=0")
    assert clear_response.status_code == 200, "Response should be 200 OK"
    print("Clear ok.")

    # Get deleted result
    result_response = await client.get(f"/v1/result/{task['task_id']}")
    assert result_response.status_code == 404, "Response should be removed"
    print("Result was no longer found.")


@pytest.mark.asyncio
async def test_delay_remove(client: AsyncClient):
    """Test automatic removal of task with delay."""

    # Set short delay deletion
    docling_serve_settings.result_removal_delay = 5

    # Convert and wait for completion
    task = await convert_file(client)

    # Get result once
    result_response = await client.get(f"/v1/result/{task['task_id']}")
    assert result_response.status_code == 200, "Response should be 200 OK"
    print("Result ok.")
    result = result_response.json()
    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"

    print("Sleeping to wait the automatic task deletion.")
    await asyncio.sleep(10)

    # Get deleted result
    result_response = await client.get(f"/v1/result/{task['task_id']}")
    assert result_response.status_code == 404, "Response should be removed"