chore: bump version to 1.3.1 [skip ci]

fix: configuration and performance fixes via upgrade of packages (#328 )
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 08:33:50 +00:00 · 2025-08-21 07:01:51 +00:00 · 2025-08-20 20:40:52 +02:00 · 2025-08-15 11:00:05 +02:00 · 2025-08-14 14:26:57 +00:00 · 2025-08-14 16:10:39 +02:00
82 changed files with 13165 additions and 6569 deletions
--- a/docling_serve/.env.example
+++ b/docling_serve/.env.example
@@ -1,3 +1,3 @@
 TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
 UVICORN_WORKERS=2
-RELOAD=True
+UVICORN_RELOAD=True
--- a/.flake8
+++ b/.flake8
@@ -1,7 +0,0 @@
-[flake8]
-max-line-length = 88
-exclude = test/*
-max-complexity = 18
-docstring-convention = google
-ignore = W503,E203
-classmethod-decorators = classmethod,validator
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,12 @@
+<!-- Thank you for contributing to Docling! -->
+
+<!-- STEPS TO FOLLOW:
+  1. Add a description of the changes (frequently the same as the commit description)
+  2. Enter the issue number next to "Resolves #" below (if there is no tracking issue resolved, **remove that section**)
+  3. Make sure the PR title follows the **Commit Message Formatting**: https://www.conventionalcommits.org/en/v1.0.0/#summary.
+-->
+
+<!-- Uncomment this section with the issue number if an issue is being resolved
+**Issue resolved by this Pull Request:**
+Resolves #
+--->
--- a/.github/SECURITY.md
+++ b/.github/SECURITY.md
@@ -0,0 +1,23 @@
+# Security and Disclosure Information Policy for the Docling Project
+
+The Docling team and community take security bugs seriously. We appreciate your efforts to responsibly disclose your findings, and will make every effort to acknowledge your contributions.
+
+## Reporting a Vulnerability
+
+If you think you've identified a security issue in a Docling project repository, please DO NOT report the issue publicly via the GitHub issue tracker, etc.
+
+Instead, send an email with as many details as possible to [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com). This is a private mailing list for the maintainers team.
+
+Please do not create a public issue.
+
+## Security Vulnerability Response
+
+Each report is acknowledged and analyzed by the core maintainers within 3 working days.
+
+Any vulnerability information shared with core maintainers stays within the Docling project and will not be disseminated to other projects unless it is necessary to get the issue fixed.
+
+After the initial reply to your report, the security team will keep you informed of the progress towards a fix and full announcement, and may ask for additional information or guidance.
+
+## Security Alerts
+
+We will send announcements of security vulnerabilities and steps to remediate on the [Docling announcements](https://github.com/docling-project/docling/discussions/categories/announcements).
--- a/.github/actions/setup-poetry/action.yml
+++ b/.github/actions/setup-poetry/action.yml
@@ -1,19 +0,0 @@
-name: 'Set up Poetry and install'
-description: 'Set up a specific version of Poetry and install dependencies using caching.'
-inputs:
-  python-version:
-    description: "Version range or exact version of Python or PyPy to use, using SemVer's version range syntax."
-    default: '3.11'
-runs:
-  using: 'composite'
-  steps:
-    - name: Install poetry
-      run: pipx install poetry==1.8.3
-      shell: bash
-    - uses: actions/setup-python@v4
-      with:
-        python-version: ${{ inputs.python-version }}
-        cache: 'poetry'
-    - name: Install dependencies
-      run: poetry install --all-extras
-      shell: bash
--- a/.github/dco.yml
+++ b/.github/dco.yml
@@ -0,0 +1,2 @@
+allowRemediationCommits:
+  individual: true
--- a/.github/mergify.yml
+++ b/.github/mergify.yml
@@ -0,0 +1,9 @@
+merge_protections:
+  - name: Enforce conventional commit
+    description: Make sure that we follow https://www.conventionalcommits.org/en/v1.0.0/
+    if:
+      - base = main
+    success_conditions:
+      - "title ~=
+        ^(fix|feat|docs|style|refactor|perf|test|build|ci|chore|revert)(?:\\(.+\
+        \\))?(!)?:"
--- a/.github/scripts/release.sh
+++ b/.github/scripts/release.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Cuts a release. Env: TARGET_VERSION (required), CHGLOG_FILE (optional, defaults to CHANGELOG.md).
+set -e  # trigger failure on error - do not remove!
+set -x  # display command on output
+
+if [ -z "${TARGET_VERSION}" ]; then
+    >&2 echo "No TARGET_VERSION specified"
+    exit 1
+fi
+CHGLOG_FILE="${CHGLOG_FILE:-CHANGELOG.md}"
+
+# update package version in pyproject.toml and refresh the lock entry of this package
+uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version "${TARGET_VERSION}"
+uv lock --upgrade-package docling-serve
+
+# collect release notes of the unreleased changes into a temporary file
+REL_NOTES=$(mktemp)
+uv run --no-sync semantic-release changelog --unreleased >> "${REL_NOTES}"
+
+# update changelog: new release section first, then the previous changelog content (if any)
+TMP_CHGLOG=$(mktemp)
+TARGET_TAG_NAME="v${TARGET_VERSION}"
+RELEASE_URL="$(gh repo view --json url -q ".url")/releases/tag/${TARGET_TAG_NAME}"
+printf '## [%s](%s) - %s\n\n' "${TARGET_TAG_NAME}" "${RELEASE_URL}" "$(date -Idate)" >> "${TMP_CHGLOG}"
+cat "${REL_NOTES}" >> "${TMP_CHGLOG}"
+if [ -f "${CHGLOG_FILE}" ]; then
+    printf "\n" | cat - "${CHGLOG_FILE}" >> "${TMP_CHGLOG}"
+fi
+mv "${TMP_CHGLOG}" "${CHGLOG_FILE}"
+
+# push changes to main, committed as the github-actions bot
+git config --global user.name 'github-actions[bot]'
+git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+git add pyproject.toml uv.lock "${CHGLOG_FILE}"
+COMMIT_MSG="chore: bump version to ${TARGET_VERSION} [skip ci]"
+git commit -m "${COMMIT_MSG}"
+git push origin main
+
+# create GitHub release (incl. Git tag), using the collected release notes as body
+gh release create "${TARGET_TAG_NAME}" -F "${REL_NOTES}"
--- a/.github/styles/config/vocabularies/Docling/accept.txt
+++ b/.github/styles/config/vocabularies/Docling/accept.txt
@@ -0,0 +1,39 @@
+[Dd]ocling
+precommit
+asgi
+async
+(?i)urls
+uvicorn
+[Ww]ebserver
+RQ
+(?i)url
+keyfile
+[Ww]ebsocket(s?)
+[Kk]ubernetes
+UI
+(?i)vllm
+APIs
+[Ss]ubprocesses
+(?i)api
+Kubeflow
+(?i)Jobkit
+(?i)cpu
+(?i)PyTorch
+(?i)CUDA
+(?i)NVIDIA
+(?i)ROCm
+(?i)env
+Gradio
+Podman
+bool
+Ollama
+inbody
+LGTMs
+Dolfi
+Lysak
+Nikos
+Nassar
+Panos
+Vagenas
+Staar
+Livathinos
--- a/.github/vale.ini
+++ b/.github/vale.ini
@@ -0,0 +1,11 @@
+StylesPath = styles
+MinAlertLevel = suggestion
+; Packages = write-good, proselint
+
+Vocab = Docling
+
+[*.md]
+BasedOnStyles = Vale
+
+[CHANGELOG.md]
+BasedOnStyles = 
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -0,0 +1,59 @@
+name: "Run CD"
+
+on:
+  workflow_dispatch:
+
+jobs:
+  code-checks:
+    uses: ./.github/workflows/job-checks.yml
+  pre-release-check:
+    runs-on: ubuntu-latest
+    outputs:
+      TARGET_TAG_V: ${{ steps.version_check.outputs.TRGT_VERSION }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # for fetching tags, required for semantic-release
+      - name: Install uv and set the python version
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+      - name: Install dependencies
+        run: uv sync --only-dev
+      - name: Check version of potential release
+        id: version_check
+        run: |
+          TRGT_VERSION=$(uv run --no-sync semantic-release print-version)
+          echo "TRGT_VERSION=${TRGT_VERSION}" >> "$GITHUB_OUTPUT"
+          echo "${TRGT_VERSION}"
+      - name: Check notes of potential release
+        run: uv run --no-sync semantic-release changelog --unreleased
+  release:
+    needs: [code-checks, pre-release-check]
+    if: needs.pre-release-check.outputs.TARGET_TAG_V != ''
+    environment: auto-release
+    runs-on: ubuntu-latest
+    concurrency: release
+    steps:
+      - uses: actions/create-github-app-token@v1
+        id: app-token
+        with:
+          app-id: ${{ vars.CI_APP_ID }}
+          private-key: ${{ secrets.CI_PRIVATE_KEY }}
+      - uses: actions/checkout@v4
+        with:
+          token: ${{ steps.app-token.outputs.token }}
+          fetch-depth: 0  # for fetching tags, required for semantic-release
+      - name: Install uv and set the python version
+        uses: astral-sh/setup-uv@v6
+        with:
+          enable-cache: true
+      - name: Install dependencies
+        run: uv sync --only-dev
+      - name: Run release script
+        env:
+          GH_TOKEN: ${{ steps.app-token.outputs.token }}
+          TARGET_VERSION: ${{ needs.pre-release-check.outputs.TARGET_TAG_V }}
+          CHGLOG_FILE: CHANGELOG.md
+        run: ./.github/scripts/release.sh
+        shell: bash
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -1,34 +0,0 @@
-name: Run linter checks
-on:
-  push:
-    branches: ["main"]
-  pull_request:
-    branches: ["main"]
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  py-lint:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        python-version: ['3.11']
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/setup-poetry
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Run styling check
-        run: poetry run pre-commit run --all-files
-
-  markdown-lint:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-      - name: markdownlint-cli2-action
-        uses: DavidAnson/markdownlint-cli2-action@v16
-        with:
-          globs: "**/*.md"
-
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -0,0 +1,53 @@
+name: Dry run docling-serve image building
+
+on:
+  workflow_call:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build_image:
+    name: Build ${{ matrix.spec.name }} container image
+    strategy:
+      matrix:
+        spec:
+          - name: docling-project/docling-serve
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
+            platforms: linux/amd64, linux/arm64
+          - name: docling-project/docling-serve-cpu
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
+            platforms: linux/amd64, linux/arm64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
+          - name: docling-project/docling-serve-cu126
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
+            platforms: linux/amd64
+          - name: docling-project/docling-serve-cu128
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
+            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
+
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
+
+    uses: ./.github/workflows/job-image.yml
+    with:
+      publish: false
+      build_args: ${{ matrix.spec.build_args }}
+      ghcr_image_name: ${{ matrix.spec.name }}
+      quay_image_name: ""
+      platforms: ${{ matrix.spec.platforms }}
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,25 @@
+name: "Run CI"
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+
+jobs:
+  code-checks:
+    # if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'docling-project/docling-serve' && github.event.pull_request.head.repo.full_name != 'docling-project/docling-serve') }}
+    uses: ./.github/workflows/job-checks.yml
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
+
+  build-images:
+    uses: ./.github/workflows/ci-images-dryrun.yml
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
--- a/.github/workflows/dco-advisor.yml
+++ b/.github/workflows/dco-advisor.yml
@@ -0,0 +1,192 @@
+name: DCO Advisor Bot
+
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+
+permissions:
+  pull-requests: write
+  issues: write
+
+jobs:
+  dco_advisor:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Handle DCO check result
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const pr = context.payload.pull_request || context.payload.check_run?.pull_requests?.[0];
+            if (!pr) return;
+
+            const prNumber = pr.number;
+            const baseRef = pr.base.ref;
+            const headSha =
+              context.payload.check_run?.head_sha ||
+              pr.head?.sha;
+            const username = pr.user.login;
+
+            console.log("HEAD SHA:", headSha);
+
+            const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
+
+            // Poll until DCO check has a conclusion (max 6 attempts, 30s)
+            let dcoCheck = null;
+            for (let attempt = 0; attempt < 6; attempt++) {
+              const { data: checks } = await github.rest.checks.listForRef({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                ref: headSha
+              });
+
+              
+              console.log("All check runs:");
+                checks.check_runs.forEach(run => {
+                console.log(`- ${run.name} (${run.status}/${run.conclusion}) @ ${run.head_sha}`);
+              });
+
+              dcoCheck = checks.check_runs.find(run =>
+                run.name.toLowerCase().includes("dco") &&
+              !run.name.toLowerCase().includes("dco_advisor") &&
+                run.head_sha === headSha
+              );
+
+
+              if (dcoCheck?.conclusion) break;
+              console.log(`Waiting for DCO check... (${attempt + 1})`);
+              await sleep(5000); // wait 5 seconds
+            }
+
+            if (!dcoCheck || !dcoCheck.conclusion) {
+              console.log("DCO check did not complete in time.");
+              return;
+            }
+
+            const isFailure = ["failure", "action_required"].includes(dcoCheck.conclusion);
+            console.log(`DCO check conclusion for ${headSha}: ${dcoCheck.conclusion} (treated as ${isFailure ? "failure" : "success"})`);
+
+            // Parse DCO output for commit SHAs and author
+            let badCommits = [];
+            let authorName = "";
+            let authorEmail = "";
+            let moreInfo = `More info: [DCO check report](${dcoCheck?.html_url})`;
+
+            if (isFailure) {
+                // Paginate: a single listCommits call only returns the first page, so unsigned commits beyond it would be missed.
+                const commits = await github.paginate(github.rest.pulls.listCommits, {
+                    owner: context.repo.owner,
+                    repo: context.repo.repo,
+                    pull_number: prNumber,
+                });
+
+                for (const commit of commits) {
+                    // A commit counts as signed off when its message contains a line of the form "Signed-off-by: Name <email>".
+                    const signoffMatch = commit.commit.message.match(/^Signed-off-by:\s+.+<.+>$/m);
+                    if (!signoffMatch) {
+                        console.log(`Bad commit found ${commit.sha}`)
+                        badCommits.push({
+                            sha: commit.sha,
+                            authorName: commit.commit.author.name,
+                            authorEmail: commit.commit.author.email,
+                        });
+                    }
+                }
+            }
+
+            // If multiple authors are present, you could adapt the message accordingly
+            // For now, we'll just use the first one
+            if (badCommits.length > 0) {
+            authorName = badCommits[0].authorName;
+            authorEmail = badCommits[0].authorEmail;
+            }
+
+            // Generate remediation commit message if needed
+            let remediationSnippet = "";
+            if (badCommits.length && authorEmail) {
+              remediationSnippet = `git commit --allow-empty -s -m "DCO Remediation Commit for ${authorName} <${authorEmail}>\n\n` +
+                badCommits.map(c => `I, ${c.authorName} <${c.authorEmail}>, hereby add my Signed-off-by to this commit: ${c.sha}`).join('\n') +
+                `"`;
+            } else {
+              remediationSnippet = "# Unable to auto-generate remediation message. Please check the DCO check details.";
+            }
+
+            // Build comment
+            const commentHeader = '<!-- dco-advice-bot -->';
+            let body = "";
+
+            if (isFailure) {
+              body = [
+                commentHeader,
+                '❌ **DCO Check Failed**',
+                '',
+                `Hi @${username}, your pull request has failed the Developer Certificate of Origin (DCO) check.`,
+                '',
+                'This repository supports **remediation commits**, so you can fix this without rewriting history — but you must follow the required message format.',
+                '',
+                '---',
+                '',
+                '### 🛠 Quick Fix: Add a remediation commit',
+                'Run this command:',
+                '',
+                '```bash',
+                remediationSnippet,
+                'git push',
+                '```',
+                '',
+                '---',
+                '',
+                '<details>',
+                '<summary>🔧 Advanced: Sign off each commit directly</summary>',
+                '',
+                '**For the latest commit:**',
+                '```bash',
+                'git commit --amend --signoff',
+                'git push --force-with-lease',
+                '```',
+                '',
+                '**For multiple commits:**',
+                '```bash',
+                `git rebase --signoff origin/${baseRef}`,
+                'git push --force-with-lease',
+                '```',
+                '',
+                '</details>',
+                '',
+                moreInfo
+              ].join('\n');
+            } else {
+              body = [
+                commentHeader,
+                '✅ **DCO Check Passed**',
+                '',
+                `Thanks @${username}, all your commits are properly signed off. 🎉`
+              ].join('\n');
+            }
+
+            // Get all existing comments on the PR (paginated, so a previous bot comment is found even on long threads)
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber
+            });
+
+            // Look for a previous bot comment via the same marker that is written into the comment body
+            const existingComment = comments.find(c =>
+              c.body?.includes(commentHeader)
+            );
+
+            if (existingComment) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existingComment.id,
+                body: body
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: body
+              });
+            }
--- a/.github/workflows/images-dryrun.yml
+++ b/.github/workflows/images-dryrun.yml
@@ -1,105 +0,0 @@
-name: Dry run docling-serve image building
-
-on:
-  pull_request:
-    branches: ["main"]
-
-env:
-  GHCR_REGISTRY: ghcr.io
-  GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
-
-jobs:
-  build_cpu_image:
-    name: Build docling-serve "CPU only" container image
-    runs-on: ubuntu-latest
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
-
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract metadata (tags, labels) for docling-serve (CPU only) ghcr image
-        id: ghcr_serve_cpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME }}
-
-      - name: Build docling-serve-cpu image
-        id: build-serve-cpu-ghcr
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: false
-          tags: ${{ steps.ghcr_serve_cpu_meta.outputs.tags }}
-          labels: ${{ steps.ghcr_serve_cpu_meta.outputs.labels }}
-          platforms: linux/amd64, linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=true
-
-      - name: Remove Local Docker Images
-        run: |
-          docker image prune -af
-
-  build_gpu_image:
-    name: Build docling-serve (with GPU support) container image
-    runs-on: ubuntu-latest
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
-
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract metadata (tags, labels) for docling-serve (GPU) ghcr image
-        id: ghcr_serve_gpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME }}
-
-      - name: Build docling-serve (GPU) image
-        id: build-serve-gpu-ghcr
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: false
-          tags: ${{ steps.ghcr_serve_gpu_meta.outputs.tags }}
-          labels: ${{ steps.ghcr_serve_gpu_meta.outputs.labels }}
-          platforms: linux/amd64,linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=false
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -4,193 +4,55 @@ on:
  push:
    branches:
      - main
-    tags:
-      - 'v*'
+  release:
+    types: [published]

-env:
-  GHCR_REGISTRY: ghcr.io
-  GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
-  QUAY_REGISTRY: quay.io
-  QUAY_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  QUAY_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true

 jobs:
-  build_and_publish_cpu_images:
-    name: Push docling-serve "CPU only" container image to GHCR and QUAY
-    runs-on: ubuntu-latest
-    environment: registry-creds
+  build_and_publish_images:
+    name: Build and push ${{ matrix.spec.name }} container image to GHCR and QUAY
+    strategy:
+      matrix:
+        spec:
+          - name: docling-project/docling-serve
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
+            platforms: linux/amd64, linux/arm64
+          - name: docling-project/docling-serve-cpu
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
+            platforms: linux/amd64, linux/arm64
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
+          - name: docling-project/docling-serve-cu126
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
+            platforms: linux/amd64
+          - name: docling-project/docling-serve-cu128
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
+            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
    permissions:
      packages: write
      contents: read
      attestations: write
      id-token: write
+    secrets: inherit

-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Log in to the GHCR container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.GHCR_REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Log in to the Quay container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.QUAY_REGISTRY }}
-          username: ${{ secrets.QUAY_USERNAME }}
-          password: ${{ secrets.QUAY_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract metadata (tags, labels) for docling-serve (CPU only) ghcr image
-        id: ghcr_serve_cpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve-cpu image to ghcr.io
-        id: push-serve-cpu-ghcr
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.ghcr_serve_cpu_meta.outputs.tags }}
-          labels: ${{ steps.ghcr_serve_cpu_meta.outputs.labels }}
-          platforms: linux/amd64, linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=true
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v1
-        with:
-          subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_CPU_IMAGE_NAME}}
-          subject-digest: ${{ steps.push-serve-cpu-ghcr.outputs.digest }}
-          push-to-registry: true
-
-      - name: Extract metadata (tags, labels) for docling-serve (CPU only) quay image
-        id: quay_serve_cpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.QUAY_REGISTRY }}/${{ env.QUAY_DOCLING_SERVE_CPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve-cpu image to quay.io
-        id: push-serve-cpu-quay
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.quay_serve_cpu_meta.outputs.tags }}
-          labels: ${{ steps.quay_serve_cpu_meta.outputs.labels }}
-          platforms: linux/amd64, linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=true
-      - name: Remove Local Docker Images
-        run: |
-          docker image prune -af
-
-  build_and_publish_gpu_images:
-    name: Push docling-serve (with GPU support) container image to GHCR and QUAY
-    runs-on: ubuntu-latest
-    environment: registry-creds
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
-
-    steps:
-      - name: Check out the repo
-        uses: actions/checkout@v4
-
-      - name: Log in to the GHCR container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.GHCR_REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Log in to the Quay container image registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.QUAY_REGISTRY }}
-          username: ${{ secrets.QUAY_USERNAME }}
-          password: ${{ secrets.QUAY_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Cache Docker layers
-        uses: actions/cache@v4
-        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
-
-      - name: Extract metadata (tags, labels) for docling-serve (GPU) ghcr image
-        id: ghcr_serve_gpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve (GPU) image to ghcr.io
-        id: push-serve-gpu-ghcr
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.ghcr_serve_gpu_meta.outputs.tags }}
-          labels: ${{ steps.ghcr_serve_gpu_meta.outputs.labels }}
-          platforms: linux/amd64,linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=false
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v1
-        with:
-          subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_DOCLING_SERVE_GPU_IMAGE_NAME}}
-          subject-digest: ${{ steps.push-serve-gpu-ghcr.outputs.digest }}
-          push-to-registry: true
-
-      - name: Extract metadata (tags, labels) for docling-serve (GPU) quay image
-        id: quay_serve_gpu_meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ env.QUAY_REGISTRY }}/${{ env.QUAY_DOCLING_SERVE_GPU_IMAGE_NAME }}
-
-      - name: Build and push docling-serve (GPU) image to quay.io
-        id: push-serve-gpu-quay
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: ${{ steps.quay_serve_gpu_meta.outputs.tags }}
-          labels: ${{ steps.quay_serve_gpu_meta.outputs.labels }}
-          platforms: linux/amd64,linux/arm64
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
-          file: Containerfile
-          build-args: |
-            --build-arg CPU_ONLY=false
+    uses: ./.github/workflows/job-image.yml
+    with:
+      publish: true
+      environment: registry-creds
+      build_args: ${{ matrix.spec.build_args }}
+      ghcr_image_name: ${{ matrix.spec.name }}
+      quay_image_name: ${{ matrix.spec.name }}
+      platforms: ${{ matrix.spec.platforms }}
--- a/.github/workflows/job-build.yml
+++ b/.github/workflows/job-build.yml
@@ -0,0 +1,29 @@
+name: Run checks
+
+on:
+  workflow_call:
+
+jobs:
+  build-package:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.12']
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv and set the python version
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          enable-cache: true
+      - name: Install dependencies
+        run: uv sync --all-extras --no-extra flash-attn
+      - name: Build package
+        run: uv build
+      - name: Check content of wheel
+        run: unzip -l dist/*.whl
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
--- a/.github/workflows/job-checks.yml
+++ b/.github/workflows/job-checks.yml
@@ -0,0 +1,68 @@
+name: Run checks
+
+on:
+  workflow_call:
+
+jobs:
+  py-lint:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.12']
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install uv and set the python version
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          enable-cache: true
+
+      - name: pre-commit cache key
+        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> "$GITHUB_ENV"
+      - uses: actions/cache@v4
+        with:
+          path: ~/.cache/pre-commit
+          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
+
+      - name: Install dependencies
+        run: uv sync --frozen --all-extras --no-extra flash-attn
+
+      - name: Run styling check
+        run: uv run pre-commit run --all-files
+
+  build-package:
+    uses: ./.github/workflows/job-build.yml
+
+  test-package:
+    needs:
+      - build-package
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.12']
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Install uv and set the python version
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: ${{ matrix.python-version }}
+          enable-cache: true
+      - name: Create virtual environment
+        run: uv venv
+      - name: Install package
+        run: uv pip install dist/*.whl
+      - name: Create the server
+        run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'
+
+  markdown-lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: markdownlint-cli2-action
+        uses: DavidAnson/markdownlint-cli2-action@v16
+        with:
+          globs: "**/*.md"
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -0,0 +1,141 @@
+name: Build docling-serve container image
+
+on:
+  workflow_call:
+    inputs:
+      build_args:
+        type: string
+        description: "Extra build arguments for the build."
+        default: ""
+      ghcr_image_name:
+        type: string
+        description: "Name of the image for GHCR."
+      quay_image_name:
+        type: string
+        description: "Name of the image for Quay."
+      platforms:
+        type: string
+        description: "Platform argument for building images."
+        default: linux/amd64, linux/arm64
+      publish:
+        type: boolean
+        description: "If true, the images will be published."
+        default: false
+      environment:
+        type: string
+        description: "GH Action environment"
+        default: ""
+
+env:
+  GHCR_REGISTRY: ghcr.io
+  QUAY_REGISTRY: quay.io
+
+jobs:
+  image:
+    runs-on: ubuntu-latest
+    permissions:
+      packages: write
+      contents: read
+      attestations: write
+      id-token: write
+    environment: ${{ inputs.environment }}
+
+    steps:
+      - name: Free up space in github runner
+        # Free space as indicated here : https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
+        run: |
+            df -h
+            sudo rm -rf "/usr/local/share/boost"
+            sudo rm -rf "$AGENT_TOOLSDIRECTORY"
+            sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
+            # shellcheck disable=SC2046
+            sudo docker rmi "$(docker image ls -aq)" >/dev/null 2>&1 || true
+            df -h
+
+      - name: Check out the repo
+        uses: actions/checkout@v4
+
+      - name: Log in to the GHCR container image registry
+        if: ${{ inputs.publish }}
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.GHCR_REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Log in to the Quay container image registry
+        if: ${{ inputs.publish }}
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.QUAY_REGISTRY }}
+          username: ${{ secrets.QUAY_USERNAME }}
+          password: ${{ secrets.QUAY_TOKEN }}
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Cache Docker layers
+        uses: actions/cache@v4
+        with:
+          path: /tmp/.buildx-cache
+          key: ${{ runner.os }}-buildx-${{ github.sha }}
+          restore-keys: |
+            ${{ runner.os }}-buildx-
+
+      - name: Extract metadata (tags, labels) for docling-serve ghcr image
+        id: ghcr_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
+
+      - name: Build and push image to ghcr.io
+        id: ghcr_push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: ${{ inputs.publish }}
+          tags: ${{ steps.ghcr_meta.outputs.tags }}
+          labels: ${{ steps.ghcr_meta.outputs.labels }}
+          platforms: ${{ inputs.platforms}}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          build-args: ${{ inputs.build_args }}
+
+      - name: Generate artifact attestation
+        if: ${{ inputs.publish }}
+        uses: actions/attest-build-provenance@v1
+        with:
+          subject-name: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}
+          subject-digest: ${{ steps.ghcr_push.outputs.digest }}
+          push-to-registry: true
+
+      - name: Extract metadata (tags, labels) for docling-serve quay image
+        if: ${{ inputs.publish }}
+        id: quay_meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.QUAY_REGISTRY }}/${{ inputs.quay_image_name }}
+
+      - name: Build and push image to quay.io
+        if: ${{ inputs.publish }}
+        # id: push-serve-cpu-quay
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          push: ${{ inputs.publish }}
+          tags: ${{ steps.quay_meta.outputs.tags }}
+          labels: ${{ steps.quay_meta.outputs.labels }}
+          platforms: ${{ inputs.platforms}}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          file: Containerfile
+          build-args: ${{ inputs.build_args }}
+      
+      # - name: Inspect the image details
+      #   run: |
+      #     echo "${{ steps.ghcr_push.outputs.metadata }}"
+
+      - name: Remove Local Docker Images
+        run: |
+          docker image prune -af
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,34 @@
+name: "Build and publish package"
+
+on:
+  release:
+    types: [published]
+
+permissions:
+  contents: read
+
+jobs:
+
+  build-package:
+    uses: ./.github/workflows/job-build.yml
+
+  build-and-publish:
+    needs:
+      - build-package
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/docling-serve  # PyPI project page for this package
+    permissions:
+      id-token: write  # IMPORTANT: mandatory for trusted publishing
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Publish distribution 📦 to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          # currently not working with reusable workflows
+          attestations: false
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,7 @@
 model_artifacts/
 scratch/
+.md-lint
+actionlint

 # Created by https://www.toptal.com/developers/gitignore/api/python,macos,virtualenv,pycharm,visualstudiocode,emacs,vim,jupyternotebooks
 # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,virtualenv,pycharm,visualstudiocode,emacs,vim,jupyternotebooks
@@ -442,3 +444,5 @@ pip-selfcheck.json
 # Makefile
 .action-lint
 .markdown-lint
+
+cookies.txt
--- a/.markdownlint-cli2.yaml
+++ b/.markdownlint-cli2.yaml
@@ -3,6 +3,8 @@ config:
  no-emphasis-as-header: false
  first-line-heading: false
  MD033:
-    allowed_elements: ["details", "summary"]
+    allowed_elements: ["details", "summary", "br", "a", "b", "p", "img"]
+  MD024:
+    siblings_only: true
 globs:
  - "**/*.md"
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,49 +1,39 @@
 fail_fast: true
 repos:
-  - repo: local
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.6
    hooks:
-      - id: system
-        name: Black
-        entry: poetry run black docling_serve tests
-        pass_filenames: false
-        language: system
-        files: '\.py$'
-  - repo: local
-    hooks:
-      - id: system
-        name: isort
-        entry: poetry run isort docling_serve tests
-        pass_filenames: false
-        language: system
-        files: '\.py$'
-  - repo: local
-    hooks:
-      - id: autoflake
-        name: autoflake
-        entry: poetry run autoflake docling_serve tests
-        pass_filenames: false
-        language: system
-        files: '\.py$'
-  - repo: local
-    hooks:
-      - id: system
-        name: flake8
-        entry: poetry run flake8 docling_serve
-        pass_filenames: false
-        language: system
-        files: '\.py$'
+      # Run the Ruff formatter.
+      - id: ruff-format
+        name: "Ruff formatter"
+        args: [--config=pyproject.toml]
+        files: '^(docling_serve|tests).*\.(py|ipynb)$'
+      # Run the Ruff linter.
+      - id: ruff
+        name: "Ruff linter"
+        args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
+        files: '^(docling_serve|tests).*\.(py|ipynb)$'
  - repo: local
    hooks:
      - id: system
        name: MyPy
-        entry: poetry run mypy docling_serve
+        entry: uv run --no-sync mypy docling_serve
        pass_filenames: false
        language: system
        files: '\.py$'
-  - repo: local
+  - repo: https://github.com/errata-ai/vale
+    rev: v3.12.0  # Use latest stable version
    hooks:
-      - id: system
-        name: Poetry check
-        entry: poetry check --lock
+      - id: vale
+        name: vale sync
        pass_filenames: false
-        language: system
+        args: [sync, "--config=.github/vale.ini"]
+      - id: vale
+        name: Spell and Style Check with Vale
+        args: ["--config=.github/vale.ini"]
+        files: \.md$
+  - repo: https://github.com/astral-sh/uv-pre-commit
+    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
+    rev: 0.8.3
+    hooks:
+      - id: uv-lock
--- a/.python-version
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -0,0 +1,287 @@
+## [v1.3.1](https://github.com/docling-project/docling-serve/releases/tag/v1.3.1) - 2025-08-21
+
+### Fix
+
+* Configuration and performance fixes via upgrade of packages ([#328](https://github.com/docling-project/docling-serve/issues/328)) ([`f02dbc0`](https://github.com/docling-project/docling-serve/commit/f02dbc01449fe1caf3fb4a73c0a5f4adf8265faf))
+
+### Documentation
+
+* Fix parameter in api key docs ([#323](https://github.com/docling-project/docling-serve/issues/323)) ([`37fe022`](https://github.com/docling-project/docling-serve/commit/37fe02277b3e2358eced28e15b4360e7c82d3b43))
+
+## [v1.3.0](https://github.com/docling-project/docling-serve/releases/tag/v1.3.0) - 2025-08-14
+
+### Feature
+
+* Add configuration option for apikey security ([#322](https://github.com/docling-project/docling-serve/issues/322)) ([`9a64410`](https://github.com/docling-project/docling-serve/commit/9a644105523d312431993ded8dd88e064550a5db))
+* Add RQ engine ([#315](https://github.com/docling-project/docling-serve/issues/315)) ([`885f319`](https://github.com/docling-project/docling-serve/commit/885f319d3a3488a4090869560447437a4104f14e))
+
+### Documentation
+
+* Example of docling-serve deployment in the RQ engine mode ([#321](https://github.com/docling-project/docling-serve/issues/321)) ([`71edf41`](https://github.com/docling-project/docling-serve/commit/71edf4184960d8664ef9da20617e2d0f91793d36))
+* Handling models in docling-serve ([#319](https://github.com/docling-project/docling-serve/issues/319)) ([`6e9aa8c`](https://github.com/docling-project/docling-serve/commit/6e9aa8c759220458281c7fe4c87443ac41023eee))
+* Add Gradio cache usage ([#312](https://github.com/docling-project/docling-serve/issues/312)) ([`d584895`](https://github.com/docling-project/docling-serve/commit/d584895e1108d71a0f45deadcd3c669eb0a58133))
+
+## [v1.2.2](https://github.com/docling-project/docling-serve/releases/tag/v1.2.2) - 2025-08-13
+
+### Fix
+
+* Update of transformers module to 4.55.1 ([#316](https://github.com/docling-project/docling-serve/issues/316)) ([`7692eb2`](https://github.com/docling-project/docling-serve/commit/7692eb26006fd4deaa021180c99e23a1b65de506))
+
+## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
+
+### Fix
+
+* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
+* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
+
+### Documentation
+
+* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
+* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
+* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
+
+## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
+
+### Feature
+
+* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
+* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))
+
+## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30
+
+### Feature
+
+* Add docling-mcp in the distribution ([#290](https://github.com/docling-project/docling-serve/issues/290)) ([`ecb1874`](https://github.com/docling-project/docling-serve/commit/ecb1874a507bef83d102e0e031e49fed34298637))
+* Add 3.0 openapi endpoint ([#287](https://github.com/docling-project/docling-serve/issues/287)) ([`ec594d8`](https://github.com/docling-project/docling-serve/commit/ec594d84fe36df23e7d010a2fcf769856c43600b))
+* Add new source and target ([#270](https://github.com/docling-project/docling-serve/issues/270)) ([`3771c1b`](https://github.com/docling-project/docling-serve/commit/3771c1b55403bd51966d07d8f760d5c4fbcc1760))
+
+### Fix
+
+* Referenced paths relative to zip root ([#289](https://github.com/docling-project/docling-serve/issues/289)) ([`1333f71`](https://github.com/docling-project/docling-serve/commit/1333f71c9c6495342b2169d574e921f828446f15))
+
+## [v1.0.1](https://github.com/docling-project/docling-serve/releases/tag/v1.0.1) - 2025-07-21
+
+### Fix
+
+* Docling update v2.42.0 ([#277](https://github.com/docling-project/docling-serve/issues/277)) ([`8706706`](https://github.com/docling-project/docling-serve/commit/8706706e8797b0a06ec4baa7cf87988311be68b6))
+
+### Documentation
+
+* Typo in README ([#276](https://github.com/docling-project/docling-serve/issues/276)) ([`766adb2`](https://github.com/docling-project/docling-serve/commit/766adb248113c7bd5144d14b3c82929a2ad29f8e))
+
+## [v1.0.0](https://github.com/docling-project/docling-serve/releases/tag/v1.0.0) - 2025-07-14
+
+### Feature
+
+* V1 api with list of sources and target ([#249](https://github.com/docling-project/docling-serve/issues/249)) ([`56e328b`](https://github.com/docling-project/docling-serve/commit/56e328baf76b4bb0476fc6ca820b52034e4f97bf))
+* Use orchestrators from jobkit ([#248](https://github.com/docling-project/docling-serve/issues/248)) ([`daa924a`](https://github.com/docling-project/docling-serve/commit/daa924a77e56d063ef17347dfd8a838872a70529))
+
+### Breaking
+
+* v1 api with list of sources and target ([#249](https://github.com/docling-project/docling-serve/issues/249)) ([`56e328b`](https://github.com/docling-project/docling-serve/commit/56e328baf76b4bb0476fc6ca820b52034e4f97bf))
+* use orchestrators from jobkit ([#248](https://github.com/docling-project/docling-serve/issues/248)) ([`daa924a`](https://github.com/docling-project/docling-serve/commit/daa924a77e56d063ef17347dfd8a838872a70529))
+
+## [v0.16.1](https://github.com/docling-project/docling-serve/releases/tag/v0.16.1) - 2025-07-07
+
+### Fix
+
+* Upgrade deps including, docling v2.40.0 with locks in models init ([#264](https://github.com/docling-project/docling-serve/issues/264)) ([`bfde1a0`](https://github.com/docling-project/docling-serve/commit/bfde1a0991c2da53b72c4f131ff74fa10f6340de))
+* Missing tesseract osd ([#263](https://github.com/docling-project/docling-serve/issues/263)) ([`eb3892e`](https://github.com/docling-project/docling-serve/commit/eb3892ee141eb2c941d580b095d8a266f2d2610c))
+* Properly load models at boot ([#244](https://github.com/docling-project/docling-serve/issues/244)) ([`149a8cb`](https://github.com/docling-project/docling-serve/commit/149a8cb1c0a16c1e0b7d17f40b88b4d6e8f0109d))
+
+### Documentation
+
+* Fix typo ([#259](https://github.com/docling-project/docling-serve/issues/259)) ([`93b8471`](https://github.com/docling-project/docling-serve/commit/93b84712b2c6d180908a197847b52b217a7ff05f))
+* Change the doc example ([#258](https://github.com/docling-project/docling-serve/issues/258)) ([`c45b937`](https://github.com/docling-project/docling-serve/commit/c45b93706466a073ab4a5c75aa8a267110873e26))
+* Update typo ([#247](https://github.com/docling-project/docling-serve/issues/247)) ([`50e431f`](https://github.com/docling-project/docling-serve/commit/50e431f30fbffa33f43727417fe746d20cbb9d6b))
+
+## [v0.16.0](https://github.com/docling-project/docling-serve/releases/tag/v0.16.0) - 2025-06-25
+
+### Feature
+
+* Package updates and more cuda images ([#229](https://github.com/docling-project/docling-serve/issues/229)) ([`30aca92`](https://github.com/docling-project/docling-serve/commit/30aca92298ab0d86bb4debcfcacb2dd8b9040a27))
+
+### Documentation
+
+* Update example resources and improve README ([#231](https://github.com/docling-project/docling-serve/issues/231)) ([`80755a7`](https://github.com/docling-project/docling-serve/commit/80755a7d5955f7d0c53df8e558fdd852dd1f5b75))
+
+## [v0.15.0](https://github.com/docling-project/docling-serve/releases/tag/v0.15.0) - 2025-06-17
+
+### Feature
+
+* Use redocs and scalar as api docs ([#228](https://github.com/docling-project/docling-serve/issues/228)) ([`873d05a`](https://github.com/docling-project/docling-serve/commit/873d05aefe141c63b9c1cf53b23b4fa8c96de05d))
+
+### Fix
+
+* "tesserocr" instead of "tesseract_cli" in usage docs ([#223](https://github.com/docling-project/docling-serve/issues/223)) ([`196c5ce`](https://github.com/docling-project/docling-serve/commit/196c5ce42a04d77234a4212c3d9b9772d2c2073e))
+
+## [v0.14.0](https://github.com/docling-project/docling-serve/releases/tag/v0.14.0) - 2025-06-17
+
+### Feature
+
+* Read supported file extensions from docling ([#214](https://github.com/docling-project/docling-serve/issues/214)) ([`524f6a8`](https://github.com/docling-project/docling-serve/commit/524f6a8997b86d2f869ca491ec8fb40585b42ca4))
+
+### Fix
+
+* Typo in Headline ([#220](https://github.com/docling-project/docling-serve/issues/220)) ([`d5455b7`](https://github.com/docling-project/docling-serve/commit/d5455b7f66de39ea1f8b8927b5968d2baa23ca88))
+
+## [v0.13.0](https://github.com/docling-project/docling-serve/releases/tag/v0.13.0) - 2025-06-04
+
+### Feature
+
+* Upgrade docling to 2.36 ([#212](https://github.com/docling-project/docling-serve/issues/212)) ([`ffea347`](https://github.com/docling-project/docling-serve/commit/ffea34732b24fdd438fabd6df02d3d9ce66b4534))
+
+## [v0.12.0](https://github.com/docling-project/docling-serve/releases/tag/v0.12.0) - 2025-06-03
+
+### Feature
+
+* Export annotations in markdown and html (Docling upgrade) ([#202](https://github.com/docling-project/docling-serve/issues/202)) ([`c4c41f1`](https://github.com/docling-project/docling-serve/commit/c4c41f16dff83c5d2a0b8a4c625b5de19b36b7c5))
+
+### Fix
+
+* Processing complex params in multipart-form ([#210](https://github.com/docling-project/docling-serve/issues/210)) ([`7066f35`](https://github.com/docling-project/docling-serve/commit/7066f3520a88c07df1c80a0cc6c4339eaac4d6a7))
+
+### Documentation
+
+* Add openshift replicasets examples ([#209](https://github.com/docling-project/docling-serve/issues/209)) ([`6a8190c`](https://github.com/docling-project/docling-serve/commit/6a8190c315792bd1e0e2b0af310656baaa5551e5))
+
+## [v0.11.0](https://github.com/docling-project/docling-serve/releases/tag/v0.11.0) - 2025-05-23
+
+### Feature
+
+* Page break placeholder in markdown exports options ([#194](https://github.com/docling-project/docling-serve/issues/194)) ([`32b8a80`](https://github.com/docling-project/docling-serve/commit/32b8a809f348bf9fbde657f93589a56935d3749d))
+* Clear results registry ([#192](https://github.com/docling-project/docling-serve/issues/192)) ([`de002df`](https://github.com/docling-project/docling-serve/commit/de002dfcdc111c942a08b156c84b7fa22b3fbaf3))
+* Upgrade to Docling 2.33.0 ([#198](https://github.com/docling-project/docling-serve/issues/198)) ([`abe5aa0`](https://github.com/docling-project/docling-serve/commit/abe5aa03f54d44ecf5c6d76e3258028997a53e68))
+* Api to trigger offloading the models ([#188](https://github.com/docling-project/docling-serve/issues/188)) ([`00be428`](https://github.com/docling-project/docling-serve/commit/00be4284904d55b78c75c5475578ef11c2ade94c))
+* Figure annotations @ docling components 0.0.7 ([#181](https://github.com/docling-project/docling-serve/issues/181)) ([`3ff1b2f`](https://github.com/docling-project/docling-serve/commit/3ff1b2f9834aca37472a895a0e3da47560457d77))
+
+### Fix
+
+* Usage of hashlib for FIPS ([#171](https://github.com/docling-project/docling-serve/issues/171)) ([`8406fb9`](https://github.com/docling-project/docling-serve/commit/8406fb9b59d83247b8379974cabed497703dfc4d))
+
+### Documentation
+
+* Example and instructions on how to load model weights to persistent volume ([#197](https://github.com/docling-project/docling-serve/issues/197)) ([`3f090b7`](https://github.com/docling-project/docling-serve/commit/3f090b7d15eaf696611d89bbbba5b98569610828))
+* Async api usage and fixes ([#195](https://github.com/docling-project/docling-serve/issues/195)) ([`21c1791`](https://github.com/docling-project/docling-serve/commit/21c1791e427f5b1946ed46c68dfda03c957dca8f))
+
+## [v0.10.1](https://github.com/docling-project/docling-serve/releases/tag/v0.10.1) - 2025-04-30
+
+### Fix
+
+* Avoid missing specialized keys in the options hash ([#166](https://github.com/docling-project/docling-serve/issues/166)) ([`36787bc`](https://github.com/docling-project/docling-serve/commit/36787bc0616356a6199da618d8646de51636b34e))
+* Allow users to set the area threshold for picture descriptions ([#165](https://github.com/docling-project/docling-serve/issues/165)) ([`509f488`](https://github.com/docling-project/docling-serve/commit/509f4889f8ed4c0f0ce25bec4126ef1f1199797c))
+* Expose max wait time in sync endpoints ([#164](https://github.com/docling-project/docling-serve/issues/164)) ([`919cf5c`](https://github.com/docling-project/docling-serve/commit/919cf5c0414f2f11eb8012f451fed7a8f582b7ad))
+* Add flash-attn for cuda images ([#161](https://github.com/docling-project/docling-serve/issues/161)) ([`35c2630`](https://github.com/docling-project/docling-serve/commit/35c2630c613cf229393fc67b6938152b063ff498))
+
+## [v0.10.0](https://github.com/docling-project/docling-serve/releases/tag/v0.10.0) - 2025-04-28
+
+### Feature
+
+* Add support for file upload and return as file in async endpoints ([#152](https://github.com/docling-project/docling-serve/issues/152)) ([`c65f3c6`](https://github.com/docling-project/docling-serve/commit/c65f3c654c76c6b64b6aada1f0a153d74789d629))
+
+### Documentation
+
+* Fix new default pdf_backend ([#158](https://github.com/docling-project/docling-serve/issues/158)) ([`829effe`](https://github.com/docling-project/docling-serve/commit/829effec1a1b80320ccaf2c501be8015169b6fa3))
+* Fixing small typo in docs ([#155](https://github.com/docling-project/docling-serve/issues/155)) ([`14bafb2`](https://github.com/docling-project/docling-serve/commit/14bafb26286b94f80b56846c50d6e9a6d99a9763))
+
+## [v0.9.0](https://github.com/docling-project/docling-serve/releases/tag/v0.9.0) - 2025-04-25
+
+### Feature
+
+* Expose picture description options ([#148](https://github.com/docling-project/docling-serve/issues/148)) ([`4c9571a`](https://github.com/docling-project/docling-serve/commit/4c9571a052d5ec0044e49225bc5615e13cdb0a56))
+* Add parameters for Kubeflow pipeline engine (WIP) ([#107](https://github.com/docling-project/docling-serve/issues/107)) ([`26bef5b`](https://github.com/docling-project/docling-serve/commit/26bef5bec060f0afd8d358816b68c3f2c0dd4bc2))
+
+### Fix
+
+* Produce image artifacts in referenced mode ([#151](https://github.com/docling-project/docling-serve/issues/151)) ([`71c5fae`](https://github.com/docling-project/docling-serve/commit/71c5fae505366459fd481d2ecdabc5ebed94d49c))
+
+### Documentation
+
+* Vlm and picture description options ([#149](https://github.com/docling-project/docling-serve/issues/149)) ([`91956cb`](https://github.com/docling-project/docling-serve/commit/91956cbf4e91cf82bb4d54ace397cdbbfaf594ba))
+
+## [v0.8.0](https://github.com/docling-project/docling-serve/releases/tag/v0.8.0) - 2025-04-22
+
+### Feature
+
+* Add option for vlm pipeline ([#143](https://github.com/docling-project/docling-serve/issues/143)) ([`ee89ee4`](https://github.com/docling-project/docling-serve/commit/ee89ee4daee5e916bd6a3bdb452f78934cd03f60))
+* Expose more conversion options ([#142](https://github.com/docling-project/docling-serve/issues/142)) ([`6b3d281`](https://github.com/docling-project/docling-serve/commit/6b3d281f02905c195ab75f25bb39f5c4d4e7b680))
+* **UI:** Change UI to use async endpoints ([#131](https://github.com/docling-project/docling-serve/issues/131)) ([`b598872`](https://github.com/docling-project/docling-serve/commit/b598872e5c48928ac44417a11bb7acc0e5c3f0c6))
+
+### Fix
+
+* **UI:** Use https when calling the api ([#139](https://github.com/docling-project/docling-serve/issues/139)) ([`57f9073`](https://github.com/docling-project/docling-serve/commit/57f9073bc0daf72428b068ea28e2bec7cd76c37b))
+* Fix permissions in docker image ([#136](https://github.com/docling-project/docling-serve/issues/136)) ([`c1ce471`](https://github.com/docling-project/docling-serve/commit/c1ce4719c933179ba3c59d73d0584853bbd6fa6a))
+* Picture caption visuals ([#129](https://github.com/docling-project/docling-serve/issues/129)) ([`5dfb75d`](https://github.com/docling-project/docling-serve/commit/5dfb75d3b9a7022d1daad12edbb8ec7bbf9aa264))
+
+### Documentation
+
+* Fix required permissions for oauth2-proxy requests ([#141](https://github.com/docling-project/docling-serve/issues/141)) ([`087417e`](https://github.com/docling-project/docling-serve/commit/087417e5c2387d4ed95500222058f34d8a8702aa))
+* Update deployment examples ([#135](https://github.com/docling-project/docling-serve/issues/135)) ([`525a43f`](https://github.com/docling-project/docling-serve/commit/525a43ff6f04b7cc80f9dd6a0e653a8d8c4ab317))
+* Fix image tag ([#124](https://github.com/docling-project/docling-serve/issues/124)) ([`420162e`](https://github.com/docling-project/docling-serve/commit/420162e674cc38b4c3c13673ffbee4c20a1b15f1))
+
+## [v0.7.0](https://github.com/docling-project/docling-serve/releases/tag/v0.7.0) - 2025-03-31
+
+### Feature
+
+* Expose TLS settings and example deploy with oauth-proxy ([#112](https://github.com/docling-project/docling-serve/issues/112)) ([`7a0faba`](https://github.com/docling-project/docling-serve/commit/7a0fabae07020c2659dbb22c3b0359909051a74c))
+* Offline static files ([#109](https://github.com/docling-project/docling-serve/issues/109)) ([`68772bb`](https://github.com/docling-project/docling-serve/commit/68772bb6f0a87b71094a08ff851f5754c6ca6163))
+* Update to Docling 2.28 ([#106](https://github.com/docling-project/docling-serve/issues/106)) ([`20ec87a`](https://github.com/docling-project/docling-serve/commit/20ec87a63a99145bc0ad7931549af8a0c30db641))
+
+### Fix
+
+* Move ARGs to prevent cache invalidation ([#104](https://github.com/docling-project/docling-serve/issues/104)) ([`e30f458`](https://github.com/docling-project/docling-serve/commit/e30f458923d34c169db7d5a5c296848716e8cac4))
+
+## [v0.6.0](https://github.com/docling-project/docling-serve/releases/tag/v0.6.0) - 2025-03-17
+
+### Feature
+
+* Expose options for new features ([#92](https://github.com/docling-project/docling-serve/issues/92)) ([`ec57b52`](https://github.com/docling-project/docling-serve/commit/ec57b528ed3f8e7b9604ff4cdf06da3d52c714dd))
+
+### Fix
+
+* Allow changes in CORS settings ([#100](https://github.com/docling-project/docling-serve/issues/100)) ([`422c402`](https://github.com/docling-project/docling-serve/commit/422c402bab7f05e46274ede11f234a19a62e093e))
+* Avoid exploding options cache using lru and expose size parameter ([#101](https://github.com/docling-project/docling-serve/issues/101)) ([`ea09028`](https://github.com/docling-project/docling-serve/commit/ea090288d3eec4ea8fbdcd32a6a497a99c89189d))
+* Increase timeout_keep_alive and allow parameter changes ([#98](https://github.com/docling-project/docling-serve/issues/98)) ([`07c48ed`](https://github.com/docling-project/docling-serve/commit/07c48edd5d9437219d9623e3d05bc5166c5bb85a))
+* Add warning when using incompatible parameters ([#99](https://github.com/docling-project/docling-serve/issues/99)) ([`a212547`](https://github.com/docling-project/docling-serve/commit/a212547d28d6588c65e52000dc7bc04f3f77e69e))
+* **ui:** Use --port parameter and avoid failing when image is not found ([#97](https://github.com/docling-project/docling-serve/issues/97)) ([`c76daac`](https://github.com/docling-project/docling-serve/commit/c76daac70c87da412f791666881e48b74688b060))
+
+### Documentation
+
+* Simplify README and move details to docs ([#102](https://github.com/docling-project/docling-serve/issues/102)) ([`fd8e40a`](https://github.com/docling-project/docling-serve/commit/fd8e40a00849771263d9b75b9a56f6caeccb8517))
+
+## [v0.5.1](https://github.com/docling-project/docling-serve/releases/tag/v0.5.1) - 2025-03-10
+
+### Fix
+
+* Submodules in wheels ([#85](https://github.com/docling-project/docling-serve/issues/85)) ([`a92ad48`](https://github.com/docling-project/docling-serve/commit/a92ad48b287bfcb134011dc0fc3f91ee04e067ee))
+
+## [v0.5.0](https://github.com/docling-project/docling-serve/releases/tag/v0.5.0) - 2025-03-07
+
+### Feature
+
+* Async api ([#60](https://github.com/docling-project/docling-serve/issues/60)) ([`82f8900`](https://github.com/docling-project/docling-serve/commit/82f890019745859699c1b01f9ccfb64cb7e37906))
+* Display version in fastapi docs ([#78](https://github.com/docling-project/docling-serve/issues/78)) ([`ed851c9`](https://github.com/docling-project/docling-serve/commit/ed851c95fee5f59305ddc3dcd5c09efce618470b))
+
+### Fix
+
+* Remove uv from image, merge ARG and ENV declarations ([#57](https://github.com/docling-project/docling-serve/issues/57)) ([`c95db36`](https://github.com/docling-project/docling-serve/commit/c95db3643807a4dfb96d93c8e10d6eb486c49a30))
+* **docs:** Remove comma in convert/source curl example ([#73](https://github.com/docling-project/docling-serve/issues/73)) ([`05df073`](https://github.com/docling-project/docling-serve/commit/05df0735d35a589bdc2a11fcdd764a10f700cb6f))
+
+## [v0.4.0](https://github.com/docling-project/docling-serve/releases/tag/v0.4.0) - 2025-02-26
+
+### Feature
+
+* New container images ([#68](https://github.com/docling-project/docling-serve/issues/68)) ([`7e6d9cd`](https://github.com/docling-project/docling-serve/commit/7e6d9cdef398df70a5b4d626aeb523c428c10d56))
+* Render DoclingDocument with npm docling-components in the example UI ([#65](https://github.com/docling-project/docling-serve/issues/65)) ([`c430d9b`](https://github.com/docling-project/docling-serve/commit/c430d9b1a162ab29104d86ebaa1ac5a5488b1f09))
+
+## [v0.3.0](https://github.com/docling-project/docling-serve/releases/tag/v0.3.0) - 2025-02-19
+
+### Feature
+
+* Add new docling-serve cli ([#50](https://github.com/docling-project/docling-serve/issues/50)) ([`ec33a61`](https://github.com/docling-project/docling-serve/commit/ec33a61faa7846b9b7998fbf557ebe39a3b800f6))
+
+### Fix
+
+* Set DOCLING_SERVE_ARTIFACTS_PATH in images ([#53](https://github.com/docling-project/docling-serve/issues/53)) ([`4877248`](https://github.com/docling-project/docling-serve/commit/487724836896576ca4f98e84abf15fd1c383bec8))
+* Set root UI path when behind proxy ([#38](https://github.com/docling-project/docling-serve/issues/38)) ([`c64a450`](https://github.com/docling-project/docling-serve/commit/c64a450bf9ba9947ab180e92bef2763ff710b210))
+* Support python 3.13 and docling updates and switch to uv ([#48](https://github.com/docling-project/docling-serve/issues/48)) ([`ae3b490`](https://github.com/docling-project/docling-serve/commit/ae3b4906f1c0829b1331ea491f3518741cabff71))
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -3,13 +3,13 @@
 Our project welcomes external contributions. If you have an itch, please feel
 free to scratch it.

-To contribute code or documentation, please submit a [pull request](https://github.com/DS4SD/docling-serve/pulls).
+To contribute code or documentation, please submit a [pull request](https://github.com/docling-project/docling-serve/pulls).

 A good way to familiarize yourself with the codebase and contribution process is
-to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/DS4SD/docling-serve/issues).
+to look for and tackle low-hanging fruit in the [issue tracker](https://github.com/docling-project/docling-serve/issues).
 Before embarking on a more ambitious contribution, please quickly [get in touch](#communication) with us.

-For general questions or support requests, please refer to the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
+For general questions or support requests, please refer to the [discussion section](https://github.com/docling-project/docling-serve/discussions).

 **Note: We appreciate your effort, and want to avoid a situation where a contribution
 requires extensive rework (by you or by us), sits in backlog for a long time, or
@@ -17,14 +17,14 @@ cannot be accepted at all!**

 ### Proposing new features

-If you would like to implement a new feature, please [raise an issue](https://github.com/DS4SD/docling-serve/issues)
+If you would like to implement a new feature, please [raise an issue](https://github.com/docling-project/docling-serve/issues)
 before sending a pull request so the feature can be discussed. This is to avoid
 you wasting your valuable time working on a feature that the project developers
 are not interested in accepting into the code base.

 ### Fixing bugs

-If you would like to fix a bug, please [raise an issue](https://github.com/DS4SD/docling-serve/issues) before sending a
+If you would like to fix a bug, please [raise an issue](https://github.com/docling-project/docling-serve/issues) before sending a
 pull request so it can be tracked.

 ### Merge approval
@@ -73,7 +73,7 @@ git commit -s

 ## Communication

-Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling-serve/discussions).
+Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling-serve/discussions).

 ## Developing

@@ -142,8 +142,7 @@ poetry add NAME

 We use the following tools to enforce code style:

- iSort, to sort imports
- Black, to format code
+- ruff, to sort imports and format code

 We run a series of checks on the code base on every commit, using `pre-commit`. To install the hooks, run:

@@ -157,4 +156,4 @@ To run the checks on-demand, run:
 pre-commit run --all-files
 ```

-Note: Checks like `Black` and `isort` will "fail" if they modify files. This is because `pre-commit` doesn't like to see files modified by their Hooks. In these cases, `git add` the modified files and `git commit` again.
+Note: Formatting checks like `ruff` will "fail" if they modify files. This is because `pre-commit` doesn't like to see files modified by its hooks. In these cases, `git add` the modified files and `git commit` again.
--- a/69
+++ b/69
@@ -1,15 +1,17 @@
 ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

-FROM ${BASE_IMAGE}
+ARG UV_VERSION=0.8.3

-ARG CPU_ONLY=false
+ARG UV_SYNC_EXTRA_ARGS=""

-USER 0
+FROM ${BASE_IMAGE} AS docling-base

 ###################################################################################################
 # OS Layer                                                                                        #
 ###################################################################################################

+USER 0
+
 RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
    dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
    dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
@@ -19,43 +21,60 @@ RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
    dnf -y clean all && \
    rm -rf /var/cache/dnf

+RUN /usr/bin/fix-permissions /opt/app-root/src/.cache
+
 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
+
 ###################################################################################################
 # Docling layer                                                                                   #
 ###################################################################################################

+FROM docling-base
+
 USER 1001

 WORKDIR /opt/app-root/src

-# On container environments, always set a thread budget to avoid undesired thread congestion.
-ENV OMP_NUM_THREADS=4
+ENV \
+    OMP_NUM_THREADS=4 \
+    LANG=en_US.UTF-8 \
+    LC_ALL=en_US.UTF-8 \
+    PYTHONIOENCODING=utf-8 \
+    UV_COMPILE_BYTECODE=1 \
+    UV_LINK_MODE=copy \
+    UV_PROJECT_ENVIRONMENT=/opt/app-root \
+    DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models

-ENV LANG=en_US.UTF-8
-ENV LC_ALL=en_US.UTF-8
-ENV PYTHONIOENCODING=utf-8
+ARG UV_SYNC_EXTRA_ARGS

-ENV WITH_UI=True
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
+    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    umask 002 && \
+    UV_SYNC_ARGS="--frozen --no-install-project --no-dev --all-extras" && \
+    uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
+    FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn

-COPY --chown=1001:0 pyproject.toml poetry.lock models_download.py README.md ./
+ARG MODELS_LIST="layout tableformer picture_classifier easyocr"

-RUN pip install --no-cache-dir poetry && \
-    # We already are in a virtual environment, so we don't need to create a new one, only activate it.
-    poetry config virtualenvs.create false && \
-    source /opt/app-root/bin/activate && \
-    if [ "$CPU_ONLY" = "true" ]; then \
-        poetry install --no-root --no-cache --no-interaction --all-extras --with cpu --without dev; \
-    else \
-        poetry install --no-root --no-cache --no-interaction --all-extras --without dev; \
-    fi && \
-    echo "Downloading models..." && \
-    python models_download.py && \
-    chown -R 1001:0 /opt/app-root/src && \
-    chmod -R g=u /opt/app-root/src
+RUN echo "Downloading models..." && \
+    HF_HUB_DOWNLOAD_TIMEOUT="90" \
+    HF_HUB_ETAG_TIMEOUT="90" \
+    docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
+    chown -R 1001:0 ${DOCLING_SERVE_ARTIFACTS_PATH} && \
+    chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}

-COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve
+COPY --chown=1001:0 ./docling_serve ./docling_serve
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
+    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    umask 002 && uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}

 EXPOSE 5001

-CMD ["python", "-m", "docling_serve"]
+CMD ["docling-serve", "run"]
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -1,11 +1,11 @@
 # MAINTAINERS

- Christoph Auer - [@cau-git](https://github.com/cau-git)
- Michele Dolfi - [@dolfim-ibm](https://github.com/dolfim-ibm)
- Maxim Lysak - [@maxmnemonic](https://github.com/maxmnemonic)
- Nikos Livathinos - [@nikos-livathinos](https://github.com/nikos-livathinos)
- Ahmed Nassar - [@nassarofficial](https://github.com/nassarofficial)
- Panos Vagenas - [@vagenas](https://github.com/vagenas)
- Peter Staar - [@PeterStaar-IBM](https://github.com/PeterStaar-IBM)
+- Christoph Auer - [`@cau-git`](https://github.com/cau-git)
+- Michele Dolfi - [`@dolfim-ibm`](https://github.com/dolfim-ibm)
+- Maxim Lysak - [`@maxmnemonic`](https://github.com/maxmnemonic)
+- Nikos Livathinos - [`@nikos-livathinos`](https://github.com/nikos-livathinos)
+- Ahmed Nassar - [`@nassarofficial`](https://github.com/nassarofficial)
+- Panos Vagenas - [`@vagenas`](https://github.com/vagenas)
+- Peter Staar - [`@PeterStaar-IBM`](https://github.com/PeterStaar-IBM)

 Maintainers can be contacted at [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com).
--- a/93
+++ b/93
@@ -17,6 +17,7 @@ else
 endif

 TAG=$(shell git rev-parse HEAD)
+BRANCH_TAG=$(shell git rev-parse --abbrev-ref HEAD)

 action-lint-file:
 	$(CMD_PREFIX) touch .action-lint
@@ -24,19 +25,47 @@ action-lint-file:
 md-lint-file:
 	$(CMD_PREFIX) touch .markdown-lint

+.PHONY: docling-serve-image
+docling-serve-image: Containerfile ## Build docling-serve container image
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve]"
+	$(CMD_PREFIX) docker build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)
+
 .PHONY: docling-serve-cpu-image
 docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
-	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU ONLY]"
-	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=true -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)

-.PHONY: docling-serve-gpu-image
-docling-serve-gpu-image: Containerfile ## Build docling-serve container image with GPU support
-	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with GPU]"
-	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=false -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) quay.io/ds4sd/docling-serve:main
+.PHONY: docling-serve-cu124-image
+docling-serve-cu124-image: Containerfile ## Build docling-serve container image with CUDA 12.4 support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cu126-image
+docling-serve-cu126-image: Containerfile ## Build docling-serve container image with CUDA 12.6 support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.6]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cu128-image
+docling-serve-cu128-image: Containerfile ## Build docling-serve container image with CUDA 12.8 support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.8]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+
+.PHONY: docling-serve-rocm-image
+docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)

 .PHONY: action-lint
 action-lint: .action-lint ##      Lint GitHub Action workflows
@@ -59,30 +88,50 @@ action-lint: .action-lint ##      Lint GitHub Action workflows
 md-lint: .md-lint ##      Lint markdown files
 .md-lint: $(wildcard */**/*.md) | md-lint-file
 	$(ECHO_PREFIX) printf "  %-12s ./...\n" "[MD LINT]"
-	$(CMD_PREFIX) docker run --rm -v $$(pwd):/workdir davidanson/markdownlint-cli2:v0.14.0 "**/*.md"
+	$(CMD_PREFIX) docker run --rm -v $$(pwd):/workdir davidanson/markdownlint-cli2:v0.16.0 "**/*.md" "#.venv"
 	$(CMD_PREFIX) touch $@

 .PHONY: py-Lint
 py-lint: ##      Lint Python files
 	$(ECHO_PREFIX) printf "  %-12s ./...\n" "[PY LINT]"
-	$(CMD_PREFIX) if ! which poetry $(PIPE_DEV_NULL) ; then \
-		echo "Please install poetry." ; \
-		echo "pip install poetry" ; \
+	$(CMD_PREFIX) if ! which uv $(PIPE_DEV_NULL) ; then \
+		echo "Please install uv." ; \
 		exit 1 ; \
 	fi
-	$(CMD_PREFIX) poetry install --all-extras
-	$(CMD_PREFIX) poetry run pre-commit run --all-files
+	$(CMD_PREFIX) uv sync --extra ui
+	$(CMD_PREFIX) uv run pre-commit run --all-files

 .PHONY: run-docling-cpu
 run-docling-cpu: ## Run the docling-serve container with CPU support and assign a container name
 	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
 	$(CMD_PREFIX) docker rm -f docling-serve-cpu 2>/dev/null || true
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with CPU support on port 5001...\n" "[RUN CPU]"
-	$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/ds4sd/docling-serve-cpu:main
+	$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main

-.PHONY: run-docling-gpu
-run-docling-gpu: ## Run the docling-serve container with GPU support and assign a container name
+.PHONY: run-docling-cu124
+run-docling-cu124: ## Run the docling-serve container with CUDA 12.4 support and assign a container name
 	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
-	$(CMD_PREFIX) docker rm -f docling-serve-gpu 2>/dev/null || true
-	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN GPU]"
-	$(CMD_PREFIX) docker run -it --name docling-serve-gpu -p 5001:5001 ghcr.io/ds4sd/docling-serve:main
+	$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+
+.PHONY: run-docling-cu126
+run-docling-cu126: ## Run the docling-serve container with CUDA 12.6 support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
+
+.PHONY: run-docling-cu128
+run-docling-cu128: ## Run the docling-serve container with CUDA 12.8 support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
+
+.PHONY: run-docling-rocm
+run-docling-rocm: ## Run the docling-serve container with ROCm support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
--- a/README.md
+++ b/README.md
@@ -1,342 +1,100 @@
+<p align="center">
+  <a href="https://github.com/docling-project/docling-serve">
+    <img loading="lazy" alt="Docling" src="https://github.com/docling-project/docling-serve/raw/main/docs/assets/docling-serve-pic.png" width="30%"/>
+  </a>
+</p>
+
 # Docling Serve

- Running [Docling](https://github.com/DS4SD/docling) as an API service.
+Running [Docling](https://github.com/docling-project/docling) as an API service.

-## Usage
+📚 [Docling Serve documentation](./docs/README.md)

-The API provides two endpoints: one for urls, one for files. This is necessary to send files directly in binary format instead of base64-encoded strings.
+- Learn how to [configure the webserver](./docs/configuration.md)
+- Get to know all [runtime options](./docs/usage.md) of the API
+- Explore useful [deployment examples](./docs/deployment.md)
+- And more

-### Common parameters
+> [!NOTE]
+> **Migration to the `v1` API.** Docling Serve now has a stable v1 API. Read more on the [migration to v1](./docs/v1_migration.md).

-On top of the source of file (see below), both endpoints support the same parameters, which are almost the same as the Docling CLI.
+## Getting started

- `from_format` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
- `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesseract_cli`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`.
- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`. Defaults to `dlparse_v2`.
- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
- `return_as_file` (boo): If enabled, return the output as a file. Defaults to false.
- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to true.
- `images_scale` (float): Scale factor for images. Defaults to 2.0.
+Install the `docling-serve` package and run the server.

-### URL endpoint
+```bash
+# Using the python package
+pip install "docling-serve[ui]"
+docling-serve run --enable-ui

-The endpoint is `/v1alpha/convert/source`, listening for POST requests of JSON payloads.
-
-On top of the above parameters, you must send the URL(s) of the document you want process with either the `http_sources` or `file_sources` fields.
-The first is fetching URL(s) (optionally using with extra headers), the second allows to provide documents as base64-encoded strings.
-No `options` is required, they can be partially or completely omitted.
-
-Simple payload example:
-
-```json
-{
-  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
-}
+# Using container images, e.g. with Podman
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve
 ```

-<details>
+The server is available at

-<summary>Complete payload example:</summary>
+- API <http://127.0.0.1:5001>
+- API documentation <http://127.0.0.1:5001/docs>
+- UI playground <http://127.0.0.1:5001/ui>

-```json
-{
-  "options": {
-    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
-    "to_formats": ["md", "json", "html", "text", "doctags"],
-    "image_export_mode": "placeholder",
-    "do_ocr": true,
-    "force_ocr": false,
-    "ocr_engine": "easyocr",
-    "ocr_lang": ["en"],
-    "pdf_backend": "dlparse_v2",
-    "table_mode": "fast",
-    "abort_on_error": false,
-    "return_as_file": false,
-  },
-  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
-}
-```
+![API documentation](img/fastapi-ui.png)

-</details>
+Try it out with a simple conversion:

-<details>
-
-<summary>CURL example:</summary>
-
-```sh
+```bash
 curl -X 'POST' \
-  'http://localhost:5001/v1alpha/convert/source' \
+  'http://localhost:5001/v1/convert/source' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
-  "options": {
-    "from_formats": [
-      "docx",
-      "pptx",
-      "html",
-      "image",
-      "pdf",
-      "asciidoc",
-      "md",
-      "xlsx"
-    ],
-    "to_formats": ["md", "json", "html", "text", "doctags"],
-    "image_export_mode": "placeholder",
-    "do_ocr": true,
-    "force_ocr": false,
-    "ocr_engine": "easyocr",
-    "ocr_lang": [
-      "fr",
-      "de",
-      "es",
-      "en"
-    ],
-    "pdf_backend": "dlparse_v2",
-    "table_mode": "fast",
-    "abort_on_error": false,
-    "return_as_file": false,
-    "do_table_structure": true,
-    "include_images": true,
-    "images_scale": 2,
-  },
-  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
-}'
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
 ```

-</details>
+### Container Images

-<details>
-<summary>Python example:</summary>
+The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:

-```python
-import httpx
+#### 📦 Distributed Images

-async_client = httpx.AsyncClient(timeout=60.0)
-url = "http://localhost:5001/v1alpha/convert/source"
-payload = {
-  "options": {
-    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
-    "to_formats": ["md", "json", "html", "text", "doctags"],
-    "image_export_mode": "placeholder",
-    "do_ocr": True,
-    "force_ocr": False,
-    "ocr_engine": "easyocr",
-    "ocr_lang": "en",
-    "pdf_backend": "dlparse_v2",
-    "table_mode": "fast",
-    "abort_on_error": False,
-    "return_as_file": False,
-  },
-  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
-}
+| Image | Description | Architectures | Size |
+|-------|-------------|----------------|------|
+| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
+| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
+| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |

-response = await async_client_client.post(url, json=payload)
+#### 🚫 Not Distributed

-data = response.json()
+An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
+
+To build it locally:
+
+```bash
+git clone --branch main git@github.com:docling-project/docling-serve.git
+cd docling-serve/
+make docling-serve-rocm-image
 ```

-</details>
+For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).

-#### File as base64
+Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.

-The `file_sources` argument in the endpoint allows to send files as base64-encoded strings.
-When your PDF or other file type is too large, encoding it and passing it inline to curl
-can lead to an “Argument list too long” error on some systems. To avoid this, we write
-the JSON request body to a file and have curl read from that file.
+### Demonstration UI

-<details>
-<summary>CURL steps:</summary>
+An easy-to-use UI is available at the `/ui` endpoint.

-```sh
-# 1. Base64-encode the file
-B64_DATA=$(base64 -w 0 /path/to/file/pdf-to-convert.pdf)
+![Input controllers in the UI](img/ui-input.png)

-# 2. Build the JSON with your options
-cat <<EOF > /tmp/request_body.json
-{
-  "options": {
-  },
-  "file_sources": [{
-    "base64_string": "${B64_DATA}",
-    "filename": "pdf-to-convert.pdf"
-  }]
-}
-EOF
-
-# 3. POST the request to the docling service
-curl -X POST "localhost:5001/v1alpha/convert/source" \
-     -H "Content-Type: application/json" \
-     -d @/tmp/request_body.json
-```
-
-</details>
-
-### File endpoint
-
-The endpoint is: `/v1alpha/convert/file`, listening for POST requests of Form payloads (necessary as the files are sent as multipart/form data). You can send one or multiple files.
-
-<details>
-<summary>CURL example:</summary>
-
-```sh
-curl -X 'POST' \
-  'http://127.0.0.1:5001/v1alpha/convert/file' \
-  -H 'accept: application/json' \
-  -H 'Content-Type: multipart/form-data' \
-  -F 'ocr_engine=easyocr' \
-  -F 'pdf_backend=dlparse_v2' \
-  -F 'from_formats=pdf' \
-  -F 'from_formats=docx' \
-  -F 'force_ocr=false' \
-  -F 'image_export_mode=embedded' \
-  -F 'ocr_lang=en' \
-  -F 'ocr_lang=pl' \
-  -F 'table_mode=fast' \
-  -F 'files=@2206.01062v1.pdf;type=application/pdf' \
-  -F 'abort_on_error=false' \
-  -F 'to_formats=md' \
-  -F 'to_formats=text' \
-  -F 'return_as_file=false' \
-  -F 'do_ocr=true'
-```
-
-</details>
-
-<details>
-<summary>Python example:</summary>
-
-```python
-import httpx
-
-async_client = httpx.AsyncClient(timeout=60.0)
-url = "http://localhost:5001/v1alpha/convert/file"
-parameters = {
-"from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
-"to_formats": ["md", "json", "html", "text", "doctags"],
-"image_export_mode": "placeholder",
-"do_ocr": True,
-"force_ocr": False,
-"ocr_engine": "easyocr",
-"ocr_lang": ["en"],
-"pdf_backend": "dlparse_v2",
-"table_mode": "fast",
-"abort_on_error": False,
-"return_as_file": False
-}
-
-current_dir = os.path.dirname(__file__)
-file_path = os.path.join(current_dir, '2206.01062v1.pdf')
-
-files = {
-    'files': ('2206.01062v1.pdf', open(file_path, 'rb'), 'application/pdf'),
-}
-
-response = await async_client.post(url, files=files, data={"parameters": json.dumps(parameters)})
-assert response.status_code == 200, "Response should be 200 OK"
-
-data = response.json()
-```
-
-</details>
-
-### Response format
-
-The response can be a JSON Document or a File.
-
- If you process only one file, the response will be a JSON document with the following format:
-
-  ```jsonc
-  {
-    "document": {
-      "md_content": "",
-      "json_content": {},
-      "html_content": "",
-      "text_content": "",
-      "doctags_content": ""
-      },
-    "status": "<success|partial_success|skipped|failure>",
-    "processing_time": 0.0,
-    "timings": {},
-    "errors": []
-  }
-  ```
-
-  Depending on the value you set in `output_formats`, the different items will be populated with their respective results or empty.
-
-  `processing_time` is the Docling processing time in seconds, and `timings` (when enabled in the backend) provides the detailed
-  timing of all the internal Docling components.
-
- If you set the parameter `return_as_file` to True, the response will be a zip file.
- If multiple files are generated (multiple inputs, or one input but multiple outputs with `return_as_file` True), the response will be a zip file.
-
-## Helpers
-
- A full Swagger UI is available at the `/docs` endpoint.
-
-![swagger.png](img/swagger.png)
-
- An easy to use UI is available at the `/ui` endpoint.
-
-![ui-input.png](img/ui-input.png)
-
-![ui-output.png](img/ui-output.png)
-
-## Development
-
-### CPU only
-
-```sh
-# Install poetry if not already available
-curl -sSL https://install.python-poetry.org | python3 -
-
-# Install dependencies
-poetry install --with cpu
-```
-
-### Cuda GPU
-
-For GPU support use the following command:
-
-```sh
-# Install dependencies
-poetry install
-```
-
-### Run the server
-
-The [start_server.sh](./start_server.sh) executable is a convenient script for launching the local webserver.
-
-```sh
-# Run the server
-bash start_server.sh
-
-# Run the server with live reload
-RELOAD=true bash start_server.sh
-```
-
-### Environment variables
-
-The following variables are available:
-
-`TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
-`UVICORN_WORKERS`: Number of workers to use.
-`RELOAD`: If `True`, this will enable auto-reload when you modify files, useful for development.
-`WITH_UI`: If `True`, The Gradio UI will be available at `/ui`.
+![Output visualization in the UI](img/ui-output.png)

 ## Get help and support

-Please feel free to connect with us using the [discussion section](https://github.com/DS4SD/docling/discussions).
+Please feel free to connect with us using the [discussion section](https://github.com/docling-project/docling/discussions).

 ## Contributing

-Please read [Contributing to Docling Serve](https://github.com/DS4SD/docling-serve/blob/main/CONTRIBUTING.md) for details.
+Please read [Contributing to Docling Serve](https://github.com/docling-project/docling-serve/blob/main/CONTRIBUTING.md) for details.

 ## References

@@ -344,14 +102,14 @@ If you use Docling in your projects, please consider citing the following:

 ```bib
@techreport{Docling,
-  author = {Deep Search Team},
-  month = {8},
-  title = {Docling Technical Report},
-  url = {https://arxiv.org/abs/2408.09869},
-  eprint = {2408.09869},
-  doi = {10.48550/arXiv.2408.09869},
-  version = {1.0.0},
-  year = {2024}
+  author = {Docling Contributors},
+  month = {1},
+  title = {Docling: An Efficient Open-Source Toolkit for AI-driven Document Conversion},
+  url = {https://arxiv.org/abs/2501.17887},
+  eprint = {2501.17887},
+  doi = {10.48550/arXiv.2501.17887},
+  version = {2.0.0},
+  year = {2025}
 }
 ```

--- a/docling_serve/main.py
+++ b/docling_serve/main.py
@@ -1,20 +1,402 @@
-import os
+import importlib.metadata
+import logging
+import platform
+import sys
+import warnings
+from pathlib import Path
+from typing import Annotated, Any, Optional, Union

-from docling_serve.app import app
-from docling_serve.helper_functions import _str_to_bool
+import typer
+import uvicorn
+from rich.console import Console

-# Launch the FastAPI server
-if __name__ == "__main__":
-    from uvicorn import run
+from docling_serve.settings import docling_serve_settings, uvicorn_settings
+from docling_serve.storage import get_scratch

-    port = int(os.getenv("PORT", "5001"))
-    workers = int(os.getenv("UVICORN_WORKERS", "1"))
-    reload = _str_to_bool(os.getenv("RELOAD", "False"))
-    run(
-        app,
-        host="0.0.0.0",
-        port=port,
-        workers=workers,
-        timeout_keep_alive=600,
-        reload=reload,
+warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
+warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
+
+
+err_console = Console(stderr=True)
+console = Console()
+
+app = typer.Typer(
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+)
+
+logger = logging.getLogger(__name__)
+
+
+def version_callback(value: bool) -> None:
+    if value:
+        docling_serve_version = importlib.metadata.version("docling_serve")
+        docling_jobkit_version = importlib.metadata.version("docling-jobkit")
+        docling_version = importlib.metadata.version("docling")
+        docling_core_version = importlib.metadata.version("docling-core")
+        docling_ibm_models_version = importlib.metadata.version("docling-ibm-models")
+        docling_parse_version = importlib.metadata.version("docling-parse")
+        platform_str = platform.platform()
+        py_impl_version = sys.implementation.cache_tag
+        py_lang_version = platform.python_version()
+        console.print(f"Docling Serve version: {docling_serve_version}")
+        console.print(f"Docling Jobkit version: {docling_jobkit_version}")
+        console.print(f"Docling version: {docling_version}")
+        console.print(f"Docling Core version: {docling_core_version}")
+        console.print(f"Docling IBM Models version: {docling_ibm_models_version}")
+        console.print(f"Docling Parse version: {docling_parse_version}")
+        console.print(f"Python: {py_impl_version} ({py_lang_version})")
+        console.print(f"Platform: {platform_str}")
+        raise typer.Exit()
+
+
+@app.callback()
+def callback(
+    version: Annotated[
+        Union[bool, None],
+        typer.Option(help="Show the version and exit.", callback=version_callback),
+    ] = None,
+    verbose: Annotated[
+        int,
+        typer.Option(
+            "--verbose",
+            "-v",
+            count=True,
+            help="Set the verbosity level. -v for info logging, -vv for debug logging.",
+        ),
+    ] = 0,
+) -> None:
+    # Map the -v count to a log level; -vv and anything above it selects DEBUG.
+    if verbose >= 2:
+        level = logging.DEBUG
+    else:
+        level = logging.INFO if verbose == 1 else logging.WARNING
+    logging.basicConfig(level=level)
+
+
+def _run(
+    *,
+    command: str,
+    # Docling serve parameters
+    artifacts_path: Path | None,
+    enable_ui: bool,
+) -> None:
+    """Announce the server URLs, sync CLI options into settings, and start uvicorn."""
+    server_type = "development" if command == "dev" else "production"
+    console.print(f"Starting {server_type} server 🚀")
+
+    run_subprocess = (
+        uvicorn_settings.workers is not None and uvicorn_settings.workers > 1
+    ) or uvicorn_settings.reload
+
+    run_ssl = (
+        uvicorn_settings.ssl_certfile is not None
+        and uvicorn_settings.ssl_keyfile is not None
    )
+
+    if run_subprocess and docling_serve_settings.artifacts_path != artifacts_path:
+        err_console.print(
+            "\n[yellow]:warning: The server will run with reload or multiple workers. \n"
+            "The argument [bold]--artifacts-path[/bold] will be ignored, please set the value \n"
+            "using the environment variable [bold]DOCLING_SERVE_ARTIFACTS_PATH[/bold].[/yellow]"
+        )
+
+    if run_subprocess and docling_serve_settings.enable_ui != enable_ui:
+        err_console.print(
+            "\n[yellow]:warning: The server will run with reload or multiple workers. \n"
+            "The argument [bold]--enable-ui[/bold] will be ignored, please set the value \n"
+            "using the environment variable [bold]DOCLING_SERVE_ENABLE_UI[/bold].[/yellow]"
+        )
+
+    # Propagate the CLI values to the shared settings object (see the warnings above)
+    docling_serve_settings.artifacts_path = artifacts_path
+    docling_serve_settings.enable_ui = enable_ui
+
+    # Build the URLs announced on the console; https only when both cert and key are set
+    protocol = "https" if run_ssl else "http"
+    url = f"{protocol}://{uvicorn_settings.host}:{uvicorn_settings.port}"
+    url_docs = f"{url}/docs"
+    url_scalar = f"{url}/scalar"
+    url_ui = f"{url}/ui"
+
+    console.print("")
+    console.print(f"Server started at [link={url}]{url}[/]")
+    console.print(f"Documentation at [link={url_docs}]{url_docs}[/]")
+    console.print(f"Scalar docs at [link={url_scalar}]{url_scalar}[/]")
+    if docling_serve_settings.enable_ui:
+        console.print(f"UI at [link={url_ui}]{url_ui}[/]")
+
+    if command == "dev":
+        console.print("")
+        console.print(
+            "Running in development mode, for production use: "
+            "[bold]docling-serve run[/]",
+        )
+
+    console.print("")
+    console.print("Logs:")
+
+    # Launch the server
+    uvicorn.run(
+        app="docling_serve.app:create_app",
+        factory=True,
+        host=uvicorn_settings.host,
+        port=uvicorn_settings.port,
+        reload=uvicorn_settings.reload,
+        workers=uvicorn_settings.workers,
+        root_path=uvicorn_settings.root_path,
+        proxy_headers=uvicorn_settings.proxy_headers,
+        timeout_keep_alive=uvicorn_settings.timeout_keep_alive,
+        ssl_certfile=uvicorn_settings.ssl_certfile,
+        ssl_keyfile=uvicorn_settings.ssl_keyfile,
+        ssl_keyfile_password=uvicorn_settings.ssl_keyfile_password,
+    )
+
+
+@app.command()
+def dev(
+    *,
+    # uvicorn options
+    host: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The host to serve on. For local development in localhost "
+                "use [blue]127.0.0.1[/blue]. To enable public access, "
+                "e.g. in a container, use all the IP addresses "
+                "available with [blue]0.0.0.0[/blue]."
+            )
+        ),
+    ] = "127.0.0.1",
+    port: Annotated[
+        int,
+        typer.Option(help="The port to serve on."),
+    ] = uvicorn_settings.port,
+    reload: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable auto-reload of the server when (code) files change. "
+                "This is [bold]resource intensive[/bold], "
+                "use it only during development."
+            )
+        ),
+    ] = True,
+    root_path: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The root path is used to tell your app that it is being served "
+                "to the outside world with some [bold]path prefix[/bold] "
+                "set up in some termination proxy or similar."
+            )
+        ),
+    ] = uvicorn_settings.root_path,
+    proxy_headers: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
+                "X-Forwarded-Port to populate remote address info."
+            )
+        ),
+    ] = uvicorn_settings.proxy_headers,
+    timeout_keep_alive: Annotated[
+        int, typer.Option(help="Timeout for the server response.")
+    ] = uvicorn_settings.timeout_keep_alive,
+    ssl_certfile: Annotated[
+        Optional[Path], typer.Option(help="SSL certificate file")
+    ] = uvicorn_settings.ssl_certfile,
+    ssl_keyfile: Annotated[
+        Optional[Path], typer.Option(help="SSL key file")
+    ] = uvicorn_settings.ssl_keyfile,
+    ssl_keyfile_password: Annotated[
+        Optional[str], typer.Option(help="SSL keyfile password")
+    ] = uvicorn_settings.ssl_keyfile_password,
+    # docling options
+    artifacts_path: Annotated[
+        Optional[Path],
+        typer.Option(
+            help=(
+                "If set to a valid directory, "
+                "the model weights will be loaded from this path."
+            )
+        ),
+    ] = docling_serve_settings.artifacts_path,
+    enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
+) -> Any:
+    """
+    Run a [bold]Docling Serve[/bold] app in [yellow]development[/yellow] mode. 🧪
+
+    This is equivalent to [bold]docling-serve run[/bold] but with [bold]reload[/bold]
+    enabled and listening on the [blue]127.0.0.1[/blue] address.
+
+    Options can be set also with the corresponding ENV variable, with the exception
+    of --enable-ui, --host and --reload.
+    """
+
+    uvicorn_settings.host = host
+    uvicorn_settings.port = port
+    uvicorn_settings.reload = reload
+    uvicorn_settings.root_path = root_path
+    uvicorn_settings.proxy_headers = proxy_headers
+    uvicorn_settings.timeout_keep_alive = timeout_keep_alive
+    uvicorn_settings.ssl_certfile = ssl_certfile
+    uvicorn_settings.ssl_keyfile = ssl_keyfile
+    uvicorn_settings.ssl_keyfile_password = ssl_keyfile_password
+
+    _run(
+        command="dev",
+        artifacts_path=artifacts_path,
+        enable_ui=enable_ui,
+    )
+
+
+@app.command()
+def run(
+    *,
+    host: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The host to serve on. For local development in localhost "
+                "use [blue]127.0.0.1[/blue]. To enable public access, "
+                "e.g. in a container, use all the IP addresses "
+                "available with [blue]0.0.0.0[/blue]."
+            )
+        ),
+    ] = uvicorn_settings.host,
+    port: Annotated[
+        int,
+        typer.Option(help="The port to serve on."),
+    ] = uvicorn_settings.port,
+    reload: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable auto-reload of the server when (code) files change. "
+                "This is [bold]resource intensive[/bold], "
+                "use it only during development."
+            )
+        ),
+    ] = uvicorn_settings.reload,
+    workers: Annotated[
+        Union[int, None],
+        typer.Option(
+            help=(
+                "Use multiple worker processes. "
+                "Mutually exclusive with the --reload flag."
+            )
+        ),
+    ] = uvicorn_settings.workers,
+    root_path: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The root path is used to tell your app that it is being served "
+                "to the outside world with some [bold]path prefix[/bold] "
+                "set up in some termination proxy or similar."
+            )
+        ),
+    ] = uvicorn_settings.root_path,
+    proxy_headers: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
+                "X-Forwarded-Port to populate remote address info."
+            )
+        ),
+    ] = uvicorn_settings.proxy_headers,
+    timeout_keep_alive: Annotated[
+        int, typer.Option(help="Timeout for the server response.")
+    ] = uvicorn_settings.timeout_keep_alive,
+    ssl_certfile: Annotated[
+        Optional[Path], typer.Option(help="SSL certificate file")
+    ] = uvicorn_settings.ssl_certfile,
+    ssl_keyfile: Annotated[
+        Optional[Path], typer.Option(help="SSL key file")
+    ] = uvicorn_settings.ssl_keyfile,
+    ssl_keyfile_password: Annotated[
+        Optional[str], typer.Option(help="SSL keyfile password")
+    ] = uvicorn_settings.ssl_keyfile_password,
+    # docling options
+    artifacts_path: Annotated[
+        Optional[Path],
+        typer.Option(
+            help=(
+                "If set to a valid directory, "
+                "the model weights will be loaded from this path."
+            )
+        ),
+    ] = docling_serve_settings.artifacts_path,
+    enable_ui: Annotated[
+        bool, typer.Option(help="Enable the development UI.")
+    ] = docling_serve_settings.enable_ui,
+) -> Any:
+    """
+    Run a [bold]Docling Serve[/bold] app in [green]production[/green] mode. 🚀
+
+    This is equivalent to [bold]docling-serve dev[/bold] but with [bold]reload[/bold]
+    disabled and listening on the [blue]0.0.0.0[/blue] address.
+
+    Options can be set also with the corresponding ENV variable, e.g. UVICORN_PORT
+    or DOCLING_SERVE_ENABLE_UI.
+    """
+
+    uvicorn_settings.host = host
+    uvicorn_settings.port = port
+    uvicorn_settings.reload = reload
+    uvicorn_settings.workers = workers
+    uvicorn_settings.root_path = root_path
+    uvicorn_settings.proxy_headers = proxy_headers
+    uvicorn_settings.timeout_keep_alive = timeout_keep_alive
+    uvicorn_settings.ssl_certfile = ssl_certfile
+    uvicorn_settings.ssl_keyfile = ssl_keyfile
+    uvicorn_settings.ssl_keyfile_password = ssl_keyfile_password
+
+    _run(
+        command="run",
+        artifacts_path=artifacts_path,
+        enable_ui=enable_ui,
+    )
+
+
+@app.command()
+def rq_worker() -> Any:
+    """
+    Run the [bold]Docling JobKit[/bold] RQ worker.
+    """
+    from docling_jobkit.convert.manager import DoclingConverterManagerConfig
+    from docling_jobkit.orchestrators.rq.orchestrator import RQOrchestratorConfig
+    from docling_jobkit.orchestrators.rq.worker import run_worker
+
+    rq_config = RQOrchestratorConfig(
+        redis_url=docling_serve_settings.eng_rq_redis_url,
+        results_prefix=docling_serve_settings.eng_rq_results_prefix,
+        sub_channel=docling_serve_settings.eng_rq_sub_channel,
+        scratch_dir=get_scratch(),
+    )
+
+    cm_config = DoclingConverterManagerConfig(
+        artifacts_path=docling_serve_settings.artifacts_path,
+        options_cache_size=docling_serve_settings.options_cache_size,
+        enable_remote_services=docling_serve_settings.enable_remote_services,
+        allow_external_plugins=docling_serve_settings.allow_external_plugins,
+        max_num_pages=docling_serve_settings.max_num_pages,
+        max_file_size=docling_serve_settings.max_file_size,
+    )
+
+    run_worker(
+        rq_config=rq_config,
+        cm_config=cm_config,
+    )
+
+
+def main() -> None:
+    app()
+
+
+# Launch the CLI when calling python -m docling_serve
+if __name__ == "__main__":
+    main()
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -1,38 +1,78 @@
+import asyncio
+import copy
+import importlib.metadata
 import logging
-import os
-import tempfile
+import shutil
+import time
 from contextlib import asynccontextmanager
 from io import BytesIO
-from pathlib import Path
-from typing import Annotated, Any, Dict, List, Optional, Union
+from typing import Annotated

-from docling.datamodel.base_models import DocumentStream, InputFormat
-from docling.document_converter import DocumentConverter
-from dotenv import load_dotenv
-from fastapi import BackgroundTasks, FastAPI, UploadFile
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import RedirectResponse
-from pydantic import BaseModel
-
-from docling_serve.docling_conversion import (
-    ConvertDocumentFileSourcesRequest,
-    ConvertDocumentsOptions,
-    ConvertDocumentsRequest,
-    convert_documents,
-    converters,
-    get_pdf_pipeline_opts,
+from fastapi import (
+    BackgroundTasks,
+    Depends,
+    FastAPI,
+    Form,
+    HTTPException,
+    Query,
+    UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
+    status,
 )
-from docling_serve.helper_functions import FormDepends, _str_to_bool
-from docling_serve.response_preparation import ConvertDocumentResponse, process_results
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.openapi.docs import (
+    get_redoc_html,
+    get_swagger_ui_html,
+    get_swagger_ui_oauth2_redirect_html,
+)
+from fastapi.responses import JSONResponse, RedirectResponse
+from fastapi.staticfiles import StaticFiles
+from scalar_fastapi import get_scalar_api_reference

-# Load local env vars if present
-load_dotenv()
+from docling.datamodel.base_models import DocumentStream
+from docling_jobkit.datamodel.callback import (
+    ProgressCallbackRequest,
+    ProgressCallbackResponse,
+)
+from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
+from docling_jobkit.datamodel.task import Task, TaskSource
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    TaskTarget,
+    ZipTarget,
+)
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+    ProgressInvalid,
+    TaskNotFoundError,
+)

-WITH_UI = _str_to_bool(os.getenv("WITH_UI", "False"))
-if WITH_UI:
-    import gradio as gr
-
-    from docling_serve.gradio_ui import ui as gradio_ui
+from docling_serve.auth import APIKeyAuth, AuthenticationResult
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+from docling_serve.datamodel.requests import (
+    ConvertDocumentsRequest,
+    FileSourceRequest,
+    HttpSourceRequest,
+    S3SourceRequest,
+    TargetName,
+)
+from docling_serve.datamodel.responses import (
+    ClearResponse,
+    ConvertDocumentResponse,
+    HealthCheckResponse,
+    MessageKind,
+    PresignedUrlConvertDocumentResponse,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+from docling_serve.helper_functions import FormDepends
+from docling_serve.orchestrator_factory import get_async_orchestrator
+from docling_serve.response_preparation import prepare_response
+from docling_serve.settings import docling_serve_settings
+from docling_serve.storage import get_scratch
+from docling_serve.websocket_notifier import WebsocketNotifier


 # Set up custom logging as we'll be intermixes with FastAPI/Uvicorn's logging
@@ -70,155 +110,608 @@ _log = logging.getLogger(__name__)
 # Context manager to initialize and clean up the lifespan of the FastAPI app
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    # settings = Settings()
+    scratch_dir = get_scratch()

-    # Converter with default options
-    pdf_format_option, options_hash = get_pdf_pipeline_opts(ConvertDocumentsOptions())
-    converters[options_hash] = DocumentConverter(
-        format_options={
-            InputFormat.PDF: pdf_format_option,
-            InputFormat.IMAGE: pdf_format_option,
-        }
-    )
+    orchestrator = get_async_orchestrator()
+    notifier = WebsocketNotifier(orchestrator)
+    orchestrator.bind_notifier(notifier)

-    converters[options_hash].initialize_pipeline(InputFormat.PDF)
+    # Warm up the processing caches at startup when load_models_at_boot is enabled
+    if docling_serve_settings.load_models_at_boot:
+        await orchestrator.warm_up_caches()
+
+    # Start the background queue processor
+    queue_task = asyncio.create_task(orchestrator.process_queue())

    yield

-    converters.clear()
-    if WITH_UI:
-        gradio_ui.close()
+    # Cancel the background queue processor on shutdown and wait for it to unwind
+    queue_task.cancel()
+    try:
+        await queue_task
+    except asyncio.CancelledError:
+        _log.info("Queue processor cancelled.")
+
+    # Remove the scratch dir. NOTE(review): runs only when scratch_path is set, yet the intent reads "in case it was a tempfile" - confirm against get_scratch().
+    if docling_serve_settings.scratch_path is not None:
+        shutil.rmtree(scratch_dir, ignore_errors=True)


 ##################################
 # App creation and configuration #
 ##################################

-app = FastAPI(
-    title="Docling Serve",
-    lifespan=lifespan,
-)

-origins = ["*"]
-methods = ["*"]
-headers = ["*"]
+def create_app():  # noqa: C901
+    try:
+        version = importlib.metadata.version("docling_serve")
+    except importlib.metadata.PackageNotFoundError:
+        _log.warning("Unable to get docling_serve version, falling back to 0.0.0")

-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=methods,
-    allow_headers=headers,
-)
+        version = "0.0.0"

-# Mount the Gradio app
-if WITH_UI:
-    tmp_output_dir = Path(tempfile.mkdtemp())
-    gradio_ui.gradio_output_dir = tmp_output_dir
-    app = gr.mount_gradio_app(
-        app, gradio_ui, path="/ui", allowed_paths=["./logo.png", tmp_output_dir]
+    offline_docs_assets = False
+    if (
+        docling_serve_settings.static_path is not None
+        and (docling_serve_settings.static_path).is_dir()
+    ):
+        offline_docs_assets = True
+        _log.info("Found static assets.")
+
+    require_auth = APIKeyAuth(docling_serve_settings.api_key)
+    app = FastAPI(
+        title="Docling Serve",
+        docs_url=None if offline_docs_assets else "/swagger",
+        redoc_url=None if offline_docs_assets else "/docs",
+        lifespan=lifespan,
+        version=version,
    )

+    origins = docling_serve_settings.cors_origins
+    methods = docling_serve_settings.cors_methods
+    headers = docling_serve_settings.cors_headers

-#############################
-# API Endpoints definitions #
-#############################
-
-
-# Favicon
-@app.get("/favicon.ico", include_in_schema=False)
-async def favicon():
-    response = RedirectResponse(url="https://ds4sd.github.io/docling/assets/logo.png")
-    return response
-
-
-# Status
-class HealthCheckResponse(BaseModel):
-    status: str = "ok"
-
-
-@app.get("/health")
-def health() -> HealthCheckResponse:
-    return HealthCheckResponse()
-
-
-# API readiness compatibility for OpenShift AI Workbench
-@app.get("/api", include_in_schema=False)
-def api_check() -> HealthCheckResponse:
-    return HealthCheckResponse()
-
-
-# Convert a document from URL(s)
-@app.post(
-    "/v1alpha/convert/source",
-    response_model=ConvertDocumentResponse,
-    responses={
-        200: {
-            "content": {"application/zip": {}},
-            # "description": "Return the JSON item or an image.",
-        }
-    },
-)
-def process_url(
-    background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
-):
-    sources: List[Union[str, DocumentStream]] = []
-    headers: Optional[Dict[str, Any]] = None
-    if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
-        for file_source in conversion_request.file_sources:
-            sources.append(file_source.to_document_stream())
-    else:
-        for http_source in conversion_request.http_sources:
-            sources.append(http_source.url)
-            if headers is None and http_source.headers:
-                headers = http_source.headers
-
-    # Note: results are only an iterator->lazy evaluation
-    results = convert_documents(
-        sources=sources, options=conversion_request.options, headers=headers
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=origins,
+        allow_credentials=True,
+        allow_methods=methods,
+        allow_headers=headers,
    )

-    # The real processing will happen here
-    response = process_results(
-        background_tasks=background_tasks,
-        conversion_options=conversion_request.options,
-        conv_results=results,
+    # Mount the Gradio app
+    if docling_serve_settings.enable_ui:
+        try:
+            import gradio as gr
+
+            from docling_serve.gradio_ui import ui as gradio_ui
+
+            tmp_output_dir = get_scratch() / "gradio"
+            tmp_output_dir.mkdir(exist_ok=True, parents=True)
+            gradio_ui.gradio_output_dir = tmp_output_dir
+            app = gr.mount_gradio_app(
+                app,
+                gradio_ui,
+                path="/ui",
+                allowed_paths=["./logo.png", tmp_output_dir],
+                root_path="/ui",
+            )
+        except ImportError:
+            _log.warning(
+                "Docling Serve enable_ui is activated, but gradio is not installed. "
+                "Install it with `pip install docling-serve[ui]` "
+                "or `pip install gradio`"
+            )
+
+    #############################
+    # Offline assets definition #
+    #############################
+    if offline_docs_assets:
+        app.mount(
+            "/static",
+            StaticFiles(directory=docling_serve_settings.static_path),
+            name="static",
+        )
+
+        @app.get("/swagger", include_in_schema=False)
+        async def custom_swagger_ui_html():
+            return get_swagger_ui_html(
+                openapi_url=app.openapi_url,
+                title=app.title + " - Swagger UI",
+                oauth2_redirect_url=app.swagger_ui_oauth2_redirect_url,
+                swagger_js_url="/static/swagger-ui-bundle.js",
+                swagger_css_url="/static/swagger-ui.css",
+            )
+
+        @app.get(app.swagger_ui_oauth2_redirect_url, include_in_schema=False)
+        async def swagger_ui_redirect():
+            return get_swagger_ui_oauth2_redirect_html()
+
+        @app.get("/docs", include_in_schema=False)
+        async def redoc_html():
+            return get_redoc_html(
+                openapi_url=app.openapi_url,
+                title=app.title + " - ReDoc",
+                redoc_js_url="/static/redoc.standalone.js",
+            )
+
+    @app.get("/scalar", include_in_schema=False)
+    async def scalar_html():
+        return get_scalar_api_reference(
+            openapi_url=app.openapi_url,
+            title=app.title,
+            scalar_favicon_url="https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg",
+            # hide_client_button=True,  # not yet released but in main
+        )
+
+    ########################
+    # Async / Sync helpers #
+    ########################
+
+    async def _enque_source(
+        orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
+    ) -> Task:
+        sources: list[TaskSource] = []
+        for s in conversion_request.sources:
+            if isinstance(s, FileSourceRequest):
+                sources.append(FileSource.model_validate(s))
+            elif isinstance(s, HttpSourceRequest):
+                sources.append(HttpSource.model_validate(s))
+            elif isinstance(s, S3SourceRequest):
+                sources.append(S3Coordinates.model_validate(s))
+
+        task = await orchestrator.enqueue(
+            sources=sources,
+            options=conversion_request.options,
+            target=conversion_request.target,
+        )
+        return task
+
+    async def _enque_file(
+        orchestrator: BaseOrchestrator,
+        files: list[UploadFile],
+        options: ConvertDocumentsRequestOptions,
+        target: TaskTarget,
+    ) -> Task:
+        """Wrap the uploaded files as DocumentStreams and enqueue one conversion task."""
+        _log.info(f"Received {len(files)} files for processing.")
+
+        # Unnamed uploads become "file.pdf" when uploaded alone, "file_<i>.pdf" otherwise.
+        file_sources: list[TaskSource] = []
+        for i, file in enumerate(files):
+            buf = BytesIO(file.file.read())
+            suffix = "" if len(files) == 1 else f"_{i}"
+            name = file.filename if file.filename else f"file{suffix}.pdf"
+            file_sources.append(DocumentStream(name=name, stream=buf))
+
+        return await orchestrator.enqueue(
+            sources=file_sources, options=options, target=target
+        )
+
+    async def _wait_task_complete(orchestrator: BaseOrchestrator, task_id: str) -> bool:
+        start_time = time.monotonic()
+        while True:
+            task = await orchestrator.task_status(task_id=task_id)
+            if task.is_completed():
+                return True
+            await asyncio.sleep(5)
+            elapsed_time = time.monotonic() - start_time
+            if elapsed_time > docling_serve_settings.max_sync_wait:
+                return False
+
+    ##########################################
+    # Downgrade openapi 3.1 to 3.0.x helpers #
+    ##########################################
+
+    def ensure_array_items(schema):
+        """Ensure that array items are defined."""
+        if "type" in schema and schema["type"] == "array":
+            if "items" not in schema or schema["items"] is None:
+                schema["items"] = {"type": "string"}
+            elif isinstance(schema["items"], dict):
+                if "type" not in schema["items"]:
+                    schema["items"]["type"] = "string"
+
+    def handle_discriminators(schema):
+        """Ensure that discriminator properties are included in required."""
+        if "discriminator" in schema and "propertyName" in schema["discriminator"]:
+            prop = schema["discriminator"]["propertyName"]
+            if "properties" in schema and prop in schema["properties"]:
+                if "required" not in schema:
+                    schema["required"] = []
+                if prop not in schema["required"]:
+                    schema["required"].append(prop)
+
+    def handle_properties(schema):
+        """Ensure that property 'kind' is included in required."""
+        if "properties" in schema and "kind" in schema["properties"]:
+            if "required" not in schema:
+                schema["required"] = []
+            if "kind" not in schema["required"]:
+                schema["required"].append("kind")
+
+    # Downgrade an OpenAPI 3.1 spec towards 3.0.x; returns a new dict, the input is not mutated
+    def downgrade_openapi31_to_30(spec):
+        def strip_unsupported(obj):
+            if isinstance(obj, dict):
+                obj = {
+                    k: strip_unsupported(v)
+                    for k, v in obj.items()
+                    if k not in ("const", "examples", "prefixItems")
+                }
+
+                handle_discriminators(obj)
+                ensure_array_items(obj)
+
+                # Check for oneOf and anyOf to handle nested schemas
+                for key in ["oneOf", "anyOf"]:
+                    if key in obj:
+                        for sub in obj[key]:
+                            handle_discriminators(sub)
+                            ensure_array_items(sub)
+
+                return obj
+            elif isinstance(obj, list):
+                return [strip_unsupported(i) for i in obj]
+            return obj
+
+        # Copy first: handle_properties edits in place and `spec` may be a shared schema object.
+        spec = copy.deepcopy(spec)
+        for schema in spec.get("components", {}).get("schemas", {}).values():
+            handle_properties(schema)
+        return strip_unsupported(spec)
+
+    #############################
+    # API Endpoints definitions #
+    #############################
+
+    @app.get("/openapi-3.0.json")
+    def openapi_30():
+        spec = app.openapi()
+        downgraded = downgrade_openapi31_to_30(spec)
+        downgraded["openapi"] = "3.0.3"
+        return JSONResponse(downgraded)
+
+    # Favicon
+    @app.get("/favicon.ico", include_in_schema=False)
+    async def favicon():
+        logo_url = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
+        if offline_docs_assets:
+            logo_url = "/static/logo.svg"
+        response = RedirectResponse(url=logo_url)
+        return response
+
+    @app.get("/health")
+    def health() -> HealthCheckResponse:
+        return HealthCheckResponse()
+
+    # API readiness compatibility for OpenShift AI Workbench
+    @app.get("/api", include_in_schema=False)
+    def api_check() -> HealthCheckResponse:
+        return HealthCheckResponse()
+
+    # Convert a document from URL(s)
+    @app.post(
+        "/v1/convert/source",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+                # "description": "Return the JSON item or an image.",
+            }
+        },
    )
+    async def process_url(
+        background_tasks: BackgroundTasks,
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        conversion_request: ConvertDocumentsRequest,
+    ):
+        task = await _enque_source(
+            orchestrator=orchestrator, conversion_request=conversion_request
+        )
+        completed = await _wait_task_complete(
+            orchestrator=orchestrator, task_id=task.task_id
+        )

-    return response
+        if not completed:
+            # TODO: abort task! (raise, not return, so the client gets the 504)
+            raise HTTPException(
+                status_code=504,
+                detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+            )

+        task_result = await orchestrator.task_result(task_id=task.task_id)
+        if task_result is None:
+            raise HTTPException(
+                status_code=404,
+                detail="Task result not found. Please wait for a completion status.",
+            )
+        response = await prepare_response(
+            task_id=task.task_id,
+            task_result=task_result,
+            orchestrator=orchestrator,
+            background_tasks=background_tasks,
+        )
+        return response

-# Convert a document from file(s)
-@app.post(
-    "/v1alpha/convert/file",
-    response_model=ConvertDocumentResponse,
-    responses={
-        200: {
-            "content": {"application/zip": {}},
-        }
-    },
-)
-async def process_file(
-    background_tasks: BackgroundTasks,
-    files: List[UploadFile],
-    options: Annotated[ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)],
-):
-
-    _log.info(f"Received {len(files)} files for processing.")
-
-    # Load the uploaded files to Docling DocumentStream
-    file_sources = []
-    for file in files:
-        buf = BytesIO(file.file.read())
-        name = file.filename if file.filename else "file.pdf"
-        file_sources.append(DocumentStream(name=name, stream=buf))
-
-    results = convert_documents(sources=file_sources, options=options)
-
-    response = process_results(
-        background_tasks=background_tasks,
-        conversion_options=options,
-        conv_results=results,
+    # Convert a document from file(s)
+    @app.post(
+        "/v1/convert/file",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
    )
+    async def process_file(
+        background_tasks: BackgroundTasks,
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        files: list[UploadFile],
+        options: Annotated[
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
+        ],
+        target_type: Annotated[TargetName, Form()] = TargetName.INBODY,
+    ):
+        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+        task = await _enque_file(
+            orchestrator=orchestrator, files=files, options=options, target=target
+        )
+        completed = await _wait_task_complete(
+            orchestrator=orchestrator, task_id=task.task_id
+        )

-    return response
+        if not completed:
+            # TODO: abort task! (raise, not return, so the client gets the 504)
+            raise HTTPException(
+                status_code=504,
+                detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+            )
+
+        task_result = await orchestrator.task_result(task_id=task.task_id)
+        if task_result is None:
+            raise HTTPException(
+                status_code=404,
+                detail="Task result not found. Please wait for a completion status.",
+            )
+        response = await prepare_response(
+            task_id=task.task_id,
+            task_result=task_result,
+            orchestrator=orchestrator,
+            background_tasks=background_tasks,
+        )
+        return response
+
+    # Convert a document from URL(s) using the async api
+    @app.post(
+        "/v1/convert/source/async",
+        response_model=TaskStatusResponse,
+    )
+    async def process_url_async(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        conversion_request: ConvertDocumentsRequest,
+    ):
+        task = await _enque_source(
+            orchestrator=orchestrator, conversion_request=conversion_request
+        )
+        task_queue_position = await orchestrator.get_queue_position(
+            task_id=task.task_id
+        )
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+
+    # Convert a document from file(s) using the async api
+    @app.post(
+        "/v1/convert/file/async",
+        response_model=TaskStatusResponse,
+    )
+    async def process_file_async(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        files: list[UploadFile],
+        options: Annotated[
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
+        ],
+        target_type: Annotated[TargetName, Form()] = TargetName.INBODY,
+    ):
+        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+        task = await _enque_file(
+            orchestrator=orchestrator, files=files, options=options, target=target
+        )
+        task_queue_position = await orchestrator.get_queue_position(
+            task_id=task.task_id
+        )
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+
+    # Task status poll
+    @app.get(
+        "/v1/status/poll/{task_id}",
+        response_model=TaskStatusResponse,
+    )
+    async def task_status_poll(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        task_id: str,
+        wait: Annotated[
+            float,
+            Query(description="Number of seconds to wait for a completed status."),
+        ] = 0.0,
+    ):
+        try:
+            task = await orchestrator.task_status(task_id=task_id, wait=wait)
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+
+    # Task status websocket
+    @app.websocket(
+        "/v1/status/ws/{task_id}",
+    )
+    async def task_status_ws(
+        websocket: WebSocket,
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        task_id: str,
+        api_key: Annotated[str, Query()] = "",
+    ):
+        if docling_serve_settings.api_key:
+            if api_key != docling_serve_settings.api_key:
+                raise HTTPException(
+                    status_code=status.HTTP_401_UNAUTHORIZED,
+                    detail="Api key is required as ?api_key=SECRET.",
+                )
+
+        assert isinstance(orchestrator.notifier, WebsocketNotifier)
+        await websocket.accept()
+
+        if task_id not in orchestrator.tasks:
+            await websocket.send_text(
+                WebsocketMessage(
+                    message=MessageKind.ERROR, error="Task not found."
+                ).model_dump_json()
+            )
+            await websocket.close()
+            return
+
+        task = orchestrator.tasks[task_id]
+
+        # Track active WebSocket connections for this job
+        orchestrator.notifier.task_subscribers[task_id].add(websocket)
+
+        try:
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+            task_response = TaskStatusResponse(
+                task_id=task.task_id,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+            await websocket.send_text(
+                WebsocketMessage(
+                    message=MessageKind.CONNECTION, task=task_response
+                ).model_dump_json()
+            )
+            while True:
+                task_queue_position = await orchestrator.get_queue_position(
+                    task_id=task_id
+                )
+                task_response = TaskStatusResponse(
+                    task_id=task.task_id,
+                    task_status=task.task_status,
+                    task_position=task_queue_position,
+                    task_meta=task.processing_meta,
+                )
+                await websocket.send_text(
+                    WebsocketMessage(
+                        message=MessageKind.UPDATE, task=task_response
+                    ).model_dump_json()
+                )
+                # each client message will be interpreted as a request for update
+                msg = await websocket.receive_text()
+                _log.debug(f"Received message: {msg}")
+
+        except WebSocketDisconnect:
+            _log.info(f"WebSocket disconnected for job {task_id}")
+
+        finally:
+            orchestrator.notifier.task_subscribers[task_id].remove(websocket)
+
+    # Task result
+    @app.get(
+        "/v1/result/{task_id}",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
+    )
+    async def task_result(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        background_tasks: BackgroundTasks,
+        task_id: str,
+    ):
+        try:
+            task_result = await orchestrator.task_result(task_id=task_id)
+            if task_result is None:
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+
+    # Update task progress
+    @app.post(
+        "/v1/callback/task/progress",
+        response_model=ProgressCallbackResponse,
+    )
+    async def callback_task_progress(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        request: ProgressCallbackRequest,
+    ):
+        try:
+            await orchestrator.receive_task_progress(request=request)
+            return ProgressCallbackResponse(status="ack")
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        except ProgressInvalid as err:
+            raise HTTPException(
+                status_code=400, detail=f"Invalid progress payload: {err}"
+            )
+
+    #### Clear requests
+
+    # Offload models
+    @app.get(
+        "/v1/clear/converters",
+        response_model=ClearResponse,
+    )
+    async def clear_converters(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+    ):
+        await orchestrator.clear_converters()
+        return ClearResponse()
+
+    # Clean results
+    @app.get(
+        "/v1/clear/results",
+        response_model=ClearResponse,
+    )
+    async def clear_results(
+        auth: Annotated[AuthenticationResult, Depends(require_auth)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        older_then: float = 3600,
+    ):
+        await orchestrator.clear_results(older_than=older_then)
+        return ClearResponse()
+
+    return app
--- a/docling_serve/auth.py
+++ b/docling_serve/auth.py
@@ -0,0 +1,56 @@
+from typing import Any
+
+from fastapi import HTTPException, Request, status
+from fastapi.security import APIKeyHeader
+from pydantic import BaseModel
+
+
+class AuthenticationResult(BaseModel):
+    valid: bool
+    errors: list[str] = []
+    detail: Any | None = None
+
+
+class APIKeyAuth(APIKeyHeader):
+    """
+    FastAPI dependency which validates the API key sent in a request header.
+    """
+
+    def __init__(
+        self,
+        api_key: str,
+        header_name: str = "X-Api-Key",
+        fail_on_unauthorized: bool = True,  # NOTE: accepted but not used below
+    ) -> None:
+        self.api_key = api_key
+        self.header_name = header_name
+        super().__init__(name=self.header_name, auto_error=False)
+
+    async def _validate_api_key(self, header_api_key: str | None):
+        if header_api_key is None:
+            return AuthenticationResult(
+                valid=False, errors=[f"Missing header {self.header_name}."]
+            )
+
+        header_api_key = header_api_key.strip()
+
+        # An empty configured key accepts any provided key; otherwise it must match
+        if header_api_key == self.api_key or self.api_key == "":
+            return AuthenticationResult(
+                valid=True,
+                detail=header_api_key,
+            )
+        else:
+            return AuthenticationResult(
+                valid=False,
+                errors=["The provided API Key is invalid."],
+            )
+
+    async def __call__(self, request: Request) -> AuthenticationResult:  # type: ignore
+        header_api_key = await super().__call__(request=request)
+        result = await self._validate_api_key(header_api_key)
+        if self.api_key and not result.valid:  # report why (detail is None here)
+            raise HTTPException(
+                status_code=status.HTTP_401_UNAUTHORIZED, detail=result.errors
+            )
+        return result
--- a/docling_serve/datamodel/init.py
+++ b/docling_serve/datamodel/init.py
--- a/docling_serve/datamodel/convert.py
+++ b/docling_serve/datamodel/convert.py
@@ -0,0 +1,40 @@
+# Define the input options for the API
+from typing import Annotated
+
+from pydantic import Field
+
+from docling.datamodel.pipeline_options import (
+    EasyOcrOptions,
+)
+from docling.models.factories import get_ocr_factory
+from docling_jobkit.datamodel.convert import ConvertDocumentsOptions
+
+from docling_serve.settings import docling_serve_settings
+
+ocr_factory = get_ocr_factory(
+    allow_external_plugins=docling_serve_settings.allow_external_plugins
+)
+ocr_engines_enum = ocr_factory.get_enum()
+
+
+class ConvertDocumentsRequestOptions(ConvertDocumentsOptions):
+    ocr_engine: Annotated[  # type: ignore
+        ocr_engines_enum,
+        Field(
+            description=(
+                "The OCR engine to use. String. "
+                f"Allowed values: {', '.join([v.value for v in ocr_engines_enum])}. "
+                "Optional, defaults to easyocr."
+            ),
+            examples=[EasyOcrOptions.kind],
+        ),
+    ] = ocr_engines_enum(EasyOcrOptions.kind)  # type: ignore
+
+    document_timeout: Annotated[
+        float,
+        Field(
+            description="The timeout for processing each document, in seconds.",
+            gt=0,
+            le=docling_serve_settings.max_document_timeout,
+        ),
+    ] = docling_serve_settings.max_document_timeout
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -0,0 +1,72 @@
+import enum
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field, model_validator
+from pydantic_core import PydanticCustomError
+from typing_extensions import Self
+
+from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    S3Target,
+    TaskTarget,
+    ZipTarget,
+)
+
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+from docling_serve.settings import AsyncEngine, docling_serve_settings
+
+## Sources
+
+
+class FileSourceRequest(FileSource):
+    kind: Literal["file"] = "file"
+
+
+class HttpSourceRequest(HttpSource):
+    kind: Literal["http"] = "http"
+
+
+class S3SourceRequest(S3Coordinates):
+    kind: Literal["s3"] = "s3"
+
+
+## Multipart targets
+class TargetName(str, enum.Enum):
+    INBODY = InBodyTarget().kind
+    ZIP = ZipTarget().kind
+
+
+## Aliases
+SourceRequestItem = Annotated[
+    FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
+]
+
+
+## Complete Source request
+class ConvertDocumentsRequest(BaseModel):
+    options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
+    sources: list[SourceRequestItem]
+    target: TaskTarget = InBodyTarget()
+
+    @model_validator(mode="after")
+    def validate_s3_source_and_target(self) -> Self:
+        for source in self.sources:
+            if isinstance(source, S3SourceRequest):
+                if docling_serve_settings.eng_kind != AsyncEngine.KFP:
+                    raise PydanticCustomError(
+                        "error source", 'source kind "s3" requires engine kind "KFP"'
+                    )
+                if self.target.kind != "s3":
+                    raise PydanticCustomError(
+                        "error source", 'source kind "s3" requires target kind "s3"'
+                    )
+        if isinstance(self.target, S3Target):
+            for source in self.sources:
+                if isinstance(source, S3SourceRequest):
+                    return self
+            raise PydanticCustomError(
+                "error target", 'target kind "s3" requires source kind "s3"'
+            )
+        return self
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -0,0 +1,56 @@
+import enum
+from typing import Optional
+
+from pydantic import BaseModel
+
+from docling.datamodel.document import ConversionStatus, ErrorItem
+from docling.utils.profiling import ProfilingItem
+from docling_jobkit.datamodel.result import ExportDocumentResponse
+from docling_jobkit.datamodel.task_meta import TaskProcessingMeta
+
+
+# Status
+class HealthCheckResponse(BaseModel):
+    status: str = "ok"
+
+
+class ClearResponse(BaseModel):
+    status: str = "ok"
+
+
+class ConvertDocumentResponse(BaseModel):
+    document: ExportDocumentResponse
+    status: ConversionStatus
+    errors: list[ErrorItem] = []
+    processing_time: float
+    timings: dict[str, ProfilingItem] = {}
+
+
+class PresignedUrlConvertDocumentResponse(BaseModel):
+    processing_time: float
+    num_converted: int
+    num_succeeded: int
+    num_failed: int
+
+
+class ConvertDocumentErrorResponse(BaseModel):
+    status: ConversionStatus
+
+
+class TaskStatusResponse(BaseModel):
+    task_id: str
+    task_status: str
+    task_position: Optional[int] = None
+    task_meta: Optional[TaskProcessingMeta] = None
+
+
+class MessageKind(str, enum.Enum):
+    CONNECTION = "connection"
+    UPDATE = "update"
+    ERROR = "error"
+
+
+class WebsocketMessage(BaseModel):
+    message: MessageKind
+    task: Optional[TaskStatusResponse] = None
+    error: Optional[str] = None
--- a/docling_serve/docling_conversion.py
+++ b/docling_serve/docling_conversion.py
@@ -1,400 +0,0 @@
-import base64
-import hashlib
-import json
-import logging
-from io import BytesIO
-from pathlib import Path
-from typing import (
-    Annotated,
-    Any,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Type,
-    Union,
-)
-
-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
-from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
-from docling.datamodel.base_models import DocumentStream, InputFormat, OutputFormat
-from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import (
-    EasyOcrOptions,
-    OcrEngine,
-    OcrOptions,
-    PdfBackend,
-    PdfPipelineOptions,
-    RapidOcrOptions,
-    TableFormerMode,
-    TesseractOcrOptions,
-)
-from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
-from docling_core.types.doc import ImageRefMode
-from fastapi import HTTPException
-from pydantic import BaseModel, Field
-
-from docling_serve.helper_functions import _to_list_of_strings
-
-_log = logging.getLogger(__name__)
-
-
-# Define the input options for the API
-class ConvertDocumentsOptions(BaseModel):
-    from_formats: Annotated[
-        List[InputFormat],
-        Field(
-            description=(
-                "Input format(s) to convert from. String or list of strings. "
-                f"Allowed values: {', '.join([v.value for v in InputFormat])}. "
-                "Optional, defaults to all formats."
-            ),
-            examples=[[v.value for v in InputFormat]],
-        ),
-    ] = [v for v in InputFormat]
-
-    to_formats: Annotated[
-        List[OutputFormat],
-        Field(
-            description=(
-                "Output format(s) to convert to. String or list of strings. "
-                f"Allowed values: {', '.join([v.value for v in OutputFormat])}. "
-                "Optional, defaults to Markdown."
-            ),
-            examples=[[OutputFormat.MARKDOWN]],
-        ),
-    ] = [OutputFormat.MARKDOWN]
-
-    image_export_mode: Annotated[
-        ImageRefMode,
-        Field(
-            description=(
-                "Image export mode for the document (in case of JSON,"
-                " Markdown or HTML). "
-                f"Allowed values: {', '.join([v.value for v in ImageRefMode])}. "
-                "Optional, defaults to Embedded."
-            ),
-            examples=[ImageRefMode.EMBEDDED.value],
-            # pattern="embedded|placeholder|referenced",
-        ),
-    ] = ImageRefMode.EMBEDDED
-
-    do_ocr: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, the bitmap content will be processed using OCR. "
-                "Boolean. Optional, defaults to true"
-            ),
-            # examples=[True],
-        ),
-    ] = True
-
-    force_ocr: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, replace existing text with OCR-generated "
-                "text over content. Boolean. Optional, defaults to false."
-            ),
-            # examples=[False],
-        ),
-    ] = False
-
-    # TODO: use a restricted list based on what is installed on the system
-    ocr_engine: Annotated[
-        OcrEngine,
-        Field(
-            description=(
-                "The OCR engine to use. String. "
-                "Allowed values: easyocr, tesseract, rapidocr. "
-                "Optional, defaults to easyocr."
-            ),
-            examples=[OcrEngine.EASYOCR],
-        ),
-    ] = OcrEngine.EASYOCR
-
-    ocr_lang: Annotated[
-        Optional[List[str]],
-        Field(
-            description=(
-                "List of languages used by the OCR engine. "
-                "Note that each OCR engine has "
-                "different values for the language names. String or list of strings. "
-                "Optional, defaults to empty."
-            ),
-            examples=[["fr", "de", "es", "en"]],
-        ),
-    ] = None
-
-    pdf_backend: Annotated[
-        PdfBackend,
-        Field(
-            description=(
-                "The PDF backend to use. String. "
-                f"Allowed values: {', '.join([v.value for v in PdfBackend])}. "
-                f"Optional, defaults to {PdfBackend.DLPARSE_V2.value}."
-            ),
-            examples=[PdfBackend.DLPARSE_V2],
-        ),
-    ] = PdfBackend.DLPARSE_V2
-
-    table_mode: Annotated[
-        TableFormerMode,
-        Field(
-            TableFormerMode.FAST,
-            description=(
-                "Mode to use for table structure, String. "
-                f"Allowed values: {', '.join([v.value for v in TableFormerMode])}. "
-                "Optional, defaults to fast."
-            ),
-            examples=[TableFormerMode.FAST],
-            # pattern="fast|accurate",
-        ),
-    ] = TableFormerMode.FAST
-
-    abort_on_error: Annotated[
-        bool,
-        Field(
-            description=(
-                "Abort on error if enabled. " "Boolean. Optional, defaults to false."
-            ),
-            # examples=[False],
-        ),
-    ] = False
-
-    return_as_file: Annotated[
-        bool,
-        Field(
-            description=(
-                "Return the output as a zip file "
-                "(will happen anyway if multiple files are generated). "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    do_table_structure: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, the table structure will be extracted. "
-                "Boolean. Optional, defaults to true."
-            ),
-            examples=[True],
-        ),
-    ] = True
-
-    include_images: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, images will be extracted from the document. "
-                "Boolean. Optional, defaults to true."
-            ),
-            examples=[True],
-        ),
-    ] = True
-
-    images_scale: Annotated[
-        float,
-        Field(
-            description="Scale factor for images. Float. Optional, defaults to 2.0.",
-            examples=[2.0],
-        ),
-    ] = 2.0
-
-
-class DocumentsConvertBase(BaseModel):
-    options: ConvertDocumentsOptions = ConvertDocumentsOptions()
-
-
-class HttpSource(BaseModel):
-    url: Annotated[
-        str,
-        Field(
-            description="HTTP url to process",
-            examples=["https://arxiv.org/pdf/2206.01062"],
-        ),
-    ]
-    headers: Annotated[
-        Dict[str, Any],
-        Field(
-            description="Additional headers used to fetch the urls, "
-            "e.g. authorization, agent, etc"
-        ),
-    ] = {}
-
-
-class FileSource(BaseModel):
-    base64_string: Annotated[
-        str,
-        Field(
-            description="Content of the file serialized in base64. "
-            "For example it can be obtained via "
-            "`base64 -w 0 /path/to/file/pdf-to-convert.pdf`."
-        ),
-    ]
-    filename: Annotated[
-        str,
-        Field(description="Filename of the uploaded document", examples=["file.pdf"]),
-    ]
-
-    def to_document_stream(self) -> DocumentStream:
-        buf = BytesIO(base64.b64decode(self.base64_string))
-        return DocumentStream(stream=buf, name=self.filename)
-
-
-class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
-    http_sources: List[HttpSource]
-
-
-class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
-    file_sources: List[FileSource]
-
-
-ConvertDocumentsRequest = Union[
-    ConvertDocumentFileSourcesRequest, ConvertDocumentHttpSourcesRequest
-]
-
-
-# Document converters will be preloaded and stored in a dictionary
-converters: Dict[str, DocumentConverter] = {}
-
-
-# Custom serializer for PdfFormatOption
-# (model_dump_json does not work with some classes)
-def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
-    data = pdf_format_option.model_dump()
-
-    # pipeline_options are not fully serialized by model_dump, dedicated pass
-    if pdf_format_option.pipeline_options:
-        data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
-
-    # Replace `pipeline_cls` with a string representation
-    data["pipeline_cls"] = repr(data["pipeline_cls"])
-
-    # Replace `backend` with a string representation
-    data["backend"] = repr(data["backend"])
-
-    # Handle `device` in `accelerator_options`
-    if "accelerator_options" in data and "device" in data["accelerator_options"]:
-        data["accelerator_options"]["device"] = repr(
-            data["accelerator_options"]["device"]
-        )
-
-    # Serialize the dictionary to JSON with sorted keys to have consistent hashes
-    return json.dumps(data, sort_keys=True)
-
-
-# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
-def get_pdf_pipeline_opts(
-    request: ConvertDocumentsOptions,
-) -> Tuple[PdfFormatOption, str]:
-
-    if request.ocr_engine == OcrEngine.EASYOCR:
-        try:
-            import easyocr  # noqa: F401
-        except ImportError:
-            raise HTTPException(
-                status_code=400,
-                detail="The requested OCR engine"
-                f" (ocr_engine={request.ocr_engine.value})"
-                " is not available on this system. Please choose another OCR engine "
-                "or contact your system administrator.",
-            )
-        ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=request.force_ocr)
-    elif request.ocr_engine == OcrEngine.TESSERACT:
-        try:
-            import tesserocr  # noqa: F401
-        except ImportError:
-            raise HTTPException(
-                status_code=400,
-                detail="The requested OCR engine"
-                f" (ocr_engine={request.ocr_engine.value})"
-                " is not available on this system. Please choose another OCR engine "
-                "or contact your system administrator.",
-            )
-        ocr_options = TesseractOcrOptions(force_full_page_ocr=request.force_ocr)
-    elif request.ocr_engine == OcrEngine.RAPIDOCR:
-        try:
-            from rapidocr_onnxruntime import RapidOCR  # noqa: F401
-        except ImportError:
-            raise HTTPException(
-                status_code=400,
-                detail="The requested OCR engine"
-                f" (ocr_engine={request.ocr_engine.value})"
-                " is not available on this system. Please choose another OCR engine "
-                "or contact your system administrator.",
-            )
-        ocr_options = RapidOcrOptions(force_full_page_ocr=request.force_ocr)
-    else:
-        raise RuntimeError(f"Unexpected OCR engine type {request.ocr_engine}")
-
-    if request.ocr_lang is not None:
-        if isinstance(request.ocr_lang, str):
-            ocr_options.lang = _to_list_of_strings(request.ocr_lang)
-        else:
-            ocr_options.lang = request.ocr_lang
-
-    pipeline_options = PdfPipelineOptions(
-        do_ocr=request.do_ocr,
-        ocr_options=ocr_options,
-        do_table_structure=request.do_table_structure,
-    )
-    pipeline_options.table_structure_options.do_cell_matching = True  # do_cell_matching
-    pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
-
-    if request.image_export_mode != ImageRefMode.PLACEHOLDER:
-        pipeline_options.generate_page_images = True
-        if request.images_scale:
-            pipeline_options.images_scale = request.images_scale
-
-    if request.pdf_backend == PdfBackend.DLPARSE_V1:
-        backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
-    elif request.pdf_backend == PdfBackend.DLPARSE_V2:
-        backend = DoclingParseV2DocumentBackend
-    elif request.pdf_backend == PdfBackend.PYPDFIUM2:
-        backend = PyPdfiumDocumentBackend
-    else:
-        raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
-
-    pdf_format_option = PdfFormatOption(
-        pipeline_options=pipeline_options,
-        backend=backend,
-    )
-
-    serialized_data = _serialize_pdf_format_option(pdf_format_option)
-
-    options_hash = hashlib.sha1(serialized_data.encode()).hexdigest()
-
-    return pdf_format_option, options_hash
-
-
-def convert_documents(
-    sources: Iterable[Union[Path, str, DocumentStream]],
-    options: ConvertDocumentsOptions,
-    headers: Optional[Dict[str, Any]] = None,
-):
-    pdf_format_option, options_hash = get_pdf_pipeline_opts(options)
-
-    if options_hash not in converters:
-        format_options: Dict[InputFormat, FormatOption] = {
-            InputFormat.PDF: pdf_format_option,
-            InputFormat.IMAGE: pdf_format_option,
-        }
-
-        converters[options_hash] = DocumentConverter(format_options=format_options)
-        _log.info(f"We now have {len(converters)} converters in memory.")
-
-    results: Iterator[ConversionResult] = converters[options_hash].convert_all(
-        sources,
-        headers=headers,
-    )
-
-    return results
--- a/docling_serve/gradio_ui.py
+++ b/docling_serve/gradio_ui.py
@@ -1,17 +1,52 @@
+import base64
 import importlib
+import itertools
 import json
 import logging
-import os
+import ssl
 import tempfile
+import time
 from pathlib import Path
+from typing import Optional

+import certifi
 import gradio as gr
-import requests
+import httpx
+
+from docling.datamodel.base_models import FormatToExtensions
+from docling.datamodel.pipeline_options import (
+    PdfBackend,
+    ProcessingPipeline,
+    TableFormerMode,
+    TableStructureOptions,
+)

 from docling_serve.helper_functions import _to_list_of_strings
+from docling_serve.settings import docling_serve_settings, uvicorn_settings

 logger = logging.getLogger(__name__)

+############################
+# Path of static artifacts #
+############################
+
+logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
+js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7"
+if (
+    docling_serve_settings.static_path is not None
+    and docling_serve_settings.static_path.is_dir()
+):
+    logo_path = str(docling_serve_settings.static_path / "logo.svg")
+    js_components_url = "/static/docling-components.js"
+
+
+##############################
+# Head JS for web components #
+##############################
+head = f"""
+    <script src="{js_components_url}" type="module"></script>
+"""
+
 #################
 # CSS and theme #
 #################
@@ -49,6 +84,14 @@ css = """
 #file_input_zone {
    height: 140px;
 }
+
+docling-img {
+    gap: 1rem;
+}
+
+docling-img::part(page) {
+    box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
+}
 """

 theme = gr.themes.Default(
@@ -80,8 +123,29 @@ file_output_path = None  # Will be set when a new file is generated
 #############


+def get_api_endpoint() -> str:
+    # Base URL of the API; the scheme is https only when uvicorn has an SSL
+    # key file configured.
+    protocol = "http" if uvicorn_settings.ssl_keyfile is None else "https"
+    return f"{protocol}://{docling_serve_settings.api_host}:{uvicorn_settings.port}"
+
+
+def get_ssl_context() -> ssl.SSLContext:
+    # Trust the certifi bundle; when serving TLS on a ".svc." host, also trust
+    # the Kubernetes service-account service CA if that file is present.
+    context = ssl.create_default_context(cafile=certifi.where())
+    service_ca = Path("/run/secrets/kubernetes.io/serviceaccount/service-ca.crt")
+    if uvicorn_settings.ssl_keyfile is None:
+        return context
+    if ".svc." not in docling_serve_settings.api_host:
+        return context
+    if service_ca.exists():
+        context.load_verify_locations(cafile=service_ca)
+    return context
+
+
 def health_check():
-    response = requests.get(f"http://localhost:{int(os.getenv('PORT', '5001'))}/health")
+    response = httpx.get(f"{get_api_endpoint()}/health", verify=get_ssl_context())
    if response.status_code == 200:
        return "Healthy"
    return "Unhealthy"
@@ -97,6 +161,11 @@ def set_outputs_visibility_direct(x, y):
    return content, file


+def set_task_id_visibility(x):
+    # Build the Row update whose `visible` flag is the value passed in.
+    return gr.Row(visible=x)
+
+
 def set_outputs_visibility_process(x):
    content = gr.Row(visible=not x)
    file = gr.Row(visible=x)
@@ -108,16 +177,20 @@ def set_download_button_label(label_text: gr.State):


 def clear_outputs():
+    task_id_rendered = ""
    markdown_content = ""
    json_content = ""
+    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""

    return (
+        task_id_rendered,
        markdown_content,
        markdown_content,
        json_content,
+        json_rendered_content,
        html_content,
        html_content,
        text_content,
@@ -133,12 +206,16 @@ def clear_file_input():
    return None


-def auto_set_return_as_file(url_input, file_input, image_export_mode):
+def auto_set_return_as_file(
+    url_input_value: str,
+    file_input_value: Optional[list[str]],
+    image_export_mode_value: str,
+):
    # If more than one input source is provided, return as file
    if (
-        (len(url_input.split(",")) > 1)
-        or (file_input and len(file_input) > 1)
-        or (image_export_mode == "referenced")
+        (len(url_input_value.split(",")) > 1)
+        or (file_input_value and len(file_input_value) > 1)
+        or (image_export_mode_value == "referenced")
    ):
        return True
    else:
@@ -156,10 +233,64 @@ def change_ocr_lang(ocr_engine):
        return "english,chinese"


+def wait_task_finish(auth: str, task_id: str, return_as_file: bool):
+    """Poll the task status until it ends, then fetch and render the result.
+
+    Returns the value produced by response_to_output for the finished task.
+
+    Raises gr.Error if polling fails, if the task ends with status "failure"
+    or "revoked", or if fetching or rendering the result raises.
+    """
+    task_status = ""
+
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = str(auth)
+
+    ssl_ctx = get_ssl_context()
+    while task_status != "success":
+        try:
+            response = httpx.get(
+                f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
+                headers=headers,
+                verify=ssl_ctx,
+                timeout=15,
+            )
+            task_status = response.json()["task_status"]
+            if task_status in ("failure", "revoked"):
+                raise RuntimeError(f"Task failed with status {task_status!r}")
+        except Exception as e:
+            logger.error(f"Error processing file(s): {e}")
+            raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
+
+        # Back off only while the task is still running, so that a finished
+        # task is rendered immediately instead of after an extra five seconds.
+        if task_status != "success":
+            time.sleep(5)
+
+    try:
+        response = httpx.get(
+            f"{get_api_endpoint()}/v1/result/{task_id}",
+            headers=headers,
+            timeout=15,
+            verify=ssl_ctx,
+        )
+        return response_to_output(response, return_as_file)
+    except Exception as e:
+        # Logged here; the gr.Error below reports the failure to the UI.
+        logger.error(f"Error getting task result: {e}")
+
+    raise gr.Error(
+        f"Error getting task result, conversion finished with status: {task_status}"
+    )
+
+
 def process_url(
+    auth,
    input_sources,
    to_formats,
    image_export_mode,
+    pipeline,
    ocr,
    force_ocr,
    ocr_engine,
@@ -168,12 +299,20 @@ def process_url(
    table_mode,
    abort_on_error,
    return_as_file,
+    do_code_enrichment,
+    do_formula_enrichment,
+    do_picture_classification,
+    do_picture_description,
 ):
+    target = {"kind": "zip" if return_as_file else "inbody"}
    parameters = {
-        "http_sources": [{"url": source} for source in input_sources.split(",")],
+        "sources": [
+            {"kind": "http", "url": source} for source in input_sources.split(",")
+        ],
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
+            "pipeline": pipeline,
            "ocr": ocr,
            "force_ocr": force_ocr,
            "ocr_engine": ocr_engine,
@@ -181,20 +320,34 @@ def process_url(
            "pdf_backend": pdf_backend,
            "table_mode": table_mode,
            "abort_on_error": abort_on_error,
-            "return_as_file": return_as_file,
+            "do_code_enrichment": do_code_enrichment,
+            "do_formula_enrichment": do_formula_enrichment,
+            "do_picture_classification": do_picture_classification,
+            "do_picture_description": do_picture_description,
        },
+        "target": target,
    }
    if (
-        not parameters["http_sources"]
-        or len(parameters["http_sources"]) == 0
-        or parameters["http_sources"][0]["url"] == ""
+        not parameters["sources"]
+        or len(parameters["sources"]) == 0
+        or parameters["sources"][0]["url"] == ""
    ):
        logger.error("No input sources provided.")
        raise gr.Error("No input sources provided.", print_exception=False)
+
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = str(auth)
+
+    # Never print or log these headers: they carry the caller's API key.
    try:
-        response = requests.post(
-            f"http://localhost:{int(os.getenv('PORT', '5001'))}/v1alpha/convert/source",
+        ssl_ctx = get_ssl_context()
+        response = httpx.post(
+            f"{get_api_endpoint()}/v1/convert/source/async",
            json=parameters,
+            headers=headers,
+            verify=ssl_ctx,
+            timeout=60,
        )
    except Exception as e:
        logger.error(f"Error processing URL: {e}")
@@ -204,14 +357,23 @@ def process_url(
        error_message = data.get("detail", "An unknown error occurred.")
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
-    output = response_to_output(response, return_as_file)
-    return output
+
+    task_id_rendered = response.json()["task_id"]
+    return task_id_rendered
+
+
+def file_to_base64(file):
+    # Read the file at `file.name` and return its bytes as base64 text.
+    raw_bytes = Path(file.name).read_bytes()
+    return base64.b64encode(raw_bytes).decode("utf-8")


 def process_file(
+    auth,
    files,
    to_formats,
    image_export_mode,
+    pipeline,
    ocr,
    force_ocr,
    ocr_engine,
@@ -220,30 +382,54 @@ def process_file(
    table_mode,
    abort_on_error,
    return_as_file,
+    do_code_enrichment,
+    do_formula_enrichment,
+    do_picture_classification,
+    do_picture_description,
 ):
-    if not files or len(files) == 0 or files[0] == "":
+    if not files or len(files) == 0:
        logger.error("No files provided.")
        raise gr.Error("No files provided.", print_exception=False)
-    files_data = [("files", (file.name, open(file.name, "rb"))) for file in files]
+    files_data = [
+        {"kind": "file", "base64_string": file_to_base64(file), "filename": file.name}
+        for file in files
+    ]
+    target = {"kind": "zip" if return_as_file else "inbody"}

    parameters = {
-        "to_formats": to_formats,
-        "image_export_mode": image_export_mode,
-        "ocr": str(ocr).lower(),
-        "force_ocr": str(force_ocr).lower(),
-        "ocr_engine": ocr_engine,
-        "ocr_lang": _to_list_of_strings(ocr_lang),
-        "pdf_backend": pdf_backend,
-        "table_mode": table_mode,
-        "abort_on_error": str(abort_on_error).lower(),
-        "return_as_file": str(return_as_file).lower(),
+        "sources": files_data,
+        "options": {
+            "to_formats": to_formats,
+            "image_export_mode": image_export_mode,
+            "pipeline": pipeline,
+            "ocr": ocr,
+            "force_ocr": force_ocr,
+            "ocr_engine": ocr_engine,
+            "ocr_lang": _to_list_of_strings(ocr_lang),
+            "pdf_backend": pdf_backend,
+            "table_mode": table_mode,
+            "abort_on_error": abort_on_error,
+            "return_as_file": return_as_file,
+            "do_code_enrichment": do_code_enrichment,
+            "do_formula_enrichment": do_formula_enrichment,
+            "do_picture_classification": do_picture_classification,
+            "do_picture_description": do_picture_description,
+        },
+        "target": target,
    }

+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = str(auth)
+
    try:
-        response = requests.post(
-            f"http://localhost:{int(os.getenv('PORT', '5001'))}/v1alpha/convert/file",
-            files=files_data,
-            data=parameters,
+        ssl_ctx = get_ssl_context()
+        response = httpx.post(
+            f"{get_api_endpoint()}/v1/convert/source/async",
+            json=parameters,
+            headers=headers,
+            verify=ssl_ctx,
+            timeout=60,
        )
    except Exception as e:
        logger.error(f"Error processing file(s): {e}")
@@ -253,13 +439,15 @@ def process_file(
        error_message = data.get("detail", "An unknown error occurred.")
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
-    output = response_to_output(response, return_as_file)
-    return output
+
+    task_id_rendered = response.json()["task_id"]
+    return task_id_rendered


 def response_to_output(response, return_as_file):
    markdown_content = ""
    json_content = ""
+    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""
@@ -282,6 +470,12 @@ def response_to_output(response, return_as_file):
        json_content = json.dumps(
            full_content.get("document").get("json_content"), indent=2
        )
+        # Embed document JSON and trigger load at client via an image.
+        json_rendered_content = f"""
+            <docling-img id="dclimg" pagenumbers><docling-tooltip></docling-tooltip></docling-img>
+            <script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{json_content}</script>
+            <img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
+            """
        html_content = full_content.get("document").get("html_content")
        text_content = full_content.get("document").get("text_content")
        doctags_content = full_content.get("document").get("doctags_content")
@@ -289,6 +483,7 @@ def response_to_output(response, return_as_file):
        markdown_content,
        markdown_content,
        json_content,
+        json_rendered_content,
        html_content,
        html_content,
        text_content,
@@ -302,12 +497,12 @@ def response_to_output(response, return_as_file):
 ############

 with gr.Blocks(
+    head=head,
    css=css,
    theme=theme,
    title="Docling Serve",
-    delete_cache=(3600, 3600),  # Delete all files older than 1 hour every hour
+    delete_cache=(3600, 36000),  # Delete all files older than 10 hours every hour
 ) as ui:
-
    # Constants stored in states to be able to pass them as inputs to functions
    processing_text = gr.State("Processing your document(s), please wait...")
    true_bool = gr.State(True)
@@ -317,17 +512,21 @@ with gr.Blocks(
    with gr.Row(elem_id="check_health"):
        # Logo
        with gr.Column(scale=1, min_width=90):
-            gr.Image(
-                "https://ds4sd.github.io/docling/assets/logo.png",
-                height=80,
-                width=80,
-                show_download_button=False,
-                show_label=False,
-                show_fullscreen_button=False,
-                container=False,
-                elem_id="logo",
-                scale=0,
-            )
+            try:
+                gr.Image(
+                    logo_path,
+                    height=80,
+                    width=80,
+                    show_download_button=False,
+                    show_label=False,
+                    show_fullscreen_button=False,
+                    container=False,
+                    elem_id="logo",
+                    scale=0,
+                )
+            except Exception:
+                logger.warning("Logo not found.")
+
        # Title
        with gr.Column(scale=1, min_width=200):
            gr.Markdown(
@@ -356,59 +555,60 @@ with gr.Blocks(
            )

    # URL Processing Tab
-    with gr.Tab("Convert URL(s)"):
+    with gr.Tab("Convert URL"):
        with gr.Row():
            with gr.Column(scale=4):
                url_input = gr.Textbox(
-                    label="Input Sources (comma-separated URLs)",
-                    placeholder="https://arxiv.org/pdf/2206.01062",
+                    label="URL Input Source",
+                    placeholder="https://arxiv.org/pdf/2501.17887",
                )
            with gr.Column(scale=1):
-                url_process_btn = gr.Button("Process URL(s)", scale=1)
+                url_process_btn = gr.Button("Process URL", scale=1)
                url_reset_btn = gr.Button("Reset", scale=1)

    # File Processing Tab
-    with gr.Tab("Convert File(s)"):
+    with gr.Tab("Convert File"):
        with gr.Row():
            with gr.Column(scale=4):
                file_input = gr.File(
                    elem_id="file_input_zone",
-                    label="Upload Files",
+                    label="Upload File",
                    file_types=[
-                        ".pdf",
-                        ".docx",
-                        ".pptx",
-                        ".html",
-                        ".xlsx",
-                        ".asciidoc",
-                        ".txt",
-                        ".md",
-                        ".jpg",
-                        ".jpeg",
-                        ".png",
-                        ".gif",
+                        f".{v}"
+                        for v in itertools.chain.from_iterable(
+                            FormatToExtensions.values()
+                        )
                    ],
                    file_count="multiple",
                    scale=4,
                )
            with gr.Column(scale=1):
-                file_process_btn = gr.Button("Process File(s)", scale=1)
+                file_process_btn = gr.Button("Process File", scale=1)
                file_reset_btn = gr.Button("Reset", scale=1)

+    # Auth
+    with gr.Row(visible=bool(docling_serve_settings.api_key)):
+        with gr.Column():
+            auth = gr.Textbox(
+                label="Authentication",
+                placeholder="API Key",
+                type="password",
+            )
+
    # Options
    with gr.Accordion("Options") as options:
        with gr.Row():
            with gr.Column(scale=1):
                to_formats = gr.CheckboxGroup(
                    [
-                        ("Markdown", "md"),
                        ("Docling (JSON)", "json"),
+                        ("Markdown", "md"),
                        ("HTML", "html"),
                        ("Plain Text", "text"),
                        ("Doc Tags", "doctags"),
                    ],
                    label="To Formats",
-                    value=["md"],
+                    value=["json", "md"],
                )
            with gr.Column(scale=1):
                image_export_mode = gr.Radio(
@@ -420,6 +620,14 @@ with gr.Blocks(
                    label="Image Export Mode",
                    value="embedded",
                )
+
+        with gr.Row():
+            with gr.Column(scale=1, min_width=200):
+                pipeline = gr.Radio(
+                    [(v.value.capitalize(), v.value) for v in ProcessingPipeline],
+                    label="Pipeline type",
+                    value=ProcessingPipeline.STANDARD.value,
+                )
        with gr.Row():
            with gr.Column(scale=1, min_width=200):
                ocr = gr.Checkbox(label="Enable OCR", value=True)
@@ -440,30 +648,53 @@ with gr.Blocks(
                )
            ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
        with gr.Row():
-            with gr.Column(scale=2):
+            with gr.Column(scale=4):
                pdf_backend = gr.Radio(
-                    ["pypdfium2", "dlparse_v1", "dlparse_v2"],
+                    [v.value for v in PdfBackend],
                    label="PDF Backend",
-                    value="dlparse_v2",
+                    value=PdfBackend.DLPARSE_V4.value,
                )
            with gr.Column(scale=2):
                table_mode = gr.Radio(
-                    ["fast", "accurate"], label="Table Mode", value="fast"
+                    [(v.value.capitalize(), v.value) for v in TableFormerMode],
+                    label="Table Mode",
+                    value=TableStructureOptions().mode.value,
                )
            with gr.Column(scale=1):
                abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
                return_as_file = gr.Checkbox(label="Return as File", value=False)
+        with gr.Row():
+            with gr.Column():
+                do_code_enrichment = gr.Checkbox(
+                    label="Enable code enrichment", value=False
+                )
+                do_formula_enrichment = gr.Checkbox(
+                    label="Enable formula enrichment", value=False
+                )
+            with gr.Column():
+                do_picture_classification = gr.Checkbox(
+                    label="Enable picture classification", value=False
+                )
+                do_picture_description = gr.Checkbox(
+                    label="Enable picture description", value=False
+                )
+
+    # Task id output
+    with gr.Row(visible=False) as task_id_output:
+        task_id_rendered = gr.Textbox(label="Task id", interactive=False)

    # Document output
    with gr.Row(visible=False) as content_output:
+        with gr.Tab("Docling (JSON)"):
+            output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
+        with gr.Tab("Docling-Rendered"):
+            output_json_rendered = gr.HTML(label="Response")
        with gr.Tab("Markdown"):
            output_markdown = gr.Code(
                language="markdown", wrap_lines=True, show_label=False
            )
        with gr.Tab("Markdown-Rendered"):
            output_markdown_rendered = gr.Markdown(label="Response")
-        with gr.Tab("Docling (JSON)"):
-            output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
        with gr.Tab("HTML"):
            output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
        with gr.Tab("HTML-Rendered"):
@@ -503,28 +734,32 @@ with gr.Blocks(
        set_options_visibility, inputs=[false_bool], outputs=[options]
    ).then(
        set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
-    ).then(
-        set_outputs_visibility_process,
-        inputs=[return_as_file],
-        outputs=[content_output, file_output],
    ).then(
        clear_outputs,
        inputs=None,
        outputs=[
+            task_id_rendered,
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
        ],
+    ).then(
+        set_task_id_visibility,
+        inputs=[true_bool],
+        outputs=[task_id_output],
    ).then(
        process_url,
        inputs=[
+            auth,
            url_input,
            to_formats,
            image_export_mode,
+            pipeline,
            ocr,
            force_ocr,
            ocr_engine,
@@ -533,11 +768,26 @@ with gr.Blocks(
            table_mode,
            abort_on_error,
            return_as_file,
+            do_code_enrichment,
+            do_formula_enrichment,
+            do_picture_classification,
+            do_picture_description,
        ],
+        outputs=[
+            task_id_rendered,
+        ],
+    ).then(
+        set_outputs_visibility_process,
+        inputs=[return_as_file],
+        outputs=[content_output, file_output],
+    ).then(
+        wait_task_finish,
+        inputs=[auth, task_id_rendered, return_as_file],
        outputs=[
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -553,6 +803,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -562,7 +813,7 @@ with gr.Blocks(
        set_outputs_visibility_direct,
        inputs=[false_bool, false_bool],
        outputs=[content_output, file_output],
-    ).then(
+    ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
        clear_url_input, inputs=None, outputs=[url_input]
    )

@@ -571,28 +822,32 @@ with gr.Blocks(
        set_options_visibility, inputs=[false_bool], outputs=[options]
    ).then(
        set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
-    ).then(
-        set_outputs_visibility_process,
-        inputs=[return_as_file],
-        outputs=[content_output, file_output],
    ).then(
        clear_outputs,
        inputs=None,
        outputs=[
+            task_id_rendered,
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
            output_doctags,
        ],
+    ).then(
+        set_task_id_visibility,
+        inputs=[true_bool],
+        outputs=[task_id_output],
    ).then(
        process_file,
        inputs=[
+            auth,
            file_input,
            to_formats,
            image_export_mode,
+            pipeline,
            ocr,
            force_ocr,
            ocr_engine,
@@ -601,11 +856,26 @@ with gr.Blocks(
            table_mode,
            abort_on_error,
            return_as_file,
+            do_code_enrichment,
+            do_formula_enrichment,
+            do_picture_classification,
+            do_picture_description,
        ],
+        outputs=[
+            task_id_rendered,
+        ],
+    ).then(
+        set_outputs_visibility_process,
+        inputs=[return_as_file],
+        outputs=[content_output, file_output],
+    ).then(
+        wait_task_finish,
+        inputs=[auth, task_id_rendered, return_as_file],
        outputs=[
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -621,6 +891,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -630,6 +901,6 @@ with gr.Blocks(
        set_outputs_visibility_direct,
        inputs=[false_bool, false_bool],
        outputs=[content_output, file_output],
-    ).then(
+    ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
        clear_file_input, inputs=None, outputs=[file_input]
    )
--- a/docling_serve/helper_functions.py
+++ b/docling_serve/helper_functions.py
@@ -1,41 +1,99 @@
 import inspect
+import json
 import re
-from typing import List, Type, Union
+from typing import Union, get_args, get_origin

 from fastapi import Depends, Form
-from pydantic import BaseModel
+from pydantic import BaseModel, TypeAdapter
+
+
+def is_pydantic_model(type_):
+    """Tell whether type_ is a BaseModel subclass or a Union containing one.
+
+    Any exception raised while inspecting the annotation yields False.
+    """
+
+    def _is_model(candidate):
+        return inspect.isclass(candidate) and issubclass(candidate, BaseModel)
+
+    try:
+        if _is_model(type_):
+            return True
+        if get_origin(type_) is not Union:
+            return False
+        members = [m for m in get_args(type_) if m is not type(None)]
+        return any(_is_model(m) for m in members)
+    except Exception:
+        return False


 # Adapted from
 # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
-def FormDepends(cls: Type[BaseModel]):
+def FormDepends(cls: type[BaseModel]):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
+        annotation = model_field.annotation
+        description = model_field.description
+        default = (
+            Form(..., description=description, examples=model_field.examples)
+            if model_field.is_required()
+            else Form(
+                model_field.default,
+                examples=model_field.examples,
+                description=description,
+            )
+        )
+
+        # Flatten nested Pydantic models by accepting them as JSON strings
+        if is_pydantic_model(annotation):
+            annotation = str
+            default = Form(
+                None
+                if model_field.default is None
+                else json.dumps(model_field.default.model_dump(mode="json")),
+                description=description,
+                examples=None
+                if not model_field.examples
+                else [
+                    json.dumps(ex.model_dump(mode="json"))
+                    for ex in model_field.examples
+                ],
+            )
+
        new_parameters.append(
            inspect.Parameter(
                name=field_name,
                kind=inspect.Parameter.POSITIONAL_ONLY,
-                default=(
-                    Form(...)
-                    if model_field.is_required()
-                    else Form(model_field.default)
-                ),
-                annotation=model_field.annotation,
+                default=default,
+                annotation=annotation,
            )
        )

    async def as_form_func(**data):
+        for field_name, model_field in cls.model_fields.items():
+            value = data.get(field_name)
+            annotation = model_field.annotation
+
+            # Parse nested models from JSON string
+            if value is not None and is_pydantic_model(annotation):
+                try:
+                    data[field_name] = TypeAdapter(annotation).validate_json(value)
+                except Exception as e:
+                    message = f"Invalid JSON for field '{field_name}': {e}"
+                    raise ValueError(message) from e
+
        return cls(**data)

    sig = inspect.signature(as_form_func)
    sig = sig.replace(parameters=new_parameters)
    as_form_func.__signature__ = sig  # type: ignore
+
    return Depends(as_form_func)


-def _to_list_of_strings(input_value: Union[str, List[str]]) -> List[str]:
-    def split_and_strip(value: str) -> List[str]:
+def _to_list_of_strings(input_value: Union[str, list[str]]) -> list[str]:
+    def split_and_strip(value: str) -> list[str]:
        if re.search(r"[;,]", value):
            return [item.strip() for item in re.split(r"[;,]", value)]
        else:
--- a/docling_serve/orchestrator_factory.py
+++ b/docling_serve/orchestrator_factory.py
@@ -0,0 +1,69 @@
+from functools import lru_cache
+
+from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator
+
+from docling_serve.settings import AsyncEngine, docling_serve_settings
+from docling_serve.storage import get_scratch
+
+
+@lru_cache
+def get_async_orchestrator() -> BaseOrchestrator:
+    # Build the orchestrator for the configured engine; lru_cache reuses it.
+    engine = docling_serve_settings.eng_kind
+    if engine == AsyncEngine.LOCAL:
+        from docling_jobkit.convert.manager import (
+            DoclingConverterManager,
+            DoclingConverterManagerConfig,
+        )
+        from docling_jobkit.orchestrators.local.orchestrator import (
+            LocalOrchestrator,
+            LocalOrchestratorConfig,
+        )
+
+        orchestrator_cfg = LocalOrchestratorConfig(
+            num_workers=docling_serve_settings.eng_loc_num_workers,
+            shared_models=docling_serve_settings.eng_loc_share_models,
+            scratch_dir=get_scratch(),
+        )
+        manager_cfg = DoclingConverterManagerConfig(
+            artifacts_path=docling_serve_settings.artifacts_path,
+            options_cache_size=docling_serve_settings.options_cache_size,
+            enable_remote_services=docling_serve_settings.enable_remote_services,
+            allow_external_plugins=docling_serve_settings.allow_external_plugins,
+            max_num_pages=docling_serve_settings.max_num_pages,
+            max_file_size=docling_serve_settings.max_file_size,
+        )
+        manager = DoclingConverterManager(config=manager_cfg)
+        return LocalOrchestrator(config=orchestrator_cfg, converter_manager=manager)
+
+    if engine == AsyncEngine.RQ:
+        from docling_jobkit.orchestrators.rq.orchestrator import (
+            RQOrchestrator,
+            RQOrchestratorConfig,
+        )
+
+        rq_cfg = RQOrchestratorConfig(
+            redis_url=docling_serve_settings.eng_rq_redis_url,
+            results_prefix=docling_serve_settings.eng_rq_results_prefix,
+            sub_channel=docling_serve_settings.eng_rq_sub_channel,
+            scratch_dir=get_scratch(),
+        )
+        return RQOrchestrator(config=rq_cfg)
+
+    if engine == AsyncEngine.KFP:
+        from docling_jobkit.orchestrators.kfp.orchestrator import (
+            KfpOrchestrator,
+            KfpOrchestratorConfig,
+        )
+
+        kfp_cfg = KfpOrchestratorConfig(
+            endpoint=docling_serve_settings.eng_kfp_endpoint,
+            token=docling_serve_settings.eng_kfp_token,
+            ca_cert_path=docling_serve_settings.eng_kfp_ca_cert_path,
+            self_callback_endpoint=docling_serve_settings.eng_kfp_self_callback_endpoint,
+            self_callback_token_path=docling_serve_settings.eng_kfp_self_callback_token_path,
+            self_callback_ca_cert_path=docling_serve_settings.eng_kfp_self_callback_ca_cert_path,
+        )
+        return KfpOrchestrator(config=kfp_cfg)
+
+    raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -1,248 +1,69 @@
+import asyncio
 import logging
-import os
-import shutil
-import tempfile
-import time
-from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Union

-from docling.datamodel.base_models import OutputFormat
-from docling.datamodel.document import ConversionResult, ConversionStatus, ErrorItem
-from docling.utils.profiling import ProfilingItem
-from docling_core.types.doc import DoclingDocument, ImageRefMode
-from fastapi import BackgroundTasks, HTTPException
-from fastapi.responses import FileResponse
-from pydantic import BaseModel
+from fastapi import BackgroundTasks, Response

-from docling_serve.docling_conversion import ConvertDocumentsOptions
+from docling_jobkit.datamodel.result import (
+    ConvertDocumentResult,
+    ExportResult,
+    RemoteTargetResult,
+    ZipArchiveResult,
+)
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+)
+
+from docling_serve.datamodel.responses import (
+    ConvertDocumentResponse,
+    PresignedUrlConvertDocumentResponse,
+)
+from docling_serve.settings import docling_serve_settings

 _log = logging.getLogger(__name__)


-class DocumentResponse(BaseModel):
-    filename: str
-    md_content: Optional[str] = None
-    json_content: Optional[DoclingDocument] = None
-    html_content: Optional[str] = None
-    text_content: Optional[str] = None
-    doctags_content: Optional[str] = None
-
-
-class ConvertDocumentResponse(BaseModel):
-    document: DocumentResponse
-    status: ConversionStatus
-    errors: List[ErrorItem] = []
-    processing_time: float
-    timings: Dict[str, ProfilingItem] = {}
-
-
-class ConvertDocumentErrorResponse(BaseModel):
-    status: ConversionStatus
-
-
-def _export_document_as_content(
-    conv_res: ConversionResult,
-    export_json: bool,
-    export_html: bool,
-    export_md: bool,
-    export_txt: bool,
-    export_doctags: bool,
-    image_mode: ImageRefMode,
-):
-
-    document = DocumentResponse(filename=conv_res.input.file.name)
-
-    if conv_res.status == ConversionStatus.SUCCESS:
-        new_doc = conv_res.document._make_copy_with_refmode(Path(), image_mode)
-
-        # Create the different formats
-        if export_json:
-            document.json_content = new_doc
-        if export_html:
-            document.html_content = new_doc.export_to_html(image_mode=image_mode)
-        if export_txt:
-            document.text_content = new_doc.export_to_markdown(
-                strict_text=True, image_mode=image_mode
-            )
-        if export_md:
-            document.md_content = new_doc.export_to_markdown(image_mode=image_mode)
-        if export_doctags:
-            document.doctags_content = new_doc.export_to_document_tokens()
-    elif conv_res.status == ConversionStatus.SKIPPED:
-        raise HTTPException(status_code=400, detail=conv_res.errors)
-    else:
-        raise HTTPException(status_code=500, detail=conv_res.errors)
-
-    return document
-
-
-def _export_documents_as_files(
-    conv_results: Iterable[ConversionResult],
-    output_dir: Path,
-    export_json: bool,
-    export_html: bool,
-    export_md: bool,
-    export_txt: bool,
-    export_doctags: bool,
-    image_export_mode: ImageRefMode,
-):
-
-    success_count = 0
-    failure_count = 0
-
-    for conv_res in conv_results:
-        if conv_res.status == ConversionStatus.SUCCESS:
-            success_count += 1
-            doc_filename = conv_res.input.file.stem
-
-            # Export JSON format:
-            if export_json:
-                fname = output_dir / f"{doc_filename}.json"
-                _log.info(f"writing JSON output to {fname}")
-                conv_res.document.save_as_json(
-                    filename=fname, image_mode=image_export_mode
-                )
-
-            # Export HTML format:
-            if export_html:
-                fname = output_dir / f"{doc_filename}.html"
-                _log.info(f"writing HTML output to {fname}")
-                conv_res.document.save_as_html(
-                    filename=fname, image_mode=image_export_mode
-                )
-
-            # Export Text format:
-            if export_txt:
-                fname = output_dir / f"{doc_filename}.txt"
-                _log.info(f"writing TXT output to {fname}")
-                conv_res.document.save_as_markdown(
-                    filename=fname,
-                    strict_text=True,
-                    image_mode=ImageRefMode.PLACEHOLDER,
-                )
-
-            # Export Markdown format:
-            if export_md:
-                fname = output_dir / f"{doc_filename}.md"
-                _log.info(f"writing Markdown output to {fname}")
-                conv_res.document.save_as_markdown(
-                    filename=fname, image_mode=image_export_mode
-                )
-
-            # Export Document Tags format:
-            if export_doctags:
-                fname = output_dir / f"{doc_filename}.doctags"
-                _log.info(f"writing Doc Tags output to {fname}")
-                conv_res.document.save_as_document_tokens(filename=fname)
-
-        else:
-            _log.warning(f"Document {conv_res.input.file} failed to convert.")
-            failure_count += 1
-
-    _log.info(
-        f"Processed {success_count + failure_count} docs, "
-        f"of which {failure_count} failed"
-    )
-
-
-def process_results(
+async def prepare_response(  # Build the API response for a finished conversion task, based on its result type.
+    task_id: str,  # id handed to orchestrator.delete_task when results are single-use
+    task_result: ConvertDocumentResult,  # its .result attribute selects the response branch below
+    orchestrator: BaseOrchestrator,
    background_tasks: BackgroundTasks,
-    conversion_options: ConvertDocumentsOptions,
-    conv_results: Iterable[ConversionResult],
-) -> Union[ConvertDocumentResponse, FileResponse]:
-
-    # Let's start by processing the documents
-    try:
-        start_time = time.monotonic()
-
-        # Convert the iterator to a list to count the number of results and get timings
-        # As it's an iterator (lazy evaluation), it will also start the conversion
-        conv_results = list(conv_results)
-
-        processing_time = time.monotonic() - start_time
-
-        _log.info(
-            f"Processed {len(conv_results)} docs in {processing_time:.2f} seconds."
-        )
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))
-
-    if len(conv_results) == 0:
-        raise HTTPException(
-            status_code=500, detail="No documents were generated by Docling."
-        )
-
-    # We have some results, let's prepare the response
-    response: Union[FileResponse, ConvertDocumentResponse]
-
-    # Booleans to know what to export
-    export_json = OutputFormat.JSON in conversion_options.to_formats
-    export_html = OutputFormat.HTML in conversion_options.to_formats
-    export_md = OutputFormat.MARKDOWN in conversion_options.to_formats
-    export_txt = OutputFormat.TEXT in conversion_options.to_formats
-    export_doctags = OutputFormat.DOCTAGS in conversion_options.to_formats
-
-    # Only 1 document was processed, and we are not returning it as a file
-    if len(conv_results) == 1 and not conversion_options.return_as_file:
-        conv_res = conv_results[0]
-        document = _export_document_as_content(
-            conv_res,
-            export_json=export_json,
-            export_html=export_html,
-            export_md=export_md,
-            export_txt=export_txt,
-            export_doctags=export_doctags,
-            image_mode=conversion_options.image_export_mode,
-        )
-
+):
+    response: Response | ConvertDocumentResponse | PresignedUrlConvertDocumentResponse
+    if isinstance(task_result.result, ExportResult):  # document content is returned in the response model
        response = ConvertDocumentResponse(
-            document=document,
-            status=conv_res.status,
-            processing_time=processing_time,
-            timings=conv_res.timings,
+            document=task_result.result.content,
+            status=task_result.result.status,
+            processing_time=task_result.processing_time,
+            timings=task_result.result.timings,
+            errors=task_result.result.errors,
+        )
+    elif isinstance(task_result.result, ZipArchiveResult):  # archive content is sent as a zip attachment
+        response = Response(
+            content=task_result.result.content,
+            media_type="application/zip",
+            headers={
+                "Content-Disposition": 'attachment; filename="converted_docs.zip"'
+            },
+        )
+    elif isinstance(task_result.result, RemoteTargetResult):  # only timing and conversion counters are returned
+        response = PresignedUrlConvertDocumentResponse(
+            processing_time=task_result.processing_time,
+            num_converted=task_result.num_converted,
+            num_succeeded=task_result.num_succeeded,
+            num_failed=task_result.num_failed,
        )
-
-    # Multiple documents were processed, or we are forced returning as a file
    else:
-        # Temporary directory to store the outputs
-        work_dir = Path(tempfile.mkdtemp(prefix="docling_"))
-        output_dir = work_dir / "output"
-        output_dir.mkdir(parents=True, exist_ok=True)
+        raise ValueError("Unknown result type")  # result is none of the three handled types

-        # Worker pid to use in archive identification as we may have multiple workers
-        os.getpid()
+    if docling_serve_settings.single_use_results:  # schedule deletion of the task once it has been served

-        # Export the documents
-        _export_documents_as_files(
-            conv_results=conv_results,
-            output_dir=output_dir,
-            export_json=export_json,
-            export_html=export_html,
-            export_md=export_md,
-            export_txt=export_txt,
-            export_doctags=export_doctags,
-            image_export_mode=conversion_options.image_export_mode,
-        )
+        async def _remove_task_impl():  # waits result_removal_delay seconds, then deletes the task
+            await asyncio.sleep(docling_serve_settings.result_removal_delay)
+            await orchestrator.delete_task(task_id=task_id)

-        files = os.listdir(output_dir)
+        async def _remove_task():  # only spawns the delayed removal, so this coroutine returns without waiting for the delay
+            asyncio.create_task(_remove_task_impl())  # noqa: RUF006

-        if len(files) == 0:
-            raise HTTPException(status_code=500, detail="No documents were exported.")
-
-        file_path = work_dir / "converted_docs.zip"
-        shutil.make_archive(
-            base_name=str(file_path.with_suffix("")),
-            format="zip",
-            root_dir=output_dir,
-        )
-
-        # Other cleanups after the response is sent
-        # Output directory
-        background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)
-
-        response = FileResponse(
-            file_path, filename=file_path.name, media_type="application/zip"
-        )
+        background_tasks.add_task(_remove_task)  # registered on the request's BackgroundTasks

    return response
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -1,6 +1,105 @@
+import enum
+import sys
+from pathlib import Path
+from typing import Optional, Union
+
+from pydantic import AnyUrl, model_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing_extensions import Self


-class Settings(BaseSettings):
+class UvicornSettings(BaseSettings):  # uvicorn server options, loaded from UVICORN_* variables and the .env file
+    model_config = SettingsConfigDict(
+        env_prefix="UVICORN_", env_file=".env", extra="allow"
+    )

-    model_config = SettingsConfigDict(env_prefix="DOCLING_")
+    host: str = "0.0.0.0"  # default listens on all network interfaces
+    port: int = 5001
+    reload: bool = False
+    root_path: str = ""
+    proxy_headers: bool = True
+    timeout_keep_alive: int = 60
+    ssl_certfile: Optional[Path] = None
+    ssl_keyfile: Optional[Path] = None
+    ssl_keyfile_password: Optional[str] = None
+    workers: Union[int, None] = None  # NOTE(review): docs list 1 as the default; presumably applied downstream when None - confirm
+
+
+class AsyncEngine(str, enum.Enum):  # engine kinds selectable through the `eng_kind` setting
+    LOCAL = "local"  # selects the LocalOrchestrator
+    KFP = "kfp"  # selects the KfpOrchestrator
+    RQ = "rq"  # selects the RQOrchestrator
+
+
+class DoclingServeSettings(BaseSettings):
+    """App settings, read from `DOCLING_SERVE_*` variables and the `.env` file."""
+
+    model_config = SettingsConfigDict(
+        env_prefix="DOCLING_SERVE_",
+        env_file=".env",
+        env_parse_none_str="",
+        extra="allow",
+    )
+
+    enable_ui: bool = False
+    api_host: str = "localhost"
+    artifacts_path: Optional[Path] = None
+    static_path: Optional[Path] = None
+    scratch_path: Optional[Path] = None
+    single_use_results: bool = True
+    result_removal_delay: float = 300  # 5 minutes
+    load_models_at_boot: bool = True
+    options_cache_size: int = 2
+    enable_remote_services: bool = False
+    allow_external_plugins: bool = False
+
+    api_key: str = ""
+
+    max_document_timeout: float = 3_600 * 24 * 7  # 7 days
+    max_num_pages: int = sys.maxsize
+    max_file_size: int = sys.maxsize
+
+    max_sync_wait: int = 120  # 2 minutes
+
+    cors_origins: list[str] = ["*"]
+    cors_methods: list[str] = ["*"]
+    cors_headers: list[str] = ["*"]
+
+    eng_kind: AsyncEngine = AsyncEngine.LOCAL
+    # Local engine
+    eng_loc_num_workers: int = 2
+    eng_loc_share_models: bool = False
+    # RQ engine
+    eng_rq_redis_url: str = ""
+    eng_rq_results_prefix: str = "docling:results"
+    eng_rq_sub_channel: str = "docling:updates"
+    # KFP engine
+    eng_kfp_endpoint: Optional[AnyUrl] = None
+    eng_kfp_token: Optional[str] = None
+    eng_kfp_ca_cert_path: Optional[str] = None
+    eng_kfp_self_callback_endpoint: Optional[str] = None
+    eng_kfp_self_callback_token_path: Optional[Path] = None
+    eng_kfp_self_callback_ca_cert_path: Optional[Path] = None
+    eng_kfp_experimental: bool = False
+
+    @model_validator(mode="after")
+    def engine_settings(self) -> Self:
+        """Check that the selected engine has the settings it requires."""
+        selected = self.eng_kind
+        # KFP: the endpoint check runs before the experimental opt-in check.
+        if selected == AsyncEngine.KFP and self.eng_kfp_endpoint is None:
+            raise ValueError("KFP endpoint is required when using the KFP engine.")
+        if selected == AsyncEngine.KFP and not self.eng_kfp_experimental:
+            raise ValueError(
+                "KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
+            )
+
+        # RQ: an empty Redis url counts as missing.
+        if selected == AsyncEngine.RQ and not self.eng_rq_redis_url:
+            raise ValueError("RQ Redis url is required when using the RQ engine.")
+
+        return self
+
+
+uvicorn_settings = UvicornSettings()
+docling_serve_settings = DoclingServeSettings()
--- a/docling_serve/storage.py
+++ b/docling_serve/storage.py
@@ -0,0 +1,16 @@
+import tempfile
+from functools import lru_cache
+from pathlib import Path
+
+from docling_serve.settings import docling_serve_settings
+
+
+@lru_cache
+def get_scratch() -> Path:
+    """Return the scratch directory, creating it on first use (result is cached)."""
+    workspace = docling_serve_settings.scratch_path
+    if workspace is None:
+        # No path configured: fall back to a fresh temporary directory.
+        workspace = Path(tempfile.mkdtemp(prefix="docling_"))
+    workspace.mkdir(exist_ok=True, parents=True)
+    return workspace
--- a/docling_serve/websocket_notifier.py
+++ b/docling_serve/websocket_notifier.py
@@ -0,0 +1,88 @@
+from fastapi import WebSocket
+
+from docling_jobkit.datamodel.task_meta import TaskStatus
+from docling_jobkit.orchestrators.base_notifier import BaseNotifier
+from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator
+
+from docling_serve.datamodel.responses import (
+    MessageKind,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+
+
+class WebsocketNotifier(BaseNotifier):
+    """Notifier that pushes task status updates to subscribed websockets.
+
+    `task_subscribers` maps a task id to the set of websockets that should
+    receive updates for that task.
+    """
+
+    def __init__(self, orchestrator: BaseOrchestrator):
+        super().__init__(orchestrator)
+        self.task_subscribers: dict[str, set[WebSocket]] = {}
+
+    async def add_task(self, task_id: str):
+        """Register `task_id` with an empty set of subscribers."""
+        self.task_subscribers[task_id] = set()
+
+    async def remove_task(self, task_id: str):
+        """Close all websockets subscribed to `task_id` and drop its entry.
+
+        Unknown task ids are ignored.
+        """
+        if task_id in self.task_subscribers:
+            # Iterate over a snapshot: the set may be mutated elsewhere
+            # while `close()` is awaited, which would otherwise raise
+            # "RuntimeError: Set changed size during iteration".
+            for websocket in tuple(self.task_subscribers[task_id]):
+                await websocket.close()
+
+            # The entry may already have been removed during the awaits above.
+            self.task_subscribers.pop(task_id, None)
+
+    async def notify_task_subscribers(self, task_id: str):
+        """Send the current status of `task_id` to each of its subscribers.
+
+        Websockets are closed after the update once the task is completed.
+
+        Raises:
+            RuntimeError: If `task_id` has no subscribers list.
+        """
+        if task_id not in self.task_subscribers:
+            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
+
+        task = await self.orchestrator.get_raw_task(task_id=task_id)
+        task_queue_position = await self.orchestrator.get_queue_position(task_id)
+        msg = TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+        # The message is identical for every subscriber: serialize it once.
+        payload = WebsocketMessage(
+            message=MessageKind.UPDATE, task=msg
+        ).model_dump_json()
+        # Snapshot the subscribers: the task entry or individual websockets
+        # may be removed elsewhere while the awaits above and below run.
+        for websocket in tuple(self.task_subscribers.get(task_id, ())):
+            await websocket.send_text(payload)
+            if task.is_completed():
+                await websocket.close()
+
+    async def notify_queue_positions(self):
+        """Send a status update to the subscribers of every pending task."""
+        # Snapshot the keys: tasks can be added or removed while a
+        # notification is awaited, and mutating a dict during iteration
+        # raises RuntimeError.
+        for task_id in tuple(self.task_subscribers):
+            if task_id not in self.task_subscribers:
+                # removed while an earlier notification was in flight
+                continue
+
+            # notify only pending tasks
+            if self.orchestrator.tasks[task_id].task_status != TaskStatus.PENDING:
+                continue
+
+            await self.notify_task_subscribers(task_id)
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,10 @@
+# Docling Serve documentation
+
+These documentation pages cover the webserver configuration, runtime options, deployment examples, and development best practices.
+
+- [Configuration](./configuration.md)
+- [Handling models](./models.md)
+- [Usage](./usage.md)
+- [Deployment](./deployment.md)
+- [Development](./development.md)
+- [`v1` migration](./v1_migration.md)
--- a/docs/assets/docling-serve-pic.png
+++ b/docs/assets/docling-serve-pic.png
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -0,0 +1,100 @@
+# Configuration
+
+The `docling-serve` executable allows configuring the server via command line
+options as well as environment variables.
+Configurations are divided between the settings used for the `uvicorn` asgi
+server and the actual app-specific configurations.
+
+> [!WARNING]
+> When the server is running with `reload` or with multiple `workers`, uvicorn
+> will spawn multiple subprocesses. This invalidates all the values configured
+> via the CLI command line options. Please use environment variables in these
+> types of deployments.
+
+## Webserver configuration
+
+The following table shows the options which are propagated directly to the
+`uvicorn` webserver runtime.
+
+| CLI option | ENV | Default | Description |
+| -----------|-----|---------|-------------|
+| `--host` | `UVICORN_HOST` | `0.0.0.0` for `run`, `localhost` for `dev` | The host to serve on. |
+| `--port` | `UVICORN_PORT` | `5001` | The port to serve on. |
+| `--reload` | `UVICORN_RELOAD` | `false` for `run`, `true` for `dev` | Enable auto-reload of the server when (code) files change. |
+| `--workers` | `UVICORN_WORKERS` | `1` | Use multiple worker processes. |
+| `--root-path` | `UVICORN_ROOT_PATH` | `""` | The root path is used to tell your app that it is being served to the outside world with some path prefix set up in some termination proxy or similar. |
+| `--proxy-headers` | `UVICORN_PROXY_HEADERS` | `true` | Enable/Disable X-Forwarded-Proto, X-Forwarded-For, X-Forwarded-Port to populate remote address info. |
+| `--timeout-keep-alive` | `UVICORN_TIMEOUT_KEEP_ALIVE` | `60` | Timeout for the server response. |
+| `--ssl-certfile` | `UVICORN_SSL_CERTFILE` |  | SSL certificate file. |
+| `--ssl-keyfile` | `UVICORN_SSL_KEYFILE` |  | SSL key file. |
+| `--ssl-keyfile-password` | `UVICORN_SSL_KEYFILE_PASSWORD` |  | SSL keyfile password. |
+
+## Docling Serve configuration
+
+The following table describes the options to configure the Docling Serve app.
+
+| CLI option | ENV | Default | Description |
+| -----------|-----|---------|-------------|
+| `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path |
+|  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path |
+|  | `DOCLING_SERVE_SCRATCH_PATH` |  | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary directory is created for this purpose. |
+| `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
+|  | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
+|  | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
+|  | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
+|  | `DOCLING_SERVE_RESULT_REMOVAL_DELAY` | `300` | When `DOCLING_SERVE_SINGLE_USE_RESULTS` is active, this is the delay before results are removed from the task registry. |
+|  | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
+|  | `DOCLING_SERVE_MAX_NUM_PAGES` |  | The maximum number of pages for a document to be processed. |
+|  | `DOCLING_SERVE_MAX_FILE_SIZE` |  | The maximum file size for a document to be processed. |
+|  | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
+|  | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
+|  | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConverter objects (including their loaded models) to keep in the cache. |
+|  | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
+|  | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
+|  | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
+|  | `DOCLING_SERVE_API_KEY` | | If specified, all the API requests must contain the header `X-Api-Key` with this value. |
+|  | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local`, `rq` and `kfp`. See below for more configurations of the engines. |
+
+### Compute engine
+
+Docling Serve can be deployed with one of several compute engines.
+The selected compute engine will be running all the async jobs.
+
+#### Local engine
+
+The following table describes the options to configure the Docling Serve local engine.
+
+| ENV | Default | Description |
+|-----|---------|-------------|
+| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
+| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |
+
+#### RQ engine
+
+The following table describes the options to configure the Docling Serve RQ engine.
+
+| ENV | Default | Description |
+|-----|---------|-------------|
+| `DOCLING_SERVE_ENG_RQ_REDIS_URL` | (required) | The Redis connection url, e.g. `redis://localhost:6379/` |
+| `DOCLING_SERVE_ENG_RQ_RESULTS_PREFIX` | `docling:results` | The prefix used for storing the results in Redis. |
+| `DOCLING_SERVE_ENG_RQ_SUB_CHANNEL` | `docling:updates` | The channel key name used for communicating updates between the workers and the orchestrator. |
+
+#### KFP engine
+
+The following table describes the options to configure the Docling Serve KFP engine.
+
+| ENV | Default | Description |
+|-----|---------|-------------|
+| `DOCLING_SERVE_ENG_KFP_ENDPOINT` |  | Must be set to the Kubeflow Pipeline endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888`  |
+| `DOCLING_SERVE_ENG_KFP_TOKEN` |  | The authentication token for KFP. For in-cluster deployment, the app will load automatically the token of the ServiceAccount. |
+| `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` |  | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will load automatically the internal CA. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` |  | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1/callback/task/progress`. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` |  | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` |  | The CA certificate for the progress callback. For cluster-internal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
+
+#### Gradio UI
+
+When using the Gradio UI with the option to output the conversion as a file, Gradio uses a cache to prevent files from being overwritten ([more info here](https://www.gradio.app/guides/file-access#the-gradio-cache)), and we set the cache clean-up frequency to one hour, removing files older than 10 hours. For situations where files older than 10 hours need to remain available for download from the UI, there are two options:
+
+- Increase the maximum age of files to clean [here](https://github.com/docling-project/docling-serve/blob/main/docling_serve/gradio_ui.py#L483) to the desired age;
+- Or manage the clean-up manually by setting the Gradio temporary directory to the same absolute path as `DOCLING_SERVE_SCRATCH_PATH`. This can be achieved by setting the environment variable `GRADIO_TEMP_DIR`, either via the command line `export GRADIO_TEMP_DIR="<same_path_as_scratch>"` or in the `Dockerfile` using `ENV GRADIO_TEMP_DIR="<same_path_as_scratch>"`. After this, set the cache clean-up to `None` [here](https://github.com/docling-project/docling-serve/blob/main/docling_serve/gradio_ui.py#L483). Now, the clean-up of `DOCLING_SERVE_SCRATCH_PATH` will also clean the Gradio temporary directory. (If you use this option, remember to remove the environment variable `GRADIO_TEMP_DIR` when reverting these changes; otherwise files may not be available for download.)
--- a/docs/deploy-examples/compose-amd.yaml
+++ b/docs/deploy-examples/compose-amd.yaml
@@ -0,0 +1,21 @@
+# AMD ROCm deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-rocm:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
+      ## This section is for compatibility with older cards
+      # HSA_OVERRIDE_GFX_VERSION: "11.0.0"
+      # HSA_ENABLE_SDMA: "0"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - 44    # video group GID from host
+      - 992   # render group GID from host
+    restart: always
--- a/docs/deploy-examples/compose-nvidia.yaml
+++ b/docs/deploy-examples/compose-nvidia.yaml
@@ -0,0 +1,20 @@
+# NVIDIA CUDA deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu126:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
--- a/docs/deploy-examples/docling-model-cache-deployment.yaml
+++ b/docs/deploy-examples/docling-model-cache-deployment.yaml
@@ -0,0 +1,47 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 2
+              memory: 4Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+            - name: DOCLING_SERVE_ARTIFACTS_PATH
+              value: '/modelcache'
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
--- a/docs/deploy-examples/docling-model-cache-job.yaml
+++ b/docs/deploy-examples/docling-model-cache-job.yaml
@@ -0,0 +1,33 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: docling-model-cache-load
+spec:
+  selector: {}
+  template:
+    metadata:
+      name: docling-model-load
+    spec:
+      containers:
+        - name: loader
+          image: ghcr.io/docling-project/docling-serve-cpu:main
+          command:
+            - docling-tools
+            - models
+            - download
+            - '--output-dir=/modelcache'
+            - 'layout'
+            - 'tableformer'
+            - 'code_formula'
+            - 'picture_classifier'
+            - 'smolvlm'
+            - 'granite_vision'
+            - 'easyocr'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
+      restartPolicy: Never
--- a/docs/deploy-examples/docling-model-cache-pvc.yaml
+++ b/docs/deploy-examples/docling-model-cache-pvc.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: docling-model-cache-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 10Gi
--- a/docs/deploy-examples/docling-serve-oauth.yaml
+++ b/docs/deploy-examples/docling-serve-oauth.yaml
@@ -0,0 +1,192 @@
+# This example deployment configures Docling Serve with an OAuth-Proxy sidecar and TLS termination
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+  annotations:
+    serviceaccounts.openshift.io/oauth-redirectreference.primary: '{"kind":"OAuthRedirectReference","apiVersion":"v1","reference":{"kind":"Route","name":"docling-serve"}}'
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: docling-serve-oauth
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: system:auth-delegator
+subjects:
+- kind: ServiceAccount
+  name: docling-serve
+  namespace: docling
+---
+apiVersion: route.openshift.io/v1
+kind: Route
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  to:
+    kind: Service
+    name: docling-serve
+  port:
+    targetPort: oauth
+  tls:
+    termination: Reencrypt
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+  annotations:
+    service.alpha.openshift.io/serving-cert-secret-name: docling-serve-tls
+spec:
+  ports:
+  - name: oauth
+    port: 8443
+    targetPort: oauth
+  - name: http
+    port: 5001
+    targetPort: http
+  selector:
+    app: docling-serve
+    component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      serviceAccountName: docling-serve
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 2000m
+              memory: 4Gi
+            requests:
+              cpu: 800m
+              memory: 1Gi
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+              scheme: HTTPS
+            initialDelaySeconds: 10
+            timeoutSeconds: 2
+            periodSeconds: 5
+            successThreshold: 1
+            failureThreshold: 3
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: http
+              scheme: HTTPS
+            initialDelaySeconds: 3
+            timeoutSeconds: 4
+            periodSeconds: 10
+            successThreshold: 1
+            failureThreshold: 5
+          env:
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+            - name: DOCLING_SERVE_API_HOST
+              value: 'docling-serve.$(NAMESPACE).svc.cluster.local'
+            - name: UVICORN_SSL_CERTFILE
+              value: '/etc/tls/private/tls.crt'
+            - name: UVICORN_SSL_KEYFILE
+              value: '/etc/tls/private/tls.key'
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          volumeMounts:
+            - name: proxy-tls
+              mountPath: /etc/tls/private
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu:fix-ui-with-https'
+        - name: oauth-proxy
+          resources:
+            limits:
+              cpu: 100m
+              memory: 256Mi
+            requests:
+              cpu: 100m
+              memory: 256Mi
+          readinessProbe:
+            httpGet:
+              path: /oauth/healthz
+              port: oauth
+              scheme: HTTPS
+            initialDelaySeconds: 5
+            timeoutSeconds: 1
+            periodSeconds: 5
+            successThreshold: 1
+            failureThreshold: 3
+          livenessProbe:
+            httpGet:
+              path: /oauth/healthz
+              port: oauth
+              scheme: HTTPS
+            initialDelaySeconds: 30
+            timeoutSeconds: 1
+            periodSeconds: 5
+            successThreshold: 1
+            failureThreshold: 3
+          ports:
+            - name: oauth
+              containerPort: 8443
+              protocol: TCP
+          imagePullPolicy: IfNotPresent
+          volumeMounts:
+            - name: proxy-tls
+              mountPath: /etc/tls/private
+          env:
+            - name: NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+          image: 'registry.redhat.io/openshift4/ose-oauth-proxy:v4.13'
+          args:
+            - '--https-address=:8443'
+            - '--provider=openshift'
+            - '--openshift-service-account=docling-serve'
+            - '--upstream=https://docling-serve.$(NAMESPACE).svc.cluster.local:5001'
+            - '--upstream-ca=/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt'
+            - '--tls-cert=/etc/tls/private/tls.crt'
+            - '--tls-key=/etc/tls/private/tls.key'
+            - '--cookie-secret=SECRET'
+            - '--openshift-delegate-urls={"/": {"group":"route.openshift.io","resource":"routes","verb":"get","name":"docling-serve","namespace":"$(NAMESPACE)"}}'
+            - '--openshift-sar={"namespace":"$(NAMESPACE)","resource":"routes","resourceName":"docling-serve","verb":"get","resourceAPIGroup":"route.openshift.io"}'
+            - '--skip-auth-regex=''(^/health|^/docs)'''
+      volumes:
+        - name: proxy-tls
+          secret:
+            secretName: docling-serve-tls
+            defaultMode: 420
--- a/docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
+++ b/docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
@@ -0,0 +1,76 @@
+# This example deployment configures Docling Serve with a Route + Sticky sessions, a Service and cpu image
+---
+kind: Route
+apiVersion: route.openshift.io/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+  annotations:
+    haproxy.router.openshift.io/disable_cookies: "false" # this annotation enables the sticky sessions
+spec:
+  path: /
+  to:
+    kind: Service
+    name: docling-serve
+  port:
+    targetPort: http
+  tls:
+    termination: edge
+    insecureEdgeTerminationPolicy: Redirect
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  ports:
+  - name: http
+    port: 5001
+    targetPort: http
+  selector:
+    app: docling-serve
+    component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 1
+              memory: 4Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve'
--- a/docs/deploy-examples/docling-serve-rq-workers.yaml
+++ b/docs/deploy-examples/docling-serve-rq-workers.yaml
@@ -0,0 +1,192 @@
+# This example deployment configures Docling Serve with a Service and RQ workers
+
+# Create the following secret
+# kubectl create secret generic docling-serve-rq-secrets --from-literal=REDIS_PASSWORD=myredispassword --from-literal=RQ_REDIS_URL=redis://:myredispassword@docling-serve-redis-service:6373/
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  ports:
+  - name: http
+    port: 5001
+    targetPort: http
+  selector:
+    app: docling-serve
+    component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 1
+              memory: 8Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+            - name: DOCLING_SERVE_ENG_KIND
+              value: 'rq'
+            - name: DOCLING_SERVE_ENG_RQ_REDIS_URL
+              valueFrom:
+                secretKeyRef:
+                  name: docling-serve-rq-secrets
+                  key: RQ_REDIS_URL
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu'
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve-rq-workers
+  labels:
+    app: docling-serve-rq-workers
+    component: docling-serve-rq-worker
+spec:
+  replicas: 2
+  selector:
+    matchLabels:
+      app: docling-serve-rq-workers
+      component: docling-serve-rq-worker
+  template:
+    metadata:
+      labels:
+        app: docling-serve-rq-workers
+        component: docling-serve-rq-worker
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: worker
+          resources:
+            limits:
+              cpu: 1
+              memory: 4Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENG_KIND
+              value: 'rq'
+            - name: DOCLING_SERVE_ENG_RQ_REDIS_URL
+              valueFrom:
+                secretKeyRef:
+                  name: docling-serve-rq-secrets
+                  key: RQ_REDIS_URL
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu'
+          command: ["docling-serve"]
+          args: ["rq-worker"]
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: docling-serve-redis
+  labels:
+    app: docling-serve-redis
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve-redis
+  template:
+    metadata:
+      labels:
+        app: docling-serve-redis
+    spec:
+      restartPolicy: Always
+      terminationGracePeriodSeconds: 30
+      containers:
+        - name: redis
+          resources:
+            limits:
+              cpu: 1
+              memory: 1Gi
+            requests:
+              cpu: 250m
+              memory: 100Mi
+          image: redis:latest
+          command: ["redis-server"]
+          args:
+            - "--port"
+            - "6373"
+            - "--dir"
+            - "/mnt/redis/data"
+            - "--appendonly"
+            - "yes"
+            - "--requirepass"
+            - "$(REDIS_PASSWORD)"
+          ports:
+            - containerPort: 6373
+          env:
+            - name: REDIS_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: docling-serve-rq-secrets
+                  key: REDIS_PASSWORD
+          volumeMounts:
+            - name: redis-data
+              mountPath: /mnt/redis/data
+          securityContext:
+            fsGroup: 1004
+            runAsNonRoot: true
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+            seccompProfile:
+              type: RuntimeDefault
+      volumes:
+        - name: redis-data
+          emptyDir:
+            medium: Memory
+            sizeLimit: 2Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve-redis-service
+  labels:
+      app: docling-serve-redis
+spec:
+  type: NodePort
+  ports:
+    - name: redis-service
+      protocol: TCP
+      port: 6373
+      targetPort: 6373
+  selector:
+    app: docling-serve-redis
--- a/docs/deploy-examples/docling-serve-simple.yaml
+++ b/docs/deploy-examples/docling-serve-simple.yaml
@@ -0,0 +1,58 @@
+# This example deployment configures Docling Serve with a Service and cuda image
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  ports:
+  - name: http
+    port: 5001
+    targetPort: http
+  selector:
+    app: docling-serve
+    component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 1
+              memory: 4Gi
+              nvidia.com/gpu: 1  # Limit to one GPU
+            requests:
+              cpu: 250m
+              memory: 1Gi
+              nvidia.com/gpu: 1  # Limit to one GPU
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cu124'
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -0,0 +1,330 @@
+# Deployment Examples
+
+This document provides deployment examples for running the application in different environments.
+
+Choose the deployment option that best fits your setup.
+
+- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
+- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
+- **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.
+
+---
+
+## Local GPU NVIDIA
+
+### Docker compose
+
+Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)
+
+This deployment has the following features:
+
+- NVIDIA cuda enabled
+
+Install the app with:
+
+```sh
+docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
+```
+
+For using the API:
+
+```sh
+# Make a test query
+curl -X 'POST' \
+  "localhost:5001/v1/convert/source/async" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+<details>
+<summary><b>Requirements</b></summary>
+
+- debian/ubuntu/rhel/fedora/opensuse
+- docker
+- nvidia drivers >=550.54.14
+- nvidia-container-toolkit
+
+Docs:
+
+- [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/supported-platforms.html)
+- [CUDA Toolkit Release Notes](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#id6)
+
+</details>
+
+<details>
+<summary><b>Steps</b></summary>
+
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file or use `count: all`)
+
+    ```sh
+    nvidia-smi
+    ```
+
+2. Check if the NVIDIA Container Toolkit is installed/updated
+
+    ```sh
+    # debian
+    dpkg -l | grep nvidia-container-toolkit
+    ```
+
+    ```sh
+    # rhel
+    rpm -q nvidia-container-toolkit
+    ```
+
+    NVIDIA Container Toolkit install steps can be found here:
+
+    <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>
+
+3. Check which runtime is being used by Docker
+
+    ```sh
+    # docker
+    docker info | grep -i runtime
+    ```
+
+4. If the default Docker runtime changes back from 'nvidia' to 'default' after restarting the Docker service (optional):
+
+    Backup the daemon.json file:
+
+    ```sh
+    sudo cp /etc/docker/daemon.json /etc/docker/daemon.json.bak
+    ```
+
+    Update the daemon.json file:
+
+    ```sh
+    echo '{
+      "runtimes": {
+        "nvidia": {
+          "path": "nvidia-container-runtime"
+        }
+      },
+      "default-runtime": "nvidia"
+    }' | sudo tee /etc/docker/daemon.json > /dev/null
+    ```
+
+    Restart the Docker service:
+
+    ```sh
+    sudo systemctl restart docker
+    ```
+
+    Confirm 'nvidia' is the default runtime used by Docker by repeating step 3.
+
+5. Run the container:
+
+    ```sh
+    docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
+    ```
+
+</details>
+
+## Local GPU AMD
+
+### Docker compose
+
+Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)
+
+This deployment has the following features:
+
+- AMD rocm enabled
+
+Install the app with:
+
+```sh
+docker compose -f docs/deploy-examples/compose-amd.yaml up -d
+```
+
+For using the API:
+
+```sh
+# Make a test query
+curl -X 'POST' \
+  "localhost:5001/v1/convert/source/async" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+<details>
+<summary><b>Requirements</b></summary>
+
+- debian/ubuntu/rhel/fedora/opensuse
+- docker
+- AMDGPU driver >=6.3
+- AMD ROCm >=6.3
+
+Docs:
+
+- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
+
+</details>
+
+<details>
+<summary><b>Steps</b></summary>
+
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    rocm-smi --showdriverversion
+    rocminfo | grep -i "ROCm version"
+    ```
+
+2. Find both video group GID and render group GID from host (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    getent group video
+    getent group render
+    ```
+
+3. Build the image locally (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    make docling-serve-rocm-image
+    ```
+
+</details>
+
+## OpenShift
+
+### Simple deployment
+
+Manifest example: [docling-serve-simple.yaml](./deploy-examples/docling-serve-simple.yaml)
+
+This deployment example has the following features:
+
+- Deployment configuration
+- Service configuration
+- NVIDIA cuda enabled
+
+Install the app with:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-simple.yaml
+```
+
+For using the API:
+
+```sh
+# Port-forward the service
+oc port-forward svc/docling-serve 5001:5001
+
+# Make a test query
+curl -X 'POST' \
+  "localhost:5001/v1/convert/source/async" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+### Multiple workers with RQ
+
+Manifest example: [`docling-serve-rq-workers.yaml`](./deploy-examples/docling-serve-rq-workers.yaml)
+
+This deployment example has the following features:
+
+- Deployment configuration
+- Service configuration
+- Redis deployment
+- Multiple (2 by default) worker Pods
+
+Install the app with:
+
+- create k8s secret:
+
+```sh
+kubectl create secret generic docling-serve-rq-secrets --from-literal=REDIS_PASSWORD=myredispassword --from-literal=RQ_REDIS_URL=redis://:myredispassword@docling-serve-redis-service:6373/
+```
+
+- apply deployment manifest:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-rq-workers.yaml
+```
+
+### Secure deployment with `oauth-proxy`
+
+Manifest example: [docling-serve-oauth.yaml](./deploy-examples/docling-serve-oauth.yaml)
+
+This deployment has the following features:
+
+- TLS encryption between all components (using the cluster-internal certificate authority).
+- Authentication via a secure `oauth-proxy` sidecar.
+- Expose the service using a secure OpenShift `Route`
+
+Install the app with:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-oauth.yaml
+```
+
+For using the API:
+
+```sh
+# Retrieve the endpoint
+DOCLING_NAME=docling-serve
+DOCLING_ROUTE="https://$(oc get routes ${DOCLING_NAME} --template={{.spec.host}})"
+
+# Retrieve the authentication token
+OCP_AUTH_TOKEN=$(oc whoami --show-token)
+
+# Make a test query
+curl -X 'POST' \
+  "${DOCLING_ROUTE}/v1/convert/source/async" \
+  -H "Authorization: Bearer ${OCP_AUTH_TOKEN}" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+### ReplicaSets with `sticky sessions`
+
+Manifest example: [docling-serve-replicas-w-sticky-sessions.yaml](./deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml)
+
+This deployment has the following features:
+
+- Deployment configuration with 3 replicas
+- Service configuration
+- Expose the service using an OpenShift `Route` with sticky sessions enabled
+
+Install the app with:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
+```
+
+For using the API:
+
+```sh
+# Retrieve the endpoint
+DOCLING_NAME=docling-serve
+DOCLING_ROUTE="https://$(oc get routes $DOCLING_NAME --template={{.spec.host}})"
+
+# Make a test query, store the cookie and taskid
+task_id=$(curl -s -X 'POST' \
+    "${DOCLING_ROUTE}/v1/convert/source/async" \
+    -H "accept: application/json" \
+    -H "Content-Type: application/json" \
+    -d '{
+      "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+    }' \
+    -c cookies.txt | grep -oP '"task_id":"\K[^"]+')
+```
+
+```sh
+# Grab the taskid and cookie to check the task status
+curl -v -X 'GET' \
+  "${DOCLING_ROUTE}/v1/status/poll/$task_id?wait=0" \
+  -H "accept: application/json" \
+  -b "cookies.txt"
+```
--- a/docs/development.md
+++ b/docs/development.md
@@ -0,0 +1,57 @@
+# Development
+
+## Install dependencies
+
+### CPU only
+
+```sh
+# Install uv if not already available
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Install dependencies
+uv sync --extra cpu
+```
+
+### Cuda GPU
+
+For GPU support use the following command:
+
+```sh
+# Install dependencies
+uv sync
+```
+
+### Gradio UI and different OCR backends
+
+The `/ui` endpoint (built with `gradio`) and additional OCR backends can be enabled via package extras:
+
+```sh
+# Enable ui and rapidocr
+uv sync --extra ui --extra rapidocr
+```
+
+```sh
+# Enable tesserocr
+uv sync --extra tesserocr
+```
+
+See the `[project.optional-dependencies]` section in `pyproject.toml` for the full list of extras, and run `uv run docling-serve --help` to see the available runtime options.
+
+### Run the server
+
+The `docling-serve` executable is a convenient script for launching the webserver both in
+development and production mode.
+
+```sh
+# Run the server in development mode
+# - reload is enabled by default
+# - listening on the 127.0.0.1 address
+# - ui is enabled by default
+docling-serve dev
+
+# Run the server in production mode
+# - reload is disabled by default
+# - listening on the 0.0.0.0 address
+# - ui is disabled by default
+docling-serve run
+```
--- a/docs/models.md
+++ b/docs/models.md
@@ -0,0 +1,175 @@
+# Handling Models in Docling Serve
+
+When enabling steps in Docling Serve that require extra models (such as picture classification, picture description, table detection, code recognition, formula extraction, or vision-language modules), you must ensure those models are available in the runtime environment. The standard container image includes only the default models. Any additional models must be downloaded and made available before use. If required models are missing, Docling Serve will raise runtime errors rather than downloading them automatically. This default is intended to guarantee that the system does not call external services.
+
+## Model Storage Location
+
+Docling Serve loads models from the directory specified by the `DOCLING_SERVE_ARTIFACTS_PATH` environment variable. This path must be consistent across model download and runtime. When running with multiple workers or reload enabled, you must use the environment variable rather than the CLI argument for configuration [[source]](./configuration.md).
+
+## Approaches for Making Extra Models Available
+
+There are several ways to ensure required models are present:
+
+### 1. Disable Local Models (Trigger Auto-Download)
+
+You can configure the container to download all models at startup by clearing the artifacts path:
+
+```sh
+podman run -d -p 5001:5001 --name docling-serve \
+  -e DOCLING_SERVE_ARTIFACTS_PATH="" \
+  -e DOCLING_SERVE_ENABLE_UI=true \
+  quay.io/docling-project/docling-serve
+```
+
+This approach is simple for local development but not recommended for production, as it increases startup time and depends on network availability.
+
+### 2. Build a Custom Image with Pre-Downloaded Models
+
+You can create a new image that includes the required models:
+
+```Dockerfile
+FROM quay.io/docling-project/docling-serve
+RUN docling-tools models download smolvlm
+```
+
+This method is suitable for production, as it ensures all models are present in the image and avoids runtime downloads.
+
+### 3. Update the Entrypoint to Download Models Before Startup
+
+You can override the entrypoint to download models before starting the service:
+
+```sh
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true \
+  quay.io/docling-project/docling-serve \
+  -- sh -c 'docling-tools models download smolvlm && exec docling-serve run'
+```
+
+This is useful for environments where you want to keep the base image unchanged but still automate model preparation.
+
+### 4. Mount a Volume with Pre-Downloaded Models
+
+Download models locally and mount them into the container:
+
+```sh
+# Download the models locally
+docling-tools models download --all -o models
+
+# Start the container with the local models folder
+podman run -p 5001:5001 \
+  -v $(pwd)/models:/opt/app-root/src/models \
+  -e DOCLING_SERVE_ARTIFACTS_PATH="/opt/app-root/src/models" \
+  -e DOCLING_SERVE_ENABLE_UI=true \
+  quay.io/docling-project/docling-serve
+```
+
+This approach is robust for both local and production deployments, especially when using persistent storage.
+
+## Kubernetes/Cluster Deployments
+
+For Kubernetes or OpenShift clusters, the recommended approach is to use a PersistentVolumeClaim (PVC) for model storage, a Kubernetes Job to download models, and mount the volume into the deployment. This ensures models persist across pod restarts and scale-out scenarios.
+
+### Example: PersistentVolumeClaim
+
+```yaml
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: docling-model-cache-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 10Gi
+```
+
+If you don't want to use the default storage class, set a custom storage class as follows:
+
+```yaml
+spec:
+    ...
+    storageClassName: <Storage Class Name>
+```
+
+Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)
+
+### Example: Model Download Job
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: docling-model-cache-load
+spec:
+  template:
+    spec:
+      containers:
+        - name: loader
+          image: ghcr.io/docling-project/docling-serve-cpu:main
+          command:
+            - docling-tools
+            - models
+            - download
+            - '--output-dir=/modelcache'
+            - 'layout'
+            - 'tableformer'
+            - 'code_formula'
+            - 'picture_classifier'
+            - 'smolvlm'
+            - 'granite_vision'
+            - 'easyocr'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
+      restartPolicy: Never
+```
+
+The job will mount the previously created persistent volume and execute a command similar to the one used to load models locally:
+`docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`
+
+In the manifest, the desired models are specified individually; alternatively, use the `--all` parameter to download all models.
+
+Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)
+
+### Example: Deployment with Mounted Volume
+
+```yaml
+spec:
+  template:
+    spec:
+      containers:
+        - name: api
+          env:
+            - name: DOCLING_SERVE_ARTIFACTS_PATH
+              value: '/modelcache'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
+```
+
+The value of `DOCLING_SERVE_ARTIFACTS_PATH` must match the mount path where models are stored.
+
+Now, when docling-serve is executing tasks, the underlying docling installation will load model weights from the mounted volume.
+
+Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
+
+## Local Docker Execution
+
+For local Docker or Podman execution, you can use any of the approaches above. Mounting a local directory with pre-downloaded models is the most reliable for repeated runs and avoids network dependencies.
+
+## Troubleshooting and Best Practices
+
+- If a required model is missing from the artifacts path, Docling Serve will raise a runtime error.
+- Always ensure the value of `DOCLING_SERVE_ARTIFACTS_PATH` matches the directory where models are stored and mounted.
+- For production and cluster environments, prefer persistent storage and pre-loading models via a dedicated job.
+
+For more details and YAML manifest examples, see the [deployment documentation](./deployment.md).
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -0,0 +1,448 @@
+# Usage
+
+The API provides two endpoints: one for urls, one for files. This is necessary to send files directly in binary format instead of base64-encoded strings.
+
+## Common parameters
+
+In addition to the source of the file (see below), both endpoints support the same parameters, which are almost the same as those of the Docling CLI.
+
+- `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
+- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
+- `pipeline` (str): The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
+- `page_range` (tuple). If specified, only convert a range of pages. The page number starts at 1.
+- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
+- `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
+- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
+- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesserocr`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`. To use the `tesserocr` engine, `tesserocr` must be installed where docling-serve is running: `pip install tesserocr`
+- `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
+- `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Defaults to `dlparse_v4`.
+- `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
+- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
+- `md_page_break_placeholder` (str): Add this placeholder between pages in the markdown output.
+- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
+- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
+- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
+- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
+- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
+- `picture_description_area_threshold` (float): Minimum percentage of the area for a picture to be processed with the models. Defaults to 0.05.
+- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `picture_description_api`.
+- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with `picture_description_local`.
+- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
+- `images_scale` (float): Scale factor for images. Defaults to 2.0.
+
+### Authentication
+
+When authentication is activated (see the parameter `DOCLING_SERVE_API_KEY` in [configuration.md](./configuration.md)), all the API requests **must** provide the header `X-Api-Key` with the correct secret key.
+
+## Convert endpoints
+
+### Source endpoint
+
+The endpoint is `/v1/convert/source`, listening for POST requests of JSON payloads.
+
+On top of the above parameters, you must send the URL(s) of the document(s) you want to process with either the `http_sources` or `file_sources` fields.
+The first fetches the URL(s) (optionally with extra headers), the second allows providing documents as base64-encoded strings.
+No `options` are required; they can be partially or completely omitted.
+
+Simple payload example:
+
+```json
+{
+  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
+}
+```
+
+<details>
+
+<summary>Complete payload example:</summary>
+
+```json
+{
+  "options": {
+    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
+    "to_formats": ["md", "json", "html", "text", "doctags"],
+    "image_export_mode": "placeholder",
+    "do_ocr": true,
+    "force_ocr": false,
+    "ocr_engine": "easyocr",
+    "ocr_lang": ["en"],
+    "pdf_backend": "dlparse_v2",
+    "table_mode": "fast",
+    "abort_on_error": false
+  },
+  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
+}
+```
+
+</details>
+
+<details>
+
+<summary>CURL example:</summary>
+
+```sh
+curl -X 'POST' \
+  'http://localhost:5001/v1/convert/source' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: application/json' \
+  -d '{
+  "options": {
+    "from_formats": [
+      "docx",
+      "pptx",
+      "html",
+      "image",
+      "pdf",
+      "asciidoc",
+      "md",
+      "xlsx"
+    ],
+    "to_formats": ["md", "json", "html", "text", "doctags"],
+    "image_export_mode": "placeholder",
+    "do_ocr": true,
+    "force_ocr": false,
+    "ocr_engine": "easyocr",
+    "ocr_lang": [
+      "fr",
+      "de",
+      "es",
+      "en"
+    ],
+    "pdf_backend": "dlparse_v2",
+    "table_mode": "fast",
+    "abort_on_error": false,
+    "do_table_structure": true,
+    "include_images": true,
+    "images_scale": 2
+  },
+  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
+}'
+```
+
+</details>
+
+<details>
+<summary>Python example:</summary>
+
+```python
+import httpx
+
+async_client = httpx.AsyncClient(timeout=60.0)
+url = "http://localhost:5001/v1/convert/source"
+payload = {
+  "options": {
+    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
+    "to_formats": ["md", "json", "html", "text", "doctags"],
+    "image_export_mode": "placeholder",
+    "do_ocr": True,
+    "force_ocr": False,
+    "ocr_engine": "easyocr",
+    "ocr_lang": ["en"],
+    "pdf_backend": "dlparse_v2",
+    "table_mode": "fast",
+    "abort_on_error": False,
+  },
+  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
+}
+
+response = await async_client.post(url, json=payload)
+
+data = response.json()
+```
+
+</details>
+
+#### File as base64
+
+The `file_sources` argument in the endpoint allows sending files as base64-encoded strings.
+When your PDF or other file type is too large, encoding it and passing it inline to curl
+can lead to an “Argument list too long” error on some systems. To avoid this, we write
+the JSON request body to a file and have curl read from that file.
+
+<details>
+<summary>CURL steps:</summary>
+
+```sh
+# 1. Base64-encode the file
+B64_DATA=$(base64 -w 0 /path/to/file/pdf-to-convert.pdf)
+
+# 2. Build the JSON with your options
+cat <<EOF > /tmp/request_body.json
+{
+  "options": {
+  },
+  "file_sources": [{
+    "base64_string": "${B64_DATA}",
+    "filename": "pdf-to-convert.pdf"
+  }]
+}
+EOF
+
+# 3. POST the request to the docling service
+curl -X POST "localhost:5001/v1/convert/source" \
+     -H "Content-Type: application/json" \
+     -d @/tmp/request_body.json
+```
+
+</details>
+
+### File endpoint
+
+The endpoint is: `/v1/convert/file`, listening for POST requests of Form payloads (necessary as the files are sent as multipart/form data). You can send one or multiple files.
+
+<details>
+<summary>CURL example:</summary>
+
+```sh
+curl -X 'POST' \
+  'http://127.0.0.1:5001/v1/convert/file' \
+  -H 'accept: application/json' \
+  -H 'Content-Type: multipart/form-data' \
+  -F 'ocr_engine=easyocr' \
+  -F 'pdf_backend=dlparse_v2' \
+  -F 'from_formats=pdf' \
+  -F 'from_formats=docx' \
+  -F 'force_ocr=false' \
+  -F 'image_export_mode=embedded' \
+  -F 'ocr_lang=en' \
+  -F 'ocr_lang=pl' \
+  -F 'table_mode=fast' \
+  -F 'files=@2206.01062v1.pdf;type=application/pdf' \
+  -F 'abort_on_error=false' \
+  -F 'to_formats=md' \
+  -F 'to_formats=text' \
+  -F 'do_ocr=true'
+```
+
+</details>
+
+<details>
+<summary>Python example:</summary>
+
+```python
+import httpx
+
+async_client = httpx.AsyncClient(timeout=60.0)
+url = "http://localhost:5001/v1/convert/file"
+parameters = {
+"from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
+"to_formats": ["md", "json", "html", "text", "doctags"],
+"image_export_mode": "placeholder",
+"do_ocr": True,
+"force_ocr": False,
+"ocr_engine": "easyocr",
+"ocr_lang": ["en"],
+"pdf_backend": "dlparse_v2",
+"table_mode": "fast",
+"abort_on_error": False,
+}
+
+current_dir = os.path.dirname(__file__)
+file_path = os.path.join(current_dir, '2206.01062v1.pdf')
+
+files = {
+    'files': ('2206.01062v1.pdf', open(file_path, 'rb'), 'application/pdf'),
+}
+
+response = await async_client.post(url, files=files, data=parameters)
+assert response.status_code == 200, "Response should be 200 OK"
+
+data = response.json()
+```
+
+</details>
+
+### Picture description options
+
+When the picture description enrichment is activated, users may specify which model and which execution mode to use for this task. There are two choices for the execution mode: _local_ will run the vision-language model directly, _api_ will invoke an external API endpoint.
+
+The local option is specified with:
+
+```jsonc
+{
+  "picture_description_local": {
+    "repo_id": "",  // Repository id from the Hugging Face Hub.
+    "generation_config": {"max_new_tokens": 200, "do_sample": false},  // HF generation config.
+    "prompt": "Describe this image in a few sentences. ",  // Prompt used when calling the vision-language model.
+  }
+}
+```
+
+The possible values for `generation_config` are documented in the [Hugging Face text generation docs](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig).
+
+The api option is specified with:
+
+```jsonc
+{
+  "picture_description_api": {
+    "url": "",  // Endpoint which accepts openai-api compatible requests.
+    "headers": {},  // Headers used for calling the API endpoint. For example, it could include authentication headers.
+    "params": {},  // Model parameters.
+    "timeout": 20,  // Timeout for the API request.
+    "prompt": "Describe this image in a few sentences. ",  // Prompt used when calling the vision-language model.
+  }
+}
+```
+
+Example URLs are:
+
+- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `picture_description_api`:
+  - the `HuggingFaceTB/SmolVLM-256M-Instruct` model
+
+    ```json
+    {
+      "url": "http://localhost:8000/v1/chat/completions",
+      "params": {
+        "model": "HuggingFaceTB/SmolVLM-256M-Instruct",
+        "max_completion_tokens": 200
+      }
+    }
+    ```
+
+  - the `ibm-granite/granite-vision-3.2-2b` model
+
+    ```json
+    {
+      "url": "http://localhost:8000/v1/chat/completions",
+      "params": {
+        "model": "ibm-granite/granite-vision-3.2-2b",
+        "max_completion_tokens": 200
+      }
+    }
+    ```
+
+- `http://localhost:11434/v1/chat/completions` for the local Ollama api, with example `picture_description_api`:
+  - the `granite3.2-vision:2b` model
+
+    ```json
+    {
+      "url": "http://localhost:11434/v1/chat/completions",
+      "params": {
+        "model": "granite3.2-vision:2b"
+      }
+    }
+    ```
+
+Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.
+
+## Response format
+
+The response can be a JSON Document or a File.
+
+- If you process only one file, the response will be a JSON document with the following format:
+
+  ```jsonc
+  {
+    "document": {
+      "md_content": "",
+      "json_content": {},
+      "html_content": "",
+      "text_content": "",
+      "doctags_content": ""
+      },
+    "status": "<success|partial_success|skipped|failure>",
+    "processing_time": 0.0,
+    "timings": {},
+    "errors": []
+  }
+  ```
+
+  Depending on the value you set in `to_formats`, the different items will be populated with their respective results or empty.
+
+  `processing_time` is the Docling processing time in seconds, and `timings` (when enabled in the backend) provides the detailed
+  timing of all the internal Docling components.
+
+- If you set the parameter `target` to the zip mode, the response will be a zip file.
+- If multiple files are generated (multiple inputs, or one input but multiple outputs with the zip target mode), the response will be a zip file.
+
+## Asynchronous API
+
+Both `/v1/convert/source` and `/v1/convert/file` endpoints are available as asynchronous variants.
+The advantage of the asynchronous endpoints is the possibility to interrupt the connection, check for progress updates and fetch the result later.
+This approach is more resilient against network instabilities and allows the client application logic to easily interleave conversion with other tasks.
+
+Launch an asynchronous conversion with:
+
+- `POST /v1/convert/source/async` when providing the input as sources.
+- `POST /v1/convert/file/async` when providing the input as multipart-form files.
+
+The response format is a task detail:
+
+```jsonc
+{
+  "task_id": "<task_id>",  // the task_id which can be used for the next operations
+  "task_status": "pending|started|success|failure",  // the task status
+  "task_position": 1,  // the position in the queue
+  "task_meta": null,  // metadata e.g. how many documents are in the total job and how many have been converted
+}
+```
+
+### Polling status
+
+To check the progress of the conversion task and wait for its completion, use the endpoint:
+
+- `GET /v1/status/poll/{task_id}`
+
+<details>
+<summary>Example waiting loop:</summary>
+
+```python
+import time
+import httpx
+
+# ...
+# response from the async task submission
+task = response.json()
+
+while task["task_status"] not in ("success", "failure"):
+    response = httpx.get(f"{base_url}/status/poll/{task['task_id']}")
+    task = response.json()
+
+    time.sleep(5)
+```
+
+</details>
+
+### Subscribe with websockets
+
+Using websockets, the client application can be notified about updates of the conversion task.
+To start the websocket connection, use the endpoint:
+
+- `/v1/status/ws/{task_id}`
+
+Websocket messages are JSON objects with the following structure:
+
+```jsonc
+{
+  "message": "connection|update|error",  // type of message being sent
+  "task": {},  // the same content of the task description
+  "error": "",  // description of the error
+}
+```
+
+<details>
+<summary>Example websocket usage:</summary>
+
+```python
+from websockets.sync.client import connect
+
+uri = f"ws://{base_url}/v1/status/ws/{task['task_id']}"
+with connect(uri) as websocket:
+    for message in websocket:
+        try:
+            payload = json.loads(message)
+            if payload["message"] == "error":
+                break
+            if payload["message"] == "update" and payload["task"]["task_status"] in ("success", "failure"):
+                break
+        except:
+          break
+```
+
+</details>
+
+### Fetch results
+
+When the task is completed, the result can be fetched with the endpoint:
+
+- `GET /v1/result/{task_id}`
--- a/docs/v1_migration.md
+++ b/docs/v1_migration.md
@@ -0,0 +1,80 @@
+# Migration to the `v1` API
+
+Docling Serve has moved from the initial prototype `v1alpha` API to the stable `v1` API.
+This page provides simple instructions to upgrade your application to the new API.
+
+## API changes
+
+The breaking changes introduced in the `v1` release of Docling Serve are designed to provide a stable schema which
+allows the project to provide new capabilities such as new types of input sources and targets, as well as the definition of callbacks for event-driven applications.
+
+### Endpoint names
+
+All endpoints are renamed from `/v1alpha/` to `/v1/`.
+
+### Sources
+
+When using the `/v1/convert/source` endpoint, input documents have to be specified with the `sources: []` argument, which replaces the usage of `file_sources` and `http_sources`.
+
+Old version:
+
+```jsonc
+{
+    "options": {},  // conversion options
+    "file_sources": [  // input documents provided as base64-encoded strings
+        {"base64_string": "abc123...", "filename": "file.pdf"}
+    ],
+    "http_sources": [  // input documents provided as http urls
+        {"url": "https://..."}
+    ]
+}
+```
+
+New version:
+
+```jsonc
+{
+    "options": {},  // conversion options
+    "sources": [
+        // input document provided as base64-encoded string
+        {"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
+        // input document provided as http urls
+        {"kind": "http", "url": "https://..."},
+    ]
+}
+```
+
+### Targets
+
+To switch between output formats, i.e. from the JSON in-body response to the zip archive response, users have to specify the `target` argument, which replaces the usage of `options.return_as_file`.
+
+Old version:
+
+```jsonc
+{
+    "options": {
+        "return_as_file": true  // <-- to be removed
+    },
+    // ...
+}
+```
+
+New version:
+
+```jsonc
+{
+    "options": {},
+    "target": {"kind": "zip"},  // <-- add this
+    // ...
+}
+```
+
+## Continue with the old API
+
+If you are not able to apply the changes above to your application, please consider pinning the previous `v0.x` container images, e.g.
+
+```sh
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve:v0.16.1
+```
+
+_Note that the old prototype API will not be supported in new `v1.x` versions._
--- a/img/fastapi-ui.png
+++ b/img/fastapi-ui.png
--- a/img/swagger.png
+++ b/img/swagger.png
--- a/models_download.py
+++ b/models_download.py
@@ -1,36 +0,0 @@
-import os
-import zipfile
-
-import requests
-from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
-from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
-
-# Download Docling models
-StandardPdfPipeline.download_models_hf(force=True)
-load_pretrained_nlp_models(verbose=True)
-
-# Download EasyOCR models
-urls = [
-    "https://github.com/JaidedAI/EasyOCR/releases/download/v1.3/latin_g2.zip",
-    "https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip"
-]
-
-local_zip_paths = [
-    "/opt/app-root/src/latin_g2.zip",
-    "/opt/app-root/src/craft_mlt_25k.zip"
-]
-
-extract_path = "/opt/app-root/src/.EasyOCR/model/"
-
-for url, local_zip_path in zip(urls, local_zip_paths):
-    # Download the file
-    response = requests.get(url)
-    with open(local_zip_path, "wb") as file:
-        file.write(response.content)
-
-    # Unzip the file
-    with zipfile.ZipFile(local_zip_path, "r") as zip_ref:
-        zip_ref.extractall(extract_path)
-
-    # Clean up the zip file
-    os.remove(local_zip_path)
--- a/os-packages.txt
+++ b/os-packages.txt
@@ -1,8 +1,7 @@
 tesseract
 tesseract-devel
 tesseract-langpack-eng
+tesseract-osd
 leptonica-devel
 libglvnd-glx
 glib2
-wget
-git
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,123 +1,266 @@
-[tool.poetry]
+[project]
 name = "docling-serve"
-version = "0.2.0"
+version = "1.3.1"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
-license = "MIT"
+license = {text = "MIT"}
 authors = [
-    "Michele Dolfi <dol@zurich.ibm.com>",
-    "Christoph Auer <cau@zurich.ibm.com>",
-    "Panos Vagenas <pva@zurich.ibm.com>",
-    "Cesar Berrospi Ramis <ceb@zurich.ibm.com>",
-   "Peter Staar <taa@zurich.ibm.com>",
+    {name="Michele Dolfi", email="dol@zurich.ibm.com"},
+    {name="Guillaume Moutier", email="gmoutier@redhat.com"},
+    {name="Anil Vishnoi", email="avishnoi@redhat.com"},
+    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
+    {name="Christoph Auer", email="cau@zurich.ibm.com"},
+    {name="Peter Staar", email="taa@zurich.ibm.com"},
 ]
 maintainers = [
-    "Peter Staar <taa@zurich.ibm.com>",
-    "Christoph Auer <cau@zurich.ibm.com>",
-    "Michele Dolfi <dol@zurich.ibm.com>",
-    "Cesar Berrospi Ramis <ceb@zurich.ibm.com>",
-    "Panos Vagenas <pva@zurich.ibm.com>",
+    {name="Michele Dolfi", email="dol@zurich.ibm.com"},
+    {name="Anil Vishnoi", email="avishnoi@redhat.com"},
+    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
+    {name="Christoph Auer", email="cau@zurich.ibm.com"},
+    {name="Peter Staar", email="taa@zurich.ibm.com"},
 ]
 readme = "README.md"
-repository = "https://github.com/DS4SD/docling-serve"
-homepage = "https://github.com/DS4SD/docling-serve"
 classifiers = [
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
-    # "Development Status :: 5 - Production/Stable",
+    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Typing :: Typed",
-    "Programming Language :: Python :: 3"
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+]
+requires-python = ">=3.10"
+dependencies = [
+    "docling~=2.38",
+    "docling-core>=2.44.1",
+    "docling-jobkit[kfp,rq,vlm]>=1.4.0,<2.0.0",
+    "fastapi[standard]~=0.115",
+    "httpx~=0.28",
+    "pydantic~=2.10",
+    "pydantic-settings~=2.4",
+    "python-multipart>=0.0.14,<0.1.0",
+    "typer~=0.12",
+    "uvicorn[standard]>=0.29.0,<1.0.0",
+    "websockets~=14.0",
+    "scalar-fastapi>=1.0.3",
+    "docling-mcp>=1.0.0",
 ]

-[tool.poetry.dependencies]
-python = ">=3.10,<3.13" # 3.10 needed for Gradio, and no torchvision build for 3.13 yet
-docling = "^2.14.0"
-fastapi = {version = "^0.115.6", extras = ["standard"]}
-gradio = { version = "^5.9.1", optional = true }
-uvicorn = "~0.29.0"
-pydantic = "^2.10.3"
-pydantic-settings = "^2.4.0"
-python-multipart = "^0.0.19"
-httpx = "^0.28.1"
-tesserocr = { version = "^2.7.1", optional = true }
-rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
-onnxruntime = [
-  # 1.19.2 is the last version with python3.9 support,
-  # see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
-  { version = ">=1.7.0,<1.20.0", optional = true, markers = "python_version < '3.10'" },
-  { version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" }
+[project.optional-dependencies]
+ui = [
+    "gradio~=5.9",
+    "pydantic<2.11.0",  # fix compatibility between gradio and new pydantic 2.11
+]
+tesserocr = [
+    "tesserocr~=2.7"
+]
+rapidocr = [
+    "rapidocr-onnxruntime~=1.4; python_version<'3.13'",
+    "onnxruntime~=1.7",
+]
+flash-attn = [
+  "flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
 ]

+[dependency-groups]
+dev = [
+    "asgi-lifespan~=2.0",
+    "mypy~=1.11",
+    "pre-commit-uv~=4.1",
+    "pytest~=8.3",
+    "pytest-asyncio~=0.24",
+    "pytest-check~=2.4",
+    "python-semantic-release~=7.32",
+    "ruff>=0.9.6",
+]

-[tool.poetry.extras]
-ui = ["gradio"]
-tesserocr = ["tesserocr"]
-rapidocr = ["rapidocr-onnxruntime", "onnxruntime"]
+pypi = [
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+]

+cpu = [
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+]

-[tool.poetry.group.pypi-torch]
-optional = false
+cu124 = [
+  "torch>=2.6.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.21.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]

-[tool.poetry.group.pypi-torch.dependencies]
+cu126 = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+cu128 = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+rocm = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+[tool.uv]
+package = true
+default-groups = ["dev", "pypi"]
+conflicts = [
+  [
+    { group = "pypi" },
+    { group = "cpu" },
+    { group = "cu124" },
+    { group = "cu126" },
+    { group = "cu128" },
+    { group = "rocm" },
+  ],
+]
+environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
+override-dependencies = [
+  "urllib3~=2.0"
+]
+
+[tool.uv.sources]
 torch = [
-  {version = "!=2.4.1+cpu" },
+  { index = "pytorch-pypi", group = "pypi" },
+  { index = "pytorch-cpu", group = "cpu" },
+  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]
+
 torchvision = [
-  {version = "!=0.19.1+cpu" },
+  { index = "pytorch-pypi", group = "pypi" },
+  { index = "pytorch-cpu", group = "cpu" },
+  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]

-[tool.poetry.group.cpu]
-optional = true
-
-[tool.poetry.group.cpu.dependencies]
-torch = [
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torch-2.4.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
-]
-torchvision = [
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.10"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp310-cp310-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.11"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp311-cp311-linux_x86_64.whl"},
-    {markers = 'platform_machine=="x86_64" and sys_platform=="linux" and python_version == "3.12"', url="https://download.pytorch.org/whl/cpu/torchvision-0.19.1%2Bcpu-cp312-cp312-linux_x86_64.whl"},
+pytorch-triton-rocm = [
+  { index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
 ]

-[tool.poetry.group.constraints.dependencies]
-numpy = [
-    { version = "^2.1.0", markers = 'python_version >= "3.13"' },
-    { version = "^1.24.4", markers = 'python_version < "3.13"' },
-]
+# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
+# docling-jobkit = { path = "../docling-jobkit", editable = true }

-[tool.poetry.group.dev.dependencies]
-black = "^24.8.0"
-isort = "^5.13.2"
-pre-commit = "^3.8.0"
-autoflake = "^2.3.1"
-flake8 = "^7.1.1"
-pytest = "^8.3.4"
-pytest-asyncio = "^0.24.0"
-pytest-check = "^2.4.1"
-mypy = "^1.11.2"
+[[tool.uv.index]]
+name = "pytorch-pypi"
+url = "https://pypi.org/simple"
+explicit = true

-[build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true

-[tool.black]
+[[tool.uv.index]]
+name = "pytorch-cu124"
+url = "https://download.pytorch.org/whl/cu124"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu126"
+url = "https://download.pytorch.org/whl/cu126"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+explicit = true
+
+[tool.setuptools.packages.find]
+include = ["docling_serve*"]
+namespaces = true
+
+[project.scripts]
+docling-serve = "docling_serve.__main__:main"
+
+[project.urls]
+Homepage = "https://github.com/docling-project/docling-serve"
+# Documentation = "https://ds4sd.github.io/docling"
+Repository = "https://github.com/docling-project/docling-serve"
+Issues = "https://github.com/docling-project/docling-serve/issues"
+Changelog = "https://github.com/docling-project/docling-serve/blob/main/CHANGELOG.md"
+
+[tool.ruff]
+target-version = "py310"
 line-length = 88
-target-version = ["py310"]
-include = '\.pyi?$'
+respect-gitignore = true

-[tool.isort]
-profile = "black"
-line_length = 88
-py_version=311
+# extend-exclude = [
+#     "tests",
+# ]

-[tool.autoflake]
-in-place = true
-remove-all-unused-imports = true
-remove-unused-variables = true
-expand-star-imports = true
-recursive = true
+[tool.ruff.format]
+skip-magic-trailing-comma = false
+
+[tool.ruff.lint]
+select = [
+    # "B",  # flake8-bugbear
+    "C",  # flake8-comprehensions
+    "C9",  # mccabe
+    # "D",  # flake8-docstrings
+    "E",  # pycodestyle errors (default)
+    "F",  # pyflakes (default)
+    "I",  # isort
+    "PD", # pandas-vet
+    "PIE", # pie
+    # "PTH", # pathlib
+    "Q",  # flake8-quotes
+    # "RET", # return
+    "RUF", # Enable all ruff-specific checks
+    # "SIM", # simplify
+    "S307", # eval
+    # "T20",  # (disallow print statements) keep debugging statements out of the codebase
+    "W",  # pycodestyle warnings
+    "ASYNC", # async
+    "UP", # pyupgrade
+]
+
+ignore = [
+    "E501",  # Line too long, handled by ruff formatter
+    "D107", # "Missing docstring in __init__",
+    "F811", # "redefinition of the same function"
+    "PL", # Pylint
+    "RUF012", # Mutable Class Attributes
+    "UP007", # Option and Union
+]
+
+#extend-select = []
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["E402", "F401"]
+"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests
+
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+
+[tool.ruff.lint.isort.sections]
+"docling" = ["docling", "docling_core", "docling_jobkit"]
+
+[tool.ruff.lint.isort]
+combine-as-imports = true
+section-order = [
+  "future",
+  "standard-library",
+  "third-party",
+  "docling",
+  "first-party",
+  "local-folder",
+]

 [tool.mypy]
 pretty = true
@@ -131,11 +274,11 @@ module = [
    "easyocr.*",
    "tesserocr.*",
    "rapidocr_onnxruntime.*",
-    "docling_conversion.*",
-    "gradio_ui.*",
-    "response_preparation.*",
-    "helper_functions.*",
    "requests.*",
+    "kfp.*",
+    "kfp_server_api.*",
+    "mlx_vlm.*",
+    "scalar_fastapi.*",
 ]
 ignore_missing_imports = true

@@ -150,3 +293,16 @@ addopts = "-rA --color=yes --tb=short --maxfail=5"
 markers = [
 "asyncio",
 ]
+
+[tool.semantic_release]
+# for default values check:
+# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
+
+version_source = "tag_only"
+branch = "main"
+
+# configure types which should trigger minor and patch version bumps respectively
+# (note that they must be a subset of the configured allowed types):
+parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
+parser_angular_minor_types = "feat"
+parser_angular_patch_types = "fix,perf"
--- a/start_server.sh
+++ b/start_server.sh
@@ -1,30 +0,0 @@
-#!/bin/bash
-set -Eeuo pipefail
-
-# Network settings
-export PORT="${PORT:-5001}"
-export HOST="${HOST:-"0.0.0.0"}"
-
-# Performance settings
-UVICORN_WORKERS="${UVICORN_WORKERS:-1}"
-
-# Development settings
-export WITH_UI="${WITH_UI:-"true"}"
-export RELOAD=${RELOAD:-"false"}
-
-# --------------------------------------
-# Process env settings
-
-EXTRA_ARGS=""
-if [ "$RELOAD" == "true" ]; then
-  EXTRA_ARGS="$EXTRA_ARGS --reload"
-fi
-
-# Launch
-exec poetry run uvicorn \
-    docling_serve.app:app \
-    --host=${HOST} \
-    --port=${PORT} \
-    --timeout-keep-alive=600 \
-    ${EXTRA_ARGS} \
-    --workers=${UVICORN_WORKERS}
--- a/tests/test_1-file-all-outputs.py
+++ b/tests/test_1-file-all-outputs.py
@@ -6,17 +6,22 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


@pytest.mark.asyncio
 async def test_convert_file(async_client):
    """Test convert single file to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/file"
+    url = "http://localhost:5001/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
@@ -37,7 +42,6 @@ async def test_convert_file(async_client):
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
-        "return_as_file": False,
    }

    current_dir = os.path.dirname(__file__)
@@ -47,9 +51,7 @@ async def test_convert_file(async_client):
        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
    }

-    response = await async_client.post(
-        url, files=files, data={"options": json.dumps(options)}
-    )
+    response = await async_client.post(url, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    data = response.json()
@@ -89,19 +91,14 @@ async def test_convert_file(async_client):
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
-            msg=f"JSON document should contain '{{\\n  \"schema_name\": \"DoclingDocument'\". Received: {safe_slice(data['document']['json_content'])}",
+            msg=f'JSON document should contain \'{{\\n  "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
-    check.is_in(
-        "html_content",
-        data.get("document", {}),
-        msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
-    )
    if data.get("document", {}).get("html_content") is not None:
        check.is_in(
-            '<!DOCTYPE html>\n<html lang="en">\n<head>',
+            "<!DOCTYPE html>\n<html>\n<head>",
            data["document"]["html_content"],
-            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html lang=\"en'>. Received: {safe_slice(data['document']['html_content'])}",
+            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>'. Received: {safe_slice(data['document']['html_content'])}",
        )
    # Text check
    check.is_in(
@@ -123,7 +120,7 @@ async def test_convert_file(async_client):
    )
    if data.get("document", {}).get("doctags_content") is not None:
        check.is_in(
-            "<document>\n<section_header_level_1><location>",
+            "<doctag><page_header><loc",
            data["document"]["doctags_content"],
-            msg=f"DocTags document should contain '<document>\\n<section_header_level_1><location>'. Received: {safe_slice(data['document']['doctags_content'])}",
+            msg=f"DocTags document should contain '<doctag><page_header><loc'. Received: {safe_slice(data['document']['doctags_content'])}",
        )
--- a/tests/test_1-file-async.py
+++ b/tests/test_1-file-async.py
@@ -0,0 +1,77 @@
+import asyncio
+import json
+from pathlib import Path
+
+import httpx
+import pytest
+import pytest_asyncio
+
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    """Yield an httpx client that sends the X-Api-Key header when a key is configured."""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client):
+    """Test async conversion of an uploaded file and retrieval of its result"""
+
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "to_formats": ["md", "json", "html"],
+        "image_export_mode": "placeholder",
+        "ocr": False,
+        "abort_on_error": False,
+    }
+
+    file_path = Path(__file__).parent / "2206.01062v1.pdf"
+    # Keep the PDF open only for the upload so the handle is closed afterwards.
+    with file_path.open("rb") as pdf_file:
+        files = {
+            "files": (file_path.name, pdf_file, "application/pdf"),
+        }
+        response = await async_client.post(
+            f"{base_url}/convert/file/async", files=files, data=payload
+        )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        # Yield to the event loop between polls instead of blocking it.
+        await asyncio.sleep(2)
+
+    assert task["task_status"] == "success"
+    print(f"Task completed with status {task['task_status']=}")
+
+    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
+    assert result_resp.status_code == 200, "Response should be 200 OK"
+    result = result_resp.json()
+    print("Got result.")
+
+    assert "md_content" in result["document"]
+    assert result["document"]["md_content"] is not None
+    assert len(result["document"]["md_content"]) > 10
+
+    assert "html_content" in result["document"]
+    assert result["document"]["html_content"] is not None
+    assert len(result["document"]["html_content"]) > 10
+
+    assert "json_content" in result["document"]
+    assert result["document"]["json_content"] is not None
+    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
--- a/tests/test_1-url-all-outputs.py
+++ b/tests/test_1-url-all-outputs.py
@@ -5,17 +5,22 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


@pytest.mark.asyncio
 async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/source"
+    url = "http://localhost:5001/v1/convert/source"
    payload = {
        "options": {
            "from_formats": [
@@ -37,9 +42,8 @@ async def test_convert_url(async_client):
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
-            "return_as_file": False,
        },
-        "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}],
+        "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"}],
    }
    print(json.dumps(payload, indent=2))

@@ -83,7 +87,7 @@ async def test_convert_url(async_client):
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
-            msg=f"JSON document should contain '{{\\n  \"schema_name\": \"DoclingDocument'\". Received: {safe_slice(data['document']['json_content'])}",
+            msg=f'JSON document should contain \'{{\\n  "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
    check.is_in(
@@ -93,9 +97,9 @@ async def test_convert_url(async_client):
    )
    if data.get("document", {}).get("html_content") is not None:
        check.is_in(
-            '<!DOCTYPE html>\n<html lang="en">\n<head>',
+            "<!DOCTYPE html>\n<html>\n<head>",
            data["document"]["html_content"],
-            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html lang=\"en'>. Received: {safe_slice(data['document']['html_content'])}",
+            msg=f"HTML document should contain '<!DOCTYPE html>\\n<html>'. Received: {safe_slice(data['document']['html_content'])}",
        )
    # Text check
    check.is_in(
@@ -117,7 +121,7 @@ async def test_convert_url(async_client):
    )
    if data.get("document", {}).get("doctags_content") is not None:
        check.is_in(
-            "<document>\n<section_header_level_1><location>",
+            "<doctag><page_header><loc",
            data["document"]["doctags_content"],
-            msg=f"DocTags document should contain '<document>\\n<section_header_level_1><location>'. Received: {safe_slice(data['document']['doctags_content'])}",
+            msg=f"DocTags document should contain '<doctag><page_header><loc'. Received: {safe_slice(data['document']['doctags_content'])}",
        )
--- a/tests/test_1-url-async-ws.py
+++ b/tests/test_1-url-async-ws.py
@@ -0,0 +1,74 @@
+import base64
+from pathlib import Path
+from urllib.parse import urlencode
+
+import httpx
+import pytest
+import pytest_asyncio
+from websockets.sync.client import connect
+
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    """Yield an httpx client that sends the X-Api-Key header when a key is configured."""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client: httpx.AsyncClient):
+    """Test async conversion of a base64-encoded file with status updates over WebSocket"""
+    # Resolve the sample PDF next to this file so the test does not depend on the cwd.
+    doc_filename = Path(__file__).parent / "2408.09869v5.pdf"
+    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
+
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "options": {
+            "to_formats": ["md", "json"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "abort_on_error": False,
+            # "do_picture_description": True,
+            # "picture_description_api": {
+            #     "url": "http://localhost:11434/v1/chat/completions",
+            #     "params": {
+            #         "model": "granite3.2-vision:2b",
+            #     }
+            # },
+            # "picture_description_local": {
+            #     "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
+            # },
+        },
+        # "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}],
+        "sources": [
+            {
+                "kind": "file",
+                "base64_string": encoded_doc,
+                "filename": doc_filename.name,
+            }
+        ],
+    }
+    # print(json.dumps(payload, indent=2))
+
+    # Submit the same document several times; only the last task is followed below.
+    for _ in range(5):
+        response = await async_client.post(
+            f"{base_url}/convert/source/async", json=payload
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}"
+    # Append the key only when one is configured, URL-encoded so special characters survive.
+    if docling_serve_settings.api_key:
+        uri += "?" + urlencode({"api_key": docling_serve_settings.api_key})
+    with connect(uri) as websocket:
+        for message in websocket:
+            print(message)
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -0,0 +1,67 @@
+import asyncio
+import json
+import random
+
+import httpx
+import pytest
+import pytest_asyncio
+
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    """Yield an httpx client that sends the X-Api-Key header when a key is configured."""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client):
+    """Test async conversion of a URL source, polling until the task finishes"""
+
+    example_docs = [
+        "https://arxiv.org/pdf/2411.19710",
+        "https://arxiv.org/pdf/2501.17887",
+        "https://www.nature.com/articles/s41467-024-50779-y.pdf",
+        "https://arxiv.org/pdf/2306.12802",
+        "https://arxiv.org/pdf/2311.18481",
+    ]
+
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "options": {
+            "to_formats": ["md", "json"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "abort_on_error": False,
+        },
+        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
+    }
+    print(json.dumps(payload, indent=2))
+
+    # Submit the same payload several times; only the last task is polled below.
+    for _ in range(3):
+        response = await async_client.post(
+            f"{base_url}/convert/source/async", json=payload
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        # Yield to the event loop between polls instead of blocking it.
+        await asyncio.sleep(2)
+
+    assert task["task_status"] == "success"
--- a/tests/test_2-files-all-outputs.py
+++ b/tests/test_2-files-all-outputs.py
@@ -1,4 +1,3 @@
-import json
 import os

 import httpx
@@ -6,17 +5,22 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


@pytest.mark.asyncio
 async def test_convert_file(async_client):
    """Test convert single file to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/file"
+    url = "http://localhost:5001/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
@@ -37,7 +41,6 @@ async def test_convert_file(async_client):
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
-        "return_as_file": False,
    }

    current_dir = os.path.dirname(__file__)
@@ -48,27 +51,25 @@ async def test_convert_file(async_client):
        ("files", ("2408.09869v5.pdf", open(file_path, "rb"), "application/pdf")),
    ]

-    response = await async_client.post(
-        url, files=files, data={"options": json.dumps(options)}
-    )
+    response = await async_client.post(url, files=files, data=options)
    assert response.status_code == 200, "Response should be 200 OK"

    # Check for zip file attachment
    content_disposition = response.headers.get("content-disposition")

    with check:
-        assert (
-            content_disposition is not None
-        ), "Content-Disposition header should be present"
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
-        assert (
-            'filename="converted_docs.zip"' in content_disposition
-        ), "Attachment filename should be 'converted_docs.zip'"
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )

    content_type = response.headers.get("content-type")
    with check:
-        assert (
-            content_type == "application/zip"
-        ), "Content-Type should be 'application/zip'"
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
--- a/tests/test_2-urls-all-outputs.py
+++ b/tests/test_2-urls-all-outputs.py
@@ -3,17 +3,22 @@ import pytest
 import pytest_asyncio
 from pytest_check import check

+from docling_serve.settings import docling_serve_settings
+

@pytest_asyncio.fixture
 async def async_client():
-    async with httpx.AsyncClient(timeout=60.0) as client:
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
        yield client


@pytest.mark.asyncio
 async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/source"
+    url = "http://localhost:5001/v1/convert/source"
    payload = {
        "options": {
            "from_formats": [
@@ -35,12 +40,12 @@ async def test_convert_url(async_client):
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
-            "return_as_file": False,
        },
-        "http_sources": [
-            {"url": "https://arxiv.org/pdf/2206.01062"},
-            {"url": "https://arxiv.org/pdf/2408.09869"},
+        "sources": [
+            {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
+            {"kind": "http", "url": "https://arxiv.org/pdf/2408.09869"},
        ],
+        "target": {"kind": "zip"},
    }

    response = await async_client.post(url, json=payload)
@@ -50,18 +55,18 @@ async def test_convert_url(async_client):
    content_disposition = response.headers.get("content-disposition")

    with check:
-        assert (
-            content_disposition is not None
-        ), "Content-Disposition header should be present"
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
-        assert (
-            'filename="converted_docs.zip"' in content_disposition
-        ), "Attachment filename should be 'converted_docs.zip'"
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )

    content_type = response.headers.get("content-type")
    with check:
-        assert (
-            content_type == "application/zip"
-        ), "Content-Type should be 'application/zip'"
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
--- a/tests/test_2-urls-async-all-outputs.py
+++ b/tests/test_2-urls-async-all-outputs.py
@@ -0,0 +1,95 @@
+import asyncio
+import json
+
+import httpx
+import pytest
+import pytest_asyncio
+from pytest_check import check
+
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    """Yield an httpx client that sends the X-Api-Key header when a key is configured."""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    async with httpx.AsyncClient(timeout=60.0, headers=headers) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client):
+    """Test async conversion of two URL sources into a zip, polling until the task finishes"""
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "options": {
+            "from_formats": [
+                "docx",
+                "pptx",
+                "html",
+                "image",
+                "pdf",
+                "asciidoc",
+                "md",
+                "xlsx",
+            ],
+            "to_formats": ["md", "json", "html", "text", "doctags"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "force_ocr": False,
+            "ocr_engine": "easyocr",
+            "ocr_lang": ["en"],
+            "pdf_backend": "dlparse_v2",
+            "table_mode": "fast",
+            "abort_on_error": False,
+        },
+        "sources": [
+            {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
+            {"kind": "http", "url": "https://arxiv.org/pdf/2408.09869"},
+        ],
+        "target": {"kind": "zip"},
+    }
+
+    response = await async_client.post(f"{base_url}/convert/source/async", json=payload)
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        # Yield to the event loop between polls instead of blocking it.
+        await asyncio.sleep(2)
+
+    assert task["task_status"] == "success"
+
+    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
+    assert result_resp.status_code == 200, "Response should be 200 OK"
+
+    # Check for zip file attachment
+    content_disposition = result_resp.headers.get("content-disposition")
+
+    with check:
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
+    with check:
+        assert "attachment" in content_disposition, "Response should be an attachment"
+    with check:
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )
+
+    content_type = result_resp.headers.get("content-type")
+    with check:
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
--- a/tests/test_fastapi_endpoints.py
+++ b/tests/test_fastapi_endpoints.py
@@ -0,0 +1,213 @@
+import asyncio
+import io
+import json
+import os
+import zipfile
+
+import pytest
+import pytest_asyncio
+from asgi_lifespan import LifespanManager
+from httpx import ASGITransport, AsyncClient
+from pytest_check import check
+
+from docling_core.types.doc import DoclingDocument, PictureItem
+
+from docling_serve.app import create_app
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Return the current asyncio event loop, shared for the whole test session."""
+    return asyncio.get_event_loop()
+
+
+@pytest.fixture(scope="session")
+def auth_headers():
+    """Return request headers carrying the X-Api-Key when a key is configured."""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    return headers
+
+
+@pytest_asyncio.fixture(scope="session")
+async def app():
+    """Create the app and keep its lifespan context open for the test session."""
+    app = create_app()
+
+    async with LifespanManager(app) as manager:
+        print("Launching lifespan of app.")
+        yield manager.app
+
+
+@pytest_asyncio.fixture(scope="session")
+async def client(app):
+    """Yield an httpx client that calls the app in-process through ASGITransport."""
+    async with AsyncClient(
+        transport=ASGITransport(app=app), base_url="http://app.io"
+    ) as client:
+        print("Client is ready")
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_health(client: AsyncClient):
+    """Test that the health endpoint answers 200 with an ok status"""
+    response = await client.get("/health")
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok"}
+
+
+@pytest.mark.asyncio
+async def test_convert_file(client: AsyncClient, auth_headers: dict):
+    """Test convert single file to all outputs"""
+
+    endpoint = "/v1/convert/file"
+    options = {
+        "from_formats": [
+            "docx",
+            "pptx",
+            "html",
+            "image",
+            "pdf",
+            "asciidoc",
+            "md",
+            "xlsx",
+        ],
+        "to_formats": ["md", "json", "html", "text", "doctags"],
+        "image_export_mode": "placeholder",
+        "ocr": True,
+        "force_ocr": False,
+        "ocr_engine": "easyocr",
+        "ocr_lang": ["en"],
+        "pdf_backend": "dlparse_v2",
+        "table_mode": "fast",
+        "abort_on_error": False,
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    # Close the PDF handle as soon as the upload request has completed.
+    with open(file_path, "rb") as pdf_file:
+        files = {
+            "files": ("2206.01062v1.pdf", pdf_file, "application/pdf"),
+        }
+        response = await client.post(
+            endpoint, files=files, data=options, headers=auth_headers
+        )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    data = response.json()
+
+    # Response content checks
+    # Helper function to safely slice strings
+    def safe_slice(value, length=100):
+        if isinstance(value, str):
+            return value[:length]
+        return str(value)  # Convert non-string values to string for debug purposes
+
+    # Document check
+    check.is_in(
+        "document",
+        data,
+        msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
+    )
+    # MD check
+    check.is_in(
+        "md_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("md_content") is not None:
+        check.is_in(
+            "## DocLayNet: ",
+            data["document"]["md_content"],
+            msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
+        )
+    # JSON check
+    check.is_in(
+        "json_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("json_content") is not None:
+        check.is_in(
+            '{"schema_name": "DoclingDocument"',
+            json.dumps(data["document"]["json_content"]),
+            msg=f'JSON document should contain \'{{\\n  "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
+        )
+    # HTML check
+    check.is_in(
+        "html_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("html_content") is not None:
+        check.is_in(
+            "<!DOCTYPE html>\n<html>\n<head>",
+            data["document"]["html_content"],
+            msg=f"HTML document should contain '<!DOCTYPE html>\n<html>\n<head>'. Received: {safe_slice(data['document']['html_content'])}",
+        )
+    # Text check
+    check.is_in(
+        "text_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("text_content") is not None:
+        check.is_in(
+            "DocLayNet: A Large Human-Annotated Dataset",
+            data["document"]["text_content"],
+            msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
+        )
+    # DocTags check
+    check.is_in(
+        "doctags_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("doctags_content") is not None:
+        check.is_in(
+            "<doctag><page_header>",
+            data["document"]["doctags_content"],
+            msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
+        )
+
+
+@pytest.mark.asyncio
+async def test_referenced_artifacts(client: AsyncClient, auth_headers: dict):
+    """Test that paths in the zip file are relative to the zip file root."""
+
+    endpoint = "/v1/convert/file"
+    options = {
+        "to_formats": ["json"],
+        "image_export_mode": "referenced",
+        "target_type": "zip",
+        "ocr": False,
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    # Close the PDF handle as soon as the upload request has completed.
+    with open(file_path, "rb") as pdf_file:
+        files = {
+            "files": ("2206.01062v1.pdf", pdf_file, "application/pdf"),
+        }
+        response = await client.post(
+            endpoint, files=files, data=options, headers=auth_headers
+        )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
+        namelist = zip_file.namelist()
+        for file in namelist:
+            if file.endswith(".json"):
+                doc = DoclingDocument.model_validate(json.loads(zip_file.read(file)))
+                for item, _level in doc.iterate_items():
+                    if isinstance(item, PictureItem):
+                        assert item.image is not None
+                        print(f"{item.image.uri=}")
+                        assert str(item.image.uri) in namelist
--- a/tests/test_file_opts.py
+++ b/tests/test_file_opts.py
@@ -0,0 +1,93 @@
+import asyncio
+import json
+import os
+
+import pytest
+import pytest_asyncio
+from asgi_lifespan import LifespanManager
+from httpx import ASGITransport, AsyncClient
+
+from docling_core.types import DoclingDocument
+from docling_core.types.doc.document import PictureDescriptionData
+
+from docling_serve.app import create_app
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Return the current asyncio event loop, shared for the whole test session."""
+    return asyncio.get_event_loop()
+
+
+@pytest.fixture(scope="session")
+def auth_headers():
+    """Return request headers carrying the X-Api-Key when a key is configured."""
+    headers = {}
+    if docling_serve_settings.api_key:
+        headers["X-Api-Key"] = docling_serve_settings.api_key
+    return headers
+
+
+@pytest_asyncio.fixture(scope="session")
+async def app():
+    """Create the app and keep its lifespan context open for the test session."""
+    app = create_app()
+
+    async with LifespanManager(app) as manager:
+        print("Launching lifespan of app.")
+        yield manager.app
+
+
+@pytest_asyncio.fixture(scope="session")
+async def client(app):
+    """Yield an httpx client that calls the app in-process through ASGITransport."""
+    async with AsyncClient(
+        transport=ASGITransport(app=app), base_url="http://app.io"
+    ) as client:
+        print("Client is ready")
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_file(client: AsyncClient, auth_headers: dict):
+    """Test convert single file with picture descriptions requested from a remote API"""
+
+    endpoint = "/v1/convert/file"
+    options = {
+        "to_formats": ["md", "json"],
+        "image_export_mode": "placeholder",
+        "ocr": False,
+        "do_picture_description": True,
+        "picture_description_api": json.dumps(
+            {
+                "url": "http://localhost:11434/v1/chat/completions",  # ollama
+                "params": {"model": "granite3.2-vision:2b"},
+                "timeout": 60,
+                "prompt": "Describe this image in a few sentences. ",
+            }
+        ),
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    # Close the PDF handle as soon as the upload request has completed.
+    with open(file_path, "rb") as pdf_file:
+        files = {
+            "files": ("2206.01062v1.pdf", pdf_file, "application/pdf"),
+        }
+        response = await client.post(
+            endpoint, files=files, data=options, headers=auth_headers
+        )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    data = response.json()
+
+    doc = DoclingDocument.model_validate(data["document"]["json_content"])
+
+    for pic in doc.pictures:
+        for ann in pic.annotations:
+            if isinstance(ann, PictureDescriptionData):
+                print(f"{pic.self_ref}")
+                print(ann.text)
--- a/tests/test_results_clear.py
+++ b/tests/test_results_clear.py
@@ -0,0 +1,157 @@
+import asyncio
+import base64
+import json
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+from asgi_lifespan import LifespanManager
+from httpx import ASGITransport, AsyncClient
+
+from docling_serve.app import create_app
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Return the event loop reported by ``asyncio.get_event_loop()``."""
+    # NOTE(review): presumably overrides pytest-asyncio's per-test loop so the
+    # session-scoped async fixtures share one loop - confirm against the
+    # pytest-asyncio version in use.
+    loop = asyncio.get_event_loop()
+    return loop
+
+
+@pytest.fixture(scope="session")
+def auth_headers():
+    """Return the request headers, including the API key when one is set."""
+    api_key = docling_serve_settings.api_key
+    if not api_key:
+        # No key configured: requests go out without an auth header.
+        return {}
+    return {"X-Api-Key": api_key}
+
+
+@pytest_asyncio.fixture(scope="session")
+async def app():
+    """Yield the application, wrapped in a lifespan manager, for the session."""
+    application = create_app()
+    lifespan = LifespanManager(application)
+
+    # The app is handed out only while the lifespan context is open.
+    async with lifespan as running:
+        print("Launching lifespan of app.")
+        yield running.app
+
+
+@pytest_asyncio.fixture(scope="session")
+async def client(app):
+    """Yield an async HTTP client wired to ``app`` through an ASGI transport."""
+    asgi_transport = ASGITransport(app=app)
+
+    async with AsyncClient(transport=asgi_transport, base_url="http://app.io") as http:
+        print("Client is ready")
+        yield http
+
+
+async def convert_file(client: AsyncClient, auth_headers: dict):
+    """Submit the sample PDF for async conversion and wait until it finishes.
+
+    Args:
+        client: HTTP client used for all requests.
+        auth_headers: Headers sent with every request (may be empty).
+
+    Returns:
+        The last task payload returned by the server; its ``task_status`` is
+        ``"success"``.
+
+    Raises:
+        AssertionError: If a request does not return 200 or the task does not
+            end in ``"success"``.
+    """
+    # Locate the sample next to this test module so the helper does not depend
+    # on the directory pytest was started from.
+    doc_filename = Path(__file__).parent / "2408.09869v5.pdf"
+    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
+
+    payload = {
+        "options": {
+            "to_formats": ["json"],
+        },
+        "sources": [
+            {
+                "kind": "file",
+                "base64_string": encoded_doc,
+                "filename": doc_filename.name,
+            }
+        ],
+    }
+
+    response = await client.post(
+        "/v1/convert/source/async", json=payload, headers=auth_headers
+    )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    terminal_statuses = ("success", "failure")
+    while task["task_status"] not in terminal_statuses:
+        response = await client.get(
+            f"/v1/status/poll/{task['task_id']}", headers=auth_headers
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        # Pause only when another poll is needed; sleeping after the final
+        # status would just delay the caller.
+        if task["task_status"] not in terminal_statuses:
+            await asyncio.sleep(2)
+
+    assert task["task_status"] == "success"
+
+    return task
+
+
+@pytest.mark.asyncio
+async def test_clear_results(client: AsyncClient, auth_headers: dict):
+    """Test removal of task results through the clear endpoint.
+
+    The result must be retrievable repeatedly before the clear request and
+    answer 404 afterwards.
+    """
+
+    # Set long delay deletion, remembering the previous value so the shared
+    # settings object is left untouched for the tests that run afterwards.
+    previous_delay = docling_serve_settings.result_removal_delay
+    docling_serve_settings.result_removal_delay = 100
+    try:
+        # Convert and wait for completion
+        task = await convert_file(client, auth_headers=auth_headers)
+        result_url = f"/v1/result/{task['task_id']}"
+
+        # Get result twice: it must still be there on the second fetch
+        for attempt in (1, 2):
+            result_response = await client.get(result_url, headers=auth_headers)
+            assert result_response.status_code == 200, "Response should be 200 OK"
+            print(f"Result {attempt} ok.")
+            result = result_response.json()
+            schema_name = result["document"]["json_content"]["schema_name"]
+            assert schema_name == "DoclingDocument"
+
+        # Clear
+        clear_response = await client.get(
+            "/v1/clear/results?older_then=0", headers=auth_headers
+        )
+        assert clear_response.status_code == 200, "Response should be 200 OK"
+        print("Clear ok.")
+
+        # Get deleted result
+        result_response = await client.get(result_url, headers=auth_headers)
+        assert result_response.status_code == 404, "Response should be removed"
+        print("Result was no longer found.")
+    finally:
+        docling_serve_settings.result_removal_delay = previous_delay
+
+
+@pytest.mark.asyncio
+async def test_delay_remove(client: AsyncClient, auth_headers: dict):
+    """Test automatic removal of task with delay.
+
+    The result is fetched once, then the test waits longer than the configured
+    removal delay and expects a 404 on the next fetch.
+    """
+
+    # Set short delay deletion, remembering the previous value so the shared
+    # settings object is left untouched for the tests that run afterwards.
+    removal_delay = 5
+    previous_delay = docling_serve_settings.result_removal_delay
+    docling_serve_settings.result_removal_delay = removal_delay
+    try:
+        # Convert and wait for completion
+        task = await convert_file(client, auth_headers=auth_headers)
+        result_url = f"/v1/result/{task['task_id']}"
+
+        # Get result once
+        result_response = await client.get(result_url, headers=auth_headers)
+        assert result_response.status_code == 200, "Response should be 200 OK"
+        print("Result ok.")
+        result = result_response.json()
+        schema_name = result["document"]["json_content"]["schema_name"]
+        assert schema_name == "DoclingDocument"
+
+        print("Sleeping to wait the automatic task deletion.")
+        # Wait for the configured delay plus a 5 second safety margin (10 s).
+        await asyncio.sleep(removal_delay + 5)
+
+        # Get deleted result
+        result_response = await client.get(result_url, headers=auth_headers)
+        assert result_response.status_code == 404, "Response should be removed"
+    finally:
+        docling_serve_settings.result_removal_delay = previous_delay
--- a/uv.lock
+++ b/uv.lock