chore: bump version to 1.2.1 [skip ci]

fix: handling of vlm model options and update deps (#314)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 16:43:24 +00:00 · 2025-08-13 07:37:55 +00:00 · 2025-08-13 09:32:21 +02:00 · 2025-08-08 12:32:19 +02:00 · 2025-08-08 09:02:29 +02:00 · 2025-08-08 08:47:59 +02:00
76 changed files with 7669 additions and 5011 deletions
--- a/.github/dco.yml
+++ b/.github/dco.yml
@@ -0,0 +1,2 @@
+allowRemediationCommits:
+  individual: true  # a contributor may sign off their own earlier commits through a remediation commit
--- a/.github/styles/config/vocabularies/Docling/accept.txt
+++ b/.github/styles/config/vocabularies/Docling/accept.txt
@@ -0,0 +1,36 @@
+[Dd]ocling
+precommit
+asgi
+async
+(?i)urls
+uvicorn
+[Ww]ebserver
+keyfile
+[Ww]ebsocket(s?)
+[Kk]ubernetes
+UI
+(?i)vllm
+APIs
+[Ss]ubprocesses
+(?i)api
+Kubeflow
+(?i)Jobkit
+(?i)cpu
+(?i)PyTorch
+(?i)CUDA
+(?i)NVIDIA
+(?i)ROCm
+(?i)env
+Gradio
+bool
+Ollama
+inbody
+LGTMs
+Dolfi
+Lysak
+Nikos
+Nassar
+Panos
+Vagenas
+Staar
+Livathinos
--- a/.github/vale.ini
+++ b/.github/vale.ini
@@ -0,0 +1,11 @@
+StylesPath = styles
+MinAlertLevel = suggestion
+; Packages = write-good, proselint
+
+Vocab = Docling
+
+[*.md]
+BasedOnStyles = Vale
+
+[CHANGELOG.md]
+BasedOnStyles = 
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -15,7 +15,7 @@ jobs:
        with:
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
@@ -45,7 +45,7 @@ jobs:
          token: ${{ steps.app-token.outputs.token }}
          fetch-depth: 0  # for fetching tags, required for semantic-release
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          enable-cache: true
      - name: Install dependencies
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -15,16 +15,28 @@ jobs:
        spec:
          - name: docling-project/docling-serve
            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
+              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
            platforms: linux/amd64, linux/arm64
          - name: docling-project/docling-serve-cpu
            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-extra cu124
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
+          - name: docling-project/docling-serve-cu126
            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-extra cpu
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
            platforms: linux/amd64
+          - name: docling-project/docling-serve-cu128
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
+            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64

    permissions:
      packages: write
--- a/.github/workflows/dco-advisor.yml
+++ b/.github/workflows/dco-advisor.yml
@@ -0,0 +1,199 @@
+name: DCO Advisor Bot
+
+# Advises pull request authors on the outcome of the DCO check: posts or
+# updates one PR comment with the result and, on failure, fix instructions.
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+
+permissions:
+  pull-requests: write
+  issues: write
+  checks: read  # needed by checks.listForRef on non-public repositories
+
+jobs:
+  dco_advisor:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Handle DCO check result
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+          script: |
+            const pr = context.payload.pull_request || context.payload.check_run?.pull_requests?.[0];
+            if (!pr) return;
+
+            const prNumber = pr.number;
+            const baseRef = pr.base.ref;
+            const headSha =
+              context.payload.check_run?.head_sha ||
+              pr.head?.sha;
+            const username = pr.user.login;
+
+            console.log("HEAD SHA:", headSha);
+
+            const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));
+
+            // Poll until DCO check has a conclusion (max 6 attempts, 5s apart, ~30s total)
+            let dcoCheck = null;
+            for (let attempt = 0; attempt < 6; attempt++) {
+              const { data: checks } = await github.rest.checks.listForRef({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                ref: headSha,
+                per_page: 100
+              });
+
+              console.log("All check runs:");
+              checks.check_runs.forEach(run => {
+                console.log(`- ${run.name} (${run.status}/${run.conclusion}) @ ${run.head_sha}`);
+              });
+
+              // Match the DCO check run itself, not this workflow's dco_advisor job
+              dcoCheck = checks.check_runs.find(run =>
+                run.name.toLowerCase().includes("dco") &&
+                !run.name.toLowerCase().includes("dco_advisor") &&
+                run.head_sha === headSha
+              );
+
+              if (dcoCheck?.conclusion) break;
+              console.log(`Waiting for DCO check... (${attempt + 1})`);
+              await sleep(5000); // wait 5 seconds
+            }
+
+            if (!dcoCheck || !dcoCheck.conclusion) {
+              console.log("DCO check did not complete in time.");
+              return;
+            }
+
+            const isFailure = ["failure", "action_required"].includes(dcoCheck.conclusion);
+            console.log(`DCO check conclusion for ${headSha}: ${dcoCheck.conclusion} (treated as ${isFailure ? "failure" : "success"})`);
+
+            // Collect the commits that lack a Signed-off-by trailer, with their authors
+            let badCommits = [];
+            let authorName = "";
+            let authorEmail = "";
+            let moreInfo = `More info: [DCO check report](${dcoCheck?.html_url})`;
+
+            if (isFailure) {
+              // Paginate: a single unpaginated call only returns the first page of commits
+              const commits = await github.paginate(github.rest.pulls.listCommits, {
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                pull_number: prNumber,
+                per_page: 100
+              });
+
+              for (const commit of commits) {
+                const commitMessage = commit.commit.message;
+                const signoffMatch = commitMessage.match(/^Signed-off-by:\s+.+<.+>$/m);
+                if (!signoffMatch) {
+                  console.log(`Bad commit found ${commit.sha}`);
+                  badCommits.push({
+                    sha: commit.sha,
+                    authorName: commit.commit.author.name,
+                    authorEmail: commit.commit.author.email,
+                  });
+                }
+              }
+            }
+
+            // If multiple authors are present, you could adapt the message accordingly
+            // For now, we'll just use the first one
+            if (badCommits.length > 0) {
+              authorName = badCommits[0].authorName;
+              authorEmail = badCommits[0].authorEmail;
+            }
+
+            // Generate remediation commit message if needed
+            let remediationSnippet = "";
+            if (badCommits.length && authorEmail) {
+              remediationSnippet = `git commit --allow-empty -s -m "DCO Remediation Commit for ${authorName} <${authorEmail}>\n\n` +
+                badCommits.map(c => `I, ${c.authorName} <${c.authorEmail}>, hereby add my Signed-off-by to this commit: ${c.sha}`).join('\n') +
+                `"`;
+            } else {
+              remediationSnippet = "# Unable to auto-generate remediation message. Please check the DCO check details.";
+            }
+
+            // Build comment
+            const commentHeader = '<!-- dco-advice-bot -->';
+            let body = "";
+
+            if (isFailure) {
+              body = [
+                commentHeader,
+                '❌ **DCO Check Failed**',
+                '',
+                `Hi @${username}, your pull request has failed the Developer Certificate of Origin (DCO) check.`,
+                '',
+                'This repository supports **remediation commits**, so you can fix this without rewriting history — but you must follow the required message format.',
+                '',
+                '---',
+                '',
+                '### 🛠 Quick Fix: Add a remediation commit',
+                'Run this command:',
+                '',
+                '```bash',
+                remediationSnippet,
+                'git push',
+                '```',
+                '',
+                '---',
+                '',
+                '<details>',
+                '<summary>🔧 Advanced: Sign off each commit directly</summary>',
+                '',
+                '**For the latest commit:**',
+                '```bash',
+                'git commit --amend --signoff',
+                'git push --force-with-lease',
+                '```',
+                '',
+                '**For multiple commits:**',
+                '```bash',
+                `git rebase --signoff origin/${baseRef}`,
+                'git push --force-with-lease',
+                '```',
+                '',
+                '</details>',
+                '',
+                moreInfo
+              ].join('\n');
+            } else {
+              body = [
+                commentHeader,
+                '✅ **DCO Check Passed**',
+                '',
+                `Thanks @${username}, all your commits are properly signed off. 🎉`
+              ].join('\n');
+            }
+
+            // Get existing comments on the PR; paginate so an earlier bot comment is
+            // still found when the PR has more comments than fit on one page
+            const comments = await github.paginate(github.rest.issues.listComments, {
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              per_page: 100
+            });
+
+            // Look for a previous bot comment
+            const existingComment = comments.find(c =>
+              c.body?.includes(commentHeader)
+            );
+
+            if (existingComment) {
+              await github.rest.issues.updateComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                comment_id: existingComment.id,
+                body: body
+              });
+            } else {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                issue_number: prNumber,
+                body: body
+              });
+            }
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -19,17 +19,28 @@ jobs:
        spec:
          - name: docling-project/docling-serve
            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
+              UV_SYNC_EXTRA_ARGS=--no-extra flash-attn
            platforms: linux/amd64, linux/arm64
          - name: docling-project/docling-serve-cpu
            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-extra cu124
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn
            platforms: linux/amd64, linux/arm64
-          - name: docling-project/docling-serve-cu124
+          # - name: docling-project/docling-serve-cu124
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124
+          #   platforms: linux/amd64
+          - name: docling-project/docling-serve-cu126
            build_args: |
-              UV_SYNC_EXTRA_ARGS=--no-extra cpu
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126
            platforms: linux/amd64
-
+          - name: docling-project/docling-serve-cu128
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128
+            platforms: linux/amd64
+          # - name: docling-project/docling-serve-rocm
+          #   build_args: |
+          #     UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn
+          #   platforms: linux/amd64
    permissions:
      packages: write
      contents: read
--- a/.github/workflows/job-build.yml
+++ b/.github/workflows/job-build.yml
@@ -12,12 +12,12 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
      - name: Install dependencies
-        run: uv sync --all-extras --no-extra cu124
+        run: uv sync --all-extras --no-extra flash-attn
      - name: Build package
        run: uv build
      - name: Check content of wheel
--- a/.github/workflows/job-checks.yml
+++ b/.github/workflows/job-checks.yml
@@ -12,7 +12,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
@@ -25,10 +25,10 @@ jobs:
          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}

      - name: Install dependencies
-        run: uv sync --frozen --all-extras --no-extra cu124
+        run: uv sync --frozen --all-extras --no-extra flash-attn

      - name: Run styling check
-        run: pre-commit run --all-files
+        run: uv run pre-commit run --all-files

  build-package:
    uses: ./.github/workflows/job-build.yml
@@ -47,14 +47,16 @@ jobs:
          name: python-package-distributions
          path: dist/
      - name: Install uv and set the python version
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}
          enable-cache: true
+      - name: Create virtual environment
+        run: uv venv
      - name: Install package
        run: uv pip install dist/*.whl
      - name: Create the server
-        run: python -c 'from docling_serve.app import create_app; create_app()'
+        run: .venv/bin/python -c 'from docling_serve.app import create_app; create_app()'

  markdown-lint:
    runs-on: ubuntu-latest
@@ -64,4 +66,3 @@ jobs:
        uses: DavidAnson/markdownlint-cli2-action@v16
        with:
          globs: "**/*.md"
-
--- a/.gitignore
+++ b/.gitignore
@@ -444,3 +444,5 @@ pip-selfcheck.json
 # Makefile
 .action-lint
 .markdown-lint
+
+cookies.txt
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,8 +21,19 @@ repos:
        pass_filenames: false
        language: system
        files: '\.py$'
+  - repo: https://github.com/errata-ai/vale
+    rev: v3.12.0  # Use latest stable version
+    hooks:
+      - id: vale
+        name: vale sync
+        pass_filenames: false
+        args: [sync, "--config=.github/vale.ini"]
+      - id: vale
+        name: Spell and Style Check with Vale
+        args: ["--config=.github/vale.ini"]
+        files: \.md$
  - repo: https://github.com/astral-sh/uv-pre-commit
-    # uv version.
-    rev: 0.6.1
+    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
+    rev: 0.8.3
    hooks:
      - id: uv-lock
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,149 @@
+## [v1.2.1](https://github.com/docling-project/docling-serve/releases/tag/v1.2.1) - 2025-08-13
+
+### Fix
+
+* Handling of vlm model options and update deps ([#314](https://github.com/docling-project/docling-serve/issues/314)) ([`8b470cb`](https://github.com/docling-project/docling-serve/commit/8b470cba8ef500c271eb84c8368c8a1a1a5a6d6a))
+* Add missing response type in sync endpoints ([#309](https://github.com/docling-project/docling-serve/issues/309)) ([`8048f45`](https://github.com/docling-project/docling-serve/commit/8048f4589a91de2b2b391ab33a326efd1b29f25b))
+
+### Documentation
+
+* Update readme to use v1 ([#306](https://github.com/docling-project/docling-serve/issues/306)) ([`b3058e9`](https://github.com/docling-project/docling-serve/commit/b3058e91e0c56e27110eb50f22cbdd89640bf398))
+* Update deployment examples to use v1 API ([#308](https://github.com/docling-project/docling-serve/issues/308)) ([`63da9ee`](https://github.com/docling-project/docling-serve/commit/63da9eedebae3ad31d04e65635e573194e413793))
+* Fix typo in v1 migration instructions ([#307](https://github.com/docling-project/docling-serve/issues/307)) ([`b15dc25`](https://github.com/docling-project/docling-serve/commit/b15dc2529f78d68a475e5221c37408c3f77d8588))
+
+## [v1.2.0](https://github.com/docling-project/docling-serve/releases/tag/v1.2.0) - 2025-08-07
+
+### Feature
+
+* Workers without shared models and convert params ([#304](https://github.com/docling-project/docling-serve/issues/304)) ([`db3fdb5`](https://github.com/docling-project/docling-serve/commit/db3fdb5bc1a0ae250afd420d737abc4071a7546c))
+* Add rocm image build support and fix cuda ([#292](https://github.com/docling-project/docling-serve/issues/292)) ([`fd1b987`](https://github.com/docling-project/docling-serve/commit/fd1b987e8dc174f1a6013c003dde33e9acbae39a))
+
+## [v1.1.0](https://github.com/docling-project/docling-serve/releases/tag/v1.1.0) - 2025-07-30
+
+### Feature
+
+* Add docling-mcp in the distribution ([#290](https://github.com/docling-project/docling-serve/issues/290)) ([`ecb1874`](https://github.com/docling-project/docling-serve/commit/ecb1874a507bef83d102e0e031e49fed34298637))
+* Add 3.0 openapi endpoint ([#287](https://github.com/docling-project/docling-serve/issues/287)) ([`ec594d8`](https://github.com/docling-project/docling-serve/commit/ec594d84fe36df23e7d010a2fcf769856c43600b))
+* Add new source and target ([#270](https://github.com/docling-project/docling-serve/issues/270)) ([`3771c1b`](https://github.com/docling-project/docling-serve/commit/3771c1b55403bd51966d07d8f760d5c4fbcc1760))
+
+### Fix
+
+* Referenced paths relative to zip root ([#289](https://github.com/docling-project/docling-serve/issues/289)) ([`1333f71`](https://github.com/docling-project/docling-serve/commit/1333f71c9c6495342b2169d574e921f828446f15))
+
+## [v1.0.1](https://github.com/docling-project/docling-serve/releases/tag/v1.0.1) - 2025-07-21
+
+### Fix
+
+* Docling update v2.42.0 ([#277](https://github.com/docling-project/docling-serve/issues/277)) ([`8706706`](https://github.com/docling-project/docling-serve/commit/8706706e8797b0a06ec4baa7cf87988311be68b6))
+
+### Documentation
+
+* Typo in README ([#276](https://github.com/docling-project/docling-serve/issues/276)) ([`766adb2`](https://github.com/docling-project/docling-serve/commit/766adb248113c7bd5144d14b3c82929a2ad29f8e))
+
+## [v1.0.0](https://github.com/docling-project/docling-serve/releases/tag/v1.0.0) - 2025-07-14
+
+### Feature
+
+* V1 api with list of sources and target ([#249](https://github.com/docling-project/docling-serve/issues/249)) ([`56e328b`](https://github.com/docling-project/docling-serve/commit/56e328baf76b4bb0476fc6ca820b52034e4f97bf))
+* Use orchestrators from jobkit ([#248](https://github.com/docling-project/docling-serve/issues/248)) ([`daa924a`](https://github.com/docling-project/docling-serve/commit/daa924a77e56d063ef17347dfd8a838872a70529))
+
+### Breaking
+
+* v1 api with list of sources and target ([#249](https://github.com/docling-project/docling-serve/issues/249)) ([`56e328b`](https://github.com/docling-project/docling-serve/commit/56e328baf76b4bb0476fc6ca820b52034e4f97bf))
+* use orchestrators from jobkit ([#248](https://github.com/docling-project/docling-serve/issues/248)) ([`daa924a`](https://github.com/docling-project/docling-serve/commit/daa924a77e56d063ef17347dfd8a838872a70529))
+
+## [v0.16.1](https://github.com/docling-project/docling-serve/releases/tag/v0.16.1) - 2025-07-07
+
+### Fix
+
+* Upgrade deps including, docling v2.40.0 with locks in models init ([#264](https://github.com/docling-project/docling-serve/issues/264)) ([`bfde1a0`](https://github.com/docling-project/docling-serve/commit/bfde1a0991c2da53b72c4f131ff74fa10f6340de))
+* Missing tesseract osd ([#263](https://github.com/docling-project/docling-serve/issues/263)) ([`eb3892e`](https://github.com/docling-project/docling-serve/commit/eb3892ee141eb2c941d580b095d8a266f2d2610c))
+* Properly load models at boot ([#244](https://github.com/docling-project/docling-serve/issues/244)) ([`149a8cb`](https://github.com/docling-project/docling-serve/commit/149a8cb1c0a16c1e0b7d17f40b88b4d6e8f0109d))
+
+### Documentation
+
+* Fix typo ([#259](https://github.com/docling-project/docling-serve/issues/259)) ([`93b8471`](https://github.com/docling-project/docling-serve/commit/93b84712b2c6d180908a197847b52b217a7ff05f))
+* Change the doc example ([#258](https://github.com/docling-project/docling-serve/issues/258)) ([`c45b937`](https://github.com/docling-project/docling-serve/commit/c45b93706466a073ab4a5c75aa8a267110873e26))
+* Update typo ([#247](https://github.com/docling-project/docling-serve/issues/247)) ([`50e431f`](https://github.com/docling-project/docling-serve/commit/50e431f30fbffa33f43727417fe746d20cbb9d6b))
+
+## [v0.16.0](https://github.com/docling-project/docling-serve/releases/tag/v0.16.0) - 2025-06-25
+
+### Feature
+
+* Package updates and more cuda images ([#229](https://github.com/docling-project/docling-serve/issues/229)) ([`30aca92`](https://github.com/docling-project/docling-serve/commit/30aca92298ab0d86bb4debcfcacb2dd8b9040a27))
+
+### Documentation
+
+* Update example resources and improve README ([#231](https://github.com/docling-project/docling-serve/issues/231)) ([`80755a7`](https://github.com/docling-project/docling-serve/commit/80755a7d5955f7d0c53df8e558fdd852dd1f5b75))
+
+## [v0.15.0](https://github.com/docling-project/docling-serve/releases/tag/v0.15.0) - 2025-06-17
+
+### Feature
+
+* Use redocs and scalar as api docs ([#228](https://github.com/docling-project/docling-serve/issues/228)) ([`873d05a`](https://github.com/docling-project/docling-serve/commit/873d05aefe141c63b9c1cf53b23b4fa8c96de05d))
+
+### Fix
+
+* "tesserocr" instead of "tesseract_cli" in usage docs ([#223](https://github.com/docling-project/docling-serve/issues/223)) ([`196c5ce`](https://github.com/docling-project/docling-serve/commit/196c5ce42a04d77234a4212c3d9b9772d2c2073e))
+
+## [v0.14.0](https://github.com/docling-project/docling-serve/releases/tag/v0.14.0) - 2025-06-17
+
+### Feature
+
+* Read supported file extensions from docling ([#214](https://github.com/docling-project/docling-serve/issues/214)) ([`524f6a8`](https://github.com/docling-project/docling-serve/commit/524f6a8997b86d2f869ca491ec8fb40585b42ca4))
+
+### Fix
+
+* Typo in Headline ([#220](https://github.com/docling-project/docling-serve/issues/220)) ([`d5455b7`](https://github.com/docling-project/docling-serve/commit/d5455b7f66de39ea1f8b8927b5968d2baa23ca88))
+
+## [v0.13.0](https://github.com/docling-project/docling-serve/releases/tag/v0.13.0) - 2025-06-04
+
+### Feature
+
+* Upgrade docling to 2.36 ([#212](https://github.com/docling-project/docling-serve/issues/212)) ([`ffea347`](https://github.com/docling-project/docling-serve/commit/ffea34732b24fdd438fabd6df02d3d9ce66b4534))
+
+## [v0.12.0](https://github.com/docling-project/docling-serve/releases/tag/v0.12.0) - 2025-06-03
+
+### Feature
+
+* Export annotations in markdown and html (Docling upgrade) ([#202](https://github.com/docling-project/docling-serve/issues/202)) ([`c4c41f1`](https://github.com/docling-project/docling-serve/commit/c4c41f16dff83c5d2a0b8a4c625b5de19b36b7c5))
+
+### Fix
+
+* Processing complex params in multipart-form ([#210](https://github.com/docling-project/docling-serve/issues/210)) ([`7066f35`](https://github.com/docling-project/docling-serve/commit/7066f3520a88c07df1c80a0cc6c4339eaac4d6a7))
+
+### Documentation
+
+* Add openshift replicasets examples ([#209](https://github.com/docling-project/docling-serve/issues/209)) ([`6a8190c`](https://github.com/docling-project/docling-serve/commit/6a8190c315792bd1e0e2b0af310656baaa5551e5))
+
+## [v0.11.0](https://github.com/docling-project/docling-serve/releases/tag/v0.11.0) - 2025-05-23
+
+### Feature
+
+* Page break placeholder in markdown exports options ([#194](https://github.com/docling-project/docling-serve/issues/194)) ([`32b8a80`](https://github.com/docling-project/docling-serve/commit/32b8a809f348bf9fbde657f93589a56935d3749d))
+* Clear results registry ([#192](https://github.com/docling-project/docling-serve/issues/192)) ([`de002df`](https://github.com/docling-project/docling-serve/commit/de002dfcdc111c942a08b156c84b7fa22b3fbaf3))
+* Upgrade to Docling 2.33.0 ([#198](https://github.com/docling-project/docling-serve/issues/198)) ([`abe5aa0`](https://github.com/docling-project/docling-serve/commit/abe5aa03f54d44ecf5c6d76e3258028997a53e68))
+* Api to trigger offloading the models ([#188](https://github.com/docling-project/docling-serve/issues/188)) ([`00be428`](https://github.com/docling-project/docling-serve/commit/00be4284904d55b78c75c5475578ef11c2ade94c))
+* Figure annotations @ docling components 0.0.7 ([#181](https://github.com/docling-project/docling-serve/issues/181)) ([`3ff1b2f`](https://github.com/docling-project/docling-serve/commit/3ff1b2f9834aca37472a895a0e3da47560457d77))
+
+### Fix
+
+* Usage of hashlib for FIPS ([#171](https://github.com/docling-project/docling-serve/issues/171)) ([`8406fb9`](https://github.com/docling-project/docling-serve/commit/8406fb9b59d83247b8379974cabed497703dfc4d))
+
+### Documentation
+
+* Example and instructions on how to load model weights to persistent volume ([#197](https://github.com/docling-project/docling-serve/issues/197)) ([`3f090b7`](https://github.com/docling-project/docling-serve/commit/3f090b7d15eaf696611d89bbbba5b98569610828))
+* Async api usage and fixes ([#195](https://github.com/docling-project/docling-serve/issues/195)) ([`21c1791`](https://github.com/docling-project/docling-serve/commit/21c1791e427f5b1946ed46c68dfda03c957dca8f))
+
+## [v0.10.1](https://github.com/docling-project/docling-serve/releases/tag/v0.10.1) - 2025-04-30
+
+### Fix
+
+* Avoid missing specialized keys in the options hash ([#166](https://github.com/docling-project/docling-serve/issues/166)) ([`36787bc`](https://github.com/docling-project/docling-serve/commit/36787bc0616356a6199da618d8646de51636b34e))
+* Allow users to set the area threshold for picture descriptions ([#165](https://github.com/docling-project/docling-serve/issues/165)) ([`509f488`](https://github.com/docling-project/docling-serve/commit/509f4889f8ed4c0f0ce25bec4126ef1f1199797c))
+* Expose max wait time in sync endpoints ([#164](https://github.com/docling-project/docling-serve/issues/164)) ([`919cf5c`](https://github.com/docling-project/docling-serve/commit/919cf5c0414f2f11eb8012f451fed7a8f582b7ad))
+* Add flash-attn for cuda images ([#161](https://github.com/docling-project/docling-serve/issues/161)) ([`35c2630`](https://github.com/docling-project/docling-serve/commit/35c2630c613cf229393fc67b6938152b063ff498))
+
 ## [v0.10.0](https://github.com/docling-project/docling-serve/releases/tag/v0.10.0) - 2025-04-28

 ### Feature
--- a/Containerfile
+++ b/Containerfile
@@ -1,13 +1,17 @@
 ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

-FROM ${BASE_IMAGE}
+ARG UV_VERSION=0.8.3

-USER 0
+ARG UV_SYNC_EXTRA_ARGS=""
+
+FROM ${BASE_IMAGE} AS docling-base

 ###################################################################################################
 # OS Layer                                                                                        #
 ###################################################################################################

+USER 0
+
 RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \
    dnf -y install --best --nodocs --setopt=install_weak_deps=False dnf-plugins-core && \
    dnf config-manager --best --nodocs --setopt=install_weak_deps=False --save && \
@@ -21,16 +25,19 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache

 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
+
 ###################################################################################################
 # Docling layer                                                                                   #
 ###################################################################################################

+FROM docling-base
+
 USER 1001

 WORKDIR /opt/app-root/src

 ENV \
-    # On container environments, always set a thread budget to avoid undesired thread congestion.
    OMP_NUM_THREADS=4 \
    LANG=en_US.UTF-8 \
    LC_ALL=en_US.UTF-8 \
@@ -40,13 +47,16 @@ ENV \
    UV_PROJECT_ENVIRONMENT=/opt/app-root \
    DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models

-ARG UV_SYNC_EXTRA_ARGS=""
+ARG UV_SYNC_EXTRA_ARGS

-RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    umask 002 && uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}
+    umask 002 && \
+    UV_SYNC_ARGS="--frozen --no-install-project --no-dev --all-extras" && \
+    uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
+    FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn

 ARG MODELS_LIST="layout tableformer picture_classifier easyocr"

@@ -58,7 +68,8 @@ RUN echo "Downloading models..." && \
    chmod -R g=u ${DOCLING_SERVE_ARTIFACTS_PATH}

 COPY --chown=1001:0 ./docling_serve ./docling_serve
-RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
+
+RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
--- a/MAINTAINERS.md
+++ b/MAINTAINERS.md
@@ -1,11 +1,11 @@
 # MAINTAINERS

- Christoph Auer - [@cau-git](https://github.com/cau-git)
- Michele Dolfi - [@dolfim-ibm](https://github.com/dolfim-ibm)
- Maxim Lysak - [@maxmnemonic](https://github.com/maxmnemonic)
- Nikos Livathinos - [@nikos-livathinos](https://github.com/nikos-livathinos)
- Ahmed Nassar - [@nassarofficial](https://github.com/nassarofficial)
- Panos Vagenas - [@vagenas](https://github.com/vagenas)
- Peter Staar - [@PeterStaar-IBM](https://github.com/PeterStaar-IBM)
+- Christoph Auer - [`@cau-git`](https://github.com/cau-git)
+- Michele Dolfi - [`@dolfim-ibm`](https://github.com/dolfim-ibm)
+- Maxim Lysak - [`@maxmnemonic`](https://github.com/maxmnemonic)
+- Nikos Livathinos - [`@nikos-livathinos`](https://github.com/nikos-livathinos)
+- Ahmed Nassar - [`@nassarofficial`](https://github.com/nassarofficial)
+- Panos Vagenas - [`@vagenas`](https://github.com/vagenas)
+- Peter Staar - [`@PeterStaar-IBM`](https://github.com/PeterStaar-IBM)

 Maintainers can be contacted at [deepsearch-core@zurich.ibm.com](mailto:deepsearch-core@zurich.ibm.com).
--- a/Makefile
+++ b/Makefile
@@ -26,26 +26,47 @@ md-lint-file:
 	$(CMD_PREFIX) touch .markdown-lint

 .PHONY: docling-serve-image
-docling-serve-image: Containerfile
+docling-serve-image: Containerfile ## Build docling-serve container image
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu" -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
+	$(CMD_PREFIX) docker build --load -f Containerfile -t ghcr.io/docling-project/docling-serve:$(TAG) .
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) ghcr.io/docling-project/docling-serve:$(BRANCH_TAG)
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve:$(TAG) quay.io/docling-project/docling-serve:$(BRANCH_TAG)

 .PHONY: docling-serve-cpu-image
 docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cpu --no-extra flash-attn" -f Containerfile -t ghcr.io/docling-project/docling-serve-cpu:$(TAG) .
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) ghcr.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cpu:$(TAG) quay.io/docling-project/docling-serve-cpu:$(BRANCH_TAG)

 .PHONY: docling-serve-cu124-image
-docling-serve-cu124-image: Containerfile ## Build docling-serve container image with GPU support
+docling-serve-cu124-image: Containerfile ## Build docling-serve container image with CUDA 12.4 support
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu124" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu124:$(TAG) .
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) ghcr.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)
 	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu124:$(TAG) quay.io/docling-project/docling-serve-cu124:$(BRANCH_TAG)

+.PHONY: docling-serve-cu126-image
+docling-serve-cu126-image: Containerfile ## Build docling-serve container image with CUDA 12.6 support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.6]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu126" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu126:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) ghcr.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu126:$(TAG) quay.io/docling-project/docling-serve-cu126:$(BRANCH_TAG)
+
+.PHONY: docling-serve-cu128-image
+docling-serve-cu128-image: Containerfile ## Build docling-serve container image with CUDA 12.8 support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.8]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group cu128" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-cu128:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) ghcr.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-cu128:$(TAG) quay.io/docling-project/docling-serve-cu128:$(BRANCH_TAG)
+
+.PHONY: docling-serve-rocm-image
+docling-serve-rocm-image: Containerfile ## Build docling-serve container image with ROCm support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with ROCm 6.3]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-group pypi --group rocm --no-extra flash-attn" -f Containerfile --platform linux/amd64 -t ghcr.io/docling-project/docling-serve-rocm:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) ghcr.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+	$(CMD_PREFIX) docker tag ghcr.io/docling-project/docling-serve-rocm:$(TAG) quay.io/docling-project/docling-serve-rocm:$(BRANCH_TAG)
+
 .PHONY: action-lint
 action-lint: .action-lint ##      Lint GitHub Action workflows
 .action-lint: $(shell find .github -type f) | action-lint-file
@@ -87,9 +108,30 @@ run-docling-cpu: ## Run the docling-serve container with CPU support and assign
 	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with CPU support on port 5001...\n" "[RUN CPU]"
 	$(CMD_PREFIX) docker run -it --name docling-serve-cpu -p 5001:5001 ghcr.io/docling-project/docling-serve-cpu:main

-.PHONY: run-docling-gpu
-run-docling-gpu: ## Run the docling-serve container with GPU support and assign a container name
+.PHONY: run-docling-cu124
+run-docling-cu124: ## Run the docling-serve container with CUDA 12.4 support and assign a container name
 	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
-	$(CMD_PREFIX) docker rm -f docling-serve-gpu 2>/dev/null || true
-	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN GPU]"
-	$(CMD_PREFIX) docker run -it --name docling-serve-gpu -p 5001:5001 ghcr.io/docling-project/docling-serve:main
+	$(CMD_PREFIX) docker rm -f docling-serve-cu124 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.4]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu124 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu124:main
+
+.PHONY: run-docling-cu126
+run-docling-cu126: ## Run the docling-serve container with CUDA 12.6 support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu126 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.6]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu126 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu126:main
+
+.PHONY: run-docling-cu128
+run-docling-cu128: ## Run the docling-serve container with CUDA 12.8 support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-cu128 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN CUDA 12.8]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-cu128 -p 5001:5001 ghcr.io/docling-project/docling-serve-cu128:main
+
+.PHONY: run-docling-rocm
+run-docling-rocm: ## Run the docling-serve container with ROCm support and assign a container name
+	$(ECHO_PREFIX) printf "  %-12s Removing existing container if it exists...\n" "[CLEANUP]"
+	$(CMD_PREFIX) docker rm -f docling-serve-rocm 2>/dev/null || true
+	$(ECHO_PREFIX) printf "  %-12s Running docling-serve container with GPU support on port 5001...\n" "[RUN ROCm 6.3]"
+	$(CMD_PREFIX) docker run -it --name docling-serve-rocm -p 5001:5001 ghcr.io/docling-project/docling-serve-rocm:main
--- a/README.md
+++ b/README.md
@@ -8,69 +8,85 @@

 Running [Docling](https://github.com/docling-project/docling) as an API service.

+📚 [Docling Serve documentation](./docs/README.md)
+
+- Learn how to [configure the webserver](./docs/configuration.md)
+- Get to know all [runtime options](./docs/usage.md) of the API
+- Explore useful [deployment examples](./docs/deployment.md)
+- And more
+
+> [!NOTE]
+> **Migration to the `v1` API.** Docling Serve now has a stable v1 API. Read more on the [migration to v1](./docs/v1_migration.md).
+
 ## Getting started

 Install the `docling-serve` package and run the server.

 ```bash
 # Using the python package
-pip install "docling-serve"
-docling-serve run
+pip install "docling-serve[ui]"
+docling-serve run --enable-ui

 # Using container images, e.g. with Podman
-podman run -p 5001:5001 quay.io/docling-project/docling-serve
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve
 ```

 The server is available at

 - API <http://127.0.0.1:5001>
 - API documentation <http://127.0.0.1:5001/docs>
-  ![swagger.png](img/swagger.png)
+- UI playground <http://127.0.0.1:5001/ui>
+
+![API documentation](img/fastapi-ui.png)

 Try it out with a simple conversion:

 ```bash
 curl -X 'POST' \
-  'http://localhost:5001/v1alpha/convert/source' \
+  'http://localhost:5001/v1/convert/source' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

-### Container images
+### Container Images

-Available container images:
+The following container images are available for running **Docling Serve** with different hardware and PyTorch configurations:

-| Name | Description | Arch | Size |
-| -----|-------------|------|------|
-| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br /> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Simple image for Docling Serve, installing all packages from the official pypi.org index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
-| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br /> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | Cpu-only image which installs `torch` from the pytorch cpu index. | `linux/amd64`, `linux/arm64` | 3.6 GB |
-| [`ghcr.io/docling-project/docling-serve-cu124`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu124) <br /> [`quay.io/docling-project/docling-serve-cu124`](https://quay.io/repository/docling-project/docling-serve-cu124) | Cuda 12.4 image which installs `torch` from the pytorch cu124 index. | `linux/amd64` | 8.7 GB |
+#### 📦 Distributed Images
+
+| Image | Description | Architectures | Size |
+|-------|-------------|----------------|------|
+| [`ghcr.io/docling-project/docling-serve`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve) <br> [`quay.io/docling-project/docling-serve`](https://quay.io/repository/docling-project/docling-serve) | Base image with all packages installed from the official PyPI index. | `linux/amd64`, `linux/arm64` | 4.4 GB (arm64) <br> 8.7 GB (amd64) |
+| [`ghcr.io/docling-project/docling-serve-cpu`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cpu) <br> [`quay.io/docling-project/docling-serve-cpu`](https://quay.io/repository/docling-project/docling-serve-cpu) | CPU-only variant, using `torch` from the PyTorch CPU index. | `linux/amd64`, `linux/arm64` | 4.4 GB |
+| [`ghcr.io/docling-project/docling-serve-cu126`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu126) <br> [`quay.io/docling-project/docling-serve-cu126`](https://quay.io/repository/docling-project/docling-serve-cu126) | CUDA 12.6 build with `torch` from the cu126 index. | `linux/amd64` | 10.0 GB |
+| [`ghcr.io/docling-project/docling-serve-cu128`](https://github.com/docling-project/docling-serve/pkgs/container/docling-serve-cu128) <br> [`quay.io/docling-project/docling-serve-cu128`](https://quay.io/repository/docling-project/docling-serve-cu128) | CUDA 12.8 build with `torch` from the cu128 index. | `linux/amd64` | 11.4 GB |
+
+#### 🚫 Not Distributed
+
+An image for AMD ROCm 6.3 (`docling-serve-rocm`) is supported but **not published** due to its large size.
+
+To build it locally:
+
+```bash
+git clone --branch main git@github.com:docling-project/docling-serve.git
+cd docling-serve/
+make docling-serve-rocm-image
+```
+
+For deployment using Docker Compose, see [docs/deployment.md](docs/deployment.md).

 Coming soon: `docling-serve-slim` images will reduce the size by skipping the model weights download.

 ### Demonstration UI

-```bash
-# Install the Python package with the extra dependencies
-pip install "docling-serve[ui]"
-docling-serve run --enable-ui
-
-# Run the container image with the extra env parameters
-podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true quay.io/docling-project/docling-serve
-```
-
 An easy to use UI is available at the `/ui` endpoint.

-![ui-input.png](img/ui-input.png)
+![Input controllers in the UI](img/ui-input.png)

-![ui-output.png](img/ui-output.png)
-
-## Documentation and advance usages
-
-Visit the [Docling Serve documentation](./docs/README.md) for learning how to [configure the webserver](./docs/configuration.md), use all the [runtime options](./docs/usage.md) of the API and [deployment examples](./docs/deployment.md).
+![Output visualization in the UI](img/ui-output.png)

 ## Get help and support

--- a/docling_serve/main.py
+++ b/docling_serve/main.py
@@ -30,6 +30,7 @@ logger = logging.getLogger(__name__)
 def version_callback(value: bool) -> None:
    if value:
        docling_serve_version = importlib.metadata.version("docling_serve")
+        docling_jobkit_version = importlib.metadata.version("docling-jobkit")
        docling_version = importlib.metadata.version("docling")
        docling_core_version = importlib.metadata.version("docling-core")
        docling_ibm_models_version = importlib.metadata.version("docling-ibm-models")
@@ -38,6 +39,7 @@ def version_callback(value: bool) -> None:
        py_impl_version = sys.implementation.cache_tag
        py_lang_version = platform.python_version()
        console.print(f"Docling Serve version: {docling_serve_version}")
+        console.print(f"Docling Jobkit version: {docling_jobkit_version}")
        console.print(f"Docling version: {docling_version}")
        console.print(f"Docling Core version: {docling_core_version}")
        console.print(f"Docling IBM Models version: {docling_ibm_models_version}")
@@ -113,11 +115,13 @@ def _run(
    protocol = "https" if run_ssl else "http"
    url = f"{protocol}://{uvicorn_settings.host}:{uvicorn_settings.port}"
    url_docs = f"{url}/docs"
+    url_scalar = f"{url}/scalar"
    url_ui = f"{url}/ui"

    console.print("")
    console.print(f"Server started at [link={url}]{url}[/]")
    console.print(f"Documentation at [link={url_docs}]{url_docs}[/]")
+    # The link target must be the Scalar URL itself, not the /docs URL.
+    console.print(f"Scalar docs at [link={url_scalar}]{url_scalar}[/]")
    if docling_serve_settings.enable_ui:
        console.print(f"UI at [link={url_ui}]{url_ui}[/]")

--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -1,4 +1,5 @@
 import asyncio
+import copy
 import importlib.metadata
 import logging
 import shutil
@@ -11,6 +12,7 @@ from fastapi import (
    BackgroundTasks,
    Depends,
    FastAPI,
+    Form,
    HTTPException,
    Query,
    UploadFile,
@@ -23,38 +25,52 @@ from fastapi.openapi.docs import (
    get_swagger_ui_html,
    get_swagger_ui_oauth2_redirect_html,
 )
-from fastapi.responses import RedirectResponse
+from fastapi.responses import JSONResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
+from scalar_fastapi import get_scalar_api_reference

 from docling.datamodel.base_models import DocumentStream
-
-from docling_serve.datamodel.callback import (
+from docling_jobkit.datamodel.callback import (
    ProgressCallbackRequest,
    ProgressCallbackResponse,
 )
-from docling_serve.datamodel.convert import ConvertDocumentsOptions
+from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
+from docling_jobkit.datamodel.task import Task, TaskSource
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    TaskTarget,
+    ZipTarget,
+)
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+    ProgressInvalid,
+    TaskNotFoundError,
+)
+
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
 from docling_serve.datamodel.requests import (
-    ConvertDocumentFileSourcesRequest,
-    ConvertDocumentHttpSourcesRequest,
    ConvertDocumentsRequest,
+    FileSourceRequest,
+    HttpSourceRequest,
+    S3SourceRequest,
+    TargetName,
 )
 from docling_serve.datamodel.responses import (
+    ClearResponse,
    ConvertDocumentResponse,
    HealthCheckResponse,
    MessageKind,
+    PresignedUrlConvertDocumentResponse,
    TaskStatusResponse,
    WebsocketMessage,
 )
-from docling_serve.datamodel.task import Task, TaskSource
-from docling_serve.engines.async_orchestrator import (
-    BaseAsyncOrchestrator,
-    ProgressInvalid,
-)
-from docling_serve.engines.async_orchestrator_factory import get_async_orchestrator
-from docling_serve.engines.base_orchestrator import TaskNotFoundError
 from docling_serve.helper_functions import FormDepends
+from docling_serve.orchestrator_factory import get_async_orchestrator
+from docling_serve.response_preparation import prepare_response
 from docling_serve.settings import docling_serve_settings
 from docling_serve.storage import get_scratch
+from docling_serve.websocket_notifier import WebsocketNotifier


 # Set up custom logging as we'll be intermixes with FastAPI/Uvicorn's logging
@@ -92,11 +108,15 @@ _log = logging.getLogger(__name__)
 # Context manager to initialize and clean up the lifespan of the FastAPI app
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    orchestrator = get_async_orchestrator()
    scratch_dir = get_scratch()

+    orchestrator = get_async_orchestrator()
+    notifier = WebsocketNotifier(orchestrator)
+    orchestrator.bind_notifier(notifier)
+
    # Warm up processing cache
-    await orchestrator.warm_up_caches()
+    if docling_serve_settings.load_models_at_boot:
+        await orchestrator.warm_up_caches()

    # Start the background queue processor
    queue_task = asyncio.create_task(orchestrator.process_queue())
@@ -138,8 +158,8 @@ def create_app():  # noqa: C901

    app = FastAPI(
        title="Docling Serve",
-        docs_url=None if offline_docs_assets else "/docs",
-        redoc_url=None if offline_docs_assets else "/redocs",
+        docs_url=None if offline_docs_assets else "/swagger",
+        redoc_url=None if offline_docs_assets else "/docs",
        lifespan=lifespan,
        version=version,
    )
@@ -190,7 +210,7 @@ def create_app():  # noqa: C901
            name="static",
        )

-        @app.get("/docs", include_in_schema=False)
+        @app.get("/swagger", include_in_schema=False)
        async def custom_swagger_ui_html():
            return get_swagger_ui_html(
                openapi_url=app.openapi_url,
@@ -204,7 +224,7 @@ def create_app():  # noqa: C901
        async def swagger_ui_redirect():
            return get_swagger_ui_oauth2_redirect_html()

-        @app.get("/redoc", include_in_schema=False)
+        @app.get("/docs", include_in_schema=False)
        async def redoc_html():
            return get_redoc_html(
                openapi_url=app.openapi_url,
@@ -212,28 +232,43 @@ def create_app():  # noqa: C901
                redoc_js_url="/static/redoc.standalone.js",
            )

+    @app.get("/scalar", include_in_schema=False)
+    async def scalar_html():
+        """Serve the Scalar API reference page built from the app's OpenAPI URL."""
+        favicon_url = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
+        # hide_client_button=True is not passed yet: not yet released but in main
+        reference_page = get_scalar_api_reference(
+            openapi_url=app.openapi_url,
+            title=app.title,
+            scalar_favicon_url=favicon_url,
+        )
+        return reference_page
+
    ########################
    # Async / Sync helpers #
    ########################

    async def _enque_source(
-        orchestrator: BaseAsyncOrchestrator, conversion_request: ConvertDocumentsRequest
+        orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
    ) -> Task:
        sources: list[TaskSource] = []
-        if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
-            sources.extend(conversion_request.file_sources)
-        if isinstance(conversion_request, ConvertDocumentHttpSourcesRequest):
-            sources.extend(conversion_request.http_sources)
+        for s in conversion_request.sources:
+            if isinstance(s, FileSourceRequest):
+                sources.append(FileSource.model_validate(s))
+            elif isinstance(s, HttpSourceRequest):
+                sources.append(HttpSource.model_validate(s))
+            elif isinstance(s, S3SourceRequest):
+                sources.append(S3Coordinates.model_validate(s))

        task = await orchestrator.enqueue(
-            sources=sources, options=conversion_request.options
+            sources=sources,
+            options=conversion_request.options,
+            target=conversion_request.target,
        )
        return task

    async def _enque_file(
-        orchestrator: BaseAsyncOrchestrator,
+        orchestrator: BaseOrchestrator,
        files: list[UploadFile],
-        options: ConvertDocumentsOptions,
+        options: ConvertDocumentsRequestOptions,
+        target: TaskTarget,
    ) -> Task:
        _log.info(f"Received {len(files)} files for processing.")

@@ -245,13 +280,12 @@ def create_app():  # noqa: C901
            name = file.filename if file.filename else f"file{suffix}.pdf"
            file_sources.append(DocumentStream(name=name, stream=buf))

-        task = await orchestrator.enqueue(sources=file_sources, options=options)
+        task = await orchestrator.enqueue(
+            sources=file_sources, options=options, target=target
+        )
        return task

-    async def _wait_task_complete(
-        orchestrator: BaseAsyncOrchestrator, task_id: str
-    ) -> bool:
-        MAX_WAIT = 120
+    async def _wait_task_complete(orchestrator: BaseOrchestrator, task_id: str) -> bool:
        start_time = time.monotonic()
        while True:
            task = await orchestrator.task_status(task_id=task_id)
@@ -259,13 +293,82 @@ def create_app():  # noqa: C901
                return True
            await asyncio.sleep(5)
            elapsed_time = time.monotonic() - start_time
-            if elapsed_time > MAX_WAIT:
+            if elapsed_time > docling_serve_settings.max_sync_wait:
                return False

+    ##########################################
+    # Downgrade openapi 3.1 to 3.0.x helpers #
+    ##########################################
+
+    def ensure_array_items(schema):
+        """Give an array schema an explicit item type, defaulting to string.
+
+        Mutates ``schema`` in place; schemas whose type is not "array" are
+        left untouched.
+        """
+        if "type" not in schema or schema["type"] != "array":
+            return
+
+        items = schema["items"] if "items" in schema else None
+        if items is None:
+            # Missing (or null) items: fall back to an array of strings.
+            schema["items"] = {"type": "string"}
+        elif isinstance(items, dict) and "type" not in items:
+            items["type"] = "string"
+
+    def handle_discriminators(schema):
+        """Mark the discriminator property as required when the schema defines it.
+
+        Mutates ``schema`` in place; nothing happens unless the discriminator
+        names a property that is listed under "properties".
+        """
+        if "discriminator" not in schema:
+            return
+        if "propertyName" not in schema["discriminator"]:
+            return
+
+        discriminator_name = schema["discriminator"]["propertyName"]
+        properties = schema["properties"] if "properties" in schema else {}
+        if discriminator_name not in properties:
+            return
+
+        required = schema.setdefault("required", [])
+        if discriminator_name not in required:
+            required.append(discriminator_name)
+
+    def handle_properties(schema):
+        """Add "kind" to the required list of a schema that has a "kind" property.
+
+        Mutates ``schema`` in place and creates the "required" list if missing.
+        """
+        if "properties" not in schema or "kind" not in schema["properties"]:
+            return
+
+        required = schema.setdefault("required", [])
+        if "kind" not in required:
+            required.append("kind")
+
+    # Downgrade openapi 3.1 to 3.0.x
+    def downgrade_openapi31_to_30(spec):
+        """Return an OpenAPI 3.0.x compatible copy of an OpenAPI 3.1 ``spec``.
+
+        The input is deep-copied before any helper mutates it, so the caller's
+        spec (for example the schema object returned by ``app.openapi()``) is
+        never modified. The "openapi" version field is not touched here.
+        """
+
+        def strip_unsupported(obj):
+            """Recursively drop "const", "examples" and "prefixItems" keys."""
+            if isinstance(obj, dict):
+                obj = {
+                    k: strip_unsupported(v)
+                    for k, v in obj.items()
+                    if k not in ("const", "examples", "prefixItems")
+                }
+
+                handle_discriminators(obj)
+                ensure_array_items(obj)
+
+                # Check for oneOf and anyOf to handle nested schemas
+                for key in ["oneOf", "anyOf"]:
+                    if key in obj:
+                        for sub in obj[key]:
+                            handle_discriminators(sub)
+                            ensure_array_items(sub)
+
+                return obj
+            elif isinstance(obj, list):
+                return [strip_unsupported(i) for i in obj]
+            return obj
+
+        # Work on a private copy: handle_properties() edits schemas in place.
+        downgraded = copy.deepcopy(spec)
+
+        if "components" in downgraded and "schemas" in downgraded["components"]:
+            for schema in downgraded["components"]["schemas"].values():
+                handle_properties(schema)
+
+        return strip_unsupported(downgraded)
+
    #############################
    # API Endpoints definitions #
    #############################

+    @app.get("/openapi-3.0.json")
+    def openapi_30():
+        """Return the app's OpenAPI schema downgraded and labelled as 3.0.3."""
+        spec_30 = downgrade_openapi31_to_30(app.openapi())
+        spec_30["openapi"] = "3.0.3"
+        return JSONResponse(spec_30)
+
    # Favicon
    @app.get("/favicon.ico", include_in_schema=False)
    async def favicon():
@@ -286,8 +389,8 @@ def create_app():  # noqa: C901

    # Convert a document from URL(s)
    @app.post(
-        "/v1alpha/convert/source",
-        response_model=ConvertDocumentResponse,
+        "/v1/convert/source",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -297,36 +400,33 @@ def create_app():  # noqa: C901
    )
    async def process_url(
        background_tasks: BackgroundTasks,
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        conversion_request: ConvertDocumentsRequest,
    ):
+        """Convert the requested sources synchronously and return the prepared response.
+
+        Raises:
+            HTTPException: 504 when the task does not complete within
+                ``docling_serve_settings.max_sync_wait`` seconds.
+        """
        task = await _enque_source(
            orchestrator=orchestrator, conversion_request=conversion_request
        )
-        success = await _wait_task_complete(
+        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
        )

-        if not success:
+        if not completed:
            # TODO: abort task!
-            return HTTPException(
-                status_code=504, detail="Conversion is taking too long."
+            # Raise (not return) so that FastAPI actually answers with HTTP 504.
+            raise HTTPException(
+                status_code=504,
+                detail=f"Conversion is taking too long. The maximum wait time is configured as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
            )

-        result = await orchestrator.task_result(
-            task_id=task.task_id, background_tasks=background_tasks
+        task = await orchestrator.get_raw_task(task_id=task.task_id)
+        response = await prepare_response(
+            task=task, orchestrator=orchestrator, background_tasks=background_tasks
        )
-        if result is None:
-            raise HTTPException(
-                status_code=404,
-                detail="Task result not found. Please wait for a completion status.",
-            )
-        return result
+        return response

    # Convert a document from file(s)
    @app.post(
-        "/v1alpha/convert/file",
-        response_model=ConvertDocumentResponse,
+        "/v1/convert/file",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -335,42 +435,41 @@ def create_app():  # noqa: C901
    )
    async def process_file(
        background_tasks: BackgroundTasks,
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        files: list[UploadFile],
        options: Annotated[
-            ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
        ],
+        target_type: Annotated[TargetName, Form()] = TargetName.INBODY,
    ):
+        """Convert the uploaded files synchronously and return the prepared response.
+
+        ``target_type`` selects an in-body target or, for any other value, a zip target.
+
+        Raises:
+            HTTPException: 504 when the task does not complete within
+                ``docling_serve_settings.max_sync_wait`` seconds.
+        """
+        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options
+            orchestrator=orchestrator, files=files, options=options, target=target
        )
-        success = await _wait_task_complete(
+        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
        )

-        if not success:
+        if not completed:
            # TODO: abort task!
-            return HTTPException(
-                status_code=504, detail="Conversion is taking too long."
+            # Raise (not return) so that FastAPI actually answers with HTTP 504.
+            raise HTTPException(
+                status_code=504,
+                detail=f"Conversion is taking too long. The maximum wait time is configured as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
            )

-        result = await orchestrator.task_result(
-            task_id=task.task_id, background_tasks=background_tasks
+        task = await orchestrator.get_raw_task(task_id=task.task_id)
+        response = await prepare_response(
+            task=task, orchestrator=orchestrator, background_tasks=background_tasks
        )
-        if result is None:
-            raise HTTPException(
-                status_code=404,
-                detail="Task result not found. Please wait for a completion status.",
-            )
-        return result
+        return response

    # Convert a document from URL(s) using the async api
    @app.post(
-        "/v1alpha/convert/source/async",
+        "/v1/convert/source/async",
        response_model=TaskStatusResponse,
    )
    async def process_url_async(
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
@@ -388,19 +487,21 @@ def create_app():  # noqa: C901

    # Convert a document from file(s) using the async api
    @app.post(
-        "/v1alpha/convert/file/async",
+        "/v1/convert/file/async",
        response_model=TaskStatusResponse,
    )
    async def process_file_async(
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        background_tasks: BackgroundTasks,
        files: list[UploadFile],
        options: Annotated[
-            ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
+            ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
        ],
+        target_type: Annotated[TargetName, Form()] = TargetName.INBODY,
    ):
+        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options
+            orchestrator=orchestrator, files=files, options=options, target=target
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
@@ -414,14 +515,15 @@ def create_app():  # noqa: C901

    # Task status poll
    @app.get(
-        "/v1alpha/status/poll/{task_id}",
+        "/v1/status/poll/{task_id}",
        response_model=TaskStatusResponse,
    )
    async def task_status_poll(
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
        wait: Annotated[
-            float, Query(help="Number of seconds to wait for a completed status.")
+            float,
+            Query(description="Number of seconds to wait for a completed status."),
        ] = 0.0,
    ):
        try:
@@ -438,13 +540,14 @@ def create_app():  # noqa: C901

    # Task status websocket
    @app.websocket(
-        "/v1alpha/status/ws/{task_id}",
+        "/v1/status/ws/{task_id}",
    )
    async def task_status_ws(
        websocket: WebSocket,
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
    ):
+        assert isinstance(orchestrator.notifier, WebsocketNotifier)
        await websocket.accept()

        if task_id not in orchestrator.tasks:
@@ -459,7 +562,7 @@ def create_app():  # noqa: C901
        task = orchestrator.tasks[task_id]

        # Track active WebSocket connections for this job
-        orchestrator.task_subscribers[task_id].add(websocket)
+        orchestrator.notifier.task_subscribers[task_id].add(websocket)

        try:
            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
@@ -497,12 +600,12 @@ def create_app():  # noqa: C901
            _log.info(f"WebSocket disconnected for job {task_id}")

        finally:
-            orchestrator.task_subscribers[task_id].remove(websocket)
+            orchestrator.notifier.task_subscribers[task_id].remove(websocket)

    # Task result
    @app.get(
-        "/v1alpha/result/{task_id}",
-        response_model=ConvertDocumentResponse,
+        "/v1/result/{task_id}",
+        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -510,27 +613,26 @@ def create_app():  # noqa: C901
        },
    )
    async def task_result(
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        background_tasks: BackgroundTasks,
        task_id: str,
    ):
-        result = await orchestrator.task_result(
-            task_id=task_id, background_tasks=background_tasks
-        )
-        if result is None:
-            raise HTTPException(
-                status_code=404,
-                detail="Task result not found. Please wait for a completion status.",
+        try:
+            task = await orchestrator.get_raw_task(task_id=task_id)
+            response = await prepare_response(
+                task=task, orchestrator=orchestrator, background_tasks=background_tasks
            )
-        return result
+            return response
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")

    # Update task progress
    @app.post(
-        "/v1alpha/callback/task/progress",
+        "/v1/callback/task/progress",
        response_model=ProgressCallbackResponse,
    )
    async def callback_task_progress(
-        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
        request: ProgressCallbackRequest,
    ):
        try:
@@ -543,4 +645,29 @@ def create_app():  # noqa: C901
                status_code=400, detail=f"Invalid progress payload: {err}"
            )

+    #### Clear requests
+
+    # Offload models: delegates to orchestrator.clear_converters()
+    @app.get(  # NOTE(review): a clearing call exposed via GET — confirm the verb
+        "/v1/clear/converters",
+        response_model=ClearResponse,
+    )
+    async def clear_converters(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+    ):
+        await orchestrator.clear_converters()
+        return ClearResponse()  # built with defaults only, no payload from the call
+
+    # Clean results: forwards `older_then` to orchestrator.clear_results(older_than=)
+    @app.get(  # NOTE(review): a clearing call exposed via GET — confirm the verb
+        "/v1/clear/results",
+        response_model=ClearResponse,
+    )
+    async def clear_results(
+        orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+        older_then: float = 3600,  # sic (exposed name); presumably seconds — confirm
+    ):
+        await orchestrator.clear_results(older_than=older_then)
+        return ClearResponse()  # built with defaults only, no payload from the call
+
    return app
--- a/docling_serve/datamodel/callback.py
+++ b/docling_serve/datamodel/callback.py
@@ -1,50 +0,0 @@
-import enum
-from typing import Annotated, Literal
-
-from pydantic import BaseModel, Field
-
-
-class ProgressKind(str, enum.Enum):
-    SET_NUM_DOCS = "set_num_docs"
-    UPDATE_PROCESSED = "update_processed"
-
-
-class BaseProgress(BaseModel):
-    kind: ProgressKind
-
-
-class ProgressSetNumDocs(BaseProgress):
-    kind: Literal[ProgressKind.SET_NUM_DOCS] = ProgressKind.SET_NUM_DOCS
-
-    num_docs: int
-
-
-class SucceededDocsItem(BaseModel):
-    source: str
-
-
-class FailedDocsItem(BaseModel):
-    source: str
-    error: str
-
-
-class ProgressUpdateProcessed(BaseProgress):
-    kind: Literal[ProgressKind.UPDATE_PROCESSED] = ProgressKind.UPDATE_PROCESSED
-
-    num_processed: int
-    num_succeeded: int
-    num_failed: int
-
-    docs_succeeded: list[SucceededDocsItem]
-    docs_failed: list[FailedDocsItem]
-
-
-class ProgressCallbackRequest(BaseModel):
-    task_id: str
-    progress: Annotated[
-        ProgressSetNumDocs | ProgressUpdateProcessed, Field(discriminator="kind")
-    ]
-
-
-class ProgressCallbackResponse(BaseModel):
-    status: Literal["ack"] = "ack"
--- a/docling_serve/datamodel/convert.py
+++ b/docling_serve/datamodel/convert.py
@@ -1,23 +1,13 @@
 # Define the input options for the API
-from typing import Annotated, Any, Optional
+from typing import Annotated

-from pydantic import AnyUrl, BaseModel, Field, model_validator
-from typing_extensions import Self
+from pydantic import Field

-from docling.datamodel.base_models import InputFormat, OutputFormat
 from docling.datamodel.pipeline_options import (
    EasyOcrOptions,
-    PdfBackend,
-    PdfPipeline,
-    TableFormerMode,
-    TableStructureOptions,
-)
-from docling.datamodel.settings import (
-    DEFAULT_PAGE_RANGE,
-    PageRange,
 )
 from docling.models.factories import get_ocr_factory
-from docling_core.types.doc import ImageRefMode
+from docling_jobkit.datamodel.convert import ConvertDocumentsOptions

 from docling_serve.settings import docling_serve_settings

@@ -27,150 +17,7 @@ ocr_factory = get_ocr_factory(
 ocr_engines_enum = ocr_factory.get_enum()


-class PictureDescriptionLocal(BaseModel):
-    repo_id: Annotated[
-        str,
-        Field(
-            description="Repository id from the Hugging Face Hub.",
-            examples=[
-                "HuggingFaceTB/SmolVLM-256M-Instruct",
-                "ibm-granite/granite-vision-3.2-2b",
-            ],
-        ),
-    ]
-    prompt: Annotated[
-        str,
-        Field(
-            description="Prompt used when calling the vision-language model.",
-            examples=[
-                "Describe this image in a few sentences.",
-                "This is a figure from a document. Provide a detailed description of it.",
-            ],
-        ),
-    ] = "Describe this image in a few sentences."
-    generation_config: Annotated[
-        dict[str, Any],
-        Field(
-            description="Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig",
-            examples=[{"max_new_tokens": 200, "do_sample": False}],
-        ),
-    ] = {"max_new_tokens": 200, "do_sample": False}
-
-
-class PictureDescriptionApi(BaseModel):
-    url: Annotated[
-        AnyUrl,
-        Field(
-            description="Endpoint which accepts openai-api compatible requests.",
-            examples=[
-                AnyUrl(
-                    "http://localhost:8000/v1/chat/completions"
-                ),  # example of a local vllm api
-                AnyUrl(
-                    "http://localhost:11434/v1/chat/completions"
-                ),  # example of ollama
-            ],
-        ),
-    ]
-    headers: Annotated[
-        dict[str, str],
-        Field(
-            description="Headers used for calling the API endpoint. For example, it could include authentication headers."
-        ),
-    ] = {}
-    params: Annotated[
-        dict[str, Any],
-        Field(
-            description="Model parameters.",
-            examples=[
-                {  # on vllm
-                    "model": "HuggingFaceTB/SmolVLM-256M-Instruct",
-                    "max_completion_tokens": 200,
-                },
-                {  # on vllm
-                    "model": "ibm-granite/granite-vision-3.2-2b",
-                    "max_completion_tokens": 200,
-                },
-                {  # on ollama
-                    "model": "granite3.2-vision:2b"
-                },
-            ],
-        ),
-    ] = {}
-    timeout: Annotated[float, Field(description="Timeout for the API request.")] = 20
-    prompt: Annotated[
-        str,
-        Field(
-            description="Prompt used when calling the vision-language model.",
-            examples=[
-                "Describe this image in a few sentences.",
-                "This is a figures from a document. Provide a detailed description of it.",
-            ],
-        ),
-    ] = "Describe this image in a few sentences."
-
-
-class ConvertDocumentsOptions(BaseModel):
-    from_formats: Annotated[
-        list[InputFormat],
-        Field(
-            description=(
-                "Input format(s) to convert from. String or list of strings. "
-                f"Allowed values: {', '.join([v.value for v in InputFormat])}. "
-                "Optional, defaults to all formats."
-            ),
-            examples=[[v.value for v in InputFormat]],
-        ),
-    ] = list(InputFormat)
-
-    to_formats: Annotated[
-        list[OutputFormat],
-        Field(
-            description=(
-                "Output format(s) to convert to. String or list of strings. "
-                f"Allowed values: {', '.join([v.value for v in OutputFormat])}. "
-                "Optional, defaults to Markdown."
-            ),
-            examples=[[OutputFormat.MARKDOWN]],
-        ),
-    ] = [OutputFormat.MARKDOWN]
-
-    image_export_mode: Annotated[
-        ImageRefMode,
-        Field(
-            description=(
-                "Image export mode for the document (in case of JSON,"
-                " Markdown or HTML). "
-                f"Allowed values: {', '.join([v.value for v in ImageRefMode])}. "
-                "Optional, defaults to Embedded."
-            ),
-            examples=[ImageRefMode.EMBEDDED.value],
-            # pattern="embedded|placeholder|referenced",
-        ),
-    ] = ImageRefMode.EMBEDDED
-
-    do_ocr: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, the bitmap content will be processed using OCR. "
-                "Boolean. Optional, defaults to true"
-            ),
-            # examples=[True],
-        ),
-    ] = True
-
-    force_ocr: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, replace existing text with OCR-generated "
-                "text over content. Boolean. Optional, defaults to false."
-            ),
-            # examples=[False],
-        ),
-    ] = False
-
+class ConvertDocumentsRequestOptions(ConvertDocumentsOptions):
    ocr_engine: Annotated[  # type: ignore
        ocr_engines_enum,
        Field(
@@ -183,57 +30,6 @@ class ConvertDocumentsOptions(BaseModel):
        ),
    ] = ocr_engines_enum(EasyOcrOptions.kind)  # type: ignore

-    ocr_lang: Annotated[
-        Optional[list[str]],
-        Field(
-            description=(
-                "List of languages used by the OCR engine. "
-                "Note that each OCR engine has "
-                "different values for the language names. String or list of strings. "
-                "Optional, defaults to empty."
-            ),
-            examples=[["fr", "de", "es", "en"]],
-        ),
-    ] = None
-
-    pdf_backend: Annotated[
-        PdfBackend,
-        Field(
-            description=(
-                "The PDF backend to use. String. "
-                f"Allowed values: {', '.join([v.value for v in PdfBackend])}. "
-                f"Optional, defaults to {PdfBackend.DLPARSE_V4.value}."
-            ),
-            examples=[PdfBackend.DLPARSE_V4],
-        ),
-    ] = PdfBackend.DLPARSE_V4
-
-    table_mode: Annotated[
-        TableFormerMode,
-        Field(
-            description=(
-                "Mode to use for table structure, String. "
-                f"Allowed values: {', '.join([v.value for v in TableFormerMode])}. "
-                "Optional, defaults to fast."
-            ),
-            examples=[TableStructureOptions().mode],
-            # pattern="fast|accurate",
-        ),
-    ] = TableStructureOptions().mode
-
-    pipeline: Annotated[
-        PdfPipeline,
-        Field(description="Choose the pipeline to process PDF or image files."),
-    ] = PdfPipeline.STANDARD
-
-    page_range: Annotated[
-        PageRange,
-        Field(
-            description="Only convert a range of pages. The page number starts at 1.",
-            examples=[(1, 4)],
-        ),
-    ] = DEFAULT_PAGE_RANGE
-
    document_timeout: Annotated[
        float,
        Field(
@@ -242,126 +38,3 @@ class ConvertDocumentsOptions(BaseModel):
            le=docling_serve_settings.max_document_timeout,
        ),
    ] = docling_serve_settings.max_document_timeout
-
-    abort_on_error: Annotated[
-        bool,
-        Field(
-            description=(
-                "Abort on error if enabled. Boolean. Optional, defaults to false."
-            ),
-            # examples=[False],
-        ),
-    ] = False
-
-    return_as_file: Annotated[
-        bool,
-        Field(
-            description=(
-                "Return the output as a zip file "
-                "(will happen anyway if multiple files are generated). "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    do_table_structure: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, the table structure will be extracted. "
-                "Boolean. Optional, defaults to true."
-            ),
-            examples=[True],
-        ),
-    ] = True
-
-    include_images: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, images will be extracted from the document. "
-                "Boolean. Optional, defaults to true."
-            ),
-            examples=[True],
-        ),
-    ] = True
-
-    images_scale: Annotated[
-        float,
-        Field(
-            description="Scale factor for images. Float. Optional, defaults to 2.0.",
-            examples=[2.0],
-        ),
-    ] = 2.0
-
-    do_code_enrichment: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, perform OCR code enrichment. "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    do_formula_enrichment: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, perform formula OCR, return LaTeX code. "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    do_picture_classification: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, classify pictures in documents. "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    do_picture_description: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, describe pictures in documents. "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    picture_description_local: Annotated[
-        Optional[PictureDescriptionLocal],
-        Field(
-            description="Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api."
-        ),
-    ] = None
-
-    picture_description_api: Annotated[
-        Optional[PictureDescriptionApi],
-        Field(
-            description="API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local."
-        ),
-    ] = None
-
-    @model_validator(mode="after")
-    def picture_description_exclusivity(self) -> Self:
-        # Validate picture description options
-        if (
-            self.picture_description_local is not None
-            and self.picture_description_api is not None
-        ):
-            raise ValueError(
-                "The parameters picture_description_local and picture_description_api are mutually exclusive, only one of them can be set."
-            )
-
-        return self
--- a/docling_serve/datamodel/engines.py
+++ b/docling_serve/datamodel/engines.py
@@ -1,13 +0,0 @@
-import enum
-
-
-class TaskStatus(str, enum.Enum):
-    SUCCESS = "success"
-    PENDING = "pending"
-    STARTED = "started"
-    FAILURE = "failure"
-
-
-class AsyncEngine(str, enum.Enum):
-    LOCAL = "local"
-    KFP = "kfp"
--- a/docling_serve/datamodel/kfp.py
+++ b/docling_serve/datamodel/kfp.py
@@ -1,7 +0,0 @@
-from pydantic import AnyUrl, BaseModel
-
-
-class CallbackSpec(BaseModel):
-    url: AnyUrl
-    headers: dict[str, str] = {}
-    ca_cert: str = ""
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -1,62 +1,72 @@
-import base64
-from io import BytesIO
-from typing import Annotated, Any, Union
+import enum
+from typing import Annotated, Literal

-from pydantic import AnyHttpUrl, BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
+from pydantic_core import PydanticCustomError
+from typing_extensions import Self

-from docling.datamodel.base_models import DocumentStream
+from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
+from docling_jobkit.datamodel.s3_coords import S3Coordinates
+from docling_jobkit.datamodel.task_targets import (
+    InBodyTarget,
+    S3Target,
+    TaskTarget,
+    ZipTarget,
+)

-from docling_serve.datamodel.convert import ConvertDocumentsOptions
+from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
+from docling_serve.settings import AsyncEngine, docling_serve_settings
+
+## Sources


-class DocumentsConvertBase(BaseModel):
-    options: ConvertDocumentsOptions = ConvertDocumentsOptions()
+class FileSourceRequest(FileSource):  # FileSource plus a fixed `kind` tag
+    kind: Literal["file"] = "file"  # tag matched by Field(discriminator="kind")


-class HttpSource(BaseModel):
-    url: Annotated[
-        AnyHttpUrl,
-        Field(
-            description="HTTP url to process",
-            examples=["https://arxiv.org/pdf/2206.01062"],
-        ),
-    ]
-    headers: Annotated[
-        dict[str, Any],
-        Field(
-            description="Additional headers used to fetch the urls, "
-            "e.g. authorization, agent, etc"
-        ),
-    ] = {}
+class HttpSourceRequest(HttpSource):  # HttpSource plus a fixed `kind` tag
+    kind: Literal["http"] = "http"  # tag matched by Field(discriminator="kind")


-class FileSource(BaseModel):
-    base64_string: Annotated[
-        str,
-        Field(
-            description="Content of the file serialized in base64. "
-            "For example it can be obtained via "
-            "`base64 -w 0 /path/to/file/pdf-to-convert.pdf`."
-        ),
-    ]
-    filename: Annotated[
-        str,
-        Field(description="Filename of the uploaded document", examples=["file.pdf"]),
-    ]
-
-    def to_document_stream(self) -> DocumentStream:
-        buf = BytesIO(base64.b64decode(self.base64_string))
-        return DocumentStream(stream=buf, name=self.filename)
+class S3SourceRequest(S3Coordinates):  # S3Coordinates plus a fixed `kind` tag
+    kind: Literal["s3"] = "s3"  # tag matched by Field(discriminator="kind")


-class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
-    http_sources: list[HttpSource]
+## Multipart targets: string enum of the target kinds (in-body or zip)
+class TargetName(str, enum.Enum):
+    INBODY = InBodyTarget().kind  # value is the `kind` of a default InBodyTarget
+    ZIP = ZipTarget().kind  # value is the `kind` of a default ZipTarget


-class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
-    file_sources: list[FileSource]
-
-
-ConvertDocumentsRequest = Union[
-    ConvertDocumentFileSourcesRequest, ConvertDocumentHttpSourcesRequest
+## Aliases: one source item; the `kind` field selects file, http or s3
+SourceRequestItem = Annotated[
+    FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
 ]
+
+
+## Complete Source request: conversion options, the sources and a single target
+class ConvertDocumentsRequest(BaseModel):
+    options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
+    sources: list[SourceRequestItem]  # required, no default
+    target: TaskTarget = InBodyTarget()  # defaults to an in-body target
+
+    @model_validator(mode="after")  # runs on the already-built model instance
+    def validate_s3_source_and_target(self) -> Self:  # couples s3 source and target
+        for source in self.sources:  # rule 1: applied to every s3 source
+            if isinstance(source, S3SourceRequest):
+                if docling_serve_settings.eng_kind != AsyncEngine.KFP:
+                    raise PydanticCustomError(
+                        "error source", 'source kind "s3" requires engine kind "KFP"'
+                    )
+                if self.target.kind != "s3":
+                    raise PydanticCustomError(
+                        "error source", 'source kind "s3" requires target kind "s3"'
+                    )
+        if isinstance(self.target, S3Target):  # rule 2: needs at least one s3 source
+            for source in self.sources:
+                if isinstance(source, S3SourceRequest):
+                    return self  # one s3 source suffices; mixed source kinds pass
+            raise PydanticCustomError(
+                "error target", 'target kind "s3" requires source kind "s3"'
+            )
+        return self  # no s3 target, and any s3 source already passed rule 1
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -6,8 +6,7 @@ from pydantic import BaseModel
 from docling.datamodel.document import ConversionStatus, ErrorItem
 from docling.utils.profiling import ProfilingItem
 from docling_core.types.doc import DoclingDocument
-
-from docling_serve.datamodel.task_meta import TaskProcessingMeta
+from docling_jobkit.datamodel.task_meta import TaskProcessingMeta


 # Status
@@ -15,6 +14,10 @@ class HealthCheckResponse(BaseModel):
    status: str = "ok"


+class ClearResponse(BaseModel):  # response model of the /v1/clear/* routes
+    status: str = "ok"  # same default as HealthCheckResponse.status
+
+
 class DocumentResponse(BaseModel):
    filename: str
    md_content: Optional[str] = None
@@ -32,6 +35,11 @@ class ConvertDocumentResponse(BaseModel):
    timings: dict[str, ProfilingItem] = {}


+class PresignedUrlConvertDocumentResponse(BaseModel):  # no document payload field
+    status: ConversionStatus
+    processing_time: float  # NOTE(review): unit not shown here — presumably seconds
+
+
 class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus

--- a/docling_serve/datamodel/task.py
+++ b/docling_serve/datamodel/task.py
@@ -1,32 +0,0 @@
-from pathlib import Path
-from typing import Optional, Union
-
-from fastapi.responses import FileResponse
-from pydantic import BaseModel, ConfigDict
-
-from docling.datamodel.base_models import DocumentStream
-
-from docling_serve.datamodel.convert import ConvertDocumentsOptions
-from docling_serve.datamodel.engines import TaskStatus
-from docling_serve.datamodel.requests import FileSource, HttpSource
-from docling_serve.datamodel.responses import ConvertDocumentResponse
-from docling_serve.datamodel.task_meta import TaskProcessingMeta
-
-TaskSource = Union[HttpSource, FileSource, DocumentStream]
-
-
-class Task(BaseModel):
-    model_config = ConfigDict(arbitrary_types_allowed=True)
-
-    task_id: str
-    task_status: TaskStatus = TaskStatus.PENDING
-    sources: list[TaskSource] = []
-    options: Optional[ConvertDocumentsOptions]
-    result: Optional[Union[ConvertDocumentResponse, FileResponse]] = None
-    scratch_dir: Optional[Path] = None
-    processing_meta: Optional[TaskProcessingMeta] = None
-
-    def is_completed(self) -> bool:
-        if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:
-            return True
-        return False
--- a/docling_serve/datamodel/task_meta.py
+++ b/docling_serve/datamodel/task_meta.py
@@ -1,8 +0,0 @@
-from pydantic import BaseModel
-
-
-class TaskProcessingMeta(BaseModel):
-    num_docs: int
-    num_processed: int = 0
-    num_succeeded: int = 0
-    num_failed: int = 0
--- a/docling_serve/docling_conversion.py
+++ b/docling_serve/docling_conversion.py
@@ -1,260 +0,0 @@
-import hashlib
-import json
-import logging
-import sys
-from collections.abc import Iterable, Iterator
-from functools import lru_cache
-from pathlib import Path
-from typing import Any, Optional, Union
-
-from fastapi import HTTPException
-
-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
-from docling.backend.pdf_backend import PdfDocumentBackend
-from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
-from docling.datamodel.base_models import DocumentStream, InputFormat
-from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import (
-    OcrOptions,
-    PdfBackend,
-    PdfPipeline,
-    PdfPipelineOptions,
-    PictureDescriptionApiOptions,
-    PictureDescriptionVlmOptions,
-    TableFormerMode,
-    VlmPipelineOptions,
-    smoldocling_vlm_conversion_options,
-    smoldocling_vlm_mlx_conversion_options,
-)
-from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
-from docling.pipeline.vlm_pipeline import VlmPipeline
-from docling_core.types.doc import ImageRefMode
-
-from docling_serve.datamodel.convert import ConvertDocumentsOptions, ocr_factory
-from docling_serve.helper_functions import _to_list_of_strings
-from docling_serve.settings import docling_serve_settings
-
-_log = logging.getLogger(__name__)
-
-
-# Custom serializer for PdfFormatOption
-# (model_dump_json does not work with some classes)
-def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
-    data = pdf_format_option.model_dump()
-
-    # pipeline_options are not fully serialized by model_dump, dedicated pass
-    if pdf_format_option.pipeline_options:
-        data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
-
-        # Replace `artifacts_path` with a string representation
-        data["pipeline_options"]["artifacts_path"] = repr(
-            data["pipeline_options"]["artifacts_path"]
-        )
-
-    # Replace `pipeline_cls` with a string representation
-    data["pipeline_cls"] = repr(data["pipeline_cls"])
-
-    # Replace `backend` with a string representation
-    data["backend"] = repr(data["backend"])
-
-    # Handle `device` in `accelerator_options`
-    if "accelerator_options" in data and "device" in data["accelerator_options"]:
-        data["accelerator_options"]["device"] = repr(
-            data["accelerator_options"]["device"]
-        )
-
-    # Serialize the dictionary to JSON with sorted keys to have consistent hashes
-    serialized_data = json.dumps(data, sort_keys=True)
-    options_hash = hashlib.sha1(serialized_data.encode()).digest()
-    return options_hash
-
-
-# Cache of DocumentConverter objects
-_options_map: dict[bytes, PdfFormatOption] = {}
-
-
-@lru_cache(maxsize=docling_serve_settings.options_cache_size)
-def _get_converter_from_hash(options_hash: bytes) -> DocumentConverter:
-    pdf_format_option = _options_map[options_hash]
-    format_options: dict[InputFormat, FormatOption] = {
-        InputFormat.PDF: pdf_format_option,
-        InputFormat.IMAGE: pdf_format_option,
-    }
-
-    return DocumentConverter(format_options=format_options)
-
-
-def get_converter(pdf_format_option: PdfFormatOption) -> DocumentConverter:
-    options_hash = _hash_pdf_format_option(pdf_format_option)
-    _options_map[options_hash] = pdf_format_option
-    return _get_converter_from_hash(options_hash)
-
-
-def _parse_standard_pdf_opts(
-    request: ConvertDocumentsOptions, artifacts_path: Optional[Path]
-) -> PdfPipelineOptions:
-    try:
-        ocr_options: OcrOptions = ocr_factory.create_options(
-            kind=request.ocr_engine.value,  # type: ignore
-            force_full_page_ocr=request.force_ocr,
-        )
-    except ImportError as err:
-        raise HTTPException(
-            status_code=400,
-            detail="The requested OCR engine"
-            f" (ocr_engine={request.ocr_engine.value})"  # type: ignore
-            " is not available on this system. Please choose another OCR engine "
-            "or contact your system administrator.\n"
-            f"{err}",
-        )
-
-    if request.ocr_lang is not None:
-        if isinstance(request.ocr_lang, str):
-            ocr_options.lang = _to_list_of_strings(request.ocr_lang)
-        else:
-            ocr_options.lang = request.ocr_lang
-
-    pipeline_options = PdfPipelineOptions(
-        artifacts_path=artifacts_path,
-        enable_remote_services=docling_serve_settings.enable_remote_services,
-        document_timeout=request.document_timeout,
-        do_ocr=request.do_ocr,
-        ocr_options=ocr_options,
-        do_table_structure=request.do_table_structure,
-        do_code_enrichment=request.do_code_enrichment,
-        do_formula_enrichment=request.do_formula_enrichment,
-        do_picture_classification=request.do_picture_classification,
-        do_picture_description=request.do_picture_description,
-    )
-    pipeline_options.table_structure_options.mode = TableFormerMode(request.table_mode)
-
-    if request.image_export_mode != ImageRefMode.PLACEHOLDER:
-        pipeline_options.generate_page_images = True
-        if request.image_export_mode == ImageRefMode.REFERENCED:
-            pipeline_options.generate_picture_images = True
-        if request.images_scale:
-            pipeline_options.images_scale = request.images_scale
-
-    if request.picture_description_local is not None:
-        pipeline_options.picture_description_options = (
-            PictureDescriptionVlmOptions.model_validate(
-                request.picture_description_local.model_dump()
-            )
-        )
-
-    if request.picture_description_api is not None:
-        pipeline_options.picture_description_options = (
-            PictureDescriptionApiOptions.model_validate(
-                request.picture_description_api.model_dump()
-            )
-        )
-
-    return pipeline_options
-
-
-def _parse_backend(request: ConvertDocumentsOptions) -> type[PdfDocumentBackend]:
-    if request.pdf_backend == PdfBackend.DLPARSE_V1:
-        backend: type[PdfDocumentBackend] = DoclingParseDocumentBackend
-    elif request.pdf_backend == PdfBackend.DLPARSE_V2:
-        backend = DoclingParseV2DocumentBackend
-    elif request.pdf_backend == PdfBackend.DLPARSE_V4:
-        backend = DoclingParseV4DocumentBackend
-    elif request.pdf_backend == PdfBackend.PYPDFIUM2:
-        backend = PyPdfiumDocumentBackend
-    else:
-        raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
-
-    return backend
-
-
-def _parse_vlm_pdf_opts(
-    request: ConvertDocumentsOptions, artifacts_path: Optional[Path]
-) -> VlmPipelineOptions:
-    pipeline_options = VlmPipelineOptions(
-        artifacts_path=artifacts_path,
-        document_timeout=request.document_timeout,
-    )
-    pipeline_options.vlm_options = smoldocling_vlm_conversion_options
-    if sys.platform == "darwin":
-        try:
-            import mlx_vlm  # noqa: F401
-
-            pipeline_options.vlm_options = smoldocling_vlm_mlx_conversion_options
-        except ImportError:
-            _log.warning(
-                "To run SmolDocling faster, please install mlx-vlm:\n"
-                "pip install mlx-vlm"
-            )
-    return pipeline_options
-
-
-# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
-def get_pdf_pipeline_opts(
-    request: ConvertDocumentsOptions,
-) -> PdfFormatOption:
-    artifacts_path: Optional[Path] = None
-    if docling_serve_settings.artifacts_path is not None:
-        if str(docling_serve_settings.artifacts_path.absolute()) == "":
-            _log.info(
-                "artifacts_path is an empty path, model weights will be downloaded "
-                "at runtime."
-            )
-            artifacts_path = None
-        elif docling_serve_settings.artifacts_path.is_dir():
-            _log.info(
-                "artifacts_path is set to a valid directory. "
-                "No model weights will be downloaded at runtime."
-            )
-            artifacts_path = docling_serve_settings.artifacts_path
-        else:
-            _log.warning(
-                "artifacts_path is set to an invalid directory. "
-                "The system will download the model weights at runtime."
-            )
-            artifacts_path = None
-    else:
-        _log.info(
-            "artifacts_path is unset. "
-            "The system will download the model weights at runtime."
-        )
-
-    pipeline_options: Union[PdfPipelineOptions, VlmPipelineOptions]
-    if request.pipeline == PdfPipeline.STANDARD:
-        pipeline_options = _parse_standard_pdf_opts(request, artifacts_path)
-        backend = _parse_backend(request)
-        pdf_format_option = PdfFormatOption(
-            pipeline_options=pipeline_options,
-            backend=backend,
-        )
-
-    elif request.pipeline == PdfPipeline.VLM:
-        pipeline_options = _parse_vlm_pdf_opts(request, artifacts_path)
-        pdf_format_option = PdfFormatOption(
-            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
-        )
-    else:
-        raise NotImplementedError(
-            f"The pipeline {request.pipeline} is not implemented."
-        )
-
-    return pdf_format_option
-
-
-def convert_documents(
-    sources: Iterable[Union[Path, str, DocumentStream]],
-    options: ConvertDocumentsOptions,
-    headers: Optional[dict[str, Any]] = None,
-):
-    pdf_format_option = get_pdf_pipeline_opts(options)
-    converter = get_converter(pdf_format_option)
-    results: Iterator[ConversionResult] = converter.convert_all(
-        sources,
-        headers=headers,
-        page_range=options.page_range,
-        max_file_size=docling_serve_settings.max_file_size,
-        max_num_pages=docling_serve_settings.max_num_pages,
-    )
-
-    return results
--- a/docling_serve/engines/init.py
+++ b/docling_serve/engines/init.py
--- a/docling_serve/engines/async_kfp/init.py
+++ b/docling_serve/engines/async_kfp/init.py
--- a/docling_serve/engines/async_kfp/kfp_pipeline.py
+++ b/docling_serve/engines/async_kfp/kfp_pipeline.py
@@ -1,137 +0,0 @@
-# ruff: noqa: E402, UP006, UP035
-
-from typing import Any, Dict, List
-
-from kfp import dsl
-
-PYTHON_BASE_IMAGE = "python:3.12"
-
-
-@dsl.component(
-    base_image=PYTHON_BASE_IMAGE,
-    packages_to_install=[
-        "pydantic",
-        "docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
-    ],
-    pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
-)
-def generate_chunks(
-    run_name: str,
-    request: Dict[str, Any],
-    batch_size: int,
-    callbacks: List[Dict[str, Any]],
-) -> List[List[Dict[str, Any]]]:
-    from pydantic import TypeAdapter
-
-    from docling_serve.datamodel.callback import (
-        ProgressCallbackRequest,
-        ProgressSetNumDocs,
-    )
-    from docling_serve.datamodel.kfp import CallbackSpec
-    from docling_serve.engines.async_kfp.notify import notify_callbacks
-
-    CallbacksListType = TypeAdapter(list[CallbackSpec])
-
-    sources = request["http_sources"]
-    splits = [sources[i : i + batch_size] for i in range(0, len(sources), batch_size)]
-
-    total = sum(len(chunk) for chunk in splits)
-    payload = ProgressCallbackRequest(
-        task_id=run_name, progress=ProgressSetNumDocs(num_docs=total)
-    )
-    notify_callbacks(
-        payload=payload,
-        callbacks=CallbacksListType.validate_python(callbacks),
-    )
-
-    return splits
-
-
-@dsl.component(
-    base_image=PYTHON_BASE_IMAGE,
-    packages_to_install=[
-        "pydantic",
-        "docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
-    ],
-    pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
-)
-def convert_batch(
-    run_name: str,
-    data_splits: List[Dict[str, Any]],
-    request: Dict[str, Any],
-    callbacks: List[Dict[str, Any]],
-    output_path: dsl.OutputPath("Directory"),  # type: ignore
-):
-    from pathlib import Path
-
-    from pydantic import AnyUrl, TypeAdapter
-
-    from docling_serve.datamodel.callback import (
-        FailedDocsItem,
-        ProgressCallbackRequest,
-        ProgressUpdateProcessed,
-        SucceededDocsItem,
-    )
-    from docling_serve.datamodel.convert import ConvertDocumentsOptions
-    from docling_serve.datamodel.kfp import CallbackSpec
-    from docling_serve.datamodel.requests import HttpSource
-    from docling_serve.engines.async_kfp.notify import notify_callbacks
-
-    CallbacksListType = TypeAdapter(list[CallbackSpec])
-
-    convert_options = ConvertDocumentsOptions.model_validate(request["options"])
-    print(convert_options)
-
-    output_dir = Path(output_path)
-    output_dir.mkdir(exist_ok=True, parents=True)
-    docs_succeeded: list[SucceededDocsItem] = []
-    docs_failed: list[FailedDocsItem] = []
-    for source_dict in data_splits:
-        source = HttpSource.model_validate(source_dict)
-        filename = Path(str(AnyUrl(source.url).path)).name
-        output_filename = output_dir / filename
-        print(f"Writing {output_filename}")
-        with output_filename.open("w") as f:
-            f.write(source.model_dump_json())
-        docs_succeeded.append(SucceededDocsItem(source=source.url))
-
-    payload = ProgressCallbackRequest(
-        task_id=run_name,
-        progress=ProgressUpdateProcessed(
-            num_failed=len(docs_failed),
-            num_processed=len(docs_succeeded) + len(docs_failed),
-            num_succeeded=len(docs_succeeded),
-            docs_succeeded=docs_succeeded,
-            docs_failed=docs_failed,
-        ),
-    )
-
-    print(payload)
-    notify_callbacks(
-        payload=payload,
-        callbacks=CallbacksListType.validate_python(callbacks),
-    )
-
-
-@dsl.pipeline()
-def process(
-    batch_size: int,
-    request: Dict[str, Any],
-    callbacks: List[Dict[str, Any]] = [],
-    run_name: str = "",
-):
-    chunks_task = generate_chunks(
-        run_name=run_name,
-        request=request,
-        batch_size=batch_size,
-        callbacks=callbacks,
-    )
-    chunks_task.set_caching_options(False)
-
-    with dsl.ParallelFor(chunks_task.output, parallelism=4) as data_splits:
-        convert_batch(
-            run_name=run_name,
-            data_splits=data_splits,
-            request=request,
-            callbacks=callbacks,
-        )
--- a/docling_serve/engines/async_kfp/notify.py
+++ b/docling_serve/engines/async_kfp/notify.py
@@ -1,32 +0,0 @@
-import ssl
-
-import certifi
-import httpx
-
-from docling_serve.datamodel.callback import ProgressCallbackRequest
-from docling_serve.datamodel.kfp import CallbackSpec
-
-
-def notify_callbacks(
-    payload: ProgressCallbackRequest,
-    callbacks: list[CallbackSpec],
-):
-    if len(callbacks) == 0:
-        return
-
-    for callback in callbacks:
-        # https://www.python-httpx.org/advanced/ssl/#configuring-client-instances
-        if callback.ca_cert:
-            ctx = ssl.create_default_context(cadata=callback.ca_cert)
-        else:
-            ctx = ssl.create_default_context(cafile=certifi.where())
-
-        try:
-            httpx.post(
-                str(callback.url),
-                headers=callback.headers,
-                json=payload.model_dump(mode="json"),
-                verify=ctx,
-            )
-        except httpx.HTTPError as err:
-            print(f"Error notifying callback {callback.url}: {err}")
--- a/docling_serve/engines/async_kfp/orchestrator.py
+++ b/docling_serve/engines/async_kfp/orchestrator.py
@@ -1,235 +0,0 @@
-import datetime
-import json
-import logging
-import uuid
-from pathlib import Path
-from typing import Optional
-
-from kfp_server_api.models import V2beta1RuntimeState
-from pydantic import BaseModel, TypeAdapter
-from pydantic_settings import SettingsConfigDict
-
-from docling_serve.datamodel.callback import (
-    ProgressCallbackRequest,
-    ProgressSetNumDocs,
-    ProgressUpdateProcessed,
-)
-from docling_serve.datamodel.convert import ConvertDocumentsOptions
-from docling_serve.datamodel.engines import TaskStatus
-from docling_serve.datamodel.kfp import CallbackSpec
-from docling_serve.datamodel.requests import HttpSource
-from docling_serve.datamodel.task import Task, TaskSource
-from docling_serve.datamodel.task_meta import TaskProcessingMeta
-from docling_serve.engines.async_kfp.kfp_pipeline import process
-from docling_serve.engines.async_orchestrator import (
-    BaseAsyncOrchestrator,
-    ProgressInvalid,
-)
-from docling_serve.settings import docling_serve_settings
-
-_log = logging.getLogger(__name__)
-
-
-class _RunItem(BaseModel):
-    model_config = SettingsConfigDict(arbitrary_types_allowed=True)
-
-    run_id: str
-    state: str
-    created_at: datetime.datetime
-    scheduled_at: datetime.datetime
-    finished_at: datetime.datetime
-
-
-class AsyncKfpOrchestrator(BaseAsyncOrchestrator):
-    def __init__(self):
-        super().__init__()
-        import kfp
-
-        kfp_endpoint = docling_serve_settings.eng_kfp_endpoint
-        if kfp_endpoint is None:
-            raise ValueError("KFP endpoint is required when using the KFP engine.")
-
-        kube_sa_token_path = Path("/run/secrets/kubernetes.io/serviceaccount/token")
-        kube_sa_ca_cert_path = Path(
-            "/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
-        )
-
-        ssl_ca_cert = docling_serve_settings.eng_kfp_ca_cert_path
-        token = docling_serve_settings.eng_kfp_token
-        if (
-            ssl_ca_cert is None
-            and ".svc" in kfp_endpoint.host
-            and kube_sa_ca_cert_path.exists()
-        ):
-            ssl_ca_cert = str(kube_sa_ca_cert_path)
-        if token is None and kube_sa_token_path.exists():
-            token = kube_sa_token_path.read_text()
-
-        self._client = kfp.Client(
-            host=str(kfp_endpoint),
-            existing_token=token,
-            ssl_ca_cert=ssl_ca_cert,
-            # verify_ssl=False,
-        )
-
-    async def enqueue(
-        self, sources: list[TaskSource], options: ConvertDocumentsOptions
-    ) -> Task:
-        callbacks = []
-        if docling_serve_settings.eng_kfp_self_callback_endpoint is not None:
-            headers = {}
-            if docling_serve_settings.eng_kfp_self_callback_token_path is not None:
-                token = (
-                    docling_serve_settings.eng_kfp_self_callback_token_path.read_text()
-                )
-                headers["Authorization"] = f"Bearer {token}"
-            ca_cert = ""
-            if docling_serve_settings.eng_kfp_self_callback_ca_cert_path is not None:
-                ca_cert = docling_serve_settings.eng_kfp_self_callback_ca_cert_path.read_text()
-            callbacks.append(
-                CallbackSpec(
-                    url=docling_serve_settings.eng_kfp_self_callback_endpoint,
-                    headers=headers,
-                    ca_cert=ca_cert,
-                )
-            )
-
-        CallbacksType = TypeAdapter(list[CallbackSpec])
-        SourcesListType = TypeAdapter(list[HttpSource])
-        http_sources = [s for s in sources if isinstance(s, HttpSource)]
-        # hack: since the current kfp backend is not resolving the job_id placeholder,
-        # we set the run_name and pass it as argument to the job itself.
-        run_name = f"docling-job-{uuid.uuid4()}"
-        kfp_run = self._client.create_run_from_pipeline_func(
-            process,
-            arguments={
-                "batch_size": 10,
-                "sources": SourcesListType.dump_python(http_sources, mode="json"),
-                "options": options.model_dump(mode="json"),
-                "callbacks": CallbacksType.dump_python(callbacks, mode="json"),
-                "run_name": run_name,
-            },
-            run_name=run_name,
-        )
-        task_id = kfp_run.run_id
-
-        task = Task(task_id=task_id, sources=sources, options=options)
-        await self.init_task_tracking(task)
-        return task
-
-    async def _update_task_from_run(self, task_id: str, wait: float = 0.0):
-        run_info = self._client.get_run(run_id=task_id)
-        task = await self.get_raw_task(task_id=task_id)
-        # RUNTIME_STATE_UNSPECIFIED = "RUNTIME_STATE_UNSPECIFIED"
-        # PENDING = "PENDING"
-        # RUNNING = "RUNNING"
-        # SUCCEEDED = "SUCCEEDED"
-        # SKIPPED = "SKIPPED"
-        # FAILED = "FAILED"
-        # CANCELING = "CANCELING"
-        # CANCELED = "CANCELED"
-        # PAUSED = "PAUSED"
-        if run_info.state == V2beta1RuntimeState.SUCCEEDED:
-            task.task_status = TaskStatus.SUCCESS
-        elif run_info.state == V2beta1RuntimeState.PENDING:
-            task.task_status = TaskStatus.PENDING
-        elif run_info.state == V2beta1RuntimeState.RUNNING:
-            task.task_status = TaskStatus.STARTED
-        else:
-            task.task_status = TaskStatus.FAILURE
-
-    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
-        await self._update_task_from_run(task_id=task_id, wait=wait)
-        return await self.get_raw_task(task_id=task_id)
-
-    async def _get_pending(self) -> list[_RunItem]:
-        runs: list[_RunItem] = []
-        next_page: Optional[str] = None
-        while True:
-            res = self._client.list_runs(
-                page_token=next_page,
-                page_size=20,
-                filter=json.dumps(
-                    {
-                        "predicates": [
-                            {
-                                "operation": "EQUALS",
-                                "key": "state",
-                                "stringValue": "PENDING",
-                            }
-                        ]
-                    }
-                ),
-            )
-            if res.runs is not None:
-                for run in res.runs:
-                    runs.append(
-                        _RunItem(
-                            run_id=run.run_id,
-                            state=run.state,
-                            created_at=run.created_at,
-                            scheduled_at=run.scheduled_at,
-                            finished_at=run.finished_at,
-                        )
-                    )
-            if res.next_page_token is None:
-                break
-            next_page = res.next_page_token
-        return runs
-
-    async def queue_size(self) -> int:
-        runs = await self._get_pending()
-        return len(runs)
-
-    async def get_queue_position(self, task_id: str) -> Optional[int]:
-        runs = await self._get_pending()
-        for pos, run in enumerate(runs, start=1):
-            if run.run_id == task_id:
-                return pos
-        return None
-
-    async def process_queue(self):
-        return
-
-    async def warm_up_caches(self):
-        return
-
-    async def _get_run_id(self, run_name: str) -> str:
-        res = self._client.list_runs(
-            filter=json.dumps(
-                {
-                    "predicates": [
-                        {
-                            "operation": "EQUALS",
-                            "key": "name",
-                            "stringValue": run_name,
-                        }
-                    ]
-                }
-            ),
-        )
-        if res.runs is not None and len(res.runs) > 0:
-            return res.runs[0].run_id
-        raise RuntimeError(f"Run with {run_name=} not found.")
-
-    async def receive_task_progress(self, request: ProgressCallbackRequest):
-        task_id = await self._get_run_id(run_name=request.task_id)
-        progress = request.progress
-        task = await self.get_raw_task(task_id=task_id)
-
-        if isinstance(progress, ProgressSetNumDocs):
-            task.processing_meta = TaskProcessingMeta(num_docs=progress.num_docs)
-            task.task_status = TaskStatus.STARTED
-
-        elif isinstance(progress, ProgressUpdateProcessed):
-            if task.processing_meta is None:
-                raise ProgressInvalid(
-                    "UpdateProcessed was called before setting the expected number of documents."
-                )
-            task.processing_meta.num_processed += progress.num_processed
-            task.processing_meta.num_succeeded += progress.num_succeeded
-            task.processing_meta.num_failed += progress.num_failed
-            task.task_status = TaskStatus.STARTED
-
-        # TODO: could be moved to BackgroundTask
-        await self.notify_task_subscribers(task_id=task_id)
--- a/docling_serve/engines/async_local/init.py
+++ b/docling_serve/engines/async_local/init.py
--- a/docling_serve/engines/async_local/orchestrator.py
+++ b/docling_serve/engines/async_local/orchestrator.py
@@ -1,57 +0,0 @@
-import asyncio
-import logging
-import uuid
-from typing import Optional
-
-from docling_serve.datamodel.convert import ConvertDocumentsOptions
-from docling_serve.datamodel.task import Task, TaskSource
-from docling_serve.docling_conversion import get_converter, get_pdf_pipeline_opts
-from docling_serve.engines.async_local.worker import AsyncLocalWorker
-from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
-from docling_serve.settings import docling_serve_settings
-
-_log = logging.getLogger(__name__)
-
-
-class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
-    def __init__(self):
-        super().__init__()
-        self.task_queue = asyncio.Queue()
-        self.queue_list: list[str] = []
-
-    async def enqueue(
-        self, sources: list[TaskSource], options: ConvertDocumentsOptions
-    ) -> Task:
-        task_id = str(uuid.uuid4())
-        task = Task(task_id=task_id, sources=sources, options=options)
-        await self.init_task_tracking(task)
-
-        self.queue_list.append(task_id)
-        await self.task_queue.put(task_id)
-        return task
-
-    async def queue_size(self) -> int:
-        return self.task_queue.qsize()
-
-    async def get_queue_position(self, task_id: str) -> Optional[int]:
-        return (
-            self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
-        )
-
-    async def process_queue(self):
-        # Create a pool of workers
-        workers = []
-        for i in range(docling_serve_settings.eng_loc_num_workers):
-            _log.debug(f"Starting worker {i}")
-            w = AsyncLocalWorker(i, self)
-            worker_task = asyncio.create_task(w.loop())
-            workers.append(worker_task)
-
-        # Wait for all workers to complete (they won't, as they run indefinitely)
-        await asyncio.gather(*workers)
-        _log.debug("All workers completed.")
-
-    async def warm_up_caches(self):
-        # Converter with default options
-        pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
-        get_converter(pdf_format_option)
--- a/docling_serve/engines/async_local/worker.py
+++ b/docling_serve/engines/async_local/worker.py
@@ -1,124 +0,0 @@
-import asyncio
-import logging
-import shutil
-import time
-from typing import TYPE_CHECKING, Any, Optional, Union
-
-from fastapi.responses import FileResponse
-
-from docling.datamodel.base_models import DocumentStream
-
-from docling_serve.datamodel.engines import TaskStatus
-from docling_serve.datamodel.requests import FileSource, HttpSource
-from docling_serve.docling_conversion import convert_documents
-from docling_serve.response_preparation import process_results
-from docling_serve.storage import get_scratch
-
-if TYPE_CHECKING:
-    from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
-
-_log = logging.getLogger(__name__)
-
-
-class AsyncLocalWorker:
-    def __init__(self, worker_id: int, orchestrator: "AsyncLocalOrchestrator"):
-        self.worker_id = worker_id
-        self.orchestrator = orchestrator
-
-    async def loop(self):
-        _log.debug(f"Starting loop for worker {self.worker_id}")
-        while True:
-            task_id: str = await self.orchestrator.task_queue.get()
-            self.orchestrator.queue_list.remove(task_id)
-
-            if task_id not in self.orchestrator.tasks:
-                raise RuntimeError(f"Task {task_id} not found.")
-            task = self.orchestrator.tasks[task_id]
-
-            try:
-                task.task_status = TaskStatus.STARTED
-                _log.info(f"Worker {self.worker_id} processing task {task_id}")
-
-                # Notify clients about task updates
-                await self.orchestrator.notify_task_subscribers(task_id)
-
-                # Notify clients about queue updates
-                await self.orchestrator.notify_queue_positions()
-
-                # Define a callback function to send progress updates to the client.
-                # TODO: send partial updates, e.g. when a document in the batch is done
-                def run_conversion():
-                    convert_sources: list[Union[str, DocumentStream]] = []
-                    headers: Optional[dict[str, Any]] = None
-                    for source in task.sources:
-                        if isinstance(source, DocumentStream):
-                            convert_sources.append(source)
-                        elif isinstance(source, FileSource):
-                            convert_sources.append(source.to_document_stream())
-                        elif isinstance(source, HttpSource):
-                            convert_sources.append(str(source.url))
-                            if headers is None and source.headers:
-                                headers = source.headers
-
-                    # Note: results are only an iterator->lazy evaluation
-                    results = convert_documents(
-                        sources=convert_sources,
-                        options=task.options,
-                        headers=headers,
-                    )
-
-                    # The real processing will happen here
-                    work_dir = get_scratch() / task_id
-                    response = process_results(
-                        conversion_options=task.options,
-                        conv_results=results,
-                        work_dir=work_dir,
-                    )
-
-                    if work_dir.exists():
-                        task.scratch_dir = work_dir
-                        if not isinstance(response, FileResponse):
-                            _log.warning(
-                                f"Task {task_id=} produced content in {work_dir=} but the response is not a file."
-                            )
-                            shutil.rmtree(work_dir, ignore_errors=True)
-
-                    return response
-
-                start_time = time.monotonic()
-
-                # Run the prediction in a thread to avoid blocking the event loop.
-                # Get the current event loop
-                # loop = asyncio.get_event_loop()
-                # future = asyncio.run_coroutine_threadsafe(
-                #     run_conversion(),
-                #     loop=loop
-                # )
-                # response = future.result()
-
-                # Run in a thread
-                response = await asyncio.to_thread(
-                    run_conversion,
-                )
-                processing_time = time.monotonic() - start_time
-
-                task.result = response
-                task.sources = []
-                task.options = None
-
-                task.task_status = TaskStatus.SUCCESS
-                _log.info(
-                    f"Worker {self.worker_id} completed job {task_id} "
-                    f"in {processing_time:.2f} seconds"
-                )
-
-            except Exception as e:
-                _log.error(
-                    f"Worker {self.worker_id} failed to process job {task_id}: {e}"
-                )
-                task.task_status = TaskStatus.FAILURE
-
-            finally:
-                await self.orchestrator.notify_task_subscribers(task_id)
-                self.orchestrator.task_queue.task_done()
-                _log.debug(f"Worker {self.worker_id} completely done with {task_id}")
--- a/docling_serve/engines/async_orchestrator.py
+++ b/docling_serve/engines/async_orchestrator.py
@@ -1,85 +0,0 @@
-import shutil
-from typing import Union
-
-from fastapi import BackgroundTasks, WebSocket
-from fastapi.responses import FileResponse
-
-from docling_serve.datamodel.callback import ProgressCallbackRequest
-from docling_serve.datamodel.engines import TaskStatus
-from docling_serve.datamodel.responses import (
-    ConvertDocumentResponse,
-    MessageKind,
-    TaskStatusResponse,
-    WebsocketMessage,
-)
-from docling_serve.datamodel.task import Task
-from docling_serve.engines.base_orchestrator import (
-    BaseOrchestrator,
-    OrchestratorError,
-    TaskNotFoundError,
-)
-from docling_serve.settings import docling_serve_settings
-
-
-class ProgressInvalid(OrchestratorError):
-    pass
-
-
-class BaseAsyncOrchestrator(BaseOrchestrator):
-    def __init__(self):
-        self.tasks: dict[str, Task] = {}
-        self.task_subscribers: dict[str, set[WebSocket]] = {}
-
-    async def init_task_tracking(self, task: Task):
-        task_id = task.task_id
-        self.tasks[task.task_id] = task
-        self.task_subscribers[task_id] = set()
-
-    async def get_raw_task(self, task_id: str) -> Task:
-        if task_id not in self.tasks:
-            raise TaskNotFoundError()
-        return self.tasks[task_id]
-
-    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
-        return await self.get_raw_task(task_id=task_id)
-
-    async def task_result(
-        self, task_id: str, background_tasks: BackgroundTasks
-    ) -> Union[ConvertDocumentResponse, FileResponse, None]:
-        task = await self.get_raw_task(task_id=task_id)
-        if task.is_completed() and task.scratch_dir is not None:
-            if docling_serve_settings.single_use_results:
-                background_tasks.add_task(
-                    shutil.rmtree, task.scratch_dir, ignore_errors=True
-                )
-        return task.result
-
-    async def notify_task_subscribers(self, task_id: str):
-        if task_id not in self.task_subscribers:
-            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
-
-        task = await self.get_raw_task(task_id=task_id)
-        task_queue_position = await self.get_queue_position(task_id)
-        msg = TaskStatusResponse(
-            task_id=task.task_id,
-            task_status=task.task_status,
-            task_position=task_queue_position,
-            task_meta=task.processing_meta,
-        )
-        for websocket in self.task_subscribers[task_id]:
-            await websocket.send_text(
-                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
-            )
-            if task.is_completed():
-                await websocket.close()
-
-    async def notify_queue_positions(self):
-        for task_id in self.task_subscribers.keys():
-            # notify only pending tasks
-            if self.tasks[task_id].task_status != TaskStatus.PENDING:
-                continue
-
-            await self.notify_task_subscribers(task_id)
-
-    async def receive_task_progress(self, request: ProgressCallbackRequest):
-        raise NotImplementedError()
--- a/docling_serve/engines/async_orchestrator_factory.py
+++ b/docling_serve/engines/async_orchestrator_factory.py
@@ -1,21 +0,0 @@
-from functools import lru_cache
-
-from docling_serve.datamodel.engines import AsyncEngine
-from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
-from docling_serve.settings import docling_serve_settings
-
-
-@lru_cache
-def get_async_orchestrator() -> BaseAsyncOrchestrator:
-    if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
-        from docling_serve.engines.async_local.orchestrator import (
-            AsyncLocalOrchestrator,
-        )
-
-        return AsyncLocalOrchestrator()
-    elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
-        from docling_serve.engines.async_kfp.orchestrator import AsyncKfpOrchestrator
-
-        return AsyncKfpOrchestrator()
-
-    raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")
--- a/docling_serve/engines/base_orchestrator.py
+++ b/docling_serve/engines/base_orchestrator.py
@@ -1,51 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Optional, Union
-
-from fastapi import BackgroundTasks
-from fastapi.responses import FileResponse
-
-from docling_serve.datamodel.convert import ConvertDocumentsOptions
-from docling_serve.datamodel.responses import ConvertDocumentResponse
-from docling_serve.datamodel.task import Task, TaskSource
-
-
-class OrchestratorError(Exception):
-    pass
-
-
-class TaskNotFoundError(OrchestratorError):
-    pass
-
-
-class BaseOrchestrator(ABC):
-    @abstractmethod
-    async def enqueue(
-        self, sources: list[TaskSource], options: ConvertDocumentsOptions
-    ) -> Task:
-        pass
-
-    @abstractmethod
-    async def queue_size(self) -> int:
-        pass
-
-    @abstractmethod
-    async def get_queue_position(self, task_id: str) -> Optional[int]:
-        pass
-
-    @abstractmethod
-    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
-        pass
-
-    @abstractmethod
-    async def task_result(
-        self, task_id: str, background_tasks: BackgroundTasks
-    ) -> Union[ConvertDocumentResponse, FileResponse, None]:
-        pass
-
-    @abstractmethod
-    async def process_queue(self):
-        pass
-
-    @abstractmethod
-    async def warm_up_caches(self):
-        pass
--- a/docling_serve/engines/block_local/init.py
+++ b/docling_serve/engines/block_local/init.py
--- a/docling_serve/gradio_ui.py
+++ b/docling_serve/gradio_ui.py
@@ -1,5 +1,6 @@
 import base64
 import importlib
+import itertools
 import json
 import logging
 import ssl
@@ -12,9 +13,10 @@ import certifi
 import gradio as gr
 import httpx

+from docling.datamodel.base_models import FormatToExtensions
 from docling.datamodel.pipeline_options import (
    PdfBackend,
-    PdfPipeline,
+    ProcessingPipeline,
    TableFormerMode,
    TableStructureOptions,
 )
@@ -29,7 +31,7 @@ logger = logging.getLogger(__name__)
 ############################

 logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
-js_components_url = "https://unpkg.com/@docling/docling-components@0.0.6"
+js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7"
 if (
    docling_serve_settings.static_path is not None
    and docling_serve_settings.static_path.is_dir()
@@ -83,7 +85,7 @@ css = """
    height: 140px;
 }

-docling-img::part(pages) {
+docling-img {
    gap: 1rem;
 }

@@ -239,7 +241,7 @@ def wait_task_finish(task_id: str, return_as_file: bool):
    while not task_finished:
        try:
            response = httpx.get(
-                f"{get_api_endpoint()}/v1alpha/status/poll/{task_id}?wait=5",
+                f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
                verify=ssl_ctx,
                timeout=15,
            )
@@ -262,7 +264,7 @@ def wait_task_finish(task_id: str, return_as_file: bool):
    if conversion_sucess:
        try:
            response = httpx.get(
-                f"{get_api_endpoint()}/v1alpha/result/{task_id}",
+                f"{get_api_endpoint()}/v1/result/{task_id}",
                timeout=15,
                verify=ssl_ctx,
            )
@@ -294,8 +296,11 @@ def process_url(
    do_picture_classification,
    do_picture_description,
 ):
+    target = {"kind": "zip" if return_as_file else "inbody"}
    parameters = {
-        "http_sources": [{"url": source} for source in input_sources.split(",")],
+        "sources": [
+            {"kind": "http", "url": source} for source in input_sources.split(",")
+        ],
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
@@ -307,24 +312,24 @@ def process_url(
            "pdf_backend": pdf_backend,
            "table_mode": table_mode,
            "abort_on_error": abort_on_error,
-            "return_as_file": return_as_file,
            "do_code_enrichment": do_code_enrichment,
            "do_formula_enrichment": do_formula_enrichment,
            "do_picture_classification": do_picture_classification,
            "do_picture_description": do_picture_description,
        },
+        "target": target,
    }
    if (
-        not parameters["http_sources"]
-        or len(parameters["http_sources"]) == 0
-        or parameters["http_sources"][0]["url"] == ""
+        not parameters["sources"]
+        or len(parameters["sources"]) == 0
+        or parameters["sources"][0]["url"] == ""
    ):
        logger.error("No input sources provided.")
        raise gr.Error("No input sources provided.", print_exception=False)
    try:
        ssl_ctx = get_ssl_context()
        response = httpx.post(
-            f"{get_api_endpoint()}/v1alpha/convert/source/async",
+            f"{get_api_endpoint()}/v1/convert/source/async",
            json=parameters,
            verify=ssl_ctx,
            timeout=60,
@@ -370,11 +375,13 @@ def process_file(
        logger.error("No files provided.")
        raise gr.Error("No files provided.", print_exception=False)
    files_data = [
-        {"base64_string": file_to_base64(file), "filename": file.name} for file in files
+        {"kind": "file", "base64_string": file_to_base64(file), "filename": file.name}
+        for file in files
    ]
+    target = {"kind": "zip" if return_as_file else "inbody"}

    parameters = {
-        "file_sources": files_data,
+        "sources": files_data,
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
@@ -392,12 +399,13 @@ def process_file(
            "do_picture_classification": do_picture_classification,
            "do_picture_description": do_picture_description,
        },
+        "target": target,
    }

    try:
        ssl_ctx = get_ssl_context()
        response = httpx.post(
-            f"{get_api_endpoint()}/v1alpha/convert/source/async",
+            f"{get_api_endpoint()}/v1/convert/source/async",
            json=parameters,
            verify=ssl_ctx,
            timeout=60,
@@ -443,7 +451,7 @@ def response_to_output(response, return_as_file):
        )
        # Embed document JSON and trigger load at client via an image.
        json_rendered_content = f"""
-            <docling-img id="dclimg" pagenumbers tooltip="parsed"></docling-img>
+            <docling-img id="dclimg" pagenumbers><docling-tooltip></docling-tooltip></docling-img>
            <script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{json_content}</script>
            <img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
            """
@@ -545,19 +553,10 @@ with gr.Blocks(
                    elem_id="file_input_zone",
                    label="Upload File",
                    file_types=[
-                        ".pdf",
-                        ".docx",
-                        ".pptx",
-                        ".html",
-                        ".xlsx",
-                        ".json",
-                        ".asciidoc",
-                        ".txt",
-                        ".md",
-                        ".jpg",
-                        ".jpeg",
-                        ".png",
-                        ".gif",
+                        f".{v}"
+                        for v in itertools.chain.from_iterable(
+                            FormatToExtensions.values()
+                        )
                    ],
                    file_count="multiple",
                    scale=4,
@@ -594,9 +593,9 @@ with gr.Blocks(
        with gr.Row():
            with gr.Column(scale=1, min_width=200):
                pipeline = gr.Radio(
-                    [(v.value.capitalize(), v.value) for v in PdfPipeline],
+                    [(v.value.capitalize(), v.value) for v in ProcessingPipeline],
                    label="Pipeline type",
-                    value=PdfPipeline.STANDARD.value,
+                    value=ProcessingPipeline.STANDARD.value,
                )
        with gr.Row():
            with gr.Column(scale=1, min_width=200):
--- a/docling_serve/helper_functions.py
+++ b/docling_serve/helper_functions.py
@@ -1,9 +1,30 @@
 import inspect
+import json
 import re
-from typing import Union
+from typing import Union, get_args, get_origin

 from fastapi import Depends, Form
-from pydantic import BaseModel
+from pydantic import BaseModel, TypeAdapter
+
+
+def is_pydantic_model(type_):
+    try:
+        if inspect.isclass(type_) and issubclass(type_, BaseModel):
+            return True
+
+        origin = get_origin(type_)
+        if origin is Union:
+            args = get_args(type_)
+            return any(
+                inspect.isclass(arg) and issubclass(arg, BaseModel)
+                for arg in args
+                if arg is not type(None)
+            )
+
+    except Exception:
+        pass
+
+    return False


 # Adapted from
@@ -12,25 +33,62 @@ def FormDepends(cls: type[BaseModel]):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
+        annotation = model_field.annotation
+        description = model_field.description
+        default = (
+            Form(..., description=description, examples=model_field.examples)
+            if model_field.is_required()
+            else Form(
+                model_field.default,
+                examples=model_field.examples,
+                description=description,
+            )
+        )
+
+        # Flatten nested Pydantic models by accepting them as JSON strings
+        if is_pydantic_model(annotation):
+            annotation = str
+            default = Form(
+                None
+                if model_field.default is None
+                else json.dumps(model_field.default.model_dump(mode="json")),
+                description=description,
+                examples=None
+                if not model_field.examples
+                else [
+                    json.dumps(ex.model_dump(mode="json"))
+                    for ex in model_field.examples
+                ],
+            )
+
        new_parameters.append(
            inspect.Parameter(
                name=field_name,
                kind=inspect.Parameter.POSITIONAL_ONLY,
-                default=(
-                    Form(...)
-                    if model_field.is_required()
-                    else Form(model_field.default)
-                ),
-                annotation=model_field.annotation,
+                default=default,
+                annotation=annotation,
            )
        )

    async def as_form_func(**data):
+        for field_name, model_field in cls.model_fields.items():
+            value = data.get(field_name)
+            annotation = model_field.annotation
+
+            # Parse nested models from JSON string
+            if value is not None and is_pydantic_model(annotation):
+                try:
+                    validator = TypeAdapter(annotation)
+                    data[field_name] = validator.validate_json(value)
+                except Exception as e:
+                    raise ValueError(f"Invalid JSON for field '{field_name}': {e}")
+
        return cls(**data)

    sig = inspect.signature(as_form_func)
    sig = sig.replace(parameters=new_parameters)
    as_form_func.__signature__ = sig  # type: ignore
+
    return Depends(as_form_func)


--- a/docling_serve/orchestrator_factory.py
+++ b/docling_serve/orchestrator_factory.py
@@ -0,0 +1,53 @@
+from functools import lru_cache
+
+from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator
+
+from docling_serve.settings import AsyncEngine, docling_serve_settings
+
+
+@lru_cache
+def get_async_orchestrator() -> BaseOrchestrator:
+    if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
+        from docling_jobkit.convert.manager import (
+            DoclingConverterManager,
+            DoclingConverterManagerConfig,
+        )
+        from docling_jobkit.orchestrators.local.orchestrator import (
+            LocalOrchestrator,
+            LocalOrchestratorConfig,
+        )
+
+        local_config = LocalOrchestratorConfig(
+            num_workers=docling_serve_settings.eng_loc_num_workers,
+            shared_models=docling_serve_settings.eng_loc_share_models,
+        )
+
+        cm_config = DoclingConverterManagerConfig(
+            artifacts_path=docling_serve_settings.artifacts_path,
+            options_cache_size=docling_serve_settings.options_cache_size,
+            enable_remote_services=docling_serve_settings.enable_remote_services,
+            allow_external_plugins=docling_serve_settings.allow_external_plugins,
+            max_num_pages=docling_serve_settings.max_num_pages,
+            max_file_size=docling_serve_settings.max_file_size,
+        )
+        cm = DoclingConverterManager(config=cm_config)
+
+        return LocalOrchestrator(config=local_config, converter_manager=cm)
+    elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
+        from docling_jobkit.orchestrators.kfp.orchestrator import (
+            KfpOrchestrator,
+            KfpOrchestratorConfig,
+        )
+
+        kfp_config = KfpOrchestratorConfig(
+            endpoint=docling_serve_settings.eng_kfp_endpoint,
+            token=docling_serve_settings.eng_kfp_token,
+            ca_cert_path=docling_serve_settings.eng_kfp_ca_cert_path,
+            self_callback_endpoint=docling_serve_settings.eng_kfp_self_callback_endpoint,
+            self_callback_token_path=docling_serve_settings.eng_kfp_self_callback_token_path,
+            self_callback_ca_cert_path=docling_serve_settings.eng_kfp_self_callback_ca_cert_path,
+        )
+
+        return KfpOrchestrator(config=kfp_config)
+
+    raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -1,3 +1,4 @@
+import asyncio
 import logging
 import os
 import shutil
@@ -6,15 +7,27 @@ from collections.abc import Iterable
 from pathlib import Path
 from typing import Union

-from fastapi import HTTPException
+import httpx
+from fastapi import BackgroundTasks, HTTPException
 from fastapi.responses import FileResponse

 from docling.datamodel.base_models import OutputFormat
 from docling.datamodel.document import ConversionResult, ConversionStatus
 from docling_core.types.doc import ImageRefMode
+from docling_jobkit.datamodel.convert import ConvertDocumentsOptions
+from docling_jobkit.datamodel.task import Task
+from docling_jobkit.datamodel.task_targets import InBodyTarget, PutTarget, TaskTarget
+from docling_jobkit.orchestrators.base_orchestrator import (
+    BaseOrchestrator,
+)

-from docling_serve.datamodel.convert import ConvertDocumentsOptions
-from docling_serve.datamodel.responses import ConvertDocumentResponse, DocumentResponse
+from docling_serve.datamodel.responses import (
+    ConvertDocumentResponse,
+    DocumentResponse,
+    PresignedUrlConvertDocumentResponse,
+)
+from docling_serve.settings import docling_serve_settings
+from docling_serve.storage import get_scratch

 _log = logging.getLogger(__name__)

@@ -27,11 +40,14 @@ def _export_document_as_content(
    export_txt: bool,
    export_doctags: bool,
    image_mode: ImageRefMode,
+    md_page_break_placeholder: str,
 ):
    document = DocumentResponse(filename=conv_res.input.file.name)

    if conv_res.status == ConversionStatus.SUCCESS:
-        new_doc = conv_res.document._make_copy_with_refmode(Path(), image_mode)
+        new_doc = conv_res.document._make_copy_with_refmode(
+            Path(), image_mode, page_no=None
+        )

        # Create the different formats
        if export_json:
@@ -40,10 +56,14 @@ def _export_document_as_content(
            document.html_content = new_doc.export_to_html(image_mode=image_mode)
        if export_txt:
            document.text_content = new_doc.export_to_markdown(
-                strict_text=True, image_mode=image_mode
+                strict_text=True,
+                image_mode=image_mode,
            )
        if export_md:
-            document.md_content = new_doc.export_to_markdown(image_mode=image_mode)
+            document.md_content = new_doc.export_to_markdown(
+                image_mode=image_mode,
+                page_break_placeholder=md_page_break_placeholder or None,
+            )
        if export_doctags:
            document.doctags_content = new_doc.export_to_doctags()
    elif conv_res.status == ConversionStatus.SKIPPED:
@@ -63,11 +83,18 @@ def _export_documents_as_files(
    export_txt: bool,
    export_doctags: bool,
    image_export_mode: ImageRefMode,
-):
+    md_page_break_placeholder: str,
+) -> ConversionStatus:
    success_count = 0
    failure_count = 0

+    # Default failure in case results is empty
+    conv_result = ConversionStatus.FAILURE
+
+    artifacts_dir = Path("artifacts/")  # will be relative to the fname
+
    for conv_res in conv_results:
+        conv_result = conv_res.status
        if conv_res.status == ConversionStatus.SUCCESS:
            success_count += 1
            doc_filename = conv_res.input.file.stem
@@ -77,7 +104,9 @@ def _export_documents_as_files(
                fname = output_dir / f"{doc_filename}.json"
                _log.info(f"writing JSON output to {fname}")
                conv_res.document.save_as_json(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname,
+                    image_mode=image_export_mode,
+                    artifacts_dir=artifacts_dir,
                )

            # Export HTML format:
@@ -85,7 +114,9 @@ def _export_documents_as_files(
                fname = output_dir / f"{doc_filename}.html"
                _log.info(f"writing HTML output to {fname}")
                conv_res.document.save_as_html(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname,
+                    image_mode=image_export_mode,
+                    artifacts_dir=artifacts_dir,
                )

            # Export Text format:
@@ -103,14 +134,17 @@ def _export_documents_as_files(
                fname = output_dir / f"{doc_filename}.md"
                _log.info(f"writing Markdown output to {fname}")
                conv_res.document.save_as_markdown(
-                    filename=fname, image_mode=image_export_mode
+                    filename=fname,
+                    artifacts_dir=artifacts_dir,
+                    image_mode=image_export_mode,
+                    page_break_placeholder=md_page_break_placeholder or None,
                )

            # Export Document Tags format:
            if export_doctags:
                fname = output_dir / f"{doc_filename}.doctags"
                _log.info(f"writing Doc Tags output to {fname}")
-                conv_res.document.save_as_document_tokens(filename=fname)
+                conv_res.document.save_as_doctags(filename=fname)

        else:
            _log.warning(f"Document {conv_res.input.file} failed to convert.")
@@ -120,13 +154,15 @@ def _export_documents_as_files(
        f"Processed {success_count + failure_count} docs, "
        f"of which {failure_count} failed"
    )
+    return conv_result


 def process_results(
    conversion_options: ConvertDocumentsOptions,
+    target: TaskTarget,
    conv_results: Iterable[ConversionResult],
    work_dir: Path,
-) -> Union[ConvertDocumentResponse, FileResponse]:
+) -> Union[ConvertDocumentResponse, FileResponse, PresignedUrlConvertDocumentResponse]:
    # Let's start by processing the documents
    try:
        start_time = time.monotonic()
@@ -150,7 +186,9 @@ def process_results(
        )

    # We have some results, let's prepare the response
-    response: Union[FileResponse, ConvertDocumentResponse]
+    response: Union[
+        FileResponse, ConvertDocumentResponse, PresignedUrlConvertDocumentResponse
+    ]

    # Booleans to know what to export
    export_json = OutputFormat.JSON in conversion_options.to_formats
@@ -160,7 +198,7 @@ def process_results(
    export_doctags = OutputFormat.DOCTAGS in conversion_options.to_formats

    # Only 1 document was processed, and we are not returning it as a file
-    if len(conv_results) == 1 and not conversion_options.return_as_file:
+    if len(conv_results) == 1 and isinstance(target, InBodyTarget):
        conv_res = conv_results[0]
        document = _export_document_as_content(
            conv_res,
@@ -170,6 +208,7 @@ def process_results(
            export_txt=export_txt,
            export_doctags=export_doctags,
            image_mode=conversion_options.image_export_mode,
+            md_page_break_placeholder=conversion_options.md_page_break_placeholder,
        )

        response = ConvertDocumentResponse(
@@ -189,7 +228,7 @@ def process_results(
        os.getpid()

        # Export the documents
-        _export_documents_as_files(
+        conv_result = _export_documents_as_files(
            conv_results=conv_results,
            output_dir=output_dir,
            export_json=export_json,
@@ -198,6 +237,7 @@ def process_results(
            export_txt=export_txt,
            export_doctags=export_doctags,
            image_export_mode=conversion_options.image_export_mode,
+            md_page_break_placeholder=conversion_options.md_page_break_placeholder,
        )

        files = os.listdir(output_dir)
@@ -215,8 +255,67 @@ def process_results(
        # Output directory
        # background_tasks.add_task(shutil.rmtree, work_dir, ignore_errors=True)

-        response = FileResponse(
-            file_path, filename=file_path.name, media_type="application/zip"
-        )
+        if isinstance(target, PutTarget):
+            try:
+                with open(file_path, "rb") as file_data:
+                    r = httpx.put(str(target.url), files={"file": file_data})
+                    r.raise_for_status()
+                response = PresignedUrlConvertDocumentResponse(
+                    status=conv_result,
+                    processing_time=processing_time,
+                )
+            except Exception as exc:
+                _log.error("An error occour while uploading zip to s3", exc_info=exc)
+                raise HTTPException(
+                    status_code=500, detail="An error occour while uploading zip to s3."
+                )
+        else:
+            response = FileResponse(
+                file_path, filename=file_path.name, media_type="application/zip"
+            )
+
+    return response
+
+
+async def prepare_response(
+    task: Task, orchestrator: BaseOrchestrator, background_tasks: BackgroundTasks
+):
+    if task.results is None:
+        raise HTTPException(
+            status_code=404,
+            detail="Task result not found. Please wait for a completion status.",
+        )
+    assert task.options is not None
+
+    work_dir = get_scratch() / task.task_id
+    response = process_results(
+        conversion_options=task.options,
+        target=task.target,
+        conv_results=task.results,
+        work_dir=work_dir,
+    )
+
+    if work_dir.exists():
+        task.scratch_dir = work_dir
+        if not isinstance(response, FileResponse):
+            _log.warning(
+                f"Task {task.task_id=} produced content in {work_dir=} but the response is not a file."
+            )
+            shutil.rmtree(work_dir, ignore_errors=True)
+
+    if docling_serve_settings.single_use_results:
+        if task.scratch_dir is not None:
+            background_tasks.add_task(
+                shutil.rmtree, task.scratch_dir, ignore_errors=True
+            )
+
+        async def _remove_task_impl():
+            await asyncio.sleep(docling_serve_settings.result_removal_delay)
+            await orchestrator.delete_task(task_id=task.task_id)
+
+        async def _remove_task():
+            asyncio.create_task(_remove_task_impl())  # noqa: RUF006
+
+        background_tasks.add_task(_remove_task)

    return response
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -1,3 +1,4 @@
+import enum
 import sys
 from pathlib import Path
 from typing import Optional, Union
@@ -6,8 +7,6 @@ from pydantic import AnyUrl, model_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing_extensions import Self

-from docling_serve.datamodel.engines import AsyncEngine
-

 class UvicornSettings(BaseSettings):
    model_config = SettingsConfigDict(
@@ -26,6 +25,11 @@ class UvicornSettings(BaseSettings):
    workers: Union[int, None] = None


+class AsyncEngine(str, enum.Enum):
+    LOCAL = "local"
+    KFP = "kfp"
+
+
 class DoclingServeSettings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix="DOCLING_SERVE_",
@@ -40,6 +44,8 @@ class DoclingServeSettings(BaseSettings):
    static_path: Optional[Path] = None
    scratch_path: Optional[Path] = None
    single_use_results: bool = True
+    result_removal_delay: float = 300  # 5 minutes
+    load_models_at_boot: bool = True
    options_cache_size: int = 2
    enable_remote_services: bool = False
    allow_external_plugins: bool = False
@@ -48,6 +54,8 @@ class DoclingServeSettings(BaseSettings):
    max_num_pages: int = sys.maxsize
    max_file_size: int = sys.maxsize

+    max_sync_wait: int = 120  # 2 minutes
+
    cors_origins: list[str] = ["*"]
    cors_methods: list[str] = ["*"]
    cors_headers: list[str] = ["*"]
@@ -55,6 +63,7 @@ class DoclingServeSettings(BaseSettings):
    eng_kind: AsyncEngine = AsyncEngine.LOCAL
    # Local engine
    eng_loc_num_workers: int = 2
+    eng_loc_share_models: bool = False
    # KFP engine
    eng_kfp_endpoint: Optional[AnyUrl] = None
    eng_kfp_token: Optional[str] = None
--- a/docling_serve/websocket_notifier.py
+++ b/docling_serve/websocket_notifier.py
@@ -0,0 +1,54 @@
+from fastapi import WebSocket
+
+from docling_jobkit.datamodel.task_meta import TaskStatus
+from docling_jobkit.orchestrators.base_notifier import BaseNotifier
+from docling_jobkit.orchestrators.base_orchestrator import BaseOrchestrator
+
+from docling_serve.datamodel.responses import (
+    MessageKind,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+
+
+class WebsocketNotifier(BaseNotifier):
+    def __init__(self, orchestrator: BaseOrchestrator):
+        super().__init__(orchestrator)
+        self.task_subscribers: dict[str, set[WebSocket]] = {}
+
+    async def add_task(self, task_id: str):
+        self.task_subscribers[task_id] = set()
+
+    async def remove_task(self, task_id: str):
+        if task_id in self.task_subscribers:
+            for websocket in self.task_subscribers[task_id]:
+                await websocket.close()
+
+            del self.task_subscribers[task_id]
+
+    async def notify_task_subscribers(self, task_id: str):
+        if task_id not in self.task_subscribers:
+            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
+
+        task = await self.orchestrator.get_raw_task(task_id=task_id)
+        task_queue_position = await self.orchestrator.get_queue_position(task_id)
+        msg = TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+        for websocket in self.task_subscribers[task_id]:
+            await websocket.send_text(
+                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
+            )
+            if task.is_completed():
+                await websocket.close()
+
+    async def notify_queue_positions(self):
+        for task_id in self.task_subscribers.keys():
+            # notify only pending tasks
+            if self.orchestrator.tasks[task_id].task_status != TaskStatus.PENDING:
+                continue
+
+            await self.notify_task_subscribers(task_id)
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,4 +1,4 @@
-# Dolcing Serve documentation
+# Docling Serve documentation

 This documentation pages explore the webserver configurations, runtime options, deployment examples as well as development best practices.

@@ -6,3 +6,4 @@ This documentation pages explore the webserver configurations, runtime options,
 - [Advance usage](./usage.md)
 - [Deployment](./deployment.md)
 - [Development](./development.md)
+- [`v1` migration](./v1_migration.md)
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -7,7 +7,7 @@ server and the actual app-specific configurations.

 > [!WARNING]
 > When the server is running with `reload` or with multiple `workers`, uvicorn
-> will spawn multiple subprocessed. This invalides all the values configured
+> will spawn multiple subprocesses. This invalidates all the values configured
 > via the CLI command line options. Please use environment variables in this
 > type of deployments.

@@ -36,15 +36,18 @@ THe following table describes the options to configure the Docling Serve app.
 | CLI option | ENV | Default | Description |
 | -----------|-----|---------|-------------|
 | `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path |
-|  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and ui will be loaded from this path |
+|  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path |
 |  | `DOCLING_SERVE_SCRATCH_PATH` |  | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary created is created for this purpose. |
 | `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
 |  | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
 |  | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
 |  | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
+|  | `DOCLING_SERVE_RESULT_REMOVAL_DELAY` | `300` | When `DOCLING_SERVE_SINGLE_USE_RESULTS` is active, this is the delay (in seconds) before results are removed from the task registry. |
 |  | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
 |  | `DOCLING_SERVE_MAX_NUM_PAGES` |  | The maximum number of pages for a document to be processed. |
 |  | `DOCLING_SERVE_MAX_FILE_SIZE` |  | The maximum file size for a document to be processed. |
+|  | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Maximum number of seconds a synchronous endpoint waits for the task to complete. |
+|  | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `true` | If enabled, the models for the default options will be loaded at boot. |
 |  | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
 |  | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
 |  | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
@@ -58,11 +61,12 @@ The selected compute engine will be running all the async jobs.

 #### Local engine

-The following table describes the options to configure the Docling Serve KFP engine.
+The following table describes the options to configure the Docling Serve local engine.

 | ENV | Default | Description |
 |-----|---------|-------------|
 | `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
+| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, each process will share the same models among all thread workers. Otherwise, one instance of the models is allocated for each worker thread. |

 #### KFP engine

@@ -73,6 +77,6 @@ The following table describes the options to configure the Docling Serve KFP eng
 | `DOCLING_SERVE_ENG_KFP_ENDPOINT` |  | Must be set to the Kubeflow Pipeline endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888`  |
 | `DOCLING_SERVE_ENG_KFP_TOKEN` |  | The authentication token for KFP. For in-cluster deployment, the app will load automatically the token of the ServiceAccount. |
 | `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` |  | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will load automatically the internal CA. |
-| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` |  | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1alpha/callback/task/progress`. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` |  | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1/callback/task/progress`. |
 | `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` |  | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
 | `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` |  | The CA certificate for the progress callback. For cluster-inetrnal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
--- a/docs/deploy-examples/compose-amd.yaml
+++ b/docs/deploy-examples/compose-amd.yaml
@@ -0,0 +1,21 @@
+# AMD ROCm deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-rocm:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      ROCR_VISIBLE_DEVICES: "0" # https://rocm.docs.amd.com/en/latest/conceptual/gpu-isolation.html#rocr-visible-devices
+      ## This section is for compatibility with older cards
+      # HSA_OVERRIDE_GFX_VERSION: "11.0.0"
+      # HSA_ENABLE_SDMA: "0"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      - 44    # video group GID from host
+      - 992   # render group GID from host
+    restart: always
--- a/docs/deploy-examples/compose-gpu.yaml
+++ b/docs/deploy-examples/compose-gpu.yaml
@@ -1,15 +0,0 @@
-services:
-  docling:
-    image: ghcr.io/docling-project/docling-serve-cu124
-    container_name: docling-serve
-    ports:
-      - 5001:5001
-    environment:
-      - DOCLING_SERVE_ENABLE_UI=true
-    deploy:
-      resources:
-        reservations:
-          devices:
-          - driver: nvidia
-            count: all # nvidia-smi 
-            capabilities: [gpu]
--- a/docs/deploy-examples/compose-nvidia.yaml
+++ b/docs/deploy-examples/compose-nvidia.yaml
@@ -0,0 +1,20 @@
+# NVIDIA CUDA deployment
+
+services:
+  docling-serve:
+    image: ghcr.io/docling-project/docling-serve-cu126:main
+    container_name: docling-serve
+    ports:
+      - "5001:5001"
+    environment:
+      DOCLING_SERVE_ENABLE_UI: "true"
+      NVIDIA_VISIBLE_DEVICES: "all" # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html
+    # deploy:  # This section is for compatibility with Swarm
+    #   resources:
+    #     reservations:
+    #       devices:
+    #         - driver: nvidia
+    #           count: all
+    #           capabilities: [gpu]
+    runtime: nvidia
+    restart: always
--- a/docs/deploy-examples/docling-model-cache-deployment.yaml
+++ b/docs/deploy-examples/docling-model-cache-deployment.yaml
@@ -0,0 +1,47 @@
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 2
+              memory: 4Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+            - name: DOCLING_SERVE_ARTIFACTS_PATH
+              value: '/modelcache'
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve-cpu'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
--- a/docs/deploy-examples/docling-model-cache-job.yaml
+++ b/docs/deploy-examples/docling-model-cache-job.yaml
@@ -0,0 +1,33 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: docling-model-cache-load
+spec:
+  selector: {}
+  template:
+    metadata:
+      name: docling-model-load
+    spec:
+      containers:
+        - name: loader
+          image: ghcr.io/docling-project/docling-serve-cpu:main
+          command:
+            - docling-tools
+            - models
+            - download
+            - '--output-dir=/modelcache'
+            - 'layout'
+            - 'tableformer'
+            - 'code_formula'
+            - 'picture_classifier'
+            - 'smolvlm'
+            - 'granite_vision'
+            - 'easyocr'
+          volumeMounts:
+            - name: docling-model-cache
+              mountPath: /modelcache
+      volumes:
+        - name: docling-model-cache
+          persistentVolumeClaim:
+            claimName: docling-model-cache-pvc
+      restartPolicy: Never
--- a/docs/deploy-examples/docling-model-cache-pvc.yaml
+++ b/docs/deploy-examples/docling-model-cache-pvc.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: docling-model-cache-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 10Gi
--- a/docs/deploy-examples/docling-serve-oauth.yaml
+++ b/docs/deploy-examples/docling-serve-oauth.yaml
@@ -85,7 +85,7 @@ spec:
          resources:
            limits:
              cpu: 2000m
-              memory: 2Gi
+              memory: 4Gi
            requests:
              cpu: 800m
              memory: 1Gi
--- a/docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
+++ b/docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
@@ -0,0 +1,76 @@
+# This example deployment configures Docling Serve with a Route + Sticky sessions, a Service and cpu image
+---
+kind: Route
+apiVersion: route.openshift.io/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+  annotations:
+    haproxy.router.openshift.io/disable_cookies: "false" # this annotation enables the sticky sessions
+spec:
+  path: /
+  to:
+    kind: Service
+    name: docling-serve
+  port:
+    targetPort: http
+  tls:
+    termination: edge
+    insecureEdgeTerminationPolicy: Redirect
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  ports:
+  - name: http
+    port: 5001
+    targetPort: http
+  selector:
+    app: docling-serve
+    component: docling-serve-api
+---
+kind: Deployment
+apiVersion: apps/v1
+metadata:
+  name: docling-serve
+  labels:
+    app: docling-serve
+    component: docling-serve-api
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: docling-serve
+      component: docling-serve-api
+  template:
+    metadata:
+      labels:
+        app: docling-serve
+        component: docling-serve-api
+    spec:
+      restartPolicy: Always
+      containers:
+        - name: api
+          resources:
+            limits:
+              cpu: 1
+              memory: 4Gi
+            requests:
+              cpu: 250m
+              memory: 1Gi
+          env:
+            - name: DOCLING_SERVE_ENABLE_UI
+              value: 'true'
+          ports:
+            - name: http
+              containerPort: 5001
+              protocol: TCP
+          imagePullPolicy: Always
+          image: 'ghcr.io/docling-project/docling-serve'
--- a/docs/deploy-examples/docling-serve-simple.yaml
+++ b/docs/deploy-examples/docling-serve-simple.yaml
@@ -40,8 +40,8 @@ spec:
        - name: api
          resources:
            limits:
-              cpu: 500m
-              memory: 2Gi
+              cpu: 1
+              memory: 4Gi
              nvidia.com/gpu: 1  # Limit to one GPU
            requests:
              cpu: 250m
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -4,16 +4,17 @@ This document provides deployment examples for running the application in differ

 Choose the deployment option that best fits your setup.

- **[Local GPU](#local-gpu)**: For deploying the application locally on a machine with a NVIDIA GPU (using Docker Compose).
+- **[Local GPU NVIDIA](#local-gpu-nvidia)**: For deploying the application locally on a machine with a supported NVIDIA GPU (using Docker Compose).
+- **[Local GPU AMD](#local-gpu-amd)**: For deploying the application locally on a machine with a supported AMD GPU (using Docker Compose).
 - **[OpenShift](#openshift)**: For deploying the application on an OpenShift cluster, designed for cloud-native environments.

 ---

-## Local GPU
+## Local GPU NVIDIA

 ### Docker compose

-Manifest example: [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml)
+Manifest example: [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml)

 This deployment has the following features:

@@ -22,7 +23,7 @@ This deployment has the following features:
 Install the app with:

 ```sh
-docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
 ```

 For using the API:
@@ -30,11 +31,11 @@ For using the API:
 ```sh
 # Make a test query
 curl -X 'POST' \
-  "localhost:5001/v1alpha/convert/source/async" \
+  "localhost:5001/v1/convert/source/async" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -56,7 +57,7 @@ Docs:
 <details>
 <summary><b>Steps</b></summary>

-1. Check driver version and which GPU you want to use (0/1/2/3.. and update [compose-gpu.yaml](./deploy-examples/compose-gpu.yaml) file or use `count: all`)
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-nvidia.yaml](./deploy-examples/compose-nvidia.yaml) file or use `count: all`)

    ```sh
    nvidia-smi
@@ -117,7 +118,75 @@ Docs:
 5. Run the container:

    ```sh
-    docker compose -f docs/deploy-examples/compose-gpu.yaml up -d
+    docker compose -f docs/deploy-examples/compose-nvidia.yaml up -d
+    ```
+
+</details>
+
+## Local GPU AMD
+
+### Docker compose
+
+Manifest example: [compose-amd.yaml](./deploy-examples/compose-amd.yaml)
+
+This deployment has the following features:
+
+- AMD ROCm enabled
+
+Install the app with:
+
+```sh
+docker compose -f docs/deploy-examples/compose-amd.yaml up -d
+```
+
+For using the API:
+
+```sh
+# Make a test query
+curl -X 'POST' \
+  "localhost:5001/v1/convert/source/async" \
+  -H "accept: application/json" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+  }'
+```
+
+<details>
+<summary><b>Requirements</b></summary>
+
+- debian/ubuntu/rhel/fedora/opensuse
+- docker
+- AMDGPU driver >=6.3
+- AMD ROCm >=6.3
+
+Docs:
+
+- [AMD ROCm installation](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/quick-start.html)
+
+</details>
+
+<details>
+<summary><b>Steps</b></summary>
+
+1. Check driver version and which GPU you want to use 0/1/2/n (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    rocm-smi --showdriverversion
+    rocminfo | grep -i "ROCm version"
+    ```
+
+2. Find both video group GID and render group GID from host (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    getent group video
+    getent group render
+    ```
+
+3. Build the image locally (and update [compose-amd.yaml](./deploy-examples/compose-amd.yaml) file)
+
+    ```sh
+    make docling-serve-rocm-image
    ```

 </details>
@@ -148,11 +217,11 @@ oc port-forward svc/docling-serve 5001:5001

 # Make a test query
 curl -X 'POST' \
-  "localhost:5001/v1alpha/convert/source/async" \
+  "localhost:5001/v1/convert/source/async" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```

@@ -184,11 +253,53 @@ OCP_AUTH_TOKEN=$(oc whoami --show-token)

 # Make a test query
 curl -X 'POST' \
-  "${DOCLING_ROUTE}/v1alpha/convert/source/async" \
+  "${DOCLING_ROUTE}/v1/convert/source/async" \
  -H "Authorization: Bearer ${OCP_AUTH_TOKEN}" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{
-    "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}]
+    "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
  }'
 ```
+
+### ReplicaSets with `sticky sessions`
+
+Manifest example: [docling-serve-replicas-w-sticky-sessions.yaml](./deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml)
+
+This deployment has the following features:
+
+- Deployment configuration with 3 replicas
+- Service configuration
+- Exposes the service using an OpenShift `Route` with sticky sessions enabled
+
+Install the app with:
+
+```sh
+oc apply -f docs/deploy-examples/docling-serve-replicas-w-sticky-sessions.yaml
+```
+
+For using the API:
+
+```sh
+# Retrieve the endpoint
+DOCLING_NAME=docling-serve
+DOCLING_ROUTE="https://$(oc get routes $DOCLING_NAME --template={{.spec.host}})"
+
+# Make a test query, store the cookie and taskid
+task_id=$(curl -s -X 'POST' \
+    "${DOCLING_ROUTE}/v1/convert/source/async" \
+    -H "accept: application/json" \
+    -H "Content-Type: application/json" \
+    -d '{
+      "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}]
+    }' \
+    -c cookies.txt | grep -oP '"task_id":"\K[^"]+')
+```
+
+```sh
+# Grab the taskid and cookie to check the task status
+curl -v -X 'GET' \
+  "${DOCLING_ROUTE}/v1/status/poll/$task_id?wait=0" \
+  -H "accept: application/json" \
+  -b "cookies.txt"
+```
--- a/docs/pre-loading-models.md
+++ b/docs/pre-loading-models.md
@@ -0,0 +1,103 @@
+# Pre-loading models for docling
+
+This document provides examples for pre-loading docling models to a persistent volume and re-using it for docling-serve deployments.
+
+1. We need to create a persistent volume that will store the model weights:
+
+    ```yaml
+    apiVersion: v1
+    kind: PersistentVolumeClaim
+    metadata:
+      name: docling-model-cache-pvc
+    spec:
+      accessModes:
+        - ReadWriteOnce
+      volumeMode: Filesystem
+      resources:
+        requests:
+          storage: 10Gi
+    ```
+
+    If you don't want to use the default storage class, set a custom storage class as follows:
+
+    ```yaml
+    spec:
+      ...
+      storageClassName: <Storage Class Name>
+    ```
+
+    Manifest example: [docling-model-cache-pvc.yaml](./deploy-examples/docling-model-cache-pvc.yaml)
+
+2. To load the model weights, we can use `docling-tools` to download them. As this is a one-time operation, we can use a Kubernetes Job for it:
+
+    ```yaml
+    apiVersion: batch/v1
+    kind: Job
+    metadata:
+      name: docling-model-cache-load
+    spec:
+      selector: {}
+      template:
+        metadata:
+          name: docling-model-load
+        spec:
+          containers:
+            - name: loader
+              image: ghcr.io/docling-project/docling-serve-cpu:main
+              command:
+                - docling-tools
+                - models
+                - download
+                - '--output-dir=/modelcache'
+                - 'layout'
+                - 'tableformer'
+                - 'code_formula'
+                - 'picture_classifier'
+                - 'smolvlm'
+                - 'granite_vision'
+                - 'easyocr'
+              volumeMounts:
+                - name: docling-model-cache
+                  mountPath: /modelcache
+          volumes:
+            - name: docling-model-cache
+              persistentVolumeClaim:
+                claimName: docling-model-cache-pvc
+          restartPolicy: Never
+    ```
+
+    The job mounts the previously created persistent volume and executes a command similar to the one used to load models locally:
+    `docling-tools models download --output-dir <MOUNT-PATH> [LIST_OF_MODELS]`
+
+    In the manifest, we specify the desired models individually; alternatively, the `--all` parameter can be used to download all models.
+
+    Manifest example: [docling-model-cache-job.yaml](./deploy-examples/docling-model-cache-job.yaml)
+
+3. Now we can mount the volume in the docling-serve deployment and set the env variable `DOCLING_SERVE_ARTIFACTS_PATH` to point to it.
+    The following additions to the deployment should be made:
+
+    ```yaml
+    spec:
+      template:
+        spec:
+          containers:
+            - name: api
+              env:
+              ...
+                - name: DOCLING_SERVE_ARTIFACTS_PATH
+                  value: '/modelcache'
+              volumeMounts:
+                - name: docling-model-cache
+                  mountPath: /modelcache
+          ...
+          volumes:
+            - name: docling-model-cache
+              persistentVolumeClaim:
+                claimName: docling-model-cache-pvc
+    ```
+
+    Make sure that the value of `DOCLING_SERVE_ARTIFACTS_PATH` matches both the path where the models were downloaded and the path where the volume is mounted.
+
+    Now, when docling-serve executes tasks, the underlying docling installation will load the model weights from the mounted volume.
+
+    Manifest example: [docling-model-cache-deployment.yaml](./deploy-examples/docling-model-cache-deployment.yaml)
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -9,22 +9,24 @@ On top of the source of file (see below), both endpoints support the same parame
 - `from_formats` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
 - `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
 - `pipeline` (str). The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
+- `page_range` (tuple). If specified, only convert a range of pages. The page number starts at 1.
 - `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
 - `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
 - `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesseract_cli`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`.
+- `ocr_engine` (str): OCR engine to use. Allowed values: `easyocr`, `tesserocr`, `tesseract`, `rapidocr`, `ocrmac`. Defaults to `easyocr`. To use the `tesserocr` engine, `tesserocr` must be installed where docling-serve is running: `pip install tesserocr`
 - `ocr_lang` (List[str]): List of languages used by the OCR engine. Note that each OCR engine has different values for the language names. Defaults to empty.
 - `pdf_backend` (str): PDF backend to use. Allowed values: `pypdfium2`, `dlparse_v1`, `dlparse_v2`, `dlparse_v4`. Defaults to `dlparse_v4`.
 - `table_mode` (str): Table mode to use. Allowed values: `fast`, `accurate`. Defaults to `fast`.
 - `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
- `return_as_file` (boo): If enabled, return the output as a file. Defaults to false.
+- `md_page_break_placeholder` (str): Add this placeholder between pages in the markdown output.
 - `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
 - `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
 - `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
 - `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
 - `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api.
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local.
+- `picture_description_area_threshold` (float): Minimum percentage of the area for a picture to be processed with the models. Defaults to 0.05.
+- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `picture_description_api`.
+- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with `picture_description_local`.
 - `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
 - `images_scale` (float): Scale factor for images. Defaults to 2.0.

@@ -32,7 +34,7 @@ On top of the source of file (see below), both endpoints support the same parame

 ### Source endpoint

-The endpoint is `/v1alpha/convert/source`, listening for POST requests of JSON payloads.
+The endpoint is `/v1/convert/source`, listening for POST requests of JSON payloads.

 On top of the above parameters, you must send the URL(s) of the document you want process with either the `http_sources` or `file_sources` fields.
 The first is fetching URL(s) (optionally using with extra headers), the second allows to provide documents as base64-encoded strings.
@@ -63,7 +65,6 @@ Simple payload example:
    "pdf_backend": "dlparse_v2",
    "table_mode": "fast",
    "abort_on_error": false,
-    "return_as_file": false,
  },
  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
 }
@@ -77,7 +78,7 @@ Simple payload example:

 ```sh
 curl -X 'POST' \
-  'http://localhost:5001/v1alpha/convert/source' \
+  'http://localhost:5001/v1/convert/source' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
@@ -106,7 +107,6 @@ curl -X 'POST' \
    "pdf_backend": "dlparse_v2",
    "table_mode": "fast",
    "abort_on_error": false,
-    "return_as_file": false,
    "do_table_structure": true,
    "include_images": true,
    "images_scale": 2
@@ -124,7 +124,7 @@ curl -X 'POST' \
 import httpx

 async_client = httpx.AsyncClient(timeout=60.0)
-url = "http://localhost:5001/v1alpha/convert/source"
+url = "http://localhost:5001/v1/convert/source"
 payload = {
  "options": {
    "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
@@ -137,7 +137,6 @@ payload = {
    "pdf_backend": "dlparse_v2",
    "table_mode": "fast",
    "abort_on_error": False,
-    "return_as_file": False,
  },
  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
 }
@@ -176,7 +175,7 @@ cat <<EOF > /tmp/request_body.json
 EOF

 # 3. POST the request to the docling service
-curl -X POST "localhost:5001/v1alpha/convert/source" \
+curl -X POST "localhost:5001/v1/convert/source" \
     -H "Content-Type: application/json" \
     -d @/tmp/request_body.json
 ```
@@ -185,14 +184,14 @@ curl -X POST "localhost:5001/v1alpha/convert/source" \

 ### File endpoint

-The endpoint is: `/v1alpha/convert/file`, listening for POST requests of Form payloads (necessary as the files are sent as multipart/form data). You can send one or multiple files.
+The endpoint is: `/v1/convert/file`, listening for POST requests of Form payloads (necessary as the files are sent as multipart/form data). You can send one or multiple files.

 <details>
 <summary>CURL example:</summary>

 ```sh
 curl -X 'POST' \
-  'http://127.0.0.1:5001/v1alpha/convert/file' \
+  'http://127.0.0.1:5001/v1/convert/file' \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F 'ocr_engine=easyocr' \
@@ -208,7 +207,6 @@ curl -X 'POST' \
  -F 'abort_on_error=false' \
  -F 'to_formats=md' \
  -F 'to_formats=text' \
-  -F 'return_as_file=false' \
  -F 'do_ocr=true'
 ```

@@ -221,7 +219,7 @@ curl -X 'POST' \
 import httpx

 async_client = httpx.AsyncClient(timeout=60.0)
-url = "http://localhost:5001/v1alpha/convert/file"
+url = "http://localhost:5001/v1/convert/file"
 parameters = {
 "from_formats": ["docx", "pptx", "html", "image", "pdf", "asciidoc", "md", "xlsx"],
 "to_formats": ["md", "json", "html", "text", "doctags"],
@@ -233,7 +231,6 @@ parameters = {
 "pdf_backend": "dlparse_v2",
 "table_mode": "fast",
 "abort_on_error": False,
-"return_as_file": False
 }

 current_dir = os.path.dirname(__file__)
@@ -243,7 +240,7 @@ files = {
    'files': ('2206.01062v1.pdf', open(file_path, 'rb'), 'application/pdf'),
 }

-response = await async_client.post(url, files=files, data={"parameters": json.dumps(parameters)})
+response = await async_client.post(url, files=files, data=parameters)
 assert response.status_code == 200, "Response should be 200 OK"

 data = response.json()
@@ -285,33 +282,42 @@ The api option is specified with:

 Example URLs are:

- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `params`:
+- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `picture_description_api`:
  - the `HuggingFaceTB/SmolVLM-256M-Instruct` model

    ```json
    {
+      "url": "http://localhost:8000/v1/chat/completions",
+      "params": {
        "model": "HuggingFaceTB/SmolVLM-256M-Instruct",
        "max_completion_tokens": 200,
+      }
    }
    ```
-  
+
  - the `ibm-granite/granite-vision-3.2-2b` model

    ```json
    {
+      "url": "http://localhost:8000/v1/chat/completions",
+      "params": {
        "model": "ibm-granite/granite-vision-3.2-2b",
        "max_completion_tokens": 200,
+      }
    }
    ```

- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `params`:
+- `http://localhost:11434/v1/chat/completions` for the local Ollama api, with example `picture_description_api`:
  - the `granite3.2-vision:2b` model

    ```json
    {
+      "url": "http://localhost:11434/v1/chat/completions",
+      "params": {
        "model": "granite3.2-vision:2b"
+      }
    }
-    ```  
+    ```

 Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.

@@ -342,9 +348,97 @@ The response can be a JSON Document or a File.
  `processing_time` is the Docling processing time in seconds, and `timings` (when enabled in the backend) provides the detailed
  timing of all the internal Docling components.

- If you set the parameter `return_as_file` to True, the response will be a zip file.
- If multiple files are generated (multiple inputs, or one input but multiple outputs with `return_as_file` True), the response will be a zip file.
+- If you set the parameter `target` to the zip mode, the response will be a zip file.
+- If multiple files are generated (multiple inputs, or one input but multiple outputs with the zip target mode), the response will be a zip file.

 ## Asynchronous API

-TBA
+Both `/v1/convert/source` and `/v1/convert/file` endpoints are available as asynchronous variants.
+The advantage of the asynchronous endpoints is the possibility to interrupt the connection, check for progress updates and fetch the result later.
+This approach is more resilient against network instabilities and allows the client application logic to easily interleave conversion with other tasks.
+
+Launch an asynchronous conversion with:
+
+- `POST /v1/convert/source/async` when providing the input as sources.
+- `POST /v1/convert/file/async` when providing the input as multipart-form files.
+
+The response format is a task detail:
+
+```jsonc
+{
+  "task_id": "<task_id>",  // the task_id which can be used for the next operations
+  "task_status": "pending|started|success|failure",  // the task status
+  "task_position": 1,  // the position in the queue
+  "task_meta": null,  // metadata e.g. how many documents are in the total job and how many have been converted
+}
+```
+
+### Polling status
+
+To check the progress of the conversion task and wait for its completion, use the endpoint:
+
+- `GET /v1/status/poll/{task_id}`
+
+<details>
+<summary>Example waiting loop:</summary>
+
+```python
+import time
+import httpx
+
+# ...
+# response from the async task submission
+task = response.json()
+
+while task["task_status"] not in ("success", "failure"):
+    response = httpx.get(f"{base_url}/status/poll/{task['task_id']}")
+    task = response.json()
+
+    time.sleep(5)
+```
+
+</details>
+
+### Subscribe with websockets
+
+Using websockets, the client application can be notified about updates of the conversion task.
+To start the websocket connection, use the endpoint:
+
+- `/v1/status/ws/{task_id}`
+
+Websocket messages are JSON objects with the following structure:
+
+```jsonc
+{
+  "message": "connection|update|error",  // type of message being sent
+  "task": {},  // the same content of the task description
+  "error": "",  // description of the error
+}
+```
+
+<details>
+<summary>Example websocket usage:</summary>
+
+```python
+from websockets.sync.client import connect
+
+uri = f"ws://{base_url}/v1/status/ws/{task['task_id']}"
+with connect(uri) as websocket:
+    for message in websocket:
+        try:
+            payload = json.loads(message)
+            if payload["message"] == "error":
+                break
+            if payload["message"] == "update" and payload["task"]["task_status"] in ("success", "failure"):
+                break
+        except:
+          break
+```
+
+</details>
+
+### Fetch results
+
+When the task is completed, the result can be fetched with the endpoint:
+
+- `GET /v1/result/{task_id}`
--- a/docs/v1_migration.md
+++ b/docs/v1_migration.md
@@ -0,0 +1,80 @@
+# Migration to the `v1` API
+
+Docling Serve has moved from the initial prototype `v1alpha` API to the stable `v1` API.
+This page provides simple instructions to upgrade your application to the new API.
+
+## API changes
+
+The breaking changes introduced in the `v1` release of Docling Serve are designed to provide a stable schema which
+allows the project to provide new capabilities such as new types of input sources and targets, as well as the definition of callbacks for event-driven applications.
+
+### Endpoint names
+
+All endpoints are renamed from `/v1alpha/` to `/v1/`.
+
+### Sources
+
+When using the `/v1/convert/source` endpoint, input documents have to be specified with the `sources: []` argument, which is replacing the usage of `file_sources` and `http_sources`.
+
+Old version:
+
+```jsonc
+{
+    "options": {},  // conversion options
+    "file_sources": [  // input documents provided as base64-encoded strings
+        {"base64_string": "abc123...", "filename": "file.pdf"}
+    ],
+    "http_sources": [  // input documents provided as http urls
+        {"url": "https://..."}
+    ]
+}
+```
+
+New version:
+
+```jsonc
+{
+    "options": {},  // conversion options
+    "sources": [
+        // input document provided as base64-encoded string
+        {"kind": "file", "base64_string": "abc123...", "filename": "file.pdf"},
+        // input document provided as http urls
+        {"kind": "http", "url": "https://..."},
+    ]
+}
+```
+
+### Targets
+
+To switch between output formats, e.g. from the JSON inbody response to the zip archive response, users have to specify the `target` argument, which replaces the usage of `options.return_as_file`.
+
+Old version:
+
+```jsonc
+{
+    "options": {
+        "return_as_file": true  // <-- to be removed
+    },
+    // ...
+}
+```
+
+New version:
+
+```jsonc
+{
+    "options": {},
+    "target": {"kind": "zip"},  // <-- add this
+    // ...
+}
+```
+
+## Continue with the old API
+
+If you are not able to apply the changes above to your application, please consider pinning the previous `v0.x` container images, e.g.
+
+```sh
+podman run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=1 quay.io/docling-project/docling-serve:v0.16.1
+```
+
+_Note that the old prototype API will not be supported in new `v1.x` versions._
--- a/img/fastapi-ui.png
+++ b/img/fastapi-ui.png
--- a/img/swagger.png
+++ b/img/swagger.png
--- a/os-packages.txt
+++ b/os-packages.txt
@@ -1,6 +1,7 @@
 tesseract
 tesseract-devel
 tesseract-langpack-eng
+tesseract-osd
 leptonica-devel
 libglvnd-glx
 glib2
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "0.10.0"  # DO NOT EDIT, updated automatically
+version = "1.2.1"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -8,7 +8,6 @@ authors = [
    {name="Guillaume Moutier", email="gmoutier@redhat.com"},
    {name="Anil Vishnoi", email="avishnoi@redhat.com"},
    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
-    {name="Panos Vagenas", email="pva@zurich.ibm.com"},
    {name="Christoph Auer", email="cau@zurich.ibm.com"},
    {name="Peter Staar", email="taa@zurich.ibm.com"},
 ]
@@ -23,24 +22,30 @@ readme = "README.md"
 classifiers = [
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
-    # "Development Status :: 5 - Production/Stable",
+    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Typing :: Typed",
-    "Programming Language :: Python :: 3"
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "docling[vlm]~=2.28",
-    "mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
+    "docling~=2.38",
+    "docling-core>=2.44.1",
+    "docling-jobkit[kfp,vlm]>=1.3.1,<2.0.0",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
-    "kfp[kubernetes]>=2.10.0",
    "pydantic~=2.10",
    "pydantic-settings~=2.4",
    "python-multipart>=0.0.14,<0.1.0",
    "typer~=0.12",
    "uvicorn[standard]>=0.29.0,<1.0.0",
    "websockets~=14.0",
+    "scalar-fastapi>=1.0.3",
+    "docling-mcp>=1.0.0",
 ]

 [project.optional-dependencies]
@@ -55,13 +60,8 @@ rapidocr = [
    "rapidocr-onnxruntime~=1.4; python_version<'3.13'",
    "onnxruntime~=1.7",
 ]
-cpu = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
-]
-cu124 = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
+flash-attn = [
+  "flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
 ]

 [dependency-groups]
@@ -76,12 +76,48 @@ dev = [
    "ruff>=0.9.6",
 ]

+pypi = [
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+]
+
+cpu = [
+  "torch>=2.7.1",
+  "torchvision>=0.22.1",
+]
+
+cu124 = [
+  "torch>=2.6.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.21.0 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+cu126 = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+cu128 = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
+rocm = [
+  "torch>=2.7.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "torchvision>=0.22.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+  "pytorch-triton-rocm>=3.3.1 ; sys_platform == 'linux' and platform_machine == 'x86_64' and python_version < '3.13'",
+]
+
 [tool.uv]
 package = true
+default-groups = ["dev", "pypi"]
 conflicts = [
  [
-    { extra = "cpu" },
-    { extra = "cu124" },
+    { group = "pypi" },
+    { group = "cpu" },
+    { group = "cu124" },
+    { group = "cu126" },
+    { group = "cu128" },
+    { group = "rocm" },
  ],
 ]
 environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
@@ -91,14 +127,35 @@ override-dependencies = [

 [tool.uv.sources]
 torch = [
-  { index = "pytorch-cpu", extra = "cpu" },
-  { index = "pytorch-cu124", extra = "cu124" },
+  { index = "pytorch-pypi", group = "pypi" },
+  { index = "pytorch-cpu", group = "cpu" },
+  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]
+
 torchvision = [
-  { index = "pytorch-cpu", extra = "cpu" },
-  { index = "pytorch-cu124", extra = "cu124" },
+  { index = "pytorch-pypi", group = "pypi" },
+  { index = "pytorch-cpu", group = "cpu" },
+  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
+  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
 ]

+pytorch-triton-rocm = [
+  { index = "pytorch-rocm", marker = "sys_platform == 'linux'" },
+]
+
+# docling-jobkit = { git = "https://github.com/docling-project/docling-jobkit/", rev = "main" }
+# docling-jobkit = { path = "../docling-jobkit", editable = true }
+
+[[tool.uv.index]]
+name = "pytorch-pypi"
+url = "https://pypi.org/simple"
+explicit = true
+
 [[tool.uv.index]]
 name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
@@ -109,6 +166,21 @@ name = "pytorch-cu124"
 url = "https://download.pytorch.org/whl/cu124"
 explicit = true

+[[tool.uv.index]]
+name = "pytorch-cu126"
+url = "https://download.pytorch.org/whl/cu126"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-rocm"
+url = "https://download.pytorch.org/whl/rocm6.3"
+explicit = true
+
 [tool.setuptools.packages.find]
 include = ["docling_serve*"]
 namespaces = true
@@ -177,7 +249,7 @@ ignore = [
 max-complexity = 15

 [tool.ruff.lint.isort.sections]
-"docling" = ["docling", "docling_core"]
+"docling" = ["docling", "docling_core", "docling_jobkit"]

 [tool.ruff.lint.isort]
 combine-as-imports = true
@@ -206,6 +278,7 @@ module = [
    "kfp.*",
    "kfp_server_api.*",
    "mlx_vlm.*",
+    "scalar_fastapi.*",
 ]
 ignore_missing_imports = true

--- a/tests/test_1-file-all-outputs.py
+++ b/tests/test_1-file-all-outputs.py
@@ -16,7 +16,7 @@ async def async_client():
@pytest.mark.asyncio
 async def test_convert_file(async_client):
    """Test convert single file to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/file"
+    url = "http://localhost:5001/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
@@ -37,7 +37,6 @@ async def test_convert_file(async_client):
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
-        "return_as_file": False,
    }

    current_dir = os.path.dirname(__file__)
--- a/tests/test_1-file-async.py
+++ b/tests/test_1-file-async.py
@@ -17,13 +17,12 @@ async def async_client():
 async def test_convert_url(async_client):
    """Test convert URL to all outputs"""

-    base_url = "http://localhost:5001/v1alpha"
+    base_url = "http://localhost:5001/v1"
    payload = {
        "to_formats": ["md", "json", "html"],
        "image_export_mode": "placeholder",
        "ocr": False,
        "abort_on_error": False,
-        "return_as_file": False,
    }

    file_path = Path(__file__).parent / "2206.01062v1.pdf"
@@ -51,10 +50,12 @@ async def test_convert_url(async_client):
        time.sleep(2)

    assert task["task_status"] == "success"
+    print(f"Task completed with status {task['task_status']=}")

    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
    assert result_resp.status_code == 200, "Response should be 200 OK"
    result = result_resp.json()
+    print("Got result.")

    assert "md_content" in result["document"]
    assert result["document"]["md_content"] is not None
--- a/tests/test_1-url-all-outputs.py
+++ b/tests/test_1-url-all-outputs.py
@@ -15,7 +15,7 @@ async def async_client():
@pytest.mark.asyncio
 async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/source"
+    url = "http://localhost:5001/v1/convert/source"
    payload = {
        "options": {
            "from_formats": [
@@ -37,9 +37,8 @@ async def test_convert_url(async_client):
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
-            "return_as_file": False,
        },
-        "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}],
+        "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"}],
    }
    print(json.dumps(payload, indent=2))

--- a/tests/test_1-url-async-ws.py
+++ b/tests/test_1-url-async-ws.py
@@ -20,14 +20,13 @@ async def test_convert_url(async_client: httpx.AsyncClient):
    doc_filename = Path("tests/2408.09869v5.pdf")
    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()

-    base_url = "http://localhost:5001/v1alpha"
+    base_url = "http://localhost:5001/v1"
    payload = {
        "options": {
            "to_formats": ["md", "json"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "abort_on_error": False,
-            "return_as_file": False,
            # "do_picture_description": True,
            # "picture_description_api": {
            #     "url": "http://localhost:11434/v1/chat/completions",
@@ -39,8 +38,14 @@ async def test_convert_url(async_client: httpx.AsyncClient):
            #     "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
            # },
        },
-        # "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
-        "file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],
+        # "sources": [{"kind": "http", "url": "https://arxiv.org/pdf/2501.17887"}],
+        "sources": [
+            {
+                "kind": "file",
+                "base64_string": encoded_doc,
+                "filename": doc_filename.name,
+            }
+        ],
    }
    # print(json.dumps(payload, indent=2))

@@ -52,7 +57,7 @@ async def test_convert_url(async_client: httpx.AsyncClient):

    task = response.json()

-    uri = f"ws://localhost:5001/v1alpha/status/ws/{task['task_id']}"
+    uri = f"ws://localhost:5001/v1/status/ws/{task['task_id']}"
    with connect(uri) as websocket:
        for message in websocket:
            print(message)
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -25,16 +25,15 @@ async def test_convert_url(async_client):
        "https://arxiv.org/pdf/2311.18481",
    ]

-    base_url = "http://localhost:5001/v1alpha"
+    base_url = "http://localhost:5001/v1"
    payload = {
        "options": {
            "to_formats": ["md", "json"],
            "image_export_mode": "placeholder",
            "ocr": True,
            "abort_on_error": False,
-            "return_as_file": False,
        },
-        "http_sources": [{"url": random.choice(example_docs)}],
+        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
    }
    print(json.dumps(payload, indent=2))

--- a/tests/test_2-files-all-outputs.py
+++ b/tests/test_2-files-all-outputs.py
@@ -15,7 +15,7 @@ async def async_client():
@pytest.mark.asyncio
 async def test_convert_file(async_client):
    """Test convert single file to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/file"
+    url = "http://localhost:5001/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
@@ -36,7 +36,6 @@ async def test_convert_file(async_client):
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
-        "return_as_file": False,
    }

    current_dir = os.path.dirname(__file__)
--- a/tests/test_2-urls-all-outputs.py
+++ b/tests/test_2-urls-all-outputs.py
@@ -13,7 +13,7 @@ async def async_client():
@pytest.mark.asyncio
 async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
-    url = "http://localhost:5001/v1alpha/convert/source"
+    url = "http://localhost:5001/v1/convert/source"
    payload = {
        "options": {
            "from_formats": [
@@ -35,12 +35,12 @@ async def test_convert_url(async_client):
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
-            "return_as_file": False,
        },
-        "http_sources": [
-            {"url": "https://arxiv.org/pdf/2206.01062"},
-            {"url": "https://arxiv.org/pdf/2408.09869"},
+        "sources": [
+            {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
+            {"kind": "http", "url": "https://arxiv.org/pdf/2408.09869"},
        ],
+        "target": {"kind": "zip"},
    }

    response = await async_client.post(url, json=payload)
--- a/tests/test_2-urls-async-all-outputs.py
+++ b/tests/test_2-urls-async-all-outputs.py
@@ -16,7 +16,7 @@ async def async_client():
@pytest.mark.asyncio
 async def test_convert_url(async_client):
    """Test convert URL to all outputs"""
-    base_url = "http://localhost:5001/v1alpha"
+    base_url = "http://localhost:5001/v1"
    payload = {
        "options": {
            "from_formats": [
@@ -38,12 +38,12 @@ async def test_convert_url(async_client):
            "pdf_backend": "dlparse_v2",
            "table_mode": "fast",
            "abort_on_error": False,
-            "return_as_file": False,
        },
-        "http_sources": [
-            {"url": "https://arxiv.org/pdf/2206.01062"},
-            {"url": "https://arxiv.org/pdf/2408.09869"},
+        "sources": [
+            {"kind": "http", "url": "https://arxiv.org/pdf/2206.01062"},
+            {"kind": "http", "url": "https://arxiv.org/pdf/2408.09869"},
        ],
+        "target": {"kind": "zip"},
    }

    response = await async_client.post(f"{base_url}/convert/source/async", json=payload)
--- a/tests/test_fastapi_endpoints.py
+++ b/tests/test_fastapi_endpoints.py
@@ -1,6 +1,8 @@
 import asyncio
+import io
 import json
 import os
+import zipfile

 import pytest
 import pytest_asyncio
@@ -8,6 +10,8 @@ from asgi_lifespan import LifespanManager
 from httpx import ASGITransport, AsyncClient
 from pytest_check import check

+from docling_core.types.doc import DoclingDocument, PictureItem
+
 from docling_serve.app import create_app


@@ -45,7 +49,7 @@ async def test_health(client: AsyncClient):
 async def test_convert_file(client: AsyncClient):
    """Test convert single file to all outputs"""

-    endpoint = "/v1alpha/convert/file"
+    endpoint = "/v1/convert/file"
    options = {
        "from_formats": [
            "docx",
@@ -66,7 +70,6 @@ async def test_convert_file(client: AsyncClient):
        "pdf_backend": "dlparse_v2",
        "table_mode": "fast",
        "abort_on_error": False,
-        "return_as_file": False,
    }

    current_dir = os.path.dirname(__file__)
@@ -154,3 +157,37 @@ async def test_convert_file(client: AsyncClient):
            data["document"]["doctags_content"],
            msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
        )
+
+
+@pytest.mark.asyncio
+async def test_referenced_artifacts(client: AsyncClient):
+    """Test that paths in the zip file are relative to the zip file root.
+
+    Converts a PDF to JSON with referenced images and a zip target, then
+    checks that every picture URI in the exported document names a member
+    of the returned archive.
+    """
+
+    endpoint = "/v1/convert/file"
+    options = {
+        "to_formats": ["json"],
+        "image_export_mode": "referenced",
+        "target_type": "zip",
+        "ocr": False,
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    # Keep the PDF open only for the duration of the upload so the handle is
+    # closed deterministically instead of leaking until garbage collection.
+    with open(file_path, "rb") as pdf_file:
+        files = {"files": ("2206.01062v1.pdf", pdf_file, "application/pdf")}
+        response = await client.post(endpoint, files=files, data=options)
+
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
+        namelist = zip_file.namelist()
+        for file in namelist:
+            if file.endswith(".json"):
+                doc = DoclingDocument.model_validate(json.loads(zip_file.read(file)))
+                for item, _level in doc.iterate_items():
+                    if isinstance(item, PictureItem):
+                        assert item.image is not None
+                        # `=` belongs inside the braces to get the
+                        # "expr=value" debug output.
+                        print(f"{item.image.uri=}")
+                        # A referenced image must be a member of the archive.
+                        assert str(item.image.uri) in namelist
--- a/tests/test_file_opts.py
+++ b/tests/test_file_opts.py
@@ -0,0 +1,77 @@
+import asyncio
+import json
+import os
+
+import pytest
+import pytest_asyncio
+from asgi_lifespan import LifespanManager
+from httpx import ASGITransport, AsyncClient
+
+from docling_core.types import DoclingDocument
+from docling_core.types.doc.document import PictureDescriptionData
+
+from docling_serve.app import create_app
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Session-scoped fixture returning `asyncio.get_event_loop()`."""
+    return asyncio.get_event_loop()
+
+
+@pytest_asyncio.fixture(scope="session")
+async def app():
+    """Session-scoped fixture yielding the app built by `create_app()`.
+
+    The app is wrapped in `LifespanManager`, and `manager.app` is yielded
+    while that context is open.
+    """
+    app = create_app()
+
+    async with LifespanManager(app) as manager:
+        print("Launching lifespan of app.")
+        yield manager.app
+
+
+@pytest_asyncio.fixture(scope="session")
+async def client(app):
+    """Session-scoped `AsyncClient` that talks to `app` via `ASGITransport`.
+
+    Requests use the base URL "http://app.io"; the client is closed when
+    the fixture is torn down.
+    """
+    async with AsyncClient(
+        transport=ASGITransport(app=app), base_url="http://app.io"
+    ) as client:
+        print("Client is ready")
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_file(client: AsyncClient):
+    """Test converting a single file with picture-description API options.
+
+    `picture_description_api` is sent JSON-encoded as a multipart form
+    field. The md/json conversion is requested, the response is validated
+    as a `DoclingDocument`, and any picture descriptions found are printed.
+    """
+
+    endpoint = "/v1/convert/file"
+    options = {
+        "to_formats": ["md", "json"],
+        "image_export_mode": "placeholder",
+        "ocr": False,
+        "do_picture_description": True,
+        "picture_description_api": json.dumps(
+            {
+                "url": "http://localhost:11434/v1/chat/completions",  # ollama
+                "params": {"model": "granite3.2-vision:2b"},
+                "timeout": 60,
+                "prompt": "Describe this image in a few sentences. ",
+            }
+        ),
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    # Keep the PDF open only for the duration of the upload so the handle is
+    # closed deterministically instead of leaking until garbage collection.
+    with open(file_path, "rb") as pdf_file:
+        files = {"files": ("2206.01062v1.pdf", pdf_file, "application/pdf")}
+        response = await client.post(endpoint, files=files, data=options)
+
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    data = response.json()
+
+    doc = DoclingDocument.model_validate(data["document"]["json_content"])
+
+    # Print whatever descriptions were attached; nothing is asserted on them.
+    for pic in doc.pictures:
+        for ann in pic.annotations:
+            if isinstance(ann, PictureDescriptionData):
+                print(f"{pic.self_ref}")
+                print(ann.text)
--- a/tests/test_results_clear.py
+++ b/tests/test_results_clear.py
@@ -0,0 +1,133 @@
+import asyncio
+import base64
+import json
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+from asgi_lifespan import LifespanManager
+from httpx import ASGITransport, AsyncClient
+
+from docling_serve.app import create_app
+from docling_serve.settings import docling_serve_settings
+
+
+@pytest.fixture(scope="session")
+def event_loop():
+    """Session-scoped fixture returning `asyncio.get_event_loop()`."""
+    return asyncio.get_event_loop()
+
+
+@pytest_asyncio.fixture(scope="session")
+async def app():
+    """Session-scoped fixture yielding the app built by `create_app()`.
+
+    The app is wrapped in `LifespanManager`, and `manager.app` is yielded
+    while that context is open.
+    """
+    app = create_app()
+
+    async with LifespanManager(app) as manager:
+        print("Launching lifespan of app.")
+        yield manager.app
+
+
+@pytest_asyncio.fixture(scope="session")
+async def client(app):
+    """Session-scoped `AsyncClient` that talks to `app` via `ASGITransport`.
+
+    Requests use the base URL "http://app.io"; the client is closed when
+    the fixture is torn down.
+    """
+    async with AsyncClient(
+        transport=ASGITransport(app=app), base_url="http://app.io"
+    ) as client:
+        print("Client is ready")
+        yield client
+
+
+async def convert_file(client: AsyncClient):
+    """Submit a PDF for async conversion and poll until the task finishes.
+
+    The document is sent base64-encoded as a `file` source to
+    `/v1/convert/source/async`; the task status is then polled every two
+    seconds until it is "success" or "failure".
+
+    Returns:
+        The last task status payload (a dict); its status is asserted to be
+        "success" before returning.
+    """
+    # Resolve the fixture next to this test module so the helper does not
+    # depend on the directory pytest was started from.
+    doc_filename = Path(__file__).parent / "2408.09869v5.pdf"
+    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
+
+    payload = {
+        "options": {
+            "to_formats": ["json"],
+        },
+        "sources": [
+            {
+                "kind": "file",
+                "base64_string": encoded_doc,
+                "filename": doc_filename.name,
+            }
+        ],
+    }
+
+    response = await client.post("/v1/convert/source/async", json=payload)
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await client.get(f"/v1/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        await asyncio.sleep(2)
+
+    assert task["task_status"] == "success"
+
+    return task
+
+
+@pytest.mark.asyncio
+async def test_clear_results(client: AsyncClient, monkeypatch: pytest.MonkeyPatch):
+    """Test removal of task.
+
+    A converted result must stay retrievable across repeated reads, and
+    must return 404 once `/v1/clear/results` has been called.
+    """
+
+    # Set long delay deletion. monkeypatch restores the previous value when
+    # the test ends, so the global settings object is not left modified for
+    # later tests.
+    monkeypatch.setattr(docling_serve_settings, "result_removal_delay", 100)
+
+    # Convert and wait for completion
+    task = await convert_file(client)
+
+    # Get result once
+    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    assert result_response.status_code == 200, "Response should be 200 OK"
+    print("Result 1 ok.")
+    result = result_response.json()
+    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
+
+    # Get result twice
+    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    assert result_response.status_code == 200, "Response should be 200 OK"
+    print("Result 2 ok.")
+    result = result_response.json()
+    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
+
+    # Clear
+    clear_response = await client.get("/v1/clear/results?older_then=0")
+    assert clear_response.status_code == 200, "Response should be 200 OK"
+    print("Clear ok.")
+
+    # Get deleted result
+    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    assert result_response.status_code == 404, "Response should be removed"
+    print("Result was no longer found.")
+
+
+@pytest.mark.asyncio
+async def test_delay_remove(client: AsyncClient, monkeypatch: pytest.MonkeyPatch):
+    """Test automatic removal of task with delay.
+
+    The result is fetched once, then fetched again after a 10 second wait
+    and expected to return 404.
+    """
+
+    # Set short delay deletion. monkeypatch restores the previous value when
+    # the test ends, so the global settings object is not left modified for
+    # later tests.
+    monkeypatch.setattr(docling_serve_settings, "result_removal_delay", 5)
+
+    # Convert and wait for completion
+    task = await convert_file(client)
+
+    # Get result once
+    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    assert result_response.status_code == 200, "Response should be 200 OK"
+    print("Result ok.")
+    result = result_response.json()
+    assert result["document"]["json_content"]["schema_name"] == "DoclingDocument"
+
+    print("Sleeping to wait the automatic task deletion.")
+    await asyncio.sleep(10)
+
+    # Get deleted result
+    result_response = await client.get(f"/v1/result/{task['task_id']}")
+    assert result_response.status_code == 404, "Response should be removed"
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
github-actions[bot]	3bd7828570	chore: bump version to 1.2.1 [skip ci]	2025-08-13 07:37:55 +00:00
Michele Dolfi	8b470cba8e	fix: handling of vlm model options and update deps (#314 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-13 09:32:21 +02:00
Tiago Santana	8048f4589a	fix: add missing response type in sync endpoints (#309 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-08-08 12:32:19 +02:00
Thomas Vitale	b3058e91e0	docs: Update readme to use v1 (#306 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 09:02:29 +02:00
Thomas Vitale	63da9eedeb	docs: Update deployment examples to use v1 API (#308 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 08:47:59 +02:00
Thomas Vitale	b15dc2529f	docs: Fix typo in v1 migration instructions (#307 ) Signed-off-by: Thomas Vitale <ThomasVitale@users.noreply.github.com>	2025-08-08 08:44:09 +02:00
github-actions[bot]	4c7207be00	chore: bump version to 1.2.0 [skip ci]	2025-08-07 09:20:10 +00:00
Michele Dolfi	db3fdb5bc1	feat: workers without shared models and convert params (#304 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-08-07 11:16:06 +02:00
Rui Dias Gomes	fd1b987e8d	feat: add rocm image build support and fix cuda (#292 ) Signed-off-by: rmdg88 <rmdg88@gmail.com> Signed-off-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com> Co-authored-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com>	2025-07-31 14:22:42 +02:00
github-actions[bot]	ce15e0302b	chore: bump version to 1.1.0 [skip ci]	2025-07-30 15:53:01 +00:00
Michele Dolfi	ecb1874a50	feat: Add docling-mcp in the distribution (#290 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-30 15:39:11 +02:00
Michele Dolfi	1333f71c9c	fix: referenced paths relative to zip root (#289 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-30 14:49:26 +02:00
Tiago Santana	ec594d84fe	feat: add 3.0 openapi endpoint (#287 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-07-30 14:08:59 +02:00
Tiago Santana	3771c1b554	feat: add new source and target (#270 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-07-29 14:44:49 +02:00
github-actions[bot]	24db461b14	chore: bump version to 1.0.1 [skip ci]	2025-07-21 07:34:14 +00:00
Michele Dolfi	8706706e87	fix: docling update v2.42.0 (#277 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-21 08:47:40 +02:00
Michele Dolfi	766adb2481	docs: typo in README (#276 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-18 14:37:54 +02:00
Michele Dolfi	8222cf8955	ci: add spellchecker with custom vocabulary and fix typos (#268 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-15 14:17:35 +02:00
github-actions[bot]	b922824e5b	chore: bump version to 1.0.0 [skip ci]	2025-07-14 11:25:06 +00:00
Michele Dolfi	56e328baf7	feat!: v1 api with list of sources and target (#249 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-14 13:19:49 +02:00
Michele Dolfi	daa924a77e	feat!: use orchestrators from jobkit (#248 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-10 15:47:22 +02:00
Eugene	e63197e89e	chore: bump uv to 0.7.19 in container (#266 ) Signed-off-by: Eugene <fogaprod@gmail.com>	2025-07-10 15:10:21 +02:00
github-actions[bot]	767ce0982b	chore: bump version to 0.16.1 [skip ci]	2025-07-07 16:17:50 +00:00
Michele Dolfi	bfde1a0991	fix: upgrade deps including, docling v2.40.0 with locks in models init (#264 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-07-07 17:13:45 +02:00
VIktor Kuropiantnyk	eb3892ee14	fix: missing tesseract osd (#263 ) Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>	2025-07-07 16:36:43 +02:00
tassadarliu	93b84712b2	docs: fix typo (#259 ) Signed-off-by: tassadarliu <rhapsodyn@gmail.com>	2025-07-07 08:47:34 +02:00
Yishen Miao	c45b937064	docs: change the doc example (#258 ) Signed-off-by: Yishen Miao <mys721tx@gmail.com>	2025-07-07 08:47:21 +02:00
Francisco Arceo	50e431f30f	docs: Update typo (#247 ) Signed-off-by: Francisco Arceo <arceofrancisco@gmail.com>	2025-06-27 16:58:37 +02:00
Michele Dolfi	149a8cb1c0	fix: properly load models at boot (#244 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-27 12:20:38 +02:00
github-actions[bot]	5f9c20a985	chore: bump version to 0.16.0 [skip ci]	2025-06-25 09:52:08 +00:00
Michele Dolfi	80755a7d59	docs: Update example resources and improve README (#231 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-25 07:56:14 +02:00
Michele Dolfi	30aca92298	feat: package updates and more cuda images (#229 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-24 16:59:05 +02:00
github-actions[bot]	717fb3a8d8	chore: bump version to 0.15.0 [skip ci]	2025-06-17 15:00:38 +00:00
Michele Dolfi	873d05aefe	feat: use redocs and scalar as api docs (#228 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-17 16:54:00 +02:00
Ryan Fernandes	196c5ce42a	fix: "tesserocr" instead of "tesseract_cli" in usage docs (#223 ) Signed-off-by: Ryan Fernandes <ryan@fernandes.us>	2025-06-17 16:53:51 +02:00
github-actions[bot]	b5c5f47892	chore: bump version to 0.14.0 [skip ci]	2025-06-17 13:10:27 +00:00
23Ro	d5455b7f66	fix: Typo in Headline (#220 ) Signed-off-by: 23Ro <m.n@23ro.de>	2025-06-17 14:55:27 +02:00
Michele Dolfi	7a682494d6	chore: dco advisor (#224 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-17 09:38:56 +02:00
Eugene	524f6a8997	feat: Read supported file extensions from docling (#214 ) Signed-off-by: Eugene <fogaprod@gmail.com>	2025-06-05 09:38:28 +02:00
github-actions[bot]	9ccf8e3b5e	chore: bump version to 0.13.0 [skip ci]	2025-06-04 12:24:40 +00:00
Michele Dolfi	ffea34732b	feat: upgrade docling to 2.36 (#212 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-04 14:20:34 +02:00
github-actions[bot]	b299af002b	chore: bump version to 0.12.0 [skip ci]	2025-06-03 16:30:28 +00:00
Michele Dolfi	c4c41f16df	feat: Export annotations in markdown and html (Docling upgrade) (#202 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-03 18:24:27 +02:00
Michele Dolfi	7066f3520a	fix: processing complex params in multipart-form (#210 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-06-03 18:24:05 +02:00
Rui Dias Gomes	6a8190c315	docs: add openshift replicasets examples (#209 ) Signed-off-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com> Co-authored-by: Rui-Dias-Gomes <rui.dias.gomes@ibm.com>	2025-06-03 17:43:41 +02:00
github-actions[bot]	060ecd8b0e	chore: bump version to 0.11.0 [skip ci]	2025-05-23 13:45:54 +00:00
Michele Dolfi	32b8a809f3	feat: page break placeholder in markdown exports options (#194 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-05-23 15:26:27 +02:00
Michele Dolfi	de002dfcdc	feat: clear results registry (#192 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-05-23 14:30:57 +02:00
Michele Dolfi	abe5aa03f5	feat: Upgrade to Docling 2.33.0 (#198 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-05-22 17:00:29 +02:00
VIktor Kuropiantnyk	3f090b7d15	docs: Example and instructions on how to load model weights to persistent volume (#197 ) Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>	2025-05-21 13:04:46 +02:00
Michele Dolfi	21c1791e42	docs: async api usage and fixes (#195 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-05-19 13:57:35 +02:00
Michele Dolfi	00be428490	feat: api to trigger offloading the models (#188 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-05-14 15:02:18 +02:00
Kasper Dinkla	3ff1b2f983	feat: Figure annotations @ docling components 0.0.7 (#181 ) Signed-off-by: DKL <dkl@zurich.ibm.com>	2025-05-08 16:31:10 +02:00
Michele Dolfi	8406fb9b59	fix: usage of hashlib for FIPS (#171 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-05-02 15:00:10 +02:00
github-actions[bot]	a2dcb0a20f	chore: bump version to 0.10.1 [skip ci]	2025-04-30 16:04:30 +00:00
Michele Dolfi	36787bc061	fix: avoid missing specialized keys in the options hash (#166 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-04-30 13:14:34 +02:00
Michele Dolfi	509f4889f8	fix: allow users to set the area threshold for picture descriptions (#165 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>	2025-04-30 12:37:24 +02:00
Michele Dolfi	919cf5c041	fix: expose max wait time in sync endpoints (#164 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-04-30 12:30:11 +02:00
Michele Dolfi	35c2630c61	fix: add flash-attn for cuda images (#161 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-04-29 16:58:33 +02:00