chore: bump version to 0.5.0 [skip ci]

chore: Remove deprecated type aliases and run as pre-commit (#79 )
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 08:33:50 +00:00 · 2025-03-07 17:24:16 +00:00 · 2025-03-07 15:46:52 +01:00 · 2025-03-07 15:33:21 +01:00 · 2025-03-07 11:26:50 +01:00 · 2025-03-07 09:28:05 +01:00
35 changed files with 1102 additions and 483 deletions
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -8,8 +8,24 @@ concurrency:
  cancel-in-progress: true

 jobs:
-  build_cpu_image:
-    name: Build docling-serve "CPU only" container image
+  build_image:
+    name: Build ${{ matrix.spec.name }} container image
+    strategy:
+      matrix:
+        spec:
+          - name: ds4sd/docling-serve
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
+            platforms: linux/amd64, linux/arm64
+          - name: ds4sd/docling-serve-cpu
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra cu124
+            platforms: linux/amd64, linux/arm64
+          - name: ds4sd/docling-serve-cu124
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra cpu
+            platforms: linux/amd64
+
    permissions:
      packages: write
      contents: read
@@ -19,25 +35,7 @@ jobs:
    uses: ./.github/workflows/job-image.yml
    with:
      publish: false
-      build_args: |
-        UV_SYNC_EXTRA_ARGS=--no-extra cu124
-      ghcr_image_name: ds4sd/docling-serve-cpu
-      quay_image_name: ""
-
-
-  build_gpu_image:
-    name: Build docling-serve (with GPU support) container image
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
-
-    uses: ./.github/workflows/job-image.yml
-    with:
-      publish: false
-      build_args: |
-        UV_SYNC_EXTRA_ARGS=--no-extra cpu
-      platforms: linux/amd64
-      ghcr_image_name: ds4sd/docling-serve
+      build_args: ${{ matrix.spec.build_args }}
+      ghcr_image_name: ${{ matrix.spec.name }}
      quay_image_name: ""
+      platforms: ${{ matrix.spec.platforms }}
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -4,24 +4,32 @@ on:
  push:
    branches:
      - main
-    tags:
-      - 'v*'
-
-# env:
-#   GHCR_REGISTRY: ghcr.io
-#   # GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-#   # GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
-#   QUAY_REGISTRY: quay.io
-#   # QUAY_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-#   # QUAY_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
+  release:
+    types: [published]

 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

 jobs:
-  build_and_publish_cpu_images:
-    name: Push docling-serve "CPU only" container image to GHCR and QUAY
+  build_and_publish_images:
+    name: Build and push ${{ matrix.spec.name }} container image to GHCR and QUAY
+    strategy:
+      matrix:
+        spec:
+          - name: ds4sd/docling-serve
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu
+            platforms: linux/amd64, linux/arm64
+          - name: ds4sd/docling-serve-cpu
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra cu124
+            platforms: linux/amd64, linux/arm64
+          - name: ds4sd/docling-serve-cu124
+            build_args: |
+              UV_SYNC_EXTRA_ARGS=--no-extra cpu
+            platforms: linux/amd64
+
    permissions:
      packages: write
      contents: read
@@ -33,28 +41,7 @@ jobs:
    with:
      publish: true
      environment: registry-creds
-      build_args: |
-        UV_SYNC_EXTRA_ARGS=--no-extra cu124
-      ghcr_image_name: ds4sd/docling-serve-cpu
-      quay_image_name: ds4sd/docling-serve-cpu
-
-
-  build_and_publish_gpu_images:
-    name: Push docling-serve (with GPU support) container image to GHCR and QUAY
-    permissions:
-      packages: write
-      contents: read
-      attestations: write
-      id-token: write
-    secrets: inherit
-
-    uses: ./.github/workflows/job-image.yml
-    with:
-      publish: true
-      environment: registry-creds
-      build_args: |
-        UV_SYNC_EXTRA_ARGS=--no-extra cpu
-      platforms: linux/amd64
-      ghcr_image_name: ds4sd/docling-serve
-      quay_image_name: ds4sd/docling-serve
-
+      build_args: ${{ matrix.spec.build_args }}
+      ghcr_image_name: ${{ matrix.spec.name }}
+      quay_image_name: ${{ matrix.spec.name }}
+      platforms: ${{ matrix.spec.platforms }}
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -28,11 +28,7 @@ on:

 env:
  GHCR_REGISTRY: ghcr.io
-  # GHCR_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  # GHCR_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve
  QUAY_REGISTRY: quay.io
-  # QUAY_DOCLING_SERVE_CPU_IMAGE_NAME: ds4sd/docling-serve-cpu
-  # QUAY_DOCLING_SERVE_GPU_IMAGE_NAME: ds4sd/docling-serve

 jobs:
  image:
@@ -135,6 +131,10 @@ jobs:
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
+      
+      # - name: Inspect the image details
+      #   run: |
+      #     echo "${{ steps.ghcr_push.outputs.metadata }}"

      - name: Remove Local Docker Images
        run: |
--- a/.markdownlint-cli2.yaml
+++ b/.markdownlint-cli2.yaml
@@ -4,5 +4,7 @@ config:
  first-line-heading: false
  MD033:
    allowed_elements: ["details", "summary"]
+  MD024:
+    siblings_only: true
 globs:
  - "**/*.md"
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,14 @@
 fail_fast: true
 repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.9.6
+    hooks:
+      # Run the Ruff formatter.
+      - id: ruff-format
+        args: [--config=pyproject.toml]
+      # Run the Ruff linter.
+      - id: ruff
+        args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml]
  - repo: local
    hooks:
      - id: system
@@ -13,12 +22,3 @@ repos:
    rev: 0.6.1
    hooks:
      - id: uv-lock
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.6
-    hooks:
-      # Run the Ruff linter.
-      - id: ruff
-        args: [--exit-non-zero-on-fix, --config=pyproject.toml]
-      # Run the Ruff formatter.
-      # - id: ruff-format
-      #   args: [--config=pyproject.toml]
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,22 @@
+## [v0.5.0](https://github.com/DS4SD/docling-serve/releases/tag/v0.5.0) - 2025-03-07
+
+### Feature
+
+* Async api ([#60](https://github.com/DS4SD/docling-serve/issues/60)) ([`82f8900`](https://github.com/DS4SD/docling-serve/commit/82f890019745859699c1b01f9ccfb64cb7e37906))
+* Display version in fastapi docs ([#78](https://github.com/DS4SD/docling-serve/issues/78)) ([`ed851c9`](https://github.com/DS4SD/docling-serve/commit/ed851c95fee5f59305ddc3dcd5c09efce618470b))
+
+### Fix
+
+* Remove uv from image, merge ARG and ENV declarations ([#57](https://github.com/DS4SD/docling-serve/issues/57)) ([`c95db36`](https://github.com/DS4SD/docling-serve/commit/c95db3643807a4dfb96d93c8e10d6eb486c49a30))
+* **docs:** Remove comma in convert/source curl example ([#73](https://github.com/DS4SD/docling-serve/issues/73)) ([`05df073`](https://github.com/DS4SD/docling-serve/commit/05df0735d35a589bdc2a11fcdd764a10f700cb6f))
+
+## [v0.4.0](https://github.com/DS4SD/docling-serve/releases/tag/v0.4.0) - 2025-02-26
+
+### Feature
+
+* New container images ([#68](https://github.com/DS4SD/docling-serve/issues/68)) ([`7e6d9cd`](https://github.com/DS4SD/docling-serve/commit/7e6d9cdef398df70a5b4d626aeb523c428c10d56))
+* Render DoclingDocument with npm docling-components in the example UI ([#65](https://github.com/DS4SD/docling-serve/issues/65)) ([`c430d9b`](https://github.com/DS4SD/docling-serve/commit/c430d9b1a162ab29104d86ebaa1ac5a5488b1f09))
+
 ## [v0.3.0](https://github.com/DS4SD/docling-serve/releases/tag/v0.3.0) - 2025-02-19

 ### Feature
--- a/44
+++ b/44
@@ -2,8 +2,8 @@ ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

 FROM ${BASE_IMAGE}

-ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
-ARG UV_SYNC_EXTRA_ARGS=""
+ARG MODELS_LIST="layout tableformer picture_classifier easyocr" \
+    UV_SYNC_EXTRA_ARGS=""

 USER 0

@@ -22,8 +22,6 @@ RUN --mount=type=bind,source=os-packages.txt,target=/tmp/os-packages.txt \

 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

-COPY --from=ghcr.io/astral-sh/uv:0.6.1 /uv /uvx /bin/
-
 ###################################################################################################
 # Docling layer                                                                                   #
 ###################################################################################################
@@ -32,30 +30,34 @@ USER 1001

 WORKDIR /opt/app-root/src

-# On container environments, always set a thread budget to avoid undesired thread congestion.
-ENV OMP_NUM_THREADS=4
+ENV \
+    # On container environments, always set a thread budget to avoid undesired thread congestion.
+    OMP_NUM_THREADS=4 \
+    LANG=en_US.UTF-8 \
+    LC_ALL=en_US.UTF-8 \
+    PYTHONIOENCODING=utf-8 \
+    UV_COMPILE_BYTECODE=1 \
+    UV_LINK_MODE=copy \
+    UV_PROJECT_ENVIRONMENT=/opt/app-root \
+    DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models

-ENV LANG=en_US.UTF-8
-ENV LC_ALL=en_US.UTF-8
-ENV PYTHONIOENCODING=utf-8
-ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
-ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
-
-ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
-
-COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
-
-RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
-    uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}   # --no-extra ${NO_EXTRA}
+RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
+    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}

 RUN echo "Downloading models..." && \
    docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
    chown -R 1001:0 /opt/app-root/src/.cache && \
    chmod -R g=u /opt/app-root/src/.cache

-COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve
-RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
-    uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}   # --no-extra ${NO_EXTRA}
+COPY --chown=1001:0 ./docling_serve ./docling_serve
+RUN --mount=from=ghcr.io/astral-sh/uv:0.6.1,source=/uv,target=/bin/uv \
+    --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}

 EXPOSE 5001

--- a/19
+++ b/19
@@ -24,6 +24,13 @@ action-lint-file:
 md-lint-file:
 	$(CMD_PREFIX) touch .markdown-lint

+.PHONY: docling-serve-image
+docling-serve-image: Containerfile
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124 --no-extra cpu" -f Containerfile -t ghcr.io/ds4sd/docling-serve:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) quay.io/ds4sd/docling-serve:main
+
 .PHONY: docling-serve-cpu-image
 docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU]"
@@ -31,12 +38,12 @@ docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" contain
 	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
 	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main

-.PHONY: docling-serve-gpu-image
-docling-serve-gpu-image: Containerfile ## Build docling-serve container image with GPU support
-	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with GPU]"
-	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
-	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) quay.io/ds4sd/docling-serve:main
+.PHONY: docling-serve-cu124-image
+docling-serve-cu124-image: Containerfile ## Build docling-serve container image with GPU support
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with Cuda 12.4]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cu124:$(TAG) .
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cu124:$(TAG) ghcr.io/ds4sd/docling-serve-cu124:main
+	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cu124:$(TAG) quay.io/ds4sd/docling-serve-cu124:main

 .PHONY: action-lint
 action-lint: .action-lint ##      Lint GitHub Action workflows
--- a/README.md
+++ b/README.md
@@ -104,7 +104,7 @@ curl -X 'POST' \
    "return_as_file": false,
    "do_table_structure": true,
    "include_images": true,
-    "images_scale": 2,
+    "images_scale": 2
  },
  "http_sources": [{"url": "https://arxiv.org/pdf/2206.01062"}]
 }'
@@ -276,6 +276,17 @@ The response can be a JSON Document or a File.
 - If you set the parameter `return_as_file` to True, the response will be a zip file.
 - If multiple files are generated (multiple inputs, or one input but multiple outputs with `return_as_file` True), the response will be a zip file.

+## Run docling-serve
+
+Clone the repository and run the following from within the cloned directory root.
+
+```bash
+python -m venv venv
+source venv/bin/activate
+pip install "docling-serve[ui]"
+docling-serve run --enable-ui
+```
+
 ## Helpers

 - A full Swagger UI is available at the `/docs` endpoint.
@@ -323,7 +334,7 @@ uv sync --extra ui --extra rapidocr
 uv sync --extra tesserocr
 ```

-See `[project.optional-dependencies]` section in `pyproject.toml` for full list of options.
+See `[project.optional-dependencies]` section in `pyproject.toml` for full list of options and runtime options with `uv run docling-serve --help`.

 ### Run the server

--- a/docling_serve/main.py
+++ b/docling_serve/main.py
@@ -1,4 +1,4 @@
-import importlib
+import importlib.metadata
 import logging
 import platform
 import sys
@@ -51,9 +51,7 @@ def version_callback(value: bool) -> None:
 def callback(
    version: Annotated[
        Union[bool, None],
-        typer.Option(
-            "--version", help="Show the version and exit.", callback=version_callback
-        ),
+        typer.Option(help="Show the version and exit.", callback=version_callback),
    ] = None,
    verbose: Annotated[
        int,
@@ -298,5 +296,4 @@ def main() -> None:

 # Launch the CLI when calling python -m docling_serve
 if __name__ == "__main__":
-
    main()
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -1,27 +1,52 @@
+import asyncio
+import importlib.metadata
 import logging
 import tempfile
 from contextlib import asynccontextmanager
 from io import BytesIO
 from pathlib import Path
-from typing import Annotated, Any, Dict, List, Optional, Union
+from typing import Annotated, Any, Optional, Union
+
+from fastapi import (
+    BackgroundTasks,
+    Depends,
+    FastAPI,
+    HTTPException,
+    Query,
+    UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
+)
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse

 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.document_converter import DocumentConverter
-from fastapi import BackgroundTasks, FastAPI, UploadFile
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import RedirectResponse
-from pydantic import BaseModel

-from docling_serve.docling_conversion import (
+from docling_serve.datamodel.convert import ConvertDocumentsOptions
+from docling_serve.datamodel.requests import (
    ConvertDocumentFileSourcesRequest,
-    ConvertDocumentsOptions,
    ConvertDocumentsRequest,
+)
+from docling_serve.datamodel.responses import (
+    ConvertDocumentResponse,
+    HealthCheckResponse,
+    MessageKind,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+from docling_serve.docling_conversion import (
    convert_documents,
    converters,
    get_pdf_pipeline_opts,
 )
+from docling_serve.engines import get_orchestrator
+from docling_serve.engines.async_local.orchestrator import (
+    AsyncLocalOrchestrator,
+    TaskNotFoundError,
+)
 from docling_serve.helper_functions import FormDepends
-from docling_serve.response_preparation import ConvertDocumentResponse, process_results
+from docling_serve.response_preparation import process_results
 from docling_serve.settings import docling_serve_settings


@@ -60,7 +85,6 @@ _log = logging.getLogger(__name__)
 # Context manager to initialize and clean up the lifespan of the FastAPI app
@asynccontextmanager
 async def lifespan(app: FastAPI):
-
    # Converter with default options
    pdf_format_option, options_hash = get_pdf_pipeline_opts(ConvertDocumentsOptions())
    converters[options_hash] = DocumentConverter(
@@ -72,9 +96,22 @@ async def lifespan(app: FastAPI):

    converters[options_hash].initialize_pipeline(InputFormat.PDF)

+    orchestrator = get_orchestrator()
+
+    # Start the background queue processor
+    queue_task = asyncio.create_task(orchestrator.process_queue())
+
    yield

+    # Cancel the background queue processor on shutdown
+    queue_task.cancel()
+    try:
+        await queue_task
+    except asyncio.CancelledError:
+        _log.info("Queue processor cancelled.")
+
    converters.clear()
+
    # if WITH_UI:
    #     gradio_ui.close()

@@ -84,10 +121,18 @@ async def lifespan(app: FastAPI):
 ##################################


-def create_app():
+def create_app():  # noqa: C901
+    try:
+        version = importlib.metadata.version("docling_serve")
+    except importlib.metadata.PackageNotFoundError:
+        _log.warning("Unable to get docling_serve version, falling back to 0.0.0")
+
+        version = "0.0.0"
+
    app = FastAPI(
        title="Docling Serve",
        lifespan=lifespan,
+        version=version,
    )

    origins = ["*"]
@@ -104,7 +149,6 @@ def create_app():

    # Mount the Gradio app
    if docling_serve_settings.enable_ui:
-
        try:
            import gradio as gr

@@ -138,10 +182,6 @@ def create_app():
        )
        return response

-    # Status
-    class HealthCheckResponse(BaseModel):
-        status: str = "ok"
-
    @app.get("/health")
    def health() -> HealthCheckResponse:
        return HealthCheckResponse()
@@ -165,8 +205,8 @@ def create_app():
    def process_url(
        background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
    ):
-        sources: List[Union[str, DocumentStream]] = []
-        headers: Optional[Dict[str, Any]] = None
+        sources: list[Union[str, DocumentStream]] = []
+        headers: Optional[dict[str, Any]] = None
        if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
            for file_source in conversion_request.file_sources:
                sources.append(file_source.to_document_stream())
@@ -202,12 +242,11 @@ def create_app():
    )
    async def process_file(
        background_tasks: BackgroundTasks,
-        files: List[UploadFile],
+        files: list[UploadFile],
        options: Annotated[
            ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
        ],
    ):
-
        _log.info(f"Received {len(files)} files for processing.")

        # Load the uploaded files to Docling DocumentStream
@@ -227,4 +266,129 @@ def create_app():

        return response

+    # Convert a document from URL(s) using the async api
+    @app.post(
+        "/v1alpha/convert/source/async",
+        response_model=TaskStatusResponse,
+    )
+    async def process_url_async(
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        conversion_request: ConvertDocumentsRequest,
+    ):
+        task = await orchestrator.enqueue(request=conversion_request)
+        task_queue_position = await orchestrator.get_queue_position(
+            task_id=task.task_id
+        )
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+        )
+
+    # Task status poll
+    @app.get(
+        "/v1alpha/status/poll/{task_id}",
+        response_model=TaskStatusResponse,
+    )
+    async def task_status_poll(
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        task_id: str,
+        wait: Annotated[
+            float, Query(help="Number of seconds to wait for a completed status.")
+        ] = 0.0,
+    ):
+        try:
+            task = await orchestrator.task_status(task_id=task_id, wait=wait)
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        return TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+        )
+
+    # Task status websocket
+    @app.websocket(
+        "/v1alpha/status/ws/{task_id}",
+    )
+    async def task_status_ws(
+        websocket: WebSocket,
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        task_id: str,
+    ):
+        await websocket.accept()
+
+        if task_id not in orchestrator.tasks:
+            await websocket.send_text(
+                WebsocketMessage(
+                    message=MessageKind.ERROR, error="Task not found."
+                ).model_dump_json()
+            )
+            await websocket.close()
+            return
+
+        task = orchestrator.tasks[task_id]
+
+        # Track active WebSocket connections for this job
+        orchestrator.task_subscribers[task_id].add(websocket)
+
+        try:
+            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
+            task_response = TaskStatusResponse(
+                task_id=task.task_id,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+            )
+            await websocket.send_text(
+                WebsocketMessage(
+                    message=MessageKind.CONNECTION, task=task_response
+                ).model_dump_json()
+            )
+            while True:
+                task_queue_position = await orchestrator.get_queue_position(
+                    task_id=task_id
+                )
+                task_response = TaskStatusResponse(
+                    task_id=task.task_id,
+                    task_status=task.task_status,
+                    task_position=task_queue_position,
+                )
+                await websocket.send_text(
+                    WebsocketMessage(
+                        message=MessageKind.UPDATE, task=task_response
+                    ).model_dump_json()
+                )
+                # each client message will be interpreted as a request for update
+                msg = await websocket.receive_text()
+                _log.debug(f"Received message: {msg}")
+
+        except WebSocketDisconnect:
+            _log.info(f"WebSocket disconnected for job {task_id}")
+
+        finally:
+            orchestrator.task_subscribers[task_id].remove(websocket)
+
+    # Task result
+    @app.get(
+        "/v1alpha/result/{task_id}",
+        response_model=ConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
+    )
+    async def task_result(
+        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        task_id: str,
+    ):
+        result = await orchestrator.task_result(task_id=task_id)
+        if result is None:
+            raise HTTPException(
+                status_code=404,
+                detail="Task result not found. Please wait for a completion status.",
+            )
+        return result
+
    return app
--- a/docling_serve/datamodel/init.py
+++ b/docling_serve/datamodel/init.py
--- a/docling_serve/datamodel/convert.py
+++ b/docling_serve/datamodel/convert.py
@@ -0,0 +1,174 @@
+# Define the input options for the API
+from typing import Annotated, Optional
+
+from pydantic import BaseModel, Field
+
+from docling.datamodel.base_models import InputFormat, OutputFormat
+from docling.datamodel.pipeline_options import OcrEngine, PdfBackend, TableFormerMode
+from docling_core.types.doc import ImageRefMode
+
+
+class ConvertDocumentsOptions(BaseModel):
+    from_formats: Annotated[
+        list[InputFormat],
+        Field(
+            description=(
+                "Input format(s) to convert from. String or list of strings. "
+                f"Allowed values: {', '.join([v.value for v in InputFormat])}. "
+                "Optional, defaults to all formats."
+            ),
+            examples=[[v.value for v in InputFormat]],
+        ),
+    ] = list(InputFormat)
+
+    to_formats: Annotated[
+        list[OutputFormat],
+        Field(
+            description=(
+                "Output format(s) to convert to. String or list of strings. "
+                f"Allowed values: {', '.join([v.value for v in OutputFormat])}. "
+                "Optional, defaults to Markdown."
+            ),
+            examples=[[OutputFormat.MARKDOWN]],
+        ),
+    ] = [OutputFormat.MARKDOWN]
+
+    image_export_mode: Annotated[
+        ImageRefMode,
+        Field(
+            description=(
+                "Image export mode for the document (in case of JSON,"
+                " Markdown or HTML). "
+                f"Allowed values: {', '.join([v.value for v in ImageRefMode])}. "
+                "Optional, defaults to Embedded."
+            ),
+            examples=[ImageRefMode.EMBEDDED.value],
+            # pattern="embedded|placeholder|referenced",
+        ),
+    ] = ImageRefMode.EMBEDDED
+
+    do_ocr: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, the bitmap content will be processed using OCR. "
+                "Boolean. Optional, defaults to true"
+            ),
+            # examples=[True],
+        ),
+    ] = True
+
+    force_ocr: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, replace existing text with OCR-generated "
+                "text over content. Boolean. Optional, defaults to false."
+            ),
+            # examples=[False],
+        ),
+    ] = False
+
+    # TODO: use a restricted list based on what is installed on the system
+    ocr_engine: Annotated[
+        OcrEngine,
+        Field(
+            description=(
+                "The OCR engine to use. String. "
+                "Allowed values: easyocr, tesseract, rapidocr. "
+                "Optional, defaults to easyocr."
+            ),
+            examples=[OcrEngine.EASYOCR],
+        ),
+    ] = OcrEngine.EASYOCR
+
+    ocr_lang: Annotated[
+        Optional[list[str]],
+        Field(
+            description=(
+                "List of languages used by the OCR engine. "
+                "Note that each OCR engine has "
+                "different values for the language names. String or list of strings. "
+                "Optional, defaults to empty."
+            ),
+            examples=[["fr", "de", "es", "en"]],
+        ),
+    ] = None
+
+    pdf_backend: Annotated[
+        PdfBackend,
+        Field(
+            description=(
+                "The PDF backend to use. String. "
+                f"Allowed values: {', '.join([v.value for v in PdfBackend])}. "
+                f"Optional, defaults to {PdfBackend.DLPARSE_V2.value}."
+            ),
+            examples=[PdfBackend.DLPARSE_V2],
+        ),
+    ] = PdfBackend.DLPARSE_V2
+
+    table_mode: Annotated[
+        TableFormerMode,
+        Field(
+            TableFormerMode.FAST,
+            description=(
+                "Mode to use for table structure, String. "
+                f"Allowed values: {', '.join([v.value for v in TableFormerMode])}. "
+                "Optional, defaults to fast."
+            ),
+            examples=[TableFormerMode.FAST],
+            # pattern="fast|accurate",
+        ),
+    ] = TableFormerMode.FAST
+
+    abort_on_error: Annotated[
+        bool,
+        Field(
+            description=(
+                "Abort on error if enabled. Boolean. Optional, defaults to false."
+            ),
+            # examples=[False],
+        ),
+    ] = False
+
+    return_as_file: Annotated[
+        bool,
+        Field(
+            description=(
+                "Return the output as a zip file "
+                "(will happen anyway if multiple files are generated). "
+                "Boolean. Optional, defaults to false."
+            ),
+            examples=[False],
+        ),
+    ] = False
+
+    do_table_structure: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, the table structure will be extracted. "
+                "Boolean. Optional, defaults to true."
+            ),
+            examples=[True],
+        ),
+    ] = True
+
+    include_images: Annotated[
+        bool,
+        Field(
+            description=(
+                "If enabled, images will be extracted from the document. "
+                "Boolean. Optional, defaults to true."
+            ),
+            examples=[True],
+        ),
+    ] = True
+
+    images_scale: Annotated[
+        float,
+        Field(
+            description="Scale factor for images. Float. Optional, defaults to 2.0.",
+            examples=[2.0],
+        ),
+    ] = 2.0
--- a/docling_serve/datamodel/engines.py
+++ b/docling_serve/datamodel/engines.py
@@ -0,0 +1,30 @@
+import enum
+from typing import Optional
+
+from pydantic import BaseModel
+
+from docling_serve.datamodel.requests import ConvertDocumentsRequest
+from docling_serve.datamodel.responses import ConvertDocumentResponse
+
+
+class TaskStatus(str, enum.Enum):
+    SUCCESS = "success"
+    PENDING = "pending"
+    STARTED = "started"
+    FAILURE = "failure"
+
+
+class AsyncEngine(str, enum.Enum):
+    LOCAL = "local"
+
+
+class Task(BaseModel):
+    task_id: str
+    task_status: TaskStatus = TaskStatus.PENDING
+    request: Optional[ConvertDocumentsRequest]
+    result: Optional[ConvertDocumentResponse] = None
+
+    def is_completed(self) -> bool:
+        if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:
+            return True
+        return False
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -0,0 +1,62 @@
+import base64
+from io import BytesIO
+from typing import Annotated, Any, Union
+
+from pydantic import BaseModel, Field
+
+from docling.datamodel.base_models import DocumentStream
+
+from docling_serve.datamodel.convert import ConvertDocumentsOptions
+
+
+class DocumentsConvertBase(BaseModel):
+    options: ConvertDocumentsOptions = ConvertDocumentsOptions()
+
+
+class HttpSource(BaseModel):
+    url: Annotated[
+        str,
+        Field(
+            description="HTTP url to process",
+            examples=["https://arxiv.org/pdf/2206.01062"],
+        ),
+    ]
+    headers: Annotated[
+        dict[str, Any],
+        Field(
+            description="Additional headers used to fetch the urls, "
+            "e.g. authorization, agent, etc"
+        ),
+    ] = {}
+
+
+class FileSource(BaseModel):
+    base64_string: Annotated[
+        str,
+        Field(
+            description="Content of the file serialized in base64. "
+            "For example it can be obtained via "
+            "`base64 -w 0 /path/to/file/pdf-to-convert.pdf`."
+        ),
+    ]
+    filename: Annotated[
+        str,
+        Field(description="Filename of the uploaded document", examples=["file.pdf"]),
+    ]
+
+    def to_document_stream(self) -> DocumentStream:
+        buf = BytesIO(base64.b64decode(self.base64_string))
+        return DocumentStream(stream=buf, name=self.filename)
+
+
+class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
+    http_sources: list[HttpSource]
+
+
+class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
+    file_sources: list[FileSource]
+
+
+ConvertDocumentsRequest = Union[
+    ConvertDocumentFileSourcesRequest, ConvertDocumentHttpSourcesRequest
+]
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -0,0 +1,52 @@
+import enum
+from typing import Optional
+
+from pydantic import BaseModel
+
+from docling.datamodel.document import ConversionStatus, ErrorItem
+from docling.utils.profiling import ProfilingItem
+from docling_core.types.doc import DoclingDocument
+
+
+# Status
+class HealthCheckResponse(BaseModel):
+    status: str = "ok"
+
+
+class DocumentResponse(BaseModel):
+    filename: str
+    md_content: Optional[str] = None
+    json_content: Optional[DoclingDocument] = None
+    html_content: Optional[str] = None
+    text_content: Optional[str] = None
+    doctags_content: Optional[str] = None
+
+
+class ConvertDocumentResponse(BaseModel):
+    document: DocumentResponse
+    status: ConversionStatus
+    errors: list[ErrorItem] = []
+    processing_time: float
+    timings: dict[str, ProfilingItem] = {}
+
+
+class ConvertDocumentErrorResponse(BaseModel):
+    status: ConversionStatus
+
+
+class TaskStatusResponse(BaseModel):
+    task_id: str
+    task_status: str
+    task_position: Optional[int] = None
+
+
+class MessageKind(str, enum.Enum):
+    CONNECTION = "connection"
+    UPDATE = "update"
+    ERROR = "error"
+
+
+class WebsocketMessage(BaseModel):
+    message: MessageKind
+    task: Optional[TaskStatusResponse] = None
+    error: Optional[str] = None
--- a/docling_serve/docling_conversion.py
+++ b/docling_serve/docling_conversion.py
@@ -1,27 +1,17 @@
-import base64
 import hashlib
 import json
 import logging
-from io import BytesIO
+from collections.abc import Iterable, Iterator
 from pathlib import Path
-from typing import (
-    Annotated,
-    Any,
-    Dict,
-    Iterable,
-    Iterator,
-    List,
-    Optional,
-    Tuple,
-    Type,
-    Union,
-)
+from typing import Any, Optional, Union
+
+from fastapi import HTTPException

 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
 from docling.backend.pdf_backend import PdfDocumentBackend
 from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
-from docling.datamodel.base_models import DocumentStream, InputFormat, OutputFormat
+from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
    EasyOcrOptions,
@@ -35,237 +25,16 @@ from docling.datamodel.pipeline_options import (
 )
 from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
 from docling_core.types.doc import ImageRefMode
-from fastapi import HTTPException
-from pydantic import BaseModel, Field

+from docling_serve.datamodel.convert import ConvertDocumentsOptions
 from docling_serve.helper_functions import _to_list_of_strings
 from docling_serve.settings import docling_serve_settings

 _log = logging.getLogger(__name__)


-# Define the input options for the API
-class ConvertDocumentsOptions(BaseModel):
-    from_formats: Annotated[
-        List[InputFormat],
-        Field(
-            description=(
-                "Input format(s) to convert from. String or list of strings. "
-                f"Allowed values: {', '.join([v.value for v in InputFormat])}. "
-                "Optional, defaults to all formats."
-            ),
-            examples=[[v.value for v in InputFormat]],
-        ),
-    ] = list(InputFormat)
-
-    to_formats: Annotated[
-        List[OutputFormat],
-        Field(
-            description=(
-                "Output format(s) to convert to. String or list of strings. "
-                f"Allowed values: {', '.join([v.value for v in OutputFormat])}. "
-                "Optional, defaults to Markdown."
-            ),
-            examples=[[OutputFormat.MARKDOWN]],
-        ),
-    ] = [OutputFormat.MARKDOWN]
-
-    image_export_mode: Annotated[
-        ImageRefMode,
-        Field(
-            description=(
-                "Image export mode for the document (in case of JSON,"
-                " Markdown or HTML). "
-                f"Allowed values: {', '.join([v.value for v in ImageRefMode])}. "
-                "Optional, defaults to Embedded."
-            ),
-            examples=[ImageRefMode.EMBEDDED.value],
-            # pattern="embedded|placeholder|referenced",
-        ),
-    ] = ImageRefMode.EMBEDDED
-
-    do_ocr: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, the bitmap content will be processed using OCR. "
-                "Boolean. Optional, defaults to true"
-            ),
-            # examples=[True],
-        ),
-    ] = True
-
-    force_ocr: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, replace existing text with OCR-generated "
-                "text over content. Boolean. Optional, defaults to false."
-            ),
-            # examples=[False],
-        ),
-    ] = False
-
-    # TODO: use a restricted list based on what is installed on the system
-    ocr_engine: Annotated[
-        OcrEngine,
-        Field(
-            description=(
-                "The OCR engine to use. String. "
-                "Allowed values: easyocr, tesseract, rapidocr. "
-                "Optional, defaults to easyocr."
-            ),
-            examples=[OcrEngine.EASYOCR],
-        ),
-    ] = OcrEngine.EASYOCR
-
-    ocr_lang: Annotated[
-        Optional[List[str]],
-        Field(
-            description=(
-                "List of languages used by the OCR engine. "
-                "Note that each OCR engine has "
-                "different values for the language names. String or list of strings. "
-                "Optional, defaults to empty."
-            ),
-            examples=[["fr", "de", "es", "en"]],
-        ),
-    ] = None
-
-    pdf_backend: Annotated[
-        PdfBackend,
-        Field(
-            description=(
-                "The PDF backend to use. String. "
-                f"Allowed values: {', '.join([v.value for v in PdfBackend])}. "
-                f"Optional, defaults to {PdfBackend.DLPARSE_V2.value}."
-            ),
-            examples=[PdfBackend.DLPARSE_V2],
-        ),
-    ] = PdfBackend.DLPARSE_V2
-
-    table_mode: Annotated[
-        TableFormerMode,
-        Field(
-            TableFormerMode.FAST,
-            description=(
-                "Mode to use for table structure, String. "
-                f"Allowed values: {', '.join([v.value for v in TableFormerMode])}. "
-                "Optional, defaults to fast."
-            ),
-            examples=[TableFormerMode.FAST],
-            # pattern="fast|accurate",
-        ),
-    ] = TableFormerMode.FAST
-
-    abort_on_error: Annotated[
-        bool,
-        Field(
-            description=(
-                "Abort on error if enabled. Boolean. Optional, defaults to false."
-            ),
-            # examples=[False],
-        ),
-    ] = False
-
-    return_as_file: Annotated[
-        bool,
-        Field(
-            description=(
-                "Return the output as a zip file "
-                "(will happen anyway if multiple files are generated). "
-                "Boolean. Optional, defaults to false."
-            ),
-            examples=[False],
-        ),
-    ] = False
-
-    do_table_structure: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, the table structure will be extracted. "
-                "Boolean. Optional, defaults to true."
-            ),
-            examples=[True],
-        ),
-    ] = True
-
-    include_images: Annotated[
-        bool,
-        Field(
-            description=(
-                "If enabled, images will be extracted from the document. "
-                "Boolean. Optional, defaults to true."
-            ),
-            examples=[True],
-        ),
-    ] = True
-
-    images_scale: Annotated[
-        float,
-        Field(
-            description="Scale factor for images. Float. Optional, defaults to 2.0.",
-            examples=[2.0],
-        ),
-    ] = 2.0
-
-
-class DocumentsConvertBase(BaseModel):
-    options: ConvertDocumentsOptions = ConvertDocumentsOptions()
-
-
-class HttpSource(BaseModel):
-    url: Annotated[
-        str,
-        Field(
-            description="HTTP url to process",
-            examples=["https://arxiv.org/pdf/2206.01062"],
-        ),
-    ]
-    headers: Annotated[
-        Dict[str, Any],
-        Field(
-            description="Additional headers used to fetch the urls, "
-            "e.g. authorization, agent, etc"
-        ),
-    ] = {}
-
-
-class FileSource(BaseModel):
-    base64_string: Annotated[
-        str,
-        Field(
-            description="Content of the file serialized in base64. "
-            "For example it can be obtained via "
-            "`base64 -w 0 /path/to/file/pdf-to-convert.pdf`."
-        ),
-    ]
-    filename: Annotated[
-        str,
-        Field(description="Filename of the uploaded document", examples=["file.pdf"]),
-    ]
-
-    def to_document_stream(self) -> DocumentStream:
-        buf = BytesIO(base64.b64decode(self.base64_string))
-        return DocumentStream(stream=buf, name=self.filename)
-
-
-class ConvertDocumentHttpSourcesRequest(DocumentsConvertBase):
-    http_sources: List[HttpSource]
-
-
-class ConvertDocumentFileSourcesRequest(DocumentsConvertBase):
-    file_sources: List[FileSource]
-
-
-ConvertDocumentsRequest = Union[
-    ConvertDocumentFileSourcesRequest, ConvertDocumentHttpSourcesRequest
-]
-
-
 # Document converters will be preloaded and stored in a dictionary
-converters: Dict[bytes, DocumentConverter] = {}
+converters: dict[bytes, DocumentConverter] = {}


 # Custom serializer for PdfFormatOption
@@ -301,7 +70,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
 # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
 def get_pdf_pipeline_opts(  # noqa: C901
    request: ConvertDocumentsOptions,
-) -> Tuple[PdfFormatOption, bytes]:
+) -> tuple[PdfFormatOption, bytes]:
    if request.ocr_engine == OcrEngine.EASYOCR:
        try:
            import easyocr  # noqa: F401
@@ -361,7 +130,7 @@ def get_pdf_pipeline_opts(  # noqa: C901
            pipeline_options.images_scale = request.images_scale

    if request.pdf_backend == PdfBackend.DLPARSE_V1:
-        backend: Type[PdfDocumentBackend] = DoclingParseDocumentBackend
+        backend: type[PdfDocumentBackend] = DoclingParseDocumentBackend
    elif request.pdf_backend == PdfBackend.DLPARSE_V2:
        backend = DoclingParseV2DocumentBackend
    elif request.pdf_backend == PdfBackend.PYPDFIUM2:
@@ -409,12 +178,12 @@ def get_pdf_pipeline_opts(  # noqa: C901
 def convert_documents(
    sources: Iterable[Union[Path, str, DocumentStream]],
    options: ConvertDocumentsOptions,
-    headers: Optional[Dict[str, Any]] = None,
+    headers: Optional[dict[str, Any]] = None,
 ):
    pdf_format_option, options_hash = get_pdf_pipeline_opts(options)

    if options_hash not in converters:
-        format_options: Dict[InputFormat, FormatOption] = {
+        format_options: dict[InputFormat, FormatOption] = {
            InputFormat.PDF: pdf_format_option,
            InputFormat.IMAGE: pdf_format_option,
        }
--- a/docling_serve/engines/init.py
+++ b/docling_serve/engines/init.py
@@ -0,0 +1,8 @@
+from functools import lru_cache
+
+from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
+
+
+@lru_cache
+def get_orchestrator() -> AsyncLocalOrchestrator:
+    return AsyncLocalOrchestrator()
--- a/docling_serve/engines/async_local/init.py
+++ b/docling_serve/engines/async_local/init.py
--- a/docling_serve/engines/async_local/orchestrator.py
+++ b/docling_serve/engines/async_local/orchestrator.py
@@ -0,0 +1,101 @@
+import asyncio
+import logging
+import uuid
+from typing import Optional
+
+from fastapi import WebSocket
+
+from docling_serve.datamodel.engines import Task, TaskStatus
+from docling_serve.datamodel.requests import ConvertDocumentsRequest
+from docling_serve.datamodel.responses import (
+    MessageKind,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+from docling_serve.engines.async_local.worker import AsyncLocalWorker
+from docling_serve.engines.base_orchestrator import BaseOrchestrator
+from docling_serve.settings import docling_serve_settings
+
+_log = logging.getLogger(__name__)
+
+
+class OrchestratorError(Exception):
+    pass
+
+
+class TaskNotFoundError(OrchestratorError):
+    pass
+
+
+class AsyncLocalOrchestrator(BaseOrchestrator):
+    def __init__(self):
+        self.task_queue = asyncio.Queue()
+        self.tasks: dict[str, Task] = {}
+        self.queue_list: list[str] = []
+        self.task_subscribers: dict[str, set[WebSocket]] = {}
+
+    async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
+        task_id = str(uuid.uuid4())
+        task = Task(task_id=task_id, request=request)
+        self.tasks[task_id] = task
+        self.queue_list.append(task_id)
+        self.task_subscribers[task_id] = set()
+        await self.task_queue.put(task_id)
+        return task
+
+    async def queue_size(self) -> int:
+        return self.task_queue.qsize()
+
+    async def get_queue_position(self, task_id: str) -> Optional[int]:
+        return (
+            self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
+        )
+
+    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
+        if task_id not in self.tasks:
+            raise TaskNotFoundError()
+        return self.tasks[task_id]
+
+    async def task_result(self, task_id: str):
+        if task_id not in self.tasks:
+            raise TaskNotFoundError()
+        return self.tasks[task_id].result
+
+    async def process_queue(self):
+        # Create a pool of workers
+        workers = []
+        for i in range(docling_serve_settings.eng_loc_num_workers):
+            _log.debug(f"Starting worker {i}")
+            w = AsyncLocalWorker(i, self)
+            worker_task = asyncio.create_task(w.loop())
+            workers.append(worker_task)
+
+        # Wait for all workers to complete (they won't, as they run indefinitely)
+        await asyncio.gather(*workers)
+        _log.debug("All workers completed.")
+
+    async def notify_task_subscribers(self, task_id: str):
+        if task_id not in self.task_subscribers:
+            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
+
+        task = self.tasks[task_id]
+        task_queue_position = await self.get_queue_position(task_id)
+        msg = TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+        )
+        for websocket in self.task_subscribers[task_id]:
+            await websocket.send_text(
+                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
+            )
+            if task.is_completed():
+                await websocket.close()
+
+    async def notify_queue_positions(self):
+        for task_id in self.task_subscribers.keys():
+            # notify only pending tasks
+            if self.tasks[task_id].task_status != TaskStatus.PENDING:
+                continue
+
+            await self.notify_task_subscribers(task_id)
--- a/docling_serve/engines/async_local/worker.py
+++ b/docling_serve/engines/async_local/worker.py
@@ -0,0 +1,116 @@
+import asyncio
+import logging
+import time
+from typing import TYPE_CHECKING, Any, Optional, Union
+
+from fastapi import BackgroundTasks
+
+from docling.datamodel.base_models import DocumentStream
+
+from docling_serve.datamodel.engines import TaskStatus
+from docling_serve.datamodel.requests import ConvertDocumentFileSourcesRequest
+from docling_serve.datamodel.responses import ConvertDocumentResponse
+from docling_serve.docling_conversion import convert_documents
+from docling_serve.response_preparation import process_results
+
+if TYPE_CHECKING:
+    from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
+
+_log = logging.getLogger(__name__)
+
+
+class AsyncLocalWorker:
+    def __init__(self, worker_id: int, orchestrator: "AsyncLocalOrchestrator"):
+        self.worker_id = worker_id
+        self.orchestrator = orchestrator
+
+    async def loop(self):
+        _log.debug(f"Starting loop for worker {self.worker_id}")
+        while True:
+            task_id: str = await self.orchestrator.task_queue.get()
+            self.orchestrator.queue_list.remove(task_id)
+
+            if task_id not in self.orchestrator.tasks:
+                raise RuntimeError(f"Task {task_id} not found.")
+            task = self.orchestrator.tasks[task_id]
+
+            try:
+                task.task_status = TaskStatus.STARTED
+                _log.info(f"Worker {self.worker_id} processing task {task_id}")
+
+                # Notify clients about task updates
+                await self.orchestrator.notify_task_subscribers(task_id)
+
+                # Notify clients about queue updates
+                await self.orchestrator.notify_queue_positions()
+
+                # Get the current event loop
+                asyncio.get_event_loop()
+
+                # Define a callback function to send progress updates to the client.
+                # TODO: send partial updates, e.g. when a document in the batch is done
+                def run_conversion():
+                    sources: list[Union[str, DocumentStream]] = []
+                    headers: Optional[dict[str, Any]] = None
+                    if isinstance(task.request, ConvertDocumentFileSourcesRequest):
+                        for file_source in task.request.file_sources:
+                            sources.append(file_source.to_document_stream())
+                    else:
+                        for http_source in task.request.http_sources:
+                            sources.append(http_source.url)
+                            if headers is None and http_source.headers:
+                                headers = http_source.headers
+
+                    # Note: results are only an iterator->lazy evaluation
+                    results = convert_documents(
+                        sources=sources,
+                        options=task.request.options,
+                        headers=headers,
+                    )
+
+                    # The real processing will happen here
+                    response = process_results(
+                        background_tasks=BackgroundTasks(),
+                        conversion_options=task.request.options,
+                        conv_results=results,
+                    )
+
+                    return response
+
+                # Run the prediction in a thread to avoid blocking the event loop.
+                start_time = time.monotonic()
+                # future = asyncio.run_coroutine_threadsafe(
+                #     run_conversion(),
+                #     loop=loop
+                # )
+                # response = future.result()
+
+                response = await asyncio.to_thread(
+                    run_conversion,
+                )
+                processing_time = time.monotonic() - start_time
+
+                if not isinstance(response, ConvertDocumentResponse):
+                    _log.error(
+                        f"Worker {self.worker_id} got un-processable "
+                        "result for {task_id}: {type(response)}"
+                    )
+                task.result = response
+                task.request = None
+
+                task.task_status = TaskStatus.SUCCESS
+                _log.info(
+                    f"Worker {self.worker_id} completed job {task_id} "
+                    f"in {processing_time:.2f} seconds"
+                )
+
+            except Exception as e:
+                _log.error(
+                    f"Worker {self.worker_id} failed to process job {task_id}: {e}"
+                )
+                task.task_status = TaskStatus.FAILURE
+
+            finally:
+                await self.orchestrator.notify_task_subscribers(task_id)
+                self.orchestrator.task_queue.task_done()
+                _log.debug(f"Worker {self.worker_id} completely done with {task_id}")
--- a/docling_serve/engines/base_orchestrator.py
+++ b/docling_serve/engines/base_orchestrator.py
@@ -0,0 +1,21 @@
+from abc import ABC, abstractmethod
+
+from docling_serve.datamodel.engines import Task
+
+
+class BaseOrchestrator(ABC):
+    @abstractmethod
+    async def enqueue(self, task) -> Task:
+        pass
+
+    @abstractmethod
+    async def queue_size(self) -> int:
+        pass
+
+    @abstractmethod
+    async def task_status(self, task_id: str) -> Task:
+        pass
+
+    @abstractmethod
+    async def task_result(self, task_id: str):
+        pass
--- a/docling_serve/engines/block_local/init.py
+++ b/docling_serve/engines/block_local/init.py
--- a/docling_serve/gradio_ui.py
+++ b/docling_serve/gradio_ui.py
@@ -12,6 +12,13 @@ from docling_serve.helper_functions import _to_list_of_strings

 logger = logging.getLogger(__name__)

+##############################
+# Head JS for web components #
+##############################
+head = """
+    <script src="https://unpkg.com/@docling/docling-components@0.0.3" type="module"></script>
+"""
+
 #################
 # CSS and theme #
 #################
@@ -49,6 +56,14 @@ css = """
 #file_input_zone {
    height: 140px;
 }
+
+docling-img::part(pages) {
+    gap: 1rem;
+}
+
+docling-img::part(page) {
+    box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
+}
 """

 theme = gr.themes.Default(
@@ -110,6 +125,7 @@ def set_download_button_label(label_text: gr.State):
 def clear_outputs():
    markdown_content = ""
    json_content = ""
+    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""
@@ -118,6 +134,7 @@ def clear_outputs():
        markdown_content,
        markdown_content,
        json_content,
+        json_rendered_content,
        html_content,
        html_content,
        text_content,
@@ -260,6 +277,7 @@ def process_file(
 def response_to_output(response, return_as_file):
    markdown_content = ""
    json_content = ""
+    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""
@@ -282,6 +300,12 @@ def response_to_output(response, return_as_file):
        json_content = json.dumps(
            full_content.get("document").get("json_content"), indent=2
        )
+        # Embed document JSON and trigger load at client via an image.
+        json_rendered_content = f"""
+            <docling-img id="dclimg" pagenumbers tooltip="parsed"></docling-img>
+            <script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{json_content}</script>
+            <img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
+            """
        html_content = full_content.get("document").get("html_content")
        text_content = full_content.get("document").get("text_content")
        doctags_content = full_content.get("document").get("doctags_content")
@@ -289,6 +313,7 @@ def response_to_output(response, return_as_file):
        markdown_content,
        markdown_content,
        json_content,
+        json_rendered_content,
        html_content,
        html_content,
        text_content,
@@ -302,12 +327,12 @@ def response_to_output(response, return_as_file):
 ############

 with gr.Blocks(
+    head=head,
    css=css,
    theme=theme,
    title="Docling Serve",
    delete_cache=(3600, 3600),  # Delete all files older than 1 hour every hour
 ) as ui:
-
    # Constants stored in states to be able to pass them as inputs to functions
    processing_text = gr.State("Processing your document(s), please wait...")
    true_bool = gr.State(True)
@@ -464,6 +489,8 @@ with gr.Blocks(
            output_markdown_rendered = gr.Markdown(label="Response")
        with gr.Tab("Docling (JSON)"):
            output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
+        with gr.Tab("Docling-Rendered"):
+            output_json_rendered = gr.HTML()
        with gr.Tab("HTML"):
            output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
        with gr.Tab("HTML-Rendered"):
@@ -514,6 +541,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -538,6 +566,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -553,6 +582,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -562,9 +592,7 @@ with gr.Blocks(
        set_outputs_visibility_direct,
        inputs=[false_bool, false_bool],
        outputs=[content_output, file_output],
-    ).then(
-        clear_url_input, inputs=None, outputs=[url_input]
-    )
+    ).then(clear_url_input, inputs=None, outputs=[url_input])

    # File processing
    file_process_btn.click(
@@ -582,6 +610,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -606,6 +635,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -621,6 +651,7 @@ with gr.Blocks(
            output_markdown,
            output_markdown_rendered,
            output_json,
+            output_json_rendered,
            output_html,
            output_html_rendered,
            output_text,
@@ -630,6 +661,4 @@ with gr.Blocks(
        set_outputs_visibility_direct,
        inputs=[false_bool, false_bool],
        outputs=[content_output, file_output],
-    ).then(
-        clear_file_input, inputs=None, outputs=[file_input]
-    )
+    ).then(clear_file_input, inputs=None, outputs=[file_input])
--- a/docling_serve/helper_functions.py
+++ b/docling_serve/helper_functions.py
@@ -1,6 +1,6 @@
 import inspect
 import re
-from typing import List, Type, Union
+from typing import Union

 from fastapi import Depends, Form
 from pydantic import BaseModel
@@ -8,7 +8,7 @@ from pydantic import BaseModel

 # Adapted from
 # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
-def FormDepends(cls: Type[BaseModel]):
+def FormDepends(cls: type[BaseModel]):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
@@ -34,8 +34,8 @@ def FormDepends(cls: Type[BaseModel]):
    return Depends(as_form_func)


-def _to_list_of_strings(input_value: Union[str, List[str]]) -> List[str]:
-    def split_and_strip(value: str) -> List[str]:
+def _to_list_of_strings(input_value: Union[str, list[str]]) -> list[str]:
+    def split_and_strip(value: str) -> list[str]:
        if re.search(r"[;,]", value):
            return [item.strip() for item in re.split(r"[;,]", value)]
        else:
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -3,43 +3,23 @@ import os
 import shutil
 import tempfile
 import time
+from collections.abc import Iterable
 from pathlib import Path
-from typing import Dict, Iterable, List, Optional, Union
+from typing import Union

-from docling.datamodel.base_models import OutputFormat
-from docling.datamodel.document import ConversionResult, ConversionStatus, ErrorItem
-from docling.utils.profiling import ProfilingItem
-from docling_core.types.doc import DoclingDocument, ImageRefMode
 from fastapi import BackgroundTasks, HTTPException
 from fastapi.responses import FileResponse
-from pydantic import BaseModel

-from docling_serve.docling_conversion import ConvertDocumentsOptions
+from docling.datamodel.base_models import OutputFormat
+from docling.datamodel.document import ConversionResult, ConversionStatus
+from docling_core.types.doc import ImageRefMode
+
+from docling_serve.datamodel.convert import ConvertDocumentsOptions
+from docling_serve.datamodel.responses import ConvertDocumentResponse, DocumentResponse

 _log = logging.getLogger(__name__)


-class DocumentResponse(BaseModel):
-    filename: str
-    md_content: Optional[str] = None
-    json_content: Optional[DoclingDocument] = None
-    html_content: Optional[str] = None
-    text_content: Optional[str] = None
-    doctags_content: Optional[str] = None
-
-
-class ConvertDocumentResponse(BaseModel):
-    document: DocumentResponse
-    status: ConversionStatus
-    errors: List[ErrorItem] = []
-    processing_time: float
-    timings: Dict[str, ProfilingItem] = {}
-
-
-class ConvertDocumentErrorResponse(BaseModel):
-    status: ConversionStatus
-
-
 def _export_document_as_content(
    conv_res: ConversionResult,
    export_json: bool,
@@ -49,7 +29,6 @@ def _export_document_as_content(
    export_doctags: bool,
    image_mode: ImageRefMode,
 ):
-
    document = DocumentResponse(filename=conv_res.input.file.name)

    if conv_res.status == ConversionStatus.SUCCESS:
@@ -86,7 +65,6 @@ def _export_documents_as_files(
    export_doctags: bool,
    image_export_mode: ImageRefMode,
 ):
-
    success_count = 0
    failure_count = 0

@@ -150,7 +128,6 @@ def process_results(
    conversion_options: ConvertDocumentsOptions,
    conv_results: Iterable[ConversionResult],
 ) -> Union[ConvertDocumentResponse, FileResponse]:
-
    # Let's start by processing the documents
    try:
        start_time = time.monotonic()
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -3,6 +3,8 @@ from typing import Optional, Union

 from pydantic_settings import BaseSettings, SettingsConfigDict

+from docling_serve.datamodel.engines import AsyncEngine
+

 class UvicornSettings(BaseSettings):
    model_config = SettingsConfigDict(
@@ -28,6 +30,9 @@ class DoclingServeSettings(BaseSettings):
    enable_ui: bool = False
    artifacts_path: Optional[Path] = None

+    eng_kind: AsyncEngine = AsyncEngine.LOCAL
+    eng_loc_num_workers: int = 2
+

 uvicorn_settings = UvicornSettings()
 docling_serve_settings = DoclingServeSettings()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "0.3.0"  # DO NOT EDIT, updated automatically
+version = "0.5.0"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -30,7 +30,7 @@ classifiers = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "docling~=2.23",
+    "docling~=2.25.1",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
    "pydantic~=2.10",
@@ -38,6 +38,7 @@ dependencies = [
    "python-multipart>=0.0.14,<0.1.0",
    "typer~=0.12",
    "uvicorn[standard]>=0.29.0,<1.0.0",
+    "websockets~=14.0",
 ]

 [project.optional-dependencies]
@@ -144,7 +145,8 @@ select = [
    "S307", # eval
    # "T20",  # (disallow print statements) keep debugging statements out of the codebase
    "W",  # pycodestyle warnings
-    "ASYNC" # async
+    "ASYNC", # async
+    "UP", # pyupgrade
 ]

 ignore = [
@@ -153,6 +155,7 @@ ignore = [
    "F811", # "redefinition of the same function"
    "PL", # Pylint
    "RUF012", # Mutable Class Attributes
+    "UP007", # Option and Union
 ]

 #extend-select = []
@@ -164,9 +167,19 @@ ignore = [
 [tool.ruff.lint.mccabe]
 max-complexity = 15

+[tool.ruff.lint.isort.sections]
+"docling" = ["docling", "docling_core"]
+
 [tool.ruff.lint.isort]
 combine-as-imports = true
-known-third-party = ["docling", "docling_core"]
+section-order = [
+  "future",
+  "standard-library",
+  "third-party",
+  "docling",
+  "first-party",
+  "local-folder",
+]

 [tool.mypy]
 pretty = true
@@ -180,10 +193,6 @@ module = [
    "easyocr.*",
    "tesserocr.*",
    "rapidocr_onnxruntime.*",
-    "docling_conversion.*",
-    "gradio_ui.*",
-    "response_preparation.*",
-    "helper_functions.*",
    "requests.*",
 ]
 ignore_missing_imports = true
--- a/tests/test_1-file-all-outputs.py
+++ b/tests/test_1-file-all-outputs.py
@@ -89,7 +89,7 @@ async def test_convert_file(async_client):
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
-            msg=f"JSON document should contain '{{\\n  \"schema_name\": \"DoclingDocument'\". Received: {safe_slice(data['document']['json_content'])}",
+            msg=f'JSON document should contain \'{{\\n  "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
    check.is_in(
--- a/tests/test_1-url-all-outputs.py
+++ b/tests/test_1-url-all-outputs.py
@@ -83,7 +83,7 @@ async def test_convert_url(async_client):
        check.is_in(
            '{"schema_name": "DoclingDocument"',
            json.dumps(data["document"]["json_content"]),
-            msg=f"JSON document should contain '{{\\n  \"schema_name\": \"DoclingDocument'\". Received: {safe_slice(data['document']['json_content'])}",
+            msg=f'JSON document should contain \'{{\\n  "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
        )
    # HTML check
    check.is_in(
--- a/tests/test_1-url-async-ws.py
+++ b/tests/test_1-url-async-ws.py
@@ -0,0 +1,48 @@
+import base64
+from pathlib import Path
+
+import httpx
+import pytest
+import pytest_asyncio
+from websockets.sync.client import connect
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client: httpx.AsyncClient):
+    """Test convert URL to all outputs"""
+
+    doc_filename = Path("tests/2408.09869v5.pdf")
+    encoded_doc = base64.b64encode(doc_filename.read_bytes()).decode()
+
+    base_url = "http://localhost:5001/v1alpha"
+    payload = {
+        "options": {
+            "to_formats": ["md", "json"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "abort_on_error": False,
+            "return_as_file": False,
+        },
+        # "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
+        "file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],
+    }
+    # print(json.dumps(payload, indent=2))
+
+    for n in range(5):
+        response = await async_client.post(
+            f"{base_url}/convert/source/async", json=payload
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    uri = f"ws://localhost:5001/v1alpha/status/ws/{task['task_id']}"
+    with connect(uri) as websocket:
+        for message in websocket:
+            print(message)
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -0,0 +1,60 @@
+import json
+import random
+import time
+
+import httpx
+import pytest
+import pytest_asyncio
+
+
+@pytest_asyncio.fixture
+async def async_client():
+    async with httpx.AsyncClient(timeout=60.0) as client:
+        yield client
+
+
+@pytest.mark.asyncio
+async def test_convert_url(async_client):
+    """Test convert URL to all outputs"""
+
+    example_docs = [
+        "https://arxiv.org/pdf/2411.19710",
+        "https://arxiv.org/pdf/2501.17887",
+        "https://www.nature.com/articles/s41467-024-50779-y.pdf",
+        "https://arxiv.org/pdf/2306.12802",
+        "https://arxiv.org/pdf/2311.18481",
+    ]
+
+    base_url = "http://localhost:5001/v1alpha"
+    payload = {
+        "options": {
+            "to_formats": ["md", "json"],
+            "image_export_mode": "placeholder",
+            "ocr": True,
+            "abort_on_error": False,
+            "return_as_file": False,
+        },
+        "http_sources": [{"url": random.choice(example_docs)}],
+    }
+    print(json.dumps(payload, indent=2))
+
+    for n in range(5):
+        response = await async_client.post(
+            f"{base_url}/convert/source/async", json=payload
+        )
+        assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        time.sleep(2)
+
+    assert task["task_status"] == "success"
--- a/tests/test_2-files-all-outputs.py
+++ b/tests/test_2-files-all-outputs.py
@@ -57,18 +57,18 @@ async def test_convert_file(async_client):
    content_disposition = response.headers.get("content-disposition")

    with check:
-        assert (
-            content_disposition is not None
-        ), "Content-Disposition header should be present"
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
-        assert (
-            'filename="converted_docs.zip"' in content_disposition
-        ), "Attachment filename should be 'converted_docs.zip'"
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )

    content_type = response.headers.get("content-type")
    with check:
-        assert (
-            content_type == "application/zip"
-        ), "Content-Type should be 'application/zip'"
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
--- a/tests/test_2-urls-all-outputs.py
+++ b/tests/test_2-urls-all-outputs.py
@@ -50,18 +50,18 @@ async def test_convert_url(async_client):
    content_disposition = response.headers.get("content-disposition")

    with check:
-        assert (
-            content_disposition is not None
-        ), "Content-Disposition header should be present"
+        assert content_disposition is not None, (
+            "Content-Disposition header should be present"
+        )
    with check:
        assert "attachment" in content_disposition, "Response should be an attachment"
    with check:
-        assert (
-            'filename="converted_docs.zip"' in content_disposition
-        ), "Attachment filename should be 'converted_docs.zip'"
+        assert 'filename="converted_docs.zip"' in content_disposition, (
+            "Attachment filename should be 'converted_docs.zip'"
+        )

    content_type = response.headers.get("content-type")
    with check:
-        assert (
-            content_type == "application/zip"
-        ), "Content-Type should be 'application/zip'"
+        assert content_type == "application/zip", (
+            "Content-Type should be 'application/zip'"
+        )
--- a/uv.lock
+++ b/uv.lock
@@ -349,38 +349,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/2e/38/3fd83c4690dc7d753a442a284b3826ea5e5c380a411443c66421cd823898/cryptography-44.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7", size = 3134657 },
 ]

-[[package]]
-name = "deepsearch-glm"
-version = "1.0.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/73/d5/a907234e57f5c4f6480c9ddbc3cdacc47f727c768e502be3d361719fac4e/deepsearch_glm-1.0.0.tar.gz", hash = "sha256:e8dce88ac519a693c260f28bd3c4ec409811e65ade84fb508f6c6e37ca065e62", size = 2401014 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/40/65/4b2013784d5ed8d3664a2efa61f15600c8bf090766b0363c036d78aca550/deepsearch_glm-1.0.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:94792b57df7a1c4ba8b47ebd8f36ea0a090d4f27a4fba39bd7b166b6b537260a", size = 6303790 },
-    { url = "https://files.pythonhosted.org/packages/45/2a/1e95260a712948a21b74dcb239032d9e612f7e1a273657008655749f4115/deepsearch_glm-1.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ff46e352e96a2f56ce7ae4fdf04b271ee841c29ff159b1dec0e5ecaaadba8d4d", size = 5945851 },
-    { url = "https://files.pythonhosted.org/packages/9e/1a/5c37a98f27644fd02bc447df651e8d5ce484cd6ce7cb178218625b4de5bc/deepsearch_glm-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d77d3d94d49641888aa15f3ad23e81158e791aa9d9608dd8168dc71788e56f3", size = 7431282 },
-    { url = "https://files.pythonhosted.org/packages/e8/e2/56b5e7ae3ccc4d8ee758427c8c9a403c985e250a468c53538c269897bef2/deepsearch_glm-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:143de0fd111a570be12935d8799a2715fe1775d4dc4e256337860b429cee5d36", size = 7759571 },
-    { url = "https://files.pythonhosted.org/packages/61/f4/e39a5090a2bf0d641449918865566ad5adabef156993a922bdbf4a3ebb60/deepsearch_glm-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:9f2872dd573cd2206ce7f9e2e6016c38b66d9ecbd983283ff5e8c6023813c311", size = 7904646 },
-    { url = "https://files.pythonhosted.org/packages/41/f7/8e8dd9738554f97522b59b0a6d7680ccf2d527bd3471ec4aa4e52acf552a/deepsearch_glm-1.0.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:e64d94ff5209f0a11e8c75c6b28b033ef27b95a22c2fbcbd945e7fe8cc421545", size = 6309301 },
-    { url = "https://files.pythonhosted.org/packages/17/37/4d8514d8ef851e44513a71f675a7ebb373f109aece38e324c7d444ced20c/deepsearch_glm-1.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a5702205677b768b51f881d15d933370f6ef3c826dfac3b9aa0b904d2e6c495a", size = 5951522 },
-    { url = "https://files.pythonhosted.org/packages/0c/c6/3680318e66df278fa7f0811dc862d6cb3c328ce168b4f36736eb77120b6d/deepsearch_glm-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0417a2ae998e1709f03458cfb9adb55423bb1328224eb055300796baa757879f", size = 7434315 },
-    { url = "https://files.pythonhosted.org/packages/c3/cd/9ffb616d347d568f868f47585b3261c16e277aa7b37740e8720eee71c539/deepsearch_glm-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f0e1efe9af0d28e9b473fe599246deb3a0be7c3d546a478da284747144d086a", size = 7761264 },
-    { url = "https://files.pythonhosted.org/packages/3d/d3/e5ebdda9cee8a1c846e6a960a0e5b97624aff2f248c2bc89ae490b9a1342/deepsearch_glm-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:807faf13eb0deea55a1951d479a85d5e20de0ff8b2e0b57b2f7939552759a426", size = 7908603 },
-    { url = "https://files.pythonhosted.org/packages/60/ca/6adbadc979910b11594cd0242f1991942c22528eead431d47de064ac2860/deepsearch_glm-1.0.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:56d9575df9eceb8c2ae33e3d15e133924cc195714c3d268599b6f8414c1f6bb8", size = 6308715 },
-    { url = "https://files.pythonhosted.org/packages/20/7c/bf1e9c458705c7143c6630cb6847554ad694d25dc6f1f038512b9c86160a/deepsearch_glm-1.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:51f5c6522f60ba73eb12eeb7217bd98d871ba7c078337a4059d05878d8baf2d6", size = 5949609 },
-    { url = "https://files.pythonhosted.org/packages/21/b1/eb0cd0db50d05f2d7a510a77960e85e6caee727eb3d931ed0ec067917813/deepsearch_glm-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6211eaf497ad7cfcb68f80f9b5387940be0204fe149a9fc03988a95145f410a", size = 7433929 },
-    { url = "https://files.pythonhosted.org/packages/3a/7e/2b7db77ff02fe9eec41f3605fcd72e3eb4e6b48561b344d432b417a75cfe/deepsearch_glm-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b003bf457fce61ea4de79e2d7d0228a1ae349f677eb6570e745f79d4429804f", size = 7760438 },
-    { url = "https://files.pythonhosted.org/packages/ab/97/ffb2bb5d2432c7b0e9f3a3e6b5873fbcd6e19e82b620393bfb8e01bdecb1/deepsearch_glm-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9d61f66048e6ab60fe9f84c823fd593bf8517755833bd9efb59156d77a2b42d0", size = 7907583 },
-    { url = "https://files.pythonhosted.org/packages/38/06/08c5fd0e1144c2c8d76d06da1545a9cf589278a37f8b9e6235b5b416eb52/deepsearch_glm-1.0.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:7d558e8b365c27ee665d0589165fd074fb252c73715f9cc6aeb4304a63683f37", size = 6308867 },
-    { url = "https://files.pythonhosted.org/packages/ba/fb/f5f9787876b67ce83d5afa4903901be9f8071530bc0706dc2228afc0b6c0/deepsearch_glm-1.0.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:3199093a9472e5756214b9b6563f827c19c001c7dd8ae00e03eed1140c12930d", size = 5949719 },
-    { url = "https://files.pythonhosted.org/packages/83/0f/42b5a4aa798acbc6309d748435b006c489e58102b6cb2278e7b8f0194743/deepsearch_glm-1.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f18d1ee68a0479592e0c714e6cbf9e2d0fa8edd692d580da64431c84cbef5c2", size = 7434981 },
-    { url = "https://files.pythonhosted.org/packages/17/6a/c2c4eaa4470b78dde6c03f055cbb09f3f7f15b8a6ff38f5bea5180339e6f/deepsearch_glm-1.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62c1c0ea0a544219da15c017632f9e0be116ecdc335b865c6c5760429557fe23", size = 7760773 },
-    { url = "https://files.pythonhosted.org/packages/01/0a/7c3cf75bad38a8d6ff3842b78b3263dd81ad4eaf1d859f4b8e1ab465cad5/deepsearch_glm-1.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:962f393dcec2204de1a5cb0f635c65258bde2424ad2d4e0f5df770139c3958de", size = 7908766 },
-    { url = "https://files.pythonhosted.org/packages/1f/cd/e6507d924aa69e9647f917ed671e2d62e19e41d4f120a15fcbb583661667/deepsearch_glm-1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:e2315cc4ffe7032dada294a0cd72a47dbc6c0121fd07d4b5719f9a9e9519d091", size = 14644989 },
-]
-
 [[package]]
 name = "dill"
 version = "0.3.9"
@@ -410,12 +378,11 @@ wheels = [

 [[package]]
 name = "docling"
-version = "2.23.0"
+version = "2.25.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "beautifulsoup4" },
    { name = "certifi" },
-    { name = "deepsearch-glm" },
    { name = "docling-core", extra = ["chunking"] },
    { name = "docling-ibm-models" },
    { name = "docling-parse" },
@@ -438,9 +405,9 @@ dependencies = [
    { name = "tqdm" },
    { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/94/45/807f81abeaa1a018998525a98d577508e6c0eba4173e6ec4beeee77f0644/docling-2.23.0.tar.gz", hash = "sha256:7ffde3366b01e2f1c0e47574700501a3b8667082cf3a185efe7e103b8473ee43", size = 106065 }
+sdist = { url = "https://files.pythonhosted.org/packages/f9/88/b6d782d2cd7ed602d2bae1a01e87a6347a37295ad450d86159cc7c252290/docling-2.25.1.tar.gz", hash = "sha256:ba2fce77659f4ccf1c8a696531ea9f17253215dbebfac6536012bbc6d1c29ce8", size = 112676 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9f/7f/79985fbb4f115f87688b2a8d18ddc14d45fcccd8989a02104ddc8cf9b02c/docling-2.23.0-py3-none-any.whl", hash = "sha256:bd3d05bf48fc842e502af9f26153c40e2fcc1df1945ec72ab8c4d5dd1f3b6528", size = 137161 },
+    { url = "https://files.pythonhosted.org/packages/2a/c1/6c58516672f0f60c432ae331391b6548e4fdcb7b6a6dcd7725605284dcf7/docling-2.25.1-py3-none-any.whl", hash = "sha256:92318591342fc50781134fc553c6c57b703ce43e8095a80d59ed02206d0f560c", size = 145677 },
 ]

 [[package]]
@@ -472,14 +439,16 @@ chunking = [

 [[package]]
 name = "docling-ibm-models"
-version = "3.3.2"
+version = "3.4.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
+    { name = "docling-core" },
    { name = "huggingface-hub" },
    { name = "jsonlines" },
    { name = "numpy" },
    { name = "opencv-python-headless" },
    { name = "pillow" },
+    { name = "pydantic" },
    { name = "safetensors", extra = ["torch"] },
    { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu') or (platform_machine == 'x86_64' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform != 'darwin' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (extra != 'extra-13-docling-serve-cpu' and extra != 'extra-13-docling-serve-cu124')" },
@@ -492,9 +461,9 @@ dependencies = [
    { name = "tqdm" },
    { name = "transformers" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/6c/e5/7ff58b9481beb43e5b93084b784fe686be553a947c961d09cda557630dd0/docling_ibm_models-3.3.2.tar.gz", hash = "sha256:f6ed59dfb3f98a71ccdd003c13c9a868e3003c22bd5adc554197da7eec227cde", size = 66096 }
+sdist = { url = "https://files.pythonhosted.org/packages/eb/a5/88d5b7c970d5e10a06062fe9e9de3cde6acdefcc1f85854f689a82863c2a/docling_ibm_models-3.4.1.tar.gz", hash = "sha256:093b4dff2ea284a4953c3aa009e29945208b8d389b94fb14940a03a93f673e96", size = 69794 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/84/8a/1539d9b951a761e141eeabfed7fcfa99d7ae88aa3073711d42e8df3e8d1a/docling_ibm_models-3.3.2-py3-none-any.whl", hash = "sha256:9f82a2ef73c6cd8d729ab2fcc4223079ccb8b6eec0bf0643c56e55352b97b5cb", size = 76659 },
+    { url = "https://files.pythonhosted.org/packages/af/8f/0f2b823fa09d06deacbdfc6d5d7809d462ddc508f43146960083d113c4c6/docling_ibm_models-3.4.1-py3-none-any.whl", hash = "sha256:c3582c99dddfa3f0eafcf80cf1267fd8efa39c4a74cc7a88f9dd49684fac2986", size = 80886 },
 ]

 [[package]]
@@ -535,7 +504,7 @@ wheels = [

 [[package]]
 name = "docling-serve"
-version = "0.3.0"
+version = "0.4.0"
 source = { editable = "." }
 dependencies = [
    { name = "docling" },
@@ -546,14 +515,15 @@ dependencies = [
    { name = "python-multipart" },
    { name = "typer" },
    { name = "uvicorn", extra = ["standard"] },
+    { name = "websockets" },
 ]

 [package.optional-dependencies]
 cpu = [
-    { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu') or (platform_machine == 'x86_64' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform != 'darwin' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
-    { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'x86_64' and extra == 'extra-13-docling-serve-cpu') or (platform_machine != 'x86_64' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform != 'darwin' and extra == 'extra-13-docling-serve-cpu')" },
-    { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-13-docling-serve-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform == 'linux' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
-    { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-13-docling-serve-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-13-docling-serve-cpu') or (sys_platform == 'darwin' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124') or (sys_platform == 'linux' and extra == 'extra-13-docling-serve-cpu' and extra == 'extra-13-docling-serve-cu124')" },
+    { name = "torch", version = "2.6.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine != 'x86_64' and sys_platform == 'darwin'" },
+    { name = "torch", version = "2.6.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "platform_machine == 'x86_64' or sys_platform != 'darwin'" },
+    { name = "torchvision", version = "0.21.0", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'x86_64' and sys_platform == 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux')" },
+    { name = "torchvision", version = "0.21.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'darwin') or (platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 cu124 = [
    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" } },
@@ -583,7 +553,7 @@ dev = [

 [package.metadata]
 requires-dist = [
-    { name = "docling", specifier = "~=2.23" },
+    { name = "docling", specifier = "~=2.25.1" },
    { name = "fastapi", extras = ["standard"], specifier = "~=0.115" },
    { name = "gradio", marker = "extra == 'ui'", specifier = "~=5.9" },
    { name = "httpx", specifier = "~=0.28" },
@@ -599,6 +569,7 @@ requires-dist = [
    { name = "torchvision", marker = "extra == 'cu124'", specifier = ">=0.21.0", index = "https://download.pytorch.org/whl/cu124", conflict = { package = "docling-serve", extra = "cu124" } },
    { name = "typer", specifier = "~=0.12" },
    { name = "uvicorn", extras = ["standard"], specifier = ">=0.29.0,<1.0.0" },
+    { name = "websockets", specifier = "~=14.0" },
 ]
 provides-extras = ["ui", "tesserocr", "rapidocr", "cpu", "cu124"]
Author	SHA1	Message	Date
github-actions[bot]	98b46eda50	chore: bump version to 0.5.0 [skip ci]	2025-03-07 17:24:16 +00:00
Michele Dolfi	7e75919ae8	chore: Remove deprecated type aliases and run as pre-commit (#79 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-03-07 15:46:52 +01:00
Eugene	c95db36438	fix: Remove uv from image, merge ARG and ENV declarations (#57 ) Signed-off-by: Eugene <fogaprod@gmail.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>	2025-03-07 15:33:21 +01:00
Michele Dolfi	82f8900197	feat: Async api (#60 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-03-07 11:26:50 +01:00
Eugene	ed851c95fe	feat: display version in fastapi docs (#78 ) Signed-off-by: Eugene <fogaprod@gmail.com>	2025-03-07 09:28:05 +01:00
Steffen Röcker	05df0735d3	fix(docs): Remove comma in convert/source curl example (#73 ) Signed-off-by: Steffen Röcker <sroecker@redhat.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>	2025-03-06 08:12:09 +01:00
github-actions[bot]	cad1053e36	chore: bump version to 0.4.0 [skip ci]	2025-02-26 13:05:03 +00:00
Michele Dolfi	7e6d9cdef3	feat: New container images (#68 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-02-26 12:49:20 +01:00
Brent Salisbury	343b985287	Readme additions for running Readme additions for a quickstart running of docling-serve Signed-off-by: Brent Salisbury <bsalisbu@redhat.com>	2025-02-25 14:49:50 -08:00
Kasper Dinkla	c430d9b1a1	feat: Render DoclingDocument with npm docling-components in the example UI (#65 ) Signed-off-by: DKL <dkl@zurich.ibm.com>	2025-02-25 11:27:42 +01:00
Anil Vishnoi	63141f1cc7	ci: Use release event to trigger the image publishing job for releases (#63 ) Signed-off-by: Anil Vishnoi <vishnoianil@gmail.com>	2025-02-24 08:21:17 +01:00