chore: bump version to 1.7.0 [skip ci]

feat(UI): add auto and ocrmac options in demo UI (#408)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 16:43:24 +00:00 · 2025-10-17 12:16:37 +00:00 · 2025-10-17 12:23:57 +02:00 · 2025-10-15 21:15:29 +02:00 · 2025-10-09 16:07:02 +02:00 · 2025-10-03 13:39:59 +00:00
15 changed files with 5314 additions and 4878 deletions
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -108,6 +108,7 @@ jobs:
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
+          pull: true
      ##
      ## This stage runs after the build, so it leverages all build cache
      ## 
@@ -117,8 +118,8 @@ jobs:
        with:
          context: .
          push: false
-          load: true # == '--output=type=docker'
-          tags: ${{ steps.ghcr_meta.outputs.tags }}-test
+          load: true
+          tags: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test
          labels: |
            org.opencontainers.image.title=docling-serve
            org.opencontainers.image.test=true
@@ -133,7 +134,7 @@ jobs:
        run: |
          set -e

-          IMAGE_TAG="${{ steps.ghcr_meta.outputs.tags }}-test"
+          IMAGE_TAG="${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test"
          echo "Testing local image: $IMAGE_TAG"

          # Remove existing container if any
@@ -226,202 +227,8 @@ jobs:
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
+          pull: true

-      - name: Remove Local Docker Images
+      - name: Remove local Docker images
        run: |
          docker image prune -af
-##
-## Extra tests for released images
-##
-
-    # outputs:
-    #   image-tags: ${{ steps.ghcr_meta.outputs.tags }}
-    #   image-labels: ${{ steps.ghcr_meta.outputs.labels }}
-
-  # test-cpu-image:
-  #   needs:
-  #     - image
-  #   runs-on: ubuntu-latest
-  #   permissions:
-  #     contents: read
-  #     packages: read
-
-  #   steps:
-  #     - name: Checkout code
-  #       uses: actions/checkout@v5
-
-  #     - name: Test CPU images
-  #       run: |
-  #         set -e
-
-  #         echo "Testing image: ${{ needs.image.outputs.image-tags }}"
-
-  #         for tag in ${{ needs.image.outputs.image-tags }}; do
-  #           if echo "$tag" | grep -q -- '-cpu' && echo "$tag" | grep -qE ':[vV][0-9]+(\.[0-9]+){0,2}$'; then
-  #             echo "Testing CPU image: $tag"
-
-  #             # Remove existing container if any
-  #             docker rm -f docling-serve-test-container 2>/dev/null || true
-
-  #             echo "Pulling image..."
-  #             docker pull "$tag"
-
-  #             echo "Waiting 5s after pull..."
-  #             sleep 5
-
-  #             echo "Starting container..."
-  #             docker run -d -p 5001:5001 --name docling-serve-test-container "$tag"
-
-  #             echo "Waiting 15s for container to boot..."
-  #             sleep 15
-
-  #             echo "Checking service health..."
-  #             for i in {1..20}; do
-  #               health_response=$(curl -s http://localhost:5001/health || true)
-  #               echo "Health check response [$i]: $health_response"
-  #               if echo "$health_response" | grep -q '"status":"ok"'; then
-  #                 echo "Service is healthy!"
-  #                 echo "Sending test conversion request..."
-
-  #                 status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST 'http://localhost:5001/v1/convert/source' \
-  #                   -H 'accept: application/json' \
-  #                   -H 'Content-Type: application/json' \
-  #                   -d '{
-  #                     "options": {
-  #                       "from_formats": ["pdf"],
-  #                       "to_formats": ["md"]
-  #                     },
-  #                     "sources": [
-  #                       {
-  #                         "kind": "http",
-  #                         "url": "https://arxiv.org/pdf/2501.17887"
-  #                       }
-  #                     ],
-  #                     "target": {
-  #                       "kind": "inbody"
-  #                     }
-  #                   }')
-
-  #                 echo "Conversion request returned status code: $status_code"
-
-  #                 if [ "$status_code" -ne 200 ]; then
-  #                   echo "Conversion failed!"
-  #                   docker logs docling-serve-test-container
-  #                   docker rm -f docling-serve-test-container
-  #                   exit 1
-  #                 fi
-
-  #                 break
-  #               else
-  #                 echo "Waiting for service... [$i/20]"
-  #                 sleep 3
-  #               fi
-  #             done
-
-  #             if ! echo "$health_response" | grep -q '"status":"ok"'; then
-  #               echo "Service did not become healthy in time."
-  #               docker logs docling-serve-test-container
-  #               docker rm -f docling-serve-test-container
-  #               exit 1
-  #             fi
-
-  #             echo "Cleaning up test container..."
-  #             docker rm -f docling-serve-test-container
-  #           else
-  #             echo "Skipping non-released or non-CPU image: $tag"
-  #           fi
-  #         done
-
-  # test-cuda-image:
-  #   needs:
-  #     - image
-  #   runs-on: ubuntu-latest # >> placeholder for GPU runner << #
-  #   permissions:
-  #     contents: read
-  #     packages: read
-
-  #   steps:
-  #     - name: Checkout code
-  #       uses: actions/checkout@v5
-
-  #     - name: Test CUDA images
-  #       run: |
-  #         set -e
-
-  #         echo "Testing image: ${{ needs.image.outputs.image-tags }}"
-
-  #         for tag in ${{ needs.image.outputs.image-tags }}; do
-  #           if echo "$tag" | grep -qE -- '-cu[0-9]+' && echo "$tag" | grep -qE ':[vV][0-9]+(\.[0-9]+){0,2}$'; then
-  #             echo "Testing CUDA image: $tag"
-
-  #             # Remove existing container if any
-  #             docker rm -f docling-serve-test-container 2>/dev/null || true
-
-  #             echo "Pulling image..."
-  #             docker pull "$tag"
-
-  #             echo "Waiting 5s after pull..."
-  #             sleep 5
-
-  #             echo "Starting container..."
-  #             docker run -d -p 5001:5001 --gpus all --name docling-serve-test-container "$tag"
-
-  #             echo "Waiting 15s for container to boot..."
-  #             sleep 15
-
-  #             echo "Checking service health..."
-  #             for i in {1..25}; do
-  #               health_response=$(curl -s http://localhost:5001/health || true)
-  #               echo "Health check response [$i]: $health_response"
-  #               if echo "$health_response" | grep -q '"status":"ok"'; then
-  #                 echo "Service is healthy!"
-  #                 echo "Sending test conversion request..."
-
-  #                 status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST 'http://localhost:5001/v1/convert/source' \
-  #                   -H 'accept: application/json' \
-  #                   -H 'Content-Type: application/json' \
-  #                   -d '{
-  #                     "options": {
-  #                       "from_formats": ["pdf"],
-  #                       "to_formats": ["md"]
-  #                     },
-  #                     "sources": [
-  #                       {
-  #                         "kind": "http",
-  #                         "url": "https://arxiv.org/pdf/2501.17887"
-  #                       }
-  #                     ],
-  #                     "target": {
-  #                       "kind": "inbody"
-  #                     }
-  #                   }')
-
-  #                 echo "Conversion request returned status code: $status_code"
-
-  #                 if [ "$status_code" -ne 200 ]; then
-  #                   echo "Conversion failed!"
-  #                   docker logs docling-serve-test-container
-  #                   docker rm -f docling-serve-test-container
-  #                   exit 1
-  #                 fi
-
-  #                 break
-  #               else
-  #                 echo "Waiting for service... [$i/25]"
-  #                 sleep 3
-  #               fi
-  #             done
-
-  #             if ! echo "$health_response" | grep -q '"status":"ok"'; then
-  #               echo "Service did not become healthy in time."
-  #               docker logs docling-serve-test-container
-  #               docker rm -f docling-serve-test-container
-  #               exit 1
-  #             fi
-
-  #             echo "Cleaning up test container..."
-  #             docker rm -f docling-serve-test-container
-  #           else
-  #             echo "Skipping non-released or non-CUDA image: $tag"
-  #           fi
-  #         done
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -34,6 +34,6 @@ repos:
        files: \.md$
  - repo: https://github.com/astral-sh/uv-pre-commit
    # uv version, https://github.com/astral-sh/uv-pre-commit/releases
-    rev: 0.8.3
+    rev: 0.8.19
    hooks:
      - id: uv-lock
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,97 @@
+## [v1.7.0](https://github.com/docling-project/docling-serve/releases/tag/v1.7.0) - 2025-10-17
+
+### Feature
+
+* **UI:** Add auto and ocrmac options in demo UI ([#408](https://github.com/docling-project/docling-serve/issues/408)) ([`f5af71e`](https://github.com/docling-project/docling-serve/commit/f5af71e8f6de00d7dd702471a3eea2e94d882410))
+* Docling with auto-ocr ([#403](https://github.com/docling-project/docling-serve/issues/403)) ([`d95ea94`](https://github.com/docling-project/docling-serve/commit/d95ea940870af0d8df689061baa50f6026efce28))
+
+### Fix
+
+* Run docling ui behind a reverse proxy using a context path ([#396](https://github.com/docling-project/docling-serve/issues/396)) ([`5344505`](https://github.com/docling-project/docling-serve/commit/53445057184aa731ee7456b33b70bc0ecf82f2a6))
+
+### Docling libraries included in this release:
+- docling 2.57.0
+- docling-core 2.48.4
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.6.0
+- docling-mcp 1.3.2
+- docling-parse 4.5.0
+- docling-serve 1.7.0
+
+## [v1.6.0](https://github.com/docling-project/docling-serve/releases/tag/v1.6.0) - 2025-10-03
+
+### Feature
+
+* Pin new version of jobkit with granite-docling and connectors ([#391](https://github.com/docling-project/docling-serve/issues/391)) ([`0595d31`](https://github.com/docling-project/docling-serve/commit/0595d31d5b357553426215ca6771796a47e41324))
+
+### Fix
+
+* Update locked dependencies ([#392](https://github.com/docling-project/docling-serve/issues/392)) ([`45f0f3c`](https://github.com/docling-project/docling-serve/commit/45f0f3c8f95d418ac30e3744d27d02a63f9e4490))
+* **UI:** Allow both lowercase and uppercase extensions ([#386](https://github.com/docling-project/docling-serve/issues/386)) ([`8b22a39`](https://github.com/docling-project/docling-serve/commit/8b22a391418d22c1a4d706f880341f28702057b5))
+* Correctly raise HTTPException for Gateway Timeout ([#382](https://github.com/docling-project/docling-serve/issues/382)) ([`d4eac05`](https://github.com/docling-project/docling-serve/commit/d4eac053f9ce0a60f9070127335bdd56e193d7fa))
+* Pinning of higher version of dependencies to fix potential security issues ([#363](https://github.com/docling-project/docling-serve/issues/363)) ([`ba61af2`](https://github.com/docling-project/docling-serve/commit/ba61af23591eff200481aa2e532cf7d0701f0ea4))
+
+### Documentation
+
+* Fix docs for websocket breaking condition ([#390](https://github.com/docling-project/docling-serve/issues/390)) ([`f6b5f0e`](https://github.com/docling-project/docling-serve/commit/f6b5f0e06354d2db7d03d274b114499e3407dccf))
+
+### Docling libraries included in this release:
+- docling 2.55.1
+- docling-core 2.48.4
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.6.0
+- docling-mcp 1.3.2
+- docling-parse 4.5.0
+- docling-serve 1.6.0
+
+## [v1.5.1](https://github.com/docling-project/docling-serve/releases/tag/v1.5.1) - 2025-09-17
+
+### Fix
+
+* Remove old dependencies, fixes in docling-parse and more minor dependencies upgrade ([#362](https://github.com/docling-project/docling-serve/issues/362)) ([`513ae0c`](https://github.com/docling-project/docling-serve/commit/513ae0c119b66d3b17cf9a5d371a0f7971f43be7))
+* Updates rapidocr deps ([#361](https://github.com/docling-project/docling-serve/issues/361)) ([`bde0406`](https://github.com/docling-project/docling-serve/commit/bde040661fb65c67699326cd6281c0e6232e26f2))
+
+### Docling libraries included in this release:
+- docling 2.52.0
+- docling-core 2.48.1
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.5.0
+- docling-mcp 1.2.0
+- docling-parse 4.5.0
+- docling-serve 1.5.1
+
+## [v1.5.0](https://github.com/docling-project/docling-serve/releases/tag/v1.5.0) - 2025-09-09
+
+### Feature
+
+* Add chunking endpoints ([#353](https://github.com/docling-project/docling-serve/issues/353)) ([`9d6def0`](https://github.com/docling-project/docling-serve/commit/9d6def0ec8b1804ad31aa71defa17658d73d29a1))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.5.0
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.5.0
+
+## [v1.4.1](https://github.com/docling-project/docling-serve/releases/tag/v1.4.1) - 2025-09-08
+
+### Fix
+
+* Trigger fix after ci fixes ([#355](https://github.com/docling-project/docling-serve/issues/355)) ([`b0360d7`](https://github.com/docling-project/docling-serve/commit/b0360d723bff202dcf44a25a3173ec1995945fc2))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.4.1
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.4.1
+
 ## [v1.4.0](https://github.com/docling-project/docling-serve/releases/tag/v1.4.0) - 2025-09-05

 ### Feature
--- a/Containerfile
+++ b/Containerfile
@@ -1,6 +1,6 @@
 ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

-ARG UV_VERSION=0.8.3
+ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.8.19

 ARG UV_SYNC_EXTRA_ARGS=""

@@ -25,7 +25,7 @@ RUN /usr/bin/fix-permissions /opt/app-root/src/.cache

 ENV TESSDATA_PREFIX=/usr/share/tesseract/tessdata/

-FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv_stage
+FROM ${UV_IMAGE} AS uv_stage

 ###################################################################################################
 # Docling layer                                                                                   #
@@ -58,7 +58,7 @@ RUN --mount=from=uv_stage,source=/uv,target=/bin/uv \
    uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-extra flash-attn && \
    FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE uv sync ${UV_SYNC_ARGS} ${UV_SYNC_EXTRA_ARGS} --no-build-isolation-package=flash-attn

-ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
+ARG MODELS_LIST="layout tableformer picture_classifier rapidocr easyocr"

 RUN echo "Downloading models..." && \
    HF_HUB_DOWNLOAD_TIMEOUT="90" \
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -35,12 +35,17 @@ from docling_jobkit.datamodel.callback import (
    ProgressCallbackRequest,
    ProgressCallbackResponse,
 )
+from docling_jobkit.datamodel.chunking import (
+    BaseChunkerOptions,
+    ChunkingExportOptions,
+    HierarchicalChunkerOptions,
+    HybridChunkerOptions,
+)
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
 from docling_jobkit.datamodel.s3_coords import S3Coordinates
-from docling_jobkit.datamodel.task import Task, TaskSource
+from docling_jobkit.datamodel.task import Task, TaskSource, TaskType
 from docling_jobkit.datamodel.task_targets import (
    InBodyTarget,
-    TaskTarget,
    ZipTarget,
 )
 from docling_jobkit.orchestrators.base_orchestrator import (
@@ -54,11 +59,15 @@ from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
 from docling_serve.datamodel.requests import (
    ConvertDocumentsRequest,
    FileSourceRequest,
+    GenericChunkDocumentsRequest,
    HttpSourceRequest,
    S3SourceRequest,
    TargetName,
+    TargetRequest,
+    make_request_model,
 )
 from docling_serve.datamodel.responses import (
+    ChunkDocumentResponse,
    ClearResponse,
    ConvertDocumentResponse,
    HealthCheckResponse,
@@ -185,16 +194,25 @@ def create_app():  # noqa: C901
            import gradio as gr

            from docling_serve.gradio_ui import ui as gradio_ui
+            from docling_serve.settings import uvicorn_settings

            tmp_output_dir = get_scratch() / "gradio"
            tmp_output_dir.mkdir(exist_ok=True, parents=True)
            gradio_ui.gradio_output_dir = tmp_output_dir
+
+            # Build the root_path for Gradio, accounting for UVICORN_ROOT_PATH
+            gradio_root_path = (
+                f"{uvicorn_settings.root_path}/ui"
+                if uvicorn_settings.root_path
+                else "/ui"
+            )
+
            app = gr.mount_gradio_app(
                app,
                gradio_ui,
                path="/ui",
                allowed_paths=["./logo.png", tmp_output_dir],
-                root_path="/ui",
+                root_path=gradio_root_path,
            )
        except ImportError:
            _log.warning(
@@ -249,10 +267,11 @@ def create_app():  # noqa: C901
    ########################

    async def _enque_source(
-        orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
+        orchestrator: BaseOrchestrator,
+        request: ConvertDocumentsRequest | GenericChunkDocumentsRequest,
    ) -> Task:
        sources: list[TaskSource] = []
-        for s in conversion_request.sources:
+        for s in request.sources:
            if isinstance(s, FileSourceRequest):
                sources.append(FileSource.model_validate(s))
            elif isinstance(s, HttpSourceRequest):
@@ -260,18 +279,41 @@ def create_app():  # noqa: C901
            elif isinstance(s, S3SourceRequest):
                sources.append(S3Coordinates.model_validate(s))

+        convert_options: ConvertDocumentsRequestOptions
+        chunking_options: BaseChunkerOptions | None = None
+        chunking_export_options = ChunkingExportOptions()
+        task_type: TaskType
+        if isinstance(request, ConvertDocumentsRequest):
+            task_type = TaskType.CONVERT
+            convert_options = request.options
+        elif isinstance(request, GenericChunkDocumentsRequest):
+            task_type = TaskType.CHUNK
+            convert_options = request.convert_options
+            chunking_options = request.chunking_options
+            chunking_export_options.include_converted_doc = (
+                request.include_converted_doc
+            )
+        else:
+            raise RuntimeError("Uknown request type.")
+
        task = await orchestrator.enqueue(
+            task_type=task_type,
            sources=sources,
-            options=conversion_request.options,
-            target=conversion_request.target,
+            convert_options=convert_options,
+            chunking_options=chunking_options,
+            chunking_export_options=chunking_export_options,
+            target=request.target,
        )
        return task

    async def _enque_file(
        orchestrator: BaseOrchestrator,
        files: list[UploadFile],
-        options: ConvertDocumentsRequestOptions,
-        target: TaskTarget,
+        task_type: TaskType,
+        convert_options: ConvertDocumentsRequestOptions,
+        chunking_options: BaseChunkerOptions | None,
+        chunking_export_options: ChunkingExportOptions | None,
+        target: TargetRequest,
    ) -> Task:
        _log.info(f"Received {len(files)} files for processing.")

@@ -284,7 +326,12 @@ def create_app():  # noqa: C901
            file_sources.append(DocumentStream(name=name, stream=buf))

        task = await orchestrator.enqueue(
-            sources=file_sources, options=options, target=target
+            task_type=task_type,
+            sources=file_sources,
+            convert_options=convert_options,
+            chunking_options=chunking_options,
+            chunking_export_options=chunking_export_options,
+            target=target,
        )
        return task

@@ -381,7 +428,7 @@ def create_app():  # noqa: C901
        response = RedirectResponse(url=logo_url)
        return response

-    @app.get("/health")
+    @app.get("/health", tags=["health"])
    def health() -> HealthCheckResponse:
        return HealthCheckResponse()

@@ -393,6 +440,7 @@ def create_app():  # noqa: C901
    # Convert a document from URL(s)
    @app.post(
        "/v1/convert/source",
+        tags=["convert"],
        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
@@ -408,7 +456,7 @@ def create_app():  # noqa: C901
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
-            orchestrator=orchestrator, conversion_request=conversion_request
+            orchestrator=orchestrator, request=conversion_request
        )
        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
@@ -416,7 +464,7 @@ def create_app():  # noqa: C901

        if not completed:
            # TODO: abort task!
-            return HTTPException(
+            raise HTTPException(
                status_code=504,
                detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
            )
@@ -438,6 +486,7 @@ def create_app():  # noqa: C901
    # Convert a document from file(s)
    @app.post(
        "/v1/convert/file",
+        tags=["convert"],
        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
@@ -457,7 +506,13 @@ def create_app():  # noqa: C901
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options, target=target
+            task_type=TaskType.CONVERT,
+            orchestrator=orchestrator,
+            files=files,
+            convert_options=options,
+            chunking_options=None,
+            chunking_export_options=None,
+            target=target,
        )
        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
@@ -465,7 +520,7 @@ def create_app():  # noqa: C901

        if not completed:
            # TODO: abort task!
-            return HTTPException(
+            raise HTTPException(
                status_code=504,
                detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
            )
@@ -487,6 +542,7 @@ def create_app():  # noqa: C901
    # Convert a document from URL(s) using the async api
    @app.post(
        "/v1/convert/source/async",
+        tags=["convert"],
        response_model=TaskStatusResponse,
    )
    async def process_url_async(
@@ -495,13 +551,14 @@ def create_app():  # noqa: C901
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
-            orchestrator=orchestrator, conversion_request=conversion_request
+            orchestrator=orchestrator, request=conversion_request
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
        )
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
@@ -510,6 +567,7 @@ def create_app():  # noqa: C901
    # Convert a document from file(s) using the async api
    @app.post(
        "/v1/convert/file/async",
+        tags=["convert"],
        response_model=TaskStatusResponse,
    )
    async def process_file_async(
@@ -524,21 +582,249 @@ def create_app():  # noqa: C901
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options, target=target
+            task_type=TaskType.CONVERT,
+            orchestrator=orchestrator,
+            files=files,
+            convert_options=options,
+            chunking_options=None,
+            chunking_export_options=None,
+            target=target,
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
        )
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
        )

+    # Chunking endpoints: the same four routes are registered once per chunker type
+    for display_name, path_name, opt_cls in (
+        ("HybridChunker", "hybrid", HybridChunkerOptions),
+        ("HierarchicalChunker", "hierarchical", HierarchicalChunkerOptions),
+    ):
+        req_cls = make_request_model(opt_cls)
+
+        @app.post(  # async variant: responds with the task status, not the chunking result
+            f"/v1/chunk/{path_name}/source/async",
+            name=f"Chunk sources with {display_name} as async task",
+            tags=["chunk"],
+            response_model=TaskStatusResponse,
+        )
+        async def chunk_source_async(
+            background_tasks: BackgroundTasks,  # not referenced in the body
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],  # declared for the require_auth dependency; value unused below
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            request: req_cls,  # request model produced by make_request_model(opt_cls) for this chunker
+        ):
+            task = await _enque_source(orchestrator=orchestrator, request=request)  # enqueue only; completion is not awaited here
+            task_queue_position = await orchestrator.get_queue_position(
+                task_id=task.task_id
+            )
+            return TaskStatusResponse(  # status snapshot of the task that was just enqueued
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+
+        @app.post(  # async variant: responds with the task status, not the chunking result
+            f"/v1/chunk/{path_name}/file/async",
+            name=f"Chunk files with {display_name} as async task",
+            tags=["chunk"],
+            response_model=TaskStatusResponse,
+        )
+        async def chunk_file_async(
+            background_tasks: BackgroundTasks,  # not referenced in the body
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],  # declared for the require_auth dependency; value unused below
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            files: list[UploadFile],
+            convert_options: Annotated[
+                ConvertDocumentsRequestOptions,
+                FormDepends(
+                    ConvertDocumentsRequestOptions,
+                    prefix="convert_",
+                    excluded_fields=[
+                        "to_formats",
+                    ],
+                ),
+            ],
+            chunking_options: Annotated[
+                opt_cls,
+                FormDepends(
+                    opt_cls,  # form model must be the loop's chunker options class, matching the annotation above
+                    prefix="chunking_",
+                    excluded_fields=["chunker"],
+                ),
+            ],
+            include_converted_doc: Annotated[
+                bool,
+                Form(
+                    description="If true, the output will include both the chunks and the converted document."
+                ),
+            ] = False,
+            target_type: Annotated[
+                TargetName,
+                Form(description="Specification for the type of output target."),
+            ] = TargetName.INBODY,
+        ):
+            target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()  # any non-INBODY target_type yields a ZipTarget
+            task = await _enque_file(  # enqueue only; completion is not awaited here
+                task_type=TaskType.CHUNK,
+                orchestrator=orchestrator,
+                files=files,
+                convert_options=convert_options,
+                chunking_options=chunking_options,
+                chunking_export_options=ChunkingExportOptions(
+                    include_converted_doc=include_converted_doc
+                ),
+                target=target,
+            )
+            task_queue_position = await orchestrator.get_queue_position(
+                task_id=task.task_id
+            )
+            return TaskStatusResponse(  # status snapshot of the task that was just enqueued
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+
+        @app.post(  # sync variant: waits for the task, then returns the prepared response
+            f"/v1/chunk/{path_name}/source",
+            name=f"Chunk sources with {display_name}",
+            tags=["chunk"],
+            response_model=ChunkDocumentResponse,
+            responses={
+                200: {
+                    "content": {"application/zip": {}},
+                    # "description": "Return the JSON item or an image.",
+                }
+            },
+        )
+        async def chunk_source(
+            background_tasks: BackgroundTasks,  # forwarded to prepare_response below
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],  # declared for the require_auth dependency; value unused below
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            request: req_cls,  # request model produced by make_request_model(opt_cls) for this chunker
+        ):
+            task = await _enque_source(orchestrator=orchestrator, request=request)
+            completed = await _wait_task_complete(
+                orchestrator=orchestrator, task_id=task.task_id
+            )
+
+            if not completed:  # _wait_task_complete reported that the task did not complete
+                # TODO: abort task!
+                raise HTTPException(
+                    status_code=504,
+                    detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+                )
+
+            task_result = await orchestrator.task_result(task_id=task.task_id)
+            if task_result is None:  # no result stored for this task id
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task.task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+
+        @app.post(  # sync variant: waits for the task, then returns the prepared response
+            f"/v1/chunk/{path_name}/file",
+            name=f"Chunk files with {display_name}",
+            tags=["chunk"],
+            response_model=ChunkDocumentResponse,
+            responses={
+                200: {
+                    "content": {"application/zip": {}},
+                }
+            },
+        )
+        async def chunk_file(
+            background_tasks: BackgroundTasks,  # forwarded to prepare_response below
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],  # declared for the require_auth dependency; value unused below
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            files: list[UploadFile],
+            convert_options: Annotated[
+                ConvertDocumentsRequestOptions,
+                FormDepends(
+                    ConvertDocumentsRequestOptions,
+                    prefix="convert_",
+                    excluded_fields=[
+                        "to_formats",
+                    ],
+                ),
+            ],
+            chunking_options: Annotated[
+                opt_cls,
+                FormDepends(
+                    opt_cls,  # form model must be the loop's chunker options class, matching the annotation above
+                    prefix="chunking_",
+                    excluded_fields=["chunker"],
+                ),
+            ],
+            include_converted_doc: Annotated[
+                bool,
+                Form(
+                    description="If true, the output will include both the chunks and the converted document."
+                ),
+            ] = False,
+            target_type: Annotated[
+                TargetName,
+                Form(description="Specification for the type of output target."),
+            ] = TargetName.INBODY,
+        ):
+            target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()  # any non-INBODY target_type yields a ZipTarget
+            task = await _enque_file(
+                task_type=TaskType.CHUNK,
+                orchestrator=orchestrator,
+                files=files,
+                convert_options=convert_options,
+                chunking_options=chunking_options,
+                chunking_export_options=ChunkingExportOptions(
+                    include_converted_doc=include_converted_doc
+                ),
+                target=target,
+            )
+            completed = await _wait_task_complete(
+                orchestrator=orchestrator, task_id=task.task_id
+            )
+
+            if not completed:  # _wait_task_complete reported that the task did not complete
+                # TODO: abort task!
+                raise HTTPException(
+                    status_code=504,
+                    detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+                )
+
+            task_result = await orchestrator.task_result(task_id=task.task_id)
+            if task_result is None:  # no result stored for this task id
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task.task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+
    # Task status poll
    @app.get(
        "/v1/status/poll/{task_id}",
+        tags=["tasks"],
        response_model=TaskStatusResponse,
    )
    async def task_status_poll(
@@ -557,6 +843,7 @@ def create_app():  # noqa: C901
            raise HTTPException(status_code=404, detail="Task not found.")
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
@@ -600,6 +887,7 @@ def create_app():  # noqa: C901
            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
            task_response = TaskStatusResponse(
                task_id=task.task_id,
+                task_type=task.task_type,
                task_status=task.task_status,
                task_position=task_queue_position,
                task_meta=task.processing_meta,
@@ -615,6 +903,7 @@ def create_app():  # noqa: C901
                )
                task_response = TaskStatusResponse(
                    task_id=task.task_id,
+                    task_type=task.task_type,
                    task_status=task.task_status,
                    task_position=task_queue_position,
                    task_meta=task.processing_meta,
@@ -637,7 +926,10 @@ def create_app():  # noqa: C901
    # Task result
    @app.get(
        "/v1/result/{task_id}",
-        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        tags=["tasks"],
+        response_model=ConvertDocumentResponse
+        | PresignedUrlConvertDocumentResponse
+        | ChunkDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -670,6 +962,8 @@ def create_app():  # noqa: C901
    # Update task progress
    @app.post(
        "/v1/callback/task/progress",
+        tags=["internal"],
+        include_in_schema=False,
        response_model=ProgressCallbackResponse,
    )
    async def callback_task_progress(
@@ -692,6 +986,7 @@ def create_app():  # noqa: C901
    # Offload models
    @app.get(
        "/v1/clear/converters",
+        tags=["clear"],
        response_model=ClearResponse,
    )
    async def clear_converters(
@@ -704,6 +999,7 @@ def create_app():  # noqa: C901
    # Clean results
    @app.get(
        "/v1/clear/results",
+        tags=["clear"],
        response_model=ClearResponse,
    )
    async def clear_results(
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -1,16 +1,20 @@
 import enum
-from typing import Annotated, Literal
+from functools import cache
+from typing import Annotated, Generic, Literal

 from pydantic import BaseModel, Field, model_validator
 from pydantic_core import PydanticCustomError
-from typing_extensions import Self
+from typing_extensions import Self, TypeVar

+from docling_jobkit.datamodel.chunking import (
+    BaseChunkerOptions,
+)
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
 from docling_jobkit.datamodel.s3_coords import S3Coordinates
 from docling_jobkit.datamodel.task_targets import (
    InBodyTarget,
+    PutTarget,
    S3Target,
-    TaskTarget,
    ZipTarget,
 )

@@ -43,12 +47,17 @@ SourceRequestItem = Annotated[
    FileSourceRequest | HttpSourceRequest | S3SourceRequest, Field(discriminator="kind")
 ]

+TargetRequest = Annotated[
+    InBodyTarget | ZipTarget | S3Target | PutTarget,
+    Field(discriminator="kind"),
+]
+

 ## Complete Source request
 class ConvertDocumentsRequest(BaseModel):
    options: ConvertDocumentsRequestOptions = ConvertDocumentsRequestOptions()
    sources: list[SourceRequestItem]
-    target: TaskTarget = InBodyTarget()
+    target: TargetRequest = InBodyTarget()

    @model_validator(mode="after")
    def validate_s3_source_and_target(self) -> Self:
@@ -70,3 +79,52 @@ class ConvertDocumentsRequest(BaseModel):
                "error target", 'target kind "s3" requires source kind "s3"'
            )
        return self
+
+
+## Source chunking requests
+
+
+# Fields shared by every chunking request: conversion options, input sources,
+# whether to return the converted document, and the output target.
+# The chunker-specific `chunking_options` field is not declared here.
+class BaseChunkDocumentsRequest(BaseModel):
+    convert_options: Annotated[
+        ConvertDocumentsRequestOptions, Field(description="Conversion options.")
+    ] = ConvertDocumentsRequestOptions()
+    # Required: no default is provided for the list of sources.
+    sources: Annotated[
+        list[SourceRequestItem],
+        Field(description="List of input document sources to process."),
+    ]
+    include_converted_doc: Annotated[
+        bool,
+        Field(
+            description="If true, the output will include both the chunks and the converted document."
+        ),
+    ] = False
+    # Defaults to returning the result in the response body.
+    target: Annotated[
+        TargetRequest, Field(description="Specification for the type of output target.")
+    ] = InBodyTarget()
+
+
+ChunkingOptT = TypeVar("ChunkingOptT", bound=BaseChunkerOptions)
+
+
+# Chunking request parameterized by the chunker options type
+# (ChunkingOptT is bound to BaseChunkerOptions).
+class GenericChunkDocumentsRequest(BaseChunkDocumentsRequest, Generic[ChunkingOptT]):
+    # Declared without a default here, so it is a required field on this class.
+    chunking_options: ChunkingOptT
+
+
+@cache
+def make_request_model(
+    opt_type: type[ChunkingOptT],
+) -> type[GenericChunkDocumentsRequest[ChunkingOptT]]:
+    """
+    Dynamically create (and cache) a subclass of GenericChunkDocumentsRequest[opt_type]
+    with chunking_options having a default factory.
+
+    Args:
+        opt_type: The chunker options class used both as the annotation and as
+            the default factory of the ``chunking_options`` field.
+
+    Returns:
+        A new model class named ``<opt_type.__name__>DocumentsRequest``. Because
+        of ``functools.cache``, calling again with the same ``opt_type`` returns
+        the same class object instead of building a new one.
+    """
+    # Three-argument type(): (class name, bases, namespace).
+    return type(
+        f"{opt_type.__name__}DocumentsRequest",
+        (GenericChunkDocumentsRequest[opt_type],),  # type: ignore[valid-type]
+        {
+            # Re-declare the field with the concrete options type ...
+            "__annotations__": {"chunking_options": opt_type},
+            # ... and give it a default built by calling opt_type() with no arguments.
+            "chunking_options": Field(
+                default_factory=opt_type, description="Options specific to the chunker."
+            ),
+        },
+    )
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -5,8 +5,12 @@ from pydantic import BaseModel

 from docling.datamodel.document import ConversionStatus, ErrorItem
 from docling.utils.profiling import ProfilingItem
-from docling_jobkit.datamodel.result import ExportDocumentResponse
-from docling_jobkit.datamodel.task_meta import TaskProcessingMeta
+from docling_jobkit.datamodel.result import (
+    ChunkedDocumentResultItem,
+    ExportDocumentResponse,
+    ExportResult,
+)
+from docling_jobkit.datamodel.task_meta import TaskProcessingMeta, TaskType


 # Status
@@ -37,8 +41,15 @@ class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus


+# Response payload returned for chunking tasks.
+class ChunkDocumentResponse(BaseModel):
+    # Chunk items produced for the processed documents.
+    chunks: list[ChunkedDocumentResultItem]
+    # Export results, one entry per document (presumably in input order — TODO confirm).
+    documents: list[ExportResult]
+    # Processing time of the task (unit not visible here — TODO confirm).
+    processing_time: float
+
+
 class TaskStatusResponse(BaseModel):
    task_id: str
+    task_type: TaskType
    task_status: str
    task_position: Optional[int] = None
    task_meta: Optional[TaskProcessingMeta] = None
--- a/docling_serve/gradio_ui.py
+++ b/docling_serve/gradio_ui.py
@@ -4,6 +4,7 @@ import itertools
 import json
 import logging
 import ssl
+import sys
 import tempfile
 import time
 from pathlib import Path
@@ -224,13 +225,17 @@ def auto_set_return_as_file(

 def change_ocr_lang(ocr_engine):
+    """Return a Gradio update for the OCR language textbox.
+
+    For the engines handled below, the update makes the textbox visible and
+    presets it with a default language list written in that engine's own
+    code format. Any other value (for example "auto") hides the textbox and
+    clears its content.
+    """
     if ocr_engine == "easyocr":
-        return "en,fr,de,es"
+        return gr.update(visible=True, value="en,fr,de,es")
     elif ocr_engine == "tesseract_cli":
-        return "eng,fra,deu,spa"
+        return gr.update(visible=True, value="eng,fra,deu,spa")
     elif ocr_engine == "tesseract":
-        return "eng,fra,deu,spa"
+        return gr.update(visible=True, value="eng,fra,deu,spa")
     elif ocr_engine == "rapidocr":
-        return "english,chinese"
+        return gr.update(visible=True, value="english,chinese")
+    elif ocr_engine == "ocrmac":
+        return gr.update(visible=True, value="fr-FR,de-DE,es-ES,en-US")
+
+    # Fallback for engines without a language preset: hide and reset the field.
+    return gr.update(visible=False, value="")


 def wait_task_finish(auth: str, task_id: str, return_as_file: bool):
@@ -570,14 +575,17 @@ with gr.Blocks(
    with gr.Tab("Convert File"):
        with gr.Row():
            with gr.Column(scale=4):
+                # Materialize the extensions: chain.from_iterable() returns a one-shot
+                # iterator, so reusing it for both the lowercase and the uppercase
+                # comprehension below would leave the second list empty.
+                raw_exts = list(
+                    itertools.chain.from_iterable(FormatToExtensions.values())
+                )
                file_input = gr.File(
                    elem_id="file_input_zone",
                    label="Upload File",
                    file_types=[
-                        f".{v}"
-                        for v in itertools.chain.from_iterable(
-                            FormatToExtensions.values()
-                        )
+                        f".{v.lower()}"
+                        for v in raw_exts  # lowercase
+                    ]
+                    + [
+                        f".{v.upper()}"
+                        for v in raw_exts  # uppercase
                    ],
                    file_count="multiple",
                    scale=4,
@@ -633,18 +641,25 @@ with gr.Blocks(
                ocr = gr.Checkbox(label="Enable OCR", value=True)
                force_ocr = gr.Checkbox(label="Force OCR", value=False)
            with gr.Column(scale=1):
+                engines_list = [
+                    ("Auto", "auto"),
+                    ("EasyOCR", "easyocr"),
+                    ("Tesseract", "tesseract"),
+                    ("RapidOCR", "rapidocr"),
+                ]
+                if sys.platform == "darwin":
+                    engines_list.append(("OCRMac", "ocrmac"))
+
                ocr_engine = gr.Radio(
-                    [
-                        ("EasyOCR", "easyocr"),
-                        ("Tesseract", "tesseract"),
-                        ("RapidOCR", "rapidocr"),
-                    ],
+                    engines_list,
                    label="OCR Engine",
-                    value="easyocr",
+                    value="auto",
                )
            with gr.Column(scale=1, min_width=200):
                ocr_lang = gr.Textbox(
-                    label="OCR Language (beware of the format)", value="en,fr,de,es"
+                    label="OCR Language (beware of the format)",
+                    value="en,fr,de,es",
+                    visible=False,
                )
            ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
        with gr.Row():
--- a/docling_serve/helper_functions.py
+++ b/docling_serve/helper_functions.py
@@ -29,10 +29,15 @@ def is_pydantic_model(type_):

 # Adapted from
 # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
-def FormDepends(cls: type[BaseModel]):
+def FormDepends(
+    cls: type[BaseModel], prefix: str = "", excluded_fields: list[str] = []
+):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
+        if field_name in excluded_fields:
+            continue
+
        annotation = model_field.annotation
        description = model_field.description
        default = (
@@ -63,7 +68,7 @@ def FormDepends(cls: type[BaseModel]):

        new_parameters.append(
            inspect.Parameter(
-                name=field_name,
+                name=f"{prefix}{field_name}",
                kind=inspect.Parameter.POSITIONAL_ONLY,
                default=default,
                annotation=annotation,
@@ -71,19 +76,23 @@ def FormDepends(cls: type[BaseModel]):
        )

    async def as_form_func(**data):
+        newdata = {}
        for field_name, model_field in cls.model_fields.items():
-            value = data.get(field_name)
+            if field_name in excluded_fields:
+                continue
+            value = data.get(f"{prefix}{field_name}")
+            newdata[field_name] = value
            annotation = model_field.annotation

            # Parse nested models from JSON string
            if value is not None and is_pydantic_model(annotation):
                try:
                    validator = TypeAdapter(annotation)
-                    data[field_name] = validator.validate_json(value)
+                    newdata[field_name] = validator.validate_json(value)
                except Exception as e:
                    raise ValueError(f"Invalid JSON for field '{field_name}': {e}")

-        return cls(**data)
+        return cls(**newdata)

    sig = inspect.signature(as_form_func)
    sig = sig.replace(parameters=new_parameters)
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -4,7 +4,8 @@ import logging
 from fastapi import BackgroundTasks, Response

 from docling_jobkit.datamodel.result import (
-    ConvertDocumentResult,
+    ChunkedDocumentResult,
+    DoclingTaskResult,
    ExportResult,
    RemoteTargetResult,
    ZipArchiveResult,
@@ -14,6 +15,7 @@ from docling_jobkit.orchestrators.base_orchestrator import (
 )

 from docling_serve.datamodel.responses import (
+    ChunkDocumentResponse,
    ConvertDocumentResponse,
    PresignedUrlConvertDocumentResponse,
 )
@@ -24,11 +26,16 @@ _log = logging.getLogger(__name__)

 async def prepare_response(
    task_id: str,
-    task_result: ConvertDocumentResult,
+    task_result: DoclingTaskResult,
    orchestrator: BaseOrchestrator,
    background_tasks: BackgroundTasks,
 ):
-    response: Response | ConvertDocumentResponse | PresignedUrlConvertDocumentResponse
+    response: (
+        Response
+        | ConvertDocumentResponse
+        | PresignedUrlConvertDocumentResponse
+        | ChunkDocumentResponse
+    )
    if isinstance(task_result.result, ExportResult):
        response = ConvertDocumentResponse(
            document=task_result.result.content,
@@ -52,6 +59,12 @@ async def prepare_response(
            num_succeeded=task_result.num_succeeded,
            num_failed=task_result.num_failed,
        )
+    elif isinstance(task_result.result, ChunkedDocumentResult):
+        response = ChunkDocumentResponse(
+            chunks=task_result.result.chunks,
+            documents=task_result.result.documents,
+            processing_time=task_result.processing_time,
+        )
    else:
        raise ValueError("Unknown result type")

--- a/docling_serve/websocket_notifier.py
+++ b/docling_serve/websocket_notifier.py
@@ -34,6 +34,7 @@ class WebsocketNotifier(BaseNotifier):
        task_queue_position = await self.orchestrator.get_queue_position(task_id)
        msg = TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -433,7 +433,7 @@ with connect(uri) as websocket:
            payload = json.loads(message)
            if payload["message"] == "error":
                break
-            if payload["message"] == "error" and payload["task"]["task_status"] in ("success", "failure"):
+            if payload["message"] == "update" and payload["task"]["task_status"] in ("success", "failure"):
                break
        except:
          break
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "1.4.0"  # DO NOT EDIT, updated automatically
+version = "1.7.0"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -35,7 +35,7 @@ requires-python = ">=3.10"
 dependencies = [
    "docling~=2.38",
    "docling-core>=2.45.0",
-    "docling-jobkit[kfp,rq,vlm]>=1.4.0,<2.0.0",
+    "docling-jobkit[kfp,rq,vlm]>=1.6.0,<2.0.0",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
    "pydantic~=2.10",
@@ -50,15 +50,17 @@ dependencies = [

 [project.optional-dependencies]
 ui = [
-    "gradio~=5.9",
-    "pydantic<2.11.0",  # fix compatibility between gradio and new pydantic 2.11
+    "gradio>=5.23.2,<6.0.0",
 ]
 tesserocr = [
    "tesserocr~=2.7"
 ]
+easyocr = [
+    "easyocr>=1.7",
+]
 rapidocr = [
-    "rapidocr-onnxruntime~=1.4; python_version<'3.13'",
-    "onnxruntime~=1.7",
+    "rapidocr (>=3.3,<4.0.0) ; python_version < '3.14'",
+    "onnxruntime (>=1.7.0,<2.0.0)",
 ]
 flash-attn = [
  "flash-attn~=2.8.2; sys_platform == 'linux' and platform_machine == 'x86_64'"
@@ -87,10 +89,10 @@ cpu = [
  "torchvision>=0.22.1",
 ]

-cu124 = [
-  "torch>=2.6.0",
-  "torchvision>=0.21.0",
-]
+# cu124 = [
+#   "torch>=2.6.0",
+#   "torchvision>=0.21.0",
+# ]

 cu126 = [
  "torch>=2.7.1",
@@ -115,7 +117,7 @@ conflicts = [
  [
    { group = "pypi" },
    { group = "cpu" },
-    { group = "cu124" },
+    # { group = "cu124" },
    { group = "cu126" },
    { group = "cu128" },
    { group = "rocm" },
@@ -123,14 +125,15 @@ conflicts = [
 ]
 environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
 override-dependencies = [
-  "urllib3~=2.0"
+  "urllib3~=2.0",
+  "xgrammar>=0.1.24"
 ]

 [tool.uv.sources]
 torch = [
  { index = "pytorch-pypi", group = "pypi" },
  { index = "pytorch-cpu", group = "cpu" },
-  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  # { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
@@ -139,7 +142,7 @@ torch = [
 torchvision = [
  { index = "pytorch-pypi", group = "pypi" },
  { index = "pytorch-cpu", group = "cpu" },
-  { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
+  # { index = "pytorch-cu124", group = "cu124", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu126", group = "cu126", marker = "sys_platform == 'linux'" },
  { index = "pytorch-cu128", group = "cu128", marker = "sys_platform == 'linux'" },
  { index = "pytorch-rocm", group = "rocm", marker = "sys_platform == 'linux'" },
@@ -162,10 +165,10 @@ name = "pytorch-cpu"
 url = "https://download.pytorch.org/whl/cpu"
 explicit = true

-[[tool.uv.index]]
-name = "pytorch-cu124"
-url = "https://download.pytorch.org/whl/cu124"
-explicit = true
+# [[tool.uv.index]]
+# name = "pytorch-cu124"
+# url = "https://download.pytorch.org/whl/cu124"
+# explicit = true

 [[tool.uv.index]]
 name = "pytorch-cu126"
@@ -279,6 +282,7 @@ module = [
    "kfp.*",
    "kfp_server_api.*",
    "mlx_vlm.*",
+    "mlx.*",
    "scalar_fastapi.*",
 ]
 ignore_missing_imports = true
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -62,3 +62,60 @@ async def test_convert_url(async_client):
        time.sleep(2)

    assert task["task_status"] == "success"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("include_converted_doc", [False, True])
+async def test_chunk_url(async_client, include_converted_doc: bool):
+    """Submit an async hybrid-chunking task for a URL and validate its result.
+
+    Runs twice, with ``include_converted_doc`` set to False and True, and checks
+    that the converted document JSON is present only when it was requested.
+    """
+
+    example_docs = [
+        "https://arxiv.org/pdf/2311.18481",
+    ]
+
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
+        "include_converted_doc": include_converted_doc,
+    }
+
+    # Enqueue the chunking task; the response carries the initial task status.
+    response = await async_client.post(
+        f"{base_url}/chunk/hybrid/source/async", json=payload
+    )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    # Poll the status endpoint until the task reaches a terminal state.
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        # NOTE(review): time.sleep() blocks the event loop inside an async test;
+        # it matches the polling loop of test_convert_url in this file, but
+        # `await asyncio.sleep(2)` would be the non-blocking equivalent.
+        time.sleep(2)
+
+    assert task["task_status"] == "success"
+
+    # Fetch the final result and check the chunking payload.
+    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
+    assert result_resp.status_code == 200, "Response should be 200 OK"
+    result = result_resp.json()
+    print("Got result.")
+
+    assert "chunks" in result
+    assert len(result["chunks"]) > 0
+
+    assert "documents" in result
+    assert len(result["documents"]) > 0
+    assert result["documents"][0]["status"] == "success"
+
+    # The converted document JSON must be present only when explicitly requested.
+    if include_converted_doc:
+        assert result["documents"][0]["content"]["json_content"] is not None
+        assert (
+            result["documents"][0]["content"]["json_content"]["schema_name"]
+            == "DoclingDocument"
+        )
+    else:
+        assert result["documents"][0]["content"]["json_content"] is None
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
github-actions[bot]	b6eece7ef0	chore: bump version to 1.7.0 [skip ci]	2025-10-17 12:16:37 +00:00
Michele Dolfi	f5af71e8f6	feat(UI): add auto and orcmac options in demo UI (#408 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-17 12:23:57 +02:00
Michele Dolfi	d95ea94087	feat: Docling with auto-ocr (#403 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-15 21:15:29 +02:00
sahlex	5344505718	fix: run docling ui behind a reverse proxy using a context path (#396 ) Signed-off-by: Sahler.Alexander <Alexander.Sahler@m-net.de> Signed-off-by: sahlex <1122279+sahlex@users.noreply.github.com> Co-authored-by: Sahler.Alexander <Alexander.Sahler@m-net.de>	2025-10-09 16:07:02 +02:00
github-actions[bot]	5edc624fbf	chore: bump version to 1.6.0 [skip ci]	2025-10-03 13:39:59 +00:00
Michele Dolfi	45f0f3c8f9	fix: update locked dependencies (#392 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-03 15:33:45 +02:00
Michele Dolfi	0595d31d5b	feat: pin new version of jobkit with granite-docling and connectors (#391 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-03 14:24:51 +02:00
Michele Dolfi	f6b5f0e063	docs: fix docs for websocket breaking condition (#390 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-10-02 10:55:00 +02:00
Michele Dolfi	8b22a39141	fix(UI): allow both lowercase and uppercase extensions (#386 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-29 09:40:49 +02:00
erikmargaronis	d4eac053f9	fix: Correctly raise HTTPException for Gateway Timeout (#382 ) Signed-off-by: Erik Margaronis <erik.margaronis@gmail.com>	2025-09-29 08:06:21 +02:00
Rui Dias Gomes	fa1c5f04f3	ci: improve caching steps (#371 ) Signed-off-by: rmdg88 <rmdg88@gmail.com>	2025-09-23 18:15:12 +02:00
Viktor Kuropiatnyk	ba61af2359	fix: Pinning of higher version of dependencies to fix potential security issues (#363 ) Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>	2025-09-18 08:57:41 +02:00
github-actions[bot]	6b6dd8a0d0	chore: bump version to 1.5.1 [skip ci]	2025-09-17 13:45:40 +00:00
Michele Dolfi	513ae0c119	fix: remove old dependencies, fixes in docling-parse and more minor dependencies upgrade (#362 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-17 15:36:23 +02:00
Rui Dias Gomes	bde040661f	fix: updates rapidocr deps (#361 ) Signed-off-by: rmdg88 <rmdg88@gmail.com>	2025-09-16 14:00:21 +02:00
github-actions[bot]	496f7ec26b	chore: bump version to 1.5.0 [skip ci]	2025-09-09 08:46:36 +00:00
Michele Dolfi	9d6def0ec8	feat: add chunking endpoints (#353 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-09 08:38:54 +02:00
github-actions[bot]	a4fed2d965	chore: bump version to 1.4.1 [skip ci]	2025-09-08 10:28:12 +00:00
Michele Dolfi	b0360d723b	fix: trigger fix after ci fixes (#355 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-08 12:23:07 +02:00
Michele Dolfi	4adc0dfa79	ci: fix use simple tag for testing (#354 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-08 11:29:55 +02:00