chore: bump version to 1.5.0 [skip ci]

feat: add chunking endpoints (#353 )
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 08:33:50 +00:00 · 2025-09-09 08:46:36 +00:00 · 2025-09-09 08:38:54 +02:00 · 2025-09-08 10:28:12 +00:00 · 2025-09-08 12:23:07 +02:00 · 2025-09-08 11:29:55 +02:00
11 changed files with 533 additions and 259 deletions
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -117,8 +117,8 @@ jobs:
        with:
          context: .
          push: false
-          load: true # == '--output=type=docker'
-          tags: ${{ steps.ghcr_meta.outputs.tags }}-test
+          load: true
+          tags: ${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test
          labels: |
            org.opencontainers.image.title=docling-serve
            org.opencontainers.image.test=true
@@ -133,7 +133,7 @@ jobs:
        run: |
          set -e

-          IMAGE_TAG="${{ steps.ghcr_meta.outputs.tags }}-test"
+          IMAGE_TAG="${{ env.GHCR_REGISTRY }}/${{ inputs.ghcr_image_name }}:${{ github.sha }}-test"
          echo "Testing local image: $IMAGE_TAG"

          # Remove existing container if any
@@ -230,198 +230,3 @@ jobs:
      - name: Remove Local Docker Images
        run: |
          docker image prune -af
-##
-## Extra tests for released images
-##
-
-    # outputs:
-    #   image-tags: ${{ steps.ghcr_meta.outputs.tags }}
-    #   image-labels: ${{ steps.ghcr_meta.outputs.labels }}
-
-  # test-cpu-image:
-  #   needs:
-  #     - image
-  #   runs-on: ubuntu-latest
-  #   permissions:
-  #     contents: read
-  #     packages: read
-
-  #   steps:
-  #     - name: Checkout code
-  #       uses: actions/checkout@v5
-
-  #     - name: Test CPU images
-  #       run: |
-  #         set -e
-
-  #         echo "Testing image: ${{ needs.image.outputs.image-tags }}"
-
-  #         for tag in ${{ needs.image.outputs.image-tags }}; do
-  #           if echo "$tag" | grep -q -- '-cpu' && echo "$tag" | grep -qE ':[vV][0-9]+(\.[0-9]+){0,2}$'; then
-  #             echo "Testing CPU image: $tag"
-
-  #             # Remove existing container if any
-  #             docker rm -f docling-serve-test-container 2>/dev/null || true
-
-  #             echo "Pulling image..."
-  #             docker pull "$tag"
-
-  #             echo "Waiting 5s after pull..."
-  #             sleep 5
-
-  #             echo "Starting container..."
-  #             docker run -d -p 5001:5001 --name docling-serve-test-container "$tag"
-
-  #             echo "Waiting 15s for container to boot..."
-  #             sleep 15
-
-  #             echo "Checking service health..."
-  #             for i in {1..20}; do
-  #               health_response=$(curl -s http://localhost:5001/health || true)
-  #               echo "Health check response [$i]: $health_response"
-  #               if echo "$health_response" | grep -q '"status":"ok"'; then
-  #                 echo "Service is healthy!"
-  #                 echo "Sending test conversion request..."
-
-  #                 status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST 'http://localhost:5001/v1/convert/source' \
-  #                   -H 'accept: application/json' \
-  #                   -H 'Content-Type: application/json' \
-  #                   -d '{
-  #                     "options": {
-  #                       "from_formats": ["pdf"],
-  #                       "to_formats": ["md"]
-  #                     },
-  #                     "sources": [
-  #                       {
-  #                         "kind": "http",
-  #                         "url": "https://arxiv.org/pdf/2501.17887"
-  #                       }
-  #                     ],
-  #                     "target": {
-  #                       "kind": "inbody"
-  #                     }
-  #                   }')
-
-  #                 echo "Conversion request returned status code: $status_code"
-
-  #                 if [ "$status_code" -ne 200 ]; then
-  #                   echo "Conversion failed!"
-  #                   docker logs docling-serve-test-container
-  #                   docker rm -f docling-serve-test-container
-  #                   exit 1
-  #                 fi
-
-  #                 break
-  #               else
-  #                 echo "Waiting for service... [$i/20]"
-  #                 sleep 3
-  #               fi
-  #             done
-
-  #             if ! echo "$health_response" | grep -q '"status":"ok"'; then
-  #               echo "Service did not become healthy in time."
-  #               docker logs docling-serve-test-container
-  #               docker rm -f docling-serve-test-container
-  #               exit 1
-  #             fi
-
-  #             echo "Cleaning up test container..."
-  #             docker rm -f docling-serve-test-container
-  #           else
-  #             echo "Skipping non-released or non-CPU image: $tag"
-  #           fi
-  #         done
-
-  # test-cuda-image:
-  #   needs:
-  #     - image
-  #   runs-on: ubuntu-latest # >> placeholder for GPU runner << #
-  #   permissions:
-  #     contents: read
-  #     packages: read
-
-  #   steps:
-  #     - name: Checkout code
-  #       uses: actions/checkout@v5
-
-  #     - name: Test CUDA images
-  #       run: |
-  #         set -e
-
-  #         echo "Testing image: ${{ needs.image.outputs.image-tags }}"
-
-  #         for tag in ${{ needs.image.outputs.image-tags }}; do
-  #           if echo "$tag" | grep -qE -- '-cu[0-9]+' && echo "$tag" | grep -qE ':[vV][0-9]+(\.[0-9]+){0,2}$'; then
-  #             echo "Testing CUDA image: $tag"
-
-  #             # Remove existing container if any
-  #             docker rm -f docling-serve-test-container 2>/dev/null || true
-
-  #             echo "Pulling image..."
-  #             docker pull "$tag"
-
-  #             echo "Waiting 5s after pull..."
-  #             sleep 5
-
-  #             echo "Starting container..."
-  #             docker run -d -p 5001:5001 --gpus all --name docling-serve-test-container "$tag"
-
-  #             echo "Waiting 15s for container to boot..."
-  #             sleep 15
-
-  #             echo "Checking service health..."
-  #             for i in {1..25}; do
-  #               health_response=$(curl -s http://localhost:5001/health || true)
-  #               echo "Health check response [$i]: $health_response"
-  #               if echo "$health_response" | grep -q '"status":"ok"'; then
-  #                 echo "Service is healthy!"
-  #                 echo "Sending test conversion request..."
-
-  #                 status_code=$(curl -s -o /dev/null -w "%{http_code}" -X POST 'http://localhost:5001/v1/convert/source' \
-  #                   -H 'accept: application/json' \
-  #                   -H 'Content-Type: application/json' \
-  #                   -d '{
-  #                     "options": {
-  #                       "from_formats": ["pdf"],
-  #                       "to_formats": ["md"]
-  #                     },
-  #                     "sources": [
-  #                       {
-  #                         "kind": "http",
-  #                         "url": "https://arxiv.org/pdf/2501.17887"
-  #                       }
-  #                     ],
-  #                     "target": {
-  #                       "kind": "inbody"
-  #                     }
-  #                   }')
-
-  #                 echo "Conversion request returned status code: $status_code"
-
-  #                 if [ "$status_code" -ne 200 ]; then
-  #                   echo "Conversion failed!"
-  #                   docker logs docling-serve-test-container
-  #                   docker rm -f docling-serve-test-container
-  #                   exit 1
-  #                 fi
-
-  #                 break
-  #               else
-  #                 echo "Waiting for service... [$i/25]"
-  #                 sleep 3
-  #               fi
-  #             done
-
-  #             if ! echo "$health_response" | grep -q '"status":"ok"'; then
-  #               echo "Service did not become healthy in time."
-  #               docker logs docling-serve-test-container
-  #               docker rm -f docling-serve-test-container
-  #               exit 1
-  #             fi
-
-  #             echo "Cleaning up test container..."
-  #             docker rm -f docling-serve-test-container
-  #           else
-  #             echo "Skipping non-released or non-CUDA image: $tag"
-  #           fi
-  #         done
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,35 @@
+## [v1.5.0](https://github.com/docling-project/docling-serve/releases/tag/v1.5.0) - 2025-09-09
+
+### Feature
+
+* Add chunking endpoints ([#353](https://github.com/docling-project/docling-serve/issues/353)) ([`9d6def0`](https://github.com/docling-project/docling-serve/commit/9d6def0ec8b1804ad31aa71defa17658d73d29a1))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.5.0
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.5.0
+
+## [v1.4.1](https://github.com/docling-project/docling-serve/releases/tag/v1.4.1) - 2025-09-08
+
+### Fix
+
+* Trigger fix after ci fixes ([#355](https://github.com/docling-project/docling-serve/issues/355)) ([`b0360d7`](https://github.com/docling-project/docling-serve/commit/b0360d723bff202dcf44a25a3173ec1995945fc2))
+
+### Docling libraries included in this release:
+- docling 2.46.0
+- docling 2.51.0
+- docling-core 2.47.0
+- docling-ibm-models 3.9.1
+- docling-jobkit 1.4.1
+- docling-mcp 1.2.0
+- docling-parse 4.4.0
+- docling-serve 1.4.1
+
 ## [v1.4.0](https://github.com/docling-project/docling-serve/releases/tag/v1.4.0) - 2025-09-05

 ### Feature
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -35,9 +35,15 @@ from docling_jobkit.datamodel.callback import (
    ProgressCallbackRequest,
    ProgressCallbackResponse,
 )
+from docling_jobkit.datamodel.chunking import (
+    BaseChunkerOptions,
+    ChunkingExportOptions,
+    HierarchicalChunkerOptions,
+    HybridChunkerOptions,
+)
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
 from docling_jobkit.datamodel.s3_coords import S3Coordinates
-from docling_jobkit.datamodel.task import Task, TaskSource
+from docling_jobkit.datamodel.task import Task, TaskSource, TaskType
 from docling_jobkit.datamodel.task_targets import (
    InBodyTarget,
    TaskTarget,
@@ -54,11 +60,14 @@ from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
 from docling_serve.datamodel.requests import (
    ConvertDocumentsRequest,
    FileSourceRequest,
+    GenericChunkDocumentsRequest,
    HttpSourceRequest,
    S3SourceRequest,
    TargetName,
+    make_request_model,
 )
 from docling_serve.datamodel.responses import (
+    ChunkDocumentResponse,
    ClearResponse,
    ConvertDocumentResponse,
    HealthCheckResponse,
@@ -249,10 +258,11 @@ def create_app():  # noqa: C901
    ########################

    async def _enque_source(
-        orchestrator: BaseOrchestrator, conversion_request: ConvertDocumentsRequest
+        orchestrator: BaseOrchestrator,
+        request: ConvertDocumentsRequest | GenericChunkDocumentsRequest,
    ) -> Task:
        sources: list[TaskSource] = []
-        for s in conversion_request.sources:
+        for s in request.sources:
            if isinstance(s, FileSourceRequest):
                sources.append(FileSource.model_validate(s))
            elif isinstance(s, HttpSourceRequest):
@@ -260,17 +270,40 @@ def create_app():  # noqa: C901
            elif isinstance(s, S3SourceRequest):
                sources.append(S3Coordinates.model_validate(s))

+        convert_options: ConvertDocumentsRequestOptions
+        chunking_options: BaseChunkerOptions | None = None
+        chunking_export_options = ChunkingExportOptions()
+        task_type: TaskType
+        if isinstance(request, ConvertDocumentsRequest):
+            task_type = TaskType.CONVERT
+            convert_options = request.options
+        elif isinstance(request, GenericChunkDocumentsRequest):
+            task_type = TaskType.CHUNK
+            convert_options = request.convert_options
+            chunking_options = request.chunking_options
+            chunking_export_options.include_converted_doc = (
+                request.include_converted_doc
+            )
+        else:
+            raise RuntimeError("Uknown request type.")
+
        task = await orchestrator.enqueue(
+            task_type=task_type,
            sources=sources,
-            options=conversion_request.options,
-            target=conversion_request.target,
+            convert_options=convert_options,
+            chunking_options=chunking_options,
+            chunking_export_options=chunking_export_options,
+            target=request.target,
        )
        return task

    async def _enque_file(
        orchestrator: BaseOrchestrator,
        files: list[UploadFile],
-        options: ConvertDocumentsRequestOptions,
+        task_type: TaskType,
+        convert_options: ConvertDocumentsRequestOptions,
+        chunking_options: BaseChunkerOptions | None,
+        chunking_export_options: ChunkingExportOptions | None,
        target: TaskTarget,
    ) -> Task:
        _log.info(f"Received {len(files)} files for processing.")
@@ -284,7 +317,12 @@ def create_app():  # noqa: C901
            file_sources.append(DocumentStream(name=name, stream=buf))

        task = await orchestrator.enqueue(
-            sources=file_sources, options=options, target=target
+            task_type=task_type,
+            sources=file_sources,
+            convert_options=convert_options,
+            chunking_options=chunking_options,
+            chunking_export_options=chunking_export_options,
+            target=target,
        )
        return task

@@ -381,7 +419,7 @@ def create_app():  # noqa: C901
        response = RedirectResponse(url=logo_url)
        return response

-    @app.get("/health")
+    @app.get("/health", tags=["health"])
    def health() -> HealthCheckResponse:
        return HealthCheckResponse()

@@ -393,6 +431,7 @@ def create_app():  # noqa: C901
    # Convert a document from URL(s)
    @app.post(
        "/v1/convert/source",
+        tags=["convert"],
        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
@@ -408,7 +447,7 @@ def create_app():  # noqa: C901
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
-            orchestrator=orchestrator, conversion_request=conversion_request
+            orchestrator=orchestrator, request=conversion_request
        )
        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
@@ -438,6 +477,7 @@ def create_app():  # noqa: C901
    # Convert a document from file(s)
    @app.post(
        "/v1/convert/file",
+        tags=["convert"],
        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
        responses={
            200: {
@@ -457,7 +497,13 @@ def create_app():  # noqa: C901
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options, target=target
+            task_type=TaskType.CONVERT,
+            orchestrator=orchestrator,
+            files=files,
+            convert_options=options,
+            chunking_options=None,
+            chunking_export_options=None,
+            target=target,
        )
        completed = await _wait_task_complete(
            orchestrator=orchestrator, task_id=task.task_id
@@ -487,6 +533,7 @@ def create_app():  # noqa: C901
    # Convert a document from URL(s) using the async api
    @app.post(
        "/v1/convert/source/async",
+        tags=["convert"],
        response_model=TaskStatusResponse,
    )
    async def process_url_async(
@@ -495,13 +542,14 @@ def create_app():  # noqa: C901
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await _enque_source(
-            orchestrator=orchestrator, conversion_request=conversion_request
+            orchestrator=orchestrator, request=conversion_request
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
        )
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
@@ -510,6 +558,7 @@ def create_app():  # noqa: C901
    # Convert a document from file(s) using the async api
    @app.post(
        "/v1/convert/file/async",
+        tags=["convert"],
        response_model=TaskStatusResponse,
    )
    async def process_file_async(
@@ -524,21 +573,249 @@ def create_app():  # noqa: C901
    ):
        target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
        task = await _enque_file(
-            orchestrator=orchestrator, files=files, options=options, target=target
+            task_type=TaskType.CONVERT,
+            orchestrator=orchestrator,
+            files=files,
+            convert_options=options,
+            chunking_options=None,
+            chunking_export_options=None,
+            target=target,
        )
        task_queue_position = await orchestrator.get_queue_position(
            task_id=task.task_id
        )
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
        )

+    # Chunking endpoints
+    for display_name, path_name, opt_cls in (
+        ("HybridChunker", "hybrid", HybridChunkerOptions),
+        ("HierarchicalChunker", "hierarchical", HierarchicalChunkerOptions),
+    ):
+        req_cls = make_request_model(opt_cls)
+
+        @app.post(
+            f"/v1/chunk/{path_name}/source/async",
+            name=f"Chunk sources with {display_name} as async task",
+            tags=["chunk"],
+            response_model=TaskStatusResponse,
+        )
+        async def chunk_source_async(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            request: req_cls,
+        ):
+            task = await _enque_source(orchestrator=orchestrator, request=request)
+            task_queue_position = await orchestrator.get_queue_position(
+                task_id=task.task_id
+            )
+            return TaskStatusResponse(
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+
+        @app.post(
+            f"/v1/chunk/{path_name}/file/async",
+            name=f"Chunk files with {display_name} as async task",
+            tags=["chunk"],
+            response_model=TaskStatusResponse,
+        )
+        async def chunk_file_async(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            files: list[UploadFile],
+            convert_options: Annotated[
+                ConvertDocumentsRequestOptions,
+                FormDepends(
+                    ConvertDocumentsRequestOptions,
+                    prefix="convert_",
+                    excluded_fields=[
+                        "to_formats",
+                    ],
+                ),
+            ],
+            chunking_options: Annotated[
+                opt_cls,
+                FormDepends(
+                    HybridChunkerOptions,
+                    prefix="chunking_",
+                    excluded_fields=["chunker"],
+                ),
+            ],
+            include_converted_doc: Annotated[
+                bool,
+                Form(
+                    description="If true, the output will include both the chunks and the converted document."
+                ),
+            ] = False,
+            target_type: Annotated[
+                TargetName,
+                Form(description="Specification for the type of output target."),
+            ] = TargetName.INBODY,
+        ):
+            target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+            task = await _enque_file(
+                task_type=TaskType.CHUNK,
+                orchestrator=orchestrator,
+                files=files,
+                convert_options=convert_options,
+                chunking_options=chunking_options,
+                chunking_export_options=ChunkingExportOptions(
+                    include_converted_doc=include_converted_doc
+                ),
+                target=target,
+            )
+            task_queue_position = await orchestrator.get_queue_position(
+                task_id=task.task_id
+            )
+            return TaskStatusResponse(
+                task_id=task.task_id,
+                task_type=task.task_type,
+                task_status=task.task_status,
+                task_position=task_queue_position,
+                task_meta=task.processing_meta,
+            )
+
+        @app.post(
+            f"/v1/chunk/{path_name}/source",
+            name=f"Chunk sources with {display_name}",
+            tags=["chunk"],
+            response_model=ChunkDocumentResponse,
+            responses={
+                200: {
+                    "content": {"application/zip": {}},
+                    # "description": "Return the JSON item or an image.",
+                }
+            },
+        )
+        async def chunk_source(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            request: req_cls,
+        ):
+            task = await _enque_source(orchestrator=orchestrator, request=request)
+            completed = await _wait_task_complete(
+                orchestrator=orchestrator, task_id=task.task_id
+            )
+
+            if not completed:
+                # TODO: abort task!
+                return HTTPException(
+                    status_code=504,
+                    detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+                )
+
+            task_result = await orchestrator.task_result(task_id=task.task_id)
+            if task_result is None:
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task.task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+
+        @app.post(
+            f"/v1/chunk/{path_name}/file",
+            name=f"Chunk files with {display_name}",
+            tags=["chunk"],
+            response_model=ChunkDocumentResponse,
+            responses={
+                200: {
+                    "content": {"application/zip": {}},
+                }
+            },
+        )
+        async def chunk_file(
+            background_tasks: BackgroundTasks,
+            auth: Annotated[AuthenticationResult, Depends(require_auth)],
+            orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
+            files: list[UploadFile],
+            convert_options: Annotated[
+                ConvertDocumentsRequestOptions,
+                FormDepends(
+                    ConvertDocumentsRequestOptions,
+                    prefix="convert_",
+                    excluded_fields=[
+                        "to_formats",
+                    ],
+                ),
+            ],
+            chunking_options: Annotated[
+                opt_cls,
+                FormDepends(
+                    HybridChunkerOptions,
+                    prefix="chunking_",
+                    excluded_fields=["chunker"],
+                ),
+            ],
+            include_converted_doc: Annotated[
+                bool,
+                Form(
+                    description="If true, the output will include both the chunks and the converted document."
+                ),
+            ] = False,
+            target_type: Annotated[
+                TargetName,
+                Form(description="Specification for the type of output target."),
+            ] = TargetName.INBODY,
+        ):
+            target = InBodyTarget() if target_type == TargetName.INBODY else ZipTarget()
+            task = await _enque_file(
+                task_type=TaskType.CHUNK,
+                orchestrator=orchestrator,
+                files=files,
+                convert_options=convert_options,
+                chunking_options=chunking_options,
+                chunking_export_options=ChunkingExportOptions(
+                    include_converted_doc=include_converted_doc
+                ),
+                target=target,
+            )
+            completed = await _wait_task_complete(
+                orchestrator=orchestrator, task_id=task.task_id
+            )
+
+            if not completed:
+                # TODO: abort task!
+                return HTTPException(
+                    status_code=504,
+                    detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
+                )
+
+            task_result = await orchestrator.task_result(task_id=task.task_id)
+            if task_result is None:
+                raise HTTPException(
+                    status_code=404,
+                    detail="Task result not found. Please wait for a completion status.",
+                )
+            response = await prepare_response(
+                task_id=task.task_id,
+                task_result=task_result,
+                orchestrator=orchestrator,
+                background_tasks=background_tasks,
+            )
+            return response
+
    # Task status poll
    @app.get(
        "/v1/status/poll/{task_id}",
+        tags=["tasks"],
        response_model=TaskStatusResponse,
    )
    async def task_status_poll(
@@ -557,6 +834,7 @@ def create_app():  # noqa: C901
            raise HTTPException(status_code=404, detail="Task not found.")
        return TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
@@ -600,6 +878,7 @@ def create_app():  # noqa: C901
            task_queue_position = await orchestrator.get_queue_position(task_id=task_id)
            task_response = TaskStatusResponse(
                task_id=task.task_id,
+                task_type=task.task_type,
                task_status=task.task_status,
                task_position=task_queue_position,
                task_meta=task.processing_meta,
@@ -615,6 +894,7 @@ def create_app():  # noqa: C901
                )
                task_response = TaskStatusResponse(
                    task_id=task.task_id,
+                    task_type=task.task_type,
                    task_status=task.task_status,
                    task_position=task_queue_position,
                    task_meta=task.processing_meta,
@@ -637,7 +917,10 @@ def create_app():  # noqa: C901
    # Task result
    @app.get(
        "/v1/result/{task_id}",
-        response_model=ConvertDocumentResponse | PresignedUrlConvertDocumentResponse,
+        tags=["tasks"],
+        response_model=ConvertDocumentResponse
+        | PresignedUrlConvertDocumentResponse
+        | ChunkDocumentResponse,
        responses={
            200: {
                "content": {"application/zip": {}},
@@ -670,6 +953,8 @@ def create_app():  # noqa: C901
    # Update task progress
    @app.post(
        "/v1/callback/task/progress",
+        tags=["internal"],
+        include_in_schema=False,
        response_model=ProgressCallbackResponse,
    )
    async def callback_task_progress(
@@ -692,6 +977,7 @@ def create_app():  # noqa: C901
    # Offload models
    @app.get(
        "/v1/clear/converters",
+        tags=["clear"],
        response_model=ClearResponse,
    )
    async def clear_converters(
@@ -704,6 +990,7 @@ def create_app():  # noqa: C901
    # Clean results
    @app.get(
        "/v1/clear/results",
+        tags=["clear"],
        response_model=ClearResponse,
    )
    async def clear_results(
--- a/docling_serve/datamodel/requests.py
+++ b/docling_serve/datamodel/requests.py
@@ -1,10 +1,14 @@
 import enum
-from typing import Annotated, Literal
+from functools import cache
+from typing import Annotated, Generic, Literal

 from pydantic import BaseModel, Field, model_validator
 from pydantic_core import PydanticCustomError
-from typing_extensions import Self
+from typing_extensions import Self, TypeVar

+from docling_jobkit.datamodel.chunking import (
+    BaseChunkerOptions,
+)
 from docling_jobkit.datamodel.http_inputs import FileSource, HttpSource
 from docling_jobkit.datamodel.s3_coords import S3Coordinates
 from docling_jobkit.datamodel.task_targets import (
@@ -70,3 +74,52 @@ class ConvertDocumentsRequest(BaseModel):
                "error target", 'target kind "s3" requires source kind "s3"'
            )
        return self
+
+
+## Source chunking requests
+
+
+class BaseChunkDocumentsRequest(BaseModel):
+    convert_options: Annotated[
+        ConvertDocumentsRequestOptions, Field(description="Conversion options.")
+    ] = ConvertDocumentsRequestOptions()
+    sources: Annotated[
+        list[SourceRequestItem],
+        Field(description="List of input document sources to process."),
+    ]
+    include_converted_doc: Annotated[
+        bool,
+        Field(
+            description="If true, the output will include both the chunks and the converted document."
+        ),
+    ] = False
+    target: Annotated[
+        TaskTarget, Field(description="Specification for the type of output target.")
+    ] = InBodyTarget()
+
+
+ChunkingOptT = TypeVar("ChunkingOptT", bound=BaseChunkerOptions)
+
+
+class GenericChunkDocumentsRequest(BaseChunkDocumentsRequest, Generic[ChunkingOptT]):
+    chunking_options: ChunkingOptT
+
+
+@cache
+def make_request_model(
+    opt_type: type[ChunkingOptT],
+) -> type[GenericChunkDocumentsRequest[ChunkingOptT]]:
+    """
+    Dynamically create (and cache) a subclass of GenericChunkDocumentsRequest[opt_type]
+    with chunking_options having a default factory.
+    """
+    return type(
+        f"{opt_type.__name__}DocumentsRequest",
+        (GenericChunkDocumentsRequest[opt_type],),  # type: ignore[valid-type]
+        {
+            "__annotations__": {"chunking_options": opt_type},
+            "chunking_options": Field(
+                default_factory=opt_type, description="Options specific to the chunker."
+            ),
+        },
+    )
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -5,8 +5,12 @@ from pydantic import BaseModel

 from docling.datamodel.document import ConversionStatus, ErrorItem
 from docling.utils.profiling import ProfilingItem
-from docling_jobkit.datamodel.result import ExportDocumentResponse
-from docling_jobkit.datamodel.task_meta import TaskProcessingMeta
+from docling_jobkit.datamodel.result import (
+    ChunkedDocumentResultItem,
+    ExportDocumentResponse,
+    ExportResult,
+)
+from docling_jobkit.datamodel.task_meta import TaskProcessingMeta, TaskType


 # Status
@@ -37,8 +41,15 @@ class ConvertDocumentErrorResponse(BaseModel):
    status: ConversionStatus


+class ChunkDocumentResponse(BaseModel):
+    chunks: list[ChunkedDocumentResultItem]
+    documents: list[ExportResult]
+    processing_time: float
+
+
 class TaskStatusResponse(BaseModel):
    task_id: str
+    task_type: TaskType
    task_status: str
    task_position: Optional[int] = None
    task_meta: Optional[TaskProcessingMeta] = None
--- a/docling_serve/helper_functions.py
+++ b/docling_serve/helper_functions.py
@@ -29,10 +29,15 @@ def is_pydantic_model(type_):

 # Adapted from
 # https://github.com/fastapi/fastapi/discussions/8971#discussioncomment-7892972
-def FormDepends(cls: type[BaseModel]):
+def FormDepends(
+    cls: type[BaseModel], prefix: str = "", excluded_fields: list[str] = []
+):
    new_parameters = []

    for field_name, model_field in cls.model_fields.items():
+        if field_name in excluded_fields:
+            continue
+
        annotation = model_field.annotation
        description = model_field.description
        default = (
@@ -63,7 +68,7 @@ def FormDepends(cls: type[BaseModel]):

        new_parameters.append(
            inspect.Parameter(
-                name=field_name,
+                name=f"{prefix}{field_name}",
                kind=inspect.Parameter.POSITIONAL_ONLY,
                default=default,
                annotation=annotation,
@@ -71,19 +76,23 @@ def FormDepends(cls: type[BaseModel]):
        )

    async def as_form_func(**data):
+        newdata = {}
        for field_name, model_field in cls.model_fields.items():
-            value = data.get(field_name)
+            if field_name in excluded_fields:
+                continue
+            value = data.get(f"{prefix}{field_name}")
+            newdata[field_name] = value
            annotation = model_field.annotation

            # Parse nested models from JSON string
            if value is not None and is_pydantic_model(annotation):
                try:
                    validator = TypeAdapter(annotation)
-                    data[field_name] = validator.validate_json(value)
+                    newdata[field_name] = validator.validate_json(value)
                except Exception as e:
                    raise ValueError(f"Invalid JSON for field '{field_name}': {e}")

-        return cls(**data)
+        return cls(**newdata)

    sig = inspect.signature(as_form_func)
    sig = sig.replace(parameters=new_parameters)
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -4,7 +4,8 @@ import logging
 from fastapi import BackgroundTasks, Response

 from docling_jobkit.datamodel.result import (
-    ConvertDocumentResult,
+    ChunkedDocumentResult,
+    DoclingTaskResult,
    ExportResult,
    RemoteTargetResult,
    ZipArchiveResult,
@@ -14,6 +15,7 @@ from docling_jobkit.orchestrators.base_orchestrator import (
 )

 from docling_serve.datamodel.responses import (
+    ChunkDocumentResponse,
    ConvertDocumentResponse,
    PresignedUrlConvertDocumentResponse,
 )
@@ -24,11 +26,16 @@ _log = logging.getLogger(__name__)

 async def prepare_response(
    task_id: str,
-    task_result: ConvertDocumentResult,
+    task_result: DoclingTaskResult,
    orchestrator: BaseOrchestrator,
    background_tasks: BackgroundTasks,
 ):
-    response: Response | ConvertDocumentResponse | PresignedUrlConvertDocumentResponse
+    response: (
+        Response
+        | ConvertDocumentResponse
+        | PresignedUrlConvertDocumentResponse
+        | ChunkDocumentResponse
+    )
    if isinstance(task_result.result, ExportResult):
        response = ConvertDocumentResponse(
            document=task_result.result.content,
@@ -52,6 +59,12 @@ async def prepare_response(
            num_succeeded=task_result.num_succeeded,
            num_failed=task_result.num_failed,
        )
+    elif isinstance(task_result.result, ChunkedDocumentResult):
+        response = ChunkDocumentResponse(
+            chunks=task_result.result.chunks,
+            documents=task_result.result.documents,
+            processing_time=task_result.processing_time,
+        )
    else:
        raise ValueError("Unknown result type")

--- a/docling_serve/websocket_notifier.py
+++ b/docling_serve/websocket_notifier.py
@@ -34,6 +34,7 @@ class WebsocketNotifier(BaseNotifier):
        task_queue_position = await self.orchestrator.get_queue_position(task_id)
        msg = TaskStatusResponse(
            task_id=task.task_id,
+            task_type=task.task_type,
            task_status=task.task_status,
            task_position=task_queue_position,
            task_meta=task.processing_meta,
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "1.4.0"  # DO NOT EDIT, updated automatically
+version = "1.5.0"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -35,7 +35,7 @@ requires-python = ">=3.10"
 dependencies = [
    "docling~=2.38",
    "docling-core>=2.45.0",
-    "docling-jobkit[kfp,rq,vlm]>=1.4.0,<2.0.0",
+    "docling-jobkit[kfp,rq,vlm]>=1.5.0,<2.0.0",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
    "pydantic~=2.10",
@@ -279,6 +279,7 @@ module = [
    "kfp.*",
    "kfp_server_api.*",
    "mlx_vlm.*",
+    "mlx.*",
    "scalar_fastapi.*",
 ]
 ignore_missing_imports = true
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -62,3 +62,60 @@ async def test_convert_url(async_client):
        time.sleep(2)

    assert task["task_status"] == "success"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("include_converted_doc", [False, True])
+async def test_chunk_url(async_client, include_converted_doc: bool):
+    """Test chunk URL"""
+
+    example_docs = [
+        "https://arxiv.org/pdf/2311.18481",
+    ]
+
+    base_url = "http://localhost:5001/v1"
+    payload = {
+        "sources": [{"kind": "http", "url": random.choice(example_docs)}],
+        "include_converted_doc": include_converted_doc,
+    }
+
+    response = await async_client.post(
+        f"{base_url}/chunk/hybrid/source/async", json=payload
+    )
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    task = response.json()
+
+    print(json.dumps(task, indent=2))
+
+    while task["task_status"] not in ("success", "failure"):
+        response = await async_client.get(f"{base_url}/status/poll/{task['task_id']}")
+        assert response.status_code == 200, "Response should be 200 OK"
+        task = response.json()
+        print(f"{task['task_status']=}")
+        print(f"{task['task_position']=}")
+
+        time.sleep(2)
+
+    assert task["task_status"] == "success"
+
+    result_resp = await async_client.get(f"{base_url}/result/{task['task_id']}")
+    assert result_resp.status_code == 200, "Response should be 200 OK"
+    result = result_resp.json()
+    print("Got result.")
+
+    assert "chunks" in result
+    assert len(result["chunks"]) > 0
+
+    assert "documents" in result
+    assert len(result["documents"]) > 0
+    assert result["documents"][0]["status"] == "success"
+
+    if include_converted_doc:
+        assert result["documents"][0]["content"]["json_content"] is not None
+        assert (
+            result["documents"][0]["content"]["json_content"]["schema_name"]
+            == "DoclingDocument"
+        )
+    else:
+        assert result["documents"][0]["content"]["json_content"] is None
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
github-actions[bot]	496f7ec26b	chore: bump version to 1.5.0 [skip ci]	2025-09-09 08:46:36 +00:00
Michele Dolfi	9d6def0ec8	feat: add chunking endpoints (#353 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-09 08:38:54 +02:00
github-actions[bot]	a4fed2d965	chore: bump version to 1.4.1 [skip ci]	2025-09-08 10:28:12 +00:00
Michele Dolfi	b0360d723b	fix: trigger fix after ci fixes (#355 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-08 12:23:07 +02:00
Michele Dolfi	4adc0dfa79	ci: fix use simple tag for testing (#354 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-09-08 11:29:55 +02:00