chore: bump version to 0.9.0 [skip ci]

fix: produce image artifacts in referenced mode (#151 )
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-29 16:43:24 +00:00 · 2025-04-25 07:56:40 +00:00 · 2025-04-24 17:33:36 +02:00 · 2025-04-24 14:42:06 +02:00 · 2025-04-24 13:49:44 +02:00 · 2025-04-24 10:25:29 +02:00
28 changed files with 1816 additions and 807 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,18 @@
+## [v0.9.0](https://github.com/docling-project/docling-serve/releases/tag/v0.9.0) - 2025-04-25
+
+### Feature
+
+* Expose picture description options ([#148](https://github.com/docling-project/docling-serve/issues/148)) ([`4c9571a`](https://github.com/docling-project/docling-serve/commit/4c9571a052d5ec0044e49225bc5615e13cdb0a56))
+* Add parameters for Kubeflow pipeline engine (WIP) ([#107](https://github.com/docling-project/docling-serve/issues/107)) ([`26bef5b`](https://github.com/docling-project/docling-serve/commit/26bef5bec060f0afd8d358816b68c3f2c0dd4bc2))
+
+### Fix
+
+* Produce image artifacts in referenced mode ([#151](https://github.com/docling-project/docling-serve/issues/151)) ([`71c5fae`](https://github.com/docling-project/docling-serve/commit/71c5fae505366459fd481d2ecdabc5ebed94d49c))
+
+### Documentation
+
+* Vlm and picture description options ([#149](https://github.com/docling-project/docling-serve/issues/149)) ([`91956cb`](https://github.com/docling-project/docling-serve/commit/91956cbf4e91cf82bb4d54ace397cdbbfaf594ba))
+
 ## [v0.8.0](https://github.com/docling-project/docling-serve/releases/tag/v0.8.0) - 2025-04-22

 ### Feature
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -28,6 +28,10 @@ from fastapi.staticfiles import StaticFiles

 from docling.datamodel.base_models import DocumentStream

+from docling_serve.datamodel.callback import (
+    ProgressCallbackRequest,
+    ProgressCallbackResponse,
+)
 from docling_serve.datamodel.convert import ConvertDocumentsOptions
 from docling_serve.datamodel.requests import (
    ConvertDocumentFileSourcesRequest,
@@ -45,11 +49,12 @@ from docling_serve.docling_conversion import (
    get_converter,
    get_pdf_pipeline_opts,
 )
-from docling_serve.engines import get_orchestrator
-from docling_serve.engines.async_local.orchestrator import (
-    AsyncLocalOrchestrator,
-    TaskNotFoundError,
+from docling_serve.engines.async_orchestrator import (
+    BaseAsyncOrchestrator,
+    ProgressInvalid,
 )
+from docling_serve.engines.async_orchestrator_factory import get_async_orchestrator
+from docling_serve.engines.base_orchestrator import TaskNotFoundError
 from docling_serve.helper_functions import FormDepends
 from docling_serve.response_preparation import process_results
 from docling_serve.settings import docling_serve_settings
@@ -94,7 +99,7 @@ async def lifespan(app: FastAPI):
    pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
    get_converter(pdf_format_option)

-    orchestrator = get_orchestrator()
+    orchestrator = get_async_orchestrator()

    # Start the background queue processor
    queue_task = asyncio.create_task(orchestrator.process_queue())
@@ -308,7 +313,7 @@ def create_app():  # noqa: C901
        response_model=TaskStatusResponse,
    )
    async def process_url_async(
-        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
        conversion_request: ConvertDocumentsRequest,
    ):
        task = await orchestrator.enqueue(request=conversion_request)
@@ -319,6 +324,7 @@ def create_app():  # noqa: C901
            task_id=task.task_id,
            task_status=task.task_status,
            task_position=task_queue_position,
+            task_meta=task.processing_meta,
        )

    # Task status poll
@@ -327,7 +333,7 @@ def create_app():  # noqa: C901
        response_model=TaskStatusResponse,
    )
    async def task_status_poll(
-        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
        wait: Annotated[
            float, Query(help="Number of seconds to wait for a completed status.")
@@ -342,6 +348,7 @@ def create_app():  # noqa: C901
            task_id=task.task_id,
            task_status=task.task_status,
            task_position=task_queue_position,
+            task_meta=task.processing_meta,
        )

    # Task status websocket
@@ -350,7 +357,7 @@ def create_app():  # noqa: C901
    )
    async def task_status_ws(
        websocket: WebSocket,
-        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
    ):
        await websocket.accept()
@@ -375,6 +382,7 @@ def create_app():  # noqa: C901
                task_id=task.task_id,
                task_status=task.task_status,
                task_position=task_queue_position,
+                task_meta=task.processing_meta,
            )
            await websocket.send_text(
                WebsocketMessage(
@@ -389,6 +397,7 @@ def create_app():  # noqa: C901
                    task_id=task.task_id,
                    task_status=task.task_status,
                    task_position=task_queue_position,
+                    task_meta=task.processing_meta,
                )
                await websocket.send_text(
                    WebsocketMessage(
@@ -416,7 +425,7 @@ def create_app():  # noqa: C901
        },
    )
    async def task_result(
-        orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
+        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
        task_id: str,
    ):
        result = await orchestrator.task_result(task_id=task_id)
@@ -427,4 +436,23 @@ def create_app():  # noqa: C901
            )
        return result

+    # Update task progress
+    @app.post(
+        "/v1alpha/callback/task/progress",
+        response_model=ProgressCallbackResponse,
+    )
+    async def callback_task_progress(
+        orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
+        request: ProgressCallbackRequest,
+    ):
+        try:
+            await orchestrator.receive_task_progress(request=request)
+            return ProgressCallbackResponse(status="ack")
+        except TaskNotFoundError:
+            raise HTTPException(status_code=404, detail="Task not found.")
+        except ProgressInvalid as err:
+            raise HTTPException(
+                status_code=400, detail=f"Invalid progress payload: {err}"
+            )
+
    return app
--- a/docling_serve/datamodel/callback.py
+++ b/docling_serve/datamodel/callback.py
@@ -0,0 +1,50 @@
+import enum
+from typing import Annotated, Literal
+
+from pydantic import BaseModel, Field
+
+
+class ProgressKind(str, enum.Enum):
+    SET_NUM_DOCS = "set_num_docs"
+    UPDATE_PROCESSED = "update_processed"
+
+
+class BaseProgress(BaseModel):
+    kind: ProgressKind
+
+
+class ProgressSetNumDocs(BaseProgress):
+    kind: Literal[ProgressKind.SET_NUM_DOCS] = ProgressKind.SET_NUM_DOCS
+
+    num_docs: int
+
+
+class SucceededDocsItem(BaseModel):
+    source: str
+
+
+class FailedDocsItem(BaseModel):
+    source: str
+    error: str
+
+
+class ProgressUpdateProcessed(BaseProgress):
+    kind: Literal[ProgressKind.UPDATE_PROCESSED] = ProgressKind.UPDATE_PROCESSED
+
+    num_processed: int
+    num_succeeded: int
+    num_failed: int
+
+    docs_succeeded: list[SucceededDocsItem]
+    docs_failed: list[FailedDocsItem]
+
+
+class ProgressCallbackRequest(BaseModel):
+    task_id: str
+    progress: Annotated[
+        ProgressSetNumDocs | ProgressUpdateProcessed, Field(discriminator="kind")
+    ]
+
+
+class ProgressCallbackResponse(BaseModel):
+    status: Literal["ack"] = "ack"
--- a/docling_serve/datamodel/convert.py
+++ b/docling_serve/datamodel/convert.py
@@ -1,7 +1,8 @@
 # Define the input options for the API
-from typing import Annotated, Optional
+from typing import Annotated, Any, Optional

-from pydantic import BaseModel, Field
+from pydantic import AnyUrl, BaseModel, Field, model_validator
+from typing_extensions import Self

 from docling.datamodel.base_models import InputFormat, OutputFormat
 from docling.datamodel.pipeline_options import (
@@ -26,6 +27,89 @@ ocr_factory = get_ocr_factory(
 ocr_engines_enum = ocr_factory.get_enum()


+class PictureDescriptionLocal(BaseModel):
+    repo_id: Annotated[
+        str,
+        Field(
+            description="Repository id from the Hugging Face Hub.",
+            examples=[
+                "HuggingFaceTB/SmolVLM-256M-Instruct",
+                "ibm-granite/granite-vision-3.2-2b",
+            ],
+        ),
+    ]
+    prompt: Annotated[
+        str,
+        Field(
+            description="Prompt used when calling the vision-language model.",
+            examples=[
+                "Describe this image in a few sentences.",
+                "This is a figure from a document. Provide a detailed description of it.",
+            ],
+        ),
+    ] = "Describe this image in a few sentences."
+    generation_config: Annotated[
+        dict[str, Any],
+        Field(
+            description="Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig",
+            examples=[{"max_new_tokens": 200, "do_sample": False}],
+        ),
+    ] = {"max_new_tokens": 200, "do_sample": False}
+
+
+class PictureDescriptionApi(BaseModel):
+    url: Annotated[
+        AnyUrl,
+        Field(
+            description="Endpoint which accepts openai-api compatible requests.",
+            examples=[
+                AnyUrl(
+                    "http://localhost:8000/v1/chat/completions"
+                ),  # example of a local vllm api
+                AnyUrl(
+                    "http://localhost:11434/v1/chat/completions"
+                ),  # example of ollama
+            ],
+        ),
+    ]
+    headers: Annotated[
+        dict[str, str],
+        Field(
+            description="Headers used for calling the API endpoint. For example, it could include authentication headers."
+        ),
+    ] = {}
+    params: Annotated[
+        dict[str, Any],
+        Field(
+            description="Model parameters.",
+            examples=[
+                {  # on vllm
+                    "model": "HuggingFaceTB/SmolVLM-256M-Instruct",
+                    "max_completion_tokens": 200,
+                },
+                {  # on vllm
+                    "model": "ibm-granite/granite-vision-3.2-2b",
+                    "max_completion_tokens": 200,
+                },
+                {  # on ollama
+                    "model": "granite3.2-vision:2b"
+                },
+            ],
+        ),
+    ] = {}
+    timeout: Annotated[float, Field(description="Timeout for the API request.")] = 20
+    prompt: Annotated[
+        str,
+        Field(
+            description="Prompt used when calling the vision-language model.",
+            examples=[
+                "Describe this image in a few sentences.",
+                "This is a figures from a document. Provide a detailed description of it.",
+            ],
+        ),
+    ] = "Describe this image in a few sentences."
+
+
 class ConvertDocumentsOptions(BaseModel):
    from_formats: Annotated[
        list[InputFormat],
@@ -226,7 +310,7 @@ class ConvertDocumentsOptions(BaseModel):
        bool,
        Field(
            description=(
-                "If enabled, perform formula OCR, return Latex code. "
+                "If enabled, perform formula OCR, return LaTeX code. "
                "Boolean. Optional, defaults to false."
            ),
            examples=[False],
@@ -254,3 +338,30 @@ class ConvertDocumentsOptions(BaseModel):
            examples=[False],
        ),
    ] = False
+
+    picture_description_local: Annotated[
+        Optional[PictureDescriptionLocal],
+        Field(
+            description="Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api."
+        ),
+    ] = None
+
+    picture_description_api: Annotated[
+        Optional[PictureDescriptionApi],
+        Field(
+            description="API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local."
+        ),
+    ] = None
+
+    @model_validator(mode="after")
+    def picture_description_exclusivity(self) -> Self:
+        # Validate picture description options
+        if (
+            self.picture_description_local is not None
+            and self.picture_description_api is not None
+        ):
+            raise ValueError(
+                "The parameters picture_description_local and picture_description_api are mutually exclusive, only one of them can be set."
+            )
+
+        return self
--- a/docling_serve/datamodel/engines.py
+++ b/docling_serve/datamodel/engines.py
@@ -10,3 +10,4 @@ class TaskStatus(str, enum.Enum):

 class AsyncEngine(str, enum.Enum):
    LOCAL = "local"
+    KFP = "kfp"
--- a/docling_serve/datamodel/kfp.py
+++ b/docling_serve/datamodel/kfp.py
@@ -0,0 +1,7 @@
+from pydantic import AnyUrl, BaseModel
+
+
+class CallbackSpec(BaseModel):
+    url: AnyUrl
+    headers: dict[str, str] = {}
+    ca_cert: str = ""
--- a/docling_serve/datamodel/responses.py
+++ b/docling_serve/datamodel/responses.py
@@ -7,6 +7,8 @@ from docling.datamodel.document import ConversionStatus, ErrorItem
 from docling.utils.profiling import ProfilingItem
 from docling_core.types.doc import DoclingDocument

+from docling_serve.datamodel.task_meta import TaskProcessingMeta
+

 # Status
 class HealthCheckResponse(BaseModel):
@@ -38,6 +40,7 @@ class TaskStatusResponse(BaseModel):
    task_id: str
    task_status: str
    task_position: Optional[int] = None
+    task_meta: Optional[TaskProcessingMeta] = None


 class MessageKind(str, enum.Enum):
--- a/docling_serve/datamodel/task.py
+++ b/docling_serve/datamodel/task.py
@@ -5,6 +5,7 @@ from pydantic import BaseModel
 from docling_serve.datamodel.engines import TaskStatus
 from docling_serve.datamodel.requests import ConvertDocumentsRequest
 from docling_serve.datamodel.responses import ConvertDocumentResponse
+from docling_serve.datamodel.task_meta import TaskProcessingMeta


 class Task(BaseModel):
@@ -12,6 +13,7 @@ class Task(BaseModel):
    task_status: TaskStatus = TaskStatus.PENDING
    request: Optional[ConvertDocumentsRequest]
    result: Optional[ConvertDocumentResponse] = None
+    processing_meta: Optional[TaskProcessingMeta] = None

    def is_completed(self) -> bool:
        if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:
--- a/docling_serve/datamodel/task_meta.py
+++ b/docling_serve/datamodel/task_meta.py
@@ -0,0 +1,8 @@
+from pydantic import BaseModel
+
+
+class TaskProcessingMeta(BaseModel):
+    num_docs: int
+    num_processed: int = 0
+    num_succeeded: int = 0
+    num_failed: int = 0
--- a/docling_serve/docling_conversion.py
+++ b/docling_serve/docling_conversion.py
@@ -21,6 +21,8 @@ from docling.datamodel.pipeline_options import (
    PdfBackend,
    PdfPipeline,
    PdfPipelineOptions,
+    PictureDescriptionApiOptions,
+    PictureDescriptionVlmOptions,
    TableFormerMode,
    VlmPipelineOptions,
    smoldocling_vlm_conversion_options,
@@ -116,6 +118,7 @@ def _parse_standard_pdf_opts(

    pipeline_options = PdfPipelineOptions(
        artifacts_path=artifacts_path,
+        enable_remote_services=docling_serve_settings.enable_remote_services,
        document_timeout=request.document_timeout,
        do_ocr=request.do_ocr,
        ocr_options=ocr_options,
@@ -129,9 +132,25 @@ def _parse_standard_pdf_opts(

    if request.image_export_mode != ImageRefMode.PLACEHOLDER:
        pipeline_options.generate_page_images = True
+        if request.image_export_mode == ImageRefMode.REFERENCED:
+            pipeline_options.generate_picture_images = True
        if request.images_scale:
            pipeline_options.images_scale = request.images_scale

+    if request.picture_description_local is not None:
+        pipeline_options.picture_description_options = (
+            PictureDescriptionVlmOptions.model_validate(
+                request.picture_description_local.model_dump()
+            )
+        )
+
+    if request.picture_description_api is not None:
+        pipeline_options.picture_description_options = (
+            PictureDescriptionApiOptions.model_validate(
+                request.picture_description_api.model_dump()
+            )
+        )
+
    return pipeline_options


--- a/docling_serve/engines/init.py
+++ b/docling_serve/engines/init.py
@@ -1,8 +0,0 @@
-from functools import lru_cache
-
-from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
-
-
-@lru_cache
-def get_orchestrator() -> AsyncLocalOrchestrator:
-    return AsyncLocalOrchestrator()
--- a/docling_serve/engines/async_kfp/init.py
+++ b/docling_serve/engines/async_kfp/init.py
--- a/docling_serve/engines/async_kfp/kfp_pipeline.py
+++ b/docling_serve/engines/async_kfp/kfp_pipeline.py
@@ -0,0 +1,137 @@
+# ruff: noqa: E402, UP006, UP035
+
+from typing import Any, Dict, List
+
+from kfp import dsl
+
+PYTHON_BASE_IMAGE = "python:3.12"
+
+
+@dsl.component(
+    base_image=PYTHON_BASE_IMAGE,
+    packages_to_install=[
+        "pydantic",
+        "docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
+    ],
+    pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
+)
+def generate_chunks(
+    run_name: str,
+    request: Dict[str, Any],
+    batch_size: int,
+    callbacks: List[Dict[str, Any]],
+) -> List[List[Dict[str, Any]]]:
+    from pydantic import TypeAdapter
+
+    from docling_serve.datamodel.callback import (
+        ProgressCallbackRequest,
+        ProgressSetNumDocs,
+    )
+    from docling_serve.datamodel.kfp import CallbackSpec
+    from docling_serve.engines.async_kfp.notify import notify_callbacks
+
+    CallbacksListType = TypeAdapter(list[CallbackSpec])
+
+    sources = request["http_sources"]
+    splits = [sources[i : i + batch_size] for i in range(0, len(sources), batch_size)]
+
+    total = sum(len(chunk) for chunk in splits)
+    payload = ProgressCallbackRequest(
+        task_id=run_name, progress=ProgressSetNumDocs(num_docs=total)
+    )
+    notify_callbacks(
+        payload=payload,
+        callbacks=CallbacksListType.validate_python(callbacks),
+    )
+
+    return splits
+
+
+@dsl.component(
+    base_image=PYTHON_BASE_IMAGE,
+    packages_to_install=[
+        "pydantic",
+        "docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
+    ],
+    pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
+)
+def convert_batch(
+    run_name: str,
+    data_splits: List[Dict[str, Any]],
+    request: Dict[str, Any],
+    callbacks: List[Dict[str, Any]],
+    output_path: dsl.OutputPath("Directory"),  # type: ignore
+):
+    from pathlib import Path
+
+    from pydantic import AnyUrl, TypeAdapter
+
+    from docling_serve.datamodel.callback import (
+        FailedDocsItem,
+        ProgressCallbackRequest,
+        ProgressUpdateProcessed,
+        SucceededDocsItem,
+    )
+    from docling_serve.datamodel.convert import ConvertDocumentsOptions
+    from docling_serve.datamodel.kfp import CallbackSpec
+    from docling_serve.datamodel.requests import HttpSource
+    from docling_serve.engines.async_kfp.notify import notify_callbacks
+
+    CallbacksListType = TypeAdapter(list[CallbackSpec])
+
+    convert_options = ConvertDocumentsOptions.model_validate(request["options"])
+    print(convert_options)
+
+    output_dir = Path(output_path)
+    output_dir.mkdir(exist_ok=True, parents=True)
+    docs_succeeded: list[SucceededDocsItem] = []
+    docs_failed: list[FailedDocsItem] = []
+    for source_dict in data_splits:
+        source = HttpSource.model_validate(source_dict)
+        filename = Path(str(AnyUrl(source.url).path)).name
+        output_filename = output_dir / filename
+        print(f"Writing {output_filename}")
+        with output_filename.open("w") as f:
+            f.write(source.model_dump_json())
+        docs_succeeded.append(SucceededDocsItem(source=source.url))
+
+    payload = ProgressCallbackRequest(
+        task_id=run_name,
+        progress=ProgressUpdateProcessed(
+            num_failed=len(docs_failed),
+            num_processed=len(docs_succeeded) + len(docs_failed),
+            num_succeeded=len(docs_succeeded),
+            docs_succeeded=docs_succeeded,
+            docs_failed=docs_failed,
+        ),
+    )
+
+    print(payload)
+    notify_callbacks(
+        payload=payload,
+        callbacks=CallbacksListType.validate_python(callbacks),
+    )
+
+
+@dsl.pipeline()
+def process(
+    batch_size: int,
+    request: Dict[str, Any],
+    callbacks: List[Dict[str, Any]] = [],
+    run_name: str = "",
+):
+    chunks_task = generate_chunks(
+        run_name=run_name,
+        request=request,
+        batch_size=batch_size,
+        callbacks=callbacks,
+    )
+    chunks_task.set_caching_options(False)
+
+    with dsl.ParallelFor(chunks_task.output, parallelism=4) as data_splits:
+        convert_batch(
+            run_name=run_name,
+            data_splits=data_splits,
+            request=request,
+            callbacks=callbacks,
+        )
--- a/docling_serve/engines/async_kfp/notify.py
+++ b/docling_serve/engines/async_kfp/notify.py
@@ -0,0 +1,32 @@
+import ssl
+
+import certifi
+import httpx
+
+from docling_serve.datamodel.callback import ProgressCallbackRequest
+from docling_serve.datamodel.kfp import CallbackSpec
+
+
+def notify_callbacks(
+    payload: ProgressCallbackRequest,
+    callbacks: list[CallbackSpec],
+):
+    if len(callbacks) == 0:
+        return
+
+    for callback in callbacks:
+        # https://www.python-httpx.org/advanced/ssl/#configuring-client-instances
+        if callback.ca_cert:
+            ctx = ssl.create_default_context(cadata=callback.ca_cert)
+        else:
+            ctx = ssl.create_default_context(cafile=certifi.where())
+
+        try:
+            httpx.post(
+                str(callback.url),
+                headers=callback.headers,
+                json=payload.model_dump(mode="json"),
+                verify=ctx,
+            )
+        except httpx.HTTPError as err:
+            print(f"Error notifying callback {callback.url}: {err}")
--- a/docling_serve/engines/async_kfp/orchestrator.py
+++ b/docling_serve/engines/async_kfp/orchestrator.py
@@ -0,0 +1,226 @@
+import datetime
+import json
+import logging
+import uuid
+from pathlib import Path
+from typing import Optional
+
+from kfp_server_api.models import V2beta1RuntimeState
+from pydantic import BaseModel, TypeAdapter
+from pydantic_settings import SettingsConfigDict
+
+from docling_serve.datamodel.callback import (
+    ProgressCallbackRequest,
+    ProgressSetNumDocs,
+    ProgressUpdateProcessed,
+)
+from docling_serve.datamodel.engines import TaskStatus
+from docling_serve.datamodel.kfp import CallbackSpec
+from docling_serve.datamodel.requests import ConvertDocumentsRequest
+from docling_serve.datamodel.task import Task
+from docling_serve.datamodel.task_meta import TaskProcessingMeta
+from docling_serve.engines.async_kfp.kfp_pipeline import process
+from docling_serve.engines.async_orchestrator import (
+    BaseAsyncOrchestrator,
+    ProgressInvalid,
+)
+from docling_serve.settings import docling_serve_settings
+
+_log = logging.getLogger(__name__)
+
+
+class _RunItem(BaseModel):
+    model_config = SettingsConfigDict(arbitrary_types_allowed=True)
+
+    run_id: str
+    state: str
+    created_at: datetime.datetime
+    scheduled_at: datetime.datetime
+    finished_at: datetime.datetime
+
+
+class AsyncKfpOrchestrator(BaseAsyncOrchestrator):
+    def __init__(self):
+        super().__init__()
+        import kfp
+
+        kfp_endpoint = docling_serve_settings.eng_kfp_endpoint
+        if kfp_endpoint is None:
+            raise ValueError("KFP endpoint is required when using the KFP engine.")
+
+        kube_sa_token_path = Path("/run/secrets/kubernetes.io/serviceaccount/token")
+        kube_sa_ca_cert_path = Path(
+            "/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
+        )
+
+        ssl_ca_cert = docling_serve_settings.eng_kfp_ca_cert_path
+        token = docling_serve_settings.eng_kfp_token
+        if (
+            ssl_ca_cert is None
+            and ".svc" in kfp_endpoint.host
+            and kube_sa_ca_cert_path.exists()
+        ):
+            ssl_ca_cert = str(kube_sa_ca_cert_path)
+        if token is None and kube_sa_token_path.exists():
+            token = kube_sa_token_path.read_text()
+
+        self._client = kfp.Client(
+            host=str(kfp_endpoint),
+            existing_token=token,
+            ssl_ca_cert=ssl_ca_cert,
+            # verify_ssl=False,
+        )
+
+    async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
+        callbacks = []
+        if docling_serve_settings.eng_kfp_self_callback_endpoint is not None:
+            headers = {}
+            if docling_serve_settings.eng_kfp_self_callback_token_path is not None:
+                token = (
+                    docling_serve_settings.eng_kfp_self_callback_token_path.read_text()
+                )
+                headers["Authorization"] = f"Bearer {token}"
+            ca_cert = ""
+            if docling_serve_settings.eng_kfp_self_callback_ca_cert_path is not None:
+                ca_cert = docling_serve_settings.eng_kfp_self_callback_ca_cert_path.read_text()
+            callbacks.append(
+                CallbackSpec(
+                    url=docling_serve_settings.eng_kfp_self_callback_endpoint,
+                    headers=headers,
+                    ca_cert=ca_cert,
+                )
+            )
+
+        CallbacksType = TypeAdapter(list[CallbackSpec])
+        # hack: since the current kfp backend is not resolving the job_id placeholder,
+        # we set the run_name and pass it as argument to the job itself.
+        run_name = f"docling-job-{uuid.uuid4()}"
+        kfp_run = self._client.create_run_from_pipeline_func(
+            process,
+            arguments={
+                "batch_size": 10,
+                "request": request.model_dump(mode="json"),
+                "callbacks": CallbacksType.dump_python(callbacks, mode="json"),
+                "run_name": run_name,
+            },
+            run_name=run_name,
+        )
+        task_id = kfp_run.run_id
+
+        task = Task(task_id=task_id, request=request)
+        await self.init_task_tracking(task)
+        return task
+
+    async def _update_task_from_run(self, task_id: str, wait: float = 0.0):
+        run_info = self._client.get_run(run_id=task_id)
+        task = await self.get_raw_task(task_id=task_id)
+        # RUNTIME_STATE_UNSPECIFIED = "RUNTIME_STATE_UNSPECIFIED"
+        # PENDING = "PENDING"
+        # RUNNING = "RUNNING"
+        # SUCCEEDED = "SUCCEEDED"
+        # SKIPPED = "SKIPPED"
+        # FAILED = "FAILED"
+        # CANCELING = "CANCELING"
+        # CANCELED = "CANCELED"
+        # PAUSED = "PAUSED"
+        if run_info.state == V2beta1RuntimeState.SUCCEEDED:
+            task.task_status = TaskStatus.SUCCESS
+        elif run_info.state == V2beta1RuntimeState.PENDING:
+            task.task_status = TaskStatus.PENDING
+        elif run_info.state == V2beta1RuntimeState.RUNNING:
+            task.task_status = TaskStatus.STARTED
+        else:
+            task.task_status = TaskStatus.FAILURE
+
+    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
+        await self._update_task_from_run(task_id=task_id, wait=wait)
+        return await self.get_raw_task(task_id=task_id)
+
+    async def _get_pending(self) -> list[_RunItem]:
+        runs: list[_RunItem] = []
+        next_page: Optional[str] = None
+        while True:
+            res = self._client.list_runs(
+                page_token=next_page,
+                page_size=20,
+                filter=json.dumps(
+                    {
+                        "predicates": [
+                            {
+                                "operation": "EQUALS",
+                                "key": "state",
+                                "stringValue": "PENDING",
+                            }
+                        ]
+                    }
+                ),
+            )
+            if res.runs is not None:
+                for run in res.runs:
+                    runs.append(
+                        _RunItem(
+                            run_id=run.run_id,
+                            state=run.state,
+                            created_at=run.created_at,
+                            scheduled_at=run.scheduled_at,
+                            finished_at=run.finished_at,
+                        )
+                    )
+            if res.next_page_token is None:
+                break
+            next_page = res.next_page_token
+        return runs
+
+    async def queue_size(self) -> int:
+        runs = await self._get_pending()
+        return len(runs)
+
+    async def get_queue_position(self, task_id: str) -> Optional[int]:
+        runs = await self._get_pending()
+        for pos, run in enumerate(runs, start=1):
+            if run.run_id == task_id:
+                return pos
+        return None
+
+    async def process_queue(self):
+        return
+
+    async def _get_run_id(self, run_name: str) -> str:
+        res = self._client.list_runs(
+            filter=json.dumps(
+                {
+                    "predicates": [
+                        {
+                            "operation": "EQUALS",
+                            "key": "name",
+                            "stringValue": run_name,
+                        }
+                    ]
+                }
+            ),
+        )
+        if res.runs is not None and len(res.runs) > 0:
+            return res.runs[0].run_id
+        raise RuntimeError(f"Run with {run_name=} not found.")
+
+    async def receive_task_progress(self, request: ProgressCallbackRequest):
+        task_id = await self._get_run_id(run_name=request.task_id)
+        progress = request.progress
+        task = await self.get_raw_task(task_id=task_id)
+
+        if isinstance(progress, ProgressSetNumDocs):
+            task.processing_meta = TaskProcessingMeta(num_docs=progress.num_docs)
+            task.task_status = TaskStatus.STARTED
+
+        elif isinstance(progress, ProgressUpdateProcessed):
+            if task.processing_meta is None:
+                raise ProgressInvalid(
+                    "UpdateProcessed was called before setting the expected number of documents."
+                )
+            task.processing_meta.num_processed += progress.num_processed
+            task.processing_meta.num_succeeded += progress.num_succeeded
+            task.processing_meta.num_failed += progress.num_failed
+            task.task_status = TaskStatus.STARTED
+
+        # TODO: could be moved to BackgroundTask
+        await self.notify_task_subscribers(task_id=task_id)
--- a/docling_serve/engines/async_local/orchestrator.py
+++ b/docling_serve/engines/async_local/orchestrator.py
@@ -3,44 +3,27 @@ import logging
 import uuid
 from typing import Optional

-from fastapi import WebSocket
-
-from docling_serve.datamodel.engines import TaskStatus
 from docling_serve.datamodel.requests import ConvertDocumentsRequest
-from docling_serve.datamodel.responses import (
-    MessageKind,
-    TaskStatusResponse,
-    WebsocketMessage,
-)
 from docling_serve.datamodel.task import Task
 from docling_serve.engines.async_local.worker import AsyncLocalWorker
-from docling_serve.engines.base_orchestrator import BaseOrchestrator
+from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
 from docling_serve.settings import docling_serve_settings

 _log = logging.getLogger(__name__)


-class OrchestratorError(Exception):
-    pass
-
-
-class TaskNotFoundError(OrchestratorError):
-    pass
-
-
-class AsyncLocalOrchestrator(BaseOrchestrator):
+class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
    def __init__(self):
+        super().__init__()
        self.task_queue = asyncio.Queue()
-        self.tasks: dict[str, Task] = {}
        self.queue_list: list[str] = []
-        self.task_subscribers: dict[str, set[WebSocket]] = {}

    async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
        task_id = str(uuid.uuid4())
        task = Task(task_id=task_id, request=request)
-        self.tasks[task_id] = task
+        await self.init_task_tracking(task)
+
        self.queue_list.append(task_id)
-        self.task_subscribers[task_id] = set()
        await self.task_queue.put(task_id)
        return task

@@ -52,16 +35,6 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
            self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
        )

-    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
-        if task_id not in self.tasks:
-            raise TaskNotFoundError()
-        return self.tasks[task_id]
-
-    async def task_result(self, task_id: str):
-        if task_id not in self.tasks:
-            raise TaskNotFoundError()
-        return self.tasks[task_id].result
-
    async def process_queue(self):
        # Create a pool of workers
        workers = []
@@ -74,29 +47,3 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
        # Wait for all workers to complete (they won't, as they run indefinitely)
        await asyncio.gather(*workers)
        _log.debug("All workers completed.")
-
-    async def notify_task_subscribers(self, task_id: str):
-        if task_id not in self.task_subscribers:
-            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
-
-        task = self.tasks[task_id]
-        task_queue_position = await self.get_queue_position(task_id)
-        msg = TaskStatusResponse(
-            task_id=task.task_id,
-            task_status=task.task_status,
-            task_position=task_queue_position,
-        )
-        for websocket in self.task_subscribers[task_id]:
-            await websocket.send_text(
-                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
-            )
-            if task.is_completed():
-                await websocket.close()
-
-    async def notify_queue_positions(self):
-        for task_id in self.task_subscribers.keys():
-            # notify only pending tasks
-            if self.tasks[task_id].task_status != TaskStatus.PENDING:
-                continue
-
-            await self.notify_task_subscribers(task_id)
--- a/docling_serve/engines/async_orchestrator.py
+++ b/docling_serve/engines/async_orchestrator.py
@@ -0,0 +1,72 @@
+from fastapi import WebSocket
+
+from docling_serve.datamodel.callback import ProgressCallbackRequest
+from docling_serve.datamodel.engines import TaskStatus
+from docling_serve.datamodel.responses import (
+    MessageKind,
+    TaskStatusResponse,
+    WebsocketMessage,
+)
+from docling_serve.datamodel.task import Task
+from docling_serve.engines.base_orchestrator import (
+    BaseOrchestrator,
+    OrchestratorError,
+    TaskNotFoundError,
+)
+
+
+class ProgressInvalid(OrchestratorError):
+    pass
+
+
+class BaseAsyncOrchestrator(BaseOrchestrator):
+    def __init__(self):
+        self.tasks: dict[str, Task] = {}
+        self.task_subscribers: dict[str, set[WebSocket]] = {}
+
+    async def init_task_tracking(self, task: Task):
+        task_id = task.task_id
+        self.tasks[task.task_id] = task
+        self.task_subscribers[task_id] = set()
+
+    async def get_raw_task(self, task_id: str) -> Task:
+        if task_id not in self.tasks:
+            raise TaskNotFoundError()
+        return self.tasks[task_id]
+
+    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
+        return await self.get_raw_task(task_id=task_id)
+
+    async def task_result(self, task_id: str):
+        task = await self.get_raw_task(task_id=task_id)
+        return task.result
+
+    async def notify_task_subscribers(self, task_id: str):
+        if task_id not in self.task_subscribers:
+            raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
+
+        task = await self.get_raw_task(task_id=task_id)
+        task_queue_position = await self.get_queue_position(task_id)
+        msg = TaskStatusResponse(
+            task_id=task.task_id,
+            task_status=task.task_status,
+            task_position=task_queue_position,
+            task_meta=task.processing_meta,
+        )
+        for websocket in self.task_subscribers[task_id]:
+            await websocket.send_text(
+                WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
+            )
+            if task.is_completed():
+                await websocket.close()
+
+    async def notify_queue_positions(self):
+        for task_id in self.task_subscribers.keys():
+            # notify only pending tasks
+            if self.tasks[task_id].task_status != TaskStatus.PENDING:
+                continue
+
+            await self.notify_task_subscribers(task_id)
+
+    async def receive_task_progress(self, request: ProgressCallbackRequest):
+        raise NotImplementedError()
--- a/docling_serve/engines/async_orchestrator_factory.py
+++ b/docling_serve/engines/async_orchestrator_factory.py
@@ -0,0 +1,21 @@
+from functools import lru_cache
+
+from docling_serve.datamodel.engines import AsyncEngine
+from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
+from docling_serve.settings import docling_serve_settings
+
+
+@lru_cache
+def get_async_orchestrator() -> BaseAsyncOrchestrator:
+    if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
+        from docling_serve.engines.async_local.orchestrator import (
+            AsyncLocalOrchestrator,
+        )
+
+        return AsyncLocalOrchestrator()
+    elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
+        from docling_serve.engines.async_kfp.orchestrator import AsyncKfpOrchestrator
+
+        return AsyncKfpOrchestrator()
+
+    raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")
--- a/docling_serve/engines/base_orchestrator.py
+++ b/docling_serve/engines/base_orchestrator.py
@@ -1,11 +1,21 @@
 from abc import ABC, abstractmethod
+from typing import Optional

+from docling_serve.datamodel.requests import ConvertDocumentsRequest
 from docling_serve.datamodel.task import Task


+class OrchestratorError(Exception):
+    pass
+
+
+class TaskNotFoundError(OrchestratorError):
+    pass
+
+
 class BaseOrchestrator(ABC):
    @abstractmethod
-    async def enqueue(self, task) -> Task:
+    async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
        pass

    @abstractmethod
@@ -13,9 +23,17 @@ class BaseOrchestrator(ABC):
        pass

    @abstractmethod
-    async def task_status(self, task_id: str) -> Task:
+    async def get_queue_position(self, task_id: str) -> Optional[int]:
+        pass
+
+    @abstractmethod
+    async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
        pass

    @abstractmethod
    async def task_result(self, task_id: str):
        pass
+
+    @abstractmethod
+    async def process_queue(self):
+        pass
--- a/docling_serve/response_preparation.py
+++ b/docling_serve/response_preparation.py
@@ -46,7 +46,7 @@ def _export_document_as_content(
        if export_md:
            document.md_content = new_doc.export_to_markdown(image_mode=image_mode)
        if export_doctags:
-            document.doctags_content = new_doc.export_to_document_tokens()
+            document.doctags_content = new_doc.export_to_doctags()
    elif conv_res.status == ConversionStatus.SKIPPED:
        raise HTTPException(status_code=400, detail=conv_res.errors)
    else:
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -2,7 +2,9 @@ import sys
 from pathlib import Path
 from typing import Optional, Union

+from pydantic import AnyUrl, model_validator
 from pydantic_settings import BaseSettings, SettingsConfigDict
+from typing_extensions import Self

 from docling_serve.datamodel.engines import AsyncEngine

@@ -37,6 +39,7 @@ class DoclingServeSettings(BaseSettings):
    artifacts_path: Optional[Path] = None
    static_path: Optional[Path] = None
    options_cache_size: int = 2
+    enable_remote_services: bool = False
    allow_external_plugins: bool = False

    max_document_timeout: float = 3_600 * 24 * 7  # 7 days
@@ -48,7 +51,32 @@ class DoclingServeSettings(BaseSettings):
    cors_headers: list[str] = ["*"]

    eng_kind: AsyncEngine = AsyncEngine.LOCAL
+    # Local engine
    eng_loc_num_workers: int = 2
+    # KFP engine
+    eng_kfp_endpoint: Optional[AnyUrl] = None
+    eng_kfp_token: Optional[str] = None
+    eng_kfp_ca_cert_path: Optional[str] = None
+    eng_kfp_self_callback_endpoint: Optional[str] = None
+    eng_kfp_self_callback_token_path: Optional[Path] = None
+    eng_kfp_self_callback_ca_cert_path: Optional[Path] = None
+
+    eng_kfp_experimental: bool = False
+
+    @model_validator(mode="after")
+    def engine_settings(self) -> Self:
+        # Validate KFP engine settings
+        if self.eng_kind == AsyncEngine.KFP:
+            if self.eng_kfp_endpoint is None:
+                raise ValueError("KFP endpoint is required when using the KFP engine.")
+
+        if self.eng_kind == AsyncEngine.KFP:
+            if not self.eng_kfp_experimental:
+                raise ValueError(
+                    "KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
+                )
+
+        return self


 uvicorn_settings = UvicornSettings()
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -38,7 +38,39 @@ THe following table describes the options to configure the Docling Serve app.
 | `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path |
 |  | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and ui will be loaded from this path |
 | `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
+|  | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
+|  | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
+|  | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
+|  | `DOCLING_SERVE_MAX_NUM_PAGES` |  | The maximum number of pages for a document to be processed. |
+|  | `DOCLING_SERVE_MAX_FILE_SIZE` |  | The maximum file size for a document to be processed. |
 |  | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
 |  | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
 |  | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
 |  | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
+|  | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
+
+### Compute engine
+
+Docling Serve can be deployed with several possible of compute engine.
+The selected compute engine will be running all the async jobs.
+
+#### Local engine
+
+The following table describes the options to configure the Docling Serve KFP engine.
+
+| ENV | Default | Description |
+|-----|---------|-------------|
+| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
+
+#### KFP engine
+
+The following table describes the options to configure the Docling Serve KFP engine.
+
+| ENV | Default | Description |
+|-----|---------|-------------|
+| `DOCLING_SERVE_ENG_KFP_ENDPOINT` |  | Must be set to the Kubeflow Pipeline endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888`  |
+| `DOCLING_SERVE_ENG_KFP_TOKEN` |  | The authentication token for KFP. For in-cluster deployment, the app will load automatically the token of the ServiceAccount. |
+| `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` |  | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will load automatically the internal CA. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` |  | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1alpha/callback/task/progress`. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` |  | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
+| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` |  | The CA certifcate for the progress callback. For cluster-inetrnal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -8,6 +8,7 @@ On top of the source of file (see below), both endpoints support the same parame

 - `from_format` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
 - `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
+- `pipeline` (str). The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
 - `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
 - `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
 - `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
@@ -18,7 +19,13 @@ On top of the source of file (see below), both endpoints support the same parame
 - `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
 - `return_as_file` (boo): If enabled, return the output as a file. Defaults to false.
 - `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to true.
+- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
+- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
+- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
+- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
+- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api.
+- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local.
+- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
 - `images_scale` (float): Scale factor for images. Defaults to 2.0.

 ## Convert endpoints
@@ -244,6 +251,70 @@ data = response.json()

 </details>

+### Picture description options
+
+When the picture description enrichment is activated, users may specify which model and which execution mode to use for this task. There are two choices for the execution mode: _local_ will run the vision-language model directly, _api_ will invoke an external API endpoint.
+
+The local option is specified with:
+
+```jsonc
+{
+  "picture_description_local": {
+    "repo_id": "",  // Repository id from the Hugging Face Hub.
+    "generation_config": {"max_new_tokens": 200, "do_sample": false},  // HF generation config.
+    "prompt": "Describe this image in a few sentences. ",  // Prompt used when calling the vision-language model.
+  }
+}
+```
+
+The possible values for `generation_config` are documented in the [Hugging Face text generation docs](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig).
+
+The api option is specified with:
+
+```jsonc
+{
+  "picture_description_api": {
+    "url": "",  // Endpoint which accepts openai-api compatible requests.
+    "headers": {},  // Headers used for calling the API endpoint. For example, it could include authentication headers.
+    "params": {},  // Model parameters.
+    "timeout": 20,  // Timeout for the API request.
+    "prompt": "Describe this image in a few sentences. ",  // Prompt used when calling the vision-language model.
+  }
+}
+```
+
+Example URLs are:
+
+- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `params`:
+  - the `HuggingFaceTB/SmolVLM-256M-Instruct` model
+
+    ```json
+    {
+        "model": "HuggingFaceTB/SmolVLM-256M-Instruct",
+        "max_completion_tokens": 200,
+    }
+    ```
+  
+  - the `ibm-granite/granite-vision-3.2-2b` model
+
+    ```json
+    {
+        "model": "ibm-granite/granite-vision-3.2-2b",
+        "max_completion_tokens": 200,
+    }
+    ```
+
+- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `params`:
+  - the `granite3.2-vision:2b` model
+
+    ```json
+    {
+        "model": "granite3.2-vision:2b"
+    }
+    ```  
+
+Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.
+
 ## Response format

 The response can be a JSON Document or a File.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "docling-serve"
-version = "0.8.0"  # DO NOT EDIT, updated automatically
+version = "0.9.0"  # DO NOT EDIT, updated automatically
 description = "Running Docling as a service"
 license = {text = "MIT"}
 authors = [
@@ -34,6 +34,7 @@ dependencies = [
    "mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
+    "kfp[kubernetes]>=2.10.0",
    "pydantic~=2.10",
    "pydantic-settings~=2.4",
    "python-multipart>=0.0.14,<0.1.0",
@@ -82,6 +83,10 @@ conflicts = [
    { extra = "cu124" },
  ],
 ]
+environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
+override-dependencies = [
+  "urllib3~=2.0"
+]

 [tool.uv.sources]
 torch = [
@@ -197,6 +202,8 @@ module = [
    "tesserocr.*",
    "rapidocr_onnxruntime.*",
    "requests.*",
+    "kfp.*",
+    "kfp_server_api.*",
    "mlx_vlm.*",
 ]
 ignore_missing_imports = true
--- a/tests/test_1-url-async-ws.py
+++ b/tests/test_1-url-async-ws.py
@@ -28,6 +28,16 @@ async def test_convert_url(async_client: httpx.AsyncClient):
            "ocr": True,
            "abort_on_error": False,
            "return_as_file": False,
+            # "do_picture_description": True,
+            # "picture_description_api": {
+            #     "url": "http://localhost:11434/v1/chat/completions",
+            #     "params": {
+            #         "model": "granite3.2-vision:2b",
+            #     }
+            # },
+            # "picture_description_local": {
+            #     "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
+            # },
        },
        # "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
        "file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],
--- a/tests/test_1-url-async.py
+++ b/tests/test_1-url-async.py
@@ -38,7 +38,7 @@ async def test_convert_url(async_client):
    }
    print(json.dumps(payload, indent=2))

-    for n in range(5):
+    for n in range(1):
        response = await async_client.post(
            f"{base_url}/convert/source/async", json=payload
        )
--- a/tests/test_fastapi_endpoints.py
+++ b/tests/test_fastapi_endpoints.py
@@ -0,0 +1,128 @@
+import json
+import os
+
+from fastapi.testclient import TestClient
+from pytest_check import check
+
+from docling_serve.app import create_app
+
+client = TestClient(create_app())
+
+
+def test_health():
+    response = client.get("/health")
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok"}
+
+
+def test_convert_file():
+    """Test convert single file to all outputs"""
+    endpoint = "/v1alpha/convert/file"
+    options = {
+        "from_formats": [
+            "docx",
+            "pptx",
+            "html",
+            "image",
+            "pdf",
+            "asciidoc",
+            "md",
+            "xlsx",
+        ],
+        "to_formats": ["md", "json", "html", "text", "doctags"],
+        "image_export_mode": "placeholder",
+        "ocr": True,
+        "force_ocr": False,
+        "ocr_engine": "easyocr",
+        "ocr_lang": ["en"],
+        "pdf_backend": "dlparse_v2",
+        "table_mode": "fast",
+        "abort_on_error": False,
+        "return_as_file": False,
+    }
+
+    current_dir = os.path.dirname(__file__)
+    file_path = os.path.join(current_dir, "2206.01062v1.pdf")
+
+    files = {
+        "files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
+    }
+
+    response = client.post(endpoint, files=files, data=options)
+    assert response.status_code == 200, "Response should be 200 OK"
+
+    data = response.json()
+
+    # Response content checks
+    # Helper function to safely slice strings
+    def safe_slice(value, length=100):
+        if isinstance(value, str):
+            return value[:length]
+        return str(value)  # Convert non-string values to string for debug purposes
+
+    # Document check
+    check.is_in(
+        "document",
+        data,
+        msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
+    )
+    # MD check
+    check.is_in(
+        "md_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("md_content") is not None:
+        check.is_in(
+            "## DocLayNet: ",
+            data["document"]["md_content"],
+            msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
+        )
+    # JSON check
+    check.is_in(
+        "json_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("json_content") is not None:
+        check.is_in(
+            '{"schema_name": "DoclingDocument"',
+            json.dumps(data["document"]["json_content"]),
+            msg=f'JSON document should contain \'{{\\n  "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
+        )
+    # HTML check
+    check.is_in(
+        "html_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("html_content") is not None:
+        check.is_in(
+            "<!DOCTYPE html>\n<html>\n<head>",
+            data["document"]["html_content"],
+            msg=f"HTML document should contain '<!DOCTYPE html>\n<html>\n<head>'. Received: {safe_slice(data['document']['html_content'])}",
+        )
+    # Text check
+    check.is_in(
+        "text_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("text_content") is not None:
+        check.is_in(
+            "DocLayNet: A Large Human-Annotated Dataset",
+            data["document"]["text_content"],
+            msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
+        )
+    # DocTags check
+    check.is_in(
+        "doctags_content",
+        data.get("document", {}),
+        msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
+    )
+    if data.get("document", {}).get("doctags_content") is not None:
+        check.is_in(
+            "<doctag><page_header>",
+            data["document"]["doctags_content"],
+            msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
+        )
--- a/uv.lock
+++ b/uv.lock
Author	SHA1	Message	Date
github-actions[bot]	37e2e1ad09	chore: bump version to 0.9.0 [skip ci]	2025-04-25 07:56:40 +00:00
Michele Dolfi	71c5fae505	fix: produce image artifacts in referenced mode (#151 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-04-24 17:33:36 +02:00
Michele Dolfi	91956cbf4e	docs: vlm and picture description options (#149 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-04-24 14:42:06 +02:00
Michele Dolfi	4c9571a052	feat: expose picture description options (#148 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>	2025-04-24 13:49:44 +02:00
Tiago Santana	41624af09f	test: add tests with fastapi client (#147 ) Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>	2025-04-24 10:25:29 +02:00
Michele Dolfi	26bef5bec0	feat: Add parameters for Kubeflow pipeline engine (WIP) (#107 ) Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>	2025-04-23 14:59:53 +02:00