feat: Add parameters for Kubeflow pipeline engine (WIP) (#107)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-04-23 14:59:53 +02:00
committed by GitHub
parent 40bb21d347
commit 26bef5bec0
21 changed files with 3727 additions and 3078 deletions

View File

@@ -28,6 +28,10 @@ from fastapi.staticfiles import StaticFiles
from docling.datamodel.base_models import DocumentStream
from docling_serve.datamodel.callback import (
ProgressCallbackRequest,
ProgressCallbackResponse,
)
from docling_serve.datamodel.convert import ConvertDocumentsOptions
from docling_serve.datamodel.requests import (
ConvertDocumentFileSourcesRequest,
@@ -45,11 +49,12 @@ from docling_serve.docling_conversion import (
get_converter,
get_pdf_pipeline_opts,
)
from docling_serve.engines import get_orchestrator
from docling_serve.engines.async_local.orchestrator import (
AsyncLocalOrchestrator,
TaskNotFoundError,
from docling_serve.engines.async_orchestrator import (
BaseAsyncOrchestrator,
ProgressInvalid,
)
from docling_serve.engines.async_orchestrator_factory import get_async_orchestrator
from docling_serve.engines.base_orchestrator import TaskNotFoundError
from docling_serve.helper_functions import FormDepends
from docling_serve.response_preparation import process_results
from docling_serve.settings import docling_serve_settings
@@ -94,7 +99,7 @@ async def lifespan(app: FastAPI):
pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
get_converter(pdf_format_option)
orchestrator = get_orchestrator()
orchestrator = get_async_orchestrator()
# Start the background queue processor
queue_task = asyncio.create_task(orchestrator.process_queue())
@@ -308,7 +313,7 @@ def create_app(): # noqa: C901
response_model=TaskStatusResponse,
)
async def process_url_async(
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
conversion_request: ConvertDocumentsRequest,
):
task = await orchestrator.enqueue(request=conversion_request)
@@ -319,6 +324,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
# Task status poll
@@ -327,7 +333,7 @@ def create_app(): # noqa: C901
response_model=TaskStatusResponse,
)
async def task_status_poll(
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
task_id: str,
wait: Annotated[
float, Query(help="Number of seconds to wait for a completed status.")
@@ -342,6 +348,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
# Task status websocket
@@ -350,7 +357,7 @@ def create_app(): # noqa: C901
)
async def task_status_ws(
websocket: WebSocket,
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
task_id: str,
):
await websocket.accept()
@@ -375,6 +382,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
await websocket.send_text(
WebsocketMessage(
@@ -389,6 +397,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
await websocket.send_text(
WebsocketMessage(
@@ -416,7 +425,7 @@ def create_app(): # noqa: C901
},
)
async def task_result(
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
task_id: str,
):
result = await orchestrator.task_result(task_id=task_id)
@@ -427,4 +436,23 @@ def create_app(): # noqa: C901
)
return result
# Update task progress
@app.post(
"/v1alpha/callback/task/progress",
response_model=ProgressCallbackResponse,
)
async def callback_task_progress(
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
request: ProgressCallbackRequest,
):
try:
await orchestrator.receive_task_progress(request=request)
return ProgressCallbackResponse(status="ack")
except TaskNotFoundError:
raise HTTPException(status_code=404, detail="Task not found.")
except ProgressInvalid as err:
raise HTTPException(
status_code=400, detail=f"Invalid progress payload: {err}"
)
return app

View File

@@ -0,0 +1,50 @@
import enum
from typing import Annotated, Literal
from pydantic import BaseModel, Field
class ProgressKind(str, enum.Enum):
SET_NUM_DOCS = "set_num_docs"
UPDATE_PROCESSED = "update_processed"
class BaseProgress(BaseModel):
kind: ProgressKind
class ProgressSetNumDocs(BaseProgress):
kind: Literal[ProgressKind.SET_NUM_DOCS] = ProgressKind.SET_NUM_DOCS
num_docs: int
class SucceededDocsItem(BaseModel):
source: str
class FailedDocsItem(BaseModel):
source: str
error: str
class ProgressUpdateProcessed(BaseProgress):
kind: Literal[ProgressKind.UPDATE_PROCESSED] = ProgressKind.UPDATE_PROCESSED
num_processed: int
num_succeeded: int
num_failed: int
docs_succeeded: list[SucceededDocsItem]
docs_failed: list[FailedDocsItem]
class ProgressCallbackRequest(BaseModel):
task_id: str
progress: Annotated[
ProgressSetNumDocs | ProgressUpdateProcessed, Field(discriminator="kind")
]
class ProgressCallbackResponse(BaseModel):
status: Literal["ack"] = "ack"

View File

@@ -10,3 +10,4 @@ class TaskStatus(str, enum.Enum):
class AsyncEngine(str, enum.Enum):
LOCAL = "local"
KFP = "kfp"

View File

@@ -0,0 +1,7 @@
from pydantic import AnyUrl, BaseModel
class CallbackSpec(BaseModel):
url: AnyUrl
headers: dict[str, str] = {}
ca_cert: str = ""

View File

@@ -7,6 +7,8 @@ from docling.datamodel.document import ConversionStatus, ErrorItem
from docling.utils.profiling import ProfilingItem
from docling_core.types.doc import DoclingDocument
from docling_serve.datamodel.task_meta import TaskProcessingMeta
# Status
class HealthCheckResponse(BaseModel):
@@ -38,6 +40,7 @@ class TaskStatusResponse(BaseModel):
task_id: str
task_status: str
task_position: Optional[int] = None
task_meta: Optional[TaskProcessingMeta] = None
class MessageKind(str, enum.Enum):

View File

@@ -5,6 +5,7 @@ from pydantic import BaseModel
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.responses import ConvertDocumentResponse
from docling_serve.datamodel.task_meta import TaskProcessingMeta
class Task(BaseModel):
@@ -12,6 +13,7 @@ class Task(BaseModel):
task_status: TaskStatus = TaskStatus.PENDING
request: Optional[ConvertDocumentsRequest]
result: Optional[ConvertDocumentResponse] = None
processing_meta: Optional[TaskProcessingMeta] = None
def is_completed(self) -> bool:
if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:

View File

@@ -0,0 +1,8 @@
from pydantic import BaseModel
class TaskProcessingMeta(BaseModel):
num_docs: int
num_processed: int = 0
num_succeeded: int = 0
num_failed: int = 0

View File

@@ -1,8 +0,0 @@
from functools import lru_cache
from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
@lru_cache
def get_orchestrator() -> AsyncLocalOrchestrator:
return AsyncLocalOrchestrator()

View File

@@ -0,0 +1,137 @@
# ruff: noqa: E402, UP006, UP035
from typing import Any, Dict, List
from kfp import dsl
PYTHON_BASE_IMAGE = "python:3.12"
@dsl.component(
base_image=PYTHON_BASE_IMAGE,
packages_to_install=[
"pydantic",
"docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
],
pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
)
def generate_chunks(
run_name: str,
request: Dict[str, Any],
batch_size: int,
callbacks: List[Dict[str, Any]],
) -> List[List[Dict[str, Any]]]:
from pydantic import TypeAdapter
from docling_serve.datamodel.callback import (
ProgressCallbackRequest,
ProgressSetNumDocs,
)
from docling_serve.datamodel.kfp import CallbackSpec
from docling_serve.engines.async_kfp.notify import notify_callbacks
CallbacksListType = TypeAdapter(list[CallbackSpec])
sources = request["http_sources"]
splits = [sources[i : i + batch_size] for i in range(0, len(sources), batch_size)]
total = sum(len(chunk) for chunk in splits)
payload = ProgressCallbackRequest(
task_id=run_name, progress=ProgressSetNumDocs(num_docs=total)
)
notify_callbacks(
payload=payload,
callbacks=CallbacksListType.validate_python(callbacks),
)
return splits
@dsl.component(
base_image=PYTHON_BASE_IMAGE,
packages_to_install=[
"pydantic",
"docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
],
pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
)
def convert_batch(
run_name: str,
data_splits: List[Dict[str, Any]],
request: Dict[str, Any],
callbacks: List[Dict[str, Any]],
output_path: dsl.OutputPath("Directory"), # type: ignore
):
from pathlib import Path
from pydantic import AnyUrl, TypeAdapter
from docling_serve.datamodel.callback import (
FailedDocsItem,
ProgressCallbackRequest,
ProgressUpdateProcessed,
SucceededDocsItem,
)
from docling_serve.datamodel.convert import ConvertDocumentsOptions
from docling_serve.datamodel.kfp import CallbackSpec
from docling_serve.datamodel.requests import HttpSource
from docling_serve.engines.async_kfp.notify import notify_callbacks
CallbacksListType = TypeAdapter(list[CallbackSpec])
convert_options = ConvertDocumentsOptions.model_validate(request["options"])
print(convert_options)
output_dir = Path(output_path)
output_dir.mkdir(exist_ok=True, parents=True)
docs_succeeded: list[SucceededDocsItem] = []
docs_failed: list[FailedDocsItem] = []
for source_dict in data_splits:
source = HttpSource.model_validate(source_dict)
filename = Path(str(AnyUrl(source.url).path)).name
output_filename = output_dir / filename
print(f"Writing {output_filename}")
with output_filename.open("w") as f:
f.write(source.model_dump_json())
docs_succeeded.append(SucceededDocsItem(source=source.url))
payload = ProgressCallbackRequest(
task_id=run_name,
progress=ProgressUpdateProcessed(
num_failed=len(docs_failed),
num_processed=len(docs_succeeded) + len(docs_failed),
num_succeeded=len(docs_succeeded),
docs_succeeded=docs_succeeded,
docs_failed=docs_failed,
),
)
print(payload)
notify_callbacks(
payload=payload,
callbacks=CallbacksListType.validate_python(callbacks),
)
@dsl.pipeline()
def process(
batch_size: int,
request: Dict[str, Any],
callbacks: List[Dict[str, Any]] = [],
run_name: str = "",
):
chunks_task = generate_chunks(
run_name=run_name,
request=request,
batch_size=batch_size,
callbacks=callbacks,
)
chunks_task.set_caching_options(False)
with dsl.ParallelFor(chunks_task.output, parallelism=4) as data_splits:
convert_batch(
run_name=run_name,
data_splits=data_splits,
request=request,
callbacks=callbacks,
)

View File

@@ -0,0 +1,32 @@
import ssl
import certifi
import httpx
from docling_serve.datamodel.callback import ProgressCallbackRequest
from docling_serve.datamodel.kfp import CallbackSpec
def notify_callbacks(
payload: ProgressCallbackRequest,
callbacks: list[CallbackSpec],
):
if len(callbacks) == 0:
return
for callback in callbacks:
# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances
if callback.ca_cert:
ctx = ssl.create_default_context(cadata=callback.ca_cert)
else:
ctx = ssl.create_default_context(cafile=certifi.where())
try:
httpx.post(
str(callback.url),
headers=callback.headers,
json=payload.model_dump(mode="json"),
verify=ctx,
)
except httpx.HTTPError as err:
print(f"Error notifying callback {callback.url}: {err}")

View File

@@ -0,0 +1,226 @@
import datetime
import json
import logging
import uuid
from pathlib import Path
from typing import Optional
from kfp_server_api.models import V2beta1RuntimeState
from pydantic import BaseModel, TypeAdapter
from pydantic_settings import SettingsConfigDict
from docling_serve.datamodel.callback import (
ProgressCallbackRequest,
ProgressSetNumDocs,
ProgressUpdateProcessed,
)
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.kfp import CallbackSpec
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.task import Task
from docling_serve.datamodel.task_meta import TaskProcessingMeta
from docling_serve.engines.async_kfp.kfp_pipeline import process
from docling_serve.engines.async_orchestrator import (
BaseAsyncOrchestrator,
ProgressInvalid,
)
from docling_serve.settings import docling_serve_settings
_log = logging.getLogger(__name__)
class _RunItem(BaseModel):
model_config = SettingsConfigDict(arbitrary_types_allowed=True)
run_id: str
state: str
created_at: datetime.datetime
scheduled_at: datetime.datetime
finished_at: datetime.datetime
class AsyncKfpOrchestrator(BaseAsyncOrchestrator):
def __init__(self):
super().__init__()
import kfp
kfp_endpoint = docling_serve_settings.eng_kfp_endpoint
if kfp_endpoint is None:
raise ValueError("KFP endpoint is required when using the KFP engine.")
kube_sa_token_path = Path("/run/secrets/kubernetes.io/serviceaccount/token")
kube_sa_ca_cert_path = Path(
"/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
)
ssl_ca_cert = docling_serve_settings.eng_kfp_ca_cert_path
token = docling_serve_settings.eng_kfp_token
if (
ssl_ca_cert is None
and ".svc" in kfp_endpoint.host
and kube_sa_ca_cert_path.exists()
):
ssl_ca_cert = str(kube_sa_ca_cert_path)
if token is None and kube_sa_token_path.exists():
token = kube_sa_token_path.read_text()
self._client = kfp.Client(
host=str(kfp_endpoint),
existing_token=token,
ssl_ca_cert=ssl_ca_cert,
# verify_ssl=False,
)
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
callbacks = []
if docling_serve_settings.eng_kfp_self_callback_endpoint is not None:
headers = {}
if docling_serve_settings.eng_kfp_self_callback_token_path is not None:
token = (
docling_serve_settings.eng_kfp_self_callback_token_path.read_text()
)
headers["Authorization"] = f"Bearer {token}"
ca_cert = ""
if docling_serve_settings.eng_kfp_self_callback_ca_cert_path is not None:
ca_cert = docling_serve_settings.eng_kfp_self_callback_ca_cert_path.read_text()
callbacks.append(
CallbackSpec(
url=docling_serve_settings.eng_kfp_self_callback_endpoint,
headers=headers,
ca_cert=ca_cert,
)
)
CallbacksType = TypeAdapter(list[CallbackSpec])
# hack: since the current kfp backend is not resolving the job_id placeholder,
# we set the run_name and pass it as argument to the job itself.
run_name = f"docling-job-{uuid.uuid4()}"
kfp_run = self._client.create_run_from_pipeline_func(
process,
arguments={
"batch_size": 10,
"request": request.model_dump(mode="json"),
"callbacks": CallbacksType.dump_python(callbacks, mode="json"),
"run_name": run_name,
},
run_name=run_name,
)
task_id = kfp_run.run_id
task = Task(task_id=task_id, request=request)
await self.init_task_tracking(task)
return task
async def _update_task_from_run(self, task_id: str, wait: float = 0.0):
run_info = self._client.get_run(run_id=task_id)
task = await self.get_raw_task(task_id=task_id)
# RUNTIME_STATE_UNSPECIFIED = "RUNTIME_STATE_UNSPECIFIED"
# PENDING = "PENDING"
# RUNNING = "RUNNING"
# SUCCEEDED = "SUCCEEDED"
# SKIPPED = "SKIPPED"
# FAILED = "FAILED"
# CANCELING = "CANCELING"
# CANCELED = "CANCELED"
# PAUSED = "PAUSED"
if run_info.state == V2beta1RuntimeState.SUCCEEDED:
task.task_status = TaskStatus.SUCCESS
elif run_info.state == V2beta1RuntimeState.PENDING:
task.task_status = TaskStatus.PENDING
elif run_info.state == V2beta1RuntimeState.RUNNING:
task.task_status = TaskStatus.STARTED
else:
task.task_status = TaskStatus.FAILURE
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
await self._update_task_from_run(task_id=task_id, wait=wait)
return await self.get_raw_task(task_id=task_id)
async def _get_pending(self) -> list[_RunItem]:
runs: list[_RunItem] = []
next_page: Optional[str] = None
while True:
res = self._client.list_runs(
page_token=next_page,
page_size=20,
filter=json.dumps(
{
"predicates": [
{
"operation": "EQUALS",
"key": "state",
"stringValue": "PENDING",
}
]
}
),
)
if res.runs is not None:
for run in res.runs:
runs.append(
_RunItem(
run_id=run.run_id,
state=run.state,
created_at=run.created_at,
scheduled_at=run.scheduled_at,
finished_at=run.finished_at,
)
)
if res.next_page_token is None:
break
next_page = res.next_page_token
return runs
async def queue_size(self) -> int:
runs = await self._get_pending()
return len(runs)
async def get_queue_position(self, task_id: str) -> Optional[int]:
runs = await self._get_pending()
for pos, run in enumerate(runs, start=1):
if run.run_id == task_id:
return pos
return None
async def process_queue(self):
return
async def _get_run_id(self, run_name: str) -> str:
res = self._client.list_runs(
filter=json.dumps(
{
"predicates": [
{
"operation": "EQUALS",
"key": "name",
"stringValue": run_name,
}
]
}
),
)
if res.runs is not None and len(res.runs) > 0:
return res.runs[0].run_id
raise RuntimeError(f"Run with {run_name=} not found.")
async def receive_task_progress(self, request: ProgressCallbackRequest):
task_id = await self._get_run_id(run_name=request.task_id)
progress = request.progress
task = await self.get_raw_task(task_id=task_id)
if isinstance(progress, ProgressSetNumDocs):
task.processing_meta = TaskProcessingMeta(num_docs=progress.num_docs)
task.task_status = TaskStatus.STARTED
elif isinstance(progress, ProgressUpdateProcessed):
if task.processing_meta is None:
raise ProgressInvalid(
"UpdateProcessed was called before setting the expected number of documents."
)
task.processing_meta.num_processed += progress.num_processed
task.processing_meta.num_succeeded += progress.num_succeeded
task.processing_meta.num_failed += progress.num_failed
task.task_status = TaskStatus.STARTED
# TODO: could be moved to BackgroundTask
await self.notify_task_subscribers(task_id=task_id)

View File

@@ -3,44 +3,27 @@ import logging
import uuid
from typing import Optional
from fastapi import WebSocket
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.responses import (
MessageKind,
TaskStatusResponse,
WebsocketMessage,
)
from docling_serve.datamodel.task import Task
from docling_serve.engines.async_local.worker import AsyncLocalWorker
from docling_serve.engines.base_orchestrator import BaseOrchestrator
from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
from docling_serve.settings import docling_serve_settings
_log = logging.getLogger(__name__)
class OrchestratorError(Exception):
pass
class TaskNotFoundError(OrchestratorError):
pass
class AsyncLocalOrchestrator(BaseOrchestrator):
class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
def __init__(self):
super().__init__()
self.task_queue = asyncio.Queue()
self.tasks: dict[str, Task] = {}
self.queue_list: list[str] = []
self.task_subscribers: dict[str, set[WebSocket]] = {}
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
task_id = str(uuid.uuid4())
task = Task(task_id=task_id, request=request)
self.tasks[task_id] = task
await self.init_task_tracking(task)
self.queue_list.append(task_id)
self.task_subscribers[task_id] = set()
await self.task_queue.put(task_id)
return task
@@ -52,16 +35,6 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
)
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
if task_id not in self.tasks:
raise TaskNotFoundError()
return self.tasks[task_id]
async def task_result(self, task_id: str):
if task_id not in self.tasks:
raise TaskNotFoundError()
return self.tasks[task_id].result
async def process_queue(self):
# Create a pool of workers
workers = []
@@ -74,29 +47,3 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
# Wait for all workers to complete (they won't, as they run indefinitely)
await asyncio.gather(*workers)
_log.debug("All workers completed.")
async def notify_task_subscribers(self, task_id: str):
if task_id not in self.task_subscribers:
raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
task = self.tasks[task_id]
task_queue_position = await self.get_queue_position(task_id)
msg = TaskStatusResponse(
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
)
for websocket in self.task_subscribers[task_id]:
await websocket.send_text(
WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
)
if task.is_completed():
await websocket.close()
async def notify_queue_positions(self):
for task_id in self.task_subscribers.keys():
# notify only pending tasks
if self.tasks[task_id].task_status != TaskStatus.PENDING:
continue
await self.notify_task_subscribers(task_id)

View File

@@ -0,0 +1,72 @@
from fastapi import WebSocket
from docling_serve.datamodel.callback import ProgressCallbackRequest
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.responses import (
MessageKind,
TaskStatusResponse,
WebsocketMessage,
)
from docling_serve.datamodel.task import Task
from docling_serve.engines.base_orchestrator import (
BaseOrchestrator,
OrchestratorError,
TaskNotFoundError,
)
class ProgressInvalid(OrchestratorError):
pass
class BaseAsyncOrchestrator(BaseOrchestrator):
def __init__(self):
self.tasks: dict[str, Task] = {}
self.task_subscribers: dict[str, set[WebSocket]] = {}
async def init_task_tracking(self, task: Task):
task_id = task.task_id
self.tasks[task.task_id] = task
self.task_subscribers[task_id] = set()
async def get_raw_task(self, task_id: str) -> Task:
if task_id not in self.tasks:
raise TaskNotFoundError()
return self.tasks[task_id]
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
return await self.get_raw_task(task_id=task_id)
async def task_result(self, task_id: str):
task = await self.get_raw_task(task_id=task_id)
return task.result
async def notify_task_subscribers(self, task_id: str):
if task_id not in self.task_subscribers:
raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
task = await self.get_raw_task(task_id=task_id)
task_queue_position = await self.get_queue_position(task_id)
msg = TaskStatusResponse(
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
for websocket in self.task_subscribers[task_id]:
await websocket.send_text(
WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
)
if task.is_completed():
await websocket.close()
async def notify_queue_positions(self):
for task_id in self.task_subscribers.keys():
# notify only pending tasks
if self.tasks[task_id].task_status != TaskStatus.PENDING:
continue
await self.notify_task_subscribers(task_id)
async def receive_task_progress(self, request: ProgressCallbackRequest):
raise NotImplementedError()

View File

@@ -0,0 +1,21 @@
from functools import lru_cache
from docling_serve.datamodel.engines import AsyncEngine
from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
from docling_serve.settings import docling_serve_settings
@lru_cache
def get_async_orchestrator() -> BaseAsyncOrchestrator:
if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
from docling_serve.engines.async_local.orchestrator import (
AsyncLocalOrchestrator,
)
return AsyncLocalOrchestrator()
elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
from docling_serve.engines.async_kfp.orchestrator import AsyncKfpOrchestrator
return AsyncKfpOrchestrator()
raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")

View File

@@ -1,11 +1,21 @@
from abc import ABC, abstractmethod
from typing import Optional
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.task import Task
class OrchestratorError(Exception):
pass
class TaskNotFoundError(OrchestratorError):
pass
class BaseOrchestrator(ABC):
@abstractmethod
async def enqueue(self, task) -> Task:
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
pass
@abstractmethod
@@ -13,9 +23,17 @@ class BaseOrchestrator(ABC):
pass
@abstractmethod
async def task_status(self, task_id: str) -> Task:
async def get_queue_position(self, task_id: str) -> Optional[int]:
pass
@abstractmethod
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
pass
@abstractmethod
async def task_result(self, task_id: str):
pass
@abstractmethod
async def process_queue(self):
pass

View File

@@ -2,7 +2,9 @@ import sys
from pathlib import Path
from typing import Optional, Union
from pydantic import AnyUrl, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing_extensions import Self
from docling_serve.datamodel.engines import AsyncEngine
@@ -48,7 +50,32 @@ class DoclingServeSettings(BaseSettings):
cors_headers: list[str] = ["*"]
eng_kind: AsyncEngine = AsyncEngine.LOCAL
# Local engine
eng_loc_num_workers: int = 2
# KFP engine
eng_kfp_endpoint: Optional[AnyUrl] = None
eng_kfp_token: Optional[str] = None
eng_kfp_ca_cert_path: Optional[str] = None
eng_kfp_self_callback_endpoint: Optional[str] = None
eng_kfp_self_callback_token_path: Optional[Path] = None
eng_kfp_self_callback_ca_cert_path: Optional[Path] = None
eng_kfp_experimental: bool = False
@model_validator(mode="after")
def engine_settings(self) -> Self:
# Validate KFP engine settings
if self.eng_kind == AsyncEngine.KFP:
if self.eng_kfp_endpoint is None:
raise ValueError("KFP endpoint is required when using the KFP engine.")
if self.eng_kind == AsyncEngine.KFP:
if not self.eng_kfp_experimental:
raise ValueError(
"KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
)
return self
uvicorn_settings = UvicornSettings()

View File

@@ -42,3 +42,30 @@ THe following table describes the options to configure the Docling Serve app.
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
### Compute engine
Docling Serve can be deployed with several possible of compute engine.
The selected compute engine will be running all the async jobs.
#### Local engine
The following table describes the options to configure the Docling Serve KFP engine.
| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
#### KFP engine
The following table describes the options to configure the Docling Serve KFP engine.
| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_KFP_ENDPOINT` | | Must be set to the Kubeflow Pipeline endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888` |
| `DOCLING_SERVE_ENG_KFP_TOKEN` | | The authentication token for KFP. For in-cluster deployment, the app will load automatically the token of the ServiceAccount. |
| `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` | | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will load automatically the internal CA. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` | | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1alpha/callback/task/progress`. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` | | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` | | The CA certifcate for the progress callback. For cluster-inetrnal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |

View File

@@ -34,6 +34,7 @@ dependencies = [
"mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
"fastapi[standard]~=0.115",
"httpx~=0.28",
"kfp[kubernetes]>=2.10.0",
"pydantic~=2.10",
"pydantic-settings~=2.4",
"python-multipart>=0.0.14,<0.1.0",
@@ -82,6 +83,10 @@ conflicts = [
{ extra = "cu124" },
],
]
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
override-dependencies = [
"urllib3~=2.0"
]
[tool.uv.sources]
torch = [
@@ -197,6 +202,8 @@ module = [
"tesserocr.*",
"rapidocr_onnxruntime.*",
"requests.*",
"kfp.*",
"kfp_server_api.*",
"mlx_vlm.*",
]
ignore_missing_imports = true

View File

@@ -38,7 +38,7 @@ async def test_convert_url(async_client):
}
print(json.dumps(payload, indent=2))
for n in range(5):
for n in range(1):
response = await async_client.post(
f"{base_url}/convert/source/async", json=payload
)

6044
uv.lock generated

File diff suppressed because it is too large Load Diff