6 Commits

Author SHA1 Message Date
github-actions[bot]
37e2e1ad09 chore: bump version to 0.9.0 [skip ci] 2025-04-25 07:56:40 +00:00
Michele Dolfi
71c5fae505 fix: produce image artifacts in referenced mode (#151)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-04-24 17:33:36 +02:00
Michele Dolfi
91956cbf4e docs: vlm and picture description options (#149)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-04-24 14:42:06 +02:00
Michele Dolfi
4c9571a052 feat: expose picture description options (#148)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
2025-04-24 13:49:44 +02:00
Tiago Santana
41624af09f test: add tests with fastapi client (#147)
Signed-off-by: Tiago Santana <54704492+SantanaTiago@users.noreply.github.com>
2025-04-24 10:25:29 +02:00
Michele Dolfi
26bef5bec0 feat: Add parameters for Kubeflow pipeline engine (WIP) (#107)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-04-23 14:59:53 +02:00
28 changed files with 1816 additions and 807 deletions

View File

@@ -1,3 +1,18 @@
## [v0.9.0](https://github.com/docling-project/docling-serve/releases/tag/v0.9.0) - 2025-04-25
### Feature
* Expose picture description options ([#148](https://github.com/docling-project/docling-serve/issues/148)) ([`4c9571a`](https://github.com/docling-project/docling-serve/commit/4c9571a052d5ec0044e49225bc5615e13cdb0a56))
* Add parameters for Kubeflow pipeline engine (WIP) ([#107](https://github.com/docling-project/docling-serve/issues/107)) ([`26bef5b`](https://github.com/docling-project/docling-serve/commit/26bef5bec060f0afd8d358816b68c3f2c0dd4bc2))
### Fix
* Produce image artifacts in referenced mode ([#151](https://github.com/docling-project/docling-serve/issues/151)) ([`71c5fae`](https://github.com/docling-project/docling-serve/commit/71c5fae505366459fd481d2ecdabc5ebed94d49c))
### Documentation
* Vlm and picture description options ([#149](https://github.com/docling-project/docling-serve/issues/149)) ([`91956cb`](https://github.com/docling-project/docling-serve/commit/91956cbf4e91cf82bb4d54ace397cdbbfaf594ba))
## [v0.8.0](https://github.com/docling-project/docling-serve/releases/tag/v0.8.0) - 2025-04-22
### Feature

View File

@@ -28,6 +28,10 @@ from fastapi.staticfiles import StaticFiles
from docling.datamodel.base_models import DocumentStream
from docling_serve.datamodel.callback import (
ProgressCallbackRequest,
ProgressCallbackResponse,
)
from docling_serve.datamodel.convert import ConvertDocumentsOptions
from docling_serve.datamodel.requests import (
ConvertDocumentFileSourcesRequest,
@@ -45,11 +49,12 @@ from docling_serve.docling_conversion import (
get_converter,
get_pdf_pipeline_opts,
)
from docling_serve.engines import get_orchestrator
from docling_serve.engines.async_local.orchestrator import (
AsyncLocalOrchestrator,
TaskNotFoundError,
from docling_serve.engines.async_orchestrator import (
BaseAsyncOrchestrator,
ProgressInvalid,
)
from docling_serve.engines.async_orchestrator_factory import get_async_orchestrator
from docling_serve.engines.base_orchestrator import TaskNotFoundError
from docling_serve.helper_functions import FormDepends
from docling_serve.response_preparation import process_results
from docling_serve.settings import docling_serve_settings
@@ -94,7 +99,7 @@ async def lifespan(app: FastAPI):
pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
get_converter(pdf_format_option)
orchestrator = get_orchestrator()
orchestrator = get_async_orchestrator()
# Start the background queue processor
queue_task = asyncio.create_task(orchestrator.process_queue())
@@ -308,7 +313,7 @@ def create_app(): # noqa: C901
response_model=TaskStatusResponse,
)
async def process_url_async(
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
conversion_request: ConvertDocumentsRequest,
):
task = await orchestrator.enqueue(request=conversion_request)
@@ -319,6 +324,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
# Task status poll
@@ -327,7 +333,7 @@ def create_app(): # noqa: C901
response_model=TaskStatusResponse,
)
async def task_status_poll(
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
task_id: str,
wait: Annotated[
float, Query(help="Number of seconds to wait for a completed status.")
@@ -342,6 +348,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
# Task status websocket
@@ -350,7 +357,7 @@ def create_app(): # noqa: C901
)
async def task_status_ws(
websocket: WebSocket,
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
task_id: str,
):
await websocket.accept()
@@ -375,6 +382,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
await websocket.send_text(
WebsocketMessage(
@@ -389,6 +397,7 @@ def create_app(): # noqa: C901
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
await websocket.send_text(
WebsocketMessage(
@@ -416,7 +425,7 @@ def create_app(): # noqa: C901
},
)
async def task_result(
orchestrator: Annotated[AsyncLocalOrchestrator, Depends(get_orchestrator)],
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
task_id: str,
):
result = await orchestrator.task_result(task_id=task_id)
@@ -427,4 +436,23 @@ def create_app(): # noqa: C901
)
return result
# Update task progress
@app.post(
"/v1alpha/callback/task/progress",
response_model=ProgressCallbackResponse,
)
async def callback_task_progress(
orchestrator: Annotated[BaseAsyncOrchestrator, Depends(get_async_orchestrator)],
request: ProgressCallbackRequest,
):
try:
await orchestrator.receive_task_progress(request=request)
return ProgressCallbackResponse(status="ack")
except TaskNotFoundError:
raise HTTPException(status_code=404, detail="Task not found.")
except ProgressInvalid as err:
raise HTTPException(
status_code=400, detail=f"Invalid progress payload: {err}"
)
return app

View File

@@ -0,0 +1,50 @@
import enum
from typing import Annotated, Literal
from pydantic import BaseModel, Field
class ProgressKind(str, enum.Enum):
SET_NUM_DOCS = "set_num_docs"
UPDATE_PROCESSED = "update_processed"
class BaseProgress(BaseModel):
kind: ProgressKind
class ProgressSetNumDocs(BaseProgress):
kind: Literal[ProgressKind.SET_NUM_DOCS] = ProgressKind.SET_NUM_DOCS
num_docs: int
class SucceededDocsItem(BaseModel):
source: str
class FailedDocsItem(BaseModel):
source: str
error: str
class ProgressUpdateProcessed(BaseProgress):
kind: Literal[ProgressKind.UPDATE_PROCESSED] = ProgressKind.UPDATE_PROCESSED
num_processed: int
num_succeeded: int
num_failed: int
docs_succeeded: list[SucceededDocsItem]
docs_failed: list[FailedDocsItem]
class ProgressCallbackRequest(BaseModel):
task_id: str
progress: Annotated[
ProgressSetNumDocs | ProgressUpdateProcessed, Field(discriminator="kind")
]
class ProgressCallbackResponse(BaseModel):
status: Literal["ack"] = "ack"

View File

@@ -1,7 +1,8 @@
# Define the input options for the API
from typing import Annotated, Optional
from typing import Annotated, Any, Optional
from pydantic import BaseModel, Field
from pydantic import AnyUrl, BaseModel, Field, model_validator
from typing_extensions import Self
from docling.datamodel.base_models import InputFormat, OutputFormat
from docling.datamodel.pipeline_options import (
@@ -26,6 +27,89 @@ ocr_factory = get_ocr_factory(
ocr_engines_enum = ocr_factory.get_enum()
class PictureDescriptionLocal(BaseModel):
repo_id: Annotated[
str,
Field(
description="Repository id from the Hugging Face Hub.",
examples=[
"HuggingFaceTB/SmolVLM-256M-Instruct",
"ibm-granite/granite-vision-3.2-2b",
],
),
]
prompt: Annotated[
str,
Field(
description="Prompt used when calling the vision-language model.",
examples=[
"Describe this image in a few sentences.",
"This is a figure from a document. Provide a detailed description of it.",
],
),
] = "Describe this image in a few sentences."
generation_config: Annotated[
dict[str, Any],
Field(
description="Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig",
examples=[{"max_new_tokens": 200, "do_sample": False}],
),
] = {"max_new_tokens": 200, "do_sample": False}
class PictureDescriptionApi(BaseModel):
url: Annotated[
AnyUrl,
Field(
description="Endpoint which accepts openai-api compatible requests.",
examples=[
AnyUrl(
"http://localhost:8000/v1/chat/completions"
), # example of a local vllm api
AnyUrl(
"http://localhost:11434/v1/chat/completions"
), # example of ollama
],
),
]
headers: Annotated[
dict[str, str],
Field(
description="Headers used for calling the API endpoint. For example, it could include authentication headers."
),
] = {}
params: Annotated[
dict[str, Any],
Field(
description="Model parameters.",
examples=[
{ # on vllm
"model": "HuggingFaceTB/SmolVLM-256M-Instruct",
"max_completion_tokens": 200,
},
{ # on vllm
"model": "ibm-granite/granite-vision-3.2-2b",
"max_completion_tokens": 200,
},
{ # on ollama
"model": "granite3.2-vision:2b"
},
],
),
] = {}
timeout: Annotated[float, Field(description="Timeout for the API request.")] = 20
prompt: Annotated[
str,
Field(
description="Prompt used when calling the vision-language model.",
examples=[
"Describe this image in a few sentences.",
"This is a figures from a document. Provide a detailed description of it.",
],
),
] = "Describe this image in a few sentences."
class ConvertDocumentsOptions(BaseModel):
from_formats: Annotated[
list[InputFormat],
@@ -226,7 +310,7 @@ class ConvertDocumentsOptions(BaseModel):
bool,
Field(
description=(
"If enabled, perform formula OCR, return Latex code. "
"If enabled, perform formula OCR, return LaTeX code. "
"Boolean. Optional, defaults to false."
),
examples=[False],
@@ -254,3 +338,30 @@ class ConvertDocumentsOptions(BaseModel):
examples=[False],
),
] = False
picture_description_local: Annotated[
Optional[PictureDescriptionLocal],
Field(
description="Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api."
),
] = None
picture_description_api: Annotated[
Optional[PictureDescriptionApi],
Field(
description="API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local."
),
] = None
@model_validator(mode="after")
def picture_description_exclusivity(self) -> Self:
# Validate picture description options
if (
self.picture_description_local is not None
and self.picture_description_api is not None
):
raise ValueError(
"The parameters picture_description_local and picture_description_api are mutually exclusive, only one of them can be set."
)
return self

View File

@@ -10,3 +10,4 @@ class TaskStatus(str, enum.Enum):
class AsyncEngine(str, enum.Enum):
LOCAL = "local"
KFP = "kfp"

View File

@@ -0,0 +1,7 @@
from pydantic import AnyUrl, BaseModel
class CallbackSpec(BaseModel):
url: AnyUrl
headers: dict[str, str] = {}
ca_cert: str = ""

View File

@@ -7,6 +7,8 @@ from docling.datamodel.document import ConversionStatus, ErrorItem
from docling.utils.profiling import ProfilingItem
from docling_core.types.doc import DoclingDocument
from docling_serve.datamodel.task_meta import TaskProcessingMeta
# Status
class HealthCheckResponse(BaseModel):
@@ -38,6 +40,7 @@ class TaskStatusResponse(BaseModel):
task_id: str
task_status: str
task_position: Optional[int] = None
task_meta: Optional[TaskProcessingMeta] = None
class MessageKind(str, enum.Enum):

View File

@@ -5,6 +5,7 @@ from pydantic import BaseModel
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.responses import ConvertDocumentResponse
from docling_serve.datamodel.task_meta import TaskProcessingMeta
class Task(BaseModel):
@@ -12,6 +13,7 @@ class Task(BaseModel):
task_status: TaskStatus = TaskStatus.PENDING
request: Optional[ConvertDocumentsRequest]
result: Optional[ConvertDocumentResponse] = None
processing_meta: Optional[TaskProcessingMeta] = None
def is_completed(self) -> bool:
if self.task_status in [TaskStatus.SUCCESS, TaskStatus.FAILURE]:

View File

@@ -0,0 +1,8 @@
from pydantic import BaseModel
class TaskProcessingMeta(BaseModel):
num_docs: int
num_processed: int = 0
num_succeeded: int = 0
num_failed: int = 0

View File

@@ -21,6 +21,8 @@ from docling.datamodel.pipeline_options import (
PdfBackend,
PdfPipeline,
PdfPipelineOptions,
PictureDescriptionApiOptions,
PictureDescriptionVlmOptions,
TableFormerMode,
VlmPipelineOptions,
smoldocling_vlm_conversion_options,
@@ -116,6 +118,7 @@ def _parse_standard_pdf_opts(
pipeline_options = PdfPipelineOptions(
artifacts_path=artifacts_path,
enable_remote_services=docling_serve_settings.enable_remote_services,
document_timeout=request.document_timeout,
do_ocr=request.do_ocr,
ocr_options=ocr_options,
@@ -129,9 +132,25 @@ def _parse_standard_pdf_opts(
if request.image_export_mode != ImageRefMode.PLACEHOLDER:
pipeline_options.generate_page_images = True
if request.image_export_mode == ImageRefMode.REFERENCED:
pipeline_options.generate_picture_images = True
if request.images_scale:
pipeline_options.images_scale = request.images_scale
if request.picture_description_local is not None:
pipeline_options.picture_description_options = (
PictureDescriptionVlmOptions.model_validate(
request.picture_description_local.model_dump()
)
)
if request.picture_description_api is not None:
pipeline_options.picture_description_options = (
PictureDescriptionApiOptions.model_validate(
request.picture_description_api.model_dump()
)
)
return pipeline_options

View File

@@ -1,8 +0,0 @@
from functools import lru_cache
from docling_serve.engines.async_local.orchestrator import AsyncLocalOrchestrator
@lru_cache
def get_orchestrator() -> AsyncLocalOrchestrator:
return AsyncLocalOrchestrator()

View File

@@ -0,0 +1,137 @@
# ruff: noqa: E402, UP006, UP035
from typing import Any, Dict, List
from kfp import dsl
PYTHON_BASE_IMAGE = "python:3.12"
@dsl.component(
base_image=PYTHON_BASE_IMAGE,
packages_to_install=[
"pydantic",
"docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
],
pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
)
def generate_chunks(
run_name: str,
request: Dict[str, Any],
batch_size: int,
callbacks: List[Dict[str, Any]],
) -> List[List[Dict[str, Any]]]:
from pydantic import TypeAdapter
from docling_serve.datamodel.callback import (
ProgressCallbackRequest,
ProgressSetNumDocs,
)
from docling_serve.datamodel.kfp import CallbackSpec
from docling_serve.engines.async_kfp.notify import notify_callbacks
CallbacksListType = TypeAdapter(list[CallbackSpec])
sources = request["http_sources"]
splits = [sources[i : i + batch_size] for i in range(0, len(sources), batch_size)]
total = sum(len(chunk) for chunk in splits)
payload = ProgressCallbackRequest(
task_id=run_name, progress=ProgressSetNumDocs(num_docs=total)
)
notify_callbacks(
payload=payload,
callbacks=CallbacksListType.validate_python(callbacks),
)
return splits
@dsl.component(
base_image=PYTHON_BASE_IMAGE,
packages_to_install=[
"pydantic",
"docling-serve @ git+https://github.com/docling-project/docling-serve@feat-kfp-engine",
],
pip_index_urls=["https://download.pytorch.org/whl/cpu", "https://pypi.org/simple"],
)
def convert_batch(
run_name: str,
data_splits: List[Dict[str, Any]],
request: Dict[str, Any],
callbacks: List[Dict[str, Any]],
output_path: dsl.OutputPath("Directory"), # type: ignore
):
from pathlib import Path
from pydantic import AnyUrl, TypeAdapter
from docling_serve.datamodel.callback import (
FailedDocsItem,
ProgressCallbackRequest,
ProgressUpdateProcessed,
SucceededDocsItem,
)
from docling_serve.datamodel.convert import ConvertDocumentsOptions
from docling_serve.datamodel.kfp import CallbackSpec
from docling_serve.datamodel.requests import HttpSource
from docling_serve.engines.async_kfp.notify import notify_callbacks
CallbacksListType = TypeAdapter(list[CallbackSpec])
convert_options = ConvertDocumentsOptions.model_validate(request["options"])
print(convert_options)
output_dir = Path(output_path)
output_dir.mkdir(exist_ok=True, parents=True)
docs_succeeded: list[SucceededDocsItem] = []
docs_failed: list[FailedDocsItem] = []
for source_dict in data_splits:
source = HttpSource.model_validate(source_dict)
filename = Path(str(AnyUrl(source.url).path)).name
output_filename = output_dir / filename
print(f"Writing {output_filename}")
with output_filename.open("w") as f:
f.write(source.model_dump_json())
docs_succeeded.append(SucceededDocsItem(source=source.url))
payload = ProgressCallbackRequest(
task_id=run_name,
progress=ProgressUpdateProcessed(
num_failed=len(docs_failed),
num_processed=len(docs_succeeded) + len(docs_failed),
num_succeeded=len(docs_succeeded),
docs_succeeded=docs_succeeded,
docs_failed=docs_failed,
),
)
print(payload)
notify_callbacks(
payload=payload,
callbacks=CallbacksListType.validate_python(callbacks),
)
@dsl.pipeline()
def process(
batch_size: int,
request: Dict[str, Any],
callbacks: List[Dict[str, Any]] = [],
run_name: str = "",
):
chunks_task = generate_chunks(
run_name=run_name,
request=request,
batch_size=batch_size,
callbacks=callbacks,
)
chunks_task.set_caching_options(False)
with dsl.ParallelFor(chunks_task.output, parallelism=4) as data_splits:
convert_batch(
run_name=run_name,
data_splits=data_splits,
request=request,
callbacks=callbacks,
)

View File

@@ -0,0 +1,32 @@
import ssl
import certifi
import httpx
from docling_serve.datamodel.callback import ProgressCallbackRequest
from docling_serve.datamodel.kfp import CallbackSpec
def notify_callbacks(
payload: ProgressCallbackRequest,
callbacks: list[CallbackSpec],
):
if len(callbacks) == 0:
return
for callback in callbacks:
# https://www.python-httpx.org/advanced/ssl/#configuring-client-instances
if callback.ca_cert:
ctx = ssl.create_default_context(cadata=callback.ca_cert)
else:
ctx = ssl.create_default_context(cafile=certifi.where())
try:
httpx.post(
str(callback.url),
headers=callback.headers,
json=payload.model_dump(mode="json"),
verify=ctx,
)
except httpx.HTTPError as err:
print(f"Error notifying callback {callback.url}: {err}")

View File

@@ -0,0 +1,226 @@
import datetime
import json
import logging
import uuid
from pathlib import Path
from typing import Optional
from kfp_server_api.models import V2beta1RuntimeState
from pydantic import BaseModel, TypeAdapter
from pydantic_settings import SettingsConfigDict
from docling_serve.datamodel.callback import (
ProgressCallbackRequest,
ProgressSetNumDocs,
ProgressUpdateProcessed,
)
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.kfp import CallbackSpec
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.task import Task
from docling_serve.datamodel.task_meta import TaskProcessingMeta
from docling_serve.engines.async_kfp.kfp_pipeline import process
from docling_serve.engines.async_orchestrator import (
BaseAsyncOrchestrator,
ProgressInvalid,
)
from docling_serve.settings import docling_serve_settings
_log = logging.getLogger(__name__)
class _RunItem(BaseModel):
model_config = SettingsConfigDict(arbitrary_types_allowed=True)
run_id: str
state: str
created_at: datetime.datetime
scheduled_at: datetime.datetime
finished_at: datetime.datetime
class AsyncKfpOrchestrator(BaseAsyncOrchestrator):
def __init__(self):
super().__init__()
import kfp
kfp_endpoint = docling_serve_settings.eng_kfp_endpoint
if kfp_endpoint is None:
raise ValueError("KFP endpoint is required when using the KFP engine.")
kube_sa_token_path = Path("/run/secrets/kubernetes.io/serviceaccount/token")
kube_sa_ca_cert_path = Path(
"/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
)
ssl_ca_cert = docling_serve_settings.eng_kfp_ca_cert_path
token = docling_serve_settings.eng_kfp_token
if (
ssl_ca_cert is None
and ".svc" in kfp_endpoint.host
and kube_sa_ca_cert_path.exists()
):
ssl_ca_cert = str(kube_sa_ca_cert_path)
if token is None and kube_sa_token_path.exists():
token = kube_sa_token_path.read_text()
self._client = kfp.Client(
host=str(kfp_endpoint),
existing_token=token,
ssl_ca_cert=ssl_ca_cert,
# verify_ssl=False,
)
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
callbacks = []
if docling_serve_settings.eng_kfp_self_callback_endpoint is not None:
headers = {}
if docling_serve_settings.eng_kfp_self_callback_token_path is not None:
token = (
docling_serve_settings.eng_kfp_self_callback_token_path.read_text()
)
headers["Authorization"] = f"Bearer {token}"
ca_cert = ""
if docling_serve_settings.eng_kfp_self_callback_ca_cert_path is not None:
ca_cert = docling_serve_settings.eng_kfp_self_callback_ca_cert_path.read_text()
callbacks.append(
CallbackSpec(
url=docling_serve_settings.eng_kfp_self_callback_endpoint,
headers=headers,
ca_cert=ca_cert,
)
)
CallbacksType = TypeAdapter(list[CallbackSpec])
# hack: since the current kfp backend is not resolving the job_id placeholder,
# we set the run_name and pass it as argument to the job itself.
run_name = f"docling-job-{uuid.uuid4()}"
kfp_run = self._client.create_run_from_pipeline_func(
process,
arguments={
"batch_size": 10,
"request": request.model_dump(mode="json"),
"callbacks": CallbacksType.dump_python(callbacks, mode="json"),
"run_name": run_name,
},
run_name=run_name,
)
task_id = kfp_run.run_id
task = Task(task_id=task_id, request=request)
await self.init_task_tracking(task)
return task
async def _update_task_from_run(self, task_id: str, wait: float = 0.0):
run_info = self._client.get_run(run_id=task_id)
task = await self.get_raw_task(task_id=task_id)
# RUNTIME_STATE_UNSPECIFIED = "RUNTIME_STATE_UNSPECIFIED"
# PENDING = "PENDING"
# RUNNING = "RUNNING"
# SUCCEEDED = "SUCCEEDED"
# SKIPPED = "SKIPPED"
# FAILED = "FAILED"
# CANCELING = "CANCELING"
# CANCELED = "CANCELED"
# PAUSED = "PAUSED"
if run_info.state == V2beta1RuntimeState.SUCCEEDED:
task.task_status = TaskStatus.SUCCESS
elif run_info.state == V2beta1RuntimeState.PENDING:
task.task_status = TaskStatus.PENDING
elif run_info.state == V2beta1RuntimeState.RUNNING:
task.task_status = TaskStatus.STARTED
else:
task.task_status = TaskStatus.FAILURE
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
await self._update_task_from_run(task_id=task_id, wait=wait)
return await self.get_raw_task(task_id=task_id)
async def _get_pending(self) -> list[_RunItem]:
runs: list[_RunItem] = []
next_page: Optional[str] = None
while True:
res = self._client.list_runs(
page_token=next_page,
page_size=20,
filter=json.dumps(
{
"predicates": [
{
"operation": "EQUALS",
"key": "state",
"stringValue": "PENDING",
}
]
}
),
)
if res.runs is not None:
for run in res.runs:
runs.append(
_RunItem(
run_id=run.run_id,
state=run.state,
created_at=run.created_at,
scheduled_at=run.scheduled_at,
finished_at=run.finished_at,
)
)
if res.next_page_token is None:
break
next_page = res.next_page_token
return runs
async def queue_size(self) -> int:
runs = await self._get_pending()
return len(runs)
async def get_queue_position(self, task_id: str) -> Optional[int]:
runs = await self._get_pending()
for pos, run in enumerate(runs, start=1):
if run.run_id == task_id:
return pos
return None
async def process_queue(self):
return
async def _get_run_id(self, run_name: str) -> str:
res = self._client.list_runs(
filter=json.dumps(
{
"predicates": [
{
"operation": "EQUALS",
"key": "name",
"stringValue": run_name,
}
]
}
),
)
if res.runs is not None and len(res.runs) > 0:
return res.runs[0].run_id
raise RuntimeError(f"Run with {run_name=} not found.")
async def receive_task_progress(self, request: ProgressCallbackRequest):
task_id = await self._get_run_id(run_name=request.task_id)
progress = request.progress
task = await self.get_raw_task(task_id=task_id)
if isinstance(progress, ProgressSetNumDocs):
task.processing_meta = TaskProcessingMeta(num_docs=progress.num_docs)
task.task_status = TaskStatus.STARTED
elif isinstance(progress, ProgressUpdateProcessed):
if task.processing_meta is None:
raise ProgressInvalid(
"UpdateProcessed was called before setting the expected number of documents."
)
task.processing_meta.num_processed += progress.num_processed
task.processing_meta.num_succeeded += progress.num_succeeded
task.processing_meta.num_failed += progress.num_failed
task.task_status = TaskStatus.STARTED
# TODO: could be moved to BackgroundTask
await self.notify_task_subscribers(task_id=task_id)

View File

@@ -3,44 +3,27 @@ import logging
import uuid
from typing import Optional
from fastapi import WebSocket
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.responses import (
MessageKind,
TaskStatusResponse,
WebsocketMessage,
)
from docling_serve.datamodel.task import Task
from docling_serve.engines.async_local.worker import AsyncLocalWorker
from docling_serve.engines.base_orchestrator import BaseOrchestrator
from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
from docling_serve.settings import docling_serve_settings
_log = logging.getLogger(__name__)
class OrchestratorError(Exception):
pass
class TaskNotFoundError(OrchestratorError):
pass
class AsyncLocalOrchestrator(BaseOrchestrator):
class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
def __init__(self):
super().__init__()
self.task_queue = asyncio.Queue()
self.tasks: dict[str, Task] = {}
self.queue_list: list[str] = []
self.task_subscribers: dict[str, set[WebSocket]] = {}
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
task_id = str(uuid.uuid4())
task = Task(task_id=task_id, request=request)
self.tasks[task_id] = task
await self.init_task_tracking(task)
self.queue_list.append(task_id)
self.task_subscribers[task_id] = set()
await self.task_queue.put(task_id)
return task
@@ -52,16 +35,6 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
self.queue_list.index(task_id) + 1 if task_id in self.queue_list else None
)
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
if task_id not in self.tasks:
raise TaskNotFoundError()
return self.tasks[task_id]
async def task_result(self, task_id: str):
if task_id not in self.tasks:
raise TaskNotFoundError()
return self.tasks[task_id].result
async def process_queue(self):
# Create a pool of workers
workers = []
@@ -74,29 +47,3 @@ class AsyncLocalOrchestrator(BaseOrchestrator):
# Wait for all workers to complete (they won't, as they run indefinitely)
await asyncio.gather(*workers)
_log.debug("All workers completed.")
async def notify_task_subscribers(self, task_id: str):
if task_id not in self.task_subscribers:
raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
task = self.tasks[task_id]
task_queue_position = await self.get_queue_position(task_id)
msg = TaskStatusResponse(
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
)
for websocket in self.task_subscribers[task_id]:
await websocket.send_text(
WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
)
if task.is_completed():
await websocket.close()
async def notify_queue_positions(self):
for task_id in self.task_subscribers.keys():
# notify only pending tasks
if self.tasks[task_id].task_status != TaskStatus.PENDING:
continue
await self.notify_task_subscribers(task_id)

View File

@@ -0,0 +1,72 @@
from fastapi import WebSocket
from docling_serve.datamodel.callback import ProgressCallbackRequest
from docling_serve.datamodel.engines import TaskStatus
from docling_serve.datamodel.responses import (
MessageKind,
TaskStatusResponse,
WebsocketMessage,
)
from docling_serve.datamodel.task import Task
from docling_serve.engines.base_orchestrator import (
BaseOrchestrator,
OrchestratorError,
TaskNotFoundError,
)
class ProgressInvalid(OrchestratorError):
pass
class BaseAsyncOrchestrator(BaseOrchestrator):
def __init__(self):
self.tasks: dict[str, Task] = {}
self.task_subscribers: dict[str, set[WebSocket]] = {}
async def init_task_tracking(self, task: Task):
task_id = task.task_id
self.tasks[task.task_id] = task
self.task_subscribers[task_id] = set()
async def get_raw_task(self, task_id: str) -> Task:
if task_id not in self.tasks:
raise TaskNotFoundError()
return self.tasks[task_id]
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
return await self.get_raw_task(task_id=task_id)
async def task_result(self, task_id: str):
task = await self.get_raw_task(task_id=task_id)
return task.result
async def notify_task_subscribers(self, task_id: str):
if task_id not in self.task_subscribers:
raise RuntimeError(f"Task {task_id} does not have a subscribers list.")
task = await self.get_raw_task(task_id=task_id)
task_queue_position = await self.get_queue_position(task_id)
msg = TaskStatusResponse(
task_id=task.task_id,
task_status=task.task_status,
task_position=task_queue_position,
task_meta=task.processing_meta,
)
for websocket in self.task_subscribers[task_id]:
await websocket.send_text(
WebsocketMessage(message=MessageKind.UPDATE, task=msg).model_dump_json()
)
if task.is_completed():
await websocket.close()
async def notify_queue_positions(self):
for task_id in self.task_subscribers.keys():
# notify only pending tasks
if self.tasks[task_id].task_status != TaskStatus.PENDING:
continue
await self.notify_task_subscribers(task_id)
async def receive_task_progress(self, request: ProgressCallbackRequest):
raise NotImplementedError()

View File

@@ -0,0 +1,21 @@
from functools import lru_cache
from docling_serve.datamodel.engines import AsyncEngine
from docling_serve.engines.async_orchestrator import BaseAsyncOrchestrator
from docling_serve.settings import docling_serve_settings
@lru_cache
def get_async_orchestrator() -> BaseAsyncOrchestrator:
if docling_serve_settings.eng_kind == AsyncEngine.LOCAL:
from docling_serve.engines.async_local.orchestrator import (
AsyncLocalOrchestrator,
)
return AsyncLocalOrchestrator()
elif docling_serve_settings.eng_kind == AsyncEngine.KFP:
from docling_serve.engines.async_kfp.orchestrator import AsyncKfpOrchestrator
return AsyncKfpOrchestrator()
raise RuntimeError(f"Engine {docling_serve_settings.eng_kind} not recognized.")

View File

@@ -1,11 +1,21 @@
from abc import ABC, abstractmethod
from typing import Optional
from docling_serve.datamodel.requests import ConvertDocumentsRequest
from docling_serve.datamodel.task import Task
class OrchestratorError(Exception):
pass
class TaskNotFoundError(OrchestratorError):
pass
class BaseOrchestrator(ABC):
@abstractmethod
async def enqueue(self, task) -> Task:
async def enqueue(self, request: ConvertDocumentsRequest) -> Task:
pass
@abstractmethod
@@ -13,9 +23,17 @@ class BaseOrchestrator(ABC):
pass
@abstractmethod
async def task_status(self, task_id: str) -> Task:
async def get_queue_position(self, task_id: str) -> Optional[int]:
pass
@abstractmethod
async def task_status(self, task_id: str, wait: float = 0.0) -> Task:
pass
@abstractmethod
async def task_result(self, task_id: str):
pass
@abstractmethod
async def process_queue(self):
pass

View File

@@ -46,7 +46,7 @@ def _export_document_as_content(
if export_md:
document.md_content = new_doc.export_to_markdown(image_mode=image_mode)
if export_doctags:
document.doctags_content = new_doc.export_to_document_tokens()
document.doctags_content = new_doc.export_to_doctags()
elif conv_res.status == ConversionStatus.SKIPPED:
raise HTTPException(status_code=400, detail=conv_res.errors)
else:

View File

@@ -2,7 +2,9 @@ import sys
from pathlib import Path
from typing import Optional, Union
from pydantic import AnyUrl, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing_extensions import Self
from docling_serve.datamodel.engines import AsyncEngine
@@ -37,6 +39,7 @@ class DoclingServeSettings(BaseSettings):
artifacts_path: Optional[Path] = None
static_path: Optional[Path] = None
options_cache_size: int = 2
enable_remote_services: bool = False
allow_external_plugins: bool = False
max_document_timeout: float = 3_600 * 24 * 7 # 7 days
@@ -48,7 +51,32 @@ class DoclingServeSettings(BaseSettings):
cors_headers: list[str] = ["*"]
eng_kind: AsyncEngine = AsyncEngine.LOCAL
# Local engine
eng_loc_num_workers: int = 2
# KFP engine
eng_kfp_endpoint: Optional[AnyUrl] = None
eng_kfp_token: Optional[str] = None
eng_kfp_ca_cert_path: Optional[str] = None
eng_kfp_self_callback_endpoint: Optional[str] = None
eng_kfp_self_callback_token_path: Optional[Path] = None
eng_kfp_self_callback_ca_cert_path: Optional[Path] = None
eng_kfp_experimental: bool = False
@model_validator(mode="after")
def engine_settings(self) -> Self:
# Validate KFP engine settings
if self.eng_kind == AsyncEngine.KFP:
if self.eng_kfp_endpoint is None:
raise ValueError("KFP endpoint is required when using the KFP engine.")
if self.eng_kind == AsyncEngine.KFP:
if not self.eng_kfp_experimental:
raise ValueError(
"KFP is not yet working. To enable the development version, you must set DOCLING_SERVE_ENG_KFP_EXPERIMENTAL=true."
)
return self
uvicorn_settings = UvicornSettings()

View File

@@ -38,7 +38,39 @@ THe following table describes the options to configure the Docling Serve app.
| `--artifacts-path` | `DOCLING_SERVE_ARTIFACTS_PATH` | unset | If set to a valid directory, the model weights will be loaded from this path |
| | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and ui will be loaded from this path |
| `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
| | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
| | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local` and `kfp`. See below for more configurations of the engines. |
### Compute engine
Docling Serve can be deployed with several possible of compute engine.
The selected compute engine will be running all the async jobs.
#### Local engine
The following table describes the options to configure the Docling Serve KFP engine.
| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
#### KFP engine
The following table describes the options to configure the Docling Serve KFP engine.
| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_KFP_ENDPOINT` | | Must be set to the Kubeflow Pipeline endpoint. When using the in-cluster deployment, make sure to use the cluster endpoint, e.g. `https://NAME.NAMESPACE.svc.cluster.local:8888` |
| `DOCLING_SERVE_ENG_KFP_TOKEN` | | The authentication token for KFP. For in-cluster deployment, the app will load automatically the token of the ServiceAccount. |
| `DOCLING_SERVE_ENG_KFP_CA_CERT_PATH` | | Path to the CA certificates for the KFP endpoint. For in-cluster deployment, the app will load automatically the internal CA. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_ENDPOINT` | | If set, it enables internal callbacks providing status update of the KFP job. Usually something like `https://NAME.NAMESPACE.svc.cluster.local:5001/v1alpha/callback/task/progress`. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_TOKEN_PATH` | | The token used for authenticating the progress callback. For cluster-internal workloads, use `/run/secrets/kubernetes.io/serviceaccount/token`. |
| `DOCLING_SERVE_ENG_KFP_SELF_CALLBACK_CA_CERT_PATH` | | The CA certifcate for the progress callback. For cluster-inetrnal workloads, use `/var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt`. |

View File

@@ -8,6 +8,7 @@ On top of the source of file (see below), both endpoints support the same parame
- `from_format` (List[str]): Input format(s) to convert from. Allowed values: `docx`, `pptx`, `html`, `image`, `pdf`, `asciidoc`, `md`. Defaults to all formats.
- `to_formats` (List[str]): Output format(s) to convert to. Allowed values: `md`, `json`, `html`, `text`, `doctags`. Defaults to `md`.
- `pipeline` (str). The choice of which pipeline to use. Allowed values are `standard` and `vlm`. Defaults to `standard`.
- `do_ocr` (bool): If enabled, the bitmap content will be processed using OCR. Defaults to `True`.
- `image_export_mode`: Image export mode for the document (only in case of JSON, Markdown or HTML). Allowed values: embedded, placeholder, referenced. Optional, defaults to `embedded`.
- `force_ocr` (bool): If enabled, replace any existing text with OCR-generated text over the full content. Defaults to `False`.
@@ -18,7 +19,13 @@ On top of the source of file (see below), both endpoints support the same parame
- `abort_on_error` (bool): If enabled, abort on error. Defaults to false.
- `return_as_file` (boo): If enabled, return the output as a file. Defaults to false.
- `do_table_structure` (bool): If enabled, the table structure will be extracted. Defaults to true.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to true.
- `do_code_enrichment` (bool): If enabled, perform OCR code enrichment. Defaults to false.
- `do_formula_enrichment` (bool): If enabled, perform formula OCR, return LaTeX code. Defaults to false.
- `do_picture_classification` (bool): If enabled, classify pictures in documents. Defaults to false.
- `do_picture_description` (bool): If enabled, describe pictures in documents. Defaults to false.
- `picture_description_local` (dict): Options for running a local vision-language model in the picture description. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with picture_description_api.
- `picture_description_api` (dict): API details for using a vision-language model in the picture description. This parameter is mutually exclusive with picture_description_local.
- `include_images` (bool): If enabled, images will be extracted from the document. Defaults to false.
- `images_scale` (float): Scale factor for images. Defaults to 2.0.
## Convert endpoints
@@ -244,6 +251,70 @@ data = response.json()
</details>
### Picture description options
When the picture description enrichment is activated, users may specify which model and which execution mode to use for this task. There are two choices for the execution mode: _local_ will run the vision-language model directly, _api_ will invoke an external API endpoint.
The local option is specified with:
```jsonc
{
"picture_description_local": {
"repo_id": "", // Repository id from the Hugging Face Hub.
"generation_config": {"max_new_tokens": 200, "do_sample": false}, // HF generation config.
"prompt": "Describe this image in a few sentences. ", // Prompt used when calling the vision-language model.
}
}
```
The possible values for `generation_config` are documented in the [Hugging Face text generation docs](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig).
The api option is specified with:
```jsonc
{
"picture_description_api": {
"url": "", // Endpoint which accepts openai-api compatible requests.
"headers": {}, // Headers used for calling the API endpoint. For example, it could include authentication headers.
"params": {}, // Model parameters.
"timeout": 20, // Timeout for the API request.
"prompt": "Describe this image in a few sentences. ", // Prompt used when calling the vision-language model.
}
}
```
Example URLs are:
- `http://localhost:8000/v1/chat/completions` for the local vllm api, with example `params`:
- the `HuggingFaceTB/SmolVLM-256M-Instruct` model
```json
{
"model": "HuggingFaceTB/SmolVLM-256M-Instruct",
"max_completion_tokens": 200,
}
```
- the `ibm-granite/granite-vision-3.2-2b` model
```json
{
"model": "ibm-granite/granite-vision-3.2-2b",
"max_completion_tokens": 200,
}
```
- `http://localhost:11434/v1/chat/completions` for the local ollama api, with example `params`:
- the `granite3.2-vision:2b` model
```json
{
"model": "granite3.2-vision:2b"
}
```
Note that when using `picture_description_api`, the server must be launched with `DOCLING_SERVE_ENABLE_REMOTE_SERVICES=true`.
## Response format
The response can be a JSON Document or a File.

View File

@@ -1,6 +1,6 @@
[project]
name = "docling-serve"
version = "0.8.0" # DO NOT EDIT, updated automatically
version = "0.9.0" # DO NOT EDIT, updated automatically
description = "Running Docling as a service"
license = {text = "MIT"}
authors = [
@@ -34,6 +34,7 @@ dependencies = [
"mlx-vlm~=0.1.12; sys_platform == 'darwin' and platform_machine == 'arm64'",
"fastapi[standard]~=0.115",
"httpx~=0.28",
"kfp[kubernetes]>=2.10.0",
"pydantic~=2.10",
"pydantic-settings~=2.4",
"python-multipart>=0.0.14,<0.1.0",
@@ -82,6 +83,10 @@ conflicts = [
{ extra = "cu124" },
],
]
environments = ["sys_platform != 'darwin' or platform_machine != 'x86_64'"]
override-dependencies = [
"urllib3~=2.0"
]
[tool.uv.sources]
torch = [
@@ -197,6 +202,8 @@ module = [
"tesserocr.*",
"rapidocr_onnxruntime.*",
"requests.*",
"kfp.*",
"kfp_server_api.*",
"mlx_vlm.*",
]
ignore_missing_imports = true

View File

@@ -28,6 +28,16 @@ async def test_convert_url(async_client: httpx.AsyncClient):
"ocr": True,
"abort_on_error": False,
"return_as_file": False,
# "do_picture_description": True,
# "picture_description_api": {
# "url": "http://localhost:11434/v1/chat/completions",
# "params": {
# "model": "granite3.2-vision:2b",
# }
# },
# "picture_description_local": {
# "repo_id": "HuggingFaceTB/SmolVLM-256M-Instruct",
# },
},
# "http_sources": [{"url": "https://arxiv.org/pdf/2501.17887"}],
"file_sources": [{"base64_string": encoded_doc, "filename": doc_filename.name}],

View File

@@ -38,7 +38,7 @@ async def test_convert_url(async_client):
}
print(json.dumps(payload, indent=2))
for n in range(5):
for n in range(1):
response = await async_client.post(
f"{base_url}/convert/source/async", json=payload
)

View File

@@ -0,0 +1,128 @@
import json
import os
from fastapi.testclient import TestClient
from pytest_check import check
from docling_serve.app import create_app
client = TestClient(create_app())
def test_health():
response = client.get("/health")
assert response.status_code == 200
assert response.json() == {"status": "ok"}
def test_convert_file():
"""Test convert single file to all outputs"""
endpoint = "/v1alpha/convert/file"
options = {
"from_formats": [
"docx",
"pptx",
"html",
"image",
"pdf",
"asciidoc",
"md",
"xlsx",
],
"to_formats": ["md", "json", "html", "text", "doctags"],
"image_export_mode": "placeholder",
"ocr": True,
"force_ocr": False,
"ocr_engine": "easyocr",
"ocr_lang": ["en"],
"pdf_backend": "dlparse_v2",
"table_mode": "fast",
"abort_on_error": False,
"return_as_file": False,
}
current_dir = os.path.dirname(__file__)
file_path = os.path.join(current_dir, "2206.01062v1.pdf")
files = {
"files": ("2206.01062v1.pdf", open(file_path, "rb"), "application/pdf"),
}
response = client.post(endpoint, files=files, data=options)
assert response.status_code == 200, "Response should be 200 OK"
data = response.json()
# Response content checks
# Helper function to safely slice strings
def safe_slice(value, length=100):
if isinstance(value, str):
return value[:length]
return str(value) # Convert non-string values to string for debug purposes
# Document check
check.is_in(
"document",
data,
msg=f"Response should contain 'document' key. Received keys: {list(data.keys())}",
)
# MD check
check.is_in(
"md_content",
data.get("document", {}),
msg=f"Response should contain 'md_content' key. Received keys: {list(data.get('document', {}).keys())}",
)
if data.get("document", {}).get("md_content") is not None:
check.is_in(
"## DocLayNet: ",
data["document"]["md_content"],
msg=f"Markdown document should contain 'DocLayNet: '. Received: {safe_slice(data['document']['md_content'])}",
)
# JSON check
check.is_in(
"json_content",
data.get("document", {}),
msg=f"Response should contain 'json_content' key. Received keys: {list(data.get('document', {}).keys())}",
)
if data.get("document", {}).get("json_content") is not None:
check.is_in(
'{"schema_name": "DoclingDocument"',
json.dumps(data["document"]["json_content"]),
msg=f'JSON document should contain \'{{\\n "schema_name": "DoclingDocument\'". Received: {safe_slice(data["document"]["json_content"])}',
)
# HTML check
check.is_in(
"html_content",
data.get("document", {}),
msg=f"Response should contain 'html_content' key. Received keys: {list(data.get('document', {}).keys())}",
)
if data.get("document", {}).get("html_content") is not None:
check.is_in(
"<!DOCTYPE html>\n<html>\n<head>",
data["document"]["html_content"],
msg=f"HTML document should contain '<!DOCTYPE html>\n<html>\n<head>'. Received: {safe_slice(data['document']['html_content'])}",
)
# Text check
check.is_in(
"text_content",
data.get("document", {}),
msg=f"Response should contain 'text_content' key. Received keys: {list(data.get('document', {}).keys())}",
)
if data.get("document", {}).get("text_content") is not None:
check.is_in(
"DocLayNet: A Large Human-Annotated Dataset",
data["document"]["text_content"],
msg=f"Text document should contain 'DocLayNet: A Large Human-Annotated Dataset'. Received: {safe_slice(data['document']['text_content'])}",
)
# DocTags check
check.is_in(
"doctags_content",
data.get("document", {}),
msg=f"Response should contain 'doctags_content' key. Received keys: {list(data.get('document', {}).keys())}",
)
if data.get("document", {}).get("doctags_content") is not None:
check.is_in(
"<doctag><page_header>",
data["document"]["doctags_content"],
msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
)

1490
uv.lock generated

File diff suppressed because it is too large Load Diff