9 Commits
v1.7.2 ... main

Author SHA1 Message Date
github-actions[bot]
0ec67a37b7 chore: bump version to 1.9.0 [skip ci] 2025-11-24 08:43:53 +00:00
Michele Dolfi
772fcec4ae chore: avoid installing multiple times dependencies (#429)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-21 15:42:42 +01:00
Michele Dolfi
e437e830c9 fix: Dependencies updates – Docling 2.63.0 (#443)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-21 10:31:56 +01:00
Michele Dolfi
2c23f65507 feat: version endpoint (#442)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-20 17:57:10 +01:00
Burt Holzman
5dc942f25b chore: docs typo (cude -> cuda) (#437)
Signed-off-by: Burt Holzman <burt@fnal.gov>
2025-11-17 08:31:44 +01:00
github-actions[bot]
ff310f2b13 chore: bump version to 1.8.0 [skip ci] 2025-10-31 17:01:56 +00:00
Michele Dolfi
bf132a3c3e feat: Docling with new standard pipeline with threading (#428)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-10-31 17:57:38 +01:00
Michele Dolfi
35319b0da7 docs: Expand automatic docs to nested objects. More complete usage docs. (#426)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-10-31 15:02:20 +01:00
Michele Dolfi
f3957aeb57 docs: add docs for docling parameters like performance and debug (#424)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-10-31 14:17:31 +01:00
13 changed files with 1446 additions and 1126 deletions

View File

@@ -4,6 +4,7 @@ asgi
async
(?i)urls
uvicorn
Config
[Ww]ebserver
RQ
(?i)url

View File

@@ -160,13 +160,10 @@ jobs:
pip install uv
uv venv --allow-existing
source .venv/bin/activate
uv sync --all-extras --no-extra flash-attn
uv sync --only-dev
# Run pytest tests
echo "Running tests..."
# Test import
python -c 'from docling_serve.app import create_app; create_app()'
# Run pytest and check result directly
if ! pytest -sv -k "test_convert_url" tests/test_1-url-async.py \
--disable-warnings; then

View File

@@ -1,3 +1,42 @@
## [v1.9.0](https://github.com/docling-project/docling-serve/releases/tag/v1.9.0) - 2025-11-24
### Feature
* Version endpoint ([#442](https://github.com/docling-project/docling-serve/issues/442)) ([`2c23f65`](https://github.com/docling-project/docling-serve/commit/2c23f65507d7699694debd7faa0de840ef2d2cb7))
### Fix
* Dependencies updates Docling 2.63.0 ([#443](https://github.com/docling-project/docling-serve/issues/443)) ([`e437e83`](https://github.com/docling-project/docling-serve/commit/e437e830c956f9a76cd0c62faf9add0231992548))
### Docling libraries included in this release:
- docling 2.63.0
- docling-core 2.52.0
- docling-ibm-models 3.10.2
- docling-jobkit 1.8.0
- docling-mcp 1.3.3
- docling-parse 4.7.1
- docling-serve 1.9.0
## [v1.8.0](https://github.com/docling-project/docling-serve/releases/tag/v1.8.0) - 2025-10-31
### Feature
* Docling with new standard pipeline with threading ([#428](https://github.com/docling-project/docling-serve/issues/428)) ([`bf132a3`](https://github.com/docling-project/docling-serve/commit/bf132a3c3e615ddbe624841ea5b3a98593c00654))
### Documentation
* Expand automatic docs to nested objects. More complete usage docs. ([#426](https://github.com/docling-project/docling-serve/issues/426)) ([`35319b0`](https://github.com/docling-project/docling-serve/commit/35319b0da793a2a1a434fd2b60b7632e10ecced3))
* Add docs for docling parameters like performance and debug ([#424](https://github.com/docling-project/docling-serve/issues/424)) ([`f3957ae`](https://github.com/docling-project/docling-serve/commit/f3957aeb577097121fe9d0d21f75a50643f03369))
### Docling libraries included in this release:
- docling 2.60.0
- docling-core 2.50.0
- docling-ibm-models 3.10.2
- docling-jobkit 1.8.0
- docling-mcp 1.3.2
- docling-parse 4.7.0
- docling-serve 1.8.0
## [v1.7.2](https://github.com/docling-project/docling-serve/releases/tag/v1.7.2) - 2025-10-30
### Fix

View File

@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
def version_callback(value: bool) -> None:
if value:
docling_serve_version = importlib.metadata.version("docling_serve")
docling_serve_version = importlib.metadata.version("docling-serve")
docling_jobkit_version = importlib.metadata.version("docling-jobkit")
docling_version = importlib.metadata.version("docling")
docling_core_version = importlib.metadata.version("docling-core")
@@ -385,6 +385,11 @@ def rq_worker() -> Any:
allow_external_plugins=docling_serve_settings.allow_external_plugins,
max_num_pages=docling_serve_settings.max_num_pages,
max_file_size=docling_serve_settings.max_file_size,
queue_max_size=docling_serve_settings.queue_max_size,
ocr_batch_size=docling_serve_settings.ocr_batch_size,
layout_batch_size=docling_serve_settings.layout_batch_size,
table_batch_size=docling_serve_settings.table_batch_size,
batch_polling_interval_seconds=docling_serve_settings.batch_polling_interval_seconds,
)
run_worker(

View File

@@ -76,7 +76,7 @@ from docling_serve.datamodel.responses import (
TaskStatusResponse,
WebsocketMessage,
)
from docling_serve.helper_functions import FormDepends
from docling_serve.helper_functions import DOCLING_VERSIONS, FormDepends
from docling_serve.orchestrator_factory import get_async_orchestrator
from docling_serve.response_preparation import prepare_response
from docling_serve.settings import docling_serve_settings
@@ -437,6 +437,16 @@ def create_app(): # noqa: C901
def api_check() -> HealthCheckResponse:
return HealthCheckResponse()
# Docling versions
# GET /version (tagged "health"): returns the DOCLING_VERSIONS mapping
# imported from docling_serve.helper_functions.
@app.get("/version", tags=["health"])
def version_info() -> dict:
# Refuse with HTTP 403 when the show_version_info setting is disabled.
if not docling_serve_settings.show_version_info:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Forbidden. The server is configured for not showing version details.",
)
return DOCLING_VERSIONS
# Convert a document from URL(s)
@app.post(
"/v1/convert/source",

View File

@@ -1,11 +1,25 @@
import importlib.metadata
import inspect
import json
import platform
import re
import sys
from typing import Union, get_args, get_origin
from fastapi import Depends, Form
from pydantic import BaseModel, TypeAdapter
# Version report evaluated once at import time: installed distribution
# versions looked up via importlib.metadata, plus interpreter and platform
# details.
DOCLING_VERSIONS = {
"docling-serve": importlib.metadata.version("docling-serve"),
"docling-jobkit": importlib.metadata.version("docling-jobkit"),
"docling": importlib.metadata.version("docling"),
"docling-core": importlib.metadata.version("docling-core"),
"docling-ibm-models": importlib.metadata.version("docling-ibm-models"),
"docling-parse": importlib.metadata.version("docling-parse"),
# Interpreter cache tag followed by the full Python version in parentheses.
"python": f"{sys.implementation.cache_tag} ({platform.python_version()})",
# NOTE(review): the key is spelled "plaform" (presumably meant "platform");
# renaming it changes the keys seen by consumers of this mapping, so
# confirm before fixing.
"plaform": platform.platform(),
}
def is_pydantic_model(type_):
try:

View File

@@ -288,6 +288,11 @@ def get_async_orchestrator() -> BaseOrchestrator:
allow_external_plugins=docling_serve_settings.allow_external_plugins,
max_num_pages=docling_serve_settings.max_num_pages,
max_file_size=docling_serve_settings.max_file_size,
queue_max_size=docling_serve_settings.queue_max_size,
ocr_batch_size=docling_serve_settings.ocr_batch_size,
layout_batch_size=docling_serve_settings.layout_batch_size,
table_batch_size=docling_serve_settings.table_batch_size,
batch_polling_interval_seconds=docling_serve_settings.batch_polling_interval_seconds,
)
cm = DoclingConverterManager(config=cm_config)

View File

@@ -50,6 +50,7 @@ class DoclingServeSettings(BaseSettings):
options_cache_size: int = 2
enable_remote_services: bool = False
allow_external_plugins: bool = False
show_version_info: bool = True
api_key: str = ""
@@ -57,6 +58,13 @@ class DoclingServeSettings(BaseSettings):
max_num_pages: int = sys.maxsize
max_file_size: int = sys.maxsize
# Threading pipeline
queue_max_size: Optional[int] = None
ocr_batch_size: Optional[int] = None
layout_batch_size: Optional[int] = None
table_batch_size: Optional[int] = None
batch_polling_interval_seconds: Optional[float] = None
sync_poll_interval: int = 2 # seconds
max_sync_wait: int = 120 # 2 minutes

View File

@@ -39,24 +39,42 @@ The following table describes the options to configure the Docling Serve app.
| | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path |
| | `DOCLING_SERVE_SCRATCH_PATH` | | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary directory is created for this purpose. |
| `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
| | `DOCLING_SERVE_SHOW_VERSION_INFO` | `true` | If enabled, the `/version` endpoint will provide the Docling package versions, otherwise it will return a forbidden 403 error. |
| | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
| | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
| | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
| | `DOCLING_SERVE_RESULT_REMOVAL_DELAY` | `300` | When `DOCLING_SERVE_SINGLE_USE_RESULTS` is active, this is the delay before results are removed from the task registry. |
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
| | `DOCLING_NUM_THREADS` | `4` | Number of concurrent threads for processing a document. |
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
| | `DOCLING_SERVE_SYNC_POLL_INTERVAL` | `2` | Number of seconds to sleep between polling the task status in the sync endpoints. |
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
| | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConverter objects (including their loaded models) to keep in the cache. |
| | `DOCLING_SERVE_QUEUE_MAX_SIZE` | | Size of the pages queue. Up to this many pages may be open at the same time. |
| | `DOCLING_SERVE_OCR_BATCH_SIZE` | | Batch size for the OCR stage. |
| | `DOCLING_SERVE_LAYOUT_BATCH_SIZE` | | Batch size for the layout detection stage. |
| | `DOCLING_SERVE_TABLE_BATCH_SIZE` | | Batch size for the table structure stage. |
| | `DOCLING_SERVE_BATCH_POLLING_INTERVAL_SECONDS` | | Wait time for gathering pages before starting a stage processing. |
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
| | `DOCLING_SERVE_API_KEY` | | If specified, all the API requests must contain the header `X-Api-Key` with this value. |
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local`, `rq` and `kfp`. See below for more configurations of the engines. |
### Docling configuration
Some Docling settings, mostly about performance, are exposed as environment variables, which can also be used when running Docling Serve.
| ENV | Default | Description |
| ----|---------|-------------|
| `DOCLING_NUM_THREADS` | `4` | Number of concurrent threads used for the `torch` CPU execution. |
| `DOCLING_DEVICE` | | Device used for the model execution. Valid values are `cpu`, `cuda`, `mps`. When unset, the best device is chosen. For CUDA-enabled environments, you can choose which GPU using the syntax `cuda:0`, `cuda:1`, ... |
| `DOCLING_PERF_PAGE_BATCH_SIZE` | `4` | Number of pages processed in the same batch. |
| `DOCLING_PERF_ELEMENTS_BATCH_SIZE` | `8` | Number of document items/elements processed in the same batch during enrichment. |
| `DOCLING_DEBUG_PROFILE_PIPELINE_TIMINGS` | `false` | When enabled, Docling will provide detailed timings information. |
### Compute engine
Docling Serve can be deployed with several possible compute engines.

View File

@@ -7,6 +7,7 @@ The API provides two endpoints: one for urls, one for files. This is necessary t
In addition to the source of the file (see below), both endpoints support the same parameters.
<!-- begin: parameters-docs -->
<h4>ConvertDocumentsRequestOptions</h4>
| Field Name | Type | Description |
|------------|------|-------------|
@@ -39,6 +40,52 @@ On top of the source of file (see below), both endpoints support the same parame
| `vlm_pipeline_model_local` | VlmModelLocal or NoneType | Options for running a local vision-language model for the `vlm` pipeline. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `vlm_pipeline_model_api` and `vlm_pipeline_model`. |
| `vlm_pipeline_model_api` | VlmModelApi or NoneType | API details for using a vision-language model for the `vlm` pipeline. This parameter is mutually exclusive with `vlm_pipeline_model_local` and `vlm_pipeline_model`. |
<h4>VlmModelApi</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `url` | AnyUrl | Endpoint which accepts openai-api compatible requests. |
| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
| `params` | Dict[str, Any] | Model parameters. |
| `timeout` | float | Timeout for the API request. |
| `concurrency` | int | Maximum number of concurrent requests to the API. |
| `prompt` | str | Prompt used when calling the vision-language model. |
| `scale` | float | Scale factor of the images used. |
| `response_format` | ResponseFormat | Type of response generated by the model. |
| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
<h4>VlmModelLocal</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `repo_id` | str | Repository id from the Hugging Face Hub. |
| `prompt` | str | Prompt used when calling the vision-language model. |
| `scale` | float | Scale factor of the images used. |
| `response_format` | ResponseFormat | Type of response generated by the model. |
| `inference_framework` | InferenceFramework | Inference framework to use. |
| `transformers_model_type` | TransformersModelType | Type of transformers auto-model to use. |
| `extra_generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
<h4>PictureDescriptionApi</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `url` | AnyUrl | Endpoint which accepts openai-api compatible requests. |
| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
| `params` | Dict[str, Any] | Model parameters. |
| `timeout` | float | Timeout for the API request. |
| `concurrency` | int | Maximum number of concurrent requests to the API. |
| `prompt` | str | Prompt used when calling the vision-language model. |
<h4>PictureDescriptionLocal</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `repo_id` | str | Repository id from the Hugging Face Hub. |
| `prompt` | str | Prompt used when calling the vision-language model. |
| `generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
<!-- end: parameters-docs -->
### Authentication

View File

@@ -1,6 +1,6 @@
[project]
name = "docling-serve"
version = "1.7.2" # DO NOT EDIT, updated automatically
version = "1.9.0" # DO NOT EDIT, updated automatically
description = "Running Docling as a service"
license = {text = "MIT"}
authors = [
@@ -35,7 +35,7 @@ requires-python = ">=3.10"
dependencies = [
"docling~=2.38",
"docling-core>=2.45.0",
"docling-jobkit[kfp,rq,vlm]>=1.6.0,<2.0.0",
"docling-jobkit[kfp,rq,vlm]>=1.8.0,<2.0.0",
"fastapi[standard]<0.119.0", # ~=0.115
"httpx~=0.28",
"pydantic~=2.10",
@@ -69,6 +69,9 @@ flash-attn = [
[dependency-groups]
dev = [
"asgi-lifespan~=2.0",
"httpx",
"pydantic",
"pydantic-settings",
"mypy~=1.11",
"pre-commit-uv~=4.1",
"pypdf>=6.0.0",

View File

@@ -1,5 +1,5 @@
import re
from typing import Annotated, Any, get_args, get_origin
from typing import Annotated, Any, Union, get_args, get_origin
from pydantic import BaseModel
@@ -90,39 +90,75 @@ def _format_type(type_hint: Any) -> str:
return str(type_hint)
def _unroll_types(tp) -> list[type]:
"""
Unrolls typing.Union and typing.Optional types into a flat list of types.
"""
origin = get_origin(tp)
if origin is Union:
# Recursively unroll each type inside the Union
types = []
for arg in get_args(tp):
types.extend(_unroll_types(arg))
# Remove duplicates while preserving order
return list(dict.fromkeys(types))
else:
# If it's not a Union, just return it as a single-element list
return [tp]
def generate_model_doc(model: type[BaseModel]) -> str:
"""Generate documentation for a Pydantic model."""
doc = "\n| Field Name | Type | Description |\n"
doc += "|------------|------|-------------|\n"
for base_model in model.__mro__:
# Check if this is a Pydantic model
if hasattr(base_model, "model_fields"):
# Iterate through fields of this model
for field_name, field in base_model.model_fields.items():
# Extract description from Annotated field if possible
description = field.description or "No description provided."
description = format_allowed_values_description(description)
description = format_variable_names(description)
models_stack = [model]
# Handle Annotated types
original_type = field.annotation
if get_origin(original_type) is Annotated:
# Extract base type and additional metadata
type_args = get_args(original_type)
base_type = type_args[0]
else:
base_type = original_type
doc = ""
while models_stack:
current_model = models_stack.pop()
field_type = _format_type(base_type)
field_type = format_variable_names(field_type)
doc += f"<h4>{current_model.__name__}</h4>\n"
doc += f"| `{field_name}` | {field_type} | {description} |\n"
doc += "\n| Field Name | Type | Description |\n"
doc += "|------------|------|-------------|\n"
# stop iterating the base classes
break
base_models = []
if hasattr(current_model, "__mro__"):
base_models = current_model.__mro__
else:
base_models = [current_model]
doc += "\n"
for base_model in base_models:
# Check if this is a Pydantic model
if hasattr(base_model, "model_fields"):
# Iterate through fields of this model
for field_name, field in base_model.model_fields.items():
# Extract description from Annotated field if possible
description = field.description or "No description provided."
description = format_allowed_values_description(description)
description = format_variable_names(description)
# Handle Annotated types
original_type = field.annotation
if get_origin(original_type) is Annotated:
# Extract base type and additional metadata
type_args = get_args(original_type)
base_type = type_args[0]
else:
base_type = original_type
field_type = _format_type(base_type)
field_type = format_variable_names(field_type)
doc += f"| `{field_name}` | {field_type} | {description} |\n"
for field_type in _unroll_types(base_type):
if issubclass(field_type, BaseModel):
models_stack.append(field_type)
# stop iterating the base classes
break
doc += "\n"
return doc

2319
uv.lock generated

File diff suppressed because one or more lines are too long