feat: workers without shared models and convert params (#304)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2026-03-07 22:33:44 +00:00 · 2025-08-07 11:16:06 +02:00
parent fd1b987e8d
commit db3fdb5bc1
5 changed files with 602 additions and 621 deletions
--- a/docling_serve/orchestrator_factory.py
+++ b/docling_serve/orchestrator_factory.py
@@ -19,6 +19,7 @@ def get_async_orchestrator() -> BaseOrchestrator:

        local_config = LocalOrchestratorConfig(
            num_workers=docling_serve_settings.eng_loc_num_workers,
+            shared_models=docling_serve_settings.eng_loc_share_models,
        )

        cm_config = DoclingConverterManagerConfig(
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -63,6 +63,7 @@ class DoclingServeSettings(BaseSettings):
    eng_kind: AsyncEngine = AsyncEngine.LOCAL
    # Local engine
    eng_loc_num_workers: int = 2
+    eng_loc_share_models: bool = False
    # KFP engine
    eng_kfp_endpoint: Optional[AnyUrl] = None
    eng_kfp_token: Optional[str] = None
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -66,6 +66,7 @@ The following table describes the options to configure the Docling Serve local e
 | ENV | Default | Description |
 |-----|---------|-------------|
 | `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
+| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, all worker threads within a process share a single instance of the models. Otherwise, a separate instance of the models is allocated for each worker thread. |

 #### KFP engine

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,7 +35,7 @@ requires-python = ">=3.10"
 dependencies = [
    "docling~=2.38",
    "docling-core>=2.44.1",
-    "docling-jobkit[kfp,vlm]~=1.2",
+    "docling-jobkit[kfp,vlm]~=1.3",
    "fastapi[standard]~=0.115",
    "httpx~=0.28",
    "pydantic~=2.10",
--- a/uv.lock
+++ b/uv.lock