feat: workers without shared models and convert params (#304)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-08-07 11:16:06 +02:00
committed by GitHub
parent fd1b987e8d
commit db3fdb5bc1
5 changed files with 602 additions and 621 deletions

View File

@@ -19,6 +19,7 @@ def get_async_orchestrator() -> BaseOrchestrator:
local_config = LocalOrchestratorConfig(
num_workers=docling_serve_settings.eng_loc_num_workers,
shared_models=docling_serve_settings.eng_loc_share_models,
)
cm_config = DoclingConverterManagerConfig(

View File

@@ -63,6 +63,7 @@ class DoclingServeSettings(BaseSettings):
eng_kind: AsyncEngine = AsyncEngine.LOCAL
# Local engine
eng_loc_num_workers: int = 2
eng_loc_share_models: bool = False
# KFP engine
eng_kfp_endpoint: Optional[AnyUrl] = None
eng_kfp_token: Optional[str] = None

View File

@@ -66,6 +66,7 @@ The following table describes the options to configure the Docling Serve local e
| ENV | Default | Description |
|-----|---------|-------------|
| `DOCLING_SERVE_ENG_LOC_NUM_WORKERS` | 2 | Number of workers/threads processing the incoming tasks. |
| `DOCLING_SERVE_ENG_LOC_SHARE_MODELS` | False | If true, all worker threads within a process share a single instance of the models. Otherwise, a separate instance of the models is allocated for each worker thread. |
#### KFP engine

View File

@@ -35,7 +35,7 @@ requires-python = ">=3.10"
dependencies = [
"docling~=2.38",
"docling-core>=2.44.1",
"docling-jobkit[kfp,vlm]~=1.2",
"docling-jobkit[kfp,vlm]~=1.3",
"fastapi[standard]~=0.115",
"httpx~=0.28",
"pydantic~=2.10",

1218
uv.lock generated

File diff suppressed because one or more lines are too long