diff --git a/docling_serve/app.py b/docling_serve/app.py
index ff8b1a5..1ea5170 100644
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -99,7 +99,8 @@ async def lifespan(app: FastAPI):
     scratch_dir = get_scratch()
 
     # Warm up processing cache
-    await orchestrator.warm_up_caches()
+    if docling_serve_settings.load_models_at_boot:
+        await orchestrator.warm_up_caches()
 
     # Start the background queue processor
     queue_task = asyncio.create_task(orchestrator.process_queue())
diff --git a/docling_serve/engines/async_local/orchestrator.py b/docling_serve/engines/async_local/orchestrator.py
index 51a8892..63417ed 100644
--- a/docling_serve/engines/async_local/orchestrator.py
+++ b/docling_serve/engines/async_local/orchestrator.py
@@ -3,6 +3,8 @@ import logging
 import uuid
 from typing import Optional
 
+from docling.datamodel.base_models import InputFormat
+
 from docling_serve.datamodel.convert import ConvertDocumentsOptions
 from docling_serve.datamodel.task import Task, TaskSource
 from docling_serve.docling_conversion import get_converter, get_pdf_pipeline_opts
@@ -54,4 +56,5 @@ class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
     async def warm_up_caches(self):
         # Converter with default options
         pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
-        get_converter(pdf_format_option)
+        converter = get_converter(pdf_format_option)
+        converter.initialize_pipeline(InputFormat.PDF)
diff --git a/docling_serve/settings.py b/docling_serve/settings.py
index 13ce96c..6811c70 100644
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -41,6 +41,7 @@ class DoclingServeSettings(BaseSettings):
     scratch_path: Optional[Path] = None
     single_use_results: bool = True
     result_removal_delay: float = 300  # 5 minutes
+    load_models_at_boot: bool = True
     options_cache_size: int = 2
     enable_remote_services: bool = False
     allow_external_plugins: bool = False
diff --git a/docs/configuration.md b/docs/configuration.md
index 487c8c1..3129230 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -47,6 +47,7 @@ THe following table describes the options to configure the Docling Serve app.
 | | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
 | | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
 | | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
+| | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
 | | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConveter objects (including their loaded models) to keep in the cache. |
 | | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
 | | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |