mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 00:23:36 +00:00
fix: properly load models at boot (#244)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -99,7 +99,8 @@ async def lifespan(app: FastAPI):
|
||||
scratch_dir = get_scratch()
|
||||
|
||||
# Warm up processing cache
|
||||
await orchestrator.warm_up_caches()
|
||||
if docling_serve_settings.load_models_at_boot:
|
||||
await orchestrator.warm_up_caches()
|
||||
|
||||
# Start the background queue processor
|
||||
queue_task = asyncio.create_task(orchestrator.process_queue())
|
||||
|
||||
@@ -3,6 +3,8 @@ import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsOptions
|
||||
from docling_serve.datamodel.task import Task, TaskSource
|
||||
from docling_serve.docling_conversion import get_converter, get_pdf_pipeline_opts
|
||||
@@ -54,4 +56,5 @@ class AsyncLocalOrchestrator(BaseAsyncOrchestrator):
|
||||
async def warm_up_caches(self):
|
||||
# Converter with default options
|
||||
pdf_format_option = get_pdf_pipeline_opts(ConvertDocumentsOptions())
|
||||
get_converter(pdf_format_option)
|
||||
converter = get_converter(pdf_format_option)
|
||||
converter.initialize_pipeline(InputFormat.PDF)
|
||||
|
||||
@@ -41,6 +41,7 @@ class DoclingServeSettings(BaseSettings):
|
||||
scratch_path: Optional[Path] = None
|
||||
single_use_results: bool = True
|
||||
result_removal_delay: float = 300 # 5 minutes
|
||||
load_models_at_boot: bool = True
|
||||
options_cache_size: int = 2
|
||||
enable_remote_services: bool = False
|
||||
allow_external_plugins: bool = False
|
||||
|
||||
@@ -47,6 +47,7 @@ The following table describes the options to configure the Docling Serve app.
|
||||
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
|
||||
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
|
||||
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Maximum number of seconds a synchronous endpoint waits for the task to complete. |
|
||||
| | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
|
||||
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConverter objects (including their loaded models) to keep in the cache. |
|
||||
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
|
||||
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
|
||||
|
||||
Reference in New Issue
Block a user