fix: set DOCLING_SERVE_ARTIFACTS_PATH in images (#53)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-02-19 22:03:56 +01:00
committed by GitHub
parent ec33a61faa
commit 4877248368
5 changed files with 68 additions and 6 deletions

View File

@@ -41,13 +41,15 @@ ENV PYTHONIOENCODING=utf-8
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
RUN echo "Downloading models..." && \
docling-tools models download ${MODELS_LIST} && \
docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
chown -R 1001:0 /opt/app-root/src/.cache && \
chmod -R g=u /opt/app-root/src/.cache

View File

@@ -381,6 +381,9 @@ $ docling-serve dev --help
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
│ X-Forwarded-Port to populate remote address info. │
│ [default: proxy-headers] │
│ --artifacts-path PATH If set to a valid directory, the model weights will be │
│ loaded from this path. │
│ [default: None] │
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
│ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
@@ -398,11 +401,11 @@ The environment variables controlling the `uvicorn` execution can be specified w
The environment variables controlling specifics of the Docling Serve app can be specified with the
`DOCLING_SERVE_` prefix:
- `DOCLING_SERVE_ARTIFACTS_PATH`: if set, Docling will use only the local model weights found at this path, for example `/opt/app-root/src/.cache/docling/models`.
- `DOCLING_SERVE_ENABLE_UI`: if `True`, the Gradio UI will be available at `/ui`.
Others:
- `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
## Get help and support

View File

@@ -3,7 +3,8 @@ import logging
import platform
import sys
import warnings
from typing import Annotated, Any, Union
from pathlib import Path
from typing import Annotated, Any, Optional, Union
import typer
import uvicorn
@@ -161,6 +162,15 @@ def dev(
),
] = uvicorn_settings.proxy_headers,
# docling options
artifacts_path: Annotated[
Optional[Path],
typer.Option(
help=(
"If set to a valid directory, "
"the model weights will be loaded from this path."
)
),
] = docling_serve_settings.artifacts_path,
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
) -> Any:
"""
@@ -179,6 +189,7 @@ def dev(
uvicorn_settings.root_path = root_path
uvicorn_settings.proxy_headers = proxy_headers
docling_serve_settings.artifacts_path = artifacts_path
docling_serve_settings.enable_ui = enable_ui
_run(
@@ -243,6 +254,15 @@ def run(
),
] = uvicorn_settings.proxy_headers,
# docling options
artifacts_path: Annotated[
Optional[Path],
typer.Option(
help=(
"If set to a valid directory, "
"the model weights will be loaded from this path."
)
),
] = docling_serve_settings.artifacts_path,
enable_ui: Annotated[
bool, typer.Option(help="Enable the development UI.")
] = docling_serve_settings.enable_ui,
@@ -264,6 +284,7 @@ def run(
uvicorn_settings.root_path = root_path
uvicorn_settings.proxy_headers = proxy_headers
docling_serve_settings.artifacts_path = artifacts_path
docling_serve_settings.enable_ui = enable_ui
_run(

View File

@@ -39,6 +39,7 @@ from fastapi import HTTPException
from pydantic import BaseModel, Field
from docling_serve.helper_functions import _to_list_of_strings
from docling_serve.settings import docling_serve_settings
_log = logging.getLogger(__name__)
@@ -276,6 +277,11 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
if pdf_format_option.pipeline_options:
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
# Replace `artifacts_path` with a string representation
data["pipeline_options"]["artifacts_path"] = repr(
data["pipeline_options"]["artifacts_path"]
)
# Replace `pipeline_cls` with a string representation
data["pipeline_cls"] = repr(data["pipeline_cls"])
@@ -293,7 +299,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
def get_pdf_pipeline_opts(
def get_pdf_pipeline_opts( # noqa: C901
request: ConvertDocumentsOptions,
) -> Tuple[PdfFormatOption, str]:
@@ -364,6 +370,31 @@ def get_pdf_pipeline_opts(
else:
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
if docling_serve_settings.artifacts_path is not None:
if str(docling_serve_settings.artifacts_path.absolute()) == "":
_log.info(
"artifacts_path is an empty path, model weights will be dowloaded "
"at runtime."
)
pipeline_options.artifacts_path = None
elif docling_serve_settings.artifacts_path.is_dir():
_log.info(
"artifacts_path is set to a valid directory. "
"No model weights will be downloaded at runtime."
)
pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
else:
_log.warning(
"artifacts_path is set to an invalid directory. "
"The system will download the model weights at runtime."
)
pipeline_options.artifacts_path = None
else:
_log.info(
"artifacts_path is unset. "
"The system will download the model weights at runtime."
)
pdf_format_option = PdfFormatOption(
pipeline_options=pipeline_options,
backend=backend,

View File

@@ -1,4 +1,5 @@
from typing import Union
from pathlib import Path
from typing import Optional, Union
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -18,10 +19,14 @@ class UvicornSettings(BaseSettings):
class DoclingServeSettings(BaseSettings):
model_config = SettingsConfigDict(
env_prefix="DOCLING_SERVE_", env_file=".env", extra="allow"
env_prefix="DOCLING_SERVE_",
env_file=".env",
env_parse_none_str="",
extra="allow",
)
enable_ui: bool = False
artifacts_path: Optional[Path] = None
uvicorn_settings = UvicornSettings()