fix: set DOCLING_SERVE_ARTIFACTS_PATH in images (#53)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-02-19 22:03:56 +01:00
committed by GitHub
parent ec33a61faa
commit 4877248368
5 changed files with 68 additions and 6 deletions

View File

@@ -41,13 +41,15 @@ ENV PYTHONIOENCODING=utf-8
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./ COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \ RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA} uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
RUN echo "Downloading models..." && \ RUN echo "Downloading models..." && \
docling-tools models download ${MODELS_LIST} && \ docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
chown -R 1001:0 /opt/app-root/src/.cache && \ chown -R 1001:0 /opt/app-root/src/.cache && \
chmod -R g=u /opt/app-root/src/.cache chmod -R g=u /opt/app-root/src/.cache

View File

@@ -381,6 +381,9 @@ $ docling-serve dev --help
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │ │ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
│ X-Forwarded-Port to populate remote address info. │ │ X-Forwarded-Port to populate remote address info. │
│ [default: proxy-headers] │ │ [default: proxy-headers] │
│ --artifacts-path PATH If set to a valid directory, the model weights will be │
│ loaded from this path. │
│ [default: None] │
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │ │ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
│ --help Show this message and exit. │ │ --help Show this message and exit. │
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
@@ -398,11 +401,11 @@ The environment variables controlling the `uvicorn` execution can be specified w
The environment variables controlling specifics of the Docling Serve app can be specified with the The environment variables controlling specifics of the Docling Serve app can be specified with the
`DOCLING_SERVE_` prefix: `DOCLING_SERVE_` prefix:
- `DOCLING_SERVE_ARTIFACTS_PATH`: If set to a valid directory, Docling will load the model weights only from this local path instead of downloading them at runtime, for example `/opt/app-root/src/.cache/docling/models`.
- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`. - `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
Others: Others:
- `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`. - `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
## Get help and support ## Get help and support

View File

@@ -3,7 +3,8 @@ import logging
import platform import platform
import sys import sys
import warnings import warnings
from typing import Annotated, Any, Union from pathlib import Path
from typing import Annotated, Any, Optional, Union
import typer import typer
import uvicorn import uvicorn
@@ -161,6 +162,15 @@ def dev(
), ),
] = uvicorn_settings.proxy_headers, ] = uvicorn_settings.proxy_headers,
# docling options # docling options
artifacts_path: Annotated[
Optional[Path],
typer.Option(
help=(
"If set to a valid directory, "
"the model weights will be loaded from this path."
)
),
] = docling_serve_settings.artifacts_path,
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True, enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
) -> Any: ) -> Any:
""" """
@@ -179,6 +189,7 @@ def dev(
uvicorn_settings.root_path = root_path uvicorn_settings.root_path = root_path
uvicorn_settings.proxy_headers = proxy_headers uvicorn_settings.proxy_headers = proxy_headers
docling_serve_settings.artifacts_path = artifacts_path
docling_serve_settings.enable_ui = enable_ui docling_serve_settings.enable_ui = enable_ui
_run( _run(
@@ -243,6 +254,15 @@ def run(
), ),
] = uvicorn_settings.proxy_headers, ] = uvicorn_settings.proxy_headers,
# docling options # docling options
artifacts_path: Annotated[
Optional[Path],
typer.Option(
help=(
"If set to a valid directory, "
"the model weights will be loaded from this path."
)
),
] = docling_serve_settings.artifacts_path,
enable_ui: Annotated[ enable_ui: Annotated[
bool, typer.Option(help="Enable the development UI.") bool, typer.Option(help="Enable the development UI.")
] = docling_serve_settings.enable_ui, ] = docling_serve_settings.enable_ui,
@@ -264,6 +284,7 @@ def run(
uvicorn_settings.root_path = root_path uvicorn_settings.root_path = root_path
uvicorn_settings.proxy_headers = proxy_headers uvicorn_settings.proxy_headers = proxy_headers
docling_serve_settings.artifacts_path = artifacts_path
docling_serve_settings.enable_ui = enable_ui docling_serve_settings.enable_ui = enable_ui
_run( _run(

View File

@@ -39,6 +39,7 @@ from fastapi import HTTPException
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from docling_serve.helper_functions import _to_list_of_strings from docling_serve.helper_functions import _to_list_of_strings
from docling_serve.settings import docling_serve_settings
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@@ -276,6 +277,11 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
if pdf_format_option.pipeline_options: if pdf_format_option.pipeline_options:
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump() data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
# Replace `artifacts_path` with a string representation
data["pipeline_options"]["artifacts_path"] = repr(
data["pipeline_options"]["artifacts_path"]
)
# Replace `pipeline_cls` with a string representation # Replace `pipeline_cls` with a string representation
data["pipeline_cls"] = repr(data["pipeline_cls"]) data["pipeline_cls"] = repr(data["pipeline_cls"])
@@ -293,7 +299,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
def get_pdf_pipeline_opts( def get_pdf_pipeline_opts( # noqa: C901
request: ConvertDocumentsOptions, request: ConvertDocumentsOptions,
) -> Tuple[PdfFormatOption, str]: ) -> Tuple[PdfFormatOption, str]:
@@ -364,6 +370,31 @@ def get_pdf_pipeline_opts(
else: else:
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}") raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
if docling_serve_settings.artifacts_path is not None:
if str(docling_serve_settings.artifacts_path.absolute()) == "":
_log.info(
"artifacts_path is an empty path, model weights will be downloaded "
"at runtime."
)
pipeline_options.artifacts_path = None
elif docling_serve_settings.artifacts_path.is_dir():
_log.info(
"artifacts_path is set to a valid directory. "
"No model weights will be downloaded at runtime."
)
pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
else:
_log.warning(
"artifacts_path is set to an invalid directory. "
"The system will download the model weights at runtime."
)
pipeline_options.artifacts_path = None
else:
_log.info(
"artifacts_path is unset. "
"The system will download the model weights at runtime."
)
pdf_format_option = PdfFormatOption( pdf_format_option = PdfFormatOption(
pipeline_options=pipeline_options, pipeline_options=pipeline_options,
backend=backend, backend=backend,

View File

@@ -1,4 +1,5 @@
from typing import Union from pathlib import Path
from typing import Optional, Union
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -18,10 +19,14 @@ class UvicornSettings(BaseSettings):
class DoclingServeSettings(BaseSettings): class DoclingServeSettings(BaseSettings):
model_config = SettingsConfigDict( model_config = SettingsConfigDict(
env_prefix="DOCLING_SERVE_", env_file=".env", extra="allow" env_prefix="DOCLING_SERVE_",
env_file=".env",
env_parse_none_str="",
extra="allow",
) )
enable_ui: bool = False enable_ui: bool = False
artifacts_path: Optional[Path] = None
uvicorn_settings = UvicornSettings() uvicorn_settings = UvicornSettings()