mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
fix: set DOCLING_SERVE_ARTIFACTS_PATH in images (#53)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -41,13 +41,15 @@ ENV PYTHONIOENCODING=utf-8
|
||||
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
|
||||
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
|
||||
|
||||
ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
|
||||
|
||||
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
|
||||
|
||||
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
|
||||
|
||||
RUN echo "Downloading models..." && \
|
||||
docling-tools models download ${MODELS_LIST} && \
|
||||
docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
|
||||
chown -R 1001:0 /opt/app-root/src/.cache && \
|
||||
chmod -R g=u /opt/app-root/src/.cache
|
||||
|
||||
|
||||
@@ -381,6 +381,9 @@ $ docling-serve dev --help
|
||||
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
|
||||
│ X-Forwarded-Port to populate remote address info. │
|
||||
│ [default: proxy-headers] │
|
||||
│ --artifacts-path PATH If set to a valid directory, the model weights will be │
|
||||
│ loaded from this path. │
|
||||
│ [default: None] │
|
||||
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
|
||||
│ --help Show this message and exit. │
|
||||
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
||||
@@ -398,11 +401,11 @@ The environment variables controlling the `uvicorn` execution can be specified w
|
||||
The environment variables controlling specifics of the Docling Serve app can be specified with the
|
||||
`DOCLING_SERVE_` prefix:
|
||||
|
||||
- `DOCLING_SERVE_ARTIFACTS_PATH`: if set to a valid directory, Docling will load the model weights only from this local path, for example `/opt/app-root/src/.cache/docling/models`.
|
||||
- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
|
||||
|
||||
Others:
|
||||
|
||||
- `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
|
||||
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
|
||||
|
||||
## Get help and support
|
||||
|
||||
@@ -3,7 +3,8 @@ import logging
|
||||
import platform
|
||||
import sys
|
||||
import warnings
|
||||
from typing import Annotated, Any, Union
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Any, Optional, Union
|
||||
|
||||
import typer
|
||||
import uvicorn
|
||||
@@ -161,6 +162,15 @@ def dev(
|
||||
),
|
||||
] = uvicorn_settings.proxy_headers,
|
||||
# docling options
|
||||
artifacts_path: Annotated[
|
||||
Optional[Path],
|
||||
typer.Option(
|
||||
help=(
|
||||
"If set to a valid directory, "
|
||||
"the model weights will be loaded from this path."
|
||||
)
|
||||
),
|
||||
] = docling_serve_settings.artifacts_path,
|
||||
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
|
||||
) -> Any:
|
||||
"""
|
||||
@@ -179,6 +189,7 @@ def dev(
|
||||
uvicorn_settings.root_path = root_path
|
||||
uvicorn_settings.proxy_headers = proxy_headers
|
||||
|
||||
docling_serve_settings.artifacts_path = artifacts_path
|
||||
docling_serve_settings.enable_ui = enable_ui
|
||||
|
||||
_run(
|
||||
@@ -243,6 +254,15 @@ def run(
|
||||
),
|
||||
] = uvicorn_settings.proxy_headers,
|
||||
# docling options
|
||||
artifacts_path: Annotated[
|
||||
Optional[Path],
|
||||
typer.Option(
|
||||
help=(
|
||||
"If set to a valid directory, "
|
||||
"the model weights will be loaded from this path."
|
||||
)
|
||||
),
|
||||
] = docling_serve_settings.artifacts_path,
|
||||
enable_ui: Annotated[
|
||||
bool, typer.Option(help="Enable the development UI.")
|
||||
] = docling_serve_settings.enable_ui,
|
||||
@@ -264,6 +284,7 @@ def run(
|
||||
uvicorn_settings.root_path = root_path
|
||||
uvicorn_settings.proxy_headers = proxy_headers
|
||||
|
||||
docling_serve_settings.artifacts_path = artifacts_path
|
||||
docling_serve_settings.enable_ui = enable_ui
|
||||
|
||||
_run(
|
||||
|
||||
@@ -39,6 +39,7 @@ from fastapi import HTTPException
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from docling_serve.helper_functions import _to_list_of_strings
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
@@ -276,6 +277,11 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
|
||||
if pdf_format_option.pipeline_options:
|
||||
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
|
||||
|
||||
# Replace `artifacts_path` with a string representation
|
||||
data["pipeline_options"]["artifacts_path"] = repr(
|
||||
data["pipeline_options"]["artifacts_path"]
|
||||
)
|
||||
|
||||
# Replace `pipeline_cls` with a string representation
|
||||
data["pipeline_cls"] = repr(data["pipeline_cls"])
|
||||
|
||||
@@ -293,7 +299,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
|
||||
|
||||
|
||||
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
||||
def get_pdf_pipeline_opts(
|
||||
def get_pdf_pipeline_opts( # noqa: C901
|
||||
request: ConvertDocumentsOptions,
|
||||
) -> Tuple[PdfFormatOption, str]:
|
||||
|
||||
@@ -364,6 +370,31 @@ def get_pdf_pipeline_opts(
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
|
||||
|
||||
if docling_serve_settings.artifacts_path is not None:
|
||||
if str(docling_serve_settings.artifacts_path.absolute()) == "":
|
||||
_log.info(
|
||||
"artifacts_path is an empty path, model weights will be dowloaded "
|
||||
"at runtime."
|
||||
)
|
||||
pipeline_options.artifacts_path = None
|
||||
elif docling_serve_settings.artifacts_path.is_dir():
|
||||
_log.info(
|
||||
"artifacts_path is set to a valid directory. "
|
||||
"No model weights will be downloaded at runtime."
|
||||
)
|
||||
pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
|
||||
else:
|
||||
_log.warning(
|
||||
"artifacts_path is set to an invalid directory. "
|
||||
"The system will download the model weights at runtime."
|
||||
)
|
||||
pipeline_options.artifacts_path = None
|
||||
else:
|
||||
_log.info(
|
||||
"artifacts_path is unset. "
|
||||
"The system will download the model weights at runtime."
|
||||
)
|
||||
|
||||
pdf_format_option = PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=backend,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from typing import Union
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
@@ -18,10 +19,14 @@ class UvicornSettings(BaseSettings):
|
||||
|
||||
class DoclingServeSettings(BaseSettings):
|
||||
model_config = SettingsConfigDict(
|
||||
env_prefix="DOCLING_SERVE_", env_file=".env", extra="allow"
|
||||
env_prefix="DOCLING_SERVE_",
|
||||
env_file=".env",
|
||||
env_parse_none_str="",
|
||||
extra="allow",
|
||||
)
|
||||
|
||||
enable_ui: bool = False
|
||||
artifacts_path: Optional[Path] = None
|
||||
|
||||
|
||||
uvicorn_settings = UvicornSettings()
|
||||
|
||||
Reference in New Issue
Block a user