mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
fix: set DOCLING_SERVE_ARTIFACTS_PATH in images (#53)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -41,13 +41,15 @@ ENV PYTHONIOENCODING=utf-8
|
|||||||
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
|
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
|
||||||
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
|
ENV UV_PROJECT_ENVIRONMENT=/opt/app-root
|
||||||
|
|
||||||
|
ENV DOCLING_SERVE_ARTIFACTS_PATH=/opt/app-root/src/.cache/docling/models
|
||||||
|
|
||||||
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
|
COPY --chown=1001:0 pyproject.toml uv.lock README.md ./
|
||||||
|
|
||||||
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
|
||||||
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
|
uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS} # --no-extra ${NO_EXTRA}
|
||||||
|
|
||||||
RUN echo "Downloading models..." && \
|
RUN echo "Downloading models..." && \
|
||||||
docling-tools models download ${MODELS_LIST} && \
|
docling-tools models download -o "${DOCLING_SERVE_ARTIFACTS_PATH}" ${MODELS_LIST} && \
|
||||||
chown -R 1001:0 /opt/app-root/src/.cache && \
|
chown -R 1001:0 /opt/app-root/src/.cache && \
|
||||||
chmod -R g=u /opt/app-root/src/.cache
|
chmod -R g=u /opt/app-root/src/.cache
|
||||||
|
|
||||||
|
|||||||
@@ -381,6 +381,9 @@ $ docling-serve dev --help
|
|||||||
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
|
│ --proxy-headers --no-proxy-headers Enable/Disable X-Forwarded-Proto, X-Forwarded-For, │
|
||||||
│ X-Forwarded-Port to populate remote address info. │
|
│ X-Forwarded-Port to populate remote address info. │
|
||||||
│ [default: proxy-headers] │
|
│ [default: proxy-headers] │
|
||||||
|
│ --artifacts-path PATH If set to a valid directory, the model weights will be │
|
||||||
|
│ loaded from this path. │
|
||||||
|
│ [default: None] │
|
||||||
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
|
│ --enable-ui --no-enable-ui Enable the development UI. [default: enable-ui] │
|
||||||
│ --help Show this message and exit. │
|
│ --help Show this message and exit. │
|
||||||
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
||||||
@@ -398,11 +401,11 @@ The environment variables controlling the `uvicorn` execution can be specified w
|
|||||||
The environment variables controlling specifics of the Docling Serve app can be specified with the
|
The environment variables controlling specifics of the Docling Serve app can be specified with the
|
||||||
`DOCLING_SERVE_` prefix:
|
`DOCLING_SERVE_` prefix:
|
||||||
|
|
||||||
|
- `DOCLING_SERVE_ARTIFACTS_PATH`: If set, Docling will use only the local weights of models, for example `/opt/app-root/src/.cache/docling/models`.
|
||||||
- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
|
- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
|
||||||
|
|
||||||
Others:
|
Others:
|
||||||
|
|
||||||
- `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
|
|
||||||
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
|
- `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
|
||||||
|
|
||||||
## Get help and support
|
## Get help and support
|
||||||
|
|||||||
@@ -3,7 +3,8 @@ import logging
|
|||||||
import platform
|
import platform
|
||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Annotated, Any, Union
|
from pathlib import Path
|
||||||
|
from typing import Annotated, Any, Optional, Union
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
import uvicorn
|
import uvicorn
|
||||||
@@ -161,6 +162,15 @@ def dev(
|
|||||||
),
|
),
|
||||||
] = uvicorn_settings.proxy_headers,
|
] = uvicorn_settings.proxy_headers,
|
||||||
# docling options
|
# docling options
|
||||||
|
artifacts_path: Annotated[
|
||||||
|
Optional[Path],
|
||||||
|
typer.Option(
|
||||||
|
help=(
|
||||||
|
"If set to a valid directory, "
|
||||||
|
"the model weights will be loaded from this path."
|
||||||
|
)
|
||||||
|
),
|
||||||
|
] = docling_serve_settings.artifacts_path,
|
||||||
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
|
enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
|
||||||
) -> Any:
|
) -> Any:
|
||||||
"""
|
"""
|
||||||
@@ -179,6 +189,7 @@ def dev(
|
|||||||
uvicorn_settings.root_path = root_path
|
uvicorn_settings.root_path = root_path
|
||||||
uvicorn_settings.proxy_headers = proxy_headers
|
uvicorn_settings.proxy_headers = proxy_headers
|
||||||
|
|
||||||
|
docling_serve_settings.artifacts_path = artifacts_path
|
||||||
docling_serve_settings.enable_ui = enable_ui
|
docling_serve_settings.enable_ui = enable_ui
|
||||||
|
|
||||||
_run(
|
_run(
|
||||||
@@ -243,6 +254,15 @@ def run(
|
|||||||
),
|
),
|
||||||
] = uvicorn_settings.proxy_headers,
|
] = uvicorn_settings.proxy_headers,
|
||||||
# docling options
|
# docling options
|
||||||
|
artifacts_path: Annotated[
|
||||||
|
Optional[Path],
|
||||||
|
typer.Option(
|
||||||
|
help=(
|
||||||
|
"If set to a valid directory, "
|
||||||
|
"the model weights will be loaded from this path."
|
||||||
|
)
|
||||||
|
),
|
||||||
|
] = docling_serve_settings.artifacts_path,
|
||||||
enable_ui: Annotated[
|
enable_ui: Annotated[
|
||||||
bool, typer.Option(help="Enable the development UI.")
|
bool, typer.Option(help="Enable the development UI.")
|
||||||
] = docling_serve_settings.enable_ui,
|
] = docling_serve_settings.enable_ui,
|
||||||
@@ -264,6 +284,7 @@ def run(
|
|||||||
uvicorn_settings.root_path = root_path
|
uvicorn_settings.root_path = root_path
|
||||||
uvicorn_settings.proxy_headers = proxy_headers
|
uvicorn_settings.proxy_headers = proxy_headers
|
||||||
|
|
||||||
|
docling_serve_settings.artifacts_path = artifacts_path
|
||||||
docling_serve_settings.enable_ui = enable_ui
|
docling_serve_settings.enable_ui = enable_ui
|
||||||
|
|
||||||
_run(
|
_run(
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ from fastapi import HTTPException
|
|||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from docling_serve.helper_functions import _to_list_of_strings
|
from docling_serve.helper_functions import _to_list_of_strings
|
||||||
|
from docling_serve.settings import docling_serve_settings
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -276,6 +277,11 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
|
|||||||
if pdf_format_option.pipeline_options:
|
if pdf_format_option.pipeline_options:
|
||||||
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
|
data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump()
|
||||||
|
|
||||||
|
# Replace `artifacts_path` with a string representation
|
||||||
|
data["pipeline_options"]["artifacts_path"] = repr(
|
||||||
|
data["pipeline_options"]["artifacts_path"]
|
||||||
|
)
|
||||||
|
|
||||||
# Replace `pipeline_cls` with a string representation
|
# Replace `pipeline_cls` with a string representation
|
||||||
data["pipeline_cls"] = repr(data["pipeline_cls"])
|
data["pipeline_cls"] = repr(data["pipeline_cls"])
|
||||||
|
|
||||||
@@ -293,7 +299,7 @@ def _serialize_pdf_format_option(pdf_format_option: PdfFormatOption) -> str:
|
|||||||
|
|
||||||
|
|
||||||
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
# Computes the PDF pipeline options and returns the PdfFormatOption and its hash
|
||||||
def get_pdf_pipeline_opts(
|
def get_pdf_pipeline_opts( # noqa: C901
|
||||||
request: ConvertDocumentsOptions,
|
request: ConvertDocumentsOptions,
|
||||||
) -> Tuple[PdfFormatOption, str]:
|
) -> Tuple[PdfFormatOption, str]:
|
||||||
|
|
||||||
@@ -364,6 +370,31 @@ def get_pdf_pipeline_opts(
|
|||||||
else:
|
else:
|
||||||
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
|
raise RuntimeError(f"Unexpected PDF backend type {request.pdf_backend}")
|
||||||
|
|
||||||
|
if docling_serve_settings.artifacts_path is not None:
|
||||||
|
if str(docling_serve_settings.artifacts_path.absolute()) == "":
|
||||||
|
_log.info(
|
||||||
|
"artifacts_path is an empty path, model weights will be dowloaded "
|
||||||
|
"at runtime."
|
||||||
|
)
|
||||||
|
pipeline_options.artifacts_path = None
|
||||||
|
elif docling_serve_settings.artifacts_path.is_dir():
|
||||||
|
_log.info(
|
||||||
|
"artifacts_path is set to a valid directory. "
|
||||||
|
"No model weights will be downloaded at runtime."
|
||||||
|
)
|
||||||
|
pipeline_options.artifacts_path = docling_serve_settings.artifacts_path
|
||||||
|
else:
|
||||||
|
_log.warning(
|
||||||
|
"artifacts_path is set to an invalid directory. "
|
||||||
|
"The system will download the model weights at runtime."
|
||||||
|
)
|
||||||
|
pipeline_options.artifacts_path = None
|
||||||
|
else:
|
||||||
|
_log.info(
|
||||||
|
"artifacts_path is unset. "
|
||||||
|
"The system will download the model weights at runtime."
|
||||||
|
)
|
||||||
|
|
||||||
pdf_format_option = PdfFormatOption(
|
pdf_format_option = PdfFormatOption(
|
||||||
pipeline_options=pipeline_options,
|
pipeline_options=pipeline_options,
|
||||||
backend=backend,
|
backend=backend,
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
from typing import Union
|
from pathlib import Path
|
||||||
|
from typing import Optional, Union
|
||||||
|
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
@@ -18,10 +19,14 @@ class UvicornSettings(BaseSettings):
|
|||||||
|
|
||||||
class DoclingServeSettings(BaseSettings):
|
class DoclingServeSettings(BaseSettings):
|
||||||
model_config = SettingsConfigDict(
|
model_config = SettingsConfigDict(
|
||||||
env_prefix="DOCLING_SERVE_", env_file=".env", extra="allow"
|
env_prefix="DOCLING_SERVE_",
|
||||||
|
env_file=".env",
|
||||||
|
env_parse_none_str="",
|
||||||
|
extra="allow",
|
||||||
)
|
)
|
||||||
|
|
||||||
enable_ui: bool = False
|
enable_ui: bool = False
|
||||||
|
artifacts_path: Optional[Path] = None
|
||||||
|
|
||||||
|
|
||||||
uvicorn_settings = UvicornSettings()
|
uvicorn_settings = UvicornSettings()
|
||||||
|
|||||||
Reference in New Issue
Block a user