feat: Add new docling-serve cli (#50)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2026-04-26 19:26:05 +00:00 · 2025-02-19 20:54:13 +01:00
parent 663e03303a
commit ec33a61faa
13 changed files with 526 additions and 215 deletions
--- a/.github/workflows/ci-images-dryrun.yml
+++ b/.github/workflows/ci-images-dryrun.yml
@@ -20,7 +20,7 @@ jobs:
    with:
      publish: false
      build_args: |
-        CPU_ONLY=true
+        UV_SYNC_EXTRA_ARGS=--no-extra cu124
      ghcr_image_name: ds4sd/docling-serve-cpu
      quay_image_name: ""

@@ -37,7 +37,7 @@ jobs:
    with:
      publish: false
      build_args: |
-        CPU_ONLY=false
+        UV_SYNC_EXTRA_ARGS=--no-extra cpu
      platforms: linux/amd64
      ghcr_image_name: ds4sd/docling-serve
      quay_image_name: ""
--- a/.github/workflows/images.yml
+++ b/.github/workflows/images.yml
@@ -34,7 +34,7 @@ jobs:
      publish: true
      environment: registry-creds
      build_args: |
-        CPU_ONLY=true
+        UV_SYNC_EXTRA_ARGS=--no-extra cu124
      ghcr_image_name: ds4sd/docling-serve-cpu
      quay_image_name: ds4sd/docling-serve-cpu

@@ -53,7 +53,7 @@ jobs:
      publish: true
      environment: registry-creds
      build_args: |
-        CPU_ONLY=false
+        UV_SYNC_EXTRA_ARGS=--no-extra cpu
      platforms: linux/amd64
      ghcr_image_name: ds4sd/docling-serve
      quay_image_name: ds4sd/docling-serve
--- a/.github/workflows/job-image.yml
+++ b/.github/workflows/job-image.yml
@@ -105,8 +105,6 @@ jobs:
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
-            # |
-            # --build-arg CPU_ONLY=true

      - name: Generate artifact attestation
        if: ${{ inputs.publish }}
@@ -137,8 +135,6 @@ jobs:
          cache-to: type=gha,mode=max
          file: Containerfile
          build-args: ${{ inputs.build_args }}
-            # |
-            # --build-arg CPU_ONLY=true

      - name: Remove Local Docker Images
        run: |
--- a/Containerfile
+++ b/Containerfile
@@ -2,8 +2,8 @@ ARG BASE_IMAGE=quay.io/sclorg/python-312-c9s:c9s

 FROM ${BASE_IMAGE}

-ARG CPU_ONLY=false
 ARG MODELS_LIST="layout tableformer picture_classifier easyocr"
+ARG UV_SYNC_EXTRA_ARGS=""

 USER 0

@@ -41,17 +41,10 @@ ENV PYTHONIOENCODING=utf-8
 ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
 ENV UV_PROJECT_ENVIRONMENT=/opt/app-root

-ENV WITH_UI=True
-
 COPY --chown=1001:0 pyproject.toml uv.lock README.md ./

 RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
-    if [ "$CPU_ONLY" = "true" ]; then \
-        NO_EXTRA=cu124; \
-    else \
-        NO_EXTRA=cpu; \
-    fi && \
-    uv sync --frozen --no-install-project --no-dev --all-extras --no-extra ${NO_EXTRA}
+    uv sync --frozen --no-install-project --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}   # --no-extra ${NO_EXTRA}

 RUN echo "Downloading models..." && \
    docling-tools models download ${MODELS_LIST} && \
@@ -59,8 +52,9 @@ RUN echo "Downloading models..." && \
    chmod -R g=u /opt/app-root/src/.cache

 COPY --chown=1001:0 --chmod=664 ./docling_serve ./docling_serve
-
+RUN --mount=type=cache,target=/opt/app-root/src/.cache/uv,uid=1001 \
+    uv sync --frozen --no-dev --all-extras ${UV_SYNC_EXTRA_ARGS}   # --no-extra ${NO_EXTRA}

 EXPOSE 5001

-CMD ["python", "-m", "docling_serve"]
+CMD ["docling-serve", "run"]
--- a/6
+++ b/6
@@ -26,15 +26,15 @@ md-lint-file:

 .PHONY: docling-serve-cpu-image
 docling-serve-cpu-image: Containerfile ## Build docling-serve "cpu only" container image
-	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU ONLY]"
-	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=true -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
+	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve CPU]"
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cu124" -f Containerfile -t ghcr.io/ds4sd/docling-serve-cpu:$(TAG) .
 	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) ghcr.io/ds4sd/docling-serve-cpu:main
 	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve-cpu:$(TAG) quay.io/ds4sd/docling-serve-cpu:main

 .PHONY: docling-serve-gpu-image
 docling-serve-gpu-image: Containerfile ## Build docling-serve container image with GPU support
 	$(ECHO_PREFIX) printf "  %-12s Containerfile\n" "[docling-serve with GPU]"
-	$(CMD_PREFIX) docker build --build-arg CPU_ONLY=false -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
+	$(CMD_PREFIX) docker build --load --build-arg "UV_SYNC_EXTRA_ARGS=--no-extra cpu" -f Containerfile --platform linux/amd64 -t ghcr.io/ds4sd/docling-serve:$(TAG) .
 	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) ghcr.io/ds4sd/docling-serve:main
 	$(CMD_PREFIX) docker tag ghcr.io/ds4sd/docling-serve:$(TAG) quay.io/ds4sd/docling-serve:main

--- a/README.md
+++ b/README.md
@@ -327,25 +327,83 @@ See `[project.optional-dependencies]` section in `pyproject.toml` for full list

 ### Run the server

-The [start_server.sh](./start_server.sh) executable is a convenient script for launching the local webserver.
+The `docling-serve` executable is a convenient script for launching the webserver both in
+development and production mode.

 ```sh
-# Run the server
-bash start_server.sh
+# Run the server in development mode
+# - reload is enabled by default
+# - listening on the 127.0.0.1 address
+# - ui is enabled by default
+docling-serve dev

-# Run the server with live reload
-RELOAD=true bash start_server.sh
+# Run the server in production mode
+# - reload is disabled by default
+# - listening on the 0.0.0.0 address
+# - ui is disabled by default
+docling-serve run
 ```

-### Environment variables
+### Options

-The following variables are available:
+The `docling-serve` executable is controlled with both command line
+options and environment variables.
+
+<details>
+<summary>`docling-serve` help message</summary>
+
+```sh
+$ docling-serve dev --help
+                                                                                                              
+ Usage: docling-serve dev [OPTIONS]                                                                           
+                                                                                                              
+ Run a Docling Serve app in development mode. 🧪                                                              
+ This is equivalent to docling-serve run but with reload                                                      
+ enabled and listening on the 127.0.0.1 address.                                                              
+                                                                                                              
+ Options can be set also with the corresponding ENV variable, with the exception                              
+ of --enable-ui, --host and --reload.                                                                         
+                                                                                                              
+╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ --host                                   TEXT     The host to serve on. For local development in localhost │
+│                                                   use 127.0.0.1. To enable public access, e.g. in a        │
+│                                                   container, use all the IP addresses available with       │
+│                                                   0.0.0.0.                                                 │
+│                                                   [default: 127.0.0.1]                                     │
+│ --port                                   INTEGER  The port to serve on. [default: 5001]                    │
+│ --reload           --no-reload                    Enable auto-reload of the server when (code) files       │
+│                                                   change. This is resource intensive, use it only during   │
+│                                                   development.                                             │
+│                                                   [default: reload]                                        │
+│ --root-path                              TEXT     The root path is used to tell your app that it is being  │
+│                                                   served to the outside world with some path prefix set up │
+│                                                   in some termination proxy or similar.                    │
+│ --proxy-headers    --no-proxy-headers             Enable/Disable X-Forwarded-Proto, X-Forwarded-For,       │
+│                                                   X-Forwarded-Port to populate remote address info.        │
+│                                                   [default: proxy-headers]                                 │
+│ --enable-ui        --no-enable-ui                 Enable the development UI. [default: enable-ui]          │
+│ --help                                            Show this message and exit.                              │
+╰────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+```
+
+</details>
+
+#### Environment variables
+
+The environment variables controlling the `uvicorn` execution can be specified with the `UVICORN_` prefix:
+
+- `UVICORN_WORKERS`: Number of workers to use.
+- `UVICORN_RELOAD`: If `True`, this will enable auto-reload when you modify files, useful for development.
+
+The environment variables controlling specifics of the Docling Serve app can be specified with the
+`DOCLING_SERVE_` prefix:
+
+- `DOCLING_SERVE_ENABLE_UI`: If `True`, the Gradio UI will be available at `/ui`.
+
+Others:

 - `DOCLING_ARTIFACTS_PATH`: if set Docling will use only the local weights of models, for example `/opt/app-root/.cache/docling/cache`.
 - `TESSDATA_PREFIX`: Tesseract data location, example `/usr/share/tesseract/tessdata/`.
- `UVICORN_WORKERS`: Number of workers to use.
- `RELOAD`: If `True`, this will enable auto-reload when you modify files, useful for development.
- `WITH_UI`: If `True`, The Gradio UI will be available at `/ui`.

 ## Get help and support

--- a/docling_serve/.env.example
+++ b/docling_serve/.env.example
@@ -1,3 +1,3 @@
 TESSDATA_PREFIX=/usr/share/tesseract/tessdata/
 UVICORN_WORKERS=2
-RELOAD=True
+UVICORN_RELOAD=True
--- a/docling_serve/__main__.py
+++ b/docling_serve/__main__.py
@@ -1,20 +1,281 @@
-import os
+import importlib
+import logging
+import platform
+import sys
+import warnings
+from typing import Annotated, Any, Union

-from docling_serve.helper_functions import _str_to_bool
+import typer
+import uvicorn
+from rich.console import Console

-# Launch the FastAPI server
-if __name__ == "__main__":
-    from uvicorn import run
+from docling_serve.settings import docling_serve_settings, uvicorn_settings

-    port = int(os.getenv("PORT", "5001"))
-    workers = int(os.getenv("UVICORN_WORKERS", "1"))
-    reload = _str_to_bool(os.getenv("RELOAD", "False"))
+warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
+warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")

-    run(
-        "docling_serve.app:app",
-        host="0.0.0.0",
-        port=port,
-        workers=workers,
-        timeout_keep_alive=600,
-        reload=reload,
+
+err_console = Console(stderr=True)
+console = Console()
+
+app = typer.Typer(
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+)
+
+logger = logging.getLogger(__name__)
+
+
+def version_callback(value: bool) -> None:
+    """Print the versions of Docling Serve and its core packages, then exit.
+
+    Used as the callback of the ``--version`` option.
+
+    Args:
+        value: Truthy when ``--version`` was passed; otherwise nothing happens.
+
+    Raises:
+        typer.Exit: Always raised after the report has been printed.
+    """
+    if value:
+        # A bare `import importlib` does not load the `metadata` submodule;
+        # import the function explicitly so this never depends on some other
+        # package having imported `importlib.metadata` first.
+        from importlib.metadata import version
+
+        # Resolve everything before printing so a missing distribution fails
+        # before any partial output is written.
+        docling_serve_version = version("docling_serve")
+        docling_version = version("docling")
+        docling_core_version = version("docling-core")
+        docling_ibm_models_version = version("docling-ibm-models")
+        docling_parse_version = version("docling-parse")
+        platform_str = platform.platform()
+        py_impl_version = sys.implementation.cache_tag
+        py_lang_version = platform.python_version()
+        console.print(f"Docling Serve version: {docling_serve_version}")
+        console.print(f"Docling version: {docling_version}")
+        console.print(f"Docling Core version: {docling_core_version}")
+        console.print(f"Docling IBM Models version: {docling_ibm_models_version}")
+        console.print(f"Docling Parse version: {docling_parse_version}")
+        console.print(f"Python: {py_impl_version} ({py_lang_version})")
+        console.print(f"Platform: {platform_str}")
+        raise typer.Exit()
+
+
+# Global options shared by every sub-command: `--version` (handled by
+# `version_callback`) and the repeatable `-v/--verbose` flag, which selects
+# the root logging level.
+# NOTE: documented with comments on purpose; Typer renders a callback's
+# docstring as the CLI help text, so adding one would change `--help` output.
+@app.callback()
+def callback(
+    version: Annotated[
+        Union[bool, None],
+        typer.Option(
+            "--version", help="Show the version and exit.", callback=version_callback
+        ),
+    ] = None,
+    verbose: Annotated[
+        int,
+        typer.Option(
+            "--verbose",
+            "-v",
+            count=True,
+            help="Set the verbosity level. -v for info logging, -vv for debug logging.",
+        ),
+    ] = 0,
+) -> None:
+    if verbose == 0:
+        logging.basicConfig(level=logging.WARNING)
+    elif verbose == 1:
+        logging.basicConfig(level=logging.INFO)
+    else:
+        # `verbose` is a counter: treat -vv and anything beyond it (-vvv, ...)
+        # as debug instead of silently leaving logging unconfigured.
+        logging.basicConfig(level=logging.DEBUG)
+
+
+def _run(
+    *,
+    command: str,
+) -> None:
+    server_type = "development" if command == "dev" else "production"
+
+    console.print(f"Starting {server_type} server 🚀")
+
+    url = f"http://{uvicorn_settings.host}:{uvicorn_settings.port}"
+    url_docs = f"{url}/docs"
+    url_ui = f"{url}/ui"
+
+    console.print("")
+    console.print(f"Server started at [link={url}]{url}[/]")
+    console.print(f"Documentation at [link={url_docs}]{url_docs}[/]")
+    if docling_serve_settings.enable_ui:
+        console.print(f"UI at [link={url_ui}]{url_ui}[/]")
+
+    if command == "dev":
+        console.print("")
+        console.print(
+            "Running in development mode, for production use: "
+            "[bold]docling-serve run[/]",
+        )
+
+    console.print("")
+    console.print("Logs:")
+
+    uvicorn.run(
+        app="docling_serve.app:create_app",
+        factory=True,
+        host=uvicorn_settings.host,
+        port=uvicorn_settings.port,
+        reload=uvicorn_settings.reload,
+        workers=uvicorn_settings.workers,
+        root_path=uvicorn_settings.root_path,
+        proxy_headers=uvicorn_settings.proxy_headers,
    )
+
+
+@app.command()
+def dev(
+    *,
+    # uvicorn options
+    host: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The host to serve on. For local development in localhost "
+                "use [blue]127.0.0.1[/blue]. To enable public access, "
+                "e.g. in a container, use all the IP addresses "
+                "available with [blue]0.0.0.0[/blue]."
+            )
+        ),
+    ] = "127.0.0.1",
+    port: Annotated[
+        int,
+        typer.Option(help="The port to serve on."),
+    ] = uvicorn_settings.port,
+    reload: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable auto-reload of the server when (code) files change. "
+                "This is [bold]resource intensive[/bold], "
+                "use it only during development."
+            )
+        ),
+    ] = True,
+    root_path: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The root path is used to tell your app that it is being served "
+                "to the outside world with some [bold]path prefix[/bold] "
+                "set up in some termination proxy or similar."
+            )
+        ),
+    ] = uvicorn_settings.root_path,
+    proxy_headers: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
+                "X-Forwarded-Port to populate remote address info."
+            )
+        ),
+    ] = uvicorn_settings.proxy_headers,
+    # docling options
+    enable_ui: Annotated[bool, typer.Option(help="Enable the development UI.")] = True,
+) -> Any:
+    """
+    Run a [bold]Docling Serve[/bold] app in [yellow]development[/yellow] mode. 🧪
+
+    This is equivalent to [bold]docling-serve run[/bold] but with [bold]reload[/bold]
+    enabled and listening on the [blue]127.0.0.1[/blue] address.
+
+    Options can be set also with the corresponding ENV variable, with the exception
+    of --enable-ui, --host and --reload.
+    """
+
+    uvicorn_settings.host = host
+    uvicorn_settings.port = port
+    uvicorn_settings.reload = reload
+    uvicorn_settings.root_path = root_path
+    uvicorn_settings.proxy_headers = proxy_headers
+
+    docling_serve_settings.enable_ui = enable_ui
+
+    _run(
+        command="dev",
+    )
+
+
+@app.command()
+def run(
+    *,
+    host: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The host to serve on. For local development in localhost "
+                "use [blue]127.0.0.1[/blue]. To enable public access, "
+                "e.g. in a container, use all the IP addresses "
+                "available with [blue]0.0.0.0[/blue]."
+            )
+        ),
+    ] = uvicorn_settings.host,
+    port: Annotated[
+        int,
+        typer.Option(help="The port to serve on."),
+    ] = uvicorn_settings.port,
+    reload: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable auto-reload of the server when (code) files change. "
+                "This is [bold]resource intensive[/bold], "
+                "use it only during development."
+            )
+        ),
+    ] = uvicorn_settings.reload,
+    workers: Annotated[
+        Union[int, None],
+        typer.Option(
+            help=(
+                "Use multiple worker processes. "
+                "Mutually exclusive with the --reload flag."
+            )
+        ),
+    ] = uvicorn_settings.workers,
+    root_path: Annotated[
+        str,
+        typer.Option(
+            help=(
+                "The root path is used to tell your app that it is being served "
+                "to the outside world with some [bold]path prefix[/bold] "
+                "set up in some termination proxy or similar."
+            )
+        ),
+    ] = uvicorn_settings.root_path,
+    proxy_headers: Annotated[
+        bool,
+        typer.Option(
+            help=(
+                "Enable/Disable X-Forwarded-Proto, X-Forwarded-For, "
+                "X-Forwarded-Port to populate remote address info."
+            )
+        ),
+    ] = uvicorn_settings.proxy_headers,
+    # docling options
+    enable_ui: Annotated[
+        bool, typer.Option(help="Enable the development UI.")
+    ] = docling_serve_settings.enable_ui,
+) -> Any:
+    """
+    Run a [bold]Docling Serve[/bold] app in [green]production[/green] mode. 🚀
+
+    This is equivalent to [bold]docling-serve dev[/bold] but with [bold]reload[/bold]
+    disabled and listening on the [blue]0.0.0.0[/blue] address.
+
+    Options can be set also with the corresponding ENV variable, e.g. UVICORN_PORT
+    or DOCLING_SERVE_ENABLE_UI.
+    """
+
+    uvicorn_settings.host = host
+    uvicorn_settings.port = port
+    uvicorn_settings.reload = reload
+    uvicorn_settings.workers = workers
+    uvicorn_settings.root_path = root_path
+    uvicorn_settings.proxy_headers = proxy_headers
+
+    docling_serve_settings.enable_ui = enable_ui
+
+    _run(
+        command="run",
+    )
+
+
+def main() -> None:
+    app()
+
+
+# Launch the CLI when calling python -m docling_serve
+if __name__ == "__main__":
+
+    main()
--- a/docling_serve/app.py
+++ b/docling_serve/app.py
@@ -1,5 +1,4 @@
 import logging
-import os
 import tempfile
 from contextlib import asynccontextmanager
 from io import BytesIO
@@ -8,7 +7,6 @@ from typing import Annotated, Any, Dict, List, Optional, Union

 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.document_converter import DocumentConverter
-from dotenv import load_dotenv
 from fastapi import BackgroundTasks, FastAPI, UploadFile
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import RedirectResponse
@@ -22,17 +20,9 @@ from docling_serve.docling_conversion import (
    converters,
    get_pdf_pipeline_opts,
 )
-from docling_serve.helper_functions import FormDepends, _str_to_bool
+from docling_serve.helper_functions import FormDepends
 from docling_serve.response_preparation import ConvertDocumentResponse, process_results
-
-# Load local env vars if present
-load_dotenv()
-
-WITH_UI = _str_to_bool(os.getenv("WITH_UI", "False"))
-if WITH_UI:
-    import gradio as gr
-
-    from docling_serve.gradio_ui import ui as gradio_ui
+from docling_serve.settings import docling_serve_settings


 # Set up custom logging as we'll be intermixes with FastAPI/Uvicorn's logging
@@ -70,7 +60,6 @@ _log = logging.getLogger(__name__)
 # Context manager to initialize and clean up the lifespan of the FastAPI app
@asynccontextmanager
 async def lifespan(app: FastAPI):
-    # settings = Settings()

    # Converter with default options
    pdf_format_option, options_hash = get_pdf_pipeline_opts(ConvertDocumentsOptions())
@@ -86,143 +75,156 @@ async def lifespan(app: FastAPI):
    yield

    converters.clear()
-    if WITH_UI:
-        gradio_ui.close()
+    # if WITH_UI:
+    #     gradio_ui.close()


 ##################################
 # App creation and configuration #
 ##################################

-app = FastAPI(
-    title="Docling Serve",
-    lifespan=lifespan,
-)

-origins = ["*"]
-methods = ["*"]
-headers = ["*"]
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=origins,
-    allow_credentials=True,
-    allow_methods=methods,
-    allow_headers=headers,
-)
-
-# Mount the Gradio app
-if WITH_UI:
-    tmp_output_dir = Path(tempfile.mkdtemp())
-    gradio_ui.gradio_output_dir = tmp_output_dir
-    app = gr.mount_gradio_app(
-        app,
-        gradio_ui,
-        path="/ui",
-        allowed_paths=["./logo.png", tmp_output_dir],
-        root_path="/ui",
+def create_app():
+    app = FastAPI(
+        title="Docling Serve",
+        lifespan=lifespan,
    )

+    origins = ["*"]
+    methods = ["*"]
+    headers = ["*"]

-#############################
-# API Endpoints definitions #
-#############################
-
-
-# Favicon
-@app.get("/favicon.ico", include_in_schema=False)
-async def favicon():
-    response = RedirectResponse(url="https://ds4sd.github.io/docling/assets/logo.png")
-    return response
-
-
-# Status
-class HealthCheckResponse(BaseModel):
-    status: str = "ok"
-
-
-@app.get("/health")
-def health() -> HealthCheckResponse:
-    return HealthCheckResponse()
-
-
-# API readiness compatibility for OpenShift AI Workbench
-@app.get("/api", include_in_schema=False)
-def api_check() -> HealthCheckResponse:
-    return HealthCheckResponse()
-
-
-# Convert a document from URL(s)
-@app.post(
-    "/v1alpha/convert/source",
-    response_model=ConvertDocumentResponse,
-    responses={
-        200: {
-            "content": {"application/zip": {}},
-            # "description": "Return the JSON item or an image.",
-        }
-    },
-)
-def process_url(
-    background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
-):
-    sources: List[Union[str, DocumentStream]] = []
-    headers: Optional[Dict[str, Any]] = None
-    if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
-        for file_source in conversion_request.file_sources:
-            sources.append(file_source.to_document_stream())
-    else:
-        for http_source in conversion_request.http_sources:
-            sources.append(http_source.url)
-            if headers is None and http_source.headers:
-                headers = http_source.headers
-
-    # Note: results are only an iterator->lazy evaluation
-    results = convert_documents(
-        sources=sources, options=conversion_request.options, headers=headers
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=origins,
+        allow_credentials=True,
+        allow_methods=methods,
+        allow_headers=headers,
    )

-    # The real processing will happen here
-    response = process_results(
-        background_tasks=background_tasks,
-        conversion_options=conversion_request.options,
-        conv_results=results,
+    # Mount the Gradio app
+    if docling_serve_settings.enable_ui:
+
+        try:
+            import gradio as gr
+
+            from docling_serve.gradio_ui import ui as gradio_ui
+
+            tmp_output_dir = Path(tempfile.mkdtemp())
+            gradio_ui.gradio_output_dir = tmp_output_dir
+            app = gr.mount_gradio_app(
+                app,
+                gradio_ui,
+                path="/ui",
+                allowed_paths=["./logo.png", tmp_output_dir],
+                root_path="/ui",
+            )
+        except ImportError:
+            _log.warning(
+                "Docling Serve enable_ui is activated, but gradio is not installed. "
+                "Install it with `pip install docling-serve[ui]` "
+                "or `pip install gradio`"
+            )
+
+    #############################
+    # API Endpoints definitions #
+    #############################
+
+    # Favicon
+    @app.get("/favicon.ico", include_in_schema=False)
+    async def favicon():
+        response = RedirectResponse(
+            url="https://ds4sd.github.io/docling/assets/logo.png"
+        )
+        return response
+
+    # Status
+    class HealthCheckResponse(BaseModel):
+        status: str = "ok"
+
+    @app.get("/health")
+    def health() -> HealthCheckResponse:
+        return HealthCheckResponse()
+
+    # API readiness compatibility for OpenShift AI Workbench
+    @app.get("/api", include_in_schema=False)
+    def api_check() -> HealthCheckResponse:
+        return HealthCheckResponse()
+
+    # Convert a document from URL(s)
+    @app.post(
+        "/v1alpha/convert/source",
+        response_model=ConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+                # "description": "Return the JSON item or an image.",
+            }
+        },
    )
+    def process_url(
+        background_tasks: BackgroundTasks, conversion_request: ConvertDocumentsRequest
+    ):
+        sources: List[Union[str, DocumentStream]] = []
+        headers: Optional[Dict[str, Any]] = None
+        if isinstance(conversion_request, ConvertDocumentFileSourcesRequest):
+            for file_source in conversion_request.file_sources:
+                sources.append(file_source.to_document_stream())
+        else:
+            for http_source in conversion_request.http_sources:
+                sources.append(http_source.url)
+                if headers is None and http_source.headers:
+                    headers = http_source.headers

-    return response
+        # Note: results are only an iterator->lazy evaluation
+        results = convert_documents(
+            sources=sources, options=conversion_request.options, headers=headers
+        )

+        # The real processing will happen here
+        response = process_results(
+            background_tasks=background_tasks,
+            conversion_options=conversion_request.options,
+            conv_results=results,
+        )

-# Convert a document from file(s)
-@app.post(
-    "/v1alpha/convert/file",
-    response_model=ConvertDocumentResponse,
-    responses={
-        200: {
-            "content": {"application/zip": {}},
-        }
-    },
-)
-async def process_file(
-    background_tasks: BackgroundTasks,
-    files: List[UploadFile],
-    options: Annotated[ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)],
-):
+        return response

-    _log.info(f"Received {len(files)} files for processing.")
-
-    # Load the uploaded files to Docling DocumentStream
-    file_sources = []
-    for file in files:
-        buf = BytesIO(file.file.read())
-        name = file.filename if file.filename else "file.pdf"
-        file_sources.append(DocumentStream(name=name, stream=buf))
-
-    results = convert_documents(sources=file_sources, options=options)
-
-    response = process_results(
-        background_tasks=background_tasks,
-        conversion_options=options,
-        conv_results=results,
+    # Convert a document from file(s)
+    @app.post(
+        "/v1alpha/convert/file",
+        response_model=ConvertDocumentResponse,
+        responses={
+            200: {
+                "content": {"application/zip": {}},
+            }
+        },
    )
+    async def process_file(
+        background_tasks: BackgroundTasks,
+        files: List[UploadFile],
+        options: Annotated[
+            ConvertDocumentsOptions, FormDepends(ConvertDocumentsOptions)
+        ],
+    ):

-    return response
+        _log.info(f"Received {len(files)} files for processing.")
+
+        # Load the uploaded files to Docling DocumentStream
+        file_sources = []
+        for file in files:
+            buf = BytesIO(file.file.read())
+            name = file.filename if file.filename else "file.pdf"
+            file_sources.append(DocumentStream(name=name, stream=buf))
+
+        results = convert_documents(sources=file_sources, options=options)
+
+        response = process_results(
+            background_tasks=background_tasks,
+            conversion_options=options,
+            conv_results=results,
+        )
+
+        return response
+
+    return app
--- a/docling_serve/settings.py
+++ b/docling_serve/settings.py
@@ -1,6 +1,28 @@
+from typing import Union
+
 from pydantic_settings import BaseSettings, SettingsConfigDict


-class Settings(BaseSettings):
+class UvicornSettings(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_prefix="UVICORN_", env_file=".env", extra="allow"
+    )

-    model_config = SettingsConfigDict(env_prefix="DOCLING_")
+    host: str = "0.0.0.0"
+    port: int = 5001
+    reload: bool = False
+    root_path: str = ""
+    proxy_headers: bool = True
+    workers: Union[int, None] = None
+
+
+class DoclingServeSettings(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_prefix="DOCLING_SERVE_", env_file=".env", extra="allow"
+    )
+
+    enable_ui: bool = False
+
+
+uvicorn_settings = UvicornSettings()
+docling_serve_settings = DoclingServeSettings()
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
    "pydantic~=2.10",
    "pydantic-settings~=2.4",
    "python-multipart>=0.0.14,<0.1.0",
+    "typer~=0.12",
    "uvicorn[standard]>=0.29.0,<1.0.0",
 ]

@@ -74,6 +75,7 @@ dev = [
 ]

 [tool.uv]
+package = true
 conflicts = [
  [
    { extra = "cpu" },
@@ -104,6 +106,9 @@ explicit = true
 [tool.setuptools.packages.find]
 include = ["docling_serve"]

+[project.scripts]
+docling-serve = "docling_serve.__main__:main"
+
 [project.urls]
 Homepage = "https://github.com/DS4SD/docling-serve"
 # Documentation = "https://ds4sd.github.io/docling"
@@ -118,6 +123,7 @@ include = '\.pyi?$'

 [tool.isort]
 profile = "black"
+known_third_party = ["docling", "docling_core"]
 line_length = 88
 py_version=311

--- a/start_server.sh
+++ b/start_server.sh
@@ -1,30 +0,0 @@
-#!/bin/bash
-set -Eeuo pipefail
-
-# Network settings
-export PORT="${PORT:-5001}"
-export HOST="${HOST:-"0.0.0.0"}"
-
-# Performance settings
-UVICORN_WORKERS="${UVICORN_WORKERS:-1}"
-
-# Development settings
-export WITH_UI="${WITH_UI:-"true"}"
-export RELOAD=${RELOAD:-"false"}
-
-# --------------------------------------
-# Process env settings
-
-EXTRA_ARGS=""
-if [ "$RELOAD" == "true" ]; then
-  EXTRA_ARGS="$EXTRA_ARGS --reload"
-fi
-
-# Launch
-exec uv run uvicorn \
-    docling_serve.app:app \
-    --host=${HOST} \
-    --port=${PORT} \
-    --timeout-keep-alive=600 \
-    ${EXTRA_ARGS} \
-    --workers=${UVICORN_WORKERS}
--- a/uv.lock
+++ b/uv.lock
@@ -583,7 +583,7 @@ wheels = [
 [[package]]
 name = "docling-serve"
 version = "0.2.0"
-source = { virtual = "." }
+source = { editable = "." }
 dependencies = [
    { name = "docling" },
    { name = "fastapi", extra = ["standard"] },
@@ -591,6 +591,7 @@ dependencies = [
    { name = "pydantic" },
    { name = "pydantic-settings" },
    { name = "python-multipart" },
+    { name = "typer" },
    { name = "uvicorn", extra = ["standard"] },
 ]

@@ -646,6 +647,7 @@ requires-dist = [
    { name = "torch", marker = "extra == 'cu124'", specifier = ">=2.6.0", index = "https://download.pytorch.org/whl/cu124", conflict = { package = "docling-serve", extra = "cu124" } },
    { name = "torchvision", marker = "extra == 'cpu'", specifier = ">=0.21.0", index = "https://download.pytorch.org/whl/cpu", conflict = { package = "docling-serve", extra = "cpu" } },
    { name = "torchvision", marker = "extra == 'cu124'", specifier = ">=0.21.0", index = "https://download.pytorch.org/whl/cu124", conflict = { package = "docling-serve", extra = "cu124" } },
+    { name = "typer", specifier = "~=0.12" },
    { name = "uvicorn", extras = ["standard"], specifier = ">=0.29.0,<1.0.0" },
 ]
 provides-extras = ["ui", "tesserocr", "rapidocr", "cpu", "cu124"]