9 Commits

Author SHA1 Message Date
DKL
025c4c8942 Cleanup. Text formatting. Fallback picture annotation. 2025-11-24 15:17:39 +01:00
DKL
8d5892b176 Revamp UI to SSR.
Signed-off-by: DKL <dkl@zurich.ibm.com>
2025-11-21 16:15:36 +01:00
Michele Dolfi
e437e830c9 fix: Dependencies updates – Docling 2.63.0 (#443)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-21 10:31:56 +01:00
Michele Dolfi
2c23f65507 feat: version endpoint (#442)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-11-20 17:57:10 +01:00
Burt Holzman
5dc942f25b chore: docs typo (cude -> cuda) (#437)
Signed-off-by: Burt Holzman <burt@fnal.gov>
2025-11-17 08:31:44 +01:00
github-actions[bot]
ff310f2b13 chore: bump version to 1.8.0 [skip ci] 2025-10-31 17:01:56 +00:00
Michele Dolfi
bf132a3c3e feat: Docling with new standard pipeline with threading (#428)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-10-31 17:57:38 +01:00
Michele Dolfi
35319b0da7 docs: Expand automatic docs to nested objects. More complete usage docs. (#426)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-10-31 15:02:20 +01:00
Michele Dolfi
f3957aeb57 docs: add docs for docling parameters like performance and debug (#424)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-10-31 14:17:31 +01:00
25 changed files with 6188 additions and 2361 deletions

View File

@@ -4,6 +4,7 @@ asgi
async
(?i)urls
uvicorn
Config
[Ww]ebserver
RQ
(?i)url

View File

@@ -1,3 +1,23 @@
## [v1.8.0](https://github.com/docling-project/docling-serve/releases/tag/v1.8.0) - 2025-10-31
### Feature
* Docling with new standard pipeline with threading ([#428](https://github.com/docling-project/docling-serve/issues/428)) ([`bf132a3`](https://github.com/docling-project/docling-serve/commit/bf132a3c3e615ddbe624841ea5b3a98593c00654))
### Documentation
* Expand automatic docs to nested objects. More complete usage docs. ([#426](https://github.com/docling-project/docling-serve/issues/426)) ([`35319b0`](https://github.com/docling-project/docling-serve/commit/35319b0da793a2a1a434fd2b60b7632e10ecced3))
* Add docs for docling parameters like performance and debug ([#424](https://github.com/docling-project/docling-serve/issues/424)) ([`f3957ae`](https://github.com/docling-project/docling-serve/commit/f3957aeb577097121fe9d0d21f75a50643f03369))
### Docling libraries included in this release:
- docling 2.60.0
- docling-core 2.50.0
- docling-ibm-models 3.10.2
- docling-jobkit 1.8.0
- docling-mcp 1.3.2
- docling-parse 4.7.0
- docling-serve 1.8.0
## [v1.7.2](https://github.com/docling-project/docling-serve/releases/tag/v1.7.2) - 2025-10-30
### Fix

View File

@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
def version_callback(value: bool) -> None:
if value:
docling_serve_version = importlib.metadata.version("docling_serve")
docling_serve_version = importlib.metadata.version("docling-serve")
docling_jobkit_version = importlib.metadata.version("docling-jobkit")
docling_version = importlib.metadata.version("docling")
docling_core_version = importlib.metadata.version("docling-core")
@@ -385,6 +385,11 @@ def rq_worker() -> Any:
allow_external_plugins=docling_serve_settings.allow_external_plugins,
max_num_pages=docling_serve_settings.max_num_pages,
max_file_size=docling_serve_settings.max_file_size,
queue_max_size=docling_serve_settings.queue_max_size,
ocr_batch_size=docling_serve_settings.ocr_batch_size,
layout_batch_size=docling_serve_settings.layout_batch_size,
table_batch_size=docling_serve_settings.table_batch_size,
batch_polling_interval_seconds=docling_serve_settings.batch_polling_interval_seconds,
)
run_worker(

View File

@@ -54,7 +54,7 @@ from docling_jobkit.orchestrators.base_orchestrator import (
TaskNotFoundError,
)
from docling_serve.auth import APIKeyAuth, AuthenticationResult
from docling_serve.auth import APIKeyHeaderAuth, AuthenticationResult
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
from docling_serve.datamodel.requests import (
ConvertDocumentsRequest,
@@ -76,7 +76,7 @@ from docling_serve.datamodel.responses import (
TaskStatusResponse,
WebsocketMessage,
)
from docling_serve.helper_functions import FormDepends
from docling_serve.helper_functions import DOCLING_VERSIONS, FormDepends
from docling_serve.orchestrator_factory import get_async_orchestrator
from docling_serve.response_preparation import prepare_response
from docling_serve.settings import docling_serve_settings
@@ -167,7 +167,7 @@ def create_app(): # noqa: C901
offline_docs_assets = True
_log.info("Found static assets.")
require_auth = APIKeyAuth(docling_serve_settings.api_key)
require_auth = APIKeyHeaderAuth(docling_serve_settings.api_key)
app = FastAPI(
title="Docling Serve",
docs_url=None if offline_docs_assets else "/swagger",
@@ -188,39 +188,6 @@ def create_app(): # noqa: C901
allow_headers=headers,
)
# Mount the Gradio app
if docling_serve_settings.enable_ui:
try:
import gradio as gr
from docling_serve.gradio_ui import ui as gradio_ui
from docling_serve.settings import uvicorn_settings
tmp_output_dir = get_scratch() / "gradio"
tmp_output_dir.mkdir(exist_ok=True, parents=True)
gradio_ui.gradio_output_dir = tmp_output_dir
# Build the root_path for Gradio, accounting for UVICORN_ROOT_PATH
gradio_root_path = (
f"{uvicorn_settings.root_path}/ui"
if uvicorn_settings.root_path
else "/ui"
)
app = gr.mount_gradio_app(
app,
gradio_ui,
path="/ui",
allowed_paths=["./logo.png", tmp_output_dir],
root_path=gradio_root_path,
)
except ImportError:
_log.warning(
"Docling Serve enable_ui is activated, but gradio is not installed. "
"Install it with `pip install docling-serve[ui]` "
"or `pip install gradio`"
)
#############################
# Offline assets definition #
#############################
@@ -437,6 +404,16 @@ def create_app(): # noqa: C901
def api_check() -> HealthCheckResponse:
return HealthCheckResponse()
# Docling versions
@app.get("/version", tags=["health"])
def version_info() -> dict:
    """Return the versions of the Docling packages and the runtime.

    Raises:
        HTTPException: 403 when ``show_version_info`` is disabled in the
            server settings.
    """
    if docling_serve_settings.show_version_info:
        return DOCLING_VERSIONS

    # Version details are disabled by configuration.
    raise HTTPException(
        status_code=status.HTTP_403_FORBIDDEN,
        detail="Forbidden. The server is configured for not showing version details.",
    )
# Convert a document from URL(s)
@app.post(
"/v1/convert/source",
@@ -1011,4 +988,20 @@ def create_app(): # noqa: C901
await orchestrator.clear_results(older_than=older_then)
return ClearResponse()
# Optional UI
if docling_serve_settings.enable_ui:
    # The UI is an optional extra: import lazily so the API still starts
    # when the UI dependencies are not installed.
    try:
        from docling_serve.ui.app import create_ui_app

        ui_app = create_ui_app(
            process_file_async, process_url_async, task_result, task_status_poll
        )
        app.mount("/ui", app=ui_app, name="ui")
    except ImportError as ex:
        _log.error(ex)
        # Adjacent string literals are concatenated verbatim, so the first
        # one must end with a space to keep the two sentences separated.
        _log.warning(
            "Docling Serve enable_ui is activated, but its dependencies are not installed. "
            "Install it with `uv sync --extra ui`"
        )
return app

View File

@@ -1,7 +1,7 @@
from typing import Any
from fastapi import HTTPException, Request, status
from fastapi.security import APIKeyHeader
from fastapi import HTTPException, Request, Response, status
from fastapi.security import APIKeyCookie, APIKeyHeader
from pydantic import BaseModel
@@ -11,46 +11,79 @@ class AuthenticationResult(BaseModel):
detail: Any | None = None
class APIKeyAuth(APIKeyHeader):
"""
FastAPI dependency which evaluates a status API Key.
"""
class KeyValidator:
def __init__(
self,
api_key: str,
header_name: str = "X-Api-Key",
field_name: str = "X-Api-Key",
fail_on_unauthorized: bool = True,
) -> None:
self.api_key = api_key
self.header_name = header_name
super().__init__(name=self.header_name, auto_error=False)
self.field_name = field_name
self.fail_on_unauthorized = fail_on_unauthorized
async def _validate_api_key(self, header_api_key: str | None):
if header_api_key is None:
return AuthenticationResult(
valid=False, errors=[f"Missing header {self.header_name}."]
)
async def __call__(self, candidate_key: str | None):
if candidate_key is None:
return self._error(f"Missing field {self.field_name}.")
header_api_key = header_api_key.strip()
candidate_key = candidate_key.strip()
# Otherwise check the apikey
if header_api_key == self.api_key or self.api_key == "":
if candidate_key == self.api_key or self.api_key == "":
return AuthenticationResult(
valid=True,
detail=header_api_key,
detail=candidate_key, # Remove?
)
else:
return self._error("The provided API Key is invalid.")
def _error(self, error: str):
if self.fail_on_unauthorized and self.api_key:
raise HTTPException(status.HTTP_401_UNAUTHORIZED, error)
else:
return AuthenticationResult(
valid=False,
errors=["The provided API Key is invalid."],
errors=[error],
)
class APIKeyHeaderAuth(APIKeyHeader):
"""
FastAPI dependency which evaluates a status API Key in a header.
"""
def __init__(self, validator: str | KeyValidator) -> None:
self.validator = (
KeyValidator(validator) if isinstance(validator, str) else validator
)
super().__init__(name=self.validator.field_name, auto_error=False)
async def __call__(self, request: Request) -> AuthenticationResult: # type: ignore
header_api_key = await super().__call__(request=request)
result = await self._validate_api_key(header_api_key)
if self.api_key and not result.valid:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail=result.detail
)
return result
key = await super().__call__(request=request)
return await self.validator(key)
class APIKeyCookieAuth(APIKeyCookie):
    """
    FastAPI dependency which evaluates an API key carried in a cookie.

    The cookie name is the ``field_name`` of the wrapped ``KeyValidator``.
    """

    def __init__(self, validator: str | KeyValidator) -> None:
        # A plain string is treated as the expected API key and wrapped in a
        # validator with default settings.
        if isinstance(validator, str):
            self.validator = KeyValidator(validator)
        else:
            self.validator = validator
        # auto_error=False: a missing cookie is passed on to the validator as
        # None rather than being rejected by the base class.
        super().__init__(name=self.validator.field_name, auto_error=False)

    async def __call__(self, request: Request) -> AuthenticationResult:  # type: ignore
        """Read the cookie from ``request`` and hand its value to the validator."""
        cookie_value = await super().__call__(request=request)
        return await self.validator(cookie_value)

    def _set_api_key(self, response: Response, api_key: str, expires=24 * 3600):
        """Store ``api_key`` on ``response`` as a secure, HTTP-only, same-site cookie."""
        cookie_name = self.validator.field_name
        response.set_cookie(
            key=cookie_name,
            value=api_key,
            expires=expires,
            secure=True,
            httponly=True,
            samesite="strict",
        )

View File

@@ -1,921 +0,0 @@
import base64
import importlib
import itertools
import json
import logging
import ssl
import sys
import tempfile
import time
from pathlib import Path
from typing import Optional
import certifi
import gradio as gr
import httpx
from docling.datamodel.base_models import FormatToExtensions
from docling.datamodel.pipeline_options import (
PdfBackend,
ProcessingPipeline,
TableFormerMode,
TableStructureOptions,
)
from docling_serve.helper_functions import _to_list_of_strings
from docling_serve.settings import docling_serve_settings, uvicorn_settings
logger = logging.getLogger(__name__)
############################
# Path of static artifacts #
############################
logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7"
if (
docling_serve_settings.static_path is not None
and docling_serve_settings.static_path.is_dir()
):
logo_path = str(docling_serve_settings.static_path / "logo.svg")
js_components_url = "/static/docling-components.js"
##############################
# Head JS for web components #
##############################
head = f"""
<script src="{js_components_url}" type="module"></script>
"""
#################
# CSS and theme #
#################
css = """
#logo {
border-style: none;
background: none;
box-shadow: none;
min-width: 80px;
}
#dark_mode_column {
display: flex;
align-content: flex-end;
}
#title {
text-align: left;
display:block;
height: auto;
padding-top: 5px;
line-height: 0;
}
.title-text h1 > p, .title-text p {
margin-top: 0px !important;
margin-bottom: 2px !important;
}
#custom-container {
border: 0.909091px solid;
padding: 10px;
border-radius: 4px;
}
#custom-container h4 {
font-size: 14px;
}
#file_input_zone {
height: 140px;
}
docling-img {
gap: 1rem;
}
docling-img::part(page) {
box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
}
"""
theme = gr.themes.Default(
text_size="md",
spacing_size="md",
font=[
gr.themes.GoogleFont("Red Hat Display"),
"ui-sans-serif",
"system-ui",
"sans-serif",
],
font_mono=[
gr.themes.GoogleFont("Red Hat Mono"),
"ui-monospace",
"Consolas",
"monospace",
],
)
#############
# Variables #
#############
# Directory under which downloadable result files are written; it is read by
# response_to_output() as the parent for its temporary folders.
gradio_output_dir = None # Will be set by FastAPI when mounted
# NOTE(review): response_to_output() assigns a *local* variable with this same
# name and never declares it global, so this module-level value appears to
# stay None — confirm before relying on it.
file_output_path = None # Will be set when a new file is generated
#############
# Functions #
#############
def get_api_endpoint() -> str:
    """Return the base URL of the Docling Serve API used by the UI.

    The scheme is ``https`` when uvicorn is configured with an SSL key file
    and ``http`` otherwise; host and port are taken from the settings.
    """
    scheme = "https" if uvicorn_settings.ssl_keyfile is not None else "http"
    host = docling_serve_settings.api_host
    port = uvicorn_settings.port
    return f"{scheme}://{host}:{port}"
def get_ssl_context() -> ssl.SSLContext:
    """Build the TLS context used for calls from the UI to the API.

    The context starts from the certifi CA bundle. When TLS is enabled, the
    API host contains ``.svc.`` and the Kubernetes service CA file exists,
    that CA is trusted as well.
    """
    context = ssl.create_default_context(cafile=certifi.where())
    service_ca = Path("/run/secrets/kubernetes.io/serviceaccount/service-ca.crt")

    # Guard clauses mirror the short-circuit order of the three conditions.
    if uvicorn_settings.ssl_keyfile is None:
        return context
    if ".svc." not in docling_serve_settings.api_host:
        return context
    if service_ca.exists():
        context.load_verify_locations(cafile=service_ca)
    return context
def health_check():
    """Probe the API ``/health`` endpoint and report the outcome.

    Returns:
        ``"Healthy"`` when the endpoint answers with HTTP 200, ``"Unhealthy"``
        for any other status code or when the request itself fails.
    """
    try:
        # Use the same TLS context and a bounded timeout as the other API
        # calls in this module, so HTTPS endpoints signed by the cluster CA
        # verify correctly and a hung server cannot block the caller forever.
        response = httpx.get(
            f"{get_api_endpoint()}/health",
            verify=get_ssl_context(),
            timeout=15,
        )
    except httpx.HTTPError as e:
        # Connection/timeout errors mean the service is not reachable.
        logger.warning(f"Health check request failed: {e}")
        return "Unhealthy"
    return "Healthy" if response.status_code == 200 else "Unhealthy"
def set_options_visibility(x):
    """Return an "Options" accordion whose open state is ``x``."""
    options_accordion = gr.Accordion("Options", open=x)
    return options_accordion
def set_outputs_visibility_direct(x, y):
    """Return two rows: the content row with visibility ``x`` and the file row with visibility ``y``."""
    content_row = gr.Row(visible=x)
    file_row = gr.Row(visible=y)
    return content_row, file_row
def set_task_id_visibility(x):
    """Return a row for the task-id display whose visibility is ``x``."""
    return gr.Row(visible=x)
def set_outputs_visibility_process(x):
    """Return the content and file rows with opposite visibility.

    ``x`` is the "return as file" flag: the file row is shown when it is
    truthy and the content row when it is falsy.
    """
    content_row = gr.Row(visible=not x)
    file_row = gr.Row(visible=x)
    return content_row, file_row
def set_download_button_label(label_text: gr.State):
    """Return a download button labelled with the string form of ``label_text``."""
    button_label = str(label_text)
    return gr.DownloadButton(label=button_label, scale=1)
def clear_outputs():
    """Return nine empty strings, one per output component to reset.

    Positions, in order: task id, markdown (raw, rendered), JSON (raw,
    rendered), HTML (raw, rendered), text, doctags.
    """
    blank = ""
    return (
        blank,  # task id
        blank,  # markdown
        blank,  # markdown rendered
        blank,  # json
        blank,  # json rendered
        blank,  # html
        blank,  # html rendered
        blank,  # text
        blank,  # doctags
    )
def clear_url_input() -> str:
    """Return the empty string used to reset the URL textbox."""
    empty_url = ""
    return empty_url
def clear_file_input() -> None:
    """Return ``None``, the value used to reset the file upload component."""
    no_files = None
    return no_files
def auto_set_return_as_file(
    url_input_value: str,
    file_input_value: Optional[list[str]],
    image_export_mode_value: str,
):
    """Decide whether the result has to be delivered as a file.

    Returns ``True`` when the URL field holds more than one comma-separated
    entry, when more than one file was uploaded, or when the image export
    mode is ``"referenced"``; otherwise ``False``.
    """
    url_count = len(url_input_value.split(","))
    if url_count > 1:
        return True
    if file_input_value and len(file_input_value) > 1:
        return True
    return image_export_mode_value == "referenced"
def change_ocr_lang(ocr_engine):
    """Return the update for the OCR language textbox for ``ocr_engine``.

    Known engines make the textbox visible and pre-fill it with that
    engine's default language list; any other value hides and empties it.
    """
    default_languages = {
        "easyocr": "en,fr,de,es",
        "tesseract_cli": "eng,fra,deu,spa",
        "tesseract": "eng,fra,deu,spa",
        "rapidocr": "english,chinese",
        "ocrmac": "fr-FR,de-DE,es-ES,en-US",
    }
    languages = default_languages.get(ocr_engine)
    if languages is None:
        return gr.update(visible=False, value="")
    return gr.update(visible=True, value=languages)
def wait_task_finish(auth: str, task_id: str, return_as_file: bool):
    """Poll a conversion task until it finishes and return its UI outputs.

    Args:
        auth: API key typed into the UI; sent as ``X-Api-Key`` only when the
            server is configured with an API key.
        task_id: Identifier returned when the task was submitted.
        return_as_file: Forwarded to ``response_to_output`` to choose between
            a downloadable file and in-page content.

    Returns:
        The tuple produced by ``response_to_output`` for the task result.

    Raises:
        gr.Error: If polling fails, the task ends as ``failure``/``revoked``,
            or the result cannot be fetched or decoded.
    """
    headers = {}
    if docling_serve_settings.api_key:
        headers["X-Api-Key"] = str(auth)

    ssl_ctx = get_ssl_context()
    task_status = ""

    while True:
        try:
            # The endpoint long-polls for up to 5 s (wait=5).
            response = httpx.get(
                f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
                headers=headers,
                verify=ssl_ctx,
                timeout=15,
            )
            task_status = response.json()["task_status"]
            if task_status in ("failure", "revoked"):
                raise RuntimeError(f"Task failed with status {task_status!r}")
        except Exception as e:
            logger.error(f"Error processing file(s): {e}")
            raise gr.Error(
                f"Error processing file(s): {e}", print_exception=False
            ) from e

        if task_status == "success":
            # Leave immediately: sleeping after the task is done only delays
            # showing the result.
            break
        time.sleep(5)

    try:
        response = httpx.get(
            f"{get_api_endpoint()}/v1/result/{task_id}",
            headers=headers,
            timeout=15,
            verify=ssl_ctx,
        )
        return response_to_output(response, return_as_file)
    except Exception as e:
        logger.error(f"Error getting task result: {e}")
        raise gr.Error(
            f"Error getting task result, conversion finished with status: {task_status}"
        ) from e
def process_url(
    auth,
    input_sources,
    to_formats,
    image_export_mode,
    pipeline,
    ocr,
    force_ocr,
    ocr_engine,
    ocr_lang,
    pdf_backend,
    table_mode,
    abort_on_error,
    return_as_file,
    do_code_enrichment,
    do_formula_enrichment,
    do_picture_classification,
    do_picture_description,
):
    """Submit one or more URLs for asynchronous conversion.

    Args:
        auth: API key typed into the UI; sent as ``X-Api-Key`` only when the
            server is configured with an API key.
        input_sources: Comma-separated list of source URLs.
        return_as_file: When true the result target is ``zip``, otherwise
            ``inbody``.
        (remaining arguments): Conversion options forwarded unchanged in the
            ``options`` section of the request.

    Returns:
        The ``task_id`` reported by the API for the submitted job.

    Raises:
        gr.Error: If no URL is given, the request fails, or the API answers
            with a non-200 status.
    """
    # Tolerate spaces around the commas and ignore empty entries such as a
    # trailing comma.
    urls = [entry.strip() for entry in input_sources.split(",")]
    sources = [{"kind": "http", "url": url} for url in urls if url]
    if not sources:
        logger.error("No input sources provided.")
        raise gr.Error("No input sources provided.", print_exception=False)

    parameters = {
        "sources": sources,
        "options": {
            "to_formats": to_formats,
            "image_export_mode": image_export_mode,
            "pipeline": pipeline,
            "ocr": ocr,
            "force_ocr": force_ocr,
            "ocr_engine": ocr_engine,
            "ocr_lang": _to_list_of_strings(ocr_lang),
            "pdf_backend": pdf_backend,
            "table_mode": table_mode,
            "abort_on_error": abort_on_error,
            "do_code_enrichment": do_code_enrichment,
            "do_formula_enrichment": do_formula_enrichment,
            "do_picture_classification": do_picture_classification,
            "do_picture_description": do_picture_description,
        },
        "target": {"kind": "zip" if return_as_file else "inbody"},
    }

    # The headers carry the API key, so they must never be printed or logged.
    headers = {}
    if docling_serve_settings.api_key:
        headers["X-Api-Key"] = str(auth)

    try:
        ssl_ctx = get_ssl_context()
        response = httpx.post(
            f"{get_api_endpoint()}/v1/convert/source/async",
            json=parameters,
            headers=headers,
            verify=ssl_ctx,
            timeout=60,
        )
    except Exception as e:
        logger.error(f"Error processing URL: {e}")
        raise gr.Error(f"Error processing URL: {e}", print_exception=False) from e

    if response.status_code != 200:
        data = response.json()
        error_message = data.get("detail", "An unknown error occurred.")
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)

    return response.json()["task_id"]
def file_to_base64(file):
    """Return the content of the file at ``file.name`` as a base64 string.

    ``file`` only needs a ``name`` attribute holding a readable path.
    """
    raw_bytes = Path(file.name).read_bytes()
    return base64.b64encode(raw_bytes).decode("utf-8")
def process_file(
    auth,
    files,
    to_formats,
    image_export_mode,
    pipeline,
    ocr,
    force_ocr,
    ocr_engine,
    ocr_lang,
    pdf_backend,
    table_mode,
    abort_on_error,
    return_as_file,
    do_code_enrichment,
    do_formula_enrichment,
    do_picture_classification,
    do_picture_description,
):
    """Submit uploaded files for asynchronous conversion.

    Each file is sent inline as a base64 string together with its name. The
    remaining arguments are forwarded as conversion options. Returns the
    ``task_id`` reported by the API and raises ``gr.Error`` when no file is
    given, the request fails, or the API answers with a non-200 status.
    """
    if not files:
        logger.error("No files provided.")
        raise gr.Error("No files provided.", print_exception=False)

    sources = []
    for uploaded in files:
        sources.append(
            {
                "kind": "file",
                "base64_string": file_to_base64(uploaded),
                "filename": uploaded.name,
            }
        )

    options = {
        "to_formats": to_formats,
        "image_export_mode": image_export_mode,
        "pipeline": pipeline,
        "ocr": ocr,
        "force_ocr": force_ocr,
        "ocr_engine": ocr_engine,
        "ocr_lang": _to_list_of_strings(ocr_lang),
        "pdf_backend": pdf_backend,
        "table_mode": table_mode,
        "abort_on_error": abort_on_error,
        "return_as_file": return_as_file,
        "do_code_enrichment": do_code_enrichment,
        "do_formula_enrichment": do_formula_enrichment,
        "do_picture_classification": do_picture_classification,
        "do_picture_description": do_picture_description,
    }
    payload = {
        "sources": sources,
        "options": options,
        "target": {"kind": "zip" if return_as_file else "inbody"},
    }

    request_headers = {}
    if docling_serve_settings.api_key:
        request_headers["X-Api-Key"] = str(auth)

    try:
        tls_context = get_ssl_context()
        response = httpx.post(
            f"{get_api_endpoint()}/v1/convert/source/async",
            json=payload,
            headers=request_headers,
            verify=tls_context,
            timeout=60,
        )
    except Exception as e:
        logger.error(f"Error processing file(s): {e}")
        raise gr.Error(f"Error processing file(s): {e}", print_exception=False)

    if response.status_code != 200:
        error_message = response.json().get("detail", "An unknown error occurred.")
        logger.error(f"Error processing file: {error_message}")
        raise gr.Error(f"Error processing file: {error_message}", print_exception=False)

    submitted = response.json()
    return submitted["task_id"]
def response_to_output(response, return_as_file):
    """Turn a task-result HTTP response into values for the output widgets.

    Args:
        response: Response of the result endpoint. For file results its body
            is the file content; otherwise its JSON body holds a ``document``
            object with the exported formats.
        return_as_file: Whether the result is a downloadable file.

    Returns:
        A 9-tuple: markdown (twice: raw and rendered), JSON text, HTML
        snippet rendering the JSON, HTML (twice: raw and rendered), text,
        doctags, and the download button.
    """
    markdown_content = ""
    json_content = ""
    json_rendered_content = ""
    html_content = ""
    text_content = ""
    doctags_content = ""
    download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1)

    if return_as_file:
        # The header may be absent; fall back to a generic name instead of
        # failing on None.
        disposition = response.headers.get("Content-Disposition") or ""
        _, _, raw_name = disposition.partition("filename=")
        # Keep only the last path component so a crafted header cannot make
        # the file land outside the temporary directory.
        filename = Path(raw_name.strip().strip('"')).name
        if filename in ("", ".."):
            filename = "output.zip"

        tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_"))
        file_output_path = str(tmp_output_dir / filename)
        with open(file_output_path, "wb") as f:
            f.write(response.content)
        download_button = gr.DownloadButton(
            visible=True, label=f"Download {filename}", scale=1, value=file_output_path
        )
    else:
        document = response.json().get("document")
        markdown_content = document.get("md_content")
        json_content = json.dumps(document.get("json_content"), indent=2)
        # Document text is untrusted: a literal "</script>" inside a JSON
        # string would close the element early. "<\/" is an equivalent JSON
        # escape, so the parsed value is unchanged.
        embedded_json = json_content.replace("</", "<\\/")
        # Embed document JSON and trigger load at client via an image.
        json_rendered_content = f"""
        <docling-img id="dclimg" pagenumbers><docling-tooltip></docling-tooltip></docling-img>
        <script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{embedded_json}</script>
        <img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
        """
        html_content = document.get("html_content")
        text_content = document.get("text_content")
        doctags_content = document.get("doctags_content")

    return (
        markdown_content,
        markdown_content,
        json_content,
        json_rendered_content,
        html_content,
        html_content,
        text_content,
        doctags_content,
        download_button,
    )
############
# UI Setup #
############
with gr.Blocks(
head=head,
css=css,
theme=theme,
title="Docling Serve",
delete_cache=(3600, 36000), # Delete all files older than 10 hour every hour
) as ui:
# Constants stored in states to be able to pass them as inputs to functions
processing_text = gr.State("Processing your document(s), please wait...")
true_bool = gr.State(True)
false_bool = gr.State(False)
# Banner
with gr.Row(elem_id="check_health"):
# Logo
with gr.Column(scale=1, min_width=90):
try:
gr.Image(
logo_path,
height=80,
width=80,
show_download_button=False,
show_label=False,
show_fullscreen_button=False,
container=False,
elem_id="logo",
scale=0,
)
except Exception:
logger.warning("Logo not found.")
# Title
with gr.Column(scale=1, min_width=200):
gr.Markdown(
f"# Docling Serve \n(docling version: "
f"{importlib.metadata.version('docling')})",
elem_id="title",
elem_classes=["title-text"],
)
# Dark mode button
with gr.Column(scale=16, elem_id="dark_mode_column"):
dark_mode_btn = gr.Button("Dark/Light Mode", scale=0)
dark_mode_btn.click(
None,
None,
None,
js="""() => {
if (document.querySelectorAll('.dark').length) {
document.querySelectorAll('.dark').forEach(
el => el.classList.remove('dark')
);
} else {
document.querySelector('body').classList.add('dark');
}
}""",
show_api=False,
)
# URL Processing Tab
with gr.Tab("Convert URL"):
with gr.Row():
with gr.Column(scale=4):
url_input = gr.Textbox(
label="URL Input Source",
placeholder="https://arxiv.org/pdf/2501.17887",
)
with gr.Column(scale=1):
url_process_btn = gr.Button("Process URL", scale=1)
url_reset_btn = gr.Button("Reset", scale=1)
# File Processing Tab
with gr.Tab("Convert File"):
    with gr.Row():
        with gr.Column(scale=4):
            # Materialize the extensions: chain.from_iterable() is a one-shot
            # iterator, so iterating it twice would leave the upper-case
            # list empty.
            raw_exts = list(itertools.chain.from_iterable(FormatToExtensions.values()))
            file_input = gr.File(
                elem_id="file_input_zone",
                label="Upload File",
                file_types=[
                    f".{v.lower()}"
                    for v in raw_exts  # lowercase
                ]
                + [
                    f".{v.upper()}"
                    for v in raw_exts  # uppercase
                ],
                file_count="multiple",
                scale=4,
            )
        with gr.Column(scale=1):
            file_process_btn = gr.Button("Process File", scale=1)
            file_reset_btn = gr.Button("Reset", scale=1)
# Auth
with gr.Row(visible=bool(docling_serve_settings.api_key)):
with gr.Column():
auth = gr.Textbox(
label="Authentication",
placeholder="API Key",
type="password",
)
# Options
with gr.Accordion("Options") as options:
with gr.Row():
with gr.Column(scale=1):
to_formats = gr.CheckboxGroup(
[
("Docling (JSON)", "json"),
("Markdown", "md"),
("HTML", "html"),
("Plain Text", "text"),
("Doc Tags", "doctags"),
],
label="To Formats",
value=["json", "md"],
)
with gr.Column(scale=1):
image_export_mode = gr.Radio(
[
("Embedded", "embedded"),
("Placeholder", "placeholder"),
("Referenced", "referenced"),
],
label="Image Export Mode",
value="embedded",
)
with gr.Row():
with gr.Column(scale=1, min_width=200):
pipeline = gr.Radio(
[(v.value.capitalize(), v.value) for v in ProcessingPipeline],
label="Pipeline type",
value=ProcessingPipeline.STANDARD.value,
)
with gr.Row():
with gr.Column(scale=1, min_width=200):
ocr = gr.Checkbox(label="Enable OCR", value=True)
force_ocr = gr.Checkbox(label="Force OCR", value=False)
with gr.Column(scale=1):
engines_list = [
("Auto", "auto"),
("EasyOCR", "easyocr"),
("Tesseract", "tesseract"),
("RapidOCR", "rapidocr"),
]
if sys.platform == "darwin":
engines_list.append(("OCRMac", "ocrmac"))
ocr_engine = gr.Radio(
engines_list,
label="OCR Engine",
value="auto",
)
with gr.Column(scale=1, min_width=200):
ocr_lang = gr.Textbox(
label="OCR Language (beware of the format)",
value="en,fr,de,es",
visible=False,
)
ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
with gr.Row():
with gr.Column(scale=4):
pdf_backend = gr.Radio(
[v.value for v in PdfBackend],
label="PDF Backend",
value=PdfBackend.DLPARSE_V4.value,
)
with gr.Column(scale=2):
table_mode = gr.Radio(
[(v.value.capitalize(), v.value) for v in TableFormerMode],
label="Table Mode",
value=TableStructureOptions().mode.value,
)
with gr.Column(scale=1):
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
return_as_file = gr.Checkbox(label="Return as File", value=False)
with gr.Row():
with gr.Column():
do_code_enrichment = gr.Checkbox(
label="Enable code enrichment", value=False
)
do_formula_enrichment = gr.Checkbox(
label="Enable formula enrichment", value=False
)
with gr.Column():
do_picture_classification = gr.Checkbox(
label="Enable picture classification", value=False
)
do_picture_description = gr.Checkbox(
label="Enable picture description", value=False
)
# Task id output
with gr.Row(visible=False) as task_id_output:
task_id_rendered = gr.Textbox(label="Task id", interactive=False)
# Document output
with gr.Row(visible=False) as content_output:
with gr.Tab("Docling (JSON)"):
output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
with gr.Tab("Docling-Rendered"):
output_json_rendered = gr.HTML(label="Response")
with gr.Tab("Markdown"):
output_markdown = gr.Code(
language="markdown", wrap_lines=True, show_label=False
)
with gr.Tab("Markdown-Rendered"):
output_markdown_rendered = gr.Markdown(label="Response")
with gr.Tab("HTML"):
output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
with gr.Tab("HTML-Rendered"):
output_html_rendered = gr.HTML(label="Response")
with gr.Tab("Text"):
output_text = gr.Code(wrap_lines=True, show_label=False)
with gr.Tab("DocTags"):
output_doctags = gr.Code(wrap_lines=True, show_label=False)
# File download output
with gr.Row(visible=False) as file_output:
download_file_btn = gr.DownloadButton(label="Placeholder", scale=1)
##############
# UI Actions #
##############
# Handle Return as File
url_input.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
file_input.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
image_export_mode.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
# URL processing
url_process_btn.click(
set_options_visibility, inputs=[false_bool], outputs=[options]
).then(
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
).then(
clear_outputs,
inputs=None,
outputs=[
task_id_rendered,
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(
set_task_id_visibility,
inputs=[true_bool],
outputs=[task_id_output],
).then(
process_url,
inputs=[
auth,
url_input,
to_formats,
image_export_mode,
pipeline,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
do_code_enrichment,
do_formula_enrichment,
do_picture_classification,
do_picture_description,
],
outputs=[
task_id_rendered,
],
).then(
set_outputs_visibility_process,
inputs=[return_as_file],
outputs=[content_output, file_output],
).then(
wait_task_finish,
inputs=[auth, task_id_rendered, return_as_file],
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
download_file_btn,
],
)
url_reset_btn.click(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
set_outputs_visibility_direct,
inputs=[false_bool, false_bool],
outputs=[content_output, file_output],
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
clear_url_input, inputs=None, outputs=[url_input]
)
# File processing
file_process_btn.click(
set_options_visibility, inputs=[false_bool], outputs=[options]
).then(
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
).then(
clear_outputs,
inputs=None,
outputs=[
task_id_rendered,
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(
set_task_id_visibility,
inputs=[true_bool],
outputs=[task_id_output],
).then(
process_file,
inputs=[
auth,
file_input,
to_formats,
image_export_mode,
pipeline,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
do_code_enrichment,
do_formula_enrichment,
do_picture_classification,
do_picture_description,
],
outputs=[
task_id_rendered,
],
).then(
set_outputs_visibility_process,
inputs=[return_as_file],
outputs=[content_output, file_output],
).then(
wait_task_finish,
inputs=[auth, task_id_rendered, return_as_file],
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
download_file_btn,
],
)
file_reset_btn.click(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_json_rendered,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
set_outputs_visibility_direct,
inputs=[false_bool, false_bool],
outputs=[content_output, file_output],
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
clear_file_input, inputs=None, outputs=[file_input]
)

View File

@@ -1,11 +1,25 @@
import importlib.metadata
import inspect
import json
import platform
import re
import sys
from typing import Union, get_args, get_origin
from fastapi import Depends, Form
from pydantic import BaseModel, TypeAdapter
# Versions of the installed Docling packages plus interpreter and platform
# details, resolved once at import time.
DOCLING_VERSIONS = {
    "docling-serve": importlib.metadata.version("docling-serve"),
    "docling-jobkit": importlib.metadata.version("docling-jobkit"),
    "docling": importlib.metadata.version("docling"),
    "docling-core": importlib.metadata.version("docling-core"),
    "docling-ibm-models": importlib.metadata.version("docling-ibm-models"),
    "docling-parse": importlib.metadata.version("docling-parse"),
    "python": f"{sys.implementation.cache_tag} ({platform.python_version()})",
    # Key was misspelled "plaform".
    "platform": platform.platform(),
}
def is_pydantic_model(type_):
try:

View File

@@ -288,6 +288,11 @@ def get_async_orchestrator() -> BaseOrchestrator:
allow_external_plugins=docling_serve_settings.allow_external_plugins,
max_num_pages=docling_serve_settings.max_num_pages,
max_file_size=docling_serve_settings.max_file_size,
queue_max_size=docling_serve_settings.queue_max_size,
ocr_batch_size=docling_serve_settings.ocr_batch_size,
layout_batch_size=docling_serve_settings.layout_batch_size,
table_batch_size=docling_serve_settings.table_batch_size,
batch_polling_interval_seconds=docling_serve_settings.batch_polling_interval_seconds,
)
cm = DoclingConverterManager(config=cm_config)

View File

@@ -50,6 +50,7 @@ class DoclingServeSettings(BaseSettings):
options_cache_size: int = 2
enable_remote_services: bool = False
allow_external_plugins: bool = False
show_version_info: bool = True
api_key: str = ""
@@ -57,6 +58,13 @@ class DoclingServeSettings(BaseSettings):
max_num_pages: int = sys.maxsize
max_file_size: int = sys.maxsize
# Threading pipeline
queue_max_size: Optional[int] = None
ocr_batch_size: Optional[int] = None
layout_batch_size: Optional[int] = None
table_batch_size: Optional[int] = None
batch_polling_interval_seconds: Optional[float] = None
sync_poll_interval: int = 2 # seconds
max_sync_wait: int = 120 # 2 minutes

View File

278
docling_serve/ui/app.py Normal file
View File

@@ -0,0 +1,278 @@
import io
import logging
from pathlib import Path
from typing import Annotated
from fastapi import (
BackgroundTasks,
Depends,
FastAPI,
Form,
HTTPException,
Request,
UploadFile,
status,
)
from fastapi.responses import HTMLResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import AnyHttpUrl
from pyjsx import auto_setup # type: ignore
from starlette.exceptions import HTTPException as StarletteHTTPException
from docling.datamodel.base_models import OutputFormat
from docling_core.types.doc.document import (
FloatingItem,
PageItem,
RefItem,
)
from docling_jobkit.orchestrators.base_orchestrator import (
BaseOrchestrator,
)
from docling_serve.auth import APIKeyCookieAuth, AuthenticationResult
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
from docling_serve.datamodel.requests import ConvertDocumentsRequest, HttpSourceRequest
from docling_serve.helper_functions import FormDepends
from docling_serve.orchestrator_factory import get_async_orchestrator
from docling_serve.settings import docling_serve_settings
from .convert import ConvertPage # type: ignore
from .pages import AuthPage, StatusPage, TaskPage, TasksPage # type: ignore
# Initialize JSX.
auto_setup
_log = logging.getLogger(__name__)
# TODO: Isolate passed functions into a controller?
def create_ui_app(process_file, process_url, task_result, task_status_poll) -> FastAPI: # noqa: C901
# Builds the FastAPI application that serves the server-side rendered UI.
# The four callables are invoked by the route handlers defined below:
# process_file / process_url are awaited to start a conversion,
# task_status_poll to read a task's status and task_result to fetch its outcome.
ui_app = FastAPI()
# API-key authentication dependency shared by the handlers below.
require_auth = APIKeyCookieAuth(docling_serve_settings.api_key)
# Static files.
# Served from the "static" directory located next to this module.
ui_app.mount(
"/static",
StaticFiles(directory=Path(__file__).parent.absolute() / "static"),
name="static",
)
# Convert page.
@ui_app.get("/")
async def get_root():
    """Redirect the UI root to the relative ``convert`` page."""
    redirect = RedirectResponse(url="convert")
    return redirect
@ui_app.get("/convert", response_class=HTMLResponse)
async def get_convert(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
):
    """Render the conversion form with its default options."""
    page = ConvertPage()
    return str(page)
@ui_app.post("/convert", response_class=HTMLResponse)
async def post_convert(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    options: Annotated[
        ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
    ],
    files: Annotated[list[UploadFile], Form()],
    url: Annotated[str, Form()],
    page_min: Annotated[str, Form()],
    page_max: Annotated[str, Form()],
):
    """Start a conversion task from the submitted form.

    Non-empty uploads take precedence over the URL field. When neither is
    given, or the URL is not valid, the form is rendered again with a
    validation message. On success the client is redirected (303) to the
    page of the newly created task.
    """
    # Refined model options and behavior.
    if len(page_min) > 0:
        options.page_range = (int(page_min), options.page_range[1])
    if len(page_max) > 0:
        options.page_range = (options.page_range[0], int(page_max))
    # Each submitted language entry may itself be a comma separated list.
    options.ocr_lang = [
        sub_lang.strip()
        for lang in options.ocr_lang or []
        for sub_lang in lang.split(",")
        if len(sub_lang.strip()) > 0
    ]

    # Drop empty file parts (entries whose size is falsy).
    files = [f for f in files if f.size]
    if len(files) > 0:
        # Directly uploaded documents.
        response = await process_file(
            auth=auth,
            orchestrator=orchestrator,
            background_tasks=background_tasks,
            files=files,
            options=options,
        )
    elif len(url.strip()) > 0:
        # URLs of documents.
        try:
            source = HttpSourceRequest(url=AnyHttpUrl(url))
        except ValueError:
            # pydantic's ValidationError derives from ValueError; report the
            # problem on the form instead of failing with an error page.
            validation = {"url": "Enter a valid http(s) URL"}
            return str(ConvertPage(options=options, validation=validation))
        request = ConvertDocumentsRequest(options=options, sources=[source])
        response = await process_url(
            auth=auth,
            orchestrator=orchestrator,
            conversion_request=request,
        )
    else:
        validation = {
            "files": "Upload files or enter a URL",
            "url": "Enter a URL or upload files",
        }
        return str(ConvertPage(options=options, validation=validation))

    return RedirectResponse(f"tasks/{response.task_id}/", status.HTTP_303_SEE_OTHER)
# Task overview page.
@ui_app.get("/tasks/", response_class=HTMLResponse)
async def get_tasks(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
):
    """Render the overview of the orchestrator's tasks, ordered by creation time."""
    by_creation = list(orchestrator.tasks.values())
    by_creation.sort(key=lambda t: t.created_at)
    return str(TasksPage(by_creation))
# Task specific page.
@ui_app.get("/tasks/{task_id}/", response_class=HTMLResponse)
async def get_task(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
):
    """Render the page of a single task.

    The result is only fetched once the polled status is "success" or
    "failure"; otherwise the page is rendered from the poll status alone.
    """
    poll = await task_status_poll(auth, orchestrator, task_id)
    result = None
    if poll.task_status in ["success", "failure"]:
        try:
            result = await task_result(
                auth, orchestrator, background_tasks, task_id
            )
        except Exception:
            # Best effort: still render the page, but keep the traceback and
            # use this module's logger rather than the root logger.
            _log.exception("Could not fetch the result of task %s", task_id)
    return str(TaskPage(poll, result))
# Poll task via HTTP status.
@ui_app.get("/tasks/{task_id}/poll", response_class=Response)
async def poll_task(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    task_id: str,
):
    """Report the task status through the HTTP status code only.

    Responds 202 while the polled status is "started" and 200 for any other
    status; the body is empty.
    """
    poll = await task_status_poll(auth, orchestrator, task_id)
    if poll.task_status == "started":
        code = status.HTTP_202_ACCEPTED
    else:
        code = status.HTTP_200_OK
    return Response(status_code=code)
# Download the contents of zipped documents.
@ui_app.get("/tasks/{task_id}/documents.zip")
async def get_task_zip(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
):
    """Return whatever ``task_result`` yields for the task, unmodified."""
    archive = await task_result(auth, orchestrator, background_tasks, task_id)
    return archive
# Get the output of a task, as a converted document in a specific format.
@ui_app.get("/tasks/{task_id}/document.{format}")
async def get_task_document_format(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
    format: str,
):
    """Return the converted document of a task in one output format.

    Raises:
        HTTPException: 404 when ``format`` is not an ``OutputFormat`` value
            or the task result holds no content for that format.
    """
    # Local import: this module does not import json at file level.
    import json

    if format not in [f.value for f in OutputFormat]:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Output format not found.")

    response = await task_result(auth, orchestrator, background_tasks, task_id)

    # TODO: Make this compatible with base_models FormatToMimeType?
    mimes = {
        "html": "text/html",
        "md": "text/markdown",
        "json": "application/json",
    }

    if format == OutputFormat.JSON:
        doc_json = response.document.json_content
        # Serialize with json.dumps: str() of a dict is a Python repr, which
        # is not valid JSON for the advertised application/json media type.
        content = (
            None if doc_json is None else json.dumps(doc_json.export_to_dict())
        )
    else:
        content = response.document.dict().get(f"{format}_content")

    if content is None:
        # The format was not produced for this task.
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Output format not found.")

    return Response(
        content=str(content),
        media_type=mimes.get(format, "text/plain"),
    )
@ui_app.get("/tasks/{task_id}/document/{cref:path}")
async def get_task_document_item(
    request: Request,
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
    cref: str,
):
    """Resolve ``#/<cref>`` inside the task's JSON document.

    When the Accept header contains ``image/*`` and the item is a page or a
    floating item, its image is returned as PNG (the body stays empty if no
    image is available). Otherwise the resolved item itself is returned.
    """
    response = await task_result(auth, orchestrator, background_tasks, task_id)
    doc = response.document.json_content
    item = RefItem(cref=f"#/{cref}").resolve(doc)  # type: ignore

    accept = request.headers.get("Accept") or ""
    wants_image = "image/*" in accept
    if not (wants_image and isinstance(item, FloatingItem | PageItem)):
        return item

    buffer = io.BytesIO()
    if (
        isinstance(item, PageItem)
        and (img_ref := item.image)
        and img_ref.pil_image
    ):
        img_ref.pil_image.save(buffer, format="PNG")
    elif isinstance(item, FloatingItem) and (img := item.get_image(doc)):
        img.save(buffer, format="PNG")
    return Response(content=buffer.getvalue(), media_type="image/png")
# Page not found; catch all.
@ui_app.api_route("/{path_name:path}")
def no_page(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
):
    """Catch-all route: any path not matched earlier yields a 404."""
    raise HTTPException(
        status_code=status.HTTP_404_NOT_FOUND,
        detail="Page not found.",
    )
# Exception and auth pages.
# Renders errors raised by the UI routes as HTML pages and handles the
# submission of the API-key dialog.
@ui_app.exception_handler(StarletteHTTPException)
@ui_app.exception_handler(Exception)
async def exception_page(request: Request, ex: Exception):
if not isinstance(ex, StarletteHTTPException):
# Internal error.
# Anything that is not an HTTP exception is replaced by a generic 500.
ex = HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR)
if request.method == "POST":
# Authorization required -> API key dialog.
# A posted string field "api_key" is stored on the redirect response via
# require_auth._set_api_key, and the client is sent back (303) to the same URL.
form = await request.form()
form_api_key = form.get("api_key")
if isinstance(form_api_key, str):
response = RedirectResponse(request.url, status.HTTP_303_SEE_OTHER)
require_auth._set_api_key(response, form_api_key)
return response
# Unauthorized requests get the authentication page.
if ex.status_code == status.HTTP_401_UNAUTHORIZED:
return HTMLResponse(str(AuthPage()), status.HTTP_401_UNAUTHORIZED)
# HTTP exception page; avoid referer loop.
# The referer is dropped when it equals the failing URL itself.
referer = request.headers.get("Referer")
if referer == request.url:
referer = None
return HTMLResponse(str(StatusPage(ex, referer)), ex.status_code)
return ui_app

251
docling_serve/ui/convert.px Normal file
View File

@@ -0,0 +1,251 @@
import json
import sys
from pyjsx import jsx, JSX
from docling.datamodel.base_models import FormatToExtensions, OutputFormat
from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode
from docling_core.types.doc import ImageRefMode
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions, ocr_engines_enum
from .forms import EnumCheckboxes, EnumRadios, EnumSelect, ocr_engine_languages, ValidatedInput
from .pages import Header, Page
# Default options shown by the convert form: the stock request options with
# JSON appended to the output formats.
base_convert_options = ConvertDocumentsRequestOptions()
base_convert_options.to_formats.append(OutputFormat.JSON)
# Renders the conversion form page.
#   options:    values used to pre-fill the form controls; defaults to the
#               shared module-level base_convert_options (only read here).
#   validation: optional mapping of input name -> message, forwarded to the
#               "files" and "url" ValidatedInput controls.
def ConvertPage(
options: ConvertDocumentsRequestOptions = base_convert_options,
validation: None | dict[str, str] = None
) -> JSX:
# Comma separated list of ".ext" entries for every extension in FormatToExtensions.
file_accept = ",".join([f".{ext}" for exts in FormatToExtensions.values() for ext in exts])
return (
<Page title="Convert">
<main class="container">
<Header />
<form class="convert" method="post" enctype="multipart/form-data">
<legend>
<b>Documents</b>
</legend>
<fieldset class="grid">
<ValidatedInput
name="files"
type="file"
multiple
accept={file_accept}
validation={validation}
/>
<ValidatedInput
name="url"
placeholder="or enter a URL: https://arxiv.org/pdf/2501.17887"
validation={validation}
/>
</fieldset>
<fieldset class="grid">
<EnumSelect
enum={ProcessingPipeline}
selected={options.pipeline}
name="pipeline"
title="Pipeline"
/>
<EnumSelect
enum={PdfBackend}
selected={options.pdf_backend}
name="pdf_backend"
title="PDF Backend"
/>
<div>
<label>Pages</label>
<div role="group">
<input
type="number"
name="page_min"
min={1}
step={1}
placeholder="1"
value={None if options.page_range[0] <= 1 else options.page_range[0]}
/>
<input
type="number"
name="page_max"
min={1}
step={1}
placeholder="max."
value={None if options.page_range[1] >= sys.maxsize else options.page_range[1]}
/>
</div>
</div>
<div>
<label>Timeout<small>in seconds</small></label>
<input
type="number"
name="document_timeout"
min={1}
step={1}
value={int(options.document_timeout)}
/>
</div>
</fieldset>
<div class="grid">
<EnumCheckboxes
enum={OutputFormat}
selected={options.to_formats}
name="to_formats"
title={<b>Output</b>}
/>
<div>
<fieldset>
<label>
<input
type="checkbox"
name="do_ocr"
checked={options.do_ocr}
/>
<b>OCR</b>
</label>
<label display-when="do_ocr">
<input
type="checkbox"
name="do_code_enrichment"
checked={options.do_code_enrichment}
/>
Code
</label>
<label display-when="do_ocr">
<input
type="checkbox"
name="do_formula_enrichment"
checked={options.do_formula_enrichment}
/>
Formulas
</label>
</fieldset>
<EnumSelect
display-when="do_ocr"
enum={ocr_engines_enum}
selected={options.ocr_engine}
name="ocr_engine"
title="Engine"
/>
<label display-when="do_ocr">Language</label>
<input
display-when="do_ocr"
name="ocr_lang"
dep-on="ocr_engine"
dep-values={json.dumps(ocr_engine_languages)}
pattern="[\w+]*[,\w+]*"
title="A comma separated list of language codes, of which the format depends on the selected engine."
/>
<label display-when="do_ocr">
<input
type="checkbox"
name="force_ocr"
checked={options.force_ocr}
/>
Force
</label>
</div>
<div>
<fieldset>
<label>
<input
type="checkbox"
name="include_images"
checked={options.include_images}
/>
<b>Images</b>
</label>
<label display-when="include_images">
<input
type="checkbox"
name="do_picture_classification"
checked={options.do_picture_classification}
/>
Classification
</label>
<label display-when="include_images">
<input
type="checkbox"
name="do_picture_description"
checked={options.do_picture_description}
/>
Description
</label>
<label display-when="include_images,do_picture_description">Area threshold</label>
<input
display-when="include_images,do_picture_description"
type="number"
name="picture_description_area_threshold"
min={0}
max={1}
step={0.01}
value={options.picture_description_area_threshold}
/>
</fieldset>
<EnumSelect
display-when="include_images"
enum={ImageRefMode}
selected={options.image_export_mode}
name="image_export_mode"
title="Export"
/>
<label display-when="include_images">Scale</label>
<input
display-when="include_images"
type="number"
name="images_scale"
min={0}
step={0.1}
value={options.images_scale}
/>
</div>
<div>
<fieldset>
<label>
<input
type="checkbox"
name="do_table_structure"
checked={options.do_table_structure}
/>
<b>Tables</b>
</label>
<label display-when="do_table_structure">
<input
type="checkbox"
name="table_cell_matching"
checked={options.table_cell_matching}
/>
Cell matching
</label>
</fieldset>
<EnumSelect
display-when="do_table_structure"
enum={TableFormerMode}
selected={options.table_mode}
name="table_mode"
title="Mode"
/>
</div>
</div>
<div class="sticky-footer">
<input type="submit" value="Convert" />
</div>
</form>
</main>
</Page>
)

127
docling_serve/ui/forms.px Normal file
View File

@@ -0,0 +1,127 @@
from enum import Enum
from typing import Type
from pyjsx import jsx, JSX
from docling.datamodel.pipeline_options import OcrOptions
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
# Maps each OcrOptions subclass' `kind` to the comma-joined `lang` list of a
# default-constructed instance of that subclass.
ocr_engine_languages = {
SubOptions.kind: ",".join(SubOptions().lang)
for SubOptions in OcrOptions.__subclasses__()
}
def _format_label(label: str) -> str:
    """Turn an identifier-like label into lower-case words separated by spaces."""
    underscores_to_spaces = str.maketrans("_", " ")
    return label.translate(underscores_to_spaces).lower()
def option_example(field_name: str) -> str | None:
    """Return the first declared example of a request option field.

    Args:
        field_name: Name of a field of ``ConvertDocumentsRequestOptions``.

    Returns:
        The first entry of the field's ``examples``, or ``None`` when the
        field declares no examples.

    Raises:
        KeyError: If ``field_name`` is not a field of the model.
    """
    field = ConvertDocumentsRequestOptions.model_fields[field_name]
    examples = field.examples or []
    # Indexing an empty list would raise IndexError; honour the declared
    # `str | None` return type instead.
    return examples[0] if examples else None
# Input element with optional validation feedback.
# With a non-empty `validation` mapping the input is wrapped in a <div>, gets
# aria-invalid "true"/"false" depending on whether `name` is a key, and is
# followed by the message in a <small> element when one exists for `name`.
# Without validation a bare <input> is returned. Extra keyword arguments are
# spread onto the <input>.
def ValidatedInput(validation: None | dict[str, str], name: str, **kwargs) -> JSX:
if validation:
invalid = "true" if name in validation else "false"
content = [<input name={name} aria-invalid={invalid} {...kwargs} />]
if name in validation:
content.append(<small>{validation[name]}</small>)
return <div>{content}</div>
else:
return <input name={name} {...kwargs} />
# Fieldset with one checkbox per member of `enum`, all sharing `name`.
# A box is checked when the member's value is contained in `selected`; the
# label text is the member name passed through _format_label. `title`, when
# truthy, is rendered as the <legend>. Extra keyword arguments are spread onto
# the <fieldset>. `children` is accepted but not used in the body.
def EnumCheckboxes(
children,
enum: Type[Enum],
selected: list[Enum],
name: str,
title: JSX = None,
**kwargs
) -> JSX:
return (
<fieldset {...kwargs}>
{
<legend>{title}</legend>
if title
else None
}
{[
<label>
<input
type="checkbox"
name={name}
value={e.value}
checked={e.value in selected}
/>
{_format_label(e.name)}
</label>
for e in enum
]}
</fieldset>
)
# Fieldset with one radio button per member of `enum`, all sharing `name`.
# The button whose value equals `selected` is checked; the label text is the
# member name passed through _format_label. `title`, when truthy, is rendered
# as the <legend>. Extra keyword arguments are spread onto the <fieldset>.
# `children` is accepted but not used in the body.
def EnumRadios(
children,
enum: Type[Enum],
selected: Enum,
name: str,
title: JSX = None,
**kwargs
) -> JSX:
return (
<fieldset {...kwargs}>
{
<legend>{title}</legend>
if title
else None
}
{[
<label>
<input
type="radio"
name={name}
value={e.value}
checked={e.value == selected}
/>
{_format_label(e.name)}
</label>
for e in enum
]}
</fieldset>
)
# <select> with one <option> per member of `enum`, wrapped in a <div>.
# The option whose value equals `selected` is marked selected; option text is
# the member name passed through _format_label. `title`, when truthy, is
# rendered as a <label>. Extra keyword arguments are spread onto the wrapping
# <div>, not the <select>. `children` is accepted but not used in the body.
def EnumSelect(
children,
enum: Type[Enum],
selected: Enum,
name: str,
title: JSX = None,
**kwargs
) -> JSX:
return (
<div {...kwargs}>
{
<label>{title}</label>
if title
else None
}
<select name={name}>
{[
<option value={e.value} selected={e.value == selected}>
{_format_label(e.name)}
</option>
for e in enum
]}
</select>
</div>
)

220
docling_serve/ui/pages.px Normal file
View File

@@ -0,0 +1,220 @@
from importlib import metadata
from fastapi import FastAPI, HTTPException, Response
from pyjsx import jsx, JSX
from docling.datamodel.base_models import OutputFormat
from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode
from docling_jobkit.datamodel.task import Task
from docling_serve.datamodel.responses import ConvertDocumentResponse
from .preview import DocPreview
# Page header: title with logo, the installed `docling` version (read from
# package metadata on every render) and navigation links to the convert and
# tasks pages. `classname` becomes the class of the <header> element;
# `children` is accepted but not rendered.
def Header(children, classname: str = "") -> JSX:
return (
<header class={classname}>
<span class="title">
D<img src="/ui/static/logo.svg" />CLING SERVE
</span>
<span class="version" title="Docling version">
{metadata.version('docling')}
</span>
<nav>
<ul>
<li><a href="/ui/convert">Convert</a></li>
<li><a href="/ui/tasks/">Tasks</a></li>
</ul>
</nav>
</header>
)
# HTML document shell shared by all pages: sets the title, loads the static
# stylesheet and script, and renders `children` inside <body>.
# With poll=True the body gets an onload script that fetches the relative
# "poll" URL every 3000 ms and reloads the page once it answers with 200.
def Page(children, title: str, poll: bool = False) -> JSX:
return (
<html lang="en" id="root">
<head>
<title>{title}</title>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link rel="stylesheet" href="/ui/static/style.css" />
<script src="/ui/static/main.js" />
</head>
<body onload={'setInterval(async () => { if ((await fetch("poll")).status == 200) location.reload(); }, 3000)' if poll else None}>
{children}
</body>
</html>
)
# Authentication page: an open dialog inside a POST form with a single
# required password field named "api_key" and a submit button.
def AuthPage():
return (
<Page title="Authenticate">
<form method="post">
<dialog open>
<article>
<header>
<h4>Authenticate</h4>
</header>
<input
type="password"
name="api_key"
placeholder="Enter API key"
required autofocus
/>
<footer>
<input type="submit" value="Confirm" />
</footer>
</article>
</dialog>
</form>
</Page>
)
# Task overview page. Shows a hint linking to the convert page when `tasks`
# is empty; otherwise a table with type, status, id (linking to the task's
# page) and creation time of every task, in the order given.
def TasksPage(tasks: list[Task]) -> JSX:
return (
<Page title="Tasks">
<main class="container">
<Header />
{(
<p>There are no active tasks. <a href="../convert">Convert</a> a document to create a new task.</p>
) if len(tasks) == 0 else (
<table>
<thead>
<tr>
<th>Task</th>
<th>Status</th>
<th>ID</th>
<th>Created</th>
</tr>
</thead>
<tbody>
{
<tr>
<td>{task.task_type.name}</td>
<td>{task.task_status.name}</td>
<td>
<a href={f"{task.task_id}/"}>{task.task_id}</a>
</td>
<td>{task.created_at.strftime("%d-%m-%Y, %H:%M:%S")}</td>
</tr>
for task in tasks
}
</tbody>
</table>
)}
</main>
</Page>
)
# Page for a single task.
#   poll: status object; its task_status and task_id are read here.
#   task: the task result. When it is a raw Response a link to
#         "documents.zip" is shown; otherwise the page depends on
#         poll.task_status: "success" renders the document preview with
#         per-format links, "started" renders a progress page that polls,
#         and every other status renders the failure page.
# NOTE(review): any status other than "success"/"started" (e.g. a queued
# task, if such a status exists) ends up on the failure page — confirm intent.
def TaskPage(poll, task: ConvertDocumentResponse) -> JSX:
# Minimal page layout reused by the zip, progress and failure views.
def PlainPage(children, poll = False) -> JSX:
return (
<Page title="Task" poll={poll}>
<main class="container">
<Header classname={"loading" if poll else None} />
{children}
</main>
</Page>
)
if isinstance(task, Response):
return (
<PlainPage>
<p>
<ins>Converted multiple documents successfully</ins>
</p>
<a href="documents.zip">documents.zip</a>
</PlainPage>
)
else:
match poll.task_status:
case "success":
# `doc` is the dict form used to test which "<format>_content" entries are
# present; `doc_json` is the JSON document handed to the preview.
doc = task.document.dict()
doc_json = task.document.json_content
return (
<Page title={task.document.filename}>
<main class="preview">
<Header />
<div class="status">
<div>
<span>Task</span>
<b>{poll.task_id}</b>
</div>
<div>
<span>converted</span>
<b>{task.document.filename}</b>
</div>
<div>
<span>in</span>
<b>{round(task.processing_time)} seconds</b>
</div>
</div>
<div class="formats">
{[
<a class="secondary" href={f"document.{f.value}"} target="_blank">
<button>{f.name}</button>
</a>
for f in OutputFormat
if doc.get(f"{f.value}_content")
]}
<label class="configDarkImg">
<input type="checkbox" name="invert-images" persist="preview" />
Invert images
</label>
</div>
{
<DocPreview doc={doc_json} />
if doc_json
else (<p>No document preview because JSON is missing as an output format.</p>)
}
</main>
</Page>
)
case "started":
return (
<PlainPage poll>
<p class="progress">Task <b>{poll.task_id}</b> is in progress...</p>
<progress />
</PlainPage>
)
case _:
return (
<PlainPage>
<p class="fail">
Task <b>{poll.task_id}</b> failed.
</p>
<button onclick="history.back()">
Go back
</button>
</PlainPage>
)
# Error page for an HTTP exception: shows the status code and detail, plus a
# "Go back" link pointing at `referer`, or at ".." when no referer is given.
def StatusPage(ex: HTTPException, referer: str | None) -> JSX:
return (
<Page title={ex.status_code}>
<main class="container">
<Header />
<h4>{ex.status_code}</h4>
<p>{ex.detail}</p>
<p>
<a href={referer or ".."}>
<button>Go back</button>
</a>
</p>
</main>
</Page>
)

347
docling_serve/ui/preview.px Normal file
View File

@@ -0,0 +1,347 @@
from collections import defaultdict
from html import escape
from typing import Type
from docling_core.types.doc.document import (
BaseAnnotation,
CodeItem,
ContentLayer,
DescriptionAnnotation,
DoclingDocument,
DocItem,
FloatingItem,
Formatting,
FormulaItem,
GroupItem,
GroupLabel,
ListGroup,
ListItem,
NodeItem,
PictureClassificationData,
PictureItem,
ProvenanceItem,
RefItem,
Script,
SectionHeaderItem,
TableCell,
TableItem,
TextItem,
TitleItem
)
from pyjsx import jsx, JSX, JSXComponent
from .svg import image, path, rect, text
_node_components: dict[str, JSXComponent] = {}
def component(*node_types: Type[BaseAnnotation | NodeItem]):
    """Register the decorated function as renderer for the given types.

    The function is stored in ``_node_components`` under the class name of
    every type passed in ``node_types``.

    Returns:
        A decorator that performs the registration and returns the function
        unchanged, so the decorated name stays bound to it instead of
        becoming ``None``.
    """
    def decorator(component):
        for t in node_types:
            _node_components[t.__name__] = component
        return component
    return decorator
# Renders one annotation. The renderer registered under the annotation's class
# name is used when present; otherwise the annotation's indented JSON dump is
# shown (HTML-escaped) in a <code> element. The resulting element additionally
# gets the "annotation" class and a data-kind attribute set to annotation.kind.
def AnnotationComponent(children, annotation: BaseAnnotation):
Comp = _node_components.get(annotation.__class__.__name__)
element = Comp(annotation=annotation, children=[]) if Comp else (
<code>{escape(annotation.model_dump_json(indent=2))}</code>
)
element.props["class"] = element.props.get("class", "") + " annotation"
element.props["data-kind"] = annotation.kind
return element
# Renders one document node. The renderer registered under the node's class
# name is used, with an empty <span class="void"> as fallback. DocItems that
# have annotations are wrapped in a <div class="annotated"> together with
# their rendered annotations. The outermost element receives an id, a click
# handler and the classes "item" plus the node's content layer value.
def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
# Specific component or fallback.
Comp = _node_components.get(node.__class__.__name__)
element = Comp(node=node, doc=doc, children=[]) if Comp else (
<span class="void"></span>
)
# Wrap item component with annotations, if any.
if isinstance(node, DocItem) and (anns := node.get_annotations()):
element = (
<div class="annotated">
{element}
{[<AnnotationComponent annotation={ann} /> for ann in anns]}
</div>
)
# Extend interaction and styling.
# The id is self_ref without its first two characters (presumably the "#/" prefix).
id = node.self_ref[2:]
element.props["id"] = id
element.props["onclick"] = "clickId(event)"
classes = ["item", node.content_layer.value]
element.props["class"] = f"{element.props.get("class", "")} {" ".join(classes)}"
return element
def node_provs(node: NodeItem, doc: DoclingDocument) -> list[ProvenanceItem]:
    """Collect the provenance entries that belong to a node.

    Args:
        node: A document item or a group.
        doc: Document used to resolve the children references of a group.

    Returns:
        ``node.prov`` for a ``DocItem``; otherwise the provenance entries of
        all direct children that resolve to a ``DocItem``, in child order.
    """
    if isinstance(node, DocItem):
        return node.prov
    return [
        prov
        for child in node.children
        # Resolve every child reference once and reuse the resolved item.
        if isinstance(resolved := child.resolve(doc), DocItem)
        for prov in resolved.prov
    ]
# Renders one page of the preview: for each item a marker row, the rendered
# item and a page-marker cell, followed by an <svg> with the page image and
# clickable boxes for the items' provenance on this page.
def DocPage(children, page_no: int, items: list[NodeItem], doc: DoclingDocument):
page = doc.pages[page_no]
# Items are rendered only on the lowest page number found in their provenance.
exclusive_items = [
item
for item in items
if min([p.page_no for p in node_provs(item, doc)]) == page_no
]
comps = []
for i in range(len(exclusive_items)):
item = exclusive_items[i]
# self_ref without its first two characters, split into kind and index parts.
id = item.self_ref[2:]
kind, *index = id.split("/")
# Groups are marked "group"; items whose parent is a group with a label other
# than UNSPECIFIED are marked "grouped".
parent_class = ""
if isinstance(item, GroupItem):
parent_class = "group"
else:
parent = item.parent.resolve(doc)
if isinstance(parent, GroupItem) and parent.label is not GroupLabel.UNSPECIFIED:
parent_class = "grouped"
comps.append(
<div class={f"item-markers {parent_class} {item.content_layer.value}"} data-id={id}>
<span>{"/".join(index)}</span>
<span>{item.label.replace("_", " ")}</span>
{
<span>{item.content_layer.value.replace("_", " ")}</span>
if item.content_layer is not ContentLayer.BODY
else None
}
<a href={f"document/{id}"} target="_blank">{"{;}"}</a>
</div>
)
comps.append(<NodeComponent node={item} doc={doc} />)
# The page marker gets a border for the first item and for items whose
# provenance covers more than one page.
pages = set([p.page_no for p in node_provs(item, doc)])
page_mark_class = "page-marker"
if i == 0 or len(pages) > 1:
page_mark_class += " border"
comps.append(<div class={page_mark_class}></div>)
# Builds the <rect> overlays for one provenance entry of an item; the
# provenance box is converted to top-left origin using the page height.
def ItemBox(children, item: DocItem, prov: ProvenanceItem):
item_id = item.self_ref[2:]
sub_items = [
(item_id, prov.bbox.to_top_left_origin(page.size.height))
]
# Table cells.
# NOTE(review): cell.bbox is used as-is; assumes every cell has a bbox in
# top-left origin — TODO confirm.
if isinstance(item, TableItem):
for cell in item.data.table_cells:
sub_items.append(
(f"{item_id}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}", cell.bbox)
)
return [
<rect
data-id={id}
x={bbox.l - 1}
y={bbox.t - 1}
width={bbox.width + 2}
height={bbox.height + 2}
vector-effect="non-scaling-stroke"
onclick="clickId(event)"
/>
for id, bbox in sub_items
]
# Span extra row to fill up excess space.
comps.append(
<svg
class="page-image"
style={{ "grid-row": f"span {len(exclusive_items) + 1}" }}
width="50vw"
viewBox={f"0 0 {page.size.width} {page.size.height}"}
>
<image
href={f"document/pages/{page_no}"}
width={page.size.width}
height={page.size.height}
/>
{[
<ItemBox item={item} prov={prov} />
for item in items
if isinstance(item, DocItem)
for prov in item.prov
if prov.page_no == page_no
]}
<text class="top-no" x={5} y={5}>{page_no}</text>
<text class="bottom-no" x={5} y={page.size.height - 5}>{page_no}</text>
</svg>
)
return <div class="page">{comps}</div>
# Renders the whole document as a list of DocPage elements in ascending page
# order. Items of all content layers are iterated, groups included; groups
# labelled UNSPECIFIED are left out. Each item is listed under every page
# number that occurs in its provenance.
def DocPreview(children, doc: DoclingDocument):
page_items: dict[int, list[NodeItem]] = defaultdict(list)
for item, level in doc.iterate_items(
with_groups=True,
included_content_layers={*ContentLayer}
):
if not isinstance(item, GroupItem) or item.label is not GroupLabel.UNSPECIFIED:
pages = set([p.page_no for p in node_provs(item, doc)])
for page in pages:
page_items[page].append(item)
return [
<DocPage page_no={page_no} items={page_items[page_no]} doc={doc} />
for page_no in sorted(page_items.keys())
]
def _text_classes(node: TextItem) -> str:
    """Build the space separated CSS class string for a text item.

    The string starts with the item's label. When the item carries
    formatting, the names of the active flags (bold, italic, underline,
    strikethrough) follow in that order, and the script value comes last.
    """
    frmt = node.formatting
    if not frmt:
        return " ".join([node.label])

    flags = (
        ("bold", frmt.bold),
        ("italic", frmt.italic),
        ("underline", frmt.underline),
        ("strikethrough", frmt.strikethrough),
    )
    active = [name for name, enabled in flags if enabled]
    return " ".join([node.label, *active, frmt.script])
# Renderer registered for TextItem: the HTML-escaped text in a <p> carrying
# the classes computed by _text_classes.
@component(TextItem)
def TextComponent(children, node: TextItem, doc: DoclingDocument):
return <p class={_text_classes(node)}>{escape(node.text)}</p>
# Renderer registered for TitleItem: the HTML-escaped text in an <h1> carrying
# the classes computed by _text_classes.
@component(TitleItem)
def TitleComponent(children, node: TitleItem, doc: DoclingDocument):
return <h1 class={_text_classes(node)}>{escape(node.text)}</h1>
# Renderer registered for SectionHeaderItem: the HTML-escaped text in an <h4>
# carrying the classes computed by _text_classes.
@component(SectionHeaderItem)
def SectionHeaderComponent(children, node: SectionHeaderItem, doc: DoclingDocument):
return <h4 class={_text_classes(node)}>{escape(node.text)}</h4>
# Renderer registered for ListItem: an <li> with the item's marker in bold
# followed by the HTML-escaped text in a <span> with the _text_classes classes.
@component(ListItem)
def ListComponent(children, node: ListItem, doc: DoclingDocument):
return (
<li>
<b>{node.marker}</b>
<span class={_text_classes(node)}>{escape(node.text)}</span>
</li>
)
# Renderer registered for CodeItem: a <figure> holding a <code> element with
# the HTML-escaped text, falling back to `orig` when `text` is empty.
@component(CodeItem)
def CodeComponent(children, node: CodeItem, doc: DoclingDocument):
return (
<figure>
<code class={_text_classes(node)}>
{escape(node.text or node.orig)}
</code>
</figure>
)
# Renderer registered for FormulaItem: a <figure> holding a <code> element
# with the HTML-escaped text, falling back to `orig` when `text` is empty.
@component(FormulaItem)
def FormulaComponent(children, node: FormulaItem, doc: DoclingDocument):
return (
<figure>
<code class={_text_classes(node)}>
{escape(node.text or node.orig)}
</code>
</figure>
)
# Renderer registered for PictureItem: a lazily loaded <img> whose source is
# the relative "document/<ref>" URL built from the item's self_ref.
@component(PictureItem)
def PictureComponent(children, node: PictureItem, doc: DoclingDocument):
return <figure><img src={f"document/{node.self_ref[2:]}"} loading="lazy" /></figure>
# Renderer registered for PictureClassificationData: a two-column table of
# class name (underscores shown as spaces) and confidence with two decimals.
# Predicted classes with a confidence of 0.01 or less are omitted.
@component(PictureClassificationData)
def PictureClassificationComponent(children, annotation: PictureClassificationData):
return (
<table>
<tbody>
{[
<tr>
<td>{cls.class_name.replace("_", " ")}</td>
<td>{f"{cls.confidence:.2f}"}</td>
</tr>
for cls in annotation.predicted_classes
if cls.confidence > 0.01
]}
</tbody>
</table>
)
# Renderer registered for DescriptionAnnotation: the HTML-escaped annotation
# text in a <span>.
# NOTE(review): this function has the same name as the imported
# DescriptionAnnotation class and rebinds that module-level name once the
# definition completes; the sibling renderers use a "...Component" suffix.
# Consider renaming after checking for importers of this name.
@component(DescriptionAnnotation)
def DescriptionAnnotation(children, annotation: DescriptionAnnotation):
return <span>{escape(annotation.text)}</span>
# Renderer registered for TableItem: an HTML table built from node.data.grid.
# Each cell is emitted once, with its column and row span; grid positions
# already covered by an emitted cell are skipped.
@component(TableItem)
def TableComponent(children, node: TableItem, doc: DoclingDocument):
# (column, row) positions covered by cells emitted so far.
covered_cells: set[(int, int)] = set()
# Returns whether the cell's start position is already covered; when it is
# not, marks every position in the cell's column/row offset range as covered.
def check_cover(cell: TableCell):
is_covered = (cell.start_col_offset_idx, cell.start_row_offset_idx) in covered_cells
if not is_covered:
for x in range(cell.start_col_offset_idx, cell.end_col_offset_idx):
for y in range(cell.start_row_offset_idx, cell.end_row_offset_idx):
covered_cells.add((x, y))
return is_covered
# Renders one <td>; its id is "<table ref>/<start column>/<start row>" and
# header cells (column or row header) get the "header" class.
def Cell(children, cell: TableCell):
id = f"{node.self_ref[2:]}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}"
return (
<td
id={id}
class={"header" if cell.column_header or cell.row_header else None}
colspan={cell.col_span or 1}
rowspan={cell.row_span or 1}
onclick="clickId(event)"
>
{escape(cell.text)}
</td>
)
return (
<div class="table">
<table>
<tbody>
{[
<tr>
{[
<Cell cell={cell} />
for cell in row
if not check_cover(cell)
]}
</tr>
for row in node.data.grid
]}
</tbody>
</table>
</div>
)

View File

@@ -0,0 +1,116 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg width="100%" height="100%" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:1.5;">
<g id="Docling" transform="matrix(1.07666,0,0,1.07666,-35.9018,-84.1562)">
<g id="Outline" transform="matrix(1,0,0,1,-0.429741,55.0879)">
<path d="M394.709,69.09C417.34,35.077 467.97,30.178 478.031,55.609C486.35,55.043 494.726,54.701 503.158,54.589C533.157,45.238 560.496,47.419 584.65,60.732C800.941,96.66 966.069,284.814 966.069,511.232C966.069,763.284 761.435,967.918 509.383,967.918C433.692,967.918 362.277,949.464 299.385,916.808L242.3,931.993C203.092,943.242 187.715,928.369 208.575,891.871C208.935,891.24 216.518,879.37 223.997,867.677C119.604,783.975 52.698,655.355 52.698,511.232C52.698,298.778 198.086,120.013 394.709,69.09Z" style="fill:white;"/>
</g>
<g id="Color" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
<path d="M284.8,894.232L179.735,783.955L130.222,645.203L125.538,504.726L185.211,385.816C209.006,322.738 249.951,278.973 302.281,248.028L406.684,203.333L413.483,175.767L436.637,152.428L451.408,153.312L457.726,183.183L485.164,165.379L526.92,159.699L557.014,177.545L612.652,211.018C679.009,226.066 740.505,264.146 797.138,325.26L862.813,423.477L891.583,560.826L883.273,683.32L814.268,809.924L734.431,894.384L644.495,926.906L497.146,954.121L361.064,940.647L284.8,894.232Z" style="fill:url(#_Linear1);"/>
<path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z" style="fill:url(#_Linear2);"/>
<g transform="matrix(-0.765945,0,0,1,839.727,5.47434)">
<clipPath id="_clip3">
<path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z"/>
</clipPath>
<g clip-path="url(#_clip3)">
<g transform="matrix(-1.18516,0,0,0.907769,1039.04,88.3496)">
<use xlink:href="#_Image4" x="223.969" y="674.21" width="152.098px" height="213.852px" transform="matrix(0.994105,0,0,0.999308,0,0)"/>
</g>
</g>
</g>
<path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
<g transform="matrix(-1,0,0,1,1022.04,2.74442)">
<path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
</g>
<path d="M354.92,650.818L420.009,663.185L493.368,666.379L554.826,665.251L620.19,658.511L658.169,651.428L671.428,644.802L673.265,627.093L659.898,611.845L625.422,609.244L599.275,591.212L568.632,556.79L542.9,534.336L515.052,528.253L480.412,532.71L455.2,552.337L428.514,578.155L405.312,599.359L374.228,612.097L355.342,614.456L340.75,630.308L341.568,645.341L354.92,650.818Z" style="fill:url(#_Linear5);"/>
<path d="M257.168,949.32L317.434,876.747L364.928,810.6L384.1,743.934L378.759,714.719L376.844,685.849L374.836,659.954L448.734,664.2L511.462,667.602L571.339,665.091L632.796,658.836L648.232,656.882L649.937,697.808L608.105,717.702L598.45,738.594L592.286,761.642L604.743,796.309L639.595,825.803L649.872,840.757L558.219,895.152L502.124,907.569L425.781,923.496L333.29,931.298L286.269,936.907L257.168,949.32Z" style="fill:url(#_Linear6);"/>
<g transform="matrix(1,0,0,1.30081,-1.77636e-15,-196.488)">
<path d="M374.165,685.268C463.946,706.599 553.728,707.491 643.51,688.593L641.903,653.199C549.263,671.731 459.645,672.22 373.059,654.611L374.165,685.268Z" style="fill-opacity:0.18;"/>
</g>
<path d="M459.633,571.457C476.7,536.091 530.064,535.913 553.1,568.767C520.703,551.407 489.553,552.374 459.633,571.457Z" style="fill:white;"/>
<g transform="matrix(1,0,0,1,0.223468,-2.61949)">
<path d="M355.3,267.232C500.64,173.156 720.699,241.362 793.691,423.582C766.716,384.84 735.725,357.078 697.53,349.014L717.306,335.248C698.537,321.49 675.794,320.957 651.039,327.119C652.235,315.768 658.995,306.991 674.188,302.115C641.864,287.427 617.356,289.473 596.258,298.818C597.049,286.116 605.827,278.087 620.068,273.254C589.192,267.477 564.13,270.926 544.651,283.232C545.822,271.831 550.709,260.943 560.913,250.79C517.498,257.095 492.995,267.925 482.892,282.202C477.311,269.499 477.274,257.221 487.625,245.739C439.161,252.932 421.555,265.094 410.355,278.286C407.697,269.01 407.705,260.632 410.853,253.316C389.633,254.773 372.178,260.663 355.3,267.232Z" style="fill:rgb(255,213,95);"/>
</g>
<path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
<g transform="matrix(0.114323,-0.655229,0.82741,0.144365,224.632,497.317)">
<path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
</g>
<g transform="matrix(1.6739,1.15217e-16,-1.15217e-16,-0.733075,-341.46,1039.77)">
<path d="M447.449,560.911C468.179,536.963 546.237,539.305 565.638,560.831C533.166,555.541 477.296,553.494 447.449,560.911Z" style="fill:white;"/>
</g>
<path d="M348.201,622.341C395.549,653.534 622.351,660.854 661.936,616.729L677.568,633.834L667.044,650.308L557.802,667.518L498.074,670.562L446.718,666.416L391.404,658.406L348.154,652.501L340.161,637.119L348.201,622.341Z" style="fill:rgb(199,68,6);"/>
</g>
<g id="Black-outline" serif:id="Black outline" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
<path d="M373.389,657.919C376.285,676.334 377.04,695.016 375.326,714.008" style="fill:none;stroke:black;stroke-width:15.73px;"/>
<path d="M645.931,654.961C646.158,669.958 647.22,684.853 648.975,699.661" style="fill:none;stroke:black;stroke-width:15.73px;"/>
<path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
<g transform="matrix(0.94177,0,0,0.94909,28.8868,3.79501)">
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
</g>
<g transform="matrix(0.112099,0.0552506,-0.0673118,0.136571,455.367,509.409)">
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
</g>
<g transform="matrix(-0.112099,0.0552506,0.0673118,0.136571,560.529,509.492)">
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
</g>
<g transform="matrix(-1,0,0,1,1013.33,-1.15187)">
<path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
</g>
<g transform="matrix(-0.94177,0,0,0.94909,984.44,2.64314)">
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
</g>
<g transform="matrix(1,0,0,1,1.9047,-5.57346)">
<path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
</g>
<g transform="matrix(-1,0,0,1,1011.43,-5.7284)">
<path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
</g>
<g transform="matrix(0.973815,0,0,1.00246,4.71761,-0.508759)">
<path d="M407.22,206.891C107.655,339.384 134.447,630.03 314.615,708.305" style="fill:none;stroke:black;stroke-width:29.39px;"/>
</g>
<g transform="matrix(-0.973815,0,0,1.00246,1006.67,-1.31695)">
<path d="M461.559,196.756C119.768,256.762 111.059,642.544 320.305,711.486" style="fill:none;stroke:black;stroke-width:29.39px;"/>
</g>
<g id="vector-duck" serif:id="vector duck">
<path d="M240.912,850.71C248.043,740.231 325.609,685.992 371.268,715.193C386.487,724.926 392.506,757.72 358.575,816.753C327.005,871.68 300.465,894.596 288.329,903.447" style="fill:none;stroke:black;stroke-width:21.79px;"/>
<path d="M638.382,843.426C427.991,964.695 389.022,902.942 251.512,947.641L307.759,889.573" style="fill:none;stroke:black;stroke-width:15.73px;"/>
<path d="M770.991,853.754C779.364,764.998 730.67,727.923 666.385,704.966C629.568,691.819 580.483,723.886 595.974,772.596C606.285,805.016 650.54,839.029 707.786,886.778" style="fill:none;stroke:black;stroke-width:21.79px;"/>
<g transform="matrix(1,0,0,1,-1.87208,0.908099)">
<path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
</g>
<g transform="matrix(-0.969851,0.2437,0.2437,0.969851,773.329,-138.212)">
<path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
</g>
<path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
<g transform="matrix(-1,0,0,1,1014.44,-0.213451)">
<path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
</g>
</g>
<g transform="matrix(2.4586,0,0,2.5497,-444.527,-690.434)">
<ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
</g>
<g transform="matrix(2.4586,0,0,2.5497,-127.75,-690.991)">
<ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
</g>
<path d="M505.738,698.061L578.879,713.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
<path d="M422.781,709.6L568.438,743.041" style="fill:none;stroke:black;stroke-width:12.1px;"/>
<path d="M419.941,738.409L565.688,772.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
<path d="M408.6,787.08L510.634,810.689" style="fill:none;stroke:black;stroke-width:12.1px;"/>
<path d="M397.571,815.956L500.93,840.219" style="fill:none;stroke:black;stroke-width:12.1px;"/>
<path d="M386.763,844.926L454.065,861.974" style="fill:none;stroke:black;stroke-width:12.1px;"/>
<path d="M459.169,919.169C512.194,898.262 539.171,867.298 535.241,824.402C568.052,818.31 598.499,817.058 625.84,822.165" style="fill:none;stroke:black;stroke-width:16.95px;"/>
<path d="M366.219,241.106C389.605,229.261 413.371,220.601 438.247,217.5C416.795,202.419 418.72,174.582 444.22,162.47C442.086,178.175 447.633,193.354 464.772,207.738C468.721,167.57 530.015,162.087 545.674,184.112C526.45,189.314 513.082,197.344 504.566,207.717C522.403,208.119 540.706,207.86 556.2,210.609L566.935,168.471C536.388,146.208 495.718,142.166 464.65,166.705C467.703,133.264 419.536,128.364 404.624,178.47L366.219,241.106Z"/>
<path d="M392.617,924.576C428.953,936.938 467.84,943.636 508.258,943.636C708.944,943.636 871.876,778.49 871.876,575.076C871.876,382.463 725.788,224.162 539.898,207.895L554.137,173.696L554.485,168.187C757.218,191.602 914.895,366.003 914.895,577.383C914.895,804.698 732.549,989.249 507.949,989.249C435.381,989.249 367.223,969.983 308.199,936.232L392.617,924.576ZM279.206,917.988C171.663,843.819 101.002,718.887 101.002,577.383C101.002,383.006 234.333,219.898 413.398,176.712L424.375,216.389C264.082,254.803 144.64,400.913 144.64,575.076C144.64,703.735 209.822,817.086 308.514,883.023L279.206,917.988Z"/>
<path d="M714.938,895.223L647.287,836.693L616.06,855.308L549.158,889.412L459.845,919.216L390.213,928.828L429.291,950.712L535.832,960.1L586.137,952.591L662.254,931.896L714.938,895.223Z"/>
<path d="M423.538,929.39C509.164,917.593 580.815,890.465 640.827,850.566C635.677,886.828 622.639,918.218 594.006,939.977C530.254,930.953 474.955,928.632 423.538,929.39Z" style="fill:url(#_Linear7);"/>
</g>
</g>
<defs>
<linearGradient id="_Linear1" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-52.3962,375.121,-375.121,-52.3962,471.134,384.463)"><stop offset="0" style="stop-color:rgb(255,176,44);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
<linearGradient id="_Linear2" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(28.6198,-84.8913,84.8913,28.6198,647.831,831.55)"><stop offset="0" style="stop-color:rgb(255,73,2);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,176,44);stop-opacity:1"/></linearGradient>
<image id="_Image4" width="153px" height="214px" xlink:href=""/>
<linearGradient id="_Linear5" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-39.3403,137.423,-137.423,-39.3403,545.523,573.246)"><stop offset="0" style="stop-color:rgb(255,200,41);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
<linearGradient id="_Linear6" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(1.01113,-68.2054,68.2054,1.01113,482.996,741.463)"><stop offset="0" style="stop-color:white;stop-opacity:1"/><stop offset="1" style="stop-color:rgb(179,179,179);stop-opacity:1"/></linearGradient>
<linearGradient id="_Linear7" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-7.13599,-34.117,34.117,-7.13599,578.793,922.144)"><stop offset="0" style="stop-color:rgb(164,164,164);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(106,106,106);stop-opacity:1"/></linearGradient>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 18 KiB

View File

@@ -0,0 +1,115 @@
// Propagate the URL hash to a CSS "target" class: the class is removed from the
// elements whose id or data-id equals the old hash and added to those matching
// the new hash.
window.addEventListener("hashchange", function (event) {
  [
    ["remove", "oldURL"],
    ["add", "newURL"],
  ].forEach(([op, tense]) => {
    const hash = new URL(event[tense]).hash.slice(1);
    // Escape the fragment before embedding it in a selector: a quote or
    // backslash in it would otherwise make querySelectorAll throw.
    const value = CSS.escape(hash);
    document
      .querySelectorAll(`[data-id="${value}"], [id="${value}"]`)
      .forEach((el) => el.classList[op]("target"));
  });
});
// Navigate document items with cursor keys: move the URL hash to the nearest
// item that lies in the pressed arrow's direction from the current :target.
document.addEventListener("keydown", function (event) {
  // Leave modified arrow keys (e.g. Alt+ArrowLeft for browser history,
  // Shift+Arrow for selection) to the browser.
  if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return;

  const target = document.querySelector("*:target");
  if (!target) return;
  const tbounds = target.getBoundingClientRect();

  // Each filter receives the centre point of a candidate element and tells
  // whether it lies beyond the matching edge of the current target.
  const filters = {
    ArrowUp: (_x, y) => y < tbounds.top,
    ArrowDown: (_x, y) => y > tbounds.bottom,
    ArrowLeft: (x, _y) => x < tbounds.left,
    ArrowRight: (x, _y) => x > tbounds.right,
  };
  const inDirection = filters[event.key];
  if (!inDirection) return;

  let minEl;
  let minDist = Infinity;
  for (const el of document.querySelectorAll(".item[id], .item *[id]")) {
    if (el === target) continue;
    const elBounds = el.getBoundingClientRect();
    const centerX = (elBounds.left + elBounds.right) / 2;
    const centerY = (elBounds.top + elBounds.bottom) / 2;
    if (!inDirection(centerX, centerY)) continue;
    // Manhattan distance between the top-left corners.
    const elDist =
      Math.abs(tbounds.x - elBounds.x) + Math.abs(tbounds.y - elBounds.y);
    if (elDist < minDist) {
      minEl = el;
      minDist = elDist;
    }
  }

  if (minEl) {
    // Only suppress the default scrolling when we actually navigate.
    event.preventDefault();
    location.href = `#${minEl.id}`;
  }
});
// Click handler: point the URL hash at the clicked element, using its data-id
// attribute when present and its id otherwise. The event does not bubble further.
function clickId(e) {
  e.stopPropagation();
  const element = e.currentTarget;
  const dataId = element.getAttribute("data-id");
  const id = dataId ?? element.id;
  location.href = "#" + id;
}
window.onload = () => {
  // Persist the checked state of input[type=checkbox][persist] in local storage.
  // This runs first because assigning .checked fires no "change" event, so the
  // display-when visibility below must be computed from the restored state.
  document
    .querySelectorAll("input[type=checkbox][persist]")
    .forEach((element) => {
      const prefix = element.getAttribute("persist");
      const name = element.getAttribute("name");
      const key = `docling-serve-${prefix}-${name}`;
      // Only override the markup default when a value was actually stored.
      const stored = localStorage.getItem(key);
      if (stored !== null) {
        element.checked = stored === "true";
      }
      element.addEventListener("change", (event) =>
        localStorage.setItem(key, event.target.checked)
      );
    });

  // (Re-)set the value of input[dep-on] from the value of the input whose name
  // is given by dep-on, using the JSON map in the dep-values attribute.
  document.querySelectorAll("input[dep-on]").forEach((element) => {
    const onName = element.getAttribute("dep-on");
    const onElement = document.getElementsByName(onName)[0];
    const depMap = JSON.parse(element.getAttribute("dep-values") ?? "{}");
    if (onElement && depMap) {
      // On load.
      element.value = depMap[onElement.value] ?? "";
      // On change.
      onElement.addEventListener(
        "change",
        (event) => (element.value = depMap[event.currentTarget.value] ?? "")
      );
    }
  });

  // Hide elements with a display-when attribute unless every input named in
  // its comma-separated list is checked.
  document.querySelectorAll("*[display-when]").forEach((element) => {
    const whenElements = element
      .getAttribute("display-when")
      .split(",")
      .flatMap((whenName) => [...document.getElementsByName(whenName.trim())]);

    function update() {
      const allChecked = whenElements.every((el) => el.checked);
      element.classList[allChecked ? "remove" : "add"]("hidden");
    }

    // On load.
    update();
    // On change.
    whenElements.forEach((whenElement) =>
      whenElement.addEventListener("change", update)
    );
  });
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,429 @@
@import "pico.css";
@view-transition {
navigation: auto;
}
:root {
--pico-font-size: 16px;
--highlight-factor: 0.8;
--target: hsl(240, 100%, 34%);
--mark: hsl(29, 100%, 35%);
}
@media (prefers-color-scheme: dark) {
:root {
--highlight-factor: 1.5;
--target: hsl(240, 100%, 70%);
--mark: hsl(29, 100%, 70%);
}
}
/* Utilities. */
.hidden {
display: none;
}
.sticky-footer {
position: sticky;
bottom: 0;
padding-top: var(--pico-spacing);
background: var(--pico-background-color);
}
html {
scroll-behavior: smooth;
}
header {
position: relative;
display: flex;
gap: 5rem;
margin-bottom: 2rem;
> .title {
white-space: nowrap;
font-size: 2rem;
font-weight: 300;
line-height: 1.75;
img {
display: inline-block;
max-height: 0.8em;
margin: -0.2rem -0.2em 0.25rem -0.2em;
}
}
&.loading img {
animation: shake 0.5s ease-in-out alternate infinite;
scale: 1.5;
translate: 0 1.5rem;
}
> .version {
position: absolute;
left: 6.25rem;
bottom: -0.5rem;
padding: 0 0.25rem;
font-size: 0.65rem;
line-height: 1rem;
border: solid 1px var(--pico-color);
border-radius: 0.3rem;
}
@media (prefers-color-scheme: dark) {
--glow: hsl(29, 100%, 70%);
> .title {
text-shadow: 0 0 0.25rem white, 0 0 0.5rem var(--glow),
0 0 0.75rem var(--glow), 0 0 1rem var(--glow);
color: white;
img {
filter: drop-shadow(0 0 0.05rem white)
drop-shadow(0 0 0.1rem var(--glow))
drop-shadow(0 0 0.15rem var(--glow))
drop-shadow(0 0 0.2rem var(--glow));
}
}
> .version {
color: white;
border-color: white;
text-shadow: 0 0 0.05rem white, 0 0 0.1rem var(--glow),
0 0 0.15rem var(--glow), 0 0 0.2rem var(--glow);
box-shadow: 0 0 0.05rem white, 0 0 0.1rem var(--glow),
0 0 0.15rem var(--glow), 0 0 0.2rem var(--glow);
}
}
}
@keyframes shake {
50% {
transform: rotate(-20deg);
}
100% {
transform: rotate(20deg);
}
}
label > small {
margin-left: var(--pico-spacing);
opacity: 0.75;
}
/* Conversion results. */
.progress,
.fail {
margin-top: 3rem;
}
.fail {
color: var(--pico-del-color);
}
main.preview {
display: grid;
grid:
auto / 1fr 0.5rem minmax(20ch, 70ch) 0.5rem minmax(min-content, auto)
minmax(0.5rem, 1fr);
grid-auto-flow: dense;
align-content: start;
}
/* Header and task status. */
main.preview {
> header {
grid-column: 3;
padding: 0 0.5rem;
}
> .status {
grid-row: 2;
grid-column: 3;
display: inline-block;
margin: 0 0.5rem 3rem 0.5rem;
span {
display: inline-block;
min-width: calc(5 * var(--pico-spacing));
padding-right: calc(0.5 * var(--pico-spacing));
}
}
> .formats {
grid-row: 2;
grid-column: 5;
margin-bottom: 3rem;
display: flex;
align-items: flex-end;
gap: 1rem;
> .configDarkImg {
display: none;
grid-row: 2;
grid-column: 6;
margin-left: auto;
}
@media (prefers-color-scheme: dark) {
> .configDarkImg {
display: block;
}
}
}
}
/* Invert images in dark mode (option): applies only while the checkbox inside
   .configDarkImg is checked. */
@media (prefers-color-scheme: dark) {
  main.preview:has(.configDarkImg > input:checked) {
    --img-hover-border: white;
    svg.page-image {
      --mark: hsl(29, 100%, 70%);
    }
    image,
    img {
      filter: invert(1) hue-rotate(180deg) saturate(1.25);
    }
  }
}
/* Document contents. */
main.preview {
--img-hover-border: black;
*[id] {
scroll-margin-top: 20vh;
}
> .page {
position: relative;
display: grid;
grid-template-columns: subgrid;
grid-auto-flow: dense;
grid-column: 1 / span 6;
> .item {
grid-column: 3;
width: 100%;
min-height: 3rem;
max-height: fit-content;
margin: 0;
padding: 0.5rem;
text-align: justify;
background-color: var(--pico-background-color);
cursor: pointer;
&:hover {
filter: brightness(var(--highlight-factor));
}
&.target {
outline: 2px solid var(--target);
z-index: 10;
}
}
> .item.void {
visibility: hidden;
}
> .item.annotated {
display: flex;
flex-direction: column;
align-items: stretch;
gap: 1rem;
}
/* Formatting. */
.bold {
font-weight: bold;
}
.italic {
font-style: italic;
}
.underline {
text-decoration: underline;
}
.strikethrough {
text-decoration: line-through;
}
.underline.strikethrough {
text-decoration: underline line-through;
}
.sub {
font-size: smaller;
vertical-align: sub;
}
.super {
font-size: smaller;
vertical-align: super;
}
/* Items out of content layer. */
> .item:not(.body),
> .item-markers:not(.body) {
opacity: 0.5;
}
> li.item {
list-style-type: none;
}
> .item.caption {
padding: 0.5rem 1.5rem;
font-size: 0.9rem;
}
> .item.table {
min-width: 0;
overflow-x: auto;
table {
font-size: 0.75rem;
border-collapse: collapse;
td {
vertical-align: top;
}
td.header {
font-weight: bold;
background-color: var(--pico-code-background-color);
}
td.target {
outline: solid 2px var(--target);
}
}
}
.annotation {
margin: 0;
&::before {
content: attr(data-kind);
opacity: 0.7;
}
&,
* {
font-size: 0.9rem;
color: var(--mark);
}
}
.annotation[data-kind="description"],
code.annotation {
white-space: pre-line;
}
.annotation[data-kind="classification"] {
width: fit-content;
}
> .item-markers {
position: relative;
grid-column: 2;
padding-top: 0.125rem;
padding-right: 0.5rem;
display: flex;
flex-direction: column;
align-items: flex-end;
font-family: monospace;
font-size: 0.675rem;
line-height: 1.25;
letter-spacing: 0;
color: var(--mark);
white-space: nowrap;
border-top: solid 1px var(--mark);
> * {
margin-right: 0.5rem;
}
> a {
padding: 0.125rem 0.25rem;
margin-top: 0.5rem;
border-radius: 0.125rem;
color: var(--pico-contrast-inverse);
background-color: var(--target);
text-decoration: none;
}
> a:hover {
  /* A custom property must be read through var(); a bare --name makes the
     whole declaration invalid and the hover effect is dropped. */
  filter: brightness(var(--highlight-factor));
}
&:not(.target) > a {
display: none;
}
&.group,
&.grouped {
border-left: 1px dashed var(--mark);
}
&.group {
margin-top: 0.5rem;
border-top: none;
}
}
> .page-marker {
grid-column: 4;
&.border {
transform: translateY(-1px);
border-top: solid 1px var(--mark);
}
}
> svg.page-image {
--mark: hsl(29, 100%, 35%);
grid-column: 5;
position: sticky;
top: 0.5rem;
width: 100%;
max-height: calc(100vh - 1rem);
outline: 1px solid var(--mark);
rect {
stroke: var(--mark);
stroke-width: 1px;
fill: var(--target);
fill-opacity: 0.0001; /* To activate hover. */
cursor: pointer;
&:hover {
filter: brightness(0.8);
fill-opacity: 0.1;
stroke: var(--img-hover-border);
stroke-width: 3px;
}
}
rect.target {
stroke: var(--target);
stroke-width: 3px;
stroke-dasharray: none;
}
text {
font-size: 0.675rem;
color: var(--mark);
&.top-no {
alignment-baseline: hanging;
}
}
}
}
}

20
docling_serve/ui/svg.py Normal file
View File

@@ -0,0 +1,20 @@
from pyjsx import JSX # type: ignore
def _tag(name: str):
    """Build a component factory that renders the element ``name`` as markup.

    The returned factory takes the element's children plus its attributes as
    keyword arguments and returns the serialized element as a string. With no
    (or empty) children the element is self-closing.
    """
    import html  # Local import: only needed for attribute escaping below.

    def factory(children, **args) -> "JSX":
        # Escape attribute values so quotes, ampersands or angle brackets cannot
        # break out of the attribute; each attribute carries its own leading
        # space so an element without attributes has no stray whitespace.
        attrs = "".join(
            f' {key}="{html.escape(str(value), quote=True)}"'
            for key, value in args.items()
        )
        if children:
            # Children are inserted verbatim: they may already be rendered markup.
            inner = "".join(str(child) for child in children)
            return f"<{name}{attrs}>{inner}</{name}>"
        return f"<{name}{attrs} />"

    return factory


image = _tag("image")
path = _tag("path")
rect = _tag("rect")
text = _tag("text")

View File

@@ -39,24 +39,42 @@ THe following table describes the options to configure the Docling Serve app.
| | `DOCLING_SERVE_STATIC_PATH` | unset | If set to a valid directory, the static assets for the docs and UI will be loaded from this path |
| | `DOCLING_SERVE_SCRATCH_PATH` | | If set, this directory will be used as scratch workspace, e.g. storing the results before they get requested. If unset, a temporary directory is created for this purpose. |
| `--enable-ui` | `DOCLING_SERVE_ENABLE_UI` | `false` | Enable the demonstrator UI. |
| | `DOCLING_SERVE_SHOW_VERSION_INFO` | `true` | If enabled, the `/version` endpoint will provide the Docling package versions, otherwise it will return a forbidden 403 error. |
| | `DOCLING_SERVE_ENABLE_REMOTE_SERVICES` | `false` | Allow pipeline components making remote connections. For example, this is needed when using a vision-language model via APIs. |
| | `DOCLING_SERVE_ALLOW_EXTERNAL_PLUGINS` | `false` | Allow the selection of third-party plugins. |
| | `DOCLING_SERVE_SINGLE_USE_RESULTS` | `true` | If true, results can be accessed only once. If false, the results accumulate in the scratch directory. |
| | `DOCLING_SERVE_RESULT_REMOVAL_DELAY` | `300` | When `DOCLING_SERVE_SINGLE_USE_RESULTS` is active, this is the delay before results are removed from the task registry. |
| | `DOCLING_SERVE_MAX_DOCUMENT_TIMEOUT` | `604800` (7 days) | The maximum time for processing a document. |
| | `DOCLING_NUM_THREADS` | `4` | Number of concurrent threads for processing a document. |
| | `DOCLING_SERVE_MAX_NUM_PAGES` | | The maximum number of pages for a document to be processed. |
| | `DOCLING_SERVE_MAX_FILE_SIZE` | | The maximum file size for a document to be processed. |
| | `DOCLING_SERVE_SYNC_POLL_INTERVAL` | `2` | Number of seconds to sleep between polling the task status in the sync endpoints. |
| | `DOCLING_SERVE_MAX_SYNC_WAIT` | `120` | Max number of seconds a synchronous endpoint is waiting for the task completion. |
| | `DOCLING_SERVE_LOAD_MODELS_AT_BOOT` | `True` | If enabled, the models for the default options will be loaded at boot. |
| | `DOCLING_SERVE_OPTIONS_CACHE_SIZE` | `2` | How many DocumentConverter objects (including their loaded models) to keep in the cache. |
| | `DOCLING_SERVE_QUEUE_MAX_SIZE` | | Maximum size of the pages queue. Up to this many pages can potentially be open at the same time. |
| | `DOCLING_SERVE_OCR_BATCH_SIZE` | | Batch size for the OCR stage. |
| | `DOCLING_SERVE_LAYOUT_BATCH_SIZE` | | Batch size for the layout detection stage. |
| | `DOCLING_SERVE_TABLE_BATCH_SIZE` | | Batch size for the table structure stage. |
| | `DOCLING_SERVE_BATCH_POLLING_INTERVAL_SECONDS` | | Wait time for gathering pages before starting a stage processing. |
| | `DOCLING_SERVE_CORS_ORIGINS` | `["*"]` | A list of origins that should be permitted to make cross-origin requests. |
| | `DOCLING_SERVE_CORS_METHODS` | `["*"]` | A list of HTTP methods that should be allowed for cross-origin requests. |
| | `DOCLING_SERVE_CORS_HEADERS` | `["*"]` | A list of HTTP request headers that should be supported for cross-origin requests. |
| | `DOCLING_SERVE_API_KEY` | | If specified, all the API requests must contain the header `X-Api-Key` with this value. |
| | `DOCLING_SERVE_ENG_KIND` | `local` | The compute engine to use for the async tasks. Possible values are `local`, `rq` and `kfp`. See below for more configurations of the engines. |
### Docling configuration
Some Docling settings, mostly about performance, are exposed as environment variables, which can also be used when running Docling Serve.
| ENV | Default | Description |
| ----|---------|-------------|
| `DOCLING_NUM_THREADS` | `4` | Number of concurrent threads used for the `torch` CPU execution. |
| `DOCLING_DEVICE` | | Device used for the model execution. Valid values are `cpu`, `cuda`, `mps`. When unset, the best device is chosen. For CUDA-enabled environments, you can choose which GPU using the syntax `cuda:0`, `cuda:1`, ... |
| `DOCLING_PERF_PAGE_BATCH_SIZE` | `4` | Number of pages processed in the same batch. |
| `DOCLING_PERF_ELEMENTS_BATCH_SIZE` | `8` | Number of document items/elements processed in the same batch during enrichment. |
| `DOCLING_DEBUG_PROFILE_PIPELINE_TIMINGS` | `false` | When enabled, Docling will provide detailed timings information. |
### Compute engine
Docling Serve can be deployed with several possible compute engines.

View File

@@ -7,6 +7,7 @@ The API provides two endpoints: one for urls, one for files. This is necessary t
On top of the source of file (see below), both endpoints support the same parameters.
<!-- begin: parameters-docs -->
<h4>ConvertDocumentsRequestOptions</h4>
| Field Name | Type | Description |
|------------|------|-------------|
@@ -39,6 +40,52 @@ On top of the source of file (see below), both endpoints support the same parame
| `vlm_pipeline_model_local` | VlmModelLocal or NoneType | Options for running a local vision-language model for the `vlm` pipeline. The parameters refer to a model hosted on Hugging Face. This parameter is mutually exclusive with `vlm_pipeline_model_api` and `vlm_pipeline_model`. |
| `vlm_pipeline_model_api` | VlmModelApi or NoneType | API details for using a vision-language model for the `vlm` pipeline. This parameter is mutually exclusive with `vlm_pipeline_model_local` and `vlm_pipeline_model`. |
<h4>VlmModelApi</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `url` | AnyUrl | Endpoint which accepts openai-api compatible requests. |
| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
| `params` | Dict[str, Any] | Model parameters. |
| `timeout` | float | Timeout for the API request. |
| `concurrency` | int | Maximum number of concurrent requests to the API. |
| `prompt` | str | Prompt used when calling the vision-language model. |
| `scale` | float | Scale factor of the images used. |
| `response_format` | ResponseFormat | Type of response generated by the model. |
| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
<h4>VlmModelLocal</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `repo_id` | str | Repository id from the Hugging Face Hub. |
| `prompt` | str | Prompt used when calling the vision-language model. |
| `scale` | float | Scale factor of the images used. |
| `response_format` | ResponseFormat | Type of response generated by the model. |
| `inference_framework` | InferenceFramework | Inference framework to use. |
| `transformers_model_type` | TransformersModelType | Type of transformers auto-model to use. |
| `extra_generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
| `temperature` | float | Temperature parameter controlling the reproducibility of the result. |
<h4>PictureDescriptionApi</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `url` | AnyUrl | Endpoint which accepts openai-api compatible requests. |
| `headers` | Dict[str, str] | Headers used for calling the API endpoint. For example, it could include authentication headers. |
| `params` | Dict[str, Any] | Model parameters. |
| `timeout` | float | Timeout for the API request. |
| `concurrency` | int | Maximum number of concurrent requests to the API. |
| `prompt` | str | Prompt used when calling the vision-language model. |
<h4>PictureDescriptionLocal</h4>
| Field Name | Type | Description |
|------------|------|-------------|
| `repo_id` | str | Repository id from the Hugging Face Hub. |
| `prompt` | str | Prompt used when calling the vision-language model. |
| `generation_config` | Dict[str, Any] | Config from https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.GenerationConfig |
<!-- end: parameters-docs -->
### Authentication

View File

@@ -1,6 +1,6 @@
[project]
name = "docling-serve"
version = "1.7.2" # DO NOT EDIT, updated automatically
version = "1.8.0" # DO NOT EDIT, updated automatically
description = "Running Docling as a service"
license = {text = "MIT"}
authors = [
@@ -35,7 +35,7 @@ requires-python = ">=3.10"
dependencies = [
"docling~=2.38",
"docling-core>=2.45.0",
"docling-jobkit[kfp,rq,vlm]>=1.6.0,<2.0.0",
"docling-jobkit[kfp,rq,vlm]>=1.8.0,<2.0.0",
"fastapi[standard]<0.119.0", # ~=0.115
"httpx~=0.28",
"pydantic~=2.10",
@@ -50,7 +50,7 @@ dependencies = [
[project.optional-dependencies]
ui = [
"gradio>=5.23.2,<6.0.0",
"python-jsx>=0.2.0",
]
tesserocr = [
"tesserocr~=2.7"

View File

@@ -1,5 +1,5 @@
import re
from typing import Annotated, Any, get_args, get_origin
from typing import Annotated, Any, Union, get_args, get_origin
from pydantic import BaseModel
@@ -90,39 +90,75 @@ def _format_type(type_hint: Any) -> str:
return str(type_hint)
def _unroll_types(tp) -> list[type]:
"""
Unrolls typing.Union and typing.Optional types into a flat list of types.
"""
origin = get_origin(tp)
if origin is Union:
# Recursively unroll each type inside the Union
types = []
for arg in get_args(tp):
types.extend(_unroll_types(arg))
# Remove duplicates while preserving order
return list(dict.fromkeys(types))
else:
# If it's not a Union, just return it as a single-element list
return [tp]
def generate_model_doc(model: type[BaseModel]) -> str:
    """Generate documentation for a Pydantic model and its nested models.

    Starting from ``model``, one section is emitted per model: an ``<h4>``
    heading with the class name followed by a Markdown table listing each
    field's name, formatted type and description. Every Pydantic model that
    appears in a field annotation (directly or as a member of a union) is
    queued and documented in its own section.

    Args:
        model: The Pydantic model class to document.

    Returns:
        The concatenated sections for ``model`` and every nested model
        reachable from it. Each model is documented exactly once, so shared
        or self-referential models neither repeat nor loop forever.
    """

    def _is_model_class(candidate: Any) -> bool:
        # Annotation members can be non-class objects (parameterized
        # generics, Literal, ...); issubclass may reject those with a
        # TypeError, which simply means "not a model" here.
        try:
            return issubclass(candidate, BaseModel)
        except TypeError:
            return False

    pending = [model]
    # Models already queued; prevents duplicate sections and endless
    # traversal when models reference each other.
    seen = {model}
    parts: list[str] = []

    while pending:
        current_model = pending.pop()

        parts.append(f"<h4>{current_model.__name__}</h4>\n")
        parts.append("\n| Field Name | Type | Description |\n")
        parts.append("|------------|------|-------------|\n")

        for base_model in getattr(current_model, "__mro__", (current_model,)):
            # Skip MRO entries that are not Pydantic models.
            if not hasattr(base_model, "model_fields"):
                continue

            for field_name, field in base_model.model_fields.items():
                description = field.description or "No description provided."
                description = format_allowed_values_description(description)
                description = format_variable_names(description)

                # Strip the Annotated wrapper, keeping only the base type.
                original_type = field.annotation
                if get_origin(original_type) is Annotated:
                    base_type = get_args(original_type)[0]
                else:
                    base_type = original_type

                field_type = format_variable_names(_format_type(base_type))
                parts.append(f"| `{field_name}` | {field_type} | {description} |\n")

                # Queue nested models so they get their own section.
                for member in _unroll_types(base_type):
                    if _is_model_class(member) and member not in seen:
                        seen.add(member)
                        pending.append(member)

            # Only the first Pydantic class in the MRO is used; stop
            # iterating the base classes.
            break

        parts.append("\n")

    return "".join(parts)

2524
uv.lock generated

File diff suppressed because one or more lines are too long