mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 08:33:50 +00:00
@@ -54,7 +54,7 @@ from docling_jobkit.orchestrators.base_orchestrator import (
|
||||
TaskNotFoundError,
|
||||
)
|
||||
|
||||
from docling_serve.auth import APIKeyAuth, AuthenticationResult
|
||||
from docling_serve.auth import APIKeyHeaderAuth, AuthenticationResult
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
|
||||
from docling_serve.datamodel.requests import (
|
||||
ConvertDocumentsRequest,
|
||||
@@ -167,7 +167,7 @@ def create_app(): # noqa: C901
|
||||
offline_docs_assets = True
|
||||
_log.info("Found static assets.")
|
||||
|
||||
require_auth = APIKeyAuth(docling_serve_settings.api_key)
|
||||
require_auth = APIKeyHeaderAuth(docling_serve_settings.api_key)
|
||||
app = FastAPI(
|
||||
title="Docling Serve",
|
||||
docs_url=None if offline_docs_assets else "/swagger",
|
||||
@@ -188,39 +188,6 @@ def create_app(): # noqa: C901
|
||||
allow_headers=headers,
|
||||
)
|
||||
|
||||
# Mount the Gradio app
|
||||
if docling_serve_settings.enable_ui:
|
||||
try:
|
||||
import gradio as gr
|
||||
|
||||
from docling_serve.gradio_ui import ui as gradio_ui
|
||||
from docling_serve.settings import uvicorn_settings
|
||||
|
||||
tmp_output_dir = get_scratch() / "gradio"
|
||||
tmp_output_dir.mkdir(exist_ok=True, parents=True)
|
||||
gradio_ui.gradio_output_dir = tmp_output_dir
|
||||
|
||||
# Build the root_path for Gradio, accounting for UVICORN_ROOT_PATH
|
||||
gradio_root_path = (
|
||||
f"{uvicorn_settings.root_path}/ui"
|
||||
if uvicorn_settings.root_path
|
||||
else "/ui"
|
||||
)
|
||||
|
||||
app = gr.mount_gradio_app(
|
||||
app,
|
||||
gradio_ui,
|
||||
path="/ui",
|
||||
allowed_paths=["./logo.png", tmp_output_dir],
|
||||
root_path=gradio_root_path,
|
||||
)
|
||||
except ImportError:
|
||||
_log.warning(
|
||||
"Docling Serve enable_ui is activated, but gradio is not installed. "
|
||||
"Install it with `pip install docling-serve[ui]` "
|
||||
"or `pip install gradio`"
|
||||
)
|
||||
|
||||
#############################
|
||||
# Offline assets definition #
|
||||
#############################
|
||||
@@ -1021,4 +988,20 @@ def create_app(): # noqa: C901
|
||||
await orchestrator.clear_results(older_than=older_then)
|
||||
return ClearResponse()
|
||||
|
||||
# Optional UI
|
||||
if docling_serve_settings.enable_ui:
|
||||
try:
|
||||
from docling_serve.ui.app import create_ui_app
|
||||
|
||||
ui_app = create_ui_app(
|
||||
process_file_async, process_url_async, task_result, task_status_poll
|
||||
)
|
||||
app.mount("/ui", app=ui_app, name="ui")
|
||||
except ImportError as ex:
|
||||
_log.error(ex)
|
||||
_log.warning(
|
||||
"Docling Serve enable_ui is activated, but its dependencies are not installed."
|
||||
"Install it with `uv sync --extra ui`"
|
||||
)
|
||||
|
||||
return app
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException, Request, status
|
||||
from fastapi.security import APIKeyHeader
|
||||
from fastapi import HTTPException, Request, Response, status
|
||||
from fastapi.security import APIKeyCookie, APIKeyHeader
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
@@ -11,46 +11,79 @@ class AuthenticationResult(BaseModel):
|
||||
detail: Any | None = None
|
||||
|
||||
|
||||
class APIKeyAuth(APIKeyHeader):
|
||||
"""
|
||||
FastAPI dependency which evaluates a status API Key.
|
||||
"""
|
||||
|
||||
class KeyValidator:
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
header_name: str = "X-Api-Key",
|
||||
field_name: str = "X-Api-Key",
|
||||
fail_on_unauthorized: bool = True,
|
||||
) -> None:
|
||||
self.api_key = api_key
|
||||
self.header_name = header_name
|
||||
super().__init__(name=self.header_name, auto_error=False)
|
||||
self.field_name = field_name
|
||||
self.fail_on_unauthorized = fail_on_unauthorized
|
||||
|
||||
async def _validate_api_key(self, header_api_key: str | None):
|
||||
if header_api_key is None:
|
||||
return AuthenticationResult(
|
||||
valid=False, errors=[f"Missing header {self.header_name}."]
|
||||
)
|
||||
async def __call__(self, candidate_key: str | None):
|
||||
if candidate_key is None:
|
||||
return self._error(f"Missing field {self.field_name}.")
|
||||
|
||||
header_api_key = header_api_key.strip()
|
||||
candidate_key = candidate_key.strip()
|
||||
|
||||
# Otherwise check the apikey
|
||||
if header_api_key == self.api_key or self.api_key == "":
|
||||
if candidate_key == self.api_key or self.api_key == "":
|
||||
return AuthenticationResult(
|
||||
valid=True,
|
||||
detail=header_api_key,
|
||||
detail=candidate_key, # Remove?
|
||||
)
|
||||
else:
|
||||
return self._error("The provided API Key is invalid.")
|
||||
|
||||
def _error(self, error: str):
|
||||
if self.fail_on_unauthorized and self.api_key:
|
||||
raise HTTPException(status.HTTP_401_UNAUTHORIZED, error)
|
||||
else:
|
||||
return AuthenticationResult(
|
||||
valid=False,
|
||||
errors=["The provided API Key is invalid."],
|
||||
errors=[error],
|
||||
)
|
||||
|
||||
|
||||
class APIKeyHeaderAuth(APIKeyHeader):
|
||||
"""
|
||||
FastAPI dependency which evaluates a status API Key in a header.
|
||||
"""
|
||||
|
||||
def __init__(self, validator: str | KeyValidator) -> None:
|
||||
self.validator = (
|
||||
KeyValidator(validator) if isinstance(validator, str) else validator
|
||||
)
|
||||
super().__init__(name=self.validator.field_name, auto_error=False)
|
||||
|
||||
async def __call__(self, request: Request) -> AuthenticationResult: # type: ignore
|
||||
header_api_key = await super().__call__(request=request)
|
||||
result = await self._validate_api_key(header_api_key)
|
||||
if self.api_key and not result.valid:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_401_UNAUTHORIZED, detail=result.detail
|
||||
)
|
||||
return result
|
||||
key = await super().__call__(request=request)
|
||||
return await self.validator(key)
|
||||
|
||||
|
||||
class APIKeyCookieAuth(APIKeyCookie):
|
||||
"""
|
||||
FastAPI dependency which evaluates a status API Key in a cookie.
|
||||
"""
|
||||
|
||||
def __init__(self, validator: str | KeyValidator) -> None:
|
||||
self.validator = (
|
||||
KeyValidator(validator) if isinstance(validator, str) else validator
|
||||
)
|
||||
super().__init__(name=self.validator.field_name, auto_error=False)
|
||||
|
||||
async def __call__(self, request: Request) -> AuthenticationResult: # type: ignore
|
||||
api_key = await super().__call__(request=request)
|
||||
return await self.validator(api_key)
|
||||
|
||||
def _set_api_key(self, response: Response, api_key: str, expires=24 * 3600):
|
||||
response.set_cookie(
|
||||
key=self.validator.field_name,
|
||||
value=api_key,
|
||||
expires=expires,
|
||||
secure=True,
|
||||
httponly=True,
|
||||
samesite="strict",
|
||||
)
|
||||
|
||||
@@ -1,921 +0,0 @@
|
||||
import base64
|
||||
import importlib
|
||||
import itertools
|
||||
import json
|
||||
import logging
|
||||
import ssl
|
||||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import certifi
|
||||
import gradio as gr
|
||||
import httpx
|
||||
|
||||
from docling.datamodel.base_models import FormatToExtensions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
PdfBackend,
|
||||
ProcessingPipeline,
|
||||
TableFormerMode,
|
||||
TableStructureOptions,
|
||||
)
|
||||
|
||||
from docling_serve.helper_functions import _to_list_of_strings
|
||||
from docling_serve.settings import docling_serve_settings, uvicorn_settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
############################
|
||||
# Path of static artifacts #
|
||||
############################
|
||||
|
||||
logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg"
|
||||
js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7"
|
||||
if (
|
||||
docling_serve_settings.static_path is not None
|
||||
and docling_serve_settings.static_path.is_dir()
|
||||
):
|
||||
logo_path = str(docling_serve_settings.static_path / "logo.svg")
|
||||
js_components_url = "/static/docling-components.js"
|
||||
|
||||
|
||||
##############################
|
||||
# Head JS for web components #
|
||||
##############################
|
||||
head = f"""
|
||||
<script src="{js_components_url}" type="module"></script>
|
||||
"""
|
||||
|
||||
#################
|
||||
# CSS and theme #
|
||||
#################
|
||||
|
||||
css = """
|
||||
#logo {
|
||||
border-style: none;
|
||||
background: none;
|
||||
box-shadow: none;
|
||||
min-width: 80px;
|
||||
}
|
||||
#dark_mode_column {
|
||||
display: flex;
|
||||
align-content: flex-end;
|
||||
}
|
||||
#title {
|
||||
text-align: left;
|
||||
display:block;
|
||||
height: auto;
|
||||
padding-top: 5px;
|
||||
line-height: 0;
|
||||
}
|
||||
.title-text h1 > p, .title-text p {
|
||||
margin-top: 0px !important;
|
||||
margin-bottom: 2px !important;
|
||||
}
|
||||
#custom-container {
|
||||
border: 0.909091px solid;
|
||||
padding: 10px;
|
||||
border-radius: 4px;
|
||||
}
|
||||
#custom-container h4 {
|
||||
font-size: 14px;
|
||||
}
|
||||
#file_input_zone {
|
||||
height: 140px;
|
||||
}
|
||||
|
||||
docling-img {
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
docling-img::part(page) {
|
||||
box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2);
|
||||
}
|
||||
"""
|
||||
|
||||
theme = gr.themes.Default(
|
||||
text_size="md",
|
||||
spacing_size="md",
|
||||
font=[
|
||||
gr.themes.GoogleFont("Red Hat Display"),
|
||||
"ui-sans-serif",
|
||||
"system-ui",
|
||||
"sans-serif",
|
||||
],
|
||||
font_mono=[
|
||||
gr.themes.GoogleFont("Red Hat Mono"),
|
||||
"ui-monospace",
|
||||
"Consolas",
|
||||
"monospace",
|
||||
],
|
||||
)
|
||||
|
||||
#############
|
||||
# Variables #
|
||||
#############
|
||||
|
||||
gradio_output_dir = None # Will be set by FastAPI when mounted
|
||||
file_output_path = None # Will be set when a new file is generated
|
||||
|
||||
#############
|
||||
# Functions #
|
||||
#############
|
||||
|
||||
|
||||
def get_api_endpoint() -> str:
|
||||
protocol = "http"
|
||||
if uvicorn_settings.ssl_keyfile is not None:
|
||||
protocol = "https"
|
||||
return f"{protocol}://{docling_serve_settings.api_host}:{uvicorn_settings.port}"
|
||||
|
||||
|
||||
def get_ssl_context() -> ssl.SSLContext:
|
||||
ctx = ssl.create_default_context(cafile=certifi.where())
|
||||
kube_sa_ca_cert_path = Path(
|
||||
"/run/secrets/kubernetes.io/serviceaccount/service-ca.crt"
|
||||
)
|
||||
if (
|
||||
uvicorn_settings.ssl_keyfile is not None
|
||||
and ".svc." in docling_serve_settings.api_host
|
||||
and kube_sa_ca_cert_path.exists()
|
||||
):
|
||||
ctx.load_verify_locations(cafile=kube_sa_ca_cert_path)
|
||||
return ctx
|
||||
|
||||
|
||||
def health_check():
|
||||
response = httpx.get(f"{get_api_endpoint()}/health")
|
||||
if response.status_code == 200:
|
||||
return "Healthy"
|
||||
return "Unhealthy"
|
||||
|
||||
|
||||
def set_options_visibility(x):
|
||||
return gr.Accordion("Options", open=x)
|
||||
|
||||
|
||||
def set_outputs_visibility_direct(x, y):
|
||||
content = gr.Row(visible=x)
|
||||
file = gr.Row(visible=y)
|
||||
return content, file
|
||||
|
||||
|
||||
def set_task_id_visibility(x):
|
||||
task_id_row = gr.Row(visible=x)
|
||||
return task_id_row
|
||||
|
||||
|
||||
def set_outputs_visibility_process(x):
|
||||
content = gr.Row(visible=not x)
|
||||
file = gr.Row(visible=x)
|
||||
return content, file
|
||||
|
||||
|
||||
def set_download_button_label(label_text: gr.State):
|
||||
return gr.DownloadButton(label=str(label_text), scale=1)
|
||||
|
||||
|
||||
def clear_outputs():
|
||||
task_id_rendered = ""
|
||||
markdown_content = ""
|
||||
json_content = ""
|
||||
json_rendered_content = ""
|
||||
html_content = ""
|
||||
text_content = ""
|
||||
doctags_content = ""
|
||||
|
||||
return (
|
||||
task_id_rendered,
|
||||
markdown_content,
|
||||
markdown_content,
|
||||
json_content,
|
||||
json_rendered_content,
|
||||
html_content,
|
||||
html_content,
|
||||
text_content,
|
||||
doctags_content,
|
||||
)
|
||||
|
||||
|
||||
def clear_url_input():
|
||||
return ""
|
||||
|
||||
|
||||
def clear_file_input():
|
||||
return None
|
||||
|
||||
|
||||
def auto_set_return_as_file(
|
||||
url_input_value: str,
|
||||
file_input_value: Optional[list[str]],
|
||||
image_export_mode_value: str,
|
||||
):
|
||||
# If more than one input source is provided, return as file
|
||||
if (
|
||||
(len(url_input_value.split(",")) > 1)
|
||||
or (file_input_value and len(file_input_value) > 1)
|
||||
or (image_export_mode_value == "referenced")
|
||||
):
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
|
||||
def change_ocr_lang(ocr_engine):
|
||||
if ocr_engine == "easyocr":
|
||||
return gr.update(visible=True, value="en,fr,de,es")
|
||||
elif ocr_engine == "tesseract_cli":
|
||||
return gr.update(visible=True, value="eng,fra,deu,spa")
|
||||
elif ocr_engine == "tesseract":
|
||||
return gr.update(visible=True, value="eng,fra,deu,spa")
|
||||
elif ocr_engine == "rapidocr":
|
||||
return gr.update(visible=True, value="english,chinese")
|
||||
elif ocr_engine == "ocrmac":
|
||||
return gr.update(visible=True, value="fr-FR,de-DE,es-ES,en-US")
|
||||
|
||||
return gr.update(visible=False, value="")
|
||||
|
||||
|
||||
def wait_task_finish(auth: str, task_id: str, return_as_file: bool):
|
||||
conversion_sucess = False
|
||||
task_finished = False
|
||||
task_status = ""
|
||||
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = str(auth)
|
||||
|
||||
ssl_ctx = get_ssl_context()
|
||||
while not task_finished:
|
||||
try:
|
||||
response = httpx.get(
|
||||
f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5",
|
||||
headers=headers,
|
||||
verify=ssl_ctx,
|
||||
timeout=15,
|
||||
)
|
||||
task_status = response.json()["task_status"]
|
||||
if task_status == "success":
|
||||
conversion_sucess = True
|
||||
task_finished = True
|
||||
|
||||
if task_status in ("failure", "revoked"):
|
||||
conversion_sucess = False
|
||||
task_finished = True
|
||||
raise RuntimeError(f"Task failed with status {task_status!r}")
|
||||
time.sleep(5)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file(s): {e}")
|
||||
conversion_sucess = False
|
||||
task_finished = True
|
||||
raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
|
||||
|
||||
if conversion_sucess:
|
||||
try:
|
||||
response = httpx.get(
|
||||
f"{get_api_endpoint()}/v1/result/{task_id}",
|
||||
headers=headers,
|
||||
timeout=15,
|
||||
verify=ssl_ctx,
|
||||
)
|
||||
output = response_to_output(response, return_as_file)
|
||||
return output
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting task result: {e}")
|
||||
|
||||
raise gr.Error(
|
||||
f"Error getting task result, conversion finished with status: {task_status}"
|
||||
)
|
||||
|
||||
|
||||
def process_url(
|
||||
auth,
|
||||
input_sources,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
ocr_lang,
|
||||
pdf_backend,
|
||||
table_mode,
|
||||
abort_on_error,
|
||||
return_as_file,
|
||||
do_code_enrichment,
|
||||
do_formula_enrichment,
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
):
|
||||
target = {"kind": "zip" if return_as_file else "inbody"}
|
||||
parameters = {
|
||||
"sources": [
|
||||
{"kind": "http", "url": source} for source in input_sources.split(",")
|
||||
],
|
||||
"options": {
|
||||
"to_formats": to_formats,
|
||||
"image_export_mode": image_export_mode,
|
||||
"pipeline": pipeline,
|
||||
"ocr": ocr,
|
||||
"force_ocr": force_ocr,
|
||||
"ocr_engine": ocr_engine,
|
||||
"ocr_lang": _to_list_of_strings(ocr_lang),
|
||||
"pdf_backend": pdf_backend,
|
||||
"table_mode": table_mode,
|
||||
"abort_on_error": abort_on_error,
|
||||
"do_code_enrichment": do_code_enrichment,
|
||||
"do_formula_enrichment": do_formula_enrichment,
|
||||
"do_picture_classification": do_picture_classification,
|
||||
"do_picture_description": do_picture_description,
|
||||
},
|
||||
"target": target,
|
||||
}
|
||||
if (
|
||||
not parameters["sources"]
|
||||
or len(parameters["sources"]) == 0
|
||||
or parameters["sources"][0]["url"] == ""
|
||||
):
|
||||
logger.error("No input sources provided.")
|
||||
raise gr.Error("No input sources provided.", print_exception=False)
|
||||
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = str(auth)
|
||||
|
||||
print(f"{headers=}")
|
||||
try:
|
||||
ssl_ctx = get_ssl_context()
|
||||
response = httpx.post(
|
||||
f"{get_api_endpoint()}/v1/convert/source/async",
|
||||
json=parameters,
|
||||
headers=headers,
|
||||
verify=ssl_ctx,
|
||||
timeout=60,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing URL: {e}")
|
||||
raise gr.Error(f"Error processing URL: {e}", print_exception=False)
|
||||
if response.status_code != 200:
|
||||
data = response.json()
|
||||
error_message = data.get("detail", "An unknown error occurred.")
|
||||
logger.error(f"Error processing file: {error_message}")
|
||||
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
|
||||
|
||||
task_id_rendered = response.json()["task_id"]
|
||||
return task_id_rendered
|
||||
|
||||
|
||||
def file_to_base64(file):
|
||||
with open(file.name, "rb") as f:
|
||||
encoded_string = base64.b64encode(f.read()).decode("utf-8")
|
||||
return encoded_string
|
||||
|
||||
|
||||
def process_file(
|
||||
auth,
|
||||
files,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
ocr_lang,
|
||||
pdf_backend,
|
||||
table_mode,
|
||||
abort_on_error,
|
||||
return_as_file,
|
||||
do_code_enrichment,
|
||||
do_formula_enrichment,
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
):
|
||||
if not files or len(files) == 0:
|
||||
logger.error("No files provided.")
|
||||
raise gr.Error("No files provided.", print_exception=False)
|
||||
files_data = [
|
||||
{"kind": "file", "base64_string": file_to_base64(file), "filename": file.name}
|
||||
for file in files
|
||||
]
|
||||
target = {"kind": "zip" if return_as_file else "inbody"}
|
||||
|
||||
parameters = {
|
||||
"sources": files_data,
|
||||
"options": {
|
||||
"to_formats": to_formats,
|
||||
"image_export_mode": image_export_mode,
|
||||
"pipeline": pipeline,
|
||||
"ocr": ocr,
|
||||
"force_ocr": force_ocr,
|
||||
"ocr_engine": ocr_engine,
|
||||
"ocr_lang": _to_list_of_strings(ocr_lang),
|
||||
"pdf_backend": pdf_backend,
|
||||
"table_mode": table_mode,
|
||||
"abort_on_error": abort_on_error,
|
||||
"return_as_file": return_as_file,
|
||||
"do_code_enrichment": do_code_enrichment,
|
||||
"do_formula_enrichment": do_formula_enrichment,
|
||||
"do_picture_classification": do_picture_classification,
|
||||
"do_picture_description": do_picture_description,
|
||||
},
|
||||
"target": target,
|
||||
}
|
||||
|
||||
headers = {}
|
||||
if docling_serve_settings.api_key:
|
||||
headers["X-Api-Key"] = str(auth)
|
||||
|
||||
try:
|
||||
ssl_ctx = get_ssl_context()
|
||||
response = httpx.post(
|
||||
f"{get_api_endpoint()}/v1/convert/source/async",
|
||||
json=parameters,
|
||||
headers=headers,
|
||||
verify=ssl_ctx,
|
||||
timeout=60,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file(s): {e}")
|
||||
raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
|
||||
if response.status_code != 200:
|
||||
data = response.json()
|
||||
error_message = data.get("detail", "An unknown error occurred.")
|
||||
logger.error(f"Error processing file: {error_message}")
|
||||
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
|
||||
|
||||
task_id_rendered = response.json()["task_id"]
|
||||
return task_id_rendered
|
||||
|
||||
|
||||
def response_to_output(response, return_as_file):
|
||||
markdown_content = ""
|
||||
json_content = ""
|
||||
json_rendered_content = ""
|
||||
html_content = ""
|
||||
text_content = ""
|
||||
doctags_content = ""
|
||||
download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1)
|
||||
if return_as_file:
|
||||
filename = (
|
||||
response.headers.get("Content-Disposition").split("filename=")[1].strip('"')
|
||||
)
|
||||
tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_"))
|
||||
file_output_path = f"{tmp_output_dir}/{filename}"
|
||||
# logger.info(f"Saving file to: {file_output_path}")
|
||||
with open(file_output_path, "wb") as f:
|
||||
f.write(response.content)
|
||||
download_button = gr.DownloadButton(
|
||||
visible=True, label=f"Download {filename}", scale=1, value=file_output_path
|
||||
)
|
||||
else:
|
||||
full_content = response.json()
|
||||
markdown_content = full_content.get("document").get("md_content")
|
||||
json_content = json.dumps(
|
||||
full_content.get("document").get("json_content"), indent=2
|
||||
)
|
||||
# Embed document JSON and trigger load at client via an image.
|
||||
json_rendered_content = f"""
|
||||
<docling-img id="dclimg" pagenumbers><docling-tooltip></docling-tooltip></docling-img>
|
||||
<script id="dcljson" type="application/json" onload="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);">{json_content}</script>
|
||||
<img src onerror="document.getElementById('dclimg').src = JSON.parse(document.getElementById('dcljson').textContent);" />
|
||||
"""
|
||||
html_content = full_content.get("document").get("html_content")
|
||||
text_content = full_content.get("document").get("text_content")
|
||||
doctags_content = full_content.get("document").get("doctags_content")
|
||||
return (
|
||||
markdown_content,
|
||||
markdown_content,
|
||||
json_content,
|
||||
json_rendered_content,
|
||||
html_content,
|
||||
html_content,
|
||||
text_content,
|
||||
doctags_content,
|
||||
download_button,
|
||||
)
|
||||
|
||||
|
||||
############
|
||||
# UI Setup #
|
||||
############
|
||||
|
||||
with gr.Blocks(
|
||||
head=head,
|
||||
css=css,
|
||||
theme=theme,
|
||||
title="Docling Serve",
|
||||
delete_cache=(3600, 36000), # Delete all files older than 10 hour every hour
|
||||
) as ui:
|
||||
# Constants stored in states to be able to pass them as inputs to functions
|
||||
processing_text = gr.State("Processing your document(s), please wait...")
|
||||
true_bool = gr.State(True)
|
||||
false_bool = gr.State(False)
|
||||
|
||||
# Banner
|
||||
with gr.Row(elem_id="check_health"):
|
||||
# Logo
|
||||
with gr.Column(scale=1, min_width=90):
|
||||
try:
|
||||
gr.Image(
|
||||
logo_path,
|
||||
height=80,
|
||||
width=80,
|
||||
show_download_button=False,
|
||||
show_label=False,
|
||||
show_fullscreen_button=False,
|
||||
container=False,
|
||||
elem_id="logo",
|
||||
scale=0,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Logo not found.")
|
||||
|
||||
# Title
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
gr.Markdown(
|
||||
f"# Docling Serve \n(docling version: "
|
||||
f"{importlib.metadata.version('docling')})",
|
||||
elem_id="title",
|
||||
elem_classes=["title-text"],
|
||||
)
|
||||
# Dark mode button
|
||||
with gr.Column(scale=16, elem_id="dark_mode_column"):
|
||||
dark_mode_btn = gr.Button("Dark/Light Mode", scale=0)
|
||||
dark_mode_btn.click(
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
js="""() => {
|
||||
if (document.querySelectorAll('.dark').length) {
|
||||
document.querySelectorAll('.dark').forEach(
|
||||
el => el.classList.remove('dark')
|
||||
);
|
||||
} else {
|
||||
document.querySelector('body').classList.add('dark');
|
||||
}
|
||||
}""",
|
||||
show_api=False,
|
||||
)
|
||||
|
||||
# URL Processing Tab
|
||||
with gr.Tab("Convert URL"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
url_input = gr.Textbox(
|
||||
label="URL Input Source",
|
||||
placeholder="https://arxiv.org/pdf/2501.17887",
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
url_process_btn = gr.Button("Process URL", scale=1)
|
||||
url_reset_btn = gr.Button("Reset", scale=1)
|
||||
|
||||
# File Processing Tab
|
||||
with gr.Tab("Convert File"):
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
raw_exts = itertools.chain.from_iterable(FormatToExtensions.values())
|
||||
file_input = gr.File(
|
||||
elem_id="file_input_zone",
|
||||
label="Upload File",
|
||||
file_types=[
|
||||
f".{v.lower()}"
|
||||
for v in raw_exts # lowercase
|
||||
]
|
||||
+ [
|
||||
f".{v.upper()}"
|
||||
for v in raw_exts # uppercase
|
||||
],
|
||||
file_count="multiple",
|
||||
scale=4,
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
file_process_btn = gr.Button("Process File", scale=1)
|
||||
file_reset_btn = gr.Button("Reset", scale=1)
|
||||
|
||||
# Auth
|
||||
with gr.Row(visible=bool(docling_serve_settings.api_key)):
|
||||
with gr.Column():
|
||||
auth = gr.Textbox(
|
||||
label="Authentication",
|
||||
placeholder="API Key",
|
||||
type="password",
|
||||
)
|
||||
|
||||
# Options
|
||||
with gr.Accordion("Options") as options:
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1):
|
||||
to_formats = gr.CheckboxGroup(
|
||||
[
|
||||
("Docling (JSON)", "json"),
|
||||
("Markdown", "md"),
|
||||
("HTML", "html"),
|
||||
("Plain Text", "text"),
|
||||
("Doc Tags", "doctags"),
|
||||
],
|
||||
label="To Formats",
|
||||
value=["json", "md"],
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
image_export_mode = gr.Radio(
|
||||
[
|
||||
("Embedded", "embedded"),
|
||||
("Placeholder", "placeholder"),
|
||||
("Referenced", "referenced"),
|
||||
],
|
||||
label="Image Export Mode",
|
||||
value="embedded",
|
||||
)
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
pipeline = gr.Radio(
|
||||
[(v.value.capitalize(), v.value) for v in ProcessingPipeline],
|
||||
label="Pipeline type",
|
||||
value=ProcessingPipeline.STANDARD.value,
|
||||
)
|
||||
with gr.Row():
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
ocr = gr.Checkbox(label="Enable OCR", value=True)
|
||||
force_ocr = gr.Checkbox(label="Force OCR", value=False)
|
||||
with gr.Column(scale=1):
|
||||
engines_list = [
|
||||
("Auto", "auto"),
|
||||
("EasyOCR", "easyocr"),
|
||||
("Tesseract", "tesseract"),
|
||||
("RapidOCR", "rapidocr"),
|
||||
]
|
||||
if sys.platform == "darwin":
|
||||
engines_list.append(("OCRMac", "ocrmac"))
|
||||
|
||||
ocr_engine = gr.Radio(
|
||||
engines_list,
|
||||
label="OCR Engine",
|
||||
value="auto",
|
||||
)
|
||||
with gr.Column(scale=1, min_width=200):
|
||||
ocr_lang = gr.Textbox(
|
||||
label="OCR Language (beware of the format)",
|
||||
value="en,fr,de,es",
|
||||
visible=False,
|
||||
)
|
||||
ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
|
||||
with gr.Row():
|
||||
with gr.Column(scale=4):
|
||||
pdf_backend = gr.Radio(
|
||||
[v.value for v in PdfBackend],
|
||||
label="PDF Backend",
|
||||
value=PdfBackend.DLPARSE_V4.value,
|
||||
)
|
||||
with gr.Column(scale=2):
|
||||
table_mode = gr.Radio(
|
||||
[(v.value.capitalize(), v.value) for v in TableFormerMode],
|
||||
label="Table Mode",
|
||||
value=TableStructureOptions().mode.value,
|
||||
)
|
||||
with gr.Column(scale=1):
|
||||
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
|
||||
return_as_file = gr.Checkbox(label="Return as File", value=False)
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
do_code_enrichment = gr.Checkbox(
|
||||
label="Enable code enrichment", value=False
|
||||
)
|
||||
do_formula_enrichment = gr.Checkbox(
|
||||
label="Enable formula enrichment", value=False
|
||||
)
|
||||
with gr.Column():
|
||||
do_picture_classification = gr.Checkbox(
|
||||
label="Enable picture classification", value=False
|
||||
)
|
||||
do_picture_description = gr.Checkbox(
|
||||
label="Enable picture description", value=False
|
||||
)
|
||||
|
||||
# Task id output
|
||||
with gr.Row(visible=False) as task_id_output:
|
||||
task_id_rendered = gr.Textbox(label="Task id", interactive=False)
|
||||
|
||||
# Document output
|
||||
with gr.Row(visible=False) as content_output:
|
||||
with gr.Tab("Docling (JSON)"):
|
||||
output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
|
||||
with gr.Tab("Docling-Rendered"):
|
||||
output_json_rendered = gr.HTML(label="Response")
|
||||
with gr.Tab("Markdown"):
|
||||
output_markdown = gr.Code(
|
||||
language="markdown", wrap_lines=True, show_label=False
|
||||
)
|
||||
with gr.Tab("Markdown-Rendered"):
|
||||
output_markdown_rendered = gr.Markdown(label="Response")
|
||||
with gr.Tab("HTML"):
|
||||
output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
|
||||
with gr.Tab("HTML-Rendered"):
|
||||
output_html_rendered = gr.HTML(label="Response")
|
||||
with gr.Tab("Text"):
|
||||
output_text = gr.Code(wrap_lines=True, show_label=False)
|
||||
with gr.Tab("DocTags"):
|
||||
output_doctags = gr.Code(wrap_lines=True, show_label=False)
|
||||
|
||||
# File download output
|
||||
with gr.Row(visible=False) as file_output:
|
||||
download_file_btn = gr.DownloadButton(label="Placeholder", scale=1)
|
||||
|
||||
##############
|
||||
# UI Actions #
|
||||
##############
|
||||
|
||||
# Handle Return as File
|
||||
url_input.change(
|
||||
auto_set_return_as_file,
|
||||
inputs=[url_input, file_input, image_export_mode],
|
||||
outputs=[return_as_file],
|
||||
)
|
||||
file_input.change(
|
||||
auto_set_return_as_file,
|
||||
inputs=[url_input, file_input, image_export_mode],
|
||||
outputs=[return_as_file],
|
||||
)
|
||||
image_export_mode.change(
|
||||
auto_set_return_as_file,
|
||||
inputs=[url_input, file_input, image_export_mode],
|
||||
outputs=[return_as_file],
|
||||
)
|
||||
|
||||
# URL processing
|
||||
url_process_btn.click(
|
||||
set_options_visibility, inputs=[false_bool], outputs=[options]
|
||||
).then(
|
||||
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
|
||||
).then(
|
||||
clear_outputs,
|
||||
inputs=None,
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
output_json_rendered,
|
||||
output_html,
|
||||
output_html_rendered,
|
||||
output_text,
|
||||
output_doctags,
|
||||
],
|
||||
).then(
|
||||
set_task_id_visibility,
|
||||
inputs=[true_bool],
|
||||
outputs=[task_id_output],
|
||||
).then(
|
||||
process_url,
|
||||
inputs=[
|
||||
auth,
|
||||
url_input,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
ocr_lang,
|
||||
pdf_backend,
|
||||
table_mode,
|
||||
abort_on_error,
|
||||
return_as_file,
|
||||
do_code_enrichment,
|
||||
do_formula_enrichment,
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
],
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
],
|
||||
).then(
|
||||
set_outputs_visibility_process,
|
||||
inputs=[return_as_file],
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
wait_task_finish,
|
||||
inputs=[auth, task_id_rendered, return_as_file],
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
output_json_rendered,
|
||||
output_html,
|
||||
output_html_rendered,
|
||||
output_text,
|
||||
output_doctags,
|
||||
download_file_btn,
|
||||
],
|
||||
)
|
||||
|
||||
url_reset_btn.click(
|
||||
clear_outputs,
|
||||
inputs=None,
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
output_json_rendered,
|
||||
output_html,
|
||||
output_html_rendered,
|
||||
output_text,
|
||||
output_doctags,
|
||||
],
|
||||
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
|
||||
set_outputs_visibility_direct,
|
||||
inputs=[false_bool, false_bool],
|
||||
outputs=[content_output, file_output],
|
||||
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
|
||||
clear_url_input, inputs=None, outputs=[url_input]
|
||||
)
|
||||
|
||||
# File processing
|
||||
file_process_btn.click(
|
||||
set_options_visibility, inputs=[false_bool], outputs=[options]
|
||||
).then(
|
||||
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
|
||||
).then(
|
||||
clear_outputs,
|
||||
inputs=None,
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
output_json_rendered,
|
||||
output_html,
|
||||
output_html_rendered,
|
||||
output_text,
|
||||
output_doctags,
|
||||
],
|
||||
).then(
|
||||
set_task_id_visibility,
|
||||
inputs=[true_bool],
|
||||
outputs=[task_id_output],
|
||||
).then(
|
||||
process_file,
|
||||
inputs=[
|
||||
auth,
|
||||
file_input,
|
||||
to_formats,
|
||||
image_export_mode,
|
||||
pipeline,
|
||||
ocr,
|
||||
force_ocr,
|
||||
ocr_engine,
|
||||
ocr_lang,
|
||||
pdf_backend,
|
||||
table_mode,
|
||||
abort_on_error,
|
||||
return_as_file,
|
||||
do_code_enrichment,
|
||||
do_formula_enrichment,
|
||||
do_picture_classification,
|
||||
do_picture_description,
|
||||
],
|
||||
outputs=[
|
||||
task_id_rendered,
|
||||
],
|
||||
).then(
|
||||
set_outputs_visibility_process,
|
||||
inputs=[return_as_file],
|
||||
outputs=[content_output, file_output],
|
||||
).then(
|
||||
wait_task_finish,
|
||||
inputs=[auth, task_id_rendered, return_as_file],
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
output_json_rendered,
|
||||
output_html,
|
||||
output_html_rendered,
|
||||
output_text,
|
||||
output_doctags,
|
||||
download_file_btn,
|
||||
],
|
||||
)
|
||||
|
||||
file_reset_btn.click(
|
||||
clear_outputs,
|
||||
inputs=None,
|
||||
outputs=[
|
||||
output_markdown,
|
||||
output_markdown_rendered,
|
||||
output_json,
|
||||
output_json_rendered,
|
||||
output_html,
|
||||
output_html_rendered,
|
||||
output_text,
|
||||
output_doctags,
|
||||
],
|
||||
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
|
||||
set_outputs_visibility_direct,
|
||||
inputs=[false_bool, false_bool],
|
||||
outputs=[content_output, file_output],
|
||||
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
|
||||
clear_file_input, inputs=None, outputs=[file_input]
|
||||
)
|
||||
0
docling_serve/ui/__init__.py
Normal file
0
docling_serve/ui/__init__.py
Normal file
278
docling_serve/ui/app.py
Normal file
278
docling_serve/ui/app.py
Normal file
@@ -0,0 +1,278 @@
|
||||
import io
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import (
|
||||
BackgroundTasks,
|
||||
Depends,
|
||||
FastAPI,
|
||||
Form,
|
||||
HTTPException,
|
||||
Request,
|
||||
UploadFile,
|
||||
status,
|
||||
)
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse, Response
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import AnyHttpUrl
|
||||
from pyjsx import auto_setup
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
|
||||
from docling.datamodel.base_models import OutputFormat
|
||||
from docling_core.types.doc.document import (
|
||||
FloatingItem,
|
||||
PageItem,
|
||||
RefItem,
|
||||
)
|
||||
from docling_jobkit.orchestrators.base_orchestrator import (
|
||||
BaseOrchestrator,
|
||||
)
|
||||
|
||||
from docling_serve.auth import APIKeyCookieAuth, AuthenticationResult
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
|
||||
from docling_serve.datamodel.requests import ConvertDocumentsRequest, HttpSourceRequest
|
||||
from docling_serve.helper_functions import FormDepends
|
||||
from docling_serve.orchestrator_factory import get_async_orchestrator
|
||||
from docling_serve.settings import docling_serve_settings
|
||||
|
||||
from .convert import ConvertPage # type: ignore
|
||||
from .pages import AuthPage, StatusPage, TaskPage, TasksPage # type: ignore
|
||||
|
||||
# Initialize JSX.
|
||||
auto_setup
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# TODO: Isolate passed functions into a controller?
|
||||
def create_ui_app(process_file, process_url, task_result, task_status_poll) -> FastAPI: # noqa: C901
|
||||
ui_app = FastAPI()
|
||||
require_auth = APIKeyCookieAuth(docling_serve_settings.api_key)
|
||||
|
||||
# Static files.
|
||||
ui_app.mount(
|
||||
"/static",
|
||||
StaticFiles(directory=Path(__file__).parent.absolute() / "static"),
|
||||
name="static",
|
||||
)
|
||||
|
||||
# Convert page.
|
||||
@ui_app.get("/")
|
||||
async def get_root():
|
||||
return RedirectResponse(url="convert")
|
||||
|
||||
@ui_app.get("/convert", response_class=HTMLResponse)
|
||||
async def get_convert(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
):
|
||||
return str(ConvertPage())
|
||||
|
||||
@ui_app.post("/convert", response_class=HTMLResponse)
|
||||
async def post_convert(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
options: Annotated[
|
||||
ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
|
||||
],
|
||||
files: Annotated[list[UploadFile], Form()],
|
||||
url: Annotated[str, Form()],
|
||||
page_min: Annotated[str, Form()],
|
||||
page_max: Annotated[str, Form()],
|
||||
):
|
||||
# Refined model options and behavior.
|
||||
if len(page_min) > 0:
|
||||
options.page_range = (int(page_min), options.page_range[1])
|
||||
if len(page_max) > 0:
|
||||
options.page_range = (options.page_range[0], int(page_max))
|
||||
|
||||
options.ocr_lang = [
|
||||
sub_lang.strip()
|
||||
for lang in options.ocr_lang or []
|
||||
for sub_lang in lang.split(",")
|
||||
if len(sub_lang.strip()) > 0
|
||||
]
|
||||
|
||||
files = [f for f in files if f.size]
|
||||
if len(files) > 0:
|
||||
# Directly uploaded documents.
|
||||
response = await process_file(
|
||||
auth=auth,
|
||||
orchestrator=orchestrator,
|
||||
background_tasks=background_tasks,
|
||||
files=files,
|
||||
options=options,
|
||||
)
|
||||
elif len(url.strip()) > 0:
|
||||
# URLs of documents.
|
||||
source = HttpSourceRequest(url=AnyHttpUrl(url))
|
||||
request = ConvertDocumentsRequest(options=options, sources=[source])
|
||||
|
||||
response = await process_url(
|
||||
auth=auth,
|
||||
orchestrator=orchestrator,
|
||||
conversion_request=request,
|
||||
)
|
||||
else:
|
||||
validation = {
|
||||
"files": "Upload files or enter a URL",
|
||||
"url": "Enter a URL or upload files",
|
||||
}
|
||||
return str(ConvertPage(options=options, validation=validation))
|
||||
|
||||
return RedirectResponse(f"tasks/{response.task_id}/", status.HTTP_303_SEE_OTHER)
|
||||
|
||||
# Task overview page.
|
||||
@ui_app.get("/tasks/", response_class=HTMLResponse)
|
||||
async def get_tasks(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
):
|
||||
tasks = sorted(orchestrator.tasks.values(), key=lambda t: t.created_at)
|
||||
|
||||
return str(TasksPage(tasks=tasks))
|
||||
|
||||
# Task specific page.
|
||||
@ui_app.get("/tasks/{task_id}/", response_class=HTMLResponse)
|
||||
async def get_task(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
task_id: str,
|
||||
):
|
||||
poll = await task_status_poll(auth, orchestrator, task_id)
|
||||
|
||||
result = None
|
||||
if poll.task_status in ["success", "failure"]:
|
||||
try:
|
||||
result = await task_result(
|
||||
auth, orchestrator, background_tasks, task_id
|
||||
)
|
||||
except Exception as ex:
|
||||
logging.error(ex)
|
||||
|
||||
return str(TaskPage(poll, result))
|
||||
|
||||
# Poll task via HTTP status.
|
||||
@ui_app.get("/tasks/{task_id}/poll", response_class=Response)
|
||||
async def poll_task(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
task_id: str,
|
||||
):
|
||||
poll = await task_status_poll(auth, orchestrator, task_id)
|
||||
return Response(
|
||||
status_code=status.HTTP_202_ACCEPTED
|
||||
if poll.task_status == "started"
|
||||
else status.HTTP_200_OK
|
||||
)
|
||||
|
||||
# Download the contents of zipped documents.
|
||||
@ui_app.get("/tasks/{task_id}/documents.zip")
|
||||
async def get_task_zip(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
task_id: str,
|
||||
):
|
||||
return await task_result(auth, orchestrator, background_tasks, task_id)
|
||||
|
||||
# Get the output of a task, as a converted document in a specific format.
|
||||
@ui_app.get("/tasks/{task_id}/document.{format}")
|
||||
async def get_task_document_format(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
task_id: str,
|
||||
format: str,
|
||||
):
|
||||
if format not in [f.value for f in OutputFormat]:
|
||||
raise HTTPException(status.HTTP_404_NOT_FOUND, "Output format not found.")
|
||||
else:
|
||||
response = await task_result(auth, orchestrator, background_tasks, task_id)
|
||||
|
||||
# TODO: Make this compatible with base_models FormatToMimeType?
|
||||
mimes = {
|
||||
"html": "text/html",
|
||||
"md": "text/markdown",
|
||||
"json": "application/json",
|
||||
}
|
||||
|
||||
content = (
|
||||
response.document.json_content.export_to_dict()
|
||||
if format == OutputFormat.JSON
|
||||
else response.document.dict()[f"{format}_content"]
|
||||
)
|
||||
|
||||
return Response(
|
||||
content=str(content),
|
||||
media_type=mimes.get(format, "text/plain"),
|
||||
)
|
||||
|
||||
@ui_app.get("/tasks/{task_id}/document/{cref:path}")
|
||||
async def get_task_document_item(
|
||||
request: Request,
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
|
||||
background_tasks: BackgroundTasks,
|
||||
task_id: str,
|
||||
cref: str,
|
||||
):
|
||||
response = await task_result(auth, orchestrator, background_tasks, task_id)
|
||||
doc = response.document.json_content
|
||||
item = RefItem(cref=f"#/{cref}").resolve(doc) # type: ignore
|
||||
|
||||
if "image/*" in (request.headers.get("Accept") or "") and isinstance(
|
||||
item, FloatingItem | PageItem
|
||||
):
|
||||
content = io.BytesIO()
|
||||
|
||||
if (
|
||||
isinstance(item, PageItem)
|
||||
and (img_ref := item.image)
|
||||
and img_ref.pil_image
|
||||
):
|
||||
img_ref.pil_image.save(content, format="PNG")
|
||||
elif isinstance(item, FloatingItem) and (img := item.get_image(doc)):
|
||||
img.save(content, format="PNG")
|
||||
|
||||
return Response(content=content.getvalue(), media_type="image/png")
|
||||
else:
|
||||
return item
|
||||
|
||||
# Page not found; catch all.
|
||||
@ui_app.api_route("/{path_name:path}")
|
||||
def no_page(
|
||||
auth: Annotated[AuthenticationResult, Depends(require_auth)],
|
||||
):
|
||||
raise HTTPException(status.HTTP_404_NOT_FOUND, "Page not found.")
|
||||
|
||||
# Exception and auth pages.
|
||||
@ui_app.exception_handler(StarletteHTTPException)
|
||||
@ui_app.exception_handler(Exception)
|
||||
async def exception_page(request: Request, ex: Exception):
|
||||
if not isinstance(ex, StarletteHTTPException):
|
||||
# Internal error.
|
||||
ex = HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR)
|
||||
|
||||
if request.method == "POST":
|
||||
# Authorization required -> API key dialog.
|
||||
form = await request.form()
|
||||
form_api_key = form.get("api_key")
|
||||
if isinstance(form_api_key, str):
|
||||
response = RedirectResponse(request.url, status.HTTP_303_SEE_OTHER)
|
||||
require_auth._set_api_key(response, form_api_key)
|
||||
return response
|
||||
|
||||
if ex.status_code == status.HTTP_401_UNAUTHORIZED:
|
||||
return HTMLResponse(str(AuthPage()), status.HTTP_401_UNAUTHORIZED)
|
||||
|
||||
# HTTP exception page; avoid referer loop.
|
||||
referer = request.headers.get("Referer")
|
||||
if referer == request.url:
|
||||
referer = None
|
||||
|
||||
return HTMLResponse(str(StatusPage(ex, referer)), ex.status_code)
|
||||
|
||||
return ui_app
|
||||
251
docling_serve/ui/convert.px
Normal file
251
docling_serve/ui/convert.px
Normal file
@@ -0,0 +1,251 @@
|
||||
import json
|
||||
import sys
|
||||
|
||||
from pyjsx import jsx, JSX
|
||||
|
||||
from docling.datamodel.base_models import FormatToExtensions, OutputFormat
|
||||
from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions, ocr_engines_enum
|
||||
|
||||
from .forms import EnumCheckboxes, EnumRadios, EnumSelect, ocr_engine_languages, ValidatedInput
|
||||
from .pages import Header, Page
|
||||
|
||||
base_convert_options = ConvertDocumentsRequestOptions()
|
||||
base_convert_options.to_formats.append(OutputFormat.JSON)
|
||||
|
||||
|
||||
def ConvertPage(
|
||||
options: ConvertDocumentsRequestOptions = base_convert_options,
|
||||
validation: None | dict[str, str] = None
|
||||
) -> JSX:
|
||||
file_accept = ",".join([f".{ext}" for exts in FormatToExtensions.values() for ext in exts])
|
||||
|
||||
return (
|
||||
<Page title="Convert">
|
||||
<main class="container">
|
||||
<Header />
|
||||
|
||||
<form class="convert" method="post" enctype="multipart/form-data">
|
||||
<legend>
|
||||
<b>Documents</b>
|
||||
</legend>
|
||||
<fieldset class="grid">
|
||||
<ValidatedInput
|
||||
name="files"
|
||||
type="file"
|
||||
multiple
|
||||
accept={file_accept}
|
||||
validation={validation}
|
||||
/>
|
||||
<ValidatedInput
|
||||
name="url"
|
||||
placeholder="or enter a URL: https://arxiv.org/pdf/2501.17887"
|
||||
validation={validation}
|
||||
/>
|
||||
</fieldset>
|
||||
|
||||
<fieldset class="grid">
|
||||
<EnumSelect
|
||||
enum={ProcessingPipeline}
|
||||
selected={options.pipeline}
|
||||
name="pipeline"
|
||||
title="Pipeline"
|
||||
/>
|
||||
<EnumSelect
|
||||
enum={PdfBackend}
|
||||
selected={options.pdf_backend}
|
||||
name="pdf_backend"
|
||||
title="PDF Backend"
|
||||
/>
|
||||
|
||||
<div>
|
||||
<label>Pages</label>
|
||||
<div role="group">
|
||||
<input
|
||||
type="number"
|
||||
name="page_min"
|
||||
min={1}
|
||||
step={1}
|
||||
placeholder="1"
|
||||
value={None if options.page_range[0] <= 1 else options.page_range[0]}
|
||||
/>
|
||||
<input
|
||||
type="number"
|
||||
name="page_max"
|
||||
min={1}
|
||||
step={1}
|
||||
placeholder="max."
|
||||
value={None if options.page_range[1] >= sys.maxsize else options.page_range[1]}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label>Timeout<small>in seconds</small></label>
|
||||
<input
|
||||
type="number"
|
||||
name="document_timeout"
|
||||
min={1}
|
||||
step={1}
|
||||
value={int(options.document_timeout)}
|
||||
/>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
<div class="grid">
|
||||
<EnumCheckboxes
|
||||
enum={OutputFormat}
|
||||
selected={options.to_formats}
|
||||
name="to_formats"
|
||||
title={<b>Output</b>}
|
||||
/>
|
||||
|
||||
<div>
|
||||
<fieldset>
|
||||
<label>
|
||||
<input
|
||||
type="checkbox"
|
||||
name="do_ocr"
|
||||
checked={options.do_ocr}
|
||||
/>
|
||||
<b>OCR</b>
|
||||
</label>
|
||||
<label display-when="do_ocr">
|
||||
<input
|
||||
type="checkbox"
|
||||
name="do_code_enrichment"
|
||||
checked={options.do_code_enrichment}
|
||||
/>
|
||||
Code
|
||||
</label>
|
||||
<label display-when="do_ocr">
|
||||
<input
|
||||
type="checkbox"
|
||||
name="do_formula_enrichment"
|
||||
checked={options.do_formula_enrichment}
|
||||
/>
|
||||
Formulas
|
||||
</label>
|
||||
</fieldset>
|
||||
|
||||
<EnumSelect
|
||||
display-when="do_ocr"
|
||||
enum={ocr_engines_enum}
|
||||
selected={options.ocr_engine}
|
||||
name="ocr_engine"
|
||||
title="Engine"
|
||||
/>
|
||||
|
||||
<label display-when="do_ocr">Language</label>
|
||||
<input
|
||||
display-when="do_ocr"
|
||||
name="ocr_lang"
|
||||
dep-on="ocr_engine"
|
||||
dep-values={json.dumps(ocr_engine_languages)}
|
||||
pattern="[\w+]*[,\w+]*"
|
||||
title="A comma separated list of language codes, of which the format depends on the selected engine."
|
||||
/>
|
||||
|
||||
<label display-when="do_ocr">
|
||||
<input
|
||||
type="checkbox"
|
||||
name="force_ocr"
|
||||
checked={options.force_ocr}
|
||||
/>
|
||||
Force
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<fieldset>
|
||||
<label>
|
||||
<input
|
||||
type="checkbox"
|
||||
name="include_images"
|
||||
checked={options.include_images}
|
||||
/>
|
||||
<b>Images</b>
|
||||
</label>
|
||||
<label display-when="include_images">
|
||||
<input
|
||||
type="checkbox"
|
||||
name="do_picture_classification"
|
||||
checked={options.do_picture_classification}
|
||||
/>
|
||||
Classification
|
||||
</label>
|
||||
<label display-when="include_images">
|
||||
<input
|
||||
type="checkbox"
|
||||
name="do_picture_description"
|
||||
checked={options.do_picture_description}
|
||||
/>
|
||||
Description
|
||||
</label>
|
||||
<label display-when="include_images,do_picture_description">Area threshold</label>
|
||||
<input
|
||||
display-when="include_images,do_picture_description"
|
||||
type="number"
|
||||
name="picture_description_area_threshold"
|
||||
min={0}
|
||||
max={1}
|
||||
step={0.01}
|
||||
value={options.picture_description_area_threshold}
|
||||
/>
|
||||
</fieldset>
|
||||
|
||||
<EnumSelect
|
||||
display-when="include_images"
|
||||
enum={ImageRefMode}
|
||||
selected={options.image_export_mode}
|
||||
name="image_export_mode"
|
||||
title="Export"
|
||||
/>
|
||||
<label display-when="include_images">Scale</label>
|
||||
<input
|
||||
display-when="include_images"
|
||||
type="number"
|
||||
name="images_scale"
|
||||
min={0}
|
||||
step={0.1}
|
||||
value={options.images_scale}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<fieldset>
|
||||
<label>
|
||||
<input
|
||||
type="checkbox"
|
||||
name="do_table_structure"
|
||||
checked={options.do_table_structure}
|
||||
/>
|
||||
<b>Tables</b>
|
||||
</label>
|
||||
<label display-when="do_table_structure">
|
||||
<input
|
||||
type="checkbox"
|
||||
name="table_cell_matching"
|
||||
checked={options.table_cell_matching}
|
||||
/>
|
||||
Cell matching
|
||||
</label>
|
||||
</fieldset>
|
||||
<EnumSelect
|
||||
display-when="do_table_structure"
|
||||
enum={TableFormerMode}
|
||||
selected={options.table_mode}
|
||||
name="table_mode"
|
||||
title="Mode"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="sticky-footer">
|
||||
<input type="submit" value="Convert" />
|
||||
</div>
|
||||
</form>
|
||||
</main>
|
||||
</Page>
|
||||
)
|
||||
127
docling_serve/ui/forms.px
Normal file
127
docling_serve/ui/forms.px
Normal file
@@ -0,0 +1,127 @@
|
||||
from enum import Enum
|
||||
from typing import Type
|
||||
|
||||
from pyjsx import jsx, JSX
|
||||
|
||||
from docling.datamodel.pipeline_options import OcrOptions
|
||||
|
||||
from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions
|
||||
|
||||
|
||||
ocr_engine_languages = {
|
||||
SubOptions.kind: ",".join(SubOptions().lang)
|
||||
for SubOptions in OcrOptions.__subclasses__()
|
||||
}
|
||||
|
||||
|
||||
def _format_label(label: str) -> str:
|
||||
return label.replace("_", " ").lower()
|
||||
|
||||
|
||||
def option_example(field_name: str) -> str | None:
|
||||
field = ConvertDocumentsRequestOptions.model_fields[field_name]
|
||||
return (field.examples or [])[0]
|
||||
|
||||
|
||||
def ValidatedInput(validation: None | dict[str, str], name: str, **kwargs) -> JSX:
|
||||
if validation:
|
||||
invalid = "true" if name in validation else "false"
|
||||
content = [<input name={name} aria-invalid={invalid} {...kwargs} />]
|
||||
|
||||
if name in validation:
|
||||
content.append(<small>{validation[name]}</small>)
|
||||
|
||||
return <div>{content}</div>
|
||||
else:
|
||||
return <input name={name} {...kwargs} />
|
||||
|
||||
|
||||
def EnumCheckboxes(
|
||||
children,
|
||||
enum: Type[Enum],
|
||||
selected: list[Enum],
|
||||
name: str,
|
||||
title: JSX = None,
|
||||
**kwargs
|
||||
) -> JSX:
|
||||
return (
|
||||
<fieldset {...kwargs}>
|
||||
{
|
||||
<legend>{title}</legend>
|
||||
if title
|
||||
else None
|
||||
}
|
||||
|
||||
{[
|
||||
<label>
|
||||
<input
|
||||
type="checkbox"
|
||||
name={name}
|
||||
value={e.value}
|
||||
checked={e.value in selected}
|
||||
/>
|
||||
{_format_label(e.name)}
|
||||
</label>
|
||||
for e in enum
|
||||
]}
|
||||
</fieldset>
|
||||
)
|
||||
|
||||
|
||||
def EnumRadios(
|
||||
children,
|
||||
enum: Type[Enum],
|
||||
selected: Enum,
|
||||
name: str,
|
||||
title: JSX = None,
|
||||
**kwargs
|
||||
) -> JSX:
|
||||
return (
|
||||
<fieldset {...kwargs}>
|
||||
{
|
||||
<legend>{title}</legend>
|
||||
if title
|
||||
else None
|
||||
}
|
||||
|
||||
{[
|
||||
<label>
|
||||
<input
|
||||
type="radio"
|
||||
name={name}
|
||||
value={e.value}
|
||||
checked={e.value == selected}
|
||||
/>
|
||||
{_format_label(e.name)}
|
||||
</label>
|
||||
for e in enum
|
||||
]}
|
||||
</fieldset>
|
||||
)
|
||||
|
||||
|
||||
def EnumSelect(
|
||||
children,
|
||||
enum: Type[Enum],
|
||||
selected: Enum,
|
||||
name: str,
|
||||
title: JSX = None,
|
||||
**kwargs
|
||||
) -> JSX:
|
||||
return (
|
||||
<div {...kwargs}>
|
||||
{
|
||||
<label>{title}</label>
|
||||
if title
|
||||
else None
|
||||
}
|
||||
<select name={name}>
|
||||
{[
|
||||
<option value={e.value} selected={e.value == selected}>
|
||||
{_format_label(e.name)}
|
||||
</option>
|
||||
for e in enum
|
||||
]}
|
||||
</select>
|
||||
</div>
|
||||
)
|
||||
220
docling_serve/ui/pages.px
Normal file
220
docling_serve/ui/pages.px
Normal file
@@ -0,0 +1,220 @@
|
||||
from importlib import metadata
|
||||
|
||||
from fastapi import FastAPI, HTTPException, Response
|
||||
from pyjsx import jsx, JSX
|
||||
|
||||
from docling.datamodel.base_models import OutputFormat
|
||||
from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode
|
||||
from docling_jobkit.datamodel.task import Task
|
||||
from docling_serve.datamodel.responses import ConvertDocumentResponse
|
||||
|
||||
from .preview import DocPreview
|
||||
|
||||
|
||||
def Header(children, classname: str = "") -> JSX:
|
||||
return (
|
||||
<header class={classname}>
|
||||
<span class="title">
|
||||
D<img src="/ui/static/logo.svg" />CLING SERVE
|
||||
</span>
|
||||
|
||||
<span class="version" title="Docling version">
|
||||
{metadata.version('docling')}
|
||||
</span>
|
||||
|
||||
<nav>
|
||||
<ul>
|
||||
<li><a href="/ui/convert">Convert</a></li>
|
||||
<li><a href="/ui/tasks/">Tasks</a></li>
|
||||
</ul>
|
||||
</nav>
|
||||
</header>
|
||||
)
|
||||
|
||||
|
||||
def Page(children, title: str, poll: bool = False) -> JSX:
|
||||
return (
|
||||
<html lang="en" id="root">
|
||||
<head>
|
||||
<title>{title}</title>
|
||||
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
|
||||
<link rel="stylesheet" href="/ui/static/style.css" />
|
||||
<script src="/ui/static/main.js" />
|
||||
</head>
|
||||
|
||||
<body onload={'setInterval(async () => { if ((await fetch("poll")).status == 200) location.reload(); }, 3000)' if poll else None}>
|
||||
{children}
|
||||
</body>
|
||||
</html>
|
||||
)
|
||||
|
||||
|
||||
def AuthPage():
|
||||
return (
|
||||
<Page title="Authenticate">
|
||||
<form method="post">
|
||||
<dialog open>
|
||||
<article>
|
||||
<header>
|
||||
<h4>Authenticate</h4>
|
||||
</header>
|
||||
<input
|
||||
type="password"
|
||||
name="api_key"
|
||||
placeholder="Enter API key"
|
||||
required autofocus
|
||||
/>
|
||||
<footer>
|
||||
<input type="submit" value="Confirm" />
|
||||
</footer>
|
||||
</article>
|
||||
</dialog>
|
||||
</form>
|
||||
</Page>
|
||||
)
|
||||
|
||||
|
||||
def TasksPage(tasks: list[Task]) -> JSX:
|
||||
return (
|
||||
<Page title="Tasks">
|
||||
<main class="container">
|
||||
<Header />
|
||||
|
||||
{(
|
||||
<p>There are no active tasks. <a href="../convert">Convert</a> a document to create a new task.</p>
|
||||
) if len(tasks) == 0 else (
|
||||
<table>
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Task</th>
|
||||
<th>Status</th>
|
||||
<th>ID</th>
|
||||
<th>Created</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{
|
||||
<tr>
|
||||
<td>{task.task_type.name}</td>
|
||||
<td>{task.task_status.name}</td>
|
||||
<td>
|
||||
<a href={f"{task.task_id}/"}>{task.task_id}</a>
|
||||
</td>
|
||||
<td>{task.created_at.strftime("%d-%m-%Y, %H:%M:%S")}</td>
|
||||
</tr>
|
||||
for task in tasks
|
||||
}
|
||||
</tbody>
|
||||
</table>
|
||||
)}
|
||||
</main>
|
||||
</Page>
|
||||
)
|
||||
|
||||
|
||||
def TaskPage(poll, task: ConvertDocumentResponse) -> JSX:
|
||||
def PlainPage(children, poll = False) -> JSX:
|
||||
return (
|
||||
<Page title="Task" poll={poll}>
|
||||
<main class="container">
|
||||
<Header classname={"loading" if poll else None} />
|
||||
{children}
|
||||
</main>
|
||||
</Page>
|
||||
)
|
||||
|
||||
if isinstance(task, Response):
|
||||
return (
|
||||
<PlainPage>
|
||||
<p>
|
||||
<ins>Converted multiple documents successfully</ins>
|
||||
</p>
|
||||
<a href="documents.zip">documents.zip</a>
|
||||
</PlainPage>
|
||||
)
|
||||
else:
|
||||
match poll.task_status:
|
||||
case "success":
|
||||
doc = task.document.dict()
|
||||
doc_json = task.document.json_content
|
||||
|
||||
return (
|
||||
<Page title={task.document.filename}>
|
||||
<main class="preview">
|
||||
<Header />
|
||||
|
||||
<div class="status">
|
||||
<div>
|
||||
<span>Task</span>
|
||||
<b>{poll.task_id}</b>
|
||||
</div>
|
||||
<div>
|
||||
<span>converted</span>
|
||||
<b>{task.document.filename}</b>
|
||||
</div>
|
||||
<div>
|
||||
<span>in</span>
|
||||
<b>{round(task.processing_time)} seconds</b>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="formats">
|
||||
{[
|
||||
<a class="secondary" href={f"document.{f.value}"} target="_blank">
|
||||
<button>{f.name}</button>
|
||||
</a>
|
||||
for f in OutputFormat
|
||||
if doc.get(f"{f.value}_content")
|
||||
]}
|
||||
<label class="configDarkImg">
|
||||
<input type="checkbox" name="invert-images" persist="preview" />
|
||||
Invert images
|
||||
</label>
|
||||
</div>
|
||||
|
||||
{
|
||||
<DocPreview doc={doc_json} />
|
||||
if doc_json
|
||||
else (<p>No document preview because JSON is missing as an output format.</p>)
|
||||
}
|
||||
</main>
|
||||
</Page>
|
||||
)
|
||||
case "started":
|
||||
return (
|
||||
<PlainPage poll>
|
||||
<p class="progress">Task <b>{poll.task_id}</b> is in progress...</p>
|
||||
<progress />
|
||||
</PlainPage>
|
||||
)
|
||||
case _:
|
||||
return (
|
||||
<PlainPage>
|
||||
<p class="fail">
|
||||
Task <b>{poll.task_id}</b> failed.
|
||||
</p>
|
||||
<button onclick="history.back()">
|
||||
Go back
|
||||
</button>
|
||||
</PlainPage>
|
||||
)
|
||||
|
||||
|
||||
def StatusPage(ex: HTTPException, referer: str | None) -> JSX:
|
||||
return (
|
||||
<Page title={ex.status_code}>
|
||||
<main class="container">
|
||||
<Header />
|
||||
<h4>{ex.status_code}</h4>
|
||||
<p>{ex.detail}</p>
|
||||
<p>
|
||||
<a href={referer or ".."}>
|
||||
<button>Go back</button>
|
||||
</a>
|
||||
</p>
|
||||
</main>
|
||||
</Page>
|
||||
)
|
||||
310
docling_serve/ui/preview.px
Normal file
310
docling_serve/ui/preview.px
Normal file
@@ -0,0 +1,310 @@
|
||||
from collections import defaultdict
|
||||
from html import escape
|
||||
from typing import Type
|
||||
|
||||
from docling_core.types.doc.document import (
|
||||
BaseAnnotation,
|
||||
CodeItem,
|
||||
ContentLayer,
|
||||
DescriptionAnnotation,
|
||||
DoclingDocument,
|
||||
DocItem,
|
||||
FloatingItem,
|
||||
FormulaItem,
|
||||
GroupItem,
|
||||
GroupLabel,
|
||||
ListGroup,
|
||||
ListItem,
|
||||
NodeItem,
|
||||
PictureClassificationData,
|
||||
PictureItem,
|
||||
ProvenanceItem,
|
||||
RefItem,
|
||||
SectionHeaderItem,
|
||||
TableCell,
|
||||
TableItem,
|
||||
TextItem,
|
||||
TitleItem
|
||||
)
|
||||
from pyjsx import jsx, JSX, JSXComponent
|
||||
|
||||
from .svg import circle, clipPath, image, path, rect, text
|
||||
|
||||
|
||||
_node_components: dict[str, JSXComponent] = {}
|
||||
|
||||
|
||||
def component(*node_types: list[Type[BaseAnnotation | NodeItem]]):
|
||||
def decorator(component):
|
||||
for t in node_types:
|
||||
_node_components[t.__name__] = component
|
||||
return decorator
|
||||
|
||||
|
||||
def AnnotationComponent(children, annotation: BaseAnnotation):
|
||||
Comp = _node_components.get(annotation.__class__.__name__)
|
||||
element = Comp(annotation=annotation, children=[]) if Comp else (
|
||||
<code>{escape(annotation.model_dump_json(indent=2))}</code>
|
||||
)
|
||||
|
||||
element.props["class"] = element.props.get("class", "") + " annotation"
|
||||
element.props["data-kind"] = annotation.kind
|
||||
|
||||
return element
|
||||
|
||||
|
||||
def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
|
||||
# Specific component or fallback.
|
||||
Comp = _node_components.get(node.__class__.__name__)
|
||||
element = Comp(node=node, doc=doc, children=[]) if Comp else (
|
||||
<span class="void"></span>
|
||||
)
|
||||
|
||||
# Wrap item component with annotations, if any.
|
||||
if isinstance(node, DocItem) and (anns := node.get_annotations()):
|
||||
element = (
|
||||
<div>
|
||||
{element}
|
||||
{[<AnnotationComponent annotation={ann} /> for ann in anns]}
|
||||
</div>
|
||||
)
|
||||
|
||||
# Extend interaction and styling.
|
||||
id = node.self_ref[2:]
|
||||
element.props["id"] = id
|
||||
element.props["onclick"] = "clickId(event)"
|
||||
element.props["class"] = element.props.get("class", "") + f" item {node.content_layer.value}"
|
||||
|
||||
return element
|
||||
|
||||
|
||||
def node_provs(node: NodeItem, doc: DoclingDocument) -> ProvenanceItem:
|
||||
return node.prov if isinstance(node, DocItem) else [
|
||||
p
|
||||
for c in node.children
|
||||
if isinstance(c.resolve(doc), DocItem)
|
||||
for p in c.resolve(doc).prov
|
||||
]
|
||||
|
||||
|
||||
def DocPage(children, page_no: int, items: list[NodeItem], doc: DoclingDocument):
|
||||
page = doc.pages[page_no]
|
||||
exclusive_items = [
|
||||
item
|
||||
for item in items
|
||||
if min([p.page_no for p in node_provs(item, doc)]) == page_no
|
||||
]
|
||||
|
||||
comps = []
|
||||
for i in range(len(exclusive_items)):
|
||||
item = exclusive_items[i]
|
||||
id = item.self_ref[2:]
|
||||
kind, *index = id.split("/")
|
||||
|
||||
parent_class = ""
|
||||
if isinstance(item, GroupItem):
|
||||
parent_class = "group"
|
||||
else:
|
||||
parent = item.parent.resolve(doc)
|
||||
if isinstance(parent, GroupItem) and parent.label is not GroupLabel.UNSPECIFIED:
|
||||
parent_class = "grouped"
|
||||
|
||||
comps.append(
|
||||
<div class={f"item-markers {parent_class} {item.content_layer.value}"} data-id={id}>
|
||||
<span>{"/".join(index)}</span>
|
||||
<span>{item.label.replace("_", " ")}</span>
|
||||
{
|
||||
<span>{item.content_layer.value.replace("_", " ")}</span>
|
||||
if item.content_layer is not ContentLayer.BODY
|
||||
else None
|
||||
}
|
||||
<a href={f"document/{id}"} target="_blank">{"{;}"}</a>
|
||||
</div>
|
||||
)
|
||||
comps.append(<NodeComponent node={item} doc={doc} />)
|
||||
|
||||
pages = set([p.page_no for p in node_provs(item, doc)])
|
||||
page_mark_class = "page-marker"
|
||||
if i == 0 or len(pages) > 1:
|
||||
page_mark_class += " border"
|
||||
comps.append(<div class={page_mark_class}></div>)
|
||||
|
||||
|
||||
def ItemBox(children, item: DocItem, prov: ProvenanceItem):
|
||||
item_id = item.self_ref[2:]
|
||||
sub_items = [
|
||||
(item_id, prov.bbox.to_top_left_origin(page.size.height))
|
||||
]
|
||||
|
||||
# Table cells.
|
||||
if isinstance(item, TableItem):
|
||||
for cell in item.data.table_cells:
|
||||
sub_items.append(
|
||||
(f"{item_id}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}", cell.bbox)
|
||||
)
|
||||
|
||||
return [
|
||||
<rect
|
||||
data-id={id}
|
||||
x={bbox.l - 1}
|
||||
y={bbox.t - 1}
|
||||
width={bbox.width + 2}
|
||||
height={bbox.height + 2}
|
||||
vector-effect="non-scaling-stroke"
|
||||
onclick="clickId(event)"
|
||||
/>
|
||||
for id, bbox in sub_items
|
||||
]
|
||||
|
||||
# Span extra row to fill up excess space.
|
||||
comps.append(
|
||||
<svg
|
||||
class="page-image"
|
||||
style={{ "grid-row": f"span {len(exclusive_items) + 1}" }}
|
||||
width="50vw"
|
||||
viewBox={f"0 0 {page.size.width} {page.size.height}"}
|
||||
>
|
||||
<image
|
||||
href={f"document/pages/{page_no}"}
|
||||
width={page.size.width}
|
||||
height={page.size.height}
|
||||
/>
|
||||
{[
|
||||
<ItemBox item={item} prov={prov} />
|
||||
for item in items
|
||||
if isinstance(item, DocItem)
|
||||
for prov in item.prov
|
||||
if prov.page_no == page_no
|
||||
]}
|
||||
|
||||
<text class="top-no" x={5} y={5}>{page_no}</text>
|
||||
<text class="bottom-no" x={5} y={page.size.height - 5}>{page_no}</text>
|
||||
</svg>
|
||||
)
|
||||
|
||||
return <div class="page">{comps}</div>
|
||||
|
||||
|
||||
def DocPreview(children, doc: DoclingDocument):
|
||||
page_items: dict[int, list[NodeItem]] = defaultdict(list)
|
||||
|
||||
for item, level in doc.iterate_items(
|
||||
with_groups=True,
|
||||
included_content_layers={*ContentLayer}
|
||||
):
|
||||
if not isinstance(item, GroupItem) or item.label is not GroupLabel.UNSPECIFIED:
|
||||
pages = set([p.page_no for p in node_provs(item, doc)])
|
||||
for page in pages:
|
||||
page_items[page].append(item)
|
||||
|
||||
return [
|
||||
<DocPage page_no={page_no} items={page_items[page_no]} doc={doc} />
|
||||
for page_no in sorted(page_items.keys())
|
||||
]
|
||||
|
||||
|
||||
@component(TextItem)
|
||||
def TextComponent(children, node: TextItem, doc: DoclingDocument):
|
||||
return <p class={node.label}>{escape(node.text)}</p>
|
||||
|
||||
|
||||
@component(TitleItem)
|
||||
def TitleComponent(children, node: TitleItem, doc: DoclingDocument):
|
||||
return <h1>{escape(node.text)}</h1>
|
||||
|
||||
|
||||
@component(SectionHeaderItem)
|
||||
def SectionHeaderComponent(children, node: SectionHeaderItem, doc: DoclingDocument):
|
||||
return <h4>{escape(node.text)}</h4>
|
||||
|
||||
|
||||
@component(ListItem)
|
||||
def ListComponent(children, node: ListItem, doc: DoclingDocument):
|
||||
return <li><b>{node.marker}</b> {escape(node.text)}</li>
|
||||
|
||||
|
||||
@component(CodeItem)
|
||||
def CodeComponent(children, node: CodeItem, doc: DoclingDocument):
|
||||
return <figure><code>{escape(node.text or node.orig)}</code></figure>
|
||||
|
||||
|
||||
@component(FormulaItem)
|
||||
def FormulaComponent(children, node: FormulaItem, doc: DoclingDocument):
|
||||
return <figure><code>{escape(node.text or node.orig)}</code></figure>
|
||||
|
||||
|
||||
@component(PictureItem)
|
||||
def PictureComponent(children, node: PictureItem, doc: DoclingDocument):
|
||||
return <figure><img src={f"document/{node.self_ref[2:]}"} loading="lazy" /></figure>
|
||||
|
||||
|
||||
@component(PictureClassificationData)
|
||||
def PictureClassificationComponent(children, annotation: PictureClassificationData):
|
||||
classes = annotation.predicted_classes[:5]
|
||||
return (
|
||||
<div>
|
||||
{[
|
||||
<div
|
||||
style={{ "width": f"{cls.confidence * 100}%" }}
|
||||
title={f"{cls.class_name}: {cls.confidence:.2f}"}
|
||||
>
|
||||
{f"{cls.class_name.replace("_", " ")} {cls.confidence:.2f}"}
|
||||
</div>
|
||||
for cls in classes
|
||||
]}
|
||||
</div>
|
||||
)
|
||||
|
||||
|
||||
@component(DescriptionAnnotation)
|
||||
def DescriptionAnnotation(children, annotation: DescriptionAnnotation):
|
||||
return <div>{escape(annotation.text)}</div>
|
||||
|
||||
|
||||
@component(TableItem)
|
||||
def TableComponent(children, node: TableItem, doc: DoclingDocument):
|
||||
covered_cells: set[(int, int)] = set()
|
||||
|
||||
def check_cover(cell: TableCell):
|
||||
is_covered = (cell.start_col_offset_idx, cell.start_row_offset_idx) in covered_cells
|
||||
|
||||
if not is_covered:
|
||||
for x in range(cell.start_col_offset_idx, cell.end_col_offset_idx):
|
||||
for y in range(cell.start_row_offset_idx, cell.end_row_offset_idx):
|
||||
covered_cells.add((x, y))
|
||||
|
||||
return is_covered
|
||||
|
||||
def Cell(children, cell: TableCell):
|
||||
id = f"{node.self_ref[2:]}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}"
|
||||
|
||||
return (
|
||||
<td
|
||||
id={id}
|
||||
class={"header" if cell.column_header or cell.row_header else None}
|
||||
colspan={cell.col_span or 1}
|
||||
rowspan={cell.row_span or 1}
|
||||
onclick="clickId(event)"
|
||||
>
|
||||
{escape(cell.text)}
|
||||
</td>
|
||||
)
|
||||
|
||||
return (
|
||||
<div class="table">
|
||||
<table>
|
||||
<tbody>
|
||||
{[
|
||||
<tr>
|
||||
{[
|
||||
<Cell cell={cell} />
|
||||
for cell in row
|
||||
if not check_cover(cell)
|
||||
]}
|
||||
</tr>
|
||||
for row in node.data.grid
|
||||
]}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
)
|
||||
116
docling_serve/ui/static/logo.svg
Normal file
116
docling_serve/ui/static/logo.svg
Normal file
@@ -0,0 +1,116 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||
<svg width="100%" height="100%" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:1.5;">
|
||||
<g id="Docling" transform="matrix(1.07666,0,0,1.07666,-35.9018,-84.1562)">
|
||||
<g id="Outline" transform="matrix(1,0,0,1,-0.429741,55.0879)">
|
||||
<path d="M394.709,69.09C417.34,35.077 467.97,30.178 478.031,55.609C486.35,55.043 494.726,54.701 503.158,54.589C533.157,45.238 560.496,47.419 584.65,60.732C800.941,96.66 966.069,284.814 966.069,511.232C966.069,763.284 761.435,967.918 509.383,967.918C433.692,967.918 362.277,949.464 299.385,916.808L242.3,931.993C203.092,943.242 187.715,928.369 208.575,891.871C208.935,891.24 216.518,879.37 223.997,867.677C119.604,783.975 52.698,655.355 52.698,511.232C52.698,298.778 198.086,120.013 394.709,69.09Z" style="fill:white;"/>
|
||||
</g>
|
||||
<g id="Color" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
|
||||
<path d="M284.8,894.232L179.735,783.955L130.222,645.203L125.538,504.726L185.211,385.816C209.006,322.738 249.951,278.973 302.281,248.028L406.684,203.333L413.483,175.767L436.637,152.428L451.408,153.312L457.726,183.183L485.164,165.379L526.92,159.699L557.014,177.545L612.652,211.018C679.009,226.066 740.505,264.146 797.138,325.26L862.813,423.477L891.583,560.826L883.273,683.32L814.268,809.924L734.431,894.384L644.495,926.906L497.146,954.121L361.064,940.647L284.8,894.232Z" style="fill:url(#_Linear1);"/>
|
||||
<path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z" style="fill:url(#_Linear2);"/>
|
||||
<g transform="matrix(-0.765945,0,0,1,839.727,5.47434)">
|
||||
<clipPath id="_clip3">
|
||||
<path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z"/>
|
||||
</clipPath>
|
||||
<g clip-path="url(#_clip3)">
|
||||
<g transform="matrix(-1.18516,0,0,0.907769,1039.04,88.3496)">
|
||||
<use xlink:href="#_Image4" x="223.969" y="674.21" width="152.098px" height="213.852px" transform="matrix(0.994105,0,0,0.999308,0,0)"/>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
<path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
|
||||
<g transform="matrix(-1,0,0,1,1022.04,2.74442)">
|
||||
<path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
|
||||
</g>
|
||||
<path d="M354.92,650.818L420.009,663.185L493.368,666.379L554.826,665.251L620.19,658.511L658.169,651.428L671.428,644.802L673.265,627.093L659.898,611.845L625.422,609.244L599.275,591.212L568.632,556.79L542.9,534.336L515.052,528.253L480.412,532.71L455.2,552.337L428.514,578.155L405.312,599.359L374.228,612.097L355.342,614.456L340.75,630.308L341.568,645.341L354.92,650.818Z" style="fill:url(#_Linear5);"/>
|
||||
<path d="M257.168,949.32L317.434,876.747L364.928,810.6L384.1,743.934L378.759,714.719L376.844,685.849L374.836,659.954L448.734,664.2L511.462,667.602L571.339,665.091L632.796,658.836L648.232,656.882L649.937,697.808L608.105,717.702L598.45,738.594L592.286,761.642L604.743,796.309L639.595,825.803L649.872,840.757L558.219,895.152L502.124,907.569L425.781,923.496L333.29,931.298L286.269,936.907L257.168,949.32Z" style="fill:url(#_Linear6);"/>
|
||||
<g transform="matrix(1,0,0,1.30081,-1.77636e-15,-196.488)">
|
||||
<path d="M374.165,685.268C463.946,706.599 553.728,707.491 643.51,688.593L641.903,653.199C549.263,671.731 459.645,672.22 373.059,654.611L374.165,685.268Z" style="fill-opacity:0.18;"/>
|
||||
</g>
|
||||
<path d="M459.633,571.457C476.7,536.091 530.064,535.913 553.1,568.767C520.703,551.407 489.553,552.374 459.633,571.457Z" style="fill:white;"/>
|
||||
<g transform="matrix(1,0,0,1,0.223468,-2.61949)">
|
||||
<path d="M355.3,267.232C500.64,173.156 720.699,241.362 793.691,423.582C766.716,384.84 735.725,357.078 697.53,349.014L717.306,335.248C698.537,321.49 675.794,320.957 651.039,327.119C652.235,315.768 658.995,306.991 674.188,302.115C641.864,287.427 617.356,289.473 596.258,298.818C597.049,286.116 605.827,278.087 620.068,273.254C589.192,267.477 564.13,270.926 544.651,283.232C545.822,271.831 550.709,260.943 560.913,250.79C517.498,257.095 492.995,267.925 482.892,282.202C477.311,269.499 477.274,257.221 487.625,245.739C439.161,252.932 421.555,265.094 410.355,278.286C407.697,269.01 407.705,260.632 410.853,253.316C389.633,254.773 372.178,260.663 355.3,267.232Z" style="fill:rgb(255,213,95);"/>
|
||||
</g>
|
||||
<path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
|
||||
<g transform="matrix(0.114323,-0.655229,0.82741,0.144365,224.632,497.317)">
|
||||
<path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
|
||||
</g>
|
||||
<g transform="matrix(1.6739,1.15217e-16,-1.15217e-16,-0.733075,-341.46,1039.77)">
|
||||
<path d="M447.449,560.911C468.179,536.963 546.237,539.305 565.638,560.831C533.166,555.541 477.296,553.494 447.449,560.911Z" style="fill:white;"/>
|
||||
</g>
|
||||
<path d="M348.201,622.341C395.549,653.534 622.351,660.854 661.936,616.729L677.568,633.834L667.044,650.308L557.802,667.518L498.074,670.562L446.718,666.416L391.404,658.406L348.154,652.501L340.161,637.119L348.201,622.341Z" style="fill:rgb(199,68,6);"/>
|
||||
</g>
|
||||
<g id="Black-outline" serif:id="Black outline" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
|
||||
<path d="M373.389,657.919C376.285,676.334 377.04,695.016 375.326,714.008" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
||||
<path d="M645.931,654.961C646.158,669.958 647.22,684.853 648.975,699.661" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
||||
<path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
|
||||
<g transform="matrix(0.94177,0,0,0.94909,28.8868,3.79501)">
|
||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
||||
</g>
|
||||
<g transform="matrix(0.112099,0.0552506,-0.0673118,0.136571,455.367,509.409)">
|
||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
||||
</g>
|
||||
<g transform="matrix(-0.112099,0.0552506,0.0673118,0.136571,560.529,509.492)">
|
||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
||||
</g>
|
||||
<g transform="matrix(-1,0,0,1,1013.33,-1.15187)">
|
||||
<path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
|
||||
</g>
|
||||
<g transform="matrix(-0.94177,0,0,0.94909,984.44,2.64314)">
|
||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
||||
</g>
|
||||
<g transform="matrix(1,0,0,1,1.9047,-5.57346)">
|
||||
<path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
|
||||
</g>
|
||||
<g transform="matrix(-1,0,0,1,1011.43,-5.7284)">
|
||||
<path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
|
||||
</g>
|
||||
<g transform="matrix(0.973815,0,0,1.00246,4.71761,-0.508759)">
|
||||
<path d="M407.22,206.891C107.655,339.384 134.447,630.03 314.615,708.305" style="fill:none;stroke:black;stroke-width:29.39px;"/>
|
||||
</g>
|
||||
<g transform="matrix(-0.973815,0,0,1.00246,1006.67,-1.31695)">
|
||||
<path d="M461.559,196.756C119.768,256.762 111.059,642.544 320.305,711.486" style="fill:none;stroke:black;stroke-width:29.39px;"/>
|
||||
</g>
|
||||
<g id="vector-duck" serif:id="vector duck">
|
||||
<path d="M240.912,850.71C248.043,740.231 325.609,685.992 371.268,715.193C386.487,724.926 392.506,757.72 358.575,816.753C327.005,871.68 300.465,894.596 288.329,903.447" style="fill:none;stroke:black;stroke-width:21.79px;"/>
|
||||
<path d="M638.382,843.426C427.991,964.695 389.022,902.942 251.512,947.641L307.759,889.573" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
||||
<path d="M770.991,853.754C779.364,764.998 730.67,727.923 666.385,704.966C629.568,691.819 580.483,723.886 595.974,772.596C606.285,805.016 650.54,839.029 707.786,886.778" style="fill:none;stroke:black;stroke-width:21.79px;"/>
|
||||
<g transform="matrix(1,0,0,1,-1.87208,0.908099)">
|
||||
<path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
|
||||
</g>
|
||||
<g transform="matrix(-0.969851,0.2437,0.2437,0.969851,773.329,-138.212)">
|
||||
<path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
|
||||
</g>
|
||||
<path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
||||
<g transform="matrix(-1,0,0,1,1014.44,-0.213451)">
|
||||
<path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
||||
</g>
|
||||
</g>
|
||||
<g transform="matrix(2.4586,0,0,2.5497,-444.527,-690.434)">
|
||||
<ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
|
||||
</g>
|
||||
<g transform="matrix(2.4586,0,0,2.5497,-127.75,-690.991)">
|
||||
<ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
|
||||
</g>
|
||||
<path d="M505.738,698.061L578.879,713.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
||||
<path d="M422.781,709.6L568.438,743.041" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
||||
<path d="M419.941,738.409L565.688,772.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
||||
<path d="M408.6,787.08L510.634,810.689" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
||||
<path d="M397.571,815.956L500.93,840.219" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
||||
<path d="M386.763,844.926L454.065,861.974" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
||||
<path d="M459.169,919.169C512.194,898.262 539.171,867.298 535.241,824.402C568.052,818.31 598.499,817.058 625.84,822.165" style="fill:none;stroke:black;stroke-width:16.95px;"/>
|
||||
<path d="M366.219,241.106C389.605,229.261 413.371,220.601 438.247,217.5C416.795,202.419 418.72,174.582 444.22,162.47C442.086,178.175 447.633,193.354 464.772,207.738C468.721,167.57 530.015,162.087 545.674,184.112C526.45,189.314 513.082,197.344 504.566,207.717C522.403,208.119 540.706,207.86 556.2,210.609L566.935,168.471C536.388,146.208 495.718,142.166 464.65,166.705C467.703,133.264 419.536,128.364 404.624,178.47L366.219,241.106Z"/>
|
||||
<path d="M392.617,924.576C428.953,936.938 467.84,943.636 508.258,943.636C708.944,943.636 871.876,778.49 871.876,575.076C871.876,382.463 725.788,224.162 539.898,207.895L554.137,173.696L554.485,168.187C757.218,191.602 914.895,366.003 914.895,577.383C914.895,804.698 732.549,989.249 507.949,989.249C435.381,989.249 367.223,969.983 308.199,936.232L392.617,924.576ZM279.206,917.988C171.663,843.819 101.002,718.887 101.002,577.383C101.002,383.006 234.333,219.898 413.398,176.712L424.375,216.389C264.082,254.803 144.64,400.913 144.64,575.076C144.64,703.735 209.822,817.086 308.514,883.023L279.206,917.988Z"/>
|
||||
<path d="M714.938,895.223L647.287,836.693L616.06,855.308L549.158,889.412L459.845,919.216L390.213,928.828L429.291,950.712L535.832,960.1L586.137,952.591L662.254,931.896L714.938,895.223Z"/>
|
||||
<path d="M423.538,929.39C509.164,917.593 580.815,890.465 640.827,850.566C635.677,886.828 622.639,918.218 594.006,939.977C530.254,930.953 474.955,928.632 423.538,929.39Z" style="fill:url(#_Linear7);"/>
|
||||
</g>
|
||||
</g>
|
||||
<defs>
|
||||
<linearGradient id="_Linear1" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-52.3962,375.121,-375.121,-52.3962,471.134,384.463)"><stop offset="0" style="stop-color:rgb(255,176,44);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
|
||||
<linearGradient id="_Linear2" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(28.6198,-84.8913,84.8913,28.6198,647.831,831.55)"><stop offset="0" style="stop-color:rgb(255,73,2);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,176,44);stop-opacity:1"/></linearGradient>
|
||||
<image id="_Image4" width="153px" height="214px" xlink:href="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4ICA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/wAARCADWAJkDAREAAhEBAxEB/8QAGQABAQEBAQEAAAAAAAAAAAAAAwACBwYF/8QAGBABAQEBAQAAAAAAAAAAAAAAAgABEhH/xAAbAQADAQADAQAAAAAAAAAAAAAAAQMCBAUGB//EABYRAQEBAAAAAAAAAAAAAAAAAAABEf/aAAwDAQACEQMRAD8A63fAX1BQFAUBQFAUBQFAUBQFAZShqQSUNyBSmpIJK0pIJqakgUptyCampIx1DWPS0XSqAoCgKAoCgKAoCgKAwlDUgkobkE1aVkClNuQbU1JAtTUkElNSQTU25GOptY9Vcd0KgKAoCgKAoCgKAoDDUNSCSmpIJqakgUptyCampIJKakgWpqSCSm3IJKakjHU2sewuM86oCgKAoCgKAoCgMJQ1IJqakgWpqSCSmpIJqbcgUpqSCSmpIJqbcgmpqSCSmpIx1PGse1uK8yoCgKAoCgKAoA2obkGlNuQLU1JBJTUkElPG5AtTUkElNSQSU1JBNTbkClNSQSU1JGOptY93cR5VQFAUBQFAUAbUNyCam3IJKakgmpqSBampIJKbcgmpqSBampIJKakgmptyBampINKakjHU8ax0C4byKgKAoCgLd8gDShuQTU25AtTVkElNuQTU1JBNTUkElNuQLU1JBNTUkElNSQLU25BJWlJBJTUkY6hrHRrhPGqAoCgLd8gDahuQSU1JAtTUkE1NuQSU1JBNTUkClNSQSU25BNTUkC1NSQSVpSQSUNyCatKSBSmpIx1DWOmXBeJUBQFu+QBtQ3IFqakgkpqSCam3IFqakgkpqSCampIJqbcgUpqSCampIJq0pIJKakgWptyCampIJKakjHU2sdRuveFUBbvkASUNyCSmpIJqakgkpqSBam3IJqakgkpqSCam3IFqakgmpqSCampIFq0pIJKbcgkpqSBSmpIJKakjHUNY6vde8Ct3yAJKG5BNTUkE1NSQLU1JBJTUkE1NuQLU1JBJWlJBJQpIJq03IFKakgkp4pIJqakgmptyBSmpIJqakgkpqSMdQeOt7vl1z5/INKG5BNTbkClPFJBJTUkE1NSQKU1JBJTbkE1NSQLU1JBtTbkC1aUkE1NSQSU1JAtTUkElNuQSU1JBJTUkC1NSRjqbWOupXWPnsgmpqSBSmpIJqbcgkpqSBampIJK0pIJKbcgWoUkE1aUkElNSQTU25ApTUkElNSQSU25AtTUkElNSQTU1JApTUkZ6g8dcautfPpBJTUkE1NSQLU25BJTUkE1aUkC1NuQSU1JBNTUkClNSQSU25BJTUkE1NSQSU1JAtTbkE1NSQSU8UkClNSQe77NtQHWErrXgJBJTUkE1NuQSU8UkClNSQTVpuQSU1JBJTUkC1NSQTU1JBJTbkE1NSQKU1JBJTUkElNuQLU1JBJTUkHu+zbUBQHU2rrnhJBJWlJAtTbkElNSQTU1JBJTbkC1NSQSU1JBNTUkElNuQKU1JBJTUkE1NSQSU1JAtTbkElNSQe77NtQFAUB01q694iQSU1JBNWm5BNTUkClNSQTU25BJTUkElNSQKU25BNTVkElNuQTU1JApTUkElNSQSU25B7vs21AUBQFAdIauC8ZIJKeNyCampIFKakgmp4pIJKbcgWpqSCSnikgmpqSCSm3IFKakgkpqSCampIFKakjG77NpQFAUBQFAdCauE8fIJKakgkpqSCampIFKakgkptyCSmpIFqakg0ptyBampIJqakgWpqSCSmpIxNpQFAUBQFAUB71q4bycgkpqSBampIJKakgmpqSCSm3IFqakgkpqSCSmpIJqbcgWrSkgkoUkYm0oCgKAoCgKAoD3CVxHl5AtTUkElNSQTU1JApTbkElNSQSU1JApTUkElNuQTU1JBJWlJGIaUBQFAUBQFAUBQHsmrivNyBSmpIJKakgkptyCatKyCSm3IFqFJBNWlJBJTUkElNuRiGlAUBQFAUBQFAUBQHrErjPPyCampIJKakgmpqSBatNyCShSQTU1JAtWlJBJQ3IzNpQFAUBQFAUBQFAUBQHp2rjujkElaUkClNSQTU25BJTUkElCkgWrSkgkpqSMwagKAoCgKAoCgKAoCgKA9ElQdPIFq0pIJKakgmobkC1aUkElNSQSU1JGYNQFAUBQFAUBQFAUBQFAUB9xqk6uQTU1JApTxSQTUNyBatKSDSmpIzBqAoCgKAoCgKAoCgKAoCgKA+u1TdfIFKcUkE1NuQTU1JBLZqSMwagKAoCgKAoCgKAoCgKAoCgKA/9k="/>
|
||||
<linearGradient id="_Linear5" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-39.3403,137.423,-137.423,-39.3403,545.523,573.246)"><stop offset="0" style="stop-color:rgb(255,200,41);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
|
||||
<linearGradient id="_Linear6" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(1.01113,-68.2054,68.2054,1.01113,482.996,741.463)"><stop offset="0" style="stop-color:white;stop-opacity:1"/><stop offset="1" style="stop-color:rgb(179,179,179);stop-opacity:1"/></linearGradient>
|
||||
<linearGradient id="_Linear7" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-7.13599,-34.117,34.117,-7.13599,578.793,922.144)"><stop offset="0" style="stop-color:rgb(164,164,164);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(106,106,106);stop-opacity:1"/></linearGradient>
|
||||
</defs>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 18 KiB |
115
docling_serve/ui/static/main.js
Normal file
115
docling_serve/ui/static/main.js
Normal file
@@ -0,0 +1,115 @@
|
||||
// Propagate URL hash to CSS target class for elements with the same id or data-id.
|
||||
window.addEventListener("hashchange", function (event) {
|
||||
[
|
||||
["remove", "oldURL"],
|
||||
["add", "newURL"],
|
||||
].forEach(([op, tense]) => {
|
||||
const hash = new URL(event[tense]).hash.slice(1);
|
||||
document
|
||||
.querySelectorAll(`[data-id="${hash}"], [id="${hash}"]`)
|
||||
.forEach((el) => el.classList[op]("target"));
|
||||
});
|
||||
});
|
||||
|
||||
// Navigate document items with cursor keys.
|
||||
document.addEventListener("keydown", function (event) {
|
||||
const target = document.querySelector("*:target");
|
||||
const tbounds = target?.getBoundingClientRect();
|
||||
const filters = {
|
||||
ArrowUp: (_x, y) => y < tbounds.top,
|
||||
ArrowDown: (_x, y) => y > tbounds.bottom,
|
||||
ArrowLeft: (x, _y) => x < tbounds.left,
|
||||
ArrowRight: (x, _y) => x > tbounds.right,
|
||||
};
|
||||
|
||||
if (target && filters[event.key]) {
|
||||
const elements = [...document.querySelectorAll(".item[id], .item *[id]")];
|
||||
|
||||
let minEl, minDist;
|
||||
for (const el of elements) {
|
||||
const elBounds = el.getBoundingClientRect();
|
||||
|
||||
if (
|
||||
filters[event.key](
|
||||
(elBounds.left + elBounds.right) / 2,
|
||||
(elBounds.top + elBounds.bottom) / 2
|
||||
)
|
||||
) {
|
||||
const elDist =
|
||||
Math.abs(tbounds.x - elBounds.x) + Math.abs(tbounds.y - elBounds.y);
|
||||
|
||||
if (el != target && elDist < (minDist ?? Number.MAX_VALUE)) {
|
||||
minEl = el;
|
||||
minDist = elDist;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (minEl) {
|
||||
event.preventDefault();
|
||||
location.href = `#${minEl.id}`;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Navigate to item with id when it is clicked.
|
||||
function clickId(e) {
|
||||
e.stopPropagation();
|
||||
const id = e.currentTarget.getAttribute("data-id") ?? e.currentTarget.id;
|
||||
location.href = `#${id}`;
|
||||
}
|
||||
|
||||
window.onload = () => {
|
||||
// (Re-)set the value of input[data-dep-on] to conform to a value of another input[name="data-dep-on"].
|
||||
document.querySelectorAll("input[dep-on]").forEach((element) => {
|
||||
const onName = element.getAttribute("dep-on");
|
||||
const onElement = document.getElementsByName(onName)[0];
|
||||
const depMap = JSON.parse(element.getAttribute("dep-values") ?? "{}");
|
||||
|
||||
if (onElement && depMap) {
|
||||
// On load.
|
||||
element.value = depMap[onElement.value] ?? "";
|
||||
|
||||
// On change.
|
||||
onElement.addEventListener(
|
||||
"change",
|
||||
(event) => (element.value = depMap[event.currentTarget.value] ?? "")
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
// Toggle display of input[data-display-when] when it requires a different input[type=checkbox] to be checked.
|
||||
document.querySelectorAll("*[display-when]").forEach((element) => {
|
||||
const whenElements = element
|
||||
.getAttribute("display-when")
|
||||
.split(",")
|
||||
.flatMap((whenName) => [...document.getElementsByName(whenName.trim())]);
|
||||
|
||||
function update() {
|
||||
const allChecked = whenElements.every((el) => el.checked);
|
||||
element.classList[allChecked ? "remove" : "add"]("hidden");
|
||||
}
|
||||
|
||||
// On load.
|
||||
update();
|
||||
|
||||
// On change.
|
||||
whenElements.forEach((whenElement) =>
|
||||
whenElement.addEventListener("change", update)
|
||||
);
|
||||
});
|
||||
|
||||
// Persist input value in local storage.
|
||||
document
|
||||
.querySelectorAll("input[type=checkbox][persist]")
|
||||
.forEach((element) => {
|
||||
const prefix = element.getAttribute("persist");
|
||||
const name = element.getAttribute("name");
|
||||
const key = `docling-serve-${prefix}-${name}`;
|
||||
|
||||
element.checked = localStorage.getItem(key) === "true";
|
||||
element.addEventListener("change", (event) =>
|
||||
localStorage.setItem(key, event.target.checked)
|
||||
);
|
||||
});
|
||||
};
|
||||
2835
docling_serve/ui/static/pico.css
Normal file
2835
docling_serve/ui/static/pico.css
Normal file
File diff suppressed because it is too large
Load Diff
450
docling_serve/ui/static/style.css
Normal file
450
docling_serve/ui/static/style.css
Normal file
@@ -0,0 +1,450 @@
|
||||
@import "pico.css";
|
||||
|
||||
/* Pico configuration. */
|
||||
:root {
|
||||
--pico-font-size: 16px;
|
||||
}
|
||||
|
||||
/* Utilities. */
|
||||
.w-4 {
|
||||
width: calc(4 * var(--pico-spacing));
|
||||
}
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
.max-w-full {
|
||||
max-width: 100%;
|
||||
}
|
||||
.mt-1 {
|
||||
margin-top: var(--pico-spacing);
|
||||
}
|
||||
.mr-auto {
|
||||
margin-right: auto;
|
||||
}
|
||||
.mb-1 {
|
||||
margin-bottom: var(--pico-spacing);
|
||||
}
|
||||
.mb-2 {
|
||||
margin-bottom: calc(2 * var(--pico-spacing));
|
||||
}
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
.flex.row {
|
||||
flex-direction: row;
|
||||
}
|
||||
.flex.col {
|
||||
flex-direction: column;
|
||||
}
|
||||
.flex-1 {
|
||||
flex: 1 1 0%;
|
||||
}
|
||||
.flex-auto {
|
||||
flex: 1 1 auto;
|
||||
}
|
||||
.gap-0 {
|
||||
gap: 0;
|
||||
}
|
||||
.gap-1 {
|
||||
gap: 0.25rem;
|
||||
}
|
||||
.gap-2 {
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.gap-3 {
|
||||
gap: 1rem;
|
||||
}
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
.sticky-footer {
|
||||
position: sticky;
|
||||
bottom: 0;
|
||||
padding-top: var(--pico-spacing);
|
||||
background: var(--pico-background-color);
|
||||
}
|
||||
|
||||
/* Customization. */
|
||||
|
||||
@view-transition {
|
||||
navigation: auto;
|
||||
}
|
||||
|
||||
:root {
|
||||
--highlight-factor: 0.8;
|
||||
--target: hsl(240, 100%, 34%);
|
||||
--mark: hsl(29, 100%, 40%);
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root {
|
||||
--highlight-factor: 1.5;
|
||||
--target: hsl(240, 100%, 70%);
|
||||
--mark: hsl(29, 100%, 70%);
|
||||
}
|
||||
}
|
||||
|
||||
html {
|
||||
scroll-behavior: smooth;
|
||||
}
|
||||
|
||||
header {
|
||||
position: relative;
|
||||
display: flex;
|
||||
gap: 5rem;
|
||||
margin-bottom: 2rem;
|
||||
|
||||
> .title {
|
||||
white-space: nowrap;
|
||||
font-size: 2rem;
|
||||
font-weight: 300;
|
||||
line-height: 1.75;
|
||||
|
||||
img {
|
||||
display: inline-block;
|
||||
max-height: 0.8em;
|
||||
margin: -0.2rem -0.2em 0.25rem -0.2em;
|
||||
}
|
||||
}
|
||||
|
||||
&.loading img {
|
||||
animation: shake 0.5s ease-in-out alternate infinite;
|
||||
scale: 1.5;
|
||||
translate: 0 1.5rem;
|
||||
}
|
||||
|
||||
> .version {
|
||||
position: absolute;
|
||||
left: 6.25rem;
|
||||
bottom: -0.5rem;
|
||||
padding: 0 0.25rem;
|
||||
font-size: 0.65rem;
|
||||
line-height: 1rem;
|
||||
border: solid 1px var(--pico-color);
|
||||
border-radius: 0.3rem;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
--glow: hsl(29, 100%, 70%);
|
||||
|
||||
> .title {
|
||||
text-shadow: 0 0 0.25rem white, 0 0 0.5rem var(--glow),
|
||||
0 0 0.75rem var(--glow), 0 0 1rem var(--glow);
|
||||
color: white;
|
||||
|
||||
img {
|
||||
filter: drop-shadow(0 0 0.05rem white)
|
||||
drop-shadow(0 0 0.1rem var(--glow))
|
||||
drop-shadow(0 0 0.15rem var(--glow))
|
||||
drop-shadow(0 0 0.2rem var(--glow));
|
||||
}
|
||||
}
|
||||
|
||||
> .version {
|
||||
color: white;
|
||||
border-color: white;
|
||||
text-shadow: 0 0 0.05rem white, 0 0 0.1rem var(--glow),
|
||||
0 0 0.15rem var(--glow), 0 0 0.2rem var(--glow);
|
||||
box-shadow: 0 0 0.05rem white, 0 0 0.1rem var(--glow),
|
||||
0 0 0.15rem var(--glow), 0 0 0.2rem var(--glow);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@keyframes shake {
|
||||
50% {
|
||||
transform: rotate(-20deg);
|
||||
}
|
||||
100% {
|
||||
transform: rotate(20deg);
|
||||
}
|
||||
}
|
||||
|
||||
label > small {
|
||||
margin-left: var(--pico-spacing);
|
||||
opacity: 0.75;
|
||||
}
|
||||
|
||||
/* Conversion results. */
|
||||
.progress,
|
||||
.fail {
|
||||
margin-top: 3rem;
|
||||
}
|
||||
|
||||
.fail {
|
||||
color: var(--pico-del-color);
|
||||
}
|
||||
|
||||
main.preview {
|
||||
display: grid;
|
||||
grid:
|
||||
auto / 1fr 0.5rem minmax(20ch, 70ch) 0.5rem minmax(min-content, auto)
|
||||
minmax(0.5rem, 1fr);
|
||||
grid-auto-flow: dense;
|
||||
align-content: start;
|
||||
}
|
||||
|
||||
/* Header and task status. */
|
||||
main.preview {
|
||||
> header {
|
||||
grid-column: 3;
|
||||
padding: 0 0.5rem;
|
||||
}
|
||||
|
||||
> .status {
|
||||
grid-row: 2;
|
||||
grid-column: 3;
|
||||
display: inline-block;
|
||||
margin: 0 0.5rem 3rem 0.5rem;
|
||||
|
||||
span {
|
||||
display: inline-block;
|
||||
min-width: calc(5 * var(--pico-spacing));
|
||||
padding-right: calc(0.5 * var(--pico-spacing));
|
||||
}
|
||||
}
|
||||
|
||||
> .formats {
|
||||
grid-row: 2;
|
||||
grid-column: 5;
|
||||
margin-bottom: 3rem;
|
||||
display: flex;
|
||||
align-items: flex-end;
|
||||
gap: 1rem;
|
||||
|
||||
> .configDarkImg {
|
||||
display: none;
|
||||
grid-row: 2;
|
||||
grid-column: 6;
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
> .configDarkImg {
|
||||
display: block;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Invert images in dark mode (option). */
|
||||
@media (prefers-color-scheme: dark) {
|
||||
main.preview:has(.configDarkImg > input:checked) {
|
||||
--img-hover-border: white;
|
||||
|
||||
image,
|
||||
img {
|
||||
filter: invert(1) hue-rotate(180deg) saturate(1.25);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Document contents. */
|
||||
main.preview {
|
||||
--img-hover-border: black;
|
||||
|
||||
*[id] {
|
||||
scroll-margin-top: 20vh;
|
||||
}
|
||||
|
||||
> .page {
|
||||
position: relative;
|
||||
display: grid;
|
||||
grid-template-columns: subgrid;
|
||||
grid-auto-flow: dense;
|
||||
grid-column: 1 / span 6;
|
||||
|
||||
> .item {
|
||||
grid-column: 3;
|
||||
width: 100%;
|
||||
min-height: 3rem;
|
||||
max-height: fit-content;
|
||||
margin: 0;
|
||||
padding: 0.5rem;
|
||||
text-align: justify;
|
||||
background-color: var(--pico-background-color);
|
||||
|
||||
cursor: pointer;
|
||||
|
||||
&:hover {
|
||||
filter: brightness(var(--highlight-factor));
|
||||
}
|
||||
|
||||
&.target {
|
||||
outline: 2px solid var(--target);
|
||||
z-index: 10;
|
||||
}
|
||||
}
|
||||
|
||||
> .item.void {
|
||||
visibility: hidden;
|
||||
}
|
||||
|
||||
/* Items out of content layer. */
|
||||
> .item:not(.body),
|
||||
> .item-markers:not(.body) {
|
||||
opacity: 0.5;
|
||||
}
|
||||
|
||||
> li.item {
|
||||
list-style-type: none;
|
||||
}
|
||||
|
||||
> .item.caption {
|
||||
padding: 0.5rem 1.5rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
> .item.table {
|
||||
min-width: 0;
|
||||
overflow-x: auto;
|
||||
|
||||
table {
|
||||
font-size: 0.75rem;
|
||||
border-collapse: collapse;
|
||||
|
||||
td {
|
||||
vertical-align: top;
|
||||
}
|
||||
|
||||
td.header {
|
||||
font-weight: bold;
|
||||
background-color: var(--pico-code-background-color);
|
||||
}
|
||||
|
||||
td.target {
|
||||
outline: solid 2px var(--target);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.annotation {
|
||||
margin: 0.5rem 1rem;
|
||||
font-size: 0.9rem;
|
||||
color: var(--mark);
|
||||
|
||||
&::before {
|
||||
margin-right: 0.5rem;
|
||||
content: attr(data-kind);
|
||||
opacity: 0.7;
|
||||
}
|
||||
}
|
||||
|
||||
.annotation[data-kind="description"] {
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
.annotation[data-kind="classification"] {
|
||||
height: 1.5rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
> div {
|
||||
padding: 0 0.25rem;
|
||||
background: var(--mark);
|
||||
border: solid 1px var(--pico-background-color);
|
||||
color: var(--pico-background-color);
|
||||
overflow: hidden;
|
||||
text-wrap: nowrap;
|
||||
}
|
||||
}
|
||||
|
||||
> .item-markers {
|
||||
position: relative;
|
||||
grid-column: 2;
|
||||
padding-top: 0.125rem;
|
||||
padding-right: 0.5rem;
|
||||
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: flex-end;
|
||||
|
||||
font-family: monospace;
|
||||
font-size: 0.675rem;
|
||||
line-height: 1.25;
|
||||
letter-spacing: 0;
|
||||
color: var(--mark);
|
||||
white-space: nowrap;
|
||||
border-top: solid 1px var(--mark);
|
||||
|
||||
> * {
|
||||
margin-right: 0.5rem;
|
||||
}
|
||||
|
||||
> a {
|
||||
padding: 0.125rem 0.25rem;
|
||||
margin-top: 0.5rem;
|
||||
border-radius: 0.125rem;
|
||||
color: var(--pico-contrast-inverse);
|
||||
background-color: var(--target);
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
> a:hover {
|
||||
filter: brightness(--highlight-factor);
|
||||
}
|
||||
|
||||
&:not(.target) > a {
|
||||
display: none;
|
||||
}
|
||||
|
||||
&.group,
|
||||
&.grouped {
|
||||
border-left: 1px dashed var(--mark);
|
||||
}
|
||||
|
||||
&.group {
|
||||
margin-top: 0.5rem;
|
||||
border-top: none;
|
||||
}
|
||||
}
|
||||
|
||||
> .page-marker {
|
||||
grid-column: 4;
|
||||
|
||||
&.border {
|
||||
transform: translateY(-1px);
|
||||
border-top: solid 1px var(--mark);
|
||||
}
|
||||
}
|
||||
|
||||
> svg.page-image {
|
||||
grid-column: 5;
|
||||
position: sticky;
|
||||
top: 0.5rem;
|
||||
width: 100%;
|
||||
max-height: calc(100vh - 1rem);
|
||||
outline: 1px solid var(--mark);
|
||||
|
||||
rect {
|
||||
stroke: var(--mark);
|
||||
stroke-width: 1px;
|
||||
fill: var(--target);
|
||||
fill-opacity: 0.0001; /* To activate hover. */
|
||||
cursor: pointer;
|
||||
|
||||
&:hover {
|
||||
filter: brightness(0.8);
|
||||
fill-opacity: 0.1;
|
||||
stroke: var(--img-hover-border);
|
||||
stroke-width: 3px;
|
||||
}
|
||||
}
|
||||
|
||||
rect.target {
|
||||
stroke: var(--target);
|
||||
stroke-width: 3px;
|
||||
stroke-dasharray: none;
|
||||
}
|
||||
|
||||
text {
|
||||
font-size: 0.675rem;
|
||||
color: var(--mark);
|
||||
|
||||
&.top-no {
|
||||
alignment-baseline: hanging;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
25
docling_serve/ui/svg.py
Normal file
25
docling_serve/ui/svg.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from pyjsx import JSX
|
||||
|
||||
|
||||
def _tag(name: str):
|
||||
def factory(children, **args) -> JSX:
|
||||
props = " ".join([f'{k}="{v}"' for k, v in args.items()])
|
||||
|
||||
if children:
|
||||
child_renders = "".join([f"{c}" for c in children])
|
||||
return f"<{name} {props}>{child_renders}</{name}>"
|
||||
else:
|
||||
return f"<{name} {props} />"
|
||||
|
||||
return factory
|
||||
|
||||
|
||||
circle = _tag("circle")
|
||||
clipPath = _tag("clipPath")
|
||||
defs = _tag("defs")
|
||||
foreignObject = _tag("foreignobject")
|
||||
image = _tag("image")
|
||||
path = _tag("path")
|
||||
rect = _tag("rect")
|
||||
text = _tag("text")
|
||||
use = _tag("use")
|
||||
@@ -50,7 +50,7 @@ dependencies = [
|
||||
|
||||
[project.optional-dependencies]
|
||||
ui = [
|
||||
"gradio>=5.23.2,<6.0.0",
|
||||
"python-jsx>=0.2.0",
|
||||
]
|
||||
tesserocr = [
|
||||
"tesserocr~=2.7"
|
||||
|
||||
Reference in New Issue
Block a user