From 8d5892b176434c6b6760d492c73263e35f2dfffd Mon Sep 17 00:00:00 2001 From: DKL Date: Tue, 30 Sep 2025 15:40:15 +0200 Subject: [PATCH] Revamp UI to SSR. Signed-off-by: DKL --- docling_serve/app.py | 53 +- docling_serve/auth.py | 85 +- docling_serve/gradio_ui.py | 921 ---------- docling_serve/ui/__init__.py | 0 docling_serve/ui/app.py | 278 +++ docling_serve/ui/convert.px | 251 +++ docling_serve/ui/forms.px | 127 ++ docling_serve/ui/pages.px | 220 +++ docling_serve/ui/preview.px | 310 ++++ docling_serve/ui/static/logo.svg | 116 ++ docling_serve/ui/static/main.js | 115 ++ docling_serve/ui/static/pico.css | 2835 +++++++++++++++++++++++++++++ docling_serve/ui/static/style.css | 450 +++++ docling_serve/ui/svg.py | 25 + pyproject.toml | 2 +- uv.lock | 789 +++----- 16 files changed, 5046 insertions(+), 1531 deletions(-) delete mode 100644 docling_serve/gradio_ui.py create mode 100644 docling_serve/ui/__init__.py create mode 100644 docling_serve/ui/app.py create mode 100644 docling_serve/ui/convert.px create mode 100644 docling_serve/ui/forms.px create mode 100644 docling_serve/ui/pages.px create mode 100644 docling_serve/ui/preview.px create mode 100644 docling_serve/ui/static/logo.svg create mode 100644 docling_serve/ui/static/main.js create mode 100644 docling_serve/ui/static/pico.css create mode 100644 docling_serve/ui/static/style.css create mode 100644 docling_serve/ui/svg.py diff --git a/docling_serve/app.py b/docling_serve/app.py index 4fb4eef..aefc86e 100644 --- a/docling_serve/app.py +++ b/docling_serve/app.py @@ -54,7 +54,7 @@ from docling_jobkit.orchestrators.base_orchestrator import ( TaskNotFoundError, ) -from docling_serve.auth import APIKeyAuth, AuthenticationResult +from docling_serve.auth import APIKeyHeaderAuth, AuthenticationResult from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions from docling_serve.datamodel.requests import ( ConvertDocumentsRequest, @@ -167,7 +167,7 @@ def create_app(): # noqa: C901 offline_docs_assets = True 
_log.info("Found static assets.") - require_auth = APIKeyAuth(docling_serve_settings.api_key) + require_auth = APIKeyHeaderAuth(docling_serve_settings.api_key) app = FastAPI( title="Docling Serve", docs_url=None if offline_docs_assets else "/swagger", @@ -188,39 +188,6 @@ def create_app(): # noqa: C901 allow_headers=headers, ) - # Mount the Gradio app - if docling_serve_settings.enable_ui: - try: - import gradio as gr - - from docling_serve.gradio_ui import ui as gradio_ui - from docling_serve.settings import uvicorn_settings - - tmp_output_dir = get_scratch() / "gradio" - tmp_output_dir.mkdir(exist_ok=True, parents=True) - gradio_ui.gradio_output_dir = tmp_output_dir - - # Build the root_path for Gradio, accounting for UVICORN_ROOT_PATH - gradio_root_path = ( - f"{uvicorn_settings.root_path}/ui" - if uvicorn_settings.root_path - else "/ui" - ) - - app = gr.mount_gradio_app( - app, - gradio_ui, - path="/ui", - allowed_paths=["./logo.png", tmp_output_dir], - root_path=gradio_root_path, - ) - except ImportError: - _log.warning( - "Docling Serve enable_ui is activated, but gradio is not installed. " - "Install it with `pip install docling-serve[ui]` " - "or `pip install gradio`" - ) - ############################# # Offline assets definition # ############################# @@ -1021,4 +988,20 @@ def create_app(): # noqa: C901 await orchestrator.clear_results(older_than=older_then) return ClearResponse() + # Optional UI + if docling_serve_settings.enable_ui: + try: + from docling_serve.ui.app import create_ui_app + + ui_app = create_ui_app( + process_file_async, process_url_async, task_result, task_status_poll + ) + app.mount("/ui", app=ui_app, name="ui") + except ImportError as ex: + _log.error(ex) + _log.warning( + "Docling Serve enable_ui is activated, but its dependencies are not installed." 
+ " Install it with `uv sync --extra ui`" + ) + return app diff --git a/docling_serve/auth.py b/docling_serve/auth.py index 639bb25..84997dd 100644 --- a/docling_serve/auth.py +++ b/docling_serve/auth.py @@ -1,7 +1,7 @@ from typing import Any -from fastapi import HTTPException, Request, status -from fastapi.security import APIKeyHeader +from fastapi import HTTPException, Request, Response, status +from fastapi.security import APIKeyCookie, APIKeyHeader from pydantic import BaseModel @@ -11,46 +11,79 @@ class AuthenticationResult(BaseModel): detail: Any | None = None -class APIKeyAuth(APIKeyHeader): - """ - FastAPI dependency which evaluates a status API Key. - """ - +class KeyValidator: def __init__( self, api_key: str, - header_name: str = "X-Api-Key", + field_name: str = "X-Api-Key", fail_on_unauthorized: bool = True, ) -> None: self.api_key = api_key - self.header_name = header_name - super().__init__(name=self.header_name, auto_error=False) + self.field_name = field_name + self.fail_on_unauthorized = fail_on_unauthorized - async def _validate_api_key(self, header_api_key: str | None): - if header_api_key is None: - return AuthenticationResult( - valid=False, errors=[f"Missing header {self.header_name}."] - ) + async def __call__(self, candidate_key: str | None): + if candidate_key is None: + return self._error(f"Missing field {self.field_name}.") - header_api_key = header_api_key.strip() + candidate_key = candidate_key.strip() # Otherwise check the apikey - if header_api_key == self.api_key or self.api_key == "": + if candidate_key == self.api_key or self.api_key == "": return AuthenticationResult( valid=True, - detail=header_api_key, + detail=candidate_key, # Remove? 
) + else: + return self._error("The provided API Key is invalid.") + + def _error(self, error: str): + if self.fail_on_unauthorized and self.api_key: + raise HTTPException(status.HTTP_401_UNAUTHORIZED, error) else: return AuthenticationResult( valid=False, - errors=["The provided API Key is invalid."], + errors=[error], ) + +class APIKeyHeaderAuth(APIKeyHeader): + """ + FastAPI dependency which evaluates a status API Key in a header. + """ + + def __init__(self, validator: str | KeyValidator) -> None: + self.validator = ( + KeyValidator(validator) if isinstance(validator, str) else validator + ) + super().__init__(name=self.validator.field_name, auto_error=False) + async def __call__(self, request: Request) -> AuthenticationResult: # type: ignore - header_api_key = await super().__call__(request=request) - result = await self._validate_api_key(header_api_key) - if self.api_key and not result.valid: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, detail=result.detail - ) - return result + key = await super().__call__(request=request) + return await self.validator(key) + + +class APIKeyCookieAuth(APIKeyCookie): + """ + FastAPI dependency which evaluates a status API Key in a cookie. 
+ """ + + def __init__(self, validator: str | KeyValidator) -> None: + self.validator = ( + KeyValidator(validator) if isinstance(validator, str) else validator + ) + super().__init__(name=self.validator.field_name, auto_error=False) + + async def __call__(self, request: Request) -> AuthenticationResult: # type: ignore + api_key = await super().__call__(request=request) + return await self.validator(api_key) + + def _set_api_key(self, response: Response, api_key: str, expires=24 * 3600): + response.set_cookie( + key=self.validator.field_name, + value=api_key, + expires=expires, + secure=True, + httponly=True, + samesite="strict", + ) diff --git a/docling_serve/gradio_ui.py b/docling_serve/gradio_ui.py deleted file mode 100644 index 173a851..0000000 --- a/docling_serve/gradio_ui.py +++ /dev/null @@ -1,921 +0,0 @@ -import base64 -import importlib -import itertools -import json -import logging -import ssl -import sys -import tempfile -import time -from pathlib import Path -from typing import Optional - -import certifi -import gradio as gr -import httpx - -from docling.datamodel.base_models import FormatToExtensions -from docling.datamodel.pipeline_options import ( - PdfBackend, - ProcessingPipeline, - TableFormerMode, - TableStructureOptions, -) - -from docling_serve.helper_functions import _to_list_of_strings -from docling_serve.settings import docling_serve_settings, uvicorn_settings - -logger = logging.getLogger(__name__) - -############################ -# Path of static artifacts # -############################ - -logo_path = "https://raw.githubusercontent.com/docling-project/docling/refs/heads/main/docs/assets/logo.svg" -js_components_url = "https://unpkg.com/@docling/docling-components@0.0.7" -if ( - docling_serve_settings.static_path is not None - and docling_serve_settings.static_path.is_dir() -): - logo_path = str(docling_serve_settings.static_path / "logo.svg") - js_components_url = "/static/docling-components.js" - - -############################## -# Head 
JS for web components # -############################## -head = f""" - -""" - -################# -# CSS and theme # -################# - -css = """ -#logo { - border-style: none; - background: none; - box-shadow: none; - min-width: 80px; -} -#dark_mode_column { - display: flex; - align-content: flex-end; -} -#title { - text-align: left; - display:block; - height: auto; - padding-top: 5px; - line-height: 0; -} -.title-text h1 > p, .title-text p { - margin-top: 0px !important; - margin-bottom: 2px !important; -} -#custom-container { - border: 0.909091px solid; - padding: 10px; - border-radius: 4px; -} -#custom-container h4 { - font-size: 14px; -} -#file_input_zone { - height: 140px; -} - -docling-img { - gap: 1rem; -} - -docling-img::part(page) { - box-shadow: 0 0.5rem 1rem 0 rgba(0, 0, 0, 0.2); -} -""" - -theme = gr.themes.Default( - text_size="md", - spacing_size="md", - font=[ - gr.themes.GoogleFont("Red Hat Display"), - "ui-sans-serif", - "system-ui", - "sans-serif", - ], - font_mono=[ - gr.themes.GoogleFont("Red Hat Mono"), - "ui-monospace", - "Consolas", - "monospace", - ], -) - -############# -# Variables # -############# - -gradio_output_dir = None # Will be set by FastAPI when mounted -file_output_path = None # Will be set when a new file is generated - -############# -# Functions # -############# - - -def get_api_endpoint() -> str: - protocol = "http" - if uvicorn_settings.ssl_keyfile is not None: - protocol = "https" - return f"{protocol}://{docling_serve_settings.api_host}:{uvicorn_settings.port}" - - -def get_ssl_context() -> ssl.SSLContext: - ctx = ssl.create_default_context(cafile=certifi.where()) - kube_sa_ca_cert_path = Path( - "/run/secrets/kubernetes.io/serviceaccount/service-ca.crt" - ) - if ( - uvicorn_settings.ssl_keyfile is not None - and ".svc." 
in docling_serve_settings.api_host - and kube_sa_ca_cert_path.exists() - ): - ctx.load_verify_locations(cafile=kube_sa_ca_cert_path) - return ctx - - -def health_check(): - response = httpx.get(f"{get_api_endpoint()}/health") - if response.status_code == 200: - return "Healthy" - return "Unhealthy" - - -def set_options_visibility(x): - return gr.Accordion("Options", open=x) - - -def set_outputs_visibility_direct(x, y): - content = gr.Row(visible=x) - file = gr.Row(visible=y) - return content, file - - -def set_task_id_visibility(x): - task_id_row = gr.Row(visible=x) - return task_id_row - - -def set_outputs_visibility_process(x): - content = gr.Row(visible=not x) - file = gr.Row(visible=x) - return content, file - - -def set_download_button_label(label_text: gr.State): - return gr.DownloadButton(label=str(label_text), scale=1) - - -def clear_outputs(): - task_id_rendered = "" - markdown_content = "" - json_content = "" - json_rendered_content = "" - html_content = "" - text_content = "" - doctags_content = "" - - return ( - task_id_rendered, - markdown_content, - markdown_content, - json_content, - json_rendered_content, - html_content, - html_content, - text_content, - doctags_content, - ) - - -def clear_url_input(): - return "" - - -def clear_file_input(): - return None - - -def auto_set_return_as_file( - url_input_value: str, - file_input_value: Optional[list[str]], - image_export_mode_value: str, -): - # If more than one input source is provided, return as file - if ( - (len(url_input_value.split(",")) > 1) - or (file_input_value and len(file_input_value) > 1) - or (image_export_mode_value == "referenced") - ): - return True - else: - return False - - -def change_ocr_lang(ocr_engine): - if ocr_engine == "easyocr": - return gr.update(visible=True, value="en,fr,de,es") - elif ocr_engine == "tesseract_cli": - return gr.update(visible=True, value="eng,fra,deu,spa") - elif ocr_engine == "tesseract": - return gr.update(visible=True, value="eng,fra,deu,spa") - elif 
ocr_engine == "rapidocr": - return gr.update(visible=True, value="english,chinese") - elif ocr_engine == "ocrmac": - return gr.update(visible=True, value="fr-FR,de-DE,es-ES,en-US") - - return gr.update(visible=False, value="") - - -def wait_task_finish(auth: str, task_id: str, return_as_file: bool): - conversion_sucess = False - task_finished = False - task_status = "" - - headers = {} - if docling_serve_settings.api_key: - headers["X-Api-Key"] = str(auth) - - ssl_ctx = get_ssl_context() - while not task_finished: - try: - response = httpx.get( - f"{get_api_endpoint()}/v1/status/poll/{task_id}?wait=5", - headers=headers, - verify=ssl_ctx, - timeout=15, - ) - task_status = response.json()["task_status"] - if task_status == "success": - conversion_sucess = True - task_finished = True - - if task_status in ("failure", "revoked"): - conversion_sucess = False - task_finished = True - raise RuntimeError(f"Task failed with status {task_status!r}") - time.sleep(5) - except Exception as e: - logger.error(f"Error processing file(s): {e}") - conversion_sucess = False - task_finished = True - raise gr.Error(f"Error processing file(s): {e}", print_exception=False) - - if conversion_sucess: - try: - response = httpx.get( - f"{get_api_endpoint()}/v1/result/{task_id}", - headers=headers, - timeout=15, - verify=ssl_ctx, - ) - output = response_to_output(response, return_as_file) - return output - except Exception as e: - logger.error(f"Error getting task result: {e}") - - raise gr.Error( - f"Error getting task result, conversion finished with status: {task_status}" - ) - - -def process_url( - auth, - input_sources, - to_formats, - image_export_mode, - pipeline, - ocr, - force_ocr, - ocr_engine, - ocr_lang, - pdf_backend, - table_mode, - abort_on_error, - return_as_file, - do_code_enrichment, - do_formula_enrichment, - do_picture_classification, - do_picture_description, -): - target = {"kind": "zip" if return_as_file else "inbody"} - parameters = { - "sources": [ - {"kind": "http", 
"url": source} for source in input_sources.split(",") - ], - "options": { - "to_formats": to_formats, - "image_export_mode": image_export_mode, - "pipeline": pipeline, - "ocr": ocr, - "force_ocr": force_ocr, - "ocr_engine": ocr_engine, - "ocr_lang": _to_list_of_strings(ocr_lang), - "pdf_backend": pdf_backend, - "table_mode": table_mode, - "abort_on_error": abort_on_error, - "do_code_enrichment": do_code_enrichment, - "do_formula_enrichment": do_formula_enrichment, - "do_picture_classification": do_picture_classification, - "do_picture_description": do_picture_description, - }, - "target": target, - } - if ( - not parameters["sources"] - or len(parameters["sources"]) == 0 - or parameters["sources"][0]["url"] == "" - ): - logger.error("No input sources provided.") - raise gr.Error("No input sources provided.", print_exception=False) - - headers = {} - if docling_serve_settings.api_key: - headers["X-Api-Key"] = str(auth) - - print(f"{headers=}") - try: - ssl_ctx = get_ssl_context() - response = httpx.post( - f"{get_api_endpoint()}/v1/convert/source/async", - json=parameters, - headers=headers, - verify=ssl_ctx, - timeout=60, - ) - except Exception as e: - logger.error(f"Error processing URL: {e}") - raise gr.Error(f"Error processing URL: {e}", print_exception=False) - if response.status_code != 200: - data = response.json() - error_message = data.get("detail", "An unknown error occurred.") - logger.error(f"Error processing file: {error_message}") - raise gr.Error(f"Error processing file: {error_message}", print_exception=False) - - task_id_rendered = response.json()["task_id"] - return task_id_rendered - - -def file_to_base64(file): - with open(file.name, "rb") as f: - encoded_string = base64.b64encode(f.read()).decode("utf-8") - return encoded_string - - -def process_file( - auth, - files, - to_formats, - image_export_mode, - pipeline, - ocr, - force_ocr, - ocr_engine, - ocr_lang, - pdf_backend, - table_mode, - abort_on_error, - return_as_file, - do_code_enrichment, 
- do_formula_enrichment, - do_picture_classification, - do_picture_description, -): - if not files or len(files) == 0: - logger.error("No files provided.") - raise gr.Error("No files provided.", print_exception=False) - files_data = [ - {"kind": "file", "base64_string": file_to_base64(file), "filename": file.name} - for file in files - ] - target = {"kind": "zip" if return_as_file else "inbody"} - - parameters = { - "sources": files_data, - "options": { - "to_formats": to_formats, - "image_export_mode": image_export_mode, - "pipeline": pipeline, - "ocr": ocr, - "force_ocr": force_ocr, - "ocr_engine": ocr_engine, - "ocr_lang": _to_list_of_strings(ocr_lang), - "pdf_backend": pdf_backend, - "table_mode": table_mode, - "abort_on_error": abort_on_error, - "return_as_file": return_as_file, - "do_code_enrichment": do_code_enrichment, - "do_formula_enrichment": do_formula_enrichment, - "do_picture_classification": do_picture_classification, - "do_picture_description": do_picture_description, - }, - "target": target, - } - - headers = {} - if docling_serve_settings.api_key: - headers["X-Api-Key"] = str(auth) - - try: - ssl_ctx = get_ssl_context() - response = httpx.post( - f"{get_api_endpoint()}/v1/convert/source/async", - json=parameters, - headers=headers, - verify=ssl_ctx, - timeout=60, - ) - except Exception as e: - logger.error(f"Error processing file(s): {e}") - raise gr.Error(f"Error processing file(s): {e}", print_exception=False) - if response.status_code != 200: - data = response.json() - error_message = data.get("detail", "An unknown error occurred.") - logger.error(f"Error processing file: {error_message}") - raise gr.Error(f"Error processing file: {error_message}", print_exception=False) - - task_id_rendered = response.json()["task_id"] - return task_id_rendered - - -def response_to_output(response, return_as_file): - markdown_content = "" - json_content = "" - json_rendered_content = "" - html_content = "" - text_content = "" - doctags_content = "" - 
download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1) - if return_as_file: - filename = ( - response.headers.get("Content-Disposition").split("filename=")[1].strip('"') - ) - tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_")) - file_output_path = f"{tmp_output_dir}/{filename}" - # logger.info(f"Saving file to: {file_output_path}") - with open(file_output_path, "wb") as f: - f.write(response.content) - download_button = gr.DownloadButton( - visible=True, label=f"Download {filename}", scale=1, value=file_output_path - ) - else: - full_content = response.json() - markdown_content = full_content.get("document").get("md_content") - json_content = json.dumps( - full_content.get("document").get("json_content"), indent=2 - ) - # Embed document JSON and trigger load at client via an image. - json_rendered_content = f""" - - - - """ - html_content = full_content.get("document").get("html_content") - text_content = full_content.get("document").get("text_content") - doctags_content = full_content.get("document").get("doctags_content") - return ( - markdown_content, - markdown_content, - json_content, - json_rendered_content, - html_content, - html_content, - text_content, - doctags_content, - download_button, - ) - - -############ -# UI Setup # -############ - -with gr.Blocks( - head=head, - css=css, - theme=theme, - title="Docling Serve", - delete_cache=(3600, 36000), # Delete all files older than 10 hour every hour -) as ui: - # Constants stored in states to be able to pass them as inputs to functions - processing_text = gr.State("Processing your document(s), please wait...") - true_bool = gr.State(True) - false_bool = gr.State(False) - - # Banner - with gr.Row(elem_id="check_health"): - # Logo - with gr.Column(scale=1, min_width=90): - try: - gr.Image( - logo_path, - height=80, - width=80, - show_download_button=False, - show_label=False, - show_fullscreen_button=False, - container=False, - elem_id="logo", - scale=0, 
- ) - except Exception: - logger.warning("Logo not found.") - - # Title - with gr.Column(scale=1, min_width=200): - gr.Markdown( - f"# Docling Serve \n(docling version: " - f"{importlib.metadata.version('docling')})", - elem_id="title", - elem_classes=["title-text"], - ) - # Dark mode button - with gr.Column(scale=16, elem_id="dark_mode_column"): - dark_mode_btn = gr.Button("Dark/Light Mode", scale=0) - dark_mode_btn.click( - None, - None, - None, - js="""() => { - if (document.querySelectorAll('.dark').length) { - document.querySelectorAll('.dark').forEach( - el => el.classList.remove('dark') - ); - } else { - document.querySelector('body').classList.add('dark'); - } - }""", - show_api=False, - ) - - # URL Processing Tab - with gr.Tab("Convert URL"): - with gr.Row(): - with gr.Column(scale=4): - url_input = gr.Textbox( - label="URL Input Source", - placeholder="https://arxiv.org/pdf/2501.17887", - ) - with gr.Column(scale=1): - url_process_btn = gr.Button("Process URL", scale=1) - url_reset_btn = gr.Button("Reset", scale=1) - - # File Processing Tab - with gr.Tab("Convert File"): - with gr.Row(): - with gr.Column(scale=4): - raw_exts = itertools.chain.from_iterable(FormatToExtensions.values()) - file_input = gr.File( - elem_id="file_input_zone", - label="Upload File", - file_types=[ - f".{v.lower()}" - for v in raw_exts # lowercase - ] - + [ - f".{v.upper()}" - for v in raw_exts # uppercase - ], - file_count="multiple", - scale=4, - ) - with gr.Column(scale=1): - file_process_btn = gr.Button("Process File", scale=1) - file_reset_btn = gr.Button("Reset", scale=1) - - # Auth - with gr.Row(visible=bool(docling_serve_settings.api_key)): - with gr.Column(): - auth = gr.Textbox( - label="Authentication", - placeholder="API Key", - type="password", - ) - - # Options - with gr.Accordion("Options") as options: - with gr.Row(): - with gr.Column(scale=1): - to_formats = gr.CheckboxGroup( - [ - ("Docling (JSON)", "json"), - ("Markdown", "md"), - ("HTML", "html"), - ("Plain 
Text", "text"), - ("Doc Tags", "doctags"), - ], - label="To Formats", - value=["json", "md"], - ) - with gr.Column(scale=1): - image_export_mode = gr.Radio( - [ - ("Embedded", "embedded"), - ("Placeholder", "placeholder"), - ("Referenced", "referenced"), - ], - label="Image Export Mode", - value="embedded", - ) - - with gr.Row(): - with gr.Column(scale=1, min_width=200): - pipeline = gr.Radio( - [(v.value.capitalize(), v.value) for v in ProcessingPipeline], - label="Pipeline type", - value=ProcessingPipeline.STANDARD.value, - ) - with gr.Row(): - with gr.Column(scale=1, min_width=200): - ocr = gr.Checkbox(label="Enable OCR", value=True) - force_ocr = gr.Checkbox(label="Force OCR", value=False) - with gr.Column(scale=1): - engines_list = [ - ("Auto", "auto"), - ("EasyOCR", "easyocr"), - ("Tesseract", "tesseract"), - ("RapidOCR", "rapidocr"), - ] - if sys.platform == "darwin": - engines_list.append(("OCRMac", "ocrmac")) - - ocr_engine = gr.Radio( - engines_list, - label="OCR Engine", - value="auto", - ) - with gr.Column(scale=1, min_width=200): - ocr_lang = gr.Textbox( - label="OCR Language (beware of the format)", - value="en,fr,de,es", - visible=False, - ) - ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang]) - with gr.Row(): - with gr.Column(scale=4): - pdf_backend = gr.Radio( - [v.value for v in PdfBackend], - label="PDF Backend", - value=PdfBackend.DLPARSE_V4.value, - ) - with gr.Column(scale=2): - table_mode = gr.Radio( - [(v.value.capitalize(), v.value) for v in TableFormerMode], - label="Table Mode", - value=TableStructureOptions().mode.value, - ) - with gr.Column(scale=1): - abort_on_error = gr.Checkbox(label="Abort on Error", value=False) - return_as_file = gr.Checkbox(label="Return as File", value=False) - with gr.Row(): - with gr.Column(): - do_code_enrichment = gr.Checkbox( - label="Enable code enrichment", value=False - ) - do_formula_enrichment = gr.Checkbox( - label="Enable formula enrichment", value=False - ) - with 
gr.Column(): - do_picture_classification = gr.Checkbox( - label="Enable picture classification", value=False - ) - do_picture_description = gr.Checkbox( - label="Enable picture description", value=False - ) - - # Task id output - with gr.Row(visible=False) as task_id_output: - task_id_rendered = gr.Textbox(label="Task id", interactive=False) - - # Document output - with gr.Row(visible=False) as content_output: - with gr.Tab("Docling (JSON)"): - output_json = gr.Code(language="json", wrap_lines=True, show_label=False) - with gr.Tab("Docling-Rendered"): - output_json_rendered = gr.HTML(label="Response") - with gr.Tab("Markdown"): - output_markdown = gr.Code( - language="markdown", wrap_lines=True, show_label=False - ) - with gr.Tab("Markdown-Rendered"): - output_markdown_rendered = gr.Markdown(label="Response") - with gr.Tab("HTML"): - output_html = gr.Code(language="html", wrap_lines=True, show_label=False) - with gr.Tab("HTML-Rendered"): - output_html_rendered = gr.HTML(label="Response") - with gr.Tab("Text"): - output_text = gr.Code(wrap_lines=True, show_label=False) - with gr.Tab("DocTags"): - output_doctags = gr.Code(wrap_lines=True, show_label=False) - - # File download output - with gr.Row(visible=False) as file_output: - download_file_btn = gr.DownloadButton(label="Placeholder", scale=1) - - ############## - # UI Actions # - ############## - - # Handle Return as File - url_input.change( - auto_set_return_as_file, - inputs=[url_input, file_input, image_export_mode], - outputs=[return_as_file], - ) - file_input.change( - auto_set_return_as_file, - inputs=[url_input, file_input, image_export_mode], - outputs=[return_as_file], - ) - image_export_mode.change( - auto_set_return_as_file, - inputs=[url_input, file_input, image_export_mode], - outputs=[return_as_file], - ) - - # URL processing - url_process_btn.click( - set_options_visibility, inputs=[false_bool], outputs=[options] - ).then( - set_download_button_label, inputs=[processing_text], 
outputs=[download_file_btn] - ).then( - clear_outputs, - inputs=None, - outputs=[ - task_id_rendered, - output_markdown, - output_markdown_rendered, - output_json, - output_json_rendered, - output_html, - output_html_rendered, - output_text, - output_doctags, - ], - ).then( - set_task_id_visibility, - inputs=[true_bool], - outputs=[task_id_output], - ).then( - process_url, - inputs=[ - auth, - url_input, - to_formats, - image_export_mode, - pipeline, - ocr, - force_ocr, - ocr_engine, - ocr_lang, - pdf_backend, - table_mode, - abort_on_error, - return_as_file, - do_code_enrichment, - do_formula_enrichment, - do_picture_classification, - do_picture_description, - ], - outputs=[ - task_id_rendered, - ], - ).then( - set_outputs_visibility_process, - inputs=[return_as_file], - outputs=[content_output, file_output], - ).then( - wait_task_finish, - inputs=[auth, task_id_rendered, return_as_file], - outputs=[ - output_markdown, - output_markdown_rendered, - output_json, - output_json_rendered, - output_html, - output_html_rendered, - output_text, - output_doctags, - download_file_btn, - ], - ) - - url_reset_btn.click( - clear_outputs, - inputs=None, - outputs=[ - output_markdown, - output_markdown_rendered, - output_json, - output_json_rendered, - output_html, - output_html_rendered, - output_text, - output_doctags, - ], - ).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then( - set_outputs_visibility_direct, - inputs=[false_bool, false_bool], - outputs=[content_output, file_output], - ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then( - clear_url_input, inputs=None, outputs=[url_input] - ) - - # File processing - file_process_btn.click( - set_options_visibility, inputs=[false_bool], outputs=[options] - ).then( - set_download_button_label, inputs=[processing_text], outputs=[download_file_btn] - ).then( - clear_outputs, - inputs=None, - outputs=[ - task_id_rendered, - output_markdown, - output_markdown_rendered, - 
output_json, - output_json_rendered, - output_html, - output_html_rendered, - output_text, - output_doctags, - ], - ).then( - set_task_id_visibility, - inputs=[true_bool], - outputs=[task_id_output], - ).then( - process_file, - inputs=[ - auth, - file_input, - to_formats, - image_export_mode, - pipeline, - ocr, - force_ocr, - ocr_engine, - ocr_lang, - pdf_backend, - table_mode, - abort_on_error, - return_as_file, - do_code_enrichment, - do_formula_enrichment, - do_picture_classification, - do_picture_description, - ], - outputs=[ - task_id_rendered, - ], - ).then( - set_outputs_visibility_process, - inputs=[return_as_file], - outputs=[content_output, file_output], - ).then( - wait_task_finish, - inputs=[auth, task_id_rendered, return_as_file], - outputs=[ - output_markdown, - output_markdown_rendered, - output_json, - output_json_rendered, - output_html, - output_html_rendered, - output_text, - output_doctags, - download_file_btn, - ], - ) - - file_reset_btn.click( - clear_outputs, - inputs=None, - outputs=[ - output_markdown, - output_markdown_rendered, - output_json, - output_json_rendered, - output_html, - output_html_rendered, - output_text, - output_doctags, - ], - ).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then( - set_outputs_visibility_direct, - inputs=[false_bool, false_bool], - outputs=[content_output, file_output], - ).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then( - clear_file_input, inputs=None, outputs=[file_input] - ) diff --git a/docling_serve/ui/__init__.py b/docling_serve/ui/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/docling_serve/ui/app.py b/docling_serve/ui/app.py new file mode 100644 index 0000000..4ef629e --- /dev/null +++ b/docling_serve/ui/app.py @@ -0,0 +1,278 @@ +import io +import logging +from pathlib import Path +from typing import Annotated + +from fastapi import ( + BackgroundTasks, + Depends, + FastAPI, + Form, + HTTPException, + Request, + 
UploadFile, + status, +) +from fastapi.responses import HTMLResponse, RedirectResponse, Response +from fastapi.staticfiles import StaticFiles +from pydantic import AnyHttpUrl +from pyjsx import auto_setup +from starlette.exceptions import HTTPException as StarletteHTTPException + +from docling.datamodel.base_models import OutputFormat +from docling_core.types.doc.document import ( + FloatingItem, + PageItem, + RefItem, +) +from docling_jobkit.orchestrators.base_orchestrator import ( + BaseOrchestrator, +) + +from docling_serve.auth import APIKeyCookieAuth, AuthenticationResult +from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions +from docling_serve.datamodel.requests import ConvertDocumentsRequest, HttpSourceRequest +from docling_serve.helper_functions import FormDepends +from docling_serve.orchestrator_factory import get_async_orchestrator +from docling_serve.settings import docling_serve_settings + +from .convert import ConvertPage # type: ignore +from .pages import AuthPage, StatusPage, TaskPage, TasksPage # type: ignore + +# Initialize JSX. +auto_setup + +_log = logging.getLogger(__name__) + + +# TODO: Isolate passed functions into a controller? +def create_ui_app(process_file, process_url, task_result, task_status_poll) -> FastAPI: # noqa: C901 + ui_app = FastAPI() + require_auth = APIKeyCookieAuth(docling_serve_settings.api_key) + + # Static files. + ui_app.mount( + "/static", + StaticFiles(directory=Path(__file__).parent.absolute() / "static"), + name="static", + ) + + # Convert page. 
@ui_app.get("/")
async def get_root():
    # The bare mount point only forwards to the conversion form (relative URL).
    return RedirectResponse("convert")


@ui_app.get("/convert", response_class=HTMLResponse)
async def get_convert(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
):
    # Render the conversion form with its default options.
    page = ConvertPage()
    return str(page)


@ui_app.post("/convert", response_class=HTMLResponse)
async def post_convert(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    options: Annotated[
        ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
    ],
    files: Annotated[list[UploadFile], Form()],
    url: Annotated[str, Form()],
    page_min: Annotated[str, Form()],
    page_max: Annotated[str, Form()],
):
    # Handle a submitted conversion form: uploaded files take precedence over
    # the URL field; with neither, the form is re-rendered with validation
    # messages. On success the browser is redirected to the new task's page.

    # The page bounds arrive as free-text fields; an empty field keeps the
    # bound already present in the options.
    if page_min:
        options.page_range = (int(page_min), options.page_range[1])
    if page_max:
        options.page_range = (options.page_range[0], int(page_max))

    # Each OCR language entry may itself be a comma-separated list; flatten
    # all entries and drop blank fragments.
    languages = []
    for entry in options.ocr_lang or []:
        for fragment in entry.split(","):
            fragment = fragment.strip()
            if fragment:
                languages.append(fragment)
    options.ocr_lang = languages

    # Keep only uploads that report a non-zero size.
    uploads = [upload for upload in files if upload.size]
    has_url = bool(url.strip())

    if not uploads and not has_url:
        # Nothing to convert: show the form again with a hint on both inputs.
        validation = {
            "files": "Upload files or enter a URL",
            "url": "Enter a URL or upload files",
        }
        return str(ConvertPage(options=options, validation=validation))

    if uploads:
        # Directly uploaded documents.
        response = await process_file(
            auth=auth,
            orchestrator=orchestrator,
            background_tasks=background_tasks,
            files=uploads,
            options=options,
        )
    else:
        # URLs of documents.
        source = HttpSourceRequest(url=AnyHttpUrl(url))
        request = ConvertDocumentsRequest(options=options, sources=[source])
        response = await process_url(
            auth=auth,
            orchestrator=orchestrator,
            conversion_request=request,
        )

    # 303 See Other: the follow-up request for the task page is a GET.
    task_location = f"tasks/{response.task_id}/"
    return RedirectResponse(task_location, status.HTTP_303_SEE_OTHER)


# Task overview page.
@ui_app.get("/tasks/", response_class=HTMLResponse)
async def get_tasks(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
):
    # List every task known to the orchestrator, ordered by creation time
    # (ascending).
    tasks = sorted(orchestrator.tasks.values(), key=lambda t: t.created_at)

    return str(TasksPage(tasks=tasks))


# Task specific page.
@ui_app.get("/tasks/{task_id}/", response_class=HTMLResponse)
async def get_task(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
):
    # Render the page of a single task. The result is only fetched once the
    # task reports "success" or "failure"; otherwise the page is rendered
    # with the status alone.
    poll = await task_status_poll(auth, orchestrator, task_id)

    result = None
    if poll.task_status in ["success", "failure"]:
        try:
            result = await task_result(
                auth, orchestrator, background_tasks, task_id
            )
        except Exception:
            # Best effort: the page is still rendered without a result.
            # Report through the module logger and keep the traceback, so the
            # failure can be attributed to this module and diagnosed.
            _log.exception("Could not fetch the result of task %s", task_id)

    return str(TaskPage(poll, result))


# Poll task via HTTP status.
@ui_app.get("/tasks/{task_id}/poll", response_class=Response)
async def poll_task(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    task_id: str,
):
    # Answer with an empty body: 202 while the task status is "started",
    # 200 for any other status.
    # NOTE(review): a task that is queued but not yet "started" would also
    # answer 200 here, while get_task only treats "success"/"failure" as
    # finished - confirm against the statuses the orchestrator reports and
    # the polling client in static/main.js.
    poll = await task_status_poll(auth, orchestrator, task_id)
    return Response(
        status_code=status.HTTP_202_ACCEPTED
        if poll.task_status == "started"
        else status.HTTP_200_OK
    )


# Download the contents of zipped documents.
@ui_app.get("/tasks/{task_id}/documents.zip")
async def get_task_zip(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
):
    # Hand back whatever task_result produces for this task, unchanged.
    return await task_result(auth, orchestrator, background_tasks, task_id)


# Get the output of a task, as a converted document in a specific format.
@ui_app.get("/tasks/{task_id}/document.{format}")
async def get_task_document_format(
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
    format: str,
):
    # Serve one export of a task's converted document.
    #
    # Responds 404 when `format` is not an OutputFormat value, or when the
    # task result carries no content for that format.

    # Local import: this module does not import json at file level.
    import json

    if format not in [f.value for f in OutputFormat]:
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Output format not found.")

    response = await task_result(auth, orchestrator, background_tasks, task_id)

    # TODO: Make this compatible with base_models FormatToMimeType?
    mimes = {
        "html": "text/html",
        "md": "text/markdown",
        "json": "application/json",
    }

    if format == OutputFormat.JSON:
        document = response.document.json_content
        # Serialise as real JSON; str() of the exported dict would produce a
        # Python repr (single quotes, True/None) that JSON clients reject.
        content = (
            None if document is None else json.dumps(document.export_to_dict())
        )
    else:
        # Read the single "<format>_content" attribute instead of dumping the
        # whole response document to a dict just to pick one entry.
        content = getattr(response.document, f"{format}_content", None)

    if content is None:
        # Previously this was served as the literal text "None" with a 200.
        raise HTTPException(
            status.HTTP_404_NOT_FOUND,
            "Output format not available for this task.",
        )

    return Response(
        content=str(content),
        media_type=mimes.get(format, "text/plain"),
    )


@ui_app.get("/tasks/{task_id}/document/{cref:path}")
async def get_task_document_item(
    request: Request,
    auth: Annotated[AuthenticationResult, Depends(require_auth)],
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
    cref: str,
):
    # Resolve the reference "#/<cref>" inside the task's JSON document.
    #
    # When the client accepts "image/*" and the item is a page or a floating
    # item, its image is returned as PNG (404 if the item has no image);
    # in every other case the resolved item itself is returned.
    response = await task_result(auth, orchestrator, background_tasks, task_id)
    doc = response.document.json_content
    item = RefItem(cref=f"#/{cref}").resolve(doc)  # type: ignore

    wants_image = "image/*" in (request.headers.get("Accept") or "")
    if not (wants_image and isinstance(item, FloatingItem | PageItem)):
        return item

    image = None
    if (
        isinstance(item, PageItem)
        and (img_ref := item.image)
        and (page_image := img_ref.pil_image)
    ):
        image = page_image
    elif isinstance(item, FloatingItem):
        image = item.get_image(doc)

    if not image:
        # Without this guard an empty body was sent as a 200 image/png.
        raise HTTPException(status.HTTP_404_NOT_FOUND, "Image not found.")

    content = io.BytesIO()
    image.save(content, format="PNG")

    return Response(content=content.getvalue(), media_type="image/png")


# Page not found; catch all.
+ @ui_app.api_route("/{path_name:path}") + def no_page( + auth: Annotated[AuthenticationResult, Depends(require_auth)], + ): + raise HTTPException(status.HTTP_404_NOT_FOUND, "Page not found.") + + # Exception and auth pages. + @ui_app.exception_handler(StarletteHTTPException) + @ui_app.exception_handler(Exception) + async def exception_page(request: Request, ex: Exception): + if not isinstance(ex, StarletteHTTPException): + # Internal error. + ex = HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR) + + if request.method == "POST": + # Authorization required -> API key dialog. + form = await request.form() + form_api_key = form.get("api_key") + if isinstance(form_api_key, str): + response = RedirectResponse(request.url, status.HTTP_303_SEE_OTHER) + require_auth._set_api_key(response, form_api_key) + return response + + if ex.status_code == status.HTTP_401_UNAUTHORIZED: + return HTMLResponse(str(AuthPage()), status.HTTP_401_UNAUTHORIZED) + + # HTTP exception page; avoid referer loop. + referer = request.headers.get("Referer") + if referer == request.url: + referer = None + + return HTMLResponse(str(StatusPage(ex, referer)), ex.status_code) + + return ui_app diff --git a/docling_serve/ui/convert.px b/docling_serve/ui/convert.px new file mode 100644 index 0000000..58a6dc1 --- /dev/null +++ b/docling_serve/ui/convert.px @@ -0,0 +1,251 @@ +import json +import sys + +from pyjsx import jsx, JSX + +from docling.datamodel.base_models import FormatToExtensions, OutputFormat +from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode +from docling_core.types.doc import ImageRefMode +from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions, ocr_engines_enum + +from .forms import EnumCheckboxes, EnumRadios, EnumSelect, ocr_engine_languages, ValidatedInput +from .pages import Header, Page + +base_convert_options = ConvertDocumentsRequestOptions() +base_convert_options.to_formats.append(OutputFormat.JSON) + + +def 
ConvertPage( + options: ConvertDocumentsRequestOptions = base_convert_options, + validation: None | dict[str, str] = None +) -> JSX: + file_accept = ",".join([f".{ext}" for exts in FormatToExtensions.values() for ext in exts]) + + return ( + +
+
+ +
+ + Documents + +
+ + +
+ +
+ + + +
+ +
+ + = sys.maxsize else options.page_range[1]} + /> +
+
+ +
+ + +
+
+ +
+ Output} + /> + +
+
+ + + +
+ + + + + + + +
+ +
+
+ + + + + +
+ + + + +
+ +
+
+ + +
+ +
+
+ + +
+
+
+ ) diff --git a/docling_serve/ui/forms.px b/docling_serve/ui/forms.px new file mode 100644 index 0000000..db16de7 --- /dev/null +++ b/docling_serve/ui/forms.px @@ -0,0 +1,127 @@ +from enum import Enum +from typing import Type + +from pyjsx import jsx, JSX + +from docling.datamodel.pipeline_options import OcrOptions + +from docling_serve.datamodel.convert import ConvertDocumentsRequestOptions + + +ocr_engine_languages = { + SubOptions.kind: ",".join(SubOptions().lang) + for SubOptions in OcrOptions.__subclasses__() +} + + +def _format_label(label: str) -> str: + return label.replace("_", " ").lower() + + +def option_example(field_name: str) -> str | None: + field = ConvertDocumentsRequestOptions.model_fields[field_name] + return (field.examples or [])[0] + + +def ValidatedInput(validation: None | dict[str, str], name: str, **kwargs) -> JSX: + if validation: + invalid = "true" if name in validation else "false" + content = [] + + if name in validation: + content.append({validation[name]}) + + return
{content}
+ else: + return + + +def EnumCheckboxes( + children, + enum: Type[Enum], + selected: list[Enum], + name: str, + title: JSX = None, + **kwargs +) -> JSX: + return ( +
+ { + {title} + if title + else None + } + + {[ + + for e in enum + ]} +
+ ) + + +def EnumRadios( + children, + enum: Type[Enum], + selected: Enum, + name: str, + title: JSX = None, + **kwargs +) -> JSX: + return ( +
+ { + {title} + if title + else None + } + + {[ + + for e in enum + ]} +
+ ) + + +def EnumSelect( + children, + enum: Type[Enum], + selected: Enum, + name: str, + title: JSX = None, + **kwargs +) -> JSX: + return ( +
+ { + + if title + else None + } + +
+ ) diff --git a/docling_serve/ui/pages.px b/docling_serve/ui/pages.px new file mode 100644 index 0000000..369bbf0 --- /dev/null +++ b/docling_serve/ui/pages.px @@ -0,0 +1,220 @@ +from importlib import metadata + +from fastapi import FastAPI, HTTPException, Response +from pyjsx import jsx, JSX + +from docling.datamodel.base_models import OutputFormat +from docling.datamodel.pipeline_options import PdfBackend, ProcessingPipeline, TableFormerMode +from docling_jobkit.datamodel.task import Task +from docling_serve.datamodel.responses import ConvertDocumentResponse + +from .preview import DocPreview + + +def Header(children, classname: str = "") -> JSX: + return ( +
+ + DCLING SERVE + + + + {metadata.version('docling')} + + + +
+ ) + + +def Page(children, title: str, poll: bool = False) -> JSX: + return ( + + + {title} + + + + + +