api v1alpha1 (#17)

* api v1alpha1

Signed-off-by: Guillaume Moutier <gmoutier@redhat.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* use actual types in request models and refactor

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* make gradio optional and update README

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Run workflow jobs sequentially to avoid disk space outage (#19)

Github Action runners are running out of the space while
building both the images in parallel.

This change will build the image sequentially and also
clean up the cpu images before start building gpu image.

Signed-off-by: Anil Vishnoi <vishnoianil@gmail.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Add github job to build image (and not publish) on PR creation (#20)

Signed-off-by: Anil Vishnoi <vishnoianil@gmail.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add start_server script for local dev

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix 3.12-only syntax

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix more py3.10-11 compatibility

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* rework output format and background tasks

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* speficy return schemas for openapi

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add processing time and update REDAME

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* lint markdown

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add MD033 to config

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* use port 5000

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* use port 5001 as default

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update deps

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* refactor input request

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* return docling document

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* update new payload in README

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add base64 example

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* wrap example in <details>

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* rename /url in /source

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* move main execution to __main__

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Guillaume Moutier <gmoutier@redhat.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Anil Vishnoi <vishnoianil@gmail.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Anil Vishnoi <vishnoianil@gmail.com>
This commit is contained in:
Guillaume Moutier
2025-02-03 05:00:54 -05:00
committed by GitHub
parent ddf3144512
commit c6539c42de
25 changed files with 3642 additions and 1259 deletions

635
docling_serve/gradio_ui.py Normal file
View File

@@ -0,0 +1,635 @@
import importlib
import json
import logging
import os
import tempfile
from pathlib import Path
import gradio as gr
import requests
from docling_serve.helper_functions import _to_list_of_strings
logger = logging.getLogger(__name__)
#################
# CSS and theme #
#################
css = """
#logo {
border-style: none;
background: none;
box-shadow: none;
min-width: 80px;
}
#dark_mode_column {
display: flex;
align-content: flex-end;
}
#title {
text-align: left;
display:block;
height: auto;
padding-top: 5px;
line-height: 0;
}
.title-text h1 > p, .title-text p {
margin-top: 0px !important;
margin-bottom: 2px !important;
}
#custom-container {
border: 0.909091px solid;
padding: 10px;
border-radius: 4px;
}
#custom-container h4 {
font-size: 14px;
}
#file_input_zone {
height: 140px;
}
"""
theme = gr.themes.Default(
text_size="md",
spacing_size="md",
font=[
gr.themes.GoogleFont("Red Hat Display"),
"ui-sans-serif",
"system-ui",
"sans-serif",
],
font_mono=[
gr.themes.GoogleFont("Red Hat Mono"),
"ui-monospace",
"Consolas",
"monospace",
],
)
#############
# Variables #
#############
gradio_output_dir = None # Will be set by FastAPI when mounted
file_output_path = None # Will be set when a new file is generated
#############
# Functions #
#############
def health_check():
response = requests.get(f"http://localhost:{int(os.getenv('PORT', '5001'))}/health")
if response.status_code == 200:
return "Healthy"
return "Unhealthy"
def set_options_visibility(x):
return gr.Accordion("Options", open=x)
def set_outputs_visibility_direct(x, y):
content = gr.Row(visible=x)
file = gr.Row(visible=y)
return content, file
def set_outputs_visibility_process(x):
content = gr.Row(visible=not x)
file = gr.Row(visible=x)
return content, file
def set_download_button_label(label_text: gr.State):
return gr.DownloadButton(label=str(label_text), scale=1)
def clear_outputs():
markdown_content = ""
json_content = ""
html_content = ""
text_content = ""
doctags_content = ""
return (
markdown_content,
markdown_content,
json_content,
html_content,
html_content,
text_content,
doctags_content,
)
def clear_url_input():
return ""
def clear_file_input():
return None
def auto_set_return_as_file(url_input, file_input, image_export_mode):
# If more than one input source is provided, return as file
if (
(len(url_input.split(",")) > 1)
or (file_input and len(file_input) > 1)
or (image_export_mode == "referenced")
):
return True
else:
return False
def change_ocr_lang(ocr_engine):
if ocr_engine == "easyocr":
return "en,fr,de,es"
elif ocr_engine == "tesseract_cli":
return "eng,fra,deu,spa"
elif ocr_engine == "tesseract":
return "eng,fra,deu,spa"
elif ocr_engine == "rapidocr":
return "english,chinese"
def process_url(
input_sources,
to_formats,
image_export_mode,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
):
parameters = {
"http_sources": [{"url": source} for source in input_sources.split(",")],
"options": {
"to_formats": to_formats,
"image_export_mode": image_export_mode,
"ocr": ocr,
"force_ocr": force_ocr,
"ocr_engine": ocr_engine,
"ocr_lang": _to_list_of_strings(ocr_lang),
"pdf_backend": pdf_backend,
"table_mode": table_mode,
"abort_on_error": abort_on_error,
"return_as_file": return_as_file,
},
}
if (
not parameters["http_sources"]
or len(parameters["http_sources"]) == 0
or parameters["http_sources"][0]["url"] == ""
):
logger.error("No input sources provided.")
raise gr.Error("No input sources provided.", print_exception=False)
try:
response = requests.post(
f"http://localhost:{int(os.getenv('PORT', '5001'))}/v1alpha/convert/source",
json=parameters,
)
except Exception as e:
logger.error(f"Error processing URL: {e}")
raise gr.Error(f"Error processing URL: {e}", print_exception=False)
if response.status_code != 200:
data = response.json()
error_message = data.get("detail", "An unknown error occurred.")
logger.error(f"Error processing file: {error_message}")
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
output = response_to_output(response, return_as_file)
return output
def process_file(
files,
to_formats,
image_export_mode,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
):
if not files or len(files) == 0 or files[0] == "":
logger.error("No files provided.")
raise gr.Error("No files provided.", print_exception=False)
files_data = [("files", (file.name, open(file.name, "rb"))) for file in files]
parameters = {
"to_formats": to_formats,
"image_export_mode": image_export_mode,
"ocr": str(ocr).lower(),
"force_ocr": str(force_ocr).lower(),
"ocr_engine": ocr_engine,
"ocr_lang": _to_list_of_strings(ocr_lang),
"pdf_backend": pdf_backend,
"table_mode": table_mode,
"abort_on_error": str(abort_on_error).lower(),
"return_as_file": str(return_as_file).lower(),
}
try:
response = requests.post(
f"http://localhost:{int(os.getenv('PORT', '5001'))}/v1alpha/convert/file",
files=files_data,
data=parameters,
)
except Exception as e:
logger.error(f"Error processing file(s): {e}")
raise gr.Error(f"Error processing file(s): {e}", print_exception=False)
if response.status_code != 200:
data = response.json()
error_message = data.get("detail", "An unknown error occurred.")
logger.error(f"Error processing file: {error_message}")
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
output = response_to_output(response, return_as_file)
return output
def response_to_output(response, return_as_file):
markdown_content = ""
json_content = ""
html_content = ""
text_content = ""
doctags_content = ""
download_button = gr.DownloadButton(visible=False, label="Download Output", scale=1)
if return_as_file:
filename = (
response.headers.get("Content-Disposition").split("filename=")[1].strip('"')
)
tmp_output_dir = Path(tempfile.mkdtemp(dir=gradio_output_dir, prefix="ui_"))
file_output_path = f"{tmp_output_dir}/{filename}"
# logger.info(f"Saving file to: {file_output_path}")
with open(file_output_path, "wb") as f:
f.write(response.content)
download_button = gr.DownloadButton(
visible=True, label=f"Download {filename}", scale=1, value=file_output_path
)
else:
full_content = response.json()
markdown_content = full_content.get("document").get("md_content")
json_content = json.dumps(
full_content.get("document").get("json_content"), indent=2
)
html_content = full_content.get("document").get("html_content")
text_content = full_content.get("document").get("text_content")
doctags_content = full_content.get("document").get("doctags_content")
return (
markdown_content,
markdown_content,
json_content,
html_content,
html_content,
text_content,
doctags_content,
download_button,
)
############
# UI Setup #
############
with gr.Blocks(
css=css,
theme=theme,
title="Docling Serve",
delete_cache=(3600, 3600), # Delete all files older than 1 hour every hour
) as ui:
# Constants stored in states to be able to pass them as inputs to functions
processing_text = gr.State("Processing your document(s), please wait...")
true_bool = gr.State(True)
false_bool = gr.State(False)
# Banner
with gr.Row(elem_id="check_health"):
# Logo
with gr.Column(scale=1, min_width=90):
gr.Image(
"https://ds4sd.github.io/docling/assets/logo.png",
height=80,
width=80,
show_download_button=False,
show_label=False,
show_fullscreen_button=False,
container=False,
elem_id="logo",
scale=0,
)
# Title
with gr.Column(scale=1, min_width=200):
gr.Markdown(
f"# Docling Serve \n(docling version: "
f"{importlib.metadata.version('docling')})",
elem_id="title",
elem_classes=["title-text"],
)
# Dark mode button
with gr.Column(scale=16, elem_id="dark_mode_column"):
dark_mode_btn = gr.Button("Dark/Light Mode", scale=0)
dark_mode_btn.click(
None,
None,
None,
js="""() => {
if (document.querySelectorAll('.dark').length) {
document.querySelectorAll('.dark').forEach(
el => el.classList.remove('dark')
);
} else {
document.querySelector('body').classList.add('dark');
}
}""",
show_api=False,
)
# URL Processing Tab
with gr.Tab("Convert URL(s)"):
with gr.Row():
with gr.Column(scale=4):
url_input = gr.Textbox(
label="Input Sources (comma-separated URLs)",
placeholder="https://arxiv.org/pdf/2206.01062",
)
with gr.Column(scale=1):
url_process_btn = gr.Button("Process URL(s)", scale=1)
url_reset_btn = gr.Button("Reset", scale=1)
# File Processing Tab
with gr.Tab("Convert File(s)"):
with gr.Row():
with gr.Column(scale=4):
file_input = gr.File(
elem_id="file_input_zone",
label="Upload Files",
file_types=[
".pdf",
".docx",
".pptx",
".html",
".xlsx",
".asciidoc",
".txt",
".md",
".jpg",
".jpeg",
".png",
".gif",
],
file_count="multiple",
scale=4,
)
with gr.Column(scale=1):
file_process_btn = gr.Button("Process File(s)", scale=1)
file_reset_btn = gr.Button("Reset", scale=1)
# Options
with gr.Accordion("Options") as options:
with gr.Row():
with gr.Column(scale=1):
to_formats = gr.CheckboxGroup(
[
("Markdown", "md"),
("Docling (JSON)", "json"),
("HTML", "html"),
("Plain Text", "text"),
("Doc Tags", "doctags"),
],
label="To Formats",
value=["md"],
)
with gr.Column(scale=1):
image_export_mode = gr.Radio(
[
("Embedded", "embedded"),
("Placeholder", "placeholder"),
("Referenced", "referenced"),
],
label="Image Export Mode",
value="embedded",
)
with gr.Row():
with gr.Column(scale=1, min_width=200):
ocr = gr.Checkbox(label="Enable OCR", value=True)
force_ocr = gr.Checkbox(label="Force OCR", value=False)
with gr.Column(scale=1):
ocr_engine = gr.Radio(
[
("EasyOCR", "easyocr"),
("Tesseract", "tesseract"),
("RapidOCR", "rapidocr"),
],
label="OCR Engine",
value="easyocr",
)
with gr.Column(scale=1, min_width=200):
ocr_lang = gr.Textbox(
label="OCR Language (beware of the format)", value="en,fr,de,es"
)
ocr_engine.change(change_ocr_lang, inputs=[ocr_engine], outputs=[ocr_lang])
with gr.Row():
with gr.Column(scale=2):
pdf_backend = gr.Radio(
["pypdfium2", "dlparse_v1", "dlparse_v2"],
label="PDF Backend",
value="dlparse_v2",
)
with gr.Column(scale=2):
table_mode = gr.Radio(
["fast", "accurate"], label="Table Mode", value="fast"
)
with gr.Column(scale=1):
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
return_as_file = gr.Checkbox(label="Return as File", value=False)
# Document output
with gr.Row(visible=False) as content_output:
with gr.Tab("Markdown"):
output_markdown = gr.Code(
language="markdown", wrap_lines=True, show_label=False
)
with gr.Tab("Markdown-Rendered"):
output_markdown_rendered = gr.Markdown(label="Response")
with gr.Tab("Docling (JSON)"):
output_json = gr.Code(language="json", wrap_lines=True, show_label=False)
with gr.Tab("HTML"):
output_html = gr.Code(language="html", wrap_lines=True, show_label=False)
with gr.Tab("HTML-Rendered"):
output_html_rendered = gr.HTML(label="Response")
with gr.Tab("Text"):
output_text = gr.Code(wrap_lines=True, show_label=False)
with gr.Tab("DocTags"):
output_doctags = gr.Code(wrap_lines=True, show_label=False)
# File download output
with gr.Row(visible=False) as file_output:
download_file_btn = gr.DownloadButton(label="Placeholder", scale=1)
##############
# UI Actions #
##############
# Handle Return as File
url_input.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
file_input.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
image_export_mode.change(
auto_set_return_as_file,
inputs=[url_input, file_input, image_export_mode],
outputs=[return_as_file],
)
# URL processing
url_process_btn.click(
set_options_visibility, inputs=[false_bool], outputs=[options]
).then(
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
).then(
set_outputs_visibility_process,
inputs=[return_as_file],
outputs=[content_output, file_output],
).then(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(
process_url,
inputs=[
url_input,
to_formats,
image_export_mode,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
],
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_html,
output_html_rendered,
output_text,
output_doctags,
download_file_btn,
],
)
url_reset_btn.click(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
set_outputs_visibility_direct,
inputs=[false_bool, false_bool],
outputs=[content_output, file_output],
).then(
clear_url_input, inputs=None, outputs=[url_input]
)
# File processing
file_process_btn.click(
set_options_visibility, inputs=[false_bool], outputs=[options]
).then(
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
).then(
set_outputs_visibility_process,
inputs=[return_as_file],
outputs=[content_output, file_output],
).then(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(
process_file,
inputs=[
file_input,
to_formats,
image_export_mode,
ocr,
force_ocr,
ocr_engine,
ocr_lang,
pdf_backend,
table_mode,
abort_on_error,
return_as_file,
],
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_html,
output_html_rendered,
output_text,
output_doctags,
download_file_btn,
],
)
file_reset_btn.click(
clear_outputs,
inputs=None,
outputs=[
output_markdown,
output_markdown_rendered,
output_json,
output_html,
output_html_rendered,
output_text,
output_doctags,
],
).then(set_options_visibility, inputs=[true_bool], outputs=[options]).then(
set_outputs_visibility_direct,
inputs=[false_bool, false_bool],
outputs=[content_output, file_output],
).then(
clear_file_input, inputs=None, outputs=[file_input]
)