mirror of
https://github.com/docling-project/docling-serve.git
synced 2025-11-29 00:23:36 +00:00
Cleanup. Text formatting. Fallback picture annotation.
This commit is contained in:
@@ -16,7 +16,7 @@ from fastapi import (
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse, Response
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from pydantic import AnyHttpUrl
|
||||
from pyjsx import auto_setup
|
||||
from pyjsx import auto_setup # type: ignore
|
||||
from starlette.exceptions import HTTPException as StarletteHTTPException
|
||||
|
||||
from docling.datamodel.base_models import OutputFormat
|
||||
@@ -131,7 +131,7 @@ def create_ui_app(process_file, process_url, task_result, task_status_poll) -> F
|
||||
):
|
||||
tasks = sorted(orchestrator.tasks.values(), key=lambda t: t.created_at)
|
||||
|
||||
return str(TasksPage(tasks=tasks))
|
||||
return str(TasksPage(tasks))
|
||||
|
||||
# Task specific page.
|
||||
@ui_app.get("/tasks/{task_id}/", response_class=HTMLResponse)
|
||||
|
||||
@@ -10,6 +10,7 @@ from docling_core.types.doc.document import (
|
||||
DoclingDocument,
|
||||
DocItem,
|
||||
FloatingItem,
|
||||
Formatting,
|
||||
FormulaItem,
|
||||
GroupItem,
|
||||
GroupLabel,
|
||||
@@ -20,6 +21,7 @@ from docling_core.types.doc.document import (
|
||||
PictureItem,
|
||||
ProvenanceItem,
|
||||
RefItem,
|
||||
Script,
|
||||
SectionHeaderItem,
|
||||
TableCell,
|
||||
TableItem,
|
||||
@@ -28,7 +30,7 @@ from docling_core.types.doc.document import (
|
||||
)
|
||||
from pyjsx import jsx, JSX, JSXComponent
|
||||
|
||||
from .svg import circle, clipPath, image, path, rect, text
|
||||
from .svg import image, path, rect, text
|
||||
|
||||
|
||||
_node_components: dict[str, JSXComponent] = {}
|
||||
@@ -63,7 +65,7 @@ def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
|
||||
# Wrap item component with annotations, if any.
|
||||
if isinstance(node, DocItem) and (anns := node.get_annotations()):
|
||||
element = (
|
||||
<div>
|
||||
<div class="annotated">
|
||||
{element}
|
||||
{[<AnnotationComponent annotation={ann} /> for ann in anns]}
|
||||
</div>
|
||||
@@ -73,7 +75,9 @@ def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
|
||||
id = node.self_ref[2:]
|
||||
element.props["id"] = id
|
||||
element.props["onclick"] = "clickId(event)"
|
||||
element.props["class"] = element.props.get("class", "") + f" item {node.content_layer.value}"
|
||||
|
||||
classes = ["item", node.content_layer.value]
|
||||
element.props["class"] = f"{element.props.get("class", "")} {" ".join(classes)}"
|
||||
|
||||
return element
|
||||
|
||||
@@ -203,34 +207,67 @@ def DocPreview(children, doc: DoclingDocument):
|
||||
]
|
||||
|
||||
|
||||
def _text_classes(node: TextItem) -> str:
|
||||
classes = [node.label]
|
||||
|
||||
if frmt := node.formatting:
|
||||
formats = {
|
||||
"bold": frmt.bold,
|
||||
"italic": frmt.italic,
|
||||
"underline": frmt.underline,
|
||||
"strikethrough": frmt.strikethrough
|
||||
}
|
||||
classes.extend([cls for cls, active in formats.items() if active])
|
||||
classes.append(frmt.script)
|
||||
|
||||
return " ".join(classes)
|
||||
|
||||
|
||||
@component(TextItem)
|
||||
def TextComponent(children, node: TextItem, doc: DoclingDocument):
|
||||
return <p class={node.label}>{escape(node.text)}</p>
|
||||
return <p class={_text_classes(node)}>{escape(node.text)}</p>
|
||||
|
||||
|
||||
@component(TitleItem)
|
||||
def TitleComponent(children, node: TitleItem, doc: DoclingDocument):
|
||||
return <h1>{escape(node.text)}</h1>
|
||||
return <h1 class={_text_classes(node)}>{escape(node.text)}</h1>
|
||||
|
||||
|
||||
@component(SectionHeaderItem)
|
||||
def SectionHeaderComponent(children, node: SectionHeaderItem, doc: DoclingDocument):
|
||||
return <h4>{escape(node.text)}</h4>
|
||||
return <h4 class={_text_classes(node)}>{escape(node.text)}</h4>
|
||||
|
||||
|
||||
@component(ListItem)
|
||||
def ListComponent(children, node: ListItem, doc: DoclingDocument):
|
||||
return <li><b>{node.marker}</b> {escape(node.text)}</li>
|
||||
return (
|
||||
<li>
|
||||
<b>{node.marker}</b>
|
||||
<span class={_text_classes(node)}>{escape(node.text)}</span>
|
||||
</li>
|
||||
)
|
||||
|
||||
|
||||
@component(CodeItem)
|
||||
def CodeComponent(children, node: CodeItem, doc: DoclingDocument):
|
||||
return <figure><code>{escape(node.text or node.orig)}</code></figure>
|
||||
return (
|
||||
<figure>
|
||||
<code class={_text_classes(node)}>
|
||||
{escape(node.text or node.orig)}
|
||||
</code>
|
||||
</figure>
|
||||
)
|
||||
|
||||
|
||||
@component(FormulaItem)
|
||||
def FormulaComponent(children, node: FormulaItem, doc: DoclingDocument):
|
||||
return <figure><code>{escape(node.text or node.orig)}</code></figure>
|
||||
return (
|
||||
<figure>
|
||||
<code class={_text_classes(node)}>
|
||||
{escape(node.text or node.orig)}
|
||||
</code>
|
||||
</figure>
|
||||
)
|
||||
|
||||
|
||||
@component(PictureItem)
|
||||
@@ -240,25 +277,25 @@ def PictureComponent(children, node: PictureItem, doc: DoclingDocument):
|
||||
|
||||
@component(PictureClassificationData)
|
||||
def PictureClassificationComponent(children, annotation: PictureClassificationData):
|
||||
classes = annotation.predicted_classes[:5]
|
||||
return (
|
||||
<div>
|
||||
{[
|
||||
<div
|
||||
style={{ "width": f"{cls.confidence * 100}%" }}
|
||||
title={f"{cls.class_name}: {cls.confidence:.2f}"}
|
||||
>
|
||||
{f"{cls.class_name.replace("_", " ")} {cls.confidence:.2f}"}
|
||||
</div>
|
||||
for cls in classes
|
||||
]}
|
||||
</div>
|
||||
<table>
|
||||
<tbody>
|
||||
{[
|
||||
<tr>
|
||||
<td>{cls.class_name.replace("_", " ")}</td>
|
||||
<td>{f"{cls.confidence:.2f}"}</td>
|
||||
</tr>
|
||||
for cls in annotation.predicted_classes
|
||||
if cls.confidence > 0.01
|
||||
]}
|
||||
</tbody>
|
||||
</table>
|
||||
)
|
||||
|
||||
|
||||
@component(DescriptionAnnotation)
|
||||
def DescriptionAnnotation(children, annotation: DescriptionAnnotation):
|
||||
return <div>{escape(annotation.text)}</div>
|
||||
return <span>{escape(annotation.text)}</span>
|
||||
|
||||
|
||||
@component(TableItem)
|
||||
|
||||
@@ -1,79 +1,15 @@
|
||||
@import "pico.css";
|
||||
|
||||
/* Pico configuration. */
|
||||
:root {
|
||||
--pico-font-size: 16px;
|
||||
}
|
||||
|
||||
/* Utilities. */
|
||||
.w-4 {
|
||||
width: calc(4 * var(--pico-spacing));
|
||||
}
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
.max-w-full {
|
||||
max-width: 100%;
|
||||
}
|
||||
.mt-1 {
|
||||
margin-top: var(--pico-spacing);
|
||||
}
|
||||
.mr-auto {
|
||||
margin-right: auto;
|
||||
}
|
||||
.mb-1 {
|
||||
margin-bottom: var(--pico-spacing);
|
||||
}
|
||||
.mb-2 {
|
||||
margin-bottom: calc(2 * var(--pico-spacing));
|
||||
}
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
.flex.row {
|
||||
flex-direction: row;
|
||||
}
|
||||
.flex.col {
|
||||
flex-direction: column;
|
||||
}
|
||||
.flex-1 {
|
||||
flex: 1 1 0%;
|
||||
}
|
||||
.flex-auto {
|
||||
flex: 1 1 auto;
|
||||
}
|
||||
.gap-0 {
|
||||
gap: 0;
|
||||
}
|
||||
.gap-1 {
|
||||
gap: 0.25rem;
|
||||
}
|
||||
.gap-2 {
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.gap-3 {
|
||||
gap: 1rem;
|
||||
}
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
.sticky-footer {
|
||||
position: sticky;
|
||||
bottom: 0;
|
||||
padding-top: var(--pico-spacing);
|
||||
background: var(--pico-background-color);
|
||||
}
|
||||
|
||||
/* Customization. */
|
||||
|
||||
@view-transition {
|
||||
navigation: auto;
|
||||
}
|
||||
|
||||
:root {
|
||||
--pico-font-size: 16px;
|
||||
|
||||
--highlight-factor: 0.8;
|
||||
--target: hsl(240, 100%, 34%);
|
||||
--mark: hsl(29, 100%, 40%);
|
||||
--mark: hsl(29, 100%, 35%);
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
@@ -84,6 +20,18 @@
|
||||
}
|
||||
}
|
||||
|
||||
/* Utilities. */
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.sticky-footer {
|
||||
position: sticky;
|
||||
bottom: 0;
|
||||
padding-top: var(--pico-spacing);
|
||||
background: var(--pico-background-color);
|
||||
}
|
||||
|
||||
html {
|
||||
scroll-behavior: smooth;
|
||||
}
|
||||
@@ -232,6 +180,10 @@ main.preview {
|
||||
main.preview:has(.configDarkImg > input:checked) {
|
||||
--img-hover-border: white;
|
||||
|
||||
svg.page-image {
|
||||
--mark: hsl(29, 100%, 70%)
|
||||
}
|
||||
|
||||
image,
|
||||
img {
|
||||
filter: invert(1) hue-rotate(180deg) saturate(1.25);
|
||||
@@ -280,6 +232,38 @@ main.preview {
|
||||
visibility: hidden;
|
||||
}
|
||||
|
||||
> .item.annotated {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: stretch;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
/* Formatting. */
|
||||
.bold {
|
||||
font-weight: bold;
|
||||
}
|
||||
.italic {
|
||||
font-style: italic;
|
||||
}
|
||||
.underline {
|
||||
text-decoration: underline;
|
||||
}
|
||||
.strikethrough {
|
||||
text-decoration: line-through;
|
||||
}
|
||||
.underline.strikethrough {
|
||||
text-decoration: underline line-through;
|
||||
}
|
||||
.sub {
|
||||
font-size: smaller;
|
||||
vertical-align: sub;
|
||||
}
|
||||
.super {
|
||||
font-size: smaller;
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
/* Items out of content layer. */
|
||||
> .item:not(.body),
|
||||
> .item-markers:not(.body) {
|
||||
@@ -319,34 +303,27 @@ main.preview {
|
||||
}
|
||||
|
||||
.annotation {
|
||||
margin: 0.5rem 1rem;
|
||||
font-size: 0.9rem;
|
||||
color: var(--mark);
|
||||
margin: 0;
|
||||
|
||||
&::before {
|
||||
margin-right: 0.5rem;
|
||||
content: attr(data-kind);
|
||||
opacity: 0.7;
|
||||
}
|
||||
|
||||
&,
|
||||
* {
|
||||
font-size: 0.9rem;
|
||||
color: var(--mark);
|
||||
}
|
||||
}
|
||||
|
||||
.annotation[data-kind="description"] {
|
||||
.annotation[data-kind="description"],
|
||||
code.annotation {
|
||||
white-space: pre-line;
|
||||
}
|
||||
|
||||
.annotation[data-kind="classification"] {
|
||||
height: 1.5rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
|
||||
> div {
|
||||
padding: 0 0.25rem;
|
||||
background: var(--mark);
|
||||
border: solid 1px var(--pico-background-color);
|
||||
color: var(--pico-background-color);
|
||||
overflow: hidden;
|
||||
text-wrap: nowrap;
|
||||
}
|
||||
width: fit-content;
|
||||
}
|
||||
|
||||
> .item-markers {
|
||||
@@ -409,6 +386,8 @@ main.preview {
|
||||
}
|
||||
|
||||
> svg.page-image {
|
||||
--mark: hsl(29, 100%, 35%);
|
||||
|
||||
grid-column: 5;
|
||||
position: sticky;
|
||||
top: 0.5rem;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from pyjsx import JSX
|
||||
from pyjsx import JSX # type: ignore
|
||||
|
||||
|
||||
def _tag(name: str):
|
||||
@@ -6,7 +6,7 @@ def _tag(name: str):
|
||||
props = " ".join([f'{k}="{v}"' for k, v in args.items()])
|
||||
|
||||
if children:
|
||||
child_renders = "".join([f"{c}" for c in children])
|
||||
child_renders = "".join([str(c) for c in children])
|
||||
return f"<{name} {props}>{child_renders}</{name}>"
|
||||
else:
|
||||
return f"<{name} {props} />"
|
||||
@@ -14,12 +14,7 @@ def _tag(name: str):
|
||||
return factory
|
||||
|
||||
|
||||
circle = _tag("circle")
|
||||
clipPath = _tag("clipPath")
|
||||
defs = _tag("defs")
|
||||
foreignObject = _tag("foreignobject")
|
||||
image = _tag("image")
|
||||
path = _tag("path")
|
||||
rect = _tag("rect")
|
||||
text = _tag("text")
|
||||
use = _tag("use")
|
||||
|
||||
Reference in New Issue
Block a user