Cleanup. Text formatting. Fallback picture annotation.

This commit is contained in:
DKL
2025-11-24 15:17:39 +01:00
parent 8d5892b176
commit 025c4c8942
4 changed files with 126 additions and 115 deletions

View File

@@ -16,7 +16,7 @@ from fastapi import (
from fastapi.responses import HTMLResponse, RedirectResponse, Response
from fastapi.staticfiles import StaticFiles
from pydantic import AnyHttpUrl
from pyjsx import auto_setup
from pyjsx import auto_setup # type: ignore
from starlette.exceptions import HTTPException as StarletteHTTPException
from docling.datamodel.base_models import OutputFormat
@@ -131,7 +131,7 @@ def create_ui_app(process_file, process_url, task_result, task_status_poll) -> F
):
tasks = sorted(orchestrator.tasks.values(), key=lambda t: t.created_at)
return str(TasksPage(tasks=tasks))
return str(TasksPage(tasks))
# Task specific page.
@ui_app.get("/tasks/{task_id}/", response_class=HTMLResponse)

View File

@@ -10,6 +10,7 @@ from docling_core.types.doc.document import (
DoclingDocument,
DocItem,
FloatingItem,
Formatting,
FormulaItem,
GroupItem,
GroupLabel,
@@ -20,6 +21,7 @@ from docling_core.types.doc.document import (
PictureItem,
ProvenanceItem,
RefItem,
Script,
SectionHeaderItem,
TableCell,
TableItem,
@@ -28,7 +30,7 @@ from docling_core.types.doc.document import (
)
from pyjsx import jsx, JSX, JSXComponent
from .svg import circle, clipPath, image, path, rect, text
from .svg import image, path, rect, text
_node_components: dict[str, JSXComponent] = {}
@@ -63,7 +65,7 @@ def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
# Wrap item component with annotations, if any.
if isinstance(node, DocItem) and (anns := node.get_annotations()):
element = (
<div>
<div class="annotated">
{element}
{[<AnnotationComponent annotation={ann} /> for ann in anns]}
</div>
@@ -73,7 +75,9 @@ def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
id = node.self_ref[2:]
element.props["id"] = id
element.props["onclick"] = "clickId(event)"
element.props["class"] = element.props.get("class", "") + f" item {node.content_layer.value}"
classes = ["item", node.content_layer.value]
element.props["class"] = f"{element.props.get("class", "")} {" ".join(classes)}"
return element
@@ -203,34 +207,67 @@ def DocPreview(children, doc: DoclingDocument):
]
# Build the space-separated CSS class string for a text item.
# The list starts with the item's label. When the item carries formatting,
# the name of every active flag (bold, italic, underline, strikethrough) is
# appended, followed by the formatting's script value.
def _text_classes(node: TextItem) -> str:
classes = [node.label]
# Items without formatting keep only their label class.
if frmt := node.formatting:
# Maps each CSS class name to the formatting flag that enables it.
formats = {
"bold": frmt.bold,
"italic": frmt.italic,
"underline": frmt.underline,
"strikethrough": frmt.strikethrough
}
classes.extend([cls for cls, active in formats.items() if active])
# The script value is appended whenever formatting is present, whatever its value.
# NOTE(review): label and script are joined as-is, so both must be str
# instances for " ".join to accept them — confirm against their declarations.
classes.append(frmt.script)
return " ".join(classes)
# Component registered for TextItem: renders the escaped item text in a <p>.
# NOTE(review): this diff view shows two `return` lines without +/- markers.
# The first uses the bare label as class, the second uses _text_classes(node);
# presumably the second is the post-change line — confirm against the commit.
@component(TextItem)
def TextComponent(children, node: TextItem, doc: DoclingDocument):
return <p class={node.label}>{escape(node.text)}</p>
return <p class={_text_classes(node)}>{escape(node.text)}</p>
# Component registered for TitleItem: renders the escaped title text in an <h1>.
# NOTE(review): two `return` lines are shown without +/- markers. The first has
# no class attribute, the second adds class={_text_classes(node)}; presumably
# the second is the post-change line — confirm against the commit.
@component(TitleItem)
def TitleComponent(children, node: TitleItem, doc: DoclingDocument):
return <h1>{escape(node.text)}</h1>
return <h1 class={_text_classes(node)}>{escape(node.text)}</h1>
# Component registered for SectionHeaderItem: renders the escaped header text
# in an <h4>.
# NOTE(review): two `return` lines are shown without +/- markers. The first has
# no class attribute, the second adds class={_text_classes(node)}; presumably
# the second is the post-change line — confirm against the commit.
@component(SectionHeaderItem)
def SectionHeaderComponent(children, node: SectionHeaderItem, doc: DoclingDocument):
return <h4>{escape(node.text)}</h4>
return <h4 class={_text_classes(node)}>{escape(node.text)}</h4>
# Component registered for ListItem: renders an <li> holding the item marker
# in bold followed by the escaped item text.
# NOTE(review): two `return` statements are shown without +/- markers. The
# single-line one emits the text directly inside the <li>; the multi-line one
# wraps the text in a <span> carrying _text_classes(node). Presumably the
# multi-line form is the post-change version — confirm against the commit.
# NOTE(review): node.marker is interpolated without escape(), unlike node.text
# — confirm whether markers can contain markup-significant characters.
@component(ListItem)
def ListComponent(children, node: ListItem, doc: DoclingDocument):
return <li><b>{node.marker}</b> {escape(node.text)}</li>
return (
<li>
<b>{node.marker}</b>
<span class={_text_classes(node)}>{escape(node.text)}</span>
</li>
)
# Component registered for CodeItem: renders the escaped code inside
# <figure><code>. The displayed string is node.text, falling back to node.orig
# when node.text is falsy.
# NOTE(review): two `return` statements are shown without +/- markers. The
# multi-line one adds class={_text_classes(node)} to the <code> element;
# presumably it is the post-change version — confirm against the commit.
@component(CodeItem)
def CodeComponent(children, node: CodeItem, doc: DoclingDocument):
return <figure><code>{escape(node.text or node.orig)}</code></figure>
return (
<figure>
<code class={_text_classes(node)}>
{escape(node.text or node.orig)}
</code>
</figure>
)
# Component registered for FormulaItem: renders the escaped formula inside
# <figure><code>. The displayed string is node.text, falling back to node.orig
# when node.text is falsy.
# NOTE(review): two `return` statements are shown without +/- markers. The
# multi-line one adds class={_text_classes(node)} to the <code> element;
# presumably it is the post-change version — confirm against the commit.
@component(FormulaItem)
def FormulaComponent(children, node: FormulaItem, doc: DoclingDocument):
return <figure><code>{escape(node.text or node.orig)}</code></figure>
return (
<figure>
<code class={_text_classes(node)}>
{escape(node.text or node.orig)}
</code>
</figure>
)
@component(PictureItem)
@@ -240,25 +277,25 @@ def PictureComponent(children, node: PictureItem, doc: DoclingDocument):
# Component registered for PictureClassificationData: renders the predicted
# classes of a picture annotation.
# NOTE(review): this diff view interleaves two render variants inside a single
# `return (...)` without +/- markers:
#   * a <div> of bars, one per entry of `classes` (the first five predicted
#     classes), each sized to confidence * 100 percent with a
#     "name: confidence" tooltip;
#   * a <table> with one row per predicted class whose confidence exceeds
#     0.01, showing the class name with underscores replaced by spaces and the
#     confidence formatted to two decimals.
# Presumably the table is the post-change version — confirm against the commit.
@component(PictureClassificationData)
def PictureClassificationComponent(children, annotation: PictureClassificationData):
# Only the bar variant reads `classes`; the table variant iterates
# annotation.predicted_classes directly.
classes = annotation.predicted_classes[:5]
return (
<div>
{[
<div
style={{ "width": f"{cls.confidence * 100}%" }}
title={f"{cls.class_name}: {cls.confidence:.2f}"}
>
{f"{cls.class_name.replace("_", " ")} {cls.confidence:.2f}"}
</div>
for cls in classes
]}
</div>
<table>
<tbody>
{[
<tr>
<td>{cls.class_name.replace("_", " ")}</td>
<td>{f"{cls.confidence:.2f}"}</td>
</tr>
for cls in annotation.predicted_classes
if cls.confidence > 0.01
]}
</tbody>
</table>
)
# Component registered for DescriptionAnnotation: renders the escaped
# annotation text.
# NOTE(review): the function has the same name as the object passed to
# @component and used as the parameter annotation, so after this definition
# the module-level name DescriptionAnnotation is rebound to the decorator's
# result. Sibling components use a `...Component` suffix; consider renaming —
# verify that nothing later in the module needs the original object.
# NOTE(review): two `return` lines are shown without +/- markers (<div> first,
# then <span>); presumably the <span> is the post-change line — confirm.
@component(DescriptionAnnotation)
def DescriptionAnnotation(children, annotation: DescriptionAnnotation):
return <div>{escape(annotation.text)}</div>
return <span>{escape(annotation.text)}</span>
@component(TableItem)

View File

@@ -1,79 +1,15 @@
@import "pico.css";
/* Pico configuration. */
:root {
--pico-font-size: 16px;
}
/* Utilities. */
.w-4 {
width: calc(4 * var(--pico-spacing));
}
.w-full {
width: 100%;
}
.max-w-full {
max-width: 100%;
}
.mt-1 {
margin-top: var(--pico-spacing);
}
.mr-auto {
margin-right: auto;
}
.mb-1 {
margin-bottom: var(--pico-spacing);
}
.mb-2 {
margin-bottom: calc(2 * var(--pico-spacing));
}
.flex {
display: flex;
}
.flex.row {
flex-direction: row;
}
.flex.col {
flex-direction: column;
}
.flex-1 {
flex: 1 1 0%;
}
.flex-auto {
flex: 1 1 auto;
}
.gap-0 {
gap: 0;
}
.gap-1 {
gap: 0.25rem;
}
.gap-2 {
gap: 0.5rem;
}
.gap-3 {
gap: 1rem;
}
.hidden {
display: none;
}
.sticky-footer {
position: sticky;
bottom: 0;
padding-top: var(--pico-spacing);
background: var(--pico-background-color);
}
/* Customization. */
@view-transition {
navigation: auto;
}
:root {
--pico-font-size: 16px;
--highlight-factor: 0.8;
--target: hsl(240, 100%, 34%);
--mark: hsl(29, 100%, 40%);
--mark: hsl(29, 100%, 35%);
}
@media (prefers-color-scheme: dark) {
@@ -84,6 +20,18 @@
}
}
/* Utilities. */
.hidden {
display: none;
}
.sticky-footer {
position: sticky;
bottom: 0;
padding-top: var(--pico-spacing);
background: var(--pico-background-color);
}
html {
scroll-behavior: smooth;
}
@@ -232,6 +180,10 @@ main.preview {
main.preview:has(.configDarkImg > input:checked) {
--img-hover-border: white;
svg.page-image {
--mark: hsl(29, 100%, 70%)
}
image,
img {
filter: invert(1) hue-rotate(180deg) saturate(1.25);
@@ -280,6 +232,38 @@ main.preview {
visibility: hidden;
}
> .item.annotated {
display: flex;
flex-direction: column;
align-items: stretch;
gap: 1rem;
}
/* Formatting. */
/* The bold, italic, underline and strikethrough class names match the keys
   that _text_classes() emits for active formatting flags. */
.bold {
font-weight: bold;
}
.italic {
font-style: italic;
}
.underline {
text-decoration: underline;
}
.strikethrough {
text-decoration: line-through;
}
/* Both decorations share one property, so the combined case is spelled out
   explicitly; otherwise one rule would override the other. */
.underline.strikethrough {
text-decoration: underline line-through;
}
/* Presumably matched by the script value that _text_classes() appends —
   confirm against the Script enum values. */
.sub {
font-size: smaller;
vertical-align: sub;
}
.super {
font-size: smaller;
vertical-align: super;
}
/* Items out of content layer. */
> .item:not(.body),
> .item-markers:not(.body) {
@@ -319,34 +303,27 @@ main.preview {
}
.annotation {
margin: 0.5rem 1rem;
font-size: 0.9rem;
color: var(--mark);
margin: 0;
&::before {
margin-right: 0.5rem;
content: attr(data-kind);
opacity: 0.7;
}
&,
* {
font-size: 0.9rem;
color: var(--mark);
}
}
.annotation[data-kind="description"] {
.annotation[data-kind="description"],
code.annotation {
white-space: pre-line;
}
.annotation[data-kind="classification"] {
height: 1.5rem;
display: flex;
align-items: center;
> div {
padding: 0 0.25rem;
background: var(--mark);
border: solid 1px var(--pico-background-color);
color: var(--pico-background-color);
overflow: hidden;
text-wrap: nowrap;
}
width: fit-content;
}
> .item-markers {
@@ -409,6 +386,8 @@ main.preview {
}
> svg.page-image {
--mark: hsl(29, 100%, 35%);
grid-column: 5;
position: sticky;
top: 0.5rem;

View File

@@ -1,4 +1,4 @@
from pyjsx import JSX
from pyjsx import JSX # type: ignore
def _tag(name: str):
@@ -6,7 +6,7 @@ def _tag(name: str):
props = " ".join([f'{k}="{v}"' for k, v in args.items()])
if children:
child_renders = "".join([f"{c}" for c in children])
child_renders = "".join([str(c) for c in children])
return f"<{name} {props}>{child_renders}</{name}>"
else:
return f"<{name} {props} />"
@@ -14,12 +14,7 @@ def _tag(name: str):
return factory
# Module-level SVG element factories, each created by _tag() with the tag name
# it emits.
# NOTE(review): this diff view lists the assignments without +/- markers, so
# some of them may have been removed by the commit — confirm before relying on
# any of these names.
circle = _tag("circle")
clipPath = _tag("clipPath")
defs = _tag("defs")
# NOTE(review): the emitted tag name is all lowercase here, unlike the
# camel-cased variable name and the "clipPath" tag above — confirm it is
# intentional.
foreignObject = _tag("foreignobject")
image = _tag("image")
path = _tag("path")
rect = _tag("rect")
text = _tag("text")
use = _tag("use")