from collections import defaultdict
from html import escape
from typing import Type
from docling_core.types.doc.document import (
BaseAnnotation,
CodeItem,
ContentLayer,
DescriptionAnnotation,
DoclingDocument,
DocItem,
FloatingItem,
Formatting,
FormulaItem,
GroupItem,
GroupLabel,
ListGroup,
ListItem,
NodeItem,
PictureClassificationData,
PictureItem,
ProvenanceItem,
RefItem,
Script,
SectionHeaderItem,
TableCell,
TableItem,
TextItem,
TitleItem
)
from pyjsx import jsx, JSX, JSXComponent
from .svg import image, path, rect, text
# Registry mapping a class name (the class's ``__name__``) to the JSX component
# that renders instances of that class. Entries are written by ``component``
# and looked up with ``.get(...)`` by the dispatching components below.
_node_components: dict[str, JSXComponent] = {}
def component(*node_types: Type[BaseAnnotation | NodeItem]):
    """Create a decorator that registers a component for the given classes.

    Each class in ``node_types`` is keyed by its ``__name__`` in
    ``_node_components``; registering another component for the same class
    name overwrites the earlier entry.

    Args:
        *node_types: Annotation or node classes the component renders.

    Returns:
        A decorator that records the component in the registry and hands the
        component back unchanged.
    """

    def decorator(comp):
        for node_type in node_types:
            _node_components[node_type.__name__] = comp
        # Return the component so the decorated name stays bound to it; a
        # decorator that returns nothing rebinds the decorated name to None.
        return comp

    return decorator
# Render a single annotation: pick the component registered for the
# annotation's class name, then tag the resulting element as an annotation.
# `children` is accepted but not used in the lines visible here.
def AnnotationComponent(children, annotation: BaseAnnotation):
# Look up a component registered under the annotation's class name.
Comp = _node_components.get(annotation.__class__.__name__)
# Without a registered component, fall back to the HTML-escaped, indented JSON
# dump of the annotation.
# NOTE(review): the fallback has no enclosing element markup in this view; as
# written `( {...} )` is a plain set literal with no `.props` — presumably the
# wrapping element was lost; confirm against the original file.
element = Comp(annotation=annotation, children=[]) if Comp else (
{escape(annotation.model_dump_json(indent=2))}
)
# Append the "annotation" class to whatever class string the element has.
element.props["class"] = element.props.get("class", "") + " annotation"
# Expose the annotation's `kind` as a data attribute.
element.props["data-kind"] = annotation.kind
return element
# Render a document node: pick the component registered for the node's class
# name, optionally combine it with its annotations, then attach the id, click
# handler and CSS classes to the resulting element.
# `children` is accepted but not used in the lines visible here.
def NodeComponent(children, node: NodeItem | RefItem, doc: DoclingDocument):
# Specific component or fallback.
Comp = _node_components.get(node.__class__.__name__)
# NOTE(review): the fallback branch is empty in this view (`( )`); the
# fallback markup appears to be missing — confirm against the original file.
element = Comp(node=node, doc=doc, children=[]) if Comp else (
)
# Wrap item component with annotations, if any.
# Only DocItem nodes are asked for annotations; `anns` is bound by the walrus
# and the wrapping happens only when it is truthy.
if isinstance(node, DocItem) and (anns := node.get_annotations()):
# NOTE(review): the wrapper element and the per-annotation element expression
# are missing from the lines below; only `{element}` and the loop over `anns`
# are visible.
element = (
{element}
{[ for ann in anns]}
)
# Extend interaction and styling.
# The element id is `self_ref` without its first two characters
# (presumably a "#/" prefix — TODO confirm).
id = node.self_ref[2:]
element.props["id"] = id
element.props["onclick"] = "clickId(event)"
# Every node gets the "item" class plus the value of its content layer.
classes = ["item", node.content_layer.value]
# Reusing double quotes inside the f-string's replacement fields requires
# Python 3.12+ (PEP 701).
element.props["class"] = f"{element.props.get("class", "")} {" ".join(classes)}"
return element
def node_provs(node: NodeItem, doc: DoclingDocument) -> list[ProvenanceItem]:
    """Collect the provenance entries that belong to a node.

    Args:
        node: The node to inspect.
        doc: Document used to resolve the node's child references.

    Returns:
        ``node.prov`` itself when the node is a ``DocItem``; otherwise a new
        list with the ``prov`` entries of every direct child that resolves to
        a ``DocItem``. Only direct children are visited.
    """
    if isinstance(node, DocItem):
        return node.prov

    provs: list[ProvenanceItem] = []
    for ref in node.children:
        # Resolve each reference once and reuse the result for both the type
        # check and the provenance lookup.
        child = ref.resolve(doc)
        if isinstance(child, DocItem):
            provs.extend(child.prov)
    return provs
# Build the content for one page: a list of components (`comps`) derived from
# the items assigned to this page, plus a nested `ItemBox` helper that works
# on item bounding boxes.
# NOTE(review): most element markup is missing from this view (empty
# `comps.append()` calls, bare `{...}` expressions), and the original
# indentation is not visible; the comments describe only what the remaining
# code shows.
# `children` is accepted but not used in the lines visible here.
def DocPage(children, page_no: int, items: list[NodeItem], doc: DoclingDocument):
page = doc.pages[page_no]
# Keep only the items whose lowest provenance page number is this page.
# NOTE(review): `min` raises ValueError on an empty sequence, so an item
# without any provenance would fail here.
exclusive_items = [
item
for item in items
if min([p.page_no for p in node_provs(item, doc)]) == page_no
]
comps = []
for i in range(len(exclusive_items)):
item = exclusive_items[i]
# `self_ref` without its first two characters, split on "/": the first segment
# goes to `kind` (not used in the visible lines), the rest to `index`.
id = item.self_ref[2:]
kind, *index = id.split("/")
# "group" for group items; "grouped" for items whose parent is a group with a
# label other than UNSPECIFIED; empty otherwise.
parent_class = ""
if isinstance(item, GroupItem):
parent_class = "group"
else:
parent = item.parent.resolve(doc)
if isinstance(parent, GroupItem) and parent.label is not GroupLabel.UNSPECIFIED:
parent_class = "grouped"
# Header fragment: the joined index, the label with underscores replaced by
# spaces, and the content layer (only when it is not BODY).
# NOTE(review): the enclosing element markup is missing from this view.
comps.append(
{"/".join(index)}{item.label.replace("_", " ")}
{
{item.content_layer.value.replace("_", " ")}
if item.content_layer is not ContentLayer.BODY
else None
}
{"{;}"}
)
# NOTE(review): the appended element is missing from this view.
comps.append()
# Distinct page numbers covered by this item's provenance.
pages = set([p.page_no for p in node_provs(item, doc)])
# The marker gets the extra "border" class for the first item on the page and
# for any item spanning more than one page.
page_mark_class = "page-marker"
if i == 0 or len(pages) > 1:
page_mark_class += " border"
# NOTE(review): the appended element is missing from this view.
comps.append()
# Helper for one provenance entry of an item. It reads `page` from the
# enclosing DocPage scope.
def ItemBox(children, item: DocItem, prov: ProvenanceItem):
item_id = item.self_ref[2:]
# First entry: the item itself, with its bbox converted to a top-left origin
# using the page height.
sub_items = [
(item_id, prov.bbox.to_top_left_origin(page.size.height))
]
# Table cells.
# Each cell gets the id "<item_id>/<start col>/<start row>" and its own bbox;
# the cell bbox is used as-is, without the origin conversion applied above.
if isinstance(item, TableItem):
for cell in item.data.table_cells:
sub_items.append(
(f"{item_id}/{cell.start_col_offset_idx}/{cell.start_row_offset_idx}", cell.bbox)
)
# One result per (id, bbox) pair.
# NOTE(review): the element expression is missing from this view.
return [
for id, bbox in sub_items
]
# Span extra row to fill up excess space.
comps.append(
)
# NOTE(review): the returned element markup is missing; only the `{comps}`
# interpolation remains.
return
{comps}
# Group the document's items by page number and produce one entry per page,
# in ascending page order.
# `children` is accepted but not used in the lines visible here.
def DocPreview(children, doc: DoclingDocument):
# Page number -> items that have provenance on that page.
page_items: dict[int, list[NodeItem]] = defaultdict(list)
# Iterate items including groups, across every ContentLayer member.
# `level` is not used in the visible lines.
for item, level in doc.iterate_items(
with_groups=True,
included_content_layers={*ContentLayer}
):
# Skip only group items whose label is UNSPECIFIED.
if not isinstance(item, GroupItem) or item.label is not GroupLabel.UNSPECIFIED:
# An item is added to every distinct page found in its provenance.
pages = set([p.page_no for p in node_provs(item, doc)])
for page in pages:
page_items[page].append(item)
# NOTE(review): the per-page element expression is missing from this view;
# only the loop over the sorted page numbers remains.
return [
for page_no in sorted(page_items.keys())
]
def _text_classes(node: TextItem) -> str:
    """Build the space-separated class string for a text item.

    The string starts with the item's label. When the item carries
    formatting, the name of every active flag (bold, italic, underline,
    strikethrough) follows, and the formatting's script value comes last.
    """
    names = [node.label]
    style = node.formatting
    if style:
        flags = (
            ("bold", style.bold),
            ("italic", style.italic),
            ("underline", style.underline),
            ("strikethrough", style.strikethrough),
        )
        for name, enabled in flags:
            if enabled:
                names.append(name)
        # The script value is added whenever formatting is present.
        names.append(style.script)
    return " ".join(names)
@component(TextItem)
def TextComponent(children, node: TextItem, doc: DoclingDocument):
return