Merge pull request #2 from datalab-to/dev

Dev
This commit is contained in:
Vik Paruchuri
2025-10-23 16:55:57 -04:00
committed by GitHub
4 changed files with 8 additions and 1 deletions

View File

@@ -48,6 +48,7 @@ class InferenceManager:
page_box=[0, 0, input_item.image.width, input_item.image.height],
token_count=result.token_count,
images=extract_images(result.raw, chunks, input_item.image),
+error=result.error,
)
)
return output

View File

@@ -27,3 +27,4 @@ class BatchOutputItem:
page_box: List[int]
token_count: int
images: dict
+error: bool

View File

@@ -30,7 +30,11 @@ def extract_images(html: str, chunks: dict, image: Image.Image):
if not img:
continue
bbox = chunk["bbox"]
-block_image = image.crop(bbox)
+try:
+    block_image = image.crop(bbox)
+except ValueError:
+    # Happens when bbox coordinates are invalid
+    continue
img_name = get_image_name(html, div_idx)
images[img_name] = block_image
return images

View File

@@ -137,6 +137,7 @@
padding: 30px;
line-height: 1.6;
color: #333;
+font-size: 24px;
}
.markdown-content h1, .markdown-content h2, .markdown-content h3 {