diff --git a/chandra/input.py b/chandra/input.py
index d552d14..0d793a7 100644
--- a/chandra/input.py
+++ b/chandra/input.py
@@ -13,6 +13,15 @@ def flatten(page, flag=pdfium_c.FLAT_NORMALDISPLAY):
print(f"Failed to flatten annotations / form fields on page {page}.")
+def load_image(filepath: str):
+    """Load an image file as RGB, upscaling it when it is too small.
+
+    If either side is shorter than ``settings.MIN_IMAGE_DIM``, the image is
+    enlarged with LANCZOS resampling, keeping its aspect ratio, so that the
+    shorter side reaches ``settings.MIN_IMAGE_DIM``.
+
+    Args:
+        filepath: Path of the image file to open.
+
+    Returns:
+        The RGB image, resized if it was below the minimum dimension.
+    """
+    # convert() returns a converted copy, so the source file can be closed
+    # as soon as the copy exists.
+    with Image.open(filepath) as source:
+        image = source.convert("RGB")
+
+    min_dim = settings.MIN_IMAGE_DIM
+    shortest = min(image.width, image.height)
+    if shortest < min_dim:
+        scale = min_dim / shortest
+        # int() truncation could leave a side one pixel under the minimum
+        # when the float product lands just below a whole number, so round
+        # and clamp to the minimum instead.
+        new_size = (
+            max(round(image.width * scale), min_dim),
+            max(round(image.height * scale), min_dim),
+        )
+        image = image.resize(new_size, Image.Resampling.LANCZOS)
+    return image
+
+
def load_pdf_images(filepath: str, page_range: List[int]):
doc = pdfium.PdfDocument(filepath)
doc.init_forms()
@@ -22,7 +31,7 @@ def load_pdf_images(filepath: str, page_range: List[int]):
if not page_range or page in page_range:
page_obj = doc[page]
min_page_dim = min(page_obj.get_width(), page_obj.get_height())
- scale_dpi = (settings.MIN_IMAGE_DIM / min_page_dim) * 72
+ scale_dpi = (settings.MIN_PDF_IMAGE_DIM / min_page_dim) * 72
scale_dpi = max(scale_dpi, settings.IMAGE_DPI)
page_obj = doc[page]
flatten(page_obj)
@@ -56,5 +65,5 @@ def load_file(filepath: str, config: dict):
if input_type and input_type.extension == "pdf":
images = load_pdf_images(filepath, page_range)
else:
- images = [Image.open(filepath).convert("RGB")]
+ images = [load_image(filepath)]
return images
diff --git a/chandra/output.py b/chandra/output.py
index aa430fe..7d4d1c6 100644
--- a/chandra/output.py
+++ b/chandra/output.py
@@ -71,6 +71,17 @@ def parse_html(
else:
img = BeautifulSoup(f"", "html.parser")
div.append(img)
+
+ # Wrap text content in <p>
tags if no inner HTML tags exist + if label in ["Text"] and not re.search( + "<.+>", str(div.decode_contents()).strip() + ): + # Add inner p tags if missing for text blocks + text_content = str(div.decode_contents()).strip() + text_content = f"
{text_content}
" + div.clear() + div.append(BeautifulSoup(text_content, "html.parser")) + content = str(div.decode_contents()) out_html += content return out_html diff --git a/chandra/scripts/screenshot_app.py b/chandra/scripts/screenshot_app.py index e7ab0f2..4083feb 100644 --- a/chandra/scripts/screenshot_app.py +++ b/chandra/scripts/screenshot_app.py @@ -143,6 +143,7 @@ def process(): "image_height": img_height, "blocks": blocks_data, "html": html_with_images, + "markdown": result.markdown, } ) diff --git a/chandra/scripts/templates/screenshot.html b/chandra/scripts/templates/screenshot.html index 42dbf60..92f6d4d 100644 --- a/chandra/scripts/templates/screenshot.html +++ b/chandra/scripts/templates/screenshot.html @@ -64,6 +64,20 @@ cursor: not-allowed; } + .controls label { + display: flex; + align-items: center; + gap: 8px; + color: white; + font-size: 14px; + cursor: pointer; + user-select: none; + } + + .controls input[type="checkbox"] { + cursor: pointer; + } + .loading { display: none; color: #f39c12; @@ -75,6 +89,11 @@ font-weight: bold; } + .success { + color: #27ae60; + font-weight: bold; + } + .screenshot-container { display: none; margin-top: 60px; @@ -88,8 +107,18 @@ display: flex; } - .left-panel, .right-panel { - flex: 1; + .left-panel { + flex: 0 0 40%; + display: flex; + flex-direction: column; + background: white; + border-radius: 8px; + overflow: hidden; + box-shadow: 0 4px 12px rgba(0,0,0,0.3); + } + + .right-panel { + flex: 0 0 60%; display: flex; flex-direction: column; background: white; @@ -216,8 +245,14 @@ + + Processing... +