diff --git a/chandra/input.py b/chandra/input.py index d552d14..0d793a7 100644 --- a/chandra/input.py +++ b/chandra/input.py @@ -13,6 +13,15 @@ def flatten(page, flag=pdfium_c.FLAT_NORMALDISPLAY): print(f"Failed to flatten annotations / form fields on page {page}.") +def load_image(filepath: str): + image = Image.open(filepath).convert("RGB") + if image.width < settings.MIN_IMAGE_DIM or image.height < settings.MIN_IMAGE_DIM: + scale = settings.MIN_IMAGE_DIM / min(image.width, image.height) + new_size = (int(image.width * scale), int(image.height * scale)) + image = image.resize(new_size, Image.Resampling.LANCZOS) + return image + + def load_pdf_images(filepath: str, page_range: List[int]): doc = pdfium.PdfDocument(filepath) doc.init_forms() @@ -22,7 +31,7 @@ def load_pdf_images(filepath: str, page_range: List[int]): if not page_range or page in page_range: page_obj = doc[page] min_page_dim = min(page_obj.get_width(), page_obj.get_height()) - scale_dpi = (settings.MIN_IMAGE_DIM / min_page_dim) * 72 + scale_dpi = (settings.MIN_PDF_IMAGE_DIM / min_page_dim) * 72 scale_dpi = max(scale_dpi, settings.IMAGE_DPI) page_obj = doc[page] flatten(page_obj) @@ -56,5 +65,5 @@ def load_file(filepath: str, config: dict): if input_type and input_type.extension == "pdf": images = load_pdf_images(filepath, page_range) else: - images = [Image.open(filepath).convert("RGB")] + images = [load_image(filepath)] return images diff --git a/chandra/output.py b/chandra/output.py index aa430fe..7d4d1c6 100644 --- a/chandra/output.py +++ b/chandra/output.py @@ -71,6 +71,17 @@ def parse_html( else: img = BeautifulSoup(f"", "html.parser") div.append(img) + + # Wrap text content in

<p> tags if no inner HTML tags exist + if label in ["Text"] and not re.search( + "<.+>", str(div.decode_contents()).strip() + ): + # Add inner p tags if missing for text blocks + text_content = str(div.decode_contents()).strip() + text_content = f"

<p>{text_content}</p>

" + div.clear() + div.append(BeautifulSoup(text_content, "html.parser")) + content = str(div.decode_contents()) out_html += content return out_html diff --git a/chandra/scripts/screenshot_app.py b/chandra/scripts/screenshot_app.py index e7ab0f2..4083feb 100644 --- a/chandra/scripts/screenshot_app.py +++ b/chandra/scripts/screenshot_app.py @@ -143,6 +143,7 @@ def process(): "image_height": img_height, "blocks": blocks_data, "html": html_with_images, + "markdown": result.markdown, } ) diff --git a/chandra/scripts/templates/screenshot.html b/chandra/scripts/templates/screenshot.html index 42dbf60..92f6d4d 100644 --- a/chandra/scripts/templates/screenshot.html +++ b/chandra/scripts/templates/screenshot.html @@ -64,6 +64,20 @@ cursor: not-allowed; } + .controls label { + display: flex; + align-items: center; + gap: 8px; + color: white; + font-size: 14px; + cursor: pointer; + user-select: none; + } + + .controls input[type="checkbox"] { + cursor: pointer; + } + .loading { display: none; color: #f39c12; @@ -75,6 +89,11 @@ font-weight: bold; } + .success { + color: #27ae60; + font-weight: bold; + } + .screenshot-container { display: none; margin-top: 60px; @@ -88,8 +107,18 @@ display: flex; } - .left-panel, .right-panel { - flex: 1; + .left-panel { + flex: 0 0 40%; + display: flex; + flex-direction: column; + background: white; + border-radius: 8px; + overflow: hidden; + box-shadow: 0 4px 12px rgba(0,0,0,0.3); + } + + .right-panel { + flex: 0 0 60%; display: flex; flex-direction: column; background: white; @@ -216,8 +245,14 @@ + + Processing... +
@@ -243,6 +278,11 @@