From 417d210904e0b393cf92e3949fbb21b2f933fc03 Mon Sep 17 00:00:00 2001 From: Vik Paruchuri Date: Mon, 20 Oct 2025 16:58:52 -0400 Subject: [PATCH] Cleanups --- README.md | 48 ++- chandra/output.py | 13 +- chandra/scripts/__init__.py | 0 chandra_app.py => chandra/scripts/app.py | 2 +- chandra_cli.py => chandra/scripts/cli.py | 0 chandra/scripts/run_app.py | 25 ++ chandra/scripts/screenshot_app.py | 158 +++++++++ chandra/scripts/templates/screenshot.html | 373 ++++++++++++++++++++++ pyproject.toml | 10 +- uv.lock | 4 +- 10 files changed, 606 insertions(+), 27 deletions(-) create mode 100644 chandra/scripts/__init__.py rename chandra_app.py => chandra/scripts/app.py (98%) rename chandra_cli.py => chandra/scripts/cli.py (100%) create mode 100644 chandra/scripts/run_app.py create mode 100644 chandra/scripts/screenshot_app.py create mode 100644 chandra/scripts/templates/screenshot.html diff --git a/README.md b/README.md index 51c4033..9325f70 100644 --- a/README.md +++ b/README.md @@ -5,29 +5,43 @@ Chandra is a highly accurate OCR model that converts images and PDFs into struct ## Features - Convert documents to markdown, html, or json with detailed layout information +- Good handwriting support +- Reconstructs forms accurately, including checkboxes - Math equation support (LaTeX) -- Reconstructs forms, including checkboxes - Precise table reconstruction - Support for 40+ languages - Two inference modes: local (HuggingFace) and remote (vLLM server) + ## Benchmarks -| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall | -|:----------|:-----:|:--------------:|:------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:-------:| -| Datalab Chandra v0.1.0 | 81.4 | **80.3** | **89.4** | **50.0** | 88.3 | **81.0** | **91.6** | **99.9** | **82.7 ± 0.9** | -| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 | -| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 | -| Deepseek OCR | 75.2 | 67.9 | 79.1 | 32.9 | 96.1 | 66.3 | 78.5 | 97.7 | 74.2 ± 1.0 | -| Nanonets OCR | 67.0 | 68.6 | 77.7 | 39.5 | 40.7 | 69.9 | 53.4 | 99.3 | 64.5 ± 1.1 | -| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 | -| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 | -| Qwen 2.5 VL (No Anchor) | 63.1 | 65.7 | 67.3 | 38.6 | 73.6 | 68.3 | 49.1 | 98.3 | 65.5 ± 1.2 | -| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 | +| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall | +|:----------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:--------:|:--------------:| +| Datalab Chandra v0.1.0 | 81.4 | **80.3** | **89.4** | **50.0** | 88.3 | **81.0** | **91.6** | **99.9** | **82.7 ± 0.9** | +| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 | +| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 | +| Deepseek OCR | 75.2 | 67.9 | 79.1 | 32.9 | 96.1 | 66.3 | 78.5 | 97.7 | 74.2 ± 1.0 | +| Nanonets OCR | 67.0 | 68.6 | 77.7 | 39.5 | 40.7 | 69.9 | 53.4 | 99.3 | 64.5 ± 1.1 | +| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 | +| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 
71.5 | 95.6 | 63.8 ± 1.2 | +| Qwen 2.5 VL (No Anchor) | 63.1 | 65.7 | 67.3 | 38.6 | 73.6 | 68.3 | 49.1 | 98.3 | 65.5 ± 1.2 | +| Qwen 3 VL | 70.2 | 75.1 | 45.6 | 37.5 | 89.1 | 62.1 | 43.0 | 94.3 | 64.6 ± 1.1 | +| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 | + ## Installation +### From PyPI (Recommended) + ```bash +pip install chandra-ocr +``` + +### From Source + +```bash +git clone https://github.com/yourusername/chandra.git +cd chandra uv sync source .venv/bin/activate ``` @@ -39,14 +53,14 @@ source .venv/bin/activate Process single files or entire directories: ```bash -# Process a single PDF with vLLM -python chandra_cli.py input.pdf ./output --method vllm +# Single file, with vllm server (see below for how to launch) +chandra input.pdf ./output --method vllm # Process all files in a directory with local model -python chandra_cli.py ./documents ./output --method hf +chandra ./documents ./output --method hf # Process specific pages with custom settings -python chandra_cli.py document.pdf ./output --page-range "1-10,15,20-25" --max-workers 8 +chandra document.pdf ./output --page-range "1-10,15,20-25" --max-workers 8 ``` **CLI Options:** @@ -71,7 +85,7 @@ Each processed file creates a subdirectory with: Launch the interactive demo for single-page processing: ```bash -streamlit run chandra_app.py --server.fileWatcherType none --server.headless true +chandra_app ``` The web interface allows you to: diff --git a/chandra/output.py b/chandra/output.py index 6ff0b6e..e8540c4 100644 --- a/chandra/output.py +++ b/chandra/output.py @@ -15,13 +15,12 @@ def _hash_html(html: str): return hashlib.md5(html.encode("utf-8")).hexdigest() -def get_image_name(html: str, div_idx: int, image_idx: int): +def get_image_name(html: str, div_idx: int): html_hash = _hash_html(html) - return f"{html_hash}_{div_idx}_img{image_idx}.webp" + return f"{html_hash}_{div_idx}_img.webp" def extract_images(html: str, chunks: dict, image: Image.Image): - image_idx = 0 images = {} div_idx = 0 for idx, chunk in enumerate(chunks): @@ -31,9 +30,9 @@ def extract_images(html: str, chunks: dict, image: Image.Image): if not img: continue bbox = chunk["bbox"] - image = image.crop(bbox) - img_name = get_image_name(html, div_idx, image_idx) - images[img_name] = image + block_image = image.crop(bbox) + img_name = get_image_name(html, div_idx) + images[img_name] = block_image return images @@ -59,7 +58,7 @@ def parse_html( if label in ["Image", "Figure"]: img = div.find("img") - img_src = get_image_name(html, div_idx, image_idx) + img_src = get_image_name(html, div_idx) if img: img["src"] = img_src image_idx += 1 diff --git a/chandra/scripts/__init__.py b/chandra/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/chandra_app.py b/chandra/scripts/app.py similarity index 98% rename from chandra_app.py rename to chandra/scripts/app.py index 368d4cc..cd1154a 100644 --- a/chandra_app.py +++ b/chandra/scripts/app.py @@ -64,7 +64,7 @@ def ocr_layout( return result, layout_image -st.set_page_config(layout="wide") +st.set_page_config(layout="wide", page_title="Chandra OCR Demo") col1, col2 = st.columns([0.5, 0.5]) st.markdown(""" diff --git a/chandra_cli.py b/chandra/scripts/cli.py similarity index 100% rename from chandra_cli.py rename to chandra/scripts/cli.py diff --git a/chandra/scripts/run_app.py b/chandra/scripts/run_app.py new file mode 100644 index 0000000..23a0777 --- /dev/null +++ b/chandra/scripts/run_app.py @@ -0,0 +1,25 @@ +import os +import subprocess +import sys + + 
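+# Launches the bundled Streamlit demo: resolves app.py relative to this file
+# and invokes `streamlit run` with the file watcher disabled and headless mode
+# enabled, forwarding any extra CLI arguments to the app after the `--` separator.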
+def main(): + argv = sys.argv[1:] + cur_dir = os.path.dirname(os.path.abspath(__file__)) + app_path = os.path.join(cur_dir, "app.py") + cmd = [ + "streamlit", + "run", + app_path, + "--server.fileWatcherType", + "none", + "--server.headless", + "true", + ] + if argv: + cmd += ["--"] + argv + subprocess.run(cmd) + + +if __name__ == "__main__": + main() diff --git a/chandra/scripts/screenshot_app.py b/chandra/scripts/screenshot_app.py new file mode 100644 index 0000000..e7ab0f2 --- /dev/null +++ b/chandra/scripts/screenshot_app.py @@ -0,0 +1,158 @@ +""" +Simple Flask app for generating screenshot-ready OCR visualizations. +Displays original image with layout overlays on the left and extracted markdown on the right. +""" + +from flask import Flask, render_template, request, jsonify +import base64 +from io import BytesIO + +from PIL import Image +from chandra.model import InferenceManager +from chandra.input import load_file +from chandra.model.schema import BatchInputItem +from chandra.output import parse_layout + +app = Flask(__name__) + +# Load model once at startup +model = None + + +def get_model(): + global model + if model is None: + model = InferenceManager(method="vllm") + return model + + +def pil_image_to_base64(pil_image: Image.Image, format: str = "PNG") -> str: + """Convert PIL image to base64 data URL.""" + buffered = BytesIO() + pil_image.save(buffered, format=format) + img_str = base64.b64encode(buffered.getvalue()).decode() + return f"data:image/{format.lower()};base64,{img_str}" + + +def get_color_palette(): + """Return a color palette for different block types.""" + return { + "Section-Header": "#4ECDC4", + "Text": "#45B7D1", + "List-Group": "#96CEB4", + "Table": "#FFEAA7", + "Figure": "#DDA15E", + "Image": "#BC6C25", + "Caption": "#C77DFF", + "Equation": "#9D4EDD", + "Page-Header": "#E0AFA0", + "Page-Footer": "#D4A5A5", + "Footnote": "#A8DADC", + "Form": "#F4A261", + "default": "#FF00FF", + } + + +@app.route("/") +def index(): + return render_template("screenshot.html") + + +@app.route("/process", methods=["POST"]) +def process(): + data = request.json + file_path = data.get("file_path") + page_number = data.get("page_number", 0) + + if not file_path: + return jsonify({"error": "file_path is required"}), 400 + + try: + # Load image + images = load_file(file_path, {"page_range": str(page_number)}) + if not images: + return jsonify({"error": "No images found"}), 400 + + img = images[0] + + # Run OCR + model = get_model() + batch = BatchInputItem(image=img, prompt_type="ocr_layout") + result = model.generate([batch])[0] + + # Parse layout + layout_blocks = parse_layout(result.raw, img) + + # Get markdown and HTML + html = result.html + + # Convert extracted images to base64 and embed in HTML + from bs4 import BeautifulSoup + + soup = BeautifulSoup(html, "html.parser") + + for img_name, pil_img in result.images.items(): + img_base64 = pil_image_to_base64(pil_img, format="PNG") + # Find all img tags with this src + img_tags = soup.find_all("img", src=img_name) + if len(img_tags) == 0: + print(f"Warning: No img tags found for {img_name}") + for img_tag in img_tags: + # Replace src with base64 + img_tag["src"] = img_base64 + + # Wrap image with alt text display + alt_text = img_tag.get("alt", "") + if alt_text: + wrapper = soup.new_tag("div", **{"class": "image-wrapper"}) + alt_div = soup.new_tag("div", **{"class": "image-alt-text"}) + alt_div.string = alt_text + img_container = soup.new_tag( + "div", **{"class": "image-container-wrapper"} + ) + + # Move img into container + 
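+                    # replace_with() swaps the wrapper into the tree and detaches the original
+                    # <img>, which is then re-attached inside the container div below.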
img_tag_copy = img_tag + img_tag.replace_with(wrapper) + img_container.append(img_tag_copy) + + wrapper.append(alt_div) + wrapper.append(img_container) + + # Convert back to HTML string + html_with_images = str(soup) + + # Prepare response + img_base64 = pil_image_to_base64(img, format="PNG") + img_width, img_height = img.size + + color_palette = get_color_palette() + + # Prepare layout blocks data + blocks_data = [] + for block in layout_blocks: + color = color_palette.get(block.label, color_palette["default"]) + blocks_data.append( + {"bbox": block.bbox, "label": block.label, "color": color} + ) + + return jsonify( + { + "image_base64": img_base64, + "image_width": img_width, + "image_height": img_height, + "blocks": blocks_data, + "html": html_with_images, + } + ) + + except Exception as e: + return jsonify({"error": str(e)}), 500 + + +def main(): + app.run(host="0.0.0.0", port=8503) + + +if __name__ == "__main__": + main() diff --git a/chandra/scripts/templates/screenshot.html b/chandra/scripts/templates/screenshot.html new file mode 100644 index 0000000..d5398bc --- /dev/null +++ b/chandra/scripts/templates/screenshot.html @@ -0,0 +1,373 @@ + + + + + + Chandra OCR Screenshot Mode + + + +
[screenshot.html template body (~370 lines of markup, styles, and page script) not reproduced here; it renders a "Processing..." loading indicator, an "Original Image with Layout Detection" panel, and an "Extracted Content" panel]
diff --git a/pyproject.toml b/pyproject.toml index 1d15eb0..10d80c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,9 @@ authors = [ keywords = ["ocr", "pdf", "markdown", "layout"] dependencies = [ "beautifulsoup4>=4.14.2", + "click>=8.0.0", "filetype>=1.2.0", + "flask>=3.0.0", "markdownify==1.1.0", "openai>=2.2.0", "pillow>=10.2.0", @@ -26,8 +28,14 @@ dependencies = [ "streamlit>=1.50.0" ] +[build-system] +requires = ["setuptools>=61"] +build-backend = "setuptools.build_meta" + [project.scripts] -chandra = "chandra_cli:main" +chandra = "chandra.scripts.cli:main" +chandra_app = "chandra.scripts.run_app:main" +chandra_screenshot = "chandra.scripts.screenshot_app:main" [tool.setuptools.packages.find] include = ["chandra*"] diff --git a/uv.lock b/uv.lock index f90f101..0c6e4f9 100644 --- a/uv.lock +++ b/uv.lock @@ -163,9 +163,10 @@ wheels = [ [[package]] name = "chandra-ocr" version = "0.1.0" -source = { virtual = "." } +source = { editable = "." } dependencies = [ { name = "beautifulsoup4" }, + { name = "click" }, { name = "filetype" }, { name = "markdownify" }, { name = "openai" }, @@ -190,6 +191,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "beautifulsoup4", specifier = ">=4.14.2" }, + { name = "click", specifier = ">=8.0.0" }, { name = "filetype", specifier = ">=1.2.0" }, { name = "markdownify", specifier = "==1.1.0" }, { name = "openai", specifier = ">=2.2.0" },