mirror of
https://github.com/datalab-to/chandra.git
synced 2026-01-20 05:50:42 +00:00
Cleanups
This commit is contained in:
48
README.md
48
README.md
@@ -5,29 +5,43 @@ Chandra is a highly accurate OCR model that converts images and PDFs into struct
|
||||
## Features
|
||||
|
||||
- Convert documents to markdown, html, or json with detailed layout information
|
||||
- Good handwriting support
|
||||
- Reconstructs forms accurately, including checkboxes
|
||||
- Math equation support (LaTeX)
|
||||
- Reconstructs forms, including checkboxes
|
||||
- Precise table reconstruction
|
||||
- Support for 40+ languages
|
||||
- Two inference modes: local (HuggingFace) and remote (vLLM server)
|
||||
|
||||
|
||||
## Benchmarks
|
||||
|
||||
| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall |
|
||||
|:----------|:-----:|:--------------:|:------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:-------:|
|
||||
| Datalab Chandra v0.1.0 | 81.4 | **80.3** | **89.4** | **50.0** | 88.3 | **81.0** | **91.6** | **99.9** | **82.7 ± 0.9** |
|
||||
| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 |
|
||||
| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 |
|
||||
| Deepseek OCR | 75.2 | 67.9 | 79.1 | 32.9 | 96.1 | 66.3 | 78.5 | 97.7 | 74.2 ± 1.0 |
|
||||
| Nanonets OCR | 67.0 | 68.6 | 77.7 | 39.5 | 40.7 | 69.9 | 53.4 | 99.3 | 64.5 ± 1.1 |
|
||||
| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 |
|
||||
| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 |
|
||||
| Qwen 2.5 VL (No Anchor) | 63.1 | 65.7 | 67.3 | 38.6 | 73.6 | 68.3 | 49.1 | 98.3 | 65.5 ± 1.2 |
|
||||
| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 |
|
||||
| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall |
|
||||
|:----------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:--------:|:--------------:|
|
||||
| Datalab Chandra v0.1.0 | 81.4 | **80.3** | **89.4** | **50.0** | 88.3 | **81.0** | **91.6** | **99.9** | **82.7 ± 0.9** |
|
||||
| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 |
|
||||
| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 |
|
||||
| Deepseek OCR | 75.2 | 67.9 | 79.1 | 32.9 | 96.1 | 66.3 | 78.5 | 97.7 | 74.2 ± 1.0 |
|
||||
| Nanonets OCR | 67.0 | 68.6 | 77.7 | 39.5 | 40.7 | 69.9 | 53.4 | 99.3 | 64.5 ± 1.1 |
|
||||
| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 |
|
||||
| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 |
|
||||
| Qwen 2.5 VL (No Anchor) | 63.1 | 65.7 | 67.3 | 38.6 | 73.6 | 68.3 | 49.1 | 98.3 | 65.5 ± 1.2 |
|
||||
| Qwen 3 VL | 70.2 | 75.1 | 45.6 | 37.5 | 89.1 | 62.1 | 43.0 | 94.3 | 64.6 ± 1.1 |
|
||||
| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 |
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
### From PyPI (Recommended)
|
||||
|
||||
```bash
|
||||
pip install chandra-ocr
|
||||
```
|
||||
|
||||
### From Source
|
||||
|
||||
```bash
|
||||
git clone https://github.com/datalab-to/chandra.git
|
||||
cd chandra
|
||||
uv sync
|
||||
source .venv/bin/activate
|
||||
```
|
||||
@@ -39,14 +53,14 @@ source .venv/bin/activate
|
||||
Process single files or entire directories:
|
||||
|
||||
```bash
|
||||
# Process a single PDF with vLLM
|
||||
python chandra_cli.py input.pdf ./output --method vllm
|
||||
# Single file, with vllm server (see below for how to launch)
|
||||
chandra input.pdf ./output --method vllm
|
||||
|
||||
# Process all files in a directory with local model
|
||||
python chandra_cli.py ./documents ./output --method hf
|
||||
chandra ./documents ./output --method hf
|
||||
|
||||
# Process specific pages with custom settings
|
||||
python chandra_cli.py document.pdf ./output --page-range "1-10,15,20-25" --max-workers 8
|
||||
chandra document.pdf ./output --page-range "1-10,15,20-25" --max-workers 8
|
||||
```
|
||||
|
||||
**CLI Options:**
|
||||
@@ -71,7 +85,7 @@ Each processed file creates a subdirectory with:
|
||||
Launch the interactive demo for single-page processing:
|
||||
|
||||
```bash
|
||||
streamlit run chandra_app.py --server.fileWatcherType none --server.headless true
|
||||
chandra_app
|
||||
```
|
||||
|
||||
The web interface allows you to:
|
||||
|
||||
@@ -15,13 +15,12 @@ def _hash_html(html: str):
|
||||
return hashlib.md5(html.encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def get_image_name(html: str, div_idx: int, image_idx: int):
|
||||
def get_image_name(html: str, div_idx: int):
|
||||
html_hash = _hash_html(html)
|
||||
return f"{html_hash}_{div_idx}_img{image_idx}.webp"
|
||||
return f"{html_hash}_{div_idx}_img.webp"
|
||||
|
||||
|
||||
def extract_images(html: str, chunks: dict, image: Image.Image):
|
||||
image_idx = 0
|
||||
images = {}
|
||||
div_idx = 0
|
||||
for idx, chunk in enumerate(chunks):
|
||||
@@ -31,9 +30,9 @@ def extract_images(html: str, chunks: dict, image: Image.Image):
|
||||
if not img:
|
||||
continue
|
||||
bbox = chunk["bbox"]
|
||||
image = image.crop(bbox)
|
||||
img_name = get_image_name(html, div_idx, image_idx)
|
||||
images[img_name] = image
|
||||
block_image = image.crop(bbox)
|
||||
img_name = get_image_name(html, div_idx)
|
||||
images[img_name] = block_image
|
||||
return images
|
||||
|
||||
|
||||
@@ -59,7 +58,7 @@ def parse_html(
|
||||
|
||||
if label in ["Image", "Figure"]:
|
||||
img = div.find("img")
|
||||
img_src = get_image_name(html, div_idx, image_idx)
|
||||
img_src = get_image_name(html, div_idx)
|
||||
if img:
|
||||
img["src"] = img_src
|
||||
image_idx += 1
|
||||
|
||||
0
chandra/scripts/__init__.py
Normal file
0
chandra/scripts/__init__.py
Normal file
@@ -64,7 +64,7 @@ def ocr_layout(
|
||||
return result, layout_image
|
||||
|
||||
|
||||
st.set_page_config(layout="wide")
|
||||
st.set_page_config(layout="wide", page_title="Chandra OCR Demo")
|
||||
col1, col2 = st.columns([0.5, 0.5])
|
||||
|
||||
st.markdown("""
|
||||
25
chandra/scripts/run_app.py
Normal file
25
chandra/scripts/run_app.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    """Launch the bundled Streamlit demo app and return its exit status.

    The app script is ``app.py`` in the same directory as this module.
    Streamlit is started with the file watcher disabled and in headless
    mode. Any command-line arguments given to this entry point are
    forwarded to the app script after a ``--`` separator.

    Returns:
        The exit code of the Streamlit process, so that a console-script
        wrapper (which calls ``sys.exit(main())``) reports failures to the
        shell instead of always exiting with 0.
    """
    forwarded_args = sys.argv[1:]
    script_dir = os.path.dirname(os.path.abspath(__file__))
    app_path = os.path.join(script_dir, "app.py")

    # Run Streamlit through the current interpreter so the copy installed
    # next to this package is used, regardless of what is first on PATH.
    cmd = [
        sys.executable,
        "-m",
        "streamlit",
        "run",
        app_path,
        "--server.fileWatcherType",
        "none",
        "--server.headless",
        "true",
    ]
    if forwarded_args:
        cmd += ["--"] + forwarded_args

    completed = subprocess.run(cmd)
    return completed.returncode


if __name__ == "__main__":
    sys.exit(main())
|
||||
158
chandra/scripts/screenshot_app.py
Normal file
158
chandra/scripts/screenshot_app.py
Normal file
@@ -0,0 +1,158 @@
|
||||
"""
|
||||
Simple Flask app for generating screenshot-ready OCR visualizations.
|
||||
Displays original image with layout overlays on the left and extracted markdown on the right.
|
||||
"""
|
||||
|
||||
from flask import Flask, render_template, request, jsonify
|
||||
import base64
|
||||
from io import BytesIO
|
||||
|
||||
from PIL import Image
|
||||
from chandra.model import InferenceManager
|
||||
from chandra.input import load_file
|
||||
from chandra.model.schema import BatchInputItem
|
||||
from chandra.output import parse_layout
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# Shared model instance; created lazily on the first call to get_model()
|
||||
model = None
|
||||
|
||||
|
||||
def get_model():
    """Return the module-level InferenceManager, creating it on first use."""
    global model
    if model is not None:
        return model
    # First call: build the vLLM-backed manager and keep it for later calls.
    model = InferenceManager(method="vllm")
    return model
|
||||
|
||||
|
||||
def pil_image_to_base64(pil_image: Image.Image, format: str = "PNG") -> str:
    """Serialize a PIL image and return it as a base64 ``data:`` URL.

    Args:
        pil_image: Image to encode.
        format: Format name passed to ``Image.save``; its lowercase form is
            used as the ``image/...`` subtype of the URL.

    Returns:
        A string of the form ``data:image/<format>;base64,<payload>``.
    """
    buffer = BytesIO()
    pil_image.save(buffer, format=format)
    payload = base64.b64encode(buffer.getvalue()).decode()
    subtype = format.lower()
    return f"data:image/{subtype};base64,{payload}"
|
||||
|
||||
|
||||
# Hex colour used to draw each layout block label; "default" is the fallback
# for labels that are not listed here.
_BLOCK_COLORS = {
    "Section-Header": "#4ECDC4",
    "Text": "#45B7D1",
    "List-Group": "#96CEB4",
    "Table": "#FFEAA7",
    "Figure": "#DDA15E",
    "Image": "#BC6C25",
    "Caption": "#C77DFF",
    "Equation": "#9D4EDD",
    "Page-Header": "#E0AFA0",
    "Page-Footer": "#D4A5A5",
    "Footnote": "#A8DADC",
    "Form": "#F4A261",
    "default": "#FF00FF",
}


def get_color_palette() -> dict:
    """Return a color palette for different block types.

    Returns:
        A new dict mapping block label to a ``#RRGGBB`` colour string, with a
        ``"default"`` entry for unknown labels. A fresh copy is returned on
        every call, so callers may modify it without affecting later calls.
    """
    return dict(_BLOCK_COLORS)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Serve the screenshot UI page rendered from ``screenshot.html``."""
    page = render_template("screenshot.html")
    return page
|
||||
|
||||
|
||||
@app.route("/process", methods=["POST"])
def process():
    """Run layout OCR on one page of a file and return the results as JSON.

    Expects a JSON object body with:
        file_path: Path of the file to load (required).
        page_number: Page selector; it is stringified and passed to
            ``load_file`` as the ``page_range`` option (defaults to 0).

    Returns:
        On success, a JSON object with ``image_base64``, ``image_width``,
        ``image_height``, ``blocks`` (bbox, label and colour for each layout
        block) and ``html`` (the extracted HTML with images inlined as data
        URLs). On failure, ``{"error": ...}`` with status 400 (bad request
        or no images loaded) or 500 (any exception raised while processing).
    """
    # silent=True yields None instead of a non-JSON error page when the body
    # is missing, has the wrong content type or is not valid JSON, so the
    # client always receives a JSON error it can display.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # NOTE(security): file_path is supplied by the client and is handed to
    # load_file as-is; only expose this endpoint to trusted users.
    file_path = data.get("file_path")
    page_number = data.get("page_number", 0)

    if not file_path:
        return jsonify({"error": "file_path is required"}), 400

    try:
        # Load the requested page as an image.
        images = load_file(file_path, {"page_range": str(page_number)})
        if not images:
            return jsonify({"error": "No images found"}), 400
        img = images[0]

        # Run OCR with the layout prompt on that single page.
        manager = get_model()
        batch = BatchInputItem(image=img, prompt_type="ocr_layout")
        result = manager.generate([batch])[0]

        layout_blocks = parse_layout(result.raw, img)
        html_with_images = _embed_images(result.html, result.images)

        img_width, img_height = img.size
        color_palette = get_color_palette()
        blocks_data = [
            {
                "bbox": block.bbox,
                "label": block.label,
                "color": color_palette.get(block.label, color_palette["default"]),
            }
            for block in layout_blocks
        ]

        return jsonify(
            {
                "image_base64": pil_image_to_base64(img, format="PNG"),
                "image_width": img_width,
                "image_height": img_height,
                "blocks": blocks_data,
                "html": html_with_images,
            }
        )

    except Exception as e:
        # Top-level boundary: report any processing failure to the client.
        return jsonify({"error": str(e)}), 500


def _embed_images(html, images):
    """Inline extracted images into ``html`` as base64 data URLs.

    Every ``<img>`` whose ``src`` equals a key of ``images`` gets that
    image's PNG data URL as its new ``src``. When such a tag has alt text,
    it is wrapped in a container in which the alt text is placed before the
    image.

    Args:
        html: HTML string whose ``<img>`` tags reference names in ``images``.
        images: Mapping of image name to PIL image.

    Returns:
        The modified document serialized back to a string.
    """
    # Imported here, as in the original code, rather than at module level.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, "html.parser")

    for img_name, pil_img in images.items():
        data_url = pil_image_to_base64(pil_img, format="PNG")
        img_tags = soup.find_all("img", src=img_name)
        if not img_tags:
            print(f"Warning: No img tags found for {img_name}")

        for img_tag in img_tags:
            img_tag["src"] = data_url

            alt_text = img_tag.get("alt", "")
            if not alt_text:
                continue

            wrapper = soup.new_tag("div", **{"class": "image-wrapper"})
            alt_div = soup.new_tag("div", **{"class": "image-alt-text"})
            alt_div.string = alt_text
            img_container = soup.new_tag(
                "div", **{"class": "image-container-wrapper"}
            )

            # Put the wrapper where the <img> was, then move the <img> into
            # its container inside the wrapper.
            img_tag.replace_with(wrapper)
            img_container.append(img_tag)
            wrapper.append(alt_div)
            wrapper.append(img_container)

    return str(soup)
|
||||
|
||||
|
||||
def main(host: str = "0.0.0.0", port: int = 8503):
    """Start the Flask server for the screenshot UI.

    Args:
        host: Interface to bind. The default listens on all interfaces.
        port: TCP port to listen on.
    """
    # NOTE(security): the /process endpoint loads whatever file path the
    # client sends. With the default host the server is reachable from other
    # machines; pass host="127.0.0.1" to restrict it to local use.
    app.run(host=host, port=port)


if __name__ == "__main__":
    main()
|
||||
373
chandra/scripts/templates/screenshot.html
Normal file
373
chandra/scripts/templates/screenshot.html
Normal file
@@ -0,0 +1,373 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Chandra OCR Screenshot Mode</title>
|
||||
<style>
|
||||
* {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
||||
background: #1a1a1a;
|
||||
color: white;
|
||||
overflow: hidden;
|
||||
}
|
||||
|
||||
.controls {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
background: #2c3e50;
|
||||
padding: 15px 20px;
|
||||
display: flex;
|
||||
gap: 15px;
|
||||
align-items: center;
|
||||
z-index: 2000;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.3);
|
||||
}
|
||||
|
||||
.controls input, .controls button {
|
||||
padding: 8px 15px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.controls input {
|
||||
flex: 1;
|
||||
max-width: 500px;
|
||||
}
|
||||
|
||||
.controls input[type="number"] {
|
||||
max-width: 100px;
|
||||
}
|
||||
|
||||
.controls button {
|
||||
background: #3498db;
|
||||
color: white;
|
||||
cursor: pointer;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.controls button:hover {
|
||||
background: #2980b9;
|
||||
}
|
||||
|
||||
.controls button:disabled {
|
||||
background: #7f8c8d;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.loading {
|
||||
display: none;
|
||||
color: #f39c12;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.error {
|
||||
color: #e74c3c;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.screenshot-container {
|
||||
display: none;
|
||||
margin-top: 60px;
|
||||
height: calc(100vh - 60px);
|
||||
gap: 20px;
|
||||
padding: 20px;
|
||||
flex-direction: row;
|
||||
}
|
||||
|
||||
.screenshot-container.active {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.left-panel, .right-panel {
|
||||
flex: 1;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
overflow: hidden;
|
||||
box-shadow: 0 4px 12px rgba(0,0,0,0.3);
|
||||
}
|
||||
|
||||
.panel-header {
|
||||
background: #2c3e50;
|
||||
color: white;
|
||||
padding: 15px 20px;
|
||||
font-size: 18px;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.panel-content {
|
||||
flex: 1;
|
||||
overflow: auto;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.image-container {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
background: #f5f5f5;
|
||||
}
|
||||
|
||||
.image-alt-text {
|
||||
border: 1px solid #e5e7eb;
|
||||
}
|
||||
|
||||
#layoutCanvas {
|
||||
display: block;
|
||||
max-width: 100%;
|
||||
max-height: 100%;
|
||||
object-fit: contain;
|
||||
}
|
||||
|
||||
.markdown-content {
|
||||
padding: 30px;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
}
|
||||
|
||||
.markdown-content h1, .markdown-content h2, .markdown-content h3 {
|
||||
margin-top: 24px;
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
.markdown-content h1 { font-size: 2em; border-bottom: 1px solid #eee; padding-bottom: 0.3em; }
|
||||
.markdown-content h2 { font-size: 1.5em; border-bottom: 1px solid #eee; padding-bottom: 0.3em; }
|
||||
.markdown-content h3 { font-size: 1.25em; }
|
||||
|
||||
.markdown-content table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin: 20px 0;
|
||||
}
|
||||
|
||||
.markdown-content table th, .markdown-content table td {
|
||||
border: 1px solid #ddd;
|
||||
padding: 8px 12px;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.markdown-content table th {
|
||||
background-color: #f2f2f2;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
.markdown-content code {
|
||||
background: #f4f4f4;
|
||||
padding: 2px 6px;
|
||||
border-radius: 3px;
|
||||
font-family: 'Monaco', 'Courier New', monospace;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
|
||||
.markdown-content pre {
|
||||
background: #f4f4f4;
|
||||
padding: 16px;
|
||||
border-radius: 6px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
.markdown-content pre code {
|
||||
background: none;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.markdown-content img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
display: block;
|
||||
margin: 20px auto;
|
||||
border-radius: 4px;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.markdown-content figure {
|
||||
margin: 20px 0;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.markdown-content figure img {
|
||||
margin: 0 auto 10px;
|
||||
}
|
||||
|
||||
.markdown-content figcaption {
|
||||
font-size: 0.9em;
|
||||
color: #666;
|
||||
font-style: italic;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="controls">
|
||||
<input type="text" id="filePath" placeholder="Enter file path (e.g., /path/to/document.pdf)">
|
||||
<input type="number" id="pageNumber" placeholder="Page" value="0" min="0">
|
||||
<button id="processBtn" onclick="processFile()">Process</button>
|
||||
<span class="loading" id="loading">Processing...</span>
|
||||
<span class="error" id="error"></span>
|
||||
</div>
|
||||
|
||||
<div class="screenshot-container" id="container">
|
||||
<div class="left-panel">
|
||||
<div class="panel-header">Original Image with Layout Detection</div>
|
||||
<div class="panel-content">
|
||||
<div class="image-container">
|
||||
<canvas id="layoutCanvas"></canvas>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="right-panel">
|
||||
<div class="panel-header">Extracted Content</div>
|
||||
<div class="panel-content">
|
||||
<div class="markdown-content" id="markdownContent"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.25/dist/katex.min.css" integrity="sha384-WcoG4HRXMzYzfCgiyfrySxx90XSl2rxY5mnVY5TwtWE6KLrArNKn0T/mOgNL0Mmi" crossorigin="anonymous">
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.25/dist/katex.min.js" integrity="sha384-J+9dG2KMoiR9hqcFao0IBLwxt6zpcyN68IgwzsCSkbreXUjmNVRhPFTssqdSGjwQ" crossorigin="anonymous"></script>
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.25/dist/contrib/auto-render.min.js" integrity="sha384-hCXGrW6PitJEwbkoStFjeJxv+fSOOQKOPbJxSfM6G5sWZjAyWhXiTIIAmQqnlLlh" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
||||
<script>
|
||||
// Send the file path and page number to /process and display the result.
// While the request is running, the button is disabled and a loading
// indicator is shown; failures are written to the #error element.
async function processFile() {
    const filePath = document.getElementById('filePath').value;
    // Explicit radix; empty or non-numeric input falls back to page 0.
    const pageNumber = parseInt(document.getElementById('pageNumber').value, 10) || 0;
    const loading = document.getElementById('loading');
    const error = document.getElementById('error');
    const processBtn = document.getElementById('processBtn');
    const container = document.getElementById('container');

    if (!filePath) {
        error.textContent = 'Please enter a file path';
        return;
    }

    error.textContent = '';
    loading.style.display = 'inline';
    processBtn.disabled = true;
    container.classList.remove('active');

    try {
        const response = await fetch('/process', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ file_path: filePath, page_number: pageNumber })
        });

        if (!response.ok) {
            // Error responses are expected to be {"error": ...}, but the body
            // may not be JSON at all; fall back to a generic message then
            // instead of surfacing a JSON parse error.
            let message = 'Processing failed';
            try {
                const errorData = await response.json();
                if (errorData && errorData.error) {
                    message = errorData.error;
                }
            } catch (parseErr) {
                // Non-JSON error body: keep the generic message.
            }
            throw new Error(message);
        }

        const data = await response.json();
        renderResults(data);
        container.classList.add('active');

    } catch (err) {
        error.textContent = `Error: ${err.message}`;
    } finally {
        loading.style.display = 'none';
        processBtn.disabled = false;
    }
}
|
||||
|
||||
// Draw the page image with labelled layout boxes on the canvas and show the
// extracted HTML (with KaTeX-rendered math) in the right-hand panel.
// `data` is the JSON object returned by /process.
function renderResults(data) {
    const canvas = document.getElementById('layoutCanvas');
    const ctx = canvas.getContext('2d');
    const markdownContent = document.getElementById('markdownContent');
    const blocks = data.blocks || [];

    // Draw image with layout overlays once the image data has been decoded.
    const img = new Image();
    img.onload = function() {
        canvas.width = data.image_width;
        canvas.height = data.image_height;

        ctx.drawImage(img, 0, 0, data.image_width, data.image_height);

        // Canvas state shared by every block; set after resizing because
        // changing the canvas size resets the drawing state.
        ctx.lineWidth = 3;
        ctx.font = 'bold 14px -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif';
        ctx.textBaseline = 'top';

        const textHeight = 16;
        const padding = 6;
        const labelCounts = {};

        blocks.forEach((block) => {
            const [x1, y1, x2, y2] = block.bbox;
            const width = x2 - x1;
            const height = y2 - y1;

            // Outline plus a semi-transparent fill ('33' is the alpha byte
            // appended to the #RRGGBB colour).
            ctx.strokeStyle = block.color;
            ctx.fillStyle = block.color + '33';
            ctx.fillRect(x1, y1, width, height);
            ctx.strokeRect(x1, y1, width, height);

            // Number blocks per label so each one can be told apart.
            labelCounts[block.label] = (labelCounts[block.label] || 0) + 1;
            const labelWithCount = `${block.label} #${labelCounts[block.label]}`;

            const textWidth = ctx.measureText(labelWithCount).width;
            const labelX = x1;
            // Keep the label inside the canvas for blocks near the top edge.
            const labelY = Math.max(y1 - textHeight - padding, textHeight);

            ctx.fillStyle = block.color;
            ctx.fillRect(labelX, labelY - textHeight, textWidth + padding * 2, textHeight + padding);

            ctx.fillStyle = 'white';
            ctx.fillText(labelWithCount, labelX + padding, labelY - textHeight + padding / 2);
        });
    };
    img.src = data.image_base64;

    // Render HTML directly (with images embedded).
    // NOTE(security): the HTML comes from the OCR output and is inserted
    // without sanitization; only use this page with trusted documents.
    markdownContent.innerHTML = data.html;

    // Render math with KaTeX: replace every <math> tag with its rendering.
    const mathElements = markdownContent.querySelectorAll('math');
    mathElements.forEach(mathEl => {
        const latex = mathEl.textContent;
        const isBlock = mathEl.getAttribute('display') === 'block';

        try {
            const rendered = katex.renderToString(latex, {
                displayMode: isBlock,
                throwOnError: false
            });

            const span = document.createElement('span');
            span.innerHTML = rendered;
            mathEl.replaceWith(span);
        } catch (e) {
            // Leave the original <math> element in place if rendering fails.
            console.error('KaTeX render error:', e);
        }
    });
}
|
||||
|
||||
// Allow Enter key to trigger processing from either input.
// 'keydown' is used because the 'keypress' event is deprecated.
['filePath', 'pageNumber'].forEach(function(inputId) {
    document.getElementById(inputId).addEventListener('keydown', function(e) {
        if (e.key === 'Enter') processFile();
    });
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -11,7 +11,9 @@ authors = [
|
||||
keywords = ["ocr", "pdf", "markdown", "layout"]
|
||||
dependencies = [
|
||||
"beautifulsoup4>=4.14.2",
|
||||
"click>=8.0.0",
|
||||
"filetype>=1.2.0",
|
||||
"flask>=3.0.0",
|
||||
"markdownify==1.1.0",
|
||||
"openai>=2.2.0",
|
||||
"pillow>=10.2.0",
|
||||
@@ -26,8 +28,14 @@ dependencies = [
|
||||
"streamlit>=1.50.0"
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=61"] # other PEP 517 backends (e.g. "flit-core") also work if build-backend is changed to match
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[project.scripts]
|
||||
chandra = "chandra_cli:main"
|
||||
chandra = "chandra.scripts.cli:main"
|
||||
chandra_app = "chandra.scripts.run_app:main"
|
||||
chandra_screenshot = "chandra.scripts.screenshot_app:main"
|
||||
|
||||
[tool.setuptools.packages.find]
|
||||
include = ["chandra*"]
|
||||
|
||||
4
uv.lock
generated
4
uv.lock
generated
@@ -163,9 +163,10 @@ wheels = [
|
||||
[[package]]
|
||||
name = "chandra-ocr"
|
||||
version = "0.1.0"
|
||||
source = { virtual = "." }
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "click" },
|
||||
{ name = "filetype" },
|
||||
{ name = "markdownify" },
|
||||
{ name = "openai" },
|
||||
@@ -190,6 +191,7 @@ dev = [
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "beautifulsoup4", specifier = ">=4.14.2" },
|
||||
{ name = "click", specifier = ">=8.0.0" },
|
||||
{ name = "filetype", specifier = ">=1.2.0" },
|
||||
{ name = "markdownify", specifier = "==1.1.0" },
|
||||
{ name = "openai", specifier = ">=2.2.0" },
|
||||
|
||||
Reference in New Issue
Block a user