mirror of
https://github.com/datalab-to/chandra.git
synced 2026-01-20 14:00:42 +00:00
374 lines
12 KiB
HTML
374 lines
12 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>Chandra OCR Screenshot Mode</title>
|
|
<style>
    /* ---------- Reset and page shell ---------- */
    * {
        margin: 0;
        padding: 0;
        box-sizing: border-box;
    }

    body {
        font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
        background: #1a1a1a;
        color: white;
        overflow: hidden;
    }

    /* ---------- Fixed toolbar: path input, page input, button, status ---------- */
    .controls {
        position: fixed;
        top: 0;
        left: 0;
        right: 0;
        background: #2c3e50;
        padding: 15px 20px;
        display: flex;
        gap: 15px;
        align-items: center;
        z-index: 2000;
        box-shadow: 0 2px 8px rgba(0,0,0,0.3);
    }

    .controls input, .controls button {
        padding: 8px 15px;
        border: none;
        border-radius: 4px;
        font-size: 14px;
    }

    .controls input {
        flex: 1;
        max-width: 500px;
    }

    .controls input[type="number"] {
        max-width: 100px;
    }

    .controls button {
        background: #3498db;
        color: white;
        cursor: pointer;
        font-weight: bold;
    }

    .controls button:hover {
        background: #2980b9;
    }

    /* Declared after :hover with equal specificity, so it wins on a hovered disabled button. */
    .controls button:disabled {
        background: #7f8c8d;
        cursor: not-allowed;
    }

    /* Hidden until the script sets an inline display value. */
    .loading {
        display: none;
        color: #f39c12;
        font-weight: bold;
    }

    .error {
        color: #e74c3c;
        font-weight: bold;
    }

    /* ---------- Two-panel result area (shown via the .active class) ---------- */
    .screenshot-container {
        display: none;
        margin-top: 60px;
        height: calc(100vh - 60px);
        gap: 20px;
        padding: 20px;
        flex-direction: row;
    }

    .screenshot-container.active {
        display: flex;
    }

    .left-panel, .right-panel {
        flex: 1;
        display: flex;
        flex-direction: column;
        background: white;
        border-radius: 8px;
        overflow: hidden;
        box-shadow: 0 4px 12px rgba(0,0,0,0.3);
    }

    .panel-header {
        background: #2c3e50;
        color: white;
        padding: 15px 20px;
        font-size: 18px;
        font-weight: bold;
    }

    .panel-content {
        flex: 1;
        overflow: auto;
        position: relative;
    }

    /* ---------- Left panel: page image canvas ---------- */
    .image-container {
        position: relative;
        width: 100%;
        height: 100%;
        display: flex;
        justify-content: center;
        align-items: center;
        background: #f5f5f5;
    }

    .image-alt-text {
        border: 1px solid #e5e7eb;
    }

    #layoutCanvas {
        display: block;
        max-width: 100%;
        max-height: 100%;
        object-fit: contain;
    }

    /* ---------- Right panel: extracted content ---------- */
    .markdown-content {
        padding: 30px;
        line-height: 1.6;
        color: #333;
    }

    .markdown-content h1, .markdown-content h2, .markdown-content h3 {
        margin-top: 24px;
        margin-bottom: 16px;
    }

    /* h1 and h2 share the same underline treatment. */
    .markdown-content h1, .markdown-content h2 {
        border-bottom: 1px solid #eee;
        padding-bottom: 0.3em;
    }

    .markdown-content h1 {
        font-size: 2em;
    }

    .markdown-content h2 {
        font-size: 1.5em;
    }

    .markdown-content h3 {
        font-size: 1.25em;
    }

    .markdown-content table {
        border-collapse: collapse;
        width: 100%;
        margin: 20px 0;
    }

    .markdown-content table th, .markdown-content table td {
        border: 1px solid #ddd;
        padding: 8px 12px;
        text-align: left;
    }

    .markdown-content table th {
        background-color: #f2f2f2;
        font-weight: bold;
    }

    .markdown-content code {
        background: #f4f4f4;
        padding: 2px 6px;
        border-radius: 3px;
        font-family: 'Monaco', 'Courier New', monospace;
        font-size: 0.9em;
    }

    .markdown-content pre {
        background: #f4f4f4;
        padding: 16px;
        border-radius: 6px;
        overflow-x: auto;
    }

    /* Code inside a <pre> inherits the block's background and padding instead of the inline style. */
    .markdown-content pre code {
        background: none;
        padding: 0;
    }

    .markdown-content img {
        max-width: 100%;
        height: auto;
        display: block;
        margin: 20px auto;
        border-radius: 4px;
        box-shadow: 0 2px 8px rgba(0,0,0,0.1);
    }

    .markdown-content figure {
        margin: 20px 0;
        text-align: center;
    }

    .markdown-content figure img {
        margin: 0 auto 10px;
    }

    .markdown-content figcaption {
        font-size: 0.9em;
        color: #666;
        font-style: italic;
    }
</style>
|
|
</head>
|
|
<body>
|
|
<!-- Toolbar: file path and page inputs, the Process button, and status text. -->
<div class="controls">
    <!-- A placeholder is not an accessible name, so each input also carries an aria-label. -->
    <input type="text" id="filePath" placeholder="Enter file path (e.g., /path/to/document.pdf)" aria-label="File path">
    <input type="number" id="pageNumber" placeholder="Page" value="0" min="0" aria-label="Page number">
    <!-- Explicit type so the button never acts as a submit button if it is ever placed in a form. -->
    <button type="button" id="processBtn" onclick="processFile()">Process</button>
    <span class="loading" id="loading">Processing...</span>
    <!-- role="alert" makes assistive technology announce error text when the script fills it in. -->
    <span class="error" id="error" role="alert"></span>
</div>
|
|
|
|
<!-- Result area: page image with layout overlays on the left, extracted content on the right. -->
<div class="screenshot-container" id="container">
    <div class="left-panel">
        <div class="panel-header">Original Image with Layout Detection</div>
        <div class="panel-content">
            <div class="image-container">
                <!-- The canvas is drawn by script; role, label and fallback text give it an accessible name. -->
                <canvas id="layoutCanvas" role="img" aria-label="Page image with detected layout blocks outlined">
                    Page image with detected layout blocks outlined.
                </canvas>
            </div>
        </div>
    </div>
    <div class="right-panel">
        <div class="panel-header">Extracted Content</div>
        <div class="panel-content">
            <div class="markdown-content" id="markdownContent"></div>
        </div>
    </div>
</div>
|
|
|
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.25/dist/katex.min.css" integrity="sha384-WcoG4HRXMzYzfCgiyfrySxx90XSl2rxY5mnVY5TwtWE6KLrArNKn0T/mOgNL0Mmi" crossorigin="anonymous">
|
|
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.25/dist/katex.min.js" integrity="sha384-J+9dG2KMoiR9hqcFao0IBLwxt6zpcyN68IgwzsCSkbreXUjmNVRhPFTssqdSGjwQ" crossorigin="anonymous"></script>
|
|
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.25/dist/contrib/auto-render.min.js" integrity="sha384-hCXGrW6PitJEwbkoStFjeJxv+fSOOQKOPbJxSfM6G5sWZjAyWhXiTIIAmQqnlLlh" crossorigin="anonymous"></script>
|
|
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
|
<script>
|
|
/**
 * Reads the file path and page number from the toolbar, POSTs them as JSON
 * to `/process`, and hands the parsed response to `renderResults`.
 *
 * While a request is in flight the "Processing..." text is shown and the
 * Process button is disabled; both are restored in `finally`. Any failure
 * (validation, network, non-2xx status, rendering) is written to the
 * `#error` element instead of being thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function processFile() {
    const loading = document.getElementById('loading');
    const error = document.getElementById('error');
    const processBtn = document.getElementById('processBtn');
    const container = document.getElementById('container');

    // The Enter-key listeners call this function directly, so the disabled
    // button alone does not prevent a second request while one is running.
    if (processBtn.disabled) {
        return;
    }

    // Trim so that whitespace-only input is treated as empty and pasted
    // paths with stray surrounding whitespace still resolve.
    const filePath = document.getElementById('filePath').value.trim();

    // Explicit radix; blank or non-numeric input falls back to page 0, and
    // negatives are clamped to match the input's declared min="0".
    const parsedPage = parseInt(document.getElementById('pageNumber').value, 10);
    const pageNumber = Math.max(0, parsedPage || 0);

    if (!filePath) {
        error.textContent = 'Please enter a file path';
        return;
    }

    error.textContent = '';
    loading.style.display = 'inline';
    processBtn.disabled = true;
    container.classList.remove('active');

    try {
        const response = await fetch('/process', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ file_path: filePath, page_number: pageNumber })
        });

        if (!response.ok) {
            // An error response is not guaranteed to carry a JSON body (for
            // example an HTML error page), so a parse failure must not hide
            // the real problem behind a JSON syntax error.
            let message = `Processing failed (HTTP ${response.status})`;
            try {
                const errorData = await response.json();
                if (errorData && errorData.error) {
                    message = errorData.error;
                }
            } catch (parseErr) {
                // Body was not JSON; keep the status-based message.
            }
            throw new Error(message);
        }

        const data = await response.json();
        renderResults(data);
        container.classList.add('active');
    } catch (err) {
        error.textContent = `Error: ${err.message}`;
    } finally {
        loading.style.display = 'none';
        processBtn.disabled = false;
    }
}
|
|
|
|
/**
 * Displays one processed page: draws the page image with its layout blocks
 * on `#layoutCanvas`, and shows the extracted HTML (with `<math>` elements
 * rendered through KaTeX) in `#markdownContent`.
 *
 * @param {Object} data - Parsed response from `/process`. Fields read here:
 *   `image_base64` (assigned to `Image.src`), `image_width`, `image_height`,
 *   `blocks` (each with `bbox` as `[x1, y1, x2, y2]`, `color`, `label`),
 *   and `html`.
 */
function renderResults(data) {
    const canvas = document.getElementById('layoutCanvas');
    const ctx = canvas.getContext('2d');
    const markdownContent = document.getElementById('markdownContent');

    // A payload without a block list still renders the bare page image
    // instead of throwing inside the asynchronous onload callback.
    const blocks = Array.isArray(data.blocks) ? data.blocks : [];

    // Draw image with layout overlays
    const img = new Image();
    img.onload = function() {
        // Fall back to the decoded image size if the payload omits dimensions.
        const pageWidth = data.image_width || img.naturalWidth;
        const pageHeight = data.image_height || img.naturalHeight;

        // Resizing the canvas clears it and resets drawing state, so all
        // styles are configured after these two assignments.
        canvas.width = pageWidth;
        canvas.height = pageHeight;

        // Draw image
        ctx.drawImage(img, 0, 0, pageWidth, pageHeight);

        // Drawing state shared by every block
        ctx.lineWidth = 3;
        ctx.font = 'bold 14px -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif';
        ctx.textBaseline = 'top';

        const textHeight = 16;
        const padding = 6;

        // Map rather than a plain object so that a label such as
        // "constructor" cannot collide with an inherited property.
        const labelCounts = new Map();

        blocks.forEach((block) => {
            const [x1, y1, x2, y2] = block.bbox;
            const width = x2 - x1;
            const height = y2 - y1;

            // Outline plus a 20% opaque fill. globalAlpha is used instead of
            // appending a hex alpha suffix so any CSS colour syntax works;
            // 0.2 is the same opacity as the '33' suffix (0x33 / 255).
            ctx.strokeStyle = block.color;
            ctx.fillStyle = block.color;
            ctx.globalAlpha = 0.2;
            ctx.fillRect(x1, y1, width, height);
            ctx.globalAlpha = 1;
            ctx.strokeRect(x1, y1, width, height);

            // Number each label so repeated block types can be told apart
            const count = (labelCounts.get(block.label) || 0) + 1;
            labelCounts.set(block.label, count);
            const labelWithCount = `${block.label} #${count}`;

            // Label tag: a filled rectangle in the block colour with white
            // text, positioned above the block and clamped to stay on canvas.
            const textWidth = ctx.measureText(labelWithCount).width;
            const labelX = x1;
            const labelY = Math.max(y1 - textHeight - padding, textHeight);

            ctx.fillStyle = block.color;
            ctx.fillRect(labelX, labelY - textHeight, textWidth + padding * 2, textHeight + padding);

            ctx.fillStyle = 'white';
            ctx.fillText(labelWithCount, labelX + padding, labelY - textHeight + padding / 2);
        });
    };
    img.onerror = function() {
        // Without this handler a failed image load leaves the previous
        // page's drawing on the canvas with no indication of a problem.
        ctx.clearRect(0, 0, canvas.width, canvas.height);
        document.getElementById('error').textContent = 'Error: could not load page image';
    };
    img.src = data.image_base64;

    // Render HTML directly (with images embedded).
    // NOTE(review): this inserts server-provided markup without sanitising
    // it. If `data.html` can contain content derived from untrusted
    // documents, run it through an HTML sanitiser before assigning it here.
    markdownContent.innerHTML = data.html;

    // Render math with KaTeX - find all <math> tags and render them
    const mathElements = markdownContent.querySelectorAll('math');
    if (mathElements.length === 0) {
        return;
    }
    if (typeof katex === 'undefined') {
        // The KaTeX script is loaded from a CDN; report its absence once
        // rather than logging one failure per math element.
        console.error('KaTeX is not loaded; <math> elements were left unrendered');
        return;
    }

    mathElements.forEach(mathEl => {
        const latex = mathEl.textContent;
        const isBlock = mathEl.getAttribute('display') === 'block';

        try {
            const rendered = katex.renderToString(latex, {
                displayMode: isBlock,
                throwOnError: false
            });

            // Create a span to hold the rendered math
            const span = document.createElement('span');
            span.innerHTML = rendered;
            mathEl.replaceWith(span);
        } catch (e) {
            console.error('KaTeX render error:', e);
        }
    });
}
|
|
|
|
// Allow Enter key to trigger processing from either toolbar input.
// `keydown` replaces the deprecated `keypress` event. Auto-repeat events
// from a held key and Enter presses that confirm an IME composition are
// ignored so that each deliberate press starts at most one request.
['filePath', 'pageNumber'].forEach(function(inputId) {
    document.getElementById(inputId).addEventListener('keydown', function(e) {
        if (e.key === 'Enter' && !e.repeat && !e.isComposing) processFile();
    });
});
|
|
</script>
|
|
</body>
|
|
</html>
|