mirror of
https://github.com/datalab-to/chandra.git
synced 2025-11-29 00:23:12 +00:00
Initial commit
This commit is contained in:
221
.gitignore
vendored
Normal file
221
.gitignore
vendored
Normal file
@@ -0,0 +1,221 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[codz]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
# Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
# poetry.lock
|
||||
# poetry.toml
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||
# pdm.lock
|
||||
# pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# pixi
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||
# pixi.lock
|
||||
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||
.pixi
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# Redis
|
||||
*.rdb
|
||||
*.aof
|
||||
*.pid
|
||||
|
||||
# RabbitMQ
|
||||
mnesia/
|
||||
rabbitmq/
|
||||
rabbitmq-data/
|
||||
|
||||
# ActiveMQ
|
||||
activemq-data/
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.envrc
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
# .idea/
|
||||
|
||||
# Abstra
|
||||
# Abstra is an AI-powered process automation framework.
|
||||
# Ignore directories containing user credentials, local state, and settings.
|
||||
# Learn more at https://abstra.io/docs
|
||||
.abstra/
|
||||
|
||||
# Visual Studio Code
|
||||
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||
# you could uncomment the following to ignore the entire vscode folder
|
||||
# .vscode/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Marimo
|
||||
marimo/_static/
|
||||
marimo/_lsp/
|
||||
__marimo__/
|
||||
|
||||
# Streamlit
|
||||
.streamlit/secrets.toml
|
||||
|
||||
# Virtual environments
|
||||
.venv
|
||||
|
||||
.idea/
|
||||
1
.python-version
Normal file
1
.python-version
Normal file
@@ -0,0 +1 @@
|
||||
3.12
|
||||
7
README.md
Normal file
7
README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# Chandra
|
||||
|
||||
To try the demo app, run:
|
||||
|
||||
```shell
|
||||
streamlit run chandra_app.py --server.fileWatcherType none --server.headless true
|
||||
```
|
||||
0
chandra/__init__.py
Normal file
0
chandra/__init__.py
Normal file
40
chandra/image.py
Normal file
40
chandra/image.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import math
|
||||
from typing import Tuple
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def scale_to_fit(
    img: Image.Image,
    max_size: Tuple[int, int] = (1024, 1024),
    min_size: Tuple[int, int] = (28, 28),
):
    """Rescale ``img`` so its pixel count falls inside an area budget.

    The bounds are compared as *areas* (``width * height``), not per side:
    an image is shrunk when it has more pixels than ``max_size`` describes
    and enlarged when it has fewer than ``min_size`` describes. The aspect
    ratio is preserved up to integer rounding.

    Args:
        img: Image to rescale.
        max_size: ``(width, height)`` whose product is the upper pixel budget.
        min_size: ``(width, height)`` whose product is the lower pixel budget.

    Returns:
        ``img`` itself when it is empty or already within budget, otherwise
        a new image resized with LANCZOS resampling.
    """
    width, height = img.size

    # An empty image cannot be scaled; hand it back untouched.
    if width == 0 or height == 0:
        return img

    max_width, max_height = max_size
    min_width, min_height = min_size

    current_pixels = width * height
    max_pixels = max_width * max_height
    min_pixels = min_width * min_height

    if current_pixels > max_pixels:
        scale_factor = (max_pixels / current_pixels) ** 0.5
        # Round down so the result does not exceed the upper budget.
        new_width = math.floor(width * scale_factor)
        new_height = math.floor(height * scale_factor)
    elif current_pixels < min_pixels:
        scale_factor = (min_pixels / current_pixels) ** 0.5
        # Round up so the result reaches the lower budget.
        new_width = math.ceil(width * scale_factor)
        new_height = math.ceil(height * scale_factor)
    else:
        return img

    # Very elongated images (e.g. 1 x 2_000_000) can floor to a zero-length
    # side, which resize() rejects; keep every side at least one pixel.
    new_width = max(1, new_width)
    new_height = max(1, new_height)

    return img.resize((new_width, new_height), resample=Image.Resampling.LANCZOS)
|
||||
44
chandra/layout.py
Normal file
44
chandra/layout.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
from PIL import Image
|
||||
from PIL.ImageDraw import ImageDraw
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@dataclass
class LayoutBlock:
    """One layout region recovered from the model's HTML output."""

    # Bounding box as [x0, y0, x1, y1].
    bbox: list[int]
    # Block label (the div's data-label attribute when produced by parse_layout).
    label: str
    # Inner HTML of the block.
    content: str
|
||||
|
||||
def _parse_bbox(raw) -> list[int]:
    """Decode a ``data-bbox`` attribute value into four ints, or a fallback box."""
    fallback = [0, 0, 1, 1]
    try:
        coords = [int(value) for value in json.loads(raw)]
    except (TypeError, ValueError):
        # Missing attribute (None), malformed JSON, a non-list JSON value, or
        # non-numeric entries: the HTML is model output, so tolerate it.
        return fallback
    if len(coords) < 4:
        return fallback
    return coords[:4]


def parse_layout(html: str, image: Image.Image, bbox_scale: int = 1024):
    """Turn the model's layout HTML into ``LayoutBlock`` objects.

    Only top-level ``<div>`` elements are considered. Each div's
    ``data-bbox`` (in a ``0..bbox_scale`` coordinate space) is rescaled to
    the pixel size of ``image``, clamped to the image bounds, and ordered so
    that ``x0 <= x1`` and ``y0 <= y1``.

    Args:
        html: HTML produced by the model.
        image: Image the HTML describes; only its size is used.
        bbox_scale: Extent of the coordinate space the bboxes are expressed in.

    Returns:
        A list of ``LayoutBlock``, one per top-level div, in document order.
        Divs whose bbox is missing or unparsable get a small placeholder box
        instead of aborting the whole parse.
    """
    soup = BeautifulSoup(html, "html.parser")
    top_level_divs = soup.find_all("div", recursive=False)

    width, height = image.size
    width_scaler = width / bbox_scale
    height_scaler = height / bbox_scale

    layout_blocks = []
    for div in top_level_divs:
        x0, y0, x1, y1 = _parse_bbox(div.get("data-bbox"))

        # Rescale to pixels, clamp into the image, and order each axis so a
        # degenerate prediction never yields an inverted or out-of-bounds box.
        xs = sorted(min(max(0, int(x * width_scaler)), width) for x in (x0, x1))
        ys = sorted(min(max(0, int(y * height_scaler)), height) for y in (y0, y1))
        bbox = [xs[0], ys[0], xs[1], ys[1]]

        label = div.get("data-label", "block")
        content = str(div.decode_contents())
        layout_blocks.append(LayoutBlock(bbox=bbox, label=label, content=content))
    return layout_blocks
|
||||
|
||||
def draw_layout(image: Image.Image, layout_blocks: list[LayoutBlock]):
    """Return a copy of ``image`` with every block's box and label drawn on it.

    The input image is left unmodified; boxes are outlined in red and each
    label is written in blue at the box's top-left corner.
    """
    annotated = image.copy()
    canvas = ImageDraw(annotated)

    for blk in layout_blocks:
        canvas.rectangle(blk.bbox, outline="red", width=2)
        top_left = (blk.bbox[0], blk.bbox[1])
        canvas.text(top_left, blk.label, fill="blue")

    return annotated
|
||||
39
chandra/load.py
Normal file
39
chandra/load.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from typing import List
|
||||
import filetype
|
||||
from PIL import Image
|
||||
import pypdfium2 as pdfium
|
||||
|
||||
from chandra.settings import settings
|
||||
|
||||
|
||||
def load_pdf_images(filepath: str, page_range: List[int]):
    """Render pages of a PDF into RGB PIL images.

    Args:
        filepath: PDF input handed to ``pdfium.PdfDocument``.
        page_range: 0-based page indices to render. A falsy value (``None``
            or an empty list) renders every page. Indices outside the
            document are silently ignored.

    Returns:
        The rendered pages in document order.
    """
    doc = pdfium.PdfDocument(filepath)
    try:
        # Set membership keeps the per-page check O(1) for long ranges.
        wanted = set(page_range) if page_range else None
        # Render scale derived from the configured DPI (72 units per inch).
        scale = settings.IMAGE_DPI / 72
        return [
            doc[i].render(scale=scale).to_pil().convert("RGB")
            for i in range(len(doc))
            if wanted is None or i in wanted
        ]
    finally:
        # Release the document even when rendering a page fails.
        doc.close()
|
||||
|
||||
|
||||
def parse_range_str(range_str: str) -> List[int]:
    """Parse a page specification such as ``"0,2-4,7"`` into page numbers.

    Comma-separated entries are either single integers or inclusive
    ``start-end`` ranges. Surrounding whitespace and empty entries (an empty
    string, doubled or trailing commas) are ignored.

    Args:
        range_str: The page specification.

    Returns:
        The distinct page numbers in ascending order.

    Raises:
        ValueError: If an entry is not an integer or a ``start-end`` pair.
    """
    pages = set()
    for part in range_str.split(","):
        part = part.strip()
        if not part:
            # Tolerate "" and stray commas instead of failing on int("").
            continue
        if "-" in part:
            start, end = part.split("-")
            pages.update(range(int(start), int(end) + 1))
        else:
            pages.add(int(part))
    return sorted(pages)
|
||||
|
||||
|
||||
def load_file(filepath: str, config: dict):
    """Load a PDF or image file as a list of RGB PIL images.

    Args:
        filepath: Path of the input file.
        config: Options; the optional ``"page_range"`` entry is a page
            specification string (see ``parse_range_str``) that is applied
            to PDF inputs only.

    Returns:
        One image per selected page for PDFs, or a single-element list for
        any other input, which is opened as an image.
    """
    page_range = config.get("page_range")
    if page_range:
        page_range = parse_range_str(page_range)

    input_type = filetype.guess(filepath)
    if input_type and input_type.extension == "pdf":
        return load_pdf_images(filepath, page_range)

    # convert() returns an independent, fully loaded copy, so the source
    # handle can be released as soon as the conversion is done.
    with Image.open(filepath) as source:
        return [source.convert("RGB")]
|
||||
78
chandra/model.py
Normal file
78
chandra/model.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import List
|
||||
|
||||
from PIL import Image
|
||||
from transformers import Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLProcessor
|
||||
|
||||
from chandra.image import scale_to_fit
|
||||
from chandra.prompts import PROMPT_MAPPING
|
||||
from chandra.settings import settings
|
||||
|
||||
from qwen_vl_utils import process_vision_info
|
||||
|
||||
@dataclass
class BatchItem:
    """One generation request: the image(s) plus the prompt to run on them."""

    # Images sent to the model for this request.
    images: List[Image.Image]
    # Explicit prompt text; when unset, ``prompt_type`` selects a built-in prompt.
    prompt: str | None = None
    # Key of a built-in prompt, used only when ``prompt`` is not given.
    prompt_type: str | None = None
|
||||
|
||||
|
||||
def load():
    """Load the checkpoint and its processor according to ``settings``.

    Returns:
        The model in eval mode, with the matching processor attached as
        ``model.processor``.
    """
    # Place the whole model on the configured device at load time. Combining
    # device_map="auto" with a later .to(device) conflicts whenever "auto"
    # spreads the weights over several devices.
    device = settings.TORCH_DEVICE_MODEL
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        settings.MODEL_CHECKPOINT,
        dtype=settings.TORCH_DTYPE,
        device_map={"": device},
        attn_implementation=settings.TORCH_ATTN_IMPLEMENTATION,
    )
    model = model.eval()

    processor = Qwen2_5_VLProcessor.from_pretrained(settings.MODEL_CHECKPOINT)
    model.processor = processor
    return model
|
||||
|
||||
def process_batch_element(item: BatchItem, processor):
    """Build the single user chat message for one batch item.

    An explicit ``item.prompt`` takes precedence; otherwise the prompt is
    looked up in ``PROMPT_MAPPING`` by ``item.prompt_type``. Every image is
    passed through ``scale_to_fit`` and placed before the prompt text.
    ``processor`` is accepted but not used here.
    """
    text = item.prompt if item.prompt else PROMPT_MAPPING[item.prompt_type]

    # Images first (rescaled to the supported size budget), prompt text last.
    parts = [{"type": "image", "image": scale_to_fit(img)} for img in item.images]
    parts.append({"type": "text", "text": text})

    return {"role": "user", "content": parts}
|
||||
|
||||
|
||||
def generate(batch: List[BatchItem], model):
    """Run the model on a batch and return one decoded string per item.

    Args:
        batch: Requests to process together.
        model: Model returned by ``load()`` (must expose ``model.processor``).

    Returns:
        The generated text for each batch item, in input order, with the
        prompt tokens removed.
    """
    # Each item is its own single-turn conversation. Templating a flat list
    # of messages would merge the whole batch into one multi-turn prompt.
    conversations = [[process_batch_element(item, model.processor)] for item in batch]
    text = [
        model.processor.apply_chat_template(
            conversation, tokenize=False, add_generation_prompt=True
        )
        for conversation in conversations
    ]

    image_inputs, _ = process_vision_info(conversations)
    inputs = model.processor(
        text=text,
        images=image_inputs,
        padding=True,
        return_tensors="pt",
        padding_side="left",
    )
    # Follow the model's actual device rather than assuming CUDA, so the
    # MPS/CPU fallbacks chosen in settings keep working.
    inputs = inputs.to(model.device)

    # Inference: Generation of the output
    generated_ids = model.generate(**inputs, max_new_tokens=settings.MAX_OUTPUT_TOKENS)
    # generate() returns prompt + completion; drop the prompt part of each row.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = model.processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    return output_text
|
||||
|
||||
99
chandra/prompts.py
Normal file
99
chandra/prompts.py
Normal file
@@ -0,0 +1,99 @@
|
||||
# HTML tags the model is told it may emit. Kept as a list because its repr is
# interpolated verbatim into the prompt text below.
ALLOWED_TAGS = [
    "math", "br", "i", "b", "u", "del", "sup", "sub",
    "table", "tr", "td", "p", "th", "div", "pre",
    "h1", "h2", "h3", "h4", "h5",
    "ul", "ol", "li", "input", "a", "span", "img", "hr",
    "tbody", "small", "caption", "strong", "thead", "big", "code",
]

# HTML attributes the model is told it may emit (also interpolated as a list repr).
ALLOWED_ATTRIBUTES = [
    "class", "colspan", "rowspan", "display", "checked", "type",
    "border", "value", "style", "href", "alt", "align",
]

# Shared tail appended to every OCR prompt: allowed markup plus output guidelines.
PROMPT_ENDING = f"""
Only use these tags {ALLOWED_TAGS}, and these attributes {ALLOWED_ATTRIBUTES}.

Guidelines:
* Inline math: Surround math with <math>...</math> tags. Math expressions should be rendered in KaTeX-compatible LaTeX. Use display for block math.
* Tables: Use colspan and rowspan attributes to match table structure.
* Formatting: Maintain consistent formatting with the image, including spacing, indentation, subscripts/superscripts, and special characters.
* Images: Include a description of any images in the alt attribute of an <img> tag. Do not fill out the src property.
* Forms: Mark checkboxes and radio buttons properly.
* Text: join lines together properly into paragraphs using <p>...</p> tags. Use <br> tags for line breaks within paragraphs, but only when absolutely necessary to maintain meaning.
* Use the simplest possible HTML structure that accurately represents the content of the block.
* Make sure the text is accurate and easy for a human to read and interpret. Reading order should be correct and natural.
""".strip()

# Prompt requesting HTML split into labelled layout blocks with bounding boxes.
OCR_LAYOUT_PROMPT = f"""
OCR this image to HTML, arranged as layout blocks. Each layout block should be a div with the data-bbox attribute representing the bounding box of the block in [x0, y0, x1, y1] format. Bboxes are normalized 0-1024. The data-label attribute is the label for the block.

Use the following labels:
- Caption
- Footnote
- Equation-Block
- List-Item
- Page-Header
- Page-Footer
- Image
- Section-Header
- Table
- Text
- Complex-Block
- Code-Block
- Form
- Table-Of-Contents
- Figure

{PROMPT_ENDING}
""".strip()

# Prompt requesting plain HTML without layout blocks.
OCR_PROMPT = f"""
OCR this image to HTML.

{PROMPT_ENDING}
""".strip()

# Lookup from prompt type name to prompt text.
PROMPT_MAPPING = dict(
    ocr_layout=OCR_LAYOUT_PROMPT,
    ocr=OCR_PROMPT,
)
|
||||
48
chandra/settings.py
Normal file
48
chandra/settings.py
Normal file
@@ -0,0 +1,48 @@
|
||||
from dotenv import find_dotenv
|
||||
from pydantic import computed_field
|
||||
from pydantic_settings import BaseSettings
|
||||
import torch
|
||||
import os
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Runtime configuration, overridable from the environment or ``local.env``."""

    # Paths
    # Directory that contains the package directory of this file.
    BASE_DIR: str = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # Resolution used when rasterising PDF pages.
    IMAGE_DPI: int = 96
    # Checkpoint identifier passed to from_pretrained.
    MODEL_CHECKPOINT: str = "datalab-to/chandra-0.2.1"
    # Explicit device override; None means auto-detect.
    TORCH_DEVICE: str | None = None
    # Upper bound on newly generated tokens per request.
    MAX_OUTPUT_TOKENS: int = 2048

    @computed_field
    @property
    def TORCH_DEVICE_MODEL(self) -> str:
        """Device to run on: the override if set, else cuda, then mps, then cpu."""
        if self.TORCH_DEVICE is not None:
            return self.TORCH_DEVICE

        if torch.cuda.is_available():
            return "cuda"

        if torch.backends.mps.is_available():
            return "mps"

        return "cpu"

    @computed_field
    @property
    def TORCH_DTYPE(self) -> torch.dtype:
        """Dtype the model weights are loaded in."""
        return torch.bfloat16

    @computed_field
    @property
    def TORCH_ATTN_IMPLEMENTATION(self) -> str:
        """Attention backend: flash-attention on CUDA when installed, else SDPA."""
        from importlib.util import find_spec

        # Accept indexed devices such as "cuda:0", and only request
        # flash-attention when the optional flash_attn package is importable;
        # otherwise model loading would fail on CUDA hosts without it.
        on_cuda = self.TORCH_DEVICE_MODEL.startswith("cuda")
        if on_cuda and find_spec("flash_attn") is not None:
            return "flash_attention_2"
        return "sdpa"

    class Config:
        env_file = find_dotenv("local.env")
        extra = "ignore"


settings = Settings()
|
||||
113
chandra_app.py
Normal file
113
chandra_app.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import pypdfium2 as pdfium
|
||||
import streamlit as st
|
||||
from PIL import Image
|
||||
|
||||
from chandra.layout import parse_layout, draw_layout
|
||||
from chandra.load import load_pdf_images
|
||||
from chandra.model import load, BatchItem, generate
|
||||
|
||||
|
||||
@st.cache_resource()
def load_model():
    """Load the OCR model; wrapped in Streamlit's resource cache."""
    loaded = load()
    return loaded
|
||||
|
||||
@st.cache_data()
def get_page_image(pdf_file, page_num):
    """Render the single page ``page_num`` of ``pdf_file`` and return its image."""
    rendered = load_pdf_images(pdf_file, [page_num])
    return rendered[0]
|
||||
|
||||
@st.cache_data()
def page_counter(pdf_file):
    """Return the number of pages in ``pdf_file``."""
    doc = pdfium.PdfDocument(pdf_file)
    try:
        return len(doc)
    finally:
        # Always release the document, even if counting fails.
        doc.close()
|
||||
|
||||
# Function for OCR
|
||||
def ocr_layout(
    img: Image.Image,
) -> tuple[str, Image.Image]:
    """Run layout-aware OCR on ``img``.

    Returns:
        ``(html, layout_image)``: the generated HTML, and a copy of ``img``
        with the detected layout blocks drawn on it.
    """
    batch = BatchItem(
        images=[img],
        prompt_type="ocr_layout",
    )
    html = generate([batch], model=model)[0]
    # Log a short preview of the output for debugging.
    print(f"Generated HTML: {html[:500]}...")
    layout = parse_layout(html, img)
    layout_image = draw_layout(img, layout)
    return html, layout_image
|
||||
|
||||
def ocr(
    img: Image.Image,
) -> str:
    """Run plain OCR on ``img`` and return the generated HTML."""
    request = BatchItem(images=[img], prompt_type="ocr")
    outputs = generate([request], model=model)
    return outputs[0]
|
||||
|
||||
# ---- Streamlit page: upload a file, pick a page, run OCR, show the result ----
st.set_page_config(layout="wide")
col1, col2 = st.columns([0.5, 0.5])

model = load_model()

st.markdown("""
# Chandra OCR Demo

This app will let you try chandra, a multilingual OCR toolkit.
""")

in_file = st.sidebar.file_uploader(
    "PDF file or image:", type=["pdf", "png", "jpg", "jpeg", "gif", "webp"]
)

# Nothing to do until the user has uploaded something.
if in_file is None:
    st.stop()

filetype = in_file.type
page_count = None
if "pdf" in filetype:
    page_count = page_counter(in_file)
    # Pages are 0-indexed, so the last selectable page is page_count - 1;
    # allowing page_count itself would request a page that does not exist.
    page_number = st.sidebar.number_input(
        f"Page number out of {page_count}:",
        min_value=0,
        value=0,
        max_value=max(page_count - 1, 0),
    )

    pil_image = get_page_image(in_file, page_number)
else:
    pil_image = Image.open(in_file).convert("RGB")
    page_number = None

run_ocr = st.sidebar.button("Run OCR")
prompt_type = st.sidebar.selectbox(
    "Prompt type",
    ["ocr_layout", "ocr"],
    index=0,
    help="Select the prompt type for OCR.",
)

if pil_image is None:
    st.stop()

if run_ocr:
    if prompt_type == "ocr_layout":
        pred, layout_image = ocr_layout(
            pil_image,
        )
    else:
        pred = ocr(
            pil_image,
        )
        layout_image = None

    with col1:
        html_tab, text_tab, layout_tab = st.tabs(["HTML", "HTML as text", "Layout Image"])
        with html_tab:
            # NOTE(review): renders model-generated HTML unescaped.
            st.markdown(pred, unsafe_allow_html=True)
        with text_tab:
            st.text(pred)

        # Explicit None check: the plain "ocr" mode produces no layout image.
        if layout_image is not None:
            with layout_tab:
                st.image(layout_image, caption="Detected Layout", use_container_width=True)

    # NOTE(review): kept inside the run_ocr branch; the flattened source does
    # not show whether this block was nested or top-level - confirm.
    with col2:
        st.image(pil_image, caption="Uploaded Image", use_container_width=True)
|
||||
19
pyproject.toml
Normal file
19
pyproject.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[project]
|
||||
name = "chandra"
|
||||
version = "0.1.0"
|
||||
description = "Multilingual OCR toolkit that converts images and PDFs to HTML"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
"beautifulsoup4>=4.14.2",
|
||||
"filetype>=1.2.0",
|
||||
"pillow>=11.3.0",
|
||||
"pydantic>=2.12.0",
|
||||
"pydantic-settings>=2.11.0",
|
||||
"pypdfium2>=4.30.0",
|
||||
"python-dotenv>=1.1.1",
|
||||
"qwen-vl-utils>=0.0.14",
|
||||
"streamlit>=1.50.0",
|
||||
"torch>=2.8.0",
|
||||
"transformers>=4.57.0",
|
||||
]
|
||||
Reference in New Issue
Block a user