mirror of
https://github.com/datalab-to/chandra.git
synced 2026-01-20 05:50:42 +00:00
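bbox scale: read the bbox normalization scale from settings.BBOX_SCALE (default 1024) instead of hard-coding 1024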
This commit is contained in:
@@ -9,6 +9,8 @@ from PIL import Image
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
from markdownify import MarkdownConverter, re_whitespace
|
||||
|
||||
from chandra.settings import settings
|
||||
|
||||
|
||||
@lru_cache
|
||||
def _hash_html(html: str):
|
||||
@@ -25,7 +27,7 @@ def fix_raw(html: str):
|
||||
numbers = re.findall(r"\d+", match.group(0))
|
||||
return "[" + ",".join(numbers) + "]"
|
||||
|
||||
result = re.sub(r"(?:<BBOX\d+>){4}", replace_group, html)
|
||||
result = re.sub(r"(?:\|BBOX\d+\|){4}", replace_group, html)
|
||||
return result
|
||||
|
||||
|
||||
@@ -232,8 +234,8 @@ def parse_layout(html: str, image: Image.Image):
|
||||
soup = BeautifulSoup(html, "html.parser")
|
||||
top_level_divs = soup.find_all("div", recursive=False)
|
||||
width, height = image.size
|
||||
width_scaler = width / 1024
|
||||
height_scaler = height / 1024
|
||||
width_scaler = width / settings.BBOX_SCALE
|
||||
height_scaler = height / settings.BBOX_SCALE
|
||||
layout_blocks = []
|
||||
for div in top_level_divs:
|
||||
bbox = div.get("data-bbox")
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from chandra.settings import settings
|
||||
|
||||
ALLOWED_TAGS = [
|
||||
"math",
|
||||
"br",
|
||||
@@ -65,7 +67,7 @@ Guidelines:
|
||||
""".strip()
|
||||
|
||||
OCR_LAYOUT_PROMPT = f"""
|
||||
OCR this image to HTML, arranged as layout blocks. Each layout block should be a div with the data-bbox attribute representing the bounding box of the block in [x0, y0, x1, y1] format. Bboxes are normalized 0-1024. The data-label attribute is the label for the block.
|
||||
OCR this image to HTML, arranged as layout blocks. Each layout block should be a div with the data-bbox attribute representing the bounding box of the block in [x0, y0, x1, y1] format. Bboxes are normalized 0-{settings.BBOX_SCALE}. The data-label attribute is the label for the block.
|
||||
|
||||
Use the following labels:
|
||||
- Caption
|
||||
|
||||
@@ -15,6 +15,7 @@ class Settings(BaseSettings):
|
||||
TORCH_DEVICE: str | None = None
|
||||
MAX_OUTPUT_TOKENS: int = 12384
|
||||
TORCH_ATTN: str | None = None
|
||||
BBOX_SCALE: int = 1024
|
||||
|
||||
# vLLM server settings
|
||||
VLLM_API_KEY: str = "EMPTY"
|
||||
|
||||
Reference in New Issue
Block a user