mirror of
https://github.com/datalab-to/chandra.git
synced 2025-11-29 08:33:13 +00:00
Compare commits
2 Commits
0f5f3d485c
...
settings-f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eaa31d169d | ||
|
|
63c88d644d |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,6 +1,7 @@
|
|||||||
local.env
|
local.env
|
||||||
experiments
|
experiments
|
||||||
.claude
|
.claude
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|||||||
@@ -155,14 +155,14 @@ VLLM_GPUS=0
|
|||||||
## Benchmark table
|
## Benchmark table
|
||||||
|
|
||||||
| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall | Source |
|
| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall | Source |
|
||||||
|:----------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:--------------:|:------:|
|
|:--------------------------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:--------------:|:------:|
|
||||||
| Datalab Chandra v0.1.0 | 82.2 | **80.3** | **88.0** | **50.4** | 90.8 | 81.2 | **92.3** | **99.9** | **83.1 ± 0.9** | Own benchmarks |
|
| Datalab Chandra v0.1.0 | 82.2 | **80.3** | **88.0** | **50.4** | 90.8 | 81.2 | **92.3** | **99.9** | **83.1 ± 0.9** | Own benchmarks |
|
||||||
| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 | Own benchmarks |
|
| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 | Own benchmarks |
|
||||||
| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 | olmocr repo |
|
| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 | olmocr repo |
|
||||||
| Deepseek OCR | 75.2 | 72.3 | 79.7 | 33.3 | 96.1 | 66.7 | 80.1 | 99.7 | 75.4 ± 1.0 | Own benchmarks |
|
| Deepseek OCR | 75.2 | 72.3 | 79.7 | 33.3 | 96.1 | 66.7 | 80.1 | 99.7 | 75.4 ± 1.0 | Own benchmarks |
|
||||||
| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 | olmocr repo |
|
| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 | olmocr repo |
|
||||||
| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 | olmocr repo |
|
| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 | olmocr repo |
|
||||||
| Qwen 3 VL | 70.2 | 75.1 | 45.6 | 37.5 | 89.1 | 62.1 | 43.0 | 94.3 | 64.6 ± 1.1 | Own benchmarks |
|
| Qwen 3 VL 8B | 70.2 | 75.1 | 45.6 | 37.5 | 89.1 | 62.1 | 43.0 | 94.3 | 64.6 ± 1.1 | Own benchmarks |
|
||||||
| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 | olmocr repo |
|
| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 | olmocr repo |
|
||||||
| dots.ocr | 82.1 | 64.2 | 88.3 | 40.9 | 94.1 | **82.4** | 81.2 | 99.5 | 79.1 ± 1.0 | dots.ocr repo |
|
| dots.ocr | 82.1 | 64.2 | 88.3 | 40.9 | 94.1 | **82.4** | 81.2 | 99.5 | 79.1 ± 1.0 | dots.ocr repo |
|
||||||
|
|
||||||
|
|||||||
@@ -68,11 +68,16 @@ def load_model():
|
|||||||
device_map = "auto"
|
device_map = "auto"
|
||||||
if settings.TORCH_DEVICE:
|
if settings.TORCH_DEVICE:
|
||||||
device_map = {"": settings.TORCH_DEVICE}
|
device_map = {"": settings.TORCH_DEVICE}
|
||||||
|
|
||||||
|
kwargs = {
|
||||||
|
"dtype": settings.TORCH_DTYPE,
|
||||||
|
"device_map": device_map,
|
||||||
|
}
|
||||||
|
if settings.TORCH_ATTN_IMPLEMENTATION:
|
||||||
|
kwargs["attn_implementation"] = settings.TORCH_ATTN_IMPLEMENTATION
|
||||||
|
|
||||||
model = Qwen3VLForConditionalGeneration.from_pretrained(
|
model = Qwen3VLForConditionalGeneration.from_pretrained(
|
||||||
settings.MODEL_CHECKPOINT,
|
settings.MODEL_CHECKPOINT, **kwargs
|
||||||
dtype=settings.TORCH_DTYPE,
|
|
||||||
device_map=device_map,
|
|
||||||
attn_implementation=settings.TORCH_ATTN_IMPLEMENTATION,
|
|
||||||
)
|
)
|
||||||
model = model.eval()
|
model = model.eval()
|
||||||
processor = Qwen3VLProcessor.from_pretrained(settings.MODEL_CHECKPOINT)
|
processor = Qwen3VLProcessor.from_pretrained(settings.MODEL_CHECKPOINT)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ class Settings(BaseSettings):
|
|||||||
MODEL_CHECKPOINT: str = "datalab-to/chandra"
|
MODEL_CHECKPOINT: str = "datalab-to/chandra"
|
||||||
TORCH_DEVICE: str | None = None
|
TORCH_DEVICE: str | None = None
|
||||||
MAX_OUTPUT_TOKENS: int = 8192
|
MAX_OUTPUT_TOKENS: int = 8192
|
||||||
TORCH_ATTN: str | None = None
|
TORCH_ATTN_IMPLEMENTATION: str | None = None
|
||||||
|
|
||||||
# vLLM server settings
|
# vLLM server settings
|
||||||
VLLM_API_KEY: str = "EMPTY"
|
VLLM_API_KEY: str = "EMPTY"
|
||||||
@@ -42,17 +42,6 @@ class Settings(BaseSettings):
|
|||||||
def TORCH_DTYPE(self) -> torch.dtype:
|
def TORCH_DTYPE(self) -> torch.dtype:
|
||||||
return torch.bfloat16
|
return torch.bfloat16
|
||||||
|
|
||||||
@computed_field
|
|
||||||
@property
|
|
||||||
def TORCH_ATTN_IMPLEMENTATION(self) -> str:
|
|
||||||
if self.TORCH_ATTN is not None:
|
|
||||||
return self.TORCH_ATTN
|
|
||||||
|
|
||||||
if self.TORCH_DEVICE_MODEL == "cuda":
|
|
||||||
return "flash_attention_2"
|
|
||||||
else:
|
|
||||||
return "sdpa"
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
env_file = find_dotenv("local.env")
|
env_file = find_dotenv("local.env")
|
||||||
extra = "ignore"
|
extra = "ignore"
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "chandra-ocr"
|
name = "chandra-ocr"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
description = "OCR model that converts documents to markdown, HTML, or JSON."
|
description = "OCR model that converts documents to markdown, HTML, or JSON."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
|
|||||||
Reference in New Issue
Block a user