3 Commits

Author SHA1 Message Date
Vik Paruchuri
2e455aeb2c Fix attn impl 2025-10-21 11:15:29 -04:00
Vik Paruchuri
812af690ce Fix up repo 2025-10-21 10:40:19 -04:00
Vik Paruchuri
234f7df4ed Fix license and README 2025-10-21 09:27:09 -04:00
6 changed files with 38 additions and 28 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
local.env
experiments
.claude
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/

View File

@@ -37,20 +37,16 @@ chandra_app
## Benchmarks
| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall |
|:----------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:--------:|:--------------:|
| Datalab Chandra v0.1.0 | 81.4 | **80.3** | **89.4** | **50.0** | 88.3 | **81.0** | **91.6** | **99.9** | **82.7 ± 0.9** |
| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 |
| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 |
| Deepseek OCR | 75.2 | 67.9 | 79.1 | 32.9 | 96.1 | 66.3 | 78.5 | 97.7 | 74.2 ± 1.0 |
| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 |
| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 |
| Qwen 3 VL | 70.2 | 75.1 | 45.6 | 37.5 | 89.1 | 62.1 | 43.0 | 94.3 | 64.6 ± 1.1 |
| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 |
These are overall scores on the olmOCR benchmark.
<img src="assets/benchmarks/bench.png" width="600px"/>
See full scores [below](#benchmark-table).
## Examples
<img src="assets/examples/forms/handwritten_form.png" width="600px"/>
| Type | Name | Link |
|------|------|------|
| Tables | Water Damage Form | [View](https://github.com/datalab-to/chandra/blob/master/assets/examples/tables/water_damage.png) |
@@ -77,6 +73,8 @@ chandra_app
pip install chandra-ocr
```
If you're going to use the Hugging Face method, we also recommend installing [flash attention](https://github.com/Dao-AILab/flash-attention).
### From Source
```bash
@@ -156,6 +154,20 @@ VLLM_MODEL_NAME=chandra
VLLM_GPUS=0
```
## Benchmark table
| **Model** | ArXiv | Old Scans Math | Tables | Old Scans | Headers and Footers | Multi column | Long tiny text | Base | Overall | Source |
|:--------------------------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:--------------:|:------:|
| Datalab Chandra v0.1.0 | 82.2 | **80.3** | **88.0** | **50.4** | 90.8 | 81.2 | **92.3** | **99.9** | **83.1 ± 0.9** | Own benchmarks |
| Datalab Marker v1.10.0 | **83.8** | 69.7 | 74.8 | 32.3 | 86.6 | 79.4 | 85.7 | 99.6 | 76.5 ± 1.0 | Own benchmarks |
| Mistral OCR API | 77.2 | 67.5 | 60.6 | 29.3 | 93.6 | 71.3 | 77.1 | 99.4 | 72.0 ± 1.1 | olmocr repo |
| Deepseek OCR | 75.2 | 72.3 | 79.7 | 33.3 | 96.1 | 66.7 | 80.1 | 99.7 | 75.4 ± 1.0 | Own benchmarks |
| GPT-4o (Anchored) | 53.5 | 74.5 | 70.0 | 40.7 | 93.8 | 69.3 | 60.6 | 96.8 | 69.9 ± 1.1 | olmocr repo |
| Gemini Flash 2 (Anchored) | 54.5 | 56.1 | 72.1 | 34.2 | 64.7 | 61.5 | 71.5 | 95.6 | 63.8 ± 1.2 | olmocr repo |
| Qwen 3 VL 8B | 70.2 | 75.1 | 45.6 | 37.5 | 89.1 | 62.1 | 43.0 | 94.3 | 64.6 ± 1.1 | Own benchmarks |
| olmOCR v0.3.0 | 78.6 | 79.9 | 72.9 | 43.9 | **95.1** | 77.3 | 81.2 | 98.9 | 78.5 ± 1.1 | olmocr repo |
| dots.ocr | 82.1 | 64.2 | 88.3 | 40.9 | 94.1 | **82.4** | 81.2 | 99.5 | 79.1 ± 1.0 | dots.ocr repo |
# Commercial usage
This code is Apache 2.0, and our model weights use a modified OpenRAIL-M license (free for research, personal use, and startups under $2M funding/revenue, cannot be used competitively with our API). To remove the OpenRAIL license requirements, or for broader commercial licensing, visit our pricing page [here](https://www.datalab.to/pricing?utm_source=gh-chandra).

BIN
assets/benchmarks/bench.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

View File

@@ -65,12 +65,20 @@ def process_batch_element(item: BatchInputItem, processor):
def load_model():
device_map = "auto"
if settings.TORCH_DEVICE:
device_map = {"": settings.TORCH_DEVICE}
kwargs = {
"dtype": settings.TORCH_DTYPE,
"device_map": device_map,
}
if settings.TORCH_ATTN:
kwargs["attn_implementation"] = settings.TORCH_ATTN
model = Qwen3VLForConditionalGeneration.from_pretrained(
settings.MODEL_CHECKPOINT,
dtype=settings.TORCH_DTYPE,
device_map="auto",
attn_implementation=settings.TORCH_ATTN_IMPLEMENTATION,
).to(settings.TORCH_DEVICE_MODEL)
settings.MODEL_CHECKPOINT, **kwargs
)
model = model.eval()
processor = Qwen3VLProcessor.from_pretrained(settings.MODEL_CHECKPOINT)
model.processor = processor

View File

@@ -10,7 +10,7 @@ class Settings(BaseSettings):
BASE_DIR: str = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
IMAGE_DPI: int = 192
MIN_IMAGE_DIM: int = 1024
MODEL_CHECKPOINT: str = "datalab-to/chandra-0.2.8"
MODEL_CHECKPOINT: str = "datalab-to/chandra"
TORCH_DEVICE: str | None = None
MAX_OUTPUT_TOKENS: int = 8192
TORCH_ATTN: str | None = None
@@ -42,17 +42,6 @@ class Settings(BaseSettings):
def TORCH_DTYPE(self) -> torch.dtype:
return torch.bfloat16
@computed_field
@property
def TORCH_ATTN_IMPLEMENTATION(self) -> str:
if self.TORCH_ATTN is not None:
return self.TORCH_ATTN
if self.TORCH_DEVICE_MODEL == "cuda":
return "flash_attention_2"
else:
return "sdpa"
class Config:
env_file = find_dotenv("local.env")
extra = "ignore"

View File

@@ -1,6 +1,6 @@
[project]
name = "chandra-ocr"
version = "0.1.0"
version = "0.1.3"
description = "OCR model that converts documents to markdown, HTML, or JSON."
readme = "README.md"
requires-python = ">=3.10"