Fix license and README

2025-11-29 16:43:11 +00:00 · 2025-10-21 07:13:20 -04:00
6 changed files with 29 additions and 37 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,6 @@
 local.env
 experiments
 .claude
-.DS_Store

 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/README.md
+++ b/README.md
@@ -37,16 +37,20 @@ chandra_app

 ## Benchmarks

-These are overall scores on the olmocr bench.
+| **Model** |  ArXiv   | Old Scans Math |  Tables  | Old Scans | Headers and Footers | Multi column | Long tiny text |   Base   |    Overall     |
+|:----------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:--------:|:--------------:|
+| Datalab Chandra v0.1.0 |   81.4   |    **80.3**    | **89.4** | **50.0**  |        88.3         |   **81.0**   |    **91.6**    | **99.9** | **82.7 ± 0.9** |
+| Datalab Marker v1.10.0 | **83.8** |      69.7      |   74.8   |   32.3    |        86.6         |     79.4     |      85.7      |   99.6   |   76.5 ± 1.0   |
+| Mistral OCR API |   77.2   |      67.5      |   60.6   |   29.3    |        93.6         |     71.3     |      77.1      |   99.4   |   72.0 ± 1.1   |
+| Deepseek OCR |   75.2   |      67.9      |   79.1   |   32.9    |        96.1         |     66.3     |      78.5      |   97.7   |   74.2 ± 1.0   |
+| GPT-4o (Anchored) |   53.5   |      74.5      |   70.0   |   40.7    |        93.8         |     69.3     |      60.6      |   96.8   |   69.9 ± 1.1   |
+| Gemini Flash 2 (Anchored) |   54.5   |      56.1      |   72.1   |   34.2    |        64.7         |     61.5     |      71.5      |   95.6   |   63.8 ± 1.2   |
+| Qwen 3 VL |   70.2   |      75.1      |   45.6   |   37.5    |        89.1         |     62.1     |      43.0      |   94.3   |   64.6 ± 1.1   |
+| olmOCR v0.3.0 |   78.6   |      79.9      |   72.9   |   43.9    |      **95.1**       |     77.3     |      81.2      |   98.9   |   78.5 ± 1.1   |

-<img src="assets/benchmarks/bench.png" width="600px"/>
-
-See full scores [below](#benchmark-table).

 ## Examples

-<img src="assets/examples/forms/handwritten_form.png" width="600px"/>
-
 | Type | Name | Link |
 |------|------|------|
 | Tables | Water Damage Form | [View](https://github.com/datalab-to/chandra/blob/master/assets/examples/tables/water_damage.png) |
@@ -152,20 +156,6 @@ VLLM_MODEL_NAME=chandra
 VLLM_GPUS=0
 ```

-## Benchmark table
-
-| **Model**                 |  ArXiv   | Old Scans Math |  Tables  | Old Scans | Headers and Footers | Multi column | Long tiny text | Base |    Overall     | Source |
-|:--------------------------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:--------------:|:------:|
-| Datalab Chandra v0.1.0    |   82.2   | **80.3** | **88.0** | **50.4**  |        90.8         |     81.2     |    **92.3**    | **99.9** | **83.1 ± 0.9** | Own benchmarks |
-| Datalab Marker v1.10.0    | **83.8** | 69.7 |   74.8   |   32.3    |        86.6         |     79.4     |      85.7      | 99.6 |   76.5 ± 1.0   | Own benchmarks |
-| Mistral OCR API           |   77.2   | 67.5 |   60.6   |   29.3    |        93.6         |     71.3     |      77.1      | 99.4 |   72.0 ± 1.1   | olmocr repo |
-| Deepseek OCR              |   75.2   | 72.3 |   79.7   |   33.3    |        96.1         |     66.7     |      80.1      | 99.7 |   75.4 ± 1.0   | Own benchmarks |
-| GPT-4o (Anchored)         |   53.5   | 74.5 |   70.0   |   40.7    |        93.8         |     69.3     |      60.6      | 96.8 |   69.9 ± 1.1   | olmocr repo |
-| Gemini Flash 2 (Anchored) |   54.5   | 56.1 |   72.1   |   34.2    |        64.7         |     61.5     |      71.5      | 95.6 |   63.8 ± 1.2   | olmocr repo |
-| Qwen 3 VL 8B              |   70.2   | 75.1 |   45.6   |   37.5    |        89.1         |     62.1     |      43.0      | 94.3 |   64.6 ± 1.1   | Own benchmarks |
-| olmOCR v0.3.0             |   78.6   | 79.9 |   72.9   |   43.9    |      **95.1**       |     77.3     |      81.2      | 98.9 |   78.5 ± 1.1   | olmocr repo |
-| dots.ocr                  |   82.1   | 64.2 |   88.3   |   40.9    |        94.1         |   **82.4**   |      81.2      | 99.5 |   79.1 ± 1.0   | dots.ocr repo |
-
 # Commercial usage

 This code is Apache 2.0, and our model weights use a modified OpenRAIL-M license (free for research, personal use, and startups under $2M funding/revenue, cannot be used competitively with our API). To remove the OpenRAIL license requirements, or for broader commercial licensing, visit our pricing page [here](https://www.datalab.to/pricing?utm_source=gh-chandra).
--- a/assets/benchmarks/bench.png
+++ b/assets/benchmarks/bench.png
--- a/chandra/model/hf.py
+++ b/chandra/model/hf.py
@@ -65,20 +65,12 @@ def process_batch_element(item: BatchInputItem, processor):


 def load_model():
-    device_map = "auto"
-    if settings.TORCH_DEVICE:
-        device_map = {"": settings.TORCH_DEVICE}
-
-    kwargs = {
-        "dtype": settings.TORCH_DTYPE,
-        "device_map": device_map,
-    }
-    if settings.TORCH_ATTN_IMPLEMENTATION:
-        kwargs["attn_implementation"] = settings.TORCH_ATTN_IMPLEMENTATION
-
    model = Qwen3VLForConditionalGeneration.from_pretrained(
-        settings.MODEL_CHECKPOINT, **kwargs
-    )
+        settings.MODEL_CHECKPOINT,
+        dtype=settings.TORCH_DTYPE,
+        device_map="auto",
+        attn_implementation=settings.TORCH_ATTN_IMPLEMENTATION,
+    ).to(settings.TORCH_DEVICE_MODEL)
    model = model.eval()
    processor = Qwen3VLProcessor.from_pretrained(settings.MODEL_CHECKPOINT)
    model.processor = processor
--- a/chandra/settings.py
+++ b/chandra/settings.py
@@ -10,10 +10,10 @@ class Settings(BaseSettings):
    BASE_DIR: str = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    IMAGE_DPI: int = 192
    MIN_IMAGE_DIM: int = 1024
-    MODEL_CHECKPOINT: str = "datalab-to/chandra"
+    MODEL_CHECKPOINT: str = "datalab-to/chandra-0.2.8"
    TORCH_DEVICE: str | None = None
    MAX_OUTPUT_TOKENS: int = 8192
-    TORCH_ATTN_IMPLEMENTATION: str | None = None
+    TORCH_ATTN: str | None = None

    # vLLM server settings
    VLLM_API_KEY: str = "EMPTY"
@@ -42,6 +42,17 @@ class Settings(BaseSettings):
    def TORCH_DTYPE(self) -> torch.dtype:
        return torch.bfloat16

+    @computed_field
+    @property
+    def TORCH_ATTN_IMPLEMENTATION(self) -> str:
+        if self.TORCH_ATTN is not None:
+            return self.TORCH_ATTN
+
+        if self.TORCH_DEVICE_MODEL == "cuda":
+            return "flash_attention_2"
+        else:
+            return "sdpa"
+
    class Config:
        env_file = find_dotenv("local.env")
        extra = "ignore"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "chandra-ocr"
-version = "0.1.2"
+version = "0.1.0"
 description = "OCR model that converts documents to markdown, HTML, or JSON."
 readme = "README.md"
 requires-python = ">=3.10"