From 47bd444f20b629f6455d635a3303ae0f8eba62ca Mon Sep 17 00:00:00 2001
From: Vik Paruchuri <vik.paruchuri@gmail.com>
Date: Tue, 21 Oct 2025 12:11:37 -0400
Subject: [PATCH] Code cleanup

---
 README.md             | 11 ++++++-----
 chandra/model/util.py |  9 ++++-----
 chandra/settings.py   | 15 ---------------
 3 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index c8a8044..969b236 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Chandra
 
-Chandra is an OCR model that converts images and PDFs into structured HTML/Markdown/JSON while preserving layout information.
+Chandra is a highly accurate OCR model that converts images and PDFs into structured HTML/Markdown/JSON while preserving layout information.
 
 ## Features
 
@@ -154,7 +154,11 @@ VLLM_MODEL_NAME=chandra
 VLLM_GPUS=0
 ```
 
-## Benchmark table
+# Commercial usage
+
+This code is licensed under Apache 2.0, and our model weights use a modified OpenRAIL-M license (free for research, personal use, and startups under $2M funding/revenue; cannot be used competitively with our API). To remove the OpenRAIL license requirements, or for broader commercial licensing, visit our [pricing page](https://www.datalab.to/pricing?utm_source=gh-chandra).
+
+# Benchmark table
 
 | **Model**                 |  ArXiv   | Old Scans Math |  Tables  | Old Scans | Headers and Footers | Multi column | Long tiny text | Base |    Overall     | Source |
 |:--------------------------|:--------:|:--------------:|:--------:|:---------:|:-------------------:|:------------:|:--------------:|:----:|:--------------:|:------:|
@@ -168,9 +172,6 @@ VLLM_GPUS=0
 | olmOCR v0.3.0             |   78.6   | 79.9 |   72.9   |   43.9    |      **95.1**       |     77.3     |      81.2      | 98.9 |   78.5 ± 1.1   | olmocr repo |
 | dots.ocr                  |   82.1   | 64.2 |   88.3   |   40.9    |        94.1         |   **82.4**   |      81.2      | 99.5 |   79.1 ± 1.0   | dots.ocr repo |
 
-# Commercial usage
-
-This code is Apache 2.0, and our model weights use a modified OpenRAIL-M license (free for research, personal use, and startups under $2M funding/revenue, cannot be used competitively with our API). To remove the OpenRAIL license requirements, or for broader commercial licensing, visit our pricing page [here](https://www.datalab.to/pricing?utm_source=gh-chandra).
 
 # Credits
 
diff --git a/chandra/model/util.py b/chandra/model/util.py
index 819fb5b..d43c1c6 100644
--- a/chandra/model/util.py
+++ b/chandra/model/util.py
@@ -43,7 +43,10 @@ def scale_to_fit(
 
 
 def detect_repeat_token(
-    predicted_tokens: str, max_repeats: int = 4, window_size: int = 500, cut_from_end: int = 0
+    predicted_tokens: str,
+    max_repeats: int = 4,
+    window_size: int = 500,
+    cut_from_end: int = 0,
 ):
     try:
         predicted_tokens = parse_markdown(predicted_tokens)
@@ -77,7 +80,3 @@ def detect_repeat_token(
             return True
 
     return False
-
-
-def layout_failed(predicted_tokens: str, image: Image.Image):
-    pass
diff --git a/chandra/settings.py b/chandra/settings.py
index 2c59ec3..d9a9898 100644
--- a/chandra/settings.py
+++ b/chandra/settings.py
@@ -22,21 +22,6 @@ class Settings(BaseSettings):
     VLLM_GPUS: str = "0"
     MAX_VLLM_RETRIES: int = 6
 
-    # Transformers settings
-    @computed_field
-    @property
-    def TORCH_DEVICE_MODEL(self) -> str:
-        if self.TORCH_DEVICE is not None:
-            return self.TORCH_DEVICE
-
-        if torch.cuda.is_available():
-            return "cuda"
-
-        if torch.backends.mps.is_available():
-            return "mps"
-
-        return "cpu"
-
     @computed_field
     @property
     def TORCH_DTYPE(self) -> torch.dtype: