Fix missing setting

Fix attn impl
2025-11-29 08:33:13 +00:00 · 2025-10-21 11:13:26 -04:00 · 2025-10-21 11:01:02 -04:00
6 changed files with 7 additions and 17 deletions
--- a/README.md
+++ b/README.md
@@ -73,8 +73,6 @@ See full scores [below](#benchmark-table).
 pip install chandra-ocr
 ```

-If you're going to use the huggingface method, we also recommend installing [flash attention](https://github.com/Dao-AILab/flash-attention).
-
 ### From Source

 ```bash
--- a/chandra/model/hf.py
+++ b/chandra/model/hf.py
@@ -73,8 +73,8 @@ def load_model():
        "dtype": settings.TORCH_DTYPE,
        "device_map": device_map,
    }
-    if settings.TORCH_ATTN:
-        kwargs["attn_implementation"] = settings.TORCH_ATTN
+    if settings.TORCH_ATTN_IMPLEMENTATION:
+        kwargs["attn_implementation"] = settings.TORCH_ATTN_IMPLEMENTATION

    model = Qwen3VLForConditionalGeneration.from_pretrained(
        settings.MODEL_CHECKPOINT, **kwargs
--- a/chandra/scripts/cli.py
+++ b/chandra/scripts/cli.py
@@ -172,7 +172,7 @@ def save_merged_output(
@click.option(
    "--batch-size",
    type=int,
-    default=None,
+    default=1,
    help="Number of pages to process in a batch.",
 )
@click.option(
@@ -194,16 +194,6 @@ def main(
    batch_size: int,
    paginate_output: bool,
 ):
-    if method == "hf":
-        click.echo(
-            "When using '--method hf', ensure that the batch size is set correctly.  We will default to batch size of 1."
-        )
-        if batch_size is None:
-            batch_size = 1
-    elif method == "vllm":
-        if batch_size is None:
-            batch_size = 28
-
    click.echo("Chandra CLI - Starting OCR processing")
    click.echo(f"Input: {input_path}")
    click.echo(f"Output: {output_path}")
--- a/chandra/scripts/vllm.py
+++ b/chandra/scripts/vllm.py
@@ -17,6 +17,8 @@ def main():
        "-v",
        f"{os.path.expanduser('~')}/.cache/huggingface:/root/.cache/huggingface",
        "--env",
+        f"HUGGING_FACE_HUB_TOKEN={os.getenv('HF_TOKEN')}",
+        "--env",
        "VLLM_ATTENTION_BACKEND=TORCH_SDPA",
        "-p",
        "8000:8000",
--- a/chandra/settings.py
+++ b/chandra/settings.py
@@ -13,7 +13,7 @@ class Settings(BaseSettings):
    MODEL_CHECKPOINT: str = "datalab-to/chandra"
    TORCH_DEVICE: str | None = None
    MAX_OUTPUT_TOKENS: int = 8192
-    TORCH_ATTN: str | None = None
+    TORCH_ATTN_IMPLEMENTATION: str | None = None

    # vLLM server settings
    VLLM_API_KEY: str = "EMPTY"
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "chandra-ocr"
-version = "0.1.5"
+version = "0.1.2"
 description = "OCR model that converts documents to markdown, HTML, or JSON."
 readme = "README.md"
 requires-python = ">=3.10"
Author	SHA1	Message	Date
Tarun Menta	eaa31d169d	Fix missing setting	2025-10-21 11:13:26 -04:00
Vik Paruchuri	63c88d644d	Fix attn impl	2025-10-21 11:01:02 -04:00