3 Commits

Author SHA1 Message Date
Vik Paruchuri
2151833414 Fix file output dir 2025-10-21 11:54:05 -04:00
Vik Paruchuri
8c1bfe277f Set proper batch sizes 2025-10-21 11:43:09 -04:00
Vik Paruchuri
ad6508fbc3 Fix vllm token 2025-10-21 11:33:56 -04:00
3 changed files with 13 additions and 5 deletions

View File

@@ -87,7 +87,7 @@ def save_merged_output(
# Save extracted images if requested
if save_images and result.images:
images_dir = file_output_dir / "images"
images_dir = file_output_dir
images_dir.mkdir(exist_ok=True)
for img_name, pil_image in result.images.items():
@@ -172,7 +172,7 @@ def save_merged_output(
@click.option(
"--batch-size",
type=int,
default=1,
default=None,
help="Number of pages to process in a batch.",
)
@click.option(
@@ -194,6 +194,16 @@ def main(
batch_size: int,
paginate_output: bool,
):
if method == "hf":
click.echo(
"When using '--method hf', ensure that the batch size is set correctly. We will default to batch size of 1."
)
if batch_size is None:
batch_size = 1
elif method == "vllm":
if batch_size is None:
batch_size = 28
click.echo("Chandra CLI - Starting OCR processing")
click.echo(f"Input: {input_path}")
click.echo(f"Output: {output_path}")

View File

@@ -17,8 +17,6 @@ def main():
"-v",
f"{os.path.expanduser('~')}/.cache/huggingface:/root/.cache/huggingface",
"--env",
f"HUGGING_FACE_HUB_TOKEN={os.getenv('HF_TOKEN')}",
"--env",
"VLLM_ATTENTION_BACKEND=TORCH_SDPA",
"-p",
"8000:8000",

View File

@@ -1,6 +1,6 @@
[project]
name = "chandra-ocr"
version = "0.1.3"
version = "0.1.6"
description = "OCR model that converts documents to markdown, HTML, or JSON."
readme = "README.md"
requires-python = ">=3.10"