From d1cde9b608d6a61f5b96cf139240973d7ca9c70b Mon Sep 17 00:00:00 2001 From: Zach Nussbaum Date: Tue, 4 Nov 2025 13:16:57 -0500 Subject: [PATCH] fix: respect max output tokens --- chandra/model/vllm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chandra/model/vllm.py b/chandra/model/vllm.py index 4e36f0e..5528571 100644 --- a/chandra/model/vllm.py +++ b/chandra/model/vllm.py @@ -71,7 +71,7 @@ def generate_vllm( completion = client.chat.completions.create( model=model_name, messages=[{"role": "user", "content": content}], - max_tokens=settings.MAX_OUTPUT_TOKENS, + max_tokens=max_output_tokens, temperature=temperature, top_p=top_p, )