diff --git a/chandra/model/vllm.py b/chandra/model/vllm.py
index 4e36f0e..5528571 100644
--- a/chandra/model/vllm.py
+++ b/chandra/model/vllm.py
@@ -71,7 +71,7 @@ def generate_vllm(
     completion = client.chat.completions.create(
         model=model_name,
         messages=[{"role": "user", "content": content}],
-        max_tokens=settings.MAX_OUTPUT_TOKENS,
+        max_tokens=max_output_tokens,
         temperature=temperature,
         top_p=top_p,
     )