Merge pull request #24 from datalab-to/tokens

fix: respect max output tokens
Committed by Vik Paruchuri on 2025-11-04 13:19:06 -05:00 (via GitHub)


@@ -71,7 +71,7 @@ def generate_vllm(
     completion = client.chat.completions.create(
         model=model_name,
         messages=[{"role": "user", "content": content}],
-        max_tokens=settings.MAX_OUTPUT_TOKENS,
+        max_tokens=max_output_tokens,
         temperature=temperature,
         top_p=top_p,
     )
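
For context, here is a minimal sketch of what the patched `generate_vllm` might look like end to end, with `max_output_tokens` threaded through as a parameter instead of read from the global `settings` object. The function signature, defaults, and return handling are assumptions for illustration; only the `max_tokens=max_output_tokens` change is taken from the diff itself.

```python
from openai import OpenAI


def generate_vllm(
    client: OpenAI,
    model_name: str,
    content: str,
    max_output_tokens: int,  # caller-supplied cap; replaces settings.MAX_OUTPUT_TOKENS
    temperature: float = 0.0,
    top_p: float = 1.0,
) -> str:
    # Pass the per-call token limit straight to the completion request,
    # so each caller can set its own output budget.
    completion = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": content}],
        max_tokens=max_output_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    return completion.choices[0].message.content
```

Making the limit a parameter means the cap is decided at the call site rather than fixed at import time, which is what "respect max output tokens" asks for.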