mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-07 21:38:21 +00:00
fix(claude): clamp max_tokens to model limit in normalizeClaudeBudget
When adjustedBudget < minBudget, the previous fix blindly set max_tokens = budgetTokens+1 which could exceed MaxCompletionTokens. Now: cap max_tokens at MaxCompletionTokens, recalculate budget, and disable thinking entirely if constraints are unsatisfiable. Add unit tests covering raise, clamp, disable, and no-op scenarios.
This commit is contained in:
@@ -174,7 +174,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
||||
// Ensure the request satisfies Claude constraints:
|
||||
// 1) Determine effective max_tokens (request overrides model default)
|
||||
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
|
||||
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
|
||||
// 3) If the adjusted budget falls below the model minimum, try raising max_tokens
|
||||
// (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable
|
||||
// 4) If max_tokens came from model default, write it back into the request
|
||||
|
||||
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
||||
@@ -193,10 +194,28 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
||||
minBudget = modelInfo.Thinking.Min
|
||||
}
|
||||
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
||||
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
||||
// increase max_tokens to accommodate the original budget instead of leaving the
|
||||
// request unchanged (which would cause a 400 error from the API).
|
||||
body, _ = sjson.SetBytes(body, "max_tokens", budgetTokens+1)
|
||||
// Enforcing budget_tokens < max_tokens pushed the budget below the model minimum.
|
||||
// Try raising max_tokens to fit the original budget.
|
||||
needed := budgetTokens + 1
|
||||
maxAllowed := 0
|
||||
if modelInfo != nil {
|
||||
maxAllowed = modelInfo.MaxCompletionTokens
|
||||
}
|
||||
if maxAllowed > 0 && needed > maxAllowed {
|
||||
// Cannot use original budget; cap max_tokens at model limit.
|
||||
needed = maxAllowed
|
||||
}
|
||||
cappedBudget := needed - 1
|
||||
if cappedBudget < minBudget {
|
||||
// Impossible to satisfy both budget >= minBudget and budget < max_tokens
|
||||
// within the model's completion limit. Disable thinking entirely.
|
||||
body, _ = sjson.DeleteBytes(body, "thinking")
|
||||
return body
|
||||
}
|
||||
body, _ = sjson.SetBytes(body, "max_tokens", needed)
|
||||
if cappedBudget != budgetTokens {
|
||||
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget)
|
||||
}
|
||||
return body
|
||||
}
|
||||
|
||||
|
||||
99
internal/thinking/provider/claude/apply_test.go
Normal file
99
internal/thinking/provider/claude/apply_test.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package claude
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
func TestNormalizeClaudeBudget_RaisesMaxTokens(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":1000,"thinking":{"type":"enabled","budget_tokens":5000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 5000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 5001 {
|
||||
t.Fatalf("max_tokens = %d, want 5001, body=%s", maxTok, string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_ClampsToModelMax(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":200000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 200000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 64000 {
|
||||
t.Fatalf("max_tokens = %d, want 64000 (capped to model limit), body=%s", maxTok, string(out))
|
||||
}
|
||||
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||
if budget != 63999 {
|
||||
t.Fatalf("budget_tokens = %d, want 63999 (max_tokens-1), body=%s", budget, string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_DisablesThinkingWhenUnsatisfiable(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 1000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":2000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 2000, modelInfo)
|
||||
|
||||
if gjson.GetBytes(out, "thinking").Exists() {
|
||||
t.Fatalf("thinking should be removed when constraints are unsatisfiable, body=%s", string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_NoClamping(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 64000,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":16000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 16000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 32000 {
|
||||
t.Fatalf("max_tokens should remain 32000, got %d, body=%s", maxTok, string(out))
|
||||
}
|
||||
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||
if budget != 16000 {
|
||||
t.Fatalf("budget_tokens should remain 16000, got %d, body=%s", budget, string(out))
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeClaudeBudget_AdjustsBudgetToMaxMinus1(t *testing.T) {
|
||||
a := &Applier{}
|
||||
modelInfo := ®istry.ModelInfo{
|
||||
MaxCompletionTokens: 8192,
|
||||
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||
}
|
||||
body := []byte(`{"max_tokens":8192,"thinking":{"type":"enabled","budget_tokens":10000}}`)
|
||||
|
||||
out := a.normalizeClaudeBudget(body, 10000, modelInfo)
|
||||
|
||||
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||
if maxTok != 8192 {
|
||||
t.Fatalf("max_tokens = %d, want 8192 (unchanged), body=%s", maxTok, string(out))
|
||||
}
|
||||
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||
if budget != 8191 {
|
||||
t.Fatalf("budget_tokens = %d, want 8191 (max_tokens-1), body=%s", budget, string(out))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user