From 5ef2d59e0591bf9e5f683a0bc769b3ec59bbd973 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Sat, 13 Dec 2025 08:18:06 +0800
Subject: [PATCH] fix(thinking): gate reasoning effort by model support

Map OpenAI reasoning effort to Claude thinking only for models that
support thinking and use budget tokens (not level-based thinking).
Also add an "xhigh" effort mapping, adjust the minimal/low budgets,
and add raw-payload conversion tests across protocols and models.
---
 .../chat-completions/claude_openai_request.go |   5 +-
 .../claude_openai-responses_request.go        |   9 +-
 test/thinking_conversion_test.go              | 273 +++++++++++++++++-
 3 files changed, 281 insertions(+), 6 deletions(-)

diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
index b3384ecc..9825c661 100644
--- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go
+++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go
@@ -16,6 +16,7 @@ import (
 	"strings"
 
 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -65,7 +66,7 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 
 	root := gjson.ParseBytes(rawJSON)
 
-	if v := root.Get("reasoning_effort"); v.Exists() {
+	if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		out, _ = sjson.Set(out, "thinking.type", "enabled")
 
 		switch v.String() {
@@ -77,6 +78,8 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream
 			out, _ = sjson.Set(out, "thinking.budget_tokens", 8192)
 		case "high":
 			out, _ = sjson.Set(out, "thinking.budget_tokens", 24576)
+		case "xhigh":
+			out, _ = sjson.Set(out, "thinking.budget_tokens", 32768)
 		}
 	}
 
diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
index 764bb5c9..4a19bb92 100644
--- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go
+++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go
@@ -10,6 +10,7 @@ import (
 	"strings"
 
 	"github.com/google/uuid"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -52,20 +53,22 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte
 
 	root := gjson.ParseBytes(rawJSON)
 
-	if v := root.Get("reasoning.effort"); v.Exists() {
+	if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) {
 		out, _ = sjson.Set(out, "thinking.type", "enabled")
 
 		switch v.String() {
 		case "none":
 			out, _ = sjson.Set(out, "thinking.type", "disabled")
 		case "minimal":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 1024)
+			out, _ = sjson.Set(out, "thinking.budget_tokens", 512)
 		case "low":
-			out, _ = sjson.Set(out, "thinking.budget_tokens", 4096)
+			out, _ = sjson.Set(out, "thinking.budget_tokens", 1024)
 		case "medium":
 			out, _ = sjson.Set(out, "thinking.budget_tokens", 8192)
 		case "high":
 			out, _ = sjson.Set(out, "thinking.budget_tokens", 24576)
+		case "xhigh":
+			out, _ = sjson.Set(out, "thinking.budget_tokens", 32768)
 		}
 	}
 
diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go
index c2f4aa8d..a1462611 100644
--- a/test/thinking_conversion_test.go
+++ b/test/thinking_conversion_test.go
@@ -81,8 +81,10 @@ func applyReasoningEffortMetadataLocal(payload []byte, metadata map[string]any,
 		return payload
 	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
-			return updated
+		if util.ModelUsesThinkingLevels(model) {
+			if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
+				return updated
+			}
 		}
 	}
 	if util.ModelUsesThinkingLevels(model) {
@@ -523,6 +525,273 @@ func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) {
 	}
 }
 
+// buildRawPayloadWithThinking creates a payload with thinking parameters already in the body.
+// This tests the path where thinking comes from the raw payload, not model suffix.
+func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte {
+	switch fromProtocol {
+	case "gemini":
+		base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model)
+		if budget, ok := thinkingParam.(int); ok {
+			base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget)
+		}
+		return []byte(base)
+	case "openai-response":
+		base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model)
+		if effort, ok := thinkingParam.(string); ok && effort != "" {
+			base, _ = sjson.Set(base, "reasoning.effort", effort)
+		}
+		return []byte(base)
+	case "openai":
+		base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
+		if effort, ok := thinkingParam.(string); ok && effort != "" {
+			base, _ = sjson.Set(base, "reasoning_effort", effort)
+		}
+		return []byte(base)
+	case "claude":
+		base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)
+		if budget, ok := thinkingParam.(int); ok && budget > 0 {
+			base, _ = sjson.Set(base, "thinking.type", "enabled")
+			base, _ = sjson.Set(base, "thinking.budget_tokens", budget)
+		}
+		return []byte(base)
+	default:
+		return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model))
+	}
+}
+
+// buildBodyForProtocolWithRawThinking translates payload with raw thinking params.
+func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) {
+	t.Helper()
+	raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam)
+	stream := fromProtocol != toProtocol
+
+	body := sdktranslator.TranslateRequest(
+		sdktranslator.FromString(fromProtocol),
+		sdktranslator.FromString(toProtocol),
+		model,
+		raw,
+		stream,
+	)
+
+	var err error
+	switch toProtocol {
+	case "gemini":
+		body = util.ApplyDefaultThinkingIfNeeded(model, body)
+		body = util.NormalizeGeminiThinkingBudget(model, body)
+		body = util.StripThinkingConfigIfUnsupported(model, body)
+	case "claude":
+		// For raw payload, Claude thinking is passed through by translator
+		// No additional processing needed as thinking is already in body
+	case "openai":
+		body = normalizeThinkingConfigLocal(body, model)
+		err = validateThinkingConfigLocal(body, model)
+	case "codex":
+		body, err = normalizeCodexPayload(body, model)
+	}
+
+	body, _ = sjson.SetBytes(body, "model", model)
+	body = filterThinkingBody(toProtocol, body, model, model)
+	return body, err
+}
+
+func TestRawPayloadThinkingConversions(t *testing.T) {
+	cleanup := registerCoreModels(t)
+	defer cleanup()
+
+	models := []string{
+		"gpt-5",             // supports levels (low/medium/high)
+		"gemini-2.5-pro",    // supports numeric budget
+		"qwen3-coder-flash", // no thinking support
+	}
+	fromProtocols := []string{"openai", "claude", "gemini", "openai-response"}
+	toProtocols := []string{"gemini", "claude", "openai", "codex"}
+
+	type scenario struct {
+		name          string
+		thinkingParam any // int for budget, string for effort level
+	}
+
+	for _, model := range models {
+		supportsThinking := util.ModelSupportsThinking(model)
+		usesLevels := util.ModelUsesThinkingLevels(model)
+
+		for _, from := range fromProtocols {
+			var cases []scenario
+			switch from {
+			case "openai", "openai-response":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+					{name: "effort-low", thinkingParam: "low"},
+					{name: "effort-medium", thinkingParam: "medium"},
+					{name: "effort-high", thinkingParam: "high"},
+					{name: "effort-invalid-xhigh", thinkingParam: "xhigh"},
+					{name: "effort-invalid-foo", thinkingParam: "foo"},
+				}
+			case "gemini":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+					{name: "budget-1024", thinkingParam: 1024},
+					{name: "budget-8192", thinkingParam: 8192},
+					{name: "budget-16384", thinkingParam: 16384},
+				}
+			case "claude":
+				cases = []scenario{
+					{name: "no-thinking", thinkingParam: nil},
+					{name: "budget-1024", thinkingParam: 1024},
+					{name: "budget-8192", thinkingParam: 8192},
+					{name: "budget-16384", thinkingParam: 16384},
+				}
+			}
+
+			for _, to := range toProtocols {
+				if from == to {
+					continue
+				}
+				t.Logf("═══════════════════════════════════════════════════════════════════════════════")
+				t.Logf(" RAW PAYLOAD: %s -> %s | model: %s", from, to, model)
+				t.Logf("═══════════════════════════════════════════════════════════════════════════════")
+
+				for _, cs := range cases {
+					from := from
+					to := to
+					cs := cs
+					testName := fmt.Sprintf("raw/%s->%s/%s/%s", from, to, model, cs.name)
+					t.Run(testName, func(t *testing.T) {
+						expectPresent, expectValue, expectErr := func() (bool, string, bool) {
+							if cs.thinkingParam == nil {
+								// No thinking param provided
+								if to == "codex" && from != "openai-response" {
+									// Codex translators default to medium
+									if supportsThinking && usesLevels {
+										return true, "medium", false
+									}
+								}
+								return false, "", false
+							}
+							if !supportsThinking {
+								return false, "", false
+							}
+
+							switch to {
+							case "gemini":
+								// Gemini expects numeric budget
+								if budget, ok := cs.thinkingParam.(int); ok {
+									norm := util.NormalizeThinkingBudget(model, budget)
+									return true, fmt.Sprintf("%d", norm), false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if b, okB := util.ThinkingEffortToBudget(model, effort); okB {
+										return true, fmt.Sprintf("%d", b), false
+									}
+								}
+								return false, "", false
+							case "claude":
+								// Claude expects numeric budget
+								if budget, ok := cs.thinkingParam.(int); ok && budget > 0 {
+									norm := util.NormalizeThinkingBudget(model, budget)
+									return true, fmt.Sprintf("%d", norm), false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if b, okB := util.ThinkingEffortToBudget(model, effort); okB && b > 0 {
+										return true, fmt.Sprintf("%d", b), false
+									}
+								}
+								return false, "", false
+							case "openai":
+								if !usesLevels {
+									return false, "", false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
+										return true, normalized, false
+									}
+									return false, "", true // invalid level
+								}
+								if budget, ok := cs.thinkingParam.(int); ok {
+									if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+										return true, mapped, false
+									}
+								}
+								return false, "", false
+							case "codex":
+								if !usesLevels {
+									return false, "", false
+								}
+								if effort, ok := cs.thinkingParam.(string); ok && effort != "" {
+									if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN {
+										return true, normalized, false
+									}
+									return false, "", true
+								}
+								if budget, ok := cs.thinkingParam.(int); ok {
+									if mapped, okM := util.OpenAIThinkingBudgetToEffort(model, budget); okM && mapped != "" {
+										return true, mapped, false
+									}
+								}
+								// thinkingParam was non-nil but couldn't map - no default medium
+								return false, "", false
+							}
+							return false, "", false
+						}()
+
+						body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam)
+						actualPresent, actualValue := func() (bool, string) {
+							path := ""
+							switch to {
+							case "gemini":
+								path = "generationConfig.thinkingConfig.thinkingBudget"
+							case "claude":
+								path = "thinking.budget_tokens"
+							case "openai":
+								path = "reasoning_effort"
+							case "codex":
+								path = "reasoning.effort"
+							}
+							if path == "" {
+								return false, ""
+							}
+							val := gjson.GetBytes(body, path)
+							if to == "codex" && !val.Exists() {
+								reasoning := gjson.GetBytes(body, "reasoning")
+								if reasoning.Exists() {
+									val = reasoning.Get("effort")
+								}
+							}
+							if !val.Exists() {
+								return false, ""
+							}
+							if val.Type == gjson.Number {
+								return true, fmt.Sprintf("%d", val.Int())
+							}
+							return true, val.String()
+						}()
+
+						t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s",
+							from, to, model, cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body))
+
+						if expectErr {
+							if err == nil {
+								t.Fatalf("expected validation error but got none, body=%s", string(body))
+							}
+							return
+						}
+						if err != nil {
+							t.Fatalf("unexpected error: %v body=%s", err, string(body))
+						}
+
+						if expectPresent != actualPresent {
+							t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body))
+						}
+						if expectPresent && expectValue != actualValue {
+							t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body))
+						}
+					})
+				}
+			}
+		}
+	}
+}
+
 func TestOpenAIThinkingBudgetToEffortRanges(t *testing.T) {
 	cleanup := registerCoreModels(t)
 	defer cleanup()