diff --git a/internal/registry/model_definitions_static_data.go b/internal/registry/model_definitions_static_data.go index f70d3984..dcf5debf 100644 --- a/internal/registry/model_definitions_static_data.go +++ b/internal/registry/model_definitions_static_data.go @@ -37,7 +37,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.6 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false, Levels: []string{"low", "medium", "high"}}, }, { ID: "claude-opus-4-6", @@ -49,7 +49,7 @@ func GetClaudeModels() []*ModelInfo { Description: "Premium model combining maximum intelligence with practical performance", ContextLength: 1000000, MaxCompletionTokens: 128000, - Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false, Levels: []string{"low", "medium", "high", "max"}}, }, { ID: "claude-opus-4-5-20251101", diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 75ea04e1..805d31dd 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -634,6 +634,12 @@ func disableThinkingIfToolChoiceForced(body []byte) []byte { if toolChoiceType == "any" || toolChoiceType == "tool" { // Remove thinking configuration entirely to avoid API error body, _ = sjson.DeleteBytes(body, "thinking") + // Adaptive thinking may also set output_config.effort; remove it to avoid + // leaking thinking controls when tool_choice forces tool use. + body, _ = sjson.DeleteBytes(body, "output_config.effort") + if oc := gjson.GetBytes(body, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + body, _ = sjson.DeleteBytes(body, "output_config") + } } return body } diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go index 8a5a1d7d..16f1a2f9 100644 --- a/internal/thinking/apply.go +++ b/internal/thinking/apply.go @@ -353,6 +353,26 @@ func extractClaudeConfig(body []byte) ThinkingConfig { if thinkingType == "disabled" { return ThinkingConfig{Mode: ModeNone, Budget: 0} } + if thinkingType == "adaptive" || thinkingType == "auto" { + // Claude adaptive thinking uses output_config.effort (low/medium/high/max). + // We only treat it as a thinking config when effort is explicitly present; + // otherwise we passthrough and let upstream defaults apply. + if effort := gjson.GetBytes(body, "output_config.effort"); effort.Exists() && effort.Type == gjson.String { + value := strings.ToLower(strings.TrimSpace(effort.String())) + if value == "" { + return ThinkingConfig{} + } + switch value { + case "none": + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case "auto": + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + default: + return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)} + } + } + return ThinkingConfig{} + } // Check budget_tokens if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() { diff --git a/internal/thinking/convert.go b/internal/thinking/convert.go index 776ccef6..8374ddbb 100644 --- a/internal/thinking/convert.go +++ b/internal/thinking/convert.go @@ -16,6 +16,9 @@ var levelToBudgetMap = map[string]int{ "medium": 8192, "high": 24576, "xhigh": 32768, + // "max" is used by Claude adaptive thinking effort. We map it to a large budget + // and rely on per-model clamping when converting to budget-only providers. + "max": 128000, } // ConvertLevelToBudget converts a thinking level to a budget value. @@ -31,6 +34,7 @@ var levelToBudgetMap = map[string]int{ // - medium → 8192 // - high → 24576 // - xhigh → 32768 +// - max → 128000 // // Returns: // - budget: The converted budget value diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go index 3c74d514..275be469 100644 --- a/internal/thinking/provider/claude/apply.go +++ b/internal/thinking/provider/claude/apply.go @@ -1,8 +1,10 @@ // Package claude implements thinking configuration scaffolding for Claude models. // -// Claude models use the thinking.budget_tokens format with values in the range -// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5), -// while older models do not. +// Claude models support two thinking control styles: +// - Manual thinking: thinking.type="enabled" with thinking.budget_tokens (token budget) +// - Adaptive thinking (Claude 4.6): thinking.type="adaptive" with output_config.effort (low/medium/high/max) +// +// Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5), while older models do not. // See: _bmad-output/planning-artifacts/architecture.md#Epic-6 package claude @@ -34,7 +36,11 @@ func init() { // - Budget clamping to model range // - ZeroAllowed constraint enforcement // -// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged. +// Apply processes: +// - ModeBudget: manual thinking budget_tokens +// - ModeLevel: adaptive thinking effort (Claude 4.6) +// - ModeAuto: provider default adaptive/manual behavior +// - ModeNone: disabled // // Expected output format when enabled: // @@ -45,6 +51,17 @@ func init() { // } // } // +// Expected output format for adaptive: +// +// { +// "thinking": { +// "type": "adaptive" +// }, +// "output_config": { +// "effort": "high" +// } +// } +// // Expected output format when disabled: // // { @@ -60,30 +77,91 @@ func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo * return body, nil } - // Only process ModeBudget and ModeNone; other modes pass through - // (caller should use ValidateConfig first to normalize modes) - if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone { - return body, nil - } - if len(body) == 0 || !gjson.ValidBytes(body) { body = []byte(`{}`) } - // Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced) - // Decide enabled/disabled based on budget value - if config.Budget == 0 { + supportsAdaptive := modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 + + switch config.Mode { + case thinking.ModeNone: result, _ := sjson.SetBytes(body, "thinking.type", "disabled") result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } return result, nil + + case thinking.ModeLevel: + // Adaptive thinking effort is only valid when the model advertises discrete levels. + // (Claude 4.6 uses output_config.effort.) + if supportsAdaptive && config.Level != "" { + result, _ := sjson.SetBytes(body, "thinking.type", "adaptive") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.SetBytes(result, "output_config.effort", string(config.Level)) + return result, nil + } + + // Fallback for non-adaptive Claude models: convert level to budget_tokens. + if budget, ok := thinking.ConvertLevelToBudget(string(config.Level)); ok { + config.Mode = thinking.ModeBudget + config.Budget = budget + config.Level = "" + } else { + return body, nil + } + fallthrough + + case thinking.ModeBudget: + // Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced). + // Decide enabled/disabled based on budget value. + if config.Budget == 0 { + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } + return result, nil + } + + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } + + // Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint). + result = a.normalizeClaudeBudget(result, config.Budget, modelInfo) + return result, nil + + case thinking.ModeAuto: + // For Claude 4.6 models, auto maps to adaptive thinking with upstream defaults. + if supportsAdaptive { + result, _ := sjson.SetBytes(body, "thinking.type", "adaptive") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + // Explicit effort is optional for adaptive thinking; omit it to allow upstream default. + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } + return result, nil + } + + // Legacy fallback: enable thinking without specifying budget_tokens. + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } + return result, nil + + default: + return body, nil } - - result, _ := sjson.SetBytes(body, "thinking.type", "enabled") - result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) - - // Ensure max_tokens > thinking.budget_tokens (Anthropic API constraint) - result = a.normalizeClaudeBudget(result, config.Budget, modelInfo) - return result, nil } // normalizeClaudeBudget applies Claude-specific constraints to ensure max_tokens > budget_tokens. @@ -141,7 +219,7 @@ func (a *Applier) effectiveMaxTokens(body []byte, modelInfo *registry.ModelInfo) } func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { - if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto && config.Mode != thinking.ModeLevel { return body, nil } @@ -153,14 +231,36 @@ func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, case thinking.ModeNone: result, _ := sjson.SetBytes(body, "thinking.type", "disabled") result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } return result, nil case thinking.ModeAuto: result, _ := sjson.SetBytes(body, "thinking.type", "enabled") result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } + return result, nil + case thinking.ModeLevel: + // For user-defined models, interpret ModeLevel as Claude adaptive thinking effort. + // Upstream is responsible for validating whether the target model supports it. + if config.Level == "" { + return body, nil + } + result, _ := sjson.SetBytes(body, "thinking.type", "adaptive") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + result, _ = sjson.SetBytes(result, "output_config.effort", string(config.Level)) return result, nil default: result, _ := sjson.SetBytes(body, "thinking.type", "enabled") result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + result, _ = sjson.DeleteBytes(result, "output_config.effort") + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } return result, nil } } diff --git a/internal/thinking/strip.go b/internal/thinking/strip.go index 514ab3f8..85498c01 100644 --- a/internal/thinking/strip.go +++ b/internal/thinking/strip.go @@ -30,7 +30,7 @@ func StripThinkingConfig(body []byte, provider string) []byte { var paths []string switch provider { case "claude": - paths = []string{"thinking"} + paths = []string{"thinking", "output_config.effort"} case "gemini": paths = []string{"generationConfig.thinkingConfig"} case "gemini-cli", "antigravity": @@ -59,5 +59,12 @@ func StripThinkingConfig(body []byte, provider string) []byte { for _, path := range paths { result, _ = sjson.DeleteBytes(result, path) } + + // Avoid leaving an empty output_config object for Claude when effort was the only field. + if provider == "claude" { + if oc := gjson.GetBytes(result, "output_config"); oc.Exists() && oc.IsObject() && len(oc.Map()) == 0 { + result, _ = sjson.DeleteBytes(result, "output_config") + } + } return result } diff --git a/internal/thinking/suffix.go b/internal/thinking/suffix.go index 275c0856..7f2959da 100644 --- a/internal/thinking/suffix.go +++ b/internal/thinking/suffix.go @@ -109,7 +109,7 @@ func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) { // ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level. // // This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level. -// Only discrete effort levels are valid: minimal, low, medium, high, xhigh. +// Only discrete effort levels are valid: minimal, low, medium, high, xhigh, max. // Level matching is case-insensitive. // // Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix @@ -140,6 +140,8 @@ func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) { return LevelHigh, true case "xhigh": return LevelXHigh, true + case "max": + return LevelMax, true default: return "", false } diff --git a/internal/thinking/types.go b/internal/thinking/types.go index 6ae1e088..5e45fc6b 100644 --- a/internal/thinking/types.go +++ b/internal/thinking/types.go @@ -54,6 +54,9 @@ const ( LevelHigh ThinkingLevel = "high" // LevelXHigh sets extra-high thinking effort LevelXHigh ThinkingLevel = "xhigh" + // LevelMax sets maximum thinking effort. + // This is currently used by Claude 4.6 adaptive thinking (opus supports "max"). + LevelMax ThinkingLevel = "max" ) // ThinkingConfig represents a unified thinking configuration. diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go index f082ad56..7f5c57c5 100644 --- a/internal/thinking/validate.go +++ b/internal/thinking/validate.go @@ -201,7 +201,7 @@ func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupp } // standardLevelOrder defines the canonical ordering of thinking levels from lowest to highest. -var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh} +var standardLevelOrder = []ThinkingLevel{LevelMinimal, LevelLow, LevelMedium, LevelHigh, LevelXHigh, LevelMax} // clampLevel clamps the given level to the nearest supported level. // On tie, prefers the lower level. diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index f94825b2..7155d1e0 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -14,6 +14,7 @@ import ( "strings" "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -68,17 +69,63 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream if v := root.Get("reasoning_effort"); v.Exists() { effort := strings.ToLower(strings.TrimSpace(v.String())) if effort != "" { - budget, ok := thinking.ConvertLevelToBudget(effort) - if ok { - switch budget { - case 0: + hasLevel := func(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false + } + mi := registry.LookupModelInfo(modelName, "claude") + supportsAdaptive := mi != nil && mi.Thinking != nil && len(mi.Thinking.Levels) > 0 + supportsMax := supportsAdaptive && hasLevel(mi.Thinking.Levels, "max") + + // Claude 4.6 supports adaptive thinking with output_config.effort. + if supportsAdaptive { + switch effort { + case "none": out, _ = sjson.Set(out, "thinking.type", "disabled") - case -1: - out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + out, _ = sjson.Delete(out, "output_config.effort") + case "auto": + out, _ = sjson.Set(out, "thinking.type", "adaptive") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + out, _ = sjson.Delete(out, "output_config.effort") default: - if budget > 0 { + // Map non-Claude effort levels into Claude 4.6 effort vocabulary. + switch effort { + case "minimal": + effort = "low" + case "xhigh": + if supportsMax { + effort = "max" + } else { + effort = "high" + } + case "max": + if !supportsMax { + effort = "high" + } + } + out, _ = sjson.Set(out, "thinking.type", "adaptive") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + out, _ = sjson.Set(out, "output_config.effort", effort) + } + } else { + // Legacy/manual thinking (budget_tokens). + budget, ok := thinking.ConvertLevelToBudget(effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: out, _ = sjson.Set(out, "thinking.type", "enabled") - out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } } } } diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index 33a81124..cd1b8885 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -9,6 +9,7 @@ import ( "strings" "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -56,17 +57,63 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte if v := root.Get("reasoning.effort"); v.Exists() { effort := strings.ToLower(strings.TrimSpace(v.String())) if effort != "" { - budget, ok := thinking.ConvertLevelToBudget(effort) - if ok { - switch budget { - case 0: + hasLevel := func(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false + } + mi := registry.LookupModelInfo(modelName, "claude") + supportsAdaptive := mi != nil && mi.Thinking != nil && len(mi.Thinking.Levels) > 0 + supportsMax := supportsAdaptive && hasLevel(mi.Thinking.Levels, "max") + + // Claude 4.6 supports adaptive thinking with output_config.effort. + if supportsAdaptive { + switch effort { + case "none": out, _ = sjson.Set(out, "thinking.type", "disabled") - case -1: - out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + out, _ = sjson.Delete(out, "output_config.effort") + case "auto": + out, _ = sjson.Set(out, "thinking.type", "adaptive") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + out, _ = sjson.Delete(out, "output_config.effort") default: - if budget > 0 { + // Map non-Claude effort levels into Claude 4.6 effort vocabulary. + switch effort { + case "minimal": + effort = "low" + case "xhigh": + if supportsMax { + effort = "max" + } else { + effort = "high" + } + case "max": + if !supportsMax { + effort = "high" + } + } + out, _ = sjson.Set(out, "thinking.type", "adaptive") + out, _ = sjson.Delete(out, "thinking.budget_tokens") + out, _ = sjson.Set(out, "output_config.effort", effort) + } + } else { + // Legacy/manual thinking (budget_tokens). + budget, ok := thinking.ConvertLevelToBudget(effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: out, _ = sjson.Set(out, "thinking.type", "enabled") - out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } } } } diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 739b39e9..b18cc132 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -231,9 +231,22 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) } } case "adaptive", "auto": - // Claude adaptive/auto means "enable with max capacity"; keep it as highest level - // and let ApplyThinking normalize per target model capability. - reasoningEffort = string(thinking.LevelXHigh) + // Adaptive thinking can carry an explicit effort in output_config.effort (Claude 4.6). + // Preserve it when present; otherwise keep the previous "max capacity" sentinel. + effort := "" + if v := rootResult.Get("output_config.effort"); v.Exists() && v.Type == gjson.String { + effort = strings.ToLower(strings.TrimSpace(v.String())) + } + switch effort { + case "low", "medium", "high": + reasoningEffort = effort + case "max": + reasoningEffort = string(thinking.LevelXHigh) + default: + // Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it + // to model-specific max capability. + reasoningEffort = string(thinking.LevelXHigh) + } case "disabled": if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { reasoningEffort = effort diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index e3efb83c..397625cc 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -76,9 +76,22 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream } } case "adaptive", "auto": - // Claude adaptive/auto means "enable with max capacity"; keep it as highest level - // and let ApplyThinking normalize per target model capability. - out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh)) + // Adaptive thinking can carry an explicit effort in output_config.effort (Claude 4.6). + // Preserve it when present; otherwise keep the previous "max capacity" sentinel. + effort := "" + if v := root.Get("output_config.effort"); v.Exists() && v.Type == gjson.String { + effort = strings.ToLower(strings.TrimSpace(v.String())) + } + switch effort { + case "low", "medium", "high": + out, _ = sjson.Set(out, "reasoning_effort", effort) + case "max": + out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh)) + default: + // Keep adaptive/auto as a high level sentinel; ApplyThinking resolves it + // to model-specific max capability. + out, _ = sjson.Set(out, "reasoning_effort", string(thinking.LevelXHigh)) + } case "disabled": if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort)