From e332419081774d0504dd8e676d18f7fceb80ab21 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 22:49:51 +0800 Subject: [PATCH 1/4] feat(registry): add thinking support for gemini-2.5-computer-use-preview model --- internal/registry/model_definitions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index ed4d1c21..bd6b713a 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -773,7 +773,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { return map[string]*AntigravityModelConfig{ "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"}, "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"}, - "gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"}, + "gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"}, "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"}, "gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"}, "gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"}, From 89db4e94810fb470cad4dfbc34e515ee329fffbf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 31 Dec 2025 16:10:51 +0800 Subject: [PATCH 2/4] fix(thinking): use model alias for thinking config resolution in mapped models --- .../runtime/executor/antigravity_executor.go | 8 ++--- .../runtime/executor/gemini_cli_executor.go | 4 +-- internal/runtime/executor/payload_helpers.go | 18 +++++++---- internal/util/gemini_thinking.go | 32 +++++++++++++------ sdk/cliproxy/auth/model_name_mappings.go | 4 ++- 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 950141f0..96ee18d0 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -98,7 +98,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) @@ -193,7 +193,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, true) translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) @@ -529,7 +529,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) @@ -698,7 +698,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut for idx, baseURL := range baseURLs { payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload) + payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload) payload = normalizeAntigravityThinking(req.Model, payload, isClaude) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index a3b75839..74ded2a6 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -80,7 +80,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -219,7 +219,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index adb224a8..9fffb06d 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -14,15 +14,18 @@ import ( // ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) + // Use the alias from metadata if available, as it's registered in the global registry + // with thinking metadata; the upstream model name may not be registered. + lookupModel := util.ResolveOriginalModel(model, metadata) + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload } - if !util.ModelSupportsThinking(model) { + if !util.ModelSupportsThinking(lookupModel) { return payload } if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) + norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride) budgetOverride = &norm } return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) @@ -31,15 +34,18 @@ func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string // applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) + // Use the alias from metadata if available, as it's registered in the global registry + // with thinking metadata; the upstream model name may not be registered. + lookupModel := util.ResolveOriginalModel(model, metadata) + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload } - if !util.ModelSupportsThinking(model) { + if !util.ModelSupportsThinking(lookupModel) { return payload } if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) + norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride) budgetOverride = &norm } return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 290d5f92..5ebbf426 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -290,15 +290,21 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { // For standard Gemini API format (generationConfig.thinkingConfig path). // This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte { - if !IsGemini3Model(model) { + // Use the alias from metadata if available for model type detection + lookupModel := ResolveOriginalModel(model, metadata) + if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { return body } effort, ok := ReasoningEffortFromMetadata(metadata) if !ok || effort == "" { return body } - // Validate and apply the thinkingLevel - if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid { + // Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern + checkModel := model + if IsGemini3Model(lookupModel) { + checkModel = lookupModel + } + if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { return ApplyGeminiThinkingLevel(body, level, nil) } return body @@ -308,15 +314,21 @@ func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any // For Gemini CLI API format (request.generationConfig.thinkingConfig path). // This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte { - if !IsGemini3Model(model) { + // Use the alias from metadata if available for model type detection + lookupModel := ResolveOriginalModel(model, metadata) + if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { return body } effort, ok := ReasoningEffortFromMetadata(metadata) if !ok || effort == "" { return body } - // Validate and apply the thinkingLevel - if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid { + // Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern + checkModel := model + if IsGemini3Model(lookupModel) { + checkModel = lookupModel + } + if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { return ApplyGeminiCLIThinkingLevel(body, level, nil) } return body @@ -326,15 +338,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string] // For Gemini CLI API format (request.generationConfig.thinkingConfig path). // Returns the modified body if thinkingConfig was added, otherwise returns the original. // For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. -func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte { - if !ModelHasDefaultThinking(model) { +func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte { + // Use the alias from metadata if available for model property lookup + lookupModel := ResolveOriginalModel(model, metadata) + if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) { return body } if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() { return body } // Gemini 3 models use thinkingLevel instead of thinkingBudget - if IsGemini3Model(model) { + if IsGemini3Model(lookupModel) || IsGemini3Model(model) { // Don't set a default - let the API use its dynamic default ("high") // Only set includeThoughts updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true) diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go index f1b31aa5..03380c09 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/model_name_mappings.go @@ -81,7 +81,9 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta out[k] = v } } - out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel + // Store the requested alias (e.g., "gp") so downstream can use it to look up + // model metadata from the global registry where it was registered under this alias. + out[util.ModelMappingOriginalModelMetadataKey] = requestedModel return upstreamModel, out } From d00e3ea973688c71c3d7a0bce5ec695d137e92a6 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 31 Dec 2025 17:14:47 +0800 Subject: [PATCH 3/4] feat(thinking): add numeric budget to thinkingLevel conversion fallback --- internal/util/gemini_thinking.go | 56 +++++-- test/model_alias_thinking_suffix_test.go | 193 +++++++++++++++++++++++ 2 files changed, 233 insertions(+), 16 deletions(-) create mode 100644 test/model_alias_thinking_suffix_test.go diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 5ebbf426..36287b49 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -288,49 +288,73 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { // ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models. // For standard Gemini API format (generationConfig.thinkingConfig path). -// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). +// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)) +// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel. func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte { // Use the alias from metadata if available for model type detection lookupModel := ResolveOriginalModel(model, metadata) if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { return body } - effort, ok := ReasoningEffortFromMetadata(metadata) - if !ok || effort == "" { - return body - } - // Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern + + // Determine which model to use for validation checkModel := model if IsGemini3Model(lookupModel) { checkModel = lookupModel } - if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { - return ApplyGeminiThinkingLevel(body, level, nil) + + // First try to get effort string from metadata + effort, ok := ReasoningEffortFromMetadata(metadata) + if ok && effort != "" { + if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { + return ApplyGeminiThinkingLevel(body, level, nil) + } } + + // Fallback: check for numeric budget and convert to thinkingLevel + budget, _, _, matched := ThinkingFromMetadata(metadata) + if matched && budget != nil { + if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid { + return ApplyGeminiThinkingLevel(body, level, nil) + } + } + return body } // ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models. // For Gemini CLI API format (request.generationConfig.thinkingConfig path). -// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). +// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)) +// or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel. func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte { // Use the alias from metadata if available for model type detection lookupModel := ResolveOriginalModel(model, metadata) if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) { return body } - effort, ok := ReasoningEffortFromMetadata(metadata) - if !ok || effort == "" { - return body - } - // Validate and apply the thinkingLevel using the model that matches Gemini 3 pattern + + // Determine which model to use for validation checkModel := model if IsGemini3Model(lookupModel) { checkModel = lookupModel } - if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { - return ApplyGeminiCLIThinkingLevel(body, level, nil) + + // First try to get effort string from metadata + effort, ok := ReasoningEffortFromMetadata(metadata) + if ok && effort != "" { + if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid { + return ApplyGeminiCLIThinkingLevel(body, level, nil) + } } + + // Fallback: check for numeric budget and convert to thinkingLevel + budget, _, _, matched := ThinkingFromMetadata(metadata) + if matched && budget != nil { + if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid { + return ApplyGeminiCLIThinkingLevel(body, level, nil) + } + } + return body } diff --git a/test/model_alias_thinking_suffix_test.go b/test/model_alias_thinking_suffix_test.go new file mode 100644 index 00000000..78e53339 --- /dev/null +++ b/test/model_alias_thinking_suffix_test.go @@ -0,0 +1,193 @@ +package test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/tidwall/gjson" +) + +// TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md +// These tests verify the thinking suffix parsing and application logic across different providers. +func TestModelAliasThinkingSuffix(t *testing.T) { + tests := []struct { + id int + name string + provider string + requestModel string + suffixType string + expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking" + expectedValue any + upstreamModel string // The upstream model after alias resolution + isAlias bool + }{ + // === 1. Antigravity Provider === + // 1.1 Budget-only models (Gemini 2.5) + {1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false}, + {2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true}, + // 1.2 Budget+Levels models (Gemini 3) + {3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + {6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + + // === 2. Gemini CLI Provider === + // 2.1 Budget-only models + {7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false}, + {8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true}, + // 2.2 Budget+Levels models + {9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + {12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + + // === 3. Vertex Provider === + // 3.1 Budget-only models + {13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false}, + {14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true}, + // 3.2 Budget+Levels models + {15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + {18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + + // === 4. AI Studio Provider === + // 4.1 Budget-only models + {19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false}, + {20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true}, + // 4.2 Budget+Levels models + {21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false}, + {23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + {24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true}, + + // === 5. Claude Provider === + {25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false}, + {26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true}, + + // === 6. Codex Provider === + {27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false}, + {28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true}, + + // === 7. Qwen Provider === + {29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false}, + {30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true}, + + // === 8. iFlow Provider === + {31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false}, + {32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Step 1: Parse model suffix + baseModel, metadata := util.NormalizeThinkingModel(tt.requestModel) + + // Verify suffix was parsed + if metadata == nil && (tt.suffixType == "numeric" || tt.suffixType == "level") { + t.Errorf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tt.id, tt.requestModel) + return + } + + // Step 2: For aliases, simulate the model mapping by adding upstream model info + if tt.isAlias { + if metadata == nil { + metadata = make(map[string]any) + } + metadata[util.ModelMappingOriginalModelMetadataKey] = baseModel + } + + // Step 3: Verify metadata extraction + switch tt.suffixType { + case "numeric": + budget, _, _, matched := util.ThinkingFromMetadata(metadata) + if !matched { + t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id) + return + } + if budget == nil { + t.Errorf("Case #%d: expected budget in metadata", tt.id) + return + } + // For thinkingBudget/budget_tokens, verify the parsed budget value + if tt.expectedField == "thinkingBudget" || tt.expectedField == "budget_tokens" { + expectedBudget := tt.expectedValue.(int) + if *budget != expectedBudget { + t.Errorf("Case #%d: budget = %d, want %d", tt.id, *budget, expectedBudget) + } + } + // For thinkingLevel (Gemini 3), verify conversion from budget to level + if tt.expectedField == "thinkingLevel" { + level, ok := util.ThinkingBudgetToGemini3Level(tt.upstreamModel, *budget) + if !ok { + t.Errorf("Case #%d: ThinkingBudgetToGemini3Level failed", tt.id) + return + } + expectedLevel := tt.expectedValue.(string) + if level != expectedLevel { + t.Errorf("Case #%d: converted level = %q, want %q", tt.id, level, expectedLevel) + } + } + + case "level": + _, _, effort, matched := util.ThinkingFromMetadata(metadata) + if !matched { + t.Errorf("Case #%d: ThinkingFromMetadata did not match", tt.id) + return + } + if effort == nil { + t.Errorf("Case #%d: expected effort in metadata", tt.id) + return + } + if tt.expectedField == "thinkingLevel" || tt.expectedField == "reasoning_effort" { + expectedEffort := tt.expectedValue.(string) + if *effort != expectedEffort { + t.Errorf("Case #%d: effort = %q, want %q", tt.id, *effort, expectedEffort) + } + } + } + + // Step 4: Test Gemini-specific thinkingLevel conversion for Gemini 3 models + if tt.expectedField == "thinkingLevel" && util.IsGemini3Model(tt.upstreamModel) { + body := []byte(`{"request":{"contents":[]}}`) + + // Build metadata for the function + testMetadata := make(map[string]any) + if tt.isAlias { + testMetadata[util.ModelMappingOriginalModelMetadataKey] = tt.upstreamModel + } + // Copy parsed metadata + for k, v := range metadata { + testMetadata[k] = v + } + + result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tt.upstreamModel, testMetadata, body) + levelVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel") + + expectedLevel := tt.expectedValue.(string) + if !levelVal.Exists() { + t.Errorf("Case #%d: expected thinkingLevel in result", tt.id) + } else if levelVal.String() != expectedLevel { + t.Errorf("Case #%d: thinkingLevel = %q, want %q", tt.id, levelVal.String(), expectedLevel) + } + } + + // Step 5: Test Gemini 2.5 thinkingBudget application + if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) { + budget, _, _, _ := util.ThinkingFromMetadata(metadata) + if budget != nil { + body := []byte(`{"request":{"contents":[]}}`) + result := util.ApplyGeminiCLIThinkingConfig(body, budget, nil) + budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget") + + expectedBudget := tt.expectedValue.(int) + if !budgetVal.Exists() { + t.Errorf("Case #%d: expected thinkingBudget in result", tt.id) + } else if int(budgetVal.Int()) != expectedBudget { + t.Errorf("Case #%d: thinkingBudget = %d, want %d", tt.id, int(budgetVal.Int()), expectedBudget) + } + } + } + }) + } +} From 8bf3305b2bd86a6580384e670e55109a47a144e9 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 31 Dec 2025 18:07:13 +0800 Subject: [PATCH 4/4] fix(thinking): fallback to upstream model for thinking support when alias not in registry --- .../runtime/executor/antigravity_executor.go | 8 +-- .../runtime/executor/gemini_cli_executor.go | 6 +- internal/runtime/executor/payload_helpers.go | 32 ++++++++--- internal/util/thinking.go | 25 +++++++-- test/model_alias_thinking_suffix_test.go | 56 ++++++++++++------- 5 files changed, 89 insertions(+), 38 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 96ee18d0..b331a9df 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -96,7 +96,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("antigravity") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) @@ -191,7 +191,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * to := sdktranslator.FromString("antigravity") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, true) @@ -527,7 +527,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya to := sdktranslator.FromString("antigravity") translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) translated = normalizeAntigravityThinking(req.Model, translated, isClaude) @@ -697,7 +697,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut for idx, baseURL := range baseURLs { payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) + payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model) payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload) payload = normalizeAntigravityThinking(req.Model, payload, isClaude) payload = deleteJSONField(payload, "project") diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 74ded2a6..4213ffa0 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -78,7 +78,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) @@ -217,7 +217,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) @@ -421,7 +421,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. // Gemini CLI endpoint when iterating fallback variants. for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) + payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model) payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9fffb06d..d823ef04 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -17,35 +17,51 @@ func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string // Use the alias from metadata if available, as it's registered in the global registry // with thinking metadata; the upstream model name may not be registered. lookupModel := util.ResolveOriginalModel(model, metadata) - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata) + + // Determine which model to use for thinking support check. + // If the alias (lookupModel) is not in the registry, fall back to the upstream model. + thinkingModel := lookupModel + if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { + thinkingModel = model + } + + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload } - if !util.ModelSupportsThinking(lookupModel) { + if !util.ModelSupportsThinking(thinkingModel) { return payload } if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) budgetOverride = &norm } return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } -// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) +// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. -func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { +func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { // Use the alias from metadata if available, as it's registered in the global registry // with thinking metadata; the upstream model name may not be registered. lookupModel := util.ResolveOriginalModel(model, metadata) - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(lookupModel, metadata) + + // Determine which model to use for thinking support check. + // If the alias (lookupModel) is not in the registry, fall back to the upstream model. + thinkingModel := lookupModel + if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { + thinkingModel = model + } + + budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) if !ok || (budgetOverride == nil && includeOverride == nil) { return payload } - if !util.ModelSupportsThinking(lookupModel) { + if !util.ModelSupportsThinking(thinkingModel) { return payload } if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(lookupModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) budgetOverride = &norm } return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 74808669..3ce1bb0d 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -12,9 +12,18 @@ func ModelSupportsThinking(model string) bool { if model == "" { return false } + // First check the global dynamic registry if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil { return info.Thinking != nil } + // Fallback: check static model definitions + if info := registry.LookupStaticModelInfo(model); info != nil { + return info.Thinking != nil + } + // Fallback: check Antigravity static config + if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil { + return cfg.Thinking != nil + } return false } @@ -63,11 +72,19 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero if model == "" { return false, 0, 0, false, false } - info := registry.GetGlobalRegistry().GetModelInfo(model) - if info == nil || info.Thinking == nil { - return false, 0, 0, false, false + // First check global dynamic registry + if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil { + return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed } - return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed + // Fallback: check static model definitions + if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil { + return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed + } + // Fallback: check Antigravity static config + if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil { + return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed + } + return false, 0, 0, false, false } // GetModelThinkingLevels returns the discrete reasoning effort levels for the model. diff --git a/test/model_alias_thinking_suffix_test.go b/test/model_alias_thinking_suffix_test.go index 78e53339..270e0cc7 100644 --- a/test/model_alias_thinking_suffix_test.go +++ b/test/model_alias_thinking_suffix_test.go @@ -3,6 +3,7 @@ package test import ( "testing" + "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" ) @@ -80,8 +81,9 @@ func TestModelAliasThinkingSuffix(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - // Step 1: Parse model suffix - baseModel, metadata := util.NormalizeThinkingModel(tt.requestModel) + // Step 1: Parse model suffix (simulates SDK layer normalization) + // For "gp(1000)" -> requestedModel="gp", metadata={thinking_budget: 1000} + requestedModel, metadata := util.NormalizeThinkingModel(tt.requestModel) // Verify suffix was parsed if metadata == nil && (tt.suffixType == "numeric" || tt.suffixType == "level") { @@ -89,12 +91,13 @@ func TestModelAliasThinkingSuffix(t *testing.T) { return } - // Step 2: For aliases, simulate the model mapping by adding upstream model info + // Step 2: Simulate OAuth model mapping + // Real flow: applyOAuthModelMapping stores requestedModel (the alias) in metadata if tt.isAlias { if metadata == nil { metadata = make(map[string]any) } - metadata[util.ModelMappingOriginalModelMetadataKey] = baseModel + metadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel } // Step 3: Verify metadata extraction @@ -151,12 +154,15 @@ func TestModelAliasThinkingSuffix(t *testing.T) { if tt.expectedField == "thinkingLevel" && util.IsGemini3Model(tt.upstreamModel) { body := []byte(`{"request":{"contents":[]}}`) - // Build metadata for the function + // Build metadata simulating real OAuth flow: + // - requestedModel (alias like "gf") is stored in model_mapping_original_model + // - upstreamModel is passed as the model parameter testMetadata := make(map[string]any) if tt.isAlias { - testMetadata[util.ModelMappingOriginalModelMetadataKey] = tt.upstreamModel + // Real flow: applyOAuthModelMapping stores requestedModel (the alias) + testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel } - // Copy parsed metadata + // Copy parsed metadata (thinking_budget, reasoning_effort, etc.) for k, v := range metadata { testMetadata[k] = v } @@ -172,20 +178,32 @@ func TestModelAliasThinkingSuffix(t *testing.T) { } } - // Step 5: Test Gemini 2.5 thinkingBudget application + // Step 5: Test Gemini 2.5 thinkingBudget application using real ApplyThinkingMetadataCLI flow if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) { - budget, _, _, _ := util.ThinkingFromMetadata(metadata) - if budget != nil { - body := []byte(`{"request":{"contents":[]}}`) - result := util.ApplyGeminiCLIThinkingConfig(body, budget, nil) - budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget") + body := []byte(`{"request":{"contents":[]}}`) - expectedBudget := tt.expectedValue.(int) - if !budgetVal.Exists() { - t.Errorf("Case #%d: expected thinkingBudget in result", tt.id) - } else if int(budgetVal.Int()) != expectedBudget { - t.Errorf("Case #%d: thinkingBudget = %d, want %d", tt.id, int(budgetVal.Int()), expectedBudget) - } + // Build metadata simulating real OAuth flow: + // - requestedModel (alias like "gp") is stored in model_mapping_original_model + // - upstreamModel is passed as the model parameter + testMetadata := make(map[string]any) + if tt.isAlias { + // Real flow: applyOAuthModelMapping stores requestedModel (the alias) + testMetadata[util.ModelMappingOriginalModelMetadataKey] = requestedModel + } + // Copy parsed metadata (thinking_budget, reasoning_effort, etc.) + for k, v := range metadata { + testMetadata[k] = v + } + + // Use the exported ApplyThinkingMetadataCLI which includes the fallback logic + result := executor.ApplyThinkingMetadataCLI(body, testMetadata, tt.upstreamModel) + budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget") + + expectedBudget := tt.expectedValue.(int) + if !budgetVal.Exists() { + t.Errorf("Case #%d: expected thinkingBudget in result", tt.id) + } else if int(budgetVal.Int()) != expectedBudget { + t.Errorf("Case #%d: thinkingBudget = %d, want %d", tt.id, int(budgetVal.Int()), expectedBudget) } } })