From 680b3f5010b726368ae583f12c6e7da7040ea317 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 10:46:25 +0800 Subject: [PATCH 1/9] fix(translator): avoid default thinkingConfig in Gemini requests --- .../claude/gemini-cli_claude_request.go | 34 ++++++++++--------- .../gemini-cli_openai_request.go | 6 ++-- .../chat-completions/gemini_openai_request.go | 6 ++-- .../gemini_openai-responses_request.go | 4 +-- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index 229debee..f4ba7d37 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -135,8 +135,8 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] tools = make([]client.ToolDeclaration, 0) } - // Build output Gemini CLI request JSON - out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}` + // Build output Gemini CLI request JSON (no default thinkingConfig) + out := `{"model":"","request":{"contents":[]}}` out, _ = sjson.Set(out, "model", modelName) if systemInstruction != nil { b, _ := json.Marshal(systemInstruction) @@ -151,21 +151,23 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] out, _ = sjson.SetRaw(out, "request.tools", string(b)) } - // Map reasoning and sampling configs + // Map reasoning and sampling configs: only set thinkingConfig when explicitly requested reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort") - if reasoningEffortResult.String() == "none" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) - } else if reasoningEffortResult.String() == "auto" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - } else if reasoningEffortResult.String() == "low" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) - } else if reasoningEffortResult.String() == "medium" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) - } else if reasoningEffortResult.String() == "high" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576) - } else { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + if reasoningEffortResult.Exists() { + if reasoningEffortResult.String() == "none" { + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) + } else if reasoningEffortResult.String() == "auto" { + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + } else if reasoningEffortResult.String() == "low" { + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) + } else if reasoningEffortResult.String() == "medium" { + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) + } else if reasoningEffortResult.String() == "high" { + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576) + } else { + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + } } if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number { out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index dc850c49..6892f67b 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -26,8 +26,8 @@ import ( // - []byte: The transformed request data in Gemini CLI API format func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte { rawJSON := bytes.Clone(inputRawJSON) - // Base envelope - out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`) + // Base envelope (no default thinkingConfig) + out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`) // Model out, _ = sjson.SetBytes(out, "model", modelName) @@ -50,8 +50,6 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo default: out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) } - } else { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) } // Temperature/top_p/top_k diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 44cad7d2..7597fcef 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -26,8 +26,8 @@ import ( // - []byte: The transformed request data in Gemini API format func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte { rawJSON := bytes.Clone(inputRawJSON) - // Base envelope - out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`) + // Base envelope (no default thinkingConfig) + out := []byte(`{"contents":[]}`) // Model out, _ = sjson.SetBytes(out, "model", modelName) @@ -50,8 +50,6 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) default: out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) } - } else { - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) } // Temperature/top_p/top_k diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index beffb317..9058bae9 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -15,8 +15,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte _ = modelName // Unused but required by interface _ = stream // Unused but required by interface - // Base Gemini API template - out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}` + // Base Gemini API template (do not include thinkingConfig by default) + out := `{"contents":[]}` root := gjson.ParseBytes(rawJSON) From 3d7aca22c0466f6058e27f543f8d8ebfdbe47e30 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 12:00:35 +0800 Subject: [PATCH 2/9] feat(registry): add thinking budget support; populate Gemini models --- internal/registry/model_definitions.go | 14 +++++++++---- internal/registry/model_registry.go | 28 ++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index ebc1a573..2536d27b 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -84,6 +84,7 @@ func GeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, }, { ID: "gemini-2.5-pro", @@ -98,6 +99,7 @@ func GeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "gemini-2.5-flash-lite", @@ -112,6 +114,7 @@ func GeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, }, { ID: "gemini-2.5-flash-image-preview", @@ -126,6 +129,7 @@ func GeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 8192, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + // image models don't support thinkingConfig; leave Thinking nil }, { ID: "gemini-2.5-flash-image", @@ -140,6 +144,7 @@ func GeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 8192, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + // image models don't support thinkingConfig; leave Thinking nil }, } } @@ -152,9 +157,8 @@ func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() } // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations func GetAIStudioModels() []*ModelInfo { - models := make([]*ModelInfo, 0, 8) - models = append(models, GeminiModels()...) - models = append(models, + base := GeminiModels() + return append(base, &ModelInfo{ ID: "gemini-pro-latest", Object: "model", @@ -168,6 +172,7 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, }, &ModelInfo{ ID: "gemini-flash-latest", @@ -182,6 +187,7 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, }, &ModelInfo{ ID: "gemini-flash-lite-latest", @@ -196,9 +202,9 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, + Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, }, ) - return models } // GetOpenAIModels returns the standard OpenAI model definitions diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index e1223978..46ada713 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -45,6 +45,23 @@ type ModelInfo struct { MaxCompletionTokens int `json:"max_completion_tokens,omitempty"` // SupportedParameters lists supported parameters SupportedParameters []string `json:"supported_parameters,omitempty"` + + // Thinking holds provider-specific reasoning/thinking budget capabilities. + // This is optional and currently used for Gemini thinking budget normalization. + Thinking *ThinkingSupport `json:"thinking,omitempty"` +} + +// ThinkingSupport describes a model family's supported internal reasoning budget range. +// Values are interpreted in provider-native token units. +type ThinkingSupport struct { + // Min is the minimum allowed thinking budget (inclusive). + Min int `json:"min,omitempty"` + // Max is the maximum allowed thinking budget (inclusive). + Max int `json:"max,omitempty"` + // ZeroAllowed indicates whether 0 is a valid value (to disable thinking). + ZeroAllowed bool `json:"zero_allowed,omitempty"` + // DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget). + DynamicAllowed bool `json:"dynamic_allowed,omitempty"` } // ModelRegistration tracks a model's availability @@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string { return result } +// GetModelInfo returns the registered ModelInfo for the given model ID, if present. +// Returns nil if the model is unknown to the registry. +func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo { + r.mutex.RLock() + defer r.mutex.RUnlock() + if reg, ok := r.models[modelID]; ok && reg != nil { + return reg.Info + } + return nil +} + // convertModelToMap converts ModelInfo to the appropriate format for different handler types func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any { if model == nil { From 41577bce0789d962fa8276d88ccfd8f21369f6c3 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 13:11:28 +0800 Subject: [PATCH 3/9] feat(claude): map Anthropic 'thinking' to Gemini thinkingBudget --- .../claude/gemini-cli_claude_request.go | 27 +++----- .../gemini/claude/gemini_claude_request.go | 27 +++----- internal/util/thinking.go | 69 +++++++++++++++++++ 3 files changed, 90 insertions(+), 33 deletions(-) create mode 100644 internal/util/thinking.go diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index f4ba7d37..e4801d08 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -11,6 +11,7 @@ import ( "strings" client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -135,7 +136,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] tools = make([]client.ToolDeclaration, 0) } - // Build output Gemini CLI request JSON (no default thinkingConfig) + // Build output Gemini CLI request JSON out := `{"model":"","request":{"contents":[]}}` out, _ = sjson.Set(out, "model", modelName) if systemInstruction != nil { @@ -151,22 +152,14 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] out, _ = sjson.SetRaw(out, "request.tools", string(b)) } - // Map reasoning and sampling configs: only set thinkingConfig when explicitly requested - reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort") - if reasoningEffortResult.Exists() { - if reasoningEffortResult.String() == "none" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) - } else if reasoningEffortResult.String() == "auto" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - } else if reasoningEffortResult.String() == "low" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) - } else if reasoningEffortResult.String() == "medium" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) - } else if reasoningEffortResult.String() == "high" { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576) - } else { - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + // Map Anthropic thinking -> Gemini thinkingBudget when type==enabled + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + budget = util.NormalizeThinkingBudget(modelName, budget) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + } } } if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number { diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index 6a477dbd..e9d79276 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -11,6 +11,7 @@ import ( "strings" client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } // Build output Gemini CLI request JSON - out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}` + out := `{"contents":[]}` out, _ = sjson.Set(out, "model", modelName) if systemInstruction != nil { b, _ := json.Marshal(systemInstruction) @@ -144,21 +145,15 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetRaw(out, "tools", string(b)) } - // Map reasoning and sampling configs - reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort") - if reasoningEffortResult.String() == "none" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0) - } else if reasoningEffortResult.String() == "auto" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) - } else if reasoningEffortResult.String() == "low" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) - } else if reasoningEffortResult.String() == "medium" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) - } else if reasoningEffortResult.String() == "high" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576) - } else { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + // Map Anthropic thinking -> Gemini thinkingBudget when enabled + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + budget = util.NormalizeThinkingBudget(modelName, budget) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + } + } } if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number { out, _ = sjson.Set(out, "generationConfig.temperature", v.Num) diff --git a/internal/util/thinking.go b/internal/util/thinking.go new file mode 100644 index 00000000..c16b91cd --- /dev/null +++ b/internal/util/thinking.go @@ -0,0 +1,69 @@ +package util + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" +) + +// ModelSupportsThinking reports whether the given model has Thinking capability +// according to the model registry metadata (provider-agnostic). +func ModelSupportsThinking(model string) bool { + if model == "" { + return false + } + if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil { + return info.Thinking != nil + } + return false +} + +// NormalizeThinkingBudget clamps the requested thinking budget to the +// supported range for the specified model using registry metadata only. +// If the model is unknown or has no Thinking metadata, returns the original budget. +// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range +// or min (0 if zero is allowed and mid <= 0). +func NormalizeThinkingBudget(model string, budget int) int { + if budget == -1 { // dynamic + if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { + if dynamicAllowed { + return -1 + } + mid := (min + max) / 2 + if mid <= 0 && zeroAllowed { + return 0 + } + if mid <= 0 { + return min + } + return mid + } + return -1 + } + if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found { + if budget == 0 { + if zeroAllowed { + return 0 + } + return min + } + if budget < min { + return min + } + if budget > max { + return max + } + return budget + } + return budget +} + +// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry. +func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) { + if model == "" { + return false, 0, 0, false, false + } + info := registry.GetGlobalRegistry().GetModelInfo(model) + if info == nil || info.Thinking == nil { + return false, 0, 0, false, false + } + return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed +} From 2a3ee8d0e3747072fae2f2b4275153cdd3f99974 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 14:00:47 +0800 Subject: [PATCH 4/9] fix(translators): normalize thinking budgets --- .../chat-completions/gemini-cli_openai_request.go | 8 ++++---- .../openai/chat-completions/gemini_openai_request.go | 8 ++++---- .../responses/gemini_openai-responses_request.go | 11 ++++++----- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 6892f67b..5da6c2c6 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -34,7 +34,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Reasoning effort -> thinkingBudget/include_thoughts re := gjson.GetBytes(rawJSON, "reasoning_effort") - if re.Exists() { + if re.Exists() && util.ModelSupportsThinking(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") @@ -42,11 +42,11 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo case "auto": out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) case "low": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) case "high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) default: out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 7597fcef..f89ec16a 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -34,7 +34,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // Reasoning effort -> thinkingBudget/include_thoughts re := gjson.GetBytes(rawJSON, "reasoning_effort") - if re.Exists() { + if re.Exists() && util.ModelSupportsThinking(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts") @@ -42,11 +42,11 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) case "auto": out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) case "low": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) case "medium": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) case "high": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) default: out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) } diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 9058bae9..b54c2730 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -4,6 +4,7 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -242,7 +243,7 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences) } - if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() { + if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() && util.ModelSupportsThinking(modelName) { switch reasoningEffort.String() { case "none": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false) @@ -250,13 +251,13 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte case "auto": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) case "minimal": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) case "low": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096)) case "medium": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) case "high": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) default: out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) } From 7dd93a4a256224454f756246759a6711a3d9b1bd Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 15:50:10 +0800 Subject: [PATCH 5/9] fix(executor): only apply thinking config to supported models --- .../runtime/executor/aistudio_executor.go | 8 +++- .../runtime/executor/gemini_cli_executor.go | 47 +++++++------------ internal/runtime/executor/gemini_executor.go | 24 +++++++--- internal/util/gemini_thinking.go | 16 +++++++ 4 files changed, 58 insertions(+), 37 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index cfc86d0e..396f8eaa 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c from := opts.SourceFormat to := sdktranslator.FromString("gemini") payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } - payload = disableGeminiThinkingConfig(payload, req.Model) + payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload) metadataAction := "generateContent" if req.Metadata != nil { diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 3d7a539d..e00fd485 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -63,7 +63,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth to := sdktranslator.FromString("gemini-cli") budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if hasOverride { + if hasOverride && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) } basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -101,7 +105,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) } - payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -196,7 +200,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("gemini-cli") budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if hasOverride { + if hasOverride && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) } basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -223,7 +231,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut payload := append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) - payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -393,12 +401,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - if hasOverride { + if hasOverride && util.ModelSupportsThinking(attemptModel) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(attemptModel, *budgetOverride) + budgetOverride = &norm + } payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") - payload = disableGeminiThinkingConfig(payload, attemptModel) + payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) tok, errTok := tokenSource.Token() @@ -623,29 +635,6 @@ func cliPreviewFallbackOrder(model string) []string { } } -func disableGeminiThinkingConfig(body []byte, model string) []byte { - if !geminiModelDisallowsThinking(model) { - return body - } - - updated := deleteJSONField(body, "request.generationConfig.thinkingConfig") - updated = deleteJSONField(updated, "generationConfig.thinkingConfig") - return updated -} - -func geminiModelDisallowsThinking(model string) bool { - if model == "" { - return false - } - lower := strings.ToLower(model) - for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} { - if strings.Contains(lower, marker) { - return true - } - } - return false -} - // setJSONField sets a top-level JSON field on a byte slice payload via sjson. func setJSONField(body []byte, key, value string) []byte { if key == "" { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index e3008cef..deba86fb 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = disableGeminiThinkingConfig(body, req.Model) + body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) action := "generateContent" @@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A from := opts.SourceFormat to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = disableGeminiThinkingConfig(body, req.Model) + body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") @@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut from := opts.SourceFormat to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { + if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + if budgetOverride != nil { + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + budgetOverride = &norm + } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) + translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 9403a8e4..33c9edcf 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) { } return budgetPtr, includePtr, matched } + +// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body +// when the target model does not advertise Thinking capability. It cleans both +// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net +// in case upstream injected thinking for an unsupported model. +func StripThinkingConfigIfUnsupported(model string, body []byte) []byte { + if ModelSupportsThinking(model) || len(body) == 0 { + return body + } + updated := body + // Gemini CLI path + updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig") + // Standard Gemini path + updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig") + return updated +} From 58d30369b4c4be9cfe8170f0545d3517434967c1 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:28:57 +0800 Subject: [PATCH 6/9] fix(gemini-cli): correctly strip/normalize thinking config by model --- internal/runtime/executor/gemini_cli_executor.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index e00fd485..da108c3e 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -70,6 +70,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) } + basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) action := "generateContent" @@ -105,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) } - payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -207,6 +207,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut } basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) } + basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id")) @@ -231,7 +232,6 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut payload := append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) - payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { @@ -401,16 +401,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - if hasOverride && util.ModelSupportsThinking(attemptModel) { + if hasOverride && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(attemptModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) budgetOverride = &norm } payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") - payload = util.StripThinkingConfigIfUnsupported(attemptModel, payload) + payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) tok, errTok := tokenSource.Token() From 3ac5f05e8c84ed99d30a10eef0a674a28a1bef76 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 17:30:35 +0800 Subject: [PATCH 7/9] feat(gemini): prefer official reasoning fields, add extra_body(cherry studio) fallback --- .../gemini-cli_openai_request.go | 17 ++++++++++++++++- .../chat-completions/gemini_openai_request.go | 18 +++++++++++++++++- .../gemini_openai-responses_request.go | 18 +++++++++++++++++- 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 5da6c2c6..a5edbcae 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -33,8 +33,10 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "model", modelName) // Reasoning effort -> thinkingBudget/include_thoughts + // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") - if re.Exists() && util.ModelSupportsThinking(modelName) { + hasOfficialThinking := re.Exists() + if hasOfficialThinking && util.ModelSupportsThinking(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") @@ -52,6 +54,19 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } } + // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) + if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { + if v := tc.Get("thinking_budget"); v.Exists() { + budget := util.NormalizeThinkingBudget(modelName, int(v.Int())) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + } + if v := tc.Get("include_thoughts"); v.Exists() { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) + } + } + } + // Temperature/top_p/top_k if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index f89ec16a..b842569d 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -33,8 +33,10 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "model", modelName) // Reasoning effort -> thinkingBudget/include_thoughts + // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") - if re.Exists() && util.ModelSupportsThinking(modelName) { + hasOfficialThinking := re.Exists() + if hasOfficialThinking && util.ModelSupportsThinking(modelName) { switch re.String() { case "none": out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts") @@ -52,6 +54,20 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } } + // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) + if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { + if v := tc.Get("thinking_budget"); v.Exists() { + // Normalize budget to model range + budget := util.NormalizeThinkingBudget(modelName, int(v.Int())) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + } + if v := tc.Get("include_thoughts"); v.Exists() { + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) + } + } + } + // Temperature/top_p/top_k if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index b54c2730..7f14280d 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -243,7 +243,10 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences) } - if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() && util.ModelSupportsThinking(modelName) { + // OpenAI official reasoning fields take precedence + hasOfficialThinking := root.Get("reasoning.effort").Exists() + if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + reasoningEffort := root.Get("reasoning.effort") switch reasoningEffort.String() { case "none": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false) @@ -262,5 +265,18 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) } } + + // Cherry Studio extension (applies only when official fields are missing) + if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { + if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { + if v := tc.Get("thinking_budget"); v.Exists() { + budget := util.NormalizeThinkingBudget(modelName, int(v.Int())) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + } + if v := tc.Get("include_thoughts"); v.Exists() { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) + } + } + } return []byte(out) } From 5cb378256b48521c674640874067ad5400068634 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 18:34:23 +0800 Subject: [PATCH 8/9] feat(gemini-translators): set include_thoughts when mapping thinking --- .../claude/gemini-cli_claude_request.go | 3 ++- .../gemini-cli_openai_request.go | 16 ++++++++++++++-- .../gemini/claude/gemini_claude_request.go | 3 ++- .../chat-completions/gemini_openai_request.go | 16 ++++++++++++++-- .../gemini_openai-responses_request.go | 17 +++++++++++++++-- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index e4801d08..1699f018 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -152,13 +152,14 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] out, _ = sjson.SetRaw(out, "request.tools", string(b)) } - // Map Anthropic thinking -> Gemini thinkingBudget when type==enabled + // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) budget = util.NormalizeThinkingBudget(modelName, budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index a5edbcae..8369f2b4 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -43,26 +43,38 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) case "auto": out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "low": out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "medium": out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "high": out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { + var setBudget bool + var normalized int if v := tc.Get("thinking_budget"); v.Exists() { - budget := util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) + setBudget = true } if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) + } else if setBudget { + if normalized != 0 { + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index e9d79276..9b5a1671 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -145,13 +145,14 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetRaw(out, "tools", string(b)) } - // Map Anthropic thinking -> Gemini thinkingBudget when enabled + // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) budget = util.NormalizeThinkingBudget(modelName, budget) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index b842569d..60e3042a 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -43,27 +43,39 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0) case "auto": out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) case "low": out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) case "medium": out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) case "high": out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { + var setBudget bool + var normalized int if v := tc.Get("thinking_budget"); v.Exists() { // Normalize budget to model range - budget := util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized) + setBudget = true } if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) + } else if setBudget { + if normalized != 0 { + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 7f14280d..051eb2d5 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -253,28 +253,41 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0) case "auto": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "minimal": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "low": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "medium": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "high": out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) } } // Cherry Studio extension (applies only when official fields are missing) if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { + var setBudget bool + var normalized int if v := tc.Get("thinking_budget"); v.Exists() { - budget := util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized) + setBudget = true } if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) + } else if setBudget { + if normalized != 0 { + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) + } } } } From 1bbbd16df67abb352ff33e40b4d9b5c64036726a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Wed, 29 Oct 2025 19:18:40 +0800 Subject: [PATCH 9/9] chore(logging): clarify 429 rate-limit retries in Gemini executor --- internal/runtime/executor/gemini_cli_executor.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index da108c3e..457d579b 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -97,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth var lastStatus int var lastBody []byte - for _, attemptModel := range models { + for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) if action == "countTokens" { payload = deleteJSONField(payload, "project") @@ -170,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth lastBody = append([]byte(nil), data...) log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data)) if httpResp.StatusCode == 429 { - log.Debugf("gemini cli executor: rate limited, retrying with next model") + if idx+1 < len(models) { + log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1]) + } else { + log.Debug("gemini cli executor: rate limited, no additional fallback model") + } continue } @@ -228,7 +232,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var lastStatus int var lastBody []byte - for _, attemptModel := range models { + for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "model", attemptModel) @@ -290,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut lastBody = append([]byte(nil), data...) log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data)) if httpResp.StatusCode == 429 { - log.Debugf("gemini cli executor: rate limited, retrying with next model") + if idx+1 < len(models) { + log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1]) + } else { + log.Debug("gemini cli executor: rate limited, no additional fallback model") + } continue } err = statusErr{code: httpResp.StatusCode, msg: string(data)}