diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index ebc1a573..2536d27b 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -84,6 +84,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-pro",
@@ -98,6 +99,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-flash-lite",
@@ -112,6 +114,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-flash-image-preview",
@@ -126,6 +129,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// Image models do not support thinkingConfig; leave Thinking nil.
 		},
 		{
 			ID: "gemini-2.5-flash-image",
@@ -140,6 +144,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// Image models do not support thinkingConfig; leave Thinking nil.
 		},
 	}
 }
@@ -152,9 +157,8 @@ func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
 
 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
 func GetAIStudioModels() []*ModelInfo {
-	models := make([]*ModelInfo, 0, 8)
-	models = append(models, GeminiModels()...)
-	models = append(models,
+	base := GeminiModels()
+	return append(base,
 		&ModelInfo{
 			ID:     "gemini-pro-latest",
 			Object: "model",
@@ -168,6 +172,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		&ModelInfo{
 			ID: "gemini-flash-latest",
@@ -182,6 +187,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		&ModelInfo{
 			ID: "gemini-flash-lite-latest",
@@ -196,9 +202,9 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 	)
-	return models
 }
 
 // GetOpenAIModels returns the standard OpenAI model definitions
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index e1223978..46ada713 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -45,6 +45,23 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+
+	// Thinking holds provider-specific reasoning/thinking budget capabilities.
+	// It is optional and currently used for Gemini thinking-budget normalization.
+	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+}
+
+// ThinkingSupport describes a model family's supported internal reasoning budget range.
+// Values are interpreted in provider-native token units.
+type ThinkingSupport struct {
+	// Min is the minimum allowed thinking budget (inclusive).
+	Min int `json:"min,omitempty"`
+	// Max is the maximum allowed thinking budget (inclusive).
+	Max int `json:"max,omitempty"`
+	// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
+	ZeroAllowed bool `json:"zero_allowed,omitempty"`
+	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
+	DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
 }
 
 // ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }
 
+// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
+// It returns nil if the model is unknown to the registry.
+func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+	r.mutex.RLock()
+	defer r.mutex.RUnlock()
+	if reg, ok := r.models[modelID]; ok && reg != nil {
+		return reg.Info
+	}
+	return nil
+}
+
 // convertModelToMap converts ModelInfo to the appropriate format for different handler types
 func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
 	if model == nil {
diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index cfc86d0e..396f8eaa 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 	}
-	payload = disableGeminiThinkingConfig(payload, req.Model)
+	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	metadataAction := "generateContent"
 	if req.Metadata != nil {
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 3d7a539d..457d579b 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -63,9 +63,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if hasOverride {
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
 	}
+	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 
 	action := "generateContent"
@@ -92,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
 
-	for _, attemptModel := range models {
+	for idx, attemptModel := range models {
 		payload := append([]byte(nil), basePayload...)
 		if action == "countTokens" {
 			payload = deleteJSONField(payload, "project")
@@ -101,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			payload = setJSONField(payload, "project", projectID)
 			payload = setJSONField(payload, "model", attemptModel)
 		}
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
 
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
@@ -166,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
 			if httpResp.StatusCode == 429 {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model")
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
 				continue
 			}
 
@@ -196,9 +204,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if hasOverride {
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
 	}
+	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 
 	projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -219,11 +232,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	var lastStatus int
 	var lastBody []byte
 
-	for _, attemptModel := range models {
+	for idx, attemptModel := range models {
 		payload := append([]byte(nil), basePayload...)
 		payload = setJSONField(payload, "project", projectID)
 		payload = setJSONField(payload, "model", attemptModel)
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
 
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
@@ -282,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
 			if httpResp.StatusCode == 429 {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model")
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
 				continue
 			}
 			err = statusErr{code: httpResp.StatusCode, msg: string(data)}
@@ -393,12 +409,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	for _, attemptModel := range models {
 		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-		if hasOverride {
+		if hasOverride && util.ModelSupportsThinking(req.Model) {
+			if budgetOverride != nil {
+				norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+				budgetOverride = &norm
+			}
 			payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 		}
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
+		payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 		payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
 
 		tok, errTok := tokenSource.Token()
@@ -623,29 +643,6 @@ func cliPreviewFallbackOrder(model string) []string {
 	}
 }
 
-func disableGeminiThinkingConfig(body []byte, model string) []byte {
-	if !geminiModelDisallowsThinking(model) {
-		return body
-	}
-
-	updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
-	updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
-	return updated
-}
-
-func geminiModelDisallowsThinking(model string) bool {
-	if model == "" {
-		return false
-	}
-	lower := strings.ToLower(model)
-	for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
-		if strings.Contains(lower, marker) {
-			return true
-		}
-	}
-	return false
-}
-
 // setJSONField sets a top-level JSON field on a byte slice payload via sjson.
 func setJSONField(body []byte, key, value string) []byte {
 	if key == "" {
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index e3008cef..deba86fb 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
-	body = disableGeminiThinkingConfig(body, req.Model)
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 
 	action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
-	body = disableGeminiThinkingConfig(body, req.Model)
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 
 	url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
 	}
-	translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
+	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
index 229debee..1699f018 100644
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 
 	client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -136,7 +137,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 	}
 
 	// Build output Gemini CLI request JSON
-	out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}`
+	out := `{"model":"","request":{"contents":[]}}`
 	out, _ = sjson.Set(out, "model", modelName)
 	if systemInstruction != nil {
 		b, _ := json.Marshal(systemInstruction)
@@ -151,21 +152,16 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 		out, _ = sjson.SetRaw(out, "request.tools", string(b))
 	}
 
-	// Map reasoning and sampling configs
-	reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if reasoningEffortResult.String() == "none" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false)
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
-	} else if reasoningEffortResult.String() == "auto" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-	} else if reasoningEffortResult.String() == "low" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
-	} else if reasoningEffortResult.String() == "medium" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-	} else if reasoningEffortResult.String() == "high" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
-	} else {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type == "enabled"
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+		if t.Get("type").String() == "enabled" {
+			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+				budget := int(b.Int())
+				budget = util.NormalizeThinkingBudget(modelName, budget)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+			}
+		}
 	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
 		out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index dc850c49..8369f2b4 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -26,32 +26,57 @@ import (
 //   - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
 	rawJSON := bytes.Clone(inputRawJSON)
-	// Base envelope
-	out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`)
+	// Base envelope (no default thinkingConfig)
+	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
 
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
 
 	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Note: the official OpenAI field takes precedence over extra_body.google.thinking_config.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if re.Exists() {
+	hasOfficialThinking := re.Exists()
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		switch re.String() {
 		case "none":
 			out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
 		case "auto":
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		}
-	} else {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+	}
+
+	// Cherry Studio extension extra_body.google.thinking_config (effective only when the official field is absent)
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+			var setBudget bool
+			var normalized int
+			if v := tc.Get("thinking_budget"); v.Exists() {
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			}
+			if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget {
+				if normalized != 0 {
+					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+				}
+			}
+		}
 	}
 
 	// Temperature/top_p/top_k
diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go
index 6a477dbd..9b5a1671 100644
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -11,6 +11,7 @@ import (
 	"strings"
 
 	client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	}
 
 	// Build output Gemini CLI request JSON
-	out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
+	out := `{"contents":[]}`
 	out, _ = sjson.Set(out, "model", modelName)
 	if systemInstruction != nil {
 		b, _ := json.Marshal(systemInstruction)
@@ -144,21 +145,16 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		out, _ = sjson.SetRaw(out, "tools", string(b))
 	}
 
-	// Map reasoning and sampling configs
-	reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if reasoningEffortResult.String() == "none" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
-	} else if reasoningEffortResult.String() == "auto" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-	} else if reasoningEffortResult.String() == "low" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
-	} else if reasoningEffortResult.String() == "medium" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-	} else if reasoningEffortResult.String() == "high" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
-	} else {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+		if t.Get("type").String() == "enabled" {
+			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+				budget := int(b.Int())
+				budget = util.NormalizeThinkingBudget(modelName, budget)
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+			}
+		}
 	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
 		out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
index 44cad7d2..60e3042a 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -26,32 +26,58 @@ import (
 //   - []byte: The transformed request data in Gemini API format
 func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
 	rawJSON := bytes.Clone(inputRawJSON)
-	// Base envelope
-	out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`)
+	// Base envelope (no default thinkingConfig)
+	out := []byte(`{"contents":[]}`)
 
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
 
 	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Note: the official OpenAI field takes precedence over extra_body.google.thinking_config.
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if re.Exists() {
+	hasOfficialThinking := re.Exists()
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		switch re.String() {
 		case "none":
 			out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
 		case "auto":
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		}
-	} else {
-		out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+	}
+
+	// Cherry Studio extension extra_body.google.thinking_config (effective only when the official field is absent)
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+			var setBudget bool
+			var normalized int
+			if v := tc.Get("thinking_budget"); v.Exists() {
+				// Normalize budget to model range
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			}
+			if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget {
+				if normalized != 0 {
+					out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
+				}
+			}
+		}
 	}
 
 	// Temperature/top_p/top_k
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
index beffb317..051eb2d5 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"strings"
 
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -15,8 +16,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	_ = modelName // Unused but required by interface
 	_ = stream    // Unused but required by interface
 
-	// Base Gemini API template
-	out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
+	// Base Gemini API template (do not include thinkingConfig by default)
+	out := `{"contents":[]}`
 
 	root := gjson.ParseBytes(rawJSON)
@@ -242,23 +243,52 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 		out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
 	}
 
-	if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
+	// The official OpenAI reasoning field takes precedence
+	hasOfficialThinking := root.Get("reasoning.effort").Exists()
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		reasoningEffort := root.Get("reasoning.effort")
 		switch reasoningEffort.String() {
 		case "none":
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
 		case "auto":
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "minimal":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		}
+	}
+
+	// Cherry Studio extension (applies only when the official field is missing)
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+			var setBudget bool
+			var normalized int
+			if v := tc.Get("thinking_budget"); v.Exists() {
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			}
+			if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget {
+				if normalized != 0 {
+					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+				}
+			}
+		}
 	}
 
 	return []byte(out)
diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go
index 9403a8e4..33c9edcf 100644
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
 	}
 	return budgetPtr, includePtr, matched
 }
+
+// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
+// when the target model does not advertise Thinking capability. It cleans both
+// the standard Gemini and the Gemini CLI JSON envelopes, acting as a final safety
+// net in case an upstream stage injected thinking for an unsupported model.
+func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
+	if ModelSupportsThinking(model) || len(body) == 0 {
+		return body
+	}
+	updated := body
+	// Gemini CLI envelope
+	updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
+	// Standard Gemini envelope
+	updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
+	return updated
+}
diff --git a/internal/util/thinking.go b/internal/util/thinking.go
new file mode 100644
index 00000000..c16b91cd
--- /dev/null
+++ b/internal/util/thinking.go
@@ -0,0 +1,69 @@
+package util
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+)
+
+// ModelSupportsThinking reports whether the given model has Thinking capability
+// according to the model registry metadata (provider-agnostic).
+func ModelSupportsThinking(model string) bool {
+	if model == "" {
+		return false
+	}
+	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
+		return info.Thinking != nil
+	}
+	return false
+}
+
+// NormalizeThinkingBudget clamps the requested thinking budget to the range
+// supported by the specified model, using registry metadata only. If the model
+// is unknown or has no Thinking metadata, the original budget is returned.
+// For dynamic (-1), it returns -1 when DynamicAllowed; otherwise it falls back
+// to the mid-range value, or to 0 (if allowed) or Min when the mid-range is not positive.
+func NormalizeThinkingBudget(model string, budget int) int {
+	if budget == -1 { // dynamic
+		if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
+			if dynamicAllowed {
+				return -1
+			}
+			mid := (min + max) / 2
+			if mid <= 0 && zeroAllowed {
+				return 0
+			}
+			if mid <= 0 {
+				return min
+			}
+			return mid
+		}
+		return -1
+	}
+	if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
+		if budget == 0 {
+			if zeroAllowed {
+				return 0
+			}
+			return min
+		}
+		if budget < min {
+			return min
+		}
+		if budget > max {
+			return max
+		}
+		return budget
+	}
+	return budget
+}
+
+// thinkingRangeFromRegistry reads the thinking range for a model from the model registry.
+func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
+	if model == "" {
+		return false, 0, 0, false, false
+	}
+	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	if info == nil || info.Thinking == nil {
+		return false, 0, 0, false, false
+	}
+	return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
+}
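
Reviewer note (not part of the patch): the clamping rules in util.NormalizeThinkingBudget are easier to check with a standalone sketch. The program below mirrors those rules against a hard-coded range matching gemini-2.5-pro's registered metadata (Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true); all names here are illustrative and the registry lookup is deliberately replaced by a local struct.

package main

import "fmt"

// thinkingSupport stands in for registry.ThinkingSupport in this sketch.
type thinkingSupport struct {
	min, max                    int
	zeroAllowed, dynamicAllowed bool
}

// normalize mirrors the clamping rules of util.NormalizeThinkingBudget
// for a model whose thinking range is known.
func normalize(ts thinkingSupport, budget int) int {
	switch {
	case budget == -1: // dynamic budget requested
		if ts.dynamicAllowed {
			return -1
		}
		mid := (ts.min + ts.max) / 2
		if mid <= 0 && ts.zeroAllowed {
			return 0
		}
		if mid <= 0 {
			return ts.min
		}
		return mid
	case budget == 0: // caller asked to disable thinking
		if ts.zeroAllowed {
			return 0
		}
		return ts.min // models that cannot disable thinking get the minimum
	case budget < ts.min:
		return ts.min
	case budget > ts.max:
		return ts.max
	default:
		return budget
	}
}

func main() {
	pro := thinkingSupport{min: 128, max: 32768, zeroAllowed: false, dynamicAllowed: true}
	for _, b := range []int{-1, 0, 64, 4096, 50000} {
		fmt.Printf("budget %6d -> %d\n", b, normalize(pro, b))
	}
	// Output:
	// budget     -1 -> -1
	// budget      0 -> 128
	// budget     64 -> 128
	// budget   4096 -> 4096
	// budget  50000 -> 32768
}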
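Likewise, the safety net added in util.StripThinkingConfigIfUnsupported can be exercised in isolation. The sketch below (again illustrative, not part of the patch) applies the same two sjson.DeleteBytes calls to sample payloads for both envelope shapes; deleting a path that does not exist is a no-op, which is why the same pair of deletes is safe for either shape.

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

// strip mirrors util.StripThinkingConfigIfUnsupported for a model
// already known to lack thinking support.
func strip(body []byte) []byte {
	// Gemini CLI envelope nests generationConfig under "request".
	updated, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig")
	// Standard Gemini envelope keeps generationConfig at the top level.
	updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
	return updated
}

func main() {
	cli := []byte(`{"model":"gemini-2.5-flash-image","request":{"contents":[],"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}}`)
	std := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true},"temperature":0.7}}`)
	fmt.Println(string(strip(cli))) // thinkingConfig removed from request.generationConfig
	fmt.Println(string(strip(std))) // thinkingConfig removed; temperature preserved
}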