diff --git a/.github/pull.yml b/.github/pull.yml new file mode 100644 index 00000000..12471f9d --- /dev/null +++ b/.github/pull.yml @@ -0,0 +1,2 @@ +version: "1" +rules: [] diff --git a/USING_WITH_FACTORY_AND_AMP.md b/USING_WITH_FACTORY_AND_AMP.md new file mode 100644 index 00000000..8c93c813 --- /dev/null +++ b/USING_WITH_FACTORY_AND_AMP.md @@ -0,0 +1,155 @@ +# Using Factory CLI (Droid) and Amp CLI with CLIProxyAPI + +## ⚠️ Important Update + +**This fork has been merged upstream!** All Amp CLI integration features developed in this fork have been accepted and merged into the official [router-for-me/CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI) repository. + +**Please use the upstream repository for the latest features, updates, and support:** + +👉 **[github.com/router-for-me/CLIProxyAPI](https://github.com/router-for-me/CLIProxyAPI)** + +This document is maintained solely for legacy link preservation from previous social media posts and shared documentation. + +--- + +## Official Documentation + +### Amp CLI Integration + +For complete instructions on using Amp CLI with CLIProxyAPI, see the official documentation: + +📖 **[Amp CLI Integration Guide](https://github.com/router-for-me/CLIProxyAPI/blob/main/docs/amp-cli-integration.md)** + +This guide covers: +- OAuth setup for Gemini Pro/Ultra, ChatGPT Plus/Pro, and Claude Pro/Max subscriptions +- Configuration for Amp CLI and Amp IDE extensions +- Provider routing and management endpoints +- Troubleshooting and best practices + +### Factory CLI (Droid) Integration + +For instructions on using Factory AI's Droid CLI with CLIProxyAPI, see: + +📖 **[Factory Droid Documentation](https://help.router-for.me/agent-client/droid.html)** + +--- + +## Quick Reference: Factory CLI Custom Models + +For quick reference, here's an example `~/.factory/config.json` configuration for using CLIProxyAPI with Factory CLI: + +```json +{ + "custom_models": [ + { + "model_display_name": "Claude Haiku 4.5 [Proxy]", + "model": 
"claude-haiku-4-5-20251001", + "base_url": "http://localhost:8317", + "api_key": "dummy-not-used", + "provider": "anthropic" + }, + { + "model_display_name": "Claude Sonnet 4.5 [Proxy]", + "model": "claude-sonnet-4-5-20250929", + "base_url": "http://localhost:8317", + "api_key": "dummy-not-used", + "provider": "anthropic" + }, + { + "model_display_name": "Claude Opus 4.1 [Proxy]", + "model": "claude-opus-4-1-20250805", + "base_url": "http://localhost:8317", + "api_key": "dummy-not-used", + "provider": "anthropic" + }, + { + "model_display_name": "GPT-5.1 Low [Proxy]", + "model": "gpt-5.1-low", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 Medium [Proxy]", + "model": "gpt-5.1-medium", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 High [Proxy]", + "model": "gpt-5.1-high", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 Codex Low [Proxy]", + "model": "gpt-5.1-codex-low", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 Codex Medium [Proxy]", + "model": "gpt-5.1-codex-medium", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 Codex High [Proxy]", + "model": "gpt-5.1-codex-high", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 Codex Mini Medium [Proxy]", + "model": "gpt-5.1-codex-mini-medium", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "GPT-5.1 Codex Mini High [Proxy]", + "model": "gpt-5.1-codex-mini-high", + "base_url": "http://localhost:8317/v1", + 
"api_key": "dummy-not-used", + "provider": "openai" + }, + { + "model_display_name": "Gemini 3 Pro Preview [Proxy]", + "model": "gemini-3-pro-preview", + "base_url": "http://localhost:8317/v1", + "api_key": "dummy-not-used", + "provider": "openai" + } + ] +} +``` + +### Key Points + +- **`base_url`**: Use `http://localhost:8317` for Anthropic models, `http://localhost:8317/v1` for OpenAI/generic models +- **`api_key`**: Use `"dummy-not-used"` when OAuth is configured via CLIProxyAPI +- **`provider`**: Set to `"anthropic"` for Claude models, `"openai"` for GPT/Gemini models + +--- + +## Installation + +Install the official CLIProxyAPI from the upstream repository: + +```bash +git clone https://github.com/router-for-me/CLIProxyAPI.git +cd CLIProxyAPI +go build -o cli-proxy-api ./cmd/server +``` + +Or via Homebrew (macOS/Linux): + +```bash +brew install cliproxyapi +brew services start cliproxyapi +``` diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index d44efc06..20f9b0a0 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -160,7 +160,7 @@ func GetGeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, }, { ID: "gemini-3-pro-image-preview", @@ -175,7 +175,7 @@ func GetGeminiModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, 
ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, }, } } @@ -240,7 +240,7 @@ func GetGeminiVertexModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, }, { ID: "gemini-3-flash-preview", @@ -255,7 +255,7 @@ func GetGeminiVertexModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, }, { ID: "gemini-3-pro-image-preview", @@ -270,7 +270,7 @@ func GetGeminiVertexModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, }, } } @@ -336,7 +336,7 @@ func GetGeminiCLIModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", 
"high"}}, }, { ID: "gemini-3-flash-preview", @@ -351,7 +351,7 @@ func GetGeminiCLIModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, }, } } @@ -417,7 +417,7 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, }, { ID: "gemini-3-flash-preview", @@ -432,7 +432,7 @@ func GetAIStudioModels() []*ModelInfo { InputTokenLimit: 1048576, OutputTokenLimit: 65536, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, - Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, }, { ID: "gemini-pro-latest", @@ -743,8 +743,9 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { "gemini-2.5-flash": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"}, "gemini-2.5-flash-lite": {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"}, "gemini-2.5-computer-use-preview-10-2025": {Name: 
"models/gemini-2.5-computer-use-preview-10-2025"}, - "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"}, - "gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"}, + "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"}, + "gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"}, + "gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"}, "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, } diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index ada0af39..cac23c87 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -323,8 +323,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c to := sdktranslator.FromString("gemini") payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) + payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) payload = 
util.ApplyDefaultThinkingIfNeeded(req.Model, payload) - payload = util.ConvertThinkingLevelToBudget(payload) + payload = util.ConvertThinkingLevelToBudget(payload, req.Model) payload = util.NormalizeGeminiThinkingBudget(req.Model, payload) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 8babaf60..8b4e37ee 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -90,6 +90,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) translated = normalizeAntigravityThinking(req.Model, translated) @@ -183,6 +184,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) translated = normalizeAntigravityThinking(req.Model, translated) @@ -515,6 +517,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, 
req.Metadata, translated) translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) translated = normalizeAntigravityThinking(req.Model, translated) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index b3acb50f..b171041a 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -79,6 +79,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) @@ -217,6 +218,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) @@ -418,6 +420,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. 
for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) + payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 9e349d99..af244b60 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -1,6 +1,7 @@ package util import ( + "regexp" "strings" "github.com/tidwall/gjson" @@ -13,6 +14,44 @@ const ( GeminiOriginalModelMetadataKey = "gemini_original_model" ) +// Gemini model family detection patterns +var ( + gemini3Pattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`) + gemini3ProPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`) + gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`) + gemini25Pattern = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`) +) + +// IsGemini3Model returns true if the model is a Gemini 3 family model. +// Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number). +func IsGemini3Model(model string) bool { + return gemini3Pattern.MatchString(model) +} + +// IsGemini3ProModel returns true if the model is a Gemini 3 Pro variant. +// Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high") +func IsGemini3ProModel(model string) bool { + return gemini3ProPattern.MatchString(model) +} + +// IsGemini3FlashModel returns true if the model is a Gemini 3 Flash variant. +// Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high") +func IsGemini3FlashModel(model string) bool { + return gemini3FlashPattern.MatchString(model) +} + +// IsGemini25Model returns true if the model is a Gemini 2.5 family model. 
+// Gemini 2.5 models should use thinkingBudget (number). +func IsGemini25Model(model string) bool { + return gemini25Pattern.MatchString(model) +} + +// Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models. +var Gemini3ProThinkingLevels = []string{"low", "high"} + +// Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models. +var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"} + func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte { if budget == nil && includeThoughts == nil { return body @@ -69,10 +108,141 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo return updated } +// ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models. +// For standard Gemini API format (generationConfig.thinkingConfig path). +// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget. +func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte { + if level == "" && includeThoughts == nil { + return body + } + updated := body + if level != "" { + valuePath := "generationConfig.thinkingConfig.thinkingLevel" + rewritten, err := sjson.SetBytes(updated, valuePath, level) + if err == nil { + updated = rewritten + } + } + // Default to including thoughts when a level is set but no explicit include flag is provided. + incl := includeThoughts + if incl == nil && level != "" { + defaultInclude := true + incl = &defaultInclude + } + if incl != nil { + valuePath := "generationConfig.thinkingConfig.includeThoughts" + rewritten, err := sjson.SetBytes(updated, valuePath, *incl) + if err == nil { + updated = rewritten + } + } + return updated +} + +// ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models. +// For Gemini CLI API format (request.generationConfig.thinkingConfig path). 
+// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget. +func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte { + if level == "" && includeThoughts == nil { + return body + } + updated := body + if level != "" { + valuePath := "request.generationConfig.thinkingConfig.thinkingLevel" + rewritten, err := sjson.SetBytes(updated, valuePath, level) + if err == nil { + updated = rewritten + } + } + // Default to including thoughts when a level is set but no explicit include flag is provided. + incl := includeThoughts + if incl == nil && level != "" { + defaultInclude := true + incl = &defaultInclude + } + if incl != nil { + valuePath := "request.generationConfig.thinkingConfig.includeThoughts" + rewritten, err := sjson.SetBytes(updated, valuePath, *incl) + if err == nil { + updated = rewritten + } + } + return updated +} + +// ValidateGemini3ThinkingLevel validates that the thinkingLevel is valid for the Gemini 3 model variant. +// Returns the validated level (normalized to lowercase) and true if valid, or empty string and false if invalid. +func ValidateGemini3ThinkingLevel(model, level string) (string, bool) { + if level == "" { + return "", false + } + normalized := strings.ToLower(strings.TrimSpace(level)) + + var validLevels []string + if IsGemini3ProModel(model) { + validLevels = Gemini3ProThinkingLevels + } else if IsGemini3FlashModel(model) { + validLevels = Gemini3FlashThinkingLevels + } else if IsGemini3Model(model) { + // Unknown Gemini 3 variant - allow all levels as fallback + validLevels = Gemini3FlashThinkingLevels + } else { + return "", false + } + + for _, valid := range validLevels { + if normalized == valid { + return normalized, true + } + } + return "", false +} + +// ThinkingBudgetToGemini3Level converts a thinkingBudget to a thinkingLevel for Gemini 3 models. +// This provides backward compatibility when thinkingBudget is provided for Gemini 3 models. 
+// Returns the appropriate thinkingLevel and true if conversion is possible. +func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) { + if !IsGemini3Model(model) { + return "", false + } + + // Map budget to level based on Google's documentation + // Gemini 3 Pro: "low", "high" (default: "high") + // Gemini 3 Flash: "minimal", "low", "medium", "high" (default: "high") + switch { + case budget == -1: + // Dynamic budget maps to "high" (API default) + return "high", true + case budget == 0: + // Zero budget - Gemini 3 doesn't support disabling thinking + // Map to lowest available level + if IsGemini3FlashModel(model) { + return "minimal", true + } + return "low", true + case budget > 0 && budget <= 512: + if IsGemini3FlashModel(model) { + return "minimal", true + } + return "low", true + case budget <= 1024: + return "low", true + case budget <= 8192: + if IsGemini3FlashModel(model) { + return "medium", true + } + return "low", true // Pro doesn't have medium, use low + default: + return "high", true + } +} + // modelsWithDefaultThinking lists models that should have thinking enabled by default // when no explicit thinkingConfig is provided. var modelsWithDefaultThinking = map[string]bool{ - "gemini-3-pro-preview": true, + "gemini-3-pro-preview": true, + "gemini-3-pro-image-preview": true, + "gemini-3-flash-preview": true, } // ModelHasDefaultThinking returns true if the model should have thinking enabled by default. @@ -83,6 +253,7 @@ func ModelHasDefaultThinking(model string) bool { // ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it. // For standard Gemini API format (generationConfig.thinkingConfig path). // Returns the modified body if thinkingConfig was added, otherwise returns the original. +// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. 
func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { if !ModelHasDefaultThinking(model) { return body @@ -90,14 +261,59 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() { return body } + // Gemini 3 models use thinkingLevel instead of thinkingBudget + if IsGemini3Model(model) { + // Don't set a default - let the API use its dynamic default ("high") + // Only set includeThoughts + updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true) + return updated + } + // Gemini 2.5 and other models use thinkingBudget updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1) updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true) return updated } +// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models. +// For standard Gemini API format (generationConfig.thinkingConfig path). +// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). +func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte { + if !IsGemini3Model(model) { + return body + } + effort, ok := ReasoningEffortFromMetadata(metadata) + if !ok || effort == "" { + return body + } + // Validate and apply the thinkingLevel + if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid { + return ApplyGeminiThinkingLevel(body, level, nil) + } + return body +} + +// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models. +// For Gemini CLI API format (request.generationConfig.thinkingConfig path). +// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)). 
+func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte { + if !IsGemini3Model(model) { + return body + } + effort, ok := ReasoningEffortFromMetadata(metadata) + if !ok || effort == "" { + return body + } + // Validate and apply the thinkingLevel + if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid { + return ApplyGeminiCLIThinkingLevel(body, level, nil) + } + return body +} + // ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it. // For Gemini CLI API format (request.generationConfig.thinkingConfig path). // Returns the modified body if thinkingConfig was added, otherwise returns the original. +// For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation. func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte { if !ModelHasDefaultThinking(model) { return body @@ -105,6 +321,14 @@ func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte { if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() { return body } + // Gemini 3 models use thinkingLevel instead of thinkingBudget + if IsGemini3Model(model) { + // Don't set a default - let the API use its dynamic default ("high") + // Only set includeThoughts + updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true) + return updated + } + // Gemini 2.5 and other models use thinkingBudget updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1) updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true) return updated @@ -128,12 +352,29 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte { // NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini // request body (generationConfig.thinkingConfig.thinkingBudget path). 
+// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation. func NormalizeGeminiThinkingBudget(model string, body []byte) []byte { const budgetPath = "generationConfig.thinkingConfig.thinkingBudget" + const levelPath = "generationConfig.thinkingConfig.thinkingLevel" + budget := gjson.GetBytes(body, budgetPath) if !budget.Exists() { return body } + + // For Gemini 3 models, convert thinkingBudget to thinkingLevel + if IsGemini3Model(model) { + if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok { + updated, _ := sjson.SetBytes(body, levelPath, level) + updated, _ = sjson.DeleteBytes(updated, budgetPath) + return updated + } + // If conversion fails, just remove the budget (let API use default) + updated, _ := sjson.DeleteBytes(body, budgetPath) + return updated + } + + // For Gemini 2.5 and other models, normalize the budget value normalized := NormalizeThinkingBudget(model, int(budget.Int())) updated, _ := sjson.SetBytes(body, budgetPath, normalized) return updated @@ -141,12 +382,29 @@ func NormalizeGeminiThinkingBudget(model string, body []byte) []byte { // NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI // request body (request.generationConfig.thinkingConfig.thinkingBudget path). +// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation. 
func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte { const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget" + const levelPath = "request.generationConfig.thinkingConfig.thinkingLevel" + budget := gjson.GetBytes(body, budgetPath) if !budget.Exists() { return body } + + // For Gemini 3 models, convert thinkingBudget to thinkingLevel + if IsGemini3Model(model) { + if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok { + updated, _ := sjson.SetBytes(body, levelPath, level) + updated, _ = sjson.DeleteBytes(updated, budgetPath) + return updated + } + // If conversion fails, just remove the budget (let API use default) + updated, _ := sjson.DeleteBytes(body, budgetPath) + return updated + } + + // For Gemini 2.5 and other models, normalize the budget value normalized := NormalizeThinkingBudget(model, int(budget.Int())) updated, _ := sjson.SetBytes(body, budgetPath, normalized) return updated @@ -218,34 +476,42 @@ func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte { } // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel" -// and converts it to "thinkingBudget". -// "high" -> 32768 -// "low" -> 128 -// It removes "thinkingLevel" after conversion. -func ConvertThinkingLevelToBudget(body []byte) []byte { +// and converts it to "thinkingBudget" for Gemini 2.5 models. +// For Gemini 3 models, preserves thinkingLevel as-is (does not convert). +// Mappings for Gemini 2.5: +// - "high" -> 32768 +// - "medium" -> 8192 +// - "low" -> 1024 +// - "minimal" -> 512 +// +// It removes "thinkingLevel" after conversion (for Gemini 2.5 only). 
func ConvertThinkingLevelToBudget(body []byte, model string) []byte {
	levelPath := "generationConfig.thinkingConfig.thinkingLevel"
	res := gjson.GetBytes(body, levelPath)
	if !res.Exists() {
		return body
	}
	// For Gemini 3 models, preserve thinkingLevel - don't convert to budget
	if IsGemini3Model(model) {
		return body
	}
	// Levels are matched case-insensitively.
	level := strings.ToLower(res.String())
	var budget int
	switch level {
	case "high":
		budget = 32768
	case "medium":
		budget = 8192
	case "low":
		budget = 1024
	case "minimal":
		budget = 512
	default:
		// Unknown level - remove it and let the API use defaults
		updated, _ := sjson.DeleteBytes(body, levelPath)
		return updated
	}

	// Set budget
	budgetPath := "generationConfig.thinkingConfig.thinkingBudget"
	updated, err := sjson.SetBytes(body, budgetPath, budget)
	if err != nil {
		// On a JSON write failure, fall back to the untouched body.
		return body
	}

	// Remove level
	updated, err = sjson.DeleteBytes(updated, levelPath)
	if err != nil {
		return body
	}
	return updated
}

// ConvertThinkingLevelToBudgetCLI checks for "request.generationConfig.thinkingConfig.thinkingLevel"
// and converts it to "thinkingBudget" for Gemini 2.5 models.
// For Gemini 3 models, preserves thinkingLevel as-is (does not convert).
+func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte { + levelPath := "request.generationConfig.thinkingConfig.thinkingLevel" + res := gjson.GetBytes(body, levelPath) + if !res.Exists() { + return body + } + + // For Gemini 3 models, preserve thinkingLevel - don't convert to budget + if IsGemini3Model(model) { + return body + } + + level := strings.ToLower(res.String()) + var budget int + switch level { + case "high": + budget = 32768 + case "medium": + budget = 8192 + case "low": + budget = 1024 + case "minimal": + budget = 512 + default: + // Unknown level - remove it and let the API use defaults + updated, _ := sjson.DeleteBytes(body, levelPath) + return updated + } + + // Set budget + budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget" + updated, err := sjson.SetBytes(body, budgetPath, budget) + if err != nil { + return body + } + + // Remove level + updated, err = sjson.DeleteBytes(updated, levelPath) + if err != nil { + return body + } + return updated +} diff --git a/test/gemini3_thinking_level_test.go b/test/gemini3_thinking_level_test.go new file mode 100644 index 00000000..b26bcff3 --- /dev/null +++ b/test/gemini3_thinking_level_test.go @@ -0,0 +1,423 @@ +package test + +import ( + "fmt" + "testing" + "time" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/tidwall/gjson" +) + +// registerGemini3Models loads Gemini 3 models into the registry for testing. 
+func registerGemini3Models(t *testing.T) func() { + t.Helper() + reg := registry.GetGlobalRegistry() + uid := fmt.Sprintf("gemini3-test-%d", time.Now().UnixNano()) + reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) + reg.RegisterClient(uid+"-aistudio", "aistudio", registry.GetAIStudioModels()) + return func() { + reg.UnregisterClient(uid + "-gemini") + reg.UnregisterClient(uid + "-aistudio") + } +} + +func TestIsGemini3Model(t *testing.T) { + cases := []struct { + model string + expected bool + }{ + {"gemini-3-pro-preview", true}, + {"gemini-3-flash-preview", true}, + {"gemini_3_pro_preview", true}, + {"gemini-3-pro", true}, + {"gemini-3-flash", true}, + {"GEMINI-3-PRO-PREVIEW", true}, + {"gemini-2.5-pro", false}, + {"gemini-2.5-flash", false}, + {"gpt-5", false}, + {"claude-sonnet-4-5", false}, + {"", false}, + } + + for _, cs := range cases { + t.Run(cs.model, func(t *testing.T) { + got := util.IsGemini3Model(cs.model) + if got != cs.expected { + t.Fatalf("IsGemini3Model(%q) = %v, want %v", cs.model, got, cs.expected) + } + }) + } +} + +func TestIsGemini3ProModel(t *testing.T) { + cases := []struct { + model string + expected bool + }{ + {"gemini-3-pro-preview", true}, + {"gemini_3_pro_preview", true}, + {"gemini-3-pro", true}, + {"GEMINI-3-PRO-PREVIEW", true}, + {"gemini-3-flash-preview", false}, + {"gemini-3-flash", false}, + {"gemini-2.5-pro", false}, + {"", false}, + } + + for _, cs := range cases { + t.Run(cs.model, func(t *testing.T) { + got := util.IsGemini3ProModel(cs.model) + if got != cs.expected { + t.Fatalf("IsGemini3ProModel(%q) = %v, want %v", cs.model, got, cs.expected) + } + }) + } +} + +func TestIsGemini3FlashModel(t *testing.T) { + cases := []struct { + model string + expected bool + }{ + {"gemini-3-flash-preview", true}, + {"gemini_3_flash_preview", true}, + {"gemini-3-flash", true}, + {"GEMINI-3-FLASH-PREVIEW", true}, + {"gemini-3-pro-preview", false}, + {"gemini-3-pro", false}, + {"gemini-2.5-flash", false}, + {"", 
false}, + } + + for _, cs := range cases { + t.Run(cs.model, func(t *testing.T) { + got := util.IsGemini3FlashModel(cs.model) + if got != cs.expected { + t.Fatalf("IsGemini3FlashModel(%q) = %v, want %v", cs.model, got, cs.expected) + } + }) + } +} + +func TestValidateGemini3ThinkingLevel(t *testing.T) { + cases := []struct { + name string + model string + level string + wantOK bool + wantVal string + }{ + // Gemini 3 Pro: supports "low", "high" + {"pro-low", "gemini-3-pro-preview", "low", true, "low"}, + {"pro-high", "gemini-3-pro-preview", "high", true, "high"}, + {"pro-minimal-invalid", "gemini-3-pro-preview", "minimal", false, ""}, + {"pro-medium-invalid", "gemini-3-pro-preview", "medium", false, ""}, + + // Gemini 3 Flash: supports "minimal", "low", "medium", "high" + {"flash-minimal", "gemini-3-flash-preview", "minimal", true, "minimal"}, + {"flash-low", "gemini-3-flash-preview", "low", true, "low"}, + {"flash-medium", "gemini-3-flash-preview", "medium", true, "medium"}, + {"flash-high", "gemini-3-flash-preview", "high", true, "high"}, + + // Case insensitivity + {"flash-LOW-case", "gemini-3-flash-preview", "LOW", true, "low"}, + {"flash-High-case", "gemini-3-flash-preview", "High", true, "high"}, + {"pro-HIGH-case", "gemini-3-pro-preview", "HIGH", true, "high"}, + + // Invalid levels + {"flash-invalid", "gemini-3-flash-preview", "xhigh", false, ""}, + {"flash-invalid-auto", "gemini-3-flash-preview", "auto", false, ""}, + {"flash-empty", "gemini-3-flash-preview", "", false, ""}, + + // Non-Gemini 3 models + {"non-gemini3", "gemini-2.5-pro", "high", false, ""}, + {"gpt5", "gpt-5", "high", false, ""}, + } + + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + got, ok := util.ValidateGemini3ThinkingLevel(cs.model, cs.level) + if ok != cs.wantOK { + t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) ok = %v, want %v", cs.model, cs.level, ok, cs.wantOK) + } + if got != cs.wantVal { + t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) = %q, want %q", 
cs.model, cs.level, got, cs.wantVal) + } + }) + } +} + +func TestThinkingBudgetToGemini3Level(t *testing.T) { + cases := []struct { + name string + model string + budget int + wantOK bool + wantVal string + }{ + // Gemini 3 Pro: maps to "low" or "high" + {"pro-dynamic", "gemini-3-pro-preview", -1, true, "high"}, + {"pro-zero", "gemini-3-pro-preview", 0, true, "low"}, + {"pro-small", "gemini-3-pro-preview", 1000, true, "low"}, + {"pro-medium", "gemini-3-pro-preview", 8000, true, "low"}, + {"pro-large", "gemini-3-pro-preview", 20000, true, "high"}, + {"pro-huge", "gemini-3-pro-preview", 50000, true, "high"}, + + // Gemini 3 Flash: maps to "minimal", "low", "medium", "high" + {"flash-dynamic", "gemini-3-flash-preview", -1, true, "high"}, + {"flash-zero", "gemini-3-flash-preview", 0, true, "minimal"}, + {"flash-tiny", "gemini-3-flash-preview", 500, true, "minimal"}, + {"flash-small", "gemini-3-flash-preview", 1000, true, "low"}, + {"flash-medium-val", "gemini-3-flash-preview", 8000, true, "medium"}, + {"flash-large", "gemini-3-flash-preview", 20000, true, "high"}, + {"flash-huge", "gemini-3-flash-preview", 50000, true, "high"}, + + // Non-Gemini 3 models should return false + {"gemini25-budget", "gemini-2.5-pro", 8000, false, ""}, + {"gpt5-budget", "gpt-5", 8000, false, ""}, + } + + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + got, ok := util.ThinkingBudgetToGemini3Level(cs.model, cs.budget) + if ok != cs.wantOK { + t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) ok = %v, want %v", cs.model, cs.budget, ok, cs.wantOK) + } + if got != cs.wantVal { + t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) = %q, want %q", cs.model, cs.budget, got, cs.wantVal) + } + }) + } +} + +func TestApplyGemini3ThinkingLevelFromMetadata(t *testing.T) { + cleanup := registerGemini3Models(t) + defer cleanup() + + cases := []struct { + name string + model string + metadata map[string]any + inputBody string + wantLevel string + wantInclude bool + wantNoChange bool + }{ + { + 
name: "flash-minimal-from-suffix", + model: "gemini-3-flash-preview", + metadata: map[string]any{"reasoning_effort": "minimal"}, + inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, + wantLevel: "minimal", + wantInclude: true, + }, + { + name: "flash-medium-from-suffix", + model: "gemini-3-flash-preview", + metadata: map[string]any{"reasoning_effort": "medium"}, + inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, + wantLevel: "medium", + wantInclude: true, + }, + { + name: "pro-high-from-suffix", + model: "gemini-3-pro-preview", + metadata: map[string]any{"reasoning_effort": "high"}, + inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, + wantLevel: "high", + wantInclude: true, + }, + { + name: "no-metadata-no-change", + model: "gemini-3-flash-preview", + metadata: nil, + inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, + wantNoChange: true, + }, + { + name: "non-gemini3-no-change", + model: "gemini-2.5-pro", + metadata: map[string]any{"reasoning_effort": "high"}, + inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`, + wantNoChange: true, + }, + { + name: "invalid-level-no-change", + model: "gemini-3-flash-preview", + metadata: map[string]any{"reasoning_effort": "xhigh"}, + inputBody: `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`, + wantNoChange: true, + }, + } + + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + input := []byte(cs.inputBody) + result := util.ApplyGemini3ThinkingLevelFromMetadata(cs.model, cs.metadata, input) + + if cs.wantNoChange { + if string(result) != cs.inputBody { + t.Fatalf("expected no change, but got: %s", string(result)) + } + return + } + + level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") + if !level.Exists() { + t.Fatalf("thinkingLevel not set in result: %s", string(result)) + } + if level.String() != cs.wantLevel { + 
t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) + } + + include := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts") + if cs.wantInclude && (!include.Exists() || !include.Bool()) { + t.Fatalf("includeThoughts should be true, got: %s", string(result)) + } + }) + } +} + +func TestApplyGemini3ThinkingLevelFromMetadataCLI(t *testing.T) { + cleanup := registerGemini3Models(t) + defer cleanup() + + cases := []struct { + name string + model string + metadata map[string]any + inputBody string + wantLevel string + wantInclude bool + wantNoChange bool + }{ + { + name: "flash-minimal-from-suffix-cli", + model: "gemini-3-flash-preview", + metadata: map[string]any{"reasoning_effort": "minimal"}, + inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, + wantLevel: "minimal", + wantInclude: true, + }, + { + name: "flash-low-from-suffix-cli", + model: "gemini-3-flash-preview", + metadata: map[string]any{"reasoning_effort": "low"}, + inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, + wantLevel: "low", + wantInclude: true, + }, + { + name: "pro-low-from-suffix-cli", + model: "gemini-3-pro-preview", + metadata: map[string]any{"reasoning_effort": "low"}, + inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, + wantLevel: "low", + wantInclude: true, + }, + { + name: "no-metadata-no-change-cli", + model: "gemini-3-flash-preview", + metadata: nil, + inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`, + wantNoChange: true, + }, + { + name: "non-gemini3-no-change-cli", + model: "gemini-2.5-pro", + metadata: map[string]any{"reasoning_effort": "high"}, + inputBody: `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}}`, + wantNoChange: true, + }, + } + + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + input := []byte(cs.inputBody) + result := 
util.ApplyGemini3ThinkingLevelFromMetadataCLI(cs.model, cs.metadata, input) + + if cs.wantNoChange { + if string(result) != cs.inputBody { + t.Fatalf("expected no change, but got: %s", string(result)) + } + return + } + + level := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel") + if !level.Exists() { + t.Fatalf("thinkingLevel not set in result: %s", string(result)) + } + if level.String() != cs.wantLevel { + t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) + } + + include := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts") + if cs.wantInclude && (!include.Exists() || !include.Bool()) { + t.Fatalf("includeThoughts should be true, got: %s", string(result)) + } + }) + } +} + +func TestNormalizeGeminiThinkingBudget_Gemini3Conversion(t *testing.T) { + cleanup := registerGemini3Models(t) + defer cleanup() + + cases := []struct { + name string + model string + inputBody string + wantLevel string + wantBudget bool // if true, expect thinkingBudget instead of thinkingLevel + }{ + { + name: "gemini3-flash-budget-to-level", + model: "gemini-3-flash-preview", + inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`, + wantLevel: "medium", + }, + { + name: "gemini3-pro-budget-to-level", + model: "gemini-3-pro-preview", + inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":20000}}}`, + wantLevel: "high", + }, + { + name: "gemini25-keeps-budget", + model: "gemini-2.5-pro", + inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`, + wantBudget: true, + }, + } + + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + result := util.NormalizeGeminiThinkingBudget(cs.model, []byte(cs.inputBody)) + + if cs.wantBudget { + budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget") + if !budget.Exists() { + t.Fatalf("thinkingBudget should exist for non-Gemini3 model: %s", string(result)) + } + level := 
gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") + if level.Exists() { + t.Fatalf("thinkingLevel should not exist for non-Gemini3 model: %s", string(result)) + } + } else { + level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel") + if !level.Exists() { + t.Fatalf("thinkingLevel should exist for Gemini3 model: %s", string(result)) + } + if level.String() != cs.wantLevel { + t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel) + } + budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget") + if budget.Exists() { + t.Fatalf("thinkingBudget should be removed for Gemini3 model: %s", string(result)) + } + } + }) + } +}