From e5312fb5a25f23183305c592602b668a81b992d0 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 16:54:13 +0800
Subject: [PATCH 1/9] feat(antigravity): support canonical names for
 antigravity models

---
 internal/registry/model_definitions.go            | 13 ++++++++-----
 internal/runtime/executor/antigravity_executor.go |  9 +++++++--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index c82c2b67..77015d14 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -991,16 +991,19 @@ func GetIFlowModels() []*ModelInfo {
 type AntigravityModelConfig struct {
 	Thinking            *ThinkingSupport
 	MaxCompletionTokens int
+	Name                string
 }
 
 // GetAntigravityModelConfig returns static configuration for antigravity models.
 // Keys use the ALIASED model names (after modelName2Alias conversion) for direct lookup.
 func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
-		"gemini-2.5-flash":                  {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-2.5-flash-lite":             {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}},
-		"gemini-3-pro-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}},
-		"gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
-		"gemini-claude-opus-4-5-thinking":   {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
+		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
+		"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
+		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
+		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
+		"gemini-claude-sonnet-4-5-thinking":       {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
+		"gemini-claude-opus-4-5-thinking":         {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 	}
 }
 
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 730a32fb..052d4faf 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -373,9 +373,14 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 	for originalName := range result.Map() {
 		aliasName := modelName2Alias(originalName)
 		if aliasName != "" {
+			cfg := modelConfig[aliasName]
+			modelName := aliasName
+			if cfg != nil && cfg.Name != "" {
+				modelName = cfg.Name
+			}
 			modelInfo := &registry.ModelInfo{
 				ID:          aliasName,
-				Name:        aliasName,
+				Name:        modelName,
 				Description: aliasName,
 				DisplayName: aliasName,
 				Version:     aliasName,
@@ -385,7 +390,7 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 				Type:        antigravityAuthType,
 			}
 			// Look up Thinking support from static config using alias name
-			if cfg, ok := modelConfig[aliasName]; ok {
+			if cfg != nil {
 				if cfg.Thinking != nil {
 					modelInfo.Thinking = cfg.Thinking
 				}

From da23ddb061a029bd51e321058183ada19f98cda9 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 17:34:15 +0800
Subject: [PATCH 2/9] fix(gemini): normalize model listing output

---
 sdk/api/handlers/gemini/gemini_handlers.go | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/sdk/api/handlers/gemini/gemini_handlers.go b/sdk/api/handlers/gemini/gemini_handlers.go
index 7ba72a93..6cd9ee62 100644
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -48,8 +48,24 @@ func (h *GeminiAPIHandler) Models() []map[string]any {
 // GeminiModels handles the Gemini models listing endpoint.
 // It returns a JSON response containing available Gemini models and their specifications.
 func (h *GeminiAPIHandler) GeminiModels(c *gin.Context) {
+	rawModels := h.Models()
+	normalizedModels := make([]map[string]any, 0, len(rawModels))
+	defaultMethods := []string{"generateContent"}
+	for _, model := range rawModels {
+		normalizedModel := make(map[string]any, len(model))
+		for k, v := range model {
+			normalizedModel[k] = v
+		}
+		if name, ok := normalizedModel["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
+			normalizedModel["name"] = "models/" + name
+		}
+		if _, ok := normalizedModel["supportedGenerationMethods"]; !ok {
+			normalizedModel["supportedGenerationMethods"] = defaultMethods
+		}
+		normalizedModels = append(normalizedModels, normalizedModel)
+	}
 	c.JSON(http.StatusOK, gin.H{
-		"models": h.Models(),
+		"models": normalizedModels,
 	})
 }
 
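
PATCH 2 complements PATCH 1: the antigravity fetcher now registers canonical "models/..." names at the source, and the listing handler normalizes whatever other providers still report bare. The handler copies each entry before mutating it, so the registry's own maps stay untouched. Below is a minimal standalone sketch of the two listing rules; normalizeModelEntry is a hypothetical name (the patch inlines this logic in GeminiAPIHandler.GeminiModels), but the behavior mirrors the hunk above.

package main

import (
	"fmt"
	"strings"
)

// normalizeModelEntry applies the two rules from PATCH 2: prefix bare model
// IDs with "models/" and default supportedGenerationMethods when absent.
func normalizeModelEntry(model map[string]any) map[string]any {
	normalized := make(map[string]any, len(model))
	for k, v := range model {
		normalized[k] = v // shallow copy keeps the source map untouched
	}
	if name, ok := normalized["name"].(string); ok && name != "" && !strings.HasPrefix(name, "models/") {
		normalized["name"] = "models/" + name
	}
	if _, ok := normalized["supportedGenerationMethods"]; !ok {
		normalized["supportedGenerationMethods"] = []string{"generateContent"}
	}
	return normalized
}

func main() {
	m := normalizeModelEntry(map[string]any{"name": "gemini-2.5-flash"})
	fmt.Println(m["name"])                       // models/gemini-2.5-flash
	fmt.Println(m["supportedGenerationMethods"]) // [generateContent]
}
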
"openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 High", - Description: "Stable version of GPT 5, The best model for coding and agentic tasks across domains.", + DisplayName: "GPT 5.1 High", + Description: "Stable version of GPT 5.1, The best model for coding and agentic tasks across domains.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -745,8 +745,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex", - Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + DisplayName: "GPT 5.1 Codex", + Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -758,8 +758,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex Low", - Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + DisplayName: "GPT 5.1 Codex Low", + Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -771,8 +771,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex Medium", - Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + DisplayName: "GPT 5.1 Codex Medium", + Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -784,8 +784,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex High", - Description: "Stable version of GPT 5 Codex, The best model for coding and agentic tasks across domains.", + DisplayName: "GPT 5.1 Codex High", + Description: "Stable version of GPT 5.1 Codex, The best model for coding and agentic tasks across domains.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -797,8 +797,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex Mini", - Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + DisplayName: "GPT 5.1 Codex Mini", + Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -810,8 +810,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex Mini Medium", - Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + DisplayName: "GPT 5.1 Codex Mini Medium", + Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -823,8 +823,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: 
"openai", Version: "gpt-5.1-2025-11-12", - DisplayName: "GPT 5 Codex Mini High", - Description: "Stable version of GPT 5 Codex Mini: cheaper, faster, but less capable version of GPT 5 Codex.", + DisplayName: "GPT 5.1 Codex Mini High", + Description: "Stable version of GPT 5.1 Codex Mini: cheaper, faster, but less capable version of GPT 5.1 Codex.", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -837,8 +837,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-max", - DisplayName: "GPT 5 Codex Max", - Description: "Stable version of GPT 5 Codex Max", + DisplayName: "GPT 5.1 Codex Max", + Description: "Stable version of GPT 5.1 Codex Max", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -850,8 +850,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-max", - DisplayName: "GPT 5 Codex Max Low", - Description: "Stable version of GPT 5 Codex Max Low", + DisplayName: "GPT 5.1 Codex Max Low", + Description: "Stable version of GPT 5.1 Codex Max Low", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -863,8 +863,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-max", - DisplayName: "GPT 5 Codex Max Medium", - Description: "Stable version of GPT 5 Codex Max Medium", + DisplayName: "GPT 5.1 Codex Max Medium", + Description: "Stable version of GPT 5.1 Codex Max Medium", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -876,8 +876,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-max", - DisplayName: "GPT 5 Codex Max High", - Description: "Stable version of GPT 5 Codex Max High", + DisplayName: "GPT 5.1 Codex Max High", + Description: "Stable version of GPT 5.1 Codex Max High", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, @@ -889,8 +889,8 @@ func GetOpenAIModels() []*ModelInfo { OwnedBy: "openai", Type: "openai", Version: "gpt-5.1-max", - DisplayName: "GPT 5 Codex Max XHigh", - Description: "Stable version of GPT 5 Codex Max XHigh", + DisplayName: "GPT 5.1 Codex Max XHigh", + Description: "Stable version of GPT 5.1 Codex Max XHigh", ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, From 347769b3e30ff5a1562675e4f09789906bec6a10 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:09:14 +0800 Subject: [PATCH 4/9] fix(openai-compat): use model id for auth model display --- sdk/cliproxy/service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 13d647dd..1ef829d1 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -779,7 +779,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { Created: time.Now().Unix(), OwnedBy: compat.Name, Type: "openai-compatibility", - DisplayName: m.Name, + DisplayName: modelID, }) } // Register and return From 5ec9b5e5a9c6b095ce861eeedf91f1766303556c Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 09:25:25 +0800 Subject: [PATCH 5/9] feat(executor): normalize thinking budget across all Gemini executors --- .../runtime/executor/aistudio_executor.go | 5 +--- .../runtime/executor/gemini_cli_executor.go | 2 ++ 
From 5ec9b5e5a9c6b095ce861eeedf91f1766303556c Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 09:25:25 +0800
Subject: [PATCH 5/9] feat(executor): normalize thinking budget across all
 Gemini executors

---
 .../runtime/executor/aistudio_executor.go    |  5 +---
 .../runtime/executor/gemini_cli_executor.go  |  2 ++
 internal/runtime/executor/gemini_executor.go |  2 ++
 .../executor/gemini_vertex_executor.go       |  4 +++
 internal/util/gemini_thinking.go             | 26 +++++++++++++++++++
 5 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index 898c08c7..94b48de7 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -310,10 +310,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
 	payload = util.ConvertThinkingLevelToBudget(payload)
-	if budget := gjson.GetBytes(payload, "generationConfig.thinkingConfig.thinkingBudget"); budget.Exists() {
-		normalized := util.NormalizeThinkingBudget(req.Model, int(budget.Int()))
-		payload, _ = sjson.SetBytes(payload, "generationConfig.thinkingConfig.thinkingBudget", normalized)
-	}
+	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	payload = applyPayloadConfig(e.cfg, req.Model, payload)
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 147a1ea1..520320ec 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -64,6 +64,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
@@ -199,6 +200,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 	basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload)
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index fc7b8e19..4184e88b 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -80,6 +80,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -169,6 +170,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go
index de4ba072..3caf1cd0 100644
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -296,6 +296,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -391,6 +392,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -487,6 +489,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
@@ -599,6 +602,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go
index 14077fa0..85f8d74d 100644
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -223,6 +223,32 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
 	return updated
 }
 
+// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
+// request body (generationConfig.thinkingConfig.thinkingBudget path).
+func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
+	const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
+	budget := gjson.GetBytes(body, budgetPath)
+	if !budget.Exists() {
+		return body
+	}
+	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
+	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
+	return updated
+}
+
+// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
+// request body (request.generationConfig.thinkingConfig.thinkingBudget path).
+func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
+	const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
+	budget := gjson.GetBytes(body, budgetPath)
+	if !budget.Exists() {
+		return body
+	}
+	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
+	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
+	return updated
+}
+
 // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
 // and converts it to "thinkingBudget".
 // "high" -> 32768
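
Taken together, PATCH 5 and PATCH 6 (next) change where clamping happens: translators pass the caller's budget through verbatim, and each executor normalizes exactly once on the final payload. The self-contained demo below exercises the executor-side helper shape on a raw JSON body with the same gjson/sjson calls the patch uses; the clamp is stubbed with a fixed 32768 cap because the real util.NormalizeThinkingBudget is not shown in this series.

package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

// clamp stands in for util.NormalizeThinkingBudget; 32768 is an assumed cap.
func clamp(budget int) int {
	if budget > 32768 {
		return 32768
	}
	return budget
}

// normalizeGeminiThinkingBudget follows the shape of the PATCH 5 helper:
// leave the payload alone unless a budget is actually present.
func normalizeGeminiThinkingBudget(body []byte) []byte {
	const path = "generationConfig.thinkingConfig.thinkingBudget"
	v := gjson.GetBytes(body, path)
	if !v.Exists() {
		return body
	}
	updated, _ := sjson.SetBytes(body, path, clamp(int(v.Int())))
	return updated
}

func main() {
	in := []byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":100000}}}`)
	fmt.Println(string(normalizeGeminiThinkingBudget(in)))
	// {"generationConfig":{"thinkingConfig":{"thinkingBudget":32768}}}
}
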
From 5b6d201408f49be469c619cd83cebe370660470e Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 09:36:36 +0800
Subject: [PATCH 6/9] refactor(translator): remove thinking budget
 normalization across all translators

---
 .../claude/antigravity_claude_request.go      |  1 -
 .../antigravity_openai_request.go             | 20 +++++++++----------
 .../claude/gemini-cli_claude_request.go       |  1 -
 .../gemini-cli_openai_request.go              | 18 ++++++++---------
 .../gemini/claude/gemini_claude_request.go    |  1 -
 .../chat-completions/gemini_openai_request.go | 18 ++++++++---------
 .../gemini_openai-responses_request.go        | 16 +++++++--------
 7 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go
index e1b73da0..a810ba7a 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_request.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_request.go
@@ -180,7 +180,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
index 1c90a803..b3d8b04d 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int
 
 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 
@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
@@ -94,7 +94,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() {
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
-				budget := util.NormalizeThinkingBudget(modelName, int(b.Int()))
+				budget := int(b.Int())
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
index 50fd5a25..913727ce 100644
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -165,7 +165,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index d14f1119..0cb3cd76 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int
 
 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 
@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go
index 05f9be5d..45a5a88f 100644
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -158,7 +158,6 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		if t.Get("type").String() == "enabled" {
 			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
 				budget := int(b.Int())
-				budget = util.NormalizeThinkingBudget(modelName, budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
index 0df8987f..8c48a5b3 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go
@@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int
 
 			if v := tc.Get("thinkingBudget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			} else if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 
@@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
-			} else if setBudget && normalized != 0 {
+			} else if setBudget && budget != 0 {
 				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 			}
 		}
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
index 4ea75c18..1df1d226 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -400,16 +400,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 	case "minimal":
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 	case "low":
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 	case "medium":
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 	case "high":
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768)
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 	default:
 		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
@@ -421,16 +421,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
 			var setBudget bool
-			var normalized int
+			var budget int
 
 			if v := tc.Get("thinking_budget"); v.Exists() {
-				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
-				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				budget = int(v.Int())
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
 				setBudget = true
 			}
 			if v := tc.Get("include_thoughts"); v.Exists() {
 				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
 			} else if setBudget {
-				if normalized != 0 {
+				if budget != 0 {
 					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 				}
 			}
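
PATCH 7 below adds a floor rule for antigravity models: after the cap is applied, a non-dynamic budget still below the model's advertised minimum causes the entire thinkingConfig to be deleted, so the upstream default applies instead of a silently raised value. A sketch of just that rule follows; the standalone function name is illustrative, while the JSON path and the minimum of 128 for gemini-3-pro-preview come from the series itself.

package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

// dropSubMinimumThinking removes the thinking config when the normalized
// budget is non-negative (i.e., not dynamic) but below the model minimum.
func dropSubMinimumThinking(payload []byte, normalized, minBudget int) []byte {
	if minBudget > 0 && normalized >= 0 && normalized < minBudget {
		out, _ := sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
		return out
	}
	return payload
}

func main() {
	in := []byte(`{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":64}}}}`)
	fmt.Println(string(dropSubMinimumThinking(in, 64, 128)))
	// {"request":{"generationConfig":{}}}
}
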
From 6a66b6801a8d7ea17acd7f1e5766ddab4335e42e Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 11:26:42 +0800
Subject: [PATCH 7/9] feat(executor): enforce minimum thinking budget for
 antigravity models

---
 .../runtime/executor/antigravity_executor.go | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 155193da..683285d1 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -839,9 +839,12 @@ func normalizeAntigravityThinking(model string, payload []byte) []byte {
 	effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload)
 	if effectiveMax > 0 && normalized >= effectiveMax {
 		normalized = effectiveMax - 1
-		if normalized < 1 {
-			normalized = 1
-		}
+	}
+	minBudget := antigravityMinThinkingBudget(model)
+	if minBudget > 0 && normalized >= 0 && normalized < minBudget {
+		// Budget is below minimum, remove thinking config entirely
+		payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig")
+		return payload
 	}
 	if setDefaultMax {
 		if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil {
@@ -869,3 +872,12 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM
 	}
 	return 0, false
 }
+
+// antigravityMinThinkingBudget returns the minimum thinking budget for a model.
+// Falls back to -1 if no model info is found.
+func antigravityMinThinkingBudget(model string) int {
+	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil {
+		return modelInfo.Thinking.Min
+	}
+	return -1
+}

From 9b202b6c1c44adac285440390a165b481b06d292 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 13:23:50 +0800
Subject: [PATCH 8/9] fix(executor): centralize default thinking config

---
 .../runtime/executor/aistudio_executor.go     |  1 +
 .../runtime/executor/antigravity_executor.go  |  2 +
 .../runtime/executor/gemini_cli_executor.go   |  2 +
 internal/runtime/executor/gemini_executor.go  |  2 +
 .../executor/gemini_vertex_executor.go        |  4 ++
 .../antigravity_openai_request.go             |  9 ----
 .../gemini-cli_openai_request.go              |  9 ----
 .../gemini_openai-responses_request.go        | 10 -----
 internal/util/gemini_thinking.go              | 41 +++++++++++++++++++
 9 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go
index 94b48de7..d37cd2c2 100644
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -309,6 +309,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = applyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
 	payload = util.ConvertThinkingLevelToBudget(payload)
 	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 683285d1..52b91450 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -77,6 +77,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
@@ -171,6 +172,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)
 
 	baseURLs := antigravityBaseURLFallbackOrder(auth)
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 520320ec..a2e0ecec 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -64,6 +64,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
@@ -200,6 +201,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go
index 4184e88b..8879a4f1 100644
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -80,6 +80,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
@@ -170,6 +171,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	body = applyThinkingMetadata(body, req.Metadata, req.Model)
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go
index 3caf1cd0..c7d10a67 100644
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -296,6 +296,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
@@ -392,6 +393,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
@@ -489,6 +491,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
@@ -602,6 +605,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth
 		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
+	body = util.ApplyDefaultThinkingIfNeeded(req.Model, body)
 	body = util.NormalizeGeminiThinkingBudget(req.Model, body)
 	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
index b3d8b04d..717f88f7 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -102,15 +102,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 		}
 	}
 
-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	//   { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-	}
-
 	// Temperature/top_p/top_k/max_tokens
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index 0cb3cd76..b52bf224 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -88,15 +88,6 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}
 
-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	//   { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
-	}
-
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
index 1df1d226..bdf59785 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -437,16 +437,6 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 		}
 	}
 
-	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
-	// This matches the official Gemini CLI behavior which always sends:
-	//   { thinkingBudget: -1, includeThoughts: true }
-	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
-	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
-		// log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
-	}
-
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result
diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go
index 85f8d74d..fc389511 100644
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -207,6 +207,47 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
 	return budgetPtr, includePtr, matched
 }
 
+// modelsWithDefaultThinking lists models that should have thinking enabled by default
+// when no explicit thinkingConfig is provided.
+var modelsWithDefaultThinking = map[string]bool{
+	"gemini-3-pro-preview": true,
+}
+
+// ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
+func ModelHasDefaultThinking(model string) bool {
+	return modelsWithDefaultThinking[model]
+}
+
+// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
+// For standard Gemini API format (generationConfig.thinkingConfig path).
+// Returns the modified body if thinkingConfig was added, otherwise returns the original.
+func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
+	if !ModelHasDefaultThinking(model) {
+		return body
+	}
+	if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
+		return body
+	}
+	updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
+	updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
+	return updated
+}
+
+// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
+// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
+// Returns the modified body if thinkingConfig was added, otherwise returns the original.
+func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
+	if !ModelHasDefaultThinking(model) {
+		return body
+	}
+	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
+		return body
+	}
+	updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+	updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	return updated
+}
+
 // StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
 // when the target model does not advertise Thinking capability. It cleans both
 // standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net

From 70d6b95097ce38eb512484a7bd60667fd014221a Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Tue, 9 Dec 2025 14:05:08 +0800
Subject: [PATCH 9/9] feat(amp): add /news.rss proxy route

---
 internal/api/modules/amp/routes.go | 1 +
 1 file changed, 1 insertion(+)

diff --git a/internal/api/modules/amp/routes.go b/internal/api/modules/amp/routes.go
index 0c1fcadb..48fbbbb9 100644
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -156,6 +156,7 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
 	engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
 	engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
+	engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...)
 
 	// Root-level auth routes for CLI login flow
 	// Amp uses multiple auth routes: /auth/cli-login, /auth/callback, /auth/sign-in, /auth/logout