From 5ec9b5e5a9c6b095ce861eeedf91f1766303556c Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 09:25:25 +0800 Subject: [PATCH 1/5] feat(executor): normalize thinking budget across all Gemini executors --- .../runtime/executor/aistudio_executor.go | 5 +--- .../runtime/executor/gemini_cli_executor.go | 2 ++ internal/runtime/executor/gemini_executor.go | 2 ++ .../executor/gemini_vertex_executor.go | 4 +++ internal/util/gemini_thinking.go | 26 +++++++++++++++++++ 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 898c08c7..94b48de7 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -310,10 +310,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) payload = applyThinkingMetadata(payload, req.Metadata, req.Model) payload = util.ConvertThinkingLevelToBudget(payload) - if budget := gjson.GetBytes(payload, "generationConfig.thinkingConfig.thinkingBudget"); budget.Exists() { - normalized := util.NormalizeThinkingBudget(req.Model, int(budget.Int())) - payload, _ = sjson.SetBytes(payload, "generationConfig.thinkingConfig.thinkingBudget", normalized) - } + payload = util.NormalizeGeminiThinkingBudget(req.Model, payload) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload) payload = applyPayloadConfig(e.cfg, req.Model, payload) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 147a1ea1..520320ec 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -64,6 +64,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) @@ -199,6 +200,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index fc7b8e19..4184e88b 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -80,6 +80,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyThinkingMetadata(body, req.Metadata, req.Model) + body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body) @@ -169,6 +170,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyThinkingMetadata(body, req.Metadata, req.Model) + body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index de4ba072..3caf1cd0 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -296,6 +296,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body) @@ -391,6 +392,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body) @@ -487,6 +489,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body) @@ -599,6 +602,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) body = applyPayloadConfig(e.cfg, req.Model, body) diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 14077fa0..85f8d74d 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -223,6 +223,32 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte { return updated } +// NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini +// request body (generationConfig.thinkingConfig.thinkingBudget path). +func NormalizeGeminiThinkingBudget(model string, body []byte) []byte { + const budgetPath = "generationConfig.thinkingConfig.thinkingBudget" + budget := gjson.GetBytes(body, budgetPath) + if !budget.Exists() { + return body + } + normalized := NormalizeThinkingBudget(model, int(budget.Int())) + updated, _ := sjson.SetBytes(body, budgetPath, normalized) + return updated +} + +// NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI +// request body (request.generationConfig.thinkingConfig.thinkingBudget path). +func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte { + const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget" + budget := gjson.GetBytes(body, budgetPath) + if !budget.Exists() { + return body + } + normalized := NormalizeThinkingBudget(model, int(budget.Int())) + updated, _ := sjson.SetBytes(body, budgetPath, normalized) + return updated +} + // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel" // and converts it to "thinkingBudget". // "high" -> 32768 From 5b6d201408f49be469c619cd83cebe370660470e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 09:36:36 +0800 Subject: [PATCH 2/5] refactor(translator): remove thinking budget normalization across all translators --- .../claude/antigravity_claude_request.go | 1 - .../antigravity_openai_request.go | 20 +++++++++---------- .../claude/gemini-cli_claude_request.go | 1 - .../gemini-cli_openai_request.go | 18 ++++++++--------- .../gemini/claude/gemini_claude_request.go | 1 - .../chat-completions/gemini_openai_request.go | 18 ++++++++--------- .../gemini_openai-responses_request.go | 16 +++++++-------- 7 files changed, 36 insertions(+), 39 deletions(-) diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index e1b73da0..a810ba7a 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -180,7 +180,6 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) - budget = util.NormalizeThinkingBudget(modelName, budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 1c90a803..b3d8b04d 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -48,13 +48,13 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool - var normalized int + var budget int if v := tc.Get("thinkingBudget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } else if v := tc.Get("thinking_budget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } @@ -82,7 +82,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) } else if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && normalized != 0 { + } else if setBudget && budget != 0 { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } } @@ -94,7 +94,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := util.NormalizeThinkingBudget(modelName, int(b.Int())) + budget := int(b.Int()) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index 50fd5a25..913727ce 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -165,7 +165,6 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) - budget = util.NormalizeThinkingBudget(modelName, budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index d14f1119..0cb3cd76 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) case "high": - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 32768) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool - var normalized int + var budget int if v := tc.Get("thinkingBudget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } else if v := tc.Get("thinking_budget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } @@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) } else if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && normalized != 0 { + } else if setBudget && budget != 0 { out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) } } diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index 05f9be5d..45a5a88f 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -158,7 +158,6 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool) if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { budget := int(b.Int()) - budget = util.NormalizeThinkingBudget(modelName, budget) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 0df8987f..8c48a5b3 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -48,13 +48,13 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) case "high": - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 32768) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool - var normalized int + var budget int if v := tc.Get("thinkingBudget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } else if v := tc.Get("thinking_budget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } @@ -82,7 +82,7 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) } else if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) - } else if setBudget && normalized != 0 { + } else if setBudget && budget != 0 { out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true) } } diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 4ea75c18..1df1d226 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -400,16 +400,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "minimal": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "low": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "medium": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) case "high": - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768)) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 32768) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) default: out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) @@ -421,16 +421,16 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte if !hasOfficialThinking && util.ModelSupportsThinking(modelName) { if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool - var normalized int + var budget int if v := tc.Get("thinking_budget"); v.Exists() { - normalized = util.NormalizeThinkingBudget(modelName, int(v.Int())) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized) + budget = int(v.Int()) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } if v := tc.Get("include_thoughts"); v.Exists() { out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool()) } else if setBudget { - if normalized != 0 { + if budget != 0 { out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) } } From 6a66b6801a8d7ea17acd7f1e5766ddab4335e42e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 11:26:42 +0800 Subject: [PATCH 3/5] feat(executor): enforce minimum thinking budget for antigravity models --- .../runtime/executor/antigravity_executor.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 155193da..683285d1 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -839,9 +839,12 @@ func normalizeAntigravityThinking(model string, payload []byte) []byte { effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload) if effectiveMax > 0 && normalized >= effectiveMax { normalized = effectiveMax - 1 - if normalized < 1 { - normalized = 1 - } + } + minBudget := antigravityMinThinkingBudget(model) + if minBudget > 0 && normalized >= 0 && normalized < minBudget { + // Budget is below minimum, remove thinking config entirely + payload, _ = sjson.DeleteBytes(payload, "request.generationConfig.thinkingConfig") + return payload } if setDefaultMax { if res, errSet := sjson.SetBytes(payload, "request.generationConfig.maxOutputTokens", effectiveMax); errSet == nil { @@ -869,3 +872,12 @@ func antigravityEffectiveMaxTokens(model string, payload []byte) (max int, fromM } return 0, false } + +// antigravityMinThinkingBudget returns the minimum thinking budget for a model. +// Falls back to -1 if no model info is found. +func antigravityMinThinkingBudget(model string) int { + if modelInfo := registry.GetGlobalRegistry().GetModelInfo(model); modelInfo != nil && modelInfo.Thinking != nil { + return modelInfo.Thinking.Min + } + return -1 +} From 9b202b6c1c44adac285440390a165b481b06d292 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 13:23:50 +0800 Subject: [PATCH 4/5] fix(executor): centralize default thinking config --- .../runtime/executor/aistudio_executor.go | 1 + .../runtime/executor/antigravity_executor.go | 2 + .../runtime/executor/gemini_cli_executor.go | 2 + internal/runtime/executor/gemini_executor.go | 2 + .../executor/gemini_vertex_executor.go | 4 ++ .../antigravity_openai_request.go | 9 ---- .../gemini-cli_openai_request.go | 9 ---- .../gemini_openai-responses_request.go | 10 ----- internal/util/gemini_thinking.go | 41 +++++++++++++++++++ 9 files changed, 52 insertions(+), 28 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 94b48de7..d37cd2c2 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -309,6 +309,7 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c to := sdktranslator.FromString("gemini") payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) payload = applyThinkingMetadata(payload, req.Metadata, req.Model) + payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) payload = util.ConvertThinkingLevelToBudget(payload) payload = util.NormalizeGeminiThinkingBudget(req.Model, payload) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 683285d1..52b91450 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -77,6 +77,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) translated = normalizeAntigravityThinking(req.Model, translated) baseURLs := antigravityBaseURLFallbackOrder(auth) @@ -171,6 +172,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) translated = normalizeAntigravityThinking(req.Model, translated) baseURLs := antigravityBaseURLFallbackOrder(auth) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 520320ec..a2e0ecec 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -64,6 +64,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) @@ -200,6 +201,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("gemini-cli") basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 4184e88b..8879a4f1 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -80,6 +80,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyThinkingMetadata(body, req.Metadata, req.Model) + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) @@ -170,6 +171,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A to := sdktranslator.FromString("gemini") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyThinkingMetadata(body, req.Metadata, req.Model) + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index 3caf1cd0..c7d10a67 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -296,6 +296,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) @@ -392,6 +393,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) @@ -489,6 +491,7 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) @@ -602,6 +605,7 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) body = util.NormalizeGeminiThinkingBudget(req.Model, body) body = util.StripThinkingConfigIfUnsupported(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body) diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index b3d8b04d..717f88f7 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -102,15 +102,6 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ } } - // For gemini-3-pro-preview, always send default thinkingConfig when none specified. - // This matches the official Gemini CLI behavior which always sends: - // { thinkingBudget: -1, includeThoughts: true } - // See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } - // Temperature/top_p/top_k/max_tokens if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 0cb3cd76..b52bf224 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -88,15 +88,6 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } } - // For gemini-3-pro-preview, always send default thinkingConfig when none specified. - // This matches the official Gemini CLI behavior which always sends: - // { thinkingBudget: -1, includeThoughts: true } - // See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" { - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true) - } - // Temperature/top_p/top_k if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number { out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num) diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 1df1d226..bdf59785 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -437,16 +437,6 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte } } - // For gemini-3-pro-preview, always send default thinkingConfig when none specified. - // This matches the official Gemini CLI behavior which always sends: - // { thinkingBudget: -1, includeThoughts: true } - // See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts - if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" { - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) - // log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true") - } - result := []byte(out) result = common.AttachDefaultSafetySettings(result, "safetySettings") return result diff --git a/internal/util/gemini_thinking.go b/internal/util/gemini_thinking.go index 85f8d74d..fc389511 100644 --- a/internal/util/gemini_thinking.go +++ b/internal/util/gemini_thinking.go @@ -207,6 +207,47 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) { return budgetPtr, includePtr, matched } +// modelsWithDefaultThinking lists models that should have thinking enabled by default +// when no explicit thinkingConfig is provided. +var modelsWithDefaultThinking = map[string]bool{ + "gemini-3-pro-preview": true, +} + +// ModelHasDefaultThinking returns true if the model should have thinking enabled by default. +func ModelHasDefaultThinking(model string) bool { + return modelsWithDefaultThinking[model] +} + +// ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it. +// For standard Gemini API format (generationConfig.thinkingConfig path). +// Returns the modified body if thinkingConfig was added, otherwise returns the original. +func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte { + if !ModelHasDefaultThinking(model) { + return body + } + if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() { + return body + } + updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1) + updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true) + return updated +} + +// ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it. +// For Gemini CLI API format (request.generationConfig.thinkingConfig path). +// Returns the modified body if thinkingConfig was added, otherwise returns the original. +func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte { + if !ModelHasDefaultThinking(model) { + return body + } + if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() { + return body + } + updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1) + updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true) + return updated +} + // StripThinkingConfigIfUnsupported removes thinkingConfig from the request body // when the target model does not advertise Thinking capability. It cleans both // standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net From 70d6b95097ce38eb512484a7bd60667fd014221a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 9 Dec 2025 14:05:08 +0800 Subject: [PATCH 5/5] feat(amp): add /news.rss proxy route --- internal/api/modules/amp/routes.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/api/modules/amp/routes.go b/internal/api/modules/amp/routes.go index 0c1fcadb..48fbbbb9 100644 --- a/internal/api/modules/amp/routes.go +++ b/internal/api/modules/amp/routes.go @@ -156,6 +156,7 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()} engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...) engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...) + engine.GET("/news.rss", append(rootMiddleware, proxyHandler)...) // Root-level auth routes for CLI login flow // Amp uses multiple auth routes: /auth/cli-login, /auth/callback, /auth/sign-in, /auth/logout