Merge pull request #185 from router-for-me/thinking

Feat: Add reasoning effort support for Gemini models
This commit is contained in:
Luis Pater
2025-10-29 20:27:07 +08:00
committed by GitHub
12 changed files with 305 additions and 100 deletions

View File

@@ -84,6 +84,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-pro",
@@ -98,6 +99,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-lite",
@@ -112,6 +114,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
{
ID: "gemini-2.5-flash-image-preview",
@@ -126,6 +129,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
{
ID: "gemini-2.5-flash-image",
@@ -140,6 +144,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
},
}
}
@@ -152,9 +157,8 @@ func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
func GetAIStudioModels() []*ModelInfo {
models := make([]*ModelInfo, 0, 8)
models = append(models, GeminiModels()...)
models = append(models,
base := GeminiModels()
return append(base,
&ModelInfo{
ID: "gemini-pro-latest",
Object: "model",
@@ -168,6 +172,7 @@ func GetAIStudioModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
},
&ModelInfo{
ID: "gemini-flash-latest",
@@ -182,6 +187,7 @@ func GetAIStudioModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
&ModelInfo{
ID: "gemini-flash-lite-latest",
@@ -196,9 +202,9 @@ func GetAIStudioModels() []*ModelInfo {
InputTokenLimit: 1048576,
OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
},
)
return models
}
// GetOpenAIModels returns the standard OpenAI model definitions

View File

@@ -45,6 +45,23 @@ type ModelInfo struct {
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
// SupportedParameters lists supported parameters
SupportedParameters []string `json:"supported_parameters,omitempty"`
// Thinking holds provider-specific reasoning/thinking budget capabilities.
// This is optional and currently used for Gemini thinking budget normalization.
Thinking *ThinkingSupport `json:"thinking,omitempty"`
}
// ThinkingSupport describes a model family's supported internal reasoning budget range.
// Values are interpreted in provider-native token units.
type ThinkingSupport struct {
// Min is the minimum allowed thinking budget (inclusive).
Min int `json:"min,omitempty"`
// Max is the maximum allowed thinking budget (inclusive).
Max int `json:"max,omitempty"`
// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
ZeroAllowed bool `json:"zero_allowed,omitempty"`
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
}
// ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
return result
}
// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
// Returns nil if the model is unknown to the registry.
func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
r.mutex.RLock()
defer r.mutex.RUnlock()
if reg, ok := r.models[modelID]; ok && reg != nil {
return reg.Info
}
return nil
}
// convertModelToMap converts ModelInfo to the appropriate format for different handler types
func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
if model == nil {

View File

@@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
}
payload = disableGeminiThinkingConfig(payload, req.Model)
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiImageAspectRatio(req.Model, payload)
metadataAction := "generateContent"
if req.Metadata != nil {

View File

@@ -63,9 +63,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if hasOverride {
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
}
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
action := "generateContent"
@@ -92,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
var lastStatus int
var lastBody []byte
for _, attemptModel := range models {
for idx, attemptModel := range models {
payload := append([]byte(nil), basePayload...)
if action == "countTokens" {
payload = deleteJSONField(payload, "project")
@@ -101,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel)
}
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -166,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
lastBody = append([]byte(nil), data...)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
if httpResp.StatusCode == 429 {
log.Debugf("gemini cli executor: rate limited, retrying with next model")
if idx+1 < len(models) {
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
} else {
log.Debug("gemini cli executor: rate limited, no additional fallback model")
}
continue
}
@@ -196,9 +204,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if hasOverride {
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
}
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -219,11 +232,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
var lastStatus int
var lastBody []byte
for _, attemptModel := range models {
for idx, attemptModel := range models {
payload := append([]byte(nil), basePayload...)
payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel)
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token()
if errTok != nil {
@@ -282,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
lastBody = append([]byte(nil), data...)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
if httpResp.StatusCode == 429 {
log.Debugf("gemini cli executor: rate limited, retrying with next model")
if idx+1 < len(models) {
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
} else {
log.Debug("gemini cli executor: rate limited, no additional fallback model")
}
continue
}
err = statusErr{code: httpResp.StatusCode, msg: string(data)}
@@ -393,12 +409,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
for _, attemptModel := range models {
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
if hasOverride {
if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
}
payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model")
payload = disableGeminiThinkingConfig(payload, attemptModel)
payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
tok, errTok := tokenSource.Token()
@@ -623,29 +643,6 @@ func cliPreviewFallbackOrder(model string) []string {
}
}
func disableGeminiThinkingConfig(body []byte, model string) []byte {
if !geminiModelDisallowsThinking(model) {
return body
}
updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
return updated
}
func geminiModelDisallowsThinking(model string) bool {
if model == "" {
return false
}
lower := strings.ToLower(model)
for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
if strings.Contains(lower, marker) {
return true
}
}
return false
}
// setJSONField sets a top-level JSON field on a byte slice payload via sjson.
func setJSONField(body []byte, key, value string) []byte {
if key == "" {

View File

@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = disableGeminiThinkingConfig(body, req.Model)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
}
body = disableGeminiThinkingConfig(body, req.Model)
body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body)
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
from := opts.SourceFormat
to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
}
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")

View File

@@ -11,6 +11,7 @@ import (
"strings"
client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -136,7 +137,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
}
// Build output Gemini CLI request JSON
out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}`
out := `{"model":"","request":{"contents":[]}}`
out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction)
@@ -151,21 +152,16 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
out, _ = sjson.SetRaw(out, "request.tools", string(b))
}
// Map reasoning and sampling configs
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)

View File

@@ -26,32 +26,57 @@ import (
// - []byte: The transformed request data in Gemini CLI API format
func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
// Base envelope
out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`)
// Base envelope (no default thinkingConfig)
out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
// Model
out, _ = sjson.SetBytes(out, "model", modelName)
// Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() {
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() {
case "none":
out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
} else {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
}
// Temperature/top_p/top_k

View File

@@ -11,6 +11,7 @@ import (
"strings"
client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
}
// Build output Gemini CLI request JSON
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
out := `{"contents":[]}`
out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction)
@@ -144,21 +145,16 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out, _ = sjson.SetRaw(out, "tools", string(b))
}
// Map reasoning and sampling configs
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
if reasoningEffortResult.String() == "none" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
} else if reasoningEffortResult.String() == "auto" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
} else if reasoningEffortResult.String() == "low" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
} else if reasoningEffortResult.String() == "medium" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if t.Get("type").String() == "enabled" {
if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
budget := int(b.Int())
budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)

View File

@@ -26,32 +26,58 @@ import (
// - []byte: The transformed request data in Gemini API format
func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON)
// Base envelope
out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`)
// Base envelope (no default thinkingConfig)
out := []byte(`{"contents":[]}`)
// Model
out, _ = sjson.SetBytes(out, "model", modelName)
// Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() {
hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() {
case "none":
out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
// Normalize budget to model range
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
} else {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
}
// Temperature/top_p/top_k

View File

@@ -4,6 +4,7 @@ import (
"bytes"
"strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -15,8 +16,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
_ = modelName // Unused but required by interface
_ = stream // Unused but required by interface
// Base Gemini API template
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
// Base Gemini API template (do not include thinkingConfig by default)
out := `{"contents":[]}`
root := gjson.ParseBytes(rawJSON)
@@ -242,23 +243,52 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
}
if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
// OpenAI official reasoning fields take precedence
hasOfficialThinking := root.Get("reasoning.effort").Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
reasoningEffort := root.Get("reasoning.effort")
switch reasoningEffort.String() {
case "none":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "minimal":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
default:
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension (applies only when official fields are missing)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
}
}
return []byte(out)

View File

@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
}
return budgetPtr, includePtr, matched
}
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
// when the target model does not advertise Thinking capability. It cleans both
// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
// in case upstream injected thinking for an unsupported model.
func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
if ModelSupportsThinking(model) || len(body) == 0 {
return body
}
updated := body
// Gemini CLI path
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
// Standard Gemini path
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
return updated
}

69
internal/util/thinking.go Normal file
View File

@@ -0,0 +1,69 @@
package util
import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
// ModelSupportsThinking reports whether the given model has Thinking capability
// according to the model registry metadata (provider-agnostic).
func ModelSupportsThinking(model string) bool {
if model == "" {
return false
}
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
return info.Thinking != nil
}
return false
}
// NormalizeThinkingBudget clamps the requested thinking budget to the
// supported range for the specified model using registry metadata only.
// If the model is unknown or has no Thinking metadata, returns the original budget.
// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range
// or min (0 if zero is allowed and mid <= 0).
func NormalizeThinkingBudget(model string, budget int) int {
if budget == -1 { // dynamic
if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
if dynamicAllowed {
return -1
}
mid := (min + max) / 2
if mid <= 0 && zeroAllowed {
return 0
}
if mid <= 0 {
return min
}
return mid
}
return -1
}
if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
if budget == 0 {
if zeroAllowed {
return 0
}
return min
}
if budget < min {
return min
}
if budget > max {
return max
}
return budget
}
return budget
}
// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry.
func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
if model == "" {
return false, 0, 0, false, false
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return false, 0, 0, false, false
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}