diff --git a/internal/api/modules/amp/fallback_handlers.go b/internal/api/modules/amp/fallback_handlers.go index 940bd5e8..7d7f7f5f 100644 --- a/internal/api/modules/amp/fallback_handlers.go +++ b/internal/api/modules/amp/fallback_handlers.go @@ -8,6 +8,7 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" "github.com/tidwall/gjson" @@ -134,10 +135,11 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc } // Normalize model (handles dynamic thinking suffixes) - normalizedModel, thinkingMetadata := util.NormalizeThinkingModel(modelName) + suffixResult := thinking.ParseSuffix(modelName) + normalizedModel := suffixResult.ModelName thinkingSuffix := "" - if thinkingMetadata != nil && strings.HasPrefix(modelName, normalizedModel) { - thinkingSuffix = modelName[len(normalizedModel):] + if suffixResult.HasSuffix { + thinkingSuffix = "(" + suffixResult.RawSuffix + ")" } resolveMappedModel := func() (string, []string) { @@ -157,13 +159,13 @@ func (fh *FallbackHandler) WrapHandler(handler gin.HandlerFunc) gin.HandlerFunc // Preserve dynamic thinking suffix (e.g. "(xhigh)") when mapping applies, unless the target // already specifies its own thinking suffix. 
if thinkingSuffix != "" { - _, mappedThinkingMetadata := util.NormalizeThinkingModel(mappedModel) - if mappedThinkingMetadata == nil { + mappedSuffixResult := thinking.ParseSuffix(mappedModel) + if !mappedSuffixResult.HasSuffix { mappedModel += thinkingSuffix } } - mappedBaseModel, _ := util.NormalizeThinkingModel(mappedModel) + mappedBaseModel := thinking.ParseSuffix(mappedModel).ModelName mappedProviders := util.GetProviderName(mappedBaseModel) if len(mappedProviders) == 0 { return "", nil diff --git a/internal/api/modules/amp/model_mapping.go b/internal/api/modules/amp/model_mapping.go index 4b629b62..4159a2b5 100644 --- a/internal/api/modules/amp/model_mapping.go +++ b/internal/api/modules/amp/model_mapping.go @@ -8,6 +8,7 @@ import ( "sync" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" ) @@ -44,6 +45,11 @@ func NewModelMapper(mappings []config.AmpModelMapping) *DefaultModelMapper { // MapModel checks if a mapping exists for the requested model and if the // target model has available local providers. Returns the mapped model name // or empty string if no valid mapping exists. +// +// If the requested model contains a thinking suffix (e.g., "g25p(8192)"), +// the suffix is preserved in the returned model name (e.g., "gemini-2.5-pro(8192)"). +// However, if the mapping target already contains a suffix, the config suffix +// takes priority over the user's suffix. 
func (m *DefaultModelMapper) MapModel(requestedModel string) string { if requestedModel == "" { return "" @@ -52,16 +58,20 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string { m.mu.RLock() defer m.mu.RUnlock() - // Normalize the requested model for lookup - normalizedRequest := strings.ToLower(strings.TrimSpace(requestedModel)) + // Extract thinking suffix from requested model using ParseSuffix + requestResult := thinking.ParseSuffix(requestedModel) + baseModel := requestResult.ModelName - // Check for direct mapping - targetModel, exists := m.mappings[normalizedRequest] + // Normalize the base model for lookup (case-insensitive) + normalizedBase := strings.ToLower(strings.TrimSpace(baseModel)) + + // Check for direct mapping using base model name + targetModel, exists := m.mappings[normalizedBase] if !exists { - // Try regex mappings in order - base, _ := util.NormalizeThinkingModel(requestedModel) + // Try regex mappings in order using base model only + // (suffix is handled separately via ParseSuffix) for _, rm := range m.regexps { - if rm.re.MatchString(requestedModel) || (base != "" && rm.re.MatchString(base)) { + if rm.re.MatchString(baseModel) { targetModel = rm.to exists = true break @@ -72,14 +82,28 @@ func (m *DefaultModelMapper) MapModel(requestedModel string) string { } } - // Verify target model has available providers - normalizedTarget, _ := util.NormalizeThinkingModel(targetModel) - providers := util.GetProviderName(normalizedTarget) + // Check if target model already has a thinking suffix (config priority) + targetResult := thinking.ParseSuffix(targetModel) + + // Verify target model has available providers (use base model for lookup) + providers := util.GetProviderName(targetResult.ModelName) if len(providers) == 0 { log.Debugf("amp model mapping: target model %s has no available providers, skipping mapping", targetModel) return "" } + // Suffix handling: config suffix takes priority, otherwise preserve user suffix + if 
targetResult.HasSuffix { + // Config's "to" already contains a suffix - use it as-is (config priority) + return targetModel + } + + // Preserve user's thinking suffix on the mapped model + // (skip empty suffixes to avoid returning "model()") + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return targetModel + "(" + requestResult.RawSuffix + ")" + } + // Note: Detailed routing log is handled by logAmpRouting in fallback_handlers.go return targetModel } diff --git a/internal/api/modules/amp/model_mapping_test.go b/internal/api/modules/amp/model_mapping_test.go index 1b36f212..53165d22 100644 --- a/internal/api/modules/amp/model_mapping_test.go +++ b/internal/api/modules/amp/model_mapping_test.go @@ -217,10 +217,10 @@ func TestModelMapper_Regex_MatchBaseWithoutParens(t *testing.T) { mapper := NewModelMapper(mappings) - // Incoming model has reasoning suffix but should match base via regex + // Incoming model has reasoning suffix, regex matches base, suffix is preserved result := mapper.MapModel("gpt-5(high)") - if result != "gemini-2.5-pro" { - t.Errorf("Expected gemini-2.5-pro, got %s", result) + if result != "gemini-2.5-pro(high)" { + t.Errorf("Expected gemini-2.5-pro(high), got %s", result) } } @@ -281,3 +281,95 @@ func TestModelMapper_Regex_CaseInsensitive(t *testing.T) { t.Errorf("Expected claude-sonnet-4, got %s", result) } } + +func TestModelMapper_SuffixPreservation(t *testing.T) { + reg := registry.GetGlobalRegistry() + + // Register test models + reg.RegisterClient("test-client-suffix", "gemini", []*registry.ModelInfo{ + {ID: "gemini-2.5-pro", OwnedBy: "google", Type: "gemini"}, + }) + reg.RegisterClient("test-client-suffix-2", "claude", []*registry.ModelInfo{ + {ID: "claude-sonnet-4", OwnedBy: "anthropic", Type: "claude"}, + }) + defer reg.UnregisterClient("test-client-suffix") + defer reg.UnregisterClient("test-client-suffix-2") + + tests := []struct { + name string + mappings []config.AmpModelMapping + input string + want string + }{ + 
{ + name: "numeric suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(8192)", + want: "gemini-2.5-pro(8192)", + }, + { + name: "level suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(high)", + want: "gemini-2.5-pro(high)", + }, + { + name: "no suffix unchanged", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p", + want: "gemini-2.5-pro", + }, + { + name: "config suffix takes priority", + mappings: []config.AmpModelMapping{{From: "alias", To: "gemini-2.5-pro(medium)"}}, + input: "alias(high)", + want: "gemini-2.5-pro(medium)", + }, + { + name: "regex with suffix preserved", + mappings: []config.AmpModelMapping{{From: "^g25.*", To: "gemini-2.5-pro", Regex: true}}, + input: "g25p(8192)", + want: "gemini-2.5-pro(8192)", + }, + { + name: "auto suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(auto)", + want: "gemini-2.5-pro(auto)", + }, + { + name: "none suffix preserved", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p(none)", + want: "gemini-2.5-pro(none)", + }, + { + name: "case insensitive base lookup with suffix", + mappings: []config.AmpModelMapping{{From: "G25P", To: "gemini-2.5-pro"}}, + input: "g25p(high)", + want: "gemini-2.5-pro(high)", + }, + { + name: "empty suffix filtered out", + mappings: []config.AmpModelMapping{{From: "g25p", To: "gemini-2.5-pro"}}, + input: "g25p()", + want: "gemini-2.5-pro", + }, + { + name: "incomplete suffix treated as no suffix", + mappings: []config.AmpModelMapping{{From: "g25p(high", To: "gemini-2.5-pro"}}, + input: "g25p(high", + want: "gemini-2.5-pro", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mapper := NewModelMapper(tt.mappings) + got := mapper.MapModel(tt.input) + if got != tt.want { + t.Errorf("MapModel(%q) = %q, want %q", 
tt.input, got, tt.want) + } + }) + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 6843d6b8..effb44f5 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -266,6 +266,9 @@ type ClaudeKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k ClaudeKey) GetAPIKey() string { return k.APIKey } +func (k ClaudeKey) GetBaseURL() string { return k.BaseURL } + // ClaudeModel describes a mapping between an alias and the actual upstream model name. type ClaudeModel struct { // Name is the upstream model identifier used when issuing requests. @@ -308,6 +311,9 @@ type CodexKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k CodexKey) GetAPIKey() string { return k.APIKey } +func (k CodexKey) GetBaseURL() string { return k.BaseURL } + // CodexModel describes a mapping between an alias and the actual upstream model name. type CodexModel struct { // Name is the upstream model identifier used when issuing requests. @@ -349,6 +355,9 @@ type GeminiKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +func (k GeminiKey) GetAPIKey() string { return k.APIKey } +func (k GeminiKey) GetBaseURL() string { return k.BaseURL } + // GeminiModel describes a mapping between an alias and the actual upstream model name. type GeminiModel struct { // Name is the upstream model identifier used when issuing requests. @@ -406,6 +415,9 @@ type OpenAICompatibilityModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m OpenAICompatibilityModel) GetName() string { return m.Name } +func (m OpenAICompatibilityModel) GetAlias() string { return m.Alias } + // LoadConfig reads a YAML configuration file from the given path, // unmarshals it into a Config struct, applies environment variable overrides, // and returns it. 
diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go index 632bf7cc..786c5318 100644 --- a/internal/config/vertex_compat.go +++ b/internal/config/vertex_compat.go @@ -36,6 +36,9 @@ type VertexCompatKey struct { Models []VertexCompatModel `yaml:"models,omitempty" json:"models,omitempty"` } +func (k VertexCompatKey) GetAPIKey() string { return k.APIKey } +func (k VertexCompatKey) GetBaseURL() string { return k.BaseURL } + // VertexCompatModel represents a model configuration for Vertex compatibility, // including the actual model name and its alias for API routing. type VertexCompatModel struct { diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index bea2ecc3..268caeb4 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -27,7 +27,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.5 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, }, { ID: "claude-opus-4-5-20251101", @@ -39,7 +39,7 @@ func GetClaudeModels() []*ModelInfo { Description: "Premium model combining maximum intelligence with practical performance", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, }, { ID: "claude-opus-4-1-20250805", @@ -50,7 +50,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.1 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: 
"claude-opus-4-20250514", @@ -61,7 +61,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "claude-sonnet-4-20250514", @@ -72,7 +72,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "claude-3-7-sonnet-20250219", @@ -83,7 +83,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.7 Sonnet", ContextLength: 128000, MaxCompletionTokens: 8192, - Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, + Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: false, DynamicAllowed: false}, }, { ID: "claude-3-5-haiku-20241022", @@ -777,8 +777,8 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { "gemini-3-pro-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"}, "gemini-3-pro-image-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"}, "gemini-3-flash-preview": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"}, - "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, - 
"gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "gemini-claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, + "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: false}, MaxCompletionTokens: 64000}, } } diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index a4e9acdf..c90f6f61 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -51,6 +51,11 @@ type ModelInfo struct { // Thinking holds provider-specific reasoning/thinking budget capabilities. // This is optional and currently used for Gemini thinking budget normalization. Thinking *ThinkingSupport `json:"thinking,omitempty"` + + // UserDefined indicates this model was defined through config file's models[] + // array (e.g., openai-compatibility.*.models[], *-api-key.models[]). + // UserDefined models have thinking configuration passed through without validation. + UserDefined bool `json:"-"` } // ThinkingSupport describes a model family's supported internal reasoning budget range. 
diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index c3e3edb0..cf8e216e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -14,7 +14,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/wsrelay" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -111,7 +111,8 @@ func (e *AIStudioExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.A // Execute performs a non-streaming request to the AI Studio API. func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) translatedReq, body, err := e.translateRequest(req, opts, false) @@ -119,7 +120,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, return resp, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -166,7 +167,8 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, // ExecuteStream performs a streaming request to the AI Studio API. 
func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) translatedReq, body, err := e.translateRequest(req, opts, true) @@ -174,7 +176,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth return nil, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + endpoint := e.buildEndpoint(baseModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -315,6 +317,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth // CountTokens counts tokens for the given request using the AI Studio API. func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName _, body, err := e.translateRequest(req, opts, false) if err != nil { return cliproxyexecutor.Response{}, err @@ -324,7 +327,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A body.payload, _ = sjson.DeleteBytes(body.payload, "tools") body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings") - endpoint := e.buildEndpoint(req.Model, "countTokens", "") + endpoint := e.buildEndpoint(baseModel, "countTokens", "") wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -380,22 +383,19 @@ type translatedPayload struct { } func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) { + baseModel := 
thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("gemini") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, stream) - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) - payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) - payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) - payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true) - payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true) - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiImageAspectRatio(req.Model, payload) - payload = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", payload, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) + payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini") + payload = fixGeminiImageAspectRatio(baseModel, payload) + payload = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", payload, originalTranslated) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 8d1ef23d..4f704c05 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -24,6 +24,7 @@ import ( 
"github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -107,8 +108,10 @@ func (e *AntigravityExecutor) HttpRequest(ctx context.Context, auth *cliproxyaut // Execute performs a non-streaming request to the Antigravity API. func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") - if isClaude || strings.Contains(req.Model, "gemini-3-pro") { + baseModel := thinking.ParseSuffix(req.Model).ModelName + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") + + if isClaude || strings.Contains(baseModel, "gemini-3-pro") { return e.executeClaudeNonStream(ctx, auth, req, opts) } @@ -120,23 +123,24 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + translated := 
sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + translated = normalizeAntigravityThinking(baseModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -146,7 +150,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, false, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -227,6 +231,8 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API. 
func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { return resp, errToken @@ -235,23 +241,24 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, true) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + translated = normalizeAntigravityThinking(baseModel, translated, true) + translated = 
applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -261,7 +268,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -587,7 +594,10 @@ func (e *AntigravityExecutor) convertStreamToNonStream(stream []byte) []byte { // ExecuteStream performs a streaming request to the Antigravity API. func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + ctx = context.WithValue(ctx, "alt", "") + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { @@ -597,25 +607,24 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya auth = updatedAuth } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") - from := opts.SourceFormat to := sdktranslator.FromString("antigravity") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, 
bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) - translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated, originalTranslated) + translated, _ = thinking.ApplyThinking(translated, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + translated = normalizeAntigravityThinking(baseModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, "antigravity", "request", translated, originalTranslated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -625,7 +634,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, translated, true, opts.Alt, baseURL) if errReq != nil { err = errReq return nil, err @@ -771,6 +780,9 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au // CountTokens counts tokens for the given request using the Antigravity API. 
func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + isClaude := strings.Contains(strings.ToLower(baseModel), "claude") + token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) if errToken != nil { return cliproxyexecutor.Response{}, errToken @@ -786,7 +798,16 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("antigravity") respCtx := context.WithValue(ctx, "alt", opts.Alt) - isClaude := strings.Contains(strings.ToLower(req.Model), "claude") + // Prepare payload once (doesn't depend on baseURL) + payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + payload, _ = thinking.ApplyThinking(payload, req.Model, "antigravity") + + // Preserve Claude special handling (use baseModel for registry lookups) + payload = normalizeAntigravityThinking(baseModel, payload, isClaude) + payload = deleteJSONField(payload, "project") + payload = deleteJSONField(payload, "model") + payload = deleteJSONField(payload, "request.safetySettings") baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -803,14 +824,6 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut var lastErr error for idx, baseURL := range baseURLs { - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload) - payload = normalizeAntigravityThinking(req.Model, payload, isClaude) - payload = deleteJSONField(payload, "project") - payload = deleteJSONField(payload, "model") - payload = deleteJSONField(payload, "request.safetySettings") - base := 
strings.TrimSuffix(baseURL, "/") if base == "" { base = buildBaseURL(auth) @@ -1462,11 +1475,18 @@ func alias2ModelName(modelName string) string { } } -// normalizeAntigravityThinking clamps or removes thinking config based on model support. -// For Claude models, it additionally ensures thinking budget < max_tokens. +// normalizeAntigravityThinking performs Antigravity-specific thinking config normalization. +// This function is called AFTER thinking.ApplyThinking() to apply Claude-specific constraints. +// +// It handles: +// - Stripping thinking config for unsupported models (via util.StripThinkingConfigIfUnsupported) +// - Normalizing budget to model range (via thinking.ClampBudget) +// - For Claude models: ensuring thinking budget < max_tokens +// - For Claude models: removing thinkingConfig if budget < minimum allowed func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) []byte { payload = util.StripThinkingConfigIfUnsupported(model, payload) - if !util.ModelSupportsThinking(model) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(model) + if modelInfo == nil || modelInfo.Thinking == nil { return payload } budget := gjson.GetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget") @@ -1474,7 +1494,7 @@ func normalizeAntigravityThinking(model string, payload []byte, isClaude bool) [ return payload } raw := int(budget.Int()) - normalized := util.NormalizeThinkingBudget(model, raw) + normalized := thinking.ClampBudget(raw, modelInfo.Thinking.Min, modelInfo.Thinking.Max) if isClaude { effectiveMax, setDefaultMax := antigravityEffectiveMaxTokens(model, payload) diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 4242a244..9f2a5b22 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -18,6 +18,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -84,17 +85,15 @@ func (e *ClaudeExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut } func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } from := opts.SourceFormat to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. 
@@ -103,22 +102,22 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, stream) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", model) - // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(model, req.Metadata, body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, stream) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", baseModel) - if !strings.HasPrefix(model, "claude-3-5-haiku") { + body, _ = thinking.ApplyThinking(body, req.Model, "claude") + + if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(model, body) + body = ensureMaxTokensForThinking(baseModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -218,36 +217,35 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + 
apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body, _ = sjson.SetBytes(body, "model", model) - // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(model, req.Metadata, body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "claude") + body = checkSystemInstructions(body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(model, body) + body = ensureMaxTokensForThinking(baseModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -381,8 +379,9 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A } func (e *ClaudeExecutor) 
CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, baseURL := claudeCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := claudeCreds(auth) if baseURL == "" { baseURL = "https://api.anthropic.com" } @@ -391,14 +390,10 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. stream := from != to - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", model) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", baseModel) - if !strings.HasPrefix(model, "claude-3-5-haiku") { + if !strings.HasPrefix(baseModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } @@ -527,17 +522,6 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) { return betas, body } -// injectThinkingConfig adds thinking configuration based on metadata using the unified flow. -// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata -// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini. -func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte { - budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata) - if !ok { - return body - } - return util.ApplyClaudeThinkingConfig(body, budget) -} - // disableThinkingIfToolChoiceForced checks if tool_choice forces tool use and disables thinking. 
// Anthropic API does not allow thinking when tool_choice is set to "any" or a specific tool. // See: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations @@ -587,51 +571,6 @@ func ensureMaxTokensForThinking(modelName string, body []byte) []byte { return body } -func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveClaudeConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. - candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *ClaudeExecutor) resolveClaudeConfig(auth *cliproxyauth.Auth) *config.ClaudeKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 2f4c6295..9e553e3c 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -13,6 +13,7 @@ import ( codexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/codex" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -72,18 +73,15 @@ func (e *CodexExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth } func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - apiKey, baseURL := codexCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := codexCreds(auth) if baseURL == "" { baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -93,17 +91,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re originalPayload = bytes.Clone(opts.OriginalRequest) } originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent) - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent) - body = sdktranslator.TranslateRequest(from, to, model, body, false) + body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = misc.StripCodexUserAgent(body) - body = 
ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) - body = NormalizeThinkingConfig(body, model, false) - if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { - return resp, errValidate - } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + + body, _ = thinking.ApplyThinking(body, req.Model, "codex") + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") @@ -182,18 +178,15 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re } func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - apiKey, baseURL := codexCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := codexCreds(auth) if baseURL == "" { baseURL = "https://chatgpt.com/backend-api/codex" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("codex") @@ -203,20 +196,17 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au originalPayload = bytes.Clone(opts.OriginalRequest) } originalPayload = misc.InjectCodexUserAgent(originalPayload, userAgent) - originalTranslated := sdktranslator.TranslateRequest(from, to, model, 
originalPayload, true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent) - body = sdktranslator.TranslateRequest(from, to, model, body, true) + body = sdktranslator.TranslateRequest(from, to, baseModel, body, true) body = misc.StripCodexUserAgent(body) - body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) - body = NormalizeThinkingConfig(body, model, false) - if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { - return nil, errValidate - } - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) + body, _ = thinking.ApplyThinking(body, req.Model, "codex") + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") - body, _ = sjson.SetBytes(body, "model", model) + body, _ = sjson.SetBytes(body, "model", baseModel) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -303,25 +293,23 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + baseModel := thinking.ParseSuffix(req.Model).ModelName from := opts.SourceFormat to := sdktranslator.FromString("codex") userAgent := codexUserAgent(ctx) body := misc.InjectCodexUserAgent(bytes.Clone(req.Payload), userAgent) - body = sdktranslator.TranslateRequest(from, to, model, body, false) + body = sdktranslator.TranslateRequest(from, to, baseModel, body, false) body = 
misc.StripCodexUserAgent(body) - body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) - body, _ = sjson.SetBytes(body, "model", model) + body, _ = thinking.ApplyThinking(body, req.Model, "codex") + + body, _ = sjson.SetBytes(body, "model", baseModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.DeleteBytes(body, "prompt_cache_retention") body, _ = sjson.SetBytes(body, "stream", false) - enc, err := tokenizerForCodexModel(model) + enc, err := tokenizerForCodexModel(baseModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err) } @@ -593,51 +581,6 @@ func codexCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) { return } -func (e *CodexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveCodexConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. 
- candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *CodexExecutor) resolveCodexConfig(auth *cliproxyauth.Auth) *config.CodexKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 20b93a92..3d08b830 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -20,6 +20,7 @@ import ( "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/geminicli" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -102,28 +103,30 @@ func (e *GeminiCLIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth. // Execute performs a non-streaming request to the Gemini CLI API. 
func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) if err != nil { return resp, err } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + + basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", 
basePayload, originalTranslated) action := "generateContent" if req.Metadata != nil { @@ -133,9 +136,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(baseModel) + if len(models) == 0 || models[0] != baseModel { + models = append([]string{baseModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -246,34 +249,36 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth // ExecuteStream performs a streaming request to the Gemini CLI API. func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) if err != nil { return nil, err } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload) - basePayload = 
util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + basePayload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + basePayload, _ = thinking.ApplyThinking(basePayload, req.Model, "gemini-cli") + + basePayload = fixGeminiCLIImageAspectRatio(baseModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, baseModel, "gemini", "request", basePayload, originalTranslated) projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(baseModel) + if len(models) == 0 || models[0] != baseModel { + models = append([]string{baseModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -435,6 +440,8 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut // CountTokens counts tokens for the given request using the Gemini CLI API. func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + tokenSource, baseTokenData, err := prepareGeminiCLITokenSource(ctx, e.cfg, auth) if err != nil { return cliproxyexecutor.Response{}, err @@ -443,9 +450,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. 
from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(baseModel) + if len(models) == 0 || models[0] != baseModel { + models = append([]string{baseModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -463,15 +470,15 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. // The loop variable attemptModel is only used as the concrete model id sent to the upstream // Gemini CLI endpoint when iterating fallback variants. - for _, attemptModel := range models { - payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) + for range models { + payload := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + payload, _ = thinking.ApplyThinking(payload, req.Model, "gemini-cli") + payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiCLIImageAspectRatio(req.Model, payload) + payload = fixGeminiCLIImageAspectRatio(baseModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index a913a5c0..fd6ec22e 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -13,6 +13,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -102,16 +103,13 @@ func (e *GeminiExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Aut // - cliproxyexecutor.Response: The response from the API // - error: An error if the request fails func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } - // Official Gemini API via API key or OAuth bearer from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -119,15 +117,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - body = ApplyThinkingMetadata(body, req.Metadata, model) - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, 
false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -136,7 +133,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } } baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action) + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -206,34 +203,30 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r // ExecuteStream performs a streaming request to the Gemini API. func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, bearer := geminiCreds(auth) - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body = 
ApplyThinkingMetadata(body, req.Metadata, model) - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -331,27 +324,25 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A // CountTokens counts tokens for the given request using the Gemini API. 
func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - apiKey, bearer := geminiCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(model, auth); override != "" { - model = override - } + apiKey, bearer := geminiCreds(auth) from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model) - translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(model, translatedReq) + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") - translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, baseModel, "countTokens") requestBody := bytes.NewReader(translatedReq) @@ -450,51 +441,6 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string { return base } -func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - 
entry := e.resolveGeminiConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names. - candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index eebf6b1b..18d9f8d6 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -15,7 +15,7 @@ import ( vertexauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/vertex" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -155,30 +155,26 @@ func (e *GeminiVertexExecutor) Refresh(_ context.Context, auth 
*cliproxyauth.Aut // executeWithServiceAccount handles authentication using service account credentials. // This method contains the original service account authentication logic. func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", req.Model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, 
bytes.Clone(req.Payload), false) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -187,7 +183,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } } baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -258,35 +254,26 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au // executeWithAPIKey handles authentication using API key credentials. 
func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = 
fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) action := "generateContent" if req.Metadata != nil { @@ -299,7 +286,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action) + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -367,33 +354,29 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip // executeStreamWithServiceAccount handles streaming authentication using service account credentials. func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if 
budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", req.Model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + + body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -487,41 +470,32 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte // executeStreamWithAPIKey handles streaming authentication using API key credentials. 
func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) - defer reporter.trackFailure(ctx, &err) + baseModel := thinking.ParseSuffix(req.Model).ModelName - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) + defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("gemini") + originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) - } - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - body = fixGeminiImageAspectRatio(model, body) - body = applyPayloadConfigWithRoot(e.cfg, model, to.String(), "", body, originalTranslated) - body, _ = sjson.SetBytes(body, "model", model) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + + body, _ = thinking.ApplyThinking(body, req.Model, "gemini") + 
+ body = fixGeminiImageAspectRatio(baseModel, body) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) + body, _ = sjson.SetBytes(body, "model", baseModel) // For API key auth, use simpler URL format without project/location if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -612,26 +586,24 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth // countTokensWithServiceAccount counts tokens using service account credentials. func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) - } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model) + + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, 
bytes.Clone(req.Payload), false) + + translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, baseModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -688,10 +660,10 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context return cliproxyexecutor.Response{}, errRead } appendAPIResponseChunk(ctx, e.cfg, data) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} } count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: []byte(out)}, nil @@ -699,24 +671,17 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context
func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { - model := req.Model - if override := e.resolveUpstreamModel(req.Model, auth); override != "" { - model = override - } + baseModel := thinking.ParseSuffix(req.Model).ModelName from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(model, *budgetOverride) - budgetOverride = &norm - } - translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) - } - translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(model, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) + + translatedReq := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + + translatedReq, _ = thinking.ApplyThinking(translatedReq, req.Model, "gemini") + + translatedReq = fixGeminiImageAspectRatio(baseModel, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", baseModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") @@ -726,7 +687,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens") + url := 
fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, baseModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -780,10 +741,10 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * return cliproxyexecutor.Response{}, errRead } appendAPIResponseChunk(ctx, e.cfg, data) if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 { log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data)) return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(data)} } count := gjson.GetBytes(data, "totalTokens").Int() out := sdktranslator.TranslateTokenCount(ctx, to, from, count, data) return cliproxyexecutor.Response{Payload: []byte(out)}, nil @@ -870,53 +831,6 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau return tok.AccessToken, nil } -// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration. -// It matches the requested model alias against configured models and returns the actual upstream name. -func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - trimmed := strings.TrimSpace(alias) - if trimmed == "" { - return "" - } - - entry := e.resolveVertexConfig(auth) - if entry == nil { - return "" - } - - normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) - - // Candidate names to match against configured aliases/names.
- candidates := []string{strings.TrimSpace(normalizedModel)} - if !strings.EqualFold(normalizedModel, trimmed) { - candidates = append(candidates, trimmed) - } - if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { - candidates = append(candidates, original) - } - - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - modelAlias := strings.TrimSpace(model.Alias) - - for _, candidate := range candidates { - if candidate == "" { - continue - } - if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { - if name != "" { - return name - } - return candidate - } - if name != "" && strings.EqualFold(name, candidate) { - return name - } - } - } - return "" -} - // resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth. func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey { if auth == nil || e.cfg == nil { diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c8b7706c..cc158250 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -12,6 +12,7 @@ import ( iflowauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/iflow" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -67,6 +68,8 @@ func (e *IFlowExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth // Execute performs a non-streaming chat completion request. 
func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := iflowCreds(auth) if strings.TrimSpace(apiKey) == "" { err = fmt.Errorf("iflow executor: missing api key") @@ -76,7 +79,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -85,17 +88,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return resp, errValidate - } - body = applyIFlowThinkingConfig(body) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "iflow") + body = preserveReasoningContentInMessages(body) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, 
originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -154,6 +154,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter.ensurePublished(ctx) var param any + // Note: TranslateNonStream uses req.Model (original with suffix) to preserve + // the original model name in the response for client compatibility. out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} return resp, nil @@ -161,6 +163,8 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re // ExecuteStream performs a streaming chat completion request. func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + apiKey, baseURL := iflowCreds(auth) if strings.TrimSpace(apiKey) == "" { err = fmt.Errorf("iflow executor: missing api key") @@ -170,7 +174,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au baseURL = iflowauth.DefaultAPIBaseURL } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -179,23 +183,19 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, 
to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "iflow") - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return nil, errValidate - } - body = applyIFlowThinkingConfig(body) body = preserveReasoningContentInMessages(body) // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -278,11 +278,13 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - enc, err := tokenizerForModel(req.Model) + enc, err := tokenizerForModel(baseModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } @@ -520,41 +522,3 @@ func preserveReasoningContentInMessages(body []byte) []byte { return body } - -// 
applyIFlowThinkingConfig converts normalized reasoning_effort to model-specific thinking configurations. -// This should be called after NormalizeThinkingConfig has processed the payload. -// -// Model-specific handling: -// - GLM-4.6/4.7: Uses chat_template_kwargs.enable_thinking (boolean) and chat_template_kwargs.clear_thinking=false -// - MiniMax M2/M2.1: Uses reasoning_split=true for OpenAI-style reasoning separation -func applyIFlowThinkingConfig(body []byte) []byte { - effort := gjson.GetBytes(body, "reasoning_effort") - if !effort.Exists() { - return body - } - - model := strings.ToLower(gjson.GetBytes(body, "model").String()) - val := strings.ToLower(strings.TrimSpace(effort.String())) - enableThinking := val != "none" && val != "" - - // Remove reasoning_effort as we'll convert to model-specific format - body, _ = sjson.DeleteBytes(body, "reasoning_effort") - body, _ = sjson.DeleteBytes(body, "thinking") - - // GLM-4.6/4.7: Use chat_template_kwargs - if strings.HasPrefix(model, "glm-4") { - body, _ = sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking) - if enableThinking { - body, _ = sjson.SetBytes(body, "chat_template_kwargs.clear_thinking", false) - } - return body - } - - // MiniMax M2/M2.1: Use reasoning_split - if strings.HasPrefix(model, "minimax-m2") { - body, _ = sjson.SetBytes(body, "reasoning_split", enableThinking) - return body - } - - return body -} diff --git a/internal/runtime/executor/iflow_executor_test.go b/internal/runtime/executor/iflow_executor_test.go new file mode 100644 index 00000000..e588548b --- /dev/null +++ b/internal/runtime/executor/iflow_executor_test.go @@ -0,0 +1,67 @@ +package executor + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" +) + +func TestIFlowExecutorParseSuffix(t *testing.T) { + tests := []struct { + name string + model string + wantBase string + wantLevel string + }{ + {"no suffix", "glm-4", "glm-4", ""}, + {"glm with suffix", 
"glm-4.1-flash(high)", "glm-4.1-flash", "high"}, + {"minimax no suffix", "minimax-m2", "minimax-m2", ""}, + {"minimax with suffix", "minimax-m2.1(medium)", "minimax-m2.1", "medium"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := thinking.ParseSuffix(tt.model) + if result.ModelName != tt.wantBase { + t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase) + } + }) + } +} + +func TestPreserveReasoningContentInMessages(t *testing.T) { + tests := []struct { + name string + input []byte + want []byte // nil means output should equal input + }{ + { + "non-glm model passthrough", + []byte(`{"model":"gpt-4","messages":[]}`), + nil, + }, + { + "glm model with empty messages", + []byte(`{"model":"glm-4","messages":[]}`), + nil, + }, + { + "glm model preserves existing reasoning_content", + []byte(`{"model":"glm-4","messages":[{"role":"assistant","content":"hi","reasoning_content":"thinking..."}]}`), + nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := preserveReasoningContentInMessages(tt.input) + want := tt.want + if want == nil { + want = tt.input + } + if string(got) != string(want) { + t.Errorf("preserveReasoningContentInMessages() = %s, want %s", got, want) + } + }) + } +} diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 04dbf23f..22e8b4c8 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -11,6 +11,7 @@ import ( "time" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -69,7 +70,9 @@ func (e *OpenAICompatExecutor) HttpRequest(ctx 
context.Context, auth *cliproxyau } func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) @@ -85,19 +88,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, opts.Stream) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), opts.Stream) - modelOverride := e.resolveUpstreamModel(req.Model, auth) - if modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) - allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) - if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { - return resp, errValidate - } + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, opts.Stream) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), opts.Stream) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) + + translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") url := strings.TrimSuffix(baseURL, "/") + 
"/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -168,7 +163,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A } func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) baseURL, apiKey := e.resolveCredentials(auth) @@ -176,25 +173,18 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy err = statusErr{code: http.StatusUnauthorized, msg: "missing provider baseURL"} return nil, err } + from := opts.SourceFormat to := sdktranslator.FromString("openai") originalPayload := bytes.Clone(req.Payload) if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - modelOverride := e.resolveUpstreamModel(req.Model, auth) - if modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - } - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated, originalTranslated) - allowCompat := e.allowCompatReasoningEffort(req.Model, auth) - translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) - if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { - return nil, errValidate - } + originalTranslated := 
sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + translated = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", translated, originalTranslated) + + translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -293,15 +283,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } func (e *OpenAICompatExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + translated := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) - modelForCounting := req.Model - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - translated = e.overrideModel(translated, modelOverride) - modelForCounting = modelOverride - } + modelForCounting := baseModel + + translated, _ = thinking.ApplyThinking(translated, req.Model, "openai") enc, err := tokenizerForModel(modelForCounting) if err != nil { @@ -336,53 +326,6 @@ func (e *OpenAICompatExecutor) resolveCredentials(auth *cliproxyauth.Auth) (base return } -func (e *OpenAICompatExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { - if alias == "" || auth == nil || e.cfg == nil { - return "" - } - compat := e.resolveCompatConfig(auth) - if compat == nil { - return "" - } - for i := range compat.Models { - model := compat.Models[i] - if model.Alias != "" { - if 
strings.EqualFold(model.Alias, alias) { - if model.Name != "" { - return model.Name - } - return alias - } - continue - } - if strings.EqualFold(model.Name, alias) { - return model.Name - } - } - return "" -} - -func (e *OpenAICompatExecutor) allowCompatReasoningEffort(model string, auth *cliproxyauth.Auth) bool { - trimmed := strings.TrimSpace(model) - if trimmed == "" || e == nil || e.cfg == nil { - return false - } - compat := e.resolveCompatConfig(auth) - if compat == nil || len(compat.Models) == 0 { - return false - } - for i := range compat.Models { - entry := compat.Models[i] - if strings.EqualFold(strings.TrimSpace(entry.Alias), trimmed) { - return true - } - if strings.EqualFold(strings.TrimSpace(entry.Name), trimmed) { - return true - } - } - return false -} - func (e *OpenAICompatExecutor) resolveCompatConfig(auth *cliproxyauth.Auth) *config.OpenAICompatibility { if auth == nil || e.cfg == nil { return nil diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index e3cfc5d4..9014af87 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,109 +1,13 @@ package executor import ( - "fmt" - "net/http" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) -// ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192)) -// for standard Gemini format payloads. It normalizes the budget when the model supports thinking. -func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { - // Use the alias from metadata if available, as it's registered in the global registry - // with thinking metadata; the upstream model name may not be registered. - lookupModel := util.ResolveOriginalModel(model, metadata) - - // Determine which model to use for thinking support check. 
- // If the alias (lookupModel) is not in the registry, fall back to the upstream model. - thinkingModel := lookupModel - if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { - thinkingModel = model - } - - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(thinkingModel) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) -} - -// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192)) -// for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. -func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { - // Use the alias from metadata if available, as it's registered in the global registry - // with thinking metadata; the upstream model name may not be registered. - lookupModel := util.ResolveOriginalModel(model, metadata) - - // Determine which model to use for thinking support check. - // If the alias (lookupModel) is not in the registry, fall back to the upstream model. 
- thinkingModel := lookupModel - if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) { - thinkingModel = model - } - - budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata) - if !ok || (budgetOverride == nil && includeOverride == nil) { - return payload - } - if !util.ModelSupportsThinking(thinkingModel) { - return payload - } - if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride) - budgetOverride = &norm - } - return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) -} - -// ApplyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. -// Metadata values take precedence over any existing field when the model supports thinking, intentionally -// overwriting caller-provided values to honor suffix/default metadata priority. -func ApplyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string, allowCompat bool) []byte { - if len(metadata) == 0 { - return payload - } - if field == "" { - return payload - } - baseModel := util.ResolveOriginalModel(model, metadata) - if baseModel == "" { - baseModel = model - } - if !util.ModelSupportsThinking(baseModel) && !allowCompat { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - // Fallback: numeric thinking_budget suffix for level-based (OpenAI-style) models. 
- if util.ModelUsesThinkingLevels(baseModel) || allowCompat { - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if effort, ok := util.ThinkingBudgetToEffort(baseModel, *budget); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, field, effort); err == nil { - return updated - } - } - } - } - return payload -} - // applyPayloadConfigWithRoot behaves like applyPayloadConfig but treats all parameter // paths as relative to the provided root path (for example, "request" for Gemini CLI) // and restricts matches to the given protocol when supplied. Defaults are checked @@ -256,102 +160,3 @@ func matchModelPattern(pattern, model string) bool { } return pi == len(pattern) } - -// NormalizeThinkingConfig normalizes thinking-related fields in the payload -// based on model capabilities. For models without thinking support, it strips -// reasoning fields. For models with level-based thinking, it validates and -// normalizes the reasoning effort level. For models with numeric budget thinking, -// it strips the effort string fields. -func NormalizeThinkingConfig(payload []byte, model string, allowCompat bool) []byte { - if len(payload) == 0 || model == "" { - return payload - } - - if !util.ModelSupportsThinking(model) { - if allowCompat { - return payload - } - return StripThinkingFields(payload, false) - } - - if util.ModelUsesThinkingLevels(model) { - return NormalizeReasoningEffortLevel(payload, model) - } - - // Model supports thinking but uses numeric budgets, not levels. - // Strip effort string fields since they are not applicable. - return StripThinkingFields(payload, true) -} - -// StripThinkingFields removes thinking-related fields from the payload for -// models that do not support thinking. If effortOnly is true, only removes -// effort string fields (for models using numeric budgets). 
-func StripThinkingFields(payload []byte, effortOnly bool) []byte { - fieldsToRemove := []string{ - "reasoning_effort", - "reasoning.effort", - } - if !effortOnly { - fieldsToRemove = append([]string{"reasoning", "thinking"}, fieldsToRemove...) - } - out := payload - for _, field := range fieldsToRemove { - if gjson.GetBytes(out, field).Exists() { - out, _ = sjson.DeleteBytes(out, field) - } - } - return out -} - -// NormalizeReasoningEffortLevel validates and normalizes the reasoning_effort -// or reasoning.effort field for level-based thinking models. -func NormalizeReasoningEffortLevel(payload []byte, model string) []byte { - out := payload - - if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) - } - } - - if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { - if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { - out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) - } - } - - return out -} - -// ValidateThinkingConfig checks for unsupported reasoning levels on level-based models. -// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently -// downgrading requests. 
-func ValidateThinkingConfig(payload []byte, model string) error { - if len(payload) == 0 || model == "" { - return nil - } - if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { - return nil - } - - levels := util.GetModelThinkingLevels(model) - checkField := func(path string) error { - if effort := gjson.GetBytes(payload, path); effort.Exists() { - if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { - return statusErr{ - code: http.StatusBadRequest, - msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), - } - } - } - return nil - } - - if err := checkField("reasoning_effort"); err != nil { - return err - } - if err := checkField("reasoning.effort"); err != nil { - return err - } - return nil -} diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index ee014fc7..f7162893 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -12,6 +12,7 @@ import ( qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -65,12 +66,14 @@ func (e *QwenExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth, } func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - token, baseURL := qwenCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, baseURL := qwenCreds(auth) if baseURL == "" { baseURL = "https://portal.qwen.ai/v1" } - 
reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -79,15 +82,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, false) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return resp, errValidate - } - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "openai") + + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -140,18 +141,22 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req appendAPIResponseChunk(ctx, e.cfg, data) reporter.publish(ctx, parseOpenAIUsage(data)) var param any + // Note: TranslateNonStream uses req.Model (original with suffix) to preserve + // the original model name in the response for client compatibility. 
out := sdktranslator.TranslateNonStream(ctx, to, from, req.Model, bytes.Clone(opts.OriginalRequest), body, data, ¶m) resp = cliproxyexecutor.Response{Payload: []byte(out)} return resp, nil } func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (stream <-chan cliproxyexecutor.StreamChunk, err error) { - token, baseURL := qwenCreds(auth) + baseModel := thinking.ParseSuffix(req.Model).ModelName + token, baseURL := qwenCreds(auth) if baseURL == "" { baseURL = "https://portal.qwen.ai/v1" } - reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) + + reporter := newUsageReporter(ctx, e.Identifier(), baseModel, auth) defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat @@ -160,15 +165,12 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut if len(opts.OriginalRequest) > 0 { originalPayload = bytes.Clone(opts.OriginalRequest) } - originalTranslated := sdktranslator.TranslateRequest(from, to, req.Model, originalPayload, true) - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + originalTranslated := sdktranslator.TranslateRequest(from, to, baseModel, originalPayload, true) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", baseModel) + + body, _ = thinking.ApplyThinking(body, req.Model, "openai") - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - body, _ = sjson.SetBytes(body, "model", req.Model) - body = NormalizeThinkingConfig(body, req.Model, false) - if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { - return nil, errValidate - } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. 
If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. @@ -176,7 +178,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) - body = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", body, originalTranslated) + body = applyPayloadConfigWithRoot(e.cfg, baseModel, to.String(), "", body, originalTranslated) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -256,13 +258,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut } func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + baseModel := thinking.ParseSuffix(req.Model).ModelName + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, baseModel, bytes.Clone(req.Payload), false) modelName := gjson.GetBytes(body, "model").String() if strings.TrimSpace(modelName) == "" { - modelName = req.Model + modelName = baseModel } enc, err := tokenizerForModel(modelName) diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go new file mode 100644 index 00000000..6a777c53 --- /dev/null 
+++ b/internal/runtime/executor/qwen_executor_test.go @@ -0,0 +1,30 @@ +package executor + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" +) + +func TestQwenExecutorParseSuffix(t *testing.T) { + tests := []struct { + name string + model string + wantBase string + wantLevel string + }{ + {"no suffix", "qwen-max", "qwen-max", ""}, + {"with level suffix", "qwen-max(high)", "qwen-max", "high"}, + {"with budget suffix", "qwen-max(16384)", "qwen-max", "16384"}, + {"complex model name", "qwen-plus-latest(medium)", "qwen-plus-latest", "medium"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := thinking.ParseSuffix(tt.model) + if result.ModelName != tt.wantBase { + t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantBase) + } + }) + } +} diff --git a/internal/runtime/executor/thinking_providers.go b/internal/runtime/executor/thinking_providers.go new file mode 100644 index 00000000..99ac468d --- /dev/null +++ b/internal/runtime/executor/thinking_providers.go @@ -0,0 +1,10 @@ +package executor + +import ( + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" +) diff --git a/internal/thinking/apply.go b/internal/thinking/apply.go new file mode 100644 index 00000000..8ee60b8d --- /dev/null +++ b/internal/thinking/apply.go @@ -0,0 +1,430 @@ +// Package thinking provides unified thinking configuration processing. 
+package thinking + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" + "github.com/tidwall/gjson" +) + +// providerAppliers maps provider names to their ProviderApplier implementations. +var providerAppliers = map[string]ProviderApplier{ + "gemini": nil, + "gemini-cli": nil, + "claude": nil, + "openai": nil, + "codex": nil, + "iflow": nil, + "antigravity": nil, +} + +// GetProviderApplier returns the ProviderApplier for the given provider name. +// Returns nil if the provider is not registered. +func GetProviderApplier(provider string) ProviderApplier { + return providerAppliers[provider] +} + +// RegisterProvider registers a provider applier by name. +func RegisterProvider(name string, applier ProviderApplier) { + providerAppliers[name] = applier +} + +// IsUserDefinedModel reports whether the model is a user-defined model that should +// have thinking configuration passed through without validation. +// +// User-defined models are configured via config file's models[] array +// (e.g., openai-compatibility.*.models[], *-api-key.models[]). These models +// are marked with UserDefined=true at registration time. +// +// User-defined models should have their thinking configuration applied directly, +// letting the upstream service validate the configuration. +func IsUserDefinedModel(modelInfo *registry.ModelInfo) bool { + if modelInfo == nil { + return false + } + return modelInfo.UserDefined +} + +// ApplyThinking applies thinking configuration to a request body. +// +// This is the unified entry point for all providers. It follows the processing +// order defined in FR25: route check → model capability query → config extraction +// → validation → application. +// +// Suffix Priority: When the model name includes a thinking suffix (e.g., "gemini-2.5-pro(8192)"), +// the suffix configuration takes priority over any thinking parameters in the request body. 
+// This enables users to override thinking settings via the model name without modifying their +// request payload. +// +// Parameters: +// - body: Original request body JSON +// - model: Model name, optionally with thinking suffix (e.g., "claude-sonnet-4-5(16384)") +// - provider: Provider name (gemini, gemini-cli, antigravity, claude, openai, codex, iflow) +// +// Returns: +// - Modified request body JSON with thinking configuration applied +// - Error if validation fails (ThinkingError). On error, the original body +// is returned (not nil) to enable defensive programming patterns. +// +// Passthrough behavior (returns original body without error): +// - Unknown provider (not in providerAppliers map) +// - modelInfo is nil (model not found in registry) +// - modelInfo.Thinking is nil (model doesn't support thinking) +// +// Example: +// +// // With suffix - suffix config takes priority +// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro(8192)", "gemini") +// +// // Without suffix - uses body config +// result, err := thinking.ApplyThinking(body, "gemini-2.5-pro", "gemini") +func ApplyThinking(body []byte, model string, provider string) ([]byte, error) { + // 1. Route check: Get provider applier + applier := GetProviderApplier(provider) + if applier == nil { + log.WithField("provider", provider).Debug("thinking: unknown provider, passthrough") + return body, nil + } + + // 2. Parse suffix and get modelInfo + suffixResult := ParseSuffix(model) + baseModel := suffixResult.ModelName + modelInfo := registry.GetGlobalRegistry().GetModelInfo(baseModel) + + // 3. 
Model capability check + if modelInfo == nil { + log.WithField("model", model).Debug("thinking: nil modelInfo, passthrough") + return body, nil + } + if modelInfo.Thinking == nil { + if IsUserDefinedModel(modelInfo) { + return applyUserDefinedModel(body, modelInfo, provider, suffixResult) + } + config := extractThinkingConfig(body, provider) + if hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + }).Debug("thinking: model does not support thinking, stripping config") + return StripThinkingConfig(body, provider), nil + } + log.WithField("model", modelInfo.ID).Debug("thinking: model does not support thinking, passthrough") + return body, nil + } + + // 4. Get config: suffix priority over body + var config ThinkingConfig + if suffixResult.HasSuffix { + config = parseSuffixToConfig(suffixResult.RawSuffix) + log.WithFields(log.Fields{ + "provider": provider, + "model": model, + "raw_suffix": suffixResult.RawSuffix, + "config": config, + }).Debug("thinking: using suffix config (priority)") + } else { + config = extractThinkingConfig(body, provider) + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "config": config, + }).Debug("thinking: extracted config from request body") + } + + if !hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + }).Debug("thinking: no config found, passthrough") + return body, nil + } + + // 5. Validate and normalize configuration + validated, err := ValidateConfig(config, modelInfo.Thinking) + if err != nil { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "error": err.Error(), + }).Warn("thinking: validation failed, returning original body") + // Return original body on validation failure (defensive programming). + // This ensures callers who ignore the error won't receive nil body. + // The upstream service will decide how to handle the unmodified request. 
+ return body, err + } + + // Defensive check: ValidateConfig should never return (nil, nil) + if validated == nil { + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + }).Warn("thinking: ValidateConfig returned nil config without error, passthrough") + return body, nil + } + + log.WithFields(log.Fields{ + "provider": provider, + "model": modelInfo.ID, + "validated": *validated, + }).Debug("thinking: applying validated config") + + // 6. Apply configuration using provider-specific applier + return applier.Apply(body, *validated, modelInfo) +} + +// parseSuffixToConfig converts a raw suffix string to ThinkingConfig. +// +// Parsing priority: +// 1. Special values: "none" → ModeNone, "auto"/"-1" → ModeAuto +// 2. Level names: "minimal", "low", "medium", "high", "xhigh" → ModeLevel +// 3. Numeric values: positive integers → ModeBudget, 0 → ModeNone +// +// If none of the above match, returns empty ThinkingConfig (treated as no config). +func parseSuffixToConfig(rawSuffix string) ThinkingConfig { + // 1. Try special values first (none, auto, -1) + if mode, ok := ParseSpecialSuffix(rawSuffix); ok { + switch mode { + case ModeNone: + return ThinkingConfig{Mode: ModeNone, Budget: 0} + case ModeAuto: + return ThinkingConfig{Mode: ModeAuto, Budget: -1} + } + } + + // 2. Try level parsing (minimal, low, medium, high, xhigh) + if level, ok := ParseLevelSuffix(rawSuffix); ok { + return ThinkingConfig{Mode: ModeLevel, Level: level} + } + + // 3. 
Try numeric parsing + if budget, ok := ParseNumericSuffix(rawSuffix); ok { + if budget == 0 { + return ThinkingConfig{Mode: ModeNone, Budget: 0} + } + return ThinkingConfig{Mode: ModeBudget, Budget: budget} + } + + // Unknown suffix format - return empty config + log.WithField("raw_suffix", rawSuffix).Debug("thinking: unknown suffix format, treating as no config") + return ThinkingConfig{} +} + +// applyUserDefinedModel applies thinking configuration for user-defined models +// without ThinkingSupport validation. +func applyUserDefinedModel(body []byte, modelInfo *registry.ModelInfo, provider string, suffixResult SuffixResult) ([]byte, error) { + // Get config: suffix priority over body + var config ThinkingConfig + if suffixResult.HasSuffix { + config = parseSuffixToConfig(suffixResult.RawSuffix) + } else { + config = extractThinkingConfig(body, provider) + } + + if !hasThinkingConfig(config) { + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + "user_defined": true, + "passthrough": true, + }).Debug("thinking: user-defined model, no config, passthrough") + return body, nil + } + + applier := GetProviderApplier(provider) + if applier == nil { + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + "user_defined": true, + "passthrough": true, + }).Debug("thinking: user-defined model, unknown provider, passthrough") + return body, nil + } + + log.WithFields(log.Fields{ + "model": modelInfo.ID, + "provider": provider, + "user_defined": true, + "passthrough": false, + "config": config, + }).Debug("thinking: applying config for user-defined model (skip validation)") + + return applier.Apply(body, config, modelInfo) +} + +// extractThinkingConfig extracts provider-specific thinking config from request body. 
func extractThinkingConfig(body []byte, provider string) ThinkingConfig {
	if len(body) == 0 || !gjson.ValidBytes(body) {
		return ThinkingConfig{}
	}

	switch provider {
	case "claude":
		return extractClaudeConfig(body)
	case "gemini", "gemini-cli", "antigravity":
		return extractGeminiConfig(body, provider)
	case "openai":
		return extractOpenAIConfig(body)
	case "codex":
		return extractCodexConfig(body)
	case "iflow":
		return extractIFlowConfig(body)
	default:
		// Unknown provider: no extractor available; return the zero config,
		// which downstream code treats as "no thinking configuration".
		return ThinkingConfig{}
	}
}

// hasThinkingConfig reports whether config carries an explicit thinking setting.
// The zero value ThinkingConfig{} is the only value that yields false.
// NOTE(review): this assumes ModeBudget is the zero value of the mode type —
// confirm against the Mode constant declarations; if a different mode is the
// zero value, an empty config would wrongly count as an explicit setting.
func hasThinkingConfig(config ThinkingConfig) bool {
	return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
}

// extractClaudeConfig extracts thinking configuration from Claude format request body.
//
// Claude API format:
// - thinking.type: "enabled" or "disabled"
// - thinking.budget_tokens: integer (-1=auto, 0=disabled, >0=budget)
//
// Priority: thinking.type="disabled" takes precedence over budget_tokens.
// When type="enabled" without budget_tokens, returns ModeAuto to indicate
// the user wants thinking enabled but didn't specify a budget.
func extractClaudeConfig(body []byte) ThinkingConfig {
	thinkingType := gjson.GetBytes(body, "thinking.type").String()
	if thinkingType == "disabled" {
		return ThinkingConfig{Mode: ModeNone, Budget: 0}
	}

	// Check budget_tokens; an explicit budget wins over type="enabled" below.
	if budget := gjson.GetBytes(body, "thinking.budget_tokens"); budget.Exists() {
		value := int(budget.Int())
		switch value {
		case 0:
			return ThinkingConfig{Mode: ModeNone, Budget: 0}
		case -1:
			return ThinkingConfig{Mode: ModeAuto, Budget: -1}
		default:
			return ThinkingConfig{Mode: ModeBudget, Budget: value}
		}
	}

	// If type="enabled" but no budget_tokens, treat as auto (user wants thinking but no budget specified)
	if thinkingType == "enabled" {
		return ThinkingConfig{Mode: ModeAuto, Budget: -1}
	}

	return ThinkingConfig{}
}

// extractGeminiConfig extracts thinking configuration from Gemini format request body.
//
// Gemini API format:
// - generationConfig.thinkingConfig.thinkingLevel: "none", "auto", or level name (Gemini 3)
// - generationConfig.thinkingConfig.thinkingBudget: integer (Gemini 2.5)
//
// For gemini-cli and antigravity providers, the path is prefixed with "request.".
//
// Priority: thinkingLevel is checked first (Gemini 3 format), then thinkingBudget (Gemini 2.5 format).
// This allows newer Gemini 3 level-based configs to take precedence.
func extractGeminiConfig(body []byte, provider string) ThinkingConfig {
	prefix := "generationConfig.thinkingConfig"
	if provider == "gemini-cli" || provider == "antigravity" {
		prefix = "request.generationConfig.thinkingConfig"
	}

	// Check thinkingLevel first (Gemini 3 format takes precedence)
	if level := gjson.GetBytes(body, prefix+".thinkingLevel"); level.Exists() {
		value := level.String()
		switch value {
		case "none":
			return ThinkingConfig{Mode: ModeNone, Budget: 0}
		case "auto":
			return ThinkingConfig{Mode: ModeAuto, Budget: -1}
		default:
			// Any other string is passed through as a level; validation of the
			// level name happens later, not here.
			return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
		}
	}

	// Check thinkingBudget (Gemini 2.5 format)
	if budget := gjson.GetBytes(body, prefix+".thinkingBudget"); budget.Exists() {
		value := int(budget.Int())
		switch value {
		case 0:
			return ThinkingConfig{Mode: ModeNone, Budget: 0}
		case -1:
			return ThinkingConfig{Mode: ModeAuto, Budget: -1}
		default:
			return ThinkingConfig{Mode: ModeBudget, Budget: value}
		}
	}

	return ThinkingConfig{}
}

// extractOpenAIConfig extracts thinking configuration from OpenAI format request body.
//
// OpenAI API format:
// - reasoning_effort: "none", "low", "medium", "high" (discrete levels)
//
// OpenAI uses level-based thinking configuration only, no numeric budget support.
// The "none" value is treated specially to return ModeNone.
func extractOpenAIConfig(body []byte) ThinkingConfig {
	// Check reasoning_effort (OpenAI Chat Completions format)
	if effort := gjson.GetBytes(body, "reasoning_effort"); effort.Exists() {
		value := effort.String()
		if value == "none" {
			return ThinkingConfig{Mode: ModeNone, Budget: 0}
		}
		// Level name is passed through unvalidated; validation happens downstream.
		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
	}

	return ThinkingConfig{}
}

// extractCodexConfig extracts thinking configuration from Codex format request body.
//
// Codex API format (OpenAI Responses API):
// - reasoning.effort: "none", "low", "medium", "high"
//
// This is similar to OpenAI but uses nested field "reasoning.effort" instead of "reasoning_effort".
func extractCodexConfig(body []byte) ThinkingConfig {
	// Check reasoning.effort (Codex / OpenAI Responses API format)
	if effort := gjson.GetBytes(body, "reasoning.effort"); effort.Exists() {
		value := effort.String()
		if value == "none" {
			return ThinkingConfig{Mode: ModeNone, Budget: 0}
		}
		return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(value)}
	}

	return ThinkingConfig{}
}

// extractIFlowConfig extracts thinking configuration from iFlow format request body.
//
// iFlow API format (supports multiple model families):
// - GLM format: chat_template_kwargs.enable_thinking (boolean)
// - MiniMax format: reasoning_split (boolean)
//
// Returns ModeBudget with Budget=1 as a sentinel value indicating "enabled".
// The actual budget/configuration is determined by the iFlow applier based on model capabilities.
// Budget=1 is used because iFlow models don't use numeric budgets; they only support on/off.
func extractIFlowConfig(body []byte) ThinkingConfig {
	// GLM format: chat_template_kwargs.enable_thinking.
	// Checked first: if this field exists, reasoning_split below is never consulted.
	if enabled := gjson.GetBytes(body, "chat_template_kwargs.enable_thinking"); enabled.Exists() {
		if enabled.Bool() {
			// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
			return ThinkingConfig{Mode: ModeBudget, Budget: 1}
		}
		return ThinkingConfig{Mode: ModeNone, Budget: 0}
	}

	// MiniMax format: reasoning_split
	if split := gjson.GetBytes(body, "reasoning_split"); split.Exists() {
		if split.Bool() {
			// Budget=1 is a sentinel meaning "enabled" (iFlow doesn't use numeric budgets)
			return ThinkingConfig{Mode: ModeBudget, Budget: 1}
		}
		return ThinkingConfig{Mode: ModeNone, Budget: 0}
	}

	// Neither field present: no thinking configuration in the request.
	return ThinkingConfig{}
}
diff --git a/internal/thinking/apply_main_test.go b/internal/thinking/apply_main_test.go
new file mode 100644
index 00000000..93346109
--- /dev/null
+++ b/internal/thinking/apply_main_test.go
@@ -0,0 +1,144 @@
// Package thinking provides unified thinking configuration processing logic.
package thinking

import (
	"testing"

	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
	"github.com/tidwall/gjson"
)

// setupTestModels registers test models in the global registry for testing.
// This is required because ApplyThinking now looks up models by name.
+func setupTestModels(t *testing.T) func() { + t.Helper() + reg := registry.GetGlobalRegistry() + + // Register test models via RegisterClient (the correct API) + clientID := "test-thinking-client" + testModels := []*registry.ModelInfo{ + {ID: "test-thinking-model", Thinking: ®istry.ThinkingSupport{Min: 1, Max: 10}}, + {ID: "test-no-thinking", Type: "gemini"}, + {ID: "gpt-5.2-test", Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "medium", "high"}}}, + } + + reg.RegisterClient(clientID, "test", testModels) + + // Return cleanup function + return func() { + reg.UnregisterClient(clientID) + } +} + +func TestApplyThinkingPassthrough(t *testing.T) { + cleanup := setupTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + }{ + {"unknown provider", `{"a":1}`, "test-thinking-model", "unknown"}, + {"unknown model", `{"a":1}`, "nonexistent-model", "gemini"}, + {"nil thinking support", `{"a":1}`, "test-no-thinking", "gemini"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + if string(got) != tt.body { + t.Fatalf("ApplyThinking() = %s, want %s", string(got), tt.body) + } + }) + } +} + +func TestApplyThinkingValidationError(t *testing.T) { + cleanup := setupTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + }{ + {"unsupported level", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err == nil { + t.Fatalf("ApplyThinking() error = nil, want error") + } + // On validation error, ApplyThinking returns original body (defensive programming) + if string(got) != tt.body { + t.Fatalf("ApplyThinking() body = %s, 
want original body %s", string(got), tt.body) + } + }) + } +} + +func TestApplyThinkingSuffixPriority(t *testing.T) { + cleanup := setupTestModels(t) + defer cleanup() + + // Register a model that supports thinking with budget + reg := registry.GetGlobalRegistry() + suffixClientID := "test-suffix-client" + testModels := []*registry.ModelInfo{ + { + ID: "gemini-2.5-pro-suffix-test", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true}, + }, + } + reg.RegisterClient(suffixClientID, "gemini", testModels) + defer reg.UnregisterClient(suffixClientID) + + tests := []struct { + name string + body string + model string + provider string + checkPath string + expectedValue int + }{ + { + "suffix overrides body config", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-2.5-pro-suffix-test(8192)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 8192, + }, + { + "suffix none disables thinking", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-2.5-pro-suffix-test(none)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + + // Use gjson to check the value + result := int(gjson.GetBytes(got, tt.checkPath).Int()) + if result != tt.expectedValue { + t.Fatalf("ApplyThinking() %s = %v, want %v", tt.checkPath, result, tt.expectedValue) + } + }) + } +} diff --git a/internal/thinking/apply_test.go b/internal/thinking/apply_test.go new file mode 100644 index 00000000..d89fff32 --- /dev/null +++ b/internal/thinking/apply_test.go @@ -0,0 +1,501 @@ +// Package thinking_test provides external tests for the thinking package. 
+package thinking_test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" +) + +// registerTestModels sets up test models in the registry and returns a cleanup function. +func registerTestModels(t *testing.T) func() { + t.Helper() + reg := registry.GetGlobalRegistry() + + testModels := []*registry.ModelInfo{ + geminiBudgetModel(), + geminiLevelModel(), + claudeBudgetModel(), + openAILevelModel(), + iFlowModel(), + {ID: "claude-3"}, + {ID: "gemini-2.5-pro-strip"}, + {ID: "glm-4.6-strip"}, + } + + clientID := "test-thinking-models" + reg.RegisterClient(clientID, "test", testModels) + + return func() { + reg.UnregisterClient(clientID) + } +} + +// TestApplyThinking tests the main ApplyThinking entry point. +// +// ApplyThinking is the unified entry point for applying thinking configuration. +// It routes to the appropriate provider-specific applier based on model. 
+// +// Depends on: Epic 10 Story 10-2 (apply-thinking main entry) +func TestApplyThinking(t *testing.T) { + cleanup := registerTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + check string + }{ + {"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini-2.5-pro-test", "gemini", "geminiBudget"}, + {"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini-3-pro-preview-test", "gemini", "geminiLevel"}, + {"gemini-cli budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "gemini-cli", "geminiCliBudget"}, + {"antigravity budget", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, "gemini-2.5-pro-test", "antigravity", "geminiCliBudget"}, + {"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude-sonnet-4-5-test", "claude", "claudeBudget"}, + {"claude enabled type auto", `{"thinking":{"type":"enabled"}}`, "claude-sonnet-4-5-test", "claude", "claudeAuto"}, + {"openai level", `{"reasoning_effort":"high"}`, "gpt-5.2-test", "openai", "openaiLevel"}, + {"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "glm-4.6-test", "iflow", "iflowEnable"}, + {"unknown provider passthrough", `{"a":1}`, "gemini-2.5-pro-test", "unknown", "passthrough"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + assertApplyThinkingCheck(t, tt.check, tt.body, got) + }) + } +} + +func TestApplyThinkingErrors(t *testing.T) { + cleanup := registerTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + }{ + {"unsupported level openai", `{"reasoning_effort":"ultra"}`, "gpt-5.2-test", "openai"}, + {"unsupported level gemini", 
`{"generationConfig":{"thinkingConfig":{"thinkingLevel":"ultra"}}}`, "gemini-3-pro-preview-test", "gemini"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err == nil { + t.Fatalf("ApplyThinking() error = nil, want error") + } + // On validation error, ApplyThinking returns original body (defensive programming) + if string(got) != tt.body { + t.Fatalf("ApplyThinking() body = %s, want original body %s", string(got), tt.body) + } + }) + } +} + +func TestApplyThinkingStripOnUnsupportedModel(t *testing.T) { + cleanup := registerTestModels(t) + defer cleanup() + + tests := []struct { + name string + body string + model string + provider string + stripped []string + preserved []string + }{ + {"claude strip", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude-3", "claude", []string{"thinking"}, []string{"model"}}, + {"gemini strip", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini-2.5-pro-strip", "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}}, + {"iflow strip", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "glm-4.6-strip", "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + + for _, path := range tt.stripped { + if gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be stripped, got %s", path, string(got)) + } + } + for _, path := range tt.preserved { + if !gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be preserved, got %s", path, string(got)) + } + } + }) + } +} + +func 
TestIsUserDefinedModel(t *testing.T) { + tests := []struct { + name string + modelInfo *registry.ModelInfo + want bool + }{ + {"nil modelInfo", nil, false}, + {"not user-defined no flag", ®istry.ModelInfo{ID: "test"}, false}, + {"not user-defined with type", ®istry.ModelInfo{ID: "test", Type: "openai"}, false}, + {"user-defined with flag", ®istry.ModelInfo{ID: "test", Type: "openai", UserDefined: true}, true}, + {"user-defined flag only", ®istry.ModelInfo{ID: "test", UserDefined: true}, true}, + {"has thinking not user-defined", ®istry.ModelInfo{ID: "test", Type: "openai", Thinking: ®istry.ThinkingSupport{Min: 1024}}, false}, + {"has thinking with user-defined flag", ®istry.ModelInfo{ID: "test", Type: "openai", Thinking: ®istry.ThinkingSupport{Min: 1024}, UserDefined: true}, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := thinking.IsUserDefinedModel(tt.modelInfo); got != tt.want { + t.Fatalf("IsUserDefinedModel() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestApplyThinking_UserDefinedModel(t *testing.T) { + // Register user-defined test models + reg := registry.GetGlobalRegistry() + userDefinedModels := []*registry.ModelInfo{ + {ID: "custom-gpt", Type: "openai", UserDefined: true}, + {ID: "or-claude", Type: "openai", UserDefined: true}, + {ID: "custom-gemini", Type: "gemini", UserDefined: true}, + {ID: "vertex-flash", Type: "gemini", UserDefined: true}, + {ID: "cli-gemini", Type: "gemini", UserDefined: true}, + {ID: "ag-gemini", Type: "gemini", UserDefined: true}, + {ID: "custom-claude", Type: "claude", UserDefined: true}, + {ID: "unknown"}, + } + clientID := "test-user-defined-models" + reg.RegisterClient(clientID, "test", userDefinedModels) + defer reg.UnregisterClient(clientID) + + tests := []struct { + name string + body string + model string + provider string + check string + }{ + { + "openai user-defined with reasoning_effort", + `{"model":"custom-gpt","reasoning_effort":"high"}`, + "custom-gpt", + 
"openai", + "openaiCompatible", + }, + { + "openai-compatibility model with reasoning_effort", + `{"model":"or-claude","reasoning_effort":"high"}`, + "or-claude", + "openai", + "openaiCompatible", + }, + { + "gemini user-defined with thinkingBudget", + `{"model":"custom-gemini","generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + "custom-gemini", + "gemini", + "geminiCompatibleBudget", + }, + { + "vertex user-defined with thinkingBudget", + `{"model":"vertex-flash","generationConfig":{"thinkingConfig":{"thinkingBudget":16384}}}`, + "vertex-flash", + "gemini", + "geminiCompatibleBudget16384", + }, + { + "gemini-cli user-defined with thinkingBudget", + `{"model":"cli-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + "cli-gemini", + "gemini-cli", + "geminiCliCompatibleBudget", + }, + { + "antigravity user-defined with thinkingBudget", + `{"model":"ag-gemini","request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + "ag-gemini", + "antigravity", + "geminiCliCompatibleBudget", + }, + { + "claude user-defined with thinking", + `{"model":"custom-claude","thinking":{"type":"enabled","budget_tokens":8192}}`, + "custom-claude", + "claude", + "claudeCompatibleBudget", + }, + { + "user-defined model no config", + `{"model":"custom-gpt","messages":[]}`, + "custom-gpt", + "openai", + "passthrough", + }, + { + "non-user-defined model strips config", + `{"model":"unknown","reasoning_effort":"high"}`, + "unknown", + "openai", + "stripReasoning", + }, + { + "user-defined model unknown provider", + `{"model":"custom-gpt","reasoning_effort":"high"}`, + "custom-gpt", + "unknown", + "passthrough", + }, + { + "unknown model passthrough", + `{"model":"nonexistent","reasoning_effort":"high"}`, + "nonexistent", + "openai", + "passthrough", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + 
t.Fatalf("ApplyThinking() error = %v", err) + } + assertCompatibleModelCheck(t, tt.check, tt.body, got) + }) + } +} + +// TestApplyThinkingSuffixPriority tests suffix priority over body config. +func TestApplyThinkingSuffixPriority(t *testing.T) { + // Register test model + reg := registry.GetGlobalRegistry() + testModels := []*registry.ModelInfo{ + { + ID: "gemini-suffix-test", + Thinking: ®istry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: true}, + }, + } + clientID := "test-suffix-priority" + reg.RegisterClient(clientID, "gemini", testModels) + defer reg.UnregisterClient(clientID) + + tests := []struct { + name string + body string + model string + provider string + checkPath string + expectedValue int + }{ + { + "suffix overrides body budget", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-suffix-test(8192)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 8192, + }, + { + "suffix none sets budget to 0", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":1000}}}`, + "gemini-suffix-test(none)", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 0, + }, + { + "no suffix uses body config", + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":5000}}}`, + "gemini-suffix-test", + "gemini", + "generationConfig.thinkingConfig.thinkingBudget", + 5000, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := thinking.ApplyThinking([]byte(tt.body), tt.model, tt.provider) + if err != nil { + t.Fatalf("ApplyThinking() error = %v", err) + } + + result := int(gjson.GetBytes(got, tt.checkPath).Int()) + if result != tt.expectedValue { + t.Fatalf("ApplyThinking() %s = %v, want %v\nbody: %s", tt.checkPath, result, tt.expectedValue, string(got)) + } + }) + } +} + +func assertApplyThinkingCheck(t *testing.T, checkName, input string, body []byte) { + t.Helper() + + switch checkName { + case "geminiBudget": + assertJSONInt(t, body, 
"generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case "geminiLevel": + assertJSONString(t, body, "generationConfig.thinkingConfig.thinkingLevel", "high") + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case "geminiCliBudget": + assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true) + case "claudeBudget": + assertJSONString(t, body, "thinking.type", "enabled") + assertJSONInt(t, body, "thinking.budget_tokens", 16384) + case "claudeAuto": + // When type=enabled without budget, auto mode is applied using mid-range budget + assertJSONString(t, body, "thinking.type", "enabled") + // Budget should be mid-range: (1024 + 128000) / 2 = 64512 + assertJSONInt(t, body, "thinking.budget_tokens", 64512) + case "openaiLevel": + assertJSONString(t, body, "reasoning_effort", "high") + case "iflowEnable": + assertJSONBool(t, body, "chat_template_kwargs.enable_thinking", true) + assertJSONBool(t, body, "chat_template_kwargs.clear_thinking", false) + case "passthrough": + if string(body) != input { + t.Fatalf("ApplyThinking() = %s, want %s", string(body), input) + } + default: + t.Fatalf("unknown check: %s", checkName) + } +} + +func assertCompatibleModelCheck(t *testing.T, checkName, input string, body []byte) { + t.Helper() + + switch checkName { + case "openaiCompatible": + assertJSONString(t, body, "reasoning_effort", "high") + case "geminiCompatibleBudget": + assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case "geminiCompatibleBudget16384": + assertJSONInt(t, body, "generationConfig.thinkingConfig.thinkingBudget", 16384) + assertJSONBool(t, body, "generationConfig.thinkingConfig.includeThoughts", true) + case 
"geminiCliCompatibleBudget": + assertJSONInt(t, body, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) + assertJSONBool(t, body, "request.generationConfig.thinkingConfig.includeThoughts", true) + case "claudeCompatibleBudget": + assertJSONString(t, body, "thinking.type", "enabled") + assertJSONInt(t, body, "thinking.budget_tokens", 8192) + case "stripReasoning": + if gjson.GetBytes(body, "reasoning_effort").Exists() { + t.Fatalf("expected reasoning_effort to be stripped, got %s", string(body)) + } + case "passthrough": + if string(body) != input { + t.Fatalf("ApplyThinking() = %s, want %s", string(body), input) + } + default: + t.Fatalf("unknown check: %s", checkName) + } +} + +func assertJSONString(t *testing.T, body []byte, path, want string) { + t.Helper() + value := gjson.GetBytes(body, path) + if !value.Exists() { + t.Fatalf("expected %s to exist", path) + } + if value.String() != want { + t.Fatalf("value at %s = %s, want %s", path, value.String(), want) + } +} + +func assertJSONInt(t *testing.T, body []byte, path string, want int) { + t.Helper() + value := gjson.GetBytes(body, path) + if !value.Exists() { + t.Fatalf("expected %s to exist", path) + } + if int(value.Int()) != want { + t.Fatalf("value at %s = %d, want %d", path, value.Int(), want) + } +} + +func assertJSONBool(t *testing.T, body []byte, path string, want bool) { + t.Helper() + value := gjson.GetBytes(body, path) + if !value.Exists() { + t.Fatalf("expected %s to exist", path) + } + if value.Bool() != want { + t.Fatalf("value at %s = %t, want %t", path, value.Bool(), want) + } +} + +func geminiBudgetModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "gemini-2.5-pro-test", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + ZeroAllowed: true, + }, + } +} + +func geminiLevelModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "gemini-3-pro-preview-test", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + Levels: []string{"minimal", "low", 
"medium", "high"}, + }, + } +} + +func claudeBudgetModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "claude-sonnet-4-5-test", + Thinking: ®istry.ThinkingSupport{ + Min: 1024, + Max: 128000, + ZeroAllowed: true, + }, + } +} + +func openAILevelModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "gpt-5.2-test", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + ZeroAllowed: true, + Levels: []string{"low", "medium", "high"}, + }, + } +} + +func iFlowModel() *registry.ModelInfo { + return ®istry.ModelInfo{ + ID: "glm-4.6-test", + Thinking: ®istry.ThinkingSupport{ + Min: 1, + Max: 10, + ZeroAllowed: true, + }, + } +} diff --git a/internal/thinking/convert.go b/internal/thinking/convert.go new file mode 100644 index 00000000..92e54120 --- /dev/null +++ b/internal/thinking/convert.go @@ -0,0 +1,233 @@ +package thinking + +import ( + "fmt" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" +) + +// levelToBudgetMap defines the standard Level → Budget mapping. +// All keys are lowercase; lookups should use strings.ToLower. +var levelToBudgetMap = map[string]int{ + "none": 0, + "auto": -1, + "minimal": 512, + "low": 1024, + "medium": 8192, + "high": 24576, + "xhigh": 32768, +} + +// ConvertLevelToBudget converts a thinking level to a budget value. +// +// This is a semantic conversion that maps discrete levels to numeric budgets. +// Level matching is case-insensitive. +// +// Level → Budget mapping: +// - none → 0 +// - auto → -1 +// - minimal → 512 +// - low → 1024 +// - medium → 8192 +// - high → 24576 +// - xhigh → 32768 +// +// Returns: +// - budget: The converted budget value +// - ok: true if level is valid, false otherwise +func ConvertLevelToBudget(level string) (int, bool) { + budget, ok := levelToBudgetMap[strings.ToLower(level)] + return budget, ok +} + +// BudgetThreshold constants define the upper bounds for each thinking level. +// These are used by ConvertBudgetToLevel for range-based mapping. 
+const ( + // ThresholdMinimal is the upper bound for "minimal" level (1-512) + ThresholdMinimal = 512 + // ThresholdLow is the upper bound for "low" level (513-1024) + ThresholdLow = 1024 + // ThresholdMedium is the upper bound for "medium" level (1025-8192) + ThresholdMedium = 8192 + // ThresholdHigh is the upper bound for "high" level (8193-24576) + ThresholdHigh = 24576 +) + +// ConvertBudgetToLevel converts a budget value to the nearest thinking level. +// +// This is a semantic conversion that maps numeric budgets to discrete levels. +// Uses threshold-based mapping for range conversion. +// +// Budget → Level thresholds: +// - -1 → auto +// - 0 → none +// - 1-512 → minimal +// - 513-1024 → low +// - 1025-8192 → medium +// - 8193-24576 → high +// - 24577+ → xhigh +// +// Returns: +// - level: The converted thinking level string +// - ok: true if budget is valid, false for invalid negatives (< -1) +func ConvertBudgetToLevel(budget int) (string, bool) { + switch { + case budget < -1: + // Invalid negative values + return "", false + case budget == -1: + return string(LevelAuto), true + case budget == 0: + return string(LevelNone), true + case budget <= ThresholdMinimal: + return string(LevelMinimal), true + case budget <= ThresholdLow: + return string(LevelLow), true + case budget <= ThresholdMedium: + return string(LevelMedium), true + case budget <= ThresholdHigh: + return string(LevelHigh), true + default: + return string(LevelXHigh), true + } +} + +// ModelCapability describes the thinking format support of a model. +type ModelCapability int + +const ( + // CapabilityUnknown indicates modelInfo is nil (passthrough behavior, internal use). + CapabilityUnknown ModelCapability = iota - 1 + // CapabilityNone indicates model doesn't support thinking (Thinking is nil). + CapabilityNone + // CapabilityBudgetOnly indicates the model supports numeric budgets only. + CapabilityBudgetOnly + // CapabilityLevelOnly indicates the model supports discrete levels only. 
+ CapabilityLevelOnly + // CapabilityHybrid indicates the model supports both budgets and levels. + CapabilityHybrid +) + +// detectModelCapability determines the thinking format capability of a model. +// +// This is an internal function used by NormalizeForModel to decide conversion strategy. +// It analyzes the model's ThinkingSupport configuration to classify the model: +// - CapabilityNone: modelInfo.Thinking is nil (model doesn't support thinking) +// - CapabilityBudgetOnly: Has Min/Max but no Levels (Claude, Gemini 2.5) +// - CapabilityLevelOnly: Has Levels but no Min/Max (OpenAI, iFlow) +// - CapabilityHybrid: Has both Min/Max and Levels (Gemini 3) +// +// Note: Returns a special sentinel value when modelInfo itself is nil (unknown model). +func detectModelCapability(modelInfo *registry.ModelInfo) ModelCapability { + if modelInfo == nil { + return CapabilityUnknown // sentinel for "passthrough" behavior + } + if modelInfo.Thinking == nil { + return CapabilityNone + } + support := modelInfo.Thinking + hasBudget := support.Min > 0 || support.Max > 0 + hasLevels := len(support.Levels) > 0 + + switch { + case hasBudget && hasLevels: + return CapabilityHybrid + case hasBudget: + return CapabilityBudgetOnly + case hasLevels: + return CapabilityLevelOnly + default: + return CapabilityNone + } +} + +// normalizeMixedConfig resolves a thinking configuration when both budget and level +// might be present, applying priority rules. +// +// Priority rules (Level takes precedence over Budget): +// - If level is non-empty: use level (special handling for "auto" and "none") +// - If level is empty and budget is set: use budget +// - If neither is set (budget=0, level=""): return ModeNone +// +// This function is used internally to handle ambiguous input configurations. 
func normalizeMixedConfig(budget int, level string) ThinkingConfig {
	// Levels are compared and stored lower-cased, so matching is case-insensitive.
	normalizedLevel := strings.ToLower(strings.TrimSpace(level))
	if normalizedLevel != "" {
		switch normalizedLevel {
		case string(LevelAuto):
			return ThinkingConfig{Mode: ModeAuto, Budget: -1, Level: ThinkingLevel(normalizedLevel)}
		case string(LevelNone):
			return ThinkingConfig{Mode: ModeNone, Budget: 0, Level: ThinkingLevel(normalizedLevel)}
		default:
			return ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel(normalizedLevel)}
		}
	}
	switch budget {
	case -1:
		return ThinkingConfig{Mode: ModeAuto, Budget: -1}
	case 0:
		return ThinkingConfig{Mode: ModeNone, Budget: 0}
	default:
		return ThinkingConfig{Mode: ModeBudget, Budget: budget}
	}
}

// NormalizeForModel normalizes a thinking configuration for a specific model.
//
// This function converts the configuration format based on model capabilities:
// - Budget-only models (Claude, Gemini 2.5): Level → Budget conversion
// - Level-only models (OpenAI, iFlow): Budget → Level conversion
// - Hybrid models (Gemini 3): preserve the original format
// - No thinking support (Thinking is nil): explicit budget/level requests degrade
//   to ModeNone; ModeAuto and ModeNone requests are passed through unchanged
// - Unknown model (modelInfo is nil): passthrough (preserve original format)
//
// Parameters:
// - config: The thinking configuration to normalize (must not be nil)
// - modelInfo: Model registry information containing ThinkingSupport properties
//
// Returns:
// - Normalized ThinkingConfig suitable for the model
// - Error if conversion fails (e.g., unsupported level or invalid budget)
func NormalizeForModel(config *ThinkingConfig, modelInfo *registry.ModelInfo) (*ThinkingConfig, error) {
	if config == nil {
		return nil, fmt.Errorf("thinking config is nil")
	}

	normalized := *config
	capability := detectModelCapability(modelInfo)

	// If the model doesn't support thinking, degrade explicit budget/level
	// requests to ModeNone. ModeAuto and ModeNone are deliberately exempt and
	// fall through to the switch below, where they are returned unchanged.
	if capability == CapabilityNone && config.Mode != ModeNone && config.Mode != ModeAuto {
		return
&ThinkingConfig{Mode: ModeNone, Budget: 0}, nil + } + + switch config.Mode { + case ModeAuto, ModeNone: + return &normalized, nil + case ModeBudget: + if capability == CapabilityLevelOnly { + level, ok := ConvertBudgetToLevel(config.Budget) + if !ok { + return nil, fmt.Errorf("invalid budget: %d", config.Budget) + } + normalized.Mode = ModeLevel + normalized.Level = ThinkingLevel(level) + normalized.Budget = 0 + } + return &normalized, nil + case ModeLevel: + if capability == CapabilityBudgetOnly { + budget, ok := ConvertLevelToBudget(string(config.Level)) + if !ok { + return nil, fmt.Errorf("unknown level: %s", config.Level) + } + normalized.Mode = ModeBudget + normalized.Budget = budget + normalized.Level = "" + } + return &normalized, nil + default: + return &normalized, nil + } +} diff --git a/internal/thinking/convert_test.go b/internal/thinking/convert_test.go new file mode 100644 index 00000000..eacc2532 --- /dev/null +++ b/internal/thinking/convert_test.go @@ -0,0 +1,277 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" +) + +// TestConvertLevelToBudget tests the ConvertLevelToBudget function. +// +// ConvertLevelToBudget converts a thinking level to a budget value. +// This is a semantic conversion - it does NOT apply clamping. 
+// +// Level → Budget mapping: +// - none → 0 +// - auto → -1 +// - minimal → 512 +// - low → 1024 +// - medium → 8192 +// - high → 24576 +// - xhigh → 32768 +func TestConvertLevelToBudget(t *testing.T) { + tests := []struct { + name string + level string + want int + wantOK bool + }{ + // Standard levels + {"none", "none", 0, true}, + {"auto", "auto", -1, true}, + {"minimal", "minimal", 512, true}, + {"low", "low", 1024, true}, + {"medium", "medium", 8192, true}, + {"high", "high", 24576, true}, + {"xhigh", "xhigh", 32768, true}, + + // Case insensitive + {"case insensitive HIGH", "HIGH", 24576, true}, + {"case insensitive High", "High", 24576, true}, + {"case insensitive NONE", "NONE", 0, true}, + {"case insensitive Auto", "Auto", -1, true}, + + // Invalid levels + {"invalid ultra", "ultra", 0, false}, + {"invalid maximum", "maximum", 0, false}, + {"empty string", "", 0, false}, + {"whitespace", " ", 0, false}, + {"numeric string", "1000", 0, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + budget, ok := ConvertLevelToBudget(tt.level) + if ok != tt.wantOK { + t.Errorf("ConvertLevelToBudget(%q) ok = %v, want %v", tt.level, ok, tt.wantOK) + } + if budget != tt.want { + t.Errorf("ConvertLevelToBudget(%q) = %d, want %d", tt.level, budget, tt.want) + } + }) + } +} + +// TestConvertBudgetToLevel tests the ConvertBudgetToLevel function. +// +// ConvertBudgetToLevel converts a budget value to the nearest level. +// Uses threshold-based mapping for range conversion. 
+// +// Budget → Level thresholds: +// - -1 → auto +// - 0 → none +// - 1-512 → minimal +// - 513-1024 → low +// - 1025-8192 → medium +// - 8193-24576 → high +// - 24577+ → xhigh +// +// Depends on: Epic 4 Story 4-2 (budget to level conversion) +func TestConvertBudgetToLevel(t *testing.T) { + tests := []struct { + name string + budget int + want string + wantOK bool + }{ + // Special values + {"auto", -1, "auto", true}, + {"none", 0, "none", true}, + + // Invalid negative values + {"invalid negative -2", -2, "", false}, + {"invalid negative -100", -100, "", false}, + {"invalid negative extreme", -999999, "", false}, + + // Minimal range (1-512) + {"minimal min", 1, "minimal", true}, + {"minimal mid", 256, "minimal", true}, + {"minimal max", 512, "minimal", true}, + + // Low range (513-1024) + {"low start", 513, "low", true}, + {"low boundary", 1024, "low", true}, + + // Medium range (1025-8192) + {"medium start", 1025, "medium", true}, + {"medium mid", 4096, "medium", true}, + {"medium boundary", 8192, "medium", true}, + + // High range (8193-24576) + {"high start", 8193, "high", true}, + {"high mid", 16384, "high", true}, + {"high boundary", 24576, "high", true}, + + // XHigh range (24577+) + {"xhigh start", 24577, "xhigh", true}, + {"xhigh mid", 32768, "xhigh", true}, + {"xhigh large", 100000, "xhigh", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + level, ok := ConvertBudgetToLevel(tt.budget) + if ok != tt.wantOK { + t.Errorf("ConvertBudgetToLevel(%d) ok = %v, want %v", tt.budget, ok, tt.wantOK) + } + if level != tt.want { + t.Errorf("ConvertBudgetToLevel(%d) = %q, want %q", tt.budget, level, tt.want) + } + }) + } +} + +// TestConvertMixedFormat tests mixed format handling. +// +// Tests scenarios where both level and budget might be present, +// or where format conversion requires special handling. 
+// +// Depends on: Epic 4 Story 4-3 (mixed format handling) +func TestConvertMixedFormat(t *testing.T) { + tests := []struct { + name string + inputBudget int + inputLevel string + wantMode ThinkingMode + wantBudget int + wantLevel ThinkingLevel + }{ + // Level takes precedence when both present + {"level and budget - level wins", 8192, "high", ModeLevel, 0, LevelHigh}, + {"level and zero budget", 0, "high", ModeLevel, 0, LevelHigh}, + + // Budget only + {"budget only", 16384, "", ModeBudget, 16384, ""}, + + // Level only + {"level only", 0, "medium", ModeLevel, 0, LevelMedium}, + + // Neither (default) + {"neither", 0, "", ModeNone, 0, ""}, + + // Special values + {"auto level", 0, "auto", ModeAuto, -1, LevelAuto}, + {"none level", 0, "none", ModeNone, 0, LevelNone}, + {"auto budget", -1, "", ModeAuto, -1, ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := normalizeMixedConfig(tt.inputBudget, tt.inputLevel) + if got.Mode != tt.wantMode { + t.Errorf("normalizeMixedConfig(%d, %q) Mode = %v, want %v", tt.inputBudget, tt.inputLevel, got.Mode, tt.wantMode) + } + if got.Budget != tt.wantBudget { + t.Errorf("normalizeMixedConfig(%d, %q) Budget = %d, want %d", tt.inputBudget, tt.inputLevel, got.Budget, tt.wantBudget) + } + if got.Level != tt.wantLevel { + t.Errorf("normalizeMixedConfig(%d, %q) Level = %q, want %q", tt.inputBudget, tt.inputLevel, got.Level, tt.wantLevel) + } + }) + } +} + +// TestNormalizeForModel tests model-aware format normalization. 
+func TestNormalizeForModel(t *testing.T) {
+	budgetOnlyModel := &registry.ModelInfo{
+		Thinking: &registry.ThinkingSupport{
+			Min: 1024,
+			Max: 128000,
+		},
+	}
+	levelOnlyModel := &registry.ModelInfo{
+		Thinking: &registry.ThinkingSupport{
+			Levels: []string{"low", "medium", "high"},
+		},
+	}
+	hybridModel := &registry.ModelInfo{
+		Thinking: &registry.ThinkingSupport{
+			Min:    128,
+			Max:    32768,
+			Levels: []string{"minimal", "low", "medium", "high"},
+		},
+	}
+
+	tests := []struct {
+		name    string
+		config  ThinkingConfig
+		model   *registry.ModelInfo
+		want    ThinkingConfig
+		wantErr bool
+	}{
+		{"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false},
+		{"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, budgetOnlyModel, ThinkingConfig{Mode: ModeBudget, Budget: 24576}, false},
+		{"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}, false},
+		{"level-only keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, levelOnlyModel, ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, false},
+		{"hybrid keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 16384}, hybridModel, ThinkingConfig{Mode: ModeBudget, Budget: 16384}, false},
+		{"hybrid keeps level", ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, hybridModel, ThinkingConfig{Mode: ModeLevel, Level: LevelMinimal}, false},
+		{"auto passthrough", ThinkingConfig{Mode: ModeAuto, Budget: -1}, levelOnlyModel, ThinkingConfig{Mode: ModeAuto, Budget: -1}, false},
+		{"none passthrough", ThinkingConfig{Mode: ModeNone, Budget: 0}, budgetOnlyModel, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
+		{"invalid level", ThinkingConfig{Mode: ModeLevel, Level: "ultra"}, budgetOnlyModel, ThinkingConfig{}, true},
+		{"invalid budget", ThinkingConfig{Mode: ModeBudget, Budget: -2}, levelOnlyModel, ThinkingConfig{}, true},
+		{"nil modelInfo passthrough budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, ThinkingConfig{Mode: ModeBudget, Budget: 8192}, false},
+		{"nil modelInfo passthrough level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, nil, ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, false},
+		{"nil thinking degrades to none", ThinkingConfig{Mode: ModeBudget, Budget: 4096}, &registry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
+		{"nil thinking level degrades to none", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ModelInfo{}, ThinkingConfig{Mode: ModeNone, Budget: 0}, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := NormalizeForModel(&tt.config, tt.model)
+			if (err != nil) != tt.wantErr {
+				t.Fatalf("NormalizeForModel(%+v) error = %v, wantErr %v", tt.config, err, tt.wantErr)
+			}
+			if tt.wantErr {
+				return
+			}
+			if got == nil {
+				t.Fatalf("NormalizeForModel(%+v) returned nil config", tt.config)
+			}
+			if got.Mode != tt.want.Mode {
+				t.Errorf("NormalizeForModel(%+v) Mode = %v, want %v", tt.config, got.Mode, tt.want.Mode)
+			}
+			if got.Budget != tt.want.Budget {
+				t.Errorf("NormalizeForModel(%+v) Budget = %d, want %d", tt.config, got.Budget, tt.want.Budget)
+			}
+			if got.Level != tt.want.Level {
+				t.Errorf("NormalizeForModel(%+v) Level = %q, want %q", tt.config, got.Level, tt.want.Level)
+			}
+		})
+	}
+}
+
+// TestLevelToBudgetRoundTrip tests level → budget → level round trip.
+//
+// Verifies that converting level to budget and back produces consistent results.
+// +// Depends on: Epic 4 Story 4-1, 4-2 +func TestLevelToBudgetRoundTrip(t *testing.T) { + levels := []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} + + for _, level := range levels { + t.Run(level, func(t *testing.T) { + budget, ok := ConvertLevelToBudget(level) + if !ok { + t.Fatalf("ConvertLevelToBudget(%q) returned ok=false", level) + } + resultLevel, ok := ConvertBudgetToLevel(budget) + if !ok { + t.Fatalf("ConvertBudgetToLevel(%d) returned ok=false", budget) + } + if resultLevel != level { + t.Errorf("round trip: %q → %d → %q, want %q", level, budget, resultLevel, level) + } + }) + } +} diff --git a/internal/thinking/errors.go b/internal/thinking/errors.go new file mode 100644 index 00000000..56f82c68 --- /dev/null +++ b/internal/thinking/errors.go @@ -0,0 +1,71 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +// ErrorCode represents the type of thinking configuration error. +type ErrorCode string + +// Error codes for thinking configuration processing. +const ( + // ErrInvalidSuffix indicates the suffix format cannot be parsed. + // Example: "model(abc" (missing closing parenthesis) + ErrInvalidSuffix ErrorCode = "INVALID_SUFFIX" + + // ErrUnknownLevel indicates the level value is not in the valid list. + // Example: "model(ultra)" where "ultra" is not a valid level + ErrUnknownLevel ErrorCode = "UNKNOWN_LEVEL" + + // ErrThinkingNotSupported indicates the model does not support thinking. + // Example: claude-haiku-4-5 does not have thinking capability + ErrThinkingNotSupported ErrorCode = "THINKING_NOT_SUPPORTED" + + // ErrLevelNotSupported indicates the model does not support level mode. + // Example: using level with a budget-only model + ErrLevelNotSupported ErrorCode = "LEVEL_NOT_SUPPORTED" + + // ErrProviderMismatch indicates the provider does not match the model. 
+ // Example: applying Claude format to a Gemini model + ErrProviderMismatch ErrorCode = "PROVIDER_MISMATCH" +) + +// ThinkingError represents an error that occurred during thinking configuration processing. +// +// This error type provides structured information about the error, including: +// - Code: A machine-readable error code for programmatic handling +// - Message: A human-readable description of the error +// - Model: The model name related to the error (optional) +// - Details: Additional context information (optional) +type ThinkingError struct { + // Code is the machine-readable error code + Code ErrorCode + // Message is the human-readable error description. + // Should be lowercase, no trailing period, with context if applicable. + Message string + // Model is the model name related to this error (optional) + Model string + // Details contains additional context information (optional) + Details map[string]interface{} +} + +// Error implements the error interface. +// Returns the message directly without code prefix. +// Use Code field for programmatic error handling. +func (e *ThinkingError) Error() string { + return e.Message +} + +// NewThinkingError creates a new ThinkingError with the given code and message. +func NewThinkingError(code ErrorCode, message string) *ThinkingError { + return &ThinkingError{ + Code: code, + Message: message, + } +} + +// NewThinkingErrorWithModel creates a new ThinkingError with model context. +func NewThinkingErrorWithModel(code ErrorCode, message, model string) *ThinkingError { + return &ThinkingError{ + Code: code, + Message: message, + Model: model, + } +} diff --git a/internal/thinking/errors_test.go b/internal/thinking/errors_test.go new file mode 100644 index 00000000..5ed2d0d0 --- /dev/null +++ b/internal/thinking/errors_test.go @@ -0,0 +1,34 @@ +// Package thinking provides unified thinking configuration processing logic. 
+package thinking + +import "testing" + +// TestThinkingErrorError tests the Error() method of ThinkingError. +// +// Error() returns the message directly without code prefix. +// Use Code field for programmatic error handling. +func TestThinkingErrorError(t *testing.T) { + tests := []struct { + name string + err *ThinkingError + wantMsg string + wantCode ErrorCode + }{ + {"invalid suffix format", NewThinkingError(ErrInvalidSuffix, "invalid suffix format: model(abc"), "invalid suffix format: model(abc", ErrInvalidSuffix}, + {"unknown level", NewThinkingError(ErrUnknownLevel, "unknown level: ultra"), "unknown level: ultra", ErrUnknownLevel}, + {"level not supported", NewThinkingError(ErrLevelNotSupported, "level \"xhigh\" not supported, valid levels: low, medium, high"), "level \"xhigh\" not supported, valid levels: low, medium, high", ErrLevelNotSupported}, + {"thinking not supported", NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "claude-haiku"), "thinking not supported for this model", ErrThinkingNotSupported}, + {"provider mismatch", NewThinkingError(ErrProviderMismatch, "provider mismatch: expected claude, got gemini"), "provider mismatch: expected claude, got gemini", ErrProviderMismatch}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := tt.err.Error(); got != tt.wantMsg { + t.Errorf("Error() = %q, want %q", got, tt.wantMsg) + } + if tt.err.Code != tt.wantCode { + t.Errorf("Code = %q, want %q", tt.err.Code, tt.wantCode) + } + }) + } +} diff --git a/internal/thinking/extract_test.go b/internal/thinking/extract_test.go new file mode 100644 index 00000000..c697e130 --- /dev/null +++ b/internal/thinking/extract_test.go @@ -0,0 +1,42 @@ +// Package thinking provides unified thinking configuration processing logic. 
+package thinking
+
+import "testing"
+
+func TestExtractThinkingConfig(t *testing.T) {
+	tests := []struct {
+		name     string
+		body     string
+		provider string
+		want     ThinkingConfig
+	}{
+		{"claude budget", `{"thinking":{"budget_tokens":16384}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 16384}},
+		{"claude disabled type", `{"thinking":{"type":"disabled"}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}},
+		{"claude auto budget", `{"thinking":{"budget_tokens":-1}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}},
+		{"claude enabled type without budget", `{"thinking":{"type":"enabled"}}`, "claude", ThinkingConfig{Mode: ModeAuto, Budget: -1}},
+		{"claude enabled type with budget", `{"thinking":{"type":"enabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeBudget, Budget: 8192}},
+		{"claude disabled type overrides budget", `{"thinking":{"type":"disabled","budget_tokens":8192}}`, "claude", ThinkingConfig{Mode: ModeNone, Budget: 0}},
+		{"gemini budget", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, "gemini", ThinkingConfig{Mode: ModeBudget, Budget: 8192}},
+		{"gemini level", `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, "gemini", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}},
+		{"gemini cli auto", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"auto"}}}}`, "gemini-cli", ThinkingConfig{Mode: ModeAuto, Budget: -1}},
+		{"openai level", `{"reasoning_effort":"medium"}`, "openai", ThinkingConfig{Mode: ModeLevel, Level: LevelMedium}},
+		{"openai none", `{"reasoning_effort":"none"}`, "openai", ThinkingConfig{Mode: ModeNone, Budget: 0}},
+		{"codex effort high", `{"reasoning":{"effort":"high"}}`, "codex", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}},
+		{"codex effort none", `{"reasoning":{"effort":"none"}}`, "codex", ThinkingConfig{Mode: ModeNone, Budget: 0}},
+		{"iflow enable", `{"chat_template_kwargs":{"enable_thinking":true}}`, "iflow", ThinkingConfig{Mode: ModeBudget, Budget: 1}},
+		{"iflow disable", `{"reasoning_split":false}`, "iflow", ThinkingConfig{Mode: ModeNone, Budget: 0}},
+		{"unknown provider", `{"thinking":{"budget_tokens":123}}`, "unknown", ThinkingConfig{}},
+		{"invalid json", `{"thinking":`, "claude", ThinkingConfig{}},
+		{"empty body", "", "claude", ThinkingConfig{}},
+		{"no config", `{}`, "gemini", ThinkingConfig{}},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := extractThinkingConfig([]byte(tt.body), tt.provider)
+			if got != tt.want {
+				t.Fatalf("extractThinkingConfig() = %+v, want %+v", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/internal/thinking/provider/claude/apply.go b/internal/thinking/provider/claude/apply.go
new file mode 100644
index 00000000..e1409389
--- /dev/null
+++ b/internal/thinking/provider/claude/apply.go
@@ -0,0 +1,116 @@
+// Package claude implements thinking configuration scaffolding for Claude models.
+//
+// Claude models use the thinking.budget_tokens format with values in the range
+// 1024-128000. Some Claude models support ZeroAllowed (sonnet-4-5, opus-4-5),
+// while older models do not.
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-6
+package claude
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// Applier implements thinking.ProviderApplier for Claude models.
+// This applier is stateless and holds no configuration.
+type Applier struct{}
+
+// NewApplier creates a new Claude thinking applier.
+func NewApplier() *Applier {
+	return &Applier{}
+}
+
+func init() {
+	thinking.RegisterProvider("claude", NewApplier())
+}
+
+// Apply applies thinking configuration to Claude request body.
+//
+// IMPORTANT: This method expects config to be pre-validated by thinking.ValidateConfig.
+// ValidateConfig handles: +// - Mode conversion (Level→Budget, Auto→Budget) +// - Budget clamping to model range +// - ZeroAllowed constraint enforcement +// +// Apply only processes ModeBudget and ModeNone; other modes are passed through unchanged. +// +// Expected output format when enabled: +// +// { +// "thinking": { +// "type": "enabled", +// "budget_tokens": 16384 +// } +// } +// +// Expected output format when disabled: +// +// { +// "thinking": { +// "type": "disabled" +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return applyCompatibleClaude(body, config) + } + + // Only process ModeBudget and ModeNone; other modes pass through + // (caller should use ValidateConfig first to normalize modes) + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // Budget is expected to be pre-validated by ValidateConfig (clamped, ZeroAllowed enforced) + // Decide enabled/disabled based on budget value + if config.Budget == 0 { + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + } + + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + return result, nil +} + +func applyCompatibleClaude(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { 
+ return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + switch config.Mode { + case thinking.ModeNone: + result, _ := sjson.SetBytes(body, "thinking.type", "disabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + case thinking.ModeAuto: + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.DeleteBytes(result, "thinking.budget_tokens") + return result, nil + default: + result, _ := sjson.SetBytes(body, "thinking.type", "enabled") + result, _ = sjson.SetBytes(result, "thinking.budget_tokens", config.Budget) + return result, nil + } +} diff --git a/internal/thinking/provider/claude/apply_test.go b/internal/thinking/provider/claude/apply_test.go new file mode 100644 index 00000000..769a17c4 --- /dev/null +++ b/internal/thinking/provider/claude/apply_test.go @@ -0,0 +1,288 @@ +// Package claude implements thinking configuration for Claude models. +package claude + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +// ============================================================================= +// Unit Tests: Applier Creation and Interface +// ============================================================================= + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatal("NewApplier() returned nil") + } +} + +func TestApplierImplementsInterface(t *testing.T) { + var _ thinking.ProviderApplier = (*Applier)(nil) +} + +// ============================================================================= +// Unit Tests: Budget and Disable Logic (Pre-validated Config) +// ============================================================================= + +// TestClaudeApplyBudgetAndNone tests budget values and disable modes. 
+// NOTE: These tests assume config has been pre-validated by ValidateConfig. +// Apply trusts the input and does not perform clamping. +func TestClaudeApplyBudgetAndNone(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + tests := []struct { + name string + config thinking.ThinkingConfig + wantType string + wantBudget int + wantBudgetOK bool + }{ + // Valid pre-validated budget values + {"budget 16k", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, "enabled", 16384, true}, + {"budget min", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1024}, "enabled", 1024, true}, + {"budget max", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 128000}, "enabled", 128000, true}, + {"budget mid", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "enabled", 50000, true}, + // Disable cases + {"budget zero disables", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "disabled", 0, false}, + {"mode none disables", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "disabled", 0, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + thinkingType := gjson.GetBytes(result, "thinking.type").String() + if thinkingType != tt.wantType { + t.Fatalf("thinking.type = %q, want %q", thinkingType, tt.wantType) + } + + budgetValue := gjson.GetBytes(result, "thinking.budget_tokens") + if budgetValue.Exists() != tt.wantBudgetOK { + t.Fatalf("thinking.budget_tokens exists = %v, want %v", budgetValue.Exists(), tt.wantBudgetOK) + } + if tt.wantBudgetOK { + if got := int(budgetValue.Int()); got != tt.wantBudget { + t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget) + } + } + }) + } +} + +// TestClaudeApplyPassthroughBudget tests that Apply trusts pre-validated budget values. 
+// It does NOT perform clamping - that's ValidateConfig's responsibility. +func TestClaudeApplyPassthroughBudget(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + tests := []struct { + name string + config thinking.ThinkingConfig + wantBudget int + }{ + // Apply should pass through the budget value as-is + // (ValidateConfig would have clamped these, but Apply trusts the input) + {"passes through any budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 500}, 500}, + {"passes through large budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 200000}, 200000}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget { + t.Fatalf("thinking.budget_tokens = %d, want %d (passthrough)", got, tt.wantBudget) + } + }) + } +} + +// ============================================================================= +// Unit Tests: Mode Passthrough (Strict Layering) +// ============================================================================= + +// TestClaudeApplyModePassthrough tests that non-Budget/None modes pass through unchanged. +// Apply expects ValidateConfig to have already converted Level/Auto to Budget. 
+func TestClaudeApplyModePassthrough(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + tests := []struct { + name string + config thinking.ThinkingConfig + body string + }{ + {"ModeLevel passes through", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "high"}, `{"model":"test"}`}, + {"ModeAuto passes through", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, `{"model":"test"}`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Should return body unchanged + if string(result) != tt.body { + t.Fatalf("Apply() = %s, want %s (passthrough)", string(result), tt.body) + } + }) + } +} + +// ============================================================================= +// Unit Tests: Output Format +// ============================================================================= + +// TestClaudeApplyOutputFormat tests the exact JSON output format. 
+// +// Claude expects: +// +// { +// "thinking": { +// "type": "enabled", +// "budget_tokens": 16384 +// } +// } +func TestClaudeApplyOutputFormat(t *testing.T) { + tests := []struct { + name string + config thinking.ThinkingConfig + wantJSON string + }{ + { + "enabled with budget", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + `{"thinking":{"type":"enabled","budget_tokens":16384}}`, + }, + { + "disabled", + thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, + `{"thinking":{"type":"disabled"}}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != tt.wantJSON { + t.Fatalf("Apply() = %s, want %s", result, tt.wantJSON) + } + }) + } +} + +// ============================================================================= +// Unit Tests: Body Merging +// ============================================================================= + +// TestClaudeApplyWithExistingBody tests applying config to existing request body. 
+func TestClaudeApplyWithExistingBody(t *testing.T) { + tests := []struct { + name string + body string + config thinking.ThinkingConfig + wantBody string + }{ + { + "add to empty body", + `{}`, + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + `{"thinking":{"type":"enabled","budget_tokens":16384}}`, + }, + { + "preserve existing fields", + `{"model":"claude-sonnet-4-5","messages":[]}`, + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, + `{"model":"claude-sonnet-4-5","messages":[],"thinking":{"type":"enabled","budget_tokens":8192}}`, + }, + { + "override existing thinking", + `{"thinking":{"type":"enabled","budget_tokens":1000}}`, + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + `{"thinking":{"type":"enabled","budget_tokens":16384}}`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applier := NewApplier() + modelInfo := buildClaudeModelInfo() + + result, err := applier.Apply([]byte(tt.body), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != tt.wantBody { + t.Fatalf("Apply() = %s, want %s", result, tt.wantBody) + } + }) + } +} + +// TestClaudeApplyWithNilBody tests handling of nil/empty body. 
+func TestClaudeApplyWithNilBody(t *testing.T) {
+	applier := NewApplier()
+	modelInfo := buildClaudeModelInfo()
+
+	tests := []struct {
+		name       string
+		body       []byte
+		wantBudget int
+	}{
+		{"nil body", nil, 16384},
+		{"empty body", []byte{}, 16384},
+		{"empty object", []byte(`{}`), 16384},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}
+			result, err := applier.Apply(tt.body, config, modelInfo)
+			if err != nil {
+				t.Fatalf("Apply() error = %v", err)
+			}
+
+			if got := gjson.GetBytes(result, "thinking.type").String(); got != "enabled" {
+				t.Fatalf("thinking.type = %q, want %q", got, "enabled")
+			}
+			if got := int(gjson.GetBytes(result, "thinking.budget_tokens").Int()); got != tt.wantBudget {
+				t.Fatalf("thinking.budget_tokens = %d, want %d", got, tt.wantBudget)
+			}
+		})
+	}
+}
+
+// =============================================================================
+// Helper Functions
+// =============================================================================
+
+func buildClaudeModelInfo() *registry.ModelInfo {
+	return &registry.ModelInfo{
+		ID: "claude-sonnet-4-5",
+		Thinking: &registry.ThinkingSupport{
+			Min:            1024,
+			Max:            128000,
+			ZeroAllowed:    true,
+			DynamicAllowed: false,
+		},
+	}
+}
diff --git a/internal/thinking/provider/codex/apply.go b/internal/thinking/provider/codex/apply.go
new file mode 100644
index 00000000..386185a6
--- /dev/null
+++ b/internal/thinking/provider/codex/apply.go
@@ -0,0 +1,138 @@
+// Package codex implements thinking configuration for Codex (OpenAI Responses API) models.
+//
+// Codex models use the reasoning.effort format with discrete levels
+// (low/medium/high). This is similar to OpenAI but uses nested field
+// "reasoning.effort" instead of "reasoning_effort".
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-8 +package codex + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for Codex models. +// +// Codex-specific behavior: +// - Output format: reasoning.effort (string: low/medium/high/xhigh) +// - Level-only mode: no numeric budget support +// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new Codex thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("codex", NewApplier()) +} + +// Apply applies thinking configuration to Codex request body. +// +// Expected output format: +// +// { +// "reasoning": { +// "effort": "high" +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return applyCompatibleCodex(body, config) + } + + // Only handle ModeLevel and ModeNone; other modes pass through unchanged. 
+ if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeLevel { + result, _ := sjson.SetBytes(body, "reasoning.effort", string(config.Level)) + return result, nil + } + + effort := "" + support := modelInfo.Thinking + if config.Budget == 0 { + if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) { + effort = string(thinking.LevelNone) + } + } + if effort == "" && config.Level != "" { + effort = string(config.Level) + } + if effort == "" && len(support.Levels) > 0 { + effort = support.Levels[0] + } + if effort == "" { + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning.effort", effort) + return result, nil +} + +func applyCompatibleCodex(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + var effort string + switch config.Mode { + case thinking.ModeLevel: + if config.Level == "" { + return body, nil + } + effort = string(config.Level) + case thinking.ModeNone: + effort = string(thinking.LevelNone) + if config.Level != "" { + effort = string(config.Level) + } + case thinking.ModeAuto: + // Auto mode for user-defined models: pass through as "auto" + effort = string(thinking.LevelAuto) + case thinking.ModeBudget: + // Budget mode: convert budget to level using threshold mapping + level, ok := thinking.ConvertBudgetToLevel(config.Budget) + if !ok { + return body, nil + } + effort = level + default: + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning.effort", effort) + return result, nil +} + +func hasLevel(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false +} diff --git a/internal/thinking/provider/gemini/apply.go 
b/internal/thinking/provider/gemini/apply.go new file mode 100644 index 00000000..7de48919 --- /dev/null +++ b/internal/thinking/provider/gemini/apply.go @@ -0,0 +1,172 @@ +// Package gemini implements thinking configuration for Gemini models. +// +// Gemini models have two formats: +// - Gemini 2.5: Uses thinkingBudget (numeric) +// - Gemini 3.x: Uses thinkingLevel (string: minimal/low/medium/high) +// or thinkingBudget=-1 for auto/dynamic mode +// +// Output format is determined by ThinkingConfig.Mode and ThinkingSupport.Levels: +// - ModeAuto: Always uses thinkingBudget=-1 (both Gemini 2.5 and 3.x) +// - len(Levels) > 0: Uses thinkingLevel (Gemini 3.x discrete levels) +// - len(Levels) == 0: Uses thinkingBudget (Gemini 2.5) +package gemini + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Gemini models. +// +// Gemini-specific behavior: +// - Gemini 2.5: thinkingBudget format, flash series supports ZeroAllowed +// - Gemini 3.x: thinkingLevel format, cannot be disabled +// - Use ThinkingSupport.Levels to decide output format +type Applier struct{} + +// NewApplier creates a new Gemini thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("gemini", NewApplier()) +} + +// Apply applies thinking configuration to Gemini request body. 
+// +// Expected output format (Gemini 2.5): +// +// { +// "generationConfig": { +// "thinkingConfig": { +// "thinkingBudget": 8192, +// "includeThoughts": true +// } +// } +// } +// +// Expected output format (Gemini 3.x): +// +// { +// "generationConfig": { +// "thinkingConfig": { +// "thinkingLevel": "high", +// "includeThoughts": true +// } +// } +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return a.applyCompatible(body, config) + } + + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // Choose format based on config.Mode and model capabilities: + // - ModeLevel: use Level format (validation will reject unsupported levels) + // - ModeNone: use Level format if model has Levels, else Budget format + // - ModeBudget/ModeAuto: use Budget format + switch config.Mode { + case thinking.ModeLevel: + return a.applyLevelFormat(body, config) + case thinking.ModeNone: + // ModeNone: route based on model capability (has Levels or not) + if len(modelInfo.Thinking.Levels) > 0 { + return a.applyLevelFormat(body, config) + } + return a.applyBudgetFormat(body, config) + default: + return a.applyBudgetFormat(body, config) + } +} + +func (a *Applier) applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return 
body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") { + return a.applyLevelFormat(body, config) + } + + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // ModeNone semantics: + // - ModeNone + Budget=0: completely disable thinking (not possible for Level-only models) + // - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false) + // ValidateConfig sets config.Level to the lowest level when ModeNone + Budget > 0. + + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingBudget") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig.thinkingLevel") + + budget := config.Budget + // ModeNone semantics: + // - ModeNone + Budget=0: 
completely disable thinking + // - ModeNone + Budget>0: forced to think but hide output (includeThoughts=false) + // When ZeroAllowed=false, ValidateConfig clamps Budget to Min while preserving ModeNone. + includeThoughts := false + switch config.Mode { + case thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} diff --git a/internal/thinking/provider/gemini/apply_test.go b/internal/thinking/provider/gemini/apply_test.go new file mode 100644 index 00000000..5f762a2f --- /dev/null +++ b/internal/thinking/provider/gemini/apply_test.go @@ -0,0 +1,526 @@ +// Package gemini implements thinking configuration for Gemini models. +package gemini + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatal("NewApplier() returned nil") + } +} + +// parseConfigFromSuffix parses a raw suffix into a ThinkingConfig. +// This helper reduces code duplication in end-to-end tests (L1 fix). 
+func parseConfigFromSuffix(rawSuffix string) (thinking.ThinkingConfig, bool) { + if budget, ok := thinking.ParseNumericSuffix(rawSuffix); ok { + return thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: budget}, true + } + if level, ok := thinking.ParseLevelSuffix(rawSuffix); ok { + return thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: level}, true + } + if mode, ok := thinking.ParseSpecialSuffix(rawSuffix); ok { + config := thinking.ThinkingConfig{Mode: mode} + if mode == thinking.ModeAuto { + config.Budget = -1 + } + return config, true + } + return thinking.ThinkingConfig{}, false +} + +func TestApplierImplementsInterface(t *testing.T) { + // Compile-time check: if Applier doesn't implement the interface, this won't compile + var _ thinking.ProviderApplier = (*Applier)(nil) +} + +// TestGeminiApply tests the Gemini thinking applier. +// +// Gemini-specific behavior: +// - Gemini 2.5: thinkingBudget format (numeric) +// - Gemini 3.x: thinkingLevel format (string) +// - Flash series: ZeroAllowed=true +// - Pro series: ZeroAllowed=false, Min=128 +// - CRITICAL: When budget=0/none, set includeThoughts=false +// +// Depends on: Epic 7 Story 7-2, 7-3 +func TestGeminiApply(t *testing.T) { + applier := NewApplier() + tests := []struct { + name string + model string + config thinking.ThinkingConfig + wantField string + wantValue interface{} + wantIncludeThoughts bool // CRITICAL: includeThoughts field + }{ + // Gemini 2.5 Flash (ZeroAllowed=true) + {"flash budget 8k", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, + {"flash zero", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false}, + {"flash none", "gemini-2.5-flash", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false}, + + // Gemini 2.5 Pro (ZeroAllowed=false, Min=128) + {"pro budget 8k", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: 
thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, + {"pro zero - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 128, false}, + {"pro none - clamp", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 128, false}, + {"pro below min", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50}, "thinkingBudget", 128, true}, + {"pro above max", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 50000}, "thinkingBudget", 32768, true}, + {"pro auto", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + + // Gemini 3 Pro (Level mode, ZeroAllowed=false) + {"g3-pro high", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, + {"g3-pro low", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true}, + {"g3-pro auto", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + + // Gemini 3 Flash (Level mode, minimal is lowest) + {"g3-flash high", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, + {"g3-flash medium", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "thinkingLevel", "medium", true}, + {"g3-flash minimal", "gemini-3-flash-preview", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiModelInfo(tt.model) + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error 
= %v", err) + } + + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) + switch want := tt.wantValue.(type) { + case int: + if int(gotField.Int()) != want { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) + } + case string: + if gotField.String() != want { + t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) + } + case bool: + if gotField.Bool() != want { + t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want) + } + default: + t.Fatalf("unsupported wantValue type %T", tt.wantValue) + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +// TestGeminiApplyEndToEndBudgetZero tests suffix parsing + validation + apply for budget=0. 
+// +// This test covers the complete flow from suffix parsing to Apply output: +// - AC#1: ModeBudget+Budget=0 → ModeNone conversion +// - AC#3: Gemini 3 ModeNone+Budget>0 → includeThoughts=false + thinkingLevel=low +// - AC#4: Gemini 2.5 Pro (0) → clamped to 128 + includeThoughts=false +func TestGeminiApplyEndToEndBudgetZero(t *testing.T) { + tests := []struct { + name string + model string + wantModel string + wantField string // "thinkingBudget" or "thinkingLevel" + wantValue interface{} + wantIncludeThoughts bool + }{ + // AC#4: Gemini 2.5 Pro - Budget format + {"gemini-25-pro zero", "gemini-2.5-pro(0)", "gemini-2.5-pro", "thinkingBudget", 128, false}, + // AC#3: Gemini 3 Pro - Level format, ModeNone clamped to Budget=128, uses lowest level + {"gemini-3-pro zero", "gemini-3-pro-preview(0)", "gemini-3-pro-preview", "thinkingLevel", "low", false}, + {"gemini-3-pro none", "gemini-3-pro-preview(none)", "gemini-3-pro-preview", "thinkingLevel", "low", false}, + // Gemini 3 Flash - Level format, lowest level is "minimal" + {"gemini-3-flash zero", "gemini-3-flash-preview(0)", "gemini-3-flash-preview", "thinkingLevel", "minimal", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + suffix := thinking.ParseSuffix(tt.model) + if !suffix.HasSuffix { + t.Fatalf("ParseSuffix(%q) HasSuffix = false, want true", tt.model) + } + if suffix.ModelName != tt.wantModel { + t.Fatalf("ParseSuffix(%q) ModelName = %q, want %q", tt.model, suffix.ModelName, tt.wantModel) + } + + // Parse suffix value using helper function (L1 fix) + config, ok := parseConfigFromSuffix(suffix.RawSuffix) + if !ok { + t.Fatalf("ParseSuffix(%q) RawSuffix = %q is not a valid suffix", tt.model, suffix.RawSuffix) + } + + modelInfo := buildGeminiModelInfo(suffix.ModelName) + normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), 
*normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Verify the output field value + gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) + switch want := tt.wantValue.(type) { + case int: + if int(gotField.Int()) != want { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) + } + case string: + if gotField.String() != want { + t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) + } + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +// TestGeminiApplyEndToEndAuto tests auto mode through both suffix parsing and direct config. +// +// This test covers: +// - AC#2: Gemini 2.5 auto uses thinkingBudget=-1 +// - AC#3: Gemini 3 auto uses thinkingBudget=-1 (not thinkingLevel) +// - Suffix parsing path: (auto) and (-1) suffixes +// - Direct config path: ModeLevel + Level=auto → ModeAuto conversion +func TestGeminiApplyEndToEndAuto(t *testing.T) { + tests := []struct { + name string + model string // model name (with suffix for parsing, or plain for direct config) + directConfig *thinking.ThinkingConfig // if not nil, use direct config instead of suffix parsing + wantField string + wantValue int + wantIncludeThoughts bool + }{ + // Suffix parsing path - Budget-only model (Gemini 2.5) + {"suffix auto g25", "gemini-2.5-pro(auto)", nil, "thinkingBudget", -1, true}, + {"suffix -1 g25", "gemini-2.5-pro(-1)", nil, "thinkingBudget", -1, true}, + // Suffix parsing path - Hybrid model (Gemini 3) + {"suffix auto g3", "gemini-3-pro-preview(auto)", nil, "thinkingBudget", -1, true}, + {"suffix -1 g3", "gemini-3-pro-preview(-1)", nil, "thinkingBudget", -1, true}, + // Direct config path - Level=auto → ModeAuto conversion + {"direct level=auto g25", "gemini-2.5-pro", 
&thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true}, + {"direct level=auto g3", "gemini-3-pro-preview", &thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelAuto}, "thinkingBudget", -1, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var config thinking.ThinkingConfig + var modelName string + + if tt.directConfig != nil { + // Direct config path + config = *tt.directConfig + modelName = tt.model + } else { + // Suffix parsing path + suffix := thinking.ParseSuffix(tt.model) + if !suffix.HasSuffix { + t.Fatalf("ParseSuffix(%q) HasSuffix = false", tt.model) + } + modelName = suffix.ModelName + var ok bool + config, ok = parseConfigFromSuffix(suffix.RawSuffix) + if !ok { + t.Fatalf("parseConfigFromSuffix(%q) failed", suffix.RawSuffix) + } + } + + modelInfo := buildGeminiModelInfo(modelName) + normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + // Verify ModeAuto after validation + if normalized.Mode != thinking.ModeAuto { + t.Fatalf("ValidateConfig() Mode = %v, want ModeAuto", normalized.Mode) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotField := gjson.GetBytes(result, "generationConfig.thinkingConfig."+tt.wantField) + if int(gotField.Int()) != tt.wantValue { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), tt.wantValue) + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +func TestGeminiApplyInvalidBody(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiModelInfo("gemini-2.5-flash") + config := 
thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + normalized, err := thinking.ValidateConfig(config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + tests := []struct { + name string + body []byte + }{ + {"nil body", nil}, + {"empty body", []byte{}}, + {"invalid json", []byte("{\"not json\"")}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply(tt.body, *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotBudget := int(gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int()) + if gotBudget != 8192 { + t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192) + } + + gotIncludeThoughts := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts").Bool() + if !gotIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true) + } + }) + } +} + +// TestGeminiApplyConflictingFields tests that conflicting fields are removed. +// +// When applying Budget format, any existing thinkingLevel should be removed. +// When applying Level format, any existing thinkingBudget should be removed. 
+func TestGeminiApplyConflictingFields(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + model string + config thinking.ThinkingConfig + existingBody string + wantField string // expected field to exist + wantNoField string // expected field to NOT exist + }{ + // Budget format should remove existing thinkingLevel + { + "budget removes level", + "gemini-2.5-pro", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, + `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, + "thinkingBudget", + "thinkingLevel", + }, + // Level format should remove existing thinkingBudget + { + "level removes budget", + "gemini-3-pro-preview", + thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, + `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}`, + "thinkingLevel", + "thinkingBudget", + }, + // ModeAuto uses budget format, should remove thinkingLevel + { + "auto removes level", + "gemini-3-pro-preview", + thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, + `{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}`, + "thinkingBudget", + "thinkingLevel", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiModelInfo(tt.model) + result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Verify expected field exists + wantPath := "generationConfig.thinkingConfig." + tt.wantField + if !gjson.GetBytes(result, wantPath).Exists() { + t.Fatalf("%s should exist in result: %s", tt.wantField, string(result)) + } + + // Verify conflicting field was removed + noPath := "generationConfig.thinkingConfig." 
+ tt.wantNoField + if gjson.GetBytes(result, noPath).Exists() { + t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result)) + } + }) + } +} + +// TestGeminiApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil. +func TestGeminiApplyThinkingNotSupported(t *testing.T) { + applier := NewApplier() + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + + // Model with nil Thinking support + modelInfo := &registry.ModelInfo{ID: "gemini-unknown", Thinking: nil} + + _, err := applier.Apply([]byte(`{}`), config, modelInfo) + if err == nil { + t.Fatal("Apply() expected error for nil Thinking, got nil") + } + + // Verify it's the correct error type + thinkErr, ok := err.(*thinking.ThinkingError) + if !ok { + t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err) + } + if thinkErr.Code != thinking.ErrThinkingNotSupported { + t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported) + } +} + +func buildGeminiModelInfo(modelID string) *registry.ModelInfo { + support := &registry.ThinkingSupport{} + switch modelID { + case "gemini-2.5-pro": + support.Min = 128 + support.Max = 32768 + support.ZeroAllowed = false + support.DynamicAllowed = true + case "gemini-2.5-flash", "gemini-2.5-flash-lite": + support.Min = 0 + support.Max = 24576 + support.ZeroAllowed = true + support.DynamicAllowed = true + case "gemini-3-pro-preview": + support.Min = 128 + support.Max = 32768 + support.ZeroAllowed = false + support.DynamicAllowed = true + support.Levels = []string{"low", "high"} + case "gemini-3-flash-preview": + support.Min = 128 + support.Max = 32768 + support.ZeroAllowed = false + support.DynamicAllowed = true + support.Levels = []string{"minimal", "low", "medium", "high"} + default: + // Unknown model - return nil Thinking to trigger error path + return &registry.ModelInfo{ID: modelID, Thinking: nil} + } + return &registry.ModelInfo{ + ID: modelID, + Thinking: support, + } +} + +// 
TestGeminiApplyNilModelInfo tests Apply behavior when modelInfo is nil. +// Coverage: apply.go:56-58 (H1) +func TestGeminiApplyNilModelInfo(t *testing.T) { + applier := NewApplier() + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + body := []byte(`{"existing": "data"}`) + + result, err := applier.Apply(body, config, nil) + if err != nil { + t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err) + } + if string(result) != string(body) { + t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result) + } +} + +// TestGeminiApplyEmptyModelID tests Apply when modelID is empty. +// Coverage: apply.go:61-63 (H2) +func TestGeminiApplyEmptyModelID(t *testing.T) { + applier := NewApplier() + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + modelInfo := &registry.ModelInfo{ID: "", Thinking: nil} + + _, err := applier.Apply([]byte(`{}`), config, modelInfo) + if err == nil { + t.Fatal("Apply() with empty modelID and nil Thinking should error") + } + thinkErr, ok := err.(*thinking.ThinkingError) + if !ok { + t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err) + } + if thinkErr.Model != "unknown" { + t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown") + } +} + +// TestGeminiApplyModeBudgetWithLevels tests that ModeBudget is applied with budget format +// even for models with Levels. The Apply layer handles ModeBudget by applying thinkingBudget. 
+// Coverage: apply.go:88-90 +func TestGeminiApplyModeBudgetWithLevels(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiModelInfo("gemini-3-flash-preview") + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + body := []byte(`{"existing": "data"}`) + + result, err := applier.Apply(body, config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + // ModeBudget applies budget format + budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget").Int() + if budget != 8192 { + t.Fatalf("Apply() expected thinkingBudget=8192, got: %d", budget) + } +} + +// TestGeminiApplyUnsupportedMode tests behavior with unsupported Mode types. +// Coverage: apply.go:67-69 and 97-98 (H5, L2) +func TestGeminiApplyUnsupportedMode(t *testing.T) { + applier := NewApplier() + body := []byte(`{"existing": "data"}`) + + tests := []struct { + name string + model string + config thinking.ThinkingConfig + }{ + {"unknown mode with budget model", "gemini-2.5-pro", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}}, + {"unknown mode with level model", "gemini-3-pro-preview", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiModelInfo(tt.model) + result, err := applier.Apply(body, tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + // Unsupported modes return original body unchanged + if string(result) != string(body) { + t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result) + } + }) + } +} diff --git a/internal/thinking/provider/geminicli/apply.go b/internal/thinking/provider/geminicli/apply.go new file mode 100644 index 00000000..b076b7a6 --- /dev/null +++ b/internal/thinking/provider/geminicli/apply.go @@ -0,0 +1,128 @@ +// Package geminicli implements thinking configuration for Gemini CLI API 
format. +// +// Gemini CLI uses request.generationConfig.thinkingConfig.* path instead of +// generationConfig.thinkingConfig.* used by standard Gemini API. +package geminicli + +import ( + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier applies thinking configuration for Gemini CLI API format. +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new Gemini CLI thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + applier := NewApplier() + thinking.RegisterProvider("gemini-cli", applier) + thinking.RegisterProvider("antigravity", applier) +} + +// Apply applies thinking configuration to Gemini CLI request body. +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return a.applyCompatible(body, config) + } + + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + // ModeAuto: Always use Budget format with thinkingBudget=-1 + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + // For non-auto modes, choose format based on model capabilities + support := modelInfo.Thinking + if len(support.Levels) > 0 { + return a.applyLevelFormat(body, config) + } + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) 
applyCompatible(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if config.Mode != thinking.ModeBudget && config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone && config.Mode != thinking.ModeAuto { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeAuto { + return a.applyBudgetFormat(body, config) + } + + if config.Mode == thinking.ModeLevel || (config.Mode == thinking.ModeNone && config.Level != "") { + return a.applyLevelFormat(body, config) + } + + return a.applyBudgetFormat(body, config) +} + +func (a *Applier) applyLevelFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget") + + if config.Mode == thinking.ModeNone { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", false) + if config.Level != "" { + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", string(config.Level)) + } + return result, nil + } + + // Only handle ModeLevel - budget conversion should be done by upper layer + if config.Mode != thinking.ModeLevel { + return body, nil + } + + level := string(config.Level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel", level) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", true) + return result, nil +} + +func (a *Applier) applyBudgetFormat(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + // Remove conflicting field to avoid both thinkingLevel and thinkingBudget in output + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig.thinkingLevel") + + budget := config.Budget + includeThoughts := false + switch config.Mode { + case 
thinking.ModeNone: + includeThoughts = false + case thinking.ModeAuto: + includeThoughts = true + default: + includeThoughts = budget > 0 + } + + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + result, _ = sjson.SetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts", includeThoughts) + return result, nil +} diff --git a/internal/thinking/provider/geminicli/apply_test.go b/internal/thinking/provider/geminicli/apply_test.go new file mode 100644 index 00000000..a606457c --- /dev/null +++ b/internal/thinking/provider/geminicli/apply_test.go @@ -0,0 +1,382 @@ +// Package geminicli implements thinking configuration for Gemini CLI API format. +package geminicli + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatal("NewApplier() returned nil") + } +} + +func TestApplierImplementsInterface(t *testing.T) { + // Compile-time check: if Applier doesn't implement the interface, this won't compile + var _ thinking.ProviderApplier = (*Applier)(nil) +} + +// TestGeminiCLIApply tests the Gemini CLI thinking applier. +// +// Gemini CLI uses request.generationConfig.thinkingConfig.* path. +// Behavior mirrors Gemini applier but with different JSON path prefix. 
+func TestGeminiCLIApply(t *testing.T) { + applier := NewApplier() + tests := []struct { + name string + model string + config thinking.ThinkingConfig + wantField string + wantValue interface{} + wantIncludeThoughts bool + }{ + // Budget mode (no Levels) + {"budget 8k", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "thinkingBudget", 8192, true}, + {"budget zero", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, "thinkingBudget", 0, false}, + {"none mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "thinkingBudget", 0, false}, + {"auto mode", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + + // Level mode (has Levels) + {"level high", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "thinkingLevel", "high", true}, + {"level low", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "thinkingLevel", "low", true}, + {"level minimal", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, "thinkingLevel", "minimal", true}, + // ModeAuto with Levels model still uses thinkingBudget=-1 + {"auto with levels", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, "thinkingBudget", -1, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiCLIModelInfo(tt.model) + result, err := applier.Apply([]byte(`{}`), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotField := gjson.GetBytes(result, "request.generationConfig.thinkingConfig."+tt.wantField) + switch want := tt.wantValue.(type) { + case int: + if int(gotField.Int()) != want { + t.Fatalf("%s = %d, want %d", tt.wantField, gotField.Int(), want) + } + case string: + if gotField.String() != want { + 
t.Fatalf("%s = %q, want %q", tt.wantField, gotField.String(), want) + } + case bool: + if gotField.Bool() != want { + t.Fatalf("%s = %v, want %v", tt.wantField, gotField.Bool(), want) + } + default: + t.Fatalf("unsupported wantValue type %T", tt.wantValue) + } + + gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != tt.wantIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, tt.wantIncludeThoughts) + } + }) + } +} + +// TestGeminiCLIApplyModeNoneWithLevel tests ModeNone with Level model. +// When ModeNone is used with a model that has Levels, includeThoughts should be false. +func TestGeminiCLIApplyModeNoneWithLevel(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiCLIModelInfo("gemini-cli-level") + config := thinking.ThinkingConfig{Mode: thinking.ModeNone, Level: thinking.LevelLow} + + result, err := applier.Apply([]byte(`{}`), config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() + if gotIncludeThoughts != false { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, false) + } + + gotLevel := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel").String() + if gotLevel != "low" { + t.Fatalf("thinkingLevel = %q, want %q", gotLevel, "low") + } +} + +// TestGeminiCLIApplyInvalidBody tests Apply behavior with invalid body inputs. 
+func TestGeminiCLIApplyInvalidBody(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiCLIModelInfo("gemini-cli-budget") + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + + tests := []struct { + name string + body []byte + }{ + {"nil body", nil}, + {"empty body", []byte{}}, + {"invalid json", []byte("{\"not json\"")}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply(tt.body, config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + gotBudget := int(gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget").Int()) + if gotBudget != 8192 { + t.Fatalf("thinkingBudget = %d, want %d", gotBudget, 8192) + } + + gotIncludeThoughts := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts").Bool() + if !gotIncludeThoughts { + t.Fatalf("includeThoughts = %v, want %v", gotIncludeThoughts, true) + } + }) + } +} + +// TestGeminiCLIApplyConflictingFields tests that conflicting fields are removed. +// +// When applying Budget format, any existing thinkingLevel should be removed. +// When applying Level format, any existing thinkingBudget should be removed. 
+func TestGeminiCLIApplyConflictingFields(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + model string + config thinking.ThinkingConfig + existingBody string + wantField string // expected field to exist + wantNoField string // expected field to NOT exist + }{ + // Budget format should remove existing thinkingLevel + { + "budget removes level", + "gemini-cli-budget", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, + `{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`, + "thinkingBudget", + "thinkingLevel", + }, + // Level format should remove existing thinkingBudget + { + "level removes budget", + "gemini-cli-level", + thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, + `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192}}}}`, + "thinkingLevel", + "thinkingBudget", + }, + // ModeAuto uses budget format, should remove thinkingLevel + { + "auto removes level", + "gemini-cli-level", + thinking.ThinkingConfig{Mode: thinking.ModeAuto, Budget: -1}, + `{"request":{"generationConfig":{"thinkingConfig":{"thinkingLevel":"high"}}}}`, + "thinkingBudget", + "thinkingLevel", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiCLIModelInfo(tt.model) + result, err := applier.Apply([]byte(tt.existingBody), tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + // Verify expected field exists + wantPath := "request.generationConfig.thinkingConfig." + tt.wantField + if !gjson.GetBytes(result, wantPath).Exists() { + t.Fatalf("%s should exist in result: %s", tt.wantField, string(result)) + } + + // Verify conflicting field was removed + noPath := "request.generationConfig.thinkingConfig." 
+ tt.wantNoField + if gjson.GetBytes(result, noPath).Exists() { + t.Fatalf("%s should NOT exist in result: %s", tt.wantNoField, string(result)) + } + }) + } +} + +// TestGeminiCLIApplyThinkingNotSupported tests error handling when modelInfo.Thinking is nil. +func TestGeminiCLIApplyThinkingNotSupported(t *testing.T) { + applier := NewApplier() + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + + // Model with nil Thinking support + modelInfo := &registry.ModelInfo{ID: "gemini-cli-unknown", Thinking: nil} + + _, err := applier.Apply([]byte(`{}`), config, modelInfo) + if err == nil { + t.Fatal("Apply() expected error for nil Thinking, got nil") + } + + // Verify it's the correct error type + thinkErr, ok := err.(*thinking.ThinkingError) + if !ok { + t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err) + } + if thinkErr.Code != thinking.ErrThinkingNotSupported { + t.Fatalf("Apply() error code = %v, want %v", thinkErr.Code, thinking.ErrThinkingNotSupported) + } +} + +// TestGeminiCLIApplyNilModelInfo tests Apply behavior when modelInfo is nil. +func TestGeminiCLIApplyNilModelInfo(t *testing.T) { + applier := NewApplier() + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + body := []byte(`{"existing": "data"}`) + + result, err := applier.Apply(body, config, nil) + if err != nil { + t.Fatalf("Apply() with nil modelInfo should not error, got: %v", err) + } + if string(result) != string(body) { + t.Fatalf("Apply() with nil modelInfo should return original body, got: %s", result) + } +} + +// TestGeminiCLIApplyEmptyModelID tests Apply when modelID is empty. 
+func TestGeminiCLIApplyEmptyModelID(t *testing.T) { + applier := NewApplier() + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + modelInfo := &registry.ModelInfo{ID: "", Thinking: nil} + + _, err := applier.Apply([]byte(`{}`), config, modelInfo) + if err == nil { + t.Fatal("Apply() with empty modelID and nil Thinking should error") + } + thinkErr, ok := err.(*thinking.ThinkingError) + if !ok { + t.Fatalf("Apply() error type = %T, want *thinking.ThinkingError", err) + } + if thinkErr.Model != "unknown" { + t.Fatalf("Apply() error model = %q, want %q", thinkErr.Model, "unknown") + } +} + +// TestGeminiCLIApplyModeBudgetWithLevels tests that ModeBudget with Levels model passes through. +// Apply layer doesn't convert - upper layer should handle Budget→Level conversion. +func TestGeminiCLIApplyModeBudgetWithLevels(t *testing.T) { + applier := NewApplier() + modelInfo := buildGeminiCLIModelInfo("gemini-cli-level") + config := thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192} + body := []byte(`{"existing": "data"}`) + + result, err := applier.Apply(body, config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + // ModeBudget with Levels model: Apply returns body unchanged (conversion is upper layer's job) + if string(result) != string(body) { + t.Fatalf("Apply() ModeBudget with Levels should return original body, got: %s", result) + } +} + +// TestGeminiCLIApplyUnsupportedMode tests behavior with unsupported Mode types. 
+func TestGeminiCLIApplyUnsupportedMode(t *testing.T) { + applier := NewApplier() + body := []byte(`{"existing": "data"}`) + + tests := []struct { + name string + model string + config thinking.ThinkingConfig + }{ + {"unknown mode with budget model", "gemini-cli-budget", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Budget: 8192}}, + {"unknown mode with level model", "gemini-cli-level", thinking.ThinkingConfig{Mode: thinking.ThinkingMode(99), Level: thinking.LevelHigh}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildGeminiCLIModelInfo(tt.model) + result, err := applier.Apply(body, tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + // Unsupported modes return original body unchanged + if string(result) != string(body) { + t.Fatalf("Apply() with unsupported mode should return original body, got: %s", result) + } + }) + } +} + +// TestAntigravityUsesGeminiCLIFormat tests that antigravity provider uses gemini-cli format. +// Antigravity is registered with the same applier as gemini-cli. 
+func TestAntigravityUsesGeminiCLIFormat(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + config thinking.ThinkingConfig + modelInfo *registry.ModelInfo + wantField string + }{ + { + "claude model budget", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 16384}, + ®istry.ModelInfo{ID: "gemini-claude-sonnet-4-5-thinking", Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 200000}}, + "request.generationConfig.thinkingConfig.thinkingBudget", + }, + { + "opus model budget", + thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 32768}, + ®istry.ModelInfo{ID: "gemini-claude-opus-4-5-thinking", Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 200000}}, + "request.generationConfig.thinkingConfig.thinkingBudget", + }, + { + "model with levels", + thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, + ®istry.ModelInfo{ID: "some-model-with-levels", Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 200000, Levels: []string{"low", "high"}}}, + "request.generationConfig.thinkingConfig.thinkingLevel", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + + if !gjson.GetBytes(got, tt.wantField).Exists() { + t.Fatalf("expected field %s in output: %s", tt.wantField, string(got)) + } + }) + } +} + +func buildGeminiCLIModelInfo(modelID string) *registry.ModelInfo { + support := ®istry.ThinkingSupport{} + switch modelID { + case "gemini-cli-budget": + support.Min = 0 + support.Max = 32768 + support.ZeroAllowed = true + support.DynamicAllowed = true + case "gemini-cli-level": + support.Min = 128 + support.Max = 32768 + support.ZeroAllowed = false + support.DynamicAllowed = true + support.Levels = []string{"minimal", "low", "medium", "high"} + default: + // Unknown model - return nil Thinking to trigger error path + return ®istry.ModelInfo{ID: modelID, Thinking: 
nil} + } + return &registry.ModelInfo{ + ID: modelID, + Thinking: support, + } +} diff --git a/internal/thinking/provider/iflow/apply.go b/internal/thinking/provider/iflow/apply.go new file mode 100644 index 00000000..5bca94f2 --- /dev/null +++ b/internal/thinking/provider/iflow/apply.go @@ -0,0 +1,160 @@ +// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax). +// +// iFlow models use boolean toggle semantics: +// - GLM models: chat_template_kwargs.enable_thinking (boolean) +// - MiniMax models: reasoning_split (boolean) +// +// Level values are converted to boolean: none=false, all others=true +// See: _bmad-output/planning-artifacts/architecture.md#Epic-9 +package iflow + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for iFlow models. +// +// iFlow-specific behavior: +// - GLM models: enable_thinking boolean + clear_thinking=false +// - MiniMax models: reasoning_split boolean +// - Level to boolean: none=false, others=true +// - No quantized support (only on/off) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new iFlow thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("iflow", NewApplier()) +} + +// Apply applies thinking configuration to iFlow request body. 
+// +// Expected output format (GLM): +// +// { +// "chat_template_kwargs": { +// "enable_thinking": true, +// "clear_thinking": false +// } +// } +// +// Expected output format (MiniMax): +// +// { +// "reasoning_split": true +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + + if isGLMModel(modelInfo.ID) { + return applyGLM(body, config), nil + } + + if isMiniMaxModel(modelInfo.ID) { + return applyMiniMax(body, config), nil + } + + return body, nil +} + +// configToBoolean converts ThinkingConfig to boolean for iFlow models. +// +// Conversion rules: +// - ModeNone: false +// - ModeAuto: true +// - ModeBudget + Budget=0: false +// - ModeBudget + Budget>0: true +// - ModeLevel + Level="none": false +// - ModeLevel + any other level: true +// - Default (unknown mode): true +func configToBoolean(config thinking.ThinkingConfig) bool { + switch config.Mode { + case thinking.ModeNone: + return false + case thinking.ModeAuto: + return true + case thinking.ModeBudget: + return config.Budget > 0 + case thinking.ModeLevel: + return config.Level != thinking.LevelNone + default: + return true + } +} + +// applyGLM applies thinking configuration for GLM models. +// +// Output format when enabled: +// +// {"chat_template_kwargs": {"enable_thinking": true, "clear_thinking": false}} +// +// Output format when disabled: +// +// {"chat_template_kwargs": {"enable_thinking": false}} +// +// Note: clear_thinking is only set when thinking is enabled, to preserve +// thinking output in the response. 
+func applyGLM(body []byte, config thinking.ThinkingConfig) []byte { + enableThinking := configToBoolean(config) + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + result, _ := sjson.SetBytes(body, "chat_template_kwargs.enable_thinking", enableThinking) + + // clear_thinking only needed when thinking is enabled + if enableThinking { + result, _ = sjson.SetBytes(result, "chat_template_kwargs.clear_thinking", false) + } + + return result +} + +// applyMiniMax applies thinking configuration for MiniMax models. +// +// Output format: +// +// {"reasoning_split": true/false} +func applyMiniMax(body []byte, config thinking.ThinkingConfig) []byte { + reasoningSplit := configToBoolean(config) + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + result, _ := sjson.SetBytes(body, "reasoning_split", reasoningSplit) + + return result +} + +// isGLMModel determines if the model is a GLM series model. +// GLM models use chat_template_kwargs.enable_thinking format. +func isGLMModel(modelID string) bool { + return strings.HasPrefix(strings.ToLower(modelID), "glm") +} + +// isMiniMaxModel determines if the model is a MiniMax series model. +// MiniMax models use reasoning_split format. +func isMiniMaxModel(modelID string) bool { + return strings.HasPrefix(strings.ToLower(modelID), "minimax") +} diff --git a/internal/thinking/provider/iflow/apply_test.go b/internal/thinking/provider/iflow/apply_test.go new file mode 100644 index 00000000..f0c2a35b --- /dev/null +++ b/internal/thinking/provider/iflow/apply_test.go @@ -0,0 +1,328 @@ +// Package iflow implements thinking configuration for iFlow models (GLM, MiniMax). 
+package iflow + +import ( + "bytes" + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestNewApplier(t *testing.T) { + tests := []struct { + name string + }{ + {"default"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatalf("expected non-nil applier") + } + }) + } +} + +func TestApplierImplementsInterface(t *testing.T) { + tests := []struct { + name string + applier thinking.ProviderApplier + }{ + {"default", NewApplier()}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.applier == nil { + t.Fatalf("expected thinking.ProviderApplier implementation") + } + }) + } +} + +func TestApplyNilModelInfo(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + body []byte + }{ + {"nil body", nil}, + {"empty body", []byte{}}, + {"json body", []byte(`{"model":"glm-4.6"}`)}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := applier.Apply(tt.body, thinking.ThinkingConfig{}, nil) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + if !bytes.Equal(got, tt.body) { + t.Fatalf("expected body unchanged, got %s", string(got)) + } + }) + } +} + +func TestApplyMissingThinkingSupport(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + modelID string + wantModel string + }{ + {"model id", "glm-4.6", "glm-4.6"}, + {"empty model id", "", "unknown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := ®istry.ModelInfo{ID: tt.modelID} + got, err := applier.Apply([]byte(`{"model":"`+tt.modelID+`"}`), thinking.ThinkingConfig{}, modelInfo) + if err == nil { + t.Fatalf("expected error, got nil") + } + if got != nil { + t.Fatalf("expected nil body on error, got %s", string(got)) + } + thinkingErr, ok := 
err.(*thinking.ThinkingError) + if !ok { + t.Fatalf("expected ThinkingError, got %T", err) + } + if thinkingErr.Code != thinking.ErrThinkingNotSupported { + t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code) + } + if thinkingErr.Model != tt.wantModel { + t.Fatalf("expected model %s, got %s", tt.wantModel, thinkingErr.Model) + } + }) + } +} + +func TestConfigToBoolean(t *testing.T) { + tests := []struct { + name string + config thinking.ThinkingConfig + want bool + }{ + {"mode none", thinking.ThinkingConfig{Mode: thinking.ModeNone}, false}, + {"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true}, + {"budget zero", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false}, + {"budget positive", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true}, + {"level none", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false}, + {"level minimal", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true}, + {"level low", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true}, + {"level medium", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true}, + {"level high", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true}, + {"level xhigh", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true}, + {"zero value config", thinking.ThinkingConfig{}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := configToBoolean(tt.config); got != tt.want { + t.Fatalf("configToBoolean(%+v) = %v, want %v", tt.config, got, tt.want) + } + }) + } +} + +func TestApplyGLM(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + modelID string + body []byte + config thinking.ThinkingConfig + wantEnable bool + wantPreserve string + }{ + {"mode none", "glm-4.6", []byte(`{}`), 
thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, ""}, + {"level none", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, ""}, + {"mode auto", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""}, + {"level minimal", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMinimal}, true, ""}, + {"level low", "glm-4.7", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, ""}, + {"level medium", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, ""}, + {"level high", "GLM-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, ""}, + {"level xhigh", "glm-z1-preview", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, ""}, + {"budget zero", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, ""}, + {"budget 1000", "glm-4.6", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, ""}, + {"preserve fields", "glm-4.6", []byte(`{"model":"glm-4.6","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "glm-4.6"}, + {"empty body", "glm-4.6", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""}, + {"malformed json", "glm-4.6", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := ®istry.ModelInfo{ + ID: tt.modelID, + Thinking: ®istry.ThinkingSupport{}, + } + got, err := applier.Apply(tt.body, tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if !gjson.ValidBytes(got) { + t.Fatalf("expected valid JSON, got %s", string(got)) + } + + enableResult := gjson.GetBytes(got, 
"chat_template_kwargs.enable_thinking") + if !enableResult.Exists() { + t.Fatalf("enable_thinking missing") + } + gotEnable := enableResult.Bool() + if gotEnable != tt.wantEnable { + t.Fatalf("enable_thinking = %v, want %v", gotEnable, tt.wantEnable) + } + + // clear_thinking only set when enable_thinking=true + clearResult := gjson.GetBytes(got, "chat_template_kwargs.clear_thinking") + if tt.wantEnable { + if !clearResult.Exists() { + t.Fatalf("clear_thinking missing when enable_thinking=true") + } + if clearResult.Bool() { + t.Fatalf("clear_thinking = %v, want false", clearResult.Bool()) + } + } else { + if clearResult.Exists() { + t.Fatalf("clear_thinking should not exist when enable_thinking=false") + } + } + + if tt.wantPreserve != "" { + gotModel := gjson.GetBytes(got, "model").String() + if gotModel != tt.wantPreserve { + t.Fatalf("model = %q, want %q", gotModel, tt.wantPreserve) + } + if !gjson.GetBytes(got, "extra.keep").Bool() { + t.Fatalf("expected extra.keep preserved") + } + } + }) + } +} + +func TestApplyMiniMax(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + modelID string + body []byte + config thinking.ThinkingConfig + wantSplit bool + wantModel string + wantKeep bool + }{ + {"mode none", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeNone}, false, "", false}, + {"level none", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelNone}, false, "", false}, + {"mode auto", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false}, + {"level high", "MINIMAX-M2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, true, "", false}, + {"level low", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, true, "", false}, + {"level minimal", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: 
thinking.LevelMinimal}, true, "", false}, + {"level medium", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, true, "", false}, + {"level xhigh", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, true, "", false}, + {"budget zero", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 0}, false, "", false}, + {"budget 1000", "minimax-m2.1", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 1000}, true, "", false}, + {"unknown level", "minimax-m2", []byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: "unknown"}, true, "", false}, + {"preserve fields", "minimax-m2", []byte(`{"model":"minimax-m2","extra":{"keep":true}}`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "minimax-m2", true}, + {"empty body", "minimax-m2", nil, thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false}, + {"malformed json", "minimax-m2", []byte(`{invalid`), thinking.ThinkingConfig{Mode: thinking.ModeAuto}, true, "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := ®istry.ModelInfo{ + ID: tt.modelID, + Thinking: ®istry.ThinkingSupport{}, + } + got, err := applier.Apply(tt.body, tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if !gjson.ValidBytes(got) { + t.Fatalf("expected valid JSON, got %s", string(got)) + } + + splitResult := gjson.GetBytes(got, "reasoning_split") + if !splitResult.Exists() { + t.Fatalf("reasoning_split missing") + } + // Verify JSON type is boolean, not string + if splitResult.Type != gjson.True && splitResult.Type != gjson.False { + t.Fatalf("reasoning_split should be boolean, got type %v", splitResult.Type) + } + gotSplit := splitResult.Bool() + if gotSplit != tt.wantSplit { + t.Fatalf("reasoning_split = %v, want %v", gotSplit, tt.wantSplit) + } + + if tt.wantModel != "" { + 
gotModel := gjson.GetBytes(got, "model").String() + if gotModel != tt.wantModel { + t.Fatalf("model = %q, want %q", gotModel, tt.wantModel) + } + if tt.wantKeep && !gjson.GetBytes(got, "extra.keep").Bool() { + t.Fatalf("expected extra.keep preserved") + } + } + }) + } +} + +// TestIsGLMModel tests the GLM model detection. +// +// Depends on: Epic 9 Story 9-1 +func TestIsGLMModel(t *testing.T) { + tests := []struct { + name string + model string + wantGLM bool + }{ + {"glm-4.6", "glm-4.6", true}, + {"glm-z1-preview", "glm-z1-preview", true}, + {"glm uppercase", "GLM-4.7", true}, + {"minimax-01", "minimax-01", false}, + {"gpt-5.2", "gpt-5.2", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isGLMModel(tt.model); got != tt.wantGLM { + t.Fatalf("isGLMModel(%q) = %v, want %v", tt.model, got, tt.wantGLM) + } + }) + } +} + +// TestIsMiniMaxModel tests the MiniMax model detection. +// +// Depends on: Epic 9 Story 9-1 +func TestIsMiniMaxModel(t *testing.T) { + tests := []struct { + name string + model string + wantMiniMax bool + }{ + {"minimax-01", "minimax-01", true}, + {"minimax uppercase", "MINIMAX-M2", true}, + {"glm-4.6", "glm-4.6", false}, + {"gpt-5.2", "gpt-5.2", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := isMiniMaxModel(tt.model); got != tt.wantMiniMax { + t.Fatalf("isMiniMaxModel(%q) = %v, want %v", tt.model, got, tt.wantMiniMax) + } + }) + } +} diff --git a/internal/thinking/provider/openai/apply.go b/internal/thinking/provider/openai/apply.go new file mode 100644 index 00000000..810faf34 --- /dev/null +++ b/internal/thinking/provider/openai/apply.go @@ -0,0 +1,135 @@ +// Package openai implements thinking configuration for OpenAI/Codex models. +// +// OpenAI models use the reasoning_effort format with discrete levels +// (low/medium/high). Some models support xhigh and none levels. 
+// See: _bmad-output/planning-artifacts/architecture.md#Epic-8 +package openai + +import ( + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Applier implements thinking.ProviderApplier for OpenAI models. +// +// OpenAI-specific behavior: +// - Output format: reasoning_effort (string: low/medium/high/xhigh) +// - Level-only mode: no numeric budget support +// - Some models support ZeroAllowed (gpt-5.1, gpt-5.2) +type Applier struct{} + +var _ thinking.ProviderApplier = (*Applier)(nil) + +// NewApplier creates a new OpenAI thinking applier. +func NewApplier() *Applier { + return &Applier{} +} + +func init() { + thinking.RegisterProvider("openai", NewApplier()) +} + +// Apply applies thinking configuration to OpenAI request body. +// +// Expected output format: +// +// { +// "reasoning_effort": "high" +// } +func (a *Applier) Apply(body []byte, config thinking.ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) { + if modelInfo == nil { + return body, nil + } + if modelInfo.Thinking == nil { + if modelInfo.Type == "" { + modelID := modelInfo.ID + if modelID == "" { + modelID = "unknown" + } + return nil, thinking.NewThinkingErrorWithModel(thinking.ErrThinkingNotSupported, "thinking not supported for this model", modelID) + } + return applyCompatibleOpenAI(body, config) + } + + // Only handle ModeLevel and ModeNone; other modes pass through unchanged. 
+ if config.Mode != thinking.ModeLevel && config.Mode != thinking.ModeNone { + return body, nil + } + + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + if config.Mode == thinking.ModeLevel { + result, _ := sjson.SetBytes(body, "reasoning_effort", string(config.Level)) + return result, nil + } + + effort := "" + support := modelInfo.Thinking + if config.Budget == 0 { + if support.ZeroAllowed || hasLevel(support.Levels, string(thinking.LevelNone)) { + effort = string(thinking.LevelNone) + } + } + if effort == "" && config.Level != "" { + effort = string(config.Level) + } + if effort == "" && len(support.Levels) > 0 { + effort = support.Levels[0] + } + if effort == "" { + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning_effort", effort) + return result, nil +} + +func applyCompatibleOpenAI(body []byte, config thinking.ThinkingConfig) ([]byte, error) { + if len(body) == 0 || !gjson.ValidBytes(body) { + body = []byte(`{}`) + } + + var effort string + switch config.Mode { + case thinking.ModeLevel: + if config.Level == "" { + return body, nil + } + effort = string(config.Level) + case thinking.ModeNone: + effort = string(thinking.LevelNone) + if config.Level != "" { + effort = string(config.Level) + } + case thinking.ModeAuto: + // Auto mode for user-defined models: pass through as "auto" + effort = string(thinking.LevelAuto) + case thinking.ModeBudget: + // Budget mode: convert budget to level using threshold mapping + level, ok := thinking.ConvertBudgetToLevel(config.Budget) + if !ok { + return body, nil + } + effort = level + default: + return body, nil + } + + result, _ := sjson.SetBytes(body, "reasoning_effort", effort) + return result, nil +} + +func hasLevel(levels []string, target string) bool { + for _, level := range levels { + if strings.EqualFold(strings.TrimSpace(level), target) { + return true + } + } + return false +} diff --git a/internal/thinking/provider/openai/apply_test.go 
b/internal/thinking/provider/openai/apply_test.go new file mode 100644 index 00000000..88c1800a --- /dev/null +++ b/internal/thinking/provider/openai/apply_test.go @@ -0,0 +1,343 @@ +// Package openai implements thinking configuration for OpenAI/Codex models. +package openai + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func buildOpenAIModelInfo(modelID string) *registry.ModelInfo { + info := registry.LookupStaticModelInfo(modelID) + if info != nil { + return info + } + // Fallback with complete ThinkingSupport matching real OpenAI model capabilities + return &registry.ModelInfo{ + ID: modelID, + Thinking: &registry.ThinkingSupport{ + Min: 1024, + Max: 32768, + ZeroAllowed: true, + Levels: []string{"none", "low", "medium", "high", "xhigh"}, + }, + } +} + +func TestNewApplier(t *testing.T) { + applier := NewApplier() + if applier == nil { + t.Fatalf("expected non-nil applier") + } +} + +func TestApplierImplementsInterface(t *testing.T) { + _, ok := interface{}(NewApplier()).(thinking.ProviderApplier) + if !ok { + t.Fatalf("expected Applier to implement thinking.ProviderApplier") + } +} + +func TestApplyNilModelInfo(t *testing.T) { + applier := NewApplier() + body := []byte(`{"model":"gpt-5.2"}`) + got, err := applier.Apply(body, thinking.ThinkingConfig{}, nil) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + if string(got) != string(body) { + t.Fatalf("expected body unchanged, got %s", string(got)) + } +} + +func TestApplyMissingThinkingSupport(t *testing.T) { + applier := NewApplier() + modelInfo := &registry.ModelInfo{ID: "gpt-5.2"} + got, err := applier.Apply([]byte(`{"model":"gpt-5.2"}`), thinking.ThinkingConfig{}, modelInfo) + if err == nil { + t.Fatalf("expected error, got nil") + } + if got != nil { + t.Fatalf("expected nil body on error, got %s", string(got)) + } + thinkingErr, ok := 
err.(*thinking.ThinkingError) + if !ok { + t.Fatalf("expected ThinkingError, got %T", err) + } + if thinkingErr.Code != thinking.ErrThinkingNotSupported { + t.Fatalf("expected code %s, got %s", thinking.ErrThinkingNotSupported, thinkingErr.Code) + } + if thinkingErr.Model != "gpt-5.2" { + t.Fatalf("expected model gpt-5.2, got %s", thinkingErr.Model) + } +} + +// TestApplyLevel tests Apply with ModeLevel (unit test, no ValidateConfig). +func TestApplyLevel(t *testing.T) { + applier := NewApplier() + modelInfo := buildOpenAIModelInfo("gpt-5.2") + + tests := []struct { + name string + level thinking.ThinkingLevel + want string + }{ + {"high", thinking.LevelHigh, "high"}, + {"medium", thinking.LevelMedium, "medium"}, + {"low", thinking.LevelLow, "low"}, + {"xhigh", thinking.LevelXHigh, "xhigh"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(`{}`), thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: tt.level}, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want { + t.Fatalf("reasoning_effort = %q, want %q", got, tt.want) + } + }) + } +} + +// TestApplyModeNone tests Apply with ModeNone (unit test). 
+func TestApplyModeNone(t *testing.T) { + applier := NewApplier() + + tests := []struct { + name string + config thinking.ThinkingConfig + modelInfo *registry.ModelInfo + want string + }{ + {"zero allowed", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, &registry.ModelInfo{ID: "gpt-5.2", Thinking: &registry.ThinkingSupport{ZeroAllowed: true, Levels: []string{"none", "low"}}}, "none"}, + {"clamped to level", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 128, Level: thinking.LevelLow}, &registry.ModelInfo{ID: "gpt-5", Thinking: &registry.ThinkingSupport{Levels: []string{"minimal", "low"}}}, "low"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply([]byte(`{}`), tt.config, tt.modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want { + t.Fatalf("reasoning_effort = %q, want %q", got, tt.want) + } + }) + } +} + +// TestApplyPassthrough tests that unsupported modes pass through unchanged. +func TestApplyPassthrough(t *testing.T) { + applier := NewApplier() + modelInfo := buildOpenAIModelInfo("gpt-5.2") + + tests := []struct { + name string + config thinking.ThinkingConfig + }{ + {"mode auto", thinking.ThinkingConfig{Mode: thinking.ModeAuto}}, + {"mode budget", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + body := []byte(`{"model":"gpt-5.2"}`) + result, err := applier.Apply(body, tt.config, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != string(body) { + t.Fatalf("Apply() result = %s, want %s", string(result), string(body)) + } + }) + } +} + +// TestApplyInvalidBody tests Apply with invalid body input. 
+func TestApplyInvalidBody(t *testing.T) { + applier := NewApplier() + modelInfo := buildOpenAIModelInfo("gpt-5.2") + + tests := []struct { + name string + body []byte + }{ + {"nil body", nil}, + {"empty body", []byte{}}, + {"invalid json", []byte(`{"not json"`)}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := applier.Apply(tt.body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if !gjson.ValidBytes(result) { + t.Fatalf("Apply() result is not valid JSON: %s", string(result)) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "high" { + t.Fatalf("reasoning_effort = %q, want %q", got, "high") + } + }) + } +} + +// TestApplyPreservesFields tests that existing body fields are preserved. +func TestApplyPreservesFields(t *testing.T) { + applier := NewApplier() + modelInfo := buildOpenAIModelInfo("gpt-5.2") + + body := []byte(`{"model":"gpt-5.2","messages":[]}`) + result, err := applier.Apply(body, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "model").String(); got != "gpt-5.2" { + t.Fatalf("model = %q, want %q", got, "gpt-5.2") + } + if !gjson.GetBytes(result, "messages").Exists() { + t.Fatalf("messages missing from result: %s", string(result)) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != "low" { + t.Fatalf("reasoning_effort = %q, want %q", got, "low") + } +} + +// TestHasLevel tests the hasLevel helper function. 
+func TestHasLevel(t *testing.T) { + tests := []struct { + name string + levels []string + target string + want bool + }{ + {"exact match", []string{"low", "medium", "high"}, "medium", true}, + {"case insensitive", []string{"low", "medium", "high"}, "MEDIUM", true}, + {"with spaces", []string{"low", " medium ", "high"}, "medium", true}, + {"not found", []string{"low", "medium", "high"}, "xhigh", false}, + {"empty levels", []string{}, "medium", false}, + {"none level", []string{"none", "low", "medium"}, "none", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := hasLevel(tt.levels, tt.target); got != tt.want { + t.Fatalf("hasLevel(%v, %q) = %v, want %v", tt.levels, tt.target, got, tt.want) + } + }) + } +} + +// --- End-to-End Tests (ValidateConfig → Apply) --- + +// TestE2EApply tests the full flow: ValidateConfig → Apply. +func TestE2EApply(t *testing.T) { + tests := []struct { + name string + model string + config thinking.ThinkingConfig + want string + }{ + {"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high"}, + {"level medium", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium"}, + {"level low", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelLow}, "low"}, + {"level xhigh", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelXHigh}, "xhigh"}, + {"mode none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, "none"}, + {"budget to level", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, "medium"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildOpenAIModelInfo(tt.model) + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := 
applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.want { + t.Fatalf("reasoning_effort = %q, want %q", got, tt.want) + } + }) + } +} + +// TestE2EApplyOutputFormat tests the full flow with exact JSON output verification. +func TestE2EApplyOutputFormat(t *testing.T) { + tests := []struct { + name string + model string + config thinking.ThinkingConfig + wantJSON string + }{ + {"level high", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, `{"reasoning_effort":"high"}`}, + {"level none", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeNone, Budget: 0}, `{"reasoning_effort":"none"}`}, + {"budget converted", "gpt-5.2", thinking.ThinkingConfig{Mode: thinking.ModeBudget, Budget: 8192}, `{"reasoning_effort":"medium"}`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildOpenAIModelInfo(tt.model) + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(`{}`), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if string(result) != tt.wantJSON { + t.Fatalf("Apply() result = %s, want %s", string(result), tt.wantJSON) + } + }) + } +} + +// TestE2EApplyWithExistingBody tests the full flow with existing body fields. 
+func TestE2EApplyWithExistingBody(t *testing.T) { + tests := []struct { + name string + body string + config thinking.ThinkingConfig + wantEffort string + wantModel string + }{ + {"empty body", `{}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelHigh}, "high", ""}, + {"preserve fields", `{"model":"gpt-5.2","messages":[]}`, thinking.ThinkingConfig{Mode: thinking.ModeLevel, Level: thinking.LevelMedium}, "medium", "gpt-5.2"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + modelInfo := buildOpenAIModelInfo("gpt-5.2") + normalized, err := thinking.ValidateConfig(tt.config, modelInfo.Thinking) + if err != nil { + t.Fatalf("ValidateConfig() error = %v", err) + } + + applier := NewApplier() + result, err := applier.Apply([]byte(tt.body), *normalized, modelInfo) + if err != nil { + t.Fatalf("Apply() error = %v", err) + } + if got := gjson.GetBytes(result, "reasoning_effort").String(); got != tt.wantEffort { + t.Fatalf("reasoning_effort = %q, want %q", got, tt.wantEffort) + } + if tt.wantModel != "" { + if got := gjson.GetBytes(result, "model").String(); got != tt.wantModel { + t.Fatalf("model = %q, want %q", got, tt.wantModel) + } + } + }) + } +} diff --git a/internal/thinking/provider_map_test.go b/internal/thinking/provider_map_test.go new file mode 100644 index 00000000..0944c246 --- /dev/null +++ b/internal/thinking/provider_map_test.go @@ -0,0 +1,51 @@ +// Package thinking_test provides external tests for the thinking package. +// +// This file uses package thinking_test (external) to allow importing provider +// subpackages, which triggers their init() functions to register appliers. +// This avoids import cycles that would occur if thinking package imported providers directly. 
+package thinking_test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + + // Blank imports to trigger provider init() registration + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" +) + +func TestProviderAppliersBasic(t *testing.T) { + tests := []struct { + name string + provider string + wantNil bool + }{ + {"gemini provider", "gemini", false}, + {"gemini-cli provider", "gemini-cli", false}, + {"claude provider", "claude", false}, + {"openai provider", "openai", false}, + {"iflow provider", "iflow", false}, + {"antigravity provider", "antigravity", false}, + {"unknown provider", "unknown", true}, + {"empty provider", "", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := thinking.GetProviderApplier(tt.provider) + if tt.wantNil { + if got != nil { + t.Fatalf("GetProviderApplier(%q) = %T, want nil", tt.provider, got) + } + return + } + if got == nil { + t.Fatalf("GetProviderApplier(%q) = nil, want non-nil", tt.provider) + } + }) + } +} diff --git a/internal/thinking/strip.go b/internal/thinking/strip.go new file mode 100644 index 00000000..4904d4d5 --- /dev/null +++ b/internal/thinking/strip.go @@ -0,0 +1,54 @@ +// Package thinking provides unified thinking configuration processing. +package thinking + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// StripThinkingConfig removes thinking configuration fields from request body. +// +// This function is used when a model doesn't support thinking but the request +// contains thinking configuration. 
The configuration is silently removed to +// prevent upstream API errors. +// +// Parameters: +// - body: Original request body JSON +// - provider: Provider name (determines which fields to strip) +// +// Returns: +// - Modified request body JSON with thinking configuration removed +// - Original body is returned unchanged if: +// - body is empty or invalid JSON +// - provider is unknown +// - no thinking configuration found +func StripThinkingConfig(body []byte, provider string) []byte { + if len(body) == 0 || !gjson.ValidBytes(body) { + return body + } + + switch provider { + case "claude": + result, _ := sjson.DeleteBytes(body, "thinking") + return result + case "gemini": + result, _ := sjson.DeleteBytes(body, "generationConfig.thinkingConfig") + return result + case "gemini-cli", "antigravity": + result, _ := sjson.DeleteBytes(body, "request.generationConfig.thinkingConfig") + return result + case "openai": + result, _ := sjson.DeleteBytes(body, "reasoning_effort") + return result + case "codex": + result, _ := sjson.DeleteBytes(body, "reasoning.effort") + return result + case "iflow": + result, _ := sjson.DeleteBytes(body, "chat_template_kwargs.enable_thinking") + result, _ = sjson.DeleteBytes(result, "chat_template_kwargs.clear_thinking") + result, _ = sjson.DeleteBytes(result, "reasoning_split") + return result + default: + return body + } +} diff --git a/internal/thinking/strip_test.go b/internal/thinking/strip_test.go new file mode 100644 index 00000000..edd6bd1a --- /dev/null +++ b/internal/thinking/strip_test.go @@ -0,0 +1,66 @@ +// Package thinking_test provides tests for thinking config stripping. 
+package thinking_test + +import ( + "testing" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + "github.com/tidwall/gjson" +) + +func TestStripThinkingConfig(t *testing.T) { + tests := []struct { + name string + body string + provider string + stripped []string + preserved []string + }{ + {"claude thinking", `{"thinking":{"budget_tokens":8192},"model":"claude-3"}`, "claude", []string{"thinking"}, []string{"model"}}, + {"gemini thinkingConfig", `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}`, "gemini", []string{"generationConfig.thinkingConfig"}, []string{"generationConfig.temperature"}}, + {"gemini-cli thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":8192},"temperature":0.7}}}`, "gemini-cli", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.temperature"}}, + {"antigravity thinkingConfig", `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":4096},"maxTokens":1024}}}`, "antigravity", []string{"request.generationConfig.thinkingConfig"}, []string{"request.generationConfig.maxTokens"}}, + {"openai reasoning_effort", `{"reasoning_effort":"high","model":"gpt-5"}`, "openai", []string{"reasoning_effort"}, []string{"model"}}, + {"iflow glm", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false,"other":"value"}}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking"}, []string{"chat_template_kwargs.other"}}, + {"iflow minimax", `{"reasoning_split":true,"model":"minimax"}`, "iflow", []string{"reasoning_split"}, []string{"model"}}, + {"iflow both formats", `{"chat_template_kwargs":{"enable_thinking":true,"clear_thinking":false},"reasoning_split":true,"model":"mixed"}`, "iflow", []string{"chat_template_kwargs.enable_thinking", "chat_template_kwargs.clear_thinking", "reasoning_split"}, []string{"model"}}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { 
+ got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider) + + for _, path := range tt.stripped { + if gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be stripped, got %s", path, string(got)) + } + } + for _, path := range tt.preserved { + if !gjson.GetBytes(got, path).Exists() { + t.Fatalf("expected %s to be preserved, got %s", path, string(got)) + } + } + }) + } +} + +func TestStripThinkingConfigPassthrough(t *testing.T) { + tests := []struct { + name string + body string + provider string + }{ + {"empty body", ``, "claude"}, + {"invalid json", `{not valid`, "claude"}, + {"unknown provider", `{"thinking":{"budget_tokens":8192}}`, "unknown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := thinking.StripThinkingConfig([]byte(tt.body), tt.provider) + if string(got) != tt.body { + t.Fatalf("StripThinkingConfig() = %s, want passthrough %s", string(got), tt.body) + } + }) + } +} diff --git a/internal/thinking/suffix.go b/internal/thinking/suffix.go new file mode 100644 index 00000000..e3b4087e --- /dev/null +++ b/internal/thinking/suffix.go @@ -0,0 +1,170 @@ +// Package thinking provides unified thinking configuration processing. +// +// This file implements suffix parsing functionality for extracting +// thinking configuration from model names in the format model(value). +package thinking + +import ( + "fmt" + "strconv" + "strings" +) + +// ParseSuffix extracts thinking suffix from a model name. +// +// The suffix format is: model-name(value) +// Examples: +// - "claude-sonnet-4-5(16384)" -> ModelName="claude-sonnet-4-5", RawSuffix="16384" +// - "gpt-5.2(high)" -> ModelName="gpt-5.2", RawSuffix="high" +// - "gemini-2.5-pro" -> ModelName="gemini-2.5-pro", HasSuffix=false +// +// This function only extracts the suffix; it does not validate or interpret +// the suffix content. Use ParseNumericSuffix, ParseLevelSuffix, etc. for +// content interpretation. 
+func ParseSuffix(model string) SuffixResult { + // Find the last opening parenthesis + lastOpen := strings.LastIndex(model, "(") + if lastOpen == -1 { + return SuffixResult{ModelName: model, HasSuffix: false} + } + + // Check if the string ends with a closing parenthesis + if !strings.HasSuffix(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false} + } + + // Extract components + modelName := model[:lastOpen] + rawSuffix := model[lastOpen+1 : len(model)-1] + + return SuffixResult{ + ModelName: modelName, + HasSuffix: true, + RawSuffix: rawSuffix, + } +} + +// ParseSuffixWithError extracts thinking suffix and returns an error on invalid format. +// +// Invalid format cases: +// - Contains "(" but does not end with ")" +// - Contains ")" without any "(" +// +// The error message includes the original input for debugging context. +func ParseSuffixWithError(model string) (SuffixResult, error) { + lastOpen := strings.LastIndex(model, "(") + if lastOpen == -1 { + if strings.Contains(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model)) + } + return SuffixResult{ModelName: model, HasSuffix: false}, nil + } + + if !strings.HasSuffix(model, ")") { + return SuffixResult{ModelName: model, HasSuffix: false}, NewThinkingError(ErrInvalidSuffix, fmt.Sprintf("invalid suffix format: %s", model)) + } + + return ParseSuffix(model), nil +} + +// ParseNumericSuffix attempts to parse a raw suffix as a numeric budget value. +// +// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as an integer. +// Only non-negative integers are considered valid numeric suffixes. +// +// Platform note: The budget value uses Go's int type, which is 32-bit on 32-bit +// systems and 64-bit on 64-bit systems. Values exceeding the platform's int range +// will return ok=false. +// +// Leading zeros are accepted: "08192" parses as 8192. 
+// +// Examples: +// - "8192" -> budget=8192, ok=true +// - "0" -> budget=0, ok=true (represents ModeNone) +// - "08192" -> budget=8192, ok=true (leading zeros accepted) +// - "-1" -> budget=0, ok=false (negative numbers are not valid numeric suffixes) +// - "high" -> budget=0, ok=false (not a number) +// - "9223372036854775808" -> budget=0, ok=false (overflow on 64-bit systems) +// +// For special handling of -1 as auto mode, use ParseSpecialSuffix instead. +func ParseNumericSuffix(rawSuffix string) (budget int, ok bool) { + if rawSuffix == "" { + return 0, false + } + + value, err := strconv.Atoi(rawSuffix) + if err != nil { + return 0, false + } + + // Negative numbers are not valid numeric suffixes + // -1 should be handled by special value parsing as "auto" + if value < 0 { + return 0, false + } + + return value, true +} + +// ParseSpecialSuffix attempts to parse a raw suffix as a special thinking mode value. +// +// This function handles special strings that represent a change in thinking mode: +// - "none" -> ModeNone (disables thinking) +// - "auto" -> ModeAuto (automatic/dynamic thinking) +// - "-1" -> ModeAuto (numeric representation of auto mode) +// +// String values are case-insensitive. +func ParseSpecialSuffix(rawSuffix string) (mode ThinkingMode, ok bool) { + if rawSuffix == "" { + return ModeBudget, false + } + + // Case-insensitive matching + switch strings.ToLower(rawSuffix) { + case "none": + return ModeNone, true + case "auto", "-1": + return ModeAuto, true + default: + return ModeBudget, false + } +} + +// ParseLevelSuffix attempts to parse a raw suffix as a discrete thinking level. +// +// This function parses the raw suffix content (from ParseSuffix.RawSuffix) as a level. +// Only discrete effort levels are valid: minimal, low, medium, high, xhigh. +// Level matching is case-insensitive. +// +// Special values (none, auto) are NOT handled by this function; use ParseSpecialSuffix +// instead. 
This separation allows callers to prioritize special value handling. +// +// Examples: +// - "high" -> level=LevelHigh, ok=true +// - "HIGH" -> level=LevelHigh, ok=true (case insensitive) +// - "medium" -> level=LevelMedium, ok=true +// - "none" -> level="", ok=false (special value, use ParseSpecialSuffix) +// - "auto" -> level="", ok=false (special value, use ParseSpecialSuffix) +// - "8192" -> level="", ok=false (numeric, use ParseNumericSuffix) +// - "ultra" -> level="", ok=false (unknown level) +func ParseLevelSuffix(rawSuffix string) (level ThinkingLevel, ok bool) { + if rawSuffix == "" { + return "", false + } + + // Case-insensitive matching + switch strings.ToLower(rawSuffix) { + case "minimal": + return LevelMinimal, true + case "low": + return LevelLow, true + case "medium": + return LevelMedium, true + case "high": + return LevelHigh, true + case "xhigh": + return LevelXHigh, true + default: + return "", false + } +} diff --git a/internal/thinking/suffix_test.go b/internal/thinking/suffix_test.go new file mode 100644 index 00000000..b3ea3ed3 --- /dev/null +++ b/internal/thinking/suffix_test.go @@ -0,0 +1,313 @@ +// Package thinking provides unified thinking configuration processing. +package thinking + +import ( + "strings" + "testing" +) + +// TestParseSuffix tests the ParseSuffix function. +// +// ParseSuffix extracts thinking suffix from model name. +// Format: model-name(value) where value is the raw suffix content. +// This function only extracts; interpretation is done by other functions. 
+func TestParseSuffix(t *testing.T) { + tests := []struct { + name string + model string + wantModel string + wantSuffix bool + wantRaw string + }{ + {"no suffix", "claude-sonnet-4-5", "claude-sonnet-4-5", false, ""}, + {"numeric suffix", "model(1000)", "model", true, "1000"}, + {"level suffix", "gpt-5(high)", "gpt-5", true, "high"}, + {"auto suffix", "gemini-2.5-pro(auto)", "gemini-2.5-pro", true, "auto"}, + {"none suffix", "model(none)", "model", true, "none"}, + {"complex model name", "gemini-2.5-flash-lite(8192)", "gemini-2.5-flash-lite", true, "8192"}, + {"alias with suffix", "g25p(1000)", "g25p", true, "1000"}, + {"empty suffix", "model()", "model", true, ""}, + {"nested parens", "model(a(b))", "model(a", true, "b)"}, + {"no model name", "(1000)", "", true, "1000"}, + {"unmatched open", "model(", "model(", false, ""}, + {"unmatched close", "model)", "model)", false, ""}, + {"paren not at end", "model(1000)extra", "model(1000)extra", false, ""}, + {"empty string", "", "", false, ""}, + {"large budget", "claude-opus(128000)", "claude-opus", true, "128000"}, + {"xhigh level", "gpt-5.2(xhigh)", "gpt-5.2", true, "xhigh"}, + {"minimal level", "model(minimal)", "model", true, "minimal"}, + {"medium level", "model(medium)", "model", true, "medium"}, + {"low level", "model(low)", "model", true, "low"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParseSuffix(tt.model) + if got.ModelName != tt.wantModel { + t.Errorf("ModelName = %q, want %q", got.ModelName, tt.wantModel) + } + if got.HasSuffix != tt.wantSuffix { + t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantSuffix) + } + if got.RawSuffix != tt.wantRaw { + t.Errorf("RawSuffix = %q, want %q", got.RawSuffix, tt.wantRaw) + } + }) + } +} + +// TestParseSuffixWithError tests invalid suffix error reporting. 
+func TestParseSuffixWithError(t *testing.T) { + tests := []struct { + name string + model string + wantHasSuffix bool + }{ + {"missing close paren", "model(abc", false}, + {"unmatched close paren", "model)", false}, + {"paren not at end", "model(1000)extra", false}, + {"no suffix", "gpt-5", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ParseSuffixWithError(tt.model) + if tt.name == "no suffix" { + if err != nil { + t.Fatalf("ParseSuffixWithError(%q) error = %v, want nil", tt.model, err) + } + if got.HasSuffix != tt.wantHasSuffix { + t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix) + } + return + } + + if err == nil { + t.Fatalf("ParseSuffixWithError(%q) error = nil, want error", tt.model) + } + thinkingErr, ok := err.(*ThinkingError) + if !ok { + t.Fatalf("ParseSuffixWithError(%q) error type = %T, want *ThinkingError", tt.model, err) + } + if thinkingErr.Code != ErrInvalidSuffix { + t.Errorf("error code = %v, want %v", thinkingErr.Code, ErrInvalidSuffix) + } + if !strings.Contains(thinkingErr.Message, tt.model) { + t.Errorf("message %q does not include input %q", thinkingErr.Message, tt.model) + } + if got.HasSuffix != tt.wantHasSuffix { + t.Errorf("HasSuffix = %v, want %v", got.HasSuffix, tt.wantHasSuffix) + } + }) + } +} + +// TestParseSuffixNumeric tests numeric suffix parsing. +// +// ParseNumericSuffix parses raw suffix content as integer budget. +// Only non-negative integers are valid. Negative numbers return ok=false. 
+func TestParseSuffixNumeric(t *testing.T) { + tests := []struct { + name string + rawSuffix string + wantBudget int + wantOK bool + }{ + {"small budget", "512", 512, true}, + {"standard budget", "8192", 8192, true}, + {"large budget", "100000", 100000, true}, + {"max int32", "2147483647", 2147483647, true}, + {"max int64", "9223372036854775807", 9223372036854775807, true}, + {"zero", "0", 0, true}, + {"negative one", "-1", 0, false}, + {"negative", "-100", 0, false}, + {"int64 overflow", "9223372036854775808", 0, false}, + {"large overflow", "99999999999999999999", 0, false}, + {"not a number", "abc", 0, false}, + {"level string", "high", 0, false}, + {"float", "1.5", 0, false}, + {"empty", "", 0, false}, + {"leading zero", "08192", 8192, true}, + {"whitespace", " 8192 ", 0, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + budget, ok := ParseNumericSuffix(tt.rawSuffix) + if budget != tt.wantBudget { + t.Errorf("budget = %d, want %d", budget, tt.wantBudget) + } + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + }) + } +} + +// TestParseSuffixLevel tests level suffix parsing. +// +// ParseLevelSuffix parses raw suffix content as discrete thinking level. +// Only effort levels (minimal, low, medium, high, xhigh) are valid. +// Special values (none, auto) return ok=false - use ParseSpecialSuffix instead. 
+func TestParseSuffixLevel(t *testing.T) { + tests := []struct { + name string + rawSuffix string + wantLevel ThinkingLevel + wantOK bool + }{ + {"minimal", "minimal", LevelMinimal, true}, + {"low", "low", LevelLow, true}, + {"medium", "medium", LevelMedium, true}, + {"high", "high", LevelHigh, true}, + {"xhigh", "xhigh", LevelXHigh, true}, + {"case HIGH", "HIGH", LevelHigh, true}, + {"case High", "High", LevelHigh, true}, + {"case hIgH", "hIgH", LevelHigh, true}, + {"case MINIMAL", "MINIMAL", LevelMinimal, true}, + {"case XHigh", "XHigh", LevelXHigh, true}, + {"none special", "none", "", false}, + {"auto special", "auto", "", false}, + {"unknown ultra", "ultra", "", false}, + {"unknown maximum", "maximum", "", false}, + {"unknown invalid", "invalid", "", false}, + {"numeric", "8192", "", false}, + {"numeric zero", "0", "", false}, + {"empty", "", "", false}, + {"whitespace", " high ", "", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + level, ok := ParseLevelSuffix(tt.rawSuffix) + if level != tt.wantLevel { + t.Errorf("level = %q, want %q", level, tt.wantLevel) + } + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + }) + } +} + +// TestParseSuffixSpecialValues tests special value suffix parsing. 
+// +// Depends on: Epic 3 Story 3-4 (special value suffix parsing) +func TestParseSuffixSpecialValues(t *testing.T) { + tests := []struct { + name string + rawSuffix string + wantMode ThinkingMode + wantOK bool + }{ + {"none", "none", ModeNone, true}, + {"auto", "auto", ModeAuto, true}, + {"negative one", "-1", ModeAuto, true}, + {"case NONE", "NONE", ModeNone, true}, + {"case Auto", "Auto", ModeAuto, true}, + {"case aUtO", "aUtO", ModeAuto, true}, + {"case NoNe", "NoNe", ModeNone, true}, + {"empty", "", ModeBudget, false}, + {"level high", "high", ModeBudget, false}, + {"numeric", "8192", ModeBudget, false}, + {"negative other", "-2", ModeBudget, false}, + {"whitespace", " none ", ModeBudget, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mode, ok := ParseSpecialSuffix(tt.rawSuffix) + if mode != tt.wantMode { + t.Errorf("mode = %q, want %q", mode, tt.wantMode) + } + if ok != tt.wantOK { + t.Errorf("ok = %v, want %v", ok, tt.wantOK) + } + }) + } +} + +// TestParseSuffixAliasFormats tests alias model suffix parsing. +// +// This test validates that short model aliases (e.g., g25p, cs45) work correctly +// with all suffix types. Alias-to-canonical-model mapping is caller's responsibility. 
+func TestParseSuffixAliasFormats(t *testing.T) { + tests := []struct { + name string // test case description + model string // input model string with optional suffix + wantName string // expected ModelName after parsing + wantSuffix bool // expected HasSuffix value + wantRaw string // expected RawSuffix value + checkBudget bool // if true, verify ParseNumericSuffix result + wantBudget int // expected budget (only when checkBudget=true) + checkLevel bool // if true, verify ParseLevelSuffix result + wantLevel ThinkingLevel // expected level (only when checkLevel=true) + checkMode bool // if true, verify ParseSpecialSuffix result + wantMode ThinkingMode // expected mode (only when checkMode=true) + }{ + // Alias + numeric suffix + {"alias numeric g25p", "g25p(1000)", "g25p", true, "1000", true, 1000, false, "", false, 0}, + {"alias numeric cs45", "cs45(16384)", "cs45", true, "16384", true, 16384, false, "", false, 0}, + {"alias numeric g3f", "g3f(8192)", "g3f", true, "8192", true, 8192, false, "", false, 0}, + // Alias + level suffix + {"alias level gpt52", "gpt52(high)", "gpt52", true, "high", false, 0, true, LevelHigh, false, 0}, + {"alias level g25f", "g25f(medium)", "g25f", true, "medium", false, 0, true, LevelMedium, false, 0}, + {"alias level cs4", "cs4(low)", "cs4", true, "low", false, 0, true, LevelLow, false, 0}, + // Alias + special suffix + {"alias auto g3f", "g3f(auto)", "g3f", true, "auto", false, 0, false, "", true, ModeAuto}, + {"alias none claude", "claude(none)", "claude", true, "none", false, 0, false, "", true, ModeNone}, + {"alias -1 g25p", "g25p(-1)", "g25p", true, "-1", false, 0, false, "", true, ModeAuto}, + // Single char alias + {"single char c", "c(1024)", "c", true, "1024", true, 1024, false, "", false, 0}, + {"single char g", "g(high)", "g", true, "high", false, 0, true, LevelHigh, false, 0}, + // Alias containing numbers + {"alias with num gpt5", "gpt5(medium)", "gpt5", true, "medium", false, 0, true, LevelMedium, false, 0}, + {"alias 
with num g25", "g25(1000)", "g25", true, "1000", true, 1000, false, "", false, 0}, + // Edge cases + {"no suffix", "g25p", "g25p", false, "", false, 0, false, "", false, 0}, + {"empty alias", "(1000)", "", true, "1000", true, 1000, false, "", false, 0}, + {"hyphen alias", "g-25-p(1000)", "g-25-p", true, "1000", true, 1000, false, "", false, 0}, + {"underscore alias", "g_25_p(high)", "g_25_p", true, "high", false, 0, true, LevelHigh, false, 0}, + {"nested parens", "g25p(test)(1000)", "g25p(test)", true, "1000", true, 1000, false, "", false, 0}, + } + + // ParseSuffix only extracts alias and suffix; mapping to canonical model is caller responsibility. + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ParseSuffix(tt.model) + + if result.ModelName != tt.wantName { + t.Errorf("ParseSuffix(%q).ModelName = %q, want %q", tt.model, result.ModelName, tt.wantName) + } + if result.HasSuffix != tt.wantSuffix { + t.Errorf("ParseSuffix(%q).HasSuffix = %v, want %v", tt.model, result.HasSuffix, tt.wantSuffix) + } + if result.RawSuffix != tt.wantRaw { + t.Errorf("ParseSuffix(%q).RawSuffix = %q, want %q", tt.model, result.RawSuffix, tt.wantRaw) + } + + if result.HasSuffix { + if tt.checkBudget { + budget, ok := ParseNumericSuffix(result.RawSuffix) + if !ok || budget != tt.wantBudget { + t.Errorf("ParseNumericSuffix(%q) = (%d, %v), want (%d, true)", + result.RawSuffix, budget, ok, tt.wantBudget) + } + } + if tt.checkLevel { + level, ok := ParseLevelSuffix(result.RawSuffix) + if !ok || level != tt.wantLevel { + t.Errorf("ParseLevelSuffix(%q) = (%q, %v), want (%q, true)", + result.RawSuffix, level, ok, tt.wantLevel) + } + } + if tt.checkMode { + mode, ok := ParseSpecialSuffix(result.RawSuffix) + if !ok || mode != tt.wantMode { + t.Errorf("ParseSpecialSuffix(%q) = (%v, %v), want (%v, true)", + result.RawSuffix, mode, ok, tt.wantMode) + } + } + } + }) + } +} diff --git a/internal/thinking/types.go b/internal/thinking/types.go new file mode 100644 index 
00000000..7197fa6e --- /dev/null +++ b/internal/thinking/types.go @@ -0,0 +1,100 @@ +// Package thinking provides unified thinking configuration processing. +// +// This package offers a unified interface for parsing, validating, and applying +// thinking configurations across various AI providers (Claude, Gemini, OpenAI, iFlow). +package thinking + +import "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + +// ThinkingMode represents the type of thinking configuration mode. +type ThinkingMode int + +const ( + // ModeBudget indicates using a numeric budget (corresponds to suffix "(1000)" etc.) + ModeBudget ThinkingMode = iota + // ModeLevel indicates using a discrete level (corresponds to suffix "(high)" etc.) + ModeLevel + // ModeNone indicates thinking is disabled (corresponds to suffix "(none)" or budget=0) + ModeNone + // ModeAuto indicates automatic/dynamic thinking (corresponds to suffix "(auto)" or budget=-1) + ModeAuto +) + +// ThinkingLevel represents a discrete thinking level. +type ThinkingLevel string + +const ( + // LevelNone disables thinking + LevelNone ThinkingLevel = "none" + // LevelAuto enables automatic/dynamic thinking + LevelAuto ThinkingLevel = "auto" + // LevelMinimal sets minimal thinking effort + LevelMinimal ThinkingLevel = "minimal" + // LevelLow sets low thinking effort + LevelLow ThinkingLevel = "low" + // LevelMedium sets medium thinking effort + LevelMedium ThinkingLevel = "medium" + // LevelHigh sets high thinking effort + LevelHigh ThinkingLevel = "high" + // LevelXHigh sets extra-high thinking effort + LevelXHigh ThinkingLevel = "xhigh" +) + +// ThinkingConfig represents a unified thinking configuration. +// +// This struct is used to pass thinking configuration information between components. 
+// Depending on Mode, either Budget or Level field is effective: +// - ModeNone: Budget=0, Level is ignored +// - ModeAuto: Budget=-1, Level is ignored +// - ModeBudget: Budget is a positive integer, Level is ignored +// - ModeLevel: Budget is ignored, Level is a valid level +type ThinkingConfig struct { + // Mode specifies the configuration mode + Mode ThinkingMode + // Budget is the thinking budget (token count), only effective when Mode is ModeBudget. + // Special values: 0 means disabled, -1 means automatic + Budget int + // Level is the thinking level, only effective when Mode is ModeLevel + Level ThinkingLevel +} + +// SuffixResult represents the result of parsing a model name for thinking suffix. +// +// A thinking suffix is specified in the format model-name(value), where value +// can be a numeric budget (e.g., "16384") or a level name (e.g., "high"). +type SuffixResult struct { + // ModelName is the model name with the suffix removed. + // If no suffix was found, this equals the original input. + ModelName string + + // HasSuffix indicates whether a valid suffix was found. + HasSuffix bool + + // RawSuffix is the content inside the parentheses, without the parentheses. + // Empty string if HasSuffix is false. + RawSuffix string +} + +// ProviderApplier defines the interface for provider-specific thinking configuration application. +// +// Types implementing this interface are responsible for converting a unified ThinkingConfig +// into provider-specific format and applying it to the request body. +// +// Implementation requirements: +// - Apply method must be idempotent +// - Must not modify the input config or modelInfo +// - Returns a modified copy of the request body +// - Returns appropriate ThinkingError for unsupported configurations +type ProviderApplier interface { + // Apply applies the thinking configuration to the request body. 
+ // + // Parameters: + // - body: Original request body JSON + // - config: Unified thinking configuration + // - modelInfo: Model registry information containing ThinkingSupport properties + // + // Returns: + // - Modified request body JSON + // - ThinkingError if the configuration is invalid or unsupported + Apply(body []byte, config ThinkingConfig, modelInfo *registry.ModelInfo) ([]byte, error) +} diff --git a/internal/thinking/validate.go b/internal/thinking/validate.go new file mode 100644 index 00000000..66f8160c --- /dev/null +++ b/internal/thinking/validate.go @@ -0,0 +1,260 @@ +// Package thinking provides unified thinking configuration processing logic. +package thinking + +import ( + "fmt" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" +) + +// ClampBudget clamps a budget value to the specified range [min, max]. +// +// This function ensures budget values stay within model-supported bounds. +// When clamping occurs, a Debug-level log is recorded. 
+// +// Special handling: +// - Auto value (-1) passes through without clamping +// - Values below min are clamped to min +// - Values above max are clamped to max +// +// Parameters: +// - value: The budget value to clamp +// - min: Minimum allowed budget (inclusive) +// - max: Maximum allowed budget (inclusive) +// +// Returns: +// - The clamped budget value (min ≤ result ≤ max, or -1 for auto) +// +// Logging: +// - Debug level when value is clamped (either to min or max) +// - Fields: original_value, clamped_to, min, max +func ClampBudget(value, min, max int) int { + // Auto value (-1) passes through without clamping + if value == -1 { + return value + } + + // Clamp to min if below + if value < min { + logClamp(value, min, min, max) + return min + } + + // Clamp to max if above + if value > max { + logClamp(value, max, min, max) + return max + } + + // Within range, return original + return value +} + +// ClampBudgetWithZeroCheck clamps a budget value to the specified range [min, max] +// while honoring the ZeroAllowed constraint. +// +// This function extends ClampBudget with ZeroAllowed boundary handling. +// When zeroAllowed is false and value is 0, the value is clamped to min and logged. 
+// +// Parameters: +// - value: The budget value to clamp +// - min: Minimum allowed budget (inclusive) +// - max: Maximum allowed budget (inclusive) +// - zeroAllowed: Whether 0 (thinking disabled) is allowed +// +// Returns: +// - The clamped budget value (min ≤ result ≤ max, or -1 for auto) +// +// Logging: +// - Warn level when zeroAllowed=false and value=0 (zero not allowed for model) +// - Fields: original_value, clamped_to, reason +func ClampBudgetWithZeroCheck(value, min, max int, zeroAllowed bool) int { + if value == 0 { + if zeroAllowed { + return 0 + } + log.WithFields(log.Fields{ + "original_value": value, + "clamped_to": min, + "min": min, + "max": max, + "reason": "zero_not_allowed", + }).Warn("budget clamped: zero not allowed") + return min + } + + return ClampBudget(value, min, max) +} + +// ValidateConfig validates a thinking configuration against model capabilities. +// +// This function performs comprehensive validation: +// - Checks if the model supports thinking +// - Auto-converts between Budget and Level formats based on model capability +// - Validates that requested level is in the model's supported levels list +// - Clamps budget values to model's allowed range +// +// Parameters: +// - config: The thinking configuration to validate +// - support: Model's ThinkingSupport properties (nil means no thinking support) +// +// Returns: +// - Normalized ThinkingConfig with clamped values +// - ThinkingError if validation fails (ErrThinkingNotSupported, ErrLevelNotSupported, etc.) 
+// +// Auto-conversion behavior: +// - Budget-only model + Level config → Level converted to Budget +// - Level-only model + Budget config → Budget converted to Level +// - Hybrid model → preserve original format +func ValidateConfig(config ThinkingConfig, support *registry.ThinkingSupport) (*ThinkingConfig, error) { + normalized := config + if support == nil { + if config.Mode != ModeNone { + return nil, NewThinkingErrorWithModel(ErrThinkingNotSupported, "thinking not supported for this model", "unknown") + } + return &normalized, nil + } + + capability := detectModelCapability(&registry.ModelInfo{Thinking: support}) + switch capability { + case CapabilityBudgetOnly: + if normalized.Mode == ModeLevel { + if normalized.Level == LevelAuto { + break + } + budget, ok := ConvertLevelToBudget(string(normalized.Level)) + if !ok { + return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("unknown level: %s", normalized.Level)) + } + normalized.Mode = ModeBudget + normalized.Budget = budget + normalized.Level = "" + } + case CapabilityLevelOnly: + if normalized.Mode == ModeBudget { + level, ok := ConvertBudgetToLevel(normalized.Budget) + if !ok { + return nil, NewThinkingError(ErrUnknownLevel, fmt.Sprintf("budget %d cannot be converted to a valid level", normalized.Budget)) + } + normalized.Mode = ModeLevel + normalized.Level = ThinkingLevel(level) + normalized.Budget = 0 + } + case CapabilityHybrid: + } + + if normalized.Mode == ModeLevel && normalized.Level == LevelNone { + normalized.Mode = ModeNone + normalized.Budget = 0 + normalized.Level = "" + } + if normalized.Mode == ModeLevel && normalized.Level == LevelAuto { + normalized.Mode = ModeAuto + normalized.Budget = -1 + normalized.Level = "" + } + if normalized.Mode == ModeBudget && normalized.Budget == 0 { + normalized.Mode = ModeNone + normalized.Level = "" + } + + if len(support.Levels) > 0 && normalized.Mode == ModeLevel { + if !isLevelSupported(string(normalized.Level), support.Levels) { + validLevels := 
normalizeLevels(support.Levels) + message := fmt.Sprintf("level %q not supported, valid levels: %s", strings.ToLower(string(normalized.Level)), strings.Join(validLevels, ", ")) + return nil, NewThinkingError(ErrLevelNotSupported, message) + } + } + + // Convert ModeAuto to mid-range if dynamic not allowed + if normalized.Mode == ModeAuto && !support.DynamicAllowed { + normalized = convertAutoToMidRange(normalized, support) + } + + switch normalized.Mode { + case ModeBudget, ModeAuto, ModeNone: + clamped := ClampBudgetWithZeroCheck(normalized.Budget, support.Min, support.Max, support.ZeroAllowed) + normalized.Budget = clamped + } + + // ModeNone with clamped Budget > 0: set Level to lowest for Level-only/Hybrid models + // This ensures Apply layer doesn't need to access support.Levels + if normalized.Mode == ModeNone && normalized.Budget > 0 && len(support.Levels) > 0 { + normalized.Level = ThinkingLevel(support.Levels[0]) + } + + return &normalized, nil +} + +func isLevelSupported(level string, supported []string) bool { + for _, candidate := range supported { + if strings.EqualFold(level, strings.TrimSpace(candidate)) { + return true + } + } + return false +} + +func normalizeLevels(levels []string) []string { + normalized := make([]string, 0, len(levels)) + for _, level := range levels { + normalized = append(normalized, strings.ToLower(strings.TrimSpace(level))) + } + return normalized +} + +// convertAutoToMidRange converts ModeAuto to a mid-range value when dynamic is not allowed. +// +// This function handles the case where a model does not support dynamic/auto thinking. 
+// The auto mode is silently converted to a fixed value based on model capability: +// - Level-only models: convert to ModeLevel with LevelMedium +// - Budget models: convert to ModeBudget with mid = (Min + Max) / 2 +// +// Logging: +// - Debug level when conversion occurs +// - Fields: original_mode, clamped_to, reason +func convertAutoToMidRange(config ThinkingConfig, support *registry.ThinkingSupport) ThinkingConfig { + // For level-only models (has Levels but no Min/Max range), use ModeLevel with medium + if len(support.Levels) > 0 && support.Min == 0 && support.Max == 0 { + config.Mode = ModeLevel + config.Level = LevelMedium + config.Budget = 0 + log.WithFields(log.Fields{ + "original_mode": "auto", + "clamped_to": string(LevelMedium), + "reason": "dynamic_not_allowed_level_only", + }).Debug("thinking mode converted: dynamic not allowed, using medium level") + return config + } + + // For budget models, use mid-range budget + mid := (support.Min + support.Max) / 2 + if mid <= 0 && support.ZeroAllowed { + config.Mode = ModeNone + config.Budget = 0 + } else if mid <= 0 { + config.Mode = ModeBudget + config.Budget = support.Min + } else { + config.Mode = ModeBudget + config.Budget = mid + } + log.WithFields(log.Fields{ + "original_mode": "auto", + "clamped_to": config.Budget, + "reason": "dynamic_not_allowed", + }).Debug("thinking mode converted: dynamic not allowed") + return config +} + +// logClamp logs a debug message when budget clamping occurs. +func logClamp(original, clampedTo, min, max int) { + log.WithFields(log.Fields{ + "original_value": original, + "clamped_to": clampedTo, + "min": min, + "max": max, + }).Debug("budget clamped: value outside model range") +} diff --git a/internal/thinking/validate_test.go b/internal/thinking/validate_test.go new file mode 100644 index 00000000..e17a1586 --- /dev/null +++ b/internal/thinking/validate_test.go @@ -0,0 +1,349 @@ +// Package thinking provides unified thinking configuration processing logic. 
+package thinking + +import ( + "strings" + "testing" + "unicode" + "unicode/utf8" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + log "github.com/sirupsen/logrus" + logtest "github.com/sirupsen/logrus/hooks/test" +) + +// TestClampBudget tests the ClampBudget function. +// +// ClampBudget applies range constraints to a budget value: +// - budget < Min → clamp to Min (with Debug log) +// - budget > Max → clamp to Max (with Debug log) +// - Auto value (-1) passes through unchanged +func TestClampBudget(t *testing.T) { + tests := []struct { + name string + value int + min int + max int + want int + }{ + // Within range - no clamping + {"within range", 8192, 128, 32768, 8192}, + {"at min", 128, 128, 32768, 128}, + {"at max", 32768, 128, 32768, 32768}, + + // Below min - clamp to min + {"below min", 100, 128, 32768, 128}, + + // Above max - clamp to max + {"above max", 50000, 128, 32768, 32768}, + + // Edge cases + {"min equals max", 5000, 5000, 5000, 5000}, + {"zero min zero value", 0, 0, 100, 0}, + + // Auto value (-1) - passes through + {"auto value", -1, 128, 32768, -1}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ClampBudget(tt.value, tt.min, tt.max) + if got != tt.want { + t.Errorf("ClampBudget(%d, %d, %d) = %d, want %d", + tt.value, tt.min, tt.max, got, tt.want) + } + }) + } +} + +// TestZeroAllowedBoundaryHandling tests ZeroAllowed=false edge cases. +// +// When ZeroAllowed=false and user requests 0, clamp to Min + log Warn. 
+func TestZeroAllowedBoundaryHandling(t *testing.T) { + tests := []struct { + name string + value int + min int + max int + zeroAllowed bool + want int + }{ + // ZeroAllowed=true: 0 stays 0 + {"zero allowed - keep zero", 0, 128, 32768, true, 0}, + + // ZeroAllowed=false: 0 clamps to min + {"zero not allowed - clamp to min", 0, 128, 32768, false, 128}, + + // ZeroAllowed=false but non-zero value: normal clamping + {"zero not allowed - positive value", 8192, 1024, 100000, false, 8192}, + + // Auto value (-1) always passes through + {"auto value", -1, 128, 32768, false, -1}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ClampBudgetWithZeroCheck(tt.value, tt.min, tt.max, tt.zeroAllowed) + if got != tt.want { + t.Errorf("ClampBudgetWithZeroCheck(%d, %d, %d, %v) = %d, want %d", + tt.value, tt.min, tt.max, tt.zeroAllowed, got, tt.want) + } + }) + } +} + +// TestValidateConfigFramework verifies the ValidateConfig function framework. +// This test is merged into TestValidateConfig for consolidation. + +// TestValidateConfigNotSupported verifies nil support handling. +// This test is merged into TestValidateConfig for consolidation. + +// TestValidateConfigConversion verifies mode conversion based on capability. +// This test is merged into TestValidateConfig for consolidation. + +// TestValidateConfigLevelSupport verifies level list validation. +// This test is merged into TestValidateConfig for consolidation. + +// TestValidateConfigClamping verifies budget clamping behavior. +// This test is merged into TestValidateConfig for consolidation. + +// TestValidateConfig is the comprehensive test for ValidateConfig function. +// +// ValidateConfig checks if a ThinkingConfig is valid for a given model. 
+// This test covers all validation scenarios including: +// - Framework basics (nil support with ModeNone) +// - Error cases (thinking not supported, level not supported, dynamic not allowed) +// - Mode conversion (budget-only, level-only, hybrid) +// - Budget clamping (to max, to min) +// - ZeroAllowed boundary handling (ModeNone with ZeroAllowed=false) +// - DynamicAllowed validation +// +// Depends on: Epic 5 Story 5-3 (config validity validation) +func TestValidateConfig(t *testing.T) { + tests := []struct { + name string + config ThinkingConfig + support *registry.ThinkingSupport + wantMode ThinkingMode + wantBudget int + wantLevel ThinkingLevel + wantErr bool + wantCode ErrorCode + }{ + // Framework basics + {"nil support mode none", ThinkingConfig{Mode: ModeNone, Budget: 0}, nil, ModeNone, 0, "", false, ""}, + + // Valid configs - no conversion needed + {"budget-only keeps budget", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 8192, "", false, ""}, + + // Auto-conversion: Level → Budget + {"budget-only converts level", ThinkingConfig{Mode: ModeLevel, Level: LevelHigh}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 24576, "", false, ""}, + + // Auto-conversion: Budget → Level + {"level-only converts budget", ThinkingConfig{Mode: ModeBudget, Budget: 5000}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, LevelMedium, false, ""}, + + // Hybrid preserves original format + {"hybrid preserves level", ThinkingConfig{Mode: ModeLevel, Level: LevelLow}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}}, ModeLevel, 0, LevelLow, false, ""}, + + // Budget clamping + {"budget clamped to max", ThinkingConfig{Mode: ModeBudget, Budget: 200000}, &registry.ThinkingSupport{Min: 1024, Max: 100000}, ModeBudget, 100000, "", false, ""}, + {"budget clamped to min", ThinkingConfig{Mode: ModeBudget, Budget: 100}, &registry.ThinkingSupport{Min: 1024, Max: 
100000}, ModeBudget, 1024, "", false, ""}, + + // Error: thinking not supported + {"thinking not supported", ThinkingConfig{Mode: ModeBudget, Budget: 8192}, nil, 0, 0, "", true, ErrThinkingNotSupported}, + + // Error: level not in list + {"level not supported", ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, 0, 0, "", true, ErrLevelNotSupported}, + + // Level case-insensitive + {"level supported case-insensitive", ThinkingConfig{Mode: ModeLevel, Level: ThinkingLevel("HIGH")}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}, ModeLevel, 0, ThinkingLevel("HIGH"), false, ""}, + + // ModeAuto with DynamicAllowed + {"auto with dynamic allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""}, + + // ModeAuto with DynamicAllowed=false - converts to mid-range (M3) + {"auto with dynamic not allowed", ThinkingConfig{Mode: ModeAuto, Budget: -1}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""}, + + // ModeNone with ZeroAllowed=true - stays as ModeNone + {"mode none with zero allowed", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: true}, ModeNone, 0, "", false, ""}, + + // Budget=0 converts to ModeNone before clamping (M1) + {"budget zero converts to none", ThinkingConfig{Mode: ModeBudget, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false}, ModeNone, 128, "", false, ""}, + + // Level=none converts to ModeNone before clamping, then Level set to lowest + {"level none converts to none", ThinkingConfig{Mode: ModeLevel, Level: LevelNone}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""}, + {"level auto converts to auto", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, 
&registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: true}, ModeAuto, -1, "", false, ""}, + // M1: Level=auto with DynamicAllowed=false - converts to mid-range budget + {"level auto with dynamic not allowed", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, DynamicAllowed: false}, ModeBudget, 16448, "", false, ""}, + // M2: Level=auto on Budget-only model (no Levels) + {"level auto on budget-only model", ThinkingConfig{Mode: ModeLevel, Level: LevelAuto}, &registry.ThinkingSupport{Min: 128, Max: 32768, DynamicAllowed: true}, ModeAuto, -1, "", false, ""}, + + // ModeNone with ZeroAllowed=false - clamps to min but preserves ModeNone (M1) + {"mode none with zero not allowed - preserve mode", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false}, ModeNone, 1024, "", false, ""}, + + // ModeNone with clamped Budget > 0 and Levels: sets Level to lowest + {"mode none clamped with levels", ThinkingConfig{Mode: ModeNone, Budget: 0}, &registry.ThinkingSupport{Min: 128, Max: 32768, Levels: []string{"low", "high"}, ZeroAllowed: false}, ModeNone, 128, ThinkingLevel("low"), false, ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ValidateConfig(tt.config, tt.support) + if tt.wantErr { + if err == nil { + t.Fatalf("ValidateConfig(%+v, support) error = nil, want %v", tt.config, tt.wantCode) + } + thinkingErr, ok := err.(*ThinkingError) + if !ok { + t.Fatalf("ValidateConfig(%+v, support) error type = %T, want *ThinkingError", tt.config, err) + } + if thinkingErr.Code != tt.wantCode { + t.Errorf("ValidateConfig(%+v, support) code = %v, want %v", tt.config, thinkingErr.Code, tt.wantCode) + } + return + } + if err != nil { + t.Fatalf("ValidateConfig(%+v, support) returned error: %v", tt.config, err) + } + if got == nil { + t.Fatalf("ValidateConfig(%+v, support) returned nil 
config", tt.config) + } + if got.Mode != tt.wantMode { + t.Errorf("ValidateConfig(%+v, support) Mode = %v, want %v", tt.config, got.Mode, tt.wantMode) + } + if got.Budget != tt.wantBudget { + t.Errorf("ValidateConfig(%+v, support) Budget = %d, want %d", tt.config, got.Budget, tt.wantBudget) + } + if got.Level != tt.wantLevel { + t.Errorf("ValidateConfig(%+v, support) Level = %q, want %q", tt.config, got.Level, tt.wantLevel) + } + }) + } +} + +// TestValidationErrorMessages tests error message formatting. +// +// Error messages should: +// - Be lowercase +// - Have no trailing period +// - Include context with %s/%d +// +// Depends on: Epic 5 Story 5-4 (validation error messages) +func TestValidationErrorMessages(t *testing.T) { + tests := []struct { + name string + getErr func() error + wantCode ErrorCode + wantContains string + }{ + {"invalid suffix", func() error { + _, err := ParseSuffixWithError("model(abc") + return err + }, ErrInvalidSuffix, "model(abc"}, + {"level not supported", func() error { + _, err := ValidateConfig(ThinkingConfig{Mode: ModeLevel, Level: LevelXHigh}, &registry.ThinkingSupport{Levels: []string{"low", "medium", "high"}}) + return err + }, ErrLevelNotSupported, "valid levels: low, medium, high"}, + {"thinking not supported", func() error { + _, err := ValidateConfig(ThinkingConfig{Mode: ModeBudget, Budget: 1024}, nil) + return err + }, ErrThinkingNotSupported, "thinking not supported for this model"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.getErr() + if err == nil { + t.Fatalf("error = nil, want ThinkingError") + } + thinkingErr, ok := err.(*ThinkingError) + if !ok { + t.Fatalf("error type = %T, want *ThinkingError", err) + } + if thinkingErr.Code != tt.wantCode { + t.Errorf("code = %v, want %v", thinkingErr.Code, tt.wantCode) + } + if thinkingErr.Message == "" { + t.Fatalf("message is empty") + } + first, _ := utf8.DecodeRuneInString(thinkingErr.Message) + if unicode.IsLetter(first) && 
!unicode.IsLower(first) { + t.Errorf("message does not start with lowercase: %q", thinkingErr.Message) + } + if strings.HasSuffix(thinkingErr.Message, ".") { + t.Errorf("message has trailing period: %q", thinkingErr.Message) + } + if !strings.Contains(thinkingErr.Message, tt.wantContains) { + t.Errorf("message %q does not contain %q", thinkingErr.Message, tt.wantContains) + } + }) + } +} + +// TestClampingLogging tests that clamping produces correct log entries. +// +// Clamping behavior: +// - Normal clamp (budget outside range) → Debug log +// - ZeroAllowed=false + zero request → Warn log +// +// Depends on: Epic 5 Story 5-1, 5-2 +func TestClampingLogging(t *testing.T) { + tests := []struct { + name string + useZeroCheck bool + budget int + min int + max int + zeroAllowed bool + wantLevel log.Level + wantReason string + wantClamped int + }{ + {"above max - debug", false, 50000, 128, 32768, false, log.DebugLevel, "", 32768}, + {"below min - debug", false, 50, 128, 32768, false, log.DebugLevel, "", 128}, + {"zero not allowed - warn", true, 0, 128, 32768, false, log.WarnLevel, "zero_not_allowed", 128}, + } + + logger := log.StandardLogger() + originalLevel := logger.GetLevel() + logger.SetLevel(log.DebugLevel) + hook := logtest.NewLocal(logger) + t.Cleanup(func() { + logger.SetLevel(originalLevel) + hook.Reset() + }) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + hook.Reset() + var got int + if tt.useZeroCheck { + got = ClampBudgetWithZeroCheck(tt.budget, tt.min, tt.max, tt.zeroAllowed) + } else { + got = ClampBudget(tt.budget, tt.min, tt.max) + } + if got != tt.wantClamped { + t.Fatalf("clamped budget = %d, want %d", got, tt.wantClamped) + } + + entry := hook.LastEntry() + if entry == nil { + t.Fatalf("no log entry captured") + } + if entry.Level != tt.wantLevel { + t.Errorf("log level = %v, want %v", entry.Level, tt.wantLevel) + } + + fields := []string{"original_value", "clamped_to", "min", "max"} + for _, key := range fields { + if _, ok 
:= entry.Data[key]; !ok { + t.Errorf("missing field %q", key) + } + } + if tt.wantReason != "" { + if value, ok := entry.Data["reason"]; !ok || value != tt.wantReason { + t.Errorf("reason = %v, want %v", value, tt.wantReason) + } + } + }) + } +} diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index d5064c3c..c3e4c63f 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -12,6 +12,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/cache" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" @@ -385,12 +386,15 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, 
"request.generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go index 7ca01b07..87782a5a 100644 --- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go +++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -39,7 +40,8 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil { effort := strings.ToLower(strings.TrimSpace(re.String())) if util.IsGemini3Model(modelName) { switch effort { @@ -53,14 +55,14 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ out = util.ApplyGeminiCLIThinkingLevel(out, level, nil) } } - } else if !util.ModelUsesThinkingLevels(modelName) { + } else if len(modelInfo.Thinking.Levels) == 0 { out = util.ApplyReasoningEffortToGeminiCLI(out, effort) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. 
- if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int @@ -71,7 +73,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ setBudget = true } else if v := tc.Get("thinking_budget"); v.Exists() { budget = int(v.Int()) - out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) setBudget = true } @@ -87,7 +89,7 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _ // Claude/Anthropic API format: thinking.type == "enabled" with budget_tokens // This allows Claude Code and other Claude API clients to pass thinking configuration - if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && util.ModelSupportsThinking(modelName) { + if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelInfo != nil && modelInfo.Thinking != nil { if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { if t.Get("type").String() == "enabled" { if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { diff --git a/internal/translator/claude/gemini/claude_gemini_request.go b/internal/translator/claude/gemini/claude_gemini_request.go index faf1f9d1..c987c4b0 100644 --- a/internal/translator/claude/gemini/claude_gemini_request.go +++ b/internal/translator/claude/gemini/claude_gemini_request.go @@ -15,6 +15,8 @@ import ( "strings" "github.com/google/uuid" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -115,15 +117,18 @@ func ConvertGeminiRequestToClaude(modelName string, inputRawJSON []byte, stream } // Include thoughts configuration for reasoning process visibility // Only apply for models that support thinking and use numeric budgets, not discrete levels. - if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - // Check for thinkingBudget first - if present, enable thinking with budget - if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { - out, _ = sjson.Set(out, "thinking.type", "enabled") - normalizedBudget := util.NormalizeThinkingBudget(modelName, int(thinkingBudget.Int())) - out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) - } else if includeThoughts := thinkingConfig.Get("include_thoughts"); includeThoughts.Exists() && includeThoughts.Type == gjson.True { - // Fallback to include_thoughts if no budget specified - out, _ = sjson.Set(out, "thinking.type", "enabled") + if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + // Check for thinkingBudget first - if present, enable thinking with budget + if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() && thinkingBudget.Int() > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + normalizedBudget := thinking.ClampBudget(int(thinkingBudget.Int()), modelInfo.Thinking.Min, modelInfo.Thinking.Max) + out, _ = sjson.Set(out, "thinking.budget_tokens", normalizedBudget) + } else if includeThoughts := thinkingConfig.Get("include_thoughts"); 
includeThoughts.Exists() && includeThoughts.Type == gjson.True { + // Fallback to include_thoughts if no budget specified + out, _ = sjson.Set(out, "thinking.type", "enabled") + } } } } diff --git a/internal/translator/claude/openai/chat-completions/claude_openai_request.go b/internal/translator/claude/openai/chat-completions/claude_openai_request.go index ea04a97a..1ae1f274 100644 --- a/internal/translator/claude/openai/chat-completions/claude_openai_request.go +++ b/internal/translator/claude/openai/chat-completions/claude_openai_request.go @@ -15,7 +15,8 @@ import ( "strings" "github.com/google/uuid" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -65,20 +66,23 @@ func ConvertOpenAIRequestToClaude(modelName string, inputRawJSON []byte, stream root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning_effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - effort := strings.ToLower(strings.TrimSpace(v.String())) - if effort != "" { - budget, ok := util.ThinkingEffortToBudget(modelName, effort) - if ok { - switch budget { - case 0: - out, _ = sjson.Set(out, "thinking.type", "disabled") - case -1: - out, _ = sjson.Set(out, "thinking.type", "enabled") - default: - if budget > 0 { + if v := root.Get("reasoning_effort"); v.Exists() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + effort := strings.ToLower(strings.TrimSpace(v.String())) + if effort != "" { + budget, ok := thinking.ConvertLevelToBudget(effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: out, _ = sjson.Set(out, "thinking.type", "enabled") - out, _ = sjson.Set(out, 
"thinking.budget_tokens", budget) + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } } } } diff --git a/internal/translator/claude/openai/responses/claude_openai-responses_request.go b/internal/translator/claude/openai/responses/claude_openai-responses_request.go index d4b7e05f..3717afa4 100644 --- a/internal/translator/claude/openai/responses/claude_openai-responses_request.go +++ b/internal/translator/claude/openai/responses/claude_openai-responses_request.go @@ -10,7 +10,8 @@ import ( "strings" "github.com/google/uuid" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -53,20 +54,23 @@ func ConvertOpenAIResponsesRequestToClaude(modelName string, inputRawJSON []byte root := gjson.ParseBytes(rawJSON) - if v := root.Get("reasoning.effort"); v.Exists() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - effort := strings.ToLower(strings.TrimSpace(v.String())) - if effort != "" { - budget, ok := util.ThinkingEffortToBudget(modelName, effort) - if ok { - switch budget { - case 0: - out, _ = sjson.Set(out, "thinking.type", "disabled") - case -1: - out, _ = sjson.Set(out, "thinking.type", "enabled") - default: - if budget > 0 { + if v := root.Get("reasoning.effort"); v.Exists() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + effort := strings.ToLower(strings.TrimSpace(v.String())) + if effort != "" { + budget, ok := thinking.ConvertLevelToBudget(effort) + if ok { + switch budget { + case 0: + out, _ = sjson.Set(out, "thinking.type", "disabled") + case -1: out, _ = sjson.Set(out, "thinking.type", "enabled") - out, _ = sjson.Set(out, 
"thinking.budget_tokens", budget) + default: + if budget > 0 { + out, _ = sjson.Set(out, "thinking.type", "enabled") + out, _ = sjson.Set(out, "thinking.budget_tokens", budget) + } } } } diff --git a/internal/translator/codex/claude/codex_claude_request.go b/internal/translator/codex/claude/codex_claude_request.go index 59cd5ecf..e31671b3 100644 --- a/internal/translator/codex/claude/codex_claude_request.go +++ b/internal/translator/codex/claude/codex_claude_request.go @@ -12,7 +12,8 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -219,19 +220,20 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool) // Convert thinking.budget_tokens to reasoning.effort for level-based models reasoningEffort := "medium" // default - if thinking := rootResult.Get("thinking"); thinking.Exists() && thinking.IsObject() { - switch thinking.Get("type").String() { + if thinkingConfig := rootResult.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + switch thinkingConfig.Get("type").String() { case "enabled": - if util.ModelUsesThinkingLevels(modelName) { - if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 { + if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() { budget := int(budgetTokens.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { reasoningEffort = effort } } } case "disabled": - if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok 
&& effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { reasoningEffort = effort } } diff --git a/internal/translator/codex/gemini/codex_gemini_request.go b/internal/translator/codex/gemini/codex_gemini_request.go index 944b95f6..f6b258ef 100644 --- a/internal/translator/codex/gemini/codex_gemini_request.go +++ b/internal/translator/codex/gemini/codex_gemini_request.go @@ -14,6 +14,8 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -251,10 +253,11 @@ func ConvertGeminiRequestToCodex(modelName string, inputRawJSON []byte, _ bool) reasoningEffort := "medium" // default if genConfig := root.Get("generationConfig"); genConfig.Exists() { if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { - if util.ModelUsesThinkingLevels(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) > 0 { if thinkingBudget := thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { reasoningEffort = effort } } diff --git a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go index 66e0385f..f522df81 100644 --- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go +++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go @@ -9,8 +9,8 @@ import ( "bytes" "strings" + 
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -160,12 +160,15 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) [] } // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index 98188835..1a6505d0 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -39,13 +40,14 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo // Note: OpenAI official fields take precedence over extra_body.google.thinking_config re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { out = util.ApplyReasoningEffortToGeminiCLI(out, re.String()) } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/claude/gemini_claude_request.go b/internal/translator/gemini/claude/gemini_claude_request.go index c410aad8..a10d00e9 100644 --- a/internal/translator/gemini/claude/gemini_claude_request.go +++ b/internal/translator/gemini/claude/gemini_claude_request.go @@ -9,8 +9,8 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -154,12 +154,15 @@ func ConvertClaudeRequestToGemini(modelName 
string, inputRawJSON []byte, _ bool) // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled // Only apply for models that use numeric budgets, not discrete levels. - if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { - if t.Get("type").String() == "enabled" { - if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { - budget := int(b.Int()) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) - out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) + if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { + if t.Get("type").String() == "enabled" { + if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number { + budget := int(b.Int()) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget) + out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true) + } } } } diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 57e150c1..2328ad36 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/misc" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" log "github.com/sirupsen/logrus" @@ -42,7 +43,8 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) // 
use thinkingLevel/includeThoughts instead. re := gjson.GetBytes(rawJSON, "reasoning_effort") hasOfficialThinking := re.Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil { effort := strings.ToLower(strings.TrimSpace(re.String())) if util.IsGemini3Model(modelName) { switch effort { @@ -56,14 +58,14 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) out = util.ApplyGeminiThinkingLevel(out, level, nil) } } - } else if !util.ModelUsesThinkingLevels(modelName) { + } else if len(modelInfo.Thinking.Levels) == 0 { out = util.ApplyReasoningEffortToGemini(out, effort) } } // Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go index 1bf67e7f..62e85eef 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go @@ -4,6 +4,7 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" @@ -391,14 +392,15 @@ func 
ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte // OpenAI official reasoning fields take precedence // Only convert for models that use numeric budgets (not discrete levels). hasOfficialThinking := root.Get("reasoning.effort").Exists() - if hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName) + if hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { reasoningEffort := root.Get("reasoning.effort") out = string(util.ApplyReasoningEffortToGemini([]byte(out), reasoningEffort.String())) } // Cherry Studio extension (applies only when official fields are missing) // Only apply for models that use numeric budgets, not discrete levels. - if !hasOfficialThinking && util.ModelSupportsThinking(modelName) && !util.ModelUsesThinkingLevels(modelName) { + if !hasOfficialThinking && modelInfo != nil && modelInfo.Thinking != nil && len(modelInfo.Thinking.Levels) == 0 { if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() { var setBudget bool var budget int diff --git a/internal/translator/openai/claude/openai_claude_request.go b/internal/translator/openai/claude/openai_claude_request.go index cc7fd01e..44cb237e 100644 --- a/internal/translator/openai/claude/openai_claude_request.go +++ b/internal/translator/openai/claude/openai_claude_request.go @@ -9,6 +9,7 @@ import ( "bytes" "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" "github.com/tidwall/sjson" @@ -61,23 +62,23 @@ func ConvertClaudeRequestToOpenAI(modelName string, inputRawJSON []byte, stream out, _ = sjson.Set(out, "stream", stream) // Thinking: Convert Claude thinking.budget_tokens to OpenAI reasoning_effort - if thinking := root.Get("thinking"); thinking.Exists() && 
thinking.IsObject() { - if thinkingType := thinking.Get("type"); thinkingType.Exists() { + if thinkingConfig := root.Get("thinking"); thinkingConfig.Exists() && thinkingConfig.IsObject() { + if thinkingType := thinkingConfig.Get("type"); thinkingType.Exists() { switch thinkingType.String() { case "enabled": - if budgetTokens := thinking.Get("budget_tokens"); budgetTokens.Exists() { + if budgetTokens := thinkingConfig.Get("budget_tokens"); budgetTokens.Exists() { budget := int(budgetTokens.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } else { // No budget_tokens specified, default to "auto" for enabled thinking - if effort, ok := util.ThinkingBudgetToEffort(modelName, -1); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(-1); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } case "disabled": - if effort, ok := util.ThinkingBudgetToEffort(modelName, 0); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(0); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/translator/openai/gemini/openai_gemini_request.go b/internal/translator/openai/gemini/openai_gemini_request.go index f51d914b..7cdcb0f8 100644 --- a/internal/translator/openai/gemini/openai_gemini_request.go +++ b/internal/translator/openai/gemini/openai_gemini_request.go @@ -12,7 +12,7 @@ import ( "math/big" "strings" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/tidwall/gjson" "github.com/tidwall/sjson" ) @@ -82,7 +82,7 @@ func ConvertGeminiRequestToOpenAI(modelName string, inputRawJSON []byte, stream if thinkingConfig := genConfig.Get("thinkingConfig"); thinkingConfig.Exists() && thinkingConfig.IsObject() { if thinkingBudget := 
thinkingConfig.Get("thinkingBudget"); thinkingBudget.Exists() { budget := int(thinkingBudget.Int()) - if effort, ok := util.ThinkingBudgetToEffort(modelName, budget); ok && effort != "" { + if effort, ok := thinking.ConvertBudgetToLevel(budget); ok && effort != "" { out, _ = sjson.Set(out, "reasoning_effort", effort) } } diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 3ce1bb0d..3ed4ee42 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -8,6 +8,8 @@ import ( // ModelSupportsThinking reports whether the given model has Thinking capability // according to the model registry metadata (provider-agnostic). +// +// Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check. func ModelSupportsThinking(model string) bool { if model == "" { return false @@ -32,6 +34,8 @@ func ModelSupportsThinking(model string) bool { // If the model is unknown or has no Thinking metadata, returns the original budget. // For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range // or min (0 if zero is allowed and mid <= 0). +// +// Deprecated: Use thinking.ValidateConfig for budget normalization. func NormalizeThinkingBudget(model string, budget int) int { if budget == -1 { // dynamic if found, minBudget, maxBudget, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found { @@ -89,6 +93,8 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero // GetModelThinkingLevels returns the discrete reasoning effort levels for the model. // Returns nil if the model has no thinking support or no levels defined. +// +// Deprecated: Access modelInfo.Thinking.Levels directly. func GetModelThinkingLevels(model string) []string { if model == "" { return nil @@ -102,6 +108,8 @@ func GetModelThinkingLevels(model string) []string { // ModelUsesThinkingLevels reports whether the model uses discrete reasoning // effort levels instead of numeric budgets. 
+// +// Deprecated: Check len(modelInfo.Thinking.Levels) > 0. func ModelUsesThinkingLevels(model string) bool { levels := GetModelThinkingLevels(model) return len(levels) > 0 @@ -109,6 +117,8 @@ func ModelUsesThinkingLevels(model string) bool { // NormalizeReasoningEffortLevel validates and normalizes a reasoning effort // level for the given model. Returns false when the level is not supported. +// +// Deprecated: Use thinking.ValidateConfig for level validation. func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { levels := GetModelThinkingLevels(model) if len(levels) == 0 { @@ -125,6 +135,8 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { // IsOpenAICompatibilityModel reports whether the model is registered as an OpenAI-compatibility model. // These models may not advertise Thinking metadata in the registry. +// +// Deprecated: Check modelInfo.Type == "openai-compatibility". func IsOpenAICompatibilityModel(model string) bool { if model == "" { return false @@ -149,6 +161,8 @@ func IsOpenAICompatibilityModel(model string) bool { // - "xhigh" -> 32768 // // Returns false when the effort level is empty or unsupported. +// +// Deprecated: Use thinking.ConvertLevelToBudget instead. func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false @@ -186,6 +200,8 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { // - "high" -> 32768 // // Returns false when the level is empty or unsupported. +// +// Deprecated: Use thinking.ConvertLevelToBudget instead. func ThinkingLevelToBudget(level string) (int, bool) { if level == "" { return 0, false @@ -217,6 +233,8 @@ func ThinkingLevelToBudget(level string) (int, bool) { // - 24577.. -> highest supported level for the model (defaults to "xhigh") // // Returns false when the budget is unsupported (negative values other than -1). +// +// Deprecated: Use thinking.ConvertBudgetToLevel instead. 
func ThinkingBudgetToEffort(model string, budget int) (string, bool) { switch { case budget == -1: diff --git a/internal/util/thinking_deprecation_test.go b/internal/util/thinking_deprecation_test.go new file mode 100644 index 00000000..6e513874 --- /dev/null +++ b/internal/util/thinking_deprecation_test.go @@ -0,0 +1,130 @@ +package util + +import ( + "go/ast" + "go/parser" + "go/token" + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestThinkingUtilDeprecationComments(t *testing.T) { + dir, err := thinkingSourceDir() + if err != nil { + t.Fatalf("resolve thinking source dir: %v", err) + } + + // Test thinking.go deprecation comments + t.Run("thinking.go", func(t *testing.T) { + docs := parseFuncDocs(t, filepath.Join(dir, "thinking.go")) + tests := []struct { + funcName string + want string + }{ + {"ModelSupportsThinking", "Deprecated: Use thinking.ApplyThinking with modelInfo.Thinking check."}, + {"NormalizeThinkingBudget", "Deprecated: Use thinking.ValidateConfig for budget normalization."}, + {"ThinkingEffortToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."}, + {"ThinkingBudgetToEffort", "Deprecated: Use thinking.ConvertBudgetToLevel instead."}, + {"GetModelThinkingLevels", "Deprecated: Access modelInfo.Thinking.Levels directly."}, + {"ModelUsesThinkingLevels", "Deprecated: Check len(modelInfo.Thinking.Levels) > 0."}, + {"NormalizeReasoningEffortLevel", "Deprecated: Use thinking.ValidateConfig for level validation."}, + {"IsOpenAICompatibilityModel", "Deprecated: Check modelInfo.Type == \"openai-compatibility\"."}, + {"ThinkingLevelToBudget", "Deprecated: Use thinking.ConvertLevelToBudget instead."}, + } + for _, tt := range tests { + t.Run(tt.funcName, func(t *testing.T) { + doc, ok := docs[tt.funcName] + if !ok { + t.Fatalf("missing function %q in thinking.go", tt.funcName) + } + if !strings.Contains(doc, tt.want) { + t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) + } + }) + } 
+ }) + + // Test thinking_suffix.go deprecation comments + t.Run("thinking_suffix.go", func(t *testing.T) { + docs := parseFuncDocs(t, filepath.Join(dir, "thinking_suffix.go")) + tests := []struct { + funcName string + want string + }{ + {"NormalizeThinkingModel", "Deprecated: Use thinking.ParseSuffix instead."}, + {"ThinkingFromMetadata", "Deprecated: Access ThinkingConfig fields directly."}, + {"ResolveThinkingConfigFromMetadata", "Deprecated: Use thinking.ApplyThinking instead."}, + {"ReasoningEffortFromMetadata", "Deprecated: Use thinking.ConvertBudgetToLevel instead."}, + {"ResolveOriginalModel", "Deprecated: Parse model suffix with thinking.ParseSuffix."}, + } + for _, tt := range tests { + t.Run(tt.funcName, func(t *testing.T) { + doc, ok := docs[tt.funcName] + if !ok { + t.Fatalf("missing function %q in thinking_suffix.go", tt.funcName) + } + if !strings.Contains(doc, tt.want) { + t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) + } + }) + } + }) + + // Test thinking_text.go deprecation comments + t.Run("thinking_text.go", func(t *testing.T) { + docs := parseFuncDocs(t, filepath.Join(dir, "thinking_text.go")) + tests := []struct { + funcName string + want string + }{ + {"GetThinkingText", "Deprecated: Use thinking package for thinking text extraction."}, + {"GetThinkingTextFromJSON", "Deprecated: Use thinking package for thinking text extraction."}, + {"SanitizeThinkingPart", "Deprecated: Use thinking package for thinking part sanitization."}, + {"StripCacheControl", "Deprecated: Use thinking package for cache control stripping."}, + } + for _, tt := range tests { + t.Run(tt.funcName, func(t *testing.T) { + doc, ok := docs[tt.funcName] + if !ok { + t.Fatalf("missing function %q in thinking_text.go", tt.funcName) + } + if !strings.Contains(doc, tt.want) { + t.Fatalf("missing deprecation note for %s: want %q, got %q", tt.funcName, tt.want, doc) + } + }) + } + }) +} + +func parseFuncDocs(t *testing.T, path string) 
map[string]string { + t.Helper() + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, path, nil, parser.ParseComments) + if err != nil { + t.Fatalf("parse %s: %v", path, err) + } + docs := map[string]string{} + for _, decl := range file.Decls { + fn, ok := decl.(*ast.FuncDecl) + if !ok || fn.Recv != nil { + continue + } + if fn.Doc == nil { + docs[fn.Name.Name] = "" + continue + } + docs[fn.Name.Name] = fn.Doc.Text() + } + return docs +} + +func thinkingSourceDir() (string, error) { + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + return "", os.ErrNotExist + } + return filepath.Dir(thisFile), nil +} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index 0a72b4c5..c02cadaa 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -7,15 +7,30 @@ import ( ) const ( - ThinkingBudgetMetadataKey = "thinking_budget" - ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts" - ReasoningEffortMetadataKey = "reasoning_effort" - ThinkingOriginalModelMetadataKey = "thinking_original_model" + // Deprecated: No longer used. Thinking configuration is now passed via + // model name suffix and processed by thinking.ApplyThinking(). + ThinkingBudgetMetadataKey = "thinking_budget" + + // Deprecated: No longer used. See ThinkingBudgetMetadataKey. + ThinkingIncludeThoughtsMetadataKey = "thinking_include_thoughts" + + // Deprecated: No longer used. See ThinkingBudgetMetadataKey. + ReasoningEffortMetadataKey = "reasoning_effort" + + // Deprecated: No longer used. The original model name (with suffix) is now + // preserved directly in the model field. Use thinking.ParseSuffix() to + // extract the base model name if needed. + ThinkingOriginalModelMetadataKey = "thinking_original_model" + + // ModelMappingOriginalModelMetadataKey stores the client-requested model alias + // for OAuth model name mappings. This is NOT deprecated. 
ModelMappingOriginalModelMetadataKey = "model_mapping_original_model" ) // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns // the normalized base model with extracted metadata. Supported pattern: +// +// Deprecated: Use thinking.ParseSuffix instead. // - "()" where value can be: // - A numeric budget (e.g., "(8192)", "(16384)") // - A reasoning effort level (e.g., "(high)", "(medium)", "(low)") @@ -89,6 +104,8 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { // ThinkingFromMetadata extracts thinking overrides from metadata produced by NormalizeThinkingModel. // It accepts both the new generic keys and legacy Gemini-specific keys. +// +// Deprecated: Access ThinkingConfig fields directly. func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) { if len(metadata) == 0 { return nil, nil, nil, false @@ -159,6 +176,8 @@ func ThinkingFromMetadata(metadata map[string]any) (*int, *bool, *string, bool) // ResolveThinkingConfigFromMetadata derives thinking budget/include overrides, // converting reasoning effort strings into budgets when possible. +// +// Deprecated: Use thinking.ApplyThinking instead. func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (*int, *bool, bool) { budget, include, effort, matched := ThinkingFromMetadata(metadata) if !matched { @@ -180,6 +199,8 @@ func ResolveThinkingConfigFromMetadata(model string, metadata map[string]any) (* // ReasoningEffortFromMetadata resolves a reasoning effort string from metadata, // inferring "auto" and "none" when budgets request dynamic or disabled thinking. +// +// Deprecated: Use thinking.ConvertBudgetToLevel instead. 
func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { budget, include, effort, matched := ThinkingFromMetadata(metadata) if !matched { @@ -204,6 +225,8 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { // ResolveOriginalModel returns the original model name stored in metadata (if present), // otherwise falls back to the provided model. +// +// Deprecated: Parse model suffix with thinking.ParseSuffix. func ResolveOriginalModel(model string, metadata map[string]any) string { normalize := func(name string) string { if name == "" { diff --git a/internal/util/thinking_text.go b/internal/util/thinking_text.go index c36d202d..7ebb76fc 100644 --- a/internal/util/thinking_text.go +++ b/internal/util/thinking_text.go @@ -11,6 +11,8 @@ import ( // - Wrapped object: { "thinking": { "text": "text", "cache_control": {...} } } // - Gemini-style: { "thought": true, "text": "text" } // Returns the extracted text string. +// +// Deprecated: Use thinking package for thinking text extraction. func GetThinkingText(part gjson.Result) string { // Try direct text field first (Gemini-style) if text := part.Get("text"); text.Exists() && text.Type == gjson.String { @@ -42,6 +44,8 @@ func GetThinkingText(part gjson.Result) string { } // GetThinkingTextFromJSON extracts thinking text from a raw JSON string. +// +// Deprecated: Use thinking package for thinking text extraction. func GetThinkingTextFromJSON(jsonStr string) string { return GetThinkingText(gjson.Parse(jsonStr)) } @@ -49,6 +53,8 @@ func GetThinkingTextFromJSON(jsonStr string) string { // SanitizeThinkingPart normalizes a thinking part to a canonical form. // Strips cache_control and other non-essential fields. // Returns the sanitized part as JSON string. +// +// Deprecated: Use thinking package for thinking part sanitization. 
func SanitizeThinkingPart(part gjson.Result) string { // Gemini-style: { thought: true, text, thoughtSignature } if part.Get("thought").Bool() { @@ -79,6 +85,8 @@ func SanitizeThinkingPart(part gjson.Result) string { } // StripCacheControl removes cache_control and providerOptions from a JSON object. +// +// Deprecated: Use thinking package for cache control stripping. func StripCacheControl(jsonStr string) string { result := jsonStr result, _ = sjson.Delete(result, "cache_control") diff --git a/sdk/api/handlers/handlers.go b/sdk/api/handlers/handlers.go index 6160b9bd..232f0b95 100644 --- a/sdk/api/handlers/handlers.go +++ b/sdk/api/handlers/handlers.go @@ -16,6 +16,7 @@ import ( "github.com/google/uuid" "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" @@ -379,7 +380,7 @@ func appendAPIResponse(c *gin.Context, data []byte) { // ExecuteWithAuthManager executes a non-streaming request via the core auth manager. // This path is the only supported execution route. 
func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) { - providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName) + providers, normalizedModel, errMsg := h.getRequestDetails(modelName) if errMsg != nil { return nil, errMsg } @@ -388,16 +389,13 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType Model: normalizedModel, Payload: cloneBytes(rawJSON), } - if cloned := cloneMetadata(metadata); cloned != nil { - req.Metadata = cloned - } opts := coreexecutor.Options{ Stream: false, Alt: alt, OriginalRequest: cloneBytes(rawJSON), SourceFormat: sdktranslator.FromString(handlerType), } - opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta) + opts.Metadata = reqMeta resp, err := h.AuthManager.Execute(ctx, providers, req, opts) if err != nil { status := http.StatusInternalServerError @@ -420,7 +418,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType // ExecuteCountWithAuthManager executes a non-streaming request via the core auth manager. // This path is the only supported execution route. 
func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) { - providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName) + providers, normalizedModel, errMsg := h.getRequestDetails(modelName) if errMsg != nil { return nil, errMsg } @@ -429,16 +427,13 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle Model: normalizedModel, Payload: cloneBytes(rawJSON), } - if cloned := cloneMetadata(metadata); cloned != nil { - req.Metadata = cloned - } opts := coreexecutor.Options{ Stream: false, Alt: alt, OriginalRequest: cloneBytes(rawJSON), SourceFormat: sdktranslator.FromString(handlerType), } - opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta) + opts.Metadata = reqMeta resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts) if err != nil { status := http.StatusInternalServerError @@ -461,7 +456,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle // ExecuteStreamWithAuthManager executes a streaming request via the core auth manager. // This path is the only supported execution route. 
func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) (<-chan []byte, <-chan *interfaces.ErrorMessage) { - providers, normalizedModel, metadata, errMsg := h.getRequestDetails(modelName) + providers, normalizedModel, errMsg := h.getRequestDetails(modelName) if errMsg != nil { errChan := make(chan *interfaces.ErrorMessage, 1) errChan <- errMsg @@ -473,16 +468,13 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl Model: normalizedModel, Payload: cloneBytes(rawJSON), } - if cloned := cloneMetadata(metadata); cloned != nil { - req.Metadata = cloned - } opts := coreexecutor.Options{ Stream: true, Alt: alt, OriginalRequest: cloneBytes(rawJSON), SourceFormat: sdktranslator.FromString(handlerType), } - opts.Metadata = mergeMetadata(cloneMetadata(metadata), reqMeta) + opts.Metadata = reqMeta chunks, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts) if err != nil { errChan := make(chan *interfaces.ErrorMessage, 1) @@ -595,38 +587,40 @@ func statusFromError(err error) int { return 0 } -func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) { - // Resolve "auto" model to an actual available model first - resolvedModelName := util.ResolveAutoModel(modelName) - - // Normalize the model name to handle dynamic thinking suffixes before determining the provider. - normalizedModel, metadata = normalizeModelMetadata(resolvedModelName) - - // Use the normalizedModel to get the provider name. 
- providers = util.GetProviderName(normalizedModel) - if len(providers) == 0 && metadata != nil { - if originalRaw, ok := metadata[util.ThinkingOriginalModelMetadataKey]; ok { - if originalModel, okStr := originalRaw.(string); okStr { - originalModel = strings.TrimSpace(originalModel) - if originalModel != "" && !strings.EqualFold(originalModel, normalizedModel) { - if altProviders := util.GetProviderName(originalModel); len(altProviders) > 0 { - providers = altProviders - normalizedModel = originalModel - } - } - } +func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, err *interfaces.ErrorMessage) { + resolvedModelName := modelName + initialSuffix := thinking.ParseSuffix(modelName) + if initialSuffix.ModelName == "auto" { + resolvedBase := util.ResolveAutoModel(initialSuffix.ModelName) + if initialSuffix.HasSuffix { + resolvedModelName = fmt.Sprintf("%s(%s)", resolvedBase, initialSuffix.RawSuffix) + } else { + resolvedModelName = resolvedBase } + } else { + resolvedModelName = util.ResolveAutoModel(modelName) + } + + parsed := thinking.ParseSuffix(resolvedModelName) + baseModel := strings.TrimSpace(parsed.ModelName) + + providers = util.GetProviderName(baseModel) + // Fallback: if baseModel has no provider but differs from resolvedModelName, + // try using the full model name. This handles edge cases where custom models + // may be registered with their full suffixed name (e.g., "my-model(8192)"). + // Evaluated in Story 11.8: This fallback is intentionally preserved to support + // custom model registrations that include thinking suffixes. 
+ if len(providers) == 0 && baseModel != resolvedModelName { + providers = util.GetProviderName(resolvedModelName) } if len(providers) == 0 { - return nil, "", nil, &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)} + return nil, "", &interfaces.ErrorMessage{StatusCode: http.StatusBadRequest, Error: fmt.Errorf("unknown provider for model %s", modelName)} } - // If it's a dynamic model, the normalizedModel was already set to extractedModelName. - // If it's a non-dynamic model, normalizedModel was set by normalizeModelMetadata. - // So, normalizedModel is already correctly set at this point. - - return providers, normalizedModel, metadata, nil + // The thinking suffix is preserved in the model name itself, so no + // metadata-based configuration passing is needed. + return providers, resolvedModelName, nil } func cloneBytes(src []byte) []byte { @@ -638,10 +632,6 @@ func cloneBytes(src []byte) []byte { return dst } -func normalizeModelMetadata(modelName string) (string, map[string]any) { - return util.NormalizeThinkingModel(modelName) -} - func cloneMetadata(src map[string]any) map[string]any { if len(src) == 0 { return nil diff --git a/sdk/api/handlers/handlers_request_details_test.go b/sdk/api/handlers/handlers_request_details_test.go new file mode 100644 index 00000000..b0f6b132 --- /dev/null +++ b/sdk/api/handlers/handlers_request_details_test.go @@ -0,0 +1,118 @@ +package handlers + +import ( + "reflect" + "testing" + "time" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" + sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config" +) + +func TestGetRequestDetails_PreservesSuffix(t *testing.T) { + modelRegistry := registry.GetGlobalRegistry() + now := time.Now().Unix() + + modelRegistry.RegisterClient("test-request-details-gemini", "gemini", []*registry.ModelInfo{ + {ID: "gemini-2.5-pro", Created: now 
+ 30}, + {ID: "gemini-2.5-flash", Created: now + 25}, + }) + modelRegistry.RegisterClient("test-request-details-openai", "openai", []*registry.ModelInfo{ + {ID: "gpt-5.2", Created: now + 20}, + }) + modelRegistry.RegisterClient("test-request-details-claude", "claude", []*registry.ModelInfo{ + {ID: "claude-sonnet-4-5", Created: now + 5}, + }) + + // Ensure cleanup of all test registrations. + clientIDs := []string{ + "test-request-details-gemini", + "test-request-details-openai", + "test-request-details-claude", + } + for _, clientID := range clientIDs { + id := clientID + t.Cleanup(func() { + modelRegistry.UnregisterClient(id) + }) + } + + handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{}, coreauth.NewManager(nil, nil, nil)) + + tests := []struct { + name string + inputModel string + wantProviders []string + wantModel string + wantErr bool + }{ + { + name: "numeric suffix preserved", + inputModel: "gemini-2.5-pro(8192)", + wantProviders: []string{"gemini"}, + wantModel: "gemini-2.5-pro(8192)", + wantErr: false, + }, + { + name: "level suffix preserved", + inputModel: "gpt-5.2(high)", + wantProviders: []string{"openai"}, + wantModel: "gpt-5.2(high)", + wantErr: false, + }, + { + name: "no suffix unchanged", + inputModel: "claude-sonnet-4-5", + wantProviders: []string{"claude"}, + wantModel: "claude-sonnet-4-5", + wantErr: false, + }, + { + name: "unknown model with suffix", + inputModel: "unknown-model(8192)", + wantProviders: nil, + wantModel: "", + wantErr: true, + }, + { + name: "auto suffix resolved", + inputModel: "auto(high)", + wantProviders: []string{"gemini"}, + wantModel: "gemini-2.5-pro(high)", + wantErr: false, + }, + { + name: "special suffix none preserved", + inputModel: "gemini-2.5-flash(none)", + wantProviders: []string{"gemini"}, + wantModel: "gemini-2.5-flash(none)", + wantErr: false, + }, + { + name: "special suffix auto preserved", + inputModel: "claude-sonnet-4-5(auto)", + wantProviders: []string{"claude"}, + wantModel: 
"claude-sonnet-4-5(auto)", + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + providers, model, errMsg := handler.getRequestDetails(tt.inputModel) + if (errMsg != nil) != tt.wantErr { + t.Fatalf("getRequestDetails() error = %v, wantErr %v", errMsg, tt.wantErr) + } + if errMsg != nil { + return + } + if !reflect.DeepEqual(providers, tt.wantProviders) { + t.Fatalf("getRequestDetails() providers = %v, want %v", providers, tt.wantProviders) + } + if model != tt.wantModel { + t.Fatalf("getRequestDetails() model = %v, want %v", model, tt.wantModel) + } + }) + } +} diff --git a/sdk/cliproxy/auth/api_key_model_mappings_test.go b/sdk/cliproxy/auth/api_key_model_mappings_test.go new file mode 100644 index 00000000..fb4dbe86 --- /dev/null +++ b/sdk/cliproxy/auth/api_key_model_mappings_test.go @@ -0,0 +1,201 @@ +package auth + +import ( + "context" + "testing" + + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +func TestLookupAPIKeyUpstreamModel(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + { + APIKey: "k", + BaseURL: "https://example.com", + Models: []internalconfig.GeminiModel{ + {Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}, + {Name: "gemini-2.5-flash(low)", Alias: "g25f"}, + }, + }, + }, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + _, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k", "base_url": "https://example.com"}}) + + tests := []struct { + name string + authID string + input string + want string + }{ + // Fast path + suffix preservation + {"alias with suffix", "a1", "g25p(8192)", "gemini-2.5-pro-exp-03-25(8192)"}, + {"alias without suffix", "a1", "g25p", "gemini-2.5-pro-exp-03-25"}, + + // Config suffix takes priority + {"config suffix priority", "a1", "g25f(high)", "gemini-2.5-flash(low)"}, + {"config suffix no user suffix", "a1", 
"g25f", "gemini-2.5-flash(low)"}, + + // Case insensitive + {"uppercase alias", "a1", "G25P", "gemini-2.5-pro-exp-03-25"}, + {"mixed case with suffix", "a1", "G25p(4096)", "gemini-2.5-pro-exp-03-25(4096)"}, + + // Direct name lookup + {"upstream name direct", "a1", "gemini-2.5-pro-exp-03-25", "gemini-2.5-pro-exp-03-25"}, + {"upstream name with suffix", "a1", "gemini-2.5-pro-exp-03-25(8192)", "gemini-2.5-pro-exp-03-25(8192)"}, + + // Cache miss scenarios + {"non-existent auth", "non-existent", "g25p", ""}, + {"unknown alias", "a1", "unknown-alias", ""}, + {"empty auth ID", "", "g25p", ""}, + {"empty model", "a1", "", ""}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input) + if resolved != tt.want { + t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want) + } + }) + } +} + +func TestAPIKeyModelMappings_ConfigHotReload(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + { + APIKey: "k", + Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}, + }, + }, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + _, _ = mgr.Register(ctx, &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}}) + + // Initial mapping + if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-pro-exp-03-25" { + t.Fatalf("before reload: got %q, want %q", resolved, "gemini-2.5-pro-exp-03-25") + } + + // Hot reload with new mapping + mgr.SetConfig(&internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + { + APIKey: "k", + Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-flash", Alias: "g25p"}}, + }, + }, + }) + + // New mapping should take effect + if resolved := mgr.lookupAPIKeyUpstreamModel("a1", "g25p"); resolved != "gemini-2.5-flash" { + t.Fatalf("after reload: got %q, want %q", 
resolved, "gemini-2.5-flash") + } +} + +func TestAPIKeyModelMappings_MultipleProviders(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{{APIKey: "gemini-key", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro", Alias: "gp"}}}}, + ClaudeKey: []internalconfig.ClaudeKey{{APIKey: "claude-key", Models: []internalconfig.ClaudeModel{{Name: "claude-sonnet-4", Alias: "cs4"}}}}, + CodexKey: []internalconfig.CodexKey{{APIKey: "codex-key", Models: []internalconfig.CodexModel{{Name: "o3", Alias: "o"}}}}, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + _, _ = mgr.Register(ctx, &Auth{ID: "gemini-auth", Provider: "gemini", Attributes: map[string]string{"api_key": "gemini-key"}}) + _, _ = mgr.Register(ctx, &Auth{ID: "claude-auth", Provider: "claude", Attributes: map[string]string{"api_key": "claude-key"}}) + _, _ = mgr.Register(ctx, &Auth{ID: "codex-auth", Provider: "codex", Attributes: map[string]string{"api_key": "codex-key"}}) + + tests := []struct { + authID, input, want string + }{ + {"gemini-auth", "gp", "gemini-2.5-pro"}, + {"claude-auth", "cs4", "claude-sonnet-4"}, + {"codex-auth", "o", "o3"}, + } + + for _, tt := range tests { + if resolved := mgr.lookupAPIKeyUpstreamModel(tt.authID, tt.input); resolved != tt.want { + t.Errorf("lookupAPIKeyUpstreamModel(%q, %q) = %q, want %q", tt.authID, tt.input, resolved, tt.want) + } + } +} + +func TestApplyAPIKeyModelMapping(t *testing.T) { + cfg := &internalconfig.Config{ + GeminiKey: []internalconfig.GeminiKey{ + {APIKey: "k", Models: []internalconfig.GeminiModel{{Name: "gemini-2.5-pro-exp-03-25", Alias: "g25p"}}}, + }, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(cfg) + + ctx := context.Background() + apiKeyAuth := &Auth{ID: "a1", Provider: "gemini", Attributes: map[string]string{"api_key": "k"}} + oauthAuth := &Auth{ID: "oauth-auth", Provider: "gemini", Attributes: map[string]string{"auth_kind": "oauth"}} + _, _ = 
mgr.Register(ctx, apiKeyAuth) + + tests := []struct { + name string + auth *Auth + inputModel string + wantModel string + wantOriginal string + expectMapping bool + }{ + { + name: "api_key auth with alias", + auth: apiKeyAuth, + inputModel: "g25p(8192)", + wantModel: "gemini-2.5-pro-exp-03-25(8192)", + wantOriginal: "g25p(8192)", + expectMapping: true, + }, + { + name: "oauth auth passthrough", + auth: oauthAuth, + inputModel: "some-model", + wantModel: "some-model", + expectMapping: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metadata := map[string]any{"existing": "value"} + resolvedModel, resultMeta := mgr.applyAPIKeyModelMapping(tt.auth, tt.inputModel, metadata) + + if resolvedModel != tt.wantModel { + t.Errorf("model = %q, want %q", resolvedModel, tt.wantModel) + } + + if resultMeta["existing"] != "value" { + t.Error("existing metadata not preserved") + } + + original, hasOriginal := resultMeta["model_mapping_original_model"].(string) + if tt.expectMapping { + if !hasOriginal || original != tt.wantOriginal { + t.Errorf("original model = %q, want %q", original, tt.wantOriginal) + } + } else { + if hasOriginal { + t.Error("should not set model_mapping_original_model for non-api_key auth") + } + } + }) + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index dc7bc10b..5b1339be 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -15,8 +15,10 @@ import ( "time" "github.com/google/uuid" + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/logging" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" log "github.com/sirupsen/logrus" @@ -120,6 +122,14 @@ type Manager struct { // 
modelNameMappings stores global model name alias mappings (alias -> upstream name) keyed by channel. modelNameMappings atomic.Value + // runtimeConfig stores the latest application config for request-time decisions. + // It is initialized in NewManager; never Load() before first Store(). + runtimeConfig atomic.Value + + // apiKeyModelMappings caches resolved model alias mappings for API-key auths. + // Keyed by auth.ID, value is alias(lower) -> upstream model (including suffix). + apiKeyModelMappings atomic.Value + // Optional HTTP RoundTripper provider injected by host. rtProvider RoundTripperProvider @@ -135,7 +145,7 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager { if hook == nil { hook = NoopHook{} } - return &Manager{ + manager := &Manager{ store: store, executors: make(map[string]ProviderExecutor), selector: selector, @@ -143,6 +153,10 @@ func NewManager(store Store, selector Selector, hook Hook) *Manager { auths: make(map[string]*Auth), providerOffsets: make(map[string]int), } + // atomic.Value requires non-nil initial value. + manager.runtimeConfig.Store(&internalconfig.Config{}) + manager.apiKeyModelMappings.Store(apiKeyModelMappingTable(nil)) + return manager } func (m *Manager) SetSelector(selector Selector) { @@ -171,6 +185,181 @@ func (m *Manager) SetRoundTripperProvider(p RoundTripperProvider) { m.mu.Unlock() } +// SetConfig updates the runtime config snapshot used by request-time helpers. +// Callers should provide the latest config on reload so per-credential alias mapping stays in sync. 
+func (m *Manager) SetConfig(cfg *internalconfig.Config) { + if m == nil { + return + } + if cfg == nil { + cfg = &internalconfig.Config{} + } + m.runtimeConfig.Store(cfg) + m.rebuildAPIKeyModelMappingsFromRuntimeConfig() +} + +func (m *Manager) lookupAPIKeyUpstreamModel(authID, requestedModel string) string { + if m == nil { + return "" + } + authID = strings.TrimSpace(authID) + if authID == "" { + return "" + } + requestedModel = strings.TrimSpace(requestedModel) + if requestedModel == "" { + return "" + } + table, _ := m.apiKeyModelMappings.Load().(apiKeyModelMappingTable) + if table == nil { + return "" + } + byAlias := table[authID] + if len(byAlias) == 0 { + return "" + } + key := strings.ToLower(thinking.ParseSuffix(requestedModel).ModelName) + if key == "" { + key = strings.ToLower(requestedModel) + } + resolved := strings.TrimSpace(byAlias[key]) + if resolved == "" { + return "" + } + // Preserve thinking suffix from the client's requested model unless config already has one. + requestResult := thinking.ParseSuffix(requestedModel) + if thinking.ParseSuffix(resolved).HasSuffix { + return resolved + } + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return resolved + "(" + requestResult.RawSuffix + ")" + } + return resolved + +} + +func (m *Manager) rebuildAPIKeyModelMappingsFromRuntimeConfig() { + if m == nil { + return + } + cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config) + if cfg == nil { + cfg = &internalconfig.Config{} + } + m.mu.Lock() + defer m.mu.Unlock() + m.rebuildAPIKeyModelMappingsLocked(cfg) +} + +func (m *Manager) rebuildAPIKeyModelMappingsLocked(cfg *internalconfig.Config) { + if m == nil { + return + } + if cfg == nil { + cfg = &internalconfig.Config{} + } + + out := make(apiKeyModelMappingTable) + for _, auth := range m.auths { + if auth == nil { + continue + } + if strings.TrimSpace(auth.ID) == "" { + continue + } + kind, _ := auth.AccountInfo() + if !strings.EqualFold(strings.TrimSpace(kind), "api_key") { + 
continue + } + + byAlias := make(map[string]string) + provider := strings.ToLower(strings.TrimSpace(auth.Provider)) + switch provider { + case "gemini": + if entry := resolveGeminiAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + case "claude": + if entry := resolveClaudeAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + case "codex": + if entry := resolveCodexAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + case "vertex": + if entry := resolveVertexAPIKeyConfig(cfg, auth); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + default: + // OpenAI-compat uses config selection from auth.Attributes. + providerKey := "" + compatName := "" + if auth.Attributes != nil { + providerKey = strings.TrimSpace(auth.Attributes["provider_key"]) + compatName = strings.TrimSpace(auth.Attributes["compat_name"]) + } + if compatName != "" || strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") { + if entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider); entry != nil { + compileAPIKeyModelMappingsForModels(byAlias, entry.Models) + } + } + } + + if len(byAlias) > 0 { + out[auth.ID] = byAlias + } + } + + m.apiKeyModelMappings.Store(out) +} + +func compileAPIKeyModelMappingsForModels[T interface { + GetName() string + GetAlias() string +}](out map[string]string, models []T) { + if out == nil { + return + } + for i := range models { + alias := strings.TrimSpace(models[i].GetAlias()) + name := strings.TrimSpace(models[i].GetName()) + if alias == "" || name == "" { + continue + } + aliasKey := strings.ToLower(thinking.ParseSuffix(alias).ModelName) + if aliasKey == "" { + aliasKey = strings.ToLower(alias) + } + // Config priority: first alias wins. 
+ if _, exists := out[aliasKey]; exists { + continue + } + out[aliasKey] = name + // Also allow direct lookup by upstream name (case-insensitive), so lookups on already-upstream + // models remain a cheap no-op. + nameKey := strings.ToLower(thinking.ParseSuffix(name).ModelName) + if nameKey == "" { + nameKey = strings.ToLower(name) + } + if nameKey != "" { + if _, exists := out[nameKey]; !exists { + out[nameKey] = name + } + } + // Preserve config suffix priority by seeding a base-name lookup when name already has suffix. + nameResult := thinking.ParseSuffix(name) + if nameResult.HasSuffix { + baseKey := strings.ToLower(strings.TrimSpace(nameResult.ModelName)) + if baseKey != "" { + if _, exists := out[baseKey]; !exists { + out[baseKey] = name + } + } + } + } +} + // SetRetryConfig updates retry attempts and cooldown wait interval. func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) { if m == nil { @@ -219,6 +408,7 @@ func (m *Manager) Register(ctx context.Context, auth *Auth) (*Auth, error) { m.mu.Lock() m.auths[auth.ID] = auth.Clone() m.mu.Unlock() + m.rebuildAPIKeyModelMappingsFromRuntimeConfig() _ = m.persist(ctx, auth) m.hook.OnAuthRegistered(ctx, auth.Clone()) return auth.Clone(), nil @@ -237,6 +427,7 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) { auth.EnsureIndex() m.auths[auth.ID] = auth.Clone() m.mu.Unlock() + m.rebuildAPIKeyModelMappingsFromRuntimeConfig() _ = m.persist(ctx, auth) m.hook.OnAuthUpdated(ctx, auth.Clone()) return auth.Clone(), nil @@ -261,6 +452,11 @@ func (m *Manager) Load(ctx context.Context) error { auth.EnsureIndex() m.auths[auth.ID] = auth.Clone() } + cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config) + if cfg == nil { + cfg = &internalconfig.Config{} + } + m.rebuildAPIKeyModelMappingsLocked(cfg) return nil } @@ -558,6 +754,7 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req execReq := req execReq.Model, execReq.Metadata = 
rewriteModelForAuth(routeModel, req.Metadata, auth) execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -606,6 +803,7 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -654,6 +852,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyAPIKeyModelMapping(auth, execReq.Model, execReq.Metadata) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} @@ -712,7 +911,6 @@ func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string] return metadata } keys := []string{ - util.ThinkingOriginalModelMetadataKey, util.GeminiOriginalModelMetadataKey, util.ModelMappingOriginalModelMetadataKey, } @@ -740,6 +938,215 @@ func stripPrefixFromMetadata(metadata map[string]any, needle string) map[string] return out } +func (m *Manager) applyAPIKeyModelMapping(auth *Auth, 
requestedModel string, metadata map[string]any) (string, map[string]any) { + if m == nil || auth == nil { + return requestedModel, metadata + } + + kind, _ := auth.AccountInfo() + if !strings.EqualFold(strings.TrimSpace(kind), "api_key") { + return requestedModel, metadata + } + + requestedModel = strings.TrimSpace(requestedModel) + if requestedModel == "" { + return requestedModel, metadata + } + + // Fast path: lookup per-auth mapping table (keyed by auth.ID). + if resolved := m.lookupAPIKeyUpstreamModel(auth.ID, requestedModel); resolved != "" { + return applyUpstreamModelOverride(requestedModel, resolved, metadata) + } + + // Slow path: scan config for the matching credential entry and resolve alias. + // This acts as a safety net if mappings are stale or auth.ID is missing. + cfg, _ := m.runtimeConfig.Load().(*internalconfig.Config) + if cfg == nil { + cfg = &internalconfig.Config{} + } + + provider := strings.ToLower(strings.TrimSpace(auth.Provider)) + upstreamModel := "" + switch provider { + case "gemini": + upstreamModel = resolveUpstreamModelForGeminiAPIKey(cfg, auth, requestedModel) + case "claude": + upstreamModel = resolveUpstreamModelForClaudeAPIKey(cfg, auth, requestedModel) + case "codex": + upstreamModel = resolveUpstreamModelForCodexAPIKey(cfg, auth, requestedModel) + case "vertex": + upstreamModel = resolveUpstreamModelForVertexAPIKey(cfg, auth, requestedModel) + default: + upstreamModel = resolveUpstreamModelForOpenAICompatAPIKey(cfg, auth, requestedModel) + } + + // applyUpstreamModelOverride lives in model_name_mappings.go. + return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata) +} + +// APIKeyConfigEntry is a generic interface for API key configurations. 
+type APIKeyConfigEntry interface { + GetAPIKey() string + GetBaseURL() string +} + +func resolveAPIKeyConfig[T APIKeyConfigEntry](entries []T, auth *Auth) *T { + if auth == nil || len(entries) == 0 { + return nil + } + attrKey, attrBase := "", "" + if auth.Attributes != nil { + attrKey = strings.TrimSpace(auth.Attributes["api_key"]) + attrBase = strings.TrimSpace(auth.Attributes["base_url"]) + } + for i := range entries { + entry := &entries[i] + cfgKey := strings.TrimSpace((*entry).GetAPIKey()) + cfgBase := strings.TrimSpace((*entry).GetBaseURL()) + if attrKey != "" && attrBase != "" { + if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { + return entry + } + continue + } + if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { + if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { + return entry + } + } + if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { + return entry + } + } + if attrKey != "" { + for i := range entries { + entry := &entries[i] + if strings.EqualFold(strings.TrimSpace((*entry).GetAPIKey()), attrKey) { + return entry + } + } + } + return nil +} + +func resolveGeminiAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.GeminiKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.GeminiKey, auth) +} + +func resolveClaudeAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.ClaudeKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.ClaudeKey, auth) +} + +func resolveCodexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.CodexKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.CodexKey, auth) +} + +func resolveVertexAPIKeyConfig(cfg *internalconfig.Config, auth *Auth) *internalconfig.VertexCompatKey { + if cfg == nil { + return nil + } + return resolveAPIKeyConfig(cfg.VertexCompatAPIKey, auth) +} + +func resolveUpstreamModelForGeminiAPIKey(cfg *internalconfig.Config, auth 
*Auth, requestedModel string) string { + entry := resolveGeminiAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForClaudeAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveClaudeAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForCodexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveCodexAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForVertexAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + entry := resolveVertexAPIKeyConfig(cfg, auth) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +func resolveUpstreamModelForOpenAICompatAPIKey(cfg *internalconfig.Config, auth *Auth, requestedModel string) string { + providerKey := "" + compatName := "" + if auth != nil && len(auth.Attributes) > 0 { + providerKey = strings.TrimSpace(auth.Attributes["provider_key"]) + compatName = strings.TrimSpace(auth.Attributes["compat_name"]) + } + if compatName == "" && !strings.EqualFold(strings.TrimSpace(auth.Provider), "openai-compatibility") { + return "" + } + entry := resolveOpenAICompatConfig(cfg, providerKey, compatName, auth.Provider) + if entry == nil { + return "" + } + return resolveModelAliasFromConfigModels(requestedModel, asModelAliasEntries(entry.Models)) +} + +type apiKeyModelMappingTable map[string]map[string]string + +func resolveOpenAICompatConfig(cfg *internalconfig.Config, providerKey, compatName, authProvider string) *internalconfig.OpenAICompatibility { + if 
cfg == nil { + return nil + } + candidates := make([]string, 0, 3) + if v := strings.TrimSpace(compatName); v != "" { + candidates = append(candidates, v) + } + if v := strings.TrimSpace(providerKey); v != "" { + candidates = append(candidates, v) + } + if v := strings.TrimSpace(authProvider); v != "" { + candidates = append(candidates, v) + } + for i := range cfg.OpenAICompatibility { + compat := &cfg.OpenAICompatibility[i] + for _, candidate := range candidates { + if candidate != "" && strings.EqualFold(strings.TrimSpace(candidate), compat.Name) { + return compat + } + } + } + return nil +} + +func asModelAliasEntries[T interface { + GetName() string + GetAlias() string +}](models []T) []modelMappingEntry { + if len(models) == 0 { + return nil + } + out := make([]modelMappingEntry, 0, len(models)) + for i := range models { + out = append(out, models[i]) + } + return out +} + func (m *Manager) normalizeProviders(providers []string) []string { if len(providers) == 0 { return nil diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go index 03380c09..7fac0b5b 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/model_name_mappings.go @@ -4,9 +4,15 @@ import ( "strings" internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" ) +type modelMappingEntry interface { + GetName() string + GetAlias() string +} + type modelNameMappingTable struct { // reverse maps channel -> alias (lower) -> original upstream model name. reverse map[string]map[string]string @@ -71,9 +77,14 @@ func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.Mod // requested model for response translation. 
func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) { upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel) + return applyUpstreamModelOverride(requestedModel, upstreamModel, metadata) +} + +func applyUpstreamModelOverride(requestedModel, upstreamModel string, metadata map[string]any) (string, map[string]any) { if upstreamModel == "" { return requestedModel, metadata } + out := make(map[string]any, 1) if len(metadata) > 0 { out = make(map[string]any, len(metadata)+1) @@ -81,24 +92,92 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta out[k] = v } } - // Store the requested alias (e.g., "gp") so downstream can use it to look up - // model metadata from the global registry where it was registered under this alias. + + // Preserve the original client model string (including any suffix) for downstream. out[util.ModelMappingOriginalModelMetadataKey] = requestedModel return upstreamModel, out } +func resolveModelAliasFromConfigModels(requestedModel string, models []modelMappingEntry) string { + requestedModel = strings.TrimSpace(requestedModel) + if requestedModel == "" { + return "" + } + if len(models) == 0 { + return "" + } + + requestResult := thinking.ParseSuffix(requestedModel) + base := requestResult.ModelName + candidates := []string{base} + if base != requestedModel { + candidates = append(candidates, requestedModel) + } + + preserveSuffix := func(resolved string) string { + resolved = strings.TrimSpace(resolved) + if resolved == "" { + return "" + } + if thinking.ParseSuffix(resolved).HasSuffix { + return resolved + } + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return resolved + "(" + requestResult.RawSuffix + ")" + } + return resolved + } + + for i := range models { + name := strings.TrimSpace(models[i].GetName()) + alias := strings.TrimSpace(models[i].GetAlias()) + for _, candidate := range candidates { + if candidate 
== "" { + continue + } + if alias != "" && strings.EqualFold(alias, candidate) { + if name != "" { + return preserveSuffix(name) + } + return preserveSuffix(candidate) + } + if name != "" && strings.EqualFold(name, candidate) { + return preserveSuffix(name) + } + } + } + return "" +} + +// resolveOAuthUpstreamModel resolves the upstream model name from OAuth model mappings. +// If a mapping exists, returns the original (upstream) model name that corresponds +// to the requested alias. +// +// If the requested model contains a thinking suffix (e.g., "gemini-2.5-pro(8192)"), +// the suffix is preserved in the returned model name. However, if the mapping's +// original name already contains a suffix, the config suffix takes priority. func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string { + return resolveUpstreamModelFromMappingTable(m, auth, requestedModel, modelMappingChannel(auth)) +} + +func resolveUpstreamModelFromMappingTable(m *Manager, auth *Auth, requestedModel, channel string) string { if m == nil || auth == nil { return "" } - channel := modelMappingChannel(auth) if channel == "" { return "" } - key := strings.ToLower(strings.TrimSpace(requestedModel)) - if key == "" { - return "" + + // Extract thinking suffix from requested model using ParseSuffix + requestResult := thinking.ParseSuffix(requestedModel) + baseModel := requestResult.ModelName + + // Candidate keys to match: base model and raw input (handles suffix-parsing edge cases). 
+ candidates := []string{baseModel} + if baseModel != requestedModel { + candidates = append(candidates, requestedModel) } + raw := m.modelNameMappings.Load() table, _ := raw.(*modelNameMappingTable) if table == nil || table.reverse == nil { @@ -108,11 +187,32 @@ func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) s if rev == nil { return "" } - original := strings.TrimSpace(rev[key]) - if original == "" || strings.EqualFold(original, requestedModel) { - return "" + + for _, candidate := range candidates { + key := strings.ToLower(strings.TrimSpace(candidate)) + if key == "" { + continue + } + original := strings.TrimSpace(rev[key]) + if original == "" { + continue + } + if strings.EqualFold(original, baseModel) { + return "" + } + + // If config already has suffix, it takes priority. + if thinking.ParseSuffix(original).HasSuffix { + return original + } + // Preserve user's thinking suffix on the resolved model. + if requestResult.HasSuffix && requestResult.RawSuffix != "" { + return original + "(" + requestResult.RawSuffix + ")" + } + return original } - return original + + return "" } // modelMappingChannel extracts the OAuth model mapping channel from an Auth object. 
diff --git a/sdk/cliproxy/auth/model_name_mappings_test.go b/sdk/cliproxy/auth/model_name_mappings_test.go new file mode 100644 index 00000000..121450cc --- /dev/null +++ b/sdk/cliproxy/auth/model_name_mappings_test.go @@ -0,0 +1,187 @@ +package auth + +import ( + "testing" + + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +func TestResolveOAuthUpstreamModel_SuffixPreservation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + mappings map[string][]internalconfig.ModelNameMapping + channel string + input string + want string + }{ + { + name: "numeric suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(8192)", + want: "gemini-2.5-pro-exp-03-25(8192)", + }, + { + name: "level suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "claude": {{Name: "claude-sonnet-4-5-20250514", Alias: "claude-sonnet-4-5"}}, + }, + channel: "claude", + input: "claude-sonnet-4-5(high)", + want: "claude-sonnet-4-5-20250514(high)", + }, + { + name: "no suffix unchanged", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro", + want: "gemini-2.5-pro-exp-03-25", + }, + { + name: "config suffix takes priority", + mappings: map[string][]internalconfig.ModelNameMapping{ + "claude": {{Name: "claude-sonnet-4-5-20250514(low)", Alias: "claude-sonnet-4-5"}}, + }, + channel: "claude", + input: "claude-sonnet-4-5(high)", + want: "claude-sonnet-4-5-20250514(low)", + }, + { + name: "auto suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(auto)", + want: 
"gemini-2.5-pro-exp-03-25(auto)", + }, + { + name: "none suffix preserved", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(none)", + want: "gemini-2.5-pro-exp-03-25(none)", + }, + { + name: "case insensitive alias lookup with suffix", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "Gemini-2.5-Pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(high)", + want: "gemini-2.5-pro-exp-03-25(high)", + }, + { + name: "no mapping returns empty", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "unknown-model(high)", + want: "", + }, + { + name: "wrong channel returns empty", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "claude", + input: "gemini-2.5-pro(high)", + want: "", + }, + { + name: "empty suffix filtered out", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro()", + want: "gemini-2.5-pro-exp-03-25", + }, + { + name: "incomplete suffix treated as no suffix", + mappings: map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro(high"}}, + }, + channel: "gemini-cli", + input: "gemini-2.5-pro(high", + want: "gemini-2.5-pro-exp-03-25", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(&internalconfig.Config{}) + mgr.SetOAuthModelMappings(tt.mappings) + + auth := createAuthForChannel(tt.channel) + got := 
mgr.resolveOAuthUpstreamModel(auth, tt.input) + if got != tt.want { + t.Errorf("resolveOAuthUpstreamModel(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func createAuthForChannel(channel string) *Auth { + switch channel { + case "gemini-cli": + return &Auth{Provider: "gemini-cli"} + case "claude": + return &Auth{Provider: "claude", Attributes: map[string]string{"auth_kind": "oauth"}} + case "vertex": + return &Auth{Provider: "vertex", Attributes: map[string]string{"auth_kind": "oauth"}} + case "codex": + return &Auth{Provider: "codex", Attributes: map[string]string{"auth_kind": "oauth"}} + case "aistudio": + return &Auth{Provider: "aistudio"} + case "antigravity": + return &Auth{Provider: "antigravity"} + case "qwen": + return &Auth{Provider: "qwen"} + case "iflow": + return &Auth{Provider: "iflow"} + default: + return &Auth{Provider: channel} + } +} + +func TestApplyOAuthModelMapping_SuffixPreservation(t *testing.T) { + t.Parallel() + + mappings := map[string][]internalconfig.ModelNameMapping{ + "gemini-cli": {{Name: "gemini-2.5-pro-exp-03-25", Alias: "gemini-2.5-pro"}}, + } + + mgr := NewManager(nil, nil, nil) + mgr.SetConfig(&internalconfig.Config{}) + mgr.SetOAuthModelMappings(mappings) + + auth := &Auth{ID: "test-auth-id", Provider: "gemini-cli"} + metadata := map[string]any{"existing": "value"} + + resolvedModel, resultMeta := mgr.applyOAuthModelMapping(auth, "gemini-2.5-pro(8192)", metadata) + if resolvedModel != "gemini-2.5-pro-exp-03-25(8192)" { + t.Errorf("applyOAuthModelMapping() model = %q, want %q", resolvedModel, "gemini-2.5-pro-exp-03-25(8192)") + } + + originalModel, ok := resultMeta["model_mapping_original_model"].(string) + if !ok || originalModel != "gemini-2.5-pro(8192)" { + t.Errorf("applyOAuthModelMapping() metadata[model_mapping_original_model] = %v, want %q", resultMeta["model_mapping_original_model"], "gemini-2.5-pro(8192)") + } + + if resultMeta["existing"] != "value" { + t.Errorf("applyOAuthModelMapping() 
metadata[existing] = %v, want %q", resultMeta["existing"], "value") + } +} diff --git a/sdk/cliproxy/builder.go b/sdk/cliproxy/builder.go index 51d5dbac..2e2427f9 100644 --- a/sdk/cliproxy/builder.go +++ b/sdk/cliproxy/builder.go @@ -215,6 +215,7 @@ func (b *Builder) Build() (*Service, error) { } // Attach a default RoundTripper provider so providers can opt-in per-auth transports. coreManager.SetRoundTripperProvider(newDefaultRoundTripperProvider()) + coreManager.SetConfig(b.cfg) coreManager.SetOAuthModelMappings(b.cfg.OAuthModelMappings) service := &Service{ diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 695a77c8..71603479 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -553,6 +553,7 @@ func (s *Service) Run(ctx context.Context) error { s.cfg = newCfg s.cfgMu.Unlock() if s.coreManager != nil { + s.coreManager.SetConfig(newCfg) s.coreManager.SetOAuthModelMappings(newCfg.OAuthModelMappings) } s.rebindExecutors() @@ -825,6 +826,7 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { OwnedBy: compat.Name, Type: "openai-compatibility", DisplayName: modelID, + UserDefined: true, }) } // Register and return @@ -1157,6 +1159,7 @@ func buildConfigModels[T modelEntry](models []T, ownedBy, modelType string) []*M OwnedBy: ownedBy, Type: modelType, DisplayName: display, + UserDefined: true, } if name != "" { if upstream := registry.LookupStaticModelInfo(name); upstream != nil && upstream.Thinking != nil { diff --git a/test/model_alias_thinking_suffix_test.go b/test/model_alias_thinking_suffix_test.go index 270e0cc7..236fca4a 100644 --- a/test/model_alias_thinking_suffix_test.go +++ b/test/model_alias_thinking_suffix_test.go @@ -3,9 +3,10 @@ package test import ( "testing" - "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" "github.com/router-for-me/CLIProxyAPI/v6/internal/util" "github.com/tidwall/gjson" + "github.com/tidwall/sjson" ) // 
TestModelAliasThinkingSuffix tests the 32 test cases defined in docs/thinking_suffix_test_cases.md @@ -178,7 +179,7 @@ func TestModelAliasThinkingSuffix(t *testing.T) { } } - // Step 5: Test Gemini 2.5 thinkingBudget application using real ApplyThinkingMetadataCLI flow + // Step 5: Test Gemini 2.5 thinkingBudget application using thinking.ApplyThinking if tt.expectedField == "thinkingBudget" && util.IsGemini25Model(tt.upstreamModel) { body := []byte(`{"request":{"contents":[]}}`) @@ -195,8 +196,13 @@ func TestModelAliasThinkingSuffix(t *testing.T) { testMetadata[k] = v } - // Use the exported ApplyThinkingMetadataCLI which includes the fallback logic - result := executor.ApplyThinkingMetadataCLI(body, testMetadata, tt.upstreamModel) + // Merge thinking config from metadata into body + body = applyThinkingFromMetadata(body, testMetadata) + + // Use thinking.ApplyThinking for unified thinking config handling + // Note: ApplyThinking now takes model string, not *ModelInfo + result, _ := thinking.ApplyThinking(body, tt.upstreamModel, "gemini-cli") + budgetVal := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingBudget") expectedBudget := tt.expectedValue.(int) @@ -209,3 +215,48 @@ func TestModelAliasThinkingSuffix(t *testing.T) { }) } } + +// applyThinkingFromMetadata merges thinking configuration from metadata into the payload. 
+func applyThinkingFromMetadata(payload []byte, metadata map[string]any) []byte { + if len(metadata) == 0 { + return payload + } + + // Merge thinking_budget from metadata if present + if budget, ok := metadata["thinking_budget"]; ok { + if budgetVal, okNum := parseNumberToInt(budget); okNum { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingBudget", budgetVal) + } + } + + // Merge reasoning_effort from metadata if present + if effort, ok := metadata["reasoning_effort"]; ok { + if effortStr, okStr := effort.(string); okStr && effortStr != "" { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.thinkingLevel", effortStr) + } + } + + // Merge thinking_include_thoughts from metadata if present + if include, ok := metadata["thinking_include_thoughts"]; ok { + if includeBool, okBool := include.(bool); okBool { + payload, _ = sjson.SetBytes(payload, "request.generationConfig.thinkingConfig.includeThoughts", includeBool) + } + } + + return payload +} + +// parseNumberToInt safely converts various numeric types to int +func parseNumberToInt(raw any) (int, bool) { + switch v := raw.(type) { + case int: + return v, true + case int32: + return int(v), true + case int64: + return int(v), true + case float64: + return int(v), true + } + return 0, false +} diff --git a/test/thinking_conversion_test.go b/test/thinking_conversion_test.go index 74a1bd8a..d0e88c78 100644 --- a/test/thinking_conversion_test.go +++ b/test/thinking_conversion_test.go @@ -2,796 +2,882 @@ package test import ( "fmt" - "strings" "testing" "time" _ "github.com/router-for-me/CLIProxyAPI/v6/internal/translator" + // Import provider packages to trigger init() registration of ProviderAppliers + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/claude" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/codex" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/gemini" + _ 
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/geminicli" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/iflow" + _ "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking/provider/openai" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" - "github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" + "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" "github.com/tidwall/gjson" - "github.com/tidwall/sjson" ) -// isOpenAICompatModel returns true if the model is configured as an OpenAI-compatible -// model that should have reasoning effort passed through even if not in registry. -// This simulates the allowCompat behavior from OpenAICompatExecutor. -func isOpenAICompatModel(model string) bool { - return model == "openai-compat" -} - -// registerCoreModels loads representative models across providers into the registry -// so NormalizeThinkingBudget and level validation use real ranges. -func registerCoreModels(t *testing.T) func() { - t.Helper() +// TestThinkingE2EMatrix tests the thinking configuration transformation using the real data flow path. +// Data flow: Input JSON → TranslateRequest → ApplyThinking → Validate Output +// No helper functions are used; all test data is inline. +func TestThinkingE2EMatrix(t *testing.T) { + // Register test models directly reg := registry.GetGlobalRegistry() - uid := fmt.Sprintf("thinking-core-%d", time.Now().UnixNano()) - reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels()) - reg.RegisterClient(uid+"-claude", "claude", registry.GetClaudeModels()) - reg.RegisterClient(uid+"-openai", "codex", registry.GetOpenAIModels()) - reg.RegisterClient(uid+"-qwen", "qwen", registry.GetQwenModels()) - // Custom openai-compatible model with forced thinking suffix passthrough. 
- // No Thinking field - simulates an external model added via openai-compat - // where the registry has no knowledge of its thinking capabilities. - // The allowCompat flag should preserve reasoning effort for such models. - customOpenAIModels := []*registry.ModelInfo{ + uid := fmt.Sprintf("thinking-e2e-%d", time.Now().UnixNano()) + + testModels := []*registry.ModelInfo{ { - ID: "openai-compat", + ID: "level-model", Object: "model", Created: 1700000000, - OwnedBy: "custom-provider", + OwnedBy: "test", Type: "openai", - DisplayName: "OpenAI Compatible Model", - Description: "OpenAI-compatible model with forced thinking suffix support", + DisplayName: "Level Model", + Thinking: ®istry.ThinkingSupport{ + Levels: []string{"minimal", "low", "medium", "high"}, + ZeroAllowed: false, + DynamicAllowed: false, + }, + }, + { + ID: "gemini-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Gemini Budget Model", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 20000, + ZeroAllowed: false, + DynamicAllowed: true, + }, + }, + { + ID: "gemini-mixed-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "gemini", + DisplayName: "Gemini Mixed Model", + Thinking: ®istry.ThinkingSupport{ + Min: 128, + Max: 32768, + Levels: []string{"low", "high"}, + ZeroAllowed: false, + DynamicAllowed: true, + }, + }, + { + ID: "claude-budget-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "claude", + DisplayName: "Claude Budget Model", + Thinking: ®istry.ThinkingSupport{ + Min: 1024, + Max: 128000, + ZeroAllowed: true, + DynamicAllowed: false, + }, + }, + { + ID: "no-thinking-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "No Thinking Model", + Thinking: nil, + }, + { + ID: "user-defined-model", + Object: "model", + Created: 1700000000, + OwnedBy: "test", + Type: "openai", + DisplayName: "User Defined Model", + UserDefined: true, 
+ Thinking: nil, }, } - reg.RegisterClient(uid+"-custom-openai", "codex", customOpenAIModels) - return func() { - reg.UnregisterClient(uid + "-gemini") - reg.UnregisterClient(uid + "-claude") - reg.UnregisterClient(uid + "-openai") - reg.UnregisterClient(uid + "-qwen") - reg.UnregisterClient(uid + "-custom-openai") - } -} -var ( - thinkingTestModels = []string{ - "gpt-5", // level-based thinking model - "gemini-2.5-pro", // numeric-budget thinking model - "qwen3-code-plus", // no thinking support - "openai-compat", // allowCompat=true (OpenAI-compatible channel) - } - thinkingTestFromProtocols = []string{"openai", "claude", "gemini", "openai-response"} - thinkingTestToProtocols = []string{"gemini", "claude", "openai", "codex"} + reg.RegisterClient(uid, "test", testModels) + defer reg.UnregisterClient(uid) - // Numeric budgets and their level equivalents: - // -1 -> auto - // 0 -> none - // 1..1024 -> low - // 1025..8192 -> medium - // 8193..24576 -> high - // >24576 -> model highest level (right-most in Levels) - thinkingNumericSamples = []int{-1, 0, 1023, 1025, 8193, 64000} - - // Levels and their numeric equivalents: - // auto -> -1 - // none -> 0 - // minimal -> 512 - // low -> 1024 - // medium -> 8192 - // high -> 24576 - // xhigh -> 32768 - // invalid -> invalid (no mapping) - thinkingLevelSamples = []string{"auto", "none", "minimal", "low", "medium", "high", "xhigh", "invalid"} -) - -func buildRawPayload(fromProtocol, modelWithSuffix string) []byte { - switch fromProtocol { - case "gemini": - return []byte(fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, modelWithSuffix)) - case "openai-response": - return []byte(fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, modelWithSuffix)) - default: // openai / claude and other chat-style payloads - return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, modelWithSuffix)) - } -} - -// normalizeCodexPayload 
mirrors codex_executor's reasoning + streaming tweaks. -func normalizeCodexPayload(body []byte, upstreamModel string, allowCompat bool) ([]byte, error) { - body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) - if err := executor.ValidateThinkingConfig(body, upstreamModel); err != nil { - return body, err - } - body, _ = sjson.SetBytes(body, "model", upstreamModel) - body, _ = sjson.SetBytes(body, "stream", true) - body, _ = sjson.DeleteBytes(body, "previous_response_id") - return body, nil -} - -// buildBodyForProtocol runs a minimal request through the same translation and -// thinking pipeline used in executors for the given target protocol. -func buildBodyForProtocol(t *testing.T, fromProtocol, toProtocol, modelWithSuffix string) ([]byte, error) { - t.Helper() - normalizedModel, metadata := util.NormalizeThinkingModel(modelWithSuffix) - upstreamModel := util.ResolveOriginalModel(normalizedModel, metadata) - raw := buildRawPayload(fromProtocol, modelWithSuffix) - stream := fromProtocol != toProtocol - - body := sdktranslator.TranslateRequest( - sdktranslator.FromString(fromProtocol), - sdktranslator.FromString(toProtocol), - normalizedModel, - raw, - stream, - ) - - var err error - allowCompat := isOpenAICompatModel(normalizedModel) - switch toProtocol { - case "gemini": - body = executor.ApplyThinkingMetadata(body, metadata, normalizedModel) - body = util.ApplyDefaultThinkingIfNeeded(normalizedModel, body) - body = util.NormalizeGeminiThinkingBudget(normalizedModel, body) - body = util.StripThinkingConfigIfUnsupported(normalizedModel, body) - case "claude": - if budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata); ok { - body = util.ApplyClaudeThinkingConfig(body, budget) - } - case "openai": - body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning_effort", allowCompat) - body = executor.NormalizeThinkingConfig(body, upstreamModel, allowCompat) - err = 
executor.ValidateThinkingConfig(body, upstreamModel) - case "codex": // OpenAI responses / codex - // Codex does not support allowCompat; always use false. - body = executor.ApplyReasoningEffortMetadata(body, metadata, normalizedModel, "reasoning.effort", false) - // Mirror CodexExecutor final normalization and model override so tests log the final body. - body, err = normalizeCodexPayload(body, upstreamModel, false) - default: + type testCase struct { + name string + from string + to string + modelSuffix string + inputJSON string + expectField string + expectValue string + includeThoughts string + expectErr bool } - // Mirror executor behavior: final payload uses the upstream (base) model name. - if upstreamModel != "" { - body, _ = sjson.SetBytes(body, "model", upstreamModel) + cases := []testCase{ + // level-model (Levels=minimal/low/medium/high, ZeroAllowed=false, DynamicAllowed=false) + // Case 1: No suffix, translator adds default medium for codex + { + name: "1", + from: "openai", + to: "codex", + modelSuffix: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 2: Explicit medium level + { + name: "2", + from: "openai", + to: "codex", + modelSuffix: "level-model(medium)", + inputJSON: `{"model":"level-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 3: xhigh not in Levels=[minimal,low,medium,high] → ValidateConfig returns error + { + name: "3", + from: "openai", + to: "codex", + modelSuffix: "level-model(xhigh)", + inputJSON: `{"model":"level-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 4: none → ModeNone, ZeroAllowed=false → clamp to min level (minimal) + { + name: "4", + from: "openai", + to: "codex", + modelSuffix: "level-model(none)", + 
inputJSON: `{"model":"level-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 5: auto → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) + { + name: "5", + from: "openai", + to: "codex", + modelSuffix: "level-model(auto)", + inputJSON: `{"model":"level-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 6: No suffix from gemini → translator injects default reasoning.effort: medium + { + name: "6", + from: "gemini", + to: "codex", + modelSuffix: "level-model", + inputJSON: `{"model":"level-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 7: 8192 → medium (1025-8192) + { + name: "7", + from: "gemini", + to: "codex", + modelSuffix: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 8: 64000 → xhigh → not supported → error + { + name: "8", + from: "gemini", + to: "codex", + modelSuffix: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: true, + }, + // Case 9: 0 → ModeNone, ZeroAllowed=false → clamp to min level (minimal) + { + name: "9", + from: "gemini", + to: "codex", + modelSuffix: "level-model(0)", + inputJSON: `{"model":"level-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 10: -1 → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) + { + name: "10", + from: "gemini", + to: "codex", + modelSuffix: "level-model(-1)", + inputJSON: 
`{"model":"level-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // Case 11: No suffix from claude → no thinking config + { + name: "11", + from: "claude", + to: "openai", + modelSuffix: "level-model", + inputJSON: `{"model":"level-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // Case 12: 8192 → medium + { + name: "12", + from: "claude", + to: "openai", + modelSuffix: "level-model(8192)", + inputJSON: `{"model":"level-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // Case 13: 64000 → xhigh → not supported → error + { + name: "13", + from: "claude", + to: "openai", + modelSuffix: "level-model(64000)", + inputJSON: `{"model":"level-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: true, + }, + // Case 14: 0 → ModeNone, ZeroAllowed=false → clamp to min level (minimal) + { + name: "14", + from: "claude", + to: "openai", + modelSuffix: "level-model(0)", + inputJSON: `{"model":"level-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "minimal", + expectErr: false, + }, + // Case 15: -1 → ModeAuto, DynamicAllowed=false → convert to mid-range (medium) + { + name: "15", + from: "claude", + to: "openai", + modelSuffix: "level-model(-1)", + inputJSON: `{"model":"level-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + + // gemini-budget-model (Min=128, Max=20000, ZeroAllowed=false, DynamicAllowed=true) + { + name: "16", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // 
medium → 8192 + { + name: "17", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(medium)", + inputJSON: `{"model":"gemini-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // xhigh → 32768 → clamp to 20000 + { + name: "18", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(xhigh)", + inputJSON: `{"model":"gemini-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + // none → 0 → ZeroAllowed=false → clamp to 128, includeThoughts=false + { + name: "19", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(none)", + inputJSON: `{"model":"gemini-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + // auto → -1 dynamic allowed + { + name: "20", + from: "openai", + to: "gemini", + modelSuffix: "gemini-budget-model(auto)", + inputJSON: `{"model":"gemini-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + { + name: "21", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model", + inputJSON: `{"model":"gemini-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "22", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(8192)", + inputJSON: `{"model":"gemini-budget-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: 
"8192", + includeThoughts: "true", + expectErr: false, + }, + { + name: "23", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(64000)", + inputJSON: `{"model":"gemini-budget-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "20000", + includeThoughts: "true", + expectErr: false, + }, + { + name: "24", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(0)", + inputJSON: `{"model":"gemini-budget-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "128", + includeThoughts: "false", + expectErr: false, + }, + { + name: "25", + from: "claude", + to: "gemini", + modelSuffix: "gemini-budget-model(-1)", + inputJSON: `{"model":"gemini-budget-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // gemini-mixed-model (Min=128, Max=32768, Levels=low/high, ZeroAllowed=false, DynamicAllowed=true) + { + name: "26", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // high → use thinkingLevel + { + name: "27", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(high)", + inputJSON: `{"model":"gemini-mixed-model(high)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "high", + includeThoughts: "true", + expectErr: false, + }, + // xhigh → not in Levels=[low,high] → error + { + name: "28", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(xhigh)", + inputJSON: `{"model":"gemini-mixed-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: 
"", + expectErr: true, + }, + // none → ModeNone, ZeroAllowed=false → set Level to lowest (low), includeThoughts=false + { + name: "29", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(none)", + inputJSON: `{"model":"gemini-mixed-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // auto → dynamic allowed, use thinkingBudget=-1 + { + name: "30", + from: "openai", + to: "gemini", + modelSuffix: "gemini-mixed-model(auto)", + inputJSON: `{"model":"gemini-mixed-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + { + name: "31", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model", + inputJSON: `{"model":"gemini-mixed-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // 8192 → ModeBudget → clamp (in range) → thinkingBudget: 8192 + { + name: "32", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(8192)", + inputJSON: `{"model":"gemini-mixed-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "8192", + includeThoughts: "true", + expectErr: false, + }, + // 64000 → ModeBudget → clamp to 32768 → thinkingBudget: 32768 + { + name: "33", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(64000)", + inputJSON: `{"model":"gemini-mixed-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "32768", + includeThoughts: "true", + expectErr: false, + }, + // 0 → ModeNone, ZeroAllowed=false → set Level to lowest (low), includeThoughts=false + { + name: "34", + from: "claude", + to: "gemini", + 
modelSuffix: "gemini-mixed-model(0)", + inputJSON: `{"model":"gemini-mixed-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingLevel", + expectValue: "low", + includeThoughts: "false", + expectErr: false, + }, + // -1 → auto, dynamic allowed + { + name: "35", + from: "claude", + to: "gemini", + modelSuffix: "gemini-mixed-model(-1)", + inputJSON: `{"model":"gemini-mixed-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "generationConfig.thinkingConfig.thinkingBudget", + expectValue: "-1", + includeThoughts: "true", + expectErr: false, + }, + + // claude-budget-model (Min=1024, Max=128000, ZeroAllowed=true, DynamicAllowed=false) + { + name: "36", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + // medium → 8192 + { + name: "37", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(medium)", + inputJSON: `{"model":"claude-budget-model(medium)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + // xhigh → 32768 + { + name: "38", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(xhigh)", + inputJSON: `{"model":"claude-budget-model(xhigh)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "32768", + expectErr: false, + }, + // none → ZeroAllowed=true → disabled + { + name: "39", + from: "openai", + to: "claude", + modelSuffix: "claude-budget-model(none)", + inputJSON: `{"model":"claude-budget-model(none)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // auto → ModeAuto, DynamicAllowed=false → convert to mid-range + { + name: "40", + from: "openai", + to: "claude", + 
modelSuffix: "claude-budget-model(auto)", + inputJSON: `{"model":"claude-budget-model(auto)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + { + name: "41", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model", + inputJSON: `{"model":"claude-budget-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "42", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(8192)", + inputJSON: `{"model":"claude-budget-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "8192", + expectErr: false, + }, + { + name: "43", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(200000)", + inputJSON: `{"model":"claude-budget-model(200000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "128000", + expectErr: false, + }, + // 0 → ZeroAllowed=true → disabled + { + name: "44", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(0)", + inputJSON: `{"model":"claude-budget-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.type", + expectValue: "disabled", + expectErr: false, + }, + // -1 → auto → DynamicAllowed=false → mid-range + { + name: "45", + from: "gemini", + to: "claude", + modelSuffix: "claude-budget-model(-1)", + inputJSON: `{"model":"claude-budget-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "thinking.budget_tokens", + expectValue: "64512", + expectErr: false, + }, + + // no-thinking-model (Thinking=nil) + { + name: "46", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: 
"47", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "48", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "49", + from: "gemini", + to: "openai", + modelSuffix: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + { + name: "50", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model", + inputJSON: `{"model":"no-thinking-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "51", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model(8192)", + inputJSON: `{"model":"no-thinking-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "52", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model(0)", + inputJSON: `{"model":"no-thinking-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + { + name: "53", + from: "claude", + to: "openai", + modelSuffix: "no-thinking-model(-1)", + inputJSON: `{"model":"no-thinking-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "", + expectErr: false, + }, + + // user-defined-model (UserDefined=true, Thinking=nil) + { + name: "54", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model", + inputJSON: `{"model":"user-defined-model","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "", + expectErr: false, + }, + // 8192 → medium (passthrough for UserDefined) + { + name: "55", + from: "gemini", + 
to: "openai", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "medium", + expectErr: false, + }, + // 64000 → xhigh + { + name: "56", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(64000)", + inputJSON: `{"model":"user-defined-model(64000)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "xhigh", + expectErr: false, + }, + // 0 → none + { + name: "57", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "none", + expectErr: false, + }, + // -1 → auto + { + name: "58", + from: "gemini", + to: "openai", + modelSuffix: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, + expectField: "reasoning_effort", + expectValue: "auto", + expectErr: false, + }, + // Case 59: No suffix from claude → translator injects default reasoning.effort: medium + { + name: "59", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model", + inputJSON: `{"model":"user-defined-model","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // 8192 → medium + { + name: "60", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(8192)", + inputJSON: `{"model":"user-defined-model(8192)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "medium", + expectErr: false, + }, + // 64000 → xhigh + { + name: "61", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(64000)", + inputJSON: 
`{"model":"user-defined-model(64000)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "xhigh", + expectErr: false, + }, + // 0 → none + { + name: "62", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(0)", + inputJSON: `{"model":"user-defined-model(0)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "none", + expectErr: false, + }, + // -1 → auto + { + name: "63", + from: "claude", + to: "codex", + modelSuffix: "user-defined-model(-1)", + inputJSON: `{"model":"user-defined-model(-1)","messages":[{"role":"user","content":"hi"}]}`, + expectField: "reasoning.effort", + expectValue: "auto", + expectErr: false, + }, } - // For tests we only keep model + thinking-related fields to avoid noise. - body = filterThinkingBody(toProtocol, body, upstreamModel, normalizedModel) - return body, err -} + for _, tc := range cases { + tc := tc + testName := fmt.Sprintf("Case%s_%s->%s_%s", tc.name, tc.from, tc.to, tc.modelSuffix) + t.Run(testName, func(t *testing.T) { + // Real data flow path: + // 1. Parse suffix to get base model + suffixResult := thinking.ParseSuffix(tc.modelSuffix) + baseModel := suffixResult.ModelName -// filterThinkingBody projects the translated payload down to only model and -// thinking-related fields for the given target protocol. -func filterThinkingBody(toProtocol string, body []byte, upstreamModel, normalizedModel string) []byte { - if len(body) == 0 { - return body - } - out := []byte(`{}`) + // 2. Translate request from source format to target format + body := sdktranslator.TranslateRequest( + sdktranslator.FromString(tc.from), + sdktranslator.FromString(tc.to), + baseModel, + []byte(tc.inputJSON), + true, + ) - // Preserve model if present, otherwise fall back to upstream/normalized model. 
- if m := gjson.GetBytes(body, "model"); m.Exists() { - out, _ = sjson.SetBytes(out, "model", m.Value()) - } else if upstreamModel != "" { - out, _ = sjson.SetBytes(out, "model", upstreamModel) - } else if normalizedModel != "" { - out, _ = sjson.SetBytes(out, "model", normalizedModel) - } + // 3. Apply thinking configuration (main entry point) + body, err := thinking.ApplyThinking(body, tc.modelSuffix, tc.to) - switch toProtocol { - case "gemini": - if tc := gjson.GetBytes(body, "generationConfig.thinkingConfig"); tc.Exists() { - out, _ = sjson.SetRawBytes(out, "generationConfig.thinkingConfig", []byte(tc.Raw)) - } - case "claude": - if tcfg := gjson.GetBytes(body, "thinking"); tcfg.Exists() { - out, _ = sjson.SetRawBytes(out, "thinking", []byte(tcfg.Raw)) - } - case "openai": - if re := gjson.GetBytes(body, "reasoning_effort"); re.Exists() { - out, _ = sjson.SetBytes(out, "reasoning_effort", re.Value()) - } - case "codex": - if re := gjson.GetBytes(body, "reasoning.effort"); re.Exists() { - out, _ = sjson.SetBytes(out, "reasoning.effort", re.Value()) - } - } - return out -} - -func TestThinkingConversionsAcrossProtocolsAndModels(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - type scenario struct { - name string - modelSuffix string - } - - numericName := func(budget int) string { - if budget < 0 { - return "numeric-neg1" - } - return fmt.Sprintf("numeric-%d", budget) - } - - for _, model := range thinkingTestModels { - _ = registry.GetGlobalRegistry().GetModelInfo(model) - - for _, from := range thinkingTestFromProtocols { - // Scenario selection follows protocol semantics: - // - OpenAI-style protocols (openai/openai-response) express thinking as levels. - // - Claude/Gemini-style protocols express thinking as numeric budgets. 
- cases := []scenario{ - {name: "no-suffix", modelSuffix: model}, + // Validate results + if tc.expectErr { + if err == nil { + t.Fatalf("expected error but got none, body=%s", string(body)) + } + return } - if from == "openai" || from == "openai-response" { - for _, lvl := range thinkingLevelSamples { - cases = append(cases, scenario{ - name: "level-" + lvl, - modelSuffix: fmt.Sprintf("%s(%s)", model, lvl), - }) - } - } else { // claude or gemini - for _, budget := range thinkingNumericSamples { - budget := budget - cases = append(cases, scenario{ - name: numericName(budget), - modelSuffix: fmt.Sprintf("%s(%d)", model, budget), - }) - } + if err != nil { + t.Fatalf("unexpected error: %v, body=%s", err, string(body)) } - for _, to := range thinkingTestToProtocols { - if from == to { - continue + // Check for expected field absence + if tc.expectField == "" { + var hasThinking bool + switch tc.to { + case "gemini": + hasThinking = gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() + case "claude": + hasThinking = gjson.GetBytes(body, "thinking").Exists() + case "openai": + hasThinking = gjson.GetBytes(body, "reasoning_effort").Exists() + case "codex": + hasThinking = gjson.GetBytes(body, "reasoning.effort").Exists() || gjson.GetBytes(body, "reasoning").Exists() } - t.Logf("─────────────────────────────────────────────────────────────────────────────────") - t.Logf(" %s -> %s | model: %s", from, to, model) - t.Logf("─────────────────────────────────────────────────────────────────────────────────") - for _, cs := range cases { - from := from - to := to - cs := cs - testName := fmt.Sprintf("%s->%s/%s/%s", from, to, model, cs.name) - t.Run(testName, func(t *testing.T) { - normalizedModel, metadata := util.NormalizeThinkingModel(cs.modelSuffix) - expectPresent, expectValue, expectErr := func() (bool, string, bool) { - switch to { - case "gemini": - budget, include, ok := util.ResolveThinkingConfigFromMetadata(normalizedModel, metadata) - if !ok || 
!util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - if include != nil && !*include { - return false, "", false - } - if budget == nil { - return false, "", false - } - norm := util.NormalizeThinkingBudget(normalizedModel, *budget) - return true, fmt.Sprintf("%d", norm), false - case "claude": - if !util.ModelSupportsThinking(normalizedModel) { - return false, "", false - } - budget, ok := util.ResolveClaudeThinkingConfig(normalizedModel, metadata) - if !ok || budget == nil { - return false, "", false - } - return true, fmt.Sprintf("%d", *budget), false - case "openai": - allowCompat := isOpenAICompatModel(normalizedModel) - if !util.ModelSupportsThinking(normalizedModel) && !allowCompat { - return false, "", false - } - // For allowCompat models, pass through effort directly without validation - if allowCompat { - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if ok && strings.TrimSpace(effort) != "" { - return true, strings.ToLower(strings.TrimSpace(effort)), false - } - // Check numeric budget fallback for allowCompat - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { - return true, mapped, false - } - } - return false, "", false - } - if !util.ModelUsesThinkingLevels(normalizedModel) { - // Non-levels models don't support effort strings in openai - return false, "", false - } - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if !ok || strings.TrimSpace(effort) == "" { - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap { - effort = mapped - ok = true - } - } - } - if !ok || strings.TrimSpace(effort) == "" { - return false, "", false - } - effort = strings.ToLower(strings.TrimSpace(effort)) - if normalized, okLevel := 
util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel { - return true, normalized, false - } - return false, "", true // validation would fail - case "codex": - // Codex does not support allowCompat; require thinking-capable level models. - if !util.ModelSupportsThinking(normalizedModel) || !util.ModelUsesThinkingLevels(normalizedModel) { - return false, "", false - } - effort, ok := util.ReasoningEffortFromMetadata(metadata) - if ok && strings.TrimSpace(effort) != "" { - effort = strings.ToLower(strings.TrimSpace(effort)) - if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, effort); okLevel { - return true, normalized, false - } - return false, "", true - } - if budget, _, _, matched := util.ThinkingFromMetadata(metadata); matched && budget != nil { - if mapped, okMap := util.ThinkingBudgetToEffort(normalizedModel, *budget); okMap && mapped != "" { - mapped = strings.ToLower(strings.TrimSpace(mapped)) - if normalized, okLevel := util.NormalizeReasoningEffortLevel(normalizedModel, mapped); okLevel { - return true, normalized, false - } - return false, "", true - } - } - if from != "openai-response" { - // Codex translators default reasoning.effort to "medium" when - // no explicit thinking suffix/metadata is provided. 
- return true, "medium", false - } - return false, "", false - default: - return false, "", false - } - }() - - body, err := buildBodyForProtocol(t, from, to, cs.modelSuffix) - actualPresent, actualValue := func() (bool, string) { - path := "" - switch to { - case "gemini": - path = "generationConfig.thinkingConfig.thinkingBudget" - case "claude": - path = "thinking.budget_tokens" - case "openai": - path = "reasoning_effort" - case "codex": - path = "reasoning.effort" - } - if path == "" { - return false, "" - } - val := gjson.GetBytes(body, path) - if to == "codex" && !val.Exists() { - reasoning := gjson.GetBytes(body, "reasoning") - if reasoning.Exists() { - val = reasoning.Get("effort") - } - } - if !val.Exists() { - return false, "" - } - if val.Type == gjson.Number { - return true, fmt.Sprintf("%d", val.Int()) - } - return true, val.String() - }() - - t.Logf("from=%s to=%s model=%s suffix=%s present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", - from, to, model, cs.modelSuffix, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) - - if expectErr { - if err == nil { - t.Fatalf("expected validation error but got none, body=%s", string(body)) - } - return - } - if err != nil { - t.Fatalf("unexpected error: %v body=%s", err, string(body)) - } - - if expectPresent != actualPresent { - t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) - } - if expectPresent && expectValue != actualValue { - t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) - } - }) - } - } - } - } -} - -// buildRawPayloadWithThinking creates a payload with thinking parameters already in the body. -// This tests the path where thinking comes from the raw payload, not model suffix. 
-func buildRawPayloadWithThinking(fromProtocol, model string, thinkingParam any) []byte { - switch fromProtocol { - case "gemini": - base := fmt.Sprintf(`{"model":"%s","contents":[{"role":"user","parts":[{"text":"hi"}]}]}`, model) - if budget, ok := thinkingParam.(int); ok { - base, _ = sjson.Set(base, "generationConfig.thinkingConfig.thinkingBudget", budget) - } - return []byte(base) - case "openai-response": - base := fmt.Sprintf(`{"model":"%s","input":[{"role":"user","content":[{"type":"text","text":"hi"}]}]}`, model) - if effort, ok := thinkingParam.(string); ok && effort != "" { - base, _ = sjson.Set(base, "reasoning.effort", effort) - } - return []byte(base) - case "openai": - base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if effort, ok := thinkingParam.(string); ok && effort != "" { - base, _ = sjson.Set(base, "reasoning_effort", effort) - } - return []byte(base) - case "claude": - base := fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model) - if budget, ok := thinkingParam.(int); ok { - base, _ = sjson.Set(base, "thinking.type", "enabled") - base, _ = sjson.Set(base, "thinking.budget_tokens", budget) - } - return []byte(base) - default: - return []byte(fmt.Sprintf(`{"model":"%s","messages":[{"role":"user","content":"hi"}]}`, model)) - } -} - -// buildBodyForProtocolWithRawThinking translates payload with raw thinking params. 
-func buildBodyForProtocolWithRawThinking(t *testing.T, fromProtocol, toProtocol, model string, thinkingParam any) ([]byte, error) { - t.Helper() - raw := buildRawPayloadWithThinking(fromProtocol, model, thinkingParam) - stream := fromProtocol != toProtocol - - body := sdktranslator.TranslateRequest( - sdktranslator.FromString(fromProtocol), - sdktranslator.FromString(toProtocol), - model, - raw, - stream, - ) - - var err error - allowCompat := isOpenAICompatModel(model) - switch toProtocol { - case "gemini": - body = util.ApplyDefaultThinkingIfNeeded(model, body) - body = util.NormalizeGeminiThinkingBudget(model, body) - body = util.StripThinkingConfigIfUnsupported(model, body) - case "claude": - // For raw payload, Claude thinking is passed through by translator - // No additional processing needed as thinking is already in body - case "openai": - body = executor.NormalizeThinkingConfig(body, model, allowCompat) - err = executor.ValidateThinkingConfig(body, model) - case "codex": - // Codex does not support allowCompat; always use false. 
- body, err = normalizeCodexPayload(body, model, false) - } - - body, _ = sjson.SetBytes(body, "model", model) - body = filterThinkingBody(toProtocol, body, model, model) - return body, err -} - -func TestRawPayloadThinkingConversions(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - type scenario struct { - name string - thinkingParam any // int for budget, string for effort level - } - - numericName := func(budget int) string { - if budget < 0 { - return "budget-neg1" - } - return fmt.Sprintf("budget-%d", budget) - } - - for _, model := range thinkingTestModels { - supportsThinking := util.ModelSupportsThinking(model) - usesLevels := util.ModelUsesThinkingLevels(model) - allowCompat := isOpenAICompatModel(model) - - for _, from := range thinkingTestFromProtocols { - var cases []scenario - switch from { - case "openai", "openai-response": - cases = []scenario{ - {name: "no-thinking", thinkingParam: nil}, - } - for _, lvl := range thinkingLevelSamples { - cases = append(cases, scenario{ - name: "effort-" + lvl, - thinkingParam: lvl, - }) - } - case "gemini", "claude": - cases = []scenario{ - {name: "no-thinking", thinkingParam: nil}, - } - for _, budget := range thinkingNumericSamples { - budget := budget - cases = append(cases, scenario{ - name: numericName(budget), - thinkingParam: budget, - }) + if hasThinking { + t.Fatalf("expected no thinking field but found one, body=%s", string(body)) } + return } - for _, to := range thinkingTestToProtocols { - if from == to { - continue - } - t.Logf("═══════════════════════════════════════════════════════════════════════════════") - t.Logf(" RAW PAYLOAD: %s -> %s | model: %s", from, to, model) - t.Logf("═══════════════════════════════════════════════════════════════════════════════") - - for _, cs := range cases { - from := from - to := to - cs := cs - testName := fmt.Sprintf("raw/%s->%s/%s/%s", from, to, model, cs.name) - t.Run(testName, func(t *testing.T) { - expectPresent, expectValue, expectErr 
:= func() (bool, string, bool) { - if cs.thinkingParam == nil { - if to == "codex" && from != "openai-response" && supportsThinking && usesLevels { - // Codex translators default reasoning.effort to "medium" for thinking-capable level models - return true, "medium", false - } - return false, "", false - } - - switch to { - case "gemini": - if !supportsThinking || usesLevels { - return false, "", false - } - // Gemini expects numeric budget (only for non-level models) - if budget, ok := cs.thinkingParam.(int); ok { - norm := util.NormalizeThinkingBudget(model, budget) - return true, fmt.Sprintf("%d", norm), false - } - // Convert effort level to budget for non-level models only - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - // "none" disables thinking - no thinkingBudget in output - if strings.ToLower(effort) == "none" { - return false, "", false - } - if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { - // ThinkingEffortToBudget already returns normalized budget - return true, fmt.Sprintf("%d", budget), false - } - // Invalid effort does not map to a budget - return false, "", false - } - return false, "", false - case "claude": - if !supportsThinking || usesLevels { - return false, "", false - } - // Claude expects numeric budget (only for non-level models) - if budget, ok := cs.thinkingParam.(int); ok && budget > 0 { - norm := util.NormalizeThinkingBudget(model, budget) - return true, fmt.Sprintf("%d", norm), false - } - // Convert effort level to budget for non-level models only - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - // "none" and "auto" don't produce budget_tokens - lower := strings.ToLower(effort) - if lower == "none" || lower == "auto" { - return false, "", false - } - if budget, okB := util.ThinkingEffortToBudget(model, effort); okB { - // ThinkingEffortToBudget already returns normalized budget - return true, fmt.Sprintf("%d", budget), false - } - // Invalid effort - claude sets 
thinking.type:enabled but no budget_tokens - return false, "", false - } - return false, "", false - case "openai": - if allowCompat { - if effort, ok := cs.thinkingParam.(string); ok && strings.TrimSpace(effort) != "" { - normalized := strings.ToLower(strings.TrimSpace(effort)) - return true, normalized, false - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - return true, mapped, false - } - } - return false, "", false - } - if !supportsThinking || !usesLevels { - return false, "", false - } - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { - return true, normalized, false - } - return false, "", true // invalid level - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - // Check if the mapped effort is valid for this model - if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { - return true, mapped, true // expect validation error - } - return true, mapped, false - } - } - return false, "", false - case "codex": - // Codex does not support allowCompat; require thinking-capable level models. 
- if !supportsThinking || !usesLevels { - return false, "", false - } - if effort, ok := cs.thinkingParam.(string); ok && effort != "" { - if normalized, okN := util.NormalizeReasoningEffortLevel(model, effort); okN { - return true, normalized, false - } - return false, "", true - } - if budget, ok := cs.thinkingParam.(int); ok { - if mapped, okM := util.ThinkingBudgetToEffort(model, budget); okM && mapped != "" { - // Check if the mapped effort is valid for this model - if _, validLevel := util.NormalizeReasoningEffortLevel(model, mapped); !validLevel { - return true, mapped, true // expect validation error - } - return true, mapped, false - } - } - if from != "openai-response" { - // Codex translators default reasoning.effort to "medium" for thinking-capable models - return true, "medium", false - } - return false, "", false - } - return false, "", false - }() - - body, err := buildBodyForProtocolWithRawThinking(t, from, to, model, cs.thinkingParam) - actualPresent, actualValue := func() (bool, string) { - path := "" - switch to { - case "gemini": - path = "generationConfig.thinkingConfig.thinkingBudget" - case "claude": - path = "thinking.budget_tokens" - case "openai": - path = "reasoning_effort" - case "codex": - path = "reasoning.effort" - } - if path == "" { - return false, "" - } - val := gjson.GetBytes(body, path) - if to == "codex" && !val.Exists() { - reasoning := gjson.GetBytes(body, "reasoning") - if reasoning.Exists() { - val = reasoning.Get("effort") - } - } - if !val.Exists() { - return false, "" - } - if val.Type == gjson.Number { - return true, fmt.Sprintf("%d", val.Int()) - } - return true, val.String() - }() - - t.Logf("from=%s to=%s model=%s param=%v present(expect=%v got=%v) value(expect=%s got=%s) err(expect=%v got=%v) body=%s", - from, to, model, cs.thinkingParam, expectPresent, actualPresent, expectValue, actualValue, expectErr, err != nil, string(body)) - - if expectErr { - if err == nil { - t.Fatalf("expected validation error but got 
none, body=%s", string(body)) - } - return - } - if err != nil { - t.Fatalf("unexpected error: %v body=%s", err, string(body)) - } - - if expectPresent != actualPresent { - t.Fatalf("presence mismatch: expect %v got %v body=%s", expectPresent, actualPresent, string(body)) - } - if expectPresent && expectValue != actualValue { - t.Fatalf("value mismatch: expect %s got %s body=%s", expectValue, actualValue, string(body)) - } - }) - } + // Check expected field value + val := gjson.GetBytes(body, tc.expectField) + if !val.Exists() { + t.Fatalf("expected field %s not found, body=%s", tc.expectField, string(body)) } - } - } -} -func TestThinkingBudgetToEffort(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - cases := []struct { - name string - model string - budget int - want string - ok bool - }{ - {name: "dynamic-auto", model: "gpt-5", budget: -1, want: "auto", ok: true}, - {name: "zero-none", model: "gpt-5", budget: 0, want: "minimal", ok: true}, - {name: "low-min", model: "gpt-5", budget: 1, want: "low", ok: true}, - {name: "low-max", model: "gpt-5", budget: 1024, want: "low", ok: true}, - {name: "medium-min", model: "gpt-5", budget: 1025, want: "medium", ok: true}, - {name: "medium-max", model: "gpt-5", budget: 8192, want: "medium", ok: true}, - {name: "high-min", model: "gpt-5", budget: 8193, want: "high", ok: true}, - {name: "high-max", model: "gpt-5", budget: 24576, want: "high", ok: true}, - {name: "over-max-clamps-to-highest", model: "gpt-5", budget: 64000, want: "high", ok: true}, - {name: "over-max-xhigh-model", model: "gpt-5.2", budget: 64000, want: "xhigh", ok: true}, - {name: "negative-unsupported", model: "gpt-5", budget: -5, want: "", ok: false}, - } - - for _, cs := range cases { - cs := cs - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ThinkingBudgetToEffort(cs.model, cs.budget) - if ok != cs.ok { - t.Fatalf("ok mismatch for model=%s budget=%d: expect %v got %v", cs.model, cs.budget, cs.ok, ok) + actualValue := 
val.String() + if val.Type == gjson.Number { + actualValue = fmt.Sprintf("%d", val.Int()) } - if got != cs.want { - t.Fatalf("value mismatch for model=%s budget=%d: expect %q got %q", cs.model, cs.budget, cs.want, got) - } - }) - } -} - -func TestThinkingEffortToBudget(t *testing.T) { - cleanup := registerCoreModels(t) - defer cleanup() - - cases := []struct { - name string - model string - effort string - want int - ok bool - }{ - {name: "none", model: "gemini-2.5-pro", effort: "none", want: 0, ok: true}, - {name: "auto", model: "gemini-2.5-pro", effort: "auto", want: -1, ok: true}, - {name: "minimal", model: "gemini-2.5-pro", effort: "minimal", want: 512, ok: true}, - {name: "low", model: "gemini-2.5-pro", effort: "low", want: 1024, ok: true}, - {name: "medium", model: "gemini-2.5-pro", effort: "medium", want: 8192, ok: true}, - {name: "high", model: "gemini-2.5-pro", effort: "high", want: 24576, ok: true}, - {name: "xhigh", model: "gemini-2.5-pro", effort: "xhigh", want: 32768, ok: true}, - {name: "empty-unsupported", model: "gemini-2.5-pro", effort: "", want: 0, ok: false}, - {name: "invalid-unsupported", model: "gemini-2.5-pro", effort: "ultra", want: 0, ok: false}, - {name: "case-insensitive", model: "gemini-2.5-pro", effort: "LOW", want: 1024, ok: true}, - {name: "case-insensitive-medium", model: "gemini-2.5-pro", effort: "MEDIUM", want: 8192, ok: true}, - } - - for _, cs := range cases { - cs := cs - t.Run(cs.name, func(t *testing.T) { - got, ok := util.ThinkingEffortToBudget(cs.model, cs.effort) - if ok != cs.ok { - t.Fatalf("ok mismatch for model=%s effort=%s: expect %v got %v", cs.model, cs.effort, cs.ok, ok) - } - if got != cs.want { - t.Fatalf("value mismatch for model=%s effort=%s: expect %d got %d", cs.model, cs.effort, cs.want, got) + if actualValue != tc.expectValue { + t.Fatalf("field %s: expected %q, got %q, body=%s", tc.expectField, tc.expectValue, actualValue, string(body)) + } + + // Check includeThoughts for Gemini + if tc.includeThoughts != 
"" && tc.to == "gemini" { + itVal := gjson.GetBytes(body, "generationConfig.thinkingConfig.includeThoughts") + if !itVal.Exists() { + t.Fatalf("expected includeThoughts field not found, body=%s", string(body)) + } + actual := fmt.Sprintf("%v", itVal.Bool()) + if actual != tc.includeThoughts { + t.Fatalf("includeThoughts: expected %s, got %s, body=%s", tc.includeThoughts, actual, string(body)) + } } }) }