From 08ab6a7d77368bc37f727dd40bcd0878fe71035a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 13:27:57 +0800 Subject: [PATCH 1/9] feat(gemini): add per-key model alias support for Gemini provider --- config.example.yaml | 7 +- internal/config/config.go | 12 ++ internal/runtime/executor/gemini_executor.go | 110 ++++++++++++++++++- sdk/cliproxy/service.go | 45 +++++++- 4 files changed, 169 insertions(+), 5 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 73e2a8ac..2a35fe68 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -90,6 +90,9 @@ ws-auth: false # headers: # X-Custom-Header: "custom-value" # proxy-url: "socks5://proxy.example.com:1080" +# models: +# - name: "gemini-2.5-flash" # upstream model name +# alias: "gemini-flash" # client alias mapped to the upstream model # excluded-models: # - "gemini-2.5-pro" # exclude specific models from this provider (exact match) # - "gemini-2.5-*" # wildcard matching prefix (e.g. gemini-2.5-flash, gemini-2.5-pro) @@ -106,7 +109,7 @@ ws-auth: false # X-Custom-Header: "custom-value" # proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override # models: -# - name: "gpt-5-codex" # upstream model name +# - name: "gpt-5-codex" # upstream model name # alias: "codex-latest" # client alias mapped to the upstream model # excluded-models: # - "gpt-5.1" # exclude specific models (exact match) @@ -125,7 +128,7 @@ ws-auth: false # proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override # models: # - name: "claude-3-5-sonnet-20241022" # upstream model name -# alias: "claude-sonnet-latest" # client alias mapped to the upstream model +# alias: "claude-sonnet-latest" # client alias mapped to the upstream model # excluded-models: # - "claude-opus-4-5-20251101" # exclude specific models (exact match) # - "claude-3-*" # wildcard matching prefix (e.g. 
claude-3-7-sonnet-20250219) diff --git a/internal/config/config.go b/internal/config/config.go index 760be600..0cde69c7 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -318,6 +318,9 @@ type GeminiKey struct { // ProxyURL optionally overrides the global proxy for this API key. ProxyURL string `yaml:"proxy-url,omitempty" json:"proxy-url,omitempty"` + // Models defines upstream model names and aliases for request routing. + Models []GeminiModel `yaml:"models,omitempty" json:"models,omitempty"` + // Headers optionally adds extra HTTP headers for requests sent with this key. Headers map[string]string `yaml:"headers,omitempty" json:"headers,omitempty"` @@ -325,6 +328,15 @@ type GeminiKey struct { ExcludedModels []string `yaml:"excluded-models,omitempty" json:"excluded-models,omitempty"` } +// GeminiModel describes a mapping between an alias and the actual upstream model name. +type GeminiModel struct { + // Name is the upstream model identifier used when issuing requests. + Name string `yaml:"name" json:"name"` + + // Alias is the client-facing model name that maps to Name. + Alias string `yaml:"alias" json:"alias"` +} + // OpenAICompatibility represents the configuration for OpenAI API compatibility // with external providers, allowing model aliases to be routed through OpenAI API format. 
type OpenAICompatibility struct { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index f211ba62..da57150d 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -78,6 +78,13 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { + upstreamModel = modelOverride + } else if !strings.EqualFold(upstreamModel, req.Model) { + if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { + upstreamModel = modelOverride + } + } // Official Gemini API via API key or OAuth bearer from := opts.SourceFormat @@ -174,6 +181,13 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { + upstreamModel = modelOverride + } else if !strings.EqualFold(upstreamModel, req.Model) { + if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { + upstreamModel = modelOverride + } + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") @@ -287,6 +301,15 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { apiKey, bearer := geminiCreds(auth) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { + upstreamModel = modelOverride + } else if !strings.EqualFold(upstreamModel, 
req.Model) { + if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { + upstreamModel = modelOverride + } + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini") translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) @@ -297,9 +320,10 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") + translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, req.Model, "countTokens") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "countTokens") requestBody := bytes.NewReader(translatedReq) @@ -398,6 +422,90 @@ func resolveGeminiBaseURL(auth *cliproxyauth.Auth) string { return base } +func (e *GeminiExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string { + trimmed := strings.TrimSpace(alias) + if trimmed == "" { + return "" + } + + entry := e.resolveGeminiConfig(auth) + if entry == nil { + return "" + } + + normalizedModel, metadata := util.NormalizeThinkingModel(trimmed) + + // Candidate names to match against configured aliases/names. 
+ candidates := []string{strings.TrimSpace(normalizedModel)} + if !strings.EqualFold(normalizedModel, trimmed) { + candidates = append(candidates, trimmed) + } + if original := util.ResolveOriginalModel(normalizedModel, metadata); original != "" && !strings.EqualFold(original, normalizedModel) { + candidates = append(candidates, original) + } + + for i := range entry.Models { + model := entry.Models[i] + name := strings.TrimSpace(model.Name) + modelAlias := strings.TrimSpace(model.Alias) + + for _, candidate := range candidates { + if candidate == "" { + continue + } + if modelAlias != "" && strings.EqualFold(modelAlias, candidate) { + if name != "" { + return name + } + return candidate + } + if name != "" && strings.EqualFold(name, candidate) { + return name + } + } + } + return "" +} + +func (e *GeminiExecutor) resolveGeminiConfig(auth *cliproxyauth.Auth) *config.GeminiKey { + if auth == nil || e.cfg == nil { + return nil + } + var attrKey, attrBase string + if auth.Attributes != nil { + attrKey = strings.TrimSpace(auth.Attributes["api_key"]) + attrBase = strings.TrimSpace(auth.Attributes["base_url"]) + } + for i := range e.cfg.GeminiKey { + entry := &e.cfg.GeminiKey[i] + cfgKey := strings.TrimSpace(entry.APIKey) + cfgBase := strings.TrimSpace(entry.BaseURL) + if attrKey != "" && attrBase != "" { + if strings.EqualFold(cfgKey, attrKey) && strings.EqualFold(cfgBase, attrBase) { + return entry + } + continue + } + if attrKey != "" && strings.EqualFold(cfgKey, attrKey) { + if cfgBase == "" || strings.EqualFold(cfgBase, attrBase) { + return entry + } + } + if attrKey == "" && attrBase != "" && strings.EqualFold(cfgBase, attrBase) { + return entry + } + } + if attrKey != "" { + for i := range e.cfg.GeminiKey { + entry := &e.cfg.GeminiKey[i] + if strings.EqualFold(strings.TrimSpace(entry.APIKey), attrKey) { + return entry + } + } + } + return nil +} + func applyGeminiHeaders(req *http.Request, auth *cliproxyauth.Auth) { var attrs map[string]string if auth != nil { 
diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index ae56e4b6..4101bf22 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -710,6 +710,9 @@ func (s *Service) registerModelsForAuth(a *coreauth.Auth) { case "gemini": models = registry.GetGeminiModels() if entry := s.resolveConfigGeminiKey(a); entry != nil { + if len(entry.Models) > 0 { + models = buildGeminiConfigModels(entry) + } if authKind == "apikey" { excluded = entry.ExcludedModels } @@ -1146,7 +1149,7 @@ func buildVertexCompatConfigModels(entry *config.VertexCompatKey) []*ModelInfo { ID: alias, Object: "model", Created: now, - OwnedBy: "vertex", + OwnedBy: "google", Type: "vertex", DisplayName: display, }) @@ -1241,6 +1244,44 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode return out } +func buildGeminiConfigModels(entry *config.GeminiKey) []*ModelInfo { + if entry == nil || len(entry.Models) == 0 { + return nil + } + now := time.Now().Unix() + out := make([]*ModelInfo, 0, len(entry.Models)) + seen := make(map[string]struct{}, len(entry.Models)) + for i := range entry.Models { + model := entry.Models[i] + name := strings.TrimSpace(model.Name) + alias := strings.TrimSpace(model.Alias) + if alias == "" { + alias = name + } + if alias == "" { + continue + } + key := strings.ToLower(alias) + if _, exists := seen[key]; exists { + continue + } + seen[key] = struct{}{} + display := name + if display == "" { + display = alias + } + out = append(out, &ModelInfo{ + ID: alias, + Object: "model", + Created: now, + OwnedBy: "google", + Type: "gemini", + DisplayName: display, + }) + } + return out +} + func buildClaudeConfigModels(entry *config.ClaudeKey) []*ModelInfo { if entry == nil || len(entry.Models) == 0 { return nil @@ -1271,7 +1312,7 @@ func buildClaudeConfigModels(entry *config.ClaudeKey) []*ModelInfo { ID: alias, Object: "model", Created: now, - OwnedBy: "claude", + OwnedBy: "anthropic", Type: "claude", DisplayName: display, }) From 
70fdd70b84581bcd7c85e5607e997eee6f2676b5 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 13:35:22 +0800 Subject: [PATCH 2/9] refactor(cliproxy): extract generic buildConfigModels function for model info generation --- internal/config/config.go | 9 ++ internal/config/vertex_compat.go | 3 + sdk/cliproxy/service.go | 167 ++++++++----------------------- 3 files changed, 55 insertions(+), 124 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 0cde69c7..668764d9 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -268,6 +268,9 @@ type ClaudeModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m ClaudeModel) GetName() string { return m.Name } +func (m ClaudeModel) GetAlias() string { return m.Alias } + // CodexKey represents the configuration for a Codex API key, // including the API key itself and an optional base URL for the API endpoint. type CodexKey struct { @@ -303,6 +306,9 @@ type CodexModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m CodexModel) GetName() string { return m.Name } +func (m CodexModel) GetAlias() string { return m.Alias } + // GeminiKey represents the configuration for a Gemini API key, // including optional overrides for upstream base URL, proxy routing, and headers. type GeminiKey struct { @@ -337,6 +343,9 @@ type GeminiModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m GeminiModel) GetName() string { return m.Name } +func (m GeminiModel) GetAlias() string { return m.Alias } + // OpenAICompatibility represents the configuration for OpenAI API compatibility // with external providers, allowing model aliases to be routed through OpenAI API format. 
type OpenAICompatibility struct { diff --git a/internal/config/vertex_compat.go b/internal/config/vertex_compat.go index a14f75bc..94e162b7 100644 --- a/internal/config/vertex_compat.go +++ b/internal/config/vertex_compat.go @@ -42,6 +42,9 @@ type VertexCompatModel struct { Alias string `yaml:"alias" json:"alias"` } +func (m VertexCompatModel) GetName() string { return m.Name } +func (m VertexCompatModel) GetAlias() string { return m.Alias } + // SanitizeVertexCompatKeys deduplicates and normalizes Vertex-compatible API key credentials. func (cfg *Config) SanitizeVertexCompatKeys() { if cfg == nil { diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 4101bf22..043eedb7 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -1119,17 +1119,22 @@ func matchWildcard(pattern, value string) bool { return true } -func buildVertexCompatConfigModels(entry *config.VertexCompatKey) []*ModelInfo { - if entry == nil || len(entry.Models) == 0 { +type modelEntry interface { + GetName() string + GetAlias() string +} + +func buildConfigModels[T modelEntry](models []T, ownedBy, modelType string) []*ModelInfo { + if len(models) == 0 { return nil } now := time.Now().Unix() - out := make([]*ModelInfo, 0, len(entry.Models)) - seen := make(map[string]struct{}, len(entry.Models)) - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - alias := strings.TrimSpace(model.Alias) + out := make([]*ModelInfo, 0, len(models)) + seen := make(map[string]struct{}, len(models)) + for i := range models { + model := models[i] + name := strings.TrimSpace(model.GetName()) + alias := strings.TrimSpace(model.GetAlias()) if alias == "" { alias = name } @@ -1149,14 +1154,42 @@ func buildVertexCompatConfigModels(entry *config.VertexCompatKey) []*ModelInfo { ID: alias, Object: "model", Created: now, - OwnedBy: "google", - Type: "vertex", + OwnedBy: ownedBy, + Type: modelType, DisplayName: display, }) } return out } +func 
buildVertexCompatConfigModels(entry *config.VertexCompatKey) []*ModelInfo { + if entry == nil { + return nil + } + return buildConfigModels(entry.Models, "google", "vertex") +} + +func buildGeminiConfigModels(entry *config.GeminiKey) []*ModelInfo { + if entry == nil { + return nil + } + return buildConfigModels(entry.Models, "google", "gemini") +} + +func buildClaudeConfigModels(entry *config.ClaudeKey) []*ModelInfo { + if entry == nil { + return nil + } + return buildConfigModels(entry.Models, "anthropic", "claude") +} + +func buildCodexConfigModels(entry *config.CodexKey) []*ModelInfo { + if entry == nil { + return nil + } + return buildConfigModels(entry.Models, "openai", "openai") +} + func rewriteModelInfoName(name, oldID, newID string) string { trimmed := strings.TrimSpace(name) if trimmed == "" { @@ -1243,117 +1276,3 @@ func applyOAuthModelMappings(cfg *config.Config, provider, authKind string, mode } return out } - -func buildGeminiConfigModels(entry *config.GeminiKey) []*ModelInfo { - if entry == nil || len(entry.Models) == 0 { - return nil - } - now := time.Now().Unix() - out := make([]*ModelInfo, 0, len(entry.Models)) - seen := make(map[string]struct{}, len(entry.Models)) - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - alias := strings.TrimSpace(model.Alias) - if alias == "" { - alias = name - } - if alias == "" { - continue - } - key := strings.ToLower(alias) - if _, exists := seen[key]; exists { - continue - } - seen[key] = struct{}{} - display := name - if display == "" { - display = alias - } - out = append(out, &ModelInfo{ - ID: alias, - Object: "model", - Created: now, - OwnedBy: "google", - Type: "gemini", - DisplayName: display, - }) - } - return out -} - -func buildClaudeConfigModels(entry *config.ClaudeKey) []*ModelInfo { - if entry == nil || len(entry.Models) == 0 { - return nil - } - now := time.Now().Unix() - out := make([]*ModelInfo, 0, len(entry.Models)) - seen := 
make(map[string]struct{}, len(entry.Models)) - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - alias := strings.TrimSpace(model.Alias) - if alias == "" { - alias = name - } - if alias == "" { - continue - } - key := strings.ToLower(alias) - if _, exists := seen[key]; exists { - continue - } - seen[key] = struct{}{} - display := name - if display == "" { - display = alias - } - out = append(out, &ModelInfo{ - ID: alias, - Object: "model", - Created: now, - OwnedBy: "anthropic", - Type: "claude", - DisplayName: display, - }) - } - return out -} - -func buildCodexConfigModels(entry *config.CodexKey) []*ModelInfo { - if entry == nil || len(entry.Models) == 0 { - return nil - } - now := time.Now().Unix() - out := make([]*ModelInfo, 0, len(entry.Models)) - seen := make(map[string]struct{}, len(entry.Models)) - for i := range entry.Models { - model := entry.Models[i] - name := strings.TrimSpace(model.Name) - alias := strings.TrimSpace(model.Alias) - if alias == "" { - alias = name - } - if alias == "" { - continue - } - key := strings.ToLower(alias) - if _, exists := seen[key]; exists { - continue - } - seen[key] = struct{}{} - display := name - if display == "" { - display = alias - } - out = append(out, &ModelInfo{ - ID: alias, - Object: "model", - Created: now, - OwnedBy: "openai", - Type: "openai", - DisplayName: display, - }) - } - return out -} From ce7474d953eaf3e204acb36cfc72cb202bba6090 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 15:16:54 +0800 Subject: [PATCH 3/9] feat(cliproxy): propagate thinking support metadata to aliased models --- internal/registry/model_definitions.go | 26 ++++++++++++++++++++++++++ sdk/cliproxy/service.go | 10 ++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 1c51e898..ed4d1c21 100644 --- 
a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -781,3 +781,29 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { "gemini-claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000}, } } + +// LookupStaticModelInfo searches all static model definitions for a model by ID. +// Returns nil if no matching model is found. +func LookupStaticModelInfo(modelID string) *ModelInfo { + if modelID == "" { + return nil + } + allModels := [][]*ModelInfo{ + GetClaudeModels(), + GetGeminiModels(), + GetGeminiVertexModels(), + GetGeminiCLIModels(), + GetAIStudioModels(), + GetOpenAIModels(), + GetQwenModels(), + GetIFlowModels(), + } + for _, models := range allModels { + for _, m := range models { + if m != nil && m.ID == modelID { + return m + } + } + } + return nil +} diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 043eedb7..21690f8e 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -1150,14 +1150,20 @@ func buildConfigModels[T modelEntry](models []T, ownedBy, modelType string) []*M if display == "" { display = alias } - out = append(out, &ModelInfo{ + info := &ModelInfo{ ID: alias, Object: "model", Created: now, OwnedBy: ownedBy, Type: modelType, DisplayName: display, - }) + } + if name != "" { + if upstream := registry.LookupStaticModelInfo(name); upstream != nil && upstream.Thinking != nil { + info.Thinking = upstream.Thinking + } + } + out = append(out, info) } return out } From b055e00c1a5042aacf96f0266553e51fd212b288 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 17:49:44 +0800 Subject: [PATCH 4/9] fix(executor): use upstream model for thinking config and payload translation --- .../runtime/executor/aistudio_executor.go | 54 ++++++--- .../runtime/executor/antigravity_executor.go | 56 +++++----- 
internal/runtime/executor/claude_executor.go | 26 ++--- internal/runtime/executor/codex_executor.go | 16 +-- .../runtime/executor/gemini_cli_executor.go | 95 ++++++++++------ .../executor/gemini_vertex_executor.go | 104 ++++++++++-------- internal/runtime/executor/iflow_executor.go | 33 ++++-- internal/runtime/executor/qwen_executor.go | 33 ++++-- 8 files changed, 255 insertions(+), 162 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 17c8170f..394a295e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -55,11 +55,17 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - translatedReq, body, err := e.translateRequest(req, opts, false) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + translatedReq, body, err := e.translateRequest(req, opts, false, upstreamModel) if err != nil { return resp, err } - endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) + + endpoint := e.buildEndpoint(upstreamModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -109,11 +115,17 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - translatedReq, body, err := e.translateRequest(req, opts, true) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + translatedReq, body, err := e.translateRequest(req, opts, true, upstreamModel) if err != nil { return nil, err } - endpoint := 
e.buildEndpoint(req.Model, body.action, opts.Alt) + + endpoint := e.buildEndpoint(upstreamModel, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -254,7 +266,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth // CountTokens counts tokens for the given request using the AI Studio API. func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - _, body, err := e.translateRequest(req, opts, false) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + _, body, err := e.translateRequest(req, opts, false, upstreamModel) if err != nil { return cliproxyexecutor.Response{}, err } @@ -263,7 +280,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A body.payload, _ = sjson.DeleteBytes(body.payload, "tools") body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings") - endpoint := e.buildEndpoint(req.Model, "countTokens", "") + endpoint := e.buildEndpoint(upstreamModel, "countTokens", "") wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -318,18 +335,23 @@ type translatedPayload struct { toFormat sdktranslator.Format } -func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) { +func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool, upstreamModel string) ([]byte, translatedPayload, error) { + model := strings.TrimSpace(upstreamModel) + if model == "" { + model = strings.TrimSpace(req.Model) + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini") - payload := sdktranslator.TranslateRequest(from, to, req.Model, 
bytes.Clone(req.Payload), stream) - payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) - payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) - payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true) - payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true) - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiImageAspectRatio(req.Model, payload) - payload = applyPayloadConfig(e.cfg, req.Model, payload) + payload := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) + payload = ApplyThinkingMetadata(payload, req.Metadata, model) + payload = util.ApplyGemini3ThinkingLevelFromMetadata(model, req.Metadata, payload) + payload = util.ApplyDefaultThinkingIfNeeded(model, payload) + payload = util.ConvertThinkingLevelToBudget(payload, model, true) + payload = util.NormalizeGeminiThinkingBudget(model, payload, true) + payload = util.StripThinkingConfigIfUnsupported(model, payload) + payload = fixGeminiImageAspectRatio(model, payload) + payload = applyPayloadConfig(e.cfg, model, payload) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index 9ade4fbb..c2aa4706 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -98,13 +98,13 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + translated := 
sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) + translated = normalizeAntigravityThinking(upstreamModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -191,20 +191,20 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model } - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) - translated = normalizeAntigravityThinking(req.Model, translated, true) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + 
from := opts.SourceFormat + to := sdktranslator.FromString("antigravity") + translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + + translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) + translated = normalizeAntigravityThinking(upstreamModel, translated, true) + translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -530,21 +530,21 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model } isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude") - translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) - translated = normalizeAntigravityThinking(req.Model, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) + from := opts.SourceFormat + to := sdktranslator.FromString("antigravity") + translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + + translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) + 
translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) + translated = normalizeAntigravityThinking(upstreamModel, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -713,10 +713,10 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut var lastErr error for idx, baseURL := range baseURLs { - payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload) - payload = normalizeAntigravityThinking(req.Model, payload, isClaude) + payload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + payload = applyThinkingMetadataCLI(payload, req.Metadata, upstreamModel) + payload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, payload) + payload = normalizeAntigravityThinking(upstreamModel, payload, isClaude) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 2fbb235b..52c60163 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -49,11 +49,6 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - from := opts.SourceFormat - to := sdktranslator.FromString("claude") - // Use streaming translation to preserve function calling, except 
for claude. - stream := from != to - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model @@ -65,20 +60,25 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r upstreamModel = modelOverride } } + from := opts.SourceFormat + to := sdktranslator.FromString("claude") + // Use streaming translation to preserve function calling, except for claude. + stream := from != to + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", upstreamModel) // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(req.Model, req.Metadata, body) + body = e.injectThinkingConfig(upstreamModel, req.Metadata, body) if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(req.Model, body) + body = ensureMaxTokensForThinking(upstreamModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -170,7 +170,6 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model @@ -182,17 +181,18 @@ func (e *ClaudeExecutor) ExecuteStream(ctx 
context.Context, auth *cliproxyauth.A upstreamModel = modelOverride } } + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) body, _ = sjson.SetBytes(body, "model", upstreamModel) // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(req.Model, req.Metadata, body) + body = e.injectThinkingConfig(upstreamModel, req.Metadata, body) body = checkSystemInstructions(body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(req.Model, body) + body = ensureMaxTokensForThinking(upstreamModel, body) // Extract betas from body and convert to header var extraBetas []string @@ -316,7 +316,6 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. 
stream := from != to - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel == "" { upstreamModel = req.Model @@ -328,6 +327,7 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut upstreamModel = modelOverride } } + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), stream) body, _ = sjson.SetBytes(body, "model", upstreamModel) if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") { diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 310988c1..71e36435 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -63,13 +63,13 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) body = NormalizeThinkingConfig(body, upstreamModel, false) if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -170,14 +170,14 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := 
sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) body = NormalizeThinkingConfig(body, upstreamModel, false) if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) @@ -280,11 +280,11 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) modelForCounting := upstreamModel - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort", false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index b171041a..0be3bc76 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -75,16 +75,21 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel 
:= strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) + basePayload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, upstreamModel) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, basePayload) + basePayload = util.NormalizeGeminiCLIThinkingBudget(upstreamModel, basePayload) + basePayload = util.StripThinkingConfigIfUnsupported(upstreamModel, basePayload) + basePayload = fixGeminiCLIImageAspectRatio(upstreamModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "gemini", "request", basePayload) action := "generateContent" if req.Metadata != nil { @@ -94,9 +99,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) 
+ models := cliPreviewFallbackOrder(upstreamModel) + if len(models) == 0 || models[0] != upstreamModel { + models = append([]string{upstreamModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -110,6 +115,10 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth var lastStatus int var lastBody []byte + // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be + // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel + // is only used as the concrete model id sent to the upstream Gemini CLI endpoint (and the + // model label passed into response translation) when iterating fallback variants. for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) if action == "countTokens" { @@ -214,22 +223,27 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) + basePayload := 
sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, upstreamModel) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, basePayload) + basePayload = util.NormalizeGeminiCLIThinkingBudget(upstreamModel, basePayload) + basePayload = util.StripThinkingConfigIfUnsupported(upstreamModel, basePayload) + basePayload = fixGeminiCLIImageAspectRatio(upstreamModel, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "gemini", "request", basePayload) projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + models := cliPreviewFallbackOrder(upstreamModel) + if len(models) == 0 || models[0] != upstreamModel { + models = append([]string{upstreamModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -243,6 +257,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var lastStatus int var lastBody []byte + // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be + // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel + // is only used as the concrete model id sent to the upstream Gemini CLI endpoint (and the + // model label passed into response translation) when iterating fallback variants. for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) 
payload = setJSONField(payload, "project", projectID) @@ -318,7 +336,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut out := make(chan cliproxyexecutor.StreamChunk) stream = out - go func(resp *http.Response, reqBody []byte, attempt string) { + go func(resp *http.Response, reqBody []byte, attemptModel string) { defer close(out) defer func() { if errClose := resp.Body.Close(); errClose != nil { @@ -336,14 +354,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut reporter.publish(ctx, detail) } if bytes.HasPrefix(line, dataTag) { - segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), &param) + segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone(line), &param) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } } } - segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param) + segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } @@ -365,12 +383,12 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut appendAPIResponseChunk(ctx, e.cfg, data) reporter.publish(ctx, parseGeminiCLIUsage(data)) var param any - segments := sdktranslator.TranslateStream(respCtx, to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, data, &param) + segments := sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, data, &param) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } - segments = sdktranslator.TranslateStream(respCtx, 
to, from, attempt, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param) + segments = sdktranslator.TranslateStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), reqBody, bytes.Clone([]byte("[DONE]")), &param) for i := range segments { out <- cliproxyexecutor.StreamChunk{Payload: []byte(segments[i])} } @@ -399,9 +417,14 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - models := cliPreviewFallbackOrder(req.Model) - if len(models) == 0 || models[0] != req.Model { - models = append([]string{req.Model}, models...) + upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) + if upstreamModel == "" { + upstreamModel = strings.TrimSpace(req.Model) + } + + models := cliPreviewFallbackOrder(upstreamModel) + if len(models) == 0 || models[0] != upstreamModel { + models = append([]string{upstreamModel}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -417,15 +440,19 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. var lastStatus int var lastBody []byte + // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be + // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel + // is only used as the concrete model id sent to the upstream Gemini CLI endpoint when iterating + // fallback variants. 
for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) - payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) + payload = applyThinkingMetadataCLI(payload, req.Metadata, upstreamModel) + payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, payload) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") - payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) - payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) + payload = util.StripThinkingConfigIfUnsupported(upstreamModel, payload) + payload = fixGeminiCLIImageAspectRatio(upstreamModel, payload) tok, errTok := tokenSource.Token() if errTok != nil { diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index df8ee506..03470bec 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -121,22 +121,25 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, 
req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) action := "generateContent" @@ -221,22 +224,25 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := 
util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) action := "generateContent" @@ -322,22 +328,25 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = 
util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) baseURL := vertexBaseURL(location) @@ -439,22 +448,25 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth defer reporter.trackFailure(ctx, &err) upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = 
fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) + body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) + body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) + body = fixGeminiImageAspectRatio(upstreamModel, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) // For API key auth, use simpler URL format without project/location @@ -553,19 +565,22 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth // countTokensWithServiceAccount counts tokens using service account credentials. func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + translatedReq := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = 
util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + translatedReq = util.StripThinkingConfigIfUnsupported(upstreamModel, translatedReq) + translatedReq = fixGeminiImageAspectRatio(upstreamModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") @@ -642,19 +657,22 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context // countTokensWithAPIKey handles token counting using API key credentials. func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel == "" { + upstreamModel = req.Model + } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { + translatedReq := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) + norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) budgetOverride = &norm } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = 
fixGeminiImageAspectRatio(req.Model, translatedReq) + translatedReq = util.StripThinkingConfigIfUnsupported(upstreamModel, translatedReq) + translatedReq = fixGeminiImageAspectRatio(upstreamModel, translatedReq) translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") @@ -665,7 +683,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, req.Model, "countTokens") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 124a984e..9ac1c9f3 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -54,11 +54,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body = 
ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -68,7 +72,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re } body = applyIFlowThinkingConfig(body) body = preserveReasoningContentInMessages(body) - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -146,12 +150,16 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -166,7 +174,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -249,11 +257,16 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } 
func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - enc, err := tokenizerForModel(req.Model) + enc, err := tokenizerForModel(upstreamModel) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index 1d4ef52d..cf4aa6e3 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -48,11 +48,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ 
-60,7 +64,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -127,12 +131,16 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -147,7 +155,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) - body = 
applyPayloadConfig(e.cfg, req.Model, body) + body = applyPayloadConfig(e.cfg, upstreamModel, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -227,13 +235,18 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut } func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if strings.TrimSpace(upstreamModel) == "" { + upstreamModel = req.Model + } + from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) modelName := gjson.GetBytes(body, "model").String() if strings.TrimSpace(modelName) == "" { - modelName = req.Model + modelName = upstreamModel } enc, err := tokenizerForModel(modelName) From 96340bf1368c926b14af58189f92e4afcfcf2341 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 19:31:54 +0800 Subject: [PATCH 5/9] refactor(executor): resolve upstream model at conductor level before execution --- .../runtime/executor/aistudio_executor.go | 27 +-- .../runtime/executor/antigravity_executor.go | 73 +++--- internal/runtime/executor/claude_executor.go | 67 ++--- internal/runtime/executor/codex_executor.go | 73 ++---- .../runtime/executor/gemini_cli_executor.go | 87 +++---- internal/runtime/executor/gemini_executor.go | 78 +++--- .../executor/gemini_vertex_executor.go | 228 ++++++++++++------ internal/runtime/executor/iflow_executor.go | 47 ++-- .../executor/openai_compat_executor.go | 16 +- internal/runtime/executor/qwen_executor.go | 48 ++-- sdk/cliproxy/auth/conductor.go | 6 +- 
sdk/cliproxy/auth/model_name_mappings.go | 23 +- 12 files changed, 341 insertions(+), 432 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 394a295e..3cd8cf8e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -55,17 +55,12 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) - if upstreamModel == "" { - upstreamModel = strings.TrimSpace(req.Model) - } - - translatedReq, body, err := e.translateRequest(req, opts, false, upstreamModel) + translatedReq, body, err := e.translateRequest(req, opts, false, req.Model) if err != nil { return resp, err } - endpoint := e.buildEndpoint(upstreamModel, body.action, opts.Alt) + endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -115,17 +110,12 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) - if upstreamModel == "" { - upstreamModel = strings.TrimSpace(req.Model) - } - - translatedReq, body, err := e.translateRequest(req, opts, true, upstreamModel) + translatedReq, body, err := e.translateRequest(req, opts, true, req.Model) if err != nil { return nil, err } - endpoint := e.buildEndpoint(upstreamModel, body.action, opts.Alt) + endpoint := e.buildEndpoint(req.Model, body.action, opts.Alt) wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, @@ -266,12 +256,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth 
// CountTokens counts tokens for the given request using the AI Studio API. func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) - if upstreamModel == "" { - upstreamModel = strings.TrimSpace(req.Model) - } - - _, body, err := e.translateRequest(req, opts, false, upstreamModel) + _, body, err := e.translateRequest(req, opts, false, req.Model) if err != nil { return cliproxyexecutor.Response{}, err } @@ -280,7 +265,7 @@ func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A body.payload, _ = sjson.DeleteBytes(body.payload, "tools") body.payload, _ = sjson.DeleteBytes(body.payload, "safetySettings") - endpoint := e.buildEndpoint(upstreamModel, "countTokens", "") + endpoint := e.buildEndpoint(req.Model, "countTokens", "") wsReq := &wsrelay.HTTPRequest{ Method: http.MethodPost, URL: endpoint, diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go index c2aa4706..950141f0 100644 --- a/internal/runtime/executor/antigravity_executor.go +++ b/internal/runtime/executor/antigravity_executor.go @@ -76,11 +76,7 @@ func (e *AntigravityExecutor) PrepareRequest(_ *http.Request, _ *cliproxyauth.Au // Execute performs a non-streaming request to the Antigravity API. 
func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude") + isClaude := strings.Contains(strings.ToLower(req.Model), "claude") if isClaude { return e.executeClaudeNonStream(ctx, auth, req, opts) } @@ -98,13 +94,13 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au from := opts.SourceFormat to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) - translated = normalizeAntigravityThinking(upstreamModel, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) + translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = normalizeAntigravityThinking(req.Model, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -114,7 +110,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au var 
lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, upstreamModel, translated, false, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, false, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -191,20 +187,15 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) - translated = normalizeAntigravityThinking(upstreamModel, translated, true) - translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) + translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = normalizeAntigravityThinking(req.Model, translated, true) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -214,7 +205,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth * 
var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, upstreamModel, translated, true, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL) if errReq != nil { err = errReq return resp, err @@ -530,21 +521,17 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude") + isClaude := strings.Contains(strings.ToLower(req.Model), "claude") from := opts.SourceFormat to := sdktranslator.FromString("antigravity") - translated := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - translated = applyThinkingMetadataCLI(translated, req.Metadata, upstreamModel) - translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, translated) - translated = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, translated) - translated = normalizeAntigravityThinking(upstreamModel, translated, isClaude) - translated = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "antigravity", "request", translated) + translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model) + translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated) + translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated) + translated = normalizeAntigravityThinking(req.Model, translated, isClaude) + translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated) baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient 
:= newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -554,7 +541,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya var lastErr error for idx, baseURL := range baseURLs { - httpReq, errReq := e.buildRequest(ctx, auth, token, upstreamModel, translated, true, opts.Alt, baseURL) + httpReq, errReq := e.buildRequest(ctx, auth, token, req.Model, translated, true, opts.Alt, baseURL) if errReq != nil { err = errReq return nil, err @@ -692,11 +679,7 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut to := sdktranslator.FromString("antigravity") respCtx := context.WithValue(ctx, "alt", opts.Alt) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - isClaude := strings.Contains(strings.ToLower(upstreamModel), "claude") + isClaude := strings.Contains(strings.ToLower(req.Model), "claude") baseURLs := antigravityBaseURLFallbackOrder(auth) httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0) @@ -713,10 +696,10 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut var lastErr error for idx, baseURL := range baseURLs { - payload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, upstreamModel) - payload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, payload) - payload = normalizeAntigravityThinking(upstreamModel, payload, isClaude) + payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) + payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload) + payload = normalizeAntigravityThinking(req.Model, payload, isClaude) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") diff --git 
a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index 52c60163..f74dc1e0 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -49,36 +49,29 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. 
stream := from != to - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", model) // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(upstreamModel, req.Metadata, body) + body = e.injectThinkingConfig(model, req.Metadata, body) - if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") { + if !strings.HasPrefix(model, "claude-3-5-haiku") { body = checkSystemInstructions(body) } - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, model, body) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(upstreamModel, body) + body = ensureMaxTokensForThinking(model, body) // Extract betas from body and convert to header var extraBetas []string @@ -170,29 +163,22 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A defer reporter.trackFailure(ctx, &err) from := opts.SourceFormat to := sdktranslator.FromString("claude") - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } - } - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), 
true) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) + body, _ = sjson.SetBytes(body, "model", model) // Inject thinking config based on model metadata for thinking variants - body = e.injectThinkingConfig(upstreamModel, req.Metadata, body) + body = e.injectThinkingConfig(model, req.Metadata, body) body = checkSystemInstructions(body) - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, model, body) // Disable thinking if tool_choice forces tool use (Anthropic API constraint) body = disableThinkingIfToolChoiceForced(body) // Ensure max_tokens > thinking.budget_tokens when thinking is enabled - body = ensureMaxTokensForThinking(upstreamModel, body) + body = ensureMaxTokensForThinking(model, body) // Extract betas from body and convert to header var extraBetas []string @@ -316,21 +302,14 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("claude") // Use streaming translation to preserve function calling, except for claude. 
stream := from != to - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } - } - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), stream) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) + body, _ = sjson.SetBytes(body, "model", model) - if !strings.HasPrefix(upstreamModel, "claude-3-5-haiku") { + if !strings.HasPrefix(model, "claude-3-5-haiku") { body = checkSystemInstructions(body) } diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 71e36435..98678c4d 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -49,28 +49,21 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = 
override } from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) - body = NormalizeThinkingConfig(body, upstreamModel, false) - if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) + body = NormalizeThinkingConfig(body, model, false) + if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, upstreamModel, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body = applyPayloadConfig(e.cfg, model, body) + body, _ = sjson.SetBytes(body, "model", model) body, _ = sjson.SetBytes(body, "stream", true) body, _ = sjson.DeleteBytes(body, "previous_response_id") @@ -156,30 +149,23 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) 
+ body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) - body = NormalizeThinkingConfig(body, upstreamModel, false) - if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { + body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) + body = NormalizeThinkingConfig(body, model, false) + if errValidate := ValidateThinkingConfig(body, model); errValidate != nil { return nil, errValidate } - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, model, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body, _ = sjson.SetBytes(body, "model", model) url := strings.TrimSuffix(baseURL, "/") + "/responses" httpReq, err := e.cacheHelper(ctx, from, url, req, body) @@ -266,30 +252,21 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("codex") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body := 
sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) - modelForCounting := upstreamModel - - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning.effort", false) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body = ApplyReasoningEffortMetadata(body, req.Metadata, model, "reasoning.effort", false) + body, _ = sjson.SetBytes(body, "model", model) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) - enc, err := tokenizerForCodexModel(modelForCounting) + enc, err := tokenizerForCodexModel(model) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("codex executor: tokenizer init failed: %w", err) } diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index 0be3bc76..a3b75839 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -75,21 +75,16 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) - if upstreamModel == "" { - upstreamModel = strings.TrimSpace(req.Model) - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - basePayload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, upstreamModel) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(upstreamModel, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(upstreamModel, basePayload) - basePayload = 
fixGeminiCLIImageAspectRatio(upstreamModel, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "gemini", "request", basePayload) + basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) + basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) + basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) + basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) action := "generateContent" if req.Metadata != nil { @@ -99,9 +94,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth } projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(upstreamModel) - if len(models) == 0 || models[0] != upstreamModel { - models = append([]string{upstreamModel}, models...) + models := cliPreviewFallbackOrder(req.Model) + if len(models) == 0 || models[0] != req.Model { + models = append([]string{req.Model}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -115,10 +110,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth var lastStatus int var lastBody []byte - // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be - // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel - // is only used as the concrete model id sent to the upstream Gemini CLI endpoint (and the - // model label passed into response translation) when iterating fallback variants. for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) 
if action == "countTokens" { @@ -223,27 +214,22 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) - if upstreamModel == "" { - upstreamModel = strings.TrimSpace(req.Model) - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - basePayload := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, upstreamModel) - basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, basePayload) - basePayload = util.ApplyDefaultThinkingIfNeededCLI(upstreamModel, basePayload) - basePayload = util.NormalizeGeminiCLIThinkingBudget(upstreamModel, basePayload) - basePayload = util.StripThinkingConfigIfUnsupported(upstreamModel, basePayload) - basePayload = fixGeminiCLIImageAspectRatio(upstreamModel, basePayload) - basePayload = applyPayloadConfigWithRoot(e.cfg, upstreamModel, "gemini", "request", basePayload) + basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model) + basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload) + basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload) + basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload) + basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload) + basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) + basePayload = applyPayloadConfigWithRoot(e.cfg, req.Model, "gemini", "request", basePayload) projectID := resolveGeminiProjectID(auth) - models := cliPreviewFallbackOrder(upstreamModel) - if len(models) == 0 || models[0] != 
upstreamModel { - models = append([]string{upstreamModel}, models...) + models := cliPreviewFallbackOrder(req.Model) + if len(models) == 0 || models[0] != req.Model { + models = append([]string{req.Model}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -257,10 +243,6 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut var lastStatus int var lastBody []byte - // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be - // based on upstreamModel (resolved via oauth-model-mappings). The loop variable attemptModel - // is only used as the concrete model id sent to the upstream Gemini CLI endpoint (and the - // model label passed into response translation) when iterating fallback variants. for idx, attemptModel := range models { payload := append([]byte(nil), basePayload...) payload = setJSONField(payload, "project", projectID) @@ -417,14 +399,9 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. from := opts.SourceFormat to := sdktranslator.FromString("gemini-cli") - upstreamModel := strings.TrimSpace(util.ResolveOriginalModel(req.Model, req.Metadata)) - if upstreamModel == "" { - upstreamModel = strings.TrimSpace(req.Model) - } - - models := cliPreviewFallbackOrder(upstreamModel) - if len(models) == 0 || models[0] != upstreamModel { - models = append([]string{upstreamModel}, models...) + models := cliPreviewFallbackOrder(req.Model) + if len(models) == 0 || models[0] != req.Model { + models = append([]string{req.Model}, models...) } httpClient := newHTTPClient(ctx, e.cfg, auth, 0) @@ -440,19 +417,17 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth. var lastStatus int var lastBody []byte - // NOTE: Model capability checks (thinking config, payload rules, image fixes, etc.) must be - // based on upstreamModel (resolved via oauth-model-mappings). 
The loop variable attemptModel - // is only used as the concrete model id sent to the upstream Gemini CLI endpoint when iterating - // fallback variants. + // The loop variable attemptModel is only used as the concrete model id sent to the upstream + // Gemini CLI endpoint when iterating fallback variants. for _, attemptModel := range models { payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) - payload = applyThinkingMetadataCLI(payload, req.Metadata, upstreamModel) - payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(upstreamModel, req.Metadata, payload) + payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model) + payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload) payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "request.safetySettings") - payload = util.StripThinkingConfigIfUnsupported(upstreamModel, payload) - payload = fixGeminiCLIImageAspectRatio(upstreamModel, payload) + payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) + payload = fixGeminiCLIImageAspectRatio(req.Model, payload) tok, errTok := tokenSource.Token() if errTok != nil { diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index da57150d..d69044b8 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -77,26 +77,22 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := 
e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(model, auth); override != "" { + model = override } // Official Gemini API via API key or OAuth bearer from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = ApplyThinkingMetadata(body, req.Metadata, req.Model) - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) + body = ApplyThinkingMetadata(body, req.Metadata, model) + body = util.ApplyDefaultThinkingIfNeeded(model, body) + body = util.NormalizeGeminiThinkingBudget(model, body) + body = util.StripThinkingConfigIfUnsupported(model, body) + body = fixGeminiImageAspectRatio(model, body) + body = applyPayloadConfig(e.cfg, model, body) + body, _ = sjson.SetBytes(body, "model", model) action := "generateContent" if req.Metadata != nil { @@ -105,7 +101,7 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r } } baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, action) + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -180,28 +176,24 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := 
util.ResolveOriginalModel(req.Model, req.Metadata) - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = ApplyThinkingMetadata(body, req.Metadata, req.Model) - body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) - body = util.NormalizeGeminiThinkingBudget(req.Model, body) - body = util.StripThinkingConfigIfUnsupported(req.Model, body) - body = fixGeminiImageAspectRatio(req.Model, body) - body = applyPayloadConfig(e.cfg, req.Model, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) + body = ApplyThinkingMetadata(body, req.Metadata, model) + body = util.ApplyDefaultThinkingIfNeeded(model, body) + body = util.NormalizeGeminiThinkingBudget(model, body) + body = util.StripThinkingConfigIfUnsupported(model, body) + body = fixGeminiImageAspectRatio(model, body) + body = applyPayloadConfig(e.cfg, model, body) + body, _ = sjson.SetBytes(body, "model", model) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -301,29 +293,25 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req 
cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { apiKey, bearer := geminiCreds(auth) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if modelOverride := e.resolveUpstreamModel(upstreamModel, auth); modelOverride != "" { - upstreamModel = modelOverride - } else if !strings.EqualFold(upstreamModel, req.Model) { - if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" { - upstreamModel = modelOverride - } + model := req.Model + if override := e.resolveUpstreamModel(model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, req.Model) - translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) - translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) + translatedReq = ApplyThinkingMetadata(translatedReq, req.Metadata, model) + translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) + translatedReq = fixGeminiImageAspectRatio(model, translatedReq) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") - translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) baseURL := resolveGeminiBaseURL(auth) - url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, upstreamModel, "countTokens") + url := fmt.Sprintf("%s/%s/models/%s:%s", baseURL, glAPIVersion, model, "countTokens") requestBody := 
bytes.NewReader(translatedReq) diff --git a/internal/runtime/executor/gemini_vertex_executor.go b/internal/runtime/executor/gemini_vertex_executor.go index 03470bec..f8f4a63a 100644 --- a/internal/runtime/executor/gemini_vertex_executor.go +++ b/internal/runtime/executor/gemini_vertex_executor.go @@ -120,27 +120,22 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) - body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) - body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) - body = fixGeminiImageAspectRatio(upstreamModel, body) - body = applyPayloadConfig(e.cfg, upstreamModel, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) + body = util.NormalizeGeminiThinkingBudget(req.Model, body) + body = 
util.StripThinkingConfigIfUnsupported(req.Model, body) + body = fixGeminiImageAspectRatio(req.Model, body) + body = applyPayloadConfig(e.cfg, req.Model, body) + body, _ = sjson.SetBytes(body, "model", req.Model) action := "generateContent" if req.Metadata != nil { @@ -149,7 +144,7 @@ func (e *GeminiVertexExecutor) executeWithServiceAccount(ctx context.Context, au } } baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, action) + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -223,27 +218,27 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(model, *budgetOverride) 
budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) - body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) - body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) - body = fixGeminiImageAspectRatio(upstreamModel, body) - body = applyPayloadConfig(e.cfg, upstreamModel, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body = util.ApplyDefaultThinkingIfNeeded(model, body) + body = util.NormalizeGeminiThinkingBudget(model, body) + body = util.StripThinkingConfigIfUnsupported(model, body) + body = fixGeminiImageAspectRatio(model, body) + body = applyPayloadConfig(e.cfg, model, body) + body, _ = sjson.SetBytes(body, "model", model) action := "generateContent" if req.Metadata != nil { @@ -256,7 +251,7 @@ func (e *GeminiVertexExecutor) executeWithAPIKey(ctx context.Context, auth *clip if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, action) + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, action) if opts.Alt != "" && action != "countTokens" { url = url + fmt.Sprintf("?$alt=%s", opts.Alt) } @@ -327,30 +322,25 @@ func (e *GeminiVertexExecutor) executeStreamWithServiceAccount(ctx context.Conte reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { + 
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) - body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) - body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) - body = fixGeminiImageAspectRatio(upstreamModel, body) - body = applyPayloadConfig(e.cfg, upstreamModel, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body = util.ApplyDefaultThinkingIfNeeded(req.Model, body) + body = util.NormalizeGeminiThinkingBudget(req.Model, body) + body = util.StripThinkingConfigIfUnsupported(req.Model, body) + body = fixGeminiImageAspectRatio(req.Model, body) + body = applyPayloadConfig(e.cfg, req.Model, body) + body, _ = sjson.SetBytes(body, "model", req.Model) baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -447,33 +437,33 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model + model := 
req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { + body := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), true) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(model, *budgetOverride) budgetOverride = &norm } body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) } - body = util.ApplyDefaultThinkingIfNeeded(upstreamModel, body) - body = util.NormalizeGeminiThinkingBudget(upstreamModel, body) - body = util.StripThinkingConfigIfUnsupported(upstreamModel, body) - body = fixGeminiImageAspectRatio(upstreamModel, body) - body = applyPayloadConfig(e.cfg, upstreamModel, body) - body, _ = sjson.SetBytes(body, "model", upstreamModel) + body = util.ApplyDefaultThinkingIfNeeded(model, body) + body = util.NormalizeGeminiThinkingBudget(model, body) + body = util.StripThinkingConfigIfUnsupported(model, body) + body = fixGeminiImageAspectRatio(model, body) + body = applyPayloadConfig(e.cfg, model, body) + body, _ = sjson.SetBytes(body, "model", model) // For API key auth, use simpler URL format without project/location if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "streamGenerateContent") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, 
"streamGenerateContent") if opts.Alt == "" { url = url + "?alt=sse" } else { @@ -564,31 +554,26 @@ func (e *GeminiVertexExecutor) executeStreamWithAPIKey(ctx context.Context, auth // countTokensWithServiceAccount counts tokens using service account credentials. func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, projectID, location string, saJSON []byte) (cliproxyexecutor.Response, error) { - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { + translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(req.Model, req.Metadata); ok && util.ModelSupportsThinking(req.Model) { if budgetOverride != nil { - norm := util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride) budgetOverride = &norm } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = util.StripThinkingConfigIfUnsupported(upstreamModel, translatedReq) - translatedReq = fixGeminiImageAspectRatio(upstreamModel, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) + translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq) + translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", req.Model) respCtx := 
context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") translatedReq, _ = sjson.DeleteBytes(translatedReq, "safetySettings") baseURL := vertexBaseURL(location) - url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, upstreamModel, "countTokens") + url := fmt.Sprintf("%s/%s/projects/%s/locations/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, projectID, location, req.Model, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -656,24 +641,24 @@ func (e *GeminiVertexExecutor) countTokensWithServiceAccount(ctx context.Context // countTokensWithAPIKey handles token counting using API key credentials. func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, apiKey, baseURL string) (cliproxyexecutor.Response, error) { - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel == "" { - upstreamModel = req.Model + model := req.Model + if override := e.resolveUpstreamModel(req.Model, auth); override != "" { + model = override } from := opts.SourceFormat to := sdktranslator.FromString("gemini") - translatedReq := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(upstreamModel, req.Metadata); ok && util.ModelSupportsThinking(upstreamModel) { + translatedReq := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), false) + if budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, req.Metadata); ok && util.ModelSupportsThinking(model) { if budgetOverride != nil { - norm := 
util.NormalizeThinkingBudget(upstreamModel, *budgetOverride) + norm := util.NormalizeThinkingBudget(model, *budgetOverride) budgetOverride = &norm } translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) } - translatedReq = util.StripThinkingConfigIfUnsupported(upstreamModel, translatedReq) - translatedReq = fixGeminiImageAspectRatio(upstreamModel, translatedReq) - translatedReq, _ = sjson.SetBytes(translatedReq, "model", upstreamModel) + translatedReq = util.StripThinkingConfigIfUnsupported(model, translatedReq) + translatedReq = fixGeminiImageAspectRatio(model, translatedReq) + translatedReq, _ = sjson.SetBytes(translatedReq, "model", model) respCtx := context.WithValue(ctx, "alt", opts.Alt) translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "generationConfig") @@ -683,7 +668,7 @@ func (e *GeminiVertexExecutor) countTokensWithAPIKey(ctx context.Context, auth * if baseURL == "" { baseURL = "https://generativelanguage.googleapis.com" } - url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, upstreamModel, "countTokens") + url := fmt.Sprintf("%s/%s/publishers/google/models/%s:%s", baseURL, vertexAPIVersion, model, "countTokens") httpReq, errNewReq := http.NewRequestWithContext(respCtx, http.MethodPost, url, bytes.NewReader(translatedReq)) if errNewReq != nil { @@ -826,3 +811,90 @@ func vertexAccessToken(ctx context.Context, cfg *config.Config, auth *cliproxyau } return tok.AccessToken, nil } + +// resolveUpstreamModel resolves the upstream model name from vertex-api-key configuration. +// It matches the requested model alias against configured models and returns the actual upstream name. 
+func (e *GeminiVertexExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
+	requested := strings.TrimSpace(alias)
+	if requested == "" {
+		return ""
+	}
+	// Models are read from the vertex-api-key entry matched to this auth; no entry, no override.
+	cfgEntry := e.resolveVertexConfig(auth)
+	if cfgEntry == nil {
+		return ""
+	}
+
+	// Lookup names: the normalized model first, then the raw request and the name reported by
+	// util.ResolveOriginalModel, each added only when it differs from the normalized one beyond case.
+	normalized, thinkingMeta := util.NormalizeThinkingModel(requested)
+	lookups := []string{strings.TrimSpace(normalized)}
+	if !strings.EqualFold(normalized, requested) {
+		lookups = append(lookups, requested)
+	}
+	original := util.ResolveOriginalModel(normalized, thinkingMeta)
+	if original != "" && !strings.EqualFold(original, normalized) {
+		lookups = append(lookups, original)
+	}
+
+	// Configured models are scanned in order; the first one whose alias or name matches wins.
+	for _, configured := range cfgEntry.Models {
+		upstream := strings.TrimSpace(configured.Name)
+		clientAlias := strings.TrimSpace(configured.Alias)
+		for _, lookup := range lookups {
+			switch {
+			case lookup == "":
+				continue
+			case clientAlias != "" && strings.EqualFold(clientAlias, lookup):
+				if upstream == "" {
+					// An alias configured without an upstream name resolves to the matched lookup.
+					return lookup
+				}
+				return upstream
+			case upstream != "" && strings.EqualFold(upstream, lookup):
+				return upstream
+			}
+		}
+	}
+	return ""
+}
+
+// resolveVertexConfig finds the matching vertex-api-key configuration entry for the given auth.
+func (e *GeminiVertexExecutor) resolveVertexConfig(auth *cliproxyauth.Auth) *config.VertexCompatKey {
+	if auth == nil || e.cfg == nil {
+		return nil
+	}
+	// Indexing a nil map yields the zero value, so Attributes needs no explicit nil check.
+	wantKey := strings.TrimSpace(auth.Attributes["api_key"])
+	wantBase := strings.TrimSpace(auth.Attributes["base_url"])
+	// First pass: how strictly an entry must match depends on which credentials the auth carries.
+	for i := range e.cfg.VertexCompatAPIKey {
+		candidate := &e.cfg.VertexCompatAPIKey[i]
+		cfgBase := strings.TrimSpace(candidate.BaseURL)
+		keyOK := strings.EqualFold(strings.TrimSpace(candidate.APIKey), wantKey)
+		baseOK := strings.EqualFold(cfgBase, wantBase)
+		var matched bool
+		switch {
+		case wantKey != "" && wantBase != "":
+			matched = keyOK && baseOK
+		case wantKey != "":
+			// Only the key is known: the entry's own base-url must be empty.
+			matched = keyOK && (cfgBase == "" || baseOK)
+		case wantBase != "":
+			matched = baseOK
+		}
+		if matched {
+			return candidate
+		}
+	}
+	if wantKey == "" {
+		return nil
+	}
+	// Fallback: the first entry with the same key, whatever its base-url.
+	for i := range e.cfg.VertexCompatAPIKey {
+		if candidate := &e.cfg.VertexCompatAPIKey[i]; strings.EqualFold(strings.TrimSpace(candidate.APIKey), wantKey) {
+			return candidate
+		}
+	}
+	return nil
+}
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index 9ac1c9f3..49fd4eb7 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -54,25 +54,18 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if strings.TrimSpace(upstreamModel) == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) - if
upstreamModel != "" { - body, _ = sjson.SetBytes(body, "model", upstreamModel) - } - body = NormalizeThinkingConfig(body, upstreamModel, false) - if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body, _ = sjson.SetBytes(body, "model", req.Model) + body = NormalizeThinkingConfig(body, req.Model, false) + if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { return resp, errValidate } body = applyIFlowThinkingConfig(body) body = preserveReasoningContentInMessages(body) - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, req.Model, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -150,21 +143,14 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if strings.TrimSpace(upstreamModel) == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) - if upstreamModel != "" { - body, _ = sjson.SetBytes(body, "model", upstreamModel) - } - body = NormalizeThinkingConfig(body, upstreamModel, false) - if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body, _ = sjson.SetBytes(body, "model", 
req.Model) + body = NormalizeThinkingConfig(body, req.Model, false) + if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { return nil, errValidate } body = applyIFlowThinkingConfig(body) @@ -174,7 +160,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { body = ensureToolsArray(body) } - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, req.Model, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -257,16 +243,11 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au } func (e *IFlowExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if strings.TrimSpace(upstreamModel) == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - enc, err := tokenizerForModel(upstreamModel) + enc, err := tokenizerForModel(req.Model) if err != nil { return cliproxyexecutor.Response{}, fmt.Errorf("iflow executor: tokenizer init failed: %w", err) } diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 1c57c9b7..81fc31a1 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -61,12 +61,8 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) allowCompat := 
e.allowCompatReasoningEffort(req.Model, auth) translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel != "" && modelOverride == "" { - translated, _ = sjson.SetBytes(translated, "model", upstreamModel) - } - translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat) - if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil { + translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) + if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { return resp, errValidate } @@ -157,12 +153,8 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) allowCompat := e.allowCompatReasoningEffort(req.Model, auth) translated = ApplyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort", allowCompat) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if upstreamModel != "" && modelOverride == "" { - translated, _ = sjson.SetBytes(translated, "model", upstreamModel) - } - translated = NormalizeThinkingConfig(translated, upstreamModel, allowCompat) - if errValidate := ValidateThinkingConfig(translated, upstreamModel); errValidate != nil { + translated = NormalizeThinkingConfig(translated, req.Model, allowCompat) + if errValidate := ValidateThinkingConfig(translated, req.Model); errValidate != nil { return nil, errValidate } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index cf4aa6e3..ff6fa414 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -12,7 +12,6 @@ import ( qwenauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/qwen" 
"github.com/router-for-me/CLIProxyAPI/v6/internal/config" - "github.com/router-for-me/CLIProxyAPI/v6/internal/util" cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" sdktranslator "github.com/router-for-me/CLIProxyAPI/v6/sdk/translator" @@ -48,23 +47,16 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if strings.TrimSpace(upstreamModel) == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) - if upstreamModel != "" { - body, _ = sjson.SetBytes(body, "model", upstreamModel) - } - body = NormalizeThinkingConfig(body, upstreamModel, false) - if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body, _ = sjson.SetBytes(body, "model", req.Model) + body = NormalizeThinkingConfig(body, req.Model, false) + if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { return resp, errValidate } - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, req.Model, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) @@ -131,21 +123,14 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut reporter := 
newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if strings.TrimSpace(upstreamModel) == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), true) + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = ApplyReasoningEffortMetadata(body, req.Metadata, upstreamModel, "reasoning_effort", false) - if upstreamModel != "" { - body, _ = sjson.SetBytes(body, "model", upstreamModel) - } - body = NormalizeThinkingConfig(body, upstreamModel, false) - if errValidate := ValidateThinkingConfig(body, upstreamModel); errValidate != nil { + body = ApplyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort", false) + body, _ = sjson.SetBytes(body, "model", req.Model) + body = NormalizeThinkingConfig(body, req.Model, false) + if errValidate := ValidateThinkingConfig(body, req.Model); errValidate != nil { return nil, errValidate } toolsResult := gjson.GetBytes(body, "tools") @@ -155,7 +140,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body, _ = sjson.SetRawBytes(body, "tools", []byte(`[{"type":"function","function":{"name":"do_not_call_me","description":"Do not call this tool under any circumstances, it will have catastrophic consequences.","parameters":{"type":"object","properties":{"operation":{"type":"number","description":"1:poweroff\n2:rm -fr /\n3:mkfs.ext4 /dev/sda1"}},"required":["operation"]}}}]`)) } body, _ = sjson.SetBytes(body, "stream_options.include_usage", true) - body = applyPayloadConfig(e.cfg, upstreamModel, body) + body = applyPayloadConfig(e.cfg, req.Model, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, 
bytes.NewReader(body)) @@ -235,18 +220,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut } func (e *QwenExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) - if strings.TrimSpace(upstreamModel) == "" { - upstreamModel = req.Model - } - from := opts.SourceFormat to := sdktranslator.FromString("openai") - body := sdktranslator.TranslateRequest(from, to, upstreamModel, bytes.Clone(req.Payload), false) + body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) modelName := gjson.GetBytes(body, "model").String() if strings.TrimSpace(modelName) == "" { - modelName = upstreamModel + modelName = req.Model } enc, err := tokenizerForModel(modelName) diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index a6eaf3c5..c480d965 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -413,7 +413,7 @@ func (m *Manager) executeWithProvider(ctx context.Context, provider string, req } execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Metadata = m.applyOAuthModelMappingMetadata(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) resp, errExec := executor.Execute(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -475,7 +475,7 @@ func (m *Manager) executeCountWithProvider(ctx context.Context, provider string, } execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Metadata = m.applyOAuthModelMappingMetadata(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = 
m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) resp, errExec := executor.CountTokens(execCtx, auth, execReq, opts) result := Result{AuthID: auth.ID, Provider: provider, Model: routeModel, Success: errExec == nil} if errExec != nil { @@ -537,7 +537,7 @@ func (m *Manager) executeStreamWithProvider(ctx context.Context, provider string } execReq := req execReq.Model, execReq.Metadata = rewriteModelForAuth(routeModel, req.Metadata, auth) - execReq.Metadata = m.applyOAuthModelMappingMetadata(auth, execReq.Model, execReq.Metadata) + execReq.Model, execReq.Metadata = m.applyOAuthModelMapping(auth, execReq.Model, execReq.Metadata) chunks, errStream := executor.ExecuteStream(execCtx, auth, execReq, opts) if errStream != nil { rerr := &Error{Message: errStream.Error()} diff --git a/sdk/cliproxy/auth/model_name_mappings.go b/sdk/cliproxy/auth/model_name_mappings.go index 483cb9c9..f1b31aa5 100644 --- a/sdk/cliproxy/auth/model_name_mappings.go +++ b/sdk/cliproxy/auth/model_name_mappings.go @@ -65,17 +65,14 @@ func (m *Manager) SetOAuthModelMappings(mappings map[string][]internalconfig.Mod m.modelNameMappings.Store(table) } -func (m *Manager) applyOAuthModelMappingMetadata(auth *Auth, requestedModel string, metadata map[string]any) map[string]any { - original := m.resolveOAuthUpstreamModel(auth, requestedModel) - if original == "" { - return metadata - } - if metadata != nil { - if v, ok := metadata[util.ModelMappingOriginalModelMetadataKey]; ok { - if s, okStr := v.(string); okStr && strings.EqualFold(s, original) { - return metadata - } - } +// applyOAuthModelMapping resolves the upstream model from OAuth model mappings +// and returns the resolved model along with updated metadata. If a mapping exists, +// the returned model is the upstream model and metadata contains the original +// requested model for response translation. 
+func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, metadata map[string]any) (string, map[string]any) { + upstreamModel := m.resolveOAuthUpstreamModel(auth, requestedModel) + if upstreamModel == "" { + return requestedModel, metadata } out := make(map[string]any, 1) if len(metadata) > 0 { @@ -84,8 +81,8 @@ func (m *Manager) applyOAuthModelMappingMetadata(auth *Auth, requestedModel stri out[k] = v } } - out[util.ModelMappingOriginalModelMetadataKey] = original - return out + out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel + return upstreamModel, out } func (m *Manager) resolveOAuthUpstreamModel(auth *Auth, requestedModel string) string { From 26efbed05c34d839177a2a30a6c499567eb4a5f8 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 20:20:42 +0800 Subject: [PATCH 6/9] refactor(executor): remove redundant upstream model parameter from translateRequest --- .../runtime/executor/aistudio_executor.go | 31 ++++++++----------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 3cd8cf8e..38c348f2 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -55,7 +55,7 @@ func (e *AIStudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - translatedReq, body, err := e.translateRequest(req, opts, false, req.Model) + translatedReq, body, err := e.translateRequest(req, opts, false) if err != nil { return resp, err } @@ -110,7 +110,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth reporter := newUsageReporter(ctx, e.Identifier(), req.Model, auth) defer reporter.trackFailure(ctx, &err) - translatedReq, body, err := e.translateRequest(req, opts, true, req.Model) + 
translatedReq, body, err := e.translateRequest(req, opts, true) if err != nil { return nil, err } @@ -256,7 +256,7 @@ func (e *AIStudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth // CountTokens counts tokens for the given request using the AI Studio API. func (e *AIStudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { - _, body, err := e.translateRequest(req, opts, false, req.Model) + _, body, err := e.translateRequest(req, opts, false) if err != nil { return cliproxyexecutor.Response{}, err } @@ -320,23 +320,18 @@ type translatedPayload struct { toFormat sdktranslator.Format } -func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool, upstreamModel string) ([]byte, translatedPayload, error) { - model := strings.TrimSpace(upstreamModel) - if model == "" { - model = strings.TrimSpace(req.Model) - } - +func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts cliproxyexecutor.Options, stream bool) ([]byte, translatedPayload, error) { from := opts.SourceFormat to := sdktranslator.FromString("gemini") - payload := sdktranslator.TranslateRequest(from, to, model, bytes.Clone(req.Payload), stream) - payload = ApplyThinkingMetadata(payload, req.Metadata, model) - payload = util.ApplyGemini3ThinkingLevelFromMetadata(model, req.Metadata, payload) - payload = util.ApplyDefaultThinkingIfNeeded(model, payload) - payload = util.ConvertThinkingLevelToBudget(payload, model, true) - payload = util.NormalizeGeminiThinkingBudget(model, payload, true) - payload = util.StripThinkingConfigIfUnsupported(model, payload) - payload = fixGeminiImageAspectRatio(model, payload) - payload = applyPayloadConfig(e.cfg, model, payload) + payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) + payload = ApplyThinkingMetadata(payload, 
req.Metadata, req.Model) + payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload) + payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload) + payload = util.ConvertThinkingLevelToBudget(payload, req.Model, true) + payload = util.NormalizeGeminiThinkingBudget(req.Model, payload, true) + payload = util.StripThinkingConfigIfUnsupported(req.Model, payload) + payload = fixGeminiImageAspectRatio(req.Model, payload) + payload = applyPayloadConfig(e.cfg, req.Model, payload) payload, _ = sjson.DeleteBytes(payload, "generationConfig.maxOutputTokens") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseMimeType") payload, _ = sjson.DeleteBytes(payload, "generationConfig.responseJsonSchema") From c6b0e85b540122a1abc6be8ef229f4d4f113bef4 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Tue, 30 Dec 2025 20:44:13 +0800 Subject: [PATCH 7/9] Fixed: #790 fix(gemini): include full text in response output events --- .../openai/responses/gemini_openai-responses_response.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go index 27d2f9b6..5529d52a 100644 --- a/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go +++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_response.go @@ -23,6 +23,7 @@ type geminiToResponsesState struct { MsgIndex int CurrentMsgID string TextBuf strings.Builder + ItemTextBuf strings.Builder // reasoning aggregation ReasoningOpened bool @@ -189,6 +190,8 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, partAdded, _ = sjson.Set(partAdded, "item_id", st.CurrentMsgID) partAdded, _ = sjson.Set(partAdded, "output_index", st.MsgIndex) out = append(out, emitEvent("response.content_part.added", partAdded)) + st.ItemTextBuf.Reset() + st.ItemTextBuf.WriteString(t.String()) } 
st.TextBuf.WriteString(t.String()) msg := `{"type":"response.output_text.delta","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"delta":"","logprobs":[]}` @@ -250,20 +253,24 @@ func ConvertGeminiResponseToOpenAIResponses(_ context.Context, modelName string, finalizeReasoning() // Close message output if opened if st.MsgOpened { + fullText := st.ItemTextBuf.String() done := `{"type":"response.output_text.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"text":"","logprobs":[]}` done, _ = sjson.Set(done, "sequence_number", nextSeq()) done, _ = sjson.Set(done, "item_id", st.CurrentMsgID) done, _ = sjson.Set(done, "output_index", st.MsgIndex) + done, _ = sjson.Set(done, "text", fullText) out = append(out, emitEvent("response.output_text.done", done)) partDone := `{"type":"response.content_part.done","sequence_number":0,"item_id":"","output_index":0,"content_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""}}` partDone, _ = sjson.Set(partDone, "sequence_number", nextSeq()) partDone, _ = sjson.Set(partDone, "item_id", st.CurrentMsgID) partDone, _ = sjson.Set(partDone, "output_index", st.MsgIndex) + partDone, _ = sjson.Set(partDone, "part.text", fullText) out = append(out, emitEvent("response.content_part.done", partDone)) final := `{"type":"response.output_item.done","sequence_number":0,"output_index":0,"item":{"id":"","type":"message","status":"completed","content":[{"type":"output_text","text":""}],"role":"assistant"}}` final, _ = sjson.Set(final, "sequence_number", nextSeq()) final, _ = sjson.Set(final, "output_index", st.MsgIndex) final, _ = sjson.Set(final, "item.id", st.CurrentMsgID) + final, _ = sjson.Set(final, "item.content.0.text", fullText) out = append(out, emitEvent("response.output_item.done", final)) } From 2c01b2ef64138c7b83dbe86c861a1af261e9e2f8 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 22:13:42 +0800 Subject: [PATCH 
8/9] feat(watcher): add Gemini models and OAuth model mappings change detection --- internal/watcher/diff/config_diff.go | 8 ++ internal/watcher/diff/model_hash.go | 15 +++ internal/watcher/diff/oauth_excluded.go | 26 +++++ internal/watcher/diff/oauth_model_mappings.go | 98 +++++++++++++++++++ internal/watcher/synthesizer/config.go | 3 + 5 files changed, 150 insertions(+) create mode 100644 internal/watcher/diff/oauth_model_mappings.go diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go index 1ce60151..c7bfaf07 100644 --- a/internal/watcher/diff/config_diff.go +++ b/internal/watcher/diff/config_diff.go @@ -90,6 +90,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if !equalStringMap(o.Headers, n.Headers) { changes = append(changes, fmt.Sprintf("gemini[%d].headers: updated", i)) } + oldModels := SummarizeGeminiModels(o.Models) + newModels := SummarizeGeminiModels(n.Models) + if oldModels.hash != newModels.hash { + changes = append(changes, fmt.Sprintf("gemini[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count)) + } oldExcluded := SummarizeExcludedModels(o.ExcludedModels) newExcluded := SummarizeExcludedModels(n.ExcludedModels) if oldExcluded.hash != newExcluded.hash { @@ -194,6 +199,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if entries, _ := DiffOAuthExcludedModelChanges(oldCfg.OAuthExcludedModels, newCfg.OAuthExcludedModels); len(entries) > 0 { changes = append(changes, entries...) } + if entries, _ := DiffOAuthModelMappingChanges(oldCfg.OAuthModelMappings, newCfg.OAuthModelMappings); len(entries) > 0 { + changes = append(changes, entries...) 
+ } // Remote management (never print the key) if oldCfg.RemoteManagement.AllowRemote != newCfg.RemoteManagement.AllowRemote { diff --git a/internal/watcher/diff/model_hash.go b/internal/watcher/diff/model_hash.go index a224bdca..5779facc 100644 --- a/internal/watcher/diff/model_hash.go +++ b/internal/watcher/diff/model_hash.go @@ -71,6 +71,21 @@ func ComputeCodexModelsHash(models []config.CodexModel) string { return hashJoined(keys) } +// ComputeGeminiModelsHash returns a stable hash for Gemini model aliases. +func ComputeGeminiModelsHash(models []config.GeminiModel) string { + keys := normalizeModelPairs(func(out func(key string)) { + for _, model := range models { + name := strings.TrimSpace(model.Name) + alias := strings.TrimSpace(model.Alias) + if name == "" && alias == "" { + continue + } + out(strings.ToLower(name) + "|" + strings.ToLower(alias)) + } + }) + return hashJoined(keys) +} + // ComputeExcludedModelsHash returns a normalized hash for excluded model lists. func ComputeExcludedModelsHash(excluded []string) string { if len(excluded) == 0 { diff --git a/internal/watcher/diff/oauth_excluded.go b/internal/watcher/diff/oauth_excluded.go index 4f08c4d6..5cac62b3 100644 --- a/internal/watcher/diff/oauth_excluded.go +++ b/internal/watcher/diff/oauth_excluded.go @@ -122,6 +122,11 @@ type VertexModelsSummary struct { count int } +type GeminiModelsSummary struct { + hash string + count int +} + // SummarizeVertexModels hashes vertex-compatible models for change detection. func SummarizeVertexModels(models []config.VertexCompatModel) VertexModelsSummary { if len(models) == 0 { @@ -149,3 +154,24 @@ func SummarizeVertexModels(models []config.VertexCompatModel) VertexModelsSummar count: len(names), } } + +// SummarizeGeminiModels hashes Gemini model aliases for change detection. 
+func SummarizeGeminiModels(models []config.GeminiModel) GeminiModelsSummary { + if len(models) == 0 { + return GeminiModelsSummary{} + } + keys := normalizeModelPairs(func(out func(key string)) { + for _, model := range models { + name := strings.TrimSpace(model.Name) + alias := strings.TrimSpace(model.Alias) + if name == "" && alias == "" { + continue + } + out(strings.ToLower(name) + "|" + strings.ToLower(alias)) + } + }) + return GeminiModelsSummary{ + hash: hashJoined(keys), + count: len(keys), + } +} diff --git a/internal/watcher/diff/oauth_model_mappings.go b/internal/watcher/diff/oauth_model_mappings.go new file mode 100644 index 00000000..9228dbab --- /dev/null +++ b/internal/watcher/diff/oauth_model_mappings.go @@ -0,0 +1,98 @@ +package diff + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "sort" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +type OAuthModelMappingsSummary struct { + hash string + count int +} + +// SummarizeOAuthModelMappings summarizes OAuth model mappings per channel. +func SummarizeOAuthModelMappings(entries map[string][]config.ModelNameMapping) map[string]OAuthModelMappingsSummary { + if len(entries) == 0 { + return nil + } + out := make(map[string]OAuthModelMappingsSummary, len(entries)) + for k, v := range entries { + key := strings.ToLower(strings.TrimSpace(k)) + if key == "" { + continue + } + out[key] = summarizeOAuthModelMappingList(v) + } + if len(out) == 0 { + return nil + } + return out +} + +// DiffOAuthModelMappingChanges compares OAuth model mappings maps. 
+func DiffOAuthModelMappingChanges(oldMap, newMap map[string][]config.ModelNameMapping) ([]string, []string) { + oldSummary := SummarizeOAuthModelMappings(oldMap) + newSummary := SummarizeOAuthModelMappings(newMap) + keys := make(map[string]struct{}, len(oldSummary)+len(newSummary)) + for k := range oldSummary { + keys[k] = struct{}{} + } + for k := range newSummary { + keys[k] = struct{}{} + } + changes := make([]string, 0, len(keys)) + affected := make([]string, 0, len(keys)) + for key := range keys { + oldInfo, okOld := oldSummary[key] + newInfo, okNew := newSummary[key] + switch { + case okOld && !okNew: + changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: removed", key)) + affected = append(affected, key) + case !okOld && okNew: + changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: added (%d entries)", key, newInfo.count)) + affected = append(affected, key) + case okOld && okNew && oldInfo.hash != newInfo.hash: + changes = append(changes, fmt.Sprintf("oauth-model-mappings[%s]: updated (%d -> %d entries)", key, oldInfo.count, newInfo.count)) + affected = append(affected, key) + } + } + sort.Strings(changes) + sort.Strings(affected) + return changes, affected +} + +func summarizeOAuthModelMappingList(list []config.ModelNameMapping) OAuthModelMappingsSummary { + if len(list) == 0 { + return OAuthModelMappingsSummary{} + } + seen := make(map[string]struct{}, len(list)) + normalized := make([]string, 0, len(list)) + for _, mapping := range list { + name := strings.ToLower(strings.TrimSpace(mapping.Name)) + alias := strings.ToLower(strings.TrimSpace(mapping.Alias)) + if name == "" || alias == "" { + continue + } + key := name + "->" + alias + if _, exists := seen[key]; exists { + continue + } + seen[key] = struct{}{} + normalized = append(normalized, key) + } + if len(normalized) == 0 { + return OAuthModelMappingsSummary{} + } + sort.Strings(normalized) + sum := sha256.Sum256([]byte(strings.Join(normalized, "|"))) + return 
OAuthModelMappingsSummary{ + hash: hex.EncodeToString(sum[:]), + count: len(normalized), + } +} diff --git a/internal/watcher/synthesizer/config.go b/internal/watcher/synthesizer/config.go index e7c845a1..2f2b2690 100644 --- a/internal/watcher/synthesizer/config.go +++ b/internal/watcher/synthesizer/config.go @@ -62,6 +62,9 @@ func (s *ConfigSynthesizer) synthesizeGeminiKeys(ctx *SynthesisContext) []*corea if base != "" { attrs["base_url"] = base } + if hash := diff.ComputeGeminiModelsHash(entry.Models); hash != "" { + attrs["models_hash"] = hash + } addConfigHeadersToAttrs(entry.Headers, attrs) a := &coreauth.Auth{ ID: id, From e0381a6ae03467e71b18404cf9c783f10cd298e2 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Tue, 30 Dec 2025 22:38:09 +0800 Subject: [PATCH 9/9] refactor(watcher): extract model summary functions to dedicated file --- internal/watcher/diff/config_diff.go | 10 ++ internal/watcher/diff/models_summary.go | 121 ++++++++++++++++++++++++ internal/watcher/diff/oauth_excluded.go | 59 ------------ 3 files changed, 131 insertions(+), 59 deletions(-) create mode 100644 internal/watcher/diff/models_summary.go diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go index c7bfaf07..e24fc893 100644 --- a/internal/watcher/diff/config_diff.go +++ b/internal/watcher/diff/config_diff.go @@ -125,6 +125,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if !equalStringMap(o.Headers, n.Headers) { changes = append(changes, fmt.Sprintf("claude[%d].headers: updated", i)) } + oldModels := SummarizeClaudeModels(o.Models) + newModels := SummarizeClaudeModels(n.Models) + if oldModels.hash != newModels.hash { + changes = append(changes, fmt.Sprintf("claude[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count)) + } oldExcluded := SummarizeExcludedModels(o.ExcludedModels) newExcluded := SummarizeExcludedModels(n.ExcludedModels) if oldExcluded.hash != 
newExcluded.hash { @@ -155,6 +160,11 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if !equalStringMap(o.Headers, n.Headers) { changes = append(changes, fmt.Sprintf("codex[%d].headers: updated", i)) } + oldModels := SummarizeCodexModels(o.Models) + newModels := SummarizeCodexModels(n.Models) + if oldModels.hash != newModels.hash { + changes = append(changes, fmt.Sprintf("codex[%d].models: updated (%d -> %d entries)", i, oldModels.count, newModels.count)) + } oldExcluded := SummarizeExcludedModels(o.ExcludedModels) newExcluded := SummarizeExcludedModels(n.ExcludedModels) if oldExcluded.hash != newExcluded.hash { diff --git a/internal/watcher/diff/models_summary.go b/internal/watcher/diff/models_summary.go new file mode 100644 index 00000000..9c2aa91a --- /dev/null +++ b/internal/watcher/diff/models_summary.go @@ -0,0 +1,121 @@ +package diff + +import ( + "crypto/sha256" + "encoding/hex" + "sort" + "strings" + + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +type GeminiModelsSummary struct { + hash string + count int +} + +type ClaudeModelsSummary struct { + hash string + count int +} + +type CodexModelsSummary struct { + hash string + count int +} + +type VertexModelsSummary struct { + hash string + count int +} + +// SummarizeGeminiModels hashes Gemini model aliases for change detection. +func SummarizeGeminiModels(models []config.GeminiModel) GeminiModelsSummary { + if len(models) == 0 { + return GeminiModelsSummary{} + } + keys := normalizeModelPairs(func(out func(key string)) { + for _, model := range models { + name := strings.TrimSpace(model.Name) + alias := strings.TrimSpace(model.Alias) + if name == "" && alias == "" { + continue + } + out(strings.ToLower(name) + "|" + strings.ToLower(alias)) + } + }) + return GeminiModelsSummary{ + hash: hashJoined(keys), + count: len(keys), + } +} + +// SummarizeClaudeModels hashes Claude model aliases for change detection. 
+func SummarizeClaudeModels(models []config.ClaudeModel) ClaudeModelsSummary {
+	var summary ClaudeModelsSummary
+	if len(models) == 0 {
+		return summary
+	}
+	// Emit one lower-cased "name|alias" key per entry that sets either field.
+	pairs := normalizeModelPairs(func(emit func(key string)) {
+		for i := range models {
+			upstream := strings.TrimSpace(models[i].Name)
+			clientAlias := strings.TrimSpace(models[i].Alias)
+			if upstream != "" || clientAlias != "" {
+				emit(strings.ToLower(upstream) + "|" + strings.ToLower(clientAlias))
+			}
+		}
+	})
+	summary.hash = hashJoined(pairs)
+	summary.count = len(pairs)
+	return summary
+}
+
+// SummarizeCodexModels hashes the name/alias pairs of Codex models for change detection.
+func SummarizeCodexModels(models []config.CodexModel) CodexModelsSummary {
+	var summary CodexModelsSummary
+	if len(models) == 0 {
+		return summary
+	}
+	// Emit one lower-cased "name|alias" key per entry that sets either field.
+	pairs := normalizeModelPairs(func(emit func(key string)) {
+		for i := range models {
+			upstream := strings.TrimSpace(models[i].Name)
+			clientAlias := strings.TrimSpace(models[i].Alias)
+			if upstream != "" || clientAlias != "" {
+				emit(strings.ToLower(upstream) + "|" + strings.ToLower(clientAlias))
+			}
+		}
+	})
+	summary.hash = hashJoined(pairs)
+	summary.count = len(pairs)
+	return summary
+}
+
+// SummarizeVertexModels hashes Vertex-compatible model aliases for change detection.
+func SummarizeVertexModels(models []config.VertexCompatModel) VertexModelsSummary {
+	// Key each entry on both the upstream name and the alias: keying on the
+	// alias alone would hide a change of upstream model behind a stable alias.
+	// Names keep their case, so a case-only edit still counts as a change.
+	keys := make([]string, 0, len(models))
+	for _, model := range models {
+		name := strings.TrimSpace(model.Name)
+		alias := strings.TrimSpace(model.Alias)
+		if name == "" && alias == "" {
+			continue // entry carries no information
+		}
+		keys = append(keys, name+"|"+alias)
+	}
+	if len(keys) == 0 {
+		// Nil, empty, or all-blank input yields the zero summary (empty hash, count 0).
+		return VertexModelsSummary{}
+	}
+	// Sort so that reordering the configured list does not change the hash.
+	sort.Strings(keys)
+	// Join with "\n" because the keys themselves contain the "|" separator.
+	sum := sha256.Sum256([]byte(strings.Join(keys, "\n")))
+	return VertexModelsSummary{
+		hash:  hex.EncodeToString(sum[:]),
+		count: len(keys),
+	}
+}
diff --git a/internal/watcher/diff/oauth_excluded.go b/internal/watcher/diff/oauth_excluded.go
index 5cac62b3..2039cf48 100644
--- a/internal/watcher/diff/oauth_excluded.go
+++ b/internal/watcher/diff/oauth_excluded.go
@@ -116,62 +116,3 @@ func SummarizeAmpModelMappings(mappings []config.AmpModelMapping) AmpModelMappin
 		count: len(entries),
 	}
 }
-
-type VertexModelsSummary struct {
-	hash  string
-	count int
-}
-
-type GeminiModelsSummary struct {
-	hash  string
-	count int
-}
-
-// SummarizeVertexModels hashes vertex-compatible models for change detection.
-func SummarizeVertexModels(models []config.VertexCompatModel) VertexModelsSummary {
-	if len(models) == 0 {
-		return VertexModelsSummary{}
-	}
-	names := make([]string, 0, len(models))
-	for _, m := range models {
-		name := strings.TrimSpace(m.Name)
-		alias := strings.TrimSpace(m.Alias)
-		if name == "" && alias == "" {
-			continue
-		}
-		if alias != "" {
-			name = alias
-		}
-		names = append(names, name)
-	}
-	if len(names) == 0 {
-		return VertexModelsSummary{}
-	}
-	sort.Strings(names)
-	sum := sha256.Sum256([]byte(strings.Join(names, "|")))
-	return VertexModelsSummary{
-		hash:  hex.EncodeToString(sum[:]),
-		count: len(names),
-	}
-}
-
-// SummarizeGeminiModels hashes Gemini model aliases for change detection.
-func SummarizeGeminiModels(models []config.GeminiModel) GeminiModelsSummary { - if len(models) == 0 { - return GeminiModelsSummary{} - } - keys := normalizeModelPairs(func(out func(key string)) { - for _, model := range models { - name := strings.TrimSpace(model.Name) - alias := strings.TrimSpace(model.Alias) - if name == "" && alias == "" { - continue - } - out(strings.ToLower(name) + "|" + strings.ToLower(alias)) - } - }) - return GeminiModelsSummary{ - hash: hashJoined(keys), - count: len(keys), - } -}