From 8de0885b7dff2e52318856be617650f97277383e Mon Sep 17 00:00:00 2001
From: margbug01
Date: Sun, 1 Mar 2026 00:54:17 +0800
Subject: [PATCH 1/2] fix: support thinking.type="auto" from Amp client for Antigravity Claude models

## Problem

When using Antigravity Claude models through CLIProxyAPI, the thinking chain
(reasoning content) does not display in the Amp client.

## Root Cause

The Amp client sends `thinking: {"type": "auto"}` in its requests, but
`ConvertClaudeRequestToAntigravity` only handled `"enabled"` and `"adaptive"`
types in its switch statement. The `"auto"` type was silently ignored,
resulting in no `thinkingConfig` being set in the translated Gemini request.
Without `thinkingConfig`, the Antigravity API returns responses without any
thinking content.

Additionally, the Antigravity API for Claude models does not support
`thinkingBudget: -1` (auto mode sentinel). It requires a concrete positive
budget value. The fix uses 128000 as the budget for "auto" mode, which
`ApplyThinking` will then normalize to stay within the model's actual limits
(e.g., capped to `maxOutputTokens - 1`).

## Changes

### internal/translator/antigravity/claude/antigravity_claude_request.go

1. **Add "auto" case** to the thinking type switch statement. Sets
   `thinkingBudget: 128000` and `includeThoughts: true`. The budget is
   subsequently normalized by `ApplyThinking` based on model-specific limits.

2. **Add "auto" to hasThinking check** so that interleaved thinking hints are
   injected for tool-use scenarios when Amp sends `thinking.type="auto"`.

### internal/registry/model_definitions_static_data.go

3. **Add Thinking configuration** for `claude-sonnet-4-6`, `claude-sonnet-4-5`,
   and `claude-opus-4-6` in `GetAntigravityModelConfig()` -- these were
   previously missing, causing `ApplyThinking` to skip thinking config entirely.

## Testing - Deployed to Railway test instance (cpa-thinking-test) - Verified via debug logging that: - Amp sends `thinking: {"type": "auto"}` - CPA now translates this to `thinkingConfig: {thinkingBudget: 128000, includeThoughts: true}` - `ApplyThinking` normalizes the budget to model-specific limits - Antigravity API receives the correct thinkingConfig Amp-Thread-ID: https://ampcode.com/threads/T-019ca511-710d-776d-a07c-4b750f871a93 Co-authored-by: Amp --- internal/registry/model_definitions_static_data.go | 5 +++-- .../antigravity/claude/antigravity_claude_request.go | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/registry/model_definitions_static_data.go b/internal/registry/model_definitions_static_data.go index b0e59092..2342f59e 100644 --- a/internal/registry/model_definitions_static_data.go +++ b/internal/registry/model_definitions_static_data.go @@ -958,8 +958,9 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig { "claude-sonnet-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "claude-opus-4-5-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "claude-opus-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, - "claude-sonnet-4-5": {MaxCompletionTokens: 64000}, - "claude-sonnet-4-6": {MaxCompletionTokens: 64000}, + "claude-opus-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 128000}, + "claude-sonnet-4-5": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, + "claude-sonnet-4-6": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, 
"claude-sonnet-4-6-thinking": {Thinking: &ThinkingSupport{Min: 1024, Max: 128000, ZeroAllowed: true, DynamicAllowed: true}, MaxCompletionTokens: 64000}, "gpt-oss-120b-medium": {}, "tab_flash_lite_preview": {}, diff --git a/internal/translator/antigravity/claude/antigravity_claude_request.go b/internal/translator/antigravity/claude/antigravity_claude_request.go index b634436d..a9939a3b 100644 --- a/internal/translator/antigravity/claude/antigravity_claude_request.go +++ b/internal/translator/antigravity/claude/antigravity_claude_request.go @@ -400,7 +400,7 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ hasTools := toolDeclCount > 0 thinkingResult := gjson.GetBytes(rawJSON, "thinking") thinkingType := thinkingResult.Get("type").String() - hasThinking := thinkingResult.Exists() && thinkingResult.IsObject() && (thinkingType == "enabled" || thinkingType == "adaptive") + hasThinking := thinkingResult.Exists() && thinkingResult.IsObject() && (thinkingType == "enabled" || thinkingType == "adaptive" || thinkingType == "auto") isClaudeThinking := util.IsClaudeThinkingModel(modelName) if hasTools && hasThinking && isClaudeThinking { @@ -440,6 +440,12 @@ func ConvertClaudeRequestToAntigravity(modelName string, inputRawJSON []byte, _ out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) } + case "auto": + // Amp sends thinking.type="auto". The Antigravity API for Claude models + // requires a concrete positive budget, not -1, so send a fixed high value + // (128000) that ApplyThinking will cap to the model max. + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 128000) + out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.includeThoughts", true) case "adaptive": // Keep adaptive as a high level sentinel; ApplyThinking resolves it // to model-specific max capability. 
From cc1d8f66293e090119c87364b326d39a1c259514 Mon Sep 17 00:00:00 2001 From: Luis Pater Date: Sun, 1 Mar 2026 02:42:36 +0800 Subject: [PATCH 2/2] Fixed: #1747 feat(auth): add configurable max-retry-credentials for finer control over cross-credential retries --- config.example.yaml | 4 + internal/api/server.go | 4 +- internal/config/config.go | 7 + internal/watcher/config_reload.go | 3 +- internal/watcher/diff/config_diff.go | 3 + internal/watcher/diff/config_diff_test.go | 6 + internal/watcher/watcher_test.go | 61 +++++++++ sdk/cliproxy/auth/conductor.go | 55 +++++--- sdk/cliproxy/auth/conductor_overrides_test.go | 126 +++++++++++++++++- sdk/cliproxy/service.go | 2 +- 10 files changed, 249 insertions(+), 22 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index f99ee74f..7a3265b4 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -75,6 +75,10 @@ passthrough-headers: false # Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504. request-retry: 3 +# Maximum number of different credentials to try for one failed request. +# Set to 0 to keep legacy behavior (try all available credentials). +max-retry-credentials: 0 + # Maximum wait time in seconds for a cooled-down credential before triggering a retry. 
max-retry-interval: 30 diff --git a/internal/api/server.go b/internal/api/server.go index 7f44d085..0325ca30 100644 --- a/internal/api/server.go +++ b/internal/api/server.go @@ -257,7 +257,7 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk s.oldConfigYaml, _ = yaml.Marshal(cfg) s.applyAccessConfig(nil, cfg) if authManager != nil { - authManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second) + authManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second, cfg.MaxRetryCredentials) } managementasset.SetCurrentConfig(cfg) auth.SetQuotaCooldownDisabled(cfg.DisableCooling) @@ -915,7 +915,7 @@ func (s *Server) UpdateClients(cfg *config.Config) { } if s.handlers != nil && s.handlers.AuthManager != nil { - s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second) + s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second, cfg.MaxRetryCredentials) } // Update log level dynamically when debug flag changes diff --git a/internal/config/config.go b/internal/config/config.go index ed57b993..d6e2bdc8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -69,6 +69,9 @@ type Config struct { // RequestRetry defines the retry times when the request failed. RequestRetry int `yaml:"request-retry" json:"request-retry"` + // MaxRetryCredentials defines the maximum number of credentials to try for a failed request. + // Set to 0 or a negative value to keep trying all available credentials (legacy behavior). + MaxRetryCredentials int `yaml:"max-retry-credentials" json:"max-retry-credentials"` // MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential. 
MaxRetryInterval int `yaml:"max-retry-interval" json:"max-retry-interval"` @@ -609,6 +612,10 @@ func LoadConfigOptional(configFile string, optional bool) (*Config, error) { cfg.ErrorLogsMaxFiles = 10 } + if cfg.MaxRetryCredentials < 0 { + cfg.MaxRetryCredentials = 0 + } + // Sanitize Gemini API key configuration and migrate legacy entries. cfg.SanitizeGeminiKeys() diff --git a/internal/watcher/config_reload.go b/internal/watcher/config_reload.go index edac3474..1bbf4ef2 100644 --- a/internal/watcher/config_reload.go +++ b/internal/watcher/config_reload.go @@ -127,7 +127,8 @@ func (w *Watcher) reloadConfig() bool { } authDirChanged := oldConfig == nil || oldConfig.AuthDir != newConfig.AuthDir - forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias)) + retryConfigChanged := oldConfig != nil && (oldConfig.RequestRetry != newConfig.RequestRetry || oldConfig.MaxRetryInterval != newConfig.MaxRetryInterval || oldConfig.MaxRetryCredentials != newConfig.MaxRetryCredentials) + forceAuthRefresh := oldConfig != nil && (oldConfig.ForceModelPrefix != newConfig.ForceModelPrefix || !reflect.DeepEqual(oldConfig.OAuthModelAlias, newConfig.OAuthModelAlias) || retryConfigChanged) log.Infof("config successfully reloaded, triggering client reload") w.reloadClients(authDirChanged, affectedOAuthProviders, forceAuthRefresh) diff --git a/internal/watcher/diff/config_diff.go b/internal/watcher/diff/config_diff.go index 6687749e..b7d537da 100644 --- a/internal/watcher/diff/config_diff.go +++ b/internal/watcher/diff/config_diff.go @@ -54,6 +54,9 @@ func BuildConfigChangeDetails(oldCfg, newCfg *config.Config) []string { if oldCfg.RequestRetry != newCfg.RequestRetry { changes = append(changes, fmt.Sprintf("request-retry: %d -> %d", oldCfg.RequestRetry, newCfg.RequestRetry)) } + if oldCfg.MaxRetryCredentials != newCfg.MaxRetryCredentials { + changes = append(changes, 
fmt.Sprintf("max-retry-credentials: %d -> %d", oldCfg.MaxRetryCredentials, newCfg.MaxRetryCredentials)) + } if oldCfg.MaxRetryInterval != newCfg.MaxRetryInterval { changes = append(changes, fmt.Sprintf("max-retry-interval: %d -> %d", oldCfg.MaxRetryInterval, newCfg.MaxRetryInterval)) } diff --git a/internal/watcher/diff/config_diff_test.go b/internal/watcher/diff/config_diff_test.go index 82486659..f35ceeea 100644 --- a/internal/watcher/diff/config_diff_test.go +++ b/internal/watcher/diff/config_diff_test.go @@ -223,6 +223,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) { UsageStatisticsEnabled: false, DisableCooling: false, RequestRetry: 1, + MaxRetryCredentials: 1, MaxRetryInterval: 1, WebsocketAuth: false, QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false}, @@ -246,6 +247,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) { UsageStatisticsEnabled: true, DisableCooling: true, RequestRetry: 2, + MaxRetryCredentials: 3, MaxRetryInterval: 3, WebsocketAuth: true, QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true}, @@ -283,6 +285,7 @@ func TestBuildConfigChangeDetails_FlagsAndKeys(t *testing.T) { expectContains(t, details, "disable-cooling: false -> true") expectContains(t, details, "request-log: false -> true") expectContains(t, details, "request-retry: 1 -> 2") + expectContains(t, details, "max-retry-credentials: 1 -> 3") expectContains(t, details, "max-retry-interval: 1 -> 3") expectContains(t, details, "proxy-url: http://old-proxy -> http://new-proxy") expectContains(t, details, "ws-auth: false -> true") @@ -309,6 +312,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) { UsageStatisticsEnabled: false, DisableCooling: false, RequestRetry: 1, + MaxRetryCredentials: 1, MaxRetryInterval: 1, WebsocketAuth: false, QuotaExceeded: config.QuotaExceeded{SwitchProject: false, SwitchPreviewModel: false}, @@ -361,6 +365,7 @@ func 
TestBuildConfigChangeDetails_AllBranches(t *testing.T) { UsageStatisticsEnabled: true, DisableCooling: true, RequestRetry: 2, + MaxRetryCredentials: 3, MaxRetryInterval: 3, WebsocketAuth: true, QuotaExceeded: config.QuotaExceeded{SwitchProject: true, SwitchPreviewModel: true}, @@ -419,6 +424,7 @@ func TestBuildConfigChangeDetails_AllBranches(t *testing.T) { expectContains(t, changes, "usage-statistics-enabled: false -> true") expectContains(t, changes, "disable-cooling: false -> true") expectContains(t, changes, "request-retry: 1 -> 2") + expectContains(t, changes, "max-retry-credentials: 1 -> 3") expectContains(t, changes, "max-retry-interval: 1 -> 3") expectContains(t, changes, "proxy-url: http://old-proxy -> http://new-proxy") expectContains(t, changes, "ws-auth: false -> true") diff --git a/internal/watcher/watcher_test.go b/internal/watcher/watcher_test.go index 29113f59..a3be5877 100644 --- a/internal/watcher/watcher_test.go +++ b/internal/watcher/watcher_test.go @@ -1239,6 +1239,67 @@ func TestReloadConfigFiltersAffectedOAuthProviders(t *testing.T) { } } +func TestReloadConfigTriggersCallbackForMaxRetryCredentialsChange(t *testing.T) { + tmpDir := t.TempDir() + authDir := filepath.Join(tmpDir, "auth") + if err := os.MkdirAll(authDir, 0o755); err != nil { + t.Fatalf("failed to create auth dir: %v", err) + } + configPath := filepath.Join(tmpDir, "config.yaml") + + oldCfg := &config.Config{ + AuthDir: authDir, + MaxRetryCredentials: 0, + RequestRetry: 1, + MaxRetryInterval: 5, + } + newCfg := &config.Config{ + AuthDir: authDir, + MaxRetryCredentials: 2, + RequestRetry: 1, + MaxRetryInterval: 5, + } + data, errMarshal := yaml.Marshal(newCfg) + if errMarshal != nil { + t.Fatalf("failed to marshal config: %v", errMarshal) + } + if errWrite := os.WriteFile(configPath, data, 0o644); errWrite != nil { + t.Fatalf("failed to write config: %v", errWrite) + } + + callbackCalls := 0 + callbackMaxRetryCredentials := -1 + w := &Watcher{ + configPath: configPath, + authDir: 
authDir, + lastAuthHashes: make(map[string]string), + reloadCallback: func(cfg *config.Config) { + callbackCalls++ + if cfg != nil { + callbackMaxRetryCredentials = cfg.MaxRetryCredentials + } + }, + } + w.SetConfig(oldCfg) + + if ok := w.reloadConfig(); !ok { + t.Fatal("expected reloadConfig to succeed") + } + + if callbackCalls != 1 { + t.Fatalf("expected reload callback to be called once, got %d", callbackCalls) + } + if callbackMaxRetryCredentials != 2 { + t.Fatalf("expected callback MaxRetryCredentials=2, got %d", callbackMaxRetryCredentials) + } + + w.clientsMutex.RLock() + defer w.clientsMutex.RUnlock() + if w.config == nil || w.config.MaxRetryCredentials != 2 { + t.Fatalf("expected watcher config MaxRetryCredentials=2, got %+v", w.config) + } +} + func TestStartFailsWhenAuthDirMissing(t *testing.T) { tmpDir := t.TempDir() configPath := filepath.Join(tmpDir, "config.yaml") diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 0294f1b4..3434b7a7 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -138,8 +138,9 @@ type Manager struct { providerOffsets map[string]int // Retry controls request retry behavior. - requestRetry atomic.Int32 - maxRetryInterval atomic.Int64 + requestRetry atomic.Int32 + maxRetryCredentials atomic.Int32 + maxRetryInterval atomic.Int64 // oauthModelAlias stores global OAuth model alias mappings (alias -> upstream name) keyed by channel. oauthModelAlias atomic.Value @@ -384,18 +385,22 @@ func compileAPIKeyModelAliasForModels[T interface { } } -// SetRetryConfig updates retry attempts and cooldown wait interval. -func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) { +// SetRetryConfig updates retry attempts, credential retry limit and cooldown wait interval. 
+func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration, maxRetryCredentials int) { if m == nil { return } if retry < 0 { retry = 0 } + if maxRetryCredentials < 0 { + maxRetryCredentials = 0 + } if maxRetryInterval < 0 { maxRetryInterval = 0 } m.requestRetry.Store(int32(retry)) + m.maxRetryCredentials.Store(int32(maxRetryCredentials)) m.maxRetryInterval.Store(maxRetryInterval.Nanoseconds()) } @@ -506,11 +511,11 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} } - _, maxWait := m.retrySettings() + _, maxRetryCredentials, maxWait := m.retrySettings() var lastErr error for attempt := 0; ; attempt++ { - resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts) + resp, errExec := m.executeMixedOnce(ctx, normalized, req, opts, maxRetryCredentials) if errExec == nil { return resp, nil } @@ -537,11 +542,11 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} } - _, maxWait := m.retrySettings() + _, maxRetryCredentials, maxWait := m.retrySettings() var lastErr error for attempt := 0; ; attempt++ { - resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts) + resp, errExec := m.executeCountMixedOnce(ctx, normalized, req, opts, maxRetryCredentials) if errExec == nil { return resp, nil } @@ -568,11 +573,11 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"} } - _, maxWait := m.retrySettings() + _, maxRetryCredentials, maxWait := m.retrySettings() var lastErr error for attempt := 0; ; attempt++ { - result, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts) + result, errStream := m.executeStreamMixedOnce(ctx, normalized, req, opts, 
maxRetryCredentials) if errStream == nil { return result, nil } @@ -591,7 +596,7 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli return nil, &Error{Code: "auth_not_found", Message: "no auth available"} } -func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { +func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, maxRetryCredentials int) (cliproxyexecutor.Response, error) { if len(providers) == 0 { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} } @@ -600,6 +605,12 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req tried := make(map[string]struct{}) var lastErr error for { + if maxRetryCredentials > 0 && len(tried) >= maxRetryCredentials { + if lastErr != nil { + return cliproxyexecutor.Response{}, lastErr + } + return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"} + } auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried) if errPick != nil { if lastErr != nil { @@ -647,7 +658,7 @@ func (m *Manager) executeMixedOnce(ctx context.Context, providers []string, req } } -func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { +func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, maxRetryCredentials int) (cliproxyexecutor.Response, error) { if len(providers) == 0 { return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"} } @@ -656,6 +667,12 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers 
[]string, tried := make(map[string]struct{}) var lastErr error for { + if maxRetryCredentials > 0 && len(tried) >= maxRetryCredentials { + if lastErr != nil { + return cliproxyexecutor.Response{}, lastErr + } + return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"} + } auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried) if errPick != nil { if lastErr != nil { @@ -703,7 +720,7 @@ func (m *Manager) executeCountMixedOnce(ctx context.Context, providers []string, } } -func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) { +func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string, req cliproxyexecutor.Request, opts cliproxyexecutor.Options, maxRetryCredentials int) (*cliproxyexecutor.StreamResult, error) { if len(providers) == 0 { return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"} } @@ -712,6 +729,12 @@ func (m *Manager) executeStreamMixedOnce(ctx context.Context, providers []string tried := make(map[string]struct{}) var lastErr error for { + if maxRetryCredentials > 0 && len(tried) >= maxRetryCredentials { + if lastErr != nil { + return nil, lastErr + } + return nil, &Error{Code: "auth_not_found", Message: "no auth available"} + } auth, executor, provider, errPick := m.pickNextMixed(ctx, providers, routeModel, opts, tried) if errPick != nil { if lastErr != nil { @@ -1108,11 +1131,11 @@ func (m *Manager) normalizeProviders(providers []string) []string { return result } -func (m *Manager) retrySettings() (int, time.Duration) { +func (m *Manager) retrySettings() (int, int, time.Duration) { if m == nil { - return 0, 0 + return 0, 0, 0 } - return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load()) + return int(m.requestRetry.Load()), int(m.maxRetryCredentials.Load()), 
time.Duration(m.maxRetryInterval.Load()) } func (m *Manager) closestCooldownWait(providers []string, model string, attempt int) (time.Duration, bool) { diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index ef39ed82..e5792c68 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -2,13 +2,17 @@ package auth import ( "context" + "net/http" + "sync" "testing" "time" + + cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) { m := NewManager(nil, nil, nil) - m.SetRetryConfig(3, 30*time.Second) + m.SetRetryConfig(3, 30*time.Second, 0) model := "test-model" next := time.Now().Add(5 * time.Second) @@ -31,7 +35,7 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi t.Fatalf("register auth: %v", errRegister) } - _, maxWait := m.retrySettings() + _, _, maxWait := m.retrySettings() wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 500, Message: "boom"}, 0, []string{"claude"}, model, maxWait) if shouldRetry { t.Fatalf("expected shouldRetry=false for request_retry=0, got true (wait=%v)", wait) @@ -56,6 +60,124 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi } } +type credentialRetryLimitExecutor struct { + id string + + mu sync.Mutex + calls int +} + +func (e *credentialRetryLimitExecutor) Identifier() string { + return e.id +} + +func (e *credentialRetryLimitExecutor) Execute(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + e.recordCall() + return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "boom"} +} + +func (e *credentialRetryLimitExecutor) ExecuteStream(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) { + 
e.recordCall() + return nil, &Error{HTTPStatus: 500, Message: "boom"} +} + +func (e *credentialRetryLimitExecutor) Refresh(_ context.Context, auth *Auth) (*Auth, error) { + return auth, nil +} + +func (e *credentialRetryLimitExecutor) CountTokens(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + e.recordCall() + return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "boom"} +} + +func (e *credentialRetryLimitExecutor) HttpRequest(context.Context, *Auth, *http.Request) (*http.Response, error) { + return nil, nil +} + +func (e *credentialRetryLimitExecutor) recordCall() { + e.mu.Lock() + defer e.mu.Unlock() + e.calls++ +} + +func (e *credentialRetryLimitExecutor) Calls() int { + e.mu.Lock() + defer e.mu.Unlock() + return e.calls +} + +func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) { + t.Helper() + + m := NewManager(nil, nil, nil) + m.SetRetryConfig(0, 0, maxRetryCredentials) + + executor := &credentialRetryLimitExecutor{id: "claude"} + m.RegisterExecutor(executor) + + auth1 := &Auth{ID: "auth-1", Provider: "claude"} + auth2 := &Auth{ID: "auth-2", Provider: "claude"} + if _, errRegister := m.Register(context.Background(), auth1); errRegister != nil { + t.Fatalf("register auth1: %v", errRegister) + } + if _, errRegister := m.Register(context.Background(), auth2); errRegister != nil { + t.Fatalf("register auth2: %v", errRegister) + } + + return m, executor +} + +func TestManager_MaxRetryCredentials_LimitsCrossCredentialRetries(t *testing.T) { + request := cliproxyexecutor.Request{Model: "test-model"} + testCases := []struct { + name string + invoke func(*Manager) error + }{ + { + name: "execute", + invoke: func(m *Manager) error { + _, errExecute := m.Execute(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{}) + return errExecute + }, + }, + { + name: "execute_count", + invoke: func(m 
*Manager) error { + _, errExecute := m.ExecuteCount(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{}) + return errExecute + }, + }, + { + name: "execute_stream", + invoke: func(m *Manager) error { + _, errExecute := m.ExecuteStream(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{}) + return errExecute + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + limitedManager, limitedExecutor := newCredentialRetryLimitTestManager(t, 1) + if errInvoke := tc.invoke(limitedManager); errInvoke == nil { + t.Fatalf("expected error for limited retry execution") + } + if calls := limitedExecutor.Calls(); calls != 1 { + t.Fatalf("expected 1 call with max-retry-credentials=1, got %d", calls) + } + + unlimitedManager, unlimitedExecutor := newCredentialRetryLimitTestManager(t, 0) + if errInvoke := tc.invoke(unlimitedManager); errInvoke == nil { + t.Fatalf("expected error for unlimited retry execution") + } + if calls := unlimitedExecutor.Calls(); calls != 2 { + t.Fatalf("expected 2 calls with max-retry-credentials=0, got %d", calls) + } + }) + } +} + func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) { prev := quotaCooldownDisabled.Load() quotaCooldownDisabled.Store(false) diff --git a/sdk/cliproxy/service.go b/sdk/cliproxy/service.go index 1f9f4d6f..4be83816 100644 --- a/sdk/cliproxy/service.go +++ b/sdk/cliproxy/service.go @@ -336,7 +336,7 @@ func (s *Service) applyRetryConfig(cfg *config.Config) { return } maxInterval := time.Duration(cfg.MaxRetryInterval) * time.Second - s.coreManager.SetRetryConfig(cfg.RequestRetry, maxInterval) + s.coreManager.SetRetryConfig(cfg.RequestRetry, maxInterval, cfg.MaxRetryCredentials) } func openAICompatInfoFromAuth(a *coreauth.Auth) (providerKey string, compatName string, ok bool) {