diff --git a/internal/api/handlers/management/config_lists.go b/internal/api/handlers/management/config_lists.go index 083d4e31..fbaad956 100644 --- a/internal/api/handlers/management/config_lists.go +++ b/internal/api/handlers/management/config_lists.go @@ -214,19 +214,46 @@ func (h *Handler) PatchGeminiKey(c *gin.Context) { func (h *Handler) DeleteGeminiKey(c *gin.Context) { if val := strings.TrimSpace(c.Query("api-key")); val != "" { - out := make([]config.GeminiKey, 0, len(h.cfg.GeminiKey)) - for _, v := range h.cfg.GeminiKey { - if v.APIKey != val { + if baseRaw, okBase := c.GetQuery("base-url"); okBase { + base := strings.TrimSpace(baseRaw) + out := make([]config.GeminiKey, 0, len(h.cfg.GeminiKey)) + for _, v := range h.cfg.GeminiKey { + if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base { + continue + } out = append(out, v) } + if len(out) != len(h.cfg.GeminiKey) { + h.cfg.GeminiKey = out + h.cfg.SanitizeGeminiKeys() + h.persist(c) + } else { + c.JSON(404, gin.H{"error": "item not found"}) + } + return } - if len(out) != len(h.cfg.GeminiKey) { - h.cfg.GeminiKey = out - h.cfg.SanitizeGeminiKeys() - h.persist(c) - } else { + + matchIndex := -1 + matchCount := 0 + for i := range h.cfg.GeminiKey { + if strings.TrimSpace(h.cfg.GeminiKey[i].APIKey) == val { + matchCount++ + if matchIndex == -1 { + matchIndex = i + } + } + } + if matchCount == 0 { c.JSON(404, gin.H{"error": "item not found"}) + return } + if matchCount > 1 { + c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"}) + return + } + h.cfg.GeminiKey = append(h.cfg.GeminiKey[:matchIndex], h.cfg.GeminiKey[matchIndex+1:]...) + h.cfg.SanitizeGeminiKeys() + h.persist(c) return } if idxStr := c.Query("index"); idxStr != "" { @@ -335,14 +362,39 @@ func (h *Handler) PatchClaudeKey(c *gin.Context) { } func (h *Handler) DeleteClaudeKey(c *gin.Context) { - if val := c.Query("api-key"); val != "" { - out := make([]config.ClaudeKey, 0, len(h.cfg.ClaudeKey)) - for _, v := range h.cfg.ClaudeKey { - if v.APIKey != val { + if val := strings.TrimSpace(c.Query("api-key")); val != "" { + if baseRaw, okBase := c.GetQuery("base-url"); okBase { + base := strings.TrimSpace(baseRaw) + out := make([]config.ClaudeKey, 0, len(h.cfg.ClaudeKey)) + for _, v := range h.cfg.ClaudeKey { + if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base { + continue + } out = append(out, v) } + h.cfg.ClaudeKey = out + h.cfg.SanitizeClaudeKeys() + h.persist(c) + return + } + + matchIndex := -1 + matchCount := 0 + for i := range h.cfg.ClaudeKey { + if strings.TrimSpace(h.cfg.ClaudeKey[i].APIKey) == val { + matchCount++ + if matchIndex == -1 { + matchIndex = i + } + } + } + if matchCount > 1 { + c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"}) + return + } + if matchIndex != -1 { + h.cfg.ClaudeKey = append(h.cfg.ClaudeKey[:matchIndex], h.cfg.ClaudeKey[matchIndex+1:]...) } - h.cfg.ClaudeKey = out h.cfg.SanitizeClaudeKeys() h.persist(c) return @@ -601,13 +653,38 @@ func (h *Handler) PatchVertexCompatKey(c *gin.Context) { func (h *Handler) DeleteVertexCompatKey(c *gin.Context) { if val := strings.TrimSpace(c.Query("api-key")); val != "" { - out := make([]config.VertexCompatKey, 0, len(h.cfg.VertexCompatAPIKey)) - for _, v := range h.cfg.VertexCompatAPIKey { - if v.APIKey != val { + if baseRaw, okBase := c.GetQuery("base-url"); okBase { + base := strings.TrimSpace(baseRaw) + out := make([]config.VertexCompatKey, 0, len(h.cfg.VertexCompatAPIKey)) + for _, v := range h.cfg.VertexCompatAPIKey { + if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base { + continue + } out = append(out, v) } + h.cfg.VertexCompatAPIKey = out + h.cfg.SanitizeVertexCompatKeys() + h.persist(c) + return + } + + matchIndex := -1 + matchCount := 0 + for i := range h.cfg.VertexCompatAPIKey { + if strings.TrimSpace(h.cfg.VertexCompatAPIKey[i].APIKey) == val { + matchCount++ + if matchIndex == -1 { + matchIndex = i + } + } + } + if matchCount > 1 { + c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"}) + return + } + if matchIndex != -1 { + h.cfg.VertexCompatAPIKey = append(h.cfg.VertexCompatAPIKey[:matchIndex], h.cfg.VertexCompatAPIKey[matchIndex+1:]...) } - h.cfg.VertexCompatAPIKey = out h.cfg.SanitizeVertexCompatKeys() h.persist(c) return @@ -915,14 +992,39 @@ func (h *Handler) PatchCodexKey(c *gin.Context) { } func (h *Handler) DeleteCodexKey(c *gin.Context) { - if val := c.Query("api-key"); val != "" { - out := make([]config.CodexKey, 0, len(h.cfg.CodexKey)) - for _, v := range h.cfg.CodexKey { - if v.APIKey != val { + if val := strings.TrimSpace(c.Query("api-key")); val != "" { + if baseRaw, okBase := c.GetQuery("base-url"); okBase { + base := strings.TrimSpace(baseRaw) + out := make([]config.CodexKey, 0, len(h.cfg.CodexKey)) + for _, v := range h.cfg.CodexKey { + if strings.TrimSpace(v.APIKey) == val && strings.TrimSpace(v.BaseURL) == base { + continue + } out = append(out, v) } + h.cfg.CodexKey = out + h.cfg.SanitizeCodexKeys() + h.persist(c) + return + } + + matchIndex := -1 + matchCount := 0 + for i := range h.cfg.CodexKey { + if strings.TrimSpace(h.cfg.CodexKey[i].APIKey) == val { + matchCount++ + if matchIndex == -1 { + matchIndex = i + } + } + } + if matchCount > 1 { + c.JSON(400, gin.H{"error": "multiple items match api-key; base-url is required"}) + return + } + if matchIndex != -1 { + h.cfg.CodexKey = append(h.cfg.CodexKey[:matchIndex], h.cfg.CodexKey[matchIndex+1:]...) } - h.cfg.CodexKey = out h.cfg.SanitizeCodexKeys() h.persist(c) return diff --git a/internal/api/handlers/management/config_lists_delete_keys_test.go b/internal/api/handlers/management/config_lists_delete_keys_test.go new file mode 100644 index 00000000..aaa43910 --- /dev/null +++ b/internal/api/handlers/management/config_lists_delete_keys_test.go @@ -0,0 +1,172 @@ +package management + +import ( + "net/http" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/gin-gonic/gin" + "github.com/router-for-me/CLIProxyAPI/v6/internal/config" +) + +func writeTestConfigFile(t *testing.T) string { + t.Helper() + + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + if errWrite := os.WriteFile(path, []byte("{}\n"), 0o600); errWrite != nil { + t.Fatalf("failed to write test config: %v", errWrite) + } + return path +} + +func TestDeleteGeminiKey_RequiresBaseURLWhenAPIKeyDuplicated(t *testing.T) { + t.Parallel() + gin.SetMode(gin.TestMode) + + h := &Handler{ + cfg: &config.Config{ + GeminiKey: []config.GeminiKey{ + {APIKey: "shared-key", BaseURL: "https://a.example.com"}, + {APIKey: "shared-key", BaseURL: "https://b.example.com"}, + }, + }, + configFilePath: writeTestConfigFile(t), + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/gemini-api-key?api-key=shared-key", nil) + + h.DeleteGeminiKey(c) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusBadRequest, rec.Body.String()) + } + if got := len(h.cfg.GeminiKey); got != 2 { + t.Fatalf("gemini keys len = %d, want 2", got) + } +} + +func TestDeleteGeminiKey_DeletesOnlyMatchingBaseURL(t *testing.T) { + t.Parallel() + gin.SetMode(gin.TestMode) + + h := &Handler{ + cfg: &config.Config{ + GeminiKey: []config.GeminiKey{ + {APIKey: "shared-key", BaseURL: "https://a.example.com"}, + {APIKey: "shared-key", BaseURL: "https://b.example.com"}, + }, + }, + configFilePath: writeTestConfigFile(t), + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/gemini-api-key?api-key=shared-key&base-url=https://a.example.com", nil) + + h.DeleteGeminiKey(c) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + if got := len(h.cfg.GeminiKey); got != 1 { + t.Fatalf("gemini keys len = %d, want 1", got) + } + if got := h.cfg.GeminiKey[0].BaseURL; got != "https://b.example.com" { + t.Fatalf("remaining base-url = %q, want %q", got, "https://b.example.com") + } +} + +func TestDeleteClaudeKey_DeletesEmptyBaseURLWhenExplicitlyProvided(t *testing.T) { + t.Parallel() + gin.SetMode(gin.TestMode) + + h := &Handler{ + cfg: &config.Config{ + ClaudeKey: []config.ClaudeKey{ + {APIKey: "shared-key", BaseURL: ""}, + {APIKey: "shared-key", BaseURL: "https://claude.example.com"}, + }, + }, + configFilePath: writeTestConfigFile(t), + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/claude-api-key?api-key=shared-key&base-url=", nil) + + h.DeleteClaudeKey(c) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + if got := len(h.cfg.ClaudeKey); got != 1 { + t.Fatalf("claude keys len = %d, want 1", got) + } + if got := h.cfg.ClaudeKey[0].BaseURL; got != "https://claude.example.com" { + t.Fatalf("remaining base-url = %q, want %q", got, "https://claude.example.com") + } +} + +func TestDeleteVertexCompatKey_DeletesOnlyMatchingBaseURL(t *testing.T) { + t.Parallel() + gin.SetMode(gin.TestMode) + + h := &Handler{ + cfg: &config.Config{ + VertexCompatAPIKey: []config.VertexCompatKey{ + {APIKey: "shared-key", BaseURL: "https://a.example.com"}, + {APIKey: "shared-key", BaseURL: "https://b.example.com"}, + }, + }, + configFilePath: writeTestConfigFile(t), + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/vertex-api-key?api-key=shared-key&base-url=https://b.example.com", nil) + + h.DeleteVertexCompatKey(c) + + if rec.Code != http.StatusOK { + t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusOK, rec.Body.String()) + } + if got := len(h.cfg.VertexCompatAPIKey); got != 1 { + t.Fatalf("vertex keys len = %d, want 1", got) + } + if got := h.cfg.VertexCompatAPIKey[0].BaseURL; got != "https://a.example.com" { + t.Fatalf("remaining base-url = %q, want %q", got, "https://a.example.com") + } +} + +func TestDeleteCodexKey_RequiresBaseURLWhenAPIKeyDuplicated(t *testing.T) { + t.Parallel() + gin.SetMode(gin.TestMode) + + h := &Handler{ + cfg: &config.Config{ + CodexKey: []config.CodexKey{ + {APIKey: "shared-key", BaseURL: "https://a.example.com"}, + {APIKey: "shared-key", BaseURL: "https://b.example.com"}, + }, + }, + configFilePath: writeTestConfigFile(t), + } + + rec := httptest.NewRecorder() + c, _ := gin.CreateTestContext(rec) + c.Request = httptest.NewRequest(http.MethodDelete, "/v0/management/codex-api-key?api-key=shared-key", nil) + + h.DeleteCodexKey(c) + + if rec.Code != http.StatusBadRequest { + t.Fatalf("status = %d, want %d; body=%s", rec.Code, http.StatusBadRequest, rec.Body.String()) + } + if got := len(h.cfg.CodexKey); got != 2 { + t.Fatalf("codex keys len = %d, want 2", got) + } +} diff --git a/internal/config/config.go b/internal/config/config.go index 15847f57..f25b0aa2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -865,6 +865,7 @@ func (cfg *Config) SanitizeClaudeKeys() { } // SanitizeGeminiKeys deduplicates and normalizes Gemini credentials. +// It uses API key + base URL as the uniqueness key. func (cfg *Config) SanitizeGeminiKeys() { if cfg == nil { return @@ -883,10 +884,11 @@ func (cfg *Config) SanitizeGeminiKeys() { entry.ProxyURL = strings.TrimSpace(entry.ProxyURL) entry.Headers = NormalizeHeaders(entry.Headers) entry.ExcludedModels = NormalizeExcludedModels(entry.ExcludedModels) - if _, exists := seen[entry.APIKey]; exists { + uniqueKey := entry.APIKey + "|" + entry.BaseURL + if _, exists := seen[uniqueKey]; exists { continue } - seen[entry.APIKey] = struct{}{} + seen[uniqueKey] = struct{}{} out = append(out, entry) } cfg.GeminiKey = out diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index d8eec537..5c8ff039 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -25,23 +25,13 @@ import ( ) const ( - qwenUserAgent = "QwenCode/0.13.2 (darwin; arm64)" + qwenUserAgent = "QwenCode/0.14.2 (darwin; arm64)" qwenRateLimitPerMin = 60 // 60 requests per minute per credential qwenRateLimitWindow = time.Minute // sliding window duration ) var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`) -// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls. -var qwenBeijingLoc = func() *time.Location { - loc, err := time.LoadLocation("Asia/Shanghai") - if err != nil || loc == nil { - log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err) - return time.FixedZone("CST", 8*3600) - } - return loc -}() - // qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion. var qwenQuotaCodes = map[string]struct{}{ "insufficient_quota": {}, @@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, // Qwen returns 403 for quota errors, 429 for rate limits if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) { errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic - cooldown := timeUntilNextDay() - retryAfter = &cooldown - helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown) + // Do not force an excessively long retry-after (e.g. until tomorrow), otherwise + // the global request-retry scheduler may skip retries due to max-retry-interval. + helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode) } return errCode, retryAfter } -// timeUntilNextDay returns duration until midnight Beijing time (UTC+8). -// Qwen's daily quota resets at 00:00 Beijing time. -func timeUntilNextDay() time.Duration { - now := time.Now() - nowLocal := now.In(qwenBeijingLoc) - tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc) - return tomorrow.Sub(now) -} - // ensureQwenSystemMessage ensures the request has a single system message at the beginning. // It always injects the default system prompt and merges any user-provided system messages // into the injected system message content to satisfy Qwen's strict message ordering rules. @@ -626,19 +607,23 @@ func (e *QwenExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c } func applyQwenHeaders(r *http.Request, token string, stream bool) { - r.Header.Set("Content-Type", "application/json") - r.Header.Set("Authorization", "Bearer "+token) - r.Header.Set("User-Agent", qwenUserAgent) - r.Header["X-DashScope-UserAgent"] = []string{qwenUserAgent} r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0") + r.Header.Set("User-Agent", qwenUserAgent) r.Header.Set("X-Stainless-Lang", "js") - r.Header.Set("X-Stainless-Arch", "arm64") - r.Header.Set("X-Stainless-Package-Version", "5.11.0") - r.Header["X-DashScope-CacheControl"] = []string{"enable"} - r.Header.Set("X-Stainless-Retry-Count", "0") + r.Header.Set("Accept-Language", "*") + r.Header.Set("X-Dashscope-Cachecontrol", "enable") r.Header.Set("X-Stainless-Os", "MacOS") - r.Header["X-DashScope-AuthType"] = []string{"qwen-oauth"} + r.Header.Set("X-Dashscope-Authtype", "qwen-oauth") + r.Header.Set("X-Stainless-Arch", "arm64") r.Header.Set("X-Stainless-Runtime", "node") + r.Header.Set("X-Stainless-Retry-Count", "0") + r.Header.Set("Accept-Encoding", "gzip, deflate") + r.Header.Set("Authorization", "Bearer "+token) + r.Header.Set("X-Stainless-Package-Version", "5.11.0") + r.Header.Set("Sec-Fetch-Mode", "cors") + r.Header.Set("Content-Type", "application/json") + r.Header.Set("Connection", "keep-alive") + r.Header.Set("X-Dashscope-Useragent", qwenUserAgent) if stream { r.Header.Set("Accept", "text/event-stream") @@ -647,6 +632,26 @@ func applyQwenHeaders(r *http.Request, token string, stream bool) { r.Header.Set("Accept", "application/json") } +func normaliseQwenBaseURL(resourceURL string) string { + raw := strings.TrimSpace(resourceURL) + if raw == "" { + return "" + } + + normalized := raw + lower := strings.ToLower(normalized) + if !strings.HasPrefix(lower, "http://") && !strings.HasPrefix(lower, "https://") { + normalized = "https://" + normalized + } + + normalized = strings.TrimRight(normalized, "/") + if !strings.HasSuffix(strings.ToLower(normalized), "/v1") { + normalized += "/v1" + } + + return normalized +} + func qwenCreds(a *cliproxyauth.Auth) (token, baseURL string) { if a == nil { return "", "" @@ -664,7 +669,7 @@ func qwenCreds(a *cliproxyauth.Auth) (token, baseURL string) { token = v } if v, ok := a.Metadata["resource_url"].(string); ok { - baseURL = fmt.Sprintf("https://%s/v1", v) + baseURL = normaliseQwenBaseURL(v) } } return diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go index b960eced..cf9ed21f 100644 --- a/internal/runtime/executor/qwen_executor_test.go +++ b/internal/runtime/executor/qwen_executor_test.go @@ -1,9 +1,12 @@ package executor import ( + "context" + "net/http" "testing" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" + cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth" "github.com/tidwall/gjson" ) @@ -152,3 +155,57 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) { t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B") } } + +func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) { + body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`) + code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body) + if code != http.StatusTooManyRequests { + t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests) + } + if retryAfter != nil { + t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter) + } +} + +func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) { + body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`) + code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body) + if code != http.StatusTooManyRequests { + t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests) + } + if retryAfter != nil { + t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter) + } +} + +func TestQwenCreds_NormalizesResourceURL(t *testing.T) { + tests := []struct { + name string + resourceURL string + wantBaseURL string + }{ + {"host only", "portal.qwen.ai", "https://portal.qwen.ai/v1"}, + {"scheme no v1", "https://portal.qwen.ai", "https://portal.qwen.ai/v1"}, + {"scheme with v1", "https://portal.qwen.ai/v1", "https://portal.qwen.ai/v1"}, + {"scheme with v1 slash", "https://portal.qwen.ai/v1/", "https://portal.qwen.ai/v1"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + auth := &cliproxyauth.Auth{ + Metadata: map[string]any{ + "access_token": "test-token", + "resource_url": tt.resourceURL, + }, + } + + token, baseURL := qwenCreds(auth) + if token != "test-token" { + t.Fatalf("qwenCreds token = %q, want %q", token, "test-token") + } + if baseURL != tt.wantBaseURL { + t.Fatalf("qwenCreds baseURL = %q, want %q", baseURL, tt.wantBaseURL) + } + }) + } +} diff --git a/sdk/auth/qwen.go b/sdk/auth/qwen.go index 310d4987..d891021a 100644 --- a/sdk/auth/qwen.go +++ b/sdk/auth/qwen.go @@ -27,7 +27,7 @@ func (a *QwenAuthenticator) Provider() string { } func (a *QwenAuthenticator) RefreshLead() *time.Duration { - return new(3 * time.Hour) + return new(20 * time.Minute) } func (a *QwenAuthenticator) Login(ctx context.Context, cfg *config.Config, opts *LoginOptions) (*coreauth.Auth, error) { diff --git a/sdk/auth/qwen_refresh_lead_test.go b/sdk/auth/qwen_refresh_lead_test.go new file mode 100644 index 00000000..56f41fc0 --- /dev/null +++ b/sdk/auth/qwen_refresh_lead_test.go @@ -0,0 +1,19 @@ +package auth + +import ( + "testing" + "time" +) + +func TestQwenAuthenticator_RefreshLeadIsSane(t *testing.T) { + lead := NewQwenAuthenticator().RefreshLead() + if lead == nil { + t.Fatal("RefreshLead() = nil, want non-nil") + } + if *lead <= 0 { + t.Fatalf("RefreshLead() = %s, want > 0", *lead) + } + if *lead > 30*time.Minute { + t.Fatalf("RefreshLead() = %s, want <= %s", *lead, 30*time.Minute) + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 357bf693..25cc7221 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt if attempt >= effectiveRetry { continue } - blocked, reason, next := isAuthBlockedForModel(auth, model, now) + checkModel := model + if strings.TrimSpace(model) != "" { + checkModel = m.selectionModelForAuth(auth, model) + } + blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now) if !blocked || next.IsZero() || reason == blockReasonDisabled { continue } @@ -1846,6 +1850,50 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt return minWait, found } +func (m *Manager) retryAllowed(attempt int, providers []string) bool { + if m == nil || attempt < 0 || len(providers) == 0 { + return false + } + defaultRetry := int(m.requestRetry.Load()) + if defaultRetry < 0 { + defaultRetry = 0 + } + providerSet := make(map[string]struct{}, len(providers)) + for i := range providers { + key := strings.TrimSpace(strings.ToLower(providers[i])) + if key == "" { + continue + } + providerSet[key] = struct{}{} + } + if len(providerSet) == 0 { + return false + } + + m.mu.RLock() + defer m.mu.RUnlock() + for _, auth := range m.auths { + if auth == nil { + continue + } + providerKey := strings.TrimSpace(strings.ToLower(auth.Provider)) + if _, ok := providerSet[providerKey]; !ok { + continue + } + effectiveRetry := defaultRetry + if override, ok := auth.RequestRetryOverride(); ok { + effectiveRetry = override + } + if effectiveRetry < 0 { + effectiveRetry = 0 + } + if attempt < effectiveRetry { + return true + } + } + return false +} + func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) { if err == nil { return 0, false @@ -1853,17 +1901,31 @@ func (m *Manager) shouldRetryAfterError(err error, attempt int, providers []stri if maxWait <= 0 { return 0, false } - if status := statusCodeFromError(err); status == http.StatusOK { + status := statusCodeFromError(err) + if status == http.StatusOK { return 0, false } if isRequestInvalidError(err) { return 0, false } wait, found := m.closestCooldownWait(providers, model, attempt) - if !found || wait > maxWait { + if found { + if wait > maxWait { + return 0, false + } + return wait, true + } + if status != http.StatusTooManyRequests { return 0, false } - return wait, true + if !m.retryAllowed(attempt, providers) { + return 0, false + } + retryAfter := retryAfterFromError(err) + if retryAfter == nil || *retryAfter <= 0 || *retryAfter > maxWait { + return 0, false + } + return *retryAfter, true } func waitForCooldown(ctx context.Context, wait time.Duration) error { diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index 0c72c833..1b74aab1 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/google/uuid" + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) @@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi } } +func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) { + m := NewManager(nil, nil, nil) + m.SetRetryConfig(3, 30*time.Second, 0) + m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{ + "qwen": { + {Name: "qwen3.6-plus", Alias: "coder-model"}, + }, + }) + + routeModel := "coder-model" + upstreamModel := "qwen3.6-plus" + next := time.Now().Add(5 * time.Second) + + auth := &Auth{ + ID: "auth-1", + Provider: "qwen", + ModelStates: map[string]*ModelState{ + upstreamModel: { + Unavailable: true, + Status: StatusError, + NextRetryAfter: next, + Quota: QuotaState{ + Exceeded: true, + Reason: "quota", + NextRecoverAt: next, + }, + }, + }, + } + if _, errRegister := m.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + _, _, maxWait := m.retrySettings() + wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait) + if !shouldRetry { + t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait) + } + if wait <= 0 { + t.Fatalf("expected wait > 0, got %v", wait) + } +} + type credentialRetryLimitExecutor struct { id string @@ -646,6 +690,57 @@ func TestManager_Execute_DisableCooling_DoesNotBlackoutAfter429RetryAfter(t *tes } } +func TestManager_Execute_DisableCooling_RetriesAfter429RetryAfter(t *testing.T) { + prev := quotaCooldownDisabled.Load() + quotaCooldownDisabled.Store(false) + t.Cleanup(func() { quotaCooldownDisabled.Store(prev) }) + + m := NewManager(nil, nil, nil) + m.SetRetryConfig(3, 100*time.Millisecond, 0) + + executor := &authFallbackExecutor{ + id: "claude", + executeErrors: map[string]error{ + "auth-429-retryafter-exec": &retryAfterStatusError{ + status: http.StatusTooManyRequests, + message: "quota exhausted", + retryAfter: 5 * time.Millisecond, + }, + }, + } + m.RegisterExecutor(executor) + + auth := &Auth{ + ID: "auth-429-retryafter-exec", + Provider: "claude", + Metadata: map[string]any{ + "disable_cooling": true, + }, + } + if _, errRegister := m.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + model := "test-model-429-retryafter-exec" + reg := registry.GetGlobalRegistry() + reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}}) + t.Cleanup(func() { reg.UnregisterClient(auth.ID) }) + + req := cliproxyexecutor.Request{Model: model} + _, errExecute := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{}) + if errExecute == nil { + t.Fatal("expected execute error") + } + if statusCodeFromError(errExecute) != http.StatusTooManyRequests { + t.Fatalf("execute status = %d, want %d", statusCodeFromError(errExecute), http.StatusTooManyRequests) + } + + calls := executor.ExecuteCalls() + if len(calls) != 4 { + t.Fatalf("execute calls = %d, want 4 (initial + 3 retries)", len(calls)) + } +} + func TestManager_MarkResult_RequestScopedNotFoundDoesNotCooldownAuth(t *testing.T) { m := NewManager(nil, nil, nil) diff --git a/sdk/cliproxy/auth/scheduler.go b/sdk/cliproxy/auth/scheduler.go index 1482bae6..b5a39282 100644 --- a/sdk/cliproxy/auth/scheduler.go +++ b/sdk/cliproxy/auth/scheduler.go @@ -97,6 +97,72 @@ type childBucket struct { // cooldownQueue is the blocked auth collection ordered by next retry time during rebuilds. type cooldownQueue []*scheduledAuth +type readyViewCursorState struct { + cursor int + parentCursor int + childCursors map[string]int +} + +type readyBucketCursorState struct { + all readyViewCursorState + ws readyViewCursorState +} + +func snapshotReadyViewCursors(view readyView) readyViewCursorState { + state := readyViewCursorState{ + cursor: view.cursor, + parentCursor: view.parentCursor, + } + if len(view.children) == 0 { + return state + } + state.childCursors = make(map[string]int, len(view.children)) + for parent, child := range view.children { + if child == nil { + continue + } + state.childCursors[parent] = child.cursor + } + return state +} + +func restoreReadyViewCursors(view *readyView, state readyViewCursorState) { + if view == nil { + return + } + if len(view.flat) > 0 { + view.cursor = normalizeCursor(state.cursor, len(view.flat)) + } + if len(view.parentOrder) == 0 || len(view.children) == 0 { + return + } + view.parentCursor = normalizeCursor(state.parentCursor, len(view.parentOrder)) + if len(state.childCursors) == 0 { + return + } + for parent, child := range view.children { + if child == nil || len(child.items) == 0 { + continue + } + cursor, ok := state.childCursors[parent] + if !ok { + continue + } + child.cursor = normalizeCursor(cursor, len(child.items)) + } +} + +func normalizeCursor(cursor, size int) int { + if size <= 0 || cursor <= 0 { + return 0 + } + cursor = cursor % size + if cursor < 0 { + cursor += size + } + return cursor +} + // newAuthScheduler constructs an empty scheduler configured for the supplied selector strategy. func newAuthScheduler(selector Selector) *authScheduler { return &authScheduler{ @@ -824,6 +890,17 @@ func (m *modelScheduler) availabilitySummaryLocked(predicate func(*scheduledAuth // rebuildIndexesLocked reconstructs ready and blocked views from the current entry map. func (m *modelScheduler) rebuildIndexesLocked() { + cursorStates := make(map[int]readyBucketCursorState, len(m.readyByPriority)) + for priority, bucket := range m.readyByPriority { + if bucket == nil { + continue + } + cursorStates[priority] = readyBucketCursorState{ + all: snapshotReadyViewCursors(bucket.all), + ws: snapshotReadyViewCursors(bucket.ws), + } + } + m.readyByPriority = make(map[int]*readyBucket) m.priorityOrder = m.priorityOrder[:0] m.blocked = m.blocked[:0] @@ -844,7 +921,12 @@ func (m *modelScheduler) rebuildIndexesLocked() { sort.Slice(entries, func(i, j int) bool { return entries[i].auth.ID < entries[j].auth.ID }) - m.readyByPriority[priority] = buildReadyBucket(entries) + bucket := buildReadyBucket(entries) + if cursorState, ok := cursorStates[priority]; ok && bucket != nil { + restoreReadyViewCursors(&bucket.all, cursorState.all) + restoreReadyViewCursors(&bucket.ws, cursorState.ws) + } + m.readyByPriority[priority] = bucket m.priorityOrder = append(m.priorityOrder, priority) } sort.Slice(m.priorityOrder, func(i, j int) bool {