diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index 60f8f3a4..cf4a9975 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -32,16 +32,6 @@ const ( var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`) -// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls. -var qwenBeijingLoc = func() *time.Location { - loc, err := time.LoadLocation("Asia/Shanghai") - if err != nil || loc == nil { - log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err) - return time.FixedZone("CST", 8*3600) - } - return loc -}() - // qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion. var qwenQuotaCodes = map[string]struct{}{ "insufficient_quota": {}, @@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, // Qwen returns 403 for quota errors, 429 for rate limits if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) { errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic - cooldown := timeUntilNextDay() - retryAfter = &cooldown - helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown) + // Do not force an excessively long retry-after (e.g. until tomorrow), otherwise + // the global request-retry scheduler may skip retries due to max-retry-interval. + helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode) } return errCode, retryAfter } -// timeUntilNextDay returns duration until midnight Beijing time (UTC+8). -// Qwen's daily quota resets at 00:00 Beijing time. -func timeUntilNextDay() time.Duration { - now := time.Now() - nowLocal := now.In(qwenBeijingLoc) - tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc) - return tomorrow.Sub(now) -} - // ensureQwenSystemMessage ensures the request has a single system message at the beginning. // It always injects the default system prompt and merges any user-provided system messages // into the injected system message content to satisfy Qwen's strict message ordering rules. diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go index b960eced..d12c0a0b 100644 --- a/internal/runtime/executor/qwen_executor_test.go +++ b/internal/runtime/executor/qwen_executor_test.go @@ -1,6 +1,8 @@ package executor import ( + "context" + "net/http" "testing" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" @@ -152,3 +154,25 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) { t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B") } } + +func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) { + body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`) + code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body) + if code != http.StatusTooManyRequests { + t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests) + } + if retryAfter != nil { + t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter) + } +} + +func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) { + body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`) + code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body) + if code != http.StatusTooManyRequests { + t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests) + } + if retryAfter != nil { + t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter) + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 357bf693..0d41568c 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt if attempt >= effectiveRetry { continue } - blocked, reason, next := isAuthBlockedForModel(auth, model, now) + checkModel := model + if strings.TrimSpace(model) != "" { + checkModel = m.selectionModelForAuth(auth, model) + } + blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now) if !blocked || next.IsZero() || reason == blockReasonDisabled { continue } diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index 0c72c833..e8dc1393 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/google/uuid" + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) @@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi } } +func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) { + m := NewManager(nil, nil, nil) + m.SetRetryConfig(3, 30*time.Second, 0) + m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{ + "qwen": { + {Name: "qwen3.6-plus", Alias: "coder-model"}, + }, + }) + + routeModel := "coder-model" + upstreamModel := "qwen3.6-plus" + next := time.Now().Add(5 * time.Second) + + auth := &Auth{ + ID: "auth-1", + Provider: "qwen", + ModelStates: map[string]*ModelState{ + upstreamModel: { + Unavailable: true, + Status: StatusError, + NextRetryAfter: next, + Quota: QuotaState{ + Exceeded: true, + Reason: "quota", + NextRecoverAt: next, + }, + }, + }, + } + if _, errRegister := m.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + _, _, maxWait := m.retrySettings() + wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait) + if !shouldRetry { + t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait) + } + if wait <= 0 { + t.Fatalf("expected wait > 0, got %v", wait) + } +} + type credentialRetryLimitExecutor struct { id string