diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index d8eec537..cf4a9975 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -25,23 +25,13 @@ import ( ) const ( - qwenUserAgent = "QwenCode/0.13.2 (darwin; arm64)" + qwenUserAgent = "QwenCode/0.14.2 (darwin; arm64)" qwenRateLimitPerMin = 60 // 60 requests per minute per credential qwenRateLimitWindow = time.Minute // sliding window duration ) var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`) -// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls. -var qwenBeijingLoc = func() *time.Location { - loc, err := time.LoadLocation("Asia/Shanghai") - if err != nil || loc == nil { - log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err) - return time.FixedZone("CST", 8*3600) - } - return loc -}() - // qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion. var qwenQuotaCodes = map[string]struct{}{ "insufficient_quota": {}, @@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int, // Qwen returns 403 for quota errors, 429 for rate limits if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) { errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic - cooldown := timeUntilNextDay() - retryAfter = &cooldown - helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown) + // Do not force an excessively long retry-after (e.g. until tomorrow), otherwise + // the global request-retry scheduler may skip retries due to max-retry-interval. + helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode) } return errCode, retryAfter } -// timeUntilNextDay returns duration until midnight Beijing time (UTC+8). -// Qwen's daily quota resets at 00:00 Beijing time. -func timeUntilNextDay() time.Duration { - now := time.Now() - nowLocal := now.In(qwenBeijingLoc) - tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc) - return tomorrow.Sub(now) -} - // ensureQwenSystemMessage ensures the request has a single system message at the beginning. // It always injects the default system prompt and merges any user-provided system messages // into the injected system message content to satisfy Qwen's strict message ordering rules. @@ -626,19 +607,23 @@ func (e *QwenExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c } func applyQwenHeaders(r *http.Request, token string, stream bool) { - r.Header.Set("Content-Type", "application/json") - r.Header.Set("Authorization", "Bearer "+token) - r.Header.Set("User-Agent", qwenUserAgent) - r.Header["X-DashScope-UserAgent"] = []string{qwenUserAgent} r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0") + r.Header.Set("User-Agent", qwenUserAgent) r.Header.Set("X-Stainless-Lang", "js") - r.Header.Set("X-Stainless-Arch", "arm64") - r.Header.Set("X-Stainless-Package-Version", "5.11.0") - r.Header["X-DashScope-CacheControl"] = []string{"enable"} - r.Header.Set("X-Stainless-Retry-Count", "0") + r.Header.Set("Accept-Language", "*") + r.Header.Set("X-Dashscope-Cachecontrol", "enable") r.Header.Set("X-Stainless-Os", "MacOS") - r.Header["X-DashScope-AuthType"] = []string{"qwen-oauth"} + r.Header.Set("X-Dashscope-Authtype", "qwen-oauth") + r.Header.Set("X-Stainless-Arch", "arm64") r.Header.Set("X-Stainless-Runtime", "node") + r.Header.Set("X-Stainless-Retry-Count", "0") + r.Header.Set("Accept-Encoding", "gzip, deflate") + r.Header.Set("Authorization", "Bearer "+token) + r.Header.Set("X-Stainless-Package-Version", "5.11.0") + r.Header.Set("Sec-Fetch-Mode", "cors") + r.Header.Set("Content-Type", "application/json") + r.Header.Set("Connection", "keep-alive") + r.Header.Set("X-Dashscope-Useragent", qwenUserAgent) if stream { r.Header.Set("Accept", "text/event-stream") diff --git a/internal/runtime/executor/qwen_executor_test.go b/internal/runtime/executor/qwen_executor_test.go index b960eced..d12c0a0b 100644 --- a/internal/runtime/executor/qwen_executor_test.go +++ b/internal/runtime/executor/qwen_executor_test.go @@ -1,6 +1,8 @@ package executor import ( + "context" + "net/http" "testing" "github.com/router-for-me/CLIProxyAPI/v6/internal/thinking" @@ -152,3 +154,25 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) { t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B") } } + +func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) { + body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`) + code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body) + if code != http.StatusTooManyRequests { + t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests) + } + if retryAfter != nil { + t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter) + } +} + +func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) { + body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`) + code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body) + if code != http.StatusTooManyRequests { + t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests) + } + if retryAfter != nil { + t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter) + } +} diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index 1cf8c865..1f53242e 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt if attempt >= effectiveRetry { continue } - blocked, reason, next := isAuthBlockedForModel(auth, model, now) + checkModel := model + if strings.TrimSpace(model) != "" { + checkModel = m.selectionModelForAuth(auth, model) + } + blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now) if !blocked || next.IsZero() || reason == blockReasonDisabled { continue } diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index 0c72c833..e8dc1393 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/google/uuid" + internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config" "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor" ) @@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi } } +func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) { + m := NewManager(nil, nil, nil) + m.SetRetryConfig(3, 30*time.Second, 0) + m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{ + "qwen": { + {Name: "qwen3.6-plus", Alias: "coder-model"}, + }, + }) + + routeModel := "coder-model" + upstreamModel := "qwen3.6-plus" + next := time.Now().Add(5 * time.Second) + + auth := &Auth{ + ID: "auth-1", + Provider: "qwen", + ModelStates: map[string]*ModelState{ + upstreamModel: { + Unavailable: true, + Status: StatusError, + NextRetryAfter: next, + Quota: QuotaState{ + Exceeded: true, + Reason: "quota", + NextRecoverAt: next, + }, + }, + }, + } + if _, errRegister := m.Register(context.Background(), auth); errRegister != nil { + t.Fatalf("register auth: %v", errRegister) + } + + _, _, maxWait := m.retrySettings() + wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait) + if !shouldRetry { + t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait) + } + if wait <= 0 { + t.Fatalf("expected wait > 0, got %v", wait) + } +} + type credentialRetryLimitExecutor struct { id string