Merge branch 'router-for-me:main' into main

This commit is contained in:
Luis Pater
2026-04-09 03:49:09 +08:00
committed by GitHub
4 changed files with 90 additions and 33 deletions

View File

@@ -25,23 +25,13 @@ import (
)
const (
qwenUserAgent = "QwenCode/0.13.2 (darwin; arm64)"
qwenUserAgent = "QwenCode/0.14.2 (darwin; arm64)"
qwenRateLimitPerMin = 60 // 60 requests per minute per credential
qwenRateLimitWindow = time.Minute // sliding window duration
)
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
var qwenBeijingLoc = func() *time.Location {
loc, err := time.LoadLocation("Asia/Shanghai")
if err != nil || loc == nil {
log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err)
return time.FixedZone("CST", 8*3600)
}
return loc
}()
// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
var qwenQuotaCodes = map[string]struct{}{
"insufficient_quota": {},
@@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
// Qwen returns 403 for quota errors, 429 for rate limits
if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) {
errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic
cooldown := timeUntilNextDay()
retryAfter = &cooldown
helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown)
// Do not force an excessively long retry-after (e.g. until tomorrow), otherwise
// the global request-retry scheduler may skip retries due to max-retry-interval.
helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode)
}
return errCode, retryAfter
}
// timeUntilNextDay returns duration until midnight Beijing time (UTC+8).
// Qwen's daily quota resets at 00:00 Beijing time.
func timeUntilNextDay() time.Duration {
now := time.Now()
nowLocal := now.In(qwenBeijingLoc)
tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc)
return tomorrow.Sub(now)
}
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
// It always injects the default system prompt and merges any user-provided system messages
// into the injected system message content to satisfy Qwen's strict message ordering rules.
@@ -626,19 +607,23 @@ func (e *QwenExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (*c
}
func applyQwenHeaders(r *http.Request, token string, stream bool) {
r.Header.Set("Content-Type", "application/json")
r.Header.Set("Authorization", "Bearer "+token)
r.Header.Set("User-Agent", qwenUserAgent)
r.Header["X-DashScope-UserAgent"] = []string{qwenUserAgent}
r.Header.Set("X-Stainless-Runtime-Version", "v22.17.0")
r.Header.Set("User-Agent", qwenUserAgent)
r.Header.Set("X-Stainless-Lang", "js")
r.Header.Set("X-Stainless-Arch", "arm64")
r.Header.Set("X-Stainless-Package-Version", "5.11.0")
r.Header["X-DashScope-CacheControl"] = []string{"enable"}
r.Header.Set("X-Stainless-Retry-Count", "0")
r.Header.Set("Accept-Language", "*")
r.Header.Set("X-Dashscope-Cachecontrol", "enable")
r.Header.Set("X-Stainless-Os", "MacOS")
r.Header["X-DashScope-AuthType"] = []string{"qwen-oauth"}
r.Header.Set("X-Dashscope-Authtype", "qwen-oauth")
r.Header.Set("X-Stainless-Arch", "arm64")
r.Header.Set("X-Stainless-Runtime", "node")
r.Header.Set("X-Stainless-Retry-Count", "0")
r.Header.Set("Accept-Encoding", "gzip, deflate")
r.Header.Set("Authorization", "Bearer "+token)
r.Header.Set("X-Stainless-Package-Version", "5.11.0")
r.Header.Set("Sec-Fetch-Mode", "cors")
r.Header.Set("Content-Type", "application/json")
r.Header.Set("Connection", "keep-alive")
r.Header.Set("X-Dashscope-Useragent", qwenUserAgent)
if stream {
r.Header.Set("Accept", "text/event-stream")

View File

@@ -1,6 +1,8 @@
package executor
import (
"context"
"net/http"
"testing"
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
@@ -152,3 +154,25 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
}
}
func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) {
body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body)
if code != http.StatusTooManyRequests {
t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
}
if retryAfter != nil {
t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
}
}
func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) {
body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body)
if code != http.StatusTooManyRequests {
t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
}
if retryAfter != nil {
t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
}
}

View File

@@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt
if attempt >= effectiveRetry {
continue
}
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
checkModel := model
if strings.TrimSpace(model) != "" {
checkModel = m.selectionModelForAuth(auth, model)
}
blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now)
if !blocked || next.IsZero() || reason == blockReasonDisabled {
continue
}

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/google/uuid"
internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
)
@@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi
}
}
func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) {
m := NewManager(nil, nil, nil)
m.SetRetryConfig(3, 30*time.Second, 0)
m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{
"qwen": {
{Name: "qwen3.6-plus", Alias: "coder-model"},
},
})
routeModel := "coder-model"
upstreamModel := "qwen3.6-plus"
next := time.Now().Add(5 * time.Second)
auth := &Auth{
ID: "auth-1",
Provider: "qwen",
ModelStates: map[string]*ModelState{
upstreamModel: {
Unavailable: true,
Status: StatusError,
NextRetryAfter: next,
Quota: QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
},
},
},
}
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
t.Fatalf("register auth: %v", errRegister)
}
_, _, maxWait := m.retrySettings()
wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait)
if !shouldRetry {
t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait)
}
if wait <= 0 {
t.Fatalf("expected wait > 0, got %v", wait)
}
}
type credentialRetryLimitExecutor struct {
id string