mirror of
https://github.com/router-for-me/CLIProxyAPIPlus.git
synced 2026-04-20 22:51:45 +00:00
fix(auth): handle OAuth model alias in retry logic and refine Qwen quota handling
This commit is contained in:
@@ -32,16 +32,6 @@ const (
|
|||||||
|
|
||||||
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
|
var qwenDefaultSystemMessage = []byte(`{"role":"system","content":[{"type":"text","text":"","cache_control":{"type":"ephemeral"}}]}`)
|
||||||
|
|
||||||
// qwenBeijingLoc caches the Beijing timezone to avoid repeated LoadLocation syscalls.
|
|
||||||
var qwenBeijingLoc = func() *time.Location {
|
|
||||||
loc, err := time.LoadLocation("Asia/Shanghai")
|
|
||||||
if err != nil || loc == nil {
|
|
||||||
log.Warnf("qwen: failed to load Asia/Shanghai timezone: %v, using fixed UTC+8", err)
|
|
||||||
return time.FixedZone("CST", 8*3600)
|
|
||||||
}
|
|
||||||
return loc
|
|
||||||
}()
|
|
||||||
|
|
||||||
// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
|
// qwenQuotaCodes is a package-level set of error codes that indicate quota exhaustion.
|
||||||
var qwenQuotaCodes = map[string]struct{}{
|
var qwenQuotaCodes = map[string]struct{}{
|
||||||
"insufficient_quota": {},
|
"insufficient_quota": {},
|
||||||
@@ -156,22 +146,13 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
|
|||||||
// Qwen returns 403 for quota errors, 429 for rate limits
|
// Qwen returns 403 for quota errors, 429 for rate limits
|
||||||
if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) {
|
if (httpCode == http.StatusForbidden || httpCode == http.StatusTooManyRequests) && isQwenQuotaError(body) {
|
||||||
errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic
|
errCode = http.StatusTooManyRequests // Map to 429 to trigger quota logic
|
||||||
cooldown := timeUntilNextDay()
|
// Do not force an excessively long retry-after (e.g. until tomorrow), otherwise
|
||||||
retryAfter = &cooldown
|
// the global request-retry scheduler may skip retries due to max-retry-interval.
|
||||||
helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d), cooling down until tomorrow (%v)", httpCode, errCode, cooldown)
|
helps.LogWithRequestID(ctx).Warnf("qwen quota exceeded (http %d -> %d)", httpCode, errCode)
|
||||||
}
|
}
|
||||||
return errCode, retryAfter
|
return errCode, retryAfter
|
||||||
}
|
}
|
||||||
|
|
||||||
// timeUntilNextDay returns duration until midnight Beijing time (UTC+8).
|
|
||||||
// Qwen's daily quota resets at 00:00 Beijing time.
|
|
||||||
func timeUntilNextDay() time.Duration {
|
|
||||||
now := time.Now()
|
|
||||||
nowLocal := now.In(qwenBeijingLoc)
|
|
||||||
tomorrow := time.Date(nowLocal.Year(), nowLocal.Month(), nowLocal.Day()+1, 0, 0, 0, 0, qwenBeijingLoc)
|
|
||||||
return tomorrow.Sub(now)
|
|
||||||
}
|
|
||||||
|
|
||||||
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
|
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
|
||||||
// It always injects the default system prompt and merges any user-provided system messages
|
// It always injects the default system prompt and merges any user-provided system messages
|
||||||
// into the injected system message content to satisfy Qwen's strict message ordering rules.
|
// into the injected system message content to satisfy Qwen's strict message ordering rules.
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package executor
|
package executor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/thinking"
|
||||||
@@ -152,3 +154,25 @@ func TestEnsureQwenSystemMessage_MergesMultipleSystemMessages(t *testing.T) {
|
|||||||
t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
|
t.Fatalf("messages[0].content[2].text = %q, want %q", parts[2].Get("text").String(), "B")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestWrapQwenError_InsufficientQuotaDoesNotSetRetryAfter(t *testing.T) {
|
||||||
|
body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
|
||||||
|
code, retryAfter := wrapQwenError(context.Background(), http.StatusTooManyRequests, body)
|
||||||
|
if code != http.StatusTooManyRequests {
|
||||||
|
t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
|
||||||
|
}
|
||||||
|
if retryAfter != nil {
|
||||||
|
t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestWrapQwenError_Maps403QuotaTo429WithoutRetryAfter(t *testing.T) {
|
||||||
|
body := []byte(`{"error":{"code":"insufficient_quota","message":"You exceeded your current quota","type":"insufficient_quota"}}`)
|
||||||
|
code, retryAfter := wrapQwenError(context.Background(), http.StatusForbidden, body)
|
||||||
|
if code != http.StatusTooManyRequests {
|
||||||
|
t.Fatalf("wrapQwenError status = %d, want %d", code, http.StatusTooManyRequests)
|
||||||
|
}
|
||||||
|
if retryAfter != nil {
|
||||||
|
t.Fatalf("wrapQwenError retryAfter = %v, want nil", *retryAfter)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1830,7 +1830,11 @@ func (m *Manager) closestCooldownWait(providers []string, model string, attempt
|
|||||||
if attempt >= effectiveRetry {
|
if attempt >= effectiveRetry {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
blocked, reason, next := isAuthBlockedForModel(auth, model, now)
|
checkModel := model
|
||||||
|
if strings.TrimSpace(model) != "" {
|
||||||
|
checkModel = m.selectionModelForAuth(auth, model)
|
||||||
|
}
|
||||||
|
blocked, reason, next := isAuthBlockedForModel(auth, checkModel, now)
|
||||||
if !blocked || next.IsZero() || reason == blockReasonDisabled {
|
if !blocked || next.IsZero() || reason == blockReasonDisabled {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
|
internalconfig "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
|
||||||
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||||
)
|
)
|
||||||
@@ -64,6 +65,49 @@ func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestManager_ShouldRetryAfterError_UsesOAuthModelAliasForCooldown(t *testing.T) {
|
||||||
|
m := NewManager(nil, nil, nil)
|
||||||
|
m.SetRetryConfig(3, 30*time.Second, 0)
|
||||||
|
m.SetOAuthModelAlias(map[string][]internalconfig.OAuthModelAlias{
|
||||||
|
"qwen": {
|
||||||
|
{Name: "qwen3.6-plus", Alias: "coder-model"},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
routeModel := "coder-model"
|
||||||
|
upstreamModel := "qwen3.6-plus"
|
||||||
|
next := time.Now().Add(5 * time.Second)
|
||||||
|
|
||||||
|
auth := &Auth{
|
||||||
|
ID: "auth-1",
|
||||||
|
Provider: "qwen",
|
||||||
|
ModelStates: map[string]*ModelState{
|
||||||
|
upstreamModel: {
|
||||||
|
Unavailable: true,
|
||||||
|
Status: StatusError,
|
||||||
|
NextRetryAfter: next,
|
||||||
|
Quota: QuotaState{
|
||||||
|
Exceeded: true,
|
||||||
|
Reason: "quota",
|
||||||
|
NextRecoverAt: next,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||||
|
t.Fatalf("register auth: %v", errRegister)
|
||||||
|
}
|
||||||
|
|
||||||
|
_, _, maxWait := m.retrySettings()
|
||||||
|
wait, shouldRetry := m.shouldRetryAfterError(&Error{HTTPStatus: 429, Message: "quota"}, 0, []string{"qwen"}, routeModel, maxWait)
|
||||||
|
if !shouldRetry {
|
||||||
|
t.Fatalf("expected shouldRetry=true, got false (wait=%v)", wait)
|
||||||
|
}
|
||||||
|
if wait <= 0 {
|
||||||
|
t.Fatalf("expected wait > 0, got %v", wait)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type credentialRetryLimitExecutor struct {
|
type credentialRetryLimitExecutor struct {
|
||||||
id string
|
id string
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user