fix(auth): honor disable-cooling and enrich no-auth errors

This commit is contained in:
zilianpn
2026-04-07 00:24:08 +08:00
parent ada8e2905e
commit 0ea768011b
6 changed files with 436 additions and 35 deletions

View File

@@ -87,6 +87,9 @@ max-retry-credentials: 0
# Maximum wait time in seconds for a cooled-down credential before triggering a retry.
max-retry-interval: 30
# When true, disable auth/model cooldown scheduling globally (prevents blackout windows after failure states).
disable-cooling: false
# Quota exceeded behavior
quota-exceeded:
switch-project: true # Whether to automatically switch to another project when a quota is exceeded

View File

@@ -6,6 +6,7 @@ package handlers
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"net/http"
"strings"
@@ -492,6 +493,7 @@ func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType
opts.Metadata = reqMeta
resp, err := h.AuthManager.Execute(ctx, providers, req, opts)
if err != nil {
err = enrichAuthSelectionError(err, providers, normalizedModel)
status := http.StatusInternalServerError
if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
if code := se.StatusCode(); code > 0 {
@@ -538,6 +540,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
opts.Metadata = reqMeta
resp, err := h.AuthManager.ExecuteCount(ctx, providers, req, opts)
if err != nil {
err = enrichAuthSelectionError(err, providers, normalizedModel)
status := http.StatusInternalServerError
if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
if code := se.StatusCode(); code > 0 {
@@ -588,6 +591,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
opts.Metadata = reqMeta
streamResult, err := h.AuthManager.ExecuteStream(ctx, providers, req, opts)
if err != nil {
err = enrichAuthSelectionError(err, providers, normalizedModel)
errChan := make(chan *interfaces.ErrorMessage, 1)
status := http.StatusInternalServerError
if se, ok := err.(interface{ StatusCode() int }); ok && se != nil {
@@ -697,7 +701,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
chunks = retryResult.Chunks
continue outer
}
streamErr = retryErr
streamErr = enrichAuthSelectionError(retryErr, providers, normalizedModel)
}
}
@@ -840,6 +844,54 @@ func replaceHeader(dst http.Header, src http.Header) {
}
}
func enrichAuthSelectionError(err error, providers []string, model string) error {
if err == nil {
return nil
}
var authErr *coreauth.Error
if !errors.As(err, &authErr) || authErr == nil {
return err
}
code := strings.TrimSpace(authErr.Code)
if code != "auth_not_found" && code != "auth_unavailable" {
return err
}
providerText := strings.Join(providers, ",")
if providerText == "" {
providerText = "unknown"
}
modelText := strings.TrimSpace(model)
if modelText == "" {
modelText = "unknown"
}
baseMessage := strings.TrimSpace(authErr.Message)
if baseMessage == "" {
baseMessage = "no auth available"
}
detail := fmt.Sprintf("%s (providers=%s, model=%s)", baseMessage, providerText, modelText)
// Clarify the most common alias confusion between Anthropic route names and internal provider keys.
if strings.Contains(","+providerText+",", ",claude,") {
detail += "; check Claude auth/key session and cooldown state via /v0/management/auth-files"
}
status := authErr.HTTPStatus
if status <= 0 {
status = http.StatusServiceUnavailable
}
return &coreauth.Error{
Code: authErr.Code,
Message: detail,
Retryable: authErr.Retryable,
HTTPStatus: status,
}
}
// WriteErrorResponse writes an error message to the response writer using the HTTP status embedded in the message.
func (h *BaseAPIHandler) WriteErrorResponse(c *gin.Context, msg *interfaces.ErrorMessage) {
status := http.StatusInternalServerError

View File

@@ -5,10 +5,12 @@ import (
"net/http"
"net/http/httptest"
"reflect"
"strings"
"testing"
"github.com/gin-gonic/gin"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
sdkconfig "github.com/router-for-me/CLIProxyAPI/v6/sdk/config"
)
@@ -66,3 +68,46 @@ func TestWriteErrorResponse_AddonHeadersEnabled(t *testing.T) {
t.Fatalf("X-Request-Id = %#v, want %#v", got, []string{"new-1", "new-2"})
}
}
func TestEnrichAuthSelectionError_DefaultsTo503WithContext(t *testing.T) {
in := &coreauth.Error{Code: "auth_not_found", Message: "no auth available"}
out := enrichAuthSelectionError(in, []string{"claude"}, "claude-sonnet-4-6")
var got *coreauth.Error
if !errors.As(out, &got) || got == nil {
t.Fatalf("expected coreauth.Error, got %T", out)
}
if got.StatusCode() != http.StatusServiceUnavailable {
t.Fatalf("status = %d, want %d", got.StatusCode(), http.StatusServiceUnavailable)
}
if !strings.Contains(got.Message, "providers=claude") {
t.Fatalf("message missing provider context: %q", got.Message)
}
if !strings.Contains(got.Message, "model=claude-sonnet-4-6") {
t.Fatalf("message missing model context: %q", got.Message)
}
if !strings.Contains(got.Message, "/v0/management/auth-files") {
t.Fatalf("message missing management hint: %q", got.Message)
}
}
func TestEnrichAuthSelectionError_PreservesExplicitStatus(t *testing.T) {
in := &coreauth.Error{Code: "auth_unavailable", Message: "no auth available", HTTPStatus: http.StatusTooManyRequests}
out := enrichAuthSelectionError(in, []string{"gemini"}, "gemini-2.5-pro")
var got *coreauth.Error
if !errors.As(out, &got) || got == nil {
t.Fatalf("expected coreauth.Error, got %T", out)
}
if got.StatusCode() != http.StatusTooManyRequests {
t.Fatalf("status = %d, want %d", got.StatusCode(), http.StatusTooManyRequests)
}
}
func TestEnrichAuthSelectionError_IgnoresOtherErrors(t *testing.T) {
in := errors.New("boom")
out := enrichAuthSelectionError(in, []string{"claude"}, "claude-sonnet-4-6")
if out != in {
t.Fatalf("expected original error to be returned unchanged")
}
}

View File

@@ -2,10 +2,13 @@ package handlers
import (
"context"
"errors"
"net/http"
"strings"
"sync"
"testing"
"github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
coreauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
coreexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -463,6 +466,76 @@ func TestExecuteStreamWithAuthManager_DoesNotRetryAfterFirstByte(t *testing.T) {
}
}
func TestExecuteStreamWithAuthManager_EnrichesBootstrapRetryAuthUnavailableError(t *testing.T) {
executor := &failOnceStreamExecutor{}
manager := coreauth.NewManager(nil, nil, nil)
manager.RegisterExecutor(executor)
auth1 := &coreauth.Auth{
ID: "auth1",
Provider: "codex",
Status: coreauth.StatusActive,
Metadata: map[string]any{"email": "test1@example.com"},
}
if _, err := manager.Register(context.Background(), auth1); err != nil {
t.Fatalf("manager.Register(auth1): %v", err)
}
registry.GetGlobalRegistry().RegisterClient(auth1.ID, auth1.Provider, []*registry.ModelInfo{{ID: "test-model"}})
t.Cleanup(func() {
registry.GetGlobalRegistry().UnregisterClient(auth1.ID)
})
handler := NewBaseAPIHandlers(&sdkconfig.SDKConfig{
Streaming: sdkconfig.StreamingConfig{
BootstrapRetries: 1,
},
}, manager)
dataChan, _, errChan := handler.ExecuteStreamWithAuthManager(context.Background(), "openai", "test-model", []byte(`{"model":"test-model"}`), "")
if dataChan == nil || errChan == nil {
t.Fatalf("expected non-nil channels")
}
var got []byte
for chunk := range dataChan {
got = append(got, chunk...)
}
if len(got) != 0 {
t.Fatalf("expected empty payload, got %q", string(got))
}
var gotErr *interfaces.ErrorMessage
for msg := range errChan {
if msg != nil {
gotErr = msg
}
}
if gotErr == nil {
t.Fatalf("expected terminal error")
}
if gotErr.StatusCode != http.StatusServiceUnavailable {
t.Fatalf("status = %d, want %d", gotErr.StatusCode, http.StatusServiceUnavailable)
}
var authErr *coreauth.Error
if !errors.As(gotErr.Error, &authErr) || authErr == nil {
t.Fatalf("expected coreauth.Error, got %T", gotErr.Error)
}
if authErr.Code != "auth_unavailable" {
t.Fatalf("code = %q, want %q", authErr.Code, "auth_unavailable")
}
if !strings.Contains(authErr.Message, "providers=codex") {
t.Fatalf("message missing provider context: %q", authErr.Message)
}
if !strings.Contains(authErr.Message, "model=test-model") {
t.Fatalf("message missing model context: %q", authErr.Message)
}
if executor.Calls() != 1 {
t.Fatalf("expected exactly one upstream call before retry path selection failure, got %d", executor.Calls())
}
}
func TestExecuteStreamWithAuthManager_PinnedAuthKeepsSameUpstream(t *testing.T) {
executor := &authAwareStreamExecutor{}
manager := coreauth.NewManager(nil, nil, nil)

View File

@@ -1838,6 +1838,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
} else {
if result.Model != "" {
if !isRequestScopedNotFoundResultError(result.Error) {
disableCooling := quotaCooldownDisabledForAuth(auth)
state := ensureModelState(auth, result.Model)
state.Unavailable = true
state.Status = StatusError
@@ -1858,31 +1859,45 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
} else {
switch statusCode {
case 401:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "unauthorized"
shouldSuspendModel = true
if disableCooling {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "unauthorized"
shouldSuspendModel = true
}
case 402, 403:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "payment_required"
shouldSuspendModel = true
if disableCooling {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "payment_required"
shouldSuspendModel = true
}
case 404:
next := now.Add(12 * time.Hour)
state.NextRetryAfter = next
suspendReason = "not_found"
shouldSuspendModel = true
if disableCooling {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(12 * time.Hour)
state.NextRetryAfter = next
suspendReason = "not_found"
shouldSuspendModel = true
}
case 429:
var next time.Time
backoffLevel := state.Quota.BackoffLevel
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
if !disableCooling {
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, disableCooling)
if cooldown > 0 {
next = now.Add(cooldown)
}
backoffLevel = nextLevel
}
backoffLevel = nextLevel
}
state.NextRetryAfter = next
state.Quota = QuotaState{
@@ -1891,11 +1906,13 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
NextRecoverAt: next,
BackoffLevel: backoffLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
if !disableCooling {
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
}
case 408, 500, 502, 503, 504:
if quotaCooldownDisabledForAuth(auth) {
if disableCooling {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
@@ -2211,6 +2228,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
if isRequestScopedNotFoundResultError(resultErr) {
return
}
disableCooling := quotaCooldownDisabledForAuth(auth)
auth.Unavailable = true
auth.Status = StatusError
auth.UpdatedAt = now
@@ -2224,32 +2242,46 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
switch statusCode {
case 401:
auth.StatusMessage = "unauthorized"
auth.NextRetryAfter = now.Add(30 * time.Minute)
if disableCooling {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(30 * time.Minute)
}
case 402, 403:
auth.StatusMessage = "payment_required"
auth.NextRetryAfter = now.Add(30 * time.Minute)
if disableCooling {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(30 * time.Minute)
}
case 404:
auth.StatusMessage = "not_found"
auth.NextRetryAfter = now.Add(12 * time.Hour)
if disableCooling {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(12 * time.Hour)
}
case 429:
auth.StatusMessage = "quota exhausted"
auth.Quota.Exceeded = true
auth.Quota.Reason = "quota"
var next time.Time
if retryAfter != nil {
next = now.Add(*retryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
if !disableCooling {
if retryAfter != nil {
next = now.Add(*retryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, disableCooling)
if cooldown > 0 {
next = now.Add(cooldown)
}
auth.Quota.BackoffLevel = nextLevel
}
auth.Quota.BackoffLevel = nextLevel
}
auth.Quota.NextRecoverAt = next
auth.NextRetryAfter = next
case 408, 500, 502, 503, 504:
auth.StatusMessage = "transient upstream error"
if quotaCooldownDisabledForAuth(auth) {
if disableCooling {
auth.NextRetryAfter = time.Time{}
} else {
auth.NextRetryAfter = now.Add(1 * time.Minute)

View File

@@ -180,6 +180,34 @@ func (e *authFallbackExecutor) StreamCalls() []string {
return out
}
type retryAfterStatusError struct {
status int
message string
retryAfter time.Duration
}
func (e *retryAfterStatusError) Error() string {
if e == nil {
return ""
}
return e.message
}
func (e *retryAfterStatusError) StatusCode() int {
if e == nil {
return 0
}
return e.status
}
func (e *retryAfterStatusError) RetryAfter() *time.Duration {
if e == nil {
return nil
}
d := e.retryAfter
return &d
}
func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) {
t.Helper()
@@ -450,6 +478,174 @@ func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
}
}
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride_On403(t *testing.T) {
prev := quotaCooldownDisabled.Load()
quotaCooldownDisabled.Store(false)
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
m := NewManager(nil, nil, nil)
auth := &Auth{
ID: "auth-403",
Provider: "claude",
Metadata: map[string]any{
"disable_cooling": true,
},
}
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
t.Fatalf("register auth: %v", errRegister)
}
model := "test-model-403"
reg := registry.GetGlobalRegistry()
reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
m.MarkResult(context.Background(), Result{
AuthID: auth.ID,
Provider: "claude",
Model: model,
Success: false,
Error: &Error{HTTPStatus: http.StatusForbidden, Message: "forbidden"},
})
updated, ok := m.GetByID(auth.ID)
if !ok || updated == nil {
t.Fatalf("expected auth to be present")
}
state := updated.ModelStates[model]
if state == nil {
t.Fatalf("expected model state to be present")
}
if !state.NextRetryAfter.IsZero() {
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
}
if count := reg.GetModelCount(model); count <= 0 {
t.Fatalf("expected model count > 0 when disable_cooling=true, got %d", count)
}
}
func TestManager_Execute_DisableCooling_DoesNotBlackoutAfter403(t *testing.T) {
prev := quotaCooldownDisabled.Load()
quotaCooldownDisabled.Store(false)
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
m := NewManager(nil, nil, nil)
executor := &authFallbackExecutor{
id: "claude",
executeErrors: map[string]error{
"auth-403-exec": &Error{
HTTPStatus: http.StatusForbidden,
Message: "forbidden",
},
},
}
m.RegisterExecutor(executor)
auth := &Auth{
ID: "auth-403-exec",
Provider: "claude",
Metadata: map[string]any{
"disable_cooling": true,
},
}
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
t.Fatalf("register auth: %v", errRegister)
}
model := "test-model-403-exec"
reg := registry.GetGlobalRegistry()
reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
req := cliproxyexecutor.Request{Model: model}
_, errExecute1 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
if errExecute1 == nil {
t.Fatal("expected first execute error")
}
if statusCodeFromError(errExecute1) != http.StatusForbidden {
t.Fatalf("first execute status = %d, want %d", statusCodeFromError(errExecute1), http.StatusForbidden)
}
_, errExecute2 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
if errExecute2 == nil {
t.Fatal("expected second execute error")
}
if statusCodeFromError(errExecute2) != http.StatusForbidden {
t.Fatalf("second execute status = %d, want %d", statusCodeFromError(errExecute2), http.StatusForbidden)
}
}
func TestManager_Execute_DisableCooling_DoesNotBlackoutAfter429RetryAfter(t *testing.T) {
prev := quotaCooldownDisabled.Load()
quotaCooldownDisabled.Store(false)
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
m := NewManager(nil, nil, nil)
executor := &authFallbackExecutor{
id: "claude",
executeErrors: map[string]error{
"auth-429-exec": &retryAfterStatusError{
status: http.StatusTooManyRequests,
message: "quota exhausted",
retryAfter: 2 * time.Minute,
},
},
}
m.RegisterExecutor(executor)
auth := &Auth{
ID: "auth-429-exec",
Provider: "claude",
Metadata: map[string]any{
"disable_cooling": true,
},
}
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
t.Fatalf("register auth: %v", errRegister)
}
model := "test-model-429-exec"
reg := registry.GetGlobalRegistry()
reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
req := cliproxyexecutor.Request{Model: model}
_, errExecute1 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
if errExecute1 == nil {
t.Fatal("expected first execute error")
}
if statusCodeFromError(errExecute1) != http.StatusTooManyRequests {
t.Fatalf("first execute status = %d, want %d", statusCodeFromError(errExecute1), http.StatusTooManyRequests)
}
_, errExecute2 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
if errExecute2 == nil {
t.Fatal("expected second execute error")
}
if statusCodeFromError(errExecute2) != http.StatusTooManyRequests {
t.Fatalf("second execute status = %d, want %d", statusCodeFromError(errExecute2), http.StatusTooManyRequests)
}
calls := executor.ExecuteCalls()
if len(calls) != 2 {
t.Fatalf("execute calls = %d, want 2", len(calls))
}
updated, ok := m.GetByID(auth.ID)
if !ok || updated == nil {
t.Fatalf("expected auth to be present")
}
state := updated.ModelStates[model]
if state == nil {
t.Fatalf("expected model state to be present")
}
if !state.NextRetryAfter.IsZero() {
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
}
}
func TestManager_MarkResult_RequestScopedNotFoundDoesNotCooldownAuth(t *testing.T) {
m := NewManager(nil, nil, nil)