fix: fall back on model support errors during auth rotation

This commit is contained in:
Tam Nhu Tran
2026-03-18 12:43:45 -04:00
parent db63f9b5d6
commit 5135c22cd6
3 changed files with 287 additions and 43 deletions

View File

@@ -1627,53 +1627,60 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
}
statusCode := statusCodeFromResult(result.Error)
switch statusCode {
case 401:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "unauthorized"
shouldSuspendModel = true
case 402, 403:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "payment_required"
shouldSuspendModel = true
case 404:
if isModelSupportResultError(result.Error) {
next := now.Add(12 * time.Hour)
state.NextRetryAfter = next
suspendReason = "not_found"
suspendReason = "model_not_supported"
shouldSuspendModel = true
case 429:
var next time.Time
backoffLevel := state.Quota.BackoffLevel
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
backoffLevel = nextLevel
}
state.NextRetryAfter = next
state.Quota = QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
BackoffLevel: backoffLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
} else {
switch statusCode {
case 401:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "unauthorized"
shouldSuspendModel = true
case 402, 403:
next := now.Add(30 * time.Minute)
state.NextRetryAfter = next
suspendReason = "payment_required"
shouldSuspendModel = true
case 404:
next := now.Add(12 * time.Hour)
state.NextRetryAfter = next
suspendReason = "not_found"
shouldSuspendModel = true
case 429:
var next time.Time
backoffLevel := state.Quota.BackoffLevel
if result.RetryAfter != nil {
next = now.Add(*result.RetryAfter)
} else {
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
if cooldown > 0 {
next = now.Add(cooldown)
}
backoffLevel = nextLevel
}
state.NextRetryAfter = next
state.Quota = QuotaState{
Exceeded: true,
Reason: "quota",
NextRecoverAt: next,
BackoffLevel: backoffLevel,
}
suspendReason = "quota"
shouldSuspendModel = true
setModelQuota = true
case 408, 500, 502, 503, 504:
if quotaCooldownDisabledForAuth(auth) {
state.NextRetryAfter = time.Time{}
} else {
next := now.Add(1 * time.Minute)
state.NextRetryAfter = next
}
default:
state.NextRetryAfter = time.Time{}
}
default:
state.NextRetryAfter = time.Time{}
}
auth.Status = StatusError
@@ -1883,14 +1890,65 @@ func statusCodeFromResult(err *Error) int {
return err.StatusCode()
}
func isModelSupportErrorMessage(message string) bool {
lower := strings.ToLower(strings.TrimSpace(message))
if lower == "" {
return false
}
patterns := [...]string{
"model_not_supported",
"requested model is not supported",
"requested model is unsupported",
"requested model is unavailable",
"model is not supported",
"model not supported",
"unsupported model",
"model unavailable",
"not available for your plan",
"not available for your account",
}
for _, pattern := range patterns {
if strings.Contains(lower, pattern) {
return true
}
}
return false
}
func isModelSupportError(err error) bool {
if err == nil {
return false
}
status := statusCodeFromError(err)
if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity {
return false
}
return isModelSupportErrorMessage(err.Error())
}
func isModelSupportResultError(err *Error) bool {
if err == nil {
return false
}
status := statusCodeFromResult(err)
if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity {
return false
}
return isModelSupportErrorMessage(err.Message)
}
// isRequestInvalidError returns true if the error represents a client request
// error that should not be retried. Specifically, it treats 400 responses with
// "invalid_request_error" and all 422 responses as request-shape failures,
// where switching auths or pooled upstream models will not help.
// where switching auths or pooled upstream models will not help. Model-support
// errors are excluded so routing can fall through to another auth or upstream.
func isRequestInvalidError(err error) bool {
if err == nil {
return false
}
if isModelSupportError(err) {
return false
}
status := statusCodeFromError(err)
switch status {
case http.StatusBadRequest:

View File

@@ -108,6 +108,53 @@ func (e *credentialRetryLimitExecutor) Calls() int {
return e.calls
}
type authFallbackExecutor struct {
id string
mu sync.Mutex
executeCalls []string
executeErrors map[string]error
}
func (e *authFallbackExecutor) Identifier() string {
return e.id
}
func (e *authFallbackExecutor) Execute(_ context.Context, auth *Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
e.mu.Lock()
e.executeCalls = append(e.executeCalls, auth.ID)
err := e.executeErrors[auth.ID]
e.mu.Unlock()
if err != nil {
return cliproxyexecutor.Response{}, err
}
return cliproxyexecutor.Response{Payload: []byte(auth.ID)}, nil
}
func (e *authFallbackExecutor) ExecuteStream(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) {
return nil, &Error{HTTPStatus: 500, Message: "not implemented"}
}
func (e *authFallbackExecutor) Refresh(_ context.Context, auth *Auth) (*Auth, error) {
return auth, nil
}
func (e *authFallbackExecutor) CountTokens(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "not implemented"}
}
func (e *authFallbackExecutor) HttpRequest(context.Context, *Auth, *http.Request) (*http.Response, error) {
return nil, nil
}
func (e *authFallbackExecutor) ExecuteCalls() []string {
e.mu.Lock()
defer e.mu.Unlock()
out := make([]string, len(e.executeCalls))
copy(out, e.executeCalls)
return out
}
func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) {
t.Helper()
@@ -191,6 +238,76 @@ func TestManager_MaxRetryCredentials_LimitsCrossCredentialRetries(t *testing.T)
}
}
func TestManager_ModelSupportBadRequest_FallsBackAndSuspendsAuth(t *testing.T) {
m := NewManager(nil, nil, nil)
executor := &authFallbackExecutor{
id: "claude",
executeErrors: map[string]error{
"aa-bad-auth": &Error{
HTTPStatus: http.StatusBadRequest,
Message: "invalid_request_error: The requested model is not supported.",
},
},
}
m.RegisterExecutor(executor)
model := "claude-opus-4-6"
badAuth := &Auth{ID: "aa-bad-auth", Provider: "claude"}
goodAuth := &Auth{ID: "bb-good-auth", Provider: "claude"}
reg := registry.GetGlobalRegistry()
reg.RegisterClient(badAuth.ID, "claude", []*registry.ModelInfo{{ID: model}})
reg.RegisterClient(goodAuth.ID, "claude", []*registry.ModelInfo{{ID: model}})
t.Cleanup(func() {
reg.UnregisterClient(badAuth.ID)
reg.UnregisterClient(goodAuth.ID)
})
if _, errRegister := m.Register(context.Background(), badAuth); errRegister != nil {
t.Fatalf("register bad auth: %v", errRegister)
}
if _, errRegister := m.Register(context.Background(), goodAuth); errRegister != nil {
t.Fatalf("register good auth: %v", errRegister)
}
request := cliproxyexecutor.Request{Model: model}
for i := 0; i < 2; i++ {
resp, errExecute := m.Execute(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{})
if errExecute != nil {
t.Fatalf("execute %d error = %v, want success", i, errExecute)
}
if string(resp.Payload) != goodAuth.ID {
t.Fatalf("execute %d payload = %q, want %q", i, string(resp.Payload), goodAuth.ID)
}
}
got := executor.ExecuteCalls()
want := []string{badAuth.ID, goodAuth.ID, goodAuth.ID}
if len(got) != len(want) {
t.Fatalf("execute calls = %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("execute call %d auth = %q, want %q", i, got[i], want[i])
}
}
updatedBad, ok := m.GetByID(badAuth.ID)
if !ok || updatedBad == nil {
t.Fatalf("expected bad auth to remain registered")
}
state := updatedBad.ModelStates[model]
if state == nil {
t.Fatalf("expected model state for %q", model)
}
if !state.Unavailable {
t.Fatalf("expected bad auth model state to be unavailable")
}
if state.NextRetryAfter.IsZero() {
t.Fatalf("expected bad auth model state cooldown to be set")
}
}
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
prev := quotaCooldownDisabled.Load()
quotaCooldownDisabled.Store(false)

View File

@@ -243,6 +243,75 @@ func TestManagerExecute_OpenAICompatAliasPoolStopsOnBadRequest(t *testing.T) {
t.Fatalf("execute calls = %v, want only first invalid model", got)
}
}
func TestManagerExecute_OpenAICompatAliasPoolFallsBackOnModelSupportBadRequest(t *testing.T) {
alias := "claude-opus-4.66"
modelSupportErr := &Error{
HTTPStatus: http.StatusBadRequest,
Message: "invalid_request_error: The requested model is not supported.",
}
executor := &openAICompatPoolExecutor{
id: "pool",
executeErrors: map[string]error{"qwen3.5-plus": modelSupportErr},
}
m := newOpenAICompatPoolTestManager(t, alias, []internalconfig.OpenAICompatibilityModel{
{Name: "qwen3.5-plus", Alias: alias},
{Name: "glm-5", Alias: alias},
}, executor)
resp, err := m.Execute(context.Background(), []string{"pool"}, cliproxyexecutor.Request{Model: alias}, cliproxyexecutor.Options{})
if err != nil {
t.Fatalf("execute error = %v, want fallback success", err)
}
if string(resp.Payload) != "glm-5" {
t.Fatalf("payload = %q, want %q", string(resp.Payload), "glm-5")
}
got := executor.ExecuteModels()
want := []string{"qwen3.5-plus", "glm-5"}
if len(got) != len(want) {
t.Fatalf("execute calls = %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("execute call %d model = %q, want %q", i, got[i], want[i])
}
}
}
func TestManagerExecute_OpenAICompatAliasPoolFallsBackOnModelSupportUnprocessableEntity(t *testing.T) {
alias := "claude-opus-4.66"
modelSupportErr := &Error{
HTTPStatus: http.StatusUnprocessableEntity,
Message: "The requested model is not supported.",
}
executor := &openAICompatPoolExecutor{
id: "pool",
executeErrors: map[string]error{"qwen3.5-plus": modelSupportErr},
}
m := newOpenAICompatPoolTestManager(t, alias, []internalconfig.OpenAICompatibilityModel{
{Name: "qwen3.5-plus", Alias: alias},
{Name: "glm-5", Alias: alias},
}, executor)
resp, err := m.Execute(context.Background(), []string{"pool"}, cliproxyexecutor.Request{Model: alias}, cliproxyexecutor.Options{})
if err != nil {
t.Fatalf("execute error = %v, want fallback success", err)
}
if string(resp.Payload) != "glm-5" {
t.Fatalf("payload = %q, want %q", string(resp.Payload), "glm-5")
}
got := executor.ExecuteModels()
want := []string{"qwen3.5-plus", "glm-5"}
if len(got) != len(want) {
t.Fatalf("execute calls = %v, want %v", got, want)
}
for i := range want {
if got[i] != want[i] {
t.Fatalf("execute call %d model = %q, want %q", i, got[i], want[i])
}
}
}
func TestManagerExecute_OpenAICompatAliasPoolFallsBackWithinSameAuth(t *testing.T) {
alias := "claude-opus-4.66"
executor := &openAICompatPoolExecutor{