diff --git a/sdk/cliproxy/auth/conductor.go b/sdk/cliproxy/auth/conductor.go index b29e04db..88b12b9a 100644 --- a/sdk/cliproxy/auth/conductor.go +++ b/sdk/cliproxy/auth/conductor.go @@ -1627,53 +1627,60 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) { } statusCode := statusCodeFromResult(result.Error) - switch statusCode { - case 401: - next := now.Add(30 * time.Minute) - state.NextRetryAfter = next - suspendReason = "unauthorized" - shouldSuspendModel = true - case 402, 403: - next := now.Add(30 * time.Minute) - state.NextRetryAfter = next - suspendReason = "payment_required" - shouldSuspendModel = true - case 404: + if isModelSupportResultError(result.Error) { next := now.Add(12 * time.Hour) state.NextRetryAfter = next - suspendReason = "not_found" + suspendReason = "model_not_supported" shouldSuspendModel = true - case 429: - var next time.Time - backoffLevel := state.Quota.BackoffLevel - if result.RetryAfter != nil { - next = now.Add(*result.RetryAfter) - } else { - cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth)) - if cooldown > 0 { - next = now.Add(cooldown) - } - backoffLevel = nextLevel - } - state.NextRetryAfter = next - state.Quota = QuotaState{ - Exceeded: true, - Reason: "quota", - NextRecoverAt: next, - BackoffLevel: backoffLevel, - } - suspendReason = "quota" - shouldSuspendModel = true - setModelQuota = true - case 408, 500, 502, 503, 504: - if quotaCooldownDisabledForAuth(auth) { - state.NextRetryAfter = time.Time{} - } else { - next := now.Add(1 * time.Minute) + } else { + switch statusCode { + case 401: + next := now.Add(30 * time.Minute) state.NextRetryAfter = next + suspendReason = "unauthorized" + shouldSuspendModel = true + case 402, 403: + next := now.Add(30 * time.Minute) + state.NextRetryAfter = next + suspendReason = "payment_required" + shouldSuspendModel = true + case 404: + next := now.Add(12 * time.Hour) + state.NextRetryAfter = next + suspendReason = "not_found" + shouldSuspendModel = true + case 429: + var next time.Time + backoffLevel := state.Quota.BackoffLevel + if result.RetryAfter != nil { + next = now.Add(*result.RetryAfter) + } else { + cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth)) + if cooldown > 0 { + next = now.Add(cooldown) + } + backoffLevel = nextLevel + } + state.NextRetryAfter = next + state.Quota = QuotaState{ + Exceeded: true, + Reason: "quota", + NextRecoverAt: next, + BackoffLevel: backoffLevel, + } + suspendReason = "quota" + shouldSuspendModel = true + setModelQuota = true + case 408, 500, 502, 503, 504: + if quotaCooldownDisabledForAuth(auth) { + state.NextRetryAfter = time.Time{} + } else { + next := now.Add(1 * time.Minute) + state.NextRetryAfter = next + } + default: + state.NextRetryAfter = time.Time{} } - default: - state.NextRetryAfter = time.Time{} } auth.Status = StatusError @@ -1883,14 +1890,65 @@ func statusCodeFromResult(err *Error) int { return err.StatusCode() } +func isModelSupportErrorMessage(message string) bool { + lower := strings.ToLower(strings.TrimSpace(message)) + if lower == "" { + return false + } + patterns := [...]string{ + "model_not_supported", + "requested model is not supported", + "requested model is unsupported", + "requested model is unavailable", + "model is not supported", + "model not supported", + "unsupported model", + "model unavailable", + "not available for your plan", + "not available for your account", + } + for _, pattern := range patterns { + if strings.Contains(lower, pattern) { + return true + } + } + return false +} + +func isModelSupportError(err error) bool { + if err == nil { + return false + } + status := statusCodeFromError(err) + if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity { + return false + } + return isModelSupportErrorMessage(err.Error()) +} + +func isModelSupportResultError(err *Error) bool { + if err == nil { + return false + } + status := statusCodeFromResult(err) + if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity { + return false + } + return isModelSupportErrorMessage(err.Message) +} + // isRequestInvalidError returns true if the error represents a client request // error that should not be retried. Specifically, it treats 400 responses with // "invalid_request_error" and all 422 responses as request-shape failures, -// where switching auths or pooled upstream models will not help. +// where switching auths or pooled upstream models will not help. Model-support +// errors are excluded so routing can fall through to another auth or upstream. func isRequestInvalidError(err error) bool { if err == nil { return false } + if isModelSupportError(err) { + return false + } status := statusCodeFromError(err) switch status { case http.StatusBadRequest: diff --git a/sdk/cliproxy/auth/conductor_overrides_test.go b/sdk/cliproxy/auth/conductor_overrides_test.go index 7aca49da..586af489 100644 --- a/sdk/cliproxy/auth/conductor_overrides_test.go +++ b/sdk/cliproxy/auth/conductor_overrides_test.go @@ -108,6 +108,53 @@ func (e *credentialRetryLimitExecutor) Calls() int { return e.calls } +type authFallbackExecutor struct { + id string + + mu sync.Mutex + executeCalls []string + executeErrors map[string]error +} + +func (e *authFallbackExecutor) Identifier() string { + return e.id +} + +func (e *authFallbackExecutor) Execute(_ context.Context, auth *Auth, _ cliproxyexecutor.Request, _ cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + e.mu.Lock() + e.executeCalls = append(e.executeCalls, auth.ID) + err := e.executeErrors[auth.ID] + e.mu.Unlock() + if err != nil { + return cliproxyexecutor.Response{}, err + } + return cliproxyexecutor.Response{Payload: []byte(auth.ID)}, nil +} + +func (e *authFallbackExecutor) ExecuteStream(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (*cliproxyexecutor.StreamResult, error) { + return nil, &Error{HTTPStatus: 500, Message: "not implemented"} +} + +func (e *authFallbackExecutor) Refresh(_ context.Context, auth *Auth) (*Auth, error) { + return auth, nil +} + +func (e *authFallbackExecutor) CountTokens(context.Context, *Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) { + return cliproxyexecutor.Response{}, &Error{HTTPStatus: 500, Message: "not implemented"} +} + +func (e *authFallbackExecutor) HttpRequest(context.Context, *Auth, *http.Request) (*http.Response, error) { + return nil, nil +} + +func (e *authFallbackExecutor) ExecuteCalls() []string { + e.mu.Lock() + defer e.mu.Unlock() + out := make([]string, len(e.executeCalls)) + copy(out, e.executeCalls) + return out +} + func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) { t.Helper() @@ -191,6 +238,76 @@ func TestManager_MaxRetryCredentials_LimitsCrossCredentialRetries(t *testing.T) } } +func TestManager_ModelSupportBadRequest_FallsBackAndSuspendsAuth(t *testing.T) { + m := NewManager(nil, nil, nil) + executor := &authFallbackExecutor{ + id: "claude", + executeErrors: map[string]error{ + "aa-bad-auth": &Error{ + HTTPStatus: http.StatusBadRequest, + Message: "invalid_request_error: The requested model is not supported.", + }, + }, + } + m.RegisterExecutor(executor) + + model := "claude-opus-4-6" + badAuth := &Auth{ID: "aa-bad-auth", Provider: "claude"} + goodAuth := &Auth{ID: "bb-good-auth", Provider: "claude"} + + reg := registry.GetGlobalRegistry() + reg.RegisterClient(badAuth.ID, "claude", []*registry.ModelInfo{{ID: model}}) + reg.RegisterClient(goodAuth.ID, "claude", []*registry.ModelInfo{{ID: model}}) + t.Cleanup(func() { + reg.UnregisterClient(badAuth.ID) + reg.UnregisterClient(goodAuth.ID) + }) + + if _, errRegister := m.Register(context.Background(), badAuth); errRegister != nil { + t.Fatalf("register bad auth: %v", errRegister) + } + if _, errRegister := m.Register(context.Background(), goodAuth); errRegister != nil { + t.Fatalf("register good auth: %v", errRegister) + } + + request := cliproxyexecutor.Request{Model: model} + for i := 0; i < 2; i++ { + resp, errExecute := m.Execute(context.Background(), []string{"claude"}, request, cliproxyexecutor.Options{}) + if errExecute != nil { + t.Fatalf("execute %d error = %v, want success", i, errExecute) + } + if string(resp.Payload) != goodAuth.ID { + t.Fatalf("execute %d payload = %q, want %q", i, string(resp.Payload), goodAuth.ID) + } + } + + got := executor.ExecuteCalls() + want := []string{badAuth.ID, goodAuth.ID, goodAuth.ID} + if len(got) != len(want) { + t.Fatalf("execute calls = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("execute call %d auth = %q, want %q", i, got[i], want[i]) + } + } + + updatedBad, ok := m.GetByID(badAuth.ID) + if !ok || updatedBad == nil { + t.Fatalf("expected bad auth to remain registered") + } + state := updatedBad.ModelStates[model] + if state == nil { + t.Fatalf("expected model state for %q", model) + } + if !state.Unavailable { + t.Fatalf("expected bad auth model state to be unavailable") + } + if state.NextRetryAfter.IsZero() { + t.Fatalf("expected bad auth model state cooldown to be set") + } +} + func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) { prev := quotaCooldownDisabled.Load() quotaCooldownDisabled.Store(false) diff --git a/sdk/cliproxy/auth/openai_compat_pool_test.go b/sdk/cliproxy/auth/openai_compat_pool_test.go index 5a5ecb4f..5b1bf9aa 100644 --- a/sdk/cliproxy/auth/openai_compat_pool_test.go +++ b/sdk/cliproxy/auth/openai_compat_pool_test.go @@ -243,6 +243,75 @@ func TestManagerExecute_OpenAICompatAliasPoolStopsOnBadRequest(t *testing.T) { t.Fatalf("execute calls = %v, want only first invalid model", got) } } + +func TestManagerExecute_OpenAICompatAliasPoolFallsBackOnModelSupportBadRequest(t *testing.T) { + alias := "claude-opus-4.66" + modelSupportErr := &Error{ + HTTPStatus: http.StatusBadRequest, + Message: "invalid_request_error: The requested model is not supported.", + } + executor := &openAICompatPoolExecutor{ + id: "pool", + executeErrors: map[string]error{"qwen3.5-plus": modelSupportErr}, + } + m := newOpenAICompatPoolTestManager(t, alias, []internalconfig.OpenAICompatibilityModel{ + {Name: "qwen3.5-plus", Alias: alias}, + {Name: "glm-5", Alias: alias}, + }, executor) + + resp, err := m.Execute(context.Background(), []string{"pool"}, cliproxyexecutor.Request{Model: alias}, cliproxyexecutor.Options{}) + if err != nil { + t.Fatalf("execute error = %v, want fallback success", err) + } + if string(resp.Payload) != "glm-5" { + t.Fatalf("payload = %q, want %q", string(resp.Payload), "glm-5") + } + got := executor.ExecuteModels() + want := []string{"qwen3.5-plus", "glm-5"} + if len(got) != len(want) { + t.Fatalf("execute calls = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("execute call %d model = %q, want %q", i, got[i], want[i]) + } + } +} + +func TestManagerExecute_OpenAICompatAliasPoolFallsBackOnModelSupportUnprocessableEntity(t *testing.T) { + alias := "claude-opus-4.66" + modelSupportErr := &Error{ + HTTPStatus: http.StatusUnprocessableEntity, + Message: "The requested model is not supported.", + } + executor := &openAICompatPoolExecutor{ + id: "pool", + executeErrors: map[string]error{"qwen3.5-plus": modelSupportErr}, + } + m := newOpenAICompatPoolTestManager(t, alias, []internalconfig.OpenAICompatibilityModel{ + {Name: "qwen3.5-plus", Alias: alias}, + {Name: "glm-5", Alias: alias}, + }, executor) + + resp, err := m.Execute(context.Background(), []string{"pool"}, cliproxyexecutor.Request{Model: alias}, cliproxyexecutor.Options{}) + if err != nil { + t.Fatalf("execute error = %v, want fallback success", err) + } + if string(resp.Payload) != "glm-5" { + t.Fatalf("payload = %q, want %q", string(resp.Payload), "glm-5") + } + got := executor.ExecuteModels() + want := []string{"qwen3.5-plus", "glm-5"} + if len(got) != len(want) { + t.Fatalf("execute calls = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("execute call %d model = %q, want %q", i, got[i], want[i]) + } + } +} + func TestManagerExecute_OpenAICompatAliasPoolFallsBackWithinSameAuth(t *testing.T) { alias := "claude-opus-4.66" executor := &openAICompatPoolExecutor{