diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index f581dd0ede2..a78e806759d 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -182,6 +182,68 @@ describe("failover-error", () => { ).toBe("billing"); }); + it("treats 402 with periodic usage limit as rate_limit", () => { + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Monthly spend limit reached. Please visit your billing settings.", + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Weekly usage limit exhausted for this plan.", + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Daily limit reached. Your limit will reset tomorrow.", + }), + ).toBe("rate_limit"); + }); + + it("treats 402 with organization/workspace limit as rate_limit", () => { + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Organization spending limit exceeded.", + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Workspace spend limit reached. Contact your admin.", + }), + ).toBe("rate_limit"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Organization limit exceeded for this billing period.", + }), + ).toBe("rate_limit"); + }); + + it("keeps 402 with explicit billing signals as billing even with limit language", () => { + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Your credit balance is too low. Monthly limit exceeded.", + }), + ).toBe("billing"); + expect( + resolveFailoverReasonFromError({ + status: 402, + message: "Insufficient credits. Spend limit reached.", + }), + ).toBe("billing"); + }); + + it("keeps 402 without message body as billing", () => { + expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing"); + expect(resolveFailoverReasonFromError({ status: 402, message: undefined })).toBe("billing"); + }); + it("infers format errors from error messages", () => { expect( resolveFailoverReasonFromError({ diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index bcb66628d66..480b8c31d38 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -345,4 +345,105 @@ describe("runWithModelFallback – probe logic", () => { allowTransientCooldownProbe: true, }); }); + + it("probes billing-cooldowned primary when no fallback candidates exist", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/gpt-4.1-mini", + fallbacks: [], + }, + }, + }, + } as Partial); + + // Billing cooldown far from expiry — would normally be skipped + const expiresIn30Min = NOW + 30 * 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + const run = vi.fn().mockResolvedValue("billing-recovered"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + fallbacksOverride: [], + run, + }); + + expect(result.result).toBe("billing-recovered"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", { + allowTransientCooldownProbe: true, + }); + }); + + it("throttles billing probe for single-candidate at 30s intervals", async () => { + const cfg = makeCfg({ + agents: { + defaults: { + model: { + primary: "openai/gpt-4.1-mini", + fallbacks: [], + }, + }, + }, + } as Partial); + + mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + // Simulate a recent probe 10s ago + _probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000); + + const run = vi.fn().mockResolvedValue("unreachable"); + + await expect( + runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + fallbacksOverride: [], + run, + }), + ).rejects.toThrow("All models failed"); + + expect(run).not.toHaveBeenCalled(); + }); + + it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => { + const cfg = makeCfg(); + // Cooldown expires in 1 minute — within 2-min probe margin + const expiresIn1Min = NOW + 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + const run = vi.fn().mockResolvedValue("billing-probe-ok"); + + const result = await runPrimaryCandidate(cfg, run); + + expect(result.result).toBe("billing-probe-ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", { + allowTransientCooldownProbe: true, + }); + }); + + it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => { + const cfg = makeCfg(); + const expiresIn30Min = NOW + 30 * 60 * 1000; + mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min); + mockedResolveProfilesUnavailableReason.mockReturnValue("billing"); + + const run = vi.fn().mockResolvedValue("ok"); + + const result = await runPrimaryCandidate(cfg, run); + + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5"); + expect(result.attempts[0]?.reason).toBe("billing"); + }); }); diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 517c4448a27..8a6685c120c 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -415,11 +415,30 @@ function resolveCooldownDecision(params: { profileIds: params.profileIds, now: params.now, }) ?? "rate_limit"; - const isPersistentIssue = - inferredReason === "auth" || - inferredReason === "auth_permanent" || - inferredReason === "billing"; - if (isPersistentIssue) { + const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent"; + if (isPersistentAuthIssue) { + return { + type: "skip", + reason: inferredReason, + error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`, + }; + } + + // Billing is semi-persistent: the user may fix their balance, or a transient + // 402 might have been misclassified. Without fallback candidates, skipping is + // guaranteed failure so we attempt (throttled). With fallbacks, probe the + // primary when the standard probe schedule allows. + if (inferredReason === "billing") { + if (params.isPrimary) { + if (!params.hasFallbackCandidates) { + const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0; + if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) { + return { type: "attempt", reason: inferredReason, markProbe: true }; + } + } else if (shouldProbe) { + return { type: "attempt", reason: inferredReason, markProbe: true }; + } + } return { type: "skip", reason: inferredReason, @@ -514,7 +533,11 @@ export async function runWithModelFallback(params: { if (decision.markProbe) { lastProbeAttempt.set(probeThrottleKey, now); } - if (decision.reason === "rate_limit" || decision.reason === "overloaded") { + if ( + decision.reason === "rate_limit" || + decision.reason === "overloaded" || + decision.reason === "billing" + ) { runOptions = { allowTransientCooldownProbe: true }; } } diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 4919bc607c0..1aaa92b5f54 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { classifyFailoverReason, + classifyFailoverReasonFromHttpStatus, isAuthErrorMessage, isAuthPermanentErrorMessage, isBillingErrorMessage, @@ -505,6 +506,56 @@ describe("image dimension errors", () => { }); }); +describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => { + it("reclassifies 402 with periodic usage limit as rate_limit", () => { + expect(classifyFailoverReasonFromHttpStatus(402, "Monthly spend limit reached.")).toBe( + "rate_limit", + ); + expect(classifyFailoverReasonFromHttpStatus(402, "Weekly usage limit exhausted.")).toBe( + "rate_limit", + ); + expect(classifyFailoverReasonFromHttpStatus(402, "Daily limit reached, resets tomorrow.")).toBe( + "rate_limit", + ); + }); + + it("reclassifies 402 with organization/workspace limit as rate_limit", () => { + expect(classifyFailoverReasonFromHttpStatus(402, "Organization spending limit exceeded.")).toBe( + "rate_limit", + ); + expect(classifyFailoverReasonFromHttpStatus(402, "Workspace spend limit reached.")).toBe( + "rate_limit", + ); + expect( + classifyFailoverReasonFromHttpStatus( + 402, + "Organization limit exceeded for this billing period.", + ), + ).toBe("rate_limit"); + }); + + it("keeps 402 as billing when explicit billing signals are present", () => { + expect( + classifyFailoverReasonFromHttpStatus( + 402, + "Your credit balance is too low. Monthly limit exceeded.", + ), + ).toBe("billing"); + expect( + classifyFailoverReasonFromHttpStatus( + 402, + "Insufficient credits. Organization limit reached.", + ), + ).toBe("billing"); + }); + + it("keeps 402 as billing without message or with generic message", () => { + expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing"); + expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing"); + expect(classifyFailoverReasonFromHttpStatus(402, "Payment required")).toBe("billing"); + }); +}); + describe("classifyFailoverReason", () => { it("classifies documented provider error messages", () => { expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit"); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 5e4fc4c541e..d4d5b0a7be7 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -262,12 +262,12 @@ export function classifyFailoverReasonFromHttpStatus( if (status === 402) { // Some providers (e.g. Anthropic Claude Max plan) surface temporary - // usage/rate-limit failures as HTTP 402. Use a narrow matcher for - // temporary limits to avoid misclassifying billing failures (#30484). + // usage/rate-limit failures as HTTP 402. Detect temporary limits to + // avoid misclassifying them as persistent billing failures (#30484). if (message) { const lower = message.toLowerCase(); - // Temporary usage limit signals: retry language + usage/limit terminology - const hasTemporarySignal = + // Explicit retry language + usage/limit terminology + const hasTemporaryRetrySignal = (lower.includes("try again") || lower.includes("retry") || lower.includes("temporary") || @@ -275,7 +275,23 @@ export function classifyFailoverReasonFromHttpStatus( (lower.includes("usage limit") || lower.includes("rate limit") || lower.includes("organization usage")); - if (hasTemporarySignal) { + if (hasTemporaryRetrySignal) { + return "rate_limit"; + } + // Periodic usage limits (daily/weekly/monthly) are inherently temporary + // and should not trigger persistent billing cooldown, unless the message + // also contains explicit billing signals (e.g. "insufficient credits"). + if (isPeriodicUsageLimitErrorMessage(message) && !isBillingErrorMessage(message)) { + return "rate_limit"; + } + // Spending/organization/workspace limits are typically resettable caps + // set by the organization admin, not permanent credit-balance failures. + const hasSpendOrOrgLimitSignal = + lower.includes("spend limit") || + lower.includes("spending limit") || + ((lower.includes("organization") || lower.includes("workspace")) && + (lower.includes("limit") || lower.includes("exceeded"))); + if (hasSpendOrOrgLimitSignal && !isBillingErrorMessage(message)) { return "rate_limit"; } } diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 80ef934d63e..c763fbd2a94 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent( const allowTransientCooldownProbe = params.allowTransientCooldownProbe === true && allAutoProfilesInCooldown && - (unavailableReason === "rate_limit" || unavailableReason === "overloaded"); + (unavailableReason === "rate_limit" || + unavailableReason === "overloaded" || + unavailableReason === "billing"); let didTransientCooldownProbe = false; while (profileIndex < profileCandidates.length) {