From 227bf314bca925538cef61213bf6dba6579a2669 Mon Sep 17 00:00:00 2001 From: Altay Date: Sat, 7 Mar 2026 23:06:25 +0300 Subject: [PATCH] agents: tighten 402 billing guard --- CHANGELOG.md | 1 + src/agents/failover-error.test.ts | 45 +--------- ...dded-helpers.isbillingerrormessage.test.ts | 46 +++++------ src/agents/pi-embedded-helpers/errors.ts | 82 +++++++++++-------- 4 files changed, 75 insertions(+), 99 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54eb59255e7..f24f7ace12c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -290,6 +290,7 @@ Docs: https://docs.openclaw.ai - Heartbeat/requests-in-flight scheduling: stop advancing `nextDueMs` and avoid immediate `scheduleNext()` timer overrides on requests-in-flight skips, so wake-layer retry cooldowns are honored and heartbeat cadence no longer drifts under sustained contention. (#39182) Thanks @MumuTW. - Memory/SQLite contention resilience: re-apply `PRAGMA busy_timeout` on every sync-store and QMD connection open so process restarts/reopens no longer revert to immediate `SQLITE_BUSY` failures under lock contention. (#39183) Thanks @MumuTW. - Gateway/webchat route safety: block webchat/control-ui clients from inheriting stored external delivery routes on channel-scoped sessions (while preserving route inheritance for UI/TUI clients), preventing cross-channel leakage from scoped chats. (#39175) Thanks @widingmarcus-cyber. +- Agents/failover 402 recovery: keep temporary spend-limit `402` payloads retryable, preserve explicit insufficient-credit billing detection even in long provider payloads, and allow throttled billing-cooldown probes so single-provider setups can recover instead of staying locked out. (#38533) Thanks @xialonglee. ## 2026.3.2 diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index a78e806759d..4865dbfb560 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -182,34 +182,13 @@ describe("failover-error", () => { ).toBe("billing"); }); - it("treats 402 with periodic usage limit as rate_limit", () => { + it("keeps temporary 402 spend limits retryable without downgrading explicit billing", () => { expect( resolveFailoverReasonFromError({ status: 402, message: "Monthly spend limit reached. Please visit your billing settings.", }), ).toBe("rate_limit"); - expect( - resolveFailoverReasonFromError({ - status: 402, - message: "Weekly usage limit exhausted for this plan.", - }), - ).toBe("rate_limit"); - expect( - resolveFailoverReasonFromError({ - status: 402, - message: "Daily limit reached. Your limit will reset tomorrow.", - }), - ).toBe("rate_limit"); - }); - - it("treats 402 with organization/workspace limit as rate_limit", () => { - expect( - resolveFailoverReasonFromError({ - status: 402, - message: "Organization spending limit exceeded.", - }), - ).toBe("rate_limit"); expect( resolveFailoverReasonFromError({ status: 402, @@ -219,29 +198,9 @@ describe("failover-error", () => { expect( resolveFailoverReasonFromError({ status: 402, - message: "Organization limit exceeded for this billing period.", - }), - ).toBe("rate_limit"); - }); - - it("keeps 402 with explicit billing signals as billing even with limit language", () => { - expect( - resolveFailoverReasonFromError({ - status: 402, - message: "Your credit balance is too low. Monthly limit exceeded.", + message: `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`, }), ).toBe("billing"); - expect( - resolveFailoverReasonFromError({ - status: 402, - message: "Insufficient credits. Spend limit reached.", - }), - ).toBe("billing"); - }); - - it("keeps 402 without message body as billing", () => { - expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing"); - expect(resolveFailoverReasonFromError({ status: 402, message: undefined })).toBe("billing"); }); it("infers format errors from error messages", () => { diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 1aaa92b5f54..e6d5a823c18 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -507,31 +507,26 @@ describe("image dimension errors", () => { }); describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => { - it("reclassifies 402 with periodic usage limit as rate_limit", () => { - expect(classifyFailoverReasonFromHttpStatus(402, "Monthly spend limit reached.")).toBe( - "rate_limit", - ); - expect(classifyFailoverReasonFromHttpStatus(402, "Weekly usage limit exhausted.")).toBe( - "rate_limit", - ); - expect(classifyFailoverReasonFromHttpStatus(402, "Daily limit reached, resets tomorrow.")).toBe( - "rate_limit", - ); + it("reclassifies periodic usage limits as rate_limit", () => { + const samples = [ + "Monthly spend limit reached.", + "Weekly usage limit exhausted.", + "Daily limit reached, resets tomorrow.", + ]; + for (const sample of samples) { + expect(classifyFailoverReasonFromHttpStatus(402, sample)).toBe("rate_limit"); + } }); - it("reclassifies 402 with organization/workspace limit as rate_limit", () => { - expect(classifyFailoverReasonFromHttpStatus(402, "Organization spending limit exceeded.")).toBe( - "rate_limit", - ); - expect(classifyFailoverReasonFromHttpStatus(402, "Workspace spend limit reached.")).toBe( - "rate_limit", - ); - expect( - classifyFailoverReasonFromHttpStatus( - 402, - "Organization limit exceeded for this billing period.", - ), - ).toBe("rate_limit"); + it("reclassifies org/workspace spend limits as rate_limit", () => { + const samples = [ + "Organization spending limit exceeded.", + "Workspace spend limit reached.", + "Organization limit exceeded for this billing period.", + ]; + for (const sample of samples) { + expect(classifyFailoverReasonFromHttpStatus(402, sample)).toBe("rate_limit"); + } }); it("keeps 402 as billing when explicit billing signals are present", () => { @@ -549,6 +544,11 @@ describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => ).toBe("billing"); }); + it("keeps long 402 payloads with explicit billing text as billing", () => { + const longBillingPayload = `${"x".repeat(520)} insufficient credits. Monthly spend limit reached.`; + expect(classifyFailoverReasonFromHttpStatus(402, longBillingPayload)).toBe("billing"); + }); + it("keeps 402 as billing without message or with generic message", () => { expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing"); expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing"); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index d4d5b0a7be7..c1a22fc189b 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -208,6 +208,51 @@ const HTTP_ERROR_HINTS = [ "permission", ]; +function hasExplicitBillingSignalIn402Message(raw: string): boolean { + const lower = raw.toLowerCase(); + return ( + lower.includes("payment required") || + lower.includes("insufficient credits") || + lower.includes("insufficient quota") || + lower.includes("credit balance") || + lower.includes("insufficient balance") || + lower.includes("plans & billing") || + lower.includes("add more credits") || + lower.includes("top up") + ); +} + +function isTemporary402LimitMessage(raw: string): boolean { + if (hasExplicitBillingSignalIn402Message(raw)) { + return false; + } + + const lower = raw.toLowerCase(); + const hasTemporaryRetrySignal = + (lower.includes("try again") || + lower.includes("retry") || + lower.includes("temporary") || + lower.includes("cooldown")) && + (lower.includes("usage limit") || + lower.includes("rate limit") || + lower.includes("organization usage")); + if (hasTemporaryRetrySignal) { + return true; + } + + if (isPeriodicUsageLimitErrorMessage(raw)) { + return true; + } + + return ( + lower.includes("spend limit") || + lower.includes("spending limit") || + lower.includes("organization usage") || + ((lower.includes("organization") || lower.includes("workspace")) && + (lower.includes("limit") || lower.includes("exceeded"))) + ); +} + function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null { const match = raw.match(HTTP_STATUS_CODE_PREFIX_RE); if (!match) { @@ -261,39 +306,10 @@ export function classifyFailoverReasonFromHttpStatus( } if (status === 402) { - // Some providers (e.g. Anthropic Claude Max plan) surface temporary - // usage/rate-limit failures as HTTP 402. Detect temporary limits to - // avoid misclassifying them as persistent billing failures (#30484). - if (message) { - const lower = message.toLowerCase(); - // Explicit retry language + usage/limit terminology - const hasTemporaryRetrySignal = - (lower.includes("try again") || - lower.includes("retry") || - lower.includes("temporary") || - lower.includes("cooldown")) && - (lower.includes("usage limit") || - lower.includes("rate limit") || - lower.includes("organization usage")); - if (hasTemporaryRetrySignal) { - return "rate_limit"; - } - // Periodic usage limits (daily/weekly/monthly) are inherently temporary - // and should not trigger persistent billing cooldown, unless the message - // also contains explicit billing signals (e.g. "insufficient credits"). - if (isPeriodicUsageLimitErrorMessage(message) && !isBillingErrorMessage(message)) { - return "rate_limit"; - } - // Spending/organization/workspace limits are typically resettable caps - // set by the organization admin, not permanent credit-balance failures. - const hasSpendOrOrgLimitSignal = - lower.includes("spend limit") || - lower.includes("spending limit") || - ((lower.includes("organization") || lower.includes("workspace")) && - (lower.includes("limit") || lower.includes("exceeded"))); - if (hasSpendOrOrgLimitSignal && !isBillingErrorMessage(message)) { - return "rate_limit"; - } + // Some providers surface temporary usage caps as HTTP 402. Keep those + // retryable, but let explicit insufficient-credit signals stay billing. + if (message && isTemporary402LimitMessage(message)) { + return "rate_limit"; } return "billing"; }