fix(agents): broaden 402 temporary-limit detection and allow billing cooldown probes

- Broaden classifyFailoverReasonFromHttpStatus to treat periodic usage limits
  and org/workspace spend limits as rate_limit instead of billing
- Treat billing as semi-persistent in model-fallback: probe the primary when
  no fallbacks exist (throttled to one probe per 30s), or, when fallbacks
  exist, only when the billing cooldown is near expiry
- Add tests for new 402 classification and billing probe behavior
This commit is contained in:
xialonglee
2026-03-07 11:27:56 +08:00
committed by Altay
parent 8ca326caa9
commit f511015c8c
6 changed files with 267 additions and 12 deletions

View File

@@ -182,6 +182,68 @@ describe("failover-error", () => {
).toBe("billing");
});
it("treats 402 with periodic usage limit as rate_limit", () => {
  // HTTP 402 bodies that describe a monthly, weekly, or daily cap are
  // expected to resolve to "rate_limit".
  const periodicLimitMessages = [
    "Monthly spend limit reached. Please visit your billing settings.",
    "Weekly usage limit exhausted for this plan.",
    "Daily limit reached. Your limit will reset tomorrow.",
  ];
  for (const message of periodicLimitMessages) {
    const reason = resolveFailoverReasonFromError({ status: 402, message });
    expect(reason).toBe("rate_limit");
  }
});
it("treats 402 with organization/workspace limit as rate_limit", () => {
  // HTTP 402 bodies that mention an organization or workspace limit are
  // expected to resolve to "rate_limit".
  const orgOrWorkspaceLimitMessages = [
    "Organization spending limit exceeded.",
    "Workspace spend limit reached. Contact your admin.",
    "Organization limit exceeded for this billing period.",
  ];
  for (const message of orgOrWorkspaceLimitMessages) {
    const reason = resolveFailoverReasonFromError({ status: 402, message });
    expect(reason).toBe("rate_limit");
  }
});
it("keeps 402 with explicit billing signals as billing even with limit language", () => {
  // Each message pairs limit wording with an explicit credit/balance
  // statement; the expected classification stays "billing".
  const billingWithLimitWording = [
    "Your credit balance is too low. Monthly limit exceeded.",
    "Insufficient credits. Spend limit reached.",
  ];
  for (const message of billingWithLimitWording) {
    const reason = resolveFailoverReasonFromError({ status: 402, message });
    expect(reason).toBe("billing");
  }
});
it("keeps 402 without message body as billing", () => {
  // A bare 402, with the message absent or explicitly undefined, is
  // expected to resolve to "billing".
  const missingMessage = resolveFailoverReasonFromError({ status: 402 });
  expect(missingMessage).toBe("billing");

  const undefinedMessage = resolveFailoverReasonFromError({ status: 402, message: undefined });
  expect(undefinedMessage).toBe("billing");
});
it("infers format errors from error messages", () => {
expect(
resolveFailoverReasonFromError({

View File

@@ -345,4 +345,105 @@ describe("runWithModelFallback probe logic", () => {
allowTransientCooldownProbe: true,
});
});
it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
// Scenario: the only configured model is the primary (empty fallbacks list),
// and the mocks report its profiles as unavailable for a "billing" reason.
// The test expects the primary to still be attempted once, with the
// cooldown-probe option passed to `run`.
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "openai/gpt-4.1-mini",
fallbacks: [],
},
},
},
} as Partial<OpenClawConfig>);
// Billing cooldown far from expiry — would normally be skipped
const expiresIn30Min = NOW + 30 * 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
// `run` stands in for the model call and resolves successfully.
const run = vi.fn().mockResolvedValue("billing-recovered");
const result = await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
// Explicitly no fallbacks for this call either.
fallbacksOverride: [],
run,
});
// The value resolved by `run` is surfaced as the overall result.
expect(result.result).toBe("billing-recovered");
expect(run).toHaveBeenCalledTimes(1);
// The attempt is flagged as a cooldown probe via the third argument.
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
});
it("throttles billing probe for single-candidate at 30s intervals", async () => {
// Scenario: same single-candidate billing cooldown as the no-fallback probe
// case, but a probe was already recorded 10 seconds ago. The test expects
// no new attempt and a rejected call.
const cfg = makeCfg({
agents: {
defaults: {
model: {
primary: "openai/gpt-4.1-mini",
fallbacks: [],
},
},
},
} as Partial<OpenClawConfig>);
// Cooldown reported as expiring 30 minutes after NOW, with reason "billing".
mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
// Simulate a recent probe 10s ago
// NOTE(review): the key "openai" is presumably the probe-throttle key used
// for this provider — confirm against how runWithModelFallback derives it.
_probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
// The resolved value is never expected to be observed; `run` must not be called.
const run = vi.fn().mockResolvedValue("unreachable");
await expect(
runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
fallbacksOverride: [],
run,
}),
).rejects.toThrow("All models failed");
// Throttled: the primary was skipped rather than probed.
expect(run).not.toHaveBeenCalled();
});
it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
// Scenario: the primary is in a "billing" cooldown that is about to expire.
// The test expects the primary to be attempted as a cooldown probe.
// NOTE(review): makeCfg() with no overrides presumably yields a config that
// includes fallback candidates (per the test title) — confirm in makeCfg.
const cfg = makeCfg();
// Cooldown expires in 1 minute — within 2-min probe margin
const expiresIn1Min = NOW + 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
const run = vi.fn().mockResolvedValue("billing-probe-ok");
// NOTE(review): runPrimaryCandidate is a helper defined elsewhere in this
// test file; presumably it invokes runWithModelFallback for the primary
// model — verify.
const result = await runPrimaryCandidate(cfg, run);
expect(result.result).toBe("billing-probe-ok");
expect(run).toHaveBeenCalledTimes(1);
// The primary was attempted, and flagged as a cooldown probe.
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
});
it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => {
// Scenario: the primary is in a "billing" cooldown that expires 30 minutes
// after NOW. The test expects the primary to be skipped and a different
// candidate to serve the request.
// NOTE(review): makeCfg() with no overrides presumably yields a config that
// includes fallback candidates (per the test title) — confirm in makeCfg.
const cfg = makeCfg();
const expiresIn30Min = NOW + 30 * 60 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
const run = vi.fn().mockResolvedValue("ok");
const result = await runPrimaryCandidate(cfg, run);
expect(result.result).toBe("ok");
// Exactly one call, and it went to the anthropic candidate rather than the
// openai primary; no probe options argument is passed for it.
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
// The first recorded attempt carries the billing reason.
expect(result.attempts[0]?.reason).toBe("billing");
});
});

View File

@@ -415,11 +415,30 @@ function resolveCooldownDecision(params: {
profileIds: params.profileIds,
now: params.now,
}) ?? "rate_limit";
const isPersistentIssue =
inferredReason === "auth" ||
inferredReason === "auth_permanent" ||
inferredReason === "billing";
if (isPersistentIssue) {
const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
if (isPersistentAuthIssue) {
return {
type: "skip",
reason: inferredReason,
error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
};
}
// Billing is semi-persistent: the user may fix their balance, or a transient
// 402 might have been misclassified. Without fallback candidates, skipping is
// guaranteed failure so we attempt (throttled). With fallbacks, probe the
// primary when the standard probe schedule allows.
if (inferredReason === "billing") {
if (params.isPrimary) {
if (!params.hasFallbackCandidates) {
const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0;
if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) {
return { type: "attempt", reason: inferredReason, markProbe: true };
}
} else if (shouldProbe) {
return { type: "attempt", reason: inferredReason, markProbe: true };
}
}
return {
type: "skip",
reason: inferredReason,
@@ -514,7 +533,11 @@ export async function runWithModelFallback<T>(params: {
if (decision.markProbe) {
lastProbeAttempt.set(probeThrottleKey, now);
}
if (decision.reason === "rate_limit" || decision.reason === "overloaded") {
if (
decision.reason === "rate_limit" ||
decision.reason === "overloaded" ||
decision.reason === "billing"
) {
runOptions = { allowTransientCooldownProbe: true };
}
}

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from "vitest";
import {
classifyFailoverReason,
classifyFailoverReasonFromHttpStatus,
isAuthErrorMessage,
isAuthPermanentErrorMessage,
isBillingErrorMessage,
@@ -505,6 +506,56 @@ describe("image dimension errors", () => {
});
});
describe("classifyFailoverReasonFromHttpStatus 402 temporary limits", () => {
  // Every case in this suite classifies an HTTP 402 and varies only the message.
  const classify402 = (message: string | undefined) =>
    classifyFailoverReasonFromHttpStatus(402, message);

  it("reclassifies 402 with periodic usage limit as rate_limit", () => {
    const periodicLimitMessages = [
      "Monthly spend limit reached.",
      "Weekly usage limit exhausted.",
      "Daily limit reached, resets tomorrow.",
    ];
    for (const message of periodicLimitMessages) {
      expect(classify402(message)).toBe("rate_limit");
    }
  });

  it("reclassifies 402 with organization/workspace limit as rate_limit", () => {
    const orgOrWorkspaceLimitMessages = [
      "Organization spending limit exceeded.",
      "Workspace spend limit reached.",
      "Organization limit exceeded for this billing period.",
    ];
    for (const message of orgOrWorkspaceLimitMessages) {
      expect(classify402(message)).toBe("rate_limit");
    }
  });

  it("keeps 402 as billing when explicit billing signals are present", () => {
    // Limit wording combined with an explicit credit/balance statement.
    const billingWithLimitWording = [
      "Your credit balance is too low. Monthly limit exceeded.",
      "Insufficient credits. Organization limit reached.",
    ];
    for (const message of billingWithLimitWording) {
      expect(classify402(message)).toBe("billing");
    }
  });

  it("keeps 402 as billing without message or with generic message", () => {
    const uninformativeMessages = [undefined, "", "Payment required"];
    for (const message of uninformativeMessages) {
      expect(classify402(message)).toBe("billing");
    }
  });
});
describe("classifyFailoverReason", () => {
it("classifies documented provider error messages", () => {
expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");

View File

@@ -262,12 +262,12 @@ export function classifyFailoverReasonFromHttpStatus(
if (status === 402) {
// Some providers (e.g. Anthropic Claude Max plan) surface temporary
// usage/rate-limit failures as HTTP 402. Use a narrow matcher for
// temporary limits to avoid misclassifying billing failures (#30484).
// usage/rate-limit failures as HTTP 402. Detect temporary limits to
// avoid misclassifying them as persistent billing failures (#30484).
if (message) {
const lower = message.toLowerCase();
// Temporary usage limit signals: retry language + usage/limit terminology
const hasTemporarySignal =
// Explicit retry language + usage/limit terminology
const hasTemporaryRetrySignal =
(lower.includes("try again") ||
lower.includes("retry") ||
lower.includes("temporary") ||
@@ -275,7 +275,23 @@ export function classifyFailoverReasonFromHttpStatus(
(lower.includes("usage limit") ||
lower.includes("rate limit") ||
lower.includes("organization usage"));
if (hasTemporarySignal) {
if (hasTemporaryRetrySignal) {
return "rate_limit";
}
// Periodic usage limits (daily/weekly/monthly) are inherently temporary
// and should not trigger persistent billing cooldown, unless the message
// also contains explicit billing signals (e.g. "insufficient credits").
if (isPeriodicUsageLimitErrorMessage(message) && !isBillingErrorMessage(message)) {
return "rate_limit";
}
// Spending/organization/workspace limits are typically resettable caps
// set by the organization admin, not permanent credit-balance failures.
const hasSpendOrOrgLimitSignal =
lower.includes("spend limit") ||
lower.includes("spending limit") ||
((lower.includes("organization") || lower.includes("workspace")) &&
(lower.includes("limit") || lower.includes("exceeded")));
if (hasSpendOrOrgLimitSignal && !isBillingErrorMessage(message)) {
return "rate_limit";
}
}

View File

@@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent(
const allowTransientCooldownProbe =
params.allowTransientCooldownProbe === true &&
allAutoProfilesInCooldown &&
(unavailableReason === "rate_limit" || unavailableReason === "overloaded");
(unavailableReason === "rate_limit" ||
unavailableReason === "overloaded" ||
unavailableReason === "billing");
let didTransientCooldownProbe = false;
while (profileIndex < profileCandidates.length) {