mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-08 06:54:24 +00:00
fix(agents): broaden 402 temporary-limit detection and allow billing cooldown probes
- Broaden classifyFailoverReasonFromHttpStatus to treat periodic usage limits and org/workspace spend limits as rate_limit instead of billing
- Treat billing as semi-persistent in model-fallback: allow probes when no fallbacks exist (30s throttle) or when fallbacks exist (near expiry)
- Add tests for new 402 classification and billing probe behavior
This commit is contained in:
@@ -182,6 +182,68 @@ describe("failover-error", () => {
|
||||
).toBe("billing");
|
||||
});
|
||||
|
||||
it("treats 402 with periodic usage limit as rate_limit", () => {
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Monthly spend limit reached. Please visit your billing settings.",
|
||||
}),
|
||||
).toBe("rate_limit");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Weekly usage limit exhausted for this plan.",
|
||||
}),
|
||||
).toBe("rate_limit");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Daily limit reached. Your limit will reset tomorrow.",
|
||||
}),
|
||||
).toBe("rate_limit");
|
||||
});
|
||||
|
||||
it("treats 402 with organization/workspace limit as rate_limit", () => {
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Organization spending limit exceeded.",
|
||||
}),
|
||||
).toBe("rate_limit");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Workspace spend limit reached. Contact your admin.",
|
||||
}),
|
||||
).toBe("rate_limit");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Organization limit exceeded for this billing period.",
|
||||
}),
|
||||
).toBe("rate_limit");
|
||||
});
|
||||
|
||||
it("keeps 402 with explicit billing signals as billing even with limit language", () => {
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Your credit balance is too low. Monthly limit exceeded.",
|
||||
}),
|
||||
).toBe("billing");
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
status: 402,
|
||||
message: "Insufficient credits. Spend limit reached.",
|
||||
}),
|
||||
).toBe("billing");
|
||||
});
|
||||
|
||||
it("keeps 402 without message body as billing", () => {
|
||||
expect(resolveFailoverReasonFromError({ status: 402 })).toBe("billing");
|
||||
expect(resolveFailoverReasonFromError({ status: 402, message: undefined })).toBe("billing");
|
||||
});
|
||||
|
||||
it("infers format errors from error messages", () => {
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
|
||||
@@ -345,4 +345,105 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("probes billing-cooldowned primary when no fallback candidates exist", async () => {
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "openai/gpt-4.1-mini",
|
||||
fallbacks: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as Partial<OpenClawConfig>);
|
||||
|
||||
// Billing cooldown far from expiry — would normally be skipped
|
||||
const expiresIn30Min = NOW + 30 * 60 * 1000;
|
||||
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
|
||||
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
|
||||
|
||||
const run = vi.fn().mockResolvedValue("billing-recovered");
|
||||
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
fallbacksOverride: [],
|
||||
run,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("billing-recovered");
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("throttles billing probe for single-candidate at 30s intervals", async () => {
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "openai/gpt-4.1-mini",
|
||||
fallbacks: [],
|
||||
},
|
||||
},
|
||||
},
|
||||
} as Partial<OpenClawConfig>);
|
||||
|
||||
mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 30 * 60 * 1000);
|
||||
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
|
||||
|
||||
// Simulate a recent probe 10s ago
|
||||
_probeThrottleInternals.lastProbeAttempt.set("openai", NOW - 10_000);
|
||||
|
||||
const run = vi.fn().mockResolvedValue("unreachable");
|
||||
|
||||
await expect(
|
||||
runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
fallbacksOverride: [],
|
||||
run,
|
||||
}),
|
||||
).rejects.toThrow("All models failed");
|
||||
|
||||
expect(run).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("probes billing-cooldowned primary with fallbacks when near cooldown expiry", async () => {
|
||||
const cfg = makeCfg();
|
||||
// Cooldown expires in 1 minute — within 2-min probe margin
|
||||
const expiresIn1Min = NOW + 60 * 1000;
|
||||
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn1Min);
|
||||
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
|
||||
|
||||
const run = vi.fn().mockResolvedValue("billing-probe-ok");
|
||||
|
||||
const result = await runPrimaryCandidate(cfg, run);
|
||||
|
||||
expect(result.result).toBe("billing-probe-ok");
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
|
||||
allowTransientCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("skips billing-cooldowned primary with fallbacks when far from cooldown expiry", async () => {
|
||||
const cfg = makeCfg();
|
||||
const expiresIn30Min = NOW + 30 * 60 * 1000;
|
||||
mockedGetSoonestCooldownExpiry.mockReturnValue(expiresIn30Min);
|
||||
mockedResolveProfilesUnavailableReason.mockReturnValue("billing");
|
||||
|
||||
const run = vi.fn().mockResolvedValue("ok");
|
||||
|
||||
const result = await runPrimaryCandidate(cfg, run);
|
||||
|
||||
expect(result.result).toBe("ok");
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenCalledWith("anthropic", "claude-haiku-3-5");
|
||||
expect(result.attempts[0]?.reason).toBe("billing");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -415,11 +415,30 @@ function resolveCooldownDecision(params: {
|
||||
profileIds: params.profileIds,
|
||||
now: params.now,
|
||||
}) ?? "rate_limit";
|
||||
- const isPersistentIssue =
|
||||
- inferredReason === "auth" ||
|
||||
- inferredReason === "auth_permanent" ||
|
||||
- inferredReason === "billing";
|
||||
- if (isPersistentIssue) {
|
||||
+ const isPersistentAuthIssue = inferredReason === "auth" || inferredReason === "auth_permanent";
|
||||
+ if (isPersistentAuthIssue) {
|
||||
+ return {
|
||||
+ type: "skip",
|
||||
+ reason: inferredReason,
|
||||
+ error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
|
||||
+ };
|
||||
+ }
|
||||
|
||||
+ // Billing is semi-persistent: the user may fix their balance, or a transient
|
||||
+ // 402 might have been misclassified. Without fallback candidates, skipping is
|
||||
+ // guaranteed failure so we attempt (throttled). With fallbacks, probe the
|
||||
+ // primary when the standard probe schedule allows.
|
||||
+ if (inferredReason === "billing") {
|
||||
+ if (params.isPrimary) {
|
||||
+ if (!params.hasFallbackCandidates) {
|
||||
+ const lastProbe = lastProbeAttempt.get(params.probeThrottleKey) ?? 0;
|
||||
+ if (params.now - lastProbe >= MIN_PROBE_INTERVAL_MS) {
|
||||
+ return { type: "attempt", reason: inferredReason, markProbe: true };
|
||||
+ }
|
||||
+ } else if (shouldProbe) {
|
||||
+ return { type: "attempt", reason: inferredReason, markProbe: true };
|
||||
+ }
|
||||
+ }
|
||||
return {
|
||||
type: "skip",
|
||||
reason: inferredReason,
|
||||
@@ -514,7 +533,11 @@ export async function runWithModelFallback<T>(params: {
|
||||
if (decision.markProbe) {
|
||||
lastProbeAttempt.set(probeThrottleKey, now);
|
||||
}
|
||||
- if (decision.reason === "rate_limit" || decision.reason === "overloaded") {
|
||||
+ if (
|
||||
+ decision.reason === "rate_limit" ||
|
||||
+ decision.reason === "overloaded" ||
|
||||
+ decision.reason === "billing"
|
||||
+ ) {
|
||||
runOptions = { allowTransientCooldownProbe: true };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
classifyFailoverReason,
|
||||
classifyFailoverReasonFromHttpStatus,
|
||||
isAuthErrorMessage,
|
||||
isAuthPermanentErrorMessage,
|
||||
isBillingErrorMessage,
|
||||
@@ -505,6 +506,56 @@ describe("image dimension errors", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("classifyFailoverReasonFromHttpStatus – 402 temporary limits", () => {
|
||||
it("reclassifies 402 with periodic usage limit as rate_limit", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "Monthly spend limit reached.")).toBe(
|
||||
"rate_limit",
|
||||
);
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "Weekly usage limit exhausted.")).toBe(
|
||||
"rate_limit",
|
||||
);
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "Daily limit reached, resets tomorrow.")).toBe(
|
||||
"rate_limit",
|
||||
);
|
||||
});
|
||||
|
||||
it("reclassifies 402 with organization/workspace limit as rate_limit", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "Organization spending limit exceeded.")).toBe(
|
||||
"rate_limit",
|
||||
);
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "Workspace spend limit reached.")).toBe(
|
||||
"rate_limit",
|
||||
);
|
||||
expect(
|
||||
classifyFailoverReasonFromHttpStatus(
|
||||
402,
|
||||
"Organization limit exceeded for this billing period.",
|
||||
),
|
||||
).toBe("rate_limit");
|
||||
});
|
||||
|
||||
it("keeps 402 as billing when explicit billing signals are present", () => {
|
||||
expect(
|
||||
classifyFailoverReasonFromHttpStatus(
|
||||
402,
|
||||
"Your credit balance is too low. Monthly limit exceeded.",
|
||||
),
|
||||
).toBe("billing");
|
||||
expect(
|
||||
classifyFailoverReasonFromHttpStatus(
|
||||
402,
|
||||
"Insufficient credits. Organization limit reached.",
|
||||
),
|
||||
).toBe("billing");
|
||||
});
|
||||
|
||||
it("keeps 402 as billing without message or with generic message", () => {
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, undefined)).toBe("billing");
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "")).toBe("billing");
|
||||
expect(classifyFailoverReasonFromHttpStatus(402, "Payment required")).toBe("billing");
|
||||
});
|
||||
});
|
||||
|
||||
describe("classifyFailoverReason", () => {
|
||||
it("classifies documented provider error messages", () => {
|
||||
expect(classifyFailoverReason(OPENAI_RATE_LIMIT_MESSAGE)).toBe("rate_limit");
|
||||
|
||||
@@ -262,12 +262,12 @@ export function classifyFailoverReasonFromHttpStatus(
|
||||
|
||||
if (status === 402) {
|
||||
// Some providers (e.g. Anthropic Claude Max plan) surface temporary
|
||||
- // usage/rate-limit failures as HTTP 402. Use a narrow matcher for
|
||||
- // temporary limits to avoid misclassifying billing failures (#30484).
|
||||
+ // usage/rate-limit failures as HTTP 402. Detect temporary limits to
|
||||
+ // avoid misclassifying them as persistent billing failures (#30484).
|
||||
if (message) {
|
||||
const lower = message.toLowerCase();
|
||||
- // Temporary usage limit signals: retry language + usage/limit terminology
|
||||
- const hasTemporarySignal =
|
||||
+ // Explicit retry language + usage/limit terminology
|
||||
+ const hasTemporaryRetrySignal =
|
||||
(lower.includes("try again") ||
|
||||
lower.includes("retry") ||
|
||||
lower.includes("temporary") ||
|
||||
@@ -275,7 +275,23 @@ export function classifyFailoverReasonFromHttpStatus(
|
||||
(lower.includes("usage limit") ||
|
||||
lower.includes("rate limit") ||
|
||||
lower.includes("organization usage"));
|
||||
- if (hasTemporarySignal) {
|
||||
+ if (hasTemporaryRetrySignal) {
|
||||
return "rate_limit";
|
||||
}
|
||||
+ // Periodic usage limits (daily/weekly/monthly) are inherently temporary
|
||||
+ // and should not trigger persistent billing cooldown, unless the message
|
||||
+ // also contains explicit billing signals (e.g. "insufficient credits").
|
||||
+ if (isPeriodicUsageLimitErrorMessage(message) && !isBillingErrorMessage(message)) {
|
||||
+ return "rate_limit";
|
||||
+ }
|
||||
+ // Spending/organization/workspace limits are typically resettable caps
|
||||
+ // set by the organization admin, not permanent credit-balance failures.
|
||||
+ const hasSpendOrOrgLimitSignal =
|
||||
+ lower.includes("spend limit") ||
|
||||
+ lower.includes("spending limit") ||
|
||||
+ ((lower.includes("organization") || lower.includes("workspace")) &&
|
||||
+ (lower.includes("limit") || lower.includes("exceeded")));
|
||||
+ if (hasSpendOrOrgLimitSignal && !isBillingErrorMessage(message)) {
|
||||
+ return "rate_limit";
|
||||
+ }
|
||||
}
|
||||
|
||||
@@ -668,7 +668,9 @@ export async function runEmbeddedPiAgent(
|
||||
const allowTransientCooldownProbe =
|
||||
params.allowTransientCooldownProbe === true &&
|
||||
allAutoProfilesInCooldown &&
|
||||
- (unavailableReason === "rate_limit" || unavailableReason === "overloaded");
|
||||
+ (unavailableReason === "rate_limit" ||
|
||||
+ unavailableReason === "overloaded" ||
|
||||
+ unavailableReason === "billing");
|
||||
let didTransientCooldownProbe = false;
|
||||
|
||||
while (profileIndex < profileCandidates.length) {
|
||||
|
||||
Reference in New Issue
Block a user