From 353b3dfddbcfdd931ce4df361f634220ed09176e Mon Sep 17 00:00:00 2001 From: Gustavo Madeira Santana Date: Wed, 25 Feb 2026 20:03:41 -0500 Subject: [PATCH] fix(agents): correct cooldown fallback eligibility --- CHANGELOG.md | 1 + src/agents/model-fallback.probe.test.ts | 30 ++++++------ src/agents/model-fallback.test.ts | 63 ++++++++++++------------- src/agents/model-fallback.ts | 46 +++++++----------- 4 files changed, 62 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0df4711abaf..b4b4a5b4064 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai - Security/Browser uploads: revalidate upload paths at use-time in Playwright file-chooser and direct-input flows so missing/rebound paths are rejected before `setFiles`, with regression coverage for strict missing-path handling. - Security/LINE: cap unsigned webhook body reads before auth/signature handling to bound unauthenticated body processing. (#26095) Thanks @bmendonca3. - Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231) +- Agents/Model fallback: keep same-provider fallback chains active when session model differs from configured primary, infer cooldown reason from provider profile state (instead of `disabledReason` only), keep no-profile fallback providers eligible (env/models.json paths), and only relax same-provider cooldown fallback attempts for `rate_limit`. (#23816) thanks @ramezgaberiel. - Followups/Routing: when explicit origin routing fails, allow same-channel fallback dispatch (while still blocking cross-channel fallback) so followup replies do not get dropped on transient origin-adapter failures. (#26109) Thanks @Sid-Qin. - Agents/Model fallback: continue fallback traversal on unrecognized errors when candidates remain, while still throwing the original unknown error on the last candidate. (#26106) Thanks @Sid-Qin. - Telegram/Markdown spoilers: keep valid `||spoiler||` pairs while leaving unmatched trailing `||` delimiters as literal text, avoiding false all-or-nothing spoiler suppression. (#26105) Thanks @Sid-Qin. diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index 0c222ec2115..3e36366c4ad 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -163,7 +163,7 @@ describe("runWithModelFallback – probe logic", () => { expectPrimaryProbeSuccess(result, run, "recovered"); }); - it("does NOT probe non-primary candidates during cooldown", async () => { + it("attempts non-primary fallbacks during rate-limit cooldown after primary probe failure", async () => { const cfg = makeCfg({ agents: { defaults: { @@ -182,25 +182,23 @@ describe("runWithModelFallback – probe logic", () => { const almostExpired = NOW + 30 * 1000; // 30s remaining mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); - // Primary probe fails with 429 + // Primary probe fails with 429; fallback should still be attempted for rate_limit cooldowns. const run = vi .fn() .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 })) - .mockResolvedValue("should-not-reach"); + .mockResolvedValue("fallback-ok"); - try { - await runWithModelFallback({ - cfg, - provider: "openai", - model: "gpt-4.1-mini", - run, - }); - expect.unreachable("should have thrown since all candidates exhausted"); - } catch { - // Primary was probed (i === 0 + within margin), non-primary were skipped - expect(run).toHaveBeenCalledTimes(1); // only primary was actually called - expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini"); - } + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("fallback-ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini"); + expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5"); }); it("throttles probe when called within 30s interval", async () => { diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index ef3c4a5daf4..cbb3148aca4 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -7,7 +7,6 @@ import type { OpenClawConfig } from "../config/config.js"; import type { AuthProfileStore } from "./auth-profiles.js"; import { saveAuthProfileStore } from "./auth-profiles.js"; import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js"; -import type { AuthProfileFailureReason } from "./auth-profiles/types.js"; import { isAnthropicBillingError } from "./live-auth-keys.js"; import { runWithImageModelFallback, runWithModelFallback } from "./model-fallback.js"; import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js"; @@ -943,14 +942,15 @@ describe("runWithModelFallback", () => { [`${provider}:default`]: reason === "rate_limit" ? { - // Rate limits use cooldownUntil + // Real rate-limit cooldowns are tracked through cooldownUntil + // and failureCounts, not disabledReason. cooldownUntil: now + 300000, - disabledReason: reason as AuthProfileFailureReason, + failureCounts: { rate_limit: 1 }, } : { // Auth/billing issues use disabledUntil disabledUntil: now + 300000, - disabledReason: reason as AuthProfileFailureReason, + disabledReason: reason, }, }, }; @@ -986,7 +986,7 @@ describe("runWithModelFallback", () => { expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); }); - it("does NOT attempt fallbacks during auth cooldown", async () => { + it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => { const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth"); const cfg = makeCfg({ agents: { @@ -999,23 +999,22 @@ describe("runWithModelFallback", () => { }, }); - const run = vi.fn().mockResolvedValueOnce("should not be called"); + const run = vi.fn().mockResolvedValueOnce("groq success"); - await expect( - runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }), - ).rejects.toThrow("All models failed"); + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); - // Auth cooldown should skip both primary and same-provider fallbacks - expect(run).toHaveBeenCalledTimes(0); // No attempts made + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); }); - it("does NOT attempt fallbacks during billing cooldown", async () => { + it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => { const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing"); const cfg = makeCfg({ agents: { @@ -1028,20 +1027,19 @@ describe("runWithModelFallback", () => { }, }); - const run = vi.fn().mockResolvedValueOnce("should not be called"); + const run = vi.fn().mockResolvedValueOnce("groq success"); - await expect( - runWithModelFallback({ - cfg, - provider: "anthropic", - model: "claude-opus-4-6", - run, - agentDir: dir, - }), - ).rejects.toThrow("All models failed"); + const result = await runWithModelFallback({ + cfg, + provider: "anthropic", + model: "claude-opus-4-6", + run, + agentDir: dir, + }); - // Billing cooldown should skip both primary and same-provider fallbacks - expect(run).toHaveBeenCalledTimes(0); // No attempts made + expect(result.result).toBe("groq success"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile"); }); it("tries cross-provider fallbacks when same provider has rate limit", async () => { @@ -1055,8 +1053,9 @@ describe("runWithModelFallback", () => { }, usageStats: { "anthropic:default": { - cooldownUntil: Date.now() + 300000, // Rate limit uses cooldownUntil - disabledReason: "rate_limit" as AuthProfileFailureReason, + // Rate-limit reason is inferred from failureCounts for cooldown windows. + cooldownUntil: Date.now() + 300000, + failureCounts: { rate_limit: 2 }, }, // Groq not in cooldown }, diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index 24ea6e042b7..9691b5091f0 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -340,24 +340,6 @@ export async function runWithModelFallback(params: { }); const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id)); - if (profileIds.length === 0) { - // Skip no-profile providers when there are persistent auth or billing issues elsewhere. - // This prevents wasting time on unconfigured providers when auth problems exist. - // Note: If Provider A has auth/billing issues, Provider B with no profiles will also be skipped. - const hasAuthOrBillingIssues = Object.values(authStore.usageStats || {}).some( - (stats) => stats?.disabledReason === "auth" || stats?.disabledReason === "billing", - ); - if (hasAuthOrBillingIssues) { - attempts.push({ - provider: candidate.provider, - model: candidate.model, - error: `No auth profiles configured for provider ${candidate.provider}`, - reason: "auth", - }); - continue; - } - } - if (profileIds.length > 0 && !isAnyProfileAvailable) { // All profiles for this provider are in cooldown. const isPrimary = i === 0; @@ -374,29 +356,33 @@ export async function runWithModelFallback(params: { profileIds, }); - const disabledReason = authStore.usageStats?.[profileIds[0]]?.disabledReason; - const isPersistentIssue = disabledReason === "auth" || disabledReason === "billing"; + const inferredReason = + resolveProfilesUnavailableReason({ + store: authStore, + profileIds, + now, + }) ?? "rate_limit"; + const isPersistentIssue = + inferredReason === "auth" || + inferredReason === "auth_permanent" || + inferredReason === "billing"; if (isPersistentIssue) { attempts.push({ provider: candidate.provider, model: candidate.model, - error: `Provider ${candidate.provider} has ${disabledReason} issue (skipping all models)`, - reason: disabledReason, + error: `Provider ${candidate.provider} has ${inferredReason} issue (skipping all models)`, + reason: inferredReason, }); continue; } - // For primary: try when requested model or when probe allows. For fallbacks: attempt only when explicitly rate_limit (model-specific), skip otherwise. + // For primary: try when requested model or when probe allows. + // For same-provider fallbacks: only relax cooldown on rate_limit, which + // is commonly model-scoped and can recover on a sibling model. const shouldAttemptDespiteCooldown = (isPrimary && (!requestedModel || shouldProbe)) || - (!isPrimary && disabledReason === "rate_limit"); + (!isPrimary && inferredReason === "rate_limit"); if (!shouldAttemptDespiteCooldown) { - const inferredReason = - resolveProfilesUnavailableReason({ - store: authStore, - profileIds, - now, - }) ?? "rate_limit"; attempts.push({ provider: candidate.provider, model: candidate.model,