From 353b3dfddbcfdd931ce4df361f634220ed09176e Mon Sep 17 00:00:00 2001
From: Gustavo Madeira Santana <gumadeiras@gmail.com>
Date: Wed, 25 Feb 2026 20:03:41 -0500
Subject: [PATCH] fix(agents): correct cooldown fallback eligibility

---
 CHANGELOG.md                            |  1 +
 src/agents/model-fallback.probe.test.ts | 30 ++++++------
 src/agents/model-fallback.test.ts       | 63 ++++++++++++-------------
 src/agents/model-fallback.ts            | 46 +++++++-----------
 4 files changed, 62 insertions(+), 78 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0df4711abaf..b4b4a5b4064 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
 - Security/Browser uploads: revalidate upload paths at use-time in Playwright file-chooser and direct-input flows so missing/rebound paths are rejected before `setFiles`, with regression coverage for strict missing-path handling.
 - Security/LINE: cap unsigned webhook body reads before auth/signature handling to bound unauthenticated body processing. (#26095) Thanks @bmendonca3.
 - Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231)
+- Agents/Model fallback: keep same-provider fallback chains active when session model differs from configured primary, infer cooldown reason from provider profile state (instead of `disabledReason` only), keep no-profile fallback providers eligible (env/models.json paths), and only relax same-provider cooldown fallback attempts for `rate_limit`. (#23816) thanks @ramezgaberiel.
 - Followups/Routing: when explicit origin routing fails, allow same-channel fallback dispatch (while still blocking cross-channel fallback) so followup replies do not get dropped on transient origin-adapter failures. (#26109) Thanks @Sid-Qin.
 - Agents/Model fallback: continue fallback traversal on unrecognized errors when candidates remain, while still throwing the original unknown error on the last candidate. (#26106) Thanks @Sid-Qin.
 - Telegram/Markdown spoilers: keep valid `||spoiler||` pairs while leaving unmatched trailing `||` delimiters as literal text, avoiding false all-or-nothing spoiler suppression. (#26105) Thanks @Sid-Qin.
diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts
index 0c222ec2115..3e36366c4ad 100644
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -163,7 +163,7 @@ describe("runWithModelFallback – probe logic", () => {
     expectPrimaryProbeSuccess(result, run, "recovered");
   });
 
-  it("does NOT probe non-primary candidates during cooldown", async () => {
+  it("attempts non-primary fallbacks during rate-limit cooldown after primary probe failure", async () => {
     const cfg = makeCfg({
       agents: {
         defaults: {
@@ -182,25 +182,23 @@ describe("runWithModelFallback – probe logic", () => {
     const almostExpired = NOW + 30 * 1000; // 30s remaining
     mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
 
-    // Primary probe fails with 429
+    // Primary probe fails with 429; fallback should still be attempted for rate_limit cooldowns.
     const run = vi
       .fn()
       .mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
-      .mockResolvedValue("should-not-reach");
+      .mockResolvedValue("fallback-ok");
 
-    try {
-      await runWithModelFallback({
-        cfg,
-        provider: "openai",
-        model: "gpt-4.1-mini",
-        run,
-      });
-      expect.unreachable("should have thrown since all candidates exhausted");
-    } catch {
-      // Primary was probed (i === 0 + within margin), non-primary were skipped
-      expect(run).toHaveBeenCalledTimes(1); // only primary was actually called
-      expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
-    }
+    const result = await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run,
+    });
+
+    expect(result.result).toBe("fallback-ok");
+    expect(run).toHaveBeenCalledTimes(2);
+    expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
+    expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5");
   });
 
   it("throttles probe when called within 30s interval", async () => {
diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index ef3c4a5daf4..cbb3148aca4 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -7,7 +7,6 @@ import type { OpenClawConfig } from "../config/config.js";
 import type { AuthProfileStore } from "./auth-profiles.js";
 import { saveAuthProfileStore } from "./auth-profiles.js";
 import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
-import type { AuthProfileFailureReason } from "./auth-profiles/types.js";
 import { isAnthropicBillingError } from "./live-auth-keys.js";
 import { runWithImageModelFallback, runWithModelFallback } from "./model-fallback.js";
 import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
@@ -943,14 +942,15 @@ describe("runWithModelFallback", () => {
           [`${provider}:default`]:
             reason === "rate_limit"
               ? {
-                  // Rate limits use cooldownUntil
+                  // Real rate-limit cooldowns are tracked through cooldownUntil
+                  // and failureCounts, not disabledReason.
                   cooldownUntil: now + 300000,
-                  disabledReason: reason as AuthProfileFailureReason,
+                  failureCounts: { rate_limit: 1 },
                 }
               : {
                   // Auth/billing issues use disabledUntil
                   disabledUntil: now + 300000,
-                  disabledReason: reason as AuthProfileFailureReason,
+                  disabledReason: reason,
                 },
         },
       };
@@ -986,7 +986,7 @@ describe("runWithModelFallback", () => {
       expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
     });
 
-    it("does NOT attempt fallbacks during auth cooldown", async () => {
+    it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
       const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
       const cfg = makeCfg({
         agents: {
@@ -999,23 +999,22 @@ describe("runWithModelFallback", () => {
         },
       });
 
-      const run = vi.fn().mockResolvedValueOnce("should not be called");
+      const run = vi.fn().mockResolvedValueOnce("groq success");
 
-      await expect(
-        runWithModelFallback({
-          cfg,
-          provider: "anthropic",
-          model: "claude-opus-4-6",
-          run,
-          agentDir: dir,
-        }),
-      ).rejects.toThrow("All models failed");
+      const result = await runWithModelFallback({
+        cfg,
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        run,
+        agentDir: dir,
+      });
 
-      // Auth cooldown should skip both primary and same-provider fallbacks
-      expect(run).toHaveBeenCalledTimes(0); // No attempts made
+      expect(result.result).toBe("groq success");
+      expect(run).toHaveBeenCalledTimes(1);
+      expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
     });
 
-    it("does NOT attempt fallbacks during billing cooldown", async () => {
+    it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => {
       const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing");
       const cfg = makeCfg({
         agents: {
@@ -1028,20 +1027,19 @@ describe("runWithModelFallback", () => {
         },
       });
 
-      const run = vi.fn().mockResolvedValueOnce("should not be called");
+      const run = vi.fn().mockResolvedValueOnce("groq success");
 
-      await expect(
-        runWithModelFallback({
-          cfg,
-          provider: "anthropic",
-          model: "claude-opus-4-6",
-          run,
-          agentDir: dir,
-        }),
-      ).rejects.toThrow("All models failed");
+      const result = await runWithModelFallback({
+        cfg,
+        provider: "anthropic",
+        model: "claude-opus-4-6",
+        run,
+        agentDir: dir,
+      });
 
-      // Billing cooldown should skip both primary and same-provider fallbacks
-      expect(run).toHaveBeenCalledTimes(0); // No attempts made
+      expect(result.result).toBe("groq success");
+      expect(run).toHaveBeenCalledTimes(1);
+      expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
     });
 
     it("tries cross-provider fallbacks when same provider has rate limit", async () => {
@@ -1055,8 +1053,9 @@ describe("runWithModelFallback", () => {
         },
         usageStats: {
           "anthropic:default": {
-            cooldownUntil: Date.now() + 300000, // Rate limit uses cooldownUntil
-            disabledReason: "rate_limit" as AuthProfileFailureReason,
+            // Rate-limit reason is inferred from failureCounts for cooldown windows.
+            cooldownUntil: Date.now() + 300000,
+            failureCounts: { rate_limit: 2 },
           },
           // Groq not in cooldown
         },
diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts
index 24ea6e042b7..9691b5091f0 100644
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -340,24 +340,6 @@ export async function runWithModelFallback<T>(params: {
       });
       const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
 
-      if (profileIds.length === 0) {
-        // Skip no-profile providers when there are persistent auth or billing issues elsewhere.
-        // This prevents wasting time on unconfigured providers when auth problems exist.
-        // Note: If Provider A has auth/billing issues, Provider B with no profiles will also be skipped.
-        const hasAuthOrBillingIssues = Object.values(authStore.usageStats || {}).some(
-          (stats) => stats?.disabledReason === "auth" || stats?.disabledReason === "billing",
-        );
-        if (hasAuthOrBillingIssues) {
-          attempts.push({
-            provider: candidate.provider,
-            model: candidate.model,
-            error: `No auth profiles configured for provider ${candidate.provider}`,
-            reason: "auth",
-          });
-          continue;
-        }
-      }
-
       if (profileIds.length > 0 && !isAnyProfileAvailable) {
         // All profiles for this provider are in cooldown.
         const isPrimary = i === 0;
@@ -374,29 +356,33 @@ export async function runWithModelFallback<T>(params: {
           profileIds,
         });
 
-        const disabledReason = authStore.usageStats?.[profileIds[0]]?.disabledReason;
-        const isPersistentIssue = disabledReason === "auth" || disabledReason === "billing";
+        const inferredReason =
+          resolveProfilesUnavailableReason({
+            store: authStore,
+            profileIds,
+            now,
+          }) ?? "rate_limit";
+        const isPersistentIssue =
+          inferredReason === "auth" ||
+          inferredReason === "auth_permanent" ||
+          inferredReason === "billing";
         if (isPersistentIssue) {
           attempts.push({
             provider: candidate.provider,
             model: candidate.model,
-            error: `Provider ${candidate.provider} has ${disabledReason} issue (skipping all models)`,
-            reason: disabledReason,
+            error: `Provider ${candidate.provider} has ${inferredReason} issue (skipping all models)`,
+            reason: inferredReason,
           });
           continue;
         }
 
-        // For primary: try when requested model or when probe allows. For fallbacks: attempt only when explicitly rate_limit (model-specific), skip otherwise.
+        // For primary: try when requested model or when probe allows.
+        // For same-provider fallbacks: only relax cooldown on rate_limit, which
+        // is commonly model-scoped and can recover on a sibling model.
         const shouldAttemptDespiteCooldown =
           (isPrimary && (!requestedModel || shouldProbe)) ||
-          (!isPrimary && disabledReason === "rate_limit");
+          (!isPrimary && inferredReason === "rate_limit");
         if (!shouldAttemptDespiteCooldown) {
-          const inferredReason =
-            resolveProfilesUnavailableReason({
-              store: authStore,
-              profileIds,
-              now,
-            }) ?? "rate_limit";
           attempts.push({
             provider: candidate.provider,
             model: candidate.model,