mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-08 06:54:24 +00:00
fix(agents): correct cooldown fallback eligibility
This commit is contained in:
@@ -37,6 +37,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Security/Browser uploads: revalidate upload paths at use-time in Playwright file-chooser and direct-input flows so missing/rebound paths are rejected before `setFiles`, with regression coverage for strict missing-path handling.
|
||||
- Security/LINE: cap unsigned webhook body reads before auth/signature handling to bound unauthenticated body processing. (#26095) Thanks @bmendonca3.
|
||||
- Agents/Model fallback: keep explicit text + image fallback chains reachable even when `agents.defaults.models` allowlists are present, prefer explicit run `agentId` over session-key parsing for followup fallback override resolution (with session-key fallback), treat agent-level fallback overrides as configured in embedded runner preflight, and classify `model_cooldown` / `cooling down` errors as `rate_limit` so failover continues. (#11972, #24137, #17231)
|
||||
- Agents/Model fallback: keep same-provider fallback chains active when session model differs from configured primary, infer cooldown reason from provider profile state (instead of `disabledReason` only), keep no-profile fallback providers eligible (env/models.json paths), and only relax same-provider cooldown fallback attempts for `rate_limit`. (#23816) thanks @ramezgaberiel.
|
||||
- Followups/Routing: when explicit origin routing fails, allow same-channel fallback dispatch (while still blocking cross-channel fallback) so followup replies do not get dropped on transient origin-adapter failures. (#26109) Thanks @Sid-Qin.
|
||||
- Agents/Model fallback: continue fallback traversal on unrecognized errors when candidates remain, while still throwing the original unknown error on the last candidate. (#26106) Thanks @Sid-Qin.
|
||||
- Telegram/Markdown spoilers: keep valid `||spoiler||` pairs while leaving unmatched trailing `||` delimiters as literal text, avoiding false all-or-nothing spoiler suppression. (#26105) Thanks @Sid-Qin.
|
||||
|
||||
@@ -163,7 +163,7 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
expectPrimaryProbeSuccess(result, run, "recovered");
|
||||
});
|
||||
|
||||
it("does NOT probe non-primary candidates during cooldown", async () => {
|
||||
it("attempts non-primary fallbacks during rate-limit cooldown after primary probe failure", async () => {
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
@@ -182,25 +182,23 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
const almostExpired = NOW + 30 * 1000; // 30s remaining
|
||||
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
|
||||
|
||||
// Primary probe fails with 429
|
||||
// Primary probe fails with 429; fallback should still be attempted for rate_limit cooldowns.
|
||||
const run = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(Object.assign(new Error("rate limited"), { status: 429 }))
|
||||
.mockResolvedValue("should-not-reach");
|
||||
.mockResolvedValue("fallback-ok");
|
||||
|
||||
try {
|
||||
await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run,
|
||||
});
|
||||
expect.unreachable("should have thrown since all candidates exhausted");
|
||||
} catch {
|
||||
// Primary was probed (i === 0 + within margin), non-primary were skipped
|
||||
expect(run).toHaveBeenCalledTimes(1); // only primary was actually called
|
||||
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
|
||||
}
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-4.1-mini",
|
||||
run,
|
||||
});
|
||||
|
||||
expect(result.result).toBe("fallback-ok");
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
|
||||
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5");
|
||||
});
|
||||
|
||||
it("throttles probe when called within 30s interval", async () => {
|
||||
|
||||
@@ -7,7 +7,6 @@ import type { OpenClawConfig } from "../config/config.js";
|
||||
import type { AuthProfileStore } from "./auth-profiles.js";
|
||||
import { saveAuthProfileStore } from "./auth-profiles.js";
|
||||
import { AUTH_STORE_VERSION } from "./auth-profiles/constants.js";
|
||||
import type { AuthProfileFailureReason } from "./auth-profiles/types.js";
|
||||
import { isAnthropicBillingError } from "./live-auth-keys.js";
|
||||
import { runWithImageModelFallback, runWithModelFallback } from "./model-fallback.js";
|
||||
import { makeModelFallbackCfg } from "./test-helpers/model-fallback-config-fixture.js";
|
||||
@@ -943,14 +942,15 @@ describe("runWithModelFallback", () => {
|
||||
[`${provider}:default`]:
|
||||
reason === "rate_limit"
|
||||
? {
|
||||
// Rate limits use cooldownUntil
|
||||
// Real rate-limit cooldowns are tracked through cooldownUntil
|
||||
// and failureCounts, not disabledReason.
|
||||
cooldownUntil: now + 300000,
|
||||
disabledReason: reason as AuthProfileFailureReason,
|
||||
failureCounts: { rate_limit: 1 },
|
||||
}
|
||||
: {
|
||||
// Auth/billing issues use disabledUntil
|
||||
disabledUntil: now + 300000,
|
||||
disabledReason: reason as AuthProfileFailureReason,
|
||||
disabledReason: reason,
|
||||
},
|
||||
},
|
||||
};
|
||||
@@ -986,7 +986,7 @@ describe("runWithModelFallback", () => {
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
|
||||
});
|
||||
|
||||
it("does NOT attempt fallbacks during auth cooldown", async () => {
|
||||
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "auth");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
@@ -999,23 +999,22 @@ describe("runWithModelFallback", () => {
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi.fn().mockResolvedValueOnce("should not be called");
|
||||
const run = vi.fn().mockResolvedValueOnce("groq success");
|
||||
|
||||
await expect(
|
||||
runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
}),
|
||||
).rejects.toThrow("All models failed");
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
// Auth cooldown should skip both primary and same-provider fallbacks
|
||||
expect(run).toHaveBeenCalledTimes(0); // No attempts made
|
||||
expect(result.result).toBe("groq success");
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
|
||||
});
|
||||
|
||||
it("does NOT attempt fallbacks during billing cooldown", async () => {
|
||||
it("skips same-provider models on billing cooldown but still tries no-profile fallback providers", async () => {
|
||||
const { dir } = await makeAuthStoreWithCooldown("anthropic", "billing");
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
@@ -1028,20 +1027,19 @@ describe("runWithModelFallback", () => {
|
||||
},
|
||||
});
|
||||
|
||||
const run = vi.fn().mockResolvedValueOnce("should not be called");
|
||||
const run = vi.fn().mockResolvedValueOnce("groq success");
|
||||
|
||||
await expect(
|
||||
runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
}),
|
||||
).rejects.toThrow("All models failed");
|
||||
const result = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-6",
|
||||
run,
|
||||
agentDir: dir,
|
||||
});
|
||||
|
||||
// Billing cooldown should skip both primary and same-provider fallbacks
|
||||
expect(run).toHaveBeenCalledTimes(0); // No attempts made
|
||||
expect(result.result).toBe("groq success");
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "groq", "llama-3.3-70b-versatile");
|
||||
});
|
||||
|
||||
it("tries cross-provider fallbacks when same provider has rate limit", async () => {
|
||||
@@ -1055,8 +1053,9 @@ describe("runWithModelFallback", () => {
|
||||
},
|
||||
usageStats: {
|
||||
"anthropic:default": {
|
||||
cooldownUntil: Date.now() + 300000, // Rate limit uses cooldownUntil
|
||||
disabledReason: "rate_limit" as AuthProfileFailureReason,
|
||||
// Rate-limit reason is inferred from failureCounts for cooldown windows.
|
||||
cooldownUntil: Date.now() + 300000,
|
||||
failureCounts: { rate_limit: 2 },
|
||||
},
|
||||
// Groq not in cooldown
|
||||
},
|
||||
|
||||
@@ -340,24 +340,6 @@ export async function runWithModelFallback<T>(params: {
|
||||
});
|
||||
const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));
|
||||
|
||||
if (profileIds.length === 0) {
|
||||
// Skip no-profile providers when there are persistent auth or billing issues elsewhere.
|
||||
// This prevents wasting time on unconfigured providers when auth problems exist.
|
||||
// Note: If Provider A has auth/billing issues, Provider B with no profiles will also be skipped.
|
||||
const hasAuthOrBillingIssues = Object.values(authStore.usageStats || {}).some(
|
||||
(stats) => stats?.disabledReason === "auth" || stats?.disabledReason === "billing",
|
||||
);
|
||||
if (hasAuthOrBillingIssues) {
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error: `No auth profiles configured for provider ${candidate.provider}`,
|
||||
reason: "auth",
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (profileIds.length > 0 && !isAnyProfileAvailable) {
|
||||
// All profiles for this provider are in cooldown.
|
||||
const isPrimary = i === 0;
|
||||
@@ -374,29 +356,33 @@ export async function runWithModelFallback<T>(params: {
|
||||
profileIds,
|
||||
});
|
||||
|
||||
const disabledReason = authStore.usageStats?.[profileIds[0]]?.disabledReason;
|
||||
const isPersistentIssue = disabledReason === "auth" || disabledReason === "billing";
|
||||
const inferredReason =
|
||||
resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
profileIds,
|
||||
now,
|
||||
}) ?? "rate_limit";
|
||||
const isPersistentIssue =
|
||||
inferredReason === "auth" ||
|
||||
inferredReason === "auth_permanent" ||
|
||||
inferredReason === "billing";
|
||||
if (isPersistentIssue) {
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error: `Provider ${candidate.provider} has ${disabledReason} issue (skipping all models)`,
|
||||
reason: disabledReason,
|
||||
error: `Provider ${candidate.provider} has ${inferredReason} issue (skipping all models)`,
|
||||
reason: inferredReason,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
// For primary: try when requested model or when probe allows. For fallbacks: attempt only when explicitly rate_limit (model-specific), skip otherwise.
|
||||
// For primary: try when requested model or when probe allows.
|
||||
// For same-provider fallbacks: only relax cooldown on rate_limit, which
|
||||
// is commonly model-scoped and can recover on a sibling model.
|
||||
const shouldAttemptDespiteCooldown =
|
||||
(isPrimary && (!requestedModel || shouldProbe)) ||
|
||||
(!isPrimary && disabledReason === "rate_limit");
|
||||
(!isPrimary && inferredReason === "rate_limit");
|
||||
if (!shouldAttemptDespiteCooldown) {
|
||||
const inferredReason =
|
||||
resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
profileIds,
|
||||
now,
|
||||
}) ?? "rate_limit";
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
|
||||
Reference in New Issue
Block a user