mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-26 16:06:16 +00:00
fix(agents): honor explicit rate-limit cooldown probes in fallback runs
This commit is contained in:
@@ -52,7 +52,9 @@ function expectPrimaryProbeSuccess(
|
||||
) {
|
||||
expect(result.result).toBe(expectedResult);
|
||||
expect(run).toHaveBeenCalledTimes(1);
|
||||
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini");
|
||||
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
});
|
||||
}
|
||||
|
||||
describe("runWithModelFallback – probe logic", () => {
|
||||
@@ -197,8 +199,12 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
|
||||
expect(result.result).toBe("fallback-ok");
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
|
||||
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5");
|
||||
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
});
|
||||
expect(run).toHaveBeenNthCalledWith(2, "anthropic", "claude-haiku-3-5", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("throttles probe when called within 30s interval", async () => {
|
||||
@@ -319,7 +325,11 @@ describe("runWithModelFallback – probe logic", () => {
|
||||
run,
|
||||
});
|
||||
|
||||
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini");
|
||||
expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini");
|
||||
expect(run).toHaveBeenNthCalledWith(1, "openai", "gpt-4.1-mini", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
});
|
||||
expect(run).toHaveBeenNthCalledWith(2, "openai", "gpt-4.1-mini", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1116,7 +1116,9 @@ describe("runWithModelFallback", () => {
|
||||
|
||||
expect(result.result).toBe("sonnet success");
|
||||
expect(run).toHaveBeenCalledTimes(1); // Primary skipped, fallback attempted
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5");
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("skips same-provider models on auth cooldown but still tries no-profile fallback providers", async () => {
|
||||
@@ -1221,7 +1223,9 @@ describe("runWithModelFallback", () => {
|
||||
|
||||
expect(result.result).toBe("groq success");
|
||||
expect(run).toHaveBeenCalledTimes(2);
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5"); // Rate limit allows attempt
|
||||
expect(run).toHaveBeenNthCalledWith(1, "anthropic", "claude-sonnet-4-5", {
|
||||
allowRateLimitCooldownProbe: true,
|
||||
}); // Rate limit allows attempt
|
||||
expect(run).toHaveBeenNthCalledWith(2, "groq", "llama-3.3-70b-versatile"); // Cross-provider works
|
||||
});
|
||||
});
|
||||
|
||||
@@ -33,6 +33,16 @@ type ModelCandidate = {
|
||||
model: string;
|
||||
};
|
||||
|
||||
export type ModelFallbackRunOptions = {
|
||||
allowRateLimitCooldownProbe?: boolean;
|
||||
};
|
||||
|
||||
type ModelFallbackRunFn<T> = (
|
||||
provider: string,
|
||||
model: string,
|
||||
options?: ModelFallbackRunOptions,
|
||||
) => Promise<T>;
|
||||
|
||||
type FallbackAttempt = {
|
||||
provider: string;
|
||||
model: string;
|
||||
@@ -124,14 +134,18 @@ function buildFallbackSuccess<T>(params: {
|
||||
}
|
||||
|
||||
async function runFallbackCandidate<T>(params: {
|
||||
run: (provider: string, model: string) => Promise<T>;
|
||||
run: ModelFallbackRunFn<T>;
|
||||
provider: string;
|
||||
model: string;
|
||||
options?: ModelFallbackRunOptions;
|
||||
}): Promise<{ ok: true; result: T } | { ok: false; error: unknown }> {
|
||||
try {
|
||||
const result = params.options
|
||||
? await params.run(params.provider, params.model, params.options)
|
||||
: await params.run(params.provider, params.model);
|
||||
return {
|
||||
ok: true,
|
||||
result: await params.run(params.provider, params.model),
|
||||
result,
|
||||
};
|
||||
} catch (err) {
|
||||
if (shouldRethrowAbort(err)) {
|
||||
@@ -142,15 +156,17 @@ async function runFallbackCandidate<T>(params: {
|
||||
}
|
||||
|
||||
async function runFallbackAttempt<T>(params: {
|
||||
run: (provider: string, model: string) => Promise<T>;
|
||||
run: ModelFallbackRunFn<T>;
|
||||
provider: string;
|
||||
model: string;
|
||||
attempts: FallbackAttempt[];
|
||||
options?: ModelFallbackRunOptions;
|
||||
}): Promise<{ success: ModelFallbackRunResult<T> } | { error: unknown }> {
|
||||
const runResult = await runFallbackCandidate({
|
||||
run: params.run,
|
||||
provider: params.provider,
|
||||
model: params.model,
|
||||
options: params.options,
|
||||
});
|
||||
if (runResult.ok) {
|
||||
return {
|
||||
@@ -439,7 +455,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
agentDir?: string;
|
||||
/** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
|
||||
fallbacksOverride?: string[];
|
||||
run: (provider: string, model: string) => Promise<T>;
|
||||
run: ModelFallbackRunFn<T>;
|
||||
onError?: ModelFallbackErrorHandler;
|
||||
}): Promise<ModelFallbackRunResult<T>> {
|
||||
const candidates = resolveFallbackCandidates({
|
||||
@@ -458,6 +474,7 @@ export async function runWithModelFallback<T>(params: {
|
||||
|
||||
for (let i = 0; i < candidates.length; i += 1) {
|
||||
const candidate = candidates[i];
|
||||
let runOptions: ModelFallbackRunOptions | undefined;
|
||||
if (authStore) {
|
||||
const profileIds = resolveAuthProfileOrder({
|
||||
cfg: params.cfg,
|
||||
@@ -497,10 +514,18 @@ export async function runWithModelFallback<T>(params: {
|
||||
if (decision.markProbe) {
|
||||
lastProbeAttempt.set(probeThrottleKey, now);
|
||||
}
|
||||
if (decision.reason === "rate_limit") {
|
||||
runOptions = { allowRateLimitCooldownProbe: true };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const attemptRun = await runFallbackAttempt({ run: params.run, ...candidate, attempts });
|
||||
const attemptRun = await runFallbackAttempt({
|
||||
run: params.run,
|
||||
...candidate,
|
||||
attempts,
|
||||
options: runOptions,
|
||||
});
|
||||
if ("success" in attemptRun) {
|
||||
return attemptRun.success;
|
||||
}
|
||||
|
||||
@@ -829,6 +829,46 @@ describe("runEmbeddedPiAgent auth profile rotation", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("can probe one cooldowned profile when rate-limit cooldown probe is explicitly allowed", async () => {
|
||||
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
|
||||
await writeAuthStore(agentDir, {
|
||||
usageStats: {
|
||||
"openai:p1": { lastUsed: 1, cooldownUntil: now + 60 * 60 * 1000 },
|
||||
"openai:p2": { lastUsed: 2, cooldownUntil: now + 60 * 60 * 1000 },
|
||||
},
|
||||
});
|
||||
|
||||
runEmbeddedAttemptMock.mockResolvedValueOnce(
|
||||
makeAttempt({
|
||||
assistantTexts: ["ok"],
|
||||
lastAssistant: buildAssistant({
|
||||
stopReason: "stop",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
}),
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedPiAgent({
|
||||
sessionId: "session:test",
|
||||
sessionKey: "agent:test:cooldown-probe",
|
||||
sessionFile: path.join(workspaceDir, "session.jsonl"),
|
||||
workspaceDir,
|
||||
agentDir,
|
||||
config: makeConfig({ fallbacks: ["openai/mock-2"] }),
|
||||
prompt: "hello",
|
||||
provider: "openai",
|
||||
model: "mock-1",
|
||||
authProfileIdSource: "auto",
|
||||
allowRateLimitCooldownProbe: true,
|
||||
timeoutMs: 5_000,
|
||||
runId: "run:cooldown-probe",
|
||||
});
|
||||
|
||||
expect(runEmbeddedAttemptMock).toHaveBeenCalledTimes(1);
|
||||
expect(result.payloads?.[0]?.text ?? "").toContain("ok");
|
||||
});
|
||||
});
|
||||
|
||||
it("treats agent-level fallbacks as configured when defaults have none", async () => {
|
||||
await withTimedAgentWorkspace(async ({ agentDir, workspaceDir, now }) => {
|
||||
await writeAuthStore(agentDir, {
|
||||
|
||||
@@ -633,15 +633,39 @@ export async function runEmbeddedPiAgent(
|
||||
};
|
||||
|
||||
try {
|
||||
const autoProfileCandidates = profileCandidates.filter(
|
||||
(candidate): candidate is string =>
|
||||
typeof candidate === "string" && candidate.length > 0 && candidate !== lockedProfileId,
|
||||
);
|
||||
const allAutoProfilesInCooldown =
|
||||
autoProfileCandidates.length > 0 &&
|
||||
autoProfileCandidates.every((candidate) => isProfileInCooldown(authStore, candidate));
|
||||
const unavailableReason = allAutoProfilesInCooldown
|
||||
? (resolveProfilesUnavailableReason({
|
||||
store: authStore,
|
||||
profileIds: autoProfileCandidates,
|
||||
}) ?? "rate_limit")
|
||||
: null;
|
||||
const allowRateLimitCooldownProbe =
|
||||
params.allowRateLimitCooldownProbe === true &&
|
||||
allAutoProfilesInCooldown &&
|
||||
unavailableReason === "rate_limit";
|
||||
let didRateLimitCooldownProbe = false;
|
||||
|
||||
while (profileIndex < profileCandidates.length) {
|
||||
const candidate = profileCandidates[profileIndex];
|
||||
if (
|
||||
candidate &&
|
||||
candidate !== lockedProfileId &&
|
||||
isProfileInCooldown(authStore, candidate)
|
||||
) {
|
||||
profileIndex += 1;
|
||||
continue;
|
||||
const inCooldown =
|
||||
candidate && candidate !== lockedProfileId && isProfileInCooldown(authStore, candidate);
|
||||
if (inCooldown) {
|
||||
if (allowRateLimitCooldownProbe && !didRateLimitCooldownProbe) {
|
||||
didRateLimitCooldownProbe = true;
|
||||
log.warn(
|
||||
`probing cooldowned auth profile for ${provider}/${modelId} due to rate_limit unavailability`,
|
||||
);
|
||||
} else {
|
||||
profileIndex += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
await applyApiKeyInfo(profileCandidates[profileIndex]);
|
||||
break;
|
||||
|
||||
@@ -113,4 +113,12 @@ export type RunEmbeddedPiAgentParams = {
|
||||
streamParams?: AgentStreamParams;
|
||||
ownerNumbers?: string[];
|
||||
enforceFinalTag?: boolean;
|
||||
/**
|
||||
* Allow a single run attempt even when all auth profiles are in cooldown,
|
||||
* but only for inferred `rate_limit` cooldowns.
|
||||
*
|
||||
* This is used by model fallback when trying sibling models on providers
|
||||
* where rate limits are often model-scoped.
|
||||
*/
|
||||
allowRateLimitCooldownProbe?: boolean;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user