fix(agents): enforce idle timeout during stream setup

Peter Steinberger
2026-05-10 09:39:23 +01:00
parent 72ffd3a464
commit 7d5cccaef4
3 changed files with 82 additions and 25 deletions

View File

@@ -176,6 +176,7 @@ Docs: https://docs.openclaw.ai
 - Plugins/doctor: invalidate persisted plugin registry snapshots when plugin diagnostics point at deleted source paths, so `openclaw doctor` stops repeating stale warnings after a local extension is replaced by a managed npm plugin. Fixes #80087. (#80134) Thanks @hclsys.
 - Doctor/OpenAI Codex: preserve Codex auth intent when auto-repairing legacy `openai-codex/*` model refs to canonical `openai/*` by adding provider/model-scoped Codex runtime policy, preventing repaired configs from falling through to direct OpenAI API-key auth. Fixes #78533 and #78570. Thanks @superck110 and @Azmodump.
 - CLI/agents: surface durable message delivery status from `sendDurableMessageBatch` in `deliverAgentCommandResult` and `openclaw agent --json --deliver`, preserving suppressed hook outcomes as terminal no-retry results while exposing partial and failed sends for automation. Supersedes #53961 and #57755. Thanks @Kaspre.
+- Agents: apply the LLM idle watchdog while provider stream setup is still pending, preventing silent pre-stream model hangs from waiting for the full agent timeout.
 - Cron: let isolated self-cleanup runs inspect their own job run history while keeping other cron jobs and mutation actions blocked. Fixes #80019. Thanks @hclsys.
 - Cron: report isolated agent-turn setup and pre-model stalls with phase-specific timeout errors instead of waiting for the full job budget when no model call starts. Fixes #74803. Thanks @jeffsteinbok-openclaw and @dgkim311.
 - CLI/plugins: treat arbitrary unknown subcommands outside plugin CLI metadata as normal unknown commands instead of suggesting `plugins.allow`, while preserving allowlist guidance for real plugin command roots. Fixes #80109. (#80123) Thanks @kagura-agent.
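
(Sketch, not part of the commit.) The new Agents entry above amounts to racing provider stream setup against the same idle watchdog that already guards chunk delivery. A minimal standalone TypeScript sketch of that pattern, using illustrative names rather than the actual openclaw API:

function raceWithIdleTimeout<T>(setup: Promise<T>, timeoutMs: number): Promise<T> {
  // Illustrative helper, assuming Node.js timers; not the function this commit adds.
  let timer: NodeJS.Timeout | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(new Error(`LLM idle timeout (${Math.floor(timeoutMs / 1000)}s): no response from model`));
    }, timeoutMs);
    timer.unref?.(); // don't keep the process alive just for the watchdog
  });
  // Whichever settles first wins; always clear the timer afterwards.
  return Promise.race([setup, timeout]).finally(() => {
    if (timer) clearTimeout(timer);
  });
}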

View File

@@ -42,12 +42,20 @@ describe("resolveLlmIdleTimeoutMs", () => {
     expect(resolveLlmIdleTimeoutMs({ runTimeoutMs: 2_147_000_000 })).toBe(0);
   });
-  it("uses the provider request timeout as the model idle watchdog", () => {
-    expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 300_000 })).toBe(300_000);
+  it("caps remote provider request timeouts at the default idle watchdog", () => {
+    expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 300_000 })).toBe(
+      DEFAULT_LLM_IDLE_TIMEOUT_MS,
+    );
   });
+  it("uses remote provider request timeouts when shorter than the default idle watchdog", () => {
+    expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 30_000 })).toBe(30_000);
+  });
   it("caps provider request timeout at the max safe timeout", () => {
-    expect(resolveLlmIdleTimeoutMs({ modelRequestTimeoutMs: 10_000_000_000 })).toBe(2_147_000_000);
+    expect(
+      resolveLlmIdleTimeoutMs({ trigger: "cron", modelRequestTimeoutMs: 10_000_000_000 }),
+    ).toBe(2_147_000_000);
   });
   it("ignores invalid provider request timeout values", () => {
@@ -296,6 +304,23 @@ describe("streamWithIdleTimeout", () => {
     await next;
   });
+  it("throws when a promise stream never resolves", async () => {
+    vi.useFakeTimers();
+    const baseFn = vi.fn().mockReturnValue(new Promise<AsyncIterable<unknown>>(() => {}));
+    const onIdleTimeout = vi.fn();
+    const wrapped = streamWithIdleTimeout(baseFn, 50, onIdleTimeout);
+    const model = {} as Parameters<typeof baseFn>[0];
+    const context = {} as Parameters<typeof baseFn>[1];
+    const options = {} as Parameters<typeof baseFn>[2];
+    const stream = expect(wrapped(model, context, options)).rejects.toThrow(/LLM idle timeout/);
+    await vi.advanceTimersByTimeAsync(50);
+    await stream;
+    expect(onIdleTimeout).toHaveBeenCalledTimes(1);
+  });
   it("resets timer on each chunk", async () => {
     const chunks = [{ text: "a" }, { text: "b" }, { text: "c" }];
     const mockStream = createMockAsyncIterable(chunks);
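
The "resets timer on each chunk" test above leans on a createMockAsyncIterable helper, presumably defined elsewhere in the test file and not shown in this diff. A plausible minimal sketch, assuming it simply yields the given chunks in order:

function createMockAsyncIterable<T>(chunks: T[]): AsyncIterable<T> {
  // Assumed shape of the test helper; the real implementation may differ.
  return {
    async *[Symbol.asyncIterator]() {
      for (const chunk of chunks) {
        yield chunk; // one chunk per next() call, in order
      }
    },
  };
}

Wrapped with streamWithIdleTimeout, each yielded chunk wins its Promise.race against the idle timer, which is what lets the watchdog reset chunk by chunk.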

View File

@@ -144,6 +144,9 @@ export function resolveLlmIdleTimeoutMs(params?: {
       value > 0 &&
       value < MAX_SAFE_TIMEOUT_MS,
   );
+  const baseUrl = params?.model?.baseUrl;
+  const isLocalProvider =
+    typeof baseUrl === "string" && baseUrl.length > 0 && isLocalProviderBaseUrl(baseUrl);
   const modelRequestTimeoutMs = params?.modelRequestTimeoutMs;
   if (
@@ -151,7 +154,11 @@
     Number.isFinite(modelRequestTimeoutMs) &&
     modelRequestTimeoutMs > 0
   ) {
-    return clampTimeoutMs(Math.min(modelRequestTimeoutMs, ...timeoutBounds));
+    const boundedTimeoutMs = Math.min(modelRequestTimeoutMs, ...timeoutBounds);
+    if (params?.trigger === "cron" || isLocalProvider) {
+      return clampTimeoutMs(boundedTimeoutMs);
+    }
+    return clampImplicitTimeoutMs(boundedTimeoutMs);
   }
   if (typeof runTimeoutMs === "number" && Number.isFinite(runTimeoutMs) && runTimeoutMs > 0) {
@@ -176,13 +183,7 @@
   // baseUrl pointing at loopback / private-network / `.local`. Ollama cloud
   // models are still hosted remotely even when proxied through local Ollama, so
   // keep the cloud watchdog for `*:cloud` model ids.
-  const baseUrl = params?.model?.baseUrl;
-  if (
-    typeof baseUrl === "string" &&
-    baseUrl.length > 0 &&
-    isLocalProviderBaseUrl(baseUrl) &&
-    !isOllamaCloudModel(params?.model)
-  ) {
+  if (isLocalProvider && !isOllamaCloudModel(params?.model)) {
     return 0;
   }
@@ -206,6 +207,21 @@
   return (model, context, options) => {
     const maybeStream = baseFn(model, context, options);
+    const createIdleTimeoutError = () =>
+      new Error(`LLM idle timeout (${Math.floor(timeoutMs / 1000)}s): no response from model`);
+    const createTimeoutPromise = (setTimer: (timer: NodeJS.Timeout) => void): Promise<never> => {
+      return new Promise((_, reject) => {
+        const timer = setTimeout(() => {
+          const error = createIdleTimeoutError();
+          onIdleTimeout?.(error);
+          reject(error);
+        }, timeoutMs);
+        timer.unref?.();
+        setTimer(timer);
+      });
+    };
     const wrapStream = (stream: ReturnType<typeof streamSimple>) => {
       const originalAsyncIterator = stream[Symbol.asyncIterator].bind(stream);
       (stream as { [Symbol.asyncIterator]: typeof originalAsyncIterator })[Symbol.asyncIterator] =
@@ -213,18 +229,6 @@
         const iterator = originalAsyncIterator();
         let idleTimer: NodeJS.Timeout | null = null;
-        const createTimeoutPromise = (): Promise<never> => {
-          return new Promise((_, reject) => {
-            idleTimer = setTimeout(() => {
-              const error = new Error(
-                `LLM idle timeout (${Math.floor(timeoutMs / 1000)}s): no response from model`,
-              );
-              onIdleTimeout?.(error);
-              reject(error);
-            }, timeoutMs);
-          });
-        };
         const clearTimer = () => {
           if (idleTimer) {
             clearTimeout(idleTimer);
@@ -239,7 +243,12 @@
           try {
             // Race between the actual next() and the timeout
-            const result = await Promise.race([streamIterator.next(), createTimeoutPromise()]);
+            const result = await Promise.race([
+              streamIterator.next(),
+              createTimeoutPromise((timer) => {
+                idleTimer = timer;
+              }),
+            ]);
             if (result.done) {
               clearTimer();
@@ -268,7 +277,29 @@
     };
     if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) {
-      return Promise.resolve(maybeStream).then(wrapStream);
+      let streamPromiseTimer: NodeJS.Timeout | null = null;
+      const clearStreamPromiseTimer = () => {
+        if (streamPromiseTimer) {
+          clearTimeout(streamPromiseTimer);
+          streamPromiseTimer = null;
+        }
+      };
+      return Promise.race([
+        Promise.resolve(maybeStream),
+        createTimeoutPromise((timer) => {
+          streamPromiseTimer = timer;
+        }),
+      ]).then(
+        (stream) => {
+          clearStreamPromiseTimer();
+          return wrapStream(stream);
+        },
+        (error) => {
+          clearStreamPromiseTimer();
+          throw error;
+        },
+      );
     }
     return wrapStream(maybeStream);
  };
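
Note that DEFAULT_LLM_IDLE_TIMEOUT_MS and clampImplicitTimeoutMs are referenced above without their definitions appearing in this diff. Inferring from the test expectations (a 300_000 ms remote request timeout is capped at the default watchdog while 30_000 ms passes through, and cron-triggered timeouts are only clamped to the max safe value), a sketch consistent with that behavior might look like this; the default value itself is an assumption:

const DEFAULT_LLM_IDLE_TIMEOUT_MS = 120_000; // assumed: somewhere between 30s and 300s
const MAX_SAFE_TIMEOUT_MS = 2_147_000_000; // just under the 32-bit setTimeout limit

function clampTimeoutMs(value: number): number {
  // Assumed helper: bound a timeout to the safe setTimeout range.
  return Math.min(Math.max(0, Math.floor(value)), MAX_SAFE_TIMEOUT_MS);
}

function clampImplicitTimeoutMs(value: number): number {
  // Assumed helper: implicit (config-derived) remote timeouts are additionally
  // capped at the default idle watchdog; the explicit cron/local paths above
  // use clampTimeoutMs and skip this cap.
  return clampTimeoutMs(Math.min(value, DEFAULT_LLM_IDLE_TIMEOUT_MS));
}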