feat: add LLM idle timeout for streaming responses

Problem: When LLM stops responding, the agent hangs for ~5 minutes with no feedback.
Users had to use /stop to recover.

Solution: Add idle timeout detection for LLM streaming responses.
This commit is contained in:
Liu Yuan
2026-03-27 15:52:10 +08:00
committed by Ayaan Zaidi
parent 47839d3b9a
commit 84b72e66b9
6 changed files with 404 additions and 1 deletions

View File

@@ -177,6 +177,7 @@ import {
} from "./compaction-timeout.js";
import { pruneProcessedHistoryImages } from "./history-image-prune.js";
import { detectAndLoadPromptImages } from "./images.js";
import { resolveLlmIdleTimeoutMs, streamWithIdleTimeout } from "./llm-idle-timeout.js";
import type { EmbeddedRunAttemptParams, EmbeddedRunAttemptResult } from "./types.js";
export {
@@ -1056,7 +1057,6 @@ export async function runEmbeddedAttempt(
activeSession.agent.streamFn,
);
}
// Anthropic-compatible providers can add new stop reasons before pi-ai maps them.
// Recover the known "sensitive" stop reason here so a model refusal does not
// bubble out as an uncaught runner error and stall channel polling.
@@ -1064,6 +1064,18 @@ export async function runEmbeddedAttempt(
activeSession.agent.streamFn,
);
let idleTimeoutTrigger: ((error: Error) => void) | undefined;
// Wrap stream with idle timeout detection
const idleTimeoutMs = resolveLlmIdleTimeoutMs(params.config);
if (idleTimeoutMs > 0) {
activeSession.agent.streamFn = streamWithIdleTimeout(
activeSession.agent.streamFn,
idleTimeoutMs,
(error) => idleTimeoutTrigger?.(error),
);
}
try {
const prior = await sanitizeSessionHistory({
messages: activeSession.messages,
@@ -1156,6 +1168,13 @@ export async function runEmbeddedAttempt(
};
const makeAbortError = (signal: AbortSignal): Error => {
const reason = getAbortReason(signal);
// If the reason is already an Error, preserve it to keep the original message
// (e.g., "LLM idle timeout (60s): no response from model" instead of "aborted")
if (reason instanceof Error) {
const err = new Error(reason.message, { cause: reason });
err.name = "AbortError";
return err;
}
const err = reason ? new Error("aborted", { cause: reason }) : new Error("aborted");
err.name = "AbortError";
return err;
@@ -1187,6 +1206,9 @@ export async function runEmbeddedAttempt(
abortCompaction();
void activeSession.abort();
};
idleTimeoutTrigger = (error) => {
abortRun(true, error);
};
const abortable = <T>(promise: Promise<T>): Promise<T> => {
const signal = runAbortController.signal;
if (signal.aborted) {

View File

@@ -0,0 +1,219 @@
import { describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../../config/config.js";
import {
DEFAULT_LLM_IDLE_TIMEOUT_MS,
resolveLlmIdleTimeoutMs,
streamWithIdleTimeout,
} from "./llm-idle-timeout.js";
describe("resolveLlmIdleTimeoutMs", () => {
it("returns default when config is undefined", () => {
expect(resolveLlmIdleTimeoutMs(undefined)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
});
it("returns default when llm config is missing", () => {
const cfg = { agents: {} } as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
});
it("returns default when idleTimeoutSeconds is not set", () => {
const cfg = { agents: { defaults: { llm: {} } } } as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
});
it("returns 0 when idleTimeoutSeconds is 0 (disabled)", () => {
const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: 0 } } } } as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(0);
});
it("returns configured value in milliseconds", () => {
const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: 30 } } } } as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(30_000);
});
it("caps at max safe timeout", () => {
const cfg = {
agents: { defaults: { llm: { idleTimeoutSeconds: 10_000_000 } } },
} as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(2_147_000_000);
});
it("ignores negative values", () => {
const cfg = { agents: { defaults: { llm: { idleTimeoutSeconds: -10 } } } } as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
});
it("ignores non-finite values", () => {
const cfg = {
agents: { defaults: { llm: { idleTimeoutSeconds: Infinity } } },
} as OpenClawConfig;
expect(resolveLlmIdleTimeoutMs(cfg)).toBe(DEFAULT_LLM_IDLE_TIMEOUT_MS);
});
});
describe("streamWithIdleTimeout", () => {
// Helper to create a mock async iterable
function createMockAsyncIterable<T>(chunks: T[]): AsyncIterable<T> {
return {
[Symbol.asyncIterator]() {
let index = 0;
return {
async next() {
if (index < chunks.length) {
return { done: false, value: chunks[index++] };
}
return { done: true, value: undefined };
},
async return() {
return { done: true, value: undefined };
},
};
},
};
}
it("wraps stream function", () => {
const mockStream = createMockAsyncIterable([]);
const baseFn = vi.fn().mockReturnValue(mockStream);
const wrapped = streamWithIdleTimeout(baseFn, 1000);
expect(typeof wrapped).toBe("function");
});
it("passes through model, context, and options", async () => {
const mockStream = createMockAsyncIterable([]);
const baseFn = vi.fn().mockReturnValue(mockStream);
const wrapped = streamWithIdleTimeout(baseFn, 1000);
const model = { api: "openai" } as Parameters<typeof baseFn>[0];
const context = {} as Parameters<typeof baseFn>[1];
const options = {} as Parameters<typeof baseFn>[2];
void wrapped(model, context, options);
expect(baseFn).toHaveBeenCalledWith(model, context, options);
});
it("throws on idle timeout", async () => {
// Create a stream that never yields
const slowStream: AsyncIterable<unknown> = {
[Symbol.asyncIterator]() {
return {
async next() {
// Never resolves - simulates hung LLM
return new Promise<IteratorResult<unknown>>(() => {});
},
};
},
};
const baseFn = vi.fn().mockReturnValue(slowStream);
const wrapped = streamWithIdleTimeout(baseFn, 50); // 50ms timeout
const model = {} as Parameters<typeof baseFn>[0];
const context = {} as Parameters<typeof baseFn>[1];
const options = {} as Parameters<typeof baseFn>[2];
const stream = wrapped(model, context, options) as AsyncIterable<unknown>;
const iterator = stream[Symbol.asyncIterator]();
await expect(iterator.next()).rejects.toThrow(/LLM idle timeout/);
});
it("resets timer on each chunk", async () => {
const chunks = [{ text: "a" }, { text: "b" }, { text: "c" }];
const mockStream = createMockAsyncIterable(chunks);
const baseFn = vi.fn().mockReturnValue(mockStream);
const wrapped = streamWithIdleTimeout(baseFn, 1000);
const model = {} as Parameters<typeof baseFn>[0];
const context = {} as Parameters<typeof baseFn>[1];
const options = {} as Parameters<typeof baseFn>[2];
const stream = wrapped(model, context, options) as AsyncIterable<unknown>;
const results: unknown[] = [];
for await (const chunk of stream) {
results.push(chunk);
}
expect(results).toHaveLength(3);
expect(results).toEqual(chunks);
});
it("handles stream with delays between chunks", async () => {
// Create a stream with small delays
const delayedStream: AsyncIterable<{ text: string }> = {
[Symbol.asyncIterator]() {
let count = 0;
return {
async next() {
if (count < 3) {
await new Promise((r) => setTimeout(r, 10)); // 10ms delay
return { done: false, value: { text: String(count++) } };
}
return { done: true, value: undefined };
},
};
},
};
const baseFn = vi.fn().mockReturnValue(delayedStream);
const wrapped = streamWithIdleTimeout(baseFn, 100); // 100ms timeout - should be enough
const model = {} as Parameters<typeof baseFn>[0];
const context = {} as Parameters<typeof baseFn>[1];
const options = {} as Parameters<typeof baseFn>[2];
const stream = wrapped(model, context, options) as AsyncIterable<{ text: string }>;
const results: { text: string }[] = [];
for await (const chunk of stream) {
results.push(chunk);
}
expect(results).toHaveLength(3);
});
it("aborts controller on idle timeout", async () => {
// Create a stream that never yields
const slowStream: AsyncIterable<unknown> = {
[Symbol.asyncIterator]() {
return {
async next() {
// Never resolves - simulates hung LLM
return new Promise<IteratorResult<unknown>>(() => {});
},
};
},
};
const baseFn = vi.fn().mockReturnValue(slowStream);
const controller = new AbortController();
const wrapped = streamWithIdleTimeout(baseFn, 50, controller); // 50ms timeout
const model = {} as Parameters<typeof baseFn>[0];
const context = {} as Parameters<typeof baseFn>[1];
const options = {} as Parameters<typeof baseFn>[2];
const stream = wrapped(model, context, options) as AsyncIterable<unknown>;
const iterator = stream[Symbol.asyncIterator]();
try {
await iterator.next();
// Should not reach here
expect.fail("Expected timeout error");
} catch (error) {
// Verify the error message is preserved
expect(error).toBeInstanceOf(Error);
expect((error as Error).message).toMatch(/LLM idle timeout/);
// Verify the controller was aborted
expect(controller.signal.aborted).toBe(true);
// Verify the abort reason is the same error
const reason = controller.signal.reason;
expect(reason).toBeInstanceOf(Error);
expect((reason as Error).message).toMatch(/LLM idle timeout/);
}
});
});

View File

@@ -0,0 +1,121 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import { streamSimple } from "@mariozechner/pi-ai";
import type { OpenClawConfig } from "../../../config/config.js";
/**
 * Default idle timeout for LLM streaming responses in milliseconds.
 * If no token is received within this time, the request is aborted.
 * Set to 0 to disable (never timeout).
 * Default: 60 seconds.
 */
export const DEFAULT_LLM_IDLE_TIMEOUT_MS = 60_000;
/**
 * Maximum safe timeout value (approximately 24.8 days).
 * Node's setTimeout delay is a 32-bit signed integer (max 2,147,483,647 ms);
 * larger delays overflow and fire immediately, so we cap below that limit.
 */
const MAX_SAFE_TIMEOUT_MS = 2_147_000_000;
/**
 * Resolves the LLM idle timeout from configuration.
 *
 * Reads `agents.defaults.llm.idleTimeoutSeconds`:
 * - `0` explicitly disables the timeout (returns 0).
 * - Any positive finite number is converted to milliseconds and capped at
 *   MAX_SAFE_TIMEOUT_MS.
 * - Anything else (missing, negative, NaN, Infinity) yields the default.
 *
 * @param cfg - OpenClaw configuration
 * @returns Idle timeout in milliseconds, or 0 to disable
 */
export function resolveLlmIdleTimeoutMs(cfg?: OpenClawConfig): number {
  const raw = cfg?.agents?.defaults?.llm?.idleTimeoutSeconds;
  // 0 is an explicit opt-out: never time out.
  if (raw === 0) {
    return 0;
  }
  if (typeof raw === "number" && Number.isFinite(raw) && raw > 0) {
    // Convert to ms *before* flooring so a fractional value (e.g. 0.5s) yields
    // a short positive timeout instead of collapsing to 0, which would be
    // misread as "disabled". The schema declares an integer, but runtime
    // config is not guaranteed to honor that.
    return Math.min(Math.floor(raw * 1000), MAX_SAFE_TIMEOUT_MS);
  }
  return DEFAULT_LLM_IDLE_TIMEOUT_MS;
}
/**
 * Wraps a stream function with idle timeout detection.
 * If no chunk is received within `timeoutMs`, the pending `next()` rejects with
 * an "LLM idle timeout" error, and `controller` (if given) is aborted with that
 * same error so the underlying request is cancelled, not just the consumer loop.
 *
 * The timer is re-armed on every pull, so it measures the gap *between* chunks,
 * not the total stream duration.
 *
 * @param baseFn - The base stream function to wrap
 * @param timeoutMs - Idle timeout in milliseconds
 * @param controller - Optional abort controller to abort on timeout
 * @returns A wrapped stream function with idle timeout detection
 */
export function streamWithIdleTimeout(
  baseFn: StreamFn,
  timeoutMs: number,
  controller?: AbortController,
): StreamFn {
  return (model, context, options) => {
    const maybeStream = baseFn(model, context, options);
    // Patches the stream's own [Symbol.asyncIterator] so every consumer
    // (for-await or manual iteration) goes through the timeout-racing iterator.
    const wrapStream = (stream: ReturnType<typeof streamSimple>) => {
      const originalAsyncIterator = stream[Symbol.asyncIterator].bind(stream);
      (stream as { [Symbol.asyncIterator]: typeof originalAsyncIterator })[Symbol.asyncIterator] =
        function () {
          const iterator = originalAsyncIterator();
          let idleTimer: NodeJS.Timeout | null = null;
          // Promise that rejects once the idle timer fires; never resolves.
          const createTimeoutPromise = (): Promise<never> => {
            return new Promise((_, reject) => {
              idleTimer = setTimeout(() => {
                const error = new Error(
                  `LLM idle timeout (${Math.floor(timeoutMs / 1000)}s): no response from model`,
                );
                // Abort upstream first so the hung request is torn down.
                if (controller && !controller.signal.aborted) {
                  controller.abort(error);
                }
                reject(error);
              }, timeoutMs);
            });
          };
          const clearTimer = () => {
            if (idleTimer) {
              clearTimeout(idleTimer);
              idleTimer = null;
            }
          };
          return {
            async next() {
              // Defensive: never let two timers stack if next() is re-entered.
              clearTimer();
              try {
                // Race the real chunk against a fresh idle timer; whichever
                // settles first wins, and finally{} guarantees the timer is
                // disarmed on every exit path (value, done, or error).
                return await Promise.race([iterator.next(), createTimeoutPromise()]);
              } finally {
                clearTimer();
              }
            },
            return() {
              clearTimer();
              return iterator.return?.() ?? Promise.resolve({ done: true, value: undefined });
            },
            throw(error?: unknown) {
              clearTimer();
              return iterator.throw?.(error) ?? Promise.reject(error);
            },
          };
        };
      return stream;
    };
    // baseFn may return the stream directly or a promise of it; handle both.
    if (maybeStream && typeof maybeStream === "object" && "then" in maybeStream) {
      return Promise.resolve(maybeStream).then(wrapStream);
    }
    return wrapStream(maybeStream);
  };
}

View File

@@ -2303,6 +2303,19 @@ export const GENERATED_BASE_CONFIG_SCHEMA = {
},
additionalProperties: false,
},
llm: {
type: "object",
properties: {
idleTimeoutSeconds: {
description:
"Idle timeout for LLM streaming responses in seconds. If no token is received within this time, the request is aborted. Set to 0 to disable. Default: 60 seconds.",
type: "integer",
minimum: 0,
maximum: 9007199254740991,
},
},
additionalProperties: false,
},
compaction: {
type: "object",
properties: {

View File

@@ -171,6 +171,8 @@ export type AgentDefaultsConfig = {
cliBackends?: Record<string, CliBackendConfig>;
/** Opt-in: prune old tool results from the LLM context to reduce token usage. */
contextPruning?: AgentContextPruningConfig;
/** LLM timeout configuration. */
llm?: AgentLlmConfig;
/** Compaction tuning and pre-compaction memory flush behavior. */
compaction?: AgentCompactionConfig;
/** Embedded Pi runner hardening and compatibility controls. */
@@ -365,3 +367,16 @@ export type AgentCompactionMemoryFlushConfig = {
/** System prompt appended for the memory flush turn. */
systemPrompt?: string;
};
/**
 * LLM timeout configuration (attached at `agents.defaults.llm`).
 */
export type AgentLlmConfig = {
  /**
   * Idle timeout for LLM streaming responses in seconds.
   * If no token is received within this time, the request is aborted.
   * Set to 0 to disable (never timeout).
   * Default: 60 seconds.
   */
  idleTimeoutSeconds?: number;
};

View File

@@ -85,6 +85,19 @@ export const AgentDefaultsSchema = z
})
.strict()
.optional(),
llm: z
.object({
idleTimeoutSeconds: z
.number()
.int()
.nonnegative()
.optional()
.describe(
"Idle timeout for LLM streaming responses in seconds. If no token is received within this time, the request is aborted. Set to 0 to disable. Default: 60 seconds.",
),
})
.strict()
.optional(),
compaction: z
.object({
mode: z.union([z.literal("default"), z.literal("safeguard")]).optional(),