agent: preemptive context overflow detection during tool loops (#29371)

Merged via squash.

Prepared head SHA: 19661b8fb1
Co-authored-by: keshav55 <3821985+keshav55@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
This commit is contained in:
Keshav Rao
2026-03-16 19:04:00 -07:00
committed by GitHub
parent 76500c7a78
commit 3aa4199ef0
4 changed files with 137 additions and 1 deletions

View File

@@ -3,6 +3,7 @@ import { describe, expect, it } from "vitest";
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
import {
CONTEXT_LIMIT_TRUNCATION_NOTICE,
PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE,
PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER,
installToolResultContextGuard,
} from "./tool-result-context-guard.js";
@@ -268,4 +269,63 @@ describe("installToolResultContextGuard", () => {
expect(oldResult.details).toBeUndefined();
expect(newResult.details).toBeUndefined();
});
it("throws preemptive context overflow when context exceeds 90% after tool-result compaction", async () => {
  // Budget arithmetic for a 1_000-token window:
  //   contextBudgetChars      = 1000 * 4 * 0.75 = 3000
  //   preemptiveOverflowChars = 1000 * 4 * 0.9  = 3600
  const guardedAgent = makeGuardableAgent();
  installToolResultContextGuard({ agent: guardedAgent, contextWindowTokens: 1_000 });

  // The user message is non-compactable and is already larger than the
  // 3600-char threshold, so shrinking tool results cannot bring the context back under it.
  const oversizedUserMessage = makeUser("u".repeat(3_700));
  const smallToolResult = makeToolResult("call_1", "small");
  const messages = [oversizedUserMessage, smallToolResult];

  const pendingTransform = guardedAgent.transformContext?.(
    messages,
    new AbortController().signal,
  );
  await expect(pendingTransform).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
});
it("does not throw when context is under 90% after tool-result compaction", async () => {
  const guardedAgent = makeGuardableAgent();
  installToolResultContextGuard({ agent: guardedAgent, contextWindowTokens: 1_000 });

  // Roughly 1000 chars of user text plus a tiny tool result stays far below
  // the 3600-char preemptive threshold for a 1_000-token window.
  const modestUserMessage = makeUser("u".repeat(1_000));
  const smallToolResult = makeToolResult("call_1", "small");
  const messages = [modestUserMessage, smallToolResult];

  const pendingTransform = guardedAgent.transformContext?.(
    messages,
    new AbortController().signal,
  );
  await expect(pendingTransform).resolves.not.toThrow();
});
it("compacts tool results before checking the preemptive overflow threshold", async () => {
  const guardedAgent = makeGuardableAgent();
  installToolResultContextGuard({ agent: guardedAgent, contextWindowTokens: 1_000 });

  // Scenario: an oversized user message next to a large tool result. The guard
  // is expected to compact the tool result first and only then evaluate the
  // overflow threshold. The user content alone is still past 90%, so the
  // overflow error must be raised even though compaction already happened.
  const oversizedUserMessage = makeUser("u".repeat(3_700));
  const largeToolResult = makeToolResult("call_old", "x".repeat(2_000));
  const messages = [oversizedUserMessage, largeToolResult];

  const pendingTransform = guardedAgent.transformContext?.(
    messages,
    new AbortController().signal,
  );
  await expect(pendingTransform).rejects.toThrow(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);

  // Read the tool result back through the array slot: it must show the
  // compaction placeholder, proving compaction ran before the overflow check.
  const compactedText = getToolResultText(messages[1]);
  expect(compactedText).toBe(PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER);
});
});

View File

@@ -14,6 +14,9 @@ import {
// Keep a conservative input budget to absorb tokenizer variance and provider framing overhead.
// With a 1_000-token window and 4 chars per token this yields a 3000-char context budget.
const CONTEXT_INPUT_HEADROOM_RATIO = 0.75;
// Share of the estimated context window (tokens * chars-per-token) used when sizing
// the per-tool-result cap inside installToolResultContextGuard.
// NOTE(review): the variable receiving that value is outside this view — presumably
// maxSingleToolResultChars; confirm against the full file.
const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
// High-water mark: if context exceeds this ratio after tool-result compaction,
// trigger full session compaction via the existing overflow recovery cascade.
// The derived char threshold is floor(tokens * chars-per-token * ratio) and is never
// allowed to drop below the regular context budget.
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
// Marker text indicating that output was cut because it exceeded the context limit.
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "[truncated: output exceeded context limit]";
// The same notice preceded by a newline.
const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
@@ -21,6 +24,9 @@ const CONTEXT_LIMIT_TRUNCATION_SUFFIX = `\n${CONTEXT_LIMIT_TRUNCATION_NOTICE}`;
/**
 * Text that stands in for a tool result whose original output was compacted
 * away to free context. Exported so tests can compare against it exactly.
 */
export const PREEMPTIVE_TOOL_RESULT_COMPACTION_PLACEHOLDER =
"[compacted: tool output removed to free context]";
/**
 * Message of the Error thrown by the context guard when the estimated context
 * size is still above the preemptive overflow threshold after tool-result
 * compaction. Exported so callers and tests can match on it.
 */
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
"Preemptive context overflow: estimated context size exceeds safe threshold during tool loop";
type GuardableTransformContext = (
messages: AgentMessage[],
signal: AbortSignal,
@@ -196,6 +202,10 @@ export function installToolResultContextGuard(params: {
contextWindowTokens * TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE * SINGLE_TOOL_RESULT_CONTEXT_SHARE,
),
);
const preemptiveOverflowChars = Math.max(
contextBudgetChars,
Math.floor(contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE * PREEMPTIVE_OVERFLOW_RATIO),
);
// Agent.transformContext is private in pi-coding-agent, so access it via a
// narrow runtime view to keep callsites type-safe while preserving behavior.
@@ -214,6 +224,18 @@ export function installToolResultContextGuard(params: {
maxSingleToolResultChars,
});
// After tool-result compaction, check if context still exceeds the high-water mark.
// If it does, non-tool-result content dominates and only full LLM-based session
// compaction can reduce context size. Throwing a context overflow error triggers
// the existing overflow recovery cascade in run.ts.
const postEnforcementChars = estimateContextChars(
contextMessages,
createMessageCharEstimateCache(),
);
if (postEnforcementChars > preemptiveOverflowChars) {
throw new Error(PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE);
}
return contextMessages;
}) as GuardableTransformContext;