mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-07 07:58:36 +00:00
feat(agents): add mid-turn compaction precheck (#73499)
Co-authored-by: haoxingjun <haoxingjun@bytedance.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
/** Suffix text shared by every context-limit truncation marker. */
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated";

/**
 * Builds the bracketed marker that reports how many characters were cut.
 *
 * The count is floored to an integer and clamped to a minimum of 1, so the
 * marker never reports zero or a negative number of truncated characters.
 *
 * @param truncatedChars Number of characters that were removed.
 * @returns A marker of the form `[... <count> more characters truncated]`.
 */
export function formatContextLimitTruncationNotice(truncatedChars: number): string {
  const flooredChars = Math.floor(truncatedChars);
  // Math.max(1, NaN) evaluates to NaN, which would leak "NaN" into the marker;
  // fall back to the same minimum the clamp is meant to guarantee.
  const reportedChars = Number.isNaN(flooredChars) ? 1 : Math.max(1, flooredChars);
  return `[... ${reportedChars} ${CONTEXT_LIMIT_TRUNCATION_NOTICE}]`;
}
|
||||
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../../config/types.js";
|
||||
import { buildMemorySystemPromptAddition } from "../../../context-engine/delegate.js";
|
||||
import {
|
||||
clearMemoryPluginState,
|
||||
@@ -29,6 +30,7 @@ import {
|
||||
buildEmbeddedSubscriptionParams,
|
||||
cleanupEmbeddedAttemptResources,
|
||||
} from "./attempt.subscription-cleanup.js";
|
||||
import type { MidTurnPrecheckRequest } from "./midturn-precheck.js";
|
||||
|
||||
const hoisted = getHoisted();
|
||||
const embeddedSessionId = "embedded-session";
|
||||
@@ -37,6 +39,11 @@ const seedMessage = { role: "user", content: "seed", timestamp: 1 } as AgentMess
|
||||
const doneMessage = { role: "assistant", content: "done", timestamp: 2 } as unknown as AgentMessage;
|
||||
type AfterTurnPromptCacheCall = { runtimeContext?: { promptCache?: Record<string, unknown> } };
|
||||
type TrajectoryEvent = { type?: string; data?: Record<string, unknown> };
|
||||
/**
 * Minimal view of the argument handed to the mocked tool-result context guard
 * installer; only the mid-turn precheck callback is needed by these tests.
 */
type ToolResultGuardInstallParams = {
  midTurnPrecheck?: { onMidTurnPrecheck?: (request: MidTurnPrecheckRequest) => void };
};
|
||||
|
||||
function createTestContextEngine(params: Partial<AttemptContextEngine>): AttemptContextEngine {
|
||||
return {
|
||||
@@ -770,3 +777,95 @@ describe("runEmbeddedAttempt context engine sessionKey forwarding", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("runEmbeddedAttempt context engine mid-turn precheck integration", () => {
  const sessionKey = "agent:main:guildchat:channel:midturn-precheck";
  const tempPaths: string[] = [];

  // Builds a fresh safeguard-mode config with the mid-turn precheck switched on,
  // so each test receives its own object.
  const buildPrecheckEnabledConfig = () =>
    ({
      agents: {
        defaults: {
          compaction: {
            mode: "safeguard",
            midTurnPrecheck: { enabled: true },
          },
        },
      },
    }) as OpenClawConfig;

  beforeEach(() => {
    resetEmbeddedAttemptHarness();
    clearMemoryPluginState();
  });

  afterEach(async () => {
    await cleanupTempPaths(tempPaths);
    clearMemoryPluginState();
    vi.restoreAllMocks();
  });

  it("keeps mid-turn precheck out of the context-engine-owned compaction hook", async () => {
    const compactionOwningEngine = {
      ...createContextEngineBootstrapAndAssemble(),
      info: { ownsCompaction: true },
    };

    await createContextEngineAttemptRunner({
      contextEngine: compactionOwningEngine,
      sessionKey,
      tempPaths,
      attemptOverrides: { config: buildPrecheckEnabledConfig() },
    });

    // The loop hook used for engine-owned compaction must not receive precheck options.
    const withoutPrecheckOptions = expect.not.objectContaining({
      midTurnPrecheck: expect.anything(),
    });
    expect(hoisted.installContextEngineLoopHookMock).toHaveBeenCalledWith(withoutPrecheckOptions);
  });

  it("recovers when Pi persists the mid-turn precheck as an assistant error", async () => {
    const precheckRequest: MidTurnPrecheckRequest = {
      route: "compact_only",
      estimatedPromptTokens: 9000,
      promptBudgetBeforeReserve: 7000,
      overflowTokens: 2000,
      toolResultReducibleChars: 0,
      effectiveReserveTokens: 1000,
    };
    // Fire the precheck callback as soon as the guard is installed.
    hoisted.installToolResultContextGuardMock.mockImplementation((...installArgs: unknown[]) => {
      const installParams = installArgs[0] as ToolResultGuardInstallParams;
      installParams.midTurnPrecheck?.onMidTurnPrecheck?.(precheckRequest);
      return () => {};
    });

    // Shape of the assistant error message the runner is expected to strip again.
    const syntheticPiError = {
      role: "assistant",
      content: [{ type: "text", text: "" }],
      stopReason: "error",
      errorMessage: "Context overflow: prompt too large for the model (mid-turn precheck).",
      timestamp: 3,
    } as unknown as AgentMessage;

    const result = await createContextEngineAttemptRunner({
      contextEngine: createContextEngineBootstrapAndAssemble(),
      sessionKey,
      tempPaths,
      attemptOverrides: { config: buildPrecheckEnabledConfig() },
      sessionMessages: [seedMessage],
      sessionPrompt: async (session) => {
        session.messages = [...session.messages, syntheticPiError];
      },
    });

    expect(result.promptErrorSource).toBe("precheck");
    expect(result.preflightRecovery).toEqual({ route: "compact_only" });
    expect(result.messagesSnapshot).toEqual([seedMessage]);
  });
});
|
||||
|
||||
@@ -63,6 +63,7 @@ type AttemptSpawnWorkspaceHoisted = {
|
||||
subscribeEmbeddedPiSessionMock: Mock<SubscribeEmbeddedPiSessionFn>;
|
||||
acquireSessionWriteLockMock: Mock<AcquireSessionWriteLockFn>;
|
||||
installToolResultContextGuardMock: UnknownMock;
|
||||
installContextEngineLoopHookMock: UnknownMock;
|
||||
flushPendingToolResultsAfterIdleMock: AsyncUnknownMock;
|
||||
releaseWsSessionMock: UnknownMock;
|
||||
resolveBootstrapContextForRunMock: Mock<() => Promise<BootstrapContext>>;
|
||||
@@ -117,6 +118,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => {
|
||||
const ensureGlobalUndiciStreamTimeoutsMock = vi.fn();
|
||||
const buildEmbeddedMessageActionDiscoveryInputMock = vi.fn((params: unknown) => params);
|
||||
const installToolResultContextGuardMock = vi.fn(() => () => {});
|
||||
const installContextEngineLoopHookMock = vi.fn(() => () => {});
|
||||
const flushPendingToolResultsAfterIdleMock = vi.fn(async () => {});
|
||||
const releaseWsSessionMock = vi.fn(() => {});
|
||||
const subscribeEmbeddedPiSessionMock = vi.fn<SubscribeEmbeddedPiSessionFn>(() =>
|
||||
@@ -166,6 +168,7 @@ const hoisted = vi.hoisted((): AttemptSpawnWorkspaceHoisted => {
|
||||
subscribeEmbeddedPiSessionMock,
|
||||
acquireSessionWriteLockMock,
|
||||
installToolResultContextGuardMock,
|
||||
installContextEngineLoopHookMock,
|
||||
flushPendingToolResultsAfterIdleMock,
|
||||
releaseWsSessionMock,
|
||||
resolveBootstrapContextForRunMock,
|
||||
@@ -218,7 +221,7 @@ vi.mock("../../sandbox.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("../../session-tool-result-guard-wrapper.js", () => ({
|
||||
guardSessionManager: () => hoisted.sessionManager,
|
||||
guardSessionManager: (sessionManager: unknown) => sessionManager,
|
||||
}));
|
||||
|
||||
vi.mock("../../pi-embedded-subscribe.js", () => ({
|
||||
@@ -355,6 +358,8 @@ vi.mock("../tool-result-context-guard.js", async () => {
|
||||
`[... ${Math.max(1, Math.floor(truncatedChars))} more characters truncated]`,
|
||||
installToolResultContextGuard: (...args: unknown[]) =>
|
||||
(hoisted.installToolResultContextGuardMock as (...args: unknown[]) => unknown)(...args),
|
||||
installContextEngineLoopHook: (...args: unknown[]) =>
|
||||
(hoisted.installContextEngineLoopHookMock as (...args: unknown[]) => unknown)(...args),
|
||||
};
|
||||
});
|
||||
|
||||
@@ -750,6 +755,7 @@ export function resetEmbeddedAttemptHarness(
|
||||
release: async () => {},
|
||||
});
|
||||
hoisted.installToolResultContextGuardMock.mockReset().mockReturnValue(() => {});
|
||||
hoisted.installContextEngineLoopHookMock.mockReset().mockReturnValue(() => {});
|
||||
hoisted.flushPendingToolResultsAfterIdleMock.mockReset().mockResolvedValue(undefined);
|
||||
hoisted.releaseWsSessionMock.mockReset().mockReturnValue(undefined);
|
||||
hoisted.resolveBootstrapContextForRunMock.mockReset().mockResolvedValue({
|
||||
|
||||
@@ -318,6 +318,11 @@ import { detectAndLoadPromptImages } from "./images.js";
|
||||
import { buildAttemptReplayMetadata } from "./incomplete-turn.js";
|
||||
import { resolveLlmIdleTimeoutMs, streamWithIdleTimeout } from "./llm-idle-timeout.js";
|
||||
import { resolveMessageMergeStrategy } from "./message-merge-strategy.js";
|
||||
import {
|
||||
MID_TURN_PRECHECK_ERROR_MESSAGE,
|
||||
isMidTurnPrecheckSignal,
|
||||
type MidTurnPrecheckRequest,
|
||||
} from "./midturn-precheck.js";
|
||||
import {
|
||||
PREEMPTIVE_OVERFLOW_ERROR_TEXT,
|
||||
shouldPreemptivelyCompactBeforePrompt,
|
||||
@@ -494,6 +499,57 @@ export function normalizeMessagesForLlmBoundary(messages: AgentMessage[]): Agent
|
||||
return stripRuntimeContextCustomMessages(normalized);
|
||||
}
|
||||
|
||||
/**
 * Reports whether `message` is an assistant message that ended with
 * `stopReason === "error"` and carries exactly the mid-turn precheck error text.
 *
 * @param message Candidate message; `undefined` is treated as "no match".
 * @returns `true` only for the assistant error that mirrors the precheck signal.
 */
function isMidTurnPrecheckAssistantError(message: AgentMessage | undefined): boolean {
  if (message?.role !== "assistant") {
    return false;
  }
  // stopReason/errorMessage are read through a loose structural view of the message.
  const { stopReason, errorMessage } = message as unknown as {
    stopReason?: unknown;
    errorMessage?: unknown;
  };
  const endedInError = stopReason === "error";
  return endedInError && errorMessage === MID_TURN_PRECHECK_ERROR_MESSAGE;
}
|
||||
|
||||
/**
 * Strips the synthetic mid-turn precheck assistant error from the end of the
 * transcript, both in memory and in the persisted session entries.
 *
 * Step 1 drops the trailing error from `activeSession.agent.state.messages`.
 * Step 2 reaches into SessionManager internals (`fileEntries`, `byId`, `leafId`,
 * `_rewriteFile`) through a cast, removes the matching last entry, moves the
 * leaf back to that entry's parent and rewrites the session file.
 *
 * A warning is logged when the in-memory message was removed but no matching
 * persisted entry exists, and when the rewrite hook is unavailable.
 *
 * @param params.activeSession Session whose in-memory messages may end with the error.
 * @param params.sessionManager Guarded session manager backing the session file.
 */
function removeTrailingMidTurnPrecheckAssistantError(params: {
  activeSession: { agent: { state: { messages: AgentMessage[] } } };
  sessionManager: ReturnType<typeof guardSessionManager>;
}): void {
  const messages = params.activeSession.agent.state.messages;
  // Record the removal now. Re-reading the tail after the slice would inspect the
  // previous message instead, so the mismatch warning below would not fire.
  const removedFromActiveSession = isMidTurnPrecheckAssistantError(messages.at(-1));
  if (removedFromActiveSession) {
    params.activeSession.agent.state.messages = messages.slice(0, -1);
  }

  const mutableSessionManager = params.sessionManager as unknown as {
    fileEntries?: Array<{
      type?: string;
      id?: string;
      parentId?: string | null;
      message?: AgentMessage;
    }>;
    byId?: Map<string, unknown>;
    leafId?: string | null;
    _rewriteFile?: () => void;
  };
  const lastEntry = mutableSessionManager.fileEntries?.at(-1);
  if (lastEntry?.type !== "message" || !isMidTurnPrecheckAssistantError(lastEntry.message)) {
    if (removedFromActiveSession) {
      log.warn(
        "[context-overflow-midturn-precheck] removed synthetic assistant error from active session but could not locate matching persisted SessionManager entry",
      );
    }
    return;
  }
  if (typeof mutableSessionManager._rewriteFile !== "function") {
    // Without the rewrite hook the persisted entry cannot be dropped; leave it untouched.
    log.warn(
      "[context-overflow-midturn-precheck] removed synthetic assistant error from active session but SessionManager rewrite hook is unavailable",
    );
    return;
  }
  mutableSessionManager.fileEntries?.pop();
  if (lastEntry.id) {
    mutableSessionManager.byId?.delete(lastEntry.id);
  }
  // Point the leaf back at the removed entry's parent before rewriting the file.
  mutableSessionManager.leafId = lastEntry.parentId ?? null;
  mutableSessionManager._rewriteFile();
}
|
||||
|
||||
export function shouldCreateBundleMcpRuntimeForAttempt(params: {
|
||||
toolsEnabled: boolean;
|
||||
disableTools?: boolean;
|
||||
@@ -1470,6 +1526,21 @@ export async function runEmbeddedAttempt(
|
||||
queueYieldInterruptForSession = () => {
|
||||
queueSessionsYieldInterruptMessage(activeSession);
|
||||
};
|
||||
const contextTokenBudgetForGuard = Math.max(
|
||||
1,
|
||||
Math.floor(params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS),
|
||||
);
|
||||
const toolResultMaxCharsForGuard = resolveLiveToolResultMaxChars({
|
||||
contextWindowTokens: contextTokenBudgetForGuard,
|
||||
cfg: params.config,
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
const midTurnPrecheckEnabled =
|
||||
params.config?.agents?.defaults?.compaction?.midTurnPrecheck?.enabled === true;
|
||||
let pendingMidTurnPrecheckRequest: MidTurnPrecheckRequest | null = null;
|
||||
const onMidTurnPrecheck = (request: MidTurnPrecheckRequest) => {
|
||||
pendingMidTurnPrecheckRequest = request;
|
||||
};
|
||||
if (!activeContextEngine || activeContextEngine.info.ownsCompaction !== true) {
|
||||
removeToolResultContextGuard = installToolResultContextGuard({
|
||||
agent: activeSession.agent,
|
||||
@@ -1479,6 +1550,19 @@ export async function runEmbeddedAttempt(
|
||||
params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
|
||||
),
|
||||
),
|
||||
...(midTurnPrecheckEnabled
|
||||
? {
|
||||
midTurnPrecheck: {
|
||||
enabled: true,
|
||||
contextTokenBudget: contextTokenBudgetForGuard,
|
||||
reserveTokens: () => settingsManager.getCompactionReserveTokens(),
|
||||
toolResultMaxChars: toolResultMaxCharsForGuard,
|
||||
getSystemPrompt: () => systemPromptText,
|
||||
getPrePromptMessageCount: () => prePromptMessageCount,
|
||||
onMidTurnPrecheck,
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
});
|
||||
} else {
|
||||
removeToolResultContextGuard = installContextEngineLoopHook({
|
||||
@@ -2271,8 +2355,67 @@ export async function runEmbeddedAttempt(
|
||||
// Hook runner was already obtained earlier before tool creation
|
||||
const hookAgentId = sessionAgentId;
|
||||
|
||||
const activeSessionManager = sessionManager;
|
||||
let preflightRecovery: EmbeddedRunAttemptResult["preflightRecovery"];
|
||||
let promptErrorSource: "prompt" | "compaction" | "precheck" | null = null;
|
||||
/**
 * Applies the recovery route requested by a mid-turn precheck.
 *
 * - `truncate_tool_results_only`: truncates oversized tool results in the session
 *   manager. On success the active session messages are rebuilt from the session
 *   manager; otherwise the request falls back to `compact_only`.
 * - any other route: records the route and raises the preemptive overflow error
 *   with source `"precheck"`.
 *
 * Every outcome is logged as a single warn line.
 */
const handleMidTurnPrecheckRequest = (request: MidTurnPrecheckRequest) => {
  // Emits one warn line for the chosen route; `extra` carries route-specific details.
  const logMidTurnPrecheck = (route: string, extra?: string) => {
    const fields = [
      `[context-overflow-midturn-precheck] sessionKey=${params.sessionKey ?? params.sessionId}`,
      `provider=${params.provider}/${params.modelId} route=${route}`,
      `estimatedPromptTokens=${request.estimatedPromptTokens}`,
      `promptBudgetBeforeReserve=${request.promptBudgetBeforeReserve}`,
      `overflowTokens=${request.overflowTokens}`,
      `toolResultReducibleChars=${request.toolResultReducibleChars}`,
      `effectiveReserveTokens=${request.effectiveReserveTokens}`,
      `prePromptMessageCount=${prePromptMessageCount}`,
      ...(extra ? [extra] : []),
      `sessionFile=${params.sessionFile}`,
    ];
    log.warn(fields.join(" "));
  };

  if (request.route !== "truncate_tool_results_only") {
    // Compaction-based routes: surface the overflow error so the caller recovers.
    preflightRecovery = { route: request.route };
    promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT);
    promptErrorSource = "precheck";
    logMidTurnPrecheck(request.route);
    return;
  }

  const contextTokenBudget = params.contextTokenBudget ?? DEFAULT_CONTEXT_TOKENS;
  const maxCharsOverride = resolveLiveToolResultMaxChars({
    contextWindowTokens: contextTokenBudget,
    cfg: params.config,
    agentId: sessionAgentId,
  });
  const truncationResult = truncateOversizedToolResultsInSessionManager({
    sessionManager: activeSessionManager,
    contextWindowTokens: contextTokenBudget,
    maxCharsOverride,
    sessionFile: params.sessionFile,
    sessionId: params.sessionId,
    sessionKey: params.sessionKey,
  });

  if (!truncationResult.truncated) {
    // Nothing was truncated, so fall back to compaction.
    preflightRecovery = { route: "compact_only" };
    promptError = new Error(PREEMPTIVE_OVERFLOW_ERROR_TEXT);
    promptErrorSource = "precheck";
    logMidTurnPrecheck(
      "compact_only",
      `truncateFallbackReason=${truncationResult.reason ?? "unknown"}`,
    );
    return;
  }

  preflightRecovery = {
    route: "truncate_tool_results_only",
    handled: true,
    truncatedCount: truncationResult.truncatedCount,
  };
  // Refresh the in-memory transcript from the session manager after truncation.
  const rebuiltContext = activeSessionManager.buildSessionContext();
  activeSession.agent.state.messages = rebuiltContext.messages;
  logMidTurnPrecheck(
    request.route,
    `handled=true truncatedCount=${truncationResult.truncatedCount}`,
  );
};
|
||||
let skipPromptSubmission = false;
|
||||
try {
|
||||
const promptStartedAt = Date.now();
|
||||
@@ -2782,6 +2925,8 @@ export async function runEmbeddedAttempt(
|
||||
if (yieldMessage) {
|
||||
await persistSessionsYieldContextMessage(activeSession, yieldMessage);
|
||||
}
|
||||
} else if (isMidTurnPrecheckSignal(err)) {
|
||||
handleMidTurnPrecheckRequest(err.request);
|
||||
} else {
|
||||
promptError = err;
|
||||
promptErrorSource = "prompt";
|
||||
@@ -2792,6 +2937,20 @@ export async function runEmbeddedAttempt(
|
||||
);
|
||||
}
|
||||
|
||||
if (pendingMidTurnPrecheckRequest) {
|
||||
const request = pendingMidTurnPrecheckRequest;
|
||||
pendingMidTurnPrecheckRequest = null;
|
||||
removeTrailingMidTurnPrecheckAssistantError({
|
||||
activeSession,
|
||||
sessionManager,
|
||||
});
|
||||
if (!preflightRecovery && promptErrorSource !== "precheck") {
|
||||
promptError = null;
|
||||
promptErrorSource = null;
|
||||
handleMidTurnPrecheckRequest(request);
|
||||
}
|
||||
}
|
||||
|
||||
// Capture snapshot before compaction wait so we have complete messages if timeout occurs
|
||||
// Check compaction state before and after to avoid race condition where compaction starts during capture
|
||||
// Use session state (not subscription) for snapshot decisions - need instantaneous compaction status
|
||||
|
||||
27
src/agents/pi-embedded-runner/run/midturn-precheck.ts
Normal file
27
src/agents/pi-embedded-runner/run/midturn-precheck.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
import type { PreemptiveCompactionRoute } from "./preemptive-compaction.types.js";
|
||||
|
||||
/**
 * Payload describing why a mid-turn precheck fired and which recovery route it
 * selected. The numeric fields are copied from the precheck result.
 */
export type MidTurnPrecheckRequest = {
  /** Recovery route; the "fits" outcome never produces a request. */
  route: Exclude<PreemptiveCompactionRoute, "fits">;
  /** Estimated prompt size, in tokens, reported by the precheck. */
  estimatedPromptTokens: number;
  /** Prompt budget before the reserve is applied, as reported by the precheck. */
  promptBudgetBeforeReserve: number;
  /** Overflow, in tokens, reported by the precheck. */
  overflowTokens: number;
  /** Characters the precheck reports as reducible from tool results. */
  toolResultReducibleChars: number;
  /** Reserve, in tokens, that the precheck reports as effective. */
  effectiveReserveTokens: number;
};
|
||||
|
||||
export const MID_TURN_PRECHECK_ERROR_MESSAGE =
|
||||
"Context overflow: prompt too large for the model (mid-turn precheck).";
|
||||
|
||||
/**
 * Error carrying the structured mid-turn precheck request. Its message is always
 * `MID_TURN_PRECHECK_ERROR_MESSAGE` and its `name` is `"MidTurnPrecheckSignal"`.
 */
export class MidTurnPrecheckSignal extends Error {
  /** The precheck request that triggered this signal. */
  readonly request: MidTurnPrecheckRequest;

  /**
   * @param request Precheck details exposed to whoever catches the signal.
   */
  constructor(request: MidTurnPrecheckRequest) {
    super(MID_TURN_PRECHECK_ERROR_MESSAGE);
    this.request = request;
    this.name = "MidTurnPrecheckSignal";
  }
}

/**
 * Type guard for caught values of unknown type.
 *
 * @param error Any thrown value.
 * @returns `true` when `error` is a `MidTurnPrecheckSignal` instance.
 */
export function isMidTurnPrecheckSignal(error: unknown): error is MidTurnPrecheckSignal {
  const isSignal = error instanceof MidTurnPrecheckSignal;
  return isSignal;
}
|
||||
@@ -2,6 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { ContextEngine } from "../../context-engine/types.js";
|
||||
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
|
||||
import { MidTurnPrecheckSignal } from "./run/midturn-precheck.js";
|
||||
import {
|
||||
CONTEXT_LIMIT_TRUNCATION_NOTICE,
|
||||
formatContextLimitTruncationNotice,
|
||||
@@ -104,6 +105,36 @@ async function applyGuardToContext(
|
||||
return await agent.transformContext?.(contextForNextCall, new AbortController().signal);
|
||||
}
|
||||
|
||||
/**
 * Test helper: installs the tool-result context guard with the mid-turn precheck
 * enabled, then runs the agent's `transformContext` over `contextForNextCall`.
 *
 * Defaults: the context window falls back to `contextTokenBudget`, then 20_000;
 * the precheck budget falls back to the resolved context window; the reserve
 * defaults to 10_000 tokens. `getPrePromptMessageCount` is only supplied when
 * `prePromptMessageCount` is given.
 *
 * @returns Whatever `transformContext` resolves to (or `undefined` if absent).
 */
async function applyMidTurnPrecheckGuardToContext(
  agent: { transformContext?: (messages: AgentMessage[], signal: AbortSignal) => unknown },
  contextForNextCall: AgentMessage[],
  options: {
    contextWindowTokens?: number;
    contextTokenBudget?: number;
    reserveTokens?: number;
    toolResultMaxChars?: number;
    prePromptMessageCount?: number;
    systemPrompt?: string;
  } = {},
) {
  const tokenBudget = options.contextTokenBudget;
  const contextWindowTokens = options.contextWindowTokens ?? tokenBudget ?? 20_000;
  // Only override the fence when the caller asked for a specific pre-prompt count.
  const fenceOverride =
    options.prePromptMessageCount === undefined
      ? {}
      : { getPrePromptMessageCount: () => options.prePromptMessageCount as number };

  installToolResultContextGuard({
    agent,
    contextWindowTokens,
    midTurnPrecheck: {
      enabled: true,
      contextTokenBudget: tokenBudget ?? contextWindowTokens,
      reserveTokens: () => options.reserveTokens ?? 10_000,
      toolResultMaxChars: options.toolResultMaxChars,
      getSystemPrompt: () => options.systemPrompt,
      ...fenceOverride,
    },
  });

  return await agent.transformContext?.(contextForNextCall, new AbortController().signal);
}
|
||||
|
||||
function expectPiStyleTruncation(text: string): void {
|
||||
expect(text).toContain(CONTEXT_LIMIT_TRUNCATION_NOTICE);
|
||||
expect(text).toMatch(/\[\.\.\. \d+ more characters truncated\]$/);
|
||||
@@ -249,6 +280,66 @@ describe("installToolResultContextGuard", () => {
|
||||
|
||||
expectPiStyleTruncation(getToolResultText(transformed[0]));
|
||||
});
|
||||
|
||||
it("raises a structured mid-turn precheck signal after a new tool result overflows", async () => {
  const agent = makeGuardableAgent();
  // One message precedes the fence; the large tool result is the new entry.
  const history = makeUser("prompt already in history");
  const oversizedToolResult = makeToolResult("call_big", "x".repeat(80_000));

  const guardedCall = applyMidTurnPrecheckGuardToContext(agent, [history, oversizedToolResult], {
    contextWindowTokens: 200_000,
    contextTokenBudget: 20_000,
    reserveTokens: 12_000,
    toolResultMaxChars: 16_000,
    prePromptMessageCount: 1,
  });

  const expectedRequest = expect.objectContaining({
    route: "compact_then_truncate",
    overflowTokens: expect.any(Number),
    toolResultReducibleChars: expect.any(Number),
  });
  await expect(guardedCall).rejects.toMatchObject({
    name: "MidTurnPrecheckSignal",
    request: expectedRequest,
  });
});
|
||||
|
||||
it("does not run mid-turn precheck when no new tool result was appended", async () => {
  const agent = makeGuardableAgent();
  // A large user message alone: there is no tool result after the fence.
  const contextForNextCall = [makeUser("u".repeat(80_000))];
  const guardOptions = {
    contextWindowTokens: 200_000,
    contextTokenBudget: 20_000,
    reserveTokens: 12_000,
    prePromptMessageCount: 0,
  };

  const transformed = await applyMidTurnPrecheckGuardToContext(
    agent,
    contextForNextCall,
    guardOptions,
  );

  // The very same array instance must come back.
  expect(transformed).toBe(contextForNextCall);
});
|
||||
|
||||
it("uses compact_only route when mid-turn overflow is not reducible by tool truncation", async () => {
  const agent = makeGuardableAgent();
  // The overflow comes from the user message; the tool result itself is tiny.
  const contextForNextCall = [
    makeUser("u".repeat(80_000)),
    makeToolResult("call_small", "small output"),
  ];

  // Capture the rejection; a resolved call yields a sentinel error that fails the checks below.
  const caught: unknown = await applyMidTurnPrecheckGuardToContext(agent, contextForNextCall, {
    contextWindowTokens: 200_000,
    contextTokenBudget: 20_000,
    reserveTokens: 12_000,
    prePromptMessageCount: 1,
  }).then(
    () => new Error("expected mid-turn precheck signal"),
    (err: unknown) => err,
  );

  expect(caught).toBeInstanceOf(MidTurnPrecheckSignal);
  expect((caught as MidTurnPrecheckSignal).request.route).toBe("compact_only");
});
|
||||
});
|
||||
|
||||
type MockedEngine = ContextEngine & {
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { ContextEngine, ContextEngineRuntimeContext } from "../../context-engine/types.js";
|
||||
import {
|
||||
CONTEXT_LIMIT_TRUNCATION_NOTICE,
|
||||
formatContextLimitTruncationNotice,
|
||||
} from "./context-truncation-notice.js";
|
||||
import { log } from "./logger.js";
|
||||
import { MidTurnPrecheckSignal, type MidTurnPrecheckRequest } from "./run/midturn-precheck.js";
|
||||
import { shouldPreemptivelyCompactBeforePrompt } from "./run/preemptive-compaction.js";
|
||||
import {
|
||||
CHARS_PER_TOKEN_ESTIMATE,
|
||||
TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE,
|
||||
@@ -15,7 +22,6 @@ import {
|
||||
const SINGLE_TOOL_RESULT_CONTEXT_SHARE = 0.5;
|
||||
const PREEMPTIVE_OVERFLOW_RATIO = 0.9;
|
||||
|
||||
export const CONTEXT_LIMIT_TRUNCATION_NOTICE = "more characters truncated";
|
||||
export const PREEMPTIVE_CONTEXT_OVERFLOW_MESSAGE =
|
||||
"Context overflow: estimated context size exceeds safe threshold during tool loop.";
|
||||
const TOOL_RESULT_ESTIMATE_TO_TEXT_RATIO = 4 / TOOL_RESULT_CHARS_PER_TOKEN_ESTIMATE;
|
||||
@@ -31,9 +37,17 @@ type GuardableAgentRecord = {
|
||||
transformContext?: GuardableTransformContext;
|
||||
};
|
||||
|
||||
export function formatContextLimitTruncationNotice(truncatedChars: number): string {
|
||||
return `[... ${Math.max(1, Math.floor(truncatedChars))} ${CONTEXT_LIMIT_TRUNCATION_NOTICE}]`;
|
||||
}
|
||||
/** Options that switch on and configure the mid-turn precheck inside the tool-result guard. */
type MidTurnPrecheckOptions = {
  /** The precheck only runs when this is truthy. */
  enabled?: boolean;
  /** Token budget forwarded to the precheck as `contextTokenBudget`. */
  contextTokenBudget: number;
  /** Called on every precheck to obtain the reserve-token value. */
  reserveTokens: () => number;
  /** Forwarded to the precheck as `toolResultMaxChars`. */
  toolResultMaxChars?: number;
  /** Supplies the system prompt passed to the precheck. */
  getSystemPrompt?: () => string | undefined;
  /** Initial message-count fence, consulted until the guard has recorded a length of its own. */
  getPrePromptMessageCount?: () => number;
  /** Invoked with the request just before the precheck signal is thrown. */
  onMidTurnPrecheck?: (request: MidTurnPrecheckRequest) => void;
};
|
||||
|
||||
export { CONTEXT_LIMIT_TRUNCATION_NOTICE, formatContextLimitTruncationNotice };
|
||||
|
||||
function truncateTextToBudget(text: string, maxChars: number): string {
|
||||
if (text.length <= maxChars) {
|
||||
@@ -184,6 +198,34 @@ function enforceToolResultLimitInPlace(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Checks whether any message at or after index `prePromptMessageCount` is a
 * tool-result message.
 *
 * @param params.messages Full message list to inspect.
 * @param params.prePromptMessageCount Start index of the region to scan.
 * @returns `true` as soon as one tool result is found in that region.
 */
function hasNewToolResultAfterFence(params: {
  messages: AgentMessage[];
  prePromptMessageCount: number;
}): boolean {
  const { messages, prePromptMessageCount } = params;
  const messagesAfterFence = messages.slice(prePromptMessageCount);
  return messagesAfterFence.some((candidate) => isToolResultMessage(candidate));
}
|
||||
|
||||
/**
 * Converts a precheck result into a request payload.
 *
 * @param result Outcome of `shouldPreemptivelyCompactBeforePrompt`.
 * @returns `null` when the route is "fits"; otherwise a new object holding the
 *   route and the five numeric fields copied from `result`.
 */
function toMidTurnPrecheckRequest(
  result: ReturnType<typeof shouldPreemptivelyCompactBeforePrompt>,
): MidTurnPrecheckRequest | null {
  const { route } = result;
  if (route === "fits") {
    return null;
  }
  const request: MidTurnPrecheckRequest = {
    route,
    estimatedPromptTokens: result.estimatedPromptTokens,
    promptBudgetBeforeReserve: result.promptBudgetBeforeReserve,
    overflowTokens: result.overflowTokens,
    toolResultReducibleChars: result.toolResultReducibleChars,
    effectiveReserveTokens: result.effectiveReserveTokens,
  };
  return request;
}
|
||||
|
||||
/**
|
||||
* Per-iteration `afterTurn` + `assemble` wrapper for sessions where
|
||||
* the context engine owns compaction. Lets the engine compact inside
|
||||
@@ -231,7 +273,6 @@ export function installContextEngineLoopHook(params: {
|
||||
if (!hasNewMessages) {
|
||||
return lastAssembledView ?? sourceMessages;
|
||||
}
|
||||
|
||||
try {
|
||||
if (typeof contextEngine.afterTurn === "function") {
|
||||
await contextEngine.afterTurn({
|
||||
@@ -295,6 +336,7 @@ export function installContextEngineLoopHook(params: {
|
||||
export function installToolResultContextGuard(params: {
|
||||
agent: GuardableAgent;
|
||||
contextWindowTokens: number;
|
||||
midTurnPrecheck?: MidTurnPrecheckOptions;
|
||||
}): () => void {
|
||||
const contextWindowTokens = Math.max(1, Math.floor(params.contextWindowTokens));
|
||||
const maxContextChars = Math.max(
|
||||
@@ -312,6 +354,7 @@ export function installToolResultContextGuard(params: {
|
||||
// narrow runtime view to keep callsites type-safe while preserving behavior.
|
||||
const mutableAgent = params.agent as GuardableAgentRecord;
|
||||
const originalTransformContext = mutableAgent.transformContext;
|
||||
let lastSeenLength: number | null = null;
|
||||
|
||||
mutableAgent.transformContext = (async (messages: AgentMessage[], signal: AbortSignal) => {
|
||||
const transformed = originalTransformContext
|
||||
@@ -331,6 +374,50 @@ export function installToolResultContextGuard(params: {
|
||||
maxSingleToolResultChars,
|
||||
});
|
||||
}
|
||||
if (params.midTurnPrecheck?.enabled) {
|
||||
const prePromptMessageCount = Math.max(
|
||||
0,
|
||||
Math.min(
|
||||
contextMessages.length,
|
||||
lastSeenLength ??
|
||||
params.midTurnPrecheck.getPrePromptMessageCount?.() ??
|
||||
contextMessages.length,
|
||||
),
|
||||
);
|
||||
lastSeenLength = prePromptMessageCount;
|
||||
if (
|
||||
hasNewToolResultAfterFence({
|
||||
messages: contextMessages,
|
||||
prePromptMessageCount,
|
||||
})
|
||||
) {
|
||||
// Use the same post-truncation view Pi will send to the next model call.
|
||||
// Recovery re-applies truncation to the persisted session manager, so
|
||||
// this precheck is only a routing signal, not the source of truth.
|
||||
const precheck = shouldPreemptivelyCompactBeforePrompt({
|
||||
messages: contextMessages,
|
||||
systemPrompt: params.midTurnPrecheck.getSystemPrompt?.(),
|
||||
// During a tool loop, the active user prompt is already part of messages.
|
||||
prompt: "",
|
||||
contextTokenBudget: params.midTurnPrecheck.contextTokenBudget,
|
||||
reserveTokens: params.midTurnPrecheck.reserveTokens(),
|
||||
toolResultMaxChars: params.midTurnPrecheck.toolResultMaxChars,
|
||||
});
|
||||
const request = toMidTurnPrecheckRequest(precheck);
|
||||
log.debug(
|
||||
`[context-overflow-midturn-precheck] tool-result-guard check route=${precheck.route} ` +
|
||||
`messages=${contextMessages.length} prePromptMessageCount=${prePromptMessageCount} ` +
|
||||
`estimatedPromptTokens=${precheck.estimatedPromptTokens} ` +
|
||||
`promptBudgetBeforeReserve=${precheck.promptBudgetBeforeReserve} ` +
|
||||
`overflowTokens=${precheck.overflowTokens}`,
|
||||
);
|
||||
if (request) {
|
||||
params.midTurnPrecheck.onMidTurnPrecheck?.(request);
|
||||
throw new MidTurnPrecheckSignal(request);
|
||||
}
|
||||
}
|
||||
lastSeenLength = contextMessages.length;
|
||||
}
|
||||
if (
|
||||
exceedsPreemptiveOverflowThreshold({
|
||||
messages: contextMessages,
|
||||
|
||||
@@ -7,8 +7,8 @@ import { emitSessionTranscriptUpdate } from "../../sessions/transcript-events.js
|
||||
import { normalizeLowercaseStringOrEmpty } from "../../shared/string-coerce.js";
|
||||
import { resolveAgentContextLimits } from "../agent-scope.js";
|
||||
import { acquireSessionWriteLock } from "../session-write-lock.js";
|
||||
import { formatContextLimitTruncationNotice } from "./context-truncation-notice.js";
|
||||
import { log } from "./logger.js";
|
||||
import { formatContextLimitTruncationNotice } from "./tool-result-context-guard.js";
|
||||
import { rewriteTranscriptEntriesInSessionManager } from "./transcript-rewrite.js";
|
||||
|
||||
/**
|
||||
|
||||
@@ -12,7 +12,7 @@ import type {
|
||||
} from "../plugins/types.js";
|
||||
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
|
||||
import { normalizeOptionalString } from "../shared/string-coerce.js";
|
||||
import { formatContextLimitTruncationNotice } from "./pi-embedded-runner/tool-result-context-guard.js";
|
||||
import { formatContextLimitTruncationNotice } from "./pi-embedded-runner/context-truncation-notice.js";
|
||||
import {
|
||||
DEFAULT_MAX_LIVE_TOOL_RESULT_CHARS,
|
||||
truncateToolResultMessage,
|
||||
|
||||
@@ -26,6 +26,9 @@ describe("config compaction settings", () => {
|
||||
enabled: true,
|
||||
maxRetries: 2,
|
||||
},
|
||||
midTurnPrecheck: {
|
||||
enabled: true,
|
||||
},
|
||||
memoryFlush: {
|
||||
enabled: false,
|
||||
model: "ollama/qwen3:8b",
|
||||
@@ -44,6 +47,7 @@ describe("config compaction settings", () => {
|
||||
expect(compaction?.identifierInstructions).toBe("Keep ticket IDs unchanged.");
|
||||
expect(compaction?.qualityGuard?.enabled).toBe(true);
|
||||
expect(compaction?.qualityGuard?.maxRetries).toBe(2);
|
||||
expect(compaction?.midTurnPrecheck?.enabled).toBe(true);
|
||||
expect(compaction?.memoryFlush?.enabled).toBe(false);
|
||||
expect(compaction?.memoryFlush?.model).toBe("ollama/qwen3:8b");
|
||||
expect(compaction?.memoryFlush?.softThresholdTokens).toBe(1234);
|
||||
|
||||
@@ -5025,6 +5025,21 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
description:
|
||||
"Quality-audit retry settings for safeguard compaction summaries. Safeguard mode enables this by default; set enabled: false to skip summary audits and regeneration.",
|
||||
},
|
||||
midTurnPrecheck: {
|
||||
type: "object",
|
||||
properties: {
|
||||
enabled: {
|
||||
type: "boolean",
|
||||
title: "Compaction Mid-turn Precheck Enabled",
|
||||
description:
|
||||
"Enable structured mid-turn context pressure checks for Pi tool loops. Default: false. Keep disabled unless long tool-heavy sessions hit context overflow before normal turn-end compaction can run.",
|
||||
},
|
||||
},
|
||||
additionalProperties: false,
|
||||
title: "Compaction Mid-turn Precheck",
|
||||
description:
|
||||
"Optional Pi tool-loop precheck that detects context pressure after a tool result is appended and before the next model call. When enabled, OpenClaw reuses existing precheck recovery to truncate tool results or compact before retrying.",
|
||||
},
|
||||
postIndexSync: {
|
||||
type: "string",
|
||||
enum: ["off", "async", "await"],
|
||||
@@ -27251,6 +27266,16 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
help: "Maximum number of regeneration retries after a failed safeguard summary quality audit. Use small values to bound extra latency and token cost.",
|
||||
tags: ["performance"],
|
||||
},
|
||||
"agents.defaults.compaction.midTurnPrecheck": {
|
||||
label: "Compaction Mid-turn Precheck",
|
||||
help: "Optional Pi tool-loop precheck that detects context pressure after a tool result is appended and before the next model call. When enabled, OpenClaw reuses existing precheck recovery to truncate tool results or compact before retrying.",
|
||||
tags: ["advanced"],
|
||||
},
|
||||
"agents.defaults.compaction.midTurnPrecheck.enabled": {
|
||||
label: "Compaction Mid-turn Precheck Enabled",
|
||||
help: "Enable structured mid-turn context pressure checks for Pi tool loops. Default: false. Keep disabled unless long tool-heavy sessions hit context overflow before normal turn-end compaction can run.",
|
||||
tags: ["advanced"],
|
||||
},
|
||||
"agents.defaults.compaction.postIndexSync": {
|
||||
label: "Compaction Post-Index Sync",
|
||||
help: 'Controls post-compaction session memory reindex mode: "off", "async", or "await" (default: "async"). Use "await" for strongest freshness, "async" for lower compaction latency, and "off" only when session-memory sync is handled elsewhere.',
|
||||
|
||||
@@ -401,6 +401,8 @@ const TARGET_KEYS = [
|
||||
"agents.defaults.compaction.qualityGuard",
|
||||
"agents.defaults.compaction.qualityGuard.enabled",
|
||||
"agents.defaults.compaction.qualityGuard.maxRetries",
|
||||
"agents.defaults.compaction.midTurnPrecheck",
|
||||
"agents.defaults.compaction.midTurnPrecheck.enabled",
|
||||
"agents.defaults.compaction.postCompactionSections",
|
||||
"agents.defaults.compaction.timeoutSeconds",
|
||||
"agents.defaults.compaction.model",
|
||||
@@ -821,6 +823,9 @@ describe("config help copy quality", () => {
|
||||
expect(/recent.*turn|verbatim/i.test(recentTurnsPreserve)).toBe(true);
|
||||
expect(/default:\s*3/i.test(recentTurnsPreserve)).toBe(true);
|
||||
|
||||
const midTurnPrecheck = FIELD_HELP["agents.defaults.compaction.midTurnPrecheck.enabled"];
|
||||
expect(/mid-turn|tool loop|default:\s*false/i.test(midTurnPrecheck)).toBe(true);
|
||||
|
||||
const postCompactionSections = FIELD_HELP["agents.defaults.compaction.postCompactionSections"];
|
||||
expect(/Session Startup|Red Lines/i.test(postCompactionSections)).toBe(true);
|
||||
expect(/Every Session|Safety/i.test(postCompactionSections)).toBe(true);
|
||||
|
||||
@@ -1307,6 +1307,10 @@ export const FIELD_HELP: Record<string, string> = {
|
||||
"Enables summary quality audits and regeneration retries for safeguard compaction. Default: true in safeguard mode.",
|
||||
"agents.defaults.compaction.qualityGuard.maxRetries":
|
||||
"Maximum number of regeneration retries after a failed safeguard summary quality audit. Use small values to bound extra latency and token cost.",
|
||||
"agents.defaults.compaction.midTurnPrecheck":
|
||||
"Optional Pi tool-loop precheck that detects context pressure after a tool result is appended and before the next model call. When enabled, OpenClaw reuses existing precheck recovery to truncate tool results or compact before retrying.",
|
||||
"agents.defaults.compaction.midTurnPrecheck.enabled":
|
||||
"Enable structured mid-turn context pressure checks for Pi tool loops. Default: false. Keep disabled unless long tool-heavy sessions hit context overflow before normal turn-end compaction can run.",
|
||||
"agents.defaults.compaction.postIndexSync":
|
||||
'Controls post-compaction session memory reindex mode: "off", "async", or "await" (default: "async"). Use "await" for strongest freshness, "async" for lower compaction latency, and "off" only when session-memory sync is handled elsewhere.',
|
||||
"agents.defaults.compaction.postCompactionSections":
|
||||
|
||||
@@ -608,6 +608,8 @@ export const FIELD_LABELS: Record<string, string> = {
|
||||
"agents.defaults.compaction.qualityGuard": "Compaction Quality Guard",
|
||||
"agents.defaults.compaction.qualityGuard.enabled": "Compaction Quality Guard Enabled",
|
||||
"agents.defaults.compaction.qualityGuard.maxRetries": "Compaction Quality Guard Max Retries",
|
||||
"agents.defaults.compaction.midTurnPrecheck": "Compaction Mid-turn Precheck",
|
||||
"agents.defaults.compaction.midTurnPrecheck.enabled": "Compaction Mid-turn Precheck Enabled",
|
||||
"agents.defaults.compaction.postIndexSync": "Compaction Post-Index Sync",
|
||||
"agents.defaults.compaction.postCompactionSections": "Post-Compaction Context Sections",
|
||||
"agents.defaults.compaction.timeoutSeconds": "Compaction Timeout (Seconds)",
|
||||
|
||||
@@ -432,6 +432,14 @@ export type AgentCompactionQualityGuardConfig = {
|
||||
maxRetries?: number;
|
||||
};
|
||||
|
||||
/** Settings object for the mid-turn compaction precheck; its only field is the optional `enabled` flag. */
export type AgentCompactionMidTurnPrecheckConfig = {
|
||||
/**
|
||||
* Enable structured context pressure checks after tool results are appended
|
||||
* and before the next Pi model call. Default: false.
|
||||
*/
|
||||
enabled?: boolean;
|
||||
};
|
||||
|
||||
export type AgentCompactionConfig = {
|
||||
/** Compaction summarization mode. */
|
||||
mode?: AgentCompactionMode;
|
||||
@@ -453,6 +461,8 @@ export type AgentCompactionConfig = {
|
||||
identifierInstructions?: string;
|
||||
/** Optional quality-audit retries for safeguard compaction summaries. */
|
||||
qualityGuard?: AgentCompactionQualityGuardConfig;
|
||||
/** Mid-turn precheck for tool-loop context pressure. Default: disabled. */
|
||||
midTurnPrecheck?: AgentCompactionMidTurnPrecheckConfig;
|
||||
/** Post-compaction session memory index sync mode. */
|
||||
postIndexSync?: AgentCompactionPostIndexSyncMode;
|
||||
/** Pre-compaction memory flush (agentic turn). Default: enabled. */
|
||||
|
||||
@@ -103,6 +103,19 @@ describe("agent defaults schema", () => {
|
||||
expect(result.compaction?.maxActiveTranscriptBytes).toBe("20mb");
|
||||
});
|
||||
|
||||
// Checks that the agent-defaults schema parses a compaction block carrying
// midTurnPrecheck.enabled and that the parsed result still exposes the flag as true.
it("accepts compaction.midTurnPrecheck.enabled", () => {
|
||||
// NOTE(review): the trailing non-null assertion on parse(...) suggests the schema's
// output type admits undefined — confirm against the AgentDefaultsSchema definition.
const result = AgentDefaultsSchema.parse({
|
||||
compaction: {
|
||||
mode: "safeguard",
|
||||
midTurnPrecheck: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
})!;
|
||||
|
||||
// The flag set in the input must be present on the parsed output.
expect(result.compaction?.midTurnPrecheck?.enabled).toBe(true);
|
||||
});
|
||||
|
||||
it("accepts focused contextLimits on defaults and agent entries", () => {
|
||||
const defaults = AgentDefaultsSchema.parse({
|
||||
contextLimits: {
|
||||
|
||||
@@ -182,6 +182,12 @@ export const AgentDefaultsSchema = z
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
midTurnPrecheck: z
|
||||
.object({
|
||||
enabled: z.boolean().optional(),
|
||||
})
|
||||
.strict()
|
||||
.optional(),
|
||||
postIndexSync: z.enum(["off", "async", "await"]).optional(),
|
||||
postCompactionSections: z.array(z.string()).optional(),
|
||||
model: z.string().optional(),
|
||||
|
||||
Reference in New Issue
Block a user