mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-24 23:21:30 +00:00
fix: align cache-ttl pruning with thinking replay sanitization
This commit is contained in:
@@ -10,6 +10,7 @@ import { setContextPruningRuntime } from "../pi-hooks/context-pruning/runtime.js
|
||||
import { computeEffectiveSettings } from "../pi-hooks/context-pruning/settings.js";
|
||||
import { makeToolPrunablePredicate } from "../pi-hooks/context-pruning/tools.js";
|
||||
import { ensurePiCompactionReserveTokens } from "../pi-settings.js";
|
||||
import { resolveTranscriptPolicy } from "../transcript-policy.js";
|
||||
import { isCacheTtlEligibleProvider, readLastCacheTtlTimestamp } from "./cache-ttl.js";
|
||||
|
||||
function resolveContextWindowTokens(params: {
|
||||
@@ -46,11 +47,17 @@ function buildContextPruningFactory(params: {
|
||||
if (!settings) {
|
||||
return undefined;
|
||||
}
|
||||
const transcriptPolicy = resolveTranscriptPolicy({
|
||||
modelApi: params.model?.api,
|
||||
provider: params.provider,
|
||||
modelId: params.modelId,
|
||||
});
|
||||
|
||||
setContextPruningRuntime(params.sessionManager, {
|
||||
settings,
|
||||
contextWindowTokens: resolveContextWindowTokens(params),
|
||||
isToolPrunable: makeToolPrunablePredicate(settings.tools),
|
||||
dropThinkingBlocks: transcriptPolicy.dropThinkingBlocks,
|
||||
lastCacheTouchAt: readLastCacheTtlTimestamp(params.sessionManager),
|
||||
});
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ export default function contextPruningExtension(api: ExtensionAPI): void {
|
||||
ctx,
|
||||
isToolPrunable: runtime.isToolPrunable,
|
||||
contextWindowTokensOverride: runtime.contextWindowTokens ?? undefined,
|
||||
dropThinkingBlocksForEstimate: runtime.dropThinkingBlocks,
|
||||
});
|
||||
|
||||
if (next === event.messages) {
|
||||
|
||||
@@ -194,6 +194,39 @@ describe("pruneContextMessages", () => {
|
||||
expect(textBlock.text).toContain("[Tool result trimmed:");
|
||||
});
|
||||
|
||||
it("ignores non-latest thinking signatures that will be dropped before send", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("first"),
|
||||
makeAssistant([
|
||||
{
|
||||
type: "thinking",
|
||||
thinking: "internal",
|
||||
thinkingSignature: "S".repeat(40_000),
|
||||
} as unknown as AssistantContentBlock,
|
||||
{ type: "text", text: "older reply" },
|
||||
]),
|
||||
makeToolResult([{ type: "text", text: "X".repeat(2_000) }]),
|
||||
makeUser("latest"),
|
||||
makeAssistant([{ type: "text", text: "latest reply" }]),
|
||||
];
|
||||
|
||||
const result = pruneContextMessages({
|
||||
messages,
|
||||
settings: {
|
||||
...DEFAULT_CONTEXT_PRUNING_SETTINGS,
|
||||
keepLastAssistants: 1,
|
||||
softTrimRatio: 0.5,
|
||||
softTrim: { maxChars: 200, headChars: 100, tailChars: 50 },
|
||||
hardClear: { ...DEFAULT_CONTEXT_PRUNING_SETTINGS.hardClear, enabled: false },
|
||||
},
|
||||
ctx: { model: { contextWindow: 5_000 } } as unknown as ExtensionContext,
|
||||
isToolPrunable: () => true,
|
||||
dropThinkingBlocksForEstimate: true,
|
||||
});
|
||||
|
||||
expect(result).toBe(messages);
|
||||
});
|
||||
|
||||
it("soft-trims image-containing tool results by replacing image blocks with placeholders", () => {
|
||||
const messages: AgentMessage[] = [
|
||||
makeUser("summarize this"),
|
||||
|
||||
@@ -2,6 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import type { ImageContent, TextContent, ToolResultMessage } from "@mariozechner/pi-ai";
|
||||
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
||||
import { CHARS_PER_TOKEN_ESTIMATE, estimateStringChars } from "../../../utils/cjk-chars.js";
|
||||
import { dropThinkingBlocks } from "../../pi-embedded-runner/thinking.js";
|
||||
import type { EffectiveContextPruningSettings } from "./settings.js";
|
||||
import { makeToolPrunablePredicate } from "./tools.js";
|
||||
|
||||
@@ -261,6 +262,7 @@ export function pruneContextMessages(params: {
|
||||
ctx: Pick<ExtensionContext, "model">;
|
||||
isToolPrunable?: (toolName: string) => boolean;
|
||||
contextWindowTokensOverride?: number;
|
||||
dropThinkingBlocksForEstimate?: boolean;
|
||||
}): AgentMessage[] {
|
||||
const { messages, settings, ctx } = params;
|
||||
const contextWindowTokens =
|
||||
@@ -290,8 +292,11 @@ export function pruneContextMessages(params: {
|
||||
const pruneStartIndex = firstUserIndex === null ? messages.length : firstUserIndex;
|
||||
|
||||
const isToolPrunable = params.isToolPrunable ?? makeToolPrunablePredicate(settings.tools);
|
||||
const estimatedMessages = params.dropThinkingBlocksForEstimate
|
||||
? dropThinkingBlocks(messages)
|
||||
: messages;
|
||||
|
||||
- const totalCharsBefore = estimateContextChars(messages);
|
||||
+ const totalCharsBefore = estimateContextChars(estimatedMessages);
|
||||
let totalChars = totalCharsBefore;
|
||||
let ratio = totalChars / charWindow;
|
||||
if (ratio < settings.softTrimRatio) {
|
||||
|
||||
@@ -5,6 +5,7 @@ export type ContextPruningRuntimeValue = {
|
||||
settings: EffectiveContextPruningSettings;
|
||||
contextWindowTokens?: number | null;
|
||||
isToolPrunable: (toolName: string) => boolean;
|
||||
dropThinkingBlocks: boolean;
|
||||
lastCacheTouchAt?: number | null;
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user