mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-20 21:23:23 +00:00
* docs: correct overstated prompt-cache comments from #58036 #58037 #58038
* docs: restore purpose context in MCP tool sort comment
* docs: drop misleading 'legacy' framing from image-prune comments
* docs: restore useful context stripped from image-prune comments
* docs: restore 'deterministically' in MCP tool sort comment
* docs: restore 'idempotent' at attempt.ts callsite
* docs: restore 'provider prompt cache' in context-guard comment
This commit is contained in:
@@ -112,9 +112,9 @@ export async function materializeBundleMcpToolsForRun(params: {
|
||||
});
|
||||
}
|
||||
|
||||
// Sort tools deterministically by name so the tools block in API requests is
|
||||
// stable across turns. MCP's listTools() does not guarantee order, and any
|
||||
// change in the tools array busts the prompt cache at the tools block.
|
||||
// Sort tools deterministically by name so the tools block in API requests is stable across
|
||||
// turns (defensive — listTools() order is usually stable but not guaranteed).
|
||||
// Note: sorting cannot fix name collisions, because the collision suffixes assigned above are order-dependent.
|
||||
tools.sort((a, b) => a.name.localeCompare(b.name));
|
||||
|
||||
return {
|
||||
|
||||
@@ -1570,9 +1570,9 @@ export async function runEmbeddedAttempt(
|
||||
(sessionManager.getLeafEntry() as { id?: string } | null | undefined)?.id ?? null;
|
||||
|
||||
try {
|
||||
// Idempotent cleanup for legacy sessions with persisted image payloads.
|
||||
// Only mutates user turns older than a few assistant replies so recent
|
||||
// history stays byte-identical for prompt-cache prefix matching.
|
||||
// Idempotent cleanup: prune old image blocks to limit context
|
||||
// growth. Only mutates turns older than a few assistant replies;
|
||||
// the delay also reduces prompt-cache churn.
|
||||
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
|
||||
if (didPruneImages) {
|
||||
activeSession.agent.state.messages = activeSession.messages;
|
||||
|
||||
@@ -3,9 +3,9 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
|
||||
|
||||
/**
|
||||
* Number of most-recent assistant turns whose preceding user/toolResult image blocks are
|
||||
* kept intact. Pruning these would diverge the request bytes from what the provider
|
||||
* cached on the previous turn, invalidating the prompt-cache prefix.
|
||||
* Number of most-recent completed turns whose preceding user/toolResult image
|
||||
* blocks are kept intact. Counts all completed turns, not just image-bearing
|
||||
* ones, so text-only turns consume the window.
|
||||
*/
|
||||
const PRESERVE_RECENT_COMPLETED_TURNS = 3;
|
||||
|
||||
@@ -46,10 +46,10 @@ function resolvePruneBeforeIndex(messages: AgentMessage[]): number {
|
||||
}
|
||||
|
||||
/**
|
||||
* Idempotent cleanup for legacy sessions that persisted image blocks in history.
|
||||
* Called each run; mutates only completed turns older than
|
||||
* {@link PRESERVE_RECENT_COMPLETED_TURNS} so recent turns remain
|
||||
* byte-identical for prompt caching.
|
||||
* Idempotent cleanup: prune persisted image blocks from completed turns older
|
||||
* than {@link PRESERVE_RECENT_COMPLETED_TURNS}. The delay also reduces
|
||||
* prompt-cache churn, though prefix stability additionally depends on the
|
||||
* replay sanitizer being idempotent.
|
||||
*/
|
||||
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
|
||||
const pruneBeforeIndex = resolvePruneBeforeIndex(messages);
|
||||
|
||||
@@ -108,8 +108,9 @@ function compactExistingToolResultsInPlace(params: {
|
||||
}
|
||||
|
||||
let reduced = 0;
|
||||
// Compact newest-first so the cached prefix stays intact: rewriting messages[k]
|
||||
// for small k invalidates the provider prompt cache from that point onward.
|
||||
// Compact newest-first so more of the cached prefix survives: rewriting
|
||||
// messages[k] for small k invalidates the provider prompt cache from that point onward.
|
||||
// Tradeoff: the model loses recent tool output rather than older output.
|
||||
for (let i = messages.length - 1; i >= 0; i--) {
|
||||
const msg = messages[i];
|
||||
if (!isToolResultMessage(msg)) {
|
||||
@@ -181,8 +182,8 @@ function enforceToolResultContextBudgetInPlace(params: {
|
||||
return;
|
||||
}
|
||||
|
||||
// Compact newest tool outputs first to preserve the cached prefix; stop once
|
||||
// the context is back under budget.
|
||||
// Compact newest tool outputs first so more of the cached prefix survives;
|
||||
// stop once the context is back under budget.
|
||||
compactExistingToolResultsInPlace({
|
||||
messages,
|
||||
charsNeeded: currentChars - contextBudgetChars,
|
||||
|
||||
Reference in New Issue
Block a user