docs: correct overstated prompt-cache comments from #58036 #58037 #58038 (#60633)

* docs: correct overstated prompt-cache comments from #58036 #58037 #58038

* docs: restore purpose context in MCP tool sort comment

* docs: drop misleading 'legacy' framing from image-prune comments

* docs: restore useful context stripped from image-prune comments

* docs: restore 'deterministically' in MCP tool sort comment

* docs: restore 'idempotent' at attempt.ts callsite

* docs: restore 'provider prompt cache' in context-guard comment
This commit is contained in:
Ted Li
2026-04-05 00:32:51 -07:00
committed by GitHub
parent c2bf2cc2b7
commit b474e098d1
4 changed files with 18 additions and 17 deletions

View File

@@ -112,9 +112,9 @@ export async function materializeBundleMcpToolsForRun(params: {
});
}
// Sort tools deterministically by name so the tools block in API requests is
// stable across turns. MCP's listTools() does not guarantee order, and any
// change in the tools array busts the prompt cache at the tools block.
// Sort tools deterministically by name so the tools block in API requests is stable across
// turns (defensive — listTools() order is usually stable but not guaranteed).
// Note: this sort cannot fix name collisions, because the collision suffixes assigned above are order-dependent.
tools.sort((a, b) => a.name.localeCompare(b.name));
return {

View File

@@ -1570,9 +1570,9 @@ export async function runEmbeddedAttempt(
(sessionManager.getLeafEntry() as { id?: string } | null | undefined)?.id ?? null;
try {
// Idempotent cleanup for legacy sessions with persisted image payloads.
// Only mutates user turns older than a few assistant replies so recent
// history stays byte-identical for prompt-cache prefix matching.
// Idempotent cleanup: prune old image blocks to limit context
// growth. Only mutates turns older than a few assistant replies;
// deferring the prune this way also reduces prompt-cache churn.
const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
if (didPruneImages) {
activeSession.agent.state.messages = activeSession.messages;

View File

@@ -3,9 +3,9 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";
/**
* Number of most-recent assistant turns whose preceding user/toolResult image blocks are
* kept intact. Pruning these would diverge the request bytes from what the provider
* cached on the previous turn, invalidating the prompt-cache prefix.
* Number of most-recent completed turns whose preceding user/toolResult image
* blocks are kept intact. Counts all completed turns, not just image-bearing
* ones, so text-only turns consume the window.
*/
const PRESERVE_RECENT_COMPLETED_TURNS = 3;
@@ -46,10 +46,10 @@ function resolvePruneBeforeIndex(messages: AgentMessage[]): number {
}
/**
* Idempotent cleanup for legacy sessions that persisted image blocks in history.
* Called each run; mutates only completed turns older than
* {@link PRESERVE_RECENT_COMPLETED_TURNS} so recent turns remain
* byte-identical for prompt caching.
* Idempotent cleanup: prune persisted image blocks from completed turns older
* than {@link PRESERVE_RECENT_COMPLETED_TURNS}. Deferring the prune until then
* also reduces prompt-cache churn, though prefix stability additionally
* depends on the replay sanitizer being idempotent.
*/
export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
const pruneBeforeIndex = resolvePruneBeforeIndex(messages);

View File

@@ -108,8 +108,9 @@ function compactExistingToolResultsInPlace(params: {
}
let reduced = 0;
// Compact newest-first so the cached prefix stays intact: rewriting messages[k]
// for small k invalidates the provider prompt cache from that point onward.
// Compact newest-first so more of the cached prefix survives: rewriting
// messages[k] for small k invalidates the provider prompt cache from that point onward.
// Tradeoff: the model loses recent tool output rather than older output.
for (let i = messages.length - 1; i >= 0; i--) {
const msg = messages[i];
if (!isToolResultMessage(msg)) {
@@ -181,8 +182,8 @@ function enforceToolResultContextBudgetInPlace(params: {
return;
}
// Compact newest tool outputs first to preserve the cached prefix; stop once
// the context is back under budget.
// Compact newest tool outputs first so more of the cached prefix survives;
// stop once the context is back under budget.
compactExistingToolResultsInPlace({
messages,
charsNeeded: currentChars - contextBudgetChars,