docs: correct overstated prompt-cache comments from #58036 #58037 #58038 (#60633)

* docs: correct overstated prompt-cache comments from #58036 #58037 #58038
* docs: restore purpose context in MCP tool sort comment
* docs: drop misleading 'legacy' framing from image-prune comments
* docs: restore useful context stripped from image-prune comments
* docs: restore 'deterministically' in MCP tool sort comment
* docs: restore 'idempotent' at attempt.ts callsite
* docs: restore 'provider prompt cache' in context-guard comment
2026-04-20 21:23:23 +00:00 · 2026-04-05 00:32:51 -07:00
parent c2bf2cc2b7
commit b474e098d1
4 changed files with 18 additions and 17 deletions
--- a/src/agents/pi-bundle-mcp-materialize.ts
+++ b/src/agents/pi-bundle-mcp-materialize.ts
@@ -112,9 +112,9 @@ export async function materializeBundleMcpToolsForRun(params: {
    });
  }

-  // Sort tools deterministically by name so the tools block in API requests is
-  // stable across turns. MCP's listTools() does not guarantee order, and any
-  // change in the tools array busts the prompt cache at the tools block.
+  // Sort tools deterministically by name so the tools block in API requests is stable across
+  // turns (defensive — listTools() order is usually stable but not guaranteed).
+  // Cannot fix name collisions: collision suffixes above are order-dependent.
  tools.sort((a, b) => a.name.localeCompare(b.name));

  return {
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -1570,9 +1570,9 @@ export async function runEmbeddedAttempt(
          (sessionManager.getLeafEntry() as { id?: string } | null | undefined)?.id ?? null;

        try {
-          // Idempotent cleanup for legacy sessions with persisted image payloads.
-          // Only mutates user turns older than a few assistant replies so recent
-          // history stays byte-identical for prompt-cache prefix matching.
+          // Idempotent cleanup: prune old image blocks to limit context
+          // growth. Only mutates turns older than a few assistant replies;
+          // the delay also reduces prompt-cache churn.
          const didPruneImages = pruneProcessedHistoryImages(activeSession.messages);
          if (didPruneImages) {
            activeSession.agent.state.messages = activeSession.messages;
--- a/src/agents/pi-embedded-runner/run/history-image-prune.ts
+++ b/src/agents/pi-embedded-runner/run/history-image-prune.ts
@@ -3,9 +3,9 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
 export const PRUNED_HISTORY_IMAGE_MARKER = "[image data removed - already processed by model]";

 /**
- * Number of most-recent assistant turns whose preceding user/toolResult image blocks are
- * kept intact. Pruning these would diverge the request bytes from what the provider
- * cached on the previous turn, invalidating the prompt-cache prefix.
+ * Number of most-recent completed turns whose preceding user/toolResult image
+ * blocks are kept intact. Counts all completed turns, not just image-bearing
+ * ones, so text-only turns consume the window.
 */
 const PRESERVE_RECENT_COMPLETED_TURNS = 3;

@@ -46,10 +46,10 @@ function resolvePruneBeforeIndex(messages: AgentMessage[]): number {
 }

 /**
- * Idempotent cleanup for legacy sessions that persisted image blocks in history.
- * Called each run; mutates only completed turns older than
- * {@link PRESERVE_RECENT_COMPLETED_TURNS} so recent turns remain
- * byte-identical for prompt caching.
+ * Idempotent cleanup: prune persisted image blocks from completed turns older
+ * than {@link PRESERVE_RECENT_COMPLETED_TURNS}. The delay also reduces
+ * prompt-cache churn, though prefix stability additionally depends on the
+ * replay sanitizer being idempotent.
 */
 export function pruneProcessedHistoryImages(messages: AgentMessage[]): boolean {
  const pruneBeforeIndex = resolvePruneBeforeIndex(messages);
--- a/src/agents/pi-embedded-runner/tool-result-context-guard.ts
+++ b/src/agents/pi-embedded-runner/tool-result-context-guard.ts
@@ -108,8 +108,9 @@ function compactExistingToolResultsInPlace(params: {
  }

  let reduced = 0;
-  // Compact newest-first so the cached prefix stays intact: rewriting messages[k]
-  // for small k invalidates the provider prompt cache from that point onward.
+  // Compact newest-first so more of the cached prefix survives: rewriting
+  // messages[k] for small k invalidates the provider prompt cache from that point onward.
+  // Tradeoff: the model loses recent tool output instead of old.
  for (let i = messages.length - 1; i >= 0; i--) {
    const msg = messages[i];
    if (!isToolResultMessage(msg)) {
@@ -181,8 +182,8 @@ function enforceToolResultContextBudgetInPlace(params: {
    return;
  }

-  // Compact newest tool outputs first to preserve the cached prefix; stop once
-  // the context is back under budget.
+  // Compact newest tool outputs first so more of the cached prefix survives;
+  // stop once the context is back under budget.
  compactExistingToolResultsInPlace({
    messages,
    charsNeeded: currentChars - contextBudgetChars,