test(security): add overflow compaction truncation-budget regression

2026-03-07 22:44:16 +00:00 · 2026-02-21 12:58:44 +01:00
parent 084f621025
commit b577228d6b
2 changed files with 71 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai

 ### Fixes

+- Security/Agents: keep overflow compaction retry budgeting global across tool-result truncation recovery so successful truncation cannot reset the overflow retry counter and amplify retry/cost cycles. This ships in the next npm release. Thanks @aether-ai-agent for reporting.
 - BlueBubbles/Security (optional beta iMessage plugin): require webhook token authentication for all BlueBubbles webhook requests (including loopback/proxied setups), removing passwordless webhook fallback behavior. Thanks @zpbrent.
 - iOS/Security: force `https://` for non-loopback manual gateway hosts during iOS onboarding to block insecure remote transport URLs. (#21969) Thanks @mbelinky.
 - Gateway/Security: remove shared-IP fallback for canvas endpoints and require token or session capability for canvas access. Thanks @thewilloftheshadow.
--- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
+++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
@@ -2,11 +2,20 @@ import "./run.overflow-compaction.mocks.shared.js";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import { compactEmbeddedPiSessionDirect } from "./compact.js";
 import { runEmbeddedPiAgent } from "./run.js";
-import { mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js";
+import { makeAttemptResult, mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js";
 import { runEmbeddedAttempt } from "./run/attempt.js";
+import type { EmbeddedRunAttemptResult } from "./run/types.js";
+import {
+  sessionLikelyHasOversizedToolResults,
+  truncateOversizedToolResultsInSession,
+} from "./tool-result-truncation.js";

 const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);
 const mockedCompactDirect = vi.mocked(compactEmbeddedPiSessionDirect);
+const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOversizedToolResults);
+const mockedTruncateOversizedToolResultsInSession = vi.mocked(
+  truncateOversizedToolResultsInSession,
+);

 describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
  beforeEach(() => {
@@ -37,4 +46,64 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
      }),
    );
  });
+
+  it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
+    const overflowError = new Error("request_too_large: Request size exceeds model context window");
+
+    mockedRunEmbeddedAttempt
+      .mockResolvedValueOnce(
+        makeAttemptResult({
+          promptError: overflowError,
+          messagesSnapshot: [
+            {
+              role: "assistant",
+              content: "big tool output",
+            } as unknown as EmbeddedRunAttemptResult["messagesSnapshot"][number],
+          ],
+        }),
+      )
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
+      // Keep one extra mocked response so legacy reset behavior does not crash the test.
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
+
+    mockedCompactDirect
+      .mockResolvedValueOnce({
+        ok: false,
+        compacted: false,
+        reason: "nothing to compact",
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        compacted: true,
+        result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
+      })
+      .mockResolvedValueOnce({
+        ok: true,
+        compacted: true,
+        result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
+      });
+
+    mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true);
+    mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
+      truncated: true,
+      truncatedCount: 1,
+    });
+
+    const result = await runEmbeddedPiAgent({
+      sessionId: "test-session",
+      sessionKey: "test-key",
+      sessionFile: "/tmp/session.json",
+      workspaceDir: "/tmp/workspace",
+      prompt: "hello",
+      timeoutMs: 30000,
+      runId: "run-1",
+    });
+
+    expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
+    expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledTimes(1);
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
+    expect(result.meta.error?.kind).toBe("context_overflow");
+  });
 });