diff --git a/CHANGELOG.md b/CHANGELOG.md index bb464099b4d..2e34dbf87dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Security/Agents: keep overflow compaction retry budgeting global across tool-result truncation recovery so successful truncation cannot reset the overflow retry counter and amplify retry/cost cycles. This ships in the next npm release. Thanks @aether-ai-agent for reporting. - BlueBubbles/Security (optional beta iMessage plugin): require webhook token authentication for all BlueBubbles webhook requests (including loopback/proxied setups), removing passwordless webhook fallback behavior. Thanks @zpbrent. - iOS/Security: force `https://` for non-loopback manual gateway hosts during iOS onboarding to block insecure remote transport URLs. (#21969) Thanks @mbelinky. - Gateway/Security: remove shared-IP fallback for canvas endpoints and require token or session capability for canvas access. Thanks @thewilloftheshadow. 
diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index 6aac2ea77e1..1dc794baa81 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -2,11 +2,20 @@ import "./run.overflow-compaction.mocks.shared.js"; import { beforeEach, describe, expect, it, vi } from "vitest"; import { compactEmbeddedPiSessionDirect } from "./compact.js"; import { runEmbeddedPiAgent } from "./run.js"; -import { mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js"; +import { makeAttemptResult, mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js"; import { runEmbeddedAttempt } from "./run/attempt.js"; +import type { EmbeddedRunAttemptResult } from "./run/types.js"; +import { + sessionLikelyHasOversizedToolResults, + truncateOversizedToolResultsInSession, +} from "./tool-result-truncation.js"; const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt); const mockedCompactDirect = vi.mocked(compactEmbeddedPiSessionDirect); +const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOversizedToolResults); +const mockedTruncateOversizedToolResultsInSession = vi.mocked( + truncateOversizedToolResultsInSession, +); describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { beforeEach(() => { @@ -37,4 +46,64 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { }), ); }); + + it("does not reset compaction attempt budget after successful tool-result truncation", async () => { /* Regression test. Mock setup: every queued attempt result carries the same overflow promptError; the first compaction call reports "nothing to compact" and the next two report success. */ + const overflowError = new Error("request_too_large: Request size exceeds model context window"); + + mockedRunEmbeddedAttempt + .mockResolvedValueOnce( + makeAttemptResult({ + promptError: overflowError, + messagesSnapshot: [ + { + role: "assistant", + content: "big tool output", + } as unknown as EmbeddedRunAttemptResult["messagesSnapshot"][number], + ], 
+ }), + ) + .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })) + .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })) + .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })) + // Keep one extra mocked response so legacy reset behavior does not crash the test. + .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })); + + mockedCompactDirect + .mockResolvedValueOnce({ + ok: false, + compacted: false, + reason: "nothing to compact", + }) + .mockResolvedValueOnce({ + ok: true, + compacted: true, + result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 }, + }) + .mockResolvedValueOnce({ + ok: true, + compacted: true, + result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 }, + }); + /* Mock the oversized-tool-result check to return true and the truncation helper to resolve once with a single truncated result. */ + mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true); + mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({ + truncated: true, + truncatedCount: 1, + }); + + const result = await runEmbeddedPiAgent({ + sessionId: "test-session", + sessionKey: "test-key", + sessionFile: "/tmp/session.json", + workspaceDir: "/tmp/workspace", + prompt: "hello", + timeoutMs: 30000, + runId: "run-1", + }); + /* Expected totals: 3 compaction calls, 1 truncation call, and 4 attempts (the fifth queued attempt result must stay unused), ending in a context_overflow error. */ + expect(mockedCompactDirect).toHaveBeenCalledTimes(3); + expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledTimes(1); + expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4); + expect(result.meta.error?.kind).toBe("context_overflow"); + }); });