test(security): add overflow compaction truncation-budget regression

This commit is contained in:
Peter Steinberger
2026-02-21 12:58:44 +01:00
parent 084f621025
commit b577228d6b
2 changed files with 71 additions and 1 deletions

View File

@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Security/Agents: keep overflow compaction retry budgeting global across tool-result truncation recovery so successful truncation cannot reset the overflow retry counter and amplify retry/cost cycles. This ships in the next npm release. Thanks @aether-ai-agent for reporting.
- BlueBubbles/Security (optional beta iMessage plugin): require webhook token authentication for all BlueBubbles webhook requests (including loopback/proxied setups), removing passwordless webhook fallback behavior. Thanks @zpbrent.
- iOS/Security: force `https://` for non-loopback manual gateway hosts during iOS onboarding to block insecure remote transport URLs. (#21969) Thanks @mbelinky.
- Gateway/Security: remove shared-IP fallback for canvas endpoints and require token or session capability for canvas access. Thanks @thewilloftheshadow.

View File

@@ -2,11 +2,20 @@ import "./run.overflow-compaction.mocks.shared.js";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { compactEmbeddedPiSessionDirect } from "./compact.js";
import { runEmbeddedPiAgent } from "./run.js";
import { mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js";
import { makeAttemptResult, mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js";
import { runEmbeddedAttempt } from "./run/attempt.js";
import type { EmbeddedRunAttemptResult } from "./run/types.js";
import {
sessionLikelyHasOversizedToolResults,
truncateOversizedToolResultsInSession,
} from "./tool-result-truncation.js";
// Typed mock handles for the imported functions, so each test can queue return
// values on them and assert on how often they were called.
// NOTE(review): presumably the underlying modules are replaced with mocks by the
// "./run.overflow-compaction.mocks.shared.js" side-effect import — confirm.
const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);
const mockedCompactDirect = vi.mocked(compactEmbeddedPiSessionDirect);
// Handles for the tool-result truncation helpers from "./tool-result-truncation.js".
const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOversizedToolResults);
const mockedTruncateOversizedToolResultsInSession = vi.mocked(
truncateOversizedToolResultsInSession,
);
describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
beforeEach(() => {
@@ -37,4 +46,64 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
}),
);
});
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
  // Regression guard for the overflow retry budget: a successful tool-result
  // truncation must not reset the overflow compaction counter. Every mocked
  // attempt fails with the same overflow error, so the run can only end once the
  // budget is spent.
  const overflowError = new Error("request_too_large: Request size exceeds model context window");
  const snapshotEntry = {
    role: "assistant",
    content: "big tool output",
  } as unknown as EmbeddedRunAttemptResult["messagesSnapshot"][number];

  // First attempt: overflow error plus a one-message snapshot.
  mockedRunEmbeddedAttempt.mockResolvedValueOnce(
    makeAttemptResult({ promptError: overflowError, messagesSnapshot: [snapshotEntry] }),
  );
  // Four further overflow attempts are queued. Only three are expected to be
  // consumed; the last one is a spare so that the legacy reset behavior fails on
  // the call-count assertion below rather than crashing the test.
  for (let queued = 0; queued < 4; queued += 1) {
    mockedRunEmbeddedAttempt.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
  }

  // Compaction outcomes, in call order: one "nothing to compact" failure followed
  // by two successful compactions.
  mockedCompactDirect.mockResolvedValueOnce({
    ok: false,
    compacted: false,
    reason: "nothing to compact",
  });
  mockedCompactDirect.mockResolvedValueOnce({
    ok: true,
    compacted: true,
    result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
  });
  mockedCompactDirect.mockResolvedValueOnce({
    ok: true,
    compacted: true,
    result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
  });

  // The session always reports oversized tool results; truncation reports success
  // for its first (and only expected) call.
  mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true);
  mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
    truncated: true,
    truncatedCount: 1,
  });

  const { meta } = await runEmbeddedPiAgent({
    sessionId: "test-session",
    sessionKey: "test-key",
    sessionFile: "/tmp/session.json",
    workspaceDir: "/tmp/workspace",
    prompt: "hello",
    timeoutMs: 30000,
    runId: "run-1",
  });

  // Three compaction calls, one truncation and four attempts in total; a fifth
  // attempt would mean the budget was reset after the truncation succeeded.
  expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
  expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledTimes(1);
  expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
  expect(meta.error?.kind).toBe("context_overflow");
});
});