mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-29 09:41:08 +00:00
522 lines
17 KiB
TypeScript
522 lines
17 KiB
TypeScript
import fs from "node:fs";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
|
import type { Api, Model } from "@mariozechner/pi-ai";
|
|
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
import { describe, expect, it, vi } from "vitest";
|
|
import { castAgentMessage } from "../test-helpers/agent-message-fixtures.js";
|
|
import {
|
|
getCompactionSafeguardRuntime,
|
|
setCompactionSafeguardRuntime,
|
|
} from "./compaction-safeguard-runtime.js";
|
|
import compactionSafeguardExtension, { __testing } from "./compaction-safeguard.js";
|
|
|
|
const {
|
|
collectToolFailures,
|
|
formatToolFailuresSection,
|
|
computeAdaptiveChunkRatio,
|
|
isOversizedForSummary,
|
|
readWorkspaceContextForSummary,
|
|
BASE_CHUNK_RATIO,
|
|
MIN_CHUNK_RATIO,
|
|
SAFETY_MARGIN,
|
|
} = __testing;
|
|
|
|
function stubSessionManager(): ExtensionContext["sessionManager"] {
|
|
const stub: ExtensionContext["sessionManager"] = {
|
|
getCwd: () => "/stub",
|
|
getSessionDir: () => "/stub",
|
|
getSessionId: () => "stub-id",
|
|
getSessionFile: () => undefined,
|
|
getLeafId: () => null,
|
|
getLeafEntry: () => undefined,
|
|
getEntry: () => undefined,
|
|
getLabel: () => undefined,
|
|
getBranch: () => [],
|
|
getHeader: () => null,
|
|
getEntries: () => [],
|
|
getTree: () => [],
|
|
getSessionName: () => undefined,
|
|
};
|
|
return stub;
|
|
}
|
|
|
|
function createAnthropicModelFixture(overrides: Partial<Model<Api>> = {}): Model<Api> {
|
|
return {
|
|
id: "claude-opus-4-5",
|
|
name: "Claude Opus 4.5",
|
|
provider: "anthropic",
|
|
api: "anthropic" as const,
|
|
baseUrl: "https://api.anthropic.com",
|
|
contextWindow: 200000,
|
|
maxTokens: 4096,
|
|
reasoning: false,
|
|
input: ["text"] as const,
|
|
cost: { input: 15, output: 75, cacheRead: 0, cacheWrite: 0 },
|
|
...overrides,
|
|
};
|
|
}
|
|
|
|
type CompactionHandler = (event: unknown, ctx: unknown) => Promise<unknown>;
|
|
const createCompactionHandler = () => {
|
|
let compactionHandler: CompactionHandler | undefined;
|
|
const mockApi = {
|
|
on: vi.fn((event: string, handler: CompactionHandler) => {
|
|
if (event === "session_before_compact") {
|
|
compactionHandler = handler;
|
|
}
|
|
}),
|
|
} as unknown as ExtensionAPI;
|
|
compactionSafeguardExtension(mockApi);
|
|
expect(compactionHandler).toBeDefined();
|
|
return compactionHandler as CompactionHandler;
|
|
};
|
|
|
|
const createCompactionEvent = (params: { messageText: string; tokensBefore: number }) => ({
|
|
preparation: {
|
|
messagesToSummarize: [
|
|
{ role: "user", content: params.messageText, timestamp: Date.now() },
|
|
] as AgentMessage[],
|
|
turnPrefixMessages: [] as AgentMessage[],
|
|
firstKeptEntryId: "entry-1",
|
|
tokensBefore: params.tokensBefore,
|
|
fileOps: {
|
|
read: [],
|
|
edited: [],
|
|
written: [],
|
|
},
|
|
},
|
|
customInstructions: "",
|
|
signal: new AbortController().signal,
|
|
});
|
|
|
|
const createCompactionContext = (params: {
|
|
sessionManager: ExtensionContext["sessionManager"];
|
|
getApiKeyMock: ReturnType<typeof vi.fn>;
|
|
}) =>
|
|
({
|
|
model: undefined,
|
|
sessionManager: params.sessionManager,
|
|
modelRegistry: {
|
|
getApiKey: params.getApiKeyMock,
|
|
},
|
|
}) as unknown as Partial<ExtensionContext>;
|
|
|
|
async function runCompactionScenario(params: {
|
|
sessionManager: ExtensionContext["sessionManager"];
|
|
event: unknown;
|
|
apiKey: string | null;
|
|
}) {
|
|
const compactionHandler = createCompactionHandler();
|
|
const getApiKeyMock = vi.fn().mockResolvedValue(params.apiKey);
|
|
const mockContext = createCompactionContext({
|
|
sessionManager: params.sessionManager,
|
|
getApiKeyMock,
|
|
});
|
|
const result = (await compactionHandler(params.event, mockContext)) as {
|
|
cancel?: boolean;
|
|
};
|
|
return { result, getApiKeyMock };
|
|
}
|
|
|
|
describe("compaction-safeguard tool failures", () => {
|
|
it("formats tool failures with meta and summary", () => {
|
|
const messages: AgentMessage[] = [
|
|
{
|
|
role: "toolResult",
|
|
toolCallId: "call-1",
|
|
toolName: "exec",
|
|
isError: true,
|
|
details: { status: "failed", exitCode: 1 },
|
|
content: [{ type: "text", text: "ENOENT: missing file" }],
|
|
timestamp: Date.now(),
|
|
},
|
|
{
|
|
role: "toolResult",
|
|
toolCallId: "call-2",
|
|
toolName: "read",
|
|
isError: false,
|
|
content: [{ type: "text", text: "ok" }],
|
|
timestamp: Date.now(),
|
|
},
|
|
];
|
|
|
|
const failures = collectToolFailures(messages);
|
|
expect(failures).toHaveLength(1);
|
|
|
|
const section = formatToolFailuresSection(failures);
|
|
expect(section).toContain("## Tool Failures");
|
|
expect(section).toContain("exec (status=failed exitCode=1): ENOENT: missing file");
|
|
});
|
|
|
|
it("dedupes by toolCallId and handles empty output", () => {
|
|
const messages: AgentMessage[] = [
|
|
{
|
|
role: "toolResult",
|
|
toolCallId: "call-1",
|
|
toolName: "exec",
|
|
isError: true,
|
|
details: { exitCode: 2 },
|
|
content: [],
|
|
timestamp: Date.now(),
|
|
},
|
|
{
|
|
role: "toolResult",
|
|
toolCallId: "call-1",
|
|
toolName: "exec",
|
|
isError: true,
|
|
content: [{ type: "text", text: "ignored" }],
|
|
timestamp: Date.now(),
|
|
},
|
|
];
|
|
|
|
const failures = collectToolFailures(messages);
|
|
expect(failures).toHaveLength(1);
|
|
|
|
const section = formatToolFailuresSection(failures);
|
|
expect(section).toContain("exec (exitCode=2): failed");
|
|
});
|
|
|
|
it("caps the number of failures and adds overflow line", () => {
|
|
const messages: AgentMessage[] = Array.from({ length: 9 }, (_, idx) => ({
|
|
role: "toolResult",
|
|
toolCallId: `call-${idx}`,
|
|
toolName: "exec",
|
|
isError: true,
|
|
content: [{ type: "text", text: `error ${idx}` }],
|
|
timestamp: Date.now(),
|
|
}));
|
|
|
|
const failures = collectToolFailures(messages);
|
|
const section = formatToolFailuresSection(failures);
|
|
expect(section).toContain("## Tool Failures");
|
|
expect(section).toContain("...and 1 more");
|
|
});
|
|
|
|
it("omits section when there are no tool failures", () => {
|
|
const messages: AgentMessage[] = [
|
|
{
|
|
role: "toolResult",
|
|
toolCallId: "ok",
|
|
toolName: "exec",
|
|
isError: false,
|
|
content: [{ type: "text", text: "ok" }],
|
|
timestamp: Date.now(),
|
|
},
|
|
];
|
|
|
|
const failures = collectToolFailures(messages);
|
|
const section = formatToolFailuresSection(failures);
|
|
expect(section).toBe("");
|
|
});
|
|
});
|
|
|
|
describe("computeAdaptiveChunkRatio", () => {
|
|
const CONTEXT_WINDOW = 200_000;
|
|
|
|
it("returns BASE_CHUNK_RATIO for normal messages", () => {
|
|
// Small messages: 1000 tokens each, well under 10% of context
|
|
const messages: AgentMessage[] = [
|
|
{ role: "user", content: "x".repeat(1000), timestamp: Date.now() },
|
|
castAgentMessage({
|
|
role: "assistant",
|
|
content: [{ type: "text", text: "y".repeat(1000) }],
|
|
timestamp: Date.now(),
|
|
}),
|
|
];
|
|
|
|
const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
|
|
expect(ratio).toBe(BASE_CHUNK_RATIO);
|
|
});
|
|
|
|
it("reduces ratio when average message > 10% of context", () => {
|
|
// Large messages: ~50K tokens each (25% of context)
|
|
const messages: AgentMessage[] = [
|
|
{ role: "user", content: "x".repeat(50_000 * 4), timestamp: Date.now() },
|
|
castAgentMessage({
|
|
role: "assistant",
|
|
content: [{ type: "text", text: "y".repeat(50_000 * 4) }],
|
|
timestamp: Date.now(),
|
|
}),
|
|
];
|
|
|
|
const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
|
|
expect(ratio).toBeLessThan(BASE_CHUNK_RATIO);
|
|
expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
|
|
});
|
|
|
|
it("respects MIN_CHUNK_RATIO floor", () => {
|
|
// Very large messages that would push ratio below minimum
|
|
const messages: AgentMessage[] = [
|
|
{ role: "user", content: "x".repeat(150_000 * 4), timestamp: Date.now() },
|
|
];
|
|
|
|
const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
|
|
expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
|
|
});
|
|
|
|
it("handles empty message array", () => {
|
|
const ratio = computeAdaptiveChunkRatio([], CONTEXT_WINDOW);
|
|
expect(ratio).toBe(BASE_CHUNK_RATIO);
|
|
});
|
|
|
|
it("handles single huge message", () => {
|
|
// Single massive message
|
|
const messages: AgentMessage[] = [
|
|
{ role: "user", content: "x".repeat(180_000 * 4), timestamp: Date.now() },
|
|
];
|
|
|
|
const ratio = computeAdaptiveChunkRatio(messages, CONTEXT_WINDOW);
|
|
expect(ratio).toBeGreaterThanOrEqual(MIN_CHUNK_RATIO);
|
|
expect(ratio).toBeLessThanOrEqual(BASE_CHUNK_RATIO);
|
|
});
|
|
});
|
|
|
|
describe("isOversizedForSummary", () => {
|
|
const CONTEXT_WINDOW = 200_000;
|
|
|
|
it("returns false for small messages", () => {
|
|
const msg: AgentMessage = {
|
|
role: "user",
|
|
content: "Hello, world!",
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
expect(isOversizedForSummary(msg, CONTEXT_WINDOW)).toBe(false);
|
|
});
|
|
|
|
it("returns true for messages > 50% of context", () => {
|
|
// Message with ~120K tokens (60% of 200K context)
|
|
// After safety margin (1.2x), effective is 144K which is > 100K (50%)
|
|
const msg: AgentMessage = {
|
|
role: "user",
|
|
content: "x".repeat(120_000 * 4),
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
expect(isOversizedForSummary(msg, CONTEXT_WINDOW)).toBe(true);
|
|
});
|
|
|
|
it("applies safety margin", () => {
|
|
// Message at exactly 50% of context before margin
|
|
// After SAFETY_MARGIN (1.2), it becomes 60% which is > 50%
|
|
const halfContextChars = (CONTEXT_WINDOW * 0.5) / SAFETY_MARGIN;
|
|
const msg: AgentMessage = {
|
|
role: "user",
|
|
content: "x".repeat(Math.floor(halfContextChars * 4)),
|
|
timestamp: Date.now(),
|
|
};
|
|
|
|
// With safety margin applied, this should be at the boundary
|
|
// The function checks if tokens * SAFETY_MARGIN > contextWindow * 0.5
|
|
const isOversized = isOversizedForSummary(msg, CONTEXT_WINDOW);
|
|
// Due to token estimation, this could be either true or false at the boundary
|
|
expect(typeof isOversized).toBe("boolean");
|
|
});
|
|
});
|
|
|
|
describe("compaction-safeguard runtime registry", () => {
|
|
it("stores and retrieves config by session manager identity", () => {
|
|
const sm = {};
|
|
setCompactionSafeguardRuntime(sm, { maxHistoryShare: 0.3 });
|
|
const runtime = getCompactionSafeguardRuntime(sm);
|
|
expect(runtime).toEqual({ maxHistoryShare: 0.3 });
|
|
});
|
|
|
|
it("returns null for unknown session manager", () => {
|
|
const sm = {};
|
|
expect(getCompactionSafeguardRuntime(sm)).toBeNull();
|
|
});
|
|
|
|
it("clears entry when value is null", () => {
|
|
const sm = {};
|
|
setCompactionSafeguardRuntime(sm, { maxHistoryShare: 0.7 });
|
|
expect(getCompactionSafeguardRuntime(sm)).not.toBeNull();
|
|
setCompactionSafeguardRuntime(sm, null);
|
|
expect(getCompactionSafeguardRuntime(sm)).toBeNull();
|
|
});
|
|
|
|
it("ignores non-object session managers", () => {
|
|
setCompactionSafeguardRuntime(null, { maxHistoryShare: 0.5 });
|
|
expect(getCompactionSafeguardRuntime(null)).toBeNull();
|
|
setCompactionSafeguardRuntime(undefined, { maxHistoryShare: 0.5 });
|
|
expect(getCompactionSafeguardRuntime(undefined)).toBeNull();
|
|
});
|
|
|
|
it("isolates different session managers", () => {
|
|
const sm1 = {};
|
|
const sm2 = {};
|
|
setCompactionSafeguardRuntime(sm1, { maxHistoryShare: 0.3 });
|
|
setCompactionSafeguardRuntime(sm2, { maxHistoryShare: 0.8 });
|
|
expect(getCompactionSafeguardRuntime(sm1)).toEqual({ maxHistoryShare: 0.3 });
|
|
expect(getCompactionSafeguardRuntime(sm2)).toEqual({ maxHistoryShare: 0.8 });
|
|
});
|
|
|
|
it("stores and retrieves model from runtime (fallback for compact.ts workflow)", () => {
|
|
const sm = {};
|
|
const model = createAnthropicModelFixture();
|
|
setCompactionSafeguardRuntime(sm, { model });
|
|
const retrieved = getCompactionSafeguardRuntime(sm);
|
|
expect(retrieved?.model).toEqual(model);
|
|
});
|
|
|
|
it("stores and retrieves contextWindowTokens from runtime", () => {
|
|
const sm = {};
|
|
setCompactionSafeguardRuntime(sm, { contextWindowTokens: 200000 });
|
|
const retrieved = getCompactionSafeguardRuntime(sm);
|
|
expect(retrieved?.contextWindowTokens).toBe(200000);
|
|
});
|
|
|
|
it("stores and retrieves combined runtime values", () => {
|
|
const sm = {};
|
|
const model = createAnthropicModelFixture();
|
|
setCompactionSafeguardRuntime(sm, {
|
|
maxHistoryShare: 0.6,
|
|
contextWindowTokens: 200000,
|
|
model,
|
|
});
|
|
const retrieved = getCompactionSafeguardRuntime(sm);
|
|
expect(retrieved).toEqual({
|
|
maxHistoryShare: 0.6,
|
|
contextWindowTokens: 200000,
|
|
model,
|
|
});
|
|
});
|
|
});
|
|
|
|
describe("compaction-safeguard extension model fallback", () => {
|
|
it("uses runtime.model when ctx.model is undefined (compact.ts workflow)", async () => {
|
|
// This test verifies the root-cause fix: when extensionRunner.initialize() is not called
|
|
// (as happens in compact.ts), ctx.model is undefined but runtime.model is available.
|
|
const sessionManager = stubSessionManager();
|
|
const model = createAnthropicModelFixture();
|
|
|
|
// Set up runtime with model (mimics buildEmbeddedExtensionPaths behavior)
|
|
setCompactionSafeguardRuntime(sessionManager, { model });
|
|
|
|
const mockEvent = createCompactionEvent({
|
|
messageText: "test message",
|
|
tokensBefore: 1000,
|
|
});
|
|
const { result, getApiKeyMock } = await runCompactionScenario({
|
|
sessionManager,
|
|
event: mockEvent,
|
|
apiKey: null,
|
|
});
|
|
|
|
expect(result).toEqual({ cancel: true });
|
|
|
|
// KEY ASSERTION: Prove the fallback path was exercised
|
|
// The handler should have called getApiKey with runtime.model (via ctx.model ?? runtime?.model)
|
|
expect(getApiKeyMock).toHaveBeenCalledWith(model);
|
|
|
|
// Verify runtime.model is still available (for completeness)
|
|
const retrieved = getCompactionSafeguardRuntime(sessionManager);
|
|
expect(retrieved?.model).toEqual(model);
|
|
});
|
|
|
|
it("cancels compaction when both ctx.model and runtime.model are undefined", async () => {
|
|
const sessionManager = stubSessionManager();
|
|
|
|
// Do NOT set runtime.model (both ctx.model and runtime.model will be undefined)
|
|
|
|
const mockEvent = createCompactionEvent({
|
|
messageText: "test",
|
|
tokensBefore: 500,
|
|
});
|
|
const { result, getApiKeyMock } = await runCompactionScenario({
|
|
sessionManager,
|
|
event: mockEvent,
|
|
apiKey: null,
|
|
});
|
|
|
|
expect(result).toEqual({ cancel: true });
|
|
|
|
// Verify early return: getApiKey should NOT have been called when both models are missing
|
|
expect(getApiKeyMock).not.toHaveBeenCalled();
|
|
});
|
|
});
|
|
|
|
describe("compaction-safeguard double-compaction guard", () => {
|
|
it("cancels compaction when there are no real messages to summarize", async () => {
|
|
const sessionManager = stubSessionManager();
|
|
const model = createAnthropicModelFixture();
|
|
setCompactionSafeguardRuntime(sessionManager, { model });
|
|
|
|
const mockEvent = {
|
|
preparation: {
|
|
messagesToSummarize: [] as AgentMessage[],
|
|
turnPrefixMessages: [] as AgentMessage[],
|
|
firstKeptEntryId: "entry-1",
|
|
tokensBefore: 1500,
|
|
fileOps: { read: [], edited: [], written: [] },
|
|
},
|
|
customInstructions: "",
|
|
signal: new AbortController().signal,
|
|
};
|
|
const { result, getApiKeyMock } = await runCompactionScenario({
|
|
sessionManager,
|
|
event: mockEvent,
|
|
apiKey: "sk-test",
|
|
});
|
|
expect(result).toEqual({ cancel: true });
|
|
expect(getApiKeyMock).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it("continues when messages include real conversation content", async () => {
|
|
const sessionManager = stubSessionManager();
|
|
const model = createAnthropicModelFixture();
|
|
setCompactionSafeguardRuntime(sessionManager, { model });
|
|
|
|
const mockEvent = createCompactionEvent({
|
|
messageText: "real message",
|
|
tokensBefore: 1500,
|
|
});
|
|
const { result, getApiKeyMock } = await runCompactionScenario({
|
|
sessionManager,
|
|
event: mockEvent,
|
|
apiKey: null,
|
|
});
|
|
expect(result).toEqual({ cancel: true });
|
|
expect(getApiKeyMock).toHaveBeenCalled();
|
|
});
|
|
});
|
|
|
|
async function expectWorkspaceSummaryEmptyForAgentsAlias(
|
|
createAlias: (outsidePath: string, agentsPath: string) => void,
|
|
) {
|
|
const root = fs.mkdtempSync(path.join(os.tmpdir(), "openclaw-compaction-summary-"));
|
|
const prevCwd = process.cwd();
|
|
try {
|
|
const outside = path.join(root, "outside-secret.txt");
|
|
fs.writeFileSync(outside, "secret");
|
|
createAlias(outside, path.join(root, "AGENTS.md"));
|
|
process.chdir(root);
|
|
await expect(readWorkspaceContextForSummary()).resolves.toBe("");
|
|
} finally {
|
|
process.chdir(prevCwd);
|
|
fs.rmSync(root, { recursive: true, force: true });
|
|
}
|
|
}
|
|
|
|
describe("readWorkspaceContextForSummary", () => {
|
|
it.runIf(process.platform !== "win32")(
|
|
"returns empty when AGENTS.md is a symlink escape",
|
|
async () => {
|
|
await expectWorkspaceSummaryEmptyForAgentsAlias((outside, agentsPath) => {
|
|
fs.symlinkSync(outside, agentsPath);
|
|
});
|
|
},
|
|
);
|
|
|
|
it.runIf(process.platform !== "win32")(
|
|
"returns empty when AGENTS.md is a hardlink alias",
|
|
async () => {
|
|
await expectWorkspaceSummaryEmptyForAgentsAlias((outside, agentsPath) => {
|
|
fs.linkSync(outside, agentsPath);
|
|
});
|
|
},
|
|
);
|
|
});
|