mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-23 14:45:46 +00:00
test: isolate MCP live cache probe
This commit is contained in:
@@ -24,12 +24,54 @@ const env = {
|
||||
OPENCLAW_LIVE_TEST_QUIET: quietOverride ?? process.env.OPENCLAW_LIVE_TEST_QUIET ?? "1",
|
||||
};
|
||||
|
||||
/**
 * Parse an environment-style value as a strictly positive base-10 integer.
 *
 * The value must consist only of decimal digits (an optional leading `+` and
 * surrounding whitespace are tolerated). Anything else yields `fallback`.
 *
 * @param value    Raw string, typically read from `process.env`; may be undefined/empty.
 * @param fallback Value returned when `value` is missing, malformed, zero, or negative.
 * @returns The parsed positive integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  if (!value) {
    return fallback;
  }
  const trimmed = String(value).trim();
  // Number.parseInt stops at the first non-digit, so "1e4" would silently become 1
  // and "20_000" would become 20. Reject anything that is not purely digits.
  if (!/^\+?\d+$/.test(trimmed)) {
    return fallback;
  }
  const parsed = Number.parseInt(trimmed, 10);
  // Digit strings too long to be represented exactly are treated as invalid.
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}
|
||||
|
||||
// Heartbeat window in milliseconds; overridable via OPENCLAW_LIVE_WRAPPER_HEARTBEAT_MS.
const heartbeatMs = parsePositiveInt(process.env.OPENCLAW_LIVE_WRAPPER_HEARTBEAT_MS, 20_000);

// Wall-clock start of the wrapper, and the time output was last observed.
const startedAt = Date.now();
let lastOutputAt = startedAt;

// Run the live vitest suite through pnpm. stdout/stderr are piped (not inherited)
// so this wrapper can observe output before forwarding it.
const child = spawnPnpmRunner({
  env,
  pnpmArgs: ["exec", "vitest", "run", "--config", "vitest.live.config.ts", ...forwardedArgs],
  stdio: ["inherit", "pipe", "pipe"],
});
|
||||
|
||||
/** Record that output was just observed by stamping `lastOutputAt` with the current time. */
const noteOutput = () => void (lastOutputAt = Date.now());
|
||||
|
||||
// Forward each piped child stream to the matching stream of this process,
// noting the activity first so the heartbeat timer sees it.
for (const [source, sink] of [
  [child.stdout, process.stdout],
  [child.stderr, process.stderr],
]) {
  source?.on("data", (chunk) => {
    noteOutput();
    sink.write(chunk);
  });
}
|
||||
|
||||
// Time of the most recent heartbeat line. Tracked separately from lastOutputAt so
// that printing a heartbeat never resets the "since last output" measurement.
let lastHeartbeatAt = startedAt;

/**
 * Periodic liveness reporter for long, quiet runs.
 *
 * Every `heartbeatMs` the timer checks whether anything (child output or a
 * previous heartbeat) was written within the last `heartbeatMs`; if not, it
 * prints a "still running" line to stderr with the total elapsed time and the
 * time since the child last produced output.
 */
const heartbeat = setInterval(() => {
  const now = Date.now();
  // Throttle: stay silent if the child wrote output, or we already printed a
  // heartbeat, within the current window.
  if (now - Math.max(lastOutputAt, lastHeartbeatAt) < heartbeatMs) {
    return;
  }
  const elapsedSec = Math.max(1, Math.round((now - startedAt) / 1_000));
  // Measured from the last observed output, not from the last heartbeat line.
  const quietSec = Math.max(1, Math.round((now - lastOutputAt) / 1_000));
  process.stderr.write(
    `[test:live] still running (${elapsedSec}s elapsed, ${quietSec}s since last output)\n`,
  );
  lastHeartbeatAt = now;
}, heartbeatMs);
// Do not let this timer alone keep the wrapper process alive.
heartbeat.unref?.();
|
||||
|
||||
child.on("exit", (code, signal) => {
|
||||
clearInterval(heartbeat);
|
||||
if (signal) {
|
||||
process.kill(process.pid, signal);
|
||||
return;
|
||||
@@ -38,6 +80,7 @@ child.on("exit", (code, signal) => {
|
||||
});
|
||||
|
||||
/** Handle an `error` event from the child: stop the heartbeat, log the error, exit with code 1. */
function handleChildError(spawnError) {
  clearInterval(heartbeat);
  console.error(spawnError);
  process.exit(1);
}

child.on("error", handleChildError);
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import fs from "node:fs/promises";
|
||||
import type { AssistantMessage, Message, Tool } from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { beforeAll, describe, expect, it } from "vitest";
|
||||
@@ -19,6 +20,10 @@ const OPENAI_SESSION_ID = "live-cache-openai-stable-session";
|
||||
const ANTHROPIC_SESSION_ID = "live-cache-anthropic-stable-session";
|
||||
const OPENAI_PREFIX = buildStableCachePrefix("openai");
|
||||
const ANTHROPIC_PREFIX = buildStableCachePrefix("anthropic");
|
||||
const LIVE_TEST_PNG_URL = new URL(
|
||||
"../../apps/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png",
|
||||
import.meta.url,
|
||||
);
|
||||
|
||||
type CacheRun = {
|
||||
hitRate: number;
|
||||
@@ -32,17 +37,50 @@ const NOOP_TOOL: Tool = {
|
||||
description: "Return ok.",
|
||||
parameters: Type.Object({}, { additionalProperties: false }),
|
||||
};
|
||||
let liveTestPngBase64 = "";
|
||||
|
||||
type UserContent = Extract<Message, { role: "user" }>["content"];
|
||||
|
||||
/**
 * Build a synthetic assistant turn holding a single text block, timestamped
 * with the current time, for use as canned conversation history.
 */
function makeAssistantHistoryTurn(text: string): Message {
  const textBlock = { type: "text" as const, text };
  return { role: "assistant", content: [textBlock], timestamp: Date.now() };
}
|
||||
|
||||
/**
 * Build a synthetic user turn with the given content, timestamped with the
 * current time, for use as canned conversation history.
 */
function makeUserHistoryTurn(content: UserContent): Message {
  const createdAt = Date.now();
  return { role: "user", content, timestamp: createdAt };
}
|
||||
|
||||
/**
 * Build a user turn that pairs `text` with the preloaded test PNG (base64).
 *
 * @throws Error when the PNG has not been loaded into `liveTestPngBase64` yet.
 */
function makeImageUserTurn(text: string): Message {
  const pngBase64 = liveTestPngBase64;
  if (pngBase64 === "") {
    throw new Error("live test PNG not loaded");
  }
  const textPart = { type: "text" as const, text };
  const imagePart = { type: "image" as const, mimeType: "image/png", data: pngBase64 };
  return makeUserHistoryTurn([textPart, imagePart]);
}
|
||||
|
||||
/** Return the first `toolCall` content block of an assistant message, or `undefined` if there is none. */
function extractFirstToolCall(message: AssistantMessage) {
  const isToolCall = (block: AssistantMessage["content"][number]) => block.type === "toolCall";
  const { content } = message;
  return content.find(isToolCall);
}
|
||||
|
||||
function buildToolResultMessage(toolCallId: string): Extract<Message, { role: "toolResult" }> {
|
||||
function buildToolResultMessage(
|
||||
toolCallId: string,
|
||||
toolName = "noop",
|
||||
text = "ok",
|
||||
): Extract<Message, { role: "toolResult" }> {
|
||||
return {
|
||||
role: "toolResult",
|
||||
toolCallId,
|
||||
toolName: "noop",
|
||||
content: [{ type: "text", text: "ok" }],
|
||||
toolName,
|
||||
content: [{ type: "text", text }],
|
||||
isError: false,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
@@ -55,9 +93,9 @@ async function runToolOnlyTurn(params: {
|
||||
providerTag: "anthropic" | "openai";
|
||||
sessionId: string;
|
||||
systemPrompt: string;
|
||||
tool: Tool;
|
||||
}) {
|
||||
let prompt =
|
||||
"Call the tool `noop` with {}. IMPORTANT: respond ONLY with the tool call and no other text.";
|
||||
let prompt = `Call the tool \`${params.tool.name}\` with {}. IMPORTANT: respond ONLY with the tool call and no other text.`;
|
||||
let response = await completeSimpleWithLiveTimeout(
|
||||
params.model,
|
||||
{
|
||||
@@ -69,7 +107,7 @@ async function runToolOnlyTurn(params: {
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
tools: [NOOP_TOOL],
|
||||
tools: [params.tool],
|
||||
},
|
||||
{
|
||||
apiKey: params.apiKey,
|
||||
@@ -77,6 +115,7 @@ async function runToolOnlyTurn(params: {
|
||||
sessionId: params.sessionId,
|
||||
maxTokens: 128,
|
||||
temperature: 0,
|
||||
...(params.providerTag === "openai" ? { reasoning: "none" as unknown as never } : {}),
|
||||
},
|
||||
`${params.providerTag} tool-only turn`,
|
||||
params.providerTag === "openai" ? OPENAI_TIMEOUT_MS : ANTHROPIC_TIMEOUT_MS,
|
||||
@@ -85,7 +124,7 @@ async function runToolOnlyTurn(params: {
|
||||
let toolCall = extractFirstToolCall(response);
|
||||
let text = extractAssistantText(response);
|
||||
for (let attempt = 0; attempt < 2 && (!toolCall || text.length > 0); attempt += 1) {
|
||||
prompt = "Return only a tool call for `noop` with {}. No text.";
|
||||
prompt = `Return only a tool call for \`${params.tool.name}\` with {}. No text.`;
|
||||
response = await completeSimpleWithLiveTimeout(
|
||||
params.model,
|
||||
{
|
||||
@@ -97,7 +136,7 @@ async function runToolOnlyTurn(params: {
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
],
|
||||
tools: [NOOP_TOOL],
|
||||
tools: [params.tool],
|
||||
},
|
||||
{
|
||||
apiKey: params.apiKey,
|
||||
@@ -105,6 +144,7 @@ async function runToolOnlyTurn(params: {
|
||||
sessionId: params.sessionId,
|
||||
maxTokens: 128,
|
||||
temperature: 0,
|
||||
...(params.providerTag === "openai" ? { reasoning: "none" as unknown as never } : {}),
|
||||
},
|
||||
`${params.providerTag} tool-only retry ${attempt + 1}`,
|
||||
params.providerTag === "openai" ? OPENAI_TIMEOUT_MS : ANTHROPIC_TIMEOUT_MS,
|
||||
@@ -139,6 +179,7 @@ async function runOpenAiToolCacheProbe(params: {
|
||||
providerTag: "openai",
|
||||
sessionId: params.sessionId,
|
||||
systemPrompt: OPENAI_PREFIX,
|
||||
tool: NOOP_TOOL,
|
||||
});
|
||||
const response = await completeSimpleWithLiveTimeout(
|
||||
params.model,
|
||||
@@ -151,7 +192,10 @@ async function runOpenAiToolCacheProbe(params: {
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
toolTurn.response,
|
||||
buildToolResultMessage(toolTurn.toolCall.id),
|
||||
buildToolResultMessage(toolTurn.toolCall.id, NOOP_TOOL.name, "ok"),
|
||||
makeAssistantHistoryTurn("TOOL HISTORY ACKNOWLEDGED"),
|
||||
makeUserHistoryTurn("Keep the tool output stable in history."),
|
||||
makeAssistantHistoryTurn("TOOL HISTORY PRESERVED"),
|
||||
{
|
||||
role: "user",
|
||||
content: `Reply with exactly CACHE-OK ${params.suffix}.`,
|
||||
@@ -166,6 +210,7 @@ async function runOpenAiToolCacheProbe(params: {
|
||||
sessionId: params.sessionId,
|
||||
maxTokens: 64,
|
||||
temperature: 0,
|
||||
reasoning: "none" as unknown as never,
|
||||
},
|
||||
`openai cache probe ${params.suffix}`,
|
||||
OPENAI_TIMEOUT_MS,
|
||||
@@ -218,6 +263,47 @@ async function runOpenAiCacheProbe(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Send one OpenAI request whose history starts with an image-bearing user turn
 * and ends with a "reply with CACHE-OK <suffix>" instruction, then report the
 * usage and computed cache hit rate.
 *
 * Asserts (case-insensitively) that the reply text contains `suffix`.
 */
async function runOpenAiImageCacheProbe(params: {
  apiKey: string;
  model: Awaited<ReturnType<typeof resolveLiveDirectModel>>["model"];
  sessionId: string;
  suffix: string;
}): Promise<CacheRun> {
  const { apiKey, model, sessionId, suffix } = params;

  // Fixed history; only the final user turn varies with the suffix.
  const history: Message[] = [
    makeImageUserTurn("An image is attached. Ignore image semantics but keep the bytes in history."),
    makeAssistantHistoryTurn("IMAGE HISTORY ACKNOWLEDGED"),
    makeUserHistoryTurn("Keep the earlier image turn stable in context."),
    makeAssistantHistoryTurn("IMAGE HISTORY PRESERVED"),
    makeUserHistoryTurn(`Reply with exactly CACHE-OK ${suffix}.`),
  ];

  const response = await completeSimpleWithLiveTimeout(
    model,
    { systemPrompt: OPENAI_PREFIX, messages: history },
    {
      apiKey,
      cacheRetention: "short",
      sessionId,
      maxTokens: 64,
      temperature: 0,
      reasoning: "none" as unknown as never,
    },
    `openai image cache probe ${suffix}`,
    OPENAI_TIMEOUT_MS,
  );

  const text = extractAssistantText(response);
  expect(text.toLowerCase()).toContain(suffix.toLowerCase());

  const { usage } = response;
  return { suffix, text, usage, hitRate: computeCacheHitRate(usage) };
}
|
||||
|
||||
async function runAnthropicCacheProbe(params: {
|
||||
apiKey: string;
|
||||
model: Awaited<ReturnType<typeof resolveLiveDirectModel>>["model"];
|
||||
@@ -271,6 +357,7 @@ async function runAnthropicToolCacheProbe(params: {
|
||||
providerTag: "anthropic",
|
||||
sessionId: params.sessionId,
|
||||
systemPrompt: ANTHROPIC_PREFIX,
|
||||
tool: NOOP_TOOL,
|
||||
});
|
||||
const response = await completeSimpleWithLiveTimeout(
|
||||
params.model,
|
||||
@@ -283,7 +370,10 @@ async function runAnthropicToolCacheProbe(params: {
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
toolTurn.response,
|
||||
buildToolResultMessage(toolTurn.toolCall.id),
|
||||
buildToolResultMessage(toolTurn.toolCall.id, NOOP_TOOL.name, "ok"),
|
||||
makeAssistantHistoryTurn("TOOL HISTORY ACKNOWLEDGED"),
|
||||
makeUserHistoryTurn("Keep the tool output stable in history."),
|
||||
makeAssistantHistoryTurn("TOOL HISTORY PRESERVED"),
|
||||
{
|
||||
role: "user",
|
||||
content: `Reply with exactly CACHE-OK ${params.suffix}.`,
|
||||
@@ -312,7 +402,52 @@ async function runAnthropicToolCacheProbe(params: {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Send one Anthropic request whose history starts with an image-bearing user
 * turn and ends with a "reply with CACHE-OK <suffix>" instruction, using the
 * caller-selected cache retention, then report usage and cache hit rate.
 *
 * Asserts (case-insensitively) that the reply text contains `suffix`.
 */
async function runAnthropicImageCacheProbe(params: {
  apiKey: string;
  model: Awaited<ReturnType<typeof resolveLiveDirectModel>>["model"];
  sessionId: string;
  suffix: string;
  cacheRetention: "none" | "short" | "long";
}): Promise<CacheRun> {
  const { apiKey, model, sessionId, suffix, cacheRetention } = params;

  // Fixed history; only the final user turn varies with the suffix.
  const history: Message[] = [
    makeImageUserTurn("An image is attached. Ignore image semantics but keep the bytes in history."),
    makeAssistantHistoryTurn("IMAGE HISTORY ACKNOWLEDGED"),
    makeUserHistoryTurn("Keep the earlier image turn stable in context."),
    makeAssistantHistoryTurn("IMAGE HISTORY PRESERVED"),
    makeUserHistoryTurn(`Reply with exactly CACHE-OK ${suffix}.`),
  ];

  const response = await completeSimpleWithLiveTimeout(
    model,
    { systemPrompt: ANTHROPIC_PREFIX, messages: history },
    { apiKey, cacheRetention, sessionId, maxTokens: 64, temperature: 0 },
    `anthropic image cache probe ${suffix} (${cacheRetention})`,
    ANTHROPIC_TIMEOUT_MS,
  );

  const text = extractAssistantText(response);
  expect(text.toLowerCase()).toContain(suffix.toLowerCase());

  const { usage } = response;
  return { suffix, text, usage, hitRate: computeCacheHitRate(usage) };
}
|
||||
|
||||
describeCacheLive("pi embedded runner prompt caching (live)", () => {
|
||||
beforeAll(async () => {
|
||||
liveTestPngBase64 = (await fs.readFile(LIVE_TEST_PNG_URL)).toString("base64");
|
||||
}, 120_000);
|
||||
|
||||
describe("openai", () => {
|
||||
let fixture: Awaited<ReturnType<typeof resolveLiveDirectModel>>;
|
||||
|
||||
@@ -396,6 +531,39 @@ describeCacheLive("pi embedded runner prompt caching (live)", () => {
|
||||
},
|
||||
8 * 60_000,
|
||||
);
|
||||
|
||||
// Warm the cache with one image probe, run two follow-up probes on the same
// session, and require the better of the two to show substantial cache reads.
it(
  "keeps high cache-read rates across image-heavy followup turns",
  async () => {
    const sessionId = `${OPENAI_SESSION_ID}-image`;
    const probe = (suffix: string) => runOpenAiImageCacheProbe({ ...fixture, sessionId, suffix });

    const warmup = await probe("image-warmup");
    logLiveCache(
      `openai image warmup cacheRead=${warmup.usage.cacheRead} input=${warmup.usage.input} rate=${warmup.hitRate.toFixed(3)}`,
    );

    // Probes run sequentially so each one sees the state left by the previous.
    const hitA = await probe("image-hit-a");
    const hitB = await probe("image-hit-b");

    const readA = hitA.usage.cacheRead ?? 0;
    const readB = hitB.usage.cacheRead ?? 0;
    const bestHit = readA >= readB ? hitA : hitB;
    logLiveCache(
      `openai image best-hit suffix=${bestHit.suffix} cacheRead=${bestHit.usage.cacheRead} input=${bestHit.usage.input} rate=${bestHit.hitRate.toFixed(3)}`,
    );

    expect(bestHit.usage.cacheRead ?? 0).toBeGreaterThan(1_024);
    expect(bestHit.hitRate).toBeGreaterThanOrEqual(0.6);
  },
  6 * 60_000,
);
|
||||
});
|
||||
|
||||
describe("anthropic", () => {
|
||||
@@ -490,6 +658,42 @@ describeCacheLive("pi embedded runner prompt caching (live)", () => {
|
||||
8 * 60_000,
|
||||
);
|
||||
|
||||
// Warm the cache with one image probe (short retention), run two follow-up
// probes on the same session, and require the better one to show substantial cache reads.
it(
  "keeps high cache-read rates across image-heavy followup turns",
  async () => {
    const sessionId = `${ANTHROPIC_SESSION_ID}-image`;
    const probe = (suffix: string) =>
      runAnthropicImageCacheProbe({ ...fixture, sessionId, suffix, cacheRetention: "short" });

    const warmup = await probe("image-warmup");
    logLiveCache(
      `anthropic image warmup cacheWrite=${warmup.usage.cacheWrite} cacheRead=${warmup.usage.cacheRead} input=${warmup.usage.input} rate=${warmup.hitRate.toFixed(3)}`,
    );

    // Probes run sequentially so each one sees the state left by the previous.
    const hitA = await probe("image-hit-a");
    const hitB = await probe("image-hit-b");

    const readA = hitA.usage.cacheRead ?? 0;
    const readB = hitB.usage.cacheRead ?? 0;
    const bestHit = readA >= readB ? hitA : hitB;
    logLiveCache(
      `anthropic image best-hit suffix=${bestHit.suffix} cacheWrite=${bestHit.usage.cacheWrite} cacheRead=${bestHit.usage.cacheRead} input=${bestHit.usage.input} rate=${bestHit.hitRate.toFixed(3)}`,
    );

    expect(bestHit.usage.cacheRead ?? 0).toBeGreaterThan(1_024);
    expect(bestHit.hitRate).toBeGreaterThanOrEqual(0.6);
  },
  6 * 60_000,
);
|
||||
|
||||
it(
|
||||
"does not report meaningful cache activity when retention is disabled",
|
||||
async () => {
|
||||
|
||||
210
src/agents/pi-mcp-style.cache.live.test.ts
Normal file
210
src/agents/pi-mcp-style.cache.live.test.ts
Normal file
@@ -0,0 +1,210 @@
|
||||
import type { AssistantMessage, Tool } from "@mariozechner/pi-ai";
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
buildStableCachePrefix,
|
||||
completeSimpleWithLiveTimeout,
|
||||
computeCacheHitRate,
|
||||
extractAssistantText,
|
||||
LIVE_CACHE_TEST_ENABLED,
|
||||
logLiveCache,
|
||||
resolveLiveDirectModel,
|
||||
} from "./live-cache-test-support.js";
|
||||
|
||||
const describeCacheLive = LIVE_CACHE_TEST_ENABLED ? describe : describe.skip;
|
||||
const OPENAI_TIMEOUT_MS = 120_000;
|
||||
const OPENAI_SESSION_ID = "live-cache-openai-mcp-style-session";
|
||||
const OPENAI_PREFIX = buildStableCachePrefix("openai-mcp-style");
|
||||
|
||||
const MCP_TOOL: Tool = {
|
||||
name: "bundleProbe__bundle_probe",
|
||||
description: "Return bundle MCP probe text.",
|
||||
parameters: Type.Object({}, { additionalProperties: false }),
|
||||
};
|
||||
|
||||
type CacheRun = {
|
||||
hitRate: number;
|
||||
suffix: string;
|
||||
text: string;
|
||||
usage: AssistantMessage["usage"];
|
||||
};
|
||||
|
||||
/** Return the first `toolCall` content block of an assistant message, or `undefined` if there is none. */
function extractFirstToolCall(message: AssistantMessage) {
  const isToolCall = (block: AssistantMessage["content"][number]) => block.type === "toolCall";
  const { content } = message;
  return content.find(isToolCall);
}
|
||||
|
||||
/**
 * Build the canned, non-error tool result for `MCP_TOOL` answering the given
 * tool call id. The result text is always "FROM-BUNDLE".
 */
function buildToolResultMessage(toolCallId: string) {
  const resultText = { type: "text" as const, text: "FROM-BUNDLE" };
  return {
    role: "toolResult" as const,
    toolCallId,
    toolName: MCP_TOOL.name,
    content: [resultText],
    isError: false,
    timestamp: Date.now(),
  };
}
|
||||
|
||||
/**
 * Ask the model to answer with nothing but a call to `MCP_TOOL`.
 *
 * Sends the initial prompt once; while the reply lacks a tool call or carries
 * any assistant text, retries with a terser prompt, at most twice. Asserts that
 * the final reply has a tool call and no text.
 *
 * @returns The last prompt sent, the final response, and its tool call block.
 * @throws Error("expected tool call") if no tool call block is present.
 */
async function runToolOnlyTurn(params: {
  apiKey: string;
  model: Awaited<ReturnType<typeof resolveLiveDirectModel>>["model"];
  sessionId: string;
}) {
  // One single-message request with the MCP tool attached; only the prompt and label vary.
  const requestToolCall = async (userPrompt: string, label: string) => {
    const reply = await completeSimpleWithLiveTimeout(
      params.model,
      {
        systemPrompt: OPENAI_PREFIX,
        messages: [{ role: "user", content: userPrompt, timestamp: Date.now() }],
        tools: [MCP_TOOL],
      },
      {
        apiKey: params.apiKey,
        cacheRetention: "short",
        sessionId: params.sessionId,
        maxTokens: 128,
        temperature: 0,
        reasoning: "none" as unknown as never,
      },
      label,
      OPENAI_TIMEOUT_MS,
    );
    return {
      response: reply,
      toolCall: extractFirstToolCall(reply),
      text: extractAssistantText(reply),
    };
  };

  let prompt = `Call the tool \`${MCP_TOOL.name}\` with {}. IMPORTANT: respond ONLY with the tool call and no other text.`;
  let outcome = await requestToolCall(prompt, "openai mcp-style tool-only turn");

  let retries = 0;
  while (retries < 2 && (!outcome.toolCall || outcome.text.length > 0)) {
    prompt = `Return only a tool call for \`${MCP_TOOL.name}\` with {}. No text.`;
    outcome = await requestToolCall(prompt, `openai mcp-style tool-only retry ${retries + 1}`);
    retries += 1;
  }

  expect(outcome.toolCall).toBeTruthy();
  expect(outcome.text.length).toBe(0);

  const { response, toolCall } = outcome;
  if (!toolCall || toolCall.type !== "toolCall") {
    throw new Error("expected tool call");
  }
  return { prompt, response, toolCall };
}
|
||||
|
||||
/**
 * Run one MCP-style cache probe against OpenAI.
 *
 * First obtains a fresh tool-only turn, then replays it (prompt, tool call,
 * canned tool result, fixed acknowledgement turns) followed by a
 * "reply with CACHE-OK <suffix>" instruction, and reports usage and hit rate.
 *
 * Asserts (case-insensitively) that the reply text contains `suffix`.
 */
async function runOpenAiMcpStyleCacheProbe(params: {
  apiKey: string;
  model: Awaited<ReturnType<typeof resolveLiveDirectModel>>["model"];
  sessionId: string;
  suffix: string;
}): Promise<CacheRun> {
  const toolTurn = await runToolOnlyTurn(params);

  // Small builders for the plain-text history turns; each stamps the current time.
  const userTurn = (content: string) => ({
    role: "user" as const,
    content,
    timestamp: Date.now(),
  });
  const assistantTurn = (text: string) => ({
    role: "assistant" as const,
    content: [{ type: "text" as const, text }],
    timestamp: Date.now(),
  });

  const response = await completeSimpleWithLiveTimeout(
    params.model,
    {
      systemPrompt: OPENAI_PREFIX,
      messages: [
        userTurn(toolTurn.prompt),
        toolTurn.response,
        buildToolResultMessage(toolTurn.toolCall.id),
        assistantTurn("MCP TOOL HISTORY ACKNOWLEDGED"),
        userTurn("Keep the MCP tool output stable in history."),
        assistantTurn("MCP TOOL HISTORY PRESERVED"),
        userTurn(`Reply with exactly CACHE-OK ${params.suffix}.`),
      ],
      tools: [MCP_TOOL],
    },
    {
      apiKey: params.apiKey,
      cacheRetention: "short",
      sessionId: params.sessionId,
      maxTokens: 64,
      temperature: 0,
      reasoning: "none" as unknown as never,
    },
    `openai mcp-style cache probe ${params.suffix}`,
    OPENAI_TIMEOUT_MS,
  );

  const text = extractAssistantText(response);
  expect(text.toLowerCase()).toContain(params.suffix.toLowerCase());

  const { usage } = response;
  return { suffix: params.suffix, text, usage, hitRate: computeCacheHitRate(usage) };
}
|
||||
|
||||
// Live suite (skipped unless LIVE_CACHE_TEST_ENABLED): warm the cache with one
// MCP-style probe, run two follow-ups on the same session, and require the
// better of the two to show substantial cache reads.
describeCacheLive("MCP-style prompt caching (live)", () => {
  it(
    "keeps high cache-read rates across MCP-style followup turns",
    async () => {
      const fixture = await resolveLiveDirectModel({
        provider: "openai",
        api: "openai-responses",
        envVar: "OPENCLAW_LIVE_OPENAI_CACHE_MODEL",
        preferredModelIds: ["gpt-5.4-mini", "gpt-5.4", "gpt-5.2"],
      });
      logLiveCache(`openai mcp-style model=${fixture.model.provider}/${fixture.model.id}`);

      const probe = (suffix: string) =>
        runOpenAiMcpStyleCacheProbe({ ...fixture, sessionId: OPENAI_SESSION_ID, suffix });

      const warmup = await probe("mcp-warmup");
      logLiveCache(
        `openai mcp-style warmup cacheRead=${warmup.usage.cacheRead} input=${warmup.usage.input} rate=${warmup.hitRate.toFixed(3)}`,
      );

      // Probes run sequentially so each one sees the state left by the previous.
      const hitA = await probe("mcp-hit-a");
      const hitB = await probe("mcp-hit-b");

      const readA = hitA.usage.cacheRead ?? 0;
      const readB = hitB.usage.cacheRead ?? 0;
      const bestHit = readA >= readB ? hitA : hitB;
      logLiveCache(
        `openai mcp-style best-hit suffix=${bestHit.suffix} cacheRead=${bestHit.usage.cacheRead} input=${bestHit.usage.input} rate=${bestHit.hitRate.toFixed(3)}`,
      );

      expect(bestHit.usage.cacheRead ?? 0).toBeGreaterThan(1_024);
      expect(bestHit.hitRate).toBeGreaterThanOrEqual(0.6);
    },
    10 * 60_000,
  );
});
|
||||
Reference in New Issue
Block a user