From 4509420dd44e136e047ef3d8992ab0d3e8542526 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Tue, 28 Apr 2026 13:19:40 -0700 Subject: [PATCH] test(qa): add gateway CPU scenario pack --- docs/help/testing.md | 10 + extensions/qa-lab/src/gateway-child.ts | 2 + .../qa-lab/src/process-tree-cpu.test.ts | 16 + extensions/qa-lab/src/process-tree-cpu.ts | 72 +++++ extensions/qa-lab/src/suite-runtime-types.ts | 1 + extensions/qa-lab/src/suite-summary.ts | 5 + .../qa-lab/src/suite.summary-json.test.ts | 16 + extensions/qa-lab/src/suite.ts | 32 ++ package.json | 1 + .../gpt55-thinking-visibility-switch.md | 6 +- .../models/thinking-slash-model-remap.md | 2 + scripts/bench-gateway-startup.ts | 106 ++++++- scripts/check-gateway-cpu-scenarios.mjs | 280 ++++++++++++++++++ 13 files changed, 544 insertions(+), 5 deletions(-) create mode 100644 extensions/qa-lab/src/process-tree-cpu.test.ts create mode 100644 extensions/qa-lab/src/process-tree-cpu.ts create mode 100644 scripts/check-gateway-cpu-scenarios.mjs diff --git a/docs/help/testing.md b/docs/help/testing.md index f3c66ed98b7..c396c384207 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -124,6 +124,16 @@ the fast Matrix and Telegram lanes before release approval. `aimock` starts a local AIMock-backed provider server for experimental fixture and protocol-mock coverage without replacing the scenario-aware `mock-openai` lane. +- `pnpm test:gateway:cpu-scenarios` + - Runs the gateway startup bench plus a small mock QA Lab scenario pack + (`channel-chat-baseline`, `memory-failure-fallback`, + `gateway-restart-inflight-run`) and writes a combined CPU observation + summary under `.artifacts/gateway-cpu-scenarios/`. + - Flags only sustained hot CPU observations by default (`--cpu-core-warn` + plus `--hot-wall-warn-ms`), so short startup bursts are recorded as metrics + without looking like the minutes-long gateway peg regression. + - Uses built `dist` artifacts; run a build first when the checkout does not + already have fresh runtime output. - `pnpm openclaw qa suite --runner multipass` - Runs the same QA suite inside a disposable Multipass Linux VM. - Keeps the same scenario-selection behavior as `qa suite` on the host. diff --git a/extensions/qa-lab/src/gateway-child.ts b/extensions/qa-lab/src/gateway-child.ts index f7b6ba10dd7..77847acf5df 100644 --- a/extensions/qa-lab/src/gateway-child.ts +++ b/extensions/qa-lab/src/gateway-child.ts @@ -22,6 +22,7 @@ import { formatQaGatewayLogsForError, redactQaGatewayDebugText } from "./gateway import { startQaGatewayRpcClient } from "./gateway-rpc-client.js"; import { splitQaModelRef, type QaProviderMode } from "./model-selection.js"; import { resolveQaNodeExecPath } from "./node-exec.js"; +import { readProcessTreeCpuMs } from "./process-tree-cpu.js"; import { normalizeQaProviderModeEnv, QA_LIVE_PROVIDER_CONFIG_PATH_ENV, @@ -825,6 +826,7 @@ export async function startQaGatewayChild(params: { baseUrl, wsUrl, pid: child.pid ?? null, + getProcessCpuMs: () => readProcessTreeCpuMs(activeChild.pid ?? null), token: gatewayToken, workspaceDir, tempRoot, diff --git a/extensions/qa-lab/src/process-tree-cpu.test.ts b/extensions/qa-lab/src/process-tree-cpu.test.ts new file mode 100644 index 00000000000..241c1bbc810 --- /dev/null +++ b/extensions/qa-lab/src/process-tree-cpu.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "vitest"; +import { parsePsCpuTimeMs } from "./process-tree-cpu.js"; + +describe("process tree CPU helpers", () => { + it("parses ps CPU time strings", () => { + expect(parsePsCpuTimeMs("00:01")).toBe(1_000); + expect(parsePsCpuTimeMs("01:02")).toBe(62_000); + expect(parsePsCpuTimeMs("01:02:03")).toBe(3_723_000); + }); + + it("rejects malformed ps CPU time strings", () => { + expect(parsePsCpuTimeMs("")).toBeNull(); + expect(parsePsCpuTimeMs("nope")).toBeNull(); + expect(parsePsCpuTimeMs("1:2:3:4")).toBeNull(); + }); +}); diff --git a/extensions/qa-lab/src/process-tree-cpu.ts b/extensions/qa-lab/src/process-tree-cpu.ts new file mode 100644 index 00000000000..31b9b812d76 --- /dev/null +++ b/extensions/qa-lab/src/process-tree-cpu.ts @@ -0,0 +1,72 @@ +import { spawnSync } from "node:child_process"; + +export function parsePsCpuTimeMs(raw: string): number | null { + const parts = raw.trim().split(":").map(Number); + if (parts.some((part) => !Number.isFinite(part) || part < 0)) { + return null; + } + if (parts.length === 2) { + return Math.round((parts[0] * 60 + parts[1]) * 1000); + } + if (parts.length === 3) { + return Math.round((parts[0] * 60 * 60 + parts[1] * 60 + parts[2]) * 1000); + } + return null; +} + +export function readProcessTreeCpuMs(rootPid: number | null | undefined): number | null { + if ( + typeof rootPid !== "number" || + !Number.isInteger(rootPid) || + rootPid <= 0 || + process.platform === "win32" + ) { + return null; + } + const result = spawnSync("ps", ["-eo", "pid=,ppid=,time="], { + encoding: "utf8", + stdio: ["ignore", "pipe", "ignore"], + }); + if (result.status !== 0) { + return null; + } + + const childrenByParent = new Map(); + const cpuByPid = new Map(); + for (const line of result.stdout.split("\n")) { + const match = line.trim().match(/^(\d+)\s+(\d+)\s+(\S+)$/u); + if (!match) { + continue; + } + const [, pidRaw, ppidRaw, cpuRaw] = match; + const pid = Number(pidRaw); + const ppid = Number(ppidRaw); + const cpuMs = parsePsCpuTimeMs(cpuRaw ?? ""); + if (!Number.isInteger(pid) || !Number.isInteger(ppid) || cpuMs === null) { + continue; + } + cpuByPid.set(pid, cpuMs); + const children = childrenByParent.get(ppid) ?? []; + children.push(pid); + childrenByParent.set(ppid, children); + } + if (!cpuByPid.has(rootPid)) { + return null; + } + + let totalCpuMs = 0; + const seen = new Set(); + const stack: number[] = [rootPid]; + while (stack.length > 0) { + const pid = stack.pop(); + if (pid === undefined || seen.has(pid)) { + continue; + } + seen.add(pid); + totalCpuMs += cpuByPid.get(pid) ?? 0; + for (const childPid of childrenByParent.get(pid) ?? []) { + stack.push(childPid); + } + } + return totalCpuMs; +} diff --git a/extensions/qa-lab/src/suite-runtime-types.ts b/extensions/qa-lab/src/suite-runtime-types.ts index 97e6acf7b87..fc3127b77a1 100644 --- a/extensions/qa-lab/src/suite-runtime-types.ts +++ b/extensions/qa-lab/src/suite-runtime-types.ts @@ -7,6 +7,7 @@ export type QaRuntimeGatewayClient = { tempRoot: string; workspaceDir: string; runtimeEnv: NodeJS.ProcessEnv; + getProcessCpuMs?: () => number | null; restartAfterStateMutation?: ( mutateState: (context: { configPath: string; diff --git a/extensions/qa-lab/src/suite-summary.ts b/extensions/qa-lab/src/suite-summary.ts index e0d06c8d999..14958cf1b2a 100644 --- a/extensions/qa-lab/src/suite-summary.ts +++ b/extensions/qa-lab/src/suite-summary.ts @@ -14,6 +14,11 @@ export type QaSuiteSummaryJson = { passed: number; failed: number; }; + metrics?: { + wallMs: number; + gatewayProcessCpuMs?: number | null; + gatewayCpuCoreRatio?: number | null; + }; run: { startedAt: string; finishedAt: string; diff --git a/extensions/qa-lab/src/suite.summary-json.test.ts b/extensions/qa-lab/src/suite.summary-json.test.ts index 90256e17dbf..8a1c3e40ac9 100644 --- a/extensions/qa-lab/src/suite.summary-json.test.ts +++ b/extensions/qa-lab/src/suite.summary-json.test.ts @@ -98,4 +98,20 @@ describe("buildQaSuiteSummaryJson", () => { failed: 1, }); }); + + it("records optional runtime metrics when provided", () => { + const json = buildQaSuiteSummaryJson({ + ...baseParams, + metrics: { + wallMs: 12_000, + gatewayProcessCpuMs: 3_400, + gatewayCpuCoreRatio: 0.283, + }, + }); + expect(json.metrics).toEqual({ + wallMs: 12_000, + gatewayProcessCpuMs: 3_400, + gatewayCpuCoreRatio: 0.283, + }); + }); }); diff --git a/extensions/qa-lab/src/suite.ts b/extensions/qa-lab/src/suite.ts index 97c77e4dbb9..b319bb42f49 100644 --- a/extensions/qa-lab/src/suite.ts +++ b/extensions/qa-lab/src/suite.ts @@ -277,6 +277,7 @@ export type QaSuiteSummaryJsonParams = { scenarios: QaSuiteScenarioResult[]; startedAt: Date; finishedAt: Date; + metrics?: QaSuiteSummaryJson["metrics"]; providerMode: QaProviderMode; primaryModel: string; alternateModel: string; @@ -317,6 +318,7 @@ export function buildQaSuiteSummaryJson(params: QaSuiteSummaryJsonParams): QaSui passed: params.scenarios.filter((scenario) => scenario.status === "pass").length, failed: countQaSuiteFailedScenarios(params.scenarios), }, + ...(params.metrics ? { metrics: params.metrics } : {}), run: { startedAt: params.startedAt.toISOString(), finishedAt: params.finishedAt.toISOString(), @@ -340,6 +342,7 @@ async function writeQaSuiteArtifacts(params: { startedAt: Date; finishedAt: Date; scenarios: QaSuiteScenarioResult[]; + metrics?: QaSuiteSummaryJson["metrics"]; transport: QaTransportAdapter; // Reuse the canonical QaProviderMode union instead of re-declaring it // inline. Loop 6 already unified `QaSuiteSummaryJsonParams.providerMode` @@ -376,6 +379,27 @@ async function writeQaSuiteArtifacts(params: { return { report, reportPath, summaryPath }; } +function buildQaSuiteRuntimeMetrics(params: { + startedAt: Date; + finishedAt: Date; + gatewayProcessCpuStartMs: number | null; + gatewayProcessCpuEndMs: number | null; +}): QaSuiteSummaryJson["metrics"] { + const wallMs = Math.max(1, params.finishedAt.getTime() - params.startedAt.getTime()); + if (params.gatewayProcessCpuStartMs === null || params.gatewayProcessCpuEndMs === null) { + return { wallMs }; + } + const gatewayProcessCpuMs = Math.max( + 0, + params.gatewayProcessCpuEndMs - params.gatewayProcessCpuStartMs, + ); + return { + wallMs, + gatewayProcessCpuMs, + gatewayCpuCoreRatio: Math.round((gatewayProcessCpuMs / wallMs) * 1000) / 1000, + }; +} + export async function runQaSuite(params?: QaSuiteRunParams): Promise { const startedAt = new Date(); const repoRoot = path.resolve(params?.repoRoot ?? process.cwd()); @@ -730,6 +754,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise scenario.status === "fail").length; if (scenarios.some((scenario) => scenario.status === "fail")) { preserveGatewayRuntimeDir = path.join(outputDir, "artifacts", "gateway-runtime"); @@ -789,6 +820,7 @@ export async function runQaSuite(params?: QaSuiteRunParams): Promise sample.firstOutputMs) .filter((value): value is number => typeof value === "number"), ), + cpuCoreRatio: summarizeNumbers( + samples + .map((sample) => sample.cpuCoreRatio) + .filter((value): value is number => typeof value === "number"), + ), + cpuMs: summarizeNumbers( + samples + .map((sample) => sample.cpuMs) + .filter((value): value is number => typeof value === "number"), + ), healthzMs: summarizeNumbers( samples .map((sample) => sample.healthz.ms) @@ -308,6 +322,13 @@ function formatMb(value: number | null): string { return `${value.toFixed(1)}MB`; } +function formatRatio(value: number | null): string { + if (value == null) { + return "n/a"; + } + return value.toFixed(3); +} + function formatStats(stats: SummaryStats | null): string { if (!stats) { return "n/a"; @@ -322,6 +343,13 @@ function formatMemoryStats(stats: SummaryStats | null): string { return `p50=${formatMb(stats.p50)} avg=${formatMb(stats.avg)} min=${formatMb(stats.min)} max=${formatMb(stats.max)}`; } +function formatRatioStats(stats: SummaryStats | null): string { + if (!stats) { + return "n/a"; + } + return `p50=${formatRatio(stats.p50)} avg=${formatRatio(stats.avg)} min=${formatRatio(stats.min)} max=${formatRatio(stats.max)}`; +} + async function getFreePort(): Promise { return new Promise((resolve, reject) => { const server = createServer(); @@ -547,6 +575,71 @@ function readProcessRssMb(pid: number | undefined): number | null { return Number.isFinite(rssKb) && rssKb > 0 ? rssKb / 1024 : null; } +function parsePsCpuTimeMs(raw: string): number | null { + const parts = raw.trim().split(":").map(Number); + if (parts.some((part) => !Number.isFinite(part) || part < 0)) { + return null; + } + if (parts.length === 2) { + return Math.round((parts[0] * 60 + parts[1]) * 1000); + } + if (parts.length === 3) { + return Math.round((parts[0] * 60 * 60 + parts[1] * 60 + parts[2]) * 1000); + } + return null; +} + +function readProcessTreeCpuMs(rootPid: number | undefined): number | null { + if (!rootPid || process.platform === "win32") { + return null; + } + const result = spawnSync("ps", ["-eo", "pid=,ppid=,time="], { + encoding: "utf8", + stdio: ["ignore", "pipe", "ignore"], + }); + if (result.status !== 0) { + return null; + } + + const childrenByParent = new Map(); + const cpuByPid = new Map(); + for (const line of result.stdout.split("\n")) { + const match = line.trim().match(/^(\d+)\s+(\d+)\s+(\S+)$/u); + if (!match) { + continue; + } + const pid = Number(match[1]); + const ppid = Number(match[2]); + const cpuMs = parsePsCpuTimeMs(match[3]); + if (!Number.isInteger(pid) || !Number.isInteger(ppid) || cpuMs === null) { + continue; + } + cpuByPid.set(pid, cpuMs); + const children = childrenByParent.get(ppid) ?? []; + children.push(pid); + childrenByParent.set(ppid, children); + } + if (!cpuByPid.has(rootPid)) { + return null; + } + + let totalCpuMs = 0; + const seen = new Set(); + const stack = [rootPid]; + while (stack.length > 0) { + const pid = stack.pop(); + if (!pid || seen.has(pid)) { + continue; + } + seen.add(pid); + totalCpuMs += cpuByPid.get(pid) ?? 0; + for (const childPid of childrenByParent.get(pid) ?? []) { + stack.push(childPid); + } + } + return totalCpuMs; +} + async function runGatewaySample(options: { benchCase: GatewayBenchCase; entry: string; @@ -583,6 +676,7 @@ async function runGatewaySample(options: { ], { cwd: process.cwd(), detached: process.platform !== "win32", env }, ); + const cpuStartMs = readProcessTreeCpuMs(child.pid); const sampleRss = () => { const rssMb = readProcessRssMb(child.pid); if (rssMb != null) { @@ -636,6 +730,10 @@ async function runGatewaySample(options: { startAt, }), ]); + const readyAt = performance.now(); + const cpuEndMs = readProcessTreeCpuMs(child.pid); + const cpuMs = cpuStartMs == null || cpuEndMs == null ? null : Math.max(0, cpuEndMs - cpuStartMs); + const cpuCoreRatio = cpuMs == null ? null : cpuMs / Math.max(1, readyAt - startAt); const exit = await stopChild(child); clearInterval(rssTimer); sampleRss(); @@ -643,6 +741,8 @@ async function runGatewaySample(options: { rmSync(root, { force: true, maxRetries: 3, recursive: true, retryDelay: 100 }); return { + cpuCoreRatio, + cpuMs, exitCode: exit.exitCode, firstOutputMs, healthz, @@ -673,11 +773,11 @@ async function runCase(options: { if (index >= options.warmup) { samples.push(sample); console.log( - `[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} rss=${formatMb(sample.maxRssMb)}`, + `[gateway-startup-bench] ${options.benchCase.id} run ${samples.length}/${options.runs}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} readyLog=${formatMs(sample.readyLogMs)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`, ); } else { console.log( - `[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} rss=${formatMb(sample.maxRssMb)}`, + `[gateway-startup-bench] ${options.benchCase.id} warmup ${index + 1}/${options.warmup}: healthz=${formatMs(sample.healthz.ms)} readyz=${formatMs(sample.readyz.ms)} cpu=${formatMs(sample.cpuMs)} cpuCore=${formatRatio(sample.cpuCoreRatio)} rss=${formatMb(sample.maxRssMb)}`, ); } } @@ -687,6 +787,8 @@ async function runCase(options: { function printResult(result: CaseResult): void { console.log(`\n${result.name} (${result.id})`); console.log(` first output: ${formatStats(result.summary.firstOutputMs)}`); + console.log(` CPU: ${formatStats(result.summary.cpuMs)}`); + console.log(` CPU core: ${formatRatioStats(result.summary.cpuCoreRatio)}`); console.log(` /healthz: ${formatStats(result.summary.healthzMs)}`); console.log(` ready log: ${formatStats(result.summary.readyLogMs)}`); console.log(` /readyz: ${formatStats(result.summary.readyzMs)}`); diff --git a/scripts/check-gateway-cpu-scenarios.mjs b/scripts/check-gateway-cpu-scenarios.mjs new file mode 100644 index 00000000000..a2946f3f852 --- /dev/null +++ b/scripts/check-gateway-cpu-scenarios.mjs @@ -0,0 +1,280 @@ +#!/usr/bin/env node + +import { spawnSync } from "node:child_process"; +import fs from "node:fs"; +import path from "node:path"; +import process from "node:process"; + +const DEFAULT_STARTUP_CASES = ["default", "oneInternalHook", "allInternalHooks"]; +const DEFAULT_QA_SCENARIOS = [ + "channel-chat-baseline", + "memory-failure-fallback", + "gateway-restart-inflight-run", +]; +const DEFAULT_CPU_CORE_WARN = 0.9; +const DEFAULT_HOT_WALL_WARN_MS = 30_000; + +function parseArgs(argv) { + const options = { + outputDir: path.join( + process.cwd(), + ".artifacts", + "gateway-cpu-scenarios", + new Date().toISOString().replace(/[:.]/g, "-"), + ), + startupCases: [], + qaScenarios: [], + runs: 1, + warmup: 0, + skipStartup: false, + skipQa: false, + cpuCoreWarn: DEFAULT_CPU_CORE_WARN, + hotWallWarnMs: DEFAULT_HOT_WALL_WARN_MS, + }; + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]; + const readValue = () => { + const value = argv[index + 1]; + if (!value) { + throw new Error(`Missing value for ${arg}`); + } + index += 1; + return value; + }; + switch (arg) { + case "--output-dir": + options.outputDir = path.resolve(readValue()); + break; + case "--startup-case": + options.startupCases.push(readValue()); + break; + case "--qa-scenario": + options.qaScenarios.push(readValue()); + break; + case "--runs": + options.runs = parsePositiveInt(readValue(), "--runs"); + break; + case "--warmup": + options.warmup = parseNonNegativeInt(readValue(), "--warmup"); + break; + case "--cpu-core-warn": + options.cpuCoreWarn = parsePositiveNumber(readValue(), "--cpu-core-warn"); + break; + case "--hot-wall-warn-ms": + options.hotWallWarnMs = parsePositiveInt(readValue(), "--hot-wall-warn-ms"); + break; + case "--skip-startup": + options.skipStartup = true; + break; + case "--skip-qa": + options.skipQa = true; + break; + case "--help": + printHelp(); + process.exit(0); + break; + default: + throw new Error(`Unknown argument: ${arg}`); + } + } + if (options.startupCases.length === 0) { + options.startupCases = [...DEFAULT_STARTUP_CASES]; + } + if (options.qaScenarios.length === 0) { + options.qaScenarios = [...DEFAULT_QA_SCENARIOS]; + } + return options; +} + +function parsePositiveInt(raw, label) { + const value = Number(raw); + if (!Number.isInteger(value) || value < 1) { + throw new Error(`${label} must be a positive integer`); + } + return value; +} + +function parseNonNegativeInt(raw, label) { + const value = Number(raw); + if (!Number.isInteger(value) || value < 0) { + throw new Error(`${label} must be a non-negative integer`); + } + return value; +} + +function parsePositiveNumber(raw, label) { + const value = Number(raw); + if (!Number.isFinite(value) || value <= 0) { + throw new Error(`${label} must be a positive number`); + } + return value; +} + +function printHelp() { + console.log(`Usage: pnpm test:gateway:cpu-scenarios [options] + +Runs a small gateway CPU scenario suite against built dist artifacts. + +Options: + --output-dir Artifact directory + --startup-case Startup bench case, repeatable + --qa-scenario QA Lab scenario, repeatable + --runs Startup bench runs per case (default: 1) + --warmup Startup bench warmup runs per case (default: 0) + --cpu-core-warn Hot CPU observation threshold (default: 0.9) + --hot-wall-warn-ms Minimum wall time for hot CPU observations (default: 30000) + --skip-startup Skip startup bench + --skip-qa Skip QA Lab scenario smoke +`); +} + +function readJsonIfExists(filePath) { + if (!fs.existsSync(filePath)) { + return null; + } + return JSON.parse(fs.readFileSync(filePath, "utf8")); +} + +function runStep(name, command, args) { + console.error(`[gateway-cpu] start ${name}`); + const result = spawnSync(command, args, { + cwd: process.cwd(), + env: process.env, + stdio: "inherit", + }); + const status = result.status ?? (result.signal ? 1 : 0); + console.error(`[gateway-cpu] ${status === 0 ? "pass" : "fail"} ${name}`); + return { name, status, signal: result.signal ?? null }; +} + +function pnpmCommand() { + return process.platform === "win32" ? "pnpm.cmd" : "pnpm"; +} + +function toRepoRelativePath(absolutePath) { + const relativePath = path.relative(process.cwd(), absolutePath); + if (!relativePath || relativePath.startsWith("..") || path.isAbsolute(relativePath)) { + throw new Error(`Output path must stay inside the repo root: ${absolutePath}`); + } + return relativePath; +} + +function collectObservations(params) { + const observations = []; + for (const result of params.startup?.results ?? []) { + const cpuCoreMax = result.summary?.cpuCoreRatio?.max; + const wallMax = result.summary?.readyz?.max ?? result.summary?.healthz?.max; + if ( + typeof cpuCoreMax === "number" && + typeof wallMax === "number" && + cpuCoreMax >= params.cpuCoreWarn && + wallMax >= params.hotWallWarnMs + ) { + observations.push({ + kind: "startup-cpu-hot", + id: result.id, + cpuCoreRatioMax: cpuCoreMax, + wallMsMax: wallMax, + }); + } + } + const qaCpuCoreRatio = params.qa?.metrics?.gatewayCpuCoreRatio; + const qaWallMs = params.qa?.metrics?.wallMs; + if ( + typeof qaCpuCoreRatio === "number" && + typeof qaWallMs === "number" && + qaCpuCoreRatio >= params.cpuCoreWarn && + qaWallMs >= params.hotWallWarnMs + ) { + observations.push({ + kind: "qa-cpu-hot", + id: "qa-suite", + cpuCoreRatio: qaCpuCoreRatio, + wallMs: qaWallMs, + }); + } + return observations; +} + +async function main() { + const options = parseArgs(process.argv.slice(2)); + fs.mkdirSync(options.outputDir, { recursive: true }); + + const startupOutput = path.join(options.outputDir, "gateway-startup-bench.json"); + const qaOutputDir = path.join(options.outputDir, "qa-suite"); + const qaOutputArg = toRepoRelativePath(qaOutputDir); + const steps = []; + + if (!options.skipStartup) { + steps.push( + runStep("startup bench", process.execPath, [ + "--import", + "tsx", + "scripts/bench-gateway-startup.ts", + "--runs", + String(options.runs), + "--warmup", + String(options.warmup), + "--output", + startupOutput, + ...options.startupCases.flatMap((id) => ["--case", id]), + ]), + ); + } + + if (!options.skipQa) { + steps.push( + runStep("qa suite", pnpmCommand(), [ + "openclaw", + "qa", + "suite", + "--provider-mode", + "mock-openai", + "--concurrency", + "1", + "--output-dir", + qaOutputArg, + ...options.qaScenarios.flatMap((id) => ["--scenario", id]), + ]), + ); + } + + const startup = readJsonIfExists(startupOutput); + const qa = readJsonIfExists(path.join(qaOutputDir, "qa-suite-summary.json")); + const observations = collectObservations({ + startup, + qa, + cpuCoreWarn: options.cpuCoreWarn, + hotWallWarnMs: options.hotWallWarnMs, + }); + const summary = { + generatedAt: new Date().toISOString(), + outputDir: options.outputDir, + startupOutput: fs.existsSync(startupOutput) ? startupOutput : null, + qaSummary: fs.existsSync(path.join(qaOutputDir, "qa-suite-summary.json")) + ? path.join(qaOutputDir, "qa-suite-summary.json") + : null, + options: { + startupCases: options.startupCases, + qaScenarios: options.qaScenarios, + runs: options.runs, + warmup: options.warmup, + cpuCoreWarn: options.cpuCoreWarn, + hotWallWarnMs: options.hotWallWarnMs, + }, + steps, + observations, + }; + const summaryPath = path.join(options.outputDir, "summary.json"); + fs.writeFileSync(summaryPath, `${JSON.stringify(summary, null, 2)}\n`); + console.log(JSON.stringify(summary, null, 2)); + + if (steps.some((step) => step.status !== 0)) { + process.exitCode = 1; + } +} + +main().catch((error) => { + console.error(error instanceof Error ? error.stack : String(error)); + process.exitCode = 1; +});