feat: add interactive qa lab suite runner

This commit is contained in:
Peter Steinberger
2026-04-06 17:20:41 +01:00
parent e70168212d
commit 350238d402
12 changed files with 996 additions and 33 deletions

View File

@@ -40,6 +40,7 @@ describe("qa-lab server", () => {
kickoffTask: string;
scenarios: Array<{ id: string; title: string }>;
defaults: { conversationId: string; senderId: string };
runner: { status: string; selection: { providerMode: string; scenarioIds: string[] } };
};
expect(bootstrap.defaults.conversationId).toBe("qa-operator");
expect(bootstrap.defaults.senderId).toBe("qa-operator");
@@ -48,6 +49,9 @@ describe("qa-lab server", () => {
expect(bootstrap.kickoffTask).toContain("Lobster Invaders");
expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10);
expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true);
expect(bootstrap.runner.status).toBe("idle");
expect(bootstrap.runner.selection.providerMode).toBe("mock-openai");
expect(bootstrap.runner.selection.scenarioIds).toHaveLength(bootstrap.scenarios.length);
const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, {
method: "POST",

View File

@@ -14,6 +14,12 @@ import { fileURLToPath } from "node:url";
import { handleQaBusRequest, writeError, writeJson } from "./bus-server.js";
import { createQaBusState, type QaBusState } from "./bus-state.js";
import { createQaRunnerRuntime } from "./harness-runtime.js";
import type { QaRunnerModelOption } from "./model-catalog.runtime.js";
import {
createIdleQaRunnerSnapshot,
createQaRunOutputDir,
normalizeQaRunSelection,
} from "./run-config.js";
import { qaChannelPlugin, setQaChannelRuntime, type OpenClawConfig } from "./runtime-api.js";
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
import { runQaSelfCheckAgainstState, type QaSelfCheckResult } from "./self-check.js";
@@ -24,6 +30,8 @@ type QaLabLatestReport = {
generatedAt: string;
};
export type { QaLabLatestReport };
type QaLabBootstrapDefaults = {
conversationKind: "direct" | "channel";
conversationId: string;
@@ -416,6 +424,10 @@ export async function startQaLabServer(params?: {
let latestScenarioRun: QaLabScenarioRun | null = null;
const scenarioCatalog = readQaBootstrapScenarioCatalog();
const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget);
let runnerModelOptions: QaRunnerModelOption[] = [];
let runnerModelCatalogStatus: "loading" | "ready" | "failed" = "loading";
let runnerSnapshot = createIdleQaRunnerSnapshot(scenarioCatalog.scenarios);
let activeSuiteRun: Promise<void> | null = null;
let controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
? new URL(params.controlUiProxyTarget)
: null;
@@ -428,8 +440,34 @@ export async function startQaLabServer(params?: {
}
| undefined;
const embeddedGatewayEnabled = params?.embeddedGateway !== "disabled";
let labHandle: {
baseUrl: string;
listenUrl: string;
state: QaBusState;
setControlUi: (next: {
controlUiUrl?: string | null;
controlUiToken?: string | null;
controlUiProxyTarget?: string | null;
}) => void;
setScenarioRun: (next: Omit<QaLabScenarioRun, "counts"> | null) => void;
setLatestReport: (next: QaLabLatestReport | null) => void;
runSelfCheck: () => Promise<QaSelfCheckResult>;
stop: () => Promise<void>;
} | null = null;
let publicBaseUrl = "";
const runnerModelCatalogPromise = (async () => {
try {
const { loadQaRunnerModelOptions } = await import("./model-catalog.runtime.js");
runnerModelOptions = await loadQaRunnerModelOptions({
repoRoot: process.cwd(),
});
runnerModelCatalogStatus = "ready";
} catch {
runnerModelOptions = [];
runnerModelCatalogStatus = "failed";
}
})();
const server = createServer(async (req, res) => {
const url = new URL(req.url ?? "/", "http://127.0.0.1");
@@ -465,6 +503,11 @@ export async function startQaLabServer(params?: {
kickoffTask: scenarioCatalog.kickoffTask,
scenarios: scenarioCatalog.scenarios,
defaults: bootstrapDefaults,
runner: runnerSnapshot,
runnerCatalog: {
status: runnerModelCatalogStatus,
real: runnerModelOptions,
},
});
return;
}
@@ -485,7 +528,21 @@ export async function startQaLabServer(params?: {
return;
}
if (req.method === "POST" && url.pathname === "/api/reset") {
if (activeSuiteRun) {
writeError(res, 409, "QA suite run already in progress");
return;
}
state.reset();
latestReport = null;
latestScenarioRun = null;
runnerSnapshot = {
...runnerSnapshot,
status: "idle",
artifacts: null,
error: null,
startedAt: undefined,
finishedAt: undefined,
};
writeJson(res, 200, { ok: true });
return;
}
@@ -507,6 +564,10 @@ export async function startQaLabServer(params?: {
return;
}
if (req.method === "POST" && url.pathname === "/api/scenario/self-check") {
if (activeSuiteRun) {
writeError(res, 409, "QA suite run already in progress");
return;
}
latestScenarioRun = withQaLabRunCounts({
kind: "self-check",
status: "running",
@@ -547,6 +608,68 @@ export async function startQaLabServer(params?: {
writeJson(res, 200, serializeSelfCheck(result));
return;
}
if (req.method === "POST" && url.pathname === "/api/scenario/suite") {
if (activeSuiteRun) {
writeError(res, 409, "QA suite run already in progress");
return;
}
const selection = normalizeQaRunSelection(await readJson(req), scenarioCatalog.scenarios);
state.reset();
latestReport = null;
latestScenarioRun = null;
const startedAt = new Date().toISOString();
runnerSnapshot = {
status: "running",
selection,
startedAt,
finishedAt: undefined,
artifacts: null,
error: null,
};
activeSuiteRun = (async () => {
try {
const { runQaSuiteFromRuntime } = await import("./suite-launch.runtime.js");
const result = await runQaSuiteFromRuntime({
lab: labHandle ?? undefined,
outputDir: createQaRunOutputDir(),
providerMode: selection.providerMode,
primaryModel: selection.primaryModel,
alternateModel: selection.alternateModel,
fastMode: selection.fastMode,
scenarioIds: selection.scenarioIds,
});
runnerSnapshot = {
status: "completed",
selection,
startedAt,
finishedAt: new Date().toISOString(),
artifacts: {
outputDir: result.outputDir,
reportPath: result.reportPath,
summaryPath: result.summaryPath,
watchUrl: result.watchUrl,
},
error: null,
};
} catch (error) {
runnerSnapshot = {
status: "failed",
selection,
startedAt,
finishedAt: new Date().toISOString(),
artifacts: null,
error: error instanceof Error ? error.message : String(error),
};
} finally {
activeSuiteRun = null;
}
})();
writeJson(res, 202, {
ok: true,
runner: runnerSnapshot,
});
return;
}
if (req.method !== "GET" && req.method !== "HEAD") {
writeError(res, 404, "not found");
@@ -611,6 +734,7 @@ export async function startQaLabServer(params?: {
kickoffTask: scenarioCatalog.kickoffTask,
});
}
void runnerModelCatalogPromise;
server.on("upgrade", (req, socket, head) => {
const url = new URL(req.url ?? "/", "http://127.0.0.1");
@@ -626,7 +750,7 @@ export async function startQaLabServer(params?: {
});
});
return {
const lab = {
baseUrl: publicBaseUrl,
listenUrl,
state,
@@ -644,6 +768,9 @@ export async function startQaLabServer(params?: {
setScenarioRun(next: Omit<QaLabScenarioRun, "counts"> | null) {
latestScenarioRun = next ? withQaLabRunCounts(next) : null;
},
setLatestReport(next: QaLabLatestReport | null) {
latestReport = next;
},
async runSelfCheck() {
latestScenarioRun = withQaLabRunCounts({
kind: "self-check",
@@ -691,6 +818,8 @@ export async function startQaLabServer(params?: {
);
},
};
labHandle = lab;
return lab;
}
function serializeSelfCheck(result: QaSelfCheckResult) {

View File

@@ -0,0 +1,32 @@
import { describe, expect, it } from "vitest";
import { selectQaRunnerModelOptions } from "./model-catalog.runtime.js";
// Unit coverage for selectQaRunnerModelOptions: unavailable rows are dropped
// and the preferred openai/gpt-5.4 entry sorts ahead of all other providers.
describe("qa runner model catalog", () => {
it("filters to available rows and prefers gpt-5.4 first", () => {
expect(
selectQaRunnerModelOptions([
// Available, non-preferred row — kept, but sorted after the preferred key.
{
key: "anthropic/claude-sonnet-4-5",
name: "Claude Sonnet 4.5",
input: "text",
available: true,
missing: false,
},
// Preferred row (openai/gpt-5.4) — must come first in the result.
{
key: "openai/gpt-5.4",
name: "gpt-5.4",
input: "text,image",
available: true,
missing: false,
},
// Unavailable row — must be filtered out entirely.
{
key: "openrouter/auto",
name: "OpenRouter Auto",
input: "text",
available: false,
missing: false,
},
]).map((entry) => entry.key),
).toEqual(["openai/gpt-5.4", "anthropic/claude-sonnet-4-5"]);
});
});

View File

@@ -0,0 +1,126 @@
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
// Raw model row shape as emitted by the `models list --all --json` CLI output.
type ModelRow = {
  key: string;
  name: string;
  input: string;
  // null means availability could not be determined for this row.
  available: boolean | null;
  missing: boolean;
};

// A runner-selectable model option exposed to the QA lab UI.
export type QaRunnerModelOption = {
  key: string;
  name: string;
  provider: string;
  input: string;
  // True for the default model the UI should pre-select (openai/gpt-5.4).
  preferred: boolean;
};

// Split a "provider/model" reference into its two parts.
// Returns null when there is no separator or when either side is empty.
function splitModelKey(key: string) {
  const separator = key.indexOf("/");
  if (separator < 1 || separator >= key.length - 1) {
    return null;
  }
  return {
    provider: key.substring(0, separator),
    model: key.substring(separator + 1),
  };
}

// Build the UI-facing option list: keep only rows that are confirmed
// available and not missing, then order with the preferred model first,
// then by provider, then by display name.
export function selectQaRunnerModelOptions(rows: ModelRow[]): QaRunnerModelOption[] {
  const usable = rows.filter((row) => row.available === true && !row.missing);
  const options: QaRunnerModelOption[] = usable.map((row) => ({
    key: row.key,
    name: row.name,
    provider: splitModelKey(row.key)?.provider ?? "unknown",
    input: row.input,
    preferred: row.key === "openai/gpt-5.4",
  }));
  const byPreference = (left: QaRunnerModelOption, right: QaRunnerModelOption) => {
    if (left.preferred !== right.preferred) {
      return left.preferred ? -1 : 1;
    }
    return left.provider.localeCompare(right.provider) || left.name.localeCompare(right.name);
  };
  // Copy-then-sort keeps the mapped array unmutated (same semantics as toSorted).
  return [...options].sort(byPreference);
}
// Discover which real provider models are usable by spawning the built CLI
// (`dist/index.js models list --all --json`) against a throwaway config in a
// temp sandbox, then filtering/sorting the rows via selectQaRunnerModelOptions.
// The sandbox env overrides (HOME/state/config) keep the child process from
// reading or writing the operator's real OpenClaw configuration.
export async function loadQaRunnerModelOptions(params: { repoRoot: string }) {
// Everything lives under one mkdtemp root so cleanup is a single rm.
const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-model-catalog-"));
const workspaceDir = path.join(tempRoot, "workspace");
const stateDir = path.join(tempRoot, "state");
const homeDir = path.join(tempRoot, "home");
const configPath = path.join(tempRoot, "openclaw.json");
try {
await Promise.all([
fs.mkdir(workspaceDir, { recursive: true }),
fs.mkdir(stateDir, { recursive: true }),
fs.mkdir(homeDir, { recursive: true }),
]);
// Minimal live-lane gateway config for the listing run. The bus URL points
// at 127.0.0.1:9 — presumably never contacted during a model listing; the
// port just needs to be syntactically valid (TODO confirm).
const cfg = buildQaGatewayConfig({
bind: "loopback",
gatewayPort: 0,
gatewayToken: "qa-model-catalog",
qaBusBaseUrl: "http://127.0.0.1:9",
workspaceDir,
providerMode: "live-openai",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
controlUiEnabled: false,
});
await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");
const stdout: Buffer[] = [];
const stderr: Buffer[] = [];
await new Promise<void>((resolve, reject) => {
const child = spawn(
process.execPath,
["dist/index.js", "models", "list", "--all", "--json"],
{
cwd: params.repoRoot,
env: {
...process.env,
// Redirect every config/state/credentials lookup into the sandbox.
HOME: homeDir,
OPENCLAW_HOME: homeDir,
OPENCLAW_CONFIG_PATH: configPath,
OPENCLAW_STATE_DIR: stateDir,
OPENCLAW_OAUTH_DIR: path.join(stateDir, "credentials"),
},
stdio: ["ignore", "pipe", "pipe"],
},
);
child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
child.once("error", reject);
child.once("exit", (code) => {
if (code === 0) {
resolve();
return;
}
// Surface the child's stderr so a failed catalog run is diagnosable.
reject(
new Error(
`qa model catalog failed (${code ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8").trim()}`,
),
);
});
});
// NOTE(review): stdout is parsed without schema validation — assumes the
// CLI emits { models: ModelRow[] }; confirm if the CLI output changes.
const payload = JSON.parse(Buffer.concat(stdout).toString("utf8")) as { models?: ModelRow[] };
return selectQaRunnerModelOptions(payload.models ?? []);
} finally {
// Best-effort sandbox teardown; `force` tolerates already-missing paths.
await fs.rm(tempRoot, { recursive: true, force: true });
}
}

View File

@@ -38,6 +38,16 @@ export function buildQaGatewayConfig(params: {
alternateModel?: string;
fastMode?: boolean;
}): OpenClawConfig {
const splitModelRef = (ref: string) => {
const slash = ref.indexOf("/");
if (slash <= 0 || slash === ref.length - 1) {
return null;
}
return {
provider: ref.slice(0, slash),
model: ref.slice(slash + 1),
};
};
const mockProviderBaseUrl = params.providerBaseUrl ?? "http://127.0.0.1:44080/v1";
const mockOpenAiProvider: ModelProviderConfig = {
baseUrl: mockProviderBaseUrl,
@@ -92,10 +102,6 @@ export function buildQaGatewayConfig(params: {
],
};
const providerMode = params.providerMode ?? "mock-openai";
const allowedPlugins =
providerMode === "live-openai"
? ["memory-core", "openai", "qa-channel"]
: ["memory-core", "qa-channel"];
const primaryModel =
params.primaryModel ??
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4");
@@ -104,6 +110,20 @@ export function buildQaGatewayConfig(params: {
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
const imageGenerationModelRef =
providerMode === "live-openai" ? "openai/gpt-image-1" : "mock-openai/gpt-image-1";
const selectedProviderIds =
providerMode === "live-openai"
? [
...new Set(
[primaryModel, alternateModel, imageGenerationModelRef]
.map((ref) => splitModelRef(ref)?.provider)
.filter((provider): provider is string => Boolean(provider)),
),
]
: [];
const pluginEntries =
providerMode === "live-openai"
? Object.fromEntries(selectedProviderIds.map((providerId) => [providerId, { enabled: true }]))
: {};
const liveModelParams =
providerMode === "live-openai"
? {
@@ -127,7 +147,7 @@ export function buildQaGatewayConfig(params: {
return {
plugins: {
allow: allowedPlugins,
...(providerMode === "mock-openai" ? { allow: ["memory-core", "qa-channel"] } : {}),
entries: {
acpx: {
enabled: false,
@@ -135,13 +155,7 @@ export function buildQaGatewayConfig(params: {
"memory-core": {
enabled: true,
},
...(providerMode === "live-openai"
? {
openai: {
enabled: true,
},
}
: {}),
...pluginEntries,
},
},
agents: {

View File

@@ -0,0 +1,70 @@
import { describe, expect, it } from "vitest";
import {
createDefaultQaRunSelection,
createIdleQaRunnerSnapshot,
normalizeQaRunSelection,
} from "./run-config.js";
// Fixture catalog: two minimal scenarios matching the QaSeedScenario shape.
const scenarios = [
{
id: "dm-chat-baseline",
title: "DM baseline",
surface: "dm",
objective: "test DM",
successCriteria: ["reply"],
},
{
id: "thread-lifecycle",
title: "Thread lifecycle",
surface: "thread",
objective: "test thread",
successCriteria: ["thread reply"],
},
];
// Covers the three normalization behaviors: synthetic defaults, live-lane
// coercion (blank model → lane default, dedupe + unknown-id filtering), and
// the fall-back-to-all-scenarios rule for empty selections.
describe("qa run config", () => {
it("creates a synthetic-by-default selection that arms every scenario", () => {
expect(createDefaultQaRunSelection(scenarios)).toEqual({
providerMode: "mock-openai",
primaryModel: "mock-openai/gpt-5.4",
alternateModel: "mock-openai/gpt-5.4-alt",
fastMode: false,
scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
});
});
it("normalizes live selections and filters unknown scenario ids", () => {
expect(
normalizeQaRunSelection(
{
providerMode: "live-openai",
primaryModel: "openai/gpt-5.4",
// Blank alternate model must fall back to the live-lane default.
alternateModel: "",
fastMode: false,
scenarioIds: ["thread-lifecycle", "missing", "thread-lifecycle"],
},
scenarios,
),
).toEqual({
providerMode: "live-openai",
primaryModel: "openai/gpt-5.4",
alternateModel: "openai/gpt-5.4",
fastMode: false,
scenarioIds: ["thread-lifecycle"],
});
});
it("falls back to all scenarios when selection would otherwise be empty", () => {
const snapshot = createIdleQaRunnerSnapshot(scenarios);
expect(snapshot.status).toBe("idle");
expect(snapshot.selection.scenarioIds).toEqual(["dm-chat-baseline", "thread-lifecycle"]);
expect(
normalizeQaRunSelection(
{
scenarioIds: [],
},
scenarios,
).scenarioIds,
).toEqual(["dm-chat-baseline", "thread-lifecycle"]);
});
});

View File

@@ -0,0 +1,97 @@
import path from "node:path";
import type { QaSeedScenario } from "./scenario-catalog.js";
export type QaProviderMode = "mock-openai" | "live-openai";
export type QaLabRunSelection = {
providerMode: QaProviderMode;
primaryModel: string;
alternateModel: string;
fastMode: boolean;
scenarioIds: string[];
};
export type QaLabRunArtifacts = {
outputDir: string;
reportPath: string;
summaryPath: string;
watchUrl: string;
};
export type QaLabRunnerSnapshot = {
status: "idle" | "running" | "completed" | "failed";
selection: QaLabRunSelection;
startedAt?: string;
finishedAt?: string;
artifacts: QaLabRunArtifacts | null;
error: string | null;
};
/**
 * Default suite selection: synthetic OpenAI lane with the stock mock model
 * pair, fast mode off, and every catalog scenario armed.
 */
export function createDefaultQaRunSelection(scenarios: QaSeedScenario[]): QaLabRunSelection {
  const scenarioIds = scenarios.map(({ id }) => id);
  return {
    providerMode: "mock-openai",
    primaryModel: "mock-openai/gpt-5.4",
    alternateModel: "mock-openai/gpt-5.4-alt",
    fastMode: false,
    scenarioIds,
  };
}
// Stock model ref for a provider lane; `alternate` selects the secondary
// (-alt) model in the synthetic lane. The live lane has a single default.
function defaultModelForMode(mode: QaProviderMode, alternate = false) {
  if (mode !== "live-openai") {
    return alternate ? "mock-openai/gpt-5.4-alt" : "mock-openai/gpt-5.4";
  }
  return "openai/gpt-5.4";
}
// Coerce untrusted input to a provider mode; anything other than the exact
// live-lane string falls back to the synthetic default.
function normalizeProviderMode(input: unknown): QaProviderMode {
  if (input === "live-openai") {
    return "live-openai";
  }
  return "mock-openai";
}
// Accept a non-empty trimmed string as the model ref; anything else
// (non-string, empty, whitespace-only) yields the fallback.
function normalizeModel(input: unknown, fallback: string) {
  if (typeof input !== "string") {
    return fallback;
  }
  const trimmed = input.trim();
  return trimmed.length > 0 ? trimmed : fallback;
}
/**
 * Validate a requested scenario-id list against the catalog.
 *
 * Keeps only ids that exist in `scenarios`, preserves request order, drops
 * duplicates (first occurrence wins), and falls back to the full catalog when
 * the filtered selection would be empty.
 *
 * Dedupe uses a Set so the pass is O(n); the previous
 * `requestedIds.indexOf(id) === index` check inside `filter` was O(n²).
 */
function normalizeScenarioIds(input: unknown, scenarios: QaSeedScenario[]) {
  const availableIds = new Set(scenarios.map((scenario) => scenario.id));
  const requestedIds = Array.isArray(input)
    ? input
        .map((value) => (typeof value === "string" ? value.trim() : ""))
        .filter((value) => value.length > 0)
    : [];
  const seen = new Set<string>();
  const selectedIds: string[] = [];
  for (const id of requestedIds) {
    if (availableIds.has(id) && !seen.has(id)) {
      seen.add(id);
      selectedIds.push(id);
    }
  }
  return selectedIds.length > 0 ? selectedIds : scenarios.map((scenario) => scenario.id);
}
/**
 * Coerce an arbitrary request payload into a complete, valid run selection.
 * Unknown provider modes fall back to the synthetic lane, blank model refs
 * fall back to the lane defaults, and fastMode defaults to on only for the
 * live lane.
 */
export function normalizeQaRunSelection(
  input: unknown,
  scenarios: QaSeedScenario[],
): QaLabRunSelection {
  const isObject = Boolean(input) && typeof input === "object";
  const payload = isObject ? (input as Record<string, unknown>) : {};
  const providerMode = normalizeProviderMode(payload.providerMode);
  const fastMode =
    typeof payload.fastMode === "boolean" ? payload.fastMode : providerMode === "live-openai";
  return {
    providerMode,
    primaryModel: normalizeModel(payload.primaryModel, defaultModelForMode(providerMode)),
    alternateModel: normalizeModel(payload.alternateModel, defaultModelForMode(providerMode, true)),
    fastMode,
    scenarioIds: normalizeScenarioIds(payload.scenarioIds, scenarios),
  };
}
// Runner state before any suite has run: idle status with the default
// selection and no artifacts or error recorded.
export function createIdleQaRunnerSnapshot(scenarios: QaSeedScenario[]): QaLabRunnerSnapshot {
  const selection = createDefaultQaRunSelection(scenarios);
  return {
    status: "idle",
    selection,
    artifacts: null,
    error: null,
  };
}
// Build a unique artifacts directory for one suite run, e.g.
// <base>/.artifacts/qa-e2e/lab-2026-04-06-172041123Z — the ISO timestamp
// with colons and dots removed and the "T" turned into a dash.
export function createQaRunOutputDir(baseDir = process.cwd()) {
  const iso = new Date().toISOString();
  const stamp = iso.replace("T", "-").replaceAll(":", "").replaceAll(".", "");
  return path.join(baseDir, ".artifacts", "qa-e2e", `lab-${stamp}`);
}

View File

@@ -0,0 +1,6 @@
// Thin lazy-loading wrapper around runQaSuite: defers importing the heavy
// suite module until a suite run is actually requested, while mirroring
// runQaSuite's exact parameter list via the Parameters<> utility type.
export async function runQaSuiteFromRuntime(
...args: Parameters<typeof import("./suite.js").runQaSuite>
) {
const { runQaSuite } = await import("./suite.js");
return await runQaSuite(...args);
}

View File

@@ -11,7 +11,7 @@ import type { QaBusState } from "./bus-state.js";
import { extractQaToolPayload } from "./extract-tool-payload.js";
import { startQaGatewayChild } from "./gateway-child.js";
import { startQaLabServer } from "./lab-server.js";
import type { QaLabScenarioOutcome } from "./lab-server.js";
import type { QaLabLatestReport, QaLabScenarioOutcome } from "./lab-server.js";
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
@@ -1760,6 +1760,7 @@ export async function runQaSuite(params?: {
alternateModel?: string;
fastMode?: boolean;
scenarioIds?: string[];
lab?: Awaited<ReturnType<typeof startQaLabServer>>;
}) {
const startedAt = new Date();
const providerMode = params?.providerMode ?? "mock-openai";
@@ -1775,11 +1776,14 @@ export async function runQaSuite(params?: {
path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
await fs.mkdir(outputDir, { recursive: true });
const lab = await startQaLabServer({
host: "127.0.0.1",
port: 0,
embeddedGateway: "disabled",
});
const ownsLab = !params?.lab;
const lab =
params?.lab ??
(await startQaLabServer({
host: "127.0.0.1",
port: 0,
embeddedGateway: "disabled",
}));
const mock =
providerMode === "mock-openai"
? await startQaMockOpenAiServer({
@@ -1946,6 +1950,12 @@ export async function runQaSuite(params?: {
)}\n`,
"utf8",
);
const latestReport = {
outputPath: reportPath,
markdown: report,
generatedAt: finishedAt.toISOString(),
} satisfies QaLabLatestReport;
lab.setLatestReport(latestReport);
return {
outputDir,
@@ -1961,6 +1971,14 @@ export async function runQaSuite(params?: {
keepTemp,
});
await mock?.stop();
await lab.stop();
if (ownsLab) {
await lab.stop();
} else {
lab.setControlUi({
controlUiUrl: null,
controlUiToken: null,
controlUiProxyTarget: null,
});
}
}
}

View File

@@ -2,6 +2,7 @@ import {
type Bootstrap,
type OutcomesEnvelope,
type ReportEnvelope,
type RunnerSelection,
type Snapshot,
type TabId,
type UiState,
@@ -31,6 +32,25 @@ async function postJson<T>(path: string, body: unknown): Promise<T> {
return (await response.json()) as T;
}
// Lane defaults for the runner form. The live lane fills both model slots
// with the first real-catalog entry (the catalog is sorted preferred-first)
// and turns fast mode on; the synthetic lane uses the stock mock pair with
// fast mode off.
function defaultModelsForProviderMode(
  mode: RunnerSelection["providerMode"],
  bootstrap?: Bootstrap | null,
): Pick<RunnerSelection, "primaryModel" | "alternateModel" | "fastMode"> {
  if (mode !== "live-openai") {
    return {
      primaryModel: "mock-openai/gpt-5.4",
      alternateModel: "mock-openai/gpt-5.4-alt",
      fastMode: false,
    };
  }
  const catalogFirst = bootstrap?.runnerCatalog.real[0]?.key ?? "openai/gpt-5.4";
  return {
    primaryModel: catalogFirst,
    alternateModel: catalogFirst,
    fastMode: true,
  };
}
export async function createQaLabApp(root: HTMLDivElement) {
const state: UiState = {
bootstrap: null,
@@ -41,6 +61,8 @@ export async function createQaLabApp(root: HTMLDivElement) {
selectedThreadId: null,
selectedScenarioId: null,
activeTab: "debug",
runnerDraft: null,
runnerDraftDirty: false,
composer: {
conversationKind: "direct",
conversationId: "alice",
@@ -64,6 +86,13 @@ export async function createQaLabApp(root: HTMLDivElement) {
state.snapshot = snapshot;
state.latestReport = report.report ?? bootstrap.latestReport;
state.scenarioRun = outcomes.run;
if (!state.runnerDraft || !state.runnerDraftDirty) {
state.runnerDraft = {
...bootstrap.runner.selection,
scenarioIds: [...bootstrap.runner.selection.scenarioIds],
};
state.runnerDraftDirty = false;
}
if (!state.selectedConversationId) {
state.selectedConversationId = snapshot.conversations[0]?.id ?? null;
}
@@ -86,6 +115,22 @@ export async function createQaLabApp(root: HTMLDivElement) {
render();
}
function updateRunnerDraft(mutator: (draft: RunnerSelection) => RunnerSelection) {
const fallback = state.bootstrap?.runner.selection;
if (!state.runnerDraft && fallback) {
state.runnerDraft = {
...fallback,
scenarioIds: [...fallback.scenarioIds],
};
}
if (!state.runnerDraft) {
return;
}
state.runnerDraft = mutator(state.runnerDraft);
state.runnerDraftDirty = true;
render();
}
async function runSelfCheck() {
state.busy = true;
state.error = null;
@@ -163,6 +208,42 @@ export async function createQaLabApp(root: HTMLDivElement) {
}
}
async function runSuite() {
if (!state.runnerDraft) {
state.error = "Runner selection not ready yet.";
render();
return;
}
state.busy = true;
state.error = null;
render();
try {
const result = await postJson<{ runner: { selection: RunnerSelection } }>(
"/api/scenario/suite",
{
providerMode: state.runnerDraft.providerMode,
primaryModel: state.runnerDraft.primaryModel,
alternateModel: state.runnerDraft.alternateModel,
fastMode: state.runnerDraft.fastMode,
scenarioIds: state.runnerDraft.scenarioIds,
},
);
state.runnerDraft = {
...result.runner.selection,
scenarioIds: [...result.runner.selection.scenarioIds],
};
state.runnerDraftDirty = false;
state.activeTab = "debug";
await refresh();
} catch (error) {
state.error = error instanceof Error ? error.message : String(error);
render();
} finally {
state.busy = false;
render();
}
}
function downloadReport() {
if (!state.latestReport?.markdown) {
return;
@@ -221,10 +302,32 @@ export async function createQaLabApp(root: HTMLDivElement) {
void resetState();
});
root
.querySelector<HTMLButtonElement>("[data-action='self-check']")!
.addEventListener("click", () => {
.querySelector<HTMLButtonElement>("[data-action='self-check']")
?.addEventListener("click", () => {
void runSelfCheck();
});
root
.querySelector<HTMLButtonElement>("[data-action='run-suite']")
?.addEventListener("click", () => {
void runSuite();
});
root
.querySelector<HTMLButtonElement>("[data-action='select-all-scenarios']")
?.addEventListener("click", () => {
updateRunnerDraft((draft) => ({
...draft,
scenarioIds:
state.bootstrap?.scenarios.map((scenario) => scenario.id) ?? draft.scenarioIds,
}));
});
root
.querySelector<HTMLButtonElement>("[data-action='clear-scenarios']")
?.addEventListener("click", () => {
updateRunnerDraft((draft) => ({
...draft,
scenarioIds: [],
}));
});
root.querySelector<HTMLButtonElement>("[data-action='send']")?.addEventListener("click", () => {
void sendInbound();
});
@@ -233,6 +336,58 @@ export async function createQaLabApp(root: HTMLDivElement) {
?.addEventListener("click", () => {
downloadReport();
});
root.querySelector<HTMLSelectElement>("#provider-mode")?.addEventListener("change", (event) => {
const mode =
(event.currentTarget as HTMLSelectElement).value === "live-openai"
? "live-openai"
: "mock-openai";
updateRunnerDraft((draft) => ({
...draft,
providerMode: mode,
...defaultModelsForProviderMode(mode, state.bootstrap),
}));
});
root.querySelector<HTMLInputElement>("#fast-mode")?.addEventListener("change", (event) => {
updateRunnerDraft((draft) => ({
...draft,
fastMode: (event.currentTarget as HTMLInputElement).checked,
}));
});
root.querySelector<HTMLInputElement>("#primary-model")?.addEventListener("input", (event) => {
updateRunnerDraft((draft) => ({
...draft,
primaryModel: (event.currentTarget as HTMLInputElement).value,
}));
});
root.querySelector<HTMLInputElement>("#alternate-model")?.addEventListener("input", (event) => {
updateRunnerDraft((draft) => ({
...draft,
alternateModel: (event.currentTarget as HTMLInputElement).value,
}));
});
root.querySelectorAll<HTMLInputElement>("[data-scenario-toggle-id]").forEach((node) => {
node.addEventListener("change", () => {
const scenarioId = node.dataset.scenarioToggleId;
if (!scenarioId) {
return;
}
updateRunnerDraft((draft) => {
const selected = new Set(draft.scenarioIds);
if (node.checked) {
selected.add(scenarioId);
} else {
selected.delete(scenarioId);
}
const orderedIds = state.bootstrap?.scenarios
.map((scenario) => scenario.id)
.filter((id) => selected.has(id)) ?? [...selected];
return {
...draft,
scenarioIds: orderedIds,
};
});
});
});
root
.querySelector<HTMLSelectElement>("#conversation-kind")

View File

@@ -442,6 +442,98 @@ textarea {
gap: 0.75rem;
}
.run-form-grid {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 0.8rem;
margin-bottom: 1rem;
}
.checkbox-label {
display: flex;
flex-direction: column;
justify-content: end;
gap: 0.45rem;
}
.checkbox-label input {
width: 1.05rem;
height: 1.05rem;
margin: 0;
}
.panel-header.compact {
align-items: end;
}
.toolbar.mini button {
padding: 0.48rem 0.78rem;
}
.scenario-picker {
display: flex;
flex-direction: column;
gap: 0.6rem;
max-height: 28vh;
overflow: auto;
}
.scenario-toggle {
display: grid;
grid-template-columns: auto minmax(0, 1fr);
gap: 0.75rem;
align-items: start;
padding: 0.82rem 0.9rem;
border-radius: 16px;
border: 1px solid rgba(255, 255, 255, 0.08);
background: rgba(255, 255, 255, 0.03);
}
.scenario-toggle input {
width: 1rem;
height: 1rem;
margin-top: 0.18rem;
}
.scenario-toggle span {
display: flex;
flex-direction: column;
gap: 0.22rem;
}
.scenario-toggle small {
color: var(--muted);
}
.scenario-toggle.selected {
border-color: rgba(121, 224, 198, 0.34);
background: linear-gradient(180deg, rgba(121, 224, 198, 0.11), rgba(121, 224, 198, 0.04));
}
.artifact-list {
display: flex;
flex-direction: column;
gap: 0.45rem;
margin-top: 0.9rem;
}
.artifact-list code {
display: block;
padding: 0.62rem 0.72rem;
border-radius: 12px;
background: rgba(255, 255, 255, 0.04);
border: 1px solid rgba(255, 255, 255, 0.06);
color: #dce6f4;
white-space: pre-wrap;
word-break: break-word;
}
.runner-error {
margin: 0.9rem 0 0;
color: var(--danger);
white-space: pre-wrap;
}
.meta-label {
display: block;
margin-bottom: 0.28rem;
@@ -588,6 +680,11 @@ label span {
grid-template-columns: 1fr;
}
.run-form-grid,
.composer-grid {
grid-template-columns: 1fr;
}
.messages,
.report {
max-height: none;

View File

@@ -67,6 +67,11 @@ export type Bootstrap = {
senderId: string;
senderName: string;
};
runner: RunnerSnapshot;
runnerCatalog: {
status: "loading" | "ready" | "failed";
real: RunnerModelOption[];
};
};
export type ScenarioStep = {
@@ -101,6 +106,36 @@ export type ScenarioRun = {
};
};
// Suite-runner form state: provider lane, model refs, fast mode, and which
// scenario ids are armed for the next run.
export type RunnerSelection = {
providerMode: "mock-openai" | "live-openai";
primaryModel: string;
alternateModel: string;
fastMode: boolean;
scenarioIds: string[];
};
// Server-reported runner state for the current or most recent suite run.
export type RunnerSnapshot = {
status: "idle" | "running" | "completed" | "failed";
selection: RunnerSelection;
// ISO timestamps; absent until a run has started/finished.
startedAt?: string;
finishedAt?: string;
// Output locations once a run completes; null while idle, running, or failed.
artifacts: null | {
outputDir: string;
reportPath: string;
summaryPath: string;
watchUrl: string;
};
error: string | null;
};
// One selectable real-provider model row from the server catalog.
export type RunnerModelOption = {
key: string;
name: string;
provider: string;
input: string;
// True for the model the UI should pre-select by default.
preferred: boolean;
};
export type OutcomesEnvelope = {
run: ScenarioRun | null;
};
@@ -116,6 +151,8 @@ export type UiState = {
selectedThreadId: string | null;
selectedScenarioId: string | null;
activeTab: TabId;
runnerDraft: RunnerSelection | null;
runnerDraftDirty: boolean;
composer: {
conversationKind: "direct" | "channel";
conversationId: string;
@@ -200,6 +237,49 @@ function renderStatusChip(status: ScenarioOutcome["status"]) {
return `<span class="status-chip status-${status}">${escapeHtml(label)}</span>`;
}
// Map runner status onto the existing status-chip palette: "completed"
// renders with the "pass" tone, "failed" with "fail", and the remaining
// states ("idle", "running") use their own name as the tone.
function renderRunnerStatusChip(status: RunnerSnapshot["status"]) {
  let tone: string = status;
  if (status === "completed") {
    tone = "pass";
  } else if (status === "failed") {
    tone = "fail";
  }
  return `<span class="status-chip status-${tone}">${escapeHtml(status)}</span>`;
}
// The user's in-progress draft wins; otherwise fall back to the server's
// bootstrap selection; null until bootstrap has loaded.
function deriveRunnerSelection(state: UiState): RunnerSelection | null {
  if (state.runnerDraft != null) {
    return state.runnerDraft;
  }
  return state.bootstrap?.runner.selection ?? null;
}
// Render a labelled <select> of runner model options. If the current value is
// not in the catalog (e.g. a hand-typed model ref), a synthetic option is
// prepended so the selection still displays and stays submittable.
function renderRunnerModelSelect(params: {
id: string;
label: string;
value: string;
options: RunnerModelOption[];
disabled: boolean;
}) {
const values = new Set(params.options.map((option) => option.key));
// Copy before unshift so the caller's options array is never mutated.
const options = [...params.options];
// Keep an off-catalog value selectable; blank values get no placeholder row.
if (!values.has(params.value) && params.value.trim()) {
options.unshift({
key: params.value,
name: params.value,
provider: params.value.split("/")[0] ?? "custom",
input: "text",
preferred: false,
});
}
return `
<label>
<span>${escapeHtml(params.label)}</span>
<select id="${escapeHtml(params.id)}"${params.disabled ? " disabled" : ""}>
${options
.map(
(option) => `
<option value="${escapeHtml(option.key)}"${option.key === params.value ? " selected" : ""}>
${escapeHtml(option.key)}
</option>`,
)
.join("")}
</select>
</label>`;
}
function renderRefs(refs: string[] | undefined, kind: "docs" | "code") {
if (!refs?.length) {
return `<p class="empty">No ${kind} refs attached.</p>`;
@@ -318,29 +398,158 @@ function renderScenarioInspector(state: UiState, scenarios: SeedScenario[]) {
// Renders the "Run state" panel: structured scenario-run counts plus the
// runner-lane snapshot (selection, timestamps, artifacts, error) drawn from
// the bootstrap payload.
// NOTE(review): this span reads like a unified diff with the +/- markers
// stripped — both the old guard `if (!run)` and the new guard
// `if (!run && !runner)` are present, and the template carries duplicated
// <h2>/status-chip/run-grid variants. Reconcile against the committed file
// before editing further; as written the brace nesting is suspicious.
function renderRunPanel(state: UiState) {
  // Structured suite/self-check outcomes, if a run has started.
  const run = state.scenarioRun;
  if (!run) {
  // Runner lane snapshot from bootstrap; null until the server reports one.
  const runner = state.bootstrap?.runner ?? null;
  if (!run && !runner) {
    return `
      <section class="panel">
        <h2>Run state</h2>
        <p class="empty">No structured scenario run yet. Seed plan loaded; outcomes arrive once a suite or self-check starts.</p>
      </section>`;
  }
  // Selection metadata is only rendered when a runner snapshot exists.
  const selection = runner?.selection ?? null;
  return `
    <section class="panel">
      <div class="panel-header">
        <div>
          <p class="eyebrow">Live run</p>
          <h2>${escapeHtml(run.kind === "suite" ? "Scenario suite" : "Self-check")}</h2>
          <p class="eyebrow">Run state</p>
          <h2>${escapeHtml(run?.kind === "self-check" ? "Self-check" : "Scenario suite")}</h2>
        </div>
        <span class="status-chip status-${run.status === "completed" ? "pass" : run.status === "running" ? "running" : "pending"}">${escapeHtml(run.status)}</span>
        ${runner ? renderRunnerStatusChip(runner.status) : ""}
      </div>
      <div class="run-grid">
        <div><span class="meta-label">Total</span><strong>${run.counts.total}</strong></div>
        <div><span class="meta-label">Pass</span><strong>${run.counts.passed}</strong></div>
        <div><span class="meta-label">Fail</span><strong>${run.counts.failed}</strong></div>
        <div><span class="meta-label">Pending</span><strong>${run.counts.pending}</strong></div>
      ${
        run
          ? `
      <div class="run-grid">
        <div><span class="meta-label">Total</span><strong>${run.counts.total}</strong></div>
        <div><span class="meta-label">Pass</span><strong>${run.counts.passed}</strong></div>
        <div><span class="meta-label">Fail</span><strong>${run.counts.failed}</strong></div>
        <div><span class="meta-label">Pending</span><strong>${run.counts.pending}</strong></div>
      </div>`
          : '<p class="empty">Waiting for structured outcomes.</p>'
      }
      ${
        selection
          ? `<p class="subtle">${escapeHtml(selection.providerMode === "live-openai" ? "Real provider lane" : "Synthetic OpenAI")} · ${escapeHtml(selection.primaryModel)} · ${selection.scenarioIds.length} scenarios</p>`
          : ""
      }
      <p class="subtle">Started ${escapeHtml(formatIso(runner?.startedAt ?? run?.startedAt))} · Finished ${escapeHtml(formatIso(runner?.finishedAt ?? run?.finishedAt))}</p>
      ${
        runner?.artifacts
          ? `
      <div class="artifact-list">
        <code>${escapeHtml(runner.artifacts.outputDir)}</code>
        <code>${escapeHtml(runner.artifacts.reportPath)}</code>
        <code>${escapeHtml(runner.artifacts.summaryPath)}</code>
      </div>`
          : ""
      }
      ${runner?.error ? `<p class="runner-error">${escapeHtml(runner.error)}</p>` : ""}
    </section>`;
}
// Renders the interactive "Suite console" panel: lane picker, fast-mode
// toggle, model selects (catalog-backed when the live lane has real models,
// free-text inputs otherwise), a scenario checkbox picker, and the
// run-suite / self-check buttons. Returns "" when no selection exists yet.
// All inputs are disabled while the runner status is "running".
// NOTE(review): the trailing "Started … Finished …" <p> near the end
// references `run`, which is not defined in this function — it looks like
// residue from a stripped diff (the line belonged to the old renderRunPanel
// tail). Confirm against the committed file.
function renderRunnerConsole(state: UiState, scenarios: SeedScenario[]) {
  // Operator draft if present, else the bootstrap default selection.
  const selection = deriveRunnerSelection(state);
  if (!selection) {
    return "";
  }
  const runner = state.bootstrap?.runner ?? null; // runner lane snapshot
  const realModelOptions = state.bootstrap?.runnerCatalog.real ?? []; // live-lane model catalog
  const selectedIds = new Set(selection.scenarioIds); // fast membership checks for checkboxes
  const isRunning = runner?.status === "running"; // disables all controls mid-run
  // Catalog-backed selects only apply on the live lane with models loaded.
  const usesRealCatalog = selection.providerMode === "live-openai" && realModelOptions.length > 0;
  return `
    <section class="panel run-console">
      <div class="panel-header">
        <div>
          <p class="eyebrow">Suite console</p>
          <h2>Launch matrix</h2>
        </div>
        ${runner ? renderRunnerStatusChip(runner.status) : ""}
      </div>
      <div class="run-form-grid">
        <label>
          <span>Lane</span>
          <select id="provider-mode"${isRunning ? " disabled" : ""}>
            <option value="mock-openai"${selection.providerMode === "mock-openai" ? " selected" : ""}>Synthetic</option>
            <option value="live-openai"${selection.providerMode === "live-openai" ? " selected" : ""}>Real providers</option>
          </select>
        </label>
        <label class="checkbox-label">
          <span>Fast mode</span>
          <input id="fast-mode" type="checkbox"${selection.fastMode ? " checked" : ""}${isRunning ? " disabled" : ""} />
        </label>
        ${
          usesRealCatalog
            ? renderRunnerModelSelect({
              id: "primary-model",
              label: "Primary model",
              value: selection.primaryModel,
              options: realModelOptions,
              disabled: isRunning,
            })
            : `<label>
          <span>Primary model</span>
          <input id="primary-model" value="${escapeHtml(selection.primaryModel)}"${isRunning ? " disabled" : ""} />
        </label>`
        }
        ${
          usesRealCatalog
            ? renderRunnerModelSelect({
              id: "alternate-model",
              label: "Alt model",
              value: selection.alternateModel,
              options: realModelOptions,
              disabled: isRunning,
            })
            : `<label>
          <span>Alt model</span>
          <input id="alternate-model" value="${escapeHtml(selection.alternateModel)}"${isRunning ? " disabled" : ""} />
        </label>`
        }
      </div>
      ${
        selection.providerMode === "live-openai"
          ? `<p class="subtle">${escapeHtml(
            state.bootstrap?.runnerCatalog.status === "loading"
              ? "Loading real model catalog…"
              : state.bootstrap?.runnerCatalog.status === "failed"
                ? "Real model catalog unavailable; using manual refs."
                : `${realModelOptions.length} real models ready. gpt-5.4 stays pinned first when available.`,
          )}</p>`
          : ""
      }
      <div class="panel-header compact">
        <div>
          <p class="eyebrow">Scenario selection</p>
          <h3>${selection.scenarioIds.length}/${scenarios.length} armed</h3>
        </div>
        <div class="toolbar mini">
          <button data-action="select-all-scenarios"${isRunning ? " disabled" : ""}>All</button>
          <button data-action="clear-scenarios"${isRunning ? " disabled" : ""}>None</button>
        </div>
      </div>
      <div class="scenario-picker">
        ${
          scenarios.length === 0
            ? '<p class="empty">No scenarios available.</p>'
            : scenarios
              .map(
                (scenario) => `
        <label class="scenario-toggle${selectedIds.has(scenario.id) ? " selected" : ""}">
          <input type="checkbox" data-scenario-toggle-id="${escapeHtml(scenario.id)}"${selectedIds.has(scenario.id) ? " checked" : ""}${isRunning ? " disabled" : ""} />
          <span>
            <strong>${escapeHtml(scenario.title)}</strong>
            <small>${escapeHtml(scenario.id)} · ${escapeHtml(scenario.surface)}</small>
          </span>
        </label>`,
              )
              .join("")
        }
      </div>
      <div class="toolbar lower">
        <button class="accent" data-action="run-suite"${isRunning || selection.scenarioIds.length === 0 || state.busy ? " disabled" : ""}>Run selected scenarios</button>
        <button data-action="self-check"${isRunning || state.busy ? " disabled" : ""}>Run self-check</button>
      </div>
      <p class="subtle">Started ${escapeHtml(formatIso(run.startedAt))} · Finished ${escapeHtml(formatIso(run.finishedAt))}</p>
    </section>`;
}
@@ -507,6 +716,7 @@ export function renderQaLabUi(state: UiState) {
const hasControlUi = Boolean(state.bootstrap?.controlUiEmbeddedUrl);
const dashboardShellClass = hasControlUi ? "dashboard split-dashboard" : "dashboard";
const run = state.scenarioRun;
const runner = state.bootstrap?.runner ?? null;
return `
<div class="${dashboardShellClass}">
@@ -539,7 +749,6 @@ export function renderQaLabUi(state: UiState) {
<div class="toolbar">
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run self-check</button>
</div>
</header>
<section class="statusbar">
@@ -552,11 +761,17 @@ export function renderQaLabUi(state: UiState) {
? `<span class="pill success">${escapeHtml(run.kind)} ${escapeHtml(run.status)} · ${run.counts.passed}/${run.counts.total} pass</span>`
: '<span class="pill">No structured run yet</span>'
}
${
runner
? `<span class="pill${runner.status === "failed" ? " error" : runner.status === "completed" ? " success" : ""}">${escapeHtml(runner.status)} lane · ${escapeHtml(runner.selection.providerMode)}</span>`
: ""
}
${state.latestReport ? `<span class="pill">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
</section>
<main class="workspace">
<aside class="rail">
${renderRunnerConsole(state, scenarios)}
${renderRunPanel(state)}
<section class="panel">
<h2>Conversations</h2>