mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-28 00:43:57 +00:00
feat: add interactive qa lab suite runner
This commit is contained in:
@@ -40,6 +40,7 @@ describe("qa-lab server", () => {
|
||||
kickoffTask: string;
|
||||
scenarios: Array<{ id: string; title: string }>;
|
||||
defaults: { conversationId: string; senderId: string };
|
||||
runner: { status: string; selection: { providerMode: string; scenarioIds: string[] } };
|
||||
};
|
||||
expect(bootstrap.defaults.conversationId).toBe("qa-operator");
|
||||
expect(bootstrap.defaults.senderId).toBe("qa-operator");
|
||||
@@ -48,6 +49,9 @@ describe("qa-lab server", () => {
|
||||
expect(bootstrap.kickoffTask).toContain("Lobster Invaders");
|
||||
expect(bootstrap.scenarios.length).toBeGreaterThanOrEqual(10);
|
||||
expect(bootstrap.scenarios.some((scenario) => scenario.id === "dm-chat-baseline")).toBe(true);
|
||||
expect(bootstrap.runner.status).toBe("idle");
|
||||
expect(bootstrap.runner.selection.providerMode).toBe("mock-openai");
|
||||
expect(bootstrap.runner.selection.scenarioIds).toHaveLength(bootstrap.scenarios.length);
|
||||
|
||||
const messageResponse = await fetch(`${lab.baseUrl}/api/inbound/message`, {
|
||||
method: "POST",
|
||||
|
||||
@@ -14,6 +14,12 @@ import { fileURLToPath } from "node:url";
|
||||
import { handleQaBusRequest, writeError, writeJson } from "./bus-server.js";
|
||||
import { createQaBusState, type QaBusState } from "./bus-state.js";
|
||||
import { createQaRunnerRuntime } from "./harness-runtime.js";
|
||||
import type { QaRunnerModelOption } from "./model-catalog.runtime.js";
|
||||
import {
|
||||
createIdleQaRunnerSnapshot,
|
||||
createQaRunOutputDir,
|
||||
normalizeQaRunSelection,
|
||||
} from "./run-config.js";
|
||||
import { qaChannelPlugin, setQaChannelRuntime, type OpenClawConfig } from "./runtime-api.js";
|
||||
import { readQaBootstrapScenarioCatalog } from "./scenario-catalog.js";
|
||||
import { runQaSelfCheckAgainstState, type QaSelfCheckResult } from "./self-check.js";
|
||||
@@ -24,6 +30,8 @@ type QaLabLatestReport = {
|
||||
generatedAt: string;
|
||||
};
|
||||
|
||||
export type { QaLabLatestReport };
|
||||
|
||||
type QaLabBootstrapDefaults = {
|
||||
conversationKind: "direct" | "channel";
|
||||
conversationId: string;
|
||||
@@ -416,6 +424,10 @@ export async function startQaLabServer(params?: {
|
||||
let latestScenarioRun: QaLabScenarioRun | null = null;
|
||||
const scenarioCatalog = readQaBootstrapScenarioCatalog();
|
||||
const bootstrapDefaults = createBootstrapDefaults(params?.autoKickoffTarget);
|
||||
let runnerModelOptions: QaRunnerModelOption[] = [];
|
||||
let runnerModelCatalogStatus: "loading" | "ready" | "failed" = "loading";
|
||||
let runnerSnapshot = createIdleQaRunnerSnapshot(scenarioCatalog.scenarios);
|
||||
let activeSuiteRun: Promise<void> | null = null;
|
||||
let controlUiProxyTarget = params?.controlUiProxyTarget?.trim()
|
||||
? new URL(params.controlUiProxyTarget)
|
||||
: null;
|
||||
@@ -428,8 +440,34 @@ export async function startQaLabServer(params?: {
|
||||
}
|
||||
| undefined;
|
||||
const embeddedGatewayEnabled = params?.embeddedGateway !== "disabled";
|
||||
let labHandle: {
|
||||
baseUrl: string;
|
||||
listenUrl: string;
|
||||
state: QaBusState;
|
||||
setControlUi: (next: {
|
||||
controlUiUrl?: string | null;
|
||||
controlUiToken?: string | null;
|
||||
controlUiProxyTarget?: string | null;
|
||||
}) => void;
|
||||
setScenarioRun: (next: Omit<QaLabScenarioRun, "counts"> | null) => void;
|
||||
setLatestReport: (next: QaLabLatestReport | null) => void;
|
||||
runSelfCheck: () => Promise<QaSelfCheckResult>;
|
||||
stop: () => Promise<void>;
|
||||
} | null = null;
|
||||
|
||||
let publicBaseUrl = "";
|
||||
const runnerModelCatalogPromise = (async () => {
|
||||
try {
|
||||
const { loadQaRunnerModelOptions } = await import("./model-catalog.runtime.js");
|
||||
runnerModelOptions = await loadQaRunnerModelOptions({
|
||||
repoRoot: process.cwd(),
|
||||
});
|
||||
runnerModelCatalogStatus = "ready";
|
||||
} catch {
|
||||
runnerModelOptions = [];
|
||||
runnerModelCatalogStatus = "failed";
|
||||
}
|
||||
})();
|
||||
const server = createServer(async (req, res) => {
|
||||
const url = new URL(req.url ?? "/", "http://127.0.0.1");
|
||||
|
||||
@@ -465,6 +503,11 @@ export async function startQaLabServer(params?: {
|
||||
kickoffTask: scenarioCatalog.kickoffTask,
|
||||
scenarios: scenarioCatalog.scenarios,
|
||||
defaults: bootstrapDefaults,
|
||||
runner: runnerSnapshot,
|
||||
runnerCatalog: {
|
||||
status: runnerModelCatalogStatus,
|
||||
real: runnerModelOptions,
|
||||
},
|
||||
});
|
||||
return;
|
||||
}
|
||||
@@ -485,7 +528,21 @@ export async function startQaLabServer(params?: {
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/reset") {
|
||||
if (activeSuiteRun) {
|
||||
writeError(res, 409, "QA suite run already in progress");
|
||||
return;
|
||||
}
|
||||
state.reset();
|
||||
latestReport = null;
|
||||
latestScenarioRun = null;
|
||||
runnerSnapshot = {
|
||||
...runnerSnapshot,
|
||||
status: "idle",
|
||||
artifacts: null,
|
||||
error: null,
|
||||
startedAt: undefined,
|
||||
finishedAt: undefined,
|
||||
};
|
||||
writeJson(res, 200, { ok: true });
|
||||
return;
|
||||
}
|
||||
@@ -507,6 +564,10 @@ export async function startQaLabServer(params?: {
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/scenario/self-check") {
|
||||
if (activeSuiteRun) {
|
||||
writeError(res, 409, "QA suite run already in progress");
|
||||
return;
|
||||
}
|
||||
latestScenarioRun = withQaLabRunCounts({
|
||||
kind: "self-check",
|
||||
status: "running",
|
||||
@@ -547,6 +608,68 @@ export async function startQaLabServer(params?: {
|
||||
writeJson(res, 200, serializeSelfCheck(result));
|
||||
return;
|
||||
}
|
||||
if (req.method === "POST" && url.pathname === "/api/scenario/suite") {
|
||||
if (activeSuiteRun) {
|
||||
writeError(res, 409, "QA suite run already in progress");
|
||||
return;
|
||||
}
|
||||
const selection = normalizeQaRunSelection(await readJson(req), scenarioCatalog.scenarios);
|
||||
state.reset();
|
||||
latestReport = null;
|
||||
latestScenarioRun = null;
|
||||
const startedAt = new Date().toISOString();
|
||||
runnerSnapshot = {
|
||||
status: "running",
|
||||
selection,
|
||||
startedAt,
|
||||
finishedAt: undefined,
|
||||
artifacts: null,
|
||||
error: null,
|
||||
};
|
||||
activeSuiteRun = (async () => {
|
||||
try {
|
||||
const { runQaSuiteFromRuntime } = await import("./suite-launch.runtime.js");
|
||||
const result = await runQaSuiteFromRuntime({
|
||||
lab: labHandle ?? undefined,
|
||||
outputDir: createQaRunOutputDir(),
|
||||
providerMode: selection.providerMode,
|
||||
primaryModel: selection.primaryModel,
|
||||
alternateModel: selection.alternateModel,
|
||||
fastMode: selection.fastMode,
|
||||
scenarioIds: selection.scenarioIds,
|
||||
});
|
||||
runnerSnapshot = {
|
||||
status: "completed",
|
||||
selection,
|
||||
startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
artifacts: {
|
||||
outputDir: result.outputDir,
|
||||
reportPath: result.reportPath,
|
||||
summaryPath: result.summaryPath,
|
||||
watchUrl: result.watchUrl,
|
||||
},
|
||||
error: null,
|
||||
};
|
||||
} catch (error) {
|
||||
runnerSnapshot = {
|
||||
status: "failed",
|
||||
selection,
|
||||
startedAt,
|
||||
finishedAt: new Date().toISOString(),
|
||||
artifacts: null,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
};
|
||||
} finally {
|
||||
activeSuiteRun = null;
|
||||
}
|
||||
})();
|
||||
writeJson(res, 202, {
|
||||
ok: true,
|
||||
runner: runnerSnapshot,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
if (req.method !== "GET" && req.method !== "HEAD") {
|
||||
writeError(res, 404, "not found");
|
||||
@@ -611,6 +734,7 @@ export async function startQaLabServer(params?: {
|
||||
kickoffTask: scenarioCatalog.kickoffTask,
|
||||
});
|
||||
}
|
||||
void runnerModelCatalogPromise;
|
||||
|
||||
server.on("upgrade", (req, socket, head) => {
|
||||
const url = new URL(req.url ?? "/", "http://127.0.0.1");
|
||||
@@ -626,7 +750,7 @@ export async function startQaLabServer(params?: {
|
||||
});
|
||||
});
|
||||
|
||||
return {
|
||||
const lab = {
|
||||
baseUrl: publicBaseUrl,
|
||||
listenUrl,
|
||||
state,
|
||||
@@ -644,6 +768,9 @@ export async function startQaLabServer(params?: {
|
||||
setScenarioRun(next: Omit<QaLabScenarioRun, "counts"> | null) {
|
||||
latestScenarioRun = next ? withQaLabRunCounts(next) : null;
|
||||
},
|
||||
setLatestReport(next: QaLabLatestReport | null) {
|
||||
latestReport = next;
|
||||
},
|
||||
async runSelfCheck() {
|
||||
latestScenarioRun = withQaLabRunCounts({
|
||||
kind: "self-check",
|
||||
@@ -691,6 +818,8 @@ export async function startQaLabServer(params?: {
|
||||
);
|
||||
},
|
||||
};
|
||||
labHandle = lab;
|
||||
return lab;
|
||||
}
|
||||
|
||||
function serializeSelfCheck(result: QaSelfCheckResult) {
|
||||
|
||||
32
extensions/qa-lab/src/model-catalog.runtime.test.ts
Normal file
32
extensions/qa-lab/src/model-catalog.runtime.test.ts
Normal file
@@ -0,0 +1,32 @@
|
||||
import { describe, expect, it } from "vitest";
import { selectQaRunnerModelOptions } from "./model-catalog.runtime.js";

// Unit tests for the pure selection/sorting logic of the QA runner model
// catalog (no CLI spawning involved).
describe("qa runner model catalog", () => {
  // Rows with available !== true must be dropped, and the preferred
  // "openai/gpt-5.4" entry must sort ahead of all other providers.
  it("filters to available rows and prefers gpt-5.4 first", () => {
    expect(
      selectQaRunnerModelOptions([
        {
          key: "anthropic/claude-sonnet-4-5",
          name: "Claude Sonnet 4.5",
          input: "text",
          available: true,
          missing: false,
        },
        {
          key: "openai/gpt-5.4",
          name: "gpt-5.4",
          input: "text,image",
          available: true,
          missing: false,
        },
        {
          // Unavailable row: expected to be filtered out entirely.
          key: "openrouter/auto",
          name: "OpenRouter Auto",
          input: "text",
          available: false,
          missing: false,
        },
      ]).map((entry) => entry.key),
    ).toEqual(["openai/gpt-5.4", "anthropic/claude-sonnet-4-5"]);
  });
});
|
||||
126
extensions/qa-lab/src/model-catalog.runtime.ts
Normal file
126
extensions/qa-lab/src/model-catalog.runtime.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import { spawn } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { buildQaGatewayConfig } from "./qa-gateway-config.js";
|
||||
|
||||
/** One row as emitted by `openclaw models list --all --json`. */
type ModelRow = {
  // Model reference in "<provider>/<model>" form.
  key: string;
  // Human-readable display name.
  name: string;
  // Comma-separated input modalities, e.g. "text" or "text,image".
  input: string;
  // true/false when availability is known; null when undetermined.
  available: boolean | null;
  // Whether the catalog flagged this model as missing.
  missing: boolean;
};

/** A model choice surfaced to the QA runner UI, derived from a ModelRow. */
export type QaRunnerModelOption = {
  key: string;
  name: string;
  // Provider id parsed from `key`; "unknown" when the key has no "/" split.
  provider: string;
  input: string;
  // true only for "openai/gpt-5.4", which sorts ahead of everything else.
  preferred: boolean;
};
|
||||
|
||||
function splitModelKey(key: string) {
|
||||
const slash = key.indexOf("/");
|
||||
if (slash <= 0 || slash === key.length - 1) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
provider: key.slice(0, slash),
|
||||
model: key.slice(slash + 1),
|
||||
};
|
||||
}
|
||||
|
||||
export function selectQaRunnerModelOptions(rows: ModelRow[]): QaRunnerModelOption[] {
|
||||
const options = rows
|
||||
.filter((row) => row.available === true && !row.missing)
|
||||
.map((row) => {
|
||||
const parsed = splitModelKey(row.key);
|
||||
return {
|
||||
key: row.key,
|
||||
name: row.name,
|
||||
provider: parsed?.provider ?? "unknown",
|
||||
input: row.input,
|
||||
preferred: row.key === "openai/gpt-5.4",
|
||||
} satisfies QaRunnerModelOption;
|
||||
});
|
||||
|
||||
return options.toSorted((left, right) => {
|
||||
if (left.preferred !== right.preferred) {
|
||||
return left.preferred ? -1 : 1;
|
||||
}
|
||||
const providerCompare = left.provider.localeCompare(right.provider);
|
||||
if (providerCompare !== 0) {
|
||||
return providerCompare;
|
||||
}
|
||||
return left.name.localeCompare(right.name);
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Discover the real model options for the QA runner by shelling out to the
 * CLI build (`node dist/index.js models list --all --json`).
 *
 * The probe runs inside a throwaway temp sandbox: a fresh HOME/state/config
 * tree is created and pointed at via environment variables so the child
 * process cannot read or write the caller's real OpenClaw configuration.
 * The sandbox is always removed, even when the probe fails.
 *
 * @param params.repoRoot - working directory for the child process;
 *   assumed to contain a built `dist/index.js` — TODO confirm with callers.
 * @returns filtered/sorted options via selectQaRunnerModelOptions.
 * @throws when the child fails to spawn or exits non-zero (message
 *   includes the captured stderr), or when stdout is not valid JSON.
 */
export async function loadQaRunnerModelOptions(params: { repoRoot: string }) {
  // Unique temp root per call; everything below lives under it.
  const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-qa-model-catalog-"));
  const workspaceDir = path.join(tempRoot, "workspace");
  const stateDir = path.join(tempRoot, "state");
  const homeDir = path.join(tempRoot, "home");
  const configPath = path.join(tempRoot, "openclaw.json");

  try {
    await Promise.all([
      fs.mkdir(workspaceDir, { recursive: true }),
      fs.mkdir(stateDir, { recursive: true }),
      fs.mkdir(homeDir, { recursive: true }),
    ]);
    // Minimal gateway config so the CLI can resolve the live-openai model
    // catalog; port 0 and a dead bus URL because no server is started here.
    const cfg = buildQaGatewayConfig({
      bind: "loopback",
      gatewayPort: 0,
      gatewayToken: "qa-model-catalog",
      qaBusBaseUrl: "http://127.0.0.1:9",
      workspaceDir,
      providerMode: "live-openai",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      controlUiEnabled: false,
    });
    await fs.writeFile(configPath, `${JSON.stringify(cfg, null, 2)}\n`, "utf8");

    // Collect child output as raw chunks; decoded once after exit.
    const stdout: Buffer[] = [];
    const stderr: Buffer[] = [];
    await new Promise<void>((resolve, reject) => {
      const child = spawn(
        process.execPath,
        ["dist/index.js", "models", "list", "--all", "--json"],
        {
          cwd: params.repoRoot,
          env: {
            ...process.env,
            // Redirect every config/state location into the sandbox.
            HOME: homeDir,
            OPENCLAW_HOME: homeDir,
            OPENCLAW_CONFIG_PATH: configPath,
            OPENCLAW_STATE_DIR: stateDir,
            OPENCLAW_OAUTH_DIR: path.join(stateDir, "credentials"),
          },
          stdio: ["ignore", "pipe", "pipe"],
        },
      );
      child.stdout.on("data", (chunk) => stdout.push(Buffer.from(chunk)));
      child.stderr.on("data", (chunk) => stderr.push(Buffer.from(chunk)));
      child.once("error", reject);
      child.once("exit", (code) => {
        if (code === 0) {
          resolve();
          return;
        }
        // Surface stderr in the error so callers can see why the CLI failed.
        reject(
          new Error(
            `qa model catalog failed (${code ?? "unknown"}): ${Buffer.concat(stderr).toString("utf8").trim()}`,
          ),
        );
      });
    });

    const payload = JSON.parse(Buffer.concat(stdout).toString("utf8")) as { models?: ModelRow[] };
    return selectQaRunnerModelOptions(payload.models ?? []);
  } finally {
    // Best-effort sandbox cleanup on both success and failure paths.
    await fs.rm(tempRoot, { recursive: true, force: true });
  }
}
|
||||
@@ -38,6 +38,16 @@ export function buildQaGatewayConfig(params: {
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
}): OpenClawConfig {
|
||||
const splitModelRef = (ref: string) => {
|
||||
const slash = ref.indexOf("/");
|
||||
if (slash <= 0 || slash === ref.length - 1) {
|
||||
return null;
|
||||
}
|
||||
return {
|
||||
provider: ref.slice(0, slash),
|
||||
model: ref.slice(slash + 1),
|
||||
};
|
||||
};
|
||||
const mockProviderBaseUrl = params.providerBaseUrl ?? "http://127.0.0.1:44080/v1";
|
||||
const mockOpenAiProvider: ModelProviderConfig = {
|
||||
baseUrl: mockProviderBaseUrl,
|
||||
@@ -92,10 +102,6 @@ export function buildQaGatewayConfig(params: {
|
||||
],
|
||||
};
|
||||
const providerMode = params.providerMode ?? "mock-openai";
|
||||
const allowedPlugins =
|
||||
providerMode === "live-openai"
|
||||
? ["memory-core", "openai", "qa-channel"]
|
||||
: ["memory-core", "qa-channel"];
|
||||
const primaryModel =
|
||||
params.primaryModel ??
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4");
|
||||
@@ -104,6 +110,20 @@ export function buildQaGatewayConfig(params: {
|
||||
(providerMode === "live-openai" ? "openai/gpt-5.4" : "mock-openai/gpt-5.4-alt");
|
||||
const imageGenerationModelRef =
|
||||
providerMode === "live-openai" ? "openai/gpt-image-1" : "mock-openai/gpt-image-1";
|
||||
const selectedProviderIds =
|
||||
providerMode === "live-openai"
|
||||
? [
|
||||
...new Set(
|
||||
[primaryModel, alternateModel, imageGenerationModelRef]
|
||||
.map((ref) => splitModelRef(ref)?.provider)
|
||||
.filter((provider): provider is string => Boolean(provider)),
|
||||
),
|
||||
]
|
||||
: [];
|
||||
const pluginEntries =
|
||||
providerMode === "live-openai"
|
||||
? Object.fromEntries(selectedProviderIds.map((providerId) => [providerId, { enabled: true }]))
|
||||
: {};
|
||||
const liveModelParams =
|
||||
providerMode === "live-openai"
|
||||
? {
|
||||
@@ -127,7 +147,7 @@ export function buildQaGatewayConfig(params: {
|
||||
|
||||
return {
|
||||
plugins: {
|
||||
allow: allowedPlugins,
|
||||
...(providerMode === "mock-openai" ? { allow: ["memory-core", "qa-channel"] } : {}),
|
||||
entries: {
|
||||
acpx: {
|
||||
enabled: false,
|
||||
@@ -135,13 +155,7 @@ export function buildQaGatewayConfig(params: {
|
||||
"memory-core": {
|
||||
enabled: true,
|
||||
},
|
||||
...(providerMode === "live-openai"
|
||||
? {
|
||||
openai: {
|
||||
enabled: true,
|
||||
},
|
||||
}
|
||||
: {}),
|
||||
...pluginEntries,
|
||||
},
|
||||
},
|
||||
agents: {
|
||||
|
||||
70
extensions/qa-lab/src/run-config.test.ts
Normal file
70
extensions/qa-lab/src/run-config.test.ts
Normal file
@@ -0,0 +1,70 @@
|
||||
import { describe, expect, it } from "vitest";
import {
  createDefaultQaRunSelection,
  createIdleQaRunnerSnapshot,
  normalizeQaRunSelection,
} from "./run-config.js";

// Two-scenario fixture catalog used by every test below.
const scenarios = [
  {
    id: "dm-chat-baseline",
    title: "DM baseline",
    surface: "dm",
    objective: "test DM",
    successCriteria: ["reply"],
  },
  {
    id: "thread-lifecycle",
    title: "Thread lifecycle",
    surface: "thread",
    objective: "test thread",
    successCriteria: ["thread reply"],
  },
];

describe("qa run config", () => {
  it("creates a synthetic-by-default selection that arms every scenario", () => {
    expect(createDefaultQaRunSelection(scenarios)).toEqual({
      providerMode: "mock-openai",
      primaryModel: "mock-openai/gpt-5.4",
      alternateModel: "mock-openai/gpt-5.4-alt",
      fastMode: false,
      scenarioIds: ["dm-chat-baseline", "thread-lifecycle"],
    });
  });

  // Blank alternateModel falls back to the live default; unknown and
  // duplicate scenario ids are filtered/deduped.
  it("normalizes live selections and filters unknown scenario ids", () => {
    expect(
      normalizeQaRunSelection(
        {
          providerMode: "live-openai",
          primaryModel: "openai/gpt-5.4",
          alternateModel: "",
          fastMode: false,
          scenarioIds: ["thread-lifecycle", "missing", "thread-lifecycle"],
        },
        scenarios,
      ),
    ).toEqual({
      providerMode: "live-openai",
      primaryModel: "openai/gpt-5.4",
      alternateModel: "openai/gpt-5.4",
      fastMode: false,
      scenarioIds: ["thread-lifecycle"],
    });
  });

  // Also covers the idle snapshot shape, which embeds the default selection.
  it("falls back to all scenarios when selection would otherwise be empty", () => {
    const snapshot = createIdleQaRunnerSnapshot(scenarios);
    expect(snapshot.status).toBe("idle");
    expect(snapshot.selection.scenarioIds).toEqual(["dm-chat-baseline", "thread-lifecycle"]);
    expect(
      normalizeQaRunSelection(
        {
          scenarioIds: [],
        },
        scenarios,
      ).scenarioIds,
    ).toEqual(["dm-chat-baseline", "thread-lifecycle"]);
  });
});
|
||||
97
extensions/qa-lab/src/run-config.ts
Normal file
97
extensions/qa-lab/src/run-config.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import path from "node:path";
|
||||
import type { QaSeedScenario } from "./scenario-catalog.js";
|
||||
|
||||
/** Which backend drives a QA run: the synthetic mock or live OpenAI. */
export type QaProviderMode = "mock-openai" | "live-openai";

/** Fully-resolved configuration for one suite run. */
export type QaLabRunSelection = {
  providerMode: QaProviderMode;
  // Model references in "<provider>/<model>" form.
  primaryModel: string;
  alternateModel: string;
  fastMode: boolean;
  // Ids of the catalog scenarios to execute, in catalog order.
  scenarioIds: string[];
};

/** Filesystem paths and watch URL produced by a completed suite run. */
export type QaLabRunArtifacts = {
  outputDir: string;
  reportPath: string;
  summaryPath: string;
  watchUrl: string;
};

/** Current state of the suite runner as exposed to clients. */
export type QaLabRunnerSnapshot = {
  status: "idle" | "running" | "completed" | "failed";
  selection: QaLabRunSelection;
  // ISO-8601 timestamps; unset until a run has started/finished.
  startedAt?: string;
  finishedAt?: string;
  // Populated only after a successful run.
  artifacts: QaLabRunArtifacts | null;
  // Failure message when status is "failed"; null otherwise.
  error: string | null;
};
|
||||
|
||||
export function createDefaultQaRunSelection(scenarios: QaSeedScenario[]): QaLabRunSelection {
|
||||
return {
|
||||
providerMode: "mock-openai",
|
||||
primaryModel: "mock-openai/gpt-5.4",
|
||||
alternateModel: "mock-openai/gpt-5.4-alt",
|
||||
fastMode: false,
|
||||
scenarioIds: scenarios.map((scenario) => scenario.id),
|
||||
};
|
||||
}
|
||||
|
||||
function defaultModelForMode(mode: QaProviderMode, alternate = false) {
|
||||
if (mode === "live-openai") {
|
||||
return "openai/gpt-5.4";
|
||||
}
|
||||
return alternate ? "mock-openai/gpt-5.4-alt" : "mock-openai/gpt-5.4";
|
||||
}
|
||||
|
||||
function normalizeProviderMode(input: unknown): QaProviderMode {
|
||||
return input === "live-openai" ? "live-openai" : "mock-openai";
|
||||
}
|
||||
|
||||
function normalizeModel(input: unknown, fallback: string) {
|
||||
const value = typeof input === "string" ? input.trim() : "";
|
||||
return value || fallback;
|
||||
}
|
||||
|
||||
function normalizeScenarioIds(input: unknown, scenarios: QaSeedScenario[]) {
|
||||
const availableIds = new Set(scenarios.map((scenario) => scenario.id));
|
||||
const requestedIds = Array.isArray(input)
|
||||
? input
|
||||
.map((value) => (typeof value === "string" ? value.trim() : ""))
|
||||
.filter((value) => value.length > 0)
|
||||
: [];
|
||||
const selectedIds = requestedIds.filter((id, index) => {
|
||||
return availableIds.has(id) && requestedIds.indexOf(id) === index;
|
||||
});
|
||||
return selectedIds.length > 0 ? selectedIds : scenarios.map((scenario) => scenario.id);
|
||||
}
|
||||
|
||||
export function normalizeQaRunSelection(
|
||||
input: unknown,
|
||||
scenarios: QaSeedScenario[],
|
||||
): QaLabRunSelection {
|
||||
const payload = input && typeof input === "object" ? (input as Record<string, unknown>) : {};
|
||||
const providerMode = normalizeProviderMode(payload.providerMode);
|
||||
return {
|
||||
providerMode,
|
||||
primaryModel: normalizeModel(payload.primaryModel, defaultModelForMode(providerMode)),
|
||||
alternateModel: normalizeModel(payload.alternateModel, defaultModelForMode(providerMode, true)),
|
||||
fastMode:
|
||||
typeof payload.fastMode === "boolean" ? payload.fastMode : providerMode === "live-openai",
|
||||
scenarioIds: normalizeScenarioIds(payload.scenarioIds, scenarios),
|
||||
};
|
||||
}
|
||||
|
||||
export function createIdleQaRunnerSnapshot(scenarios: QaSeedScenario[]): QaLabRunnerSnapshot {
|
||||
return {
|
||||
status: "idle",
|
||||
selection: createDefaultQaRunSelection(scenarios),
|
||||
artifacts: null,
|
||||
error: null,
|
||||
};
|
||||
}
|
||||
|
||||
export function createQaRunOutputDir(baseDir = process.cwd()) {
|
||||
const stamp = new Date().toISOString().replaceAll(":", "").replaceAll(".", "").replace("T", "-");
|
||||
return path.join(baseDir, ".artifacts", "qa-e2e", `lab-${stamp}`);
|
||||
}
|
||||
6
extensions/qa-lab/src/suite-launch.runtime.ts
Normal file
6
extensions/qa-lab/src/suite-launch.runtime.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
/**
 * Thin wrapper around runQaSuite that defers loading "./suite.js" until the
 * first call (dynamic import), presumably to keep the suite module out of
 * the server's eager import graph — confirm with the lab-server caller.
 * Parameters and return value mirror runQaSuite exactly.
 */
export async function runQaSuiteFromRuntime(
  ...args: Parameters<typeof import("./suite.js").runQaSuite>
) {
  const { runQaSuite } = await import("./suite.js");
  return await runQaSuite(...args);
}
|
||||
@@ -11,7 +11,7 @@ import type { QaBusState } from "./bus-state.js";
|
||||
import { extractQaToolPayload } from "./extract-tool-payload.js";
|
||||
import { startQaGatewayChild } from "./gateway-child.js";
|
||||
import { startQaLabServer } from "./lab-server.js";
|
||||
import type { QaLabScenarioOutcome } from "./lab-server.js";
|
||||
import type { QaLabLatestReport, QaLabScenarioOutcome } from "./lab-server.js";
|
||||
import { startQaMockOpenAiServer } from "./mock-openai-server.js";
|
||||
import { renderQaMarkdownReport, type QaReportCheck, type QaReportScenario } from "./report.js";
|
||||
import { qaChannelPlugin, type QaBusMessage } from "./runtime-api.js";
|
||||
@@ -1760,6 +1760,7 @@ export async function runQaSuite(params?: {
|
||||
alternateModel?: string;
|
||||
fastMode?: boolean;
|
||||
scenarioIds?: string[];
|
||||
lab?: Awaited<ReturnType<typeof startQaLabServer>>;
|
||||
}) {
|
||||
const startedAt = new Date();
|
||||
const providerMode = params?.providerMode ?? "mock-openai";
|
||||
@@ -1775,11 +1776,14 @@ export async function runQaSuite(params?: {
|
||||
path.join(process.cwd(), ".artifacts", "qa-e2e", `suite-${Date.now().toString(36)}`);
|
||||
await fs.mkdir(outputDir, { recursive: true });
|
||||
|
||||
const lab = await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
embeddedGateway: "disabled",
|
||||
});
|
||||
const ownsLab = !params?.lab;
|
||||
const lab =
|
||||
params?.lab ??
|
||||
(await startQaLabServer({
|
||||
host: "127.0.0.1",
|
||||
port: 0,
|
||||
embeddedGateway: "disabled",
|
||||
}));
|
||||
const mock =
|
||||
providerMode === "mock-openai"
|
||||
? await startQaMockOpenAiServer({
|
||||
@@ -1946,6 +1950,12 @@ export async function runQaSuite(params?: {
|
||||
)}\n`,
|
||||
"utf8",
|
||||
);
|
||||
const latestReport = {
|
||||
outputPath: reportPath,
|
||||
markdown: report,
|
||||
generatedAt: finishedAt.toISOString(),
|
||||
} satisfies QaLabLatestReport;
|
||||
lab.setLatestReport(latestReport);
|
||||
|
||||
return {
|
||||
outputDir,
|
||||
@@ -1961,6 +1971,14 @@ export async function runQaSuite(params?: {
|
||||
keepTemp,
|
||||
});
|
||||
await mock?.stop();
|
||||
await lab.stop();
|
||||
if (ownsLab) {
|
||||
await lab.stop();
|
||||
} else {
|
||||
lab.setControlUi({
|
||||
controlUiUrl: null,
|
||||
controlUiToken: null,
|
||||
controlUiProxyTarget: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ import {
|
||||
type Bootstrap,
|
||||
type OutcomesEnvelope,
|
||||
type ReportEnvelope,
|
||||
type RunnerSelection,
|
||||
type Snapshot,
|
||||
type TabId,
|
||||
type UiState,
|
||||
@@ -31,6 +32,25 @@ async function postJson<T>(path: string, body: unknown): Promise<T> {
|
||||
return (await response.json()) as T;
|
||||
}
|
||||
|
||||
function defaultModelsForProviderMode(
|
||||
mode: RunnerSelection["providerMode"],
|
||||
bootstrap?: Bootstrap | null,
|
||||
): Pick<RunnerSelection, "primaryModel" | "alternateModel" | "fastMode"> {
|
||||
if (mode === "live-openai") {
|
||||
const preferred = bootstrap?.runnerCatalog.real[0]?.key;
|
||||
return {
|
||||
primaryModel: preferred ?? "openai/gpt-5.4",
|
||||
alternateModel: preferred ?? "openai/gpt-5.4",
|
||||
fastMode: true,
|
||||
};
|
||||
}
|
||||
return {
|
||||
primaryModel: "mock-openai/gpt-5.4",
|
||||
alternateModel: "mock-openai/gpt-5.4-alt",
|
||||
fastMode: false,
|
||||
};
|
||||
}
|
||||
|
||||
export async function createQaLabApp(root: HTMLDivElement) {
|
||||
const state: UiState = {
|
||||
bootstrap: null,
|
||||
@@ -41,6 +61,8 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
selectedThreadId: null,
|
||||
selectedScenarioId: null,
|
||||
activeTab: "debug",
|
||||
runnerDraft: null,
|
||||
runnerDraftDirty: false,
|
||||
composer: {
|
||||
conversationKind: "direct",
|
||||
conversationId: "alice",
|
||||
@@ -64,6 +86,13 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
state.snapshot = snapshot;
|
||||
state.latestReport = report.report ?? bootstrap.latestReport;
|
||||
state.scenarioRun = outcomes.run;
|
||||
if (!state.runnerDraft || !state.runnerDraftDirty) {
|
||||
state.runnerDraft = {
|
||||
...bootstrap.runner.selection,
|
||||
scenarioIds: [...bootstrap.runner.selection.scenarioIds],
|
||||
};
|
||||
state.runnerDraftDirty = false;
|
||||
}
|
||||
if (!state.selectedConversationId) {
|
||||
state.selectedConversationId = snapshot.conversations[0]?.id ?? null;
|
||||
}
|
||||
@@ -86,6 +115,22 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
render();
|
||||
}
|
||||
|
||||
function updateRunnerDraft(mutator: (draft: RunnerSelection) => RunnerSelection) {
|
||||
const fallback = state.bootstrap?.runner.selection;
|
||||
if (!state.runnerDraft && fallback) {
|
||||
state.runnerDraft = {
|
||||
...fallback,
|
||||
scenarioIds: [...fallback.scenarioIds],
|
||||
};
|
||||
}
|
||||
if (!state.runnerDraft) {
|
||||
return;
|
||||
}
|
||||
state.runnerDraft = mutator(state.runnerDraft);
|
||||
state.runnerDraftDirty = true;
|
||||
render();
|
||||
}
|
||||
|
||||
async function runSelfCheck() {
|
||||
state.busy = true;
|
||||
state.error = null;
|
||||
@@ -163,6 +208,42 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
}
|
||||
}
|
||||
|
||||
async function runSuite() {
|
||||
if (!state.runnerDraft) {
|
||||
state.error = "Runner selection not ready yet.";
|
||||
render();
|
||||
return;
|
||||
}
|
||||
state.busy = true;
|
||||
state.error = null;
|
||||
render();
|
||||
try {
|
||||
const result = await postJson<{ runner: { selection: RunnerSelection } }>(
|
||||
"/api/scenario/suite",
|
||||
{
|
||||
providerMode: state.runnerDraft.providerMode,
|
||||
primaryModel: state.runnerDraft.primaryModel,
|
||||
alternateModel: state.runnerDraft.alternateModel,
|
||||
fastMode: state.runnerDraft.fastMode,
|
||||
scenarioIds: state.runnerDraft.scenarioIds,
|
||||
},
|
||||
);
|
||||
state.runnerDraft = {
|
||||
...result.runner.selection,
|
||||
scenarioIds: [...result.runner.selection.scenarioIds],
|
||||
};
|
||||
state.runnerDraftDirty = false;
|
||||
state.activeTab = "debug";
|
||||
await refresh();
|
||||
} catch (error) {
|
||||
state.error = error instanceof Error ? error.message : String(error);
|
||||
render();
|
||||
} finally {
|
||||
state.busy = false;
|
||||
render();
|
||||
}
|
||||
}
|
||||
|
||||
function downloadReport() {
|
||||
if (!state.latestReport?.markdown) {
|
||||
return;
|
||||
@@ -221,10 +302,32 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
void resetState();
|
||||
});
|
||||
root
|
||||
.querySelector<HTMLButtonElement>("[data-action='self-check']")!
|
||||
.addEventListener("click", () => {
|
||||
.querySelector<HTMLButtonElement>("[data-action='self-check']")
|
||||
?.addEventListener("click", () => {
|
||||
void runSelfCheck();
|
||||
});
|
||||
root
|
||||
.querySelector<HTMLButtonElement>("[data-action='run-suite']")
|
||||
?.addEventListener("click", () => {
|
||||
void runSuite();
|
||||
});
|
||||
root
|
||||
.querySelector<HTMLButtonElement>("[data-action='select-all-scenarios']")
|
||||
?.addEventListener("click", () => {
|
||||
updateRunnerDraft((draft) => ({
|
||||
...draft,
|
||||
scenarioIds:
|
||||
state.bootstrap?.scenarios.map((scenario) => scenario.id) ?? draft.scenarioIds,
|
||||
}));
|
||||
});
|
||||
root
|
||||
.querySelector<HTMLButtonElement>("[data-action='clear-scenarios']")
|
||||
?.addEventListener("click", () => {
|
||||
updateRunnerDraft((draft) => ({
|
||||
...draft,
|
||||
scenarioIds: [],
|
||||
}));
|
||||
});
|
||||
root.querySelector<HTMLButtonElement>("[data-action='send']")?.addEventListener("click", () => {
|
||||
void sendInbound();
|
||||
});
|
||||
@@ -233,6 +336,58 @@ export async function createQaLabApp(root: HTMLDivElement) {
|
||||
?.addEventListener("click", () => {
|
||||
downloadReport();
|
||||
});
|
||||
root.querySelector<HTMLSelectElement>("#provider-mode")?.addEventListener("change", (event) => {
|
||||
const mode =
|
||||
(event.currentTarget as HTMLSelectElement).value === "live-openai"
|
||||
? "live-openai"
|
||||
: "mock-openai";
|
||||
updateRunnerDraft((draft) => ({
|
||||
...draft,
|
||||
providerMode: mode,
|
||||
...defaultModelsForProviderMode(mode, state.bootstrap),
|
||||
}));
|
||||
});
|
||||
root.querySelector<HTMLInputElement>("#fast-mode")?.addEventListener("change", (event) => {
|
||||
updateRunnerDraft((draft) => ({
|
||||
...draft,
|
||||
fastMode: (event.currentTarget as HTMLInputElement).checked,
|
||||
}));
|
||||
});
|
||||
root.querySelector<HTMLInputElement>("#primary-model")?.addEventListener("input", (event) => {
|
||||
updateRunnerDraft((draft) => ({
|
||||
...draft,
|
||||
primaryModel: (event.currentTarget as HTMLInputElement).value,
|
||||
}));
|
||||
});
|
||||
root.querySelector<HTMLInputElement>("#alternate-model")?.addEventListener("input", (event) => {
|
||||
updateRunnerDraft((draft) => ({
|
||||
...draft,
|
||||
alternateModel: (event.currentTarget as HTMLInputElement).value,
|
||||
}));
|
||||
});
|
||||
root.querySelectorAll<HTMLInputElement>("[data-scenario-toggle-id]").forEach((node) => {
|
||||
node.addEventListener("change", () => {
|
||||
const scenarioId = node.dataset.scenarioToggleId;
|
||||
if (!scenarioId) {
|
||||
return;
|
||||
}
|
||||
updateRunnerDraft((draft) => {
|
||||
const selected = new Set(draft.scenarioIds);
|
||||
if (node.checked) {
|
||||
selected.add(scenarioId);
|
||||
} else {
|
||||
selected.delete(scenarioId);
|
||||
}
|
||||
const orderedIds = state.bootstrap?.scenarios
|
||||
.map((scenario) => scenario.id)
|
||||
.filter((id) => selected.has(id)) ?? [...selected];
|
||||
return {
|
||||
...draft,
|
||||
scenarioIds: orderedIds,
|
||||
};
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
root
|
||||
.querySelector<HTMLSelectElement>("#conversation-kind")
|
||||
|
||||
@@ -442,6 +442,98 @@ textarea {
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.run-form-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(2, minmax(0, 1fr));
|
||||
gap: 0.8rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.checkbox-label {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: end;
|
||||
gap: 0.45rem;
|
||||
}
|
||||
|
||||
.checkbox-label input {
|
||||
width: 1.05rem;
|
||||
height: 1.05rem;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.panel-header.compact {
|
||||
align-items: end;
|
||||
}
|
||||
|
||||
.toolbar.mini button {
|
||||
padding: 0.48rem 0.78rem;
|
||||
}
|
||||
|
||||
.scenario-picker {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.6rem;
|
||||
max-height: 28vh;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.scenario-toggle {
|
||||
display: grid;
|
||||
grid-template-columns: auto minmax(0, 1fr);
|
||||
gap: 0.75rem;
|
||||
align-items: start;
|
||||
padding: 0.82rem 0.9rem;
|
||||
border-radius: 16px;
|
||||
border: 1px solid rgba(255, 255, 255, 0.08);
|
||||
background: rgba(255, 255, 255, 0.03);
|
||||
}
|
||||
|
||||
.scenario-toggle input {
|
||||
width: 1rem;
|
||||
height: 1rem;
|
||||
margin-top: 0.18rem;
|
||||
}
|
||||
|
||||
.scenario-toggle span {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.22rem;
|
||||
}
|
||||
|
||||
.scenario-toggle small {
|
||||
color: var(--muted);
|
||||
}
|
||||
|
||||
.scenario-toggle.selected {
|
||||
border-color: rgba(121, 224, 198, 0.34);
|
||||
background: linear-gradient(180deg, rgba(121, 224, 198, 0.11), rgba(121, 224, 198, 0.04));
|
||||
}
|
||||
|
||||
.artifact-list {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.45rem;
|
||||
margin-top: 0.9rem;
|
||||
}
|
||||
|
||||
.artifact-list code {
|
||||
display: block;
|
||||
padding: 0.62rem 0.72rem;
|
||||
border-radius: 12px;
|
||||
background: rgba(255, 255, 255, 0.04);
|
||||
border: 1px solid rgba(255, 255, 255, 0.06);
|
||||
color: #dce6f4;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
.runner-error {
|
||||
margin: 0.9rem 0 0;
|
||||
color: var(--danger);
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
.meta-label {
|
||||
display: block;
|
||||
margin-bottom: 0.28rem;
|
||||
@@ -588,6 +680,11 @@ label span {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.run-form-grid,
|
||||
.composer-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.messages,
|
||||
.report {
|
||||
max-height: none;
|
||||
|
||||
@@ -67,6 +67,11 @@ export type Bootstrap = {
|
||||
senderId: string;
|
||||
senderName: string;
|
||||
};
|
||||
runner: RunnerSnapshot;
|
||||
runnerCatalog: {
|
||||
status: "loading" | "ready" | "failed";
|
||||
real: RunnerModelOption[];
|
||||
};
|
||||
};
|
||||
|
||||
export type ScenarioStep = {
|
||||
@@ -101,6 +106,36 @@ export type ScenarioRun = {
|
||||
};
|
||||
};
|
||||
|
||||
export type RunnerSelection = {
|
||||
providerMode: "mock-openai" | "live-openai";
|
||||
primaryModel: string;
|
||||
alternateModel: string;
|
||||
fastMode: boolean;
|
||||
scenarioIds: string[];
|
||||
};
|
||||
|
||||
export type RunnerSnapshot = {
|
||||
status: "idle" | "running" | "completed" | "failed";
|
||||
selection: RunnerSelection;
|
||||
startedAt?: string;
|
||||
finishedAt?: string;
|
||||
artifacts: null | {
|
||||
outputDir: string;
|
||||
reportPath: string;
|
||||
summaryPath: string;
|
||||
watchUrl: string;
|
||||
};
|
||||
error: string | null;
|
||||
};
|
||||
|
||||
export type RunnerModelOption = {
|
||||
key: string;
|
||||
name: string;
|
||||
provider: string;
|
||||
input: string;
|
||||
preferred: boolean;
|
||||
};
|
||||
|
||||
export type OutcomesEnvelope = {
|
||||
run: ScenarioRun | null;
|
||||
};
|
||||
@@ -116,6 +151,8 @@ export type UiState = {
|
||||
selectedThreadId: string | null;
|
||||
selectedScenarioId: string | null;
|
||||
activeTab: TabId;
|
||||
runnerDraft: RunnerSelection | null;
|
||||
runnerDraftDirty: boolean;
|
||||
composer: {
|
||||
conversationKind: "direct" | "channel";
|
||||
conversationId: string;
|
||||
@@ -200,6 +237,49 @@ function renderStatusChip(status: ScenarioOutcome["status"]) {
|
||||
return `<span class="status-chip status-${status}">${escapeHtml(label)}</span>`;
|
||||
}
|
||||
|
||||
function renderRunnerStatusChip(status: RunnerSnapshot["status"]) {
|
||||
const tone = status === "failed" ? "fail" : status === "completed" ? "pass" : status;
|
||||
return `<span class="status-chip status-${tone}">${escapeHtml(status)}</span>`;
|
||||
}
|
||||
|
||||
function deriveRunnerSelection(state: UiState): RunnerSelection | null {
|
||||
return state.runnerDraft ?? state.bootstrap?.runner.selection ?? null;
|
||||
}
|
||||
|
||||
function renderRunnerModelSelect(params: {
|
||||
id: string;
|
||||
label: string;
|
||||
value: string;
|
||||
options: RunnerModelOption[];
|
||||
disabled: boolean;
|
||||
}) {
|
||||
const values = new Set(params.options.map((option) => option.key));
|
||||
const options = [...params.options];
|
||||
if (!values.has(params.value) && params.value.trim()) {
|
||||
options.unshift({
|
||||
key: params.value,
|
||||
name: params.value,
|
||||
provider: params.value.split("/")[0] ?? "custom",
|
||||
input: "text",
|
||||
preferred: false,
|
||||
});
|
||||
}
|
||||
return `
|
||||
<label>
|
||||
<span>${escapeHtml(params.label)}</span>
|
||||
<select id="${escapeHtml(params.id)}"${params.disabled ? " disabled" : ""}>
|
||||
${options
|
||||
.map(
|
||||
(option) => `
|
||||
<option value="${escapeHtml(option.key)}"${option.key === params.value ? " selected" : ""}>
|
||||
${escapeHtml(option.key)}
|
||||
</option>`,
|
||||
)
|
||||
.join("")}
|
||||
</select>
|
||||
</label>`;
|
||||
}
|
||||
|
||||
function renderRefs(refs: string[] | undefined, kind: "docs" | "code") {
|
||||
if (!refs?.length) {
|
||||
return `<p class="empty">No ${kind} refs attached.</p>`;
|
||||
@@ -318,29 +398,158 @@ function renderScenarioInspector(state: UiState, scenarios: SeedScenario[]) {
|
||||
|
||||
function renderRunPanel(state: UiState) {
|
||||
const run = state.scenarioRun;
|
||||
if (!run) {
|
||||
const runner = state.bootstrap?.runner ?? null;
|
||||
if (!run && !runner) {
|
||||
return `
|
||||
<section class="panel">
|
||||
<h2>Run state</h2>
|
||||
<p class="empty">No structured scenario run yet. Seed plan loaded; outcomes arrive once a suite or self-check starts.</p>
|
||||
</section>`;
|
||||
}
|
||||
const selection = runner?.selection ?? null;
|
||||
return `
|
||||
<section class="panel">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Live run</p>
|
||||
<h2>${escapeHtml(run.kind === "suite" ? "Scenario suite" : "Self-check")}</h2>
|
||||
<p class="eyebrow">Run state</p>
|
||||
<h2>${escapeHtml(run?.kind === "self-check" ? "Self-check" : "Scenario suite")}</h2>
|
||||
</div>
|
||||
<span class="status-chip status-${run.status === "completed" ? "pass" : run.status === "running" ? "running" : "pending"}">${escapeHtml(run.status)}</span>
|
||||
${runner ? renderRunnerStatusChip(runner.status) : ""}
|
||||
</div>
|
||||
<div class="run-grid">
|
||||
<div><span class="meta-label">Total</span><strong>${run.counts.total}</strong></div>
|
||||
<div><span class="meta-label">Pass</span><strong>${run.counts.passed}</strong></div>
|
||||
<div><span class="meta-label">Fail</span><strong>${run.counts.failed}</strong></div>
|
||||
<div><span class="meta-label">Pending</span><strong>${run.counts.pending}</strong></div>
|
||||
${
|
||||
run
|
||||
? `
|
||||
<div class="run-grid">
|
||||
<div><span class="meta-label">Total</span><strong>${run.counts.total}</strong></div>
|
||||
<div><span class="meta-label">Pass</span><strong>${run.counts.passed}</strong></div>
|
||||
<div><span class="meta-label">Fail</span><strong>${run.counts.failed}</strong></div>
|
||||
<div><span class="meta-label">Pending</span><strong>${run.counts.pending}</strong></div>
|
||||
</div>`
|
||||
: '<p class="empty">Waiting for structured outcomes.</p>'
|
||||
}
|
||||
${
|
||||
selection
|
||||
? `<p class="subtle">${escapeHtml(selection.providerMode === "live-openai" ? "Real provider lane" : "Synthetic OpenAI")} · ${escapeHtml(selection.primaryModel)} · ${selection.scenarioIds.length} scenarios</p>`
|
||||
: ""
|
||||
}
|
||||
<p class="subtle">Started ${escapeHtml(formatIso(runner?.startedAt ?? run?.startedAt))} · Finished ${escapeHtml(formatIso(runner?.finishedAt ?? run?.finishedAt))}</p>
|
||||
${
|
||||
runner?.artifacts
|
||||
? `
|
||||
<div class="artifact-list">
|
||||
<code>${escapeHtml(runner.artifacts.outputDir)}</code>
|
||||
<code>${escapeHtml(runner.artifacts.reportPath)}</code>
|
||||
<code>${escapeHtml(runner.artifacts.summaryPath)}</code>
|
||||
</div>`
|
||||
: ""
|
||||
}
|
||||
${runner?.error ? `<p class="runner-error">${escapeHtml(runner.error)}</p>` : ""}
|
||||
</section>`;
|
||||
}
|
||||
|
||||
function renderRunnerConsole(state: UiState, scenarios: SeedScenario[]) {
|
||||
const selection = deriveRunnerSelection(state);
|
||||
if (!selection) {
|
||||
return "";
|
||||
}
|
||||
const runner = state.bootstrap?.runner ?? null;
|
||||
const realModelOptions = state.bootstrap?.runnerCatalog.real ?? [];
|
||||
const selectedIds = new Set(selection.scenarioIds);
|
||||
const isRunning = runner?.status === "running";
|
||||
const usesRealCatalog = selection.providerMode === "live-openai" && realModelOptions.length > 0;
|
||||
return `
|
||||
<section class="panel run-console">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Suite console</p>
|
||||
<h2>Launch matrix</h2>
|
||||
</div>
|
||||
${runner ? renderRunnerStatusChip(runner.status) : ""}
|
||||
</div>
|
||||
<div class="run-form-grid">
|
||||
<label>
|
||||
<span>Lane</span>
|
||||
<select id="provider-mode"${isRunning ? " disabled" : ""}>
|
||||
<option value="mock-openai"${selection.providerMode === "mock-openai" ? " selected" : ""}>Synthetic</option>
|
||||
<option value="live-openai"${selection.providerMode === "live-openai" ? " selected" : ""}>Real providers</option>
|
||||
</select>
|
||||
</label>
|
||||
<label class="checkbox-label">
|
||||
<span>Fast mode</span>
|
||||
<input id="fast-mode" type="checkbox"${selection.fastMode ? " checked" : ""}${isRunning ? " disabled" : ""} />
|
||||
</label>
|
||||
${
|
||||
usesRealCatalog
|
||||
? renderRunnerModelSelect({
|
||||
id: "primary-model",
|
||||
label: "Primary model",
|
||||
value: selection.primaryModel,
|
||||
options: realModelOptions,
|
||||
disabled: isRunning,
|
||||
})
|
||||
: `<label>
|
||||
<span>Primary model</span>
|
||||
<input id="primary-model" value="${escapeHtml(selection.primaryModel)}"${isRunning ? " disabled" : ""} />
|
||||
</label>`
|
||||
}
|
||||
${
|
||||
usesRealCatalog
|
||||
? renderRunnerModelSelect({
|
||||
id: "alternate-model",
|
||||
label: "Alt model",
|
||||
value: selection.alternateModel,
|
||||
options: realModelOptions,
|
||||
disabled: isRunning,
|
||||
})
|
||||
: `<label>
|
||||
<span>Alt model</span>
|
||||
<input id="alternate-model" value="${escapeHtml(selection.alternateModel)}"${isRunning ? " disabled" : ""} />
|
||||
</label>`
|
||||
}
|
||||
</div>
|
||||
${
|
||||
selection.providerMode === "live-openai"
|
||||
? `<p class="subtle">${escapeHtml(
|
||||
state.bootstrap?.runnerCatalog.status === "loading"
|
||||
? "Loading real model catalog…"
|
||||
: state.bootstrap?.runnerCatalog.status === "failed"
|
||||
? "Real model catalog unavailable; using manual refs."
|
||||
: `${realModelOptions.length} real models ready. gpt-5.4 stays pinned first when available.`,
|
||||
)}</p>`
|
||||
: ""
|
||||
}
|
||||
<div class="panel-header compact">
|
||||
<div>
|
||||
<p class="eyebrow">Scenario selection</p>
|
||||
<h3>${selection.scenarioIds.length}/${scenarios.length} armed</h3>
|
||||
</div>
|
||||
<div class="toolbar mini">
|
||||
<button data-action="select-all-scenarios"${isRunning ? " disabled" : ""}>All</button>
|
||||
<button data-action="clear-scenarios"${isRunning ? " disabled" : ""}>None</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="scenario-picker">
|
||||
${
|
||||
scenarios.length === 0
|
||||
? '<p class="empty">No scenarios available.</p>'
|
||||
: scenarios
|
||||
.map(
|
||||
(scenario) => `
|
||||
<label class="scenario-toggle${selectedIds.has(scenario.id) ? " selected" : ""}">
|
||||
<input type="checkbox" data-scenario-toggle-id="${escapeHtml(scenario.id)}"${selectedIds.has(scenario.id) ? " checked" : ""}${isRunning ? " disabled" : ""} />
|
||||
<span>
|
||||
<strong>${escapeHtml(scenario.title)}</strong>
|
||||
<small>${escapeHtml(scenario.id)} · ${escapeHtml(scenario.surface)}</small>
|
||||
</span>
|
||||
</label>`,
|
||||
)
|
||||
.join("")
|
||||
}
|
||||
</div>
|
||||
<div class="toolbar lower">
|
||||
<button class="accent" data-action="run-suite"${isRunning || selection.scenarioIds.length === 0 || state.busy ? " disabled" : ""}>Run selected scenarios</button>
|
||||
<button data-action="self-check"${isRunning || state.busy ? " disabled" : ""}>Run self-check</button>
|
||||
</div>
|
||||
<p class="subtle">Started ${escapeHtml(formatIso(run.startedAt))} · Finished ${escapeHtml(formatIso(run.finishedAt))}</p>
|
||||
</section>`;
|
||||
}
|
||||
|
||||
@@ -507,6 +716,7 @@ export function renderQaLabUi(state: UiState) {
|
||||
const hasControlUi = Boolean(state.bootstrap?.controlUiEmbeddedUrl);
|
||||
const dashboardShellClass = hasControlUi ? "dashboard split-dashboard" : "dashboard";
|
||||
const run = state.scenarioRun;
|
||||
const runner = state.bootstrap?.runner ?? null;
|
||||
|
||||
return `
|
||||
<div class="${dashboardShellClass}">
|
||||
@@ -539,7 +749,6 @@ export function renderQaLabUi(state: UiState) {
|
||||
<div class="toolbar">
|
||||
<button data-action="refresh"${state.busy ? " disabled" : ""}>Refresh</button>
|
||||
<button data-action="reset"${state.busy ? " disabled" : ""}>Reset</button>
|
||||
<button class="accent" data-action="self-check"${state.busy ? " disabled" : ""}>Run self-check</button>
|
||||
</div>
|
||||
</header>
|
||||
<section class="statusbar">
|
||||
@@ -552,11 +761,17 @@ export function renderQaLabUi(state: UiState) {
|
||||
? `<span class="pill success">${escapeHtml(run.kind)} ${escapeHtml(run.status)} · ${run.counts.passed}/${run.counts.total} pass</span>`
|
||||
: '<span class="pill">No structured run yet</span>'
|
||||
}
|
||||
${
|
||||
runner
|
||||
? `<span class="pill${runner.status === "failed" ? " error" : runner.status === "completed" ? " success" : ""}">${escapeHtml(runner.status)} lane · ${escapeHtml(runner.selection.providerMode)}</span>`
|
||||
: ""
|
||||
}
|
||||
${state.latestReport ? `<span class="pill">Report ${escapeHtml(state.latestReport.outputPath)}</span>` : '<span class="pill">No report yet</span>'}
|
||||
${state.error ? `<span class="pill error">${escapeHtml(state.error)}</span>` : ""}
|
||||
</section>
|
||||
<main class="workspace">
|
||||
<aside class="rail">
|
||||
${renderRunnerConsole(state, scenarios)}
|
||||
${renderRunPanel(state)}
|
||||
<section class="panel">
|
||||
<h2>Conversations</h2>
|
||||
|
||||
Reference in New Issue
Block a user