diff --git a/.agents/skills/openclaw-qa-testing/SKILL.md b/.agents/skills/openclaw-qa-testing/SKILL.md index ade3b448382..1391d7c8008 100644 --- a/.agents/skills/openclaw-qa-testing/SKILL.md +++ b/.agents/skills/openclaw-qa-testing/SKILL.md @@ -62,6 +62,24 @@ scenario through qa-channel, decodes the emitted protobuf spans, and verifies the exported trace names and privacy contract. It does not require Opik, Langfuse, or external collector credentials. +## Matrix live profiles + +`pnpm openclaw qa matrix` defaults to the full `all` profile. Use explicit +profiles for faster CI/release proof: + +```bash +OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS=3000 \ +pnpm openclaw qa matrix --profile fast --fail-fast +``` + +- `fast`: release-critical transport contract, excluding generated image and + deep E2EE recovery inventory. +- `transport`, `media`, `e2ee-smoke`, `e2ee-deep`, `e2ee-cli`: sharded full + Matrix coverage. +- `QA-Lab - All Lanes` uses explicit `fast` Matrix on scheduled runs. Manual + dispatch keeps `matrix_profile=all` as the default and can shard full Matrix + with `matrix_profile=all` and `matrix_shards=true`. + ## QA credentials and 1Password - Use `op` only inside `tmux` for QA secret lookup in this repo. diff --git a/.agents/skills/openclaw-testing/SKILL.md b/.agents/skills/openclaw-testing/SKILL.md index a3c72f9660b..fdc41f1786c 100644 --- a/.agents/skills/openclaw-testing/SKILL.md +++ b/.agents/skills/openclaw-testing/SKILL.md @@ -110,7 +110,7 @@ dispatches: - manual `CI` for the full normal CI graph - `OpenClaw Release Checks` for install smoke, cross-OS release checks, live and - E2E checks, Docker release-path suites, OpenWebUI, QA Lab, Matrix, and + E2E checks, Docker release-path suites, OpenWebUI, QA Lab, fast Matrix, and Telegram release lanes - optional post-publish Telegram E2E when a package spec is supplied @@ -175,6 +175,23 @@ gh workflow run openclaw-release-checks.yml \ -f mode=both ``` +### QA Lab Matrix Profiles + +`pnpm openclaw qa matrix` defaults to `--profile all`. Do not assume the CLI +default is the fast release path. Use explicit profiles: + +- `--profile fast --fail-fast`: release-critical Matrix transport contract +- `--profile transport|media|e2ee-smoke|e2ee-deep|e2ee-cli`: sharded full + Matrix proof +- `OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS=3000`: CI-friendly no-reply quiet + window when paired with fast or sharded gates + +`QA-Lab - All Lanes` uses explicit fast Matrix on scheduled runs; manual +dispatch keeps `matrix_profile=all` as the default and can shard full Matrix +with `matrix_profile=all` and `matrix_shards=true`. `OpenClaw Release Checks` +uses explicit fast Matrix; run the sharded all-lanes workflow when release +investigation needs full Matrix media/E2EE inventory. + ### Reusable Live/E2E Checks `OpenClaw Live And E2E Checks (Reusable)` diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index 4839cbc497a..577986ea70f 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -355,6 +355,7 @@ jobs: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" + OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS: "3000" run: | set -euo pipefail @@ -367,7 +368,9 @@ jobs: --provider-mode live-frontier \ --model "${OPENCLAW_CI_OPENAI_MODEL}" \ --alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \ - --fast + --profile fast \ + --fast \ + --fail-fast - name: Upload Matrix QA artifacts if: always() diff --git a/.github/workflows/qa-live-transports-convex.yml b/.github/workflows/qa-live-transports-convex.yml index f60032ae0e9..a1b8865db84 100644 --- a/.github/workflows/qa-live-transports-convex.yml +++ b/.github/workflows/qa-live-transports-convex.yml @@ -18,6 +18,27 @@ on: description: Optional comma-separated Discord scenario ids required: false type: string + matrix_profile: + description: Matrix QA profile for the live Matrix lane + required: false + default: all + type: choice + options: + - fast + - all + - transport + - media + - e2ee-smoke + - e2ee-deep + - e2ee-cli + matrix_shards: + description: Shard matrix_profile=all into parallel Matrix profile jobs + required: false + default: "false" + type: choice + options: + - "false" + - "true" permissions: contents: read @@ -199,6 +220,7 @@ jobs: run_live_matrix: name: Run Matrix live QA lane needs: [authorize_actor, validate_selected_ref] + if: ${{ !(github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all' && inputs.matrix_shards == 'true') }} runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 60 environment: qa-live-shared @@ -236,7 +258,9 @@ jobs: shell: bash env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + INPUT_MATRIX_PROFILE: ${{ github.event_name == 'workflow_dispatch' && inputs.matrix_profile || 'fast' }} OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" + OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS: "3000" run: | set -euo pipefail @@ -249,7 +273,9 @@ jobs: --provider-mode live-frontier \ --model "${OPENCLAW_CI_OPENAI_MODEL}" \ --alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \ - --fast + --profile "${INPUT_MATRIX_PROFILE}" \ + --fast \ + --fail-fast - name: Upload Matrix QA artifacts if: always() @@ -260,6 +286,83 @@ jobs: retention-days: 14 if-no-files-found: warn + run_live_matrix_sharded: + name: Run Matrix live QA lane (${{ matrix.profile }}) + needs: [authorize_actor, validate_selected_ref] + if: ${{ github.event_name == 'workflow_dispatch' && inputs.matrix_profile == 'all' && inputs.matrix_shards == 'true' }} + runs-on: blacksmith-32vcpu-ubuntu-2404 + timeout-minutes: 60 + environment: qa-live-shared + strategy: + fail-fast: false + matrix: + profile: + - transport + - media + - e2ee-smoke + - e2ee-deep + - e2ee-cli + steps: + - name: Checkout selected ref + uses: actions/checkout@v6 + with: + ref: ${{ needs.validate_selected_ref.outputs.selected_sha }} + fetch-depth: 1 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Validate required QA credential env + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + shell: bash + run: | + set -euo pipefail + + if [[ -z "${OPENAI_API_KEY:-}" ]]; then + echo "Missing required OPENAI_API_KEY." >&2 + exit 1 + fi + + - name: Build private QA runtime + run: pnpm build + + - name: Run Matrix live lane shard + id: run_lane + shell: bash + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" + OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS: "3000" + run: | + set -euo pipefail + + output_dir=".artifacts/qa-e2e/matrix-live-${{ matrix.profile }}-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT" + + pnpm openclaw qa matrix \ + --repo-root . \ + --output-dir "${output_dir}" \ + --provider-mode live-frontier \ + --model "${OPENCLAW_CI_OPENAI_MODEL}" \ + --alt-model "${OPENCLAW_CI_OPENAI_MODEL}" \ + --profile "${{ matrix.profile }}" \ + --fast \ + --fail-fast + + - name: Upload Matrix QA shard artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: qa-live-matrix-${{ matrix.profile }}-${{ github.run_id }}-${{ github.run_attempt }} + path: ${{ steps.run_lane.outputs.output_dir }} + retention-days: 14 + if-no-files-found: warn + run_live_telegram: name: Run Telegram live QA lane with Convex leases needs: [authorize_actor, validate_selected_ref] diff --git a/docs/ci.md b/docs/ci.md index c3df14b047e..bdd4afbb2c5 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -145,9 +145,13 @@ QA Lab has dedicated CI lanes outside the main smart-scoped workflow. The builds the private QA runtime and compares the mock GPT-5.5 and Opus 4.6 agentic packs. The `QA-Lab - All Lanes` workflow runs nightly on `main` and on manual dispatch; it fans out the mock parity gate, live Matrix lane, and live -Telegram lane as parallel jobs. The live jobs use the `qa-live-shared` -environment, and the Telegram lane uses Convex leases. `OpenClaw Release -Checks` also runs the same QA Lab lanes before release approval. +Telegram and Discord lanes as parallel jobs. The live jobs use the +`qa-live-shared` environment, and Telegram/Discord use Convex leases. Matrix +uses `--profile fast --fail-fast` for scheduled and release gates while the CLI +default and manual workflow input remain `all`; manual all-lanes dispatch can +shard full Matrix coverage into `transport`, `media`, `e2ee-smoke`, +`e2ee-deep`, and `e2ee-cli` jobs. `OpenClaw Release Checks` also runs the +release-critical QA Lab lanes before release approval. The `Duplicate PRs After Merge` workflow is a manual maintainer workflow for post-land duplicate cleanup. It defaults to dry-run and only closes explicitly diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index fb75fb73ed9..be8eabf3113 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -73,7 +73,7 @@ instrumentation. For a transport-real Matrix smoke lane, run: ```bash -pnpm openclaw qa matrix +pnpm openclaw qa matrix --profile fast --fail-fast ``` That lane provisions a disposable Tuwunel homeserver in Docker, registers @@ -84,9 +84,15 @@ the child config scoped to the transport under test, so Matrix runs without a combined stdout/stderr log into the selected Matrix QA output directory. To capture the outer `scripts/run-node.mjs` build/launcher output too, set `OPENCLAW_RUN_NODE_OUTPUT_LOG=` to a repo-local log file. -Matrix progress is printed by default. `OPENCLAW_QA_MATRIX_TIMEOUT_MS` bounds -the full run, and `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` bounds cleanup so a -stuck Docker teardown reports the exact recovery command instead of hanging. +Matrix progress is printed by default. The CLI default profile is `all`, so +plain `pnpm openclaw qa matrix` still runs the full catalog. Use `--profile +fast` for the release-critical transport contract, or shard full coverage with +`transport`, `media`, `e2ee-smoke`, `e2ee-deep`, and `e2ee-cli`. `--fail-fast` +stops after the first failed scenario when you want a release gate instead of a +full inventory. `OPENCLAW_QA_MATRIX_TIMEOUT_MS` bounds the full run, +`OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS` can shorten no-reply quiet windows for +CI, and `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` bounds cleanup so a stuck +Docker teardown reports the exact recovery command instead of hanging. For a transport-real Telegram smoke lane, run: diff --git a/docs/help/testing.md b/docs/help/testing.md index 4284c7938af..52bd52c639b 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -92,9 +92,13 @@ These commands sit beside the main test suites when you need QA-lab realism: CI runs QA Lab in dedicated workflows. `Parity gate` runs on matching PRs and from manual dispatch with mock providers. `QA-Lab - All Lanes` runs nightly on -`main` and from manual dispatch with the mock parity gate, live Matrix lane, and -Convex-managed live Telegram lane as parallel jobs. `OpenClaw Release Checks` -runs the same lanes before release approval. +`main` and from manual dispatch with the mock parity gate, live Matrix lane, +Convex-managed live Telegram lane, and Convex-managed live Discord lane as +parallel jobs. Scheduled QA and release checks pass Matrix `--profile fast` +explicitly, while the Matrix CLI and manual workflow input default remain +`all`; manual dispatch can shard `all` into `transport`, `media`, `e2ee-smoke`, +`e2ee-deep`, and `e2ee-cli` jobs. `OpenClaw Release Checks` runs parity plus +the fast Matrix and Telegram lanes before release approval. - `pnpm openclaw qa suite` - Runs repo-backed QA scenarios directly on the host. @@ -248,10 +252,11 @@ gh workflow run package-acceptance.yml --ref main \ - Repo checkouts load the bundled runner directly; no separate plugin install step is needed. - Provisions three temporary Matrix users (`driver`, `sut`, `observer`) plus one private room, then starts a QA gateway child with the real Matrix plugin as the SUT transport. + - Defaults to `--profile all`. Use `--profile fast --fail-fast` for release-critical transport proof, or `--profile transport|media|e2ee-smoke|e2ee-deep|e2ee-cli` when sharding the full catalog. - Uses the pinned stable Tuwunel image `ghcr.io/matrix-construct/tuwunel:v1.5.1` by default. Override with `OPENCLAW_QA_MATRIX_TUWUNEL_IMAGE` when you need to test a different image. - Matrix does not expose shared credential-source flags because the lane provisions disposable users locally. - Writes a Matrix QA report, summary, observed-events artifact, and combined stdout/stderr output log under `.artifacts/qa-e2e/...`. - - Emits progress by default and enforces a hard run timeout with `OPENCLAW_QA_MATRIX_TIMEOUT_MS` (default 30 minutes). Cleanup is bounded by `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` and failures include the recovery `docker compose ... down --remove-orphans` command. + - Emits progress by default and enforces a hard run timeout with `OPENCLAW_QA_MATRIX_TIMEOUT_MS` (default 30 minutes). `OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS` tunes negative no-reply quiet windows, and cleanup is bounded by `OPENCLAW_QA_MATRIX_CLEANUP_TIMEOUT_MS` with failures including the recovery `docker compose ... down --remove-orphans` command. - `pnpm openclaw qa telegram` - Runs the Telegram live QA lane against a real private group using the driver and SUT bot tokens from env. - Requires `OPENCLAW_QA_TELEGRAM_GROUP_ID`, `OPENCLAW_QA_TELEGRAM_DRIVER_BOT_TOKEN`, and `OPENCLAW_QA_TELEGRAM_SUT_BOT_TOKEN`. The group id must be the numeric Telegram chat id. @@ -267,10 +272,11 @@ Live transport lanes share one standard contract so new transports do not drift: `qa-channel` remains the broad synthetic QA suite and is not part of the live transport coverage matrix. -| Lane | Canary | Mention gating | Allowlist block | Top-level reply | Restart resume | Thread follow-up | Thread isolation | Reaction observation | Help command | -| -------- | ------ | -------------- | --------------- | --------------- | -------------- | ---------------- | ---------------- | -------------------- | ------------ | -| Matrix | x | x | x | x | x | x | x | x | | -| Telegram | x | | | | | | | | x | +| Lane | Canary | Mention gating | Allowlist block | Top-level reply | Restart resume | Thread follow-up | Thread isolation | Reaction observation | Help command | Native command registration | +| -------- | ------ | -------------- | --------------- | --------------- | -------------- | ---------------- | ---------------- | -------------------- | ------------ | --------------------------- | +| Matrix | x | x | x | x | x | x | x | x | | | +| Telegram | x | x | | | | | | | x | | +| Discord | x | x | | | | | | | | x | ### Shared Telegram credentials via Convex (v1) diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index 390fc59f5a7..9d4354bfc16 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -137,9 +137,12 @@ the maintainer-only release runbook. - Run `pnpm release:check` before every tagged release - Release checks now run in a separate manual workflow: `OpenClaw Release Checks` -- `OpenClaw Release Checks` also runs the QA Lab mock parity gate plus the live - Matrix and Telegram QA lanes before release approval. The live lanes use the - `qa-live-shared` environment; Telegram also uses Convex CI credential leases. +- `OpenClaw Release Checks` also runs the QA Lab mock parity gate plus the fast + live Matrix profile and Telegram QA lane before release approval. The live + lanes use the `qa-live-shared` environment; Telegram also uses Convex CI + credential leases. Run the manual `QA-Lab - All Lanes` workflow with + `matrix_profile=all` and `matrix_shards=true` when you want full Matrix + transport, media, and E2EE inventory in parallel. - Cross-OS install and upgrade runtime validation is dispatched from the private caller workflow `openclaw/releases-private/.github/workflows/openclaw-cross-os-release-checks.yml`, @@ -338,13 +341,14 @@ Release QA Lab coverage includes: - mock parity gate comparing the OpenAI candidate lane against the Opus 4.6 baseline using the agentic parity pack -- live Matrix QA lane using the `qa-live-shared` environment +- fast live Matrix QA profile using the `qa-live-shared` environment - live Telegram QA lane using Convex CI credential leases - `pnpm qa:otel:smoke` when release telemetry needs explicit local proof Use this box to answer "does the release behave correctly in QA scenarios and live channel flows?" Keep the artifact URLs for parity, Matrix, and Telegram -lanes when approving the release. +lanes when approving the release. Full Matrix coverage remains available as a +manual sharded QA-Lab run rather than the default release-critical lane. ### Package diff --git a/extensions/qa-matrix/src/cli.test.ts b/extensions/qa-matrix/src/cli.test.ts index 31668e22091..cb939ba3cef 100644 --- a/extensions/qa-matrix/src/cli.test.ts +++ b/extensions/qa-matrix/src/cli.test.ts @@ -67,6 +67,8 @@ describe("matrix qa cli registration", () => { "--alt-model", "--scenario", "--fast", + "--profile", + "--fail-fast", "--sut-account", ]), ); diff --git a/extensions/qa-matrix/src/cli.ts b/extensions/qa-matrix/src/cli.ts index 8e5018c2e02..b81b83a06c7 100644 --- a/extensions/qa-matrix/src/cli.ts +++ b/extensions/qa-matrix/src/cli.ts @@ -56,6 +56,9 @@ export const matrixQaCliRegistration: LiveTransportQaCliRegistration = commandName: "matrix", description: "Run the Docker-backed Matrix live QA lane against a disposable homeserver", outputDirHelp: "Matrix QA artifact directory", + profileHelp: + "Matrix QA profile: all, fast, transport, media, e2ee-smoke, e2ee-deep, or e2ee-cli (default: all)", + failFastHelp: "Stop after the first failed Matrix check or scenario", scenarioHelp: "Run only the named Matrix QA scenario (repeatable)", sutAccountHelp: "Temporary Matrix account id inside the QA gateway config", run: runQaMatrix, diff --git a/extensions/qa-matrix/src/runners/contract/runtime.test.ts b/extensions/qa-matrix/src/runners/contract/runtime.test.ts index 3b7c781d713..ce1d9cdc794 100644 --- a/extensions/qa-matrix/src/runners/contract/runtime.test.ts +++ b/extensions/qa-matrix/src/runners/contract/runtime.test.ts @@ -491,11 +491,14 @@ describe("matrix live qa runtime", () => { expect(report).toContain("observed events: /tmp/observed.json"); }); - it("keeps Matrix scenario execution in catalog order across config changes", () => { + it("groups Matrix scenario execution by gateway config while preserving tail scenarios", () => { const scenarios = liveTesting.findMatrixQaScenarios([ + "matrix-thread-follow-up", "matrix-e2ee-cli-encryption-setup-multi-account", + "matrix-thread-isolation", "matrix-e2ee-cli-setup-then-gateway-reply", "matrix-e2ee-cli-self-verification", + "matrix-e2ee-wrong-account-recovery-key", ]); expect( @@ -503,9 +506,12 @@ describe("matrix live qa runtime", () => { .scheduleMatrixQaScenariosInCatalogOrder(scenarios) .map(({ scenario }) => scenario.id), ).toEqual([ + "matrix-thread-follow-up", + "matrix-thread-isolation", + "matrix-e2ee-cli-self-verification", "matrix-e2ee-cli-encryption-setup-multi-account", "matrix-e2ee-cli-setup-then-gateway-reply", - "matrix-e2ee-cli-self-verification", + "matrix-e2ee-wrong-account-recovery-key", ]); }); diff --git a/extensions/qa-matrix/src/runners/contract/runtime.ts b/extensions/qa-matrix/src/runners/contract/runtime.ts index 216a9edd9bf..6921d35ad58 100644 --- a/extensions/qa-matrix/src/runners/contract/runtime.ts +++ b/extensions/qa-matrix/src/runners/contract/runtime.ts @@ -61,6 +61,8 @@ function buildMatrixQaGatewayConfigKey(overrides?: MatrixQaConfigOverrides) { return JSON.stringify(overrides ?? null); } +const MATRIX_QA_EXECUTION_TAIL_SCENARIO_IDS = new Set(["matrix-e2ee-wrong-account-recovery-key"]); + type MatrixQaScenarioResult = { artifacts?: MatrixQaScenarioArtifacts; details: string; @@ -313,7 +315,27 @@ function buildMatrixQaScenarioResult(params: { function scheduleMatrixQaScenariosInCatalogOrder( scenarios: readonly (typeof MATRIX_QA_SCENARIOS)[number][], ): MatrixQaScheduledScenario[] { - return scenarios.map((scenario, originalIndex) => ({ originalIndex, scenario })); + const entries = scenarios.map((scenario, originalIndex) => ({ originalIndex, scenario })); + const groupedEntries: MatrixQaScheduledScenario[][] = []; + const groupIndexes = new Map(); + const tailEntries: MatrixQaScheduledScenario[] = []; + + for (const entry of entries) { + if (MATRIX_QA_EXECUTION_TAIL_SCENARIO_IDS.has(entry.scenario.id)) { + tailEntries.push(entry); + continue; + } + const key = buildMatrixQaGatewayConfigKey(entry.scenario.configOverrides); + const existingIndex = groupIndexes.get(key); + if (existingIndex !== undefined) { + groupedEntries[existingIndex]?.push(entry); + continue; + } + groupIndexes.set(key, groupedEntries.length); + groupedEntries.push([entry]); + } + + return [...groupedEntries.flat(), ...tailEntries]; } function getMatrixQaScenarioRestartReadyTimeoutMs(scenario: { timeoutMs: number }): number { @@ -498,8 +520,10 @@ async function startMatrixQaLiveLaneGateway(params: { export async function runMatrixQaLive(params: { fastMode?: boolean; + failFast?: boolean; outputDir?: string; primaryModel?: string; + profile?: string; providerMode?: QaProviderModeInput; repoRoot?: string; scenarioIds?: string[]; @@ -518,7 +542,7 @@ export async function runMatrixQaLive(params: { alternateModel: params.alternateModel, }); const sutAccountId = params.sutAccountId?.trim() || "sut"; - const scenarios = findMatrixQaScenarios(params.scenarioIds); + const scenarios = findMatrixQaScenarios(params.scenarioIds, params.profile); const runSuffix = randomUUID().slice(0, 8); const topology = buildMatrixQaTopologyForScenarios({ defaultRoomName: `OpenClaw Matrix QA ${runSuffix}`, @@ -531,7 +555,7 @@ export async function runMatrixQaLive(params: { const runStartedAtMs = Date.now(); const runDeadline = createMatrixQaRunDeadline(); writeMatrixQaProgress( - `suite start scenarios=${scenarios.length} provider=${providerMode} output=${outputDir} timeout=${formatMatrixQaDurationMs(runDeadline.timeoutMs)}`, + `suite start scenarios=${scenarios.length} profile=${params.profile?.trim() || "all"} provider=${providerMode} output=${outputDir} timeout=${formatMatrixQaDurationMs(runDeadline.timeoutMs)}`, ); const { durationMs: harnessBootMs, result: harness } = await measureMatrixQaStep(() => @@ -895,6 +919,10 @@ export async function runMatrixQaLive(params: { status: "fail", }); writeMatrixQaProgress(`scenario fail ${scenario.id} ${formatErrorMessage(error)}`); + if (params.failFast) { + writeMatrixQaProgress("fail-fast stop"); + break; + } } } } diff --git a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts index 0c5c468a611..a66d40af598 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-catalog.ts @@ -96,6 +96,15 @@ export type MatrixQaScenarioDefinition = LiveTransportScenarioDefinition(ids); +} + +function normalizeMatrixQaProfile(profile?: string): MatrixQaProfile { + const normalized = profile?.trim().toLowerCase() || "all"; + if (MATRIX_QA_PROFILE_NAMES.includes(normalized as MatrixQaProfile)) { + return normalized as MatrixQaProfile; + } + throw new Error( + `unknown Matrix QA profile "${profile}"; expected one of: ${MATRIX_QA_PROFILE_NAMES.join(", ")}`, + ); +} + +function getMatrixQaProfileScenarioIds(profile: MatrixQaProfile): MatrixQaScenarioId[] { + const allIds = MATRIX_QA_SCENARIOS.map((scenario) => scenario.id); + const mediaIds = buildMatrixQaScenarioIdSet(MATRIX_QA_MEDIA_PROFILE_SCENARIO_IDS); + const smokeIds = buildMatrixQaScenarioIdSet(MATRIX_QA_E2EE_SMOKE_PROFILE_SCENARIO_IDS); + switch (profile) { + case "all": + return allIds; + case "fast": + return [...MATRIX_QA_FAST_PROFILE_SCENARIO_IDS]; + case "transport": + return allIds.filter((id) => !isMatrixQaE2eeScenarioId(id) && !mediaIds.has(id)); + case "media": + return [...MATRIX_QA_MEDIA_PROFILE_SCENARIO_IDS]; + case "e2ee-smoke": + return [...MATRIX_QA_E2EE_SMOKE_PROFILE_SCENARIO_IDS]; + case "e2ee-cli": + return allIds.filter(isMatrixQaCliE2eeScenarioId); + case "e2ee-deep": + return allIds.filter( + (id) => + isMatrixQaE2eeScenarioId(id) && + !isMatrixQaCliE2eeScenarioId(id) && + !mediaIds.has(id) && + !smokeIds.has(id), + ); + default: { + const exhaustiveProfile: never = profile; + return exhaustiveProfile; + } + } +} + +export function findMatrixQaScenarios(ids?: string[], profile?: string) { + const normalizedProfile = normalizeMatrixQaProfile(profile); + const selectedIds = + ids && ids.length > 0 ? ids : getMatrixQaProfileScenarioIds(normalizedProfile); return selectLiveTransportScenarios({ - ids, + ids: selectedIds, laneLabel: "Matrix", scenarios: MATRIX_QA_SCENARIOS, }); } +export const __matrixQaProfileTesting = { + getMatrixQaProfileScenarioIds, + normalizeMatrixQaProfile, +}; + export function buildMatrixQaTopologyForScenarios(params: { defaultRoomName: string; scenarios: MatrixQaScenarioDefinition[]; diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-dm.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-dm.ts index 16bfbb9c07e..92e40964f41 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-dm.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-dm.ts @@ -12,8 +12,8 @@ import { buildMatrixReplyArtifact, buildMatrixReplyDetails, createMatrixQaScenarioClient, - NO_REPLY_WINDOW_MS, advanceMatrixQaActorCursor, + resolveMatrixQaNoReplyWindowMs, runConfigurableTopLevelScenario, type MatrixQaScenarioContext, } from "./scenario-runtime-shared.js"; @@ -98,7 +98,7 @@ async function runDmSharedSessionFlow(params: { event.body.includes("channels.matrix.dm.sessionScope"), roomId: secondRoomId, since: noticeSince, - timeoutMs: Math.min(NO_REPLY_WINDOW_MS, params.context.timeoutMs), + timeoutMs: resolveMatrixQaNoReplyWindowMs(params.context.timeoutMs), }), ]); diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts index 105e0cd98f3..830774876d3 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-e2ee.ts @@ -50,7 +50,7 @@ import { buildMentionPrompt, doesMatrixQaReplyBodyMatchToken, isMatrixQaExactMarkerReply, - NO_REPLY_WINDOW_MS, + resolveMatrixQaNoReplyWindowMs, type MatrixQaScenarioContext, } from "./scenario-runtime-shared.js"; import type { MatrixQaReplyArtifact, MatrixQaScenarioExecution } from "./scenario-types.js"; @@ -3310,14 +3310,14 @@ export async function runMatrixQaE2eeVerificationNoticeNoTriggerScenario( token, }), roomId, - timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), }); if (result.matched) { throw new Error(`unexpected E2EE verification-notice reply: ${result.event.eventId}`); } return { artifacts: { - expectedNoReplyWindowMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + expectedNoReplyWindowMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), noticeEventId, roomKey, roomId, @@ -3326,7 +3326,7 @@ export async function runMatrixQaE2eeVerificationNoticeNoTriggerScenario( `encrypted room key: ${roomKey}`, `encrypted room id: ${roomId}`, `verification notice event: ${noticeEventId}`, - `waited ${Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs)}ms with no SUT reply`, + `waited ${resolveMatrixQaNoReplyWindowMs(context.timeoutMs)}ms with no SUT reply`, ].join("\n"), }; }, diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-restart.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-restart.ts index 076493b452e..78ea0e8253e 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-restart.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-restart.ts @@ -12,8 +12,8 @@ import { isMatrixQaExactMarkerReply, assertTopLevelReplyArtifact, advanceMatrixQaActorCursor, - NO_REPLY_WINDOW_MS, primeMatrixQaDriverScenarioClient, + resolveMatrixQaNoReplyWindowMs, runAssertedDriverTopLevelScenario, type MatrixQaScenarioContext, } from "./scenario-runtime-shared.js"; @@ -254,7 +254,7 @@ async function assertNoRestartReplayDuplicate(params: { token: params.replayToken, }), roomId: params.roomId, - timeoutMs: Math.min(NO_REPLY_WINDOW_MS, params.context.timeoutMs), + timeoutMs: resolveMatrixQaNoReplyWindowMs(params.context.timeoutMs), }); if (duplicate.matched) { throw new Error( @@ -313,7 +313,7 @@ export async function runRestartReplayDedupeScenario(context: MatrixQaScenarioCo return { artifacts: { - duplicateWindowMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + duplicateWindowMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), firstDriverEventId: replayDriverEventId, firstReply, firstToken: replayToken, @@ -328,7 +328,7 @@ export async function runRestartReplayDedupeScenario(context: MatrixQaScenarioCo "restart signal: SIGUSR1", `first driver event: ${replayDriverEventId}`, ...buildMatrixReplyDetails("first reply", firstReply), - `duplicate replay window: ${Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs)}ms`, + `duplicate replay window: ${resolveMatrixQaNoReplyWindowMs(context.timeoutMs)}ms`, `fresh post-restart driver event: ${postRestart.driverEventId}`, ...buildMatrixReplyDetails("fresh reply", postRestart.reply), ].join("\n"), @@ -401,7 +401,7 @@ export async function runStaleSyncReplayDedupeScenario(context: MatrixQaScenario return { artifacts: { dedupeCommitObserved: true, - duplicateWindowMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + duplicateWindowMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), firstDriverEventId: replayDriverEventId, firstReply, firstToken: replayToken, @@ -418,7 +418,7 @@ export async function runStaleSyncReplayDedupeScenario(context: MatrixQaScenario `stale sync cursor: ${staleCursor}`, `first driver event: ${replayDriverEventId}`, ...buildMatrixReplyDetails("first reply", firstReply), - `duplicate replay window: ${Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs)}ms`, + `duplicate replay window: ${resolveMatrixQaNoReplyWindowMs(context.timeoutMs)}ms`, `fresh post-restart driver event: ${postRestart.driverEventId}`, ...buildMatrixReplyDetails("fresh reply", postRestart.reply), ].join("\n"), diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts index ea59f03d923..57102a28596 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-room.ts @@ -24,9 +24,9 @@ import { createMatrixQaScenarioClient, isMatrixQaExactMarkerReply, isMatrixQaMessageLikeKind, - NO_REPLY_WINDOW_MS, primeMatrixQaActorCursor, primeMatrixQaDriverScenarioClient, + resolveMatrixQaNoReplyWindowMs, runAssertedDriverTopLevelScenario, runConfigurableTopLevelScenario, runDriverTopLevelMentionScenario, @@ -530,7 +530,7 @@ export async function runAllowlistHotReloadScenario(context: MatrixQaScenarioCon sutUserId: context.sutUserId, token: blockedToken, }), - timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), token: blockedToken, }); @@ -767,7 +767,7 @@ export async function runMembershipLossScenario(context: MatrixQaScenarioContext syncState: context.syncState, syncStreams: context.syncStreams, sutUserId: context.sutUserId, - timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), token: noReplyToken, }); diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts index 926c9fa7833..d27b2e7b664 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime-shared.ts @@ -56,6 +56,14 @@ export type MatrixQaScenarioContext = { }; export const NO_REPLY_WINDOW_MS = 8_000; +const NO_REPLY_WINDOW_ENV = "OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS"; + +export function resolveMatrixQaNoReplyWindowMs(timeoutMs: number) { + const raw = process.env[NO_REPLY_WINDOW_ENV]; + const parsed = raw === undefined ? NO_REPLY_WINDOW_MS : Number(raw); + const windowMs = Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : NO_REPLY_WINDOW_MS; + return Math.min(windowMs, timeoutMs); +} export function buildMentionPrompt(sutUserId: string, token: string) { return `${sutUserId} reply with only this exact marker: ${token}`; @@ -316,7 +324,7 @@ export async function assertNoSutReplyWindow(params: { unexpectedLines?: string[]; unexpectedMessage: string; }) { - const noReplyWindowMs = Math.min(NO_REPLY_WINDOW_MS, params.context.timeoutMs); + const noReplyWindowMs = resolveMatrixQaNoReplyWindowMs(params.context.timeoutMs); const result = await params.client.waitForOptionalRoomEvent({ observedEvents: params.context.observedEvents, predicate: (event) => diff --git a/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts b/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts index 2e7012c9cf3..ca41bca453a 100644 --- a/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts +++ b/extensions/qa-matrix/src/runners/contract/scenario-runtime.ts @@ -94,8 +94,8 @@ import { buildMatrixReplyArtifact, buildMatrixReplyDetails, buildMentionPrompt, - NO_REPLY_WINDOW_MS, readMatrixQaSyncCursor, + resolveMatrixQaNoReplyWindowMs, runNoReplyExpectedScenario, runTopologyScopedTopLevelScenario, writeMatrixQaSyncCursor, @@ -167,7 +167,7 @@ async function runMultiActorOrderingScenario(context: MatrixQaScenarioContext) { body: buildMentionPrompt(context.sutUserId, blockedToken), mentionUserIds: [context.sutUserId], context, - timeoutMs: Math.min(NO_REPLY_WINDOW_MS, context.timeoutMs), + timeoutMs: resolveMatrixQaNoReplyWindowMs(context.timeoutMs), token: blockedToken, }); const accepted = await runDriverTopologyScopedScenario({ diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts index 8a8ed497fa3..47579f21045 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.test.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.test.ts @@ -256,6 +256,54 @@ describe("matrix live qa scenarios", () => { } }); + it("keeps the Matrix CLI default profile on the full catalog", () => { + const allIds = scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id); + + expect( + scenarioTesting.findMatrixQaScenarios(undefined, "all").map((scenario) => scenario.id), + ).toEqual(allIds); + }); + + it("selects the fast release-critical Matrix profile without media or deep E2EE inventory", () => { + expect( + scenarioTesting.findMatrixQaScenarios(undefined, "fast").map((scenario) => scenario.id), + ).toEqual([ + "matrix-thread-follow-up", + "matrix-thread-isolation", + "matrix-top-level-reply-shape", + "matrix-reaction-notification", + "matrix-restart-resume", + "matrix-mention-gating", + "matrix-allowlist-block", + "matrix-e2ee-basic-reply", + ]); + }); + + it("keeps the full Matrix shard profiles exhaustive and disjoint", () => { + const allIds = scenarioTesting.findMatrixQaScenarios().map((scenario) => scenario.id); + const shardIds = ["transport", "media", "e2ee-smoke", "e2ee-deep", "e2ee-cli"].flatMap( + (profile) => + scenarioTesting.findMatrixQaScenarios(undefined, profile).map((scenario) => scenario.id), + ); + + expect(new Set(shardIds).size).toBe(shardIds.length); + expect(shardIds.toSorted()).toEqual(allIds.toSorted()); + }); + + it("lets explicit Matrix scenario ids override the selected profile", () => { + expect( + scenarioTesting + .findMatrixQaScenarios(["matrix-room-generated-image-delivery"], "fast") + .map((scenario) => scenario.id), + ).toEqual(["matrix-room-generated-image-delivery"]); + }); + + it("fails when the Matrix profile is unknown", () => { + expect(() => scenarioTesting.findMatrixQaScenarios(undefined, "speedy")).toThrow( + 'unknown Matrix QA profile "speedy"', + ); + }); + it("uses the repo-wide exact marker prompt shape for Matrix mentions", () => { expect( scenarioTesting.buildMentionPrompt("@sut:matrix-qa.test", "MATRIX_QA_CANARY_TOKEN"), diff --git a/extensions/qa-matrix/src/runners/contract/scenarios.ts b/extensions/qa-matrix/src/runners/contract/scenarios.ts index 1bd2c12f71c..304a14cb45f 100644 --- a/extensions/qa-matrix/src/runners/contract/scenarios.ts +++ b/extensions/qa-matrix/src/runners/contract/scenarios.ts @@ -3,6 +3,7 @@ import { MATRIX_QA_DRIVER_DM_SHARED_ROOM_KEY, MATRIX_QA_E2EE_ROOM_KEY, MATRIX_QA_MEDIA_ROOM_KEY, + MATRIX_QA_PROFILE_NAMES, MATRIX_QA_MEMBERSHIP_ROOM_KEY, MATRIX_QA_SCENARIOS, MATRIX_QA_SECONDARY_ROOM_KEY, @@ -13,6 +14,8 @@ import { resolveMatrixQaScenarioRoomId, type MatrixQaScenarioDefinition, type MatrixQaScenarioId, + type MatrixQaProfile, + __matrixQaProfileTesting, } from "./scenario-catalog.js"; import { buildMatrixReplyArtifact, @@ -34,6 +37,7 @@ import type { export type { MatrixQaScenarioDefinition, MatrixQaScenarioId }; export { + MATRIX_QA_PROFILE_NAMES, MATRIX_QA_SCENARIOS, MATRIX_QA_STANDARD_SCENARIO_IDS, buildMatrixReplyArtifact, @@ -46,6 +50,7 @@ export { runMatrixQaCanary, runMatrixQaScenario, }; +export type { MatrixQaProfile }; export type { MatrixQaCanaryArtifact, MatrixQaReplyArtifact, @@ -61,6 +66,7 @@ export const __testing = { MATRIX_QA_E2EE_ROOM_KEY, MATRIX_QA_MEDIA_ROOM_KEY, MATRIX_QA_MEMBERSHIP_ROOM_KEY, + MATRIX_QA_PROFILE_NAMES, MATRIX_QA_SECONDARY_ROOM_KEY, MATRIX_QA_STANDARD_SCENARIO_IDS, buildMatrixQaE2eeScenarioRoomKey, @@ -69,6 +75,8 @@ export const __testing = { buildMatrixReplyArtifact, buildMentionPrompt, findMatrixQaScenarios, + getMatrixQaProfileScenarioIds: __matrixQaProfileTesting.getMatrixQaProfileScenarioIds, + normalizeMatrixQaProfile: __matrixQaProfileTesting.normalizeMatrixQaProfile, readMatrixQaSyncCursor, resolveMatrixQaScenarioRoomId, writeMatrixQaSyncCursor, diff --git a/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts b/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts index ffea62128d6..d71c92ec576 100644 --- a/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts +++ b/extensions/qa-matrix/src/shared/live-transport-cli.runtime.ts @@ -27,6 +27,8 @@ export function resolveLiveTransportQaRunOptions( primaryModel: opts.primaryModel, alternateModel: opts.alternateModel, fastMode: opts.fastMode, + failFast: opts.failFast, + profile: opts.profile?.trim(), scenarioIds: opts.scenarioIds, sutAccountId: opts.sutAccountId, credentialSource: opts.credentialSource?.trim(), diff --git a/extensions/qa-matrix/src/shared/live-transport-cli.ts b/extensions/qa-matrix/src/shared/live-transport-cli.ts index 92f320b4eb4..1e6fa97e78a 100644 --- a/extensions/qa-matrix/src/shared/live-transport-cli.ts +++ b/extensions/qa-matrix/src/shared/live-transport-cli.ts @@ -9,6 +9,8 @@ export type LiveTransportQaCommandOptions = { primaryModel?: string; alternateModel?: string; fastMode?: boolean; + failFast?: boolean; + profile?: string; scenarioIds?: string[]; sutAccountId?: string; credentialSource?: string; @@ -23,6 +25,8 @@ type LiveTransportQaCommanderOptions = { altModel?: string; scenario?: string[]; fast?: boolean; + failFast?: boolean; + profile?: string; sutAccount?: string; credentialSource?: string; credentialRole?: string; @@ -56,6 +60,8 @@ export function mapLiveTransportQaCommanderOptions( primaryModel: opts.model, alternateModel: opts.altModel, fastMode: opts.fast, + failFast: opts.failFast, + profile: opts.profile, scenarioIds: opts.scenario, sutAccountId: opts.sutAccount, credentialSource: opts.credentialSource, @@ -69,6 +75,8 @@ export function registerLiveTransportQaCli(params: { credentialOptions?: LiveTransportQaCredentialCliOptions; description: string; outputDirHelp: string; + profileHelp?: string; + failFastHelp?: string; scenarioHelp: string; sutAccountHelp: string; run: (opts: LiveTransportQaCommandOptions) => Promise; @@ -89,6 +97,14 @@ export function registerLiveTransportQaCli(params: { .option("--fast", "Enable provider fast mode where supported", false) .option("--sut-account ", params.sutAccountHelp, "sut"); + if (params.profileHelp) { + command.option("--profile ", params.profileHelp); + } + + if (params.failFastHelp) { + command.option("--fail-fast", params.failFastHelp, false); + } + if (params.credentialOptions) { command.option( "--credential-source ", @@ -110,6 +126,8 @@ export function createLiveTransportQaCliRegistration(params: { credentialOptions?: LiveTransportQaCredentialCliOptions; description: string; outputDirHelp: string; + profileHelp?: string; + failFastHelp?: string; scenarioHelp: string; sutAccountHelp: string; run: (opts: LiveTransportQaCommandOptions) => Promise; @@ -123,6 +141,8 @@ export function createLiveTransportQaCliRegistration(params: { credentialOptions: params.credentialOptions, description: params.description, outputDirHelp: params.outputDirHelp, + profileHelp: params.profileHelp, + failFastHelp: params.failFastHelp, scenarioHelp: params.scenarioHelp, sutAccountHelp: params.sutAccountHelp, run: params.run,