fix(ci): harden full release validation monitors

This commit is contained in:
Peter Steinberger
2026-04-28 22:36:14 +01:00
parent d9b46e0551
commit 69fb7455c6
4 changed files with 74 additions and 2 deletions

View File

@@ -227,6 +227,7 @@ jobs:
fi
sleep 30
done
trap - EXIT INT TERM
conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
url="$(gh run view "$run_id" --json url --jq '.url')"
@@ -328,6 +329,7 @@ jobs:
fi
sleep 30
done
trap - EXIT INT TERM
conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
url="$(gh run view "$run_id" --json url --jq '.url')"
@@ -435,6 +437,7 @@ jobs:
fi
sleep 30
done
trap - EXIT INT TERM
conclusion="$(gh run view "$run_id" --json conclusion --jq '.conclusion')"
url="$(gh run view "$run_id" --json url --jq '.url')"

View File

@@ -83,4 +83,53 @@ describe("live cache regression runner", () => {
),
).toBe(false);
});
it("accepts a warmup that already hits the provider cache", () => {
const findings = __testing.evaluateAgainstBaseline({
lane: "image",
provider: "anthropic",
result: {
best: {
hitRate: 0.999,
suffix: "image-hit",
text: "CACHE-OK image-hit",
usage: { cacheRead: 5_742, cacheWrite: 0, input: 3 },
},
warmup: {
hitRate: 0.999,
suffix: "image-warmup",
text: "CACHE-OK image-warmup",
usage: { cacheRead: 5_741, cacheWrite: 0, input: 3 },
},
},
});
expect(findings).toEqual({ regressions: [], warnings: [] });
});
it("still rejects warmups with no cache write or cache hit evidence", () => {
const findings = __testing.evaluateAgainstBaseline({
lane: "image",
provider: "anthropic",
result: {
best: {
hitRate: 0.999,
suffix: "image-hit",
text: "CACHE-OK image-hit",
usage: { cacheRead: 5_742, cacheWrite: 0, input: 3 },
},
warmup: {
hitRate: 0,
suffix: "image-warmup",
text: "CACHE-OK image-warmup",
usage: { cacheRead: 0, cacheWrite: 0, input: 5_741 },
},
},
});
expect(findings).toEqual({
regressions: ["anthropic:image warmup cacheWrite=0 < min=1"],
warnings: [],
});
});
});

View File

@@ -359,6 +359,21 @@ function formatUsage(usage: CacheUsage | undefined) {
return `cacheRead=${usage?.cacheRead ?? 0} cacheWrite=${usage?.cacheWrite ?? 0} input=${usage?.input ?? 0}`;
}
function warmupHasCacheEvidence(params: { floor: LiveCacheFloor; warmup: CacheRun }): boolean {
const cacheRead = params.warmup.usage.cacheRead ?? 0;
const cacheWrite = params.warmup.usage.cacheWrite ?? 0;
if (params.floor.minCacheReadOrWrite !== undefined) {
return Math.max(cacheRead, cacheWrite) >= params.floor.minCacheReadOrWrite;
}
if (params.floor.minCacheRead !== undefined && cacheRead < params.floor.minCacheRead) {
return false;
}
if (params.floor.minHitRate !== undefined && params.warmup.hitRate < params.floor.minHitRate) {
return false;
}
return params.floor.minCacheRead !== undefined || params.floor.minHitRate !== undefined;
}
function assertAgainstBaseline(params: {
lane: BaselineLane;
provider: ProviderKey;
@@ -401,8 +416,12 @@ function assertAgainstBaseline(params: {
}
if (params.result.warmup) {
const warmupUsage = params.result.warmup.usage;
if ((warmupUsage.cacheWrite ?? 0) < (floor.minCacheWrite ?? 0)) {
const warmup = params.result.warmup;
const warmupUsage = warmup.usage;
if (
(warmupUsage.cacheWrite ?? 0) < (floor.minCacheWrite ?? 0) &&
!warmupHasCacheEvidence({ floor, warmup })
) {
recordRegression(
`${params.provider}:${params.lane} warmup cacheWrite=${warmupUsage.cacheWrite ?? 0} < min=${floor.minCacheWrite}`,
);

View File

@@ -307,6 +307,7 @@ describe("package artifact reuse", () => {
expect(workflow).toContain("child_rerun_group=all");
expect(workflow).toContain('-f rerun_group="$child_rerun_group"');
expect(workflow).toContain("NORMAL_CI_RESULT: ${{ needs.normal_ci.result }}");
expect(workflow.match(/trap - EXIT INT TERM/g)).toHaveLength(6);
expect(workflow).not.toContain("workflow_ref:");
expect(workflow).not.toContain("inputs.workflow_ref");
});