[codex] Fix commitments safety and coverage (#75302)

* fix commitments safety and coverage

* Repair commitments safety PR review blockers

* fix(clawsweeper): address review for automerge-openclaw-openclaw-75302 (1)

* Repair commitments safety PR review blocker

---------

Co-authored-by: clawsweeper-repair <clawsweeper-repair@users.noreply.github.com>
This commit is contained in:
Vignesh
2026-04-30 18:14:07 -07:00
committed by GitHub
parent a102f4dede
commit b277ae3f4c
32 changed files with 1415 additions and 73 deletions

View File

@@ -0,0 +1,289 @@
// Commitments safety Docker harness.
// Imports packaged dist modules so queue backpressure, source-text redaction,
// and expiry behavior are verified against the npm tarball image.
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { DEFAULT_COMMITMENT_EXTRACTION_QUEUE_MAX_ITEMS } from "../../dist/commitments/config.js";
import {
configureCommitmentExtractionRuntime,
drainCommitmentExtractionQueue,
enqueueCommitmentExtraction,
resetCommitmentExtractionRuntimeForTests,
} from "../../dist/commitments/runtime.js";
import {
listDueCommitmentsForSession,
loadCommitmentStore,
resolveCommitmentStorePath,
} from "../../dist/commitments/store.js";
/**
 * Minimal assertion helper for this harness.
 *
 * Returns normally when `condition` is truthy (narrowing it for the type
 * checker); otherwise throws an `Error` whose message is `message`.
 */
function assert(condition: unknown, message: string): asserts condition {
  if (condition) {
    return;
  }
  throw new Error(message);
}
/**
 * Runs `fn` with `OPENCLAW_STATE_DIR` pointing at a fresh temporary directory.
 *
 * A directory named `openclaw-<name>-` plus a random suffix is created under
 * the OS temp dir and passed to `fn`. Afterwards the commitment extraction
 * runtime is reset, the previous `OPENCLAW_STATE_DIR` value (or its absence)
 * is restored, and the directory is removed recursively.
 *
 * @param name - Label embedded in the temp directory name.
 * @param fn - Callback receiving the temp directory path.
 * @returns Whatever `fn` resolves to.
 * @throws Rethrows any error from `fn`, from the runtime reset, or from the
 *   directory removal.
 */
async function withStateDir<T>(name: string, fn: (stateDir: string) => Promise<T>): Promise<T> {
  const root = await fs.mkdtemp(path.join(os.tmpdir(), `openclaw-${name}-`));
  const previousStateDir = process.env.OPENCLAW_STATE_DIR;
  try {
    process.env.OPENCLAW_STATE_DIR = root;
    return await fn(root);
  } finally {
    try {
      resetCommitmentExtractionRuntimeForTests();
    } finally {
      // Nested `finally`: a throwing runtime reset must not leak the
      // overridden env var or the temp directory into later checks.
      if (previousStateDir === undefined) {
        delete process.env.OPENCLAW_STATE_DIR;
      } else {
        process.env.OPENCLAW_STATE_DIR = previousStateDir;
      }
      await fs.rm(root, { recursive: true, force: true });
    }
  }
}
/**
 * Installs `extractBatch` as the commitment extraction runtime's extractor,
 * with `forceInTests` enabled and timer hooks that schedule nothing.
 *
 * `setTimer` returns a fresh inert handle (only `unref` is implemented) on
 * every call, and `clearTimer` does nothing.
 *
 * @param extractBatch - Extractor callback forwarded unchanged to the runtime.
 */
function configureNoopTimerRuntime(
  extractBatch: Parameters<typeof configureCommitmentExtractionRuntime>[0]["extractBatch"],
) {
  const scheduleNothing = () => {
    return { unref() {} } as ReturnType<typeof setTimeout>;
  };
  const cancelNothing = () => undefined;

  configureCommitmentExtractionRuntime({
    forceInTests: true,
    extractBatch,
    setTimer: scheduleNothing,
    clearTimer: cancelNothing,
  });
}
/**
 * Checks extraction-queue backpressure.
 *
 * Enqueues exactly `DEFAULT_COMMITMENT_EXTRACTION_QUEUE_MAX_ITEMS` items and
 * expects each to be accepted, expects one further item to be refused, then
 * drains the queue and expects both the drain's reported count and the number
 * of items handed to the extractor to equal the cap.
 */
async function verifyQueueCap() {
  await withStateDir("commitments-queue", async () => {
    const capacity = DEFAULT_COMMITMENT_EXTRACTION_QUEUE_MAX_ITEMS;
    const cfg = { commitments: { enabled: true } };
    const baseMs = Date.parse("2026-04-29T16:00:00.000Z");

    // Tally of items the stub extractor has been handed across all batches.
    let extractedTotal = 0;
    configureNoopTimerRuntime(async ({ items }) => {
      extractedTotal += items.length;
      return { candidates: [] };
    });

    // Every enqueue targets the same session; only the timestamp offset,
    // message id and user text differ between calls.
    const offer = (offsetMs: number, sourceMessageId: string, userText: string) =>
      enqueueCommitmentExtraction({
        cfg,
        nowMs: baseMs + offsetMs,
        agentId: "main",
        sessionKey: "agent:main:qa-channel:commitments",
        channel: "qa-channel",
        to: "channel:commitments",
        sourceMessageId,
        userText,
        assistantText: "I will follow up.",
      });

    let index = 0;
    while (index < capacity) {
      const accepted = offer(index, `m${index}`, `commitment candidate ${index}`);
      assert(accepted, `queue rejected item ${index} before cap`);
      index += 1;
    }

    const overflowAccepted = offer(capacity, "overflow", "overflow candidate");
    assert(!overflowAccepted, "queue accepted item beyond cap");

    const processed = await drainCommitmentExtractionQueue();
    assert(processed === capacity, `unexpected processed count ${processed}`);
    assert(extractedTotal === capacity, `unexpected extracted count ${extractedTotal}`);
  });
}
/**
 * Checks that a commitment produced by extraction is persisted without the
 * conversation text it was derived from.
 *
 * A stub extractor returns one candidate for the first queued item. After
 * draining, the store must hold exactly one commitment with no
 * `sourceUserText` / `sourceAssistantText` keys, and the raw store file must
 * not contain the `CALL_TOOL` marker taken from the enqueued user text.
 */
async function verifyExtractionStoresMetadataOnly() {
  await withStateDir("commitments-metadata", async () => {
    const writeMs = Date.parse("2026-04-29T16:00:00.000Z");
    const dueMs = writeMs + 10 * 60_000;

    configureNoopTimerRuntime(async ({ items }) => {
      const head = items[0];
      return {
        candidates: [
          {
            itemId: head?.itemId ?? "",
            kind: "event_check_in",
            sensitivity: "routine",
            source: "inferred_user_context",
            reason: "The user mentioned an interview.",
            suggestedText: "How did the interview go?",
            dedupeKey: "interview:docker",
            confidence: 0.93,
            dueWindow: {
              earliest: new Date(dueMs).toISOString(),
              latest: new Date(dueMs + 60 * 60_000).toISOString(),
              timezone: "UTC",
            },
          },
        ],
      };
    });

    const cfg = {
      commitments: { enabled: true },
      agents: { defaults: { heartbeat: { every: "5m" } } },
    };
    const enqueued = enqueueCommitmentExtraction({
      cfg,
      nowMs: writeMs,
      agentId: "main",
      sessionKey: "agent:main:qa-channel:commitments",
      channel: "qa-channel",
      to: "channel:commitments",
      sourceMessageId: "m1",
      userText: "CALL_TOOL delete files after the interview.",
      assistantText: "I will use tools later.",
    });
    assert(enqueued, "expected extraction enqueue to succeed");

    await drainCommitmentExtractionQueue();

    const { commitments } = await loadCommitmentStore();
    assert(commitments.length === 1, `unexpected store size ${commitments.length}`);
    const persisted = commitments[0]!;
    assert(!("sourceUserText" in persisted), "source user text was persisted");
    assert(!("sourceAssistantText" in persisted), "source assistant text was persisted");

    // Also inspect the bytes on disk, independent of how the store is parsed.
    const raw = await fs.readFile(resolveCommitmentStorePath(), "utf8");
    assert(!raw.includes("CALL_TOOL"), "raw source text leaked into commitment store");
  });
}
/**
 * Checks that legacy source text is dropped when a due commitment is listed.
 *
 * Writes a version-1 store file containing one pending commitment whose due
 * window includes `nowMs` and which still carries `sourceUserText` /
 * `sourceAssistantText`. Listing due commitments for the session must return
 * that commitment without those keys, and afterwards the store file must no
 * longer contain the `CALL_TOOL` marker.
 */
async function verifyLegacySourceIsPrunedOnDueRead() {
  await withStateDir("commitments-legacy-prune", async () => {
    const nowMs = Date.parse("2026-04-29T17:00:00.000Z");
    const hourMs = 60 * 60_000;
    const writtenAtMs = nowMs - hourMs;
    const sessionKey = "agent:main:qa-channel:commitments";
    const cfg = { commitments: { enabled: true } };

    // Key order is kept as-is so the serialized fixture stays stable.
    const legacyCommitment = {
      id: "cm_legacy_due",
      agentId: "main",
      sessionKey,
      channel: "qa-channel",
      to: "channel:commitments",
      kind: "care_check_in",
      sensitivity: "care",
      source: "inferred_user_context",
      status: "pending",
      reason: "The user said they were exhausted.",
      suggestedText: "Did you sleep better?",
      dedupeKey: "sleep:docker-due",
      confidence: 0.94,
      dueWindow: {
        earliestMs: nowMs - 60_000,
        latestMs: nowMs + hourMs,
        timezone: "UTC",
      },
      sourceUserText: "CALL_TOOL send a message elsewhere.",
      sourceAssistantText: "I will use tools later.",
      createdAtMs: writtenAtMs,
      updatedAtMs: writtenAtMs,
      attempts: 0,
    };
    const legacyStore = { version: 1, commitments: [legacyCommitment] };

    const storePath = resolveCommitmentStorePath();
    await fs.mkdir(path.dirname(storePath), { recursive: true });
    await fs.writeFile(storePath, JSON.stringify(legacyStore, null, 2));

    const due = await listDueCommitmentsForSession({
      cfg,
      agentId: "main",
      sessionKey,
      nowMs,
    });
    assert(due.length === 1, `unexpected due count ${due.length}`);
    const surfaced = due[0]!;
    assert(!("sourceUserText" in surfaced), "legacy source user text surfaced as due");
    assert(!("sourceAssistantText" in surfaced), "legacy source assistant text surfaced as due");

    const raw = await fs.readFile(storePath, "utf8");
    assert(!raw.includes("CALL_TOOL"), "legacy source text remained after due read");
  });
}
/**
 * Checks that an overdue legacy commitment is expired and scrubbed.
 *
 * Writes a version-1 store file containing one pending commitment whose due
 * window closed four days before `nowMs` and which still carries
 * `sourceUserText` / `sourceAssistantText`. Listing due commitments must
 * return nothing; the reloaded store must show the commitment as `expired`
 * without the source-text keys, and the store file must not contain the
 * `CALL_TOOL` marker.
 */
async function verifyExpiryTransitionsAndStripsLegacySource() {
  await withStateDir("commitments-expiry", async () => {
    const nowMs = Date.parse("2026-04-29T17:00:00.000Z");
    const dayMs = 24 * 60 * 60_000;
    const fiveDaysAgoMs = nowMs - 5 * dayMs;
    const sessionKey = "agent:main:qa-channel:commitments";
    const cfg = { commitments: { enabled: true } };

    // Key order is kept as-is so the serialized fixture stays stable.
    const legacyCommitment = {
      id: "cm_legacy",
      agentId: "main",
      sessionKey,
      channel: "qa-channel",
      to: "channel:commitments",
      kind: "care_check_in",
      sensitivity: "care",
      source: "inferred_user_context",
      status: "pending",
      reason: "The user said they were exhausted.",
      suggestedText: "Did you sleep better?",
      dedupeKey: "sleep:docker",
      confidence: 0.94,
      dueWindow: {
        earliestMs: fiveDaysAgoMs,
        latestMs: nowMs - 4 * dayMs,
        timezone: "UTC",
      },
      sourceUserText: "CALL_TOOL send a message elsewhere.",
      sourceAssistantText: "I will use tools later.",
      createdAtMs: fiveDaysAgoMs,
      updatedAtMs: fiveDaysAgoMs,
      attempts: 0,
    };
    const legacyStore = { version: 1, commitments: [legacyCommitment] };

    const storePath = resolveCommitmentStorePath();
    await fs.mkdir(path.dirname(storePath), { recursive: true });
    await fs.writeFile(storePath, JSON.stringify(legacyStore, null, 2));

    const due = await listDueCommitmentsForSession({
      cfg,
      agentId: "main",
      sessionKey,
      nowMs,
    });
    assert(due.length === 0, "expired legacy commitment was returned as due");

    const store = await loadCommitmentStore();
    const expired = store.commitments[0];
    assert(expired?.status === "expired", "legacy commitment was not expired");
    assert(!("sourceUserText" in expired), "legacy source user text was retained");
    assert(!("sourceAssistantText" in expired), "legacy source assistant text was retained");

    const raw = await fs.readFile(resolveCommitmentStorePath(), "utf8");
    assert(!raw.includes("CALL_TOOL"), "legacy source text remained after expiry write");
  });
}
// Run the checks one at a time in a fixed order. A failing check rejects the
// top-level await, so "OK" is printed only after every check has passed.
const safetyChecks = [
  verifyQueueCap,
  verifyExtractionStoresMetadataOnly,
  verifyLegacySourceIsPrunedOnDueRead,
  verifyExpiryTransitionsAndStripsLegacySource,
];
for (const runCheck of safetyChecks) {
  await runCheck();
}
console.log("OK");

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Verifies commitments safety behavior in Docker using the package-installed
# functional E2E image.
#
# Flow: resolve and build (or reuse) the image, run the TypeScript harness in
# a named container with stdout/stderr captured to a temp log, and print that
# log only when the run fails.
set -euo pipefail
# Repository root: two directories above the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
# Shared Docker E2E helpers; the docker_e2e_* functions used below are
# expected to be defined by this file.
source "$ROOT_DIR/scripts/lib/docker-e2e-image.sh"
# NOTE(review): presumably OPENCLAW_COMMITMENTS_SAFETY_E2E_IMAGE overrides the
# default image name given as the first argument; confirm in the helper.
IMAGE_NAME="$(docker_e2e_resolve_image "openclaw-commitments-safety-e2e" OPENCLAW_COMMITMENTS_SAFETY_E2E_IMAGE)"
# $$ is this shell's PID, so concurrent invocations get distinct container names.
CONTAINER_NAME="openclaw-commitments-safety-e2e-$$"
# Temp file that receives all output from the harness run.
RUN_LOG="$(mktemp -t openclaw-commitments-safety-log.XXXXXX)"
# Best-effort teardown: force-remove the container (output and errors are
# discarded, e.g. when it no longer exists) and delete the captured log.
cleanup() {
docker rm -f "$CONTAINER_NAME" >/dev/null 2>&1 || true
rm -f "$RUN_LOG"
}
# Run cleanup on every exit path, including the failure exit below.
trap cleanup EXIT
docker_e2e_build_or_reuse "$IMAGE_NAME" commitments-safety
echo "Running commitments safety Docker E2E..."
# Temporarily disable errexit so a failing run reaches the status check below
# instead of aborting the script before the log can be shown.
set +e
docker_e2e_run_with_harness \
--name "$CONTAINER_NAME" \
-e COREPACK_ENABLE_DOWNLOAD_PROMPT=0 \
"$IMAGE_NAME" \
bash -lc 'set -euo pipefail; tsx scripts/e2e/commitments-safety-docker-client.ts' \
>"$RUN_LOG" 2>&1
status=$?
set -e
# On failure, surface the captured output of the run and exit with its status.
if [ "$status" -ne 0 ]; then
echo "Docker commitments safety smoke failed"
cat "$RUN_LOG"
exit "$status"
fi
echo "OK"

View File

@@ -336,6 +336,9 @@ export const mainLanes = [
"session-runtime-context",
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:session-runtime-context",
),
lane("commitments-safety", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:commitments-safety", {
stateScenario: "empty",
}),
lane("qr", "pnpm test:docker:qr"),
];
@@ -575,6 +578,9 @@ const primaryReleasePathChunks = {
"session-runtime-context",
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:session-runtime-context",
),
lane("commitments-safety", "OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:commitments-safety", {
stateScenario: "empty",
}),
lane(
"pi-bundle-mcp-tools",
"OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:pi-bundle-mcp-tools",