fix(release): stabilize Codex live validation

This commit is contained in:
Peter Steinberger
2026-05-09 15:24:18 +01:00
parent 16f3350b84
commit bcb4c8d597
9 changed files with 81 additions and 16 deletions

View File

@@ -2122,8 +2122,8 @@ jobs:
# inside the already-isolated container to keep MCP cron/tool
# execution representative instead of failing on nested sandbox
# setup.
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo "OPENCLAW_LIVE_CLI_BACKEND_DEBUG=1" >> "$GITHUB_ENV"
echo "OPENCLAW_CLI_BACKEND_LOG_OUTPUT=1" >> "$GITHUB_ENV"
echo "OPENCLAW_TEST_CONSOLE=1" >> "$GITHUB_ENV"
@@ -2341,8 +2341,8 @@ jobs:
live-cli-backend-docker)
echo "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.4" >> "$GITHUB_ENV"
echo "OPENCLAW_LIVE_CLI_BACKEND_AUTH=api-key" >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
echo "OPENCLAW_LIVE_CLI_BACKEND_DEBUG=1" >> "$GITHUB_ENV"
echo "OPENCLAW_CLI_BACKEND_LOG_OUTPUT=1" >> "$GITHUB_ENV"
echo "OPENCLAW_TEST_CONSOLE=1" >> "$GITHUB_ENV"

View File

@@ -32,7 +32,7 @@ export function buildOpenAICodexCliBackend(): CliBackendPlugin {
"--sandbox",
"workspace-write",
"-c",
'service_tier="priority"',
'service_tier="fast"',
"--skip-git-repo-check",
],
resumeArgs: [
@@ -42,7 +42,7 @@ export function buildOpenAICodexCliBackend(): CliBackendPlugin {
"-c",
'sandbox_mode="workspace-write"',
"-c",
'service_tier="priority"',
'service_tier="fast"',
"--skip-git-repo-check",
],
output: "jsonl",

View File

@@ -694,6 +694,43 @@ describe("qa mock openai server", () => {
expect(payload.output?.[0]?.content?.[0]?.text).toContain("Status: complete");
});
it("continues repo-contract followthrough when a retry user item follows tool output", async () => {
  // Start the mock server and register its shutdown with the shared cleanup list.
  const mockServer = await startQaMockOpenAiServer({ host: "127.0.0.1", port: 0 });
  cleanups.push(async () => {
    await mockServer.stop();
  });

  // Conversation under test: the original followthrough prompt, one tool result,
  // and then a trailing user item that only asks the model to continue.
  const followthroughPrompt =
    "Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status.";
  const priorToolOutput =
    "# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n";
  const retryNudge = "Continue after compaction.";
  const requestPayload = {
    stream: true,
    model: "gpt-5.5",
    input: [
      { role: "user", content: [{ type: "input_text", text: followthroughPrompt }] },
      { type: "function_call_output", output: priorToolOutput },
      { role: "user", content: [{ type: "input_text", text: retryNudge }] },
    ],
  };

  const response = await fetch(`${mockServer.baseUrl}/v1/responses`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify(requestPayload),
  });

  expect(response.status).toBe(200);
  // The streamed body must carry a tool call whose arguments target SOUL.md.
  const streamedBody = await response.text();
  expect(streamedBody).toContain('"arguments":"{\\"path\\":\\"SOUL.md\\"}"');
});
it("drives the compaction retry mutating tool parity flow", async () => {
const server = await startQaMockOpenAiServer({
host: "127.0.0.1",

View File

@@ -281,6 +281,17 @@ function findLastUserIndex(input: ResponsesInputItem[]) {
return -1;
}
/**
 * Decides whether a piece of user text reads as a "keep going" style nudge.
 *
 * The text is trimmed first; empty or whitespace-only input is never a
 * continuation. Otherwise the text qualifies when either:
 *  - the whole message is one of a few short phrases ("continue",
 *    "keep going", "resume", "retry", "carry on"), optionally followed by a
 *    single `.`, `!` or `?`; or
 *  - it contains, as a standalone word anywhere, one of the
 *    continuation/compaction keywords.
 *
 * Matching is case-insensitive.
 *
 * @param text Raw user message text.
 * @returns `true` when the text matches one of the continuation patterns.
 */
function isToolOutputContinuationText(text: string) {
  const candidate = text.trim();
  if (candidate.length === 0) {
    return false;
  }

  // Whole-message nudge, allowing one trailing punctuation mark.
  const wholeMessageNudge = /^(?:continue|keep going|resume|retry|carry on)(?:[.!?])?$/i;
  if (wholeMessageNudge.test(candidate)) {
    return true;
  }

  // Otherwise accept any message that mentions a continuation keyword as a whole word.
  const keywordMention =
    /\b(?:continue|continuation|compaction|post-compaction|retry|resume)\b/i;
  return keywordMention.test(candidate);
}
function extractToolOutput(input: ResponsesInputItem[]) {
const lastUserIndex = findLastUserIndex(input);
for (let index = input.length - 1; index > lastUserIndex; index -= 1) {
@@ -289,6 +300,23 @@ function extractToolOutput(input: ResponsesInputItem[]) {
return item.output;
}
}
for (let index = input.length - 1; index >= 0; index -= 1) {
const item = input[index];
if (item.type === "function_call_output" && typeof item.output === "string" && item.output) {
const laterUserTexts = input
.slice(index + 1)
.filter((laterItem) => laterItem.role === "user" && Array.isArray(laterItem.content))
.map((laterItem) => extractInputText(laterItem.content as unknown[]))
.filter(Boolean);
if (
laterUserTexts.length > 0 &&
laterUserTexts.every((text) => isToolOutputContinuationText(text))
) {
return item.output;
}
continue;
}
}
return "";
}
@@ -788,7 +816,7 @@ function buildAssistantText(
if (/tool continuity check/i.test(prompt) && toolOutput) {
return `Protocol note: model switch handoff confirmed on ${model || "the requested model"}. QA mission from QA_KICKOFF_TASK.md still applies: understand this OpenClaw repo from source + docs before acting.`;
}
if (toolOutput && /repo contract followthrough check/i.test(prompt)) {
if (toolOutput && /repo contract followthrough check/i.test(allInputText)) {
if (
/successfully (?:wrote|created|updated|replaced)/i.test(toolOutput) ||
/status:\s*complete/i.test(toolOutput)
@@ -1723,7 +1751,7 @@ async function buildResponsesPayload(
if (/tool continuity check/i.test(prompt) && !toolOutput) {
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
}
if (/repo contract followthrough check/i.test(prompt)) {
if (/repo contract followthrough check/i.test(allInputText)) {
if (!toolOutput) {
return buildToolCallEventsWithArgs("read", { path: "AGENT.md" });
}

View File

@@ -208,7 +208,7 @@ func runCodexExecPrompt(ctx context.Context, req codexPromptRequest) (string, er
"exec",
"--model", req.Model,
"-c", fmt.Sprintf("model_reasoning_effort=%q", normalizeThinking(req.Thinking)),
"-c", `service_tier="priority"`,
"-c", `service_tier="fast"`,
"--sandbox", "read-only",
"--ignore-rules",
"--skip-git-repo-check",

View File

@@ -183,7 +183,7 @@ while [ "$#" -gt 0 ]; do
model_reasoning_effort=\"high\")
saw_effort=1
;;
service_tier=\"priority\")
service_tier=\"fast\")
saw_service=1
;;
esac

View File

@@ -420,7 +420,7 @@ if [ "$provider" = "codex-cli" ] && [ "${OPENCLAW_LIVE_CLI_BACKEND_AUTH:-auto}"
--sandbox \
danger-full-access \
-c \
'service_tier="priority"' \
'service_tier="fast"' \
--skip-git-repo-check \
--model \
"$codex_probe_model" \

View File

@@ -298,7 +298,7 @@ beforeEach(() => {
"--sandbox",
"workspace-write",
"-c",
'service_tier="priority"',
'service_tier="fast"',
"--skip-git-repo-check",
],
resumeArgs: [
@@ -308,7 +308,7 @@ beforeEach(() => {
"-c",
'sandbox_mode="workspace-write"',
"-c",
'service_tier="priority"',
'service_tier="fast"',
"--skip-git-repo-check",
],
systemPromptFileConfigArg: "-c",
@@ -395,7 +395,7 @@ describe("resolveCliBackendConfig reliability merge", () => {
"--sandbox",
"workspace-write",
"-c",
'service_tier="priority"',
'service_tier="fast"',
"--skip-git-repo-check",
]);
expect(resolved.config.resumeArgs).toEqual([
@@ -405,7 +405,7 @@ describe("resolveCliBackendConfig reliability merge", () => {
"-c",
'sandbox_mode="workspace-write"',
"-c",
'service_tier="priority"',
'service_tier="fast"',
"--skip-git-repo-check",
]);
});

View File

@@ -372,7 +372,7 @@ describe("package artifact reuse", () => {
expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.4");
expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_AUTH=api-key");
expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_USE_CI_SAFE_CODEX_CONFIG=1");
expect((workflow.match(/service_tier=\\"priority\\"/g) ?? []).length).toBeGreaterThanOrEqual(2);
expect((workflow.match(/service_tier=\\"fast\\"/g) ?? []).length).toBeGreaterThanOrEqual(2);
expect(workflow).not.toContain(
'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","--skip-git-repo-check"]',
);