mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-10 20:45:15 +00:00
fix(release): stabilize Codex live validation
This commit is contained in:
@@ -2122,8 +2122,8 @@ jobs:
|
||||
# inside the already-isolated container to keep MCP cron/tool
|
||||
# execution representative instead of failing on nested sandbox
|
||||
# setup.
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_LIVE_CLI_BACKEND_DEBUG=1" >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_CLI_BACKEND_LOG_OUTPUT=1" >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_TEST_CONSOLE=1" >> "$GITHUB_ENV"
|
||||
@@ -2341,8 +2341,8 @@ jobs:
|
||||
live-cli-backend-docker)
|
||||
echo "OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.4" >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_LIVE_CLI_BACKEND_AUTH=api-key" >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"priority\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo 'OPENCLAW_LIVE_CLI_BACKEND_RESUME_ARGS=["exec","resume","{sessionId}","-c","sandbox_mode=\"danger-full-access\"","-c","service_tier=\"fast\"","--skip-git-repo-check"]' >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_LIVE_CLI_BACKEND_DEBUG=1" >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_CLI_BACKEND_LOG_OUTPUT=1" >> "$GITHUB_ENV"
|
||||
echo "OPENCLAW_TEST_CONSOLE=1" >> "$GITHUB_ENV"
|
||||
|
||||
@@ -32,7 +32,7 @@ export function buildOpenAICodexCliBackend(): CliBackendPlugin {
|
||||
"--sandbox",
|
||||
"workspace-write",
|
||||
"-c",
|
||||
'service_tier="priority"',
|
||||
'service_tier="fast"',
|
||||
"--skip-git-repo-check",
|
||||
],
|
||||
resumeArgs: [
|
||||
@@ -42,7 +42,7 @@ export function buildOpenAICodexCliBackend(): CliBackendPlugin {
|
||||
"-c",
|
||||
'sandbox_mode="workspace-write"',
|
||||
"-c",
|
||||
'service_tier="priority"',
|
||||
'service_tier="fast"',
|
||||
"--skip-git-repo-check",
|
||||
],
|
||||
output: "jsonl",
|
||||
|
||||
@@ -694,6 +694,43 @@ describe("qa mock openai server", () => {
|
||||
expect(payload.output?.[0]?.content?.[0]?.text).toContain("Status: complete");
|
||||
});
|
||||
|
||||
// Regression case: the request input carries the original followthrough prompt,
// then a tool output, then a later user item that only says to continue. The
// test expects the mock server to answer with a tool call whose arguments name
// SOUL.md rather than treating the trailing user item as a fresh prompt.
it("continues repo-contract followthrough when a retry user item follows tool output", async () => {
  // NOTE(review): port 0 presumably lets the OS choose a free port — confirm
  // against startQaMockOpenAiServer.
  const server = await startQaMockOpenAiServer({
    host: "127.0.0.1",
    port: 0,
  });
  // Register teardown right away so the server is stopped with the other cleanups.
  cleanups.push(async () => {
    await server.stop();
  });

  const prompt =
    "Repo contract followthrough check. Read AGENT.md, SOUL.md, and FOLLOWTHROUGH_INPUT.md first. Then follow the repo contract exactly, write ./repo-contract-summary.txt, and reply with three labeled lines: Read, Wrote, Status.";

  const response = await fetch(`${server.baseUrl}/v1/responses`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({
      stream: true,
      model: "gpt-5.5",
      input: [
        // 1. The original scenario prompt.
        { role: "user", content: [{ type: "input_text", text: prompt }] },
        // 2. Output of a tool call made earlier in the conversation.
        {
          type: "function_call_output",
          output:
            "# Repo contract\n\nStep order:\n1. Read AGENT.md.\n2. Read SOUL.md.\n3. Read FOLLOWTHROUGH_INPUT.md.\n4. Write ./repo-contract-summary.txt.\n",
        },
        // 3. A trailing user item that is only a continuation nudge.
        {
          role: "user",
          content: [{ type: "input_text", text: "Continue after compaction." }],
        },
      ],
    }),
  });

  expect(response.status).toBe(200);
  // The response body must contain a tool call whose JSON-encoded arguments are {"path":"SOUL.md"}.
  expect(await response.text()).toContain('"arguments":"{\\"path\\":\\"SOUL.md\\"}"');
});
|
||||
|
||||
it("drives the compaction retry mutating tool parity flow", async () => {
|
||||
const server = await startQaMockOpenAiServer({
|
||||
host: "127.0.0.1",
|
||||
|
||||
@@ -281,6 +281,17 @@ function findLastUserIndex(input: ResponsesInputItem[]) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
 * Decides whether a piece of user text is just a "keep going" nudge.
 *
 * The text is trimmed first; empty or whitespace-only text is never a
 * continuation. Otherwise it counts as a continuation when either:
 *  - the whole message is one of the short nudges "continue", "keep going",
 *    "resume", "retry" or "carry on", optionally followed by a single
 *    `.`, `!` or `?`; or
 *  - the message contains, as a whole word, any of "continue",
 *    "continuation", "compaction", "post-compaction", "retry" or "resume".
 *
 * Both checks are case-insensitive.
 *
 * @param text Raw user text to classify.
 * @returns `true` when the text matches either continuation pattern.
 */
function isToolOutputContinuationText(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) {
    return false;
  }

  // Whole-message nudges such as "keep going!" that carry no keyword of their own.
  const isBareNudge = /^(?:continue|keep going|resume|retry|carry on)(?:[.!?])?$/i.test(trimmed);

  // Longer messages that mention a continuation keyword anywhere, e.g.
  // "Continue after compaction.".
  const mentionsContinuation =
    /\b(?:continue|continuation|compaction|post-compaction|retry|resume)\b/i.test(trimmed);

  return isBareNudge || mentionsContinuation;
}
|
||||
|
||||
function extractToolOutput(input: ResponsesInputItem[]) {
|
||||
const lastUserIndex = findLastUserIndex(input);
|
||||
for (let index = input.length - 1; index > lastUserIndex; index -= 1) {
|
||||
@@ -289,6 +300,23 @@ function extractToolOutput(input: ResponsesInputItem[]) {
|
||||
return item.output;
|
||||
}
|
||||
}
|
||||
for (let index = input.length - 1; index >= 0; index -= 1) {
|
||||
const item = input[index];
|
||||
if (item.type === "function_call_output" && typeof item.output === "string" && item.output) {
|
||||
const laterUserTexts = input
|
||||
.slice(index + 1)
|
||||
.filter((laterItem) => laterItem.role === "user" && Array.isArray(laterItem.content))
|
||||
.map((laterItem) => extractInputText(laterItem.content as unknown[]))
|
||||
.filter(Boolean);
|
||||
if (
|
||||
laterUserTexts.length > 0 &&
|
||||
laterUserTexts.every((text) => isToolOutputContinuationText(text))
|
||||
) {
|
||||
return item.output;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
@@ -788,7 +816,7 @@ function buildAssistantText(
|
||||
if (/tool continuity check/i.test(prompt) && toolOutput) {
|
||||
return `Protocol note: model switch handoff confirmed on ${model || "the requested model"}. QA mission from QA_KICKOFF_TASK.md still applies: understand this OpenClaw repo from source + docs before acting.`;
|
||||
}
|
||||
if (toolOutput && /repo contract followthrough check/i.test(prompt)) {
|
||||
if (toolOutput && /repo contract followthrough check/i.test(allInputText)) {
|
||||
if (
|
||||
/successfully (?:wrote|created|updated|replaced)/i.test(toolOutput) ||
|
||||
/status:\s*complete/i.test(toolOutput)
|
||||
@@ -1723,7 +1751,7 @@ async function buildResponsesPayload(
|
||||
if (/tool continuity check/i.test(prompt) && !toolOutput) {
|
||||
return buildToolCallEventsWithArgs("read", { path: "QA_KICKOFF_TASK.md" });
|
||||
}
|
||||
if (/repo contract followthrough check/i.test(prompt)) {
|
||||
if (/repo contract followthrough check/i.test(allInputText)) {
|
||||
if (!toolOutput) {
|
||||
return buildToolCallEventsWithArgs("read", { path: "AGENT.md" });
|
||||
}
|
||||
|
||||
@@ -208,7 +208,7 @@ func runCodexExecPrompt(ctx context.Context, req codexPromptRequest) (string, er
|
||||
"exec",
|
||||
"--model", req.Model,
|
||||
"-c", fmt.Sprintf("model_reasoning_effort=%q", normalizeThinking(req.Thinking)),
|
||||
"-c", `service_tier="priority"`,
|
||||
"-c", `service_tier="fast"`,
|
||||
"--sandbox", "read-only",
|
||||
"--ignore-rules",
|
||||
"--skip-git-repo-check",
|
||||
|
||||
@@ -183,7 +183,7 @@ while [ "$#" -gt 0 ]; do
|
||||
model_reasoning_effort=\"high\")
|
||||
saw_effort=1
|
||||
;;
|
||||
service_tier=\"priority\")
|
||||
service_tier=\"fast\")
|
||||
saw_service=1
|
||||
;;
|
||||
esac
|
||||
|
||||
@@ -420,7 +420,7 @@ if [ "$provider" = "codex-cli" ] && [ "${OPENCLAW_LIVE_CLI_BACKEND_AUTH:-auto}"
|
||||
--sandbox \
|
||||
danger-full-access \
|
||||
-c \
|
||||
'service_tier="priority"' \
|
||||
'service_tier="fast"' \
|
||||
--skip-git-repo-check \
|
||||
--model \
|
||||
"$codex_probe_model" \
|
||||
|
||||
@@ -298,7 +298,7 @@ beforeEach(() => {
|
||||
"--sandbox",
|
||||
"workspace-write",
|
||||
"-c",
|
||||
'service_tier="priority"',
|
||||
'service_tier="fast"',
|
||||
"--skip-git-repo-check",
|
||||
],
|
||||
resumeArgs: [
|
||||
@@ -308,7 +308,7 @@ beforeEach(() => {
|
||||
"-c",
|
||||
'sandbox_mode="workspace-write"',
|
||||
"-c",
|
||||
'service_tier="priority"',
|
||||
'service_tier="fast"',
|
||||
"--skip-git-repo-check",
|
||||
],
|
||||
systemPromptFileConfigArg: "-c",
|
||||
@@ -395,7 +395,7 @@ describe("resolveCliBackendConfig reliability merge", () => {
|
||||
"--sandbox",
|
||||
"workspace-write",
|
||||
"-c",
|
||||
'service_tier="priority"',
|
||||
'service_tier="fast"',
|
||||
"--skip-git-repo-check",
|
||||
]);
|
||||
expect(resolved.config.resumeArgs).toEqual([
|
||||
@@ -405,7 +405,7 @@ describe("resolveCliBackendConfig reliability merge", () => {
|
||||
"-c",
|
||||
'sandbox_mode="workspace-write"',
|
||||
"-c",
|
||||
'service_tier="priority"',
|
||||
'service_tier="fast"',
|
||||
"--skip-git-repo-check",
|
||||
]);
|
||||
});
|
||||
|
||||
@@ -372,7 +372,7 @@ describe("package artifact reuse", () => {
|
||||
expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_MODEL=codex-cli/gpt-5.4");
|
||||
expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_AUTH=api-key");
|
||||
expect(workflow).toContain("OPENCLAW_LIVE_CLI_BACKEND_USE_CI_SAFE_CODEX_CONFIG=1");
|
||||
expect((workflow.match(/service_tier=\\"priority\\"/g) ?? []).length).toBeGreaterThanOrEqual(2);
|
||||
expect((workflow.match(/service_tier=\\"fast\\"/g) ?? []).length).toBeGreaterThanOrEqual(2);
|
||||
expect(workflow).not.toContain(
|
||||
'OPENCLAW_LIVE_CLI_BACKEND_ARGS=["exec","--json","--color","never","--sandbox","danger-full-access","--skip-git-repo-check"]',
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user