mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-07 22:44:16 +00:00
fix(gateway): retry exec-read live tool probe
This commit is contained in:
@@ -28,7 +28,12 @@ import { DEFAULT_AGENT_ID } from "../routing/session-key.js";
|
||||
import { GATEWAY_CLIENT_MODES, GATEWAY_CLIENT_NAMES } from "../utils/message-channel.js";
|
||||
import { GatewayClient } from "./client.js";
|
||||
import { renderCatNoncePngBase64 } from "./live-image-probe.js";
|
||||
import { hasExpectedToolNonce, shouldRetryToolReadProbe } from "./live-tool-probe-utils.js";
|
||||
import {
|
||||
hasExpectedSingleNonce,
|
||||
hasExpectedToolNonce,
|
||||
shouldRetryExecReadProbe,
|
||||
shouldRetryToolReadProbe,
|
||||
} from "./live-tool-probe-utils.js";
|
||||
import { startGatewayServer } from "./server.js";
|
||||
import { extractPayloadText } from "./test-helpers.agent-results.js";
|
||||
|
||||
@@ -862,41 +867,77 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
||||
logProgress(`${progressLabel}: tool-exec`);
|
||||
const nonceC = randomUUID();
|
||||
const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
|
||||
|
||||
const execReadProbe = await client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdTool}-exec-read`,
|
||||
message:
|
||||
"OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
"Finally reply including the nonce text you read back.",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
);
|
||||
if (execReadProbe?.status !== "ok") {
|
||||
throw new Error(`exec+read probe failed: status=${String(execReadProbe?.status)}`);
|
||||
}
|
||||
const execReadText = extractPayloadText(execReadProbe?.result);
|
||||
if (
|
||||
isEmptyStreamText(execReadText) &&
|
||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||
const maxExecReadAttempts = 3;
|
||||
let execReadText = "";
|
||||
for (
|
||||
let execReadAttempt = 0;
|
||||
execReadAttempt < maxExecReadAttempts;
|
||||
execReadAttempt += 1
|
||||
) {
|
||||
logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
|
||||
break;
|
||||
const strictReply = execReadAttempt > 0;
|
||||
const execReadProbe = await client.request<AgentFinalPayload>(
|
||||
"agent",
|
||||
{
|
||||
sessionKey,
|
||||
idempotencyKey: `idem-${runIdTool}-exec-read-${execReadAttempt + 1}`,
|
||||
message: strictReply
|
||||
? "OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
`Then reply with exactly: ${nonceC}. No extra text.`
|
||||
: "OpenClaw live tool probe (local, safe): " +
|
||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||
"Finally reply including the nonce text you read back.",
|
||||
thinking: params.thinkingLevel,
|
||||
deliver: false,
|
||||
},
|
||||
{ expectFinal: true },
|
||||
);
|
||||
if (execReadProbe?.status !== "ok") {
|
||||
if (execReadAttempt + 1 < maxExecReadAttempts) {
|
||||
logProgress(
|
||||
`${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) status=${String(execReadProbe?.status)}`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new Error(`exec+read probe failed: status=${String(execReadProbe?.status)}`);
|
||||
}
|
||||
execReadText = extractPayloadText(execReadProbe?.result);
|
||||
if (
|
||||
isEmptyStreamText(execReadText) &&
|
||||
(model.provider === "minimax" || model.provider === "openai-codex")
|
||||
) {
|
||||
logProgress(`${progressLabel}: skip (${model.provider} empty response)`);
|
||||
break;
|
||||
}
|
||||
assertNoReasoningTags({
|
||||
text: execReadText,
|
||||
model: modelKey,
|
||||
phase: "tool-exec",
|
||||
label: params.label,
|
||||
});
|
||||
if (hasExpectedSingleNonce(execReadText, nonceC)) {
|
||||
break;
|
||||
}
|
||||
if (
|
||||
shouldRetryExecReadProbe({
|
||||
text: execReadText,
|
||||
nonce: nonceC,
|
||||
attempt: execReadAttempt,
|
||||
maxAttempts: maxExecReadAttempts,
|
||||
})
|
||||
) {
|
||||
logProgress(
|
||||
`${progressLabel}: tool-exec retry (${execReadAttempt + 2}/${maxExecReadAttempts}) malformed tool output`,
|
||||
);
|
||||
continue;
|
||||
}
|
||||
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
||||
}
|
||||
assertNoReasoningTags({
|
||||
text: execReadText,
|
||||
model: modelKey,
|
||||
phase: "tool-exec",
|
||||
label: params.label,
|
||||
});
|
||||
if (!execReadText.includes(nonceC)) {
|
||||
if (!hasExpectedSingleNonce(execReadText, nonceC)) {
|
||||
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { hasExpectedToolNonce, shouldRetryToolReadProbe } from "./live-tool-probe-utils.js";
|
||||
import {
|
||||
hasExpectedSingleNonce,
|
||||
hasExpectedToolNonce,
|
||||
shouldRetryExecReadProbe,
|
||||
shouldRetryToolReadProbe,
|
||||
} from "./live-tool-probe-utils.js";
|
||||
|
||||
describe("live tool probe utils", () => {
|
||||
it("matches nonce pair when both are present", () => {
|
||||
@@ -7,6 +12,11 @@ describe("live tool probe utils", () => {
|
||||
expect(hasExpectedToolNonce("value a-1 only", "a-1", "b-2")).toBe(false);
|
||||
});
|
||||
|
||||
it("matches single nonce when present", () => {
|
||||
expect(hasExpectedSingleNonce("value nonce-1", "nonce-1")).toBe(true);
|
||||
expect(hasExpectedSingleNonce("value nonce-2", "nonce-1")).toBe(false);
|
||||
});
|
||||
|
||||
it("retries malformed tool output when attempts remain", () => {
|
||||
expect(
|
||||
shouldRetryToolReadProbe({
|
||||
@@ -97,4 +107,37 @@ describe("live tool probe utils", () => {
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("retries malformed exec+read output when attempts remain", () => {
|
||||
expect(
|
||||
shouldRetryExecReadProbe({
|
||||
text: "read[object Object]",
|
||||
nonce: "nonce-c",
|
||||
attempt: 0,
|
||||
maxAttempts: 3,
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("does not retry exec+read once max attempts are exhausted", () => {
|
||||
expect(
|
||||
shouldRetryExecReadProbe({
|
||||
text: "read[object Object]",
|
||||
nonce: "nonce-c",
|
||||
attempt: 2,
|
||||
maxAttempts: 3,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("does not retry exec+read when nonce is present", () => {
|
||||
expect(
|
||||
shouldRetryExecReadProbe({
|
||||
text: "nonce-c",
|
||||
nonce: "nonce-c",
|
||||
attempt: 0,
|
||||
maxAttempts: 3,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -2,6 +2,25 @@ export function hasExpectedToolNonce(text: string, nonceA: string, nonceB: strin
|
||||
return text.includes(nonceA) && text.includes(nonceB);
|
||||
}
|
||||
|
||||
/**
 * Reports whether the expected nonce appears anywhere in the reply text.
 *
 * This is a plain substring check: surrounding text is allowed and the match
 * is case-sensitive.
 *
 * @param text - Reply text to inspect.
 * @param nonce - Nonce expected to occur in `text`.
 * @returns `true` when `nonce` is a substring of `text`, otherwise `false`.
 */
export function hasExpectedSingleNonce(text: string, nonce: string): boolean {
  return text.includes(nonce);
}
|
||||
|
||||
/**
 * Heuristically decides whether reply text looks like garbled tool output
 * rather than a normal answer.
 *
 * The text is treated as malformed when, after trimming, it
 * - is empty,
 * - contains the literal `[object Object]`, or
 * - (case-insensitively) contains `read` followed by optional whitespace and
 *   `[`, or the standalone word `tool` or `function`.
 *
 * @param text - Reply text to inspect.
 * @returns `true` when any of the malformed-output markers is present.
 */
function hasMalformedToolOutput(text: string): boolean {
  const trimmed = text.trim();
  if (!trimmed) {
    return true;
  }
  // Checked on the original casing: the marker is the exact stringified-object text.
  if (trimmed.includes("[object Object]")) {
    return true;
  }
  // Lower-case only once the cheaper checks have not already decided the result.
  const lower = trimmed.toLowerCase();
  return /\bread\s*\[/.test(lower) || /\btool\b/.test(lower) || /\bfunction\b/.test(lower);
}
|
||||
|
||||
export function shouldRetryToolReadProbe(params: {
|
||||
text: string;
|
||||
nonceA: string;
|
||||
@@ -16,19 +35,27 @@ export function shouldRetryToolReadProbe(params: {
|
||||
if (hasExpectedToolNonce(params.text, params.nonceA, params.nonceB)) {
|
||||
return false;
|
||||
}
|
||||
const trimmed = params.text.trim();
|
||||
if (!trimmed) {
|
||||
return true;
|
||||
}
|
||||
const lower = trimmed.toLowerCase();
|
||||
if (trimmed.includes("[object Object]")) {
|
||||
return true;
|
||||
}
|
||||
if (/\bread\s*\[/.test(lower) || /\btool\b/.test(lower) || /\bfunction\b/.test(lower)) {
|
||||
if (hasMalformedToolOutput(params.text)) {
|
||||
return true;
|
||||
}
|
||||
const lower = params.text.trim().toLowerCase();
|
||||
if (params.provider === "mistral" && (lower.includes("noncea=") || lower.includes("nonceb="))) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Decides whether the exec+read probe should be attempted again.
 *
 * A retry is requested only when all of the following hold:
 * - at least one attempt remains (`attempt + 1 < maxAttempts`),
 * - the reply does not already contain the expected nonce, and
 * - the reply looks like malformed tool output.
 *
 * @param params.text - Reply text returned by the probe.
 * @param params.nonce - Nonce the reply is expected to contain.
 * @param params.attempt - Index of the attempt that produced `text`; the last
 *   permitted attempt is the one where `attempt + 1` equals `maxAttempts`.
 * @param params.maxAttempts - Total number of attempts allowed.
 * @returns `true` when the caller should retry, otherwise `false`.
 */
export function shouldRetryExecReadProbe(params: {
  text: string;
  nonce: string;
  attempt: number;
  maxAttempts: number;
}): boolean {
  // No attempts left: never ask for a retry, whatever the text looks like.
  if (params.attempt + 1 >= params.maxAttempts) {
    return false;
  }
  // The nonce came back, so the probe succeeded and there is nothing to retry.
  if (hasExpectedSingleNonce(params.text, params.nonce)) {
    return false;
  }
  return hasMalformedToolOutput(params.text);
}
|
||||
|
||||
Reference in New Issue
Block a user