refactor: centralize tool-error visibility policy

This commit is contained in:
Peter Steinberger
2026-02-22 15:30:42 +01:00
parent ac3ac6a83a
commit 4c355a28a3
4 changed files with 91 additions and 67 deletions

View File

@@ -113,10 +113,10 @@ describe("buildEmbeddedRunPayloads", () => {
lastToolError: { toolName: "browser", error: "tab not found" },
});
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Browser");
expect(payloads[0]?.text).not.toContain("tab not found");
expectSingleToolErrorPayload(payloads, {
title: "Browser",
absentDetail: "tab not found",
});
});
it("does not add tool error fallback when assistant output exists", () => {
@@ -237,18 +237,6 @@ describe("buildEmbeddedRunPayloads", () => {
expect(payloads).toHaveLength(0);
});
it("still shows mutating tool errors when messages.suppressToolErrors is enabled", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "write", error: "connection timeout" },
config: { messages: { suppressToolErrors: true } },
});
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Write");
expect(payloads[0]?.text).not.toContain("connection timeout");
});
it("suppresses mutating tool errors when suppressToolErrorWarnings is enabled", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "exec", error: "command not found" },
@@ -258,15 +246,35 @@ describe("buildEmbeddedRunPayloads", () => {
expect(payloads).toHaveLength(0);
});
it("shows recoverable tool errors for mutating tools", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "message", meta: "reply", error: "text required" },
});
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Message");
expect(payloads[0]?.text).not.toContain("required");
it.each([
{
name: "still shows mutating tool errors when messages.suppressToolErrors is enabled",
payload: {
lastToolError: { toolName: "write", error: "connection timeout" },
config: { messages: { suppressToolErrors: true } },
},
title: "Write",
absentDetail: "connection timeout",
},
{
name: "shows recoverable tool errors for mutating tools",
payload: {
lastToolError: { toolName: "message", meta: "reply", error: "text required" },
},
title: "Message",
absentDetail: "required",
},
{
name: "shows non-recoverable tool failure summaries to the user",
payload: {
lastToolError: { toolName: "browser", error: "connection timeout" },
},
title: "Browser",
absentDetail: "connection timeout",
},
])("$name", ({ payload, title, absentDetail }) => {
const payloads = buildPayloads(payload);
expectSingleToolErrorPayload(payloads, { title, absentDetail });
});
it("shows mutating tool errors even when assistant output exists", () => {
@@ -323,27 +331,15 @@ describe("buildEmbeddedRunPayloads", () => {
expect(payloads[0]?.text).toBe(warningText);
});
it("shows non-recoverable tool failure summaries to the user", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "browser", error: "connection timeout" },
});
// Non-recoverable errors should still be shown
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Browser");
expect(payloads[0]?.text).not.toContain("connection timeout");
});
it("includes non-recoverable tool error details when verbose mode is on", () => {
const payloads = buildPayloads({
lastToolError: { toolName: "browser", error: "connection timeout" },
verboseLevel: "on",
});
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Browser");
expect(payloads[0]?.text).toContain("connection timeout");
expectSingleToolErrorPayload(payloads, {
title: "Browser",
detail: "connection timeout",
});
});
});

View File

@@ -32,10 +32,15 @@ export function expectSinglePayloadText(
export function expectSingleToolErrorPayload(
payloads: RunPayloads,
params: { title: string; detail: string },
params: { title: string; detail?: string; absentDetail?: string },
): void {
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain(params.title);
expect(payloads[0]?.text).toContain(params.detail);
if (typeof params.detail === "string") {
expect(payloads[0]?.text).toContain(params.detail);
}
if (typeof params.absentDetail === "string") {
expect(payloads[0]?.text).not.toContain(params.absentDetail);
}
}

View File

@@ -29,21 +29,35 @@ describe("buildEmbeddedRunPayloads tool-error warnings", () => {
verboseLevel: "off",
});
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Write");
expect(payloads[0]?.text).not.toContain("permission denied");
expectSingleToolErrorPayload(payloads, {
title: "Write",
absentDetail: "permission denied",
});
});
it("includes mutating tool error details when verbose mode is on", () => {
it.each([
{
name: "includes details for mutating tool failures when verbose is on",
verboseLevel: "on" as const,
detail: "permission denied",
absentDetail: undefined,
},
{
name: "includes details for mutating tool failures when verbose is full",
verboseLevel: "full" as const,
detail: "permission denied",
absentDetail: undefined,
},
])("$name", ({ verboseLevel, detail, absentDetail }) => {
const payloads = buildPayloads({
lastToolError: { toolName: "write", error: "permission denied" },
verboseLevel: "on",
verboseLevel,
});
expect(payloads).toHaveLength(1);
expect(payloads[0]?.isError).toBe(true);
expect(payloads[0]?.text).toContain("Write");
expect(payloads[0]?.text).toContain("permission denied");
expectSingleToolErrorPayload(payloads, {
title: "Write",
detail,
absentDetail,
});
});
});

View File

@@ -28,6 +28,10 @@ type LastToolError = {
mutatingAction?: boolean;
actionFingerprint?: string;
};
type ToolErrorWarningPolicy = {
showWarning: boolean;
includeDetails: boolean;
};
const RECOVERABLE_TOOL_ERROR_KEYWORDS = [
"required",
@@ -44,30 +48,37 @@ function isRecoverableToolError(error: string | undefined): boolean {
return RECOVERABLE_TOOL_ERROR_KEYWORDS.some((keyword) => errorLower.includes(keyword));
}
function shouldShowToolErrorWarning(params: {
function isVerboseToolDetailEnabled(level?: VerboseLevel): boolean {
return level === "on" || level === "full";
}
function resolveToolErrorWarningPolicy(params: {
lastToolError: LastToolError;
hasUserFacingReply: boolean;
suppressToolErrors: boolean;
suppressToolErrorWarnings?: boolean;
verboseLevel?: VerboseLevel;
}): boolean {
}): ToolErrorWarningPolicy {
const includeDetails = isVerboseToolDetailEnabled(params.verboseLevel);
if (params.suppressToolErrorWarnings) {
return false;
return { showWarning: false, includeDetails };
}
const normalizedToolName = params.lastToolError.toolName.trim().toLowerCase();
const verboseEnabled = params.verboseLevel === "on" || params.verboseLevel === "full";
if ((normalizedToolName === "exec" || normalizedToolName === "bash") && !verboseEnabled) {
return false;
if ((normalizedToolName === "exec" || normalizedToolName === "bash") && !includeDetails) {
return { showWarning: false, includeDetails };
}
const isMutatingToolError =
params.lastToolError.mutatingAction ?? isLikelyMutatingToolName(params.lastToolError.toolName);
if (isMutatingToolError) {
return true;
return { showWarning: true, includeDetails };
}
if (params.suppressToolErrors) {
return false;
return { showWarning: false, includeDetails };
}
return !params.hasUserFacingReply && !isRecoverableToolError(params.lastToolError.error);
return {
showWarning: !params.hasUserFacingReply && !isRecoverableToolError(params.lastToolError.error),
includeDetails,
};
}
export function buildEmbeddedRunPayloads(params: {
@@ -256,7 +267,7 @@ export function buildEmbeddedRunPayloads(params: {
}
if (params.lastToolError) {
const shouldShowToolError = shouldShowToolErrorWarning({
const warningPolicy = resolveToolErrorWarningPolicy({
lastToolError: params.lastToolError,
hasUserFacingReply: hasUserFacingAssistantReply,
suppressToolErrors: Boolean(params.config?.messages?.suppressToolErrors),
@@ -266,16 +277,14 @@ export function buildEmbeddedRunPayloads(params: {
// Always surface mutating tool failures so we do not silently confirm actions that did not happen.
// Otherwise, keep the previous behavior and only surface non-recoverable failures when no reply exists.
if (shouldShowToolError) {
if (warningPolicy.showWarning) {
const toolSummary = formatToolAggregate(
params.lastToolError.toolName,
params.lastToolError.meta ? [params.lastToolError.meta] : undefined,
{ markdown: useMarkdown },
);
const verboseErrorDetailsEnabled =
params.verboseLevel === "on" || params.verboseLevel === "full";
const errorSuffix =
verboseErrorDetailsEnabled && params.lastToolError.error
warningPolicy.includeDetails && params.lastToolError.error
? `: ${params.lastToolError.error}`
: "";
const warningText = `⚠️ ${toolSummary} failed${errorSuffix}`;