fix(telegram): hide acknowledged failed-tool warnings from chat (#72410)

* fix(telegram): hide acknowledged failed-tool warnings from chat

* fix(clownfish): address review for ghcrawl-207034-agentic-merge (1)

* fix(clownfish): address review for ghcrawl-207034-agentic-merge (1)
This commit is contained in:
Vincent Koc
2026-04-26 23:29:19 -07:00
committed by GitHub
parent ca44ab65e6
commit dcff28d285
4 changed files with 140 additions and 3 deletions

View File

@@ -328,7 +328,7 @@ describe("buildEmbeddedRunPayloads", () => {
expectSingleToolErrorPayload(payloads, { title, absentDetail });
});
it("shows mutating tool errors even when assistant output exists", () => {
it("shows mutating tool errors when assistant output claims success", () => {
const payloads = buildPayloads({
assistantTexts: ["Done."],
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
@@ -342,6 +342,63 @@ describe("buildEmbeddedRunPayloads", () => {
expect(payloads[1]?.text).not.toContain("missing");
});
it("shows mutating tool errors when assistant output does not acknowledge the failure", () => {
const payloads = buildPayloads({
assistantTexts: ["No issues found. The update is complete."],
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
lastToolError: { toolName: "edit", error: "file missing" },
});
expect(payloads).toHaveLength(2);
expect(payloads[0]?.text).toBe("No issues found. The update is complete.");
expect(payloads[1]?.isError).toBe(true);
expect(payloads[1]?.text).toContain("Edit");
expect(payloads[1]?.text).not.toContain("missing");
});
it("shows mutating tool errors when assistant says it did not find issues in the file", () => {
const text = "I did not find any issues in the file. The update is complete.";
const payloads = buildPayloads({
assistantTexts: [text],
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
lastToolError: { toolName: "edit", error: "file missing" },
});
expect(payloads).toHaveLength(2);
expect(payloads[0]?.text).toBe(text);
expect(payloads[1]?.isError).toBe(true);
expect(payloads[1]?.text).toContain("Edit");
expect(payloads[1]?.text).not.toContain("missing");
});
it.each([
"I did not need to update the file; it is already correct.",
"I did not have to edit the file because it was already correct.",
])("shows mutating tool errors when assistant output uses no-op phrasing: %s", (text) => {
const payloads = buildPayloads({
assistantTexts: [text],
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
lastToolError: { toolName: "edit", error: "file missing" },
});
expect(payloads).toHaveLength(2);
expect(payloads[0]?.text).toBe(text);
expect(payloads[1]?.isError).toBe(true);
expect(payloads[1]?.text).toContain("Edit");
expect(payloads[1]?.text).not.toContain("missing");
});
it("suppresses mutating tool errors when assistant output explicitly acknowledges the failed action", () => {
const text = "I couldn't update the file, so no changes were applied.";
const payloads = buildPayloads({
assistantTexts: [text],
lastAssistant: { stopReason: "end_turn" } as unknown as AssistantMessage,
lastToolError: { toolName: "edit", error: "file missing" },
});
expectSinglePayloadSummary(payloads, { text });
});
it("does not treat session_status read failures as mutating when explicitly flagged", () => {
const payloads = buildPayloads({
assistantTexts: ["Status loaded."],

View File

@@ -44,11 +44,54 @@ const RECOVERABLE_TOOL_ERROR_KEYWORDS = [
"requires",
] as const;
const MUTATING_FAILURE_ACTION_PATTERN =
"(?:write|edit|update|save|create|delete|remove|modify|change|apply|patch|move|rename|send|reply|message|tool|action|operation)";
const MUTATING_FAILURE_INABILITY_PATTERN = new RegExp(
`\\b(?:couldn't|could not|can't|cannot|unable to|am unable to|wasn't able to|was not able to|were unable to)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`,
"u",
);
const MUTATING_FAILURE_ACTION_THEN_FAILURE_PATTERN = new RegExp(
`\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b.{0,100}\\b(?:failed|failure|errored)\\b`,
"u",
);
const MUTATING_FAILURE_FAILURE_THEN_ACTION_PATTERN = new RegExp(
`\\b(?:failed|failure)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`,
"u",
);
const MUTATING_FAILURE_ERROR_WHILE_ACTION_PATTERN = new RegExp(
`\\b(?:hit|encountered|ran into)\\b.{0,60}\\berror\\b.{0,100}\\b(?:while|trying to|when)\\b.{0,100}\\b${MUTATING_FAILURE_ACTION_PATTERN}\\b`,
"u",
);
const DID_NOT_FAIL_PATTERN = /\b(?:did not|didn't)\s+fail\b/u;
const NEGATED_FAILURE_PATTERN = /\b(?:no|not|without)\s+(?:failures?|errors?)\b/u;
function isRecoverableToolError(error: string | undefined): boolean {
const errorLower = normalizeOptionalLowercaseString(error) ?? "";
return RECOVERABLE_TOOL_ERROR_KEYWORDS.some((keyword) => errorLower.includes(keyword));
}
function hasExplicitMutatingToolFailureAcknowledgement(text: string): boolean {
const normalizedText = normalizeTextForComparison(text);
if (!normalizedText) {
return false;
}
if (DID_NOT_FAIL_PATTERN.test(normalizedText)) {
return false;
}
if (MUTATING_FAILURE_INABILITY_PATTERN.test(normalizedText)) {
return true;
}
if (NEGATED_FAILURE_PATTERN.test(normalizedText)) {
return false;
}
return (
MUTATING_FAILURE_ACTION_THEN_FAILURE_PATTERN.test(normalizedText) ||
MUTATING_FAILURE_FAILURE_THEN_ACTION_PATTERN.test(normalizedText) ||
MUTATING_FAILURE_ERROR_WHILE_ACTION_PATTERN.test(normalizedText)
);
}
function isVerboseToolDetailEnabled(level?: VerboseLevel): boolean {
return level === "on" || level === "full";
}
@@ -84,6 +127,7 @@ function shouldIncludeToolErrorDetails(params: {
function resolveToolErrorWarningPolicy(params: {
lastToolError: ToolErrorSummary;
hasUserFacingReply: boolean;
hasUserFacingFailureAcknowledgement: boolean;
suppressToolErrors: boolean;
suppressToolErrorWarnings?: boolean;
isCronTrigger?: boolean;
@@ -107,7 +151,10 @@ function resolveToolErrorWarningPolicy(params: {
const isMutatingToolError =
params.lastToolError.mutatingAction ?? isLikelyMutatingToolName(params.lastToolError.toolName);
if (isMutatingToolError) {
return { showWarning: true, includeDetails };
return {
showWarning: !params.hasUserFacingFailureAcknowledgement,
includeDetails,
};
}
if (params.suppressToolErrors) {
return { showWarning: false, includeDetails };
@@ -316,6 +363,7 @@ export function buildEmbeddedRunPayloads(params: {
).filter((text) => !shouldSuppressRawErrorText(text));
let hasUserFacingAssistantReply = false;
let hasUserFacingFailureAcknowledgement = false;
for (const text of answerTexts) {
const {
text: cleanedText,
@@ -337,12 +385,16 @@ export function buildEmbeddedRunPayloads(params: {
replyToCurrent,
});
hasUserFacingAssistantReply = true;
if (cleanedText && hasExplicitMutatingToolFailureAcknowledgement(cleanedText)) {
hasUserFacingFailureAcknowledgement = true;
}
}
if (params.lastToolError) {
const warningPolicy = resolveToolErrorWarningPolicy({
lastToolError: params.lastToolError,
hasUserFacingReply: hasUserFacingAssistantReply,
hasUserFacingFailureAcknowledgement,
suppressToolErrors: Boolean(params.config?.messages?.suppressToolErrors),
suppressToolErrorWarnings: params.suppressToolErrorWarnings,
isCronTrigger: params.isCronTrigger,
@@ -350,7 +402,7 @@ export function buildEmbeddedRunPayloads(params: {
verboseLevel: params.verboseLevel,
});
// Always surface mutating tool failures so we do not silently confirm actions that did not happen.
// Surface mutating failures unless the assistant explicitly acknowledged the failed action.
// Otherwise, keep the previous behavior and only surface non-recoverable failures when no reply exists.
if (warningPolicy.showWarning) {
const toolSummary = formatToolAggregate(