fix(gateway): classify wrapped "fetch failed" messages as transient network errors (openclaw#38530)

Verified:
- pnpm build
- pnpm check
- pnpm test:macmini

Co-authored-by: xinhuagu <562450+xinhuagu@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Xinhua Gu
2026-03-07 04:47:32 +01:00
committed by GitHub
parent fa69f836c4
commit 1a022a31de
4 changed files with 49 additions and 7 deletions

View File

@@ -221,6 +221,7 @@ Docs: https://docs.openclaw.ai
- Agents/skill API write pacing: add a global prompt guardrail that treats skill-driven external API writes as rate-limited by default, so runners prefer batched writes, avoid tight request loops, and respect `429`/`Retry-After`. Thanks @vincentkoc.
- Google Chat/multi-account webhook auth fallback: when `channels.googlechat.accounts.default` carries shared webhook audience/path settings (for example after config normalization), inherit those defaults for named accounts while preserving top-level and per-account overrides, so inbound webhook verification no longer fails silently for named accounts missing duplicated audience fields. Fixes #38369.
- Models/tool probing: raise the tool-capability probe budget from 32 to 256 tokens so reasoning models that spend tokens on thinking before returning a required tool call are less likely to be misclassified as not supporting tools. (#7521) Thanks @jakobdylanc.
- Gateway/transient network classification: treat wrapped `...: fetch failed` transport messages as transient while avoiding broad matches like `Web fetch failed (404): ...`, preventing Discord reconnect wrappers from crashing the gateway without suppressing non-network tool failures. (#38530) Thanks @xinhuagu.
## 2026.3.2

View File

@@ -86,7 +86,7 @@ describe("installUnhandledRejectionHandler - fatal detection", () => {
describe("non-fatal errors", () => {
it("does not exit on known transient network errors", () => {
const transientCases = [
const transientCases: unknown[] = [
Object.assign(new TypeError("fetch failed"), {
cause: { code: "UND_ERR_CONNECT_TIMEOUT", syscall: "connect" },
}),
@@ -111,6 +111,11 @@ describe("installUnhandledRejectionHandler - fatal detection", () => {
}),
];
// Wrapped fetch-failed (e.g. Discord: "Failed to get gateway information from Discord: fetch failed")
transientCases.push(
new Error("Failed to get gateway information from Discord: fetch failed"),
);
for (const transientErr of transientCases) {
expectExitCodeFromUnhandled(transientErr, []);
}

View File

@@ -56,10 +56,13 @@ describe("isTransientNetworkError", () => {
"EHOSTUNREACH",
"ENETUNREACH",
"EAI_AGAIN",
"EPROTO",
"UND_ERR_CONNECT_TIMEOUT",
"UND_ERR_SOCKET",
"UND_ERR_HEADERS_TIMEOUT",
"UND_ERR_BODY_TIMEOUT",
"ERR_SSL_WRONG_VERSION_NUMBER",
"ERR_SSL_PROTOCOL_RETURNED_AN_ERROR",
];
for (const code of codes) {
@@ -122,6 +125,26 @@ describe("isTransientNetworkError", () => {
expect(isTransientNetworkError(error)).toBe(true);
});
// A message that wraps the transport failure as a "<context>: fetch failed" suffix
// must still be classified as a transient network error.
it("returns true for wrapped fetch-failed messages from integration clients", () => {
const error = new Error("Failed to get gateway information from Discord: fetch failed");
expect(isTransientNetworkError(error)).toBe(true);
});
// Counter-case: "fetch failed" appears mid-message (followed by an HTTP status and
// detail) rather than as a trailing ": fetch failed" suffix, so the error must NOT
// be classified as transient.
it("returns false for non-network fetch-failed wrappers from tools", () => {
const error = new Error("Web fetch failed (404): Not Found");
expect(isTransientNetworkError(error)).toBe(false);
});
// Errors identified only by their message text (no error code attached) that
// mention EPROTO, OpenSSL "SSL routines", or a TLS alert are expected to be
// classified as transient.
it("returns true for TLS/SSL transient message snippets", () => {
expect(isTransientNetworkError(new Error("write EPROTO 00A8B0C9:error"))).toBe(true);
expect(
isTransientNetworkError(
new Error("SSL routines:OPENSSL_internal:WRONG_VERSION_NUMBER while connecting"),
),
).toBe(true);
expect(isTransientNetworkError(new Error("tlsv1 alert protocol version"))).toBe(true);
});
it("returns false for regular errors without network codes", () => {
expect(isTransientNetworkError(new Error("Something went wrong"))).toBe(false);
expect(isTransientNetworkError(new TypeError("Cannot read property"))).toBe(false);

View File

@@ -38,6 +38,9 @@ const TRANSIENT_NETWORK_CODES = new Set([
"UND_ERR_SOCKET",
"UND_ERR_HEADERS_TIMEOUT",
"UND_ERR_BODY_TIMEOUT",
"EPROTO",
"ERR_SSL_WRONG_VERSION_NUMBER",
"ERR_SSL_PROTOCOL_RETURNED_AN_ERROR",
]);
const TRANSIENT_NETWORK_ERROR_NAMES = new Set([
@@ -49,7 +52,7 @@ const TRANSIENT_NETWORK_ERROR_NAMES = new Set([
]);
const TRANSIENT_NETWORK_MESSAGE_CODE_RE =
/\b(ECONNRESET|ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ESOCKETTIMEDOUT|ECONNABORTED|EPIPE|EHOSTUNREACH|ENETUNREACH|EAI_AGAIN|UND_ERR_CONNECT_TIMEOUT|UND_ERR_DNS_RESOLVE_FAILED|UND_ERR_CONNECT|UND_ERR_SOCKET|UND_ERR_HEADERS_TIMEOUT|UND_ERR_BODY_TIMEOUT)\b/i;
/\b(ECONNRESET|ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ESOCKETTIMEDOUT|ECONNABORTED|EPIPE|EHOSTUNREACH|ENETUNREACH|EAI_AGAIN|EPROTO|UND_ERR_CONNECT_TIMEOUT|UND_ERR_DNS_RESOLVE_FAILED|UND_ERR_CONNECT|UND_ERR_SOCKET|UND_ERR_HEADERS_TIMEOUT|UND_ERR_BODY_TIMEOUT)\b/i;
const TRANSIENT_NETWORK_MESSAGE_SNIPPETS = [
"getaddrinfo",
@@ -58,8 +61,22 @@ const TRANSIENT_NETWORK_MESSAGE_SNIPPETS = [
"network error",
"network is unreachable",
"temporary failure in name resolution",
"tlsv1 alert",
"ssl routines",
"packet length too long",
"write eproto",
];
/**
 * Reports whether `message` is a bare or wrapped "fetch failed" transport message.
 *
 * Two shapes are accepted:
 * - the exact string "fetch failed";
 * - any text that ends with a colon, optional whitespace, and "fetch failed"
 *   (for example "Failed to get gateway information from Discord: fetch failed").
 *
 * Messages that only contain "fetch failed" somewhere other than as that trailing
 * suffix (for example "Web fetch failed (404): Not Found") are rejected, because
 * they are not transport failures.
 *
 * @param message - Error message text to inspect; matching is case-sensitive.
 * @returns `true` when the message has one of the accepted shapes, else `false`.
 */
function isWrappedFetchFailedMessage(message: string): boolean {
  // The suffix pattern is anchored at the end of the string so that broader
  // phrases merely mentioning "fetch failed" do not qualify.
  return message === "fetch failed" || /:\s*fetch failed$/.test(message);
}
function getErrorCause(err: unknown): unknown {
if (!err || typeof err !== "object") {
return undefined;
@@ -154,10 +171,6 @@ export function isTransientNetworkError(err: unknown): boolean {
return true;
}
if (candidate instanceof TypeError && candidate.message === "fetch failed") {
return true;
}
if (!candidate || typeof candidate !== "object") {
continue;
}
@@ -169,7 +182,7 @@ export function isTransientNetworkError(err: unknown): boolean {
if (TRANSIENT_NETWORK_MESSAGE_CODE_RE.test(message)) {
return true;
}
if (message === "fetch failed") {
if (isWrappedFetchFailedMessage(message)) {
return true;
}
if (TRANSIENT_NETWORK_MESSAGE_SNIPPETS.some((snippet) => message.includes(snippet))) {