fix(infra): treat nested network request errors as non-fatal

This commit is contained in:
Doruk Ardahan
2026-02-23 18:24:38 +03:00
committed by Peter Steinberger
parent 445c7a65e6
commit daaad03593
3 changed files with 152 additions and 19 deletions

View File

@@ -93,6 +93,10 @@ describe("installUnhandledRejectionHandler - fatal detection", () => {
Object.assign(new Error("DNS resolve failed"), { code: "UND_ERR_DNS_RESOLVE_FAILED" }),
Object.assign(new Error("Connection reset"), { code: "ECONNRESET" }),
Object.assign(new Error("Timeout"), { code: "ETIMEDOUT" }),
Object.assign(new Error("A request error occurred: getaddrinfo EAI_AGAIN slack.com"), {
code: "slack_webapi_request_error",
original: { code: "EAI_AGAIN", syscall: "getaddrinfo", hostname: "slack.com" },
}),
];
for (const transientErr of transientCases) {

View File

@@ -92,6 +92,30 @@ describe("isTransientNetworkError", () => {
expect(isTransientNetworkError(error)).toBe(true);
});
// Wrapper error whose own `code` is "slack_webapi_request_error"; the network
// code ("EAI_AGAIN") is only present on the nested `.original` object.
it("returns true for Slack request errors that wrap network codes in .original", () => {
const error = Object.assign(new Error("A request error occurred: getaddrinfo EAI_AGAIN"), {
code: "slack_webapi_request_error",
original: {
errno: -3001,
code: "EAI_AGAIN",
syscall: "getaddrinfo",
hostname: "slack.com",
},
});
expect(isTransientNetworkError(error)).toBe(true);
});
// Plain object (not an Error instance) whose message carries no network hint;
// the only network indicator is the `code` nested under `.data`.
it("returns true for network codes nested in .data payloads", () => {
const error = {
code: "slack_webapi_request_error",
message: "A request error occurred",
data: {
code: "EAI_AGAIN",
},
};
expect(isTransientNetworkError(error)).toBe(true);
});
it("returns true for AggregateError containing network errors", () => {
const networkError = Object.assign(new Error("timeout"), { code: "ETIMEDOUT" });
const error = new AggregateError([networkError], "Multiple errors");
@@ -109,6 +133,18 @@ describe("isTransientNetworkError", () => {
expect(isTransientNetworkError(error)).toBe(false);
});
// Negative case: the wrapper code alone, with no nested object and no network
// hint in the message, must not be classified as transient.
it("returns false for Slack request errors without network indicators", () => {
const error = Object.assign(new Error("A request error occurred"), {
code: "slack_webapi_request_error",
});
expect(isTransientNetworkError(error)).toBe(false);
});
// Negative case: the message mentions an undici code (UND_ERR_INVALID_ARG) that
// is not one of the codes listed in TRANSIENT_NETWORK_MESSAGE_CODE_RE.
it("returns false for non-transient undici codes that only appear in message text", () => {
const error = new Error("Request failed with UND_ERR_INVALID_ARG");
expect(isTransientNetworkError(error)).toBe(false);
});
it.each([null, undefined, "string error", 42, { message: "plain object" }])(
"returns false for non-network input %#",
(value) => {

View File

@@ -35,6 +35,25 @@ const TRANSIENT_NETWORK_CODES = new Set([
"UND_ERR_BODY_TIMEOUT",
]);
// Error `name` values that isTransientNetworkError treats as transient.
// Matching is an exact, case-sensitive Set lookup on the candidate's `name`.
const TRANSIENT_NETWORK_ERROR_NAMES = new Set([
"AbortError",
"ConnectTimeoutError",
"HeadersTimeoutError",
"BodyTimeoutError",
"TimeoutError",
]);
// Finds a transient network code embedded as a whole word in an error message
// (e.g. "getaddrinfo EAI_AGAIN slack.com"). The `i` flag is required because
// isTransientNetworkError lowercases the message before testing it.
const TRANSIENT_NETWORK_MESSAGE_CODE_RE =
/\b(ECONNRESET|ECONNREFUSED|ENOTFOUND|ETIMEDOUT|ESOCKETTIMEDOUT|ECONNABORTED|EPIPE|EHOSTUNREACH|ENETUNREACH|EAI_AGAIN|UND_ERR_CONNECT_TIMEOUT|UND_ERR_DNS_RESOLVE_FAILED|UND_ERR_CONNECT|UND_ERR_SOCKET|UND_ERR_HEADERS_TIMEOUT|UND_ERR_BODY_TIMEOUT)\b/i;
// Substrings that mark a message as a transient network failure. Entries must
// stay lowercase: they are compared with `includes` against a lowercased message.
const TRANSIENT_NETWORK_MESSAGE_SNIPPETS = [
"getaddrinfo",
"socket hang up",
"network error",
"network is unreachable",
"temporary failure in name resolution",
];
function getErrorCause(err: unknown): unknown {
if (!err || typeof err !== "object") {
return undefined;
@@ -42,6 +61,32 @@ function getErrorCause(err: unknown): unknown {
return (err as { cause?: unknown }).cause;
}
/**
 * Reads the `name` property of an error-like value.
 *
 * @param err - Any thrown or rejected value.
 * @returns The `name` when `err` is a non-null object whose `name` is a string;
 *   otherwise an empty string (primitives, functions, null and undefined included).
 */
function getErrorName(err: unknown): string {
  if (typeof err === "object" && err !== null) {
    const { name } = err as { name?: unknown };
    if (typeof name === "string") {
      return name;
    }
  }
  return "";
}
/**
 * Resolves a normalized identifier for an error, preferring its code and
 * falling back to its `errno`.
 *
 * @param err - Any thrown or rejected value.
 * @returns The code reported by `extractErrorCode`, trimmed and upper-cased, when
 *   it is truthy. Otherwise, for non-null objects: a non-blank string `errno`
 *   (trimmed and upper-cased) or a finite numeric `errno` rendered with `String`.
 *   `undefined` in every other case.
 */
function extractErrorCodeOrErrno(err: unknown): string | undefined {
  const directCode = extractErrorCode(err);
  if (directCode) {
    return directCode.trim().toUpperCase();
  }

  // Only objects can carry an `errno` property worth inspecting.
  if (typeof err !== "object" || err === null) {
    return undefined;
  }

  const { errno } = err as { errno?: unknown };
  switch (typeof errno) {
    case "string": {
      const trimmed = errno.trim();
      return trimmed ? trimmed.toUpperCase() : undefined;
    }
    case "number":
      // NaN and +/-Infinity are not meaningful errno values.
      return Number.isFinite(errno) ? String(errno) : undefined;
    default:
      return undefined;
  }
}
function extractErrorCodeWithCause(err: unknown): string | undefined {
const direct = extractErrorCode(err);
if (direct) {
@@ -50,6 +95,44 @@ function extractErrorCodeWithCause(err: unknown): string | undefined {
return extractErrorCode(getErrorCause(err));
}
/**
 * Flattens a value and everything nested inside it into a breadth-first list.
 *
 * Nested values are read from the `cause`, `reason`, `original`, `error` and
 * `data` properties (in that order), followed by each element of `errors` when
 * it is an array. Null/undefined values are skipped, and every value is
 * reported at most once, so cyclic references terminate.
 *
 * @param err - Root value to inspect; may be anything.
 * @returns The root followed by its nested values in discovery order. Empty
 *   when `err` is null or undefined. Non-object values are listed but not
 *   expanded.
 */
function collectErrorCandidates(err: unknown): unknown[] {
  const visited = new Set<unknown>();
  const found: unknown[] = [];
  const pending: unknown[] = [err];

  // Walk the work list with a cursor; entries appended below are visited later.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const item = pending[cursor];
    if (item === null || item === undefined || visited.has(item)) {
      continue;
    }
    visited.add(item);
    found.push(item);

    if (typeof item !== "object") {
      continue;
    }

    const { cause, reason, original, error, data, errors } = item as {
      cause?: unknown;
      reason?: unknown;
      original?: unknown;
      error?: unknown;
      data?: unknown;
      errors?: unknown;
    };
    const children: unknown[] = [cause, reason, original, error, data];
    if (Array.isArray(errors)) {
      children.push(...errors);
    }

    for (const child of children) {
      if (child != null && !visited.has(child)) {
        pending.push(child);
      }
    }
  }

  return found;
}
/**
* Checks if an error is an AbortError.
* These are typically intentional cancellations (e.g., during shutdown) and shouldn't crash.
@@ -88,28 +171,38 @@ export function isTransientNetworkError(err: unknown): boolean {
if (!err) {
return false;
}
for (const candidate of collectErrorCandidates(err)) {
const code = extractErrorCodeOrErrno(candidate);
if (code && TRANSIENT_NETWORK_CODES.has(code)) {
return true;
}
const code = extractErrorCodeWithCause(err);
if (code && TRANSIENT_NETWORK_CODES.has(code)) {
return true;
}
const name = getErrorName(candidate);
if (name && TRANSIENT_NETWORK_ERROR_NAMES.has(name)) {
return true;
}
// "fetch failed" TypeError from undici (Node's native fetch).
// Treat as transient regardless of nested cause code because causes vary
// across runtimes and can be unclassified even for real network faults.
if (err instanceof TypeError && err.message === "fetch failed") {
return true;
}
if (candidate instanceof TypeError && candidate.message === "fetch failed") {
return true;
}
// Check the cause chain recursively
const cause = getErrorCause(err);
if (cause && cause !== err) {
return isTransientNetworkError(cause);
}
// AggregateError may wrap multiple causes
if (err instanceof AggregateError && err.errors?.length) {
return err.errors.some((e) => isTransientNetworkError(e));
if (!candidate || typeof candidate !== "object") {
continue;
}
const rawMessage = (candidate as { message?: unknown }).message;
const message = typeof rawMessage === "string" ? rawMessage.toLowerCase().trim() : "";
if (!message) {
continue;
}
if (TRANSIENT_NETWORK_MESSAGE_CODE_RE.test(message)) {
return true;
}
if (message === "fetch failed") {
return true;
}
if (TRANSIENT_NETWORK_MESSAGE_SNIPPETS.some((snippet) => message.includes(snippet))) {
return true;
}
}
return false;