mirror of
https://github.com/moltbot/moltbot.git
synced 2026-05-06 23:55:12 +00:00
fix(discord): retry transient outbound failures
This commit is contained in:
@@ -40,6 +40,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Discord/doctor: migrate unsupported per-channel `agentId` entries under guild channel config into top-level `bindings[]` routes, so `openclaw doctor --fix` preserves the intended agent route instead of stripping it as an unknown key. Fixes #62455. Thanks @lobster-biscuit.
|
||||
- Discord/DMs: set inbound direct-message `ctx.To` to the semantic `user:<id>` target while keeping delivery routed through the DM channel, so mirror and recovery paths do not treat DMs as channel conversations. Fixes #68126. Thanks @illuminate0623.
|
||||
- Discord/DMs: keep no-guild inbound messages on direct-message routing when Discord channel lookup is temporarily unavailable, preventing degraded DMs from forking into channel sessions. Fixes #59817. Thanks @DooPeePey.
|
||||
- Discord: retry outbound API calls on HTTP 5xx, request-timeout, and transient transport failures instead of only Discord rate limits, reducing dropped cron and agent replies during short Discord or network outages. Fixes #52396. Thanks @sunshineo.
|
||||
- Gateway/config: log config health-state write failures instead of silently hiding config observe-recovery write errors. Thanks @sallyom.
|
||||
- Diagnostics: reset stuck-session timers on reply, tool, status, block, and ACP progress events, and back off repeated `session.stuck` diagnostics while a session remains unchanged. Supersedes #72010. Thanks @rubencu.
|
||||
|
||||
|
||||
@@ -37,7 +37,9 @@ title: "Retry policy"
|
||||
|
||||
### Discord
|
||||
|
||||
- Retries only on rate-limit errors (HTTP 429).
|
||||
- Retries on rate-limit errors (HTTP 429), request timeouts, HTTP 5xx responses,
|
||||
and transient transport failures such as DNS lookup failures, connection
|
||||
resets, socket closes, and fetch failures.
|
||||
- Uses Discord `retry_after` when available, otherwise exponential backoff.
|
||||
|
||||
### Telegram
|
||||
|
||||
@@ -5,6 +5,7 @@ import {
|
||||
type RetryConfig,
|
||||
} from "openclaw/plugin-sdk/retry-runtime";
|
||||
import { resolveDiscordAccount } from "./accounts.js";
|
||||
import { DiscordError } from "./internal/discord.js";
|
||||
|
||||
const DISCORD_DELIVERY_RETRY_DEFAULTS = {
|
||||
attempts: 3,
|
||||
@@ -13,7 +14,10 @@ const DISCORD_DELIVERY_RETRY_DEFAULTS = {
|
||||
jitter: 0,
|
||||
} satisfies Required<RetryConfig>;
|
||||
|
||||
function isRetryableDiscordDeliveryError(err: unknown): boolean {
|
||||
/**
 * Decides whether a failed Discord delivery attempt should be retried at the
 * delivery layer.
 *
 * - `DiscordError` instances are never retried here (presumably because the
 *   request-level retry runner has already applied its own policy to them —
 *   confirm against the caller).
 * - `null` / `undefined` rejections are treated as non-retryable.
 * - Any other value is retried when its `status` (or, if that is nullish, its
 *   `statusCode`) is 429 or >= 500.
 *
 * @param err - The value thrown or rejected by the delivery attempt.
 * @returns `true` when the attempt should be retried.
 */
export function isRetryableDiscordDeliveryError(err: unknown): boolean {
  if (err instanceof DiscordError) {
    return false;
  }
  // JavaScript allows throwing `null`/`undefined`; reading a property off either
  // would raise a TypeError here and hide the original failure from the caller.
  if (err === null || err === undefined) {
    return false;
  }
  const status = (err as { status?: number }).status ?? (err as { statusCode?: number }).statusCode;
  return status === 429 || (status !== undefined && status >= 500);
}
|
||||
|
||||
83
extensions/discord/src/retry.test.ts
Normal file
83
extensions/discord/src/retry.test.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { isRetryableDiscordDeliveryError } from "./delivery-retry.js";
|
||||
import { DiscordError, RateLimitError } from "./internal/discord.js";
|
||||
import { createDiscordRetryRunner, isRetryableDiscordTransientError } from "./retry.js";
|
||||
|
||||
// Retry settings shared by the runner tests: two attempts with every delay and
// the jitter set to zero (presumably so retries happen without waiting).
const ZERO_DELAY_RETRY = { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 };
|
||||
|
||||
/**
 * Builds a `RateLimitError` for tests from a synthetic HTTP 429 response.
 *
 * @param retryAfter - Value placed in the body's `retry_after` field (default 0).
 * @returns The constructed `RateLimitError`.
 */
function createRateLimitError(retryAfter = 0): RateLimitError {
  type RateLimitBody = { message: string; retry_after: number; global: boolean };
  type RateLimitCtor = new (response: Response, body: RateLimitBody) => RateLimitError;

  const tooManyRequests = new Response(null, {
    status: 429,
    headers: { "X-RateLimit-Scope": "user", "X-RateLimit-Bucket": "bucket-1" },
  });
  const body: RateLimitBody = {
    message: "rate limited",
    retry_after: retryAfter,
    global: false,
  };

  // The class is re-typed so it can be invoked with (response, body); presumably
  // its declared constructor signature differs — TODO confirm.
  const Ctor = RateLimitError as unknown as RateLimitCtor;
  return new Ctor(tooManyRequests, body);
}
|
||||
|
||||
describe("isRetryableDiscordTransientError", () => {
  // Failures the classifier is expected to treat as transient.
  const transientCases: Array<[string, unknown]> = [
    ["rate limit", createRateLimitError()],
    ["408 status", Object.assign(new Error("request timeout"), { status: 408 })],
    ["502 status", Object.assign(new Error("bad gateway"), { status: 502 })],
    ["503 statusCode", Object.assign(new Error("service unavailable"), { statusCode: 503 })],
    ["fetch failed", new TypeError("fetch failed")],
    ["ECONNRESET", Object.assign(new Error("socket hang up"), { code: "ECONNRESET" })],
    ["ETIMEDOUT cause", new Error("request failed", { cause: { code: "ETIMEDOUT" } })],
    ["abort", Object.assign(new Error("aborted"), { name: "AbortError" })],
  ];

  // Failures the classifier is expected to treat as permanent.
  const permanentCases: Array<[string, unknown]> = [
    ["400 status", Object.assign(new Error("bad request"), { status: 400 })],
    ["403 status", Object.assign(new Error("missing permissions"), { statusCode: 403 })],
    ["unknown channel", new Error("Unknown Channel")],
    ["plain string", "fetch failed"],
  ];

  it.each(transientCases)("retries %s", (_label, failure) => {
    const verdict = isRetryableDiscordTransientError(failure);
    expect(verdict).toBe(true);
  });

  it.each(permanentCases)("does not retry %s", (_label, failure) => {
    const verdict = isRetryableDiscordTransientError(failure);
    expect(verdict).toBe(false);
  });
});
|
||||
|
||||
describe("createDiscordRetryRunner", () => {
  it("retries transient transport errors", async () => {
    // First call rejects with a transport failure; every later call resolves.
    const operation = vi.fn();
    operation.mockRejectedValueOnce(new TypeError("fetch failed"));
    operation.mockResolvedValue("ok");
    const run = createDiscordRetryRunner({ retry: ZERO_DELAY_RETRY });

    const outcome = run(operation, "send");

    await expect(outcome).resolves.toBe("ok");
    expect(operation).toHaveBeenCalledTimes(2);
  });

  it("stops after configured transient retry attempts", async () => {
    // Every call rejects, so the runner must give up once attempts are exhausted.
    const operation = vi.fn();
    operation.mockRejectedValue(new TypeError("fetch failed"));
    const run = createDiscordRetryRunner({ retry: ZERO_DELAY_RETRY });

    const outcome = run(operation, "send");

    await expect(outcome).rejects.toThrow("fetch failed");
    expect(operation).toHaveBeenCalledTimes(2);
  });
});
|
||||
|
||||
describe("isRetryableDiscordDeliveryError", () => {
  it("retries status-coded errors from injected delivery dependencies", () => {
    // A plain Error that only carries an HTTP status should be retried.
    const badGateway = Object.assign(new Error("bad gateway"), { status: 502 });

    const verdict = isRetryableDiscordDeliveryError(badGateway);

    expect(verdict).toBe(true);
  });

  it("does not retry Discord client errors after the request runner handled them", () => {
    // Same 502 status, but wrapped in the client's own DiscordError type.
    const upstreamResponse = new Response("upstream", { status: 502 });
    const clientError = new DiscordError(upstreamResponse, {
      message: "Bad Gateway",
    });

    const verdict = isRetryableDiscordDeliveryError(clientError);

    expect(verdict).toBe(false);
  });
});
|
||||
@@ -1,3 +1,9 @@
|
||||
import {
|
||||
collectErrorGraphCandidates,
|
||||
extractErrorCode,
|
||||
formatErrorMessage,
|
||||
readErrorName,
|
||||
} from "openclaw/plugin-sdk/error-runtime";
|
||||
import {
|
||||
createRateLimitRetryRunner,
|
||||
type RetryConfig,
|
||||
@@ -12,6 +18,71 @@ const DISCORD_RETRY_DEFAULTS = {
|
||||
jitter: 0.1,
|
||||
} satisfies RetryConfig;
|
||||
|
||||
/** HTTP statuses below 500 that are retryable: 408 (request timeout) and 429 (rate limit). */
const DISCORD_RETRYABLE_STATUS_CODES = new Set<number>([408, 429]);

/** Error `code` values treated as transient transport failures. */
const DISCORD_RETRYABLE_ERROR_CODES = new Set<string>([
  // Node.js system error codes (DNS, connection, and socket failures).
  "EAI_AGAIN",
  "ECONNREFUSED",
  "ECONNRESET",
  "ENETUNREACH",
  "ENOTFOUND",
  "EPIPE",
  "ETIMEDOUT",
  // undici (Node fetch) error codes.
  "UND_ERR_BODY_TIMEOUT",
  "UND_ERR_CONNECT_TIMEOUT",
  "UND_ERR_HEADERS_TIMEOUT",
  "UND_ERR_SOCKET",
]);

/**
 * Case-insensitive fallback match for error messages that describe a transient
 * failure when no status or error code is available.
 */
const DISCORD_TRANSIENT_MESSAGE_RE =
  /\b(?:bad gateway|fetch failed|network error|networkerror|service unavailable|socket hang up|temporarily unavailable|timed out|timeout)\b|connection (?:closed|reset|refused)/i;
|
||||
|
||||
/**
 * Reads a numeric HTTP status from an error-like value.
 *
 * `status` is used when it is present and not `undefined`; otherwise
 * `statusCode` is used. The chosen value is accepted when it is a finite
 * number or a string made up solely of digits.
 *
 * @param err - Any thrown value.
 * @returns The status as a number, or `undefined` when none can be read.
 */
function readDiscordErrorStatus(err: unknown): number | undefined {
  if (typeof err !== "object" || err === null) {
    return undefined;
  }

  // Take the first of the two fields that is defined; a `null` status is still
  // "defined" and therefore does not fall through to `statusCode`.
  let raw: unknown;
  for (const key of ["status", "statusCode"] as const) {
    const value = (err as Record<string, unknown>)[key];
    if (value !== undefined) {
      raw = value;
      break;
    }
  }

  if (typeof raw === "number") {
    return Number.isFinite(raw) ? raw : undefined;
  }
  if (typeof raw === "string") {
    return /^\d+$/.test(raw) ? Number(raw) : undefined;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Classifies a failed Discord request as transient (worth retrying) or not.
 *
 * A `RateLimitError` is always retryable. Otherwise each candidate produced by
 * `collectErrorGraphCandidates` (expanded through `cause` and `error`) is
 * checked, in order, for:
 *   1. an HTTP status in `DISCORD_RETRYABLE_STATUS_CODES` or >= 500,
 *   2. an error code (upper-cased) in `DISCORD_RETRYABLE_ERROR_CODES`,
 *   3. an error name of `"AbortError"`,
 *   4. for object candidates only, a formatted message matching
 *      `DISCORD_TRANSIENT_MESSAGE_RE`.
 *
 * @param err - The value thrown or rejected by the request.
 * @returns `true` as soon as any candidate matches; `false` otherwise.
 */
export function isRetryableDiscordTransientError(err: unknown): boolean {
  if (err instanceof RateLimitError) {
    return true;
  }

  const candidates = collectErrorGraphCandidates(err, (node) => [node.cause, node.error]);
  for (const candidate of candidates) {
    const status = readDiscordErrorStatus(candidate);
    const retryableStatus =
      status !== undefined && (status >= 500 || DISCORD_RETRYABLE_STATUS_CODES.has(status));
    if (retryableStatus) {
      return true;
    }

    const code = extractErrorCode(candidate);
    const retryableCode = code ? DISCORD_RETRYABLE_ERROR_CODES.has(code.toUpperCase()) : false;
    if (retryableCode || readErrorName(candidate) === "AbortError") {
      return true;
    }

    // Message matching is limited to object candidates, so a bare string such
    // as "fetch failed" is never classified as transient.
    const objectLike =
      candidate instanceof Error || (candidate !== null && typeof candidate === "object");
    if (objectLike && DISCORD_TRANSIENT_MESSAGE_RE.test(formatErrorMessage(candidate))) {
      return true;
    }
  }

  return false;
}
|
||||
|
||||
export function createDiscordRetryRunner(params: {
|
||||
retry?: RetryConfig;
|
||||
configRetry?: RetryConfig;
|
||||
@@ -21,7 +92,7 @@ export function createDiscordRetryRunner(params: {
|
||||
...params,
|
||||
defaults: DISCORD_RETRY_DEFAULTS,
|
||||
logLabel: "discord",
|
||||
shouldRetry: (err) => err instanceof RateLimitError,
|
||||
shouldRetry: isRetryableDiscordTransientError,
|
||||
retryAfterMs: (err) => (err instanceof RateLimitError ? err.retryAfter * 1000 : undefined),
|
||||
});
|
||||
}
|
||||
|
||||
@@ -547,16 +547,33 @@ describe("retry rate limits", () => {
|
||||
expect(postMock).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("does not retry non-rate-limit errors", async () => {
|
||||
it("does not retry permanent non-rate-limit errors", async () => {
|
||||
const { rest, postMock } = makeDiscordRest();
|
||||
postMock.mockRejectedValueOnce(new Error("network error"));
|
||||
postMock.mockRejectedValueOnce(new Error("invalid request"));
|
||||
|
||||
await expect(
|
||||
sendMessageDiscord("channel:789", "hello", discordClientOpts(rest)),
|
||||
).rejects.toThrow("network error");
|
||||
).rejects.toThrow("invalid request");
|
||||
expect(postMock).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it("retries transient network errors", async () => {
  const { rest, postMock } = makeDiscordRest();
  // First POST rejects with a transport failure; the second one resolves.
  postMock.mockRejectedValueOnce(new TypeError("fetch failed"));
  postMock.mockResolvedValueOnce({ id: "msg1", channel_id: "789" });

  const sendOptions = {
    cfg: DISCORD_TEST_CFG,
    rest,
    token: "t",
    retry: { attempts: 2, minDelayMs: 0, maxDelayMs: 0, jitter: 0 },
  };
  const delivered = await sendMessageDiscord("channel:789", "hello", sendOptions);

  expect(delivered).toEqual({ messageId: "msg1", channelId: "789" });
  expect(postMock).toHaveBeenCalledTimes(2);
});
|
||||
|
||||
it("retries reactions on rate limits", async () => {
|
||||
const { rest, putMock } = makeDiscordRest();
|
||||
const rateLimitError = createMockRateLimitError(0);
|
||||
|
||||
Reference in New Issue
Block a user