fix(whatsapp): retry opening-phase 428 closes

Co-authored-by: dataCenter430 <titan032000@gmail.com>
This commit is contained in:
Peter Steinberger
2026-05-11 13:54:18 +01:00
parent 1f8d29e532
commit ac7e1c36eb
3 changed files with 123 additions and 1 deletion

View File

@@ -66,6 +66,7 @@ Docs: https://docs.openclaw.ai
- Gateway: scope `sessions.resolve` sessionId and label store loads to the requested agent so large unrelated agent stores are not parsed for scoped lookups. Fixes #51264. (#79474) Thanks @samzong.
- Gateway: share serialized streaming event envelopes across eligible WebSocket and node subscribers while preserving per-client sequence numbers. (#80299) Thanks @samzong.
- Browser: report Chrome MCP existing-session page readiness in browser status without letting status probes exceed the client timeout. Fixes #80268. (#80280) Thanks @ai-hpc.
- WhatsApp: route opening-phase Baileys 428 connectionClosed through the WhatsApp reconnect policy and keep post-open 428 closes retryable, so transient setup socket closes retry with WhatsApp diagnostics instead of escaping as a bare `channel exited` error. Fixes #75736; mitigates #77443. Thanks @dataCenter430.
- Providers/self-hosted: read model-scoped llama.cpp runtime context from `/props.default_generation_settings.n_ctx` while keeping top-level `n_ctx` as a fallback, so session budgeting reflects the loaded context window. Fixes #73664. (#74057) Thanks @brokemac79.
- Memory: reject symlinked directory components in configured extra memory paths before reading Markdown files. (#80331) Thanks @samzong.
- Sessions/transcripts: replace whole-file `readFile` scans with shared streaming helpers (`streamSessionTranscriptLines` and `streamSessionTranscriptLinesReverse`) for idempotency lookup, latest/tail assistant text reads, delivery-mirror dedupe, and compaction fork loading, so long-running sessions no longer materialize the full transcript in memory. Forward scans use `readline` over a bounded `createReadStream`; reverse scans read bounded chunks from the file end and decode complete JSONL lines newest-first without a fixed tail cap. Synthetic 200 MiB transcript: peak RSS delta drops from +252 MiB to +27 MiB while preserving malformed-line tolerance and idempotency-key return semantics. Fixes #54296. Thanks @jack-stormentswe.

View File

@@ -169,6 +169,75 @@ describe("web auto-reply connection", () => {
}
});
// Regression test: when the listener factory itself throws a Boom-shaped 428
// error (i.e. the failure happens before any listener exists), the monitor must
// retry until the configured attempt budget is used up, and `run` must settle
// without rejecting.
it("retries opening-phase Boom 428 through the reconnect policy", async () => {
// Boom-style error object: the status code is nested under `output.statusCode`
// rather than being a top-level `status` field.
const boom428 = {
output: {
statusCode: 428,
payload: { error: "Precondition Required", message: "Connection Terminated" },
},
};
// Every attempt to open a listener fails with the same 428.
const listenerFactory = vi.fn(async () => {
throw boom428;
});
// No-op sleep stub; presumably injected so retry delays resolve immediately.
const sleep = vi.fn(async () => {});
const { runtime, run } = startWebAutoReplyMonitor({
monitorWebChannelFn: monitorWebChannel as never,
listenerFactory,
sleep,
reconnect: { initialMs: 10, maxMs: 10, maxAttempts: 2, factor: 1.1 },
});
await run;
// With maxAttempts: 2 the factory is expected to be invoked exactly twice.
expect(listenerFactory).toHaveBeenCalledTimes(2);
expect(sleep).toHaveBeenCalled();
// The operator-facing errors must carry the status, the retry notice for the
// first attempt, and the final "attempts exhausted" notice.
expectErrorContaining(runtime.error, "status 428");
expectErrorContaining(runtime.error, "Retry 1/2");
expectErrorContaining(runtime.error, "2/2 attempts");
});
// Regression test: a 428 close reported by a listener that was already created
// must lead to another listener being created (a reconnect), not to the monitor
// stopping.
it("keeps post-open Baileys 428 on the reconnect path", async () => {
// No-op sleep stub; presumably injected so retry delays resolve immediately.
const sleep = vi.fn(async () => {});
const scripted = createScriptedWebListenerFactory();
const { controller, run } = startWebAutoReplyMonitor({
monitorWebChannelFn: monitorWebChannel as never,
listenerFactory: scripted.listenerFactory,
sleep,
reconnect: { initialMs: 10, maxMs: 10, maxAttempts: 3, factor: 1.1 },
});
// Wait until the first listener has been created.
await vi.waitFor(
() => {
expect(scripted.getListenerCount()).toBe(1);
},
{ timeout: 250, interval: 2 },
);
// Report a non-logged-out 428 close for the first listener
// (first argument is presumably the listener index — confirm against the helper).
scripted.resolveClose(0, {
status: 428,
isLoggedOut: false,
error: "Connection Terminated",
});
// A second listener appearing shows that the monitor reconnected.
await vi.waitFor(
() => {
expect(scripted.getListenerCount()).toBeGreaterThanOrEqual(2);
},
{ timeout: 250, interval: 2 },
);
// Tear down: abort the monitor, then close the most recent listener so that
// `run` can settle.
controller.abort();
scripted.resolveClose(scripted.getListenerCount() - 1, {
status: 499,
isLoggedOut: false,
error: "aborted",
});
await run;
expect(scripted.getListenerCount()).toBeGreaterThanOrEqual(2);
expect(sleep).toHaveBeenCalled();
});
it("treats status 440 as non-retryable and stops without retrying", async () => {
const sleep = vi.fn(async () => {});
const scripted = createScriptedWebListenerFactory();

View File

@@ -29,7 +29,13 @@ import {
resolveReconnectPolicy,
sleepWithAbort,
} from "../reconnect.js";
import { formatError, getWebAuthAgeMs, logoutWeb, readWebSelfId } from "../session.js";
import {
formatError,
getStatusCode,
getWebAuthAgeMs,
logoutWeb,
readWebSelfId,
} from "../session.js";
import { resolveWhatsAppSocketTiming } from "../socket-timing.js";
import { getRuntimeConfig, getRuntimeConfigSourceSnapshot } from "./config.runtime.js";
import { whatsappHeartbeatLog, whatsappLog } from "./loggers.js";
@@ -43,6 +49,8 @@ import { isLikelyWhatsAppCryptoError } from "./util.js";
/**
 * Reports whether a WhatsApp Web close status must not be retried.
 *
 * Only 440 qualifies: it is WhatsApp's session-conflict status
 * ("Unknown Stream Errored (conflict)") and persists until the operator
 * resolves the conflicting session.
 *
 * 428 is deliberately excluded. It is Baileys'
 * `DisconnectReason.connectionClosed`, a generic WebSocket close that is often
 * transient, so it has to remain on the reconnect path.
 *
 * @param statusCode - Close status of unknown shape; compared strictly, so
 *   only the number 440 matches.
 * @returns `true` when the status is the non-retryable session conflict.
 */
function isNonRetryableWebCloseStatus(statusCode: unknown): boolean {
  const sessionConflictStatus = 440;
  return statusCode === sessionConflictStatus;
}
@@ -395,6 +403,50 @@ export async function monitorWebChannel(
},
});
} catch (error) {
// A 428 thrown while the connection is still being set up is handled here:
// one reconnect attempt is consumed and the close is recorded on the status
// controller before deciding whether to stop or retry.
if (getStatusCode(error) === 428) {
const retryDecision = controller.consumeReconnectAttempt();
statusController.noteReconnectAttempts(retryDecision.reconnectAttempts);
statusController.noteClose({
statusCode: 428,
error: formatError(error),
reconnectAttempts: retryDecision.reconnectAttempts,
healthState: retryDecision.healthState,
});
// The reconnect controller says to stop: log a warning, tell the operator how
// to relink, shut the controller down and leave the enclosing loop
// (presumably the reconnect loop — its header is outside this hunk).
if (retryDecision.action === "stop") {
reconnectLogger.warn(
{
connectionId,
status: 428,
reconnectAttempts: retryDecision.reconnectAttempts,
maxAttempts: reconnectPolicy.maxAttempts,
},
"web reconnect: 428 during opening; max attempts reached",
);
runtime.error(
`WhatsApp Web connection closed during setup (status 428) after ${retryDecision.reconnectAttempts}/${reconnectPolicy.maxAttempts} attempts. Relink with \`${formatCliCommand("openclaw channels login --channel whatsapp")}\` if the issue persists.`,
);
await controller.shutdown();
break;
}
// Otherwise a retry is due: log it and surface the retry notice.
reconnectLogger.info(
{
connectionId,
status: 428,
reconnectAttempts: retryDecision.reconnectAttempts,
delayMs: retryDecision.delayMs,
},
"web reconnect: 428 during opening; retrying",
);
// A falsy maxAttempts is rendered as "∞"
// (presumably 0 means "unlimited" — confirm against the reconnect policy).
runtime.error(
`WhatsApp Web connection closed during setup (status 428). Retry ${retryDecision.reconnectAttempts}/${reconnectPolicy.maxAttempts || "∞"} in ${formatDurationPrecise(retryDecision.delayMs ?? 0)}.`,
);
// Wait out the delay (0 when the decision carries none). If the wait throws,
// stop instead of retrying.
// NOTE(review): presumably this throws only when the wait is aborted — confirm.
try {
await controller.waitBeforeRetry(retryDecision.delayMs ?? 0);
} catch {
break;
}
// Go around the enclosing loop again for the next attempt.
continue;
}
if (!isRetryableAuthUnstableError(error)) {
throw error;
}