mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-08 06:54:24 +00:00
fix(agents): recognize connection errors as retryable timeout failures (#31697)
* fix(agents): recognize connection errors as retryable timeout failures

  ## Problem

  When a model endpoint becomes unreachable (e.g., local proxy down, relay server offline), the failover system fails to switch to the next candidate model. Errors like "Connection error." are not classified as retryable, causing the session to hang on a broken endpoint instead of falling back to healthy alternatives.

  ## Root Cause

  Connection/network errors are not recognized by the current failover classifier:

  - Text patterns like "Connection error.", "fetch failed", "network error"
  - Error codes like ECONNREFUSED, ENOTFOUND, EAI_AGAIN (in message text)

  While `failover-error.ts` handles these as error codes (err.code), it misses them when they appear as plain text in error messages.

  ## Solution

  Extend timeout error patterns to include connection/network failures:

  **In `errors.ts` (ERROR_PATTERNS.timeout):**

  - Text: "connection error", "network error", "fetch failed", etc.
  - Regex: /\beconn(?:refused|reset|aborted)\b/i, /\benotfound\b/i, /\beai_again\b/i

  **In `failover-error.ts` (TIMEOUT_HINT_RE):**

  - Same patterns for non-assistant error paths

  ## Testing

  Added test cases covering:

  - "Connection error."
  - "fetch failed"
  - "network error: ECONNREFUSED"
  - "ENOTFOUND" / "EAI_AGAIN" in message text

  ## Impact

  - **Compatibility:** High - only expands retryable error detection
  - **Behavior:** Connection failures now trigger automatic fallback
  - **Risk:** Low - changes are additive and well-tested

* style: fix code formatting for test file
This commit is contained in:
@@ -48,6 +48,22 @@ describe("failover-error", () => {
|
||||
expect(resolveFailoverReasonFromError({ message: "reason: error" })).toBe("timeout");
|
||||
});
|
||||
|
||||
it("infers timeout from connection/network error messages", () => {
|
||||
expect(resolveFailoverReasonFromError({ message: "Connection error." })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ message: "fetch failed" })).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ message: "Network error: ECONNREFUSED" })).toBe(
|
||||
"timeout",
|
||||
);
|
||||
expect(
|
||||
resolveFailoverReasonFromError({
|
||||
message: "dial tcp: lookup api.example.com: no such host (ENOTFOUND)",
|
||||
}),
|
||||
).toBe("timeout");
|
||||
expect(resolveFailoverReasonFromError({ message: "temporary dns failure EAI_AGAIN" })).toBe(
|
||||
"timeout",
|
||||
);
|
||||
});
|
||||
|
||||
it("treats AbortError reason=abort as timeout", () => {
|
||||
const err = Object.assign(new Error("aborted"), {
|
||||
name: "AbortError",
|
||||
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
} from "./pi-embedded-helpers.js";
|
||||
|
||||
const TIMEOUT_HINT_RE =
|
||||
/timeout|timed out|deadline exceeded|context deadline exceeded|stop reason:\s*(?:abort|error)|reason:\s*(?:abort|error)|unhandled stop reason:\s*(?:abort|error)/i;
|
||||
/timeout|timed out|deadline exceeded|context deadline exceeded|connection error|network error|network request failed|fetch failed|socket hang up|econnrefused|econnreset|econnaborted|enotfound|eai_again|stop reason:\s*(?:abort|error)|reason:\s*(?:abort|error)|unhandled stop reason:\s*(?:abort|error)/i;
|
||||
const ABORT_TIMEOUT_RE = /request was aborted|request aborted/i;
|
||||
|
||||
export class FailoverError extends Error {
|
||||
|
||||
@@ -415,6 +415,7 @@ describe("isFailoverErrorMessage", () => {
|
||||
"429 rate limit exceeded",
|
||||
"Your credit balance is too low",
|
||||
"request timed out",
|
||||
"Connection error.",
|
||||
"invalid request format",
|
||||
];
|
||||
for (const sample of samples) {
|
||||
@@ -494,6 +495,13 @@ describe("classifyFailoverReason", () => {
|
||||
expect(classifyFailoverReason("credit balance too low")).toBe("billing");
|
||||
expect(classifyFailoverReason("deadline exceeded")).toBe("timeout");
|
||||
expect(classifyFailoverReason("request ended without sending any chunks")).toBe("timeout");
|
||||
expect(classifyFailoverReason("Connection error.")).toBe("timeout");
|
||||
expect(classifyFailoverReason("fetch failed")).toBe("timeout");
|
||||
expect(classifyFailoverReason("network error: ECONNREFUSED")).toBe("timeout");
|
||||
expect(
|
||||
classifyFailoverReason("dial tcp: lookup api.example.com: no such host (ENOTFOUND)"),
|
||||
).toBe("timeout");
|
||||
expect(classifyFailoverReason("temporary dns failure EAI_AGAIN")).toBe("timeout");
|
||||
expect(
|
||||
classifyFailoverReason(
|
||||
"521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
|
||||
|
||||
@@ -640,6 +640,14 @@ const ERROR_PATTERNS = {
|
||||
"timed out",
|
||||
"deadline exceeded",
|
||||
"context deadline exceeded",
|
||||
"connection error",
|
||||
"network error",
|
||||
"network request failed",
|
||||
"fetch failed",
|
||||
"socket hang up",
|
||||
/\beconn(?:refused|reset|aborted)\b/i,
|
||||
/\benotfound\b/i,
|
||||
/\beai_again\b/i,
|
||||
/without sending (?:any )?chunks?/i,
|
||||
/\bstop reason:\s*(?:abort|error)\b/i,
|
||||
/\breason:\s*(?:abort|error)\b/i,
|
||||
|
||||
Reference in New Issue
Block a user