fix(agents): recognize connection errors as retryable timeout failures (#31697)

* fix(agents): recognize connection errors as retryable timeout failures

## Problem

When a model endpoint becomes unreachable (e.g., local proxy down,
relay server offline), the failover system fails to switch to the
next candidate model. Errors like "Connection error." are not
classified as retryable, causing the session to hang on a broken
endpoint instead of falling back to healthy alternatives.

## Root Cause

Connection/network errors are not recognized by the current failover
classifier:
- Text patterns like "Connection error.", "fetch failed", "network error"
- Error codes like ECONNREFUSED, ENOTFOUND, EAI_AGAIN (in message text)

While `failover-error.ts` handles these as error codes (err.code),
it misses them when they appear as plain text in error messages.

## Solution

Extend timeout error patterns to include connection/network failures:

**In `errors.ts` (ERROR_PATTERNS.timeout):**
- Text: "connection error", "network error", "fetch failed", etc.
- Regex: /\beconn(?:refused|reset|aborted)\b/i, /\benotfound\b/i, /\beai_again\b/i

**In `failover-error.ts` (TIMEOUT_HINT_RE):**
- Same patterns for non-assistant error paths

## Testing

Added test cases covering:
- "Connection error."
- "fetch failed"
- "network error: ECONNREFUSED"
- "ENOTFOUND" / "EAI_AGAIN" in message text

## Impact

- **Compatibility:** High - only expands retryable error detection
- **Behavior:** Connection failures now trigger automatic fallback
- **Risk:** Low - changes are additive and well-tested

* style: fix code formatting for test file
This commit is contained in:
AI南柯(KingMo)
2026-03-03 10:37:23 +08:00
committed by GitHub
parent 5e1a2ea019
commit 30ab9b2068
4 changed files with 33 additions and 1 deletions

View File

@@ -48,6 +48,22 @@ describe("failover-error", () => {
expect(resolveFailoverReasonFromError({ message: "reason: error" })).toBe("timeout");
});
it("infers timeout from connection/network error messages", () => {
expect(resolveFailoverReasonFromError({ message: "Connection error." })).toBe("timeout");
expect(resolveFailoverReasonFromError({ message: "fetch failed" })).toBe("timeout");
expect(resolveFailoverReasonFromError({ message: "Network error: ECONNREFUSED" })).toBe(
"timeout",
);
expect(
resolveFailoverReasonFromError({
message: "dial tcp: lookup api.example.com: no such host (ENOTFOUND)",
}),
).toBe("timeout");
expect(resolveFailoverReasonFromError({ message: "temporary dns failure EAI_AGAIN" })).toBe(
"timeout",
);
});
it("treats AbortError reason=abort as timeout", () => {
const err = Object.assign(new Error("aborted"), {
name: "AbortError",

View File

@@ -6,7 +6,7 @@ import {
} from "./pi-embedded-helpers.js";
const TIMEOUT_HINT_RE =
/timeout|timed out|deadline exceeded|context deadline exceeded|stop reason:\s*(?:abort|error)|reason:\s*(?:abort|error)|unhandled stop reason:\s*(?:abort|error)/i;
/timeout|timed out|deadline exceeded|context deadline exceeded|connection error|network error|network request failed|fetch failed|socket hang up|econnrefused|econnreset|econnaborted|enotfound|eai_again|stop reason:\s*(?:abort|error)|reason:\s*(?:abort|error)|unhandled stop reason:\s*(?:abort|error)/i;
const ABORT_TIMEOUT_RE = /request was aborted|request aborted/i;
export class FailoverError extends Error {

View File

@@ -415,6 +415,7 @@ describe("isFailoverErrorMessage", () => {
"429 rate limit exceeded",
"Your credit balance is too low",
"request timed out",
"Connection error.",
"invalid request format",
];
for (const sample of samples) {
@@ -494,6 +495,13 @@ describe("classifyFailoverReason", () => {
expect(classifyFailoverReason("credit balance too low")).toBe("billing");
expect(classifyFailoverReason("deadline exceeded")).toBe("timeout");
expect(classifyFailoverReason("request ended without sending any chunks")).toBe("timeout");
expect(classifyFailoverReason("Connection error.")).toBe("timeout");
expect(classifyFailoverReason("fetch failed")).toBe("timeout");
expect(classifyFailoverReason("network error: ECONNREFUSED")).toBe("timeout");
expect(
classifyFailoverReason("dial tcp: lookup api.example.com: no such host (ENOTFOUND)"),
).toBe("timeout");
expect(classifyFailoverReason("temporary dns failure EAI_AGAIN")).toBe("timeout");
expect(
classifyFailoverReason(
"521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",

View File

@@ -640,6 +640,14 @@ const ERROR_PATTERNS = {
"timed out",
"deadline exceeded",
"context deadline exceeded",
"connection error",
"network error",
"network request failed",
"fetch failed",
"socket hang up",
/\beconn(?:refused|reset|aborted)\b/i,
/\benotfound\b/i,
/\beai_again\b/i,
/without sending (?:any )?chunks?/i,
/\bstop reason:\s*(?:abort|error)\b/i,
/\breason:\s*(?:abort|error)\b/i,