From 2af3415fac67553d8846fe63a2198c22d4ba76f2 Mon Sep 17 00:00:00 2001 From: Protocol Zero <257158451+Protocol-zero-0@users.noreply.github.com> Date: Fri, 20 Feb 2026 04:45:09 +0800 Subject: [PATCH] fix: treat HTTP 503 as failover-eligible for LLM provider errors (#21086) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: treat HTTP 503 as failover-eligible for LLM provider errors When LLM SDKs wrap 503 responses, the leading "503" prefix is lost (e.g. Google Gemini returns "high demand" / "UNAVAILABLE" without a numeric prefix). The existing isTransientHttpError only matches messages starting with "503 ...", so these wrapped errors silently skip failover — no profile rotation, no model fallback. This patch closes that gap: - resolveFailoverReasonFromError: map HTTP status 503 → rate_limit (covers structured error objects with a status field) - ERROR_PATTERNS.overloaded: add /\b503\b/, "service unavailable", "high demand" (covers message-only classification when the leading status prefix is absent) Existing isTransientHttpError behavior is unchanged; these additions are complementary and only fire for errors that previously fell through unclassified. * fix: address review feedback — drop /\b503\b/ pattern, add test coverage - Remove `/\b503\b/` from ERROR_PATTERNS.overloaded to resolve the semantic inconsistency noted by reviewers: `isTransientHttpError` already handles messages prefixed with "503" (→ "timeout"), so a redundant overloaded pattern would classify the same class of errors differently depending on message formatting. - Keep "service unavailable" and "high demand" patterns — these are the real gap-fillers for SDK-rewritten messages that lack a numeric prefix. - Add test case for JSON-wrapped 503 error body containing "overloaded" to strengthen coverage. * fix: unify 503 classification — status 503 → timeout (consistent with isTransientHttpError) resolveFailoverReasonFromError previously mapped status 503 → "rate_limit", while the string-based isTransientHttpError mapped "503 ..." → "timeout". Align both paths: structured {status: 503} now also returns "timeout", matching the existing transient-error convention. Both reasons are failover-eligible, so runtime behavior is unchanged. --------- Co-authored-by: Vincent Koc --- src/agents/failover-error.e2e.test.ts | 1 + src/agents/failover-error.ts | 3 +++ ...bedded-helpers.isbillingerrormessage.e2e.test.ts | 13 +++++++++++++ src/agents/pi-embedded-helpers/errors.ts | 7 ++++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/agents/failover-error.e2e.test.ts b/src/agents/failover-error.e2e.test.ts index 5fb9d06e602..ab31855cbb5 100644 --- a/src/agents/failover-error.e2e.test.ts +++ b/src/agents/failover-error.e2e.test.ts @@ -13,6 +13,7 @@ describe("failover-error", () => { expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth"); expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format"); + expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout"); }); it("infers format errors from error messages", () => { diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 6592cfc7f73..d2ec6c35c52 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -161,6 +161,9 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n if (status === 408) { return "timeout"; } + if (status === 503) { + return "timeout"; + } if (status === 400) { return "format"; } diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts index 931a1bbe342..c62aac873b6 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts @@ -348,4 +348,17 @@ describe("classifyFailoverReason", () => { "rate_limit", ); }); + it("classifies provider high-demand / service-unavailable messages as rate_limit", () => { + expect( + classifyFailoverReason( + "This model is currently experiencing high demand. Please try again later.", + ), + ).toBe("rate_limit"); + expect(classifyFailoverReason("LLM error: service unavailable")).toBe("rate_limit"); + expect( + classifyFailoverReason( + '{"error":{"code":503,"message":"The model is overloaded. Please try later","status":"UNAVAILABLE"}}', + ), + ).toBe("rate_limit"); + }); }); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 5233eb9c421..088707eef56 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -583,7 +583,12 @@ const ERROR_PATTERNS = { "resource_exhausted", "usage limit", ], - overloaded: [/overloaded_error|"type"\s*:\s*"overloaded_error"/i, "overloaded"], + overloaded: [ + /overloaded_error|"type"\s*:\s*"overloaded_error"/i, + "overloaded", + "service unavailable", + "high demand", + ], timeout: [ "timeout", "timed out",