From 2af3415fac67553d8846fe63a2198c22d4ba76f2 Mon Sep 17 00:00:00 2001
From: Protocol Zero <257158451+Protocol-zero-0@users.noreply.github.com>
Date: Fri, 20 Feb 2026 04:45:09 +0800
Subject: [PATCH] fix: treat HTTP 503 as failover-eligible for LLM provider
 errors (#21086)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: treat HTTP 503 as failover-eligible for LLM provider errors

When LLM SDKs wrap 503 responses, the leading "503" prefix is lost
(e.g. Google Gemini returns "high demand" / "UNAVAILABLE" without a
numeric prefix). The existing isTransientHttpError only matches
messages starting with "503 ...", so these wrapped errors silently
skip failover — no profile rotation, no model fallback.

This patch closes that gap:

- resolveFailoverReasonFromError: map HTTP status 503 → rate_limit
  (covers structured error objects with a status field; superseded
  below — the final mapping is 503 → timeout)
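- ERROR_PATTERNS.overloaded: add /\b503\b/, "service unavailable",
  "high demand" (covers message-only classification when the leading
  status prefix is absent; the /\b503\b/ pattern is dropped again
  below, so only the two string patterns remain in the final diff)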

Existing isTransientHttpError behavior is unchanged; these additions
are complementary and only fire for errors that previously fell
through unclassified.

* fix: address review feedback — drop /\b503\b/ pattern, add test coverage

- Remove `/\b503\b/` from ERROR_PATTERNS.overloaded to resolve the
  semantic inconsistency noted by reviewers: `isTransientHttpError`
  already handles messages prefixed with "503" (→ "timeout"), so a
  redundant overloaded pattern would classify the same class of errors
  differently depending on message formatting.

- Keep "service unavailable" and "high demand" patterns — these are the
  real gap-fillers for SDK-rewritten messages that lack a numeric prefix.

- Add test case for JSON-wrapped 503 error body containing "overloaded"
  to strengthen coverage.

* fix: unify 503 classification — status 503 → timeout (consistent with isTransientHttpError)

resolveFailoverReasonFromError previously mapped status 503 → "rate_limit",
while the string-based isTransientHttpError mapped "503 ..." → "timeout".

Align both paths: structured {status: 503} now also returns "timeout",
matching the existing transient-error convention. Both reasons are
failover-eligible, so runtime behavior is unchanged.

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
---
 src/agents/failover-error.e2e.test.ts               |  1 +
 src/agents/failover-error.ts                        |  3 +++
 ...bedded-helpers.isbillingerrormessage.e2e.test.ts | 13 +++++++++++++
 src/agents/pi-embedded-helpers/errors.ts            |  7 ++++++-
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/agents/failover-error.e2e.test.ts b/src/agents/failover-error.e2e.test.ts
index 5fb9d06e602..ab31855cbb5 100644
--- a/src/agents/failover-error.e2e.test.ts
+++ b/src/agents/failover-error.e2e.test.ts
@@ -13,6 +13,7 @@ describe("failover-error", () => {
     expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth");
     expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout");
     expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format");
+    expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout");
   });
 
   it("infers format errors from error messages", () => {
diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts
index 6592cfc7f73..d2ec6c35c52 100644
--- a/src/agents/failover-error.ts
+++ b/src/agents/failover-error.ts
@@ -161,6 +161,9 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n
   if (status === 408) {
     return "timeout";
   }
+  if (status === 503) {
+    return "timeout";
+  }
   if (status === 400) {
     return "format";
   }
diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts
index 931a1bbe342..c62aac873b6 100644
--- a/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts
+++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.e2e.test.ts
@@ -348,4 +348,17 @@ describe("classifyFailoverReason", () => {
       "rate_limit",
     );
   });
+  it("classifies provider high-demand / service-unavailable messages as rate_limit", () => {
+    expect(
+      classifyFailoverReason(
+        "This model is currently experiencing high demand. Please try again later.",
+      ),
+    ).toBe("rate_limit");
+    expect(classifyFailoverReason("LLM error: service unavailable")).toBe("rate_limit");
+    expect(
+      classifyFailoverReason(
+        '{"error":{"code":503,"message":"The model is overloaded. Please try later","status":"UNAVAILABLE"}}',
+      ),
+    ).toBe("rate_limit");
+  });
 });
diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts
index 5233eb9c421..088707eef56 100644
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -583,7 +583,12 @@ const ERROR_PATTERNS = {
     "resource_exhausted",
     "usage limit",
   ],
-  overloaded: [/overloaded_error|"type"\s*:\s*"overloaded_error"/i, "overloaded"],
+  overloaded: [
+    /overloaded_error|"type"\s*:\s*"overloaded_error"/i,
+    "overloaded",
+    "service unavailable",
+    "high demand",
+  ],
   timeout: [
     "timeout",
     "timed out",