diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index 2b40307217a..9100304533d 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -59,6 +59,30 @@ describe("runWithModelFallback", () => {
expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
});
+  it("falls back on transient HTTP 5xx errors", async () => {
+    const cfg = makeCfg();
+    // First call rejects with a 521 error page; the second call resolves with "ok".
+    const run = vi
+      .fn()
+      .mockRejectedValueOnce(
+        new Error(
+          "521 <!doctype html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
+        ),
+      )
+      .mockResolvedValueOnce("ok");
+    const result = await runWithModelFallback({
+      cfg,
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run,
+    });
+
+    expect(result.result).toBe("ok");
+    expect(run).toHaveBeenCalledTimes(2);
+    expect(run.mock.calls[1]?.[0]).toBe("anthropic");
+    expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
+  });
+
it("falls back on 402 payment required", async () => {
const cfg = makeCfg();
const run = vi
diff --git a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts
index 749a5241406..1b175e77b41 100644
--- a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts
+++ b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts
@@ -24,6 +24,11 @@ describe("classifyFailoverReason", () => {
expect(classifyFailoverReason("invalid request format")).toBe("format");
expect(classifyFailoverReason("credit balance too low")).toBe("billing");
expect(classifyFailoverReason("deadline exceeded")).toBe("timeout");
+ expect(
+ classifyFailoverReason(
+ "521 Web server is downCloudflare",
+ ),
+ ).toBe("timeout");
expect(classifyFailoverReason("string should match pattern")).toBe("format");
expect(classifyFailoverReason("bad request")).toBeNull();
expect(
diff --git a/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts b/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts
index 137bf8536e3..8fd0ed1aff8 100644
--- a/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts
+++ b/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts
@@ -22,4 +22,16 @@ describe("formatRawAssistantErrorForUi", () => {
"HTTP 500: Internal Server Error",
);
});
+
+  it("sanitizes HTML error pages into a clean unavailable message", () => {
+    const htmlError = `521 <!DOCTYPE html>
+<html lang="en-US">
+  <head><title>Web server is down | example.com | Cloudflare</title></head>
+  <body>Ray ID: abc123</body>
+</html>`;
+    // A status code followed by a whole HTML document should yield the generic notice.
+    expect(formatRawAssistantErrorForUi(htmlError)).toBe(
+      "The AI service is temporarily unavailable (HTTP 521). Please try again in a moment.",
+    );
+  });
});
diff --git a/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts b/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts
new file mode 100644
index 00000000000..ebdb22c6c5d
--- /dev/null
+++ b/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, it } from "vitest";
+import { isCloudflareOrHtmlErrorPage } from "./pi-embedded-helpers.js";
+
+describe("isCloudflareOrHtmlErrorPage", () => {
+  it("detects Cloudflare 521 HTML pages", () => {
+    const htmlError = `521 <!DOCTYPE html>
+<html lang="en-US">
+  <head><title>Web server is down | example.com | Cloudflare</title></head>
+  <body><h1>Web server is down</h1></body>
+</html>`;
+    // Status code followed by a complete HTML document.
+    expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true);
+  });
+
+  it("detects generic 5xx HTML pages", () => {
+    const htmlError = `503 <html><head><title>Service Unavailable</title></head><body>down</body></html>`;
+    expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true);
+  });
+
+  it("does not flag non-HTML status lines", () => {
+    expect(isCloudflareOrHtmlErrorPage("500 Internal Server Error")).toBe(false);
+    expect(isCloudflareOrHtmlErrorPage("429 Too Many Requests")).toBe(false);
+  });
+  // An opening <html> tag with no matching closing tag is expected to be rejected.
+  it("does not flag quoted HTML without a closing html tag", () => {
+    const plainTextWithHtmlPrefix = "500 <html>upstream responded with partial HTML text";
+    expect(isCloudflareOrHtmlErrorPage(plainTextWithHtmlPrefix)).toBe(false);
+  });
+});
diff --git a/src/agents/pi-embedded-helpers.istransienthttperror.test.ts b/src/agents/pi-embedded-helpers.istransienthttperror.test.ts
new file mode 100644
index 00000000000..faaf4a20139
--- /dev/null
+++ b/src/agents/pi-embedded-helpers.istransienthttperror.test.ts
@@ -0,0 +1,18 @@
+import { describe, expect, it } from "vitest";
+import { isTransientHttpError } from "./pi-embedded-helpers.js";
+
+describe("isTransientHttpError", () => {
+  it("returns true for retryable 5xx status codes", () => {
+    const accepted = ["500 Internal Server Error", "502 Bad Gateway", "503 Service Unavailable"];
+    for (const message of [...accepted, "521 ", "529 Overloaded"]) {
+      expect(isTransientHttpError(message)).toBe(true);
+    }
+  });
+
+  it("returns false for non-retryable or non-http text", () => {
+    // 504 and 429 carry a status code, yet are expected to be rejected here.
+    for (const message of ["504 Gateway Timeout", "429 Too Many Requests", "network timeout"]) {
+      expect(isTransientHttpError(message)).toBe(false);
+    }
+  });
+});
diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts
index f8fb4f0ec5a..e468843aec6 100644
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -17,6 +17,7 @@ export {
parseApiErrorInfo,
sanitizeUserFacingText,
isBillingErrorMessage,
+ isCloudflareOrHtmlErrorPage,
isCloudCodeAssistFormatError,
isCompactionFailureError,
isContextOverflowError,
@@ -29,6 +30,7 @@ export {
isRawApiErrorPayload,
isRateLimitAssistantError,
isRateLimitErrorMessage,
+ isTransientHttpError,
isTimeoutErrorMessage,
parseImageDimensionError,
parseImageSizeError,
diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts
index 4865833cd71..12461074fa6 100644
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -78,6 +78,10 @@ const ERROR_PREFIX_RE =
const CONTEXT_OVERFLOW_ERROR_HEAD_RE =
/^(?:context overflow:|request_too_large\b|request size exceeds\b|request exceeds the maximum size\b|context length exceeded\b|maximum context length\b|prompt is too long\b|exceeds model context window\b)/i;
const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i;
+const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i;
+const HTML_ERROR_PREFIX_RE = /^\s*(?:/i.test(status.rest)
+ );
+}
+
+/**
+ * Reports whether the status code that extractLeadingHttpStatus finds in the
+ * trimmed `raw` text is a member of TRANSIENT_HTTP_ERROR_CODES. Blank input and
+ * text with no extractable status are never transient.
+ */
+export function isTransientHttpError(raw: string): boolean {
+  const text = raw.trim();
+  // Skip the status lookup entirely for blank input.
+  const leading = text ? extractLeadingHttpStatus(text) : undefined;
+  return leading ? TRANSIENT_HTTP_ERROR_CODES.has(leading.code) : false;
+}
+
function stripFinalTagsFromText(text: string): string {
if (!text) {
return text;
@@ -133,6 +181,9 @@ function collapseConsecutiveDuplicateBlocks(text: string): string {
}
function isLikelyHttpErrorText(raw: string): boolean {
+ if (isCloudflareOrHtmlErrorPage(raw)) {
+ return true;
+ }
const match = raw.match(HTTP_STATUS_PREFIX_RE);
if (!match) {
return false;
@@ -311,6 +362,11 @@ export function formatRawAssistantErrorForUi(raw?: string): string {
return "LLM request failed with an unknown error.";
}
+ const leadingStatus = extractLeadingHttpStatus(trimmed);
+ if (leadingStatus && isCloudflareOrHtmlErrorPage(trimmed)) {
+ return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`;
+ }
+
const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE);
if (httpMatch) {
const rest = httpMatch[2].trim();
@@ -641,6 +697,10 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
if (isImageSizeError(raw)) {
return null;
}
+ if (isTransientHttpError(raw)) {
+ // Treat transient 5xx provider failures as retryable transport issues.
+ return "timeout";
+ }
if (isRateLimitErrorMessage(raw)) {
return "rate_limit";
}
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 0979f31ccdb..c1e1b4c66cd 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -14,6 +14,7 @@ import {
isCompactionFailureError,
isContextOverflowError,
isLikelyContextOverflowError,
+ isTransientHttpError,
sanitizeUserFacingText,
} from "../../agents/pi-embedded-helpers.js";
import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
@@ -79,6 +80,7 @@ export async function runAgentTurnWithFallback(params: {
storePath?: string;
resolvedVerboseLevel: VerboseLevel;
}): Promise {
+ const TRANSIENT_HTTP_RETRY_DELAY_MS = 2_500;
let didLogHeartbeatStrip = false;
let autoCompactionCompleted = false;
// Track payloads sent directly (not via pipeline) during tool flush to avoid duplicates.
@@ -97,6 +99,7 @@ export async function runAgentTurnWithFallback(params: {
let fallbackProvider = params.followupRun.run.provider;
let fallbackModel = params.followupRun.run.model;
let didResetAfterCompactionFailure = false;
+ let didRetryTransientHttpError = false;
while (true) {
try {
@@ -506,6 +509,7 @@ export async function runAgentTurnWithFallback(params: {
const isCompactionFailure = isCompactionFailureError(message);
const isSessionCorruption = /function call turn comes immediately after/i.test(message);
const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
+ const isTransientHttp = isTransientHttpError(message);
if (
isCompactionFailure &&
@@ -577,8 +581,26 @@ export async function runAgentTurnWithFallback(params: {
};
}
+ if (isTransientHttp && !didRetryTransientHttpError) {
+ didRetryTransientHttpError = true;
+ // Retry the full runWithModelFallback() cycle — transient errors
+ // (502/521/etc.) typically affect the whole provider, so falling
+ // back to an alternate model first would not help. Instead we wait
+ // and retry the complete primary→fallback chain.
+ defaultRuntime.error(
+ `Transient HTTP provider error before reply (${message}). Retrying once in ${TRANSIENT_HTTP_RETRY_DELAY_MS}ms.`,
+ );
+ await new Promise((resolve) => {
+ setTimeout(resolve, TRANSIENT_HTTP_RETRY_DELAY_MS);
+ });
+ continue;
+ }
+
defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
- const trimmedMessage = message.replace(/\.\s*$/, "");
+ const safeMessage = isTransientHttp
+ ? sanitizeUserFacingText(message, { errorContext: true })
+ : message;
+ const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
const fallbackText = isContextOverflow
? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
: isRoleOrderingError
diff --git a/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts b/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts
new file mode 100644
index 00000000000..5f21a40a9cc
--- /dev/null
+++ b/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts
@@ -0,0 +1,136 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { TemplateContext } from "../templating.js";
+import type { FollowupRun, QueueSettings } from "./queue.js";
+import { createMockTypingController } from "./test-helpers.js";
+
+const runEmbeddedPiAgentMock = vi.fn();
+const runtimeErrorMock = vi.fn();
+// Stand-in for model fallback: calls `run` once with the requested provider/model.
+vi.mock("../../agents/model-fallback.js", () => ({
+  runWithModelFallback: async ({
+    provider,
+    model,
+    run,
+  }: {
+    provider: string;
+    model: string;
+    run: (provider: string, model: string) => Promise<unknown>;
+  }) => ({
+    result: await run(provider, model),
+    provider,
+    model,
+  }),
+}));
+// Embedded agent runs are delegated to runEmbeddedPiAgentMock; queueing always returns false.
+vi.mock("../../agents/pi-embedded.js", () => ({
+  queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
+  runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params),
+}));
+// Runtime error output is forwarded to runtimeErrorMock so tests can inspect it.
+vi.mock("../../runtime.js", () => ({
+  defaultRuntime: {
+    log: vi.fn(),
+    error: (...args: unknown[]) => runtimeErrorMock(...args),
+    exit: vi.fn(),
+  },
+}));
+// Keep the real queue module, replacing only the follow-up enqueue/drain functions.
+vi.mock("./queue.js", async () => {
+  const actual = await vi.importActual<typeof import("./queue.js")>("./queue.js");
+  return {
+    ...actual,
+    enqueueFollowupRun: vi.fn(),
+    scheduleFollowupDrain: vi.fn(),
+  };
+});
+
+import { runReplyAgent } from "./agent-runner.js";
+
+describe("runReplyAgent transient HTTP retry", () => {
+  // Fake timers let the test step over the retry delay instead of waiting for it.
+  beforeEach(() => {
+    for (const mock of [runEmbeddedPiAgentMock, runtimeErrorMock]) {
+      mock.mockReset();
+    }
+    vi.useFakeTimers();
+  });
+
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("retries once after transient 521 HTML failure and then succeeds", async () => {
+    // The first agent run rejects with a 521 message; the second one resolves.
+    const firstFailure = new Error(`521 Web server is downCloudflare`);
+    const recovered = { payloads: [{ text: "Recovered response" }], meta: {} };
+    runEmbeddedPiAgentMock.mockRejectedValueOnce(firstFailure);
+    runEmbeddedPiAgentMock.mockResolvedValueOnce(recovered);
+
+    // Partial fixtures, force-cast to the project types they stand in for.
+    const typingController = createMockTypingController();
+    const templateCtx = {
+      Provider: "telegram",
+      MessageSid: "msg",
+    } as unknown as TemplateContext;
+    const queueSettings = { mode: "interrupt" } as unknown as QueueSettings;
+    const agentRun = {
+      sessionId: "session",
+      sessionKey: "main",
+      messageProvider: "telegram",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      config: {},
+      skillsSnapshot: {},
+      provider: "anthropic",
+      model: "claude",
+      thinkLevel: "low",
+      verboseLevel: "off",
+      elevatedLevel: "off",
+      bashElevated: {
+        enabled: false,
+        allowed: false,
+        defaultLevel: "off",
+      },
+      timeoutMs: 1_000,
+      blockReplyBreak: "message_end",
+    };
+    const queuedRun = {
+      prompt: "hello",
+      summaryLine: "hello",
+      enqueuedAt: Date.now(),
+      run: agentRun,
+    } as unknown as FollowupRun;
+
+    const pendingReply = runReplyAgent({
+      commandBody: "hello",
+      followupRun: queuedRun,
+      queueKey: "main",
+      resolvedQueue: queueSettings,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing: typingController,
+      sessionCtx: templateCtx,
+      defaultModel: "anthropic/claude-opus-4-5",
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+
+    // Advance the fake clock by 2.5s so the pending retry timer can fire.
+    await vi.advanceTimersByTimeAsync(2_500);
+    const reply = await pendingReply;
+
+    // Exactly two agent runs are expected: the failed attempt plus one retry.
+    expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(2);
+    const retryNotice = expect.stringContaining("Transient HTTP provider error before reply");
+    expect(runtimeErrorMock).toHaveBeenCalledWith(retryNotice);
+
+    const [firstPayload] = Array.isArray(reply) ? reply : [reply];
+    expect(firstPayload?.text).toContain("Recovered response");
+  });
+});
diff --git a/src/memory/qmd-manager.test.ts b/src/memory/qmd-manager.test.ts
index 7af091b8b30..e8396802862 100644
--- a/src/memory/qmd-manager.test.ts
+++ b/src/memory/qmd-manager.test.ts
@@ -387,7 +387,7 @@ describe("QmdMemoryManager", () => {
);
expect(searchAndQueryCalls).toEqual([
["search", "test", "--json"],
- ["query", "test", "--json", "-n", String(maxResults)],
+ ["query", "test", "--json", "-n", String(maxResults), "-c", "workspace"],
]);
await manager.close();
});