diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts
index 2b40307217a..9100304533d 100644
--- a/src/agents/model-fallback.test.ts
+++ b/src/agents/model-fallback.test.ts
@@ -59,6 +59,30 @@ describe("runWithModelFallback", () => {
     expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5");
   });
 
+  // A rejection whose message starts with a transient 5xx status (here a
+  // Cloudflare 521 HTML page) must hand the run over to the next candidate.
+  it("falls back on transient HTTP 5xx errors", async () => {
+    const cloudflareDown = new Error(
+      "521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
+    );
+    const attempt = vi.fn();
+    attempt.mockRejectedValueOnce(cloudflareDown);
+    attempt.mockResolvedValueOnce("ok");
+
+    const outcome = await runWithModelFallback({
+      cfg: makeCfg(),
+      provider: "openai",
+      model: "gpt-4.1-mini",
+      run: attempt,
+    });
+
+    expect(outcome.result).toBe("ok");
+    expect(attempt).toHaveBeenCalledTimes(2);
+    const fallbackCall = attempt.mock.calls[1];
+    expect(fallbackCall?.[0]).toBe("anthropic");
+    expect(fallbackCall?.[1]).toBe("claude-haiku-3-5");
+  });
+
   it("falls back on 402 payment required", async () => {
     const cfg = makeCfg();
     const run = vi
diff --git a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts
index 749a5241406..1b175e77b41 100644
--- a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts
+++ b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts
@@ -24,6 +24,11 @@ describe("classifyFailoverReason", () => {
     expect(classifyFailoverReason("invalid request format")).toBe("format");
     expect(classifyFailoverReason("credit balance too low")).toBe("billing");
     expect(classifyFailoverReason("deadline exceeded")).toBe("timeout");
+    expect(
+      classifyFailoverReason(
+        "521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
+      ),
+    ).toBe("timeout");
     expect(classifyFailoverReason("string should match pattern")).toBe("format");
     expect(classifyFailoverReason("bad request")).toBeNull();
     expect(
diff --git a/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts b/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts
index 137bf8536e3..8fd0ed1aff8 100644
--- a/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts
+++ b/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts
@@ -22,4 +22,16 @@ describe("formatRawAssistantErrorForUi", () => {
       "HTTP 500: Internal Server Error",
     );
   });
+
+  // The raw HTML body must not reach the UI text; only the status code is kept.
+  it("sanitizes HTML error pages into a clean unavailable message", () => {
+    const rendered = formatRawAssistantErrorForUi(`521 <!DOCTYPE html>
+<html lang="en-US">
+  <head><title>Web server is down | example.com | Cloudflare</title></head>
+  <body>Ray ID: abc123</body>
+</html>`);
+    const friendly =
+      "The AI service is temporarily unavailable (HTTP 521). Please try again in a moment.";
+    expect(rendered).toBe(friendly);
+  });
 });
diff --git a/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts b/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts
new file mode 100644
index 00000000000..ebdb22c6c5d
--- /dev/null
+++ b/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, it } from "vitest";
+import { isCloudflareOrHtmlErrorPage } from "./pi-embedded-helpers.js";
+
+describe("isCloudflareOrHtmlErrorPage", () => {
+  it("detects Cloudflare 521 HTML pages", () => {
+    const flagged = isCloudflareOrHtmlErrorPage(`521 <!DOCTYPE html>
+<html lang="en-US">
+  <head><title>Web server is down | example.com | Cloudflare</title></head>
+  <body><h1>Web server is down</h1></body>
+</html>`);
+    expect(flagged).toBe(true);
+  });
+
+  it("detects generic 5xx HTML pages", () => {
+    const page = `503 <html><head><title>Service Unavailable</title></head><body>down</body></html>`;
+    expect(isCloudflareOrHtmlErrorPage(page)).toBe(true);
+  });
+
+  it("does not flag non-HTML status lines", () => {
+    for (const statusLine of ["500 Internal Server Error", "429 Too Many Requests"]) {
+      expect(isCloudflareOrHtmlErrorPage(statusLine)).toBe(false);
+    }
+  });
+  // A doctype opener alone is not enough: this input has no closing </html> tag.
+  it("does not flag quoted HTML without a closing html tag", () => {
+    const partial = "500 <!DOCTYPE html> upstream responded with partial HTML text";
+    expect(isCloudflareOrHtmlErrorPage(partial)).toBe(false);
+  });
+});
diff --git a/src/agents/pi-embedded-helpers.istransienthttperror.test.ts b/src/agents/pi-embedded-helpers.istransienthttperror.test.ts
new file mode 100644
index 00000000000..faaf4a20139
--- /dev/null
+++ b/src/agents/pi-embedded-helpers.istransienthttperror.test.ts
@@ -0,0 +1,18 @@
+import { describe, expect, it } from "vitest";
+import { isTransientHttpError } from "./pi-embedded-helpers.js";
+
+// Pins which leading status codes isTransientHttpError accepts and rejects.
+describe("isTransientHttpError", () => {
+  it("returns true for retryable 5xx status codes", () => {
+    const plain = ["500 Internal Server Error", "502 Bad Gateway", "503 Service Unavailable"];
+    for (const raw of [...plain, "521 <!DOCTYPE html><html></html>", "529 Overloaded"]) {
+      expect(isTransientHttpError(raw)).toBe(true);
+    }
+  });
+
+  it("returns false for non-retryable or non-http text", () => {
+    for (const raw of ["504 Gateway Timeout", "429 Too Many Requests", "network timeout"]) {
+      expect(isTransientHttpError(raw)).toBe(false);
+    }
+  });
+});
diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts
index f8fb4f0ec5a..e468843aec6 100644
--- a/src/agents/pi-embedded-helpers.ts
+++ b/src/agents/pi-embedded-helpers.ts
@@ -17,6 +17,7 @@ export {
   parseApiErrorInfo,
   sanitizeUserFacingText,
   isBillingErrorMessage,
+  isCloudflareOrHtmlErrorPage,
   isCloudCodeAssistFormatError,
   isCompactionFailureError,
   isContextOverflowError,
@@ -29,6 +30,7 @@ export {
   isRawApiErrorPayload,
   isRateLimitAssistantError,
   isRateLimitErrorMessage,
+  isTransientHttpError,
   isTimeoutErrorMessage,
   parseImageDimensionError,
   parseImageSizeError,
diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts
index 4865833cd71..12461074fa6 100644
--- a/src/agents/pi-embedded-helpers/errors.ts
+++ b/src/agents/pi-embedded-helpers/errors.ts
@@ -78,6 +78,10 @@ const ERROR_PREFIX_RE =
 const CONTEXT_OVERFLOW_ERROR_HEAD_RE =
   /^(?:context overflow:|request_too_large\b|request size exceeds\b|request exceeds the maximum size\b|context length exceeded\b|maximum context length\b|prompt is too long\b|exceeds model context window\b)/i;
 const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i;
+const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i; // leading status code; trailing text is optional and may span lines
+const HTML_ERROR_PREFIX_RE = /^\s*(?:<!doctype\s+html\b|<html\b)/i; // text that opens with a doctype or an <html> tag
+const CLOUDFLARE_HTML_ERROR_CODES = new Set([521, 522, 523, 524, 525, 526, 530]); // treated as an error page without inspecting the body
+const TRANSIENT_HTTP_ERROR_CODES = new Set([500, 502, 503, 521, 522, 523, 524, 529]); // status codes isTransientHttpError accepts
 const HTTP_ERROR_HINTS = [
   "error",
   "bad request",
@@ -96,6 +100,50 @@ const HTTP_ERROR_HINTS = [
   "permission",
 ];
 
+/**
+ * Splits a leading 3-digit HTTP status from the text after it; null when none is present.
+ */
+function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null {
+  const match = raw.match(HTTP_STATUS_CODE_PREFIX_RE);
+  if (!match) {
+    return null;
+  }
+  // Capture group 1 is exactly three digits, so Number() can never yield NaN here.
+  return { code: Number(match[1]), rest: (match[2] ?? "").trim() };
+}
+
+/**
+ * Reports whether `raw` is a 5xx status line that either carries one of the
+ * Cloudflare-specific codes or is followed by a complete HTML document.
+ */
+export function isCloudflareOrHtmlErrorPage(raw: string): boolean {
+  const text = raw.trim();
+  const parsed = text ? extractLeadingHttpStatus(text) : null;
+  if (parsed === null || parsed.code < 500) {
+    return false;
+  }
+  const { code, rest: body } = parsed;
+  // Cloudflare-specific codes count on their own; the body is not inspected.
+  if (CLOUDFLARE_HTML_ERROR_CODES.has(code)) {
+    return true;
+  }
+  // Otherwise require an HTML opener at the start and a closing </html> tag, so
+  // a status line that merely quotes an HTML fragment is not flagged.
+  return code < 600 && HTML_ERROR_PREFIX_RE.test(body) && /<\/html>/i.test(body);
+}
+
+/**
+ * Reports whether `raw` begins with an HTTP status code listed in
+ * TRANSIENT_HTTP_ERROR_CODES. Blank input and text without a leading
+ * status code yield false.
+ */
+export function isTransientHttpError(raw: string): boolean {
+  const text = raw.trim();
+  const parsed = text.length > 0 ? extractLeadingHttpStatus(text) : null;
+  // A null parse (blank input or no status prefix) short-circuits to false.
+  return parsed !== null && TRANSIENT_HTTP_ERROR_CODES.has(parsed.code);
+}
+
 function stripFinalTagsFromText(text: string): string {
   if (!text) {
     return text;
@@ -133,6 +181,9 @@ function collapseConsecutiveDuplicateBlocks(text: string): string {
 }
 
 function isLikelyHttpErrorText(raw: string): boolean {
+  if (isCloudflareOrHtmlErrorPage(raw)) {
+    return true;
+  }
   const match = raw.match(HTTP_STATUS_PREFIX_RE);
   if (!match) {
     return false;
@@ -311,6 +362,11 @@ export function formatRawAssistantErrorForUi(raw?: string): string {
     return "LLM request failed with an unknown error.";
   }
 
+  const leadingStatus = extractLeadingHttpStatus(trimmed);
+  if (leadingStatus && isCloudflareOrHtmlErrorPage(trimmed)) {
+    return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`;
+  }
+
   const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE);
   if (httpMatch) {
     const rest = httpMatch[2].trim();
@@ -641,6 +697,10 @@ export function classifyFailoverReason(raw: string): FailoverReason | null {
   if (isImageSizeError(raw)) {
     return null;
   }
+  if (isTransientHttpError(raw)) {
+    // Treat transient 5xx provider failures as retryable transport issues.
+    return "timeout";
+  }
   if (isRateLimitErrorMessage(raw)) {
     return "rate_limit";
   }
diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts
index 0979f31ccdb..c1e1b4c66cd 100644
--- a/src/auto-reply/reply/agent-runner-execution.ts
+++ b/src/auto-reply/reply/agent-runner-execution.ts
@@ -14,6 +14,7 @@ import {
   isCompactionFailureError,
   isContextOverflowError,
   isLikelyContextOverflowError,
+  isTransientHttpError,
   sanitizeUserFacingText,
 } from "../../agents/pi-embedded-helpers.js";
 import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js";
@@ -79,6 +80,7 @@ export async function runAgentTurnWithFallback(params: {
   storePath?: string;
   resolvedVerboseLevel: VerboseLevel;
 }): Promise<AgentRunLoopResult> {
+  const TRANSIENT_HTTP_RETRY_DELAY_MS = 2_500;
   let didLogHeartbeatStrip = false;
   let autoCompactionCompleted = false;
   // Track payloads sent directly (not via pipeline) during tool flush to avoid duplicates.
@@ -97,6 +99,7 @@ export async function runAgentTurnWithFallback(params: {
   let fallbackProvider = params.followupRun.run.provider;
   let fallbackModel = params.followupRun.run.model;
   let didResetAfterCompactionFailure = false;
+  let didRetryTransientHttpError = false;
 
   while (true) {
     try {
@@ -506,6 +509,7 @@ export async function runAgentTurnWithFallback(params: {
       const isCompactionFailure = isCompactionFailureError(message);
       const isSessionCorruption = /function call turn comes immediately after/i.test(message);
       const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message);
+      const isTransientHttp = isTransientHttpError(message);
 
       if (
         isCompactionFailure &&
@@ -577,8 +581,26 @@ export async function runAgentTurnWithFallback(params: {
         };
       }
 
+      if (isTransientHttp && !didRetryTransientHttpError) {
+        didRetryTransientHttpError = true;
+        // Retry the full runWithModelFallback() cycle — transient errors
+        // (502/521/etc.) typically affect the whole provider, so falling
+        // back to an alternate model first would not help. Instead we wait
+        // and retry the complete primary→fallback chain.
+        defaultRuntime.error(
+          `Transient HTTP provider error before reply (${message}). Retrying once in ${TRANSIENT_HTTP_RETRY_DELAY_MS}ms.`,
+        );
+        await new Promise<void>((resolve) => {
+          setTimeout(resolve, TRANSIENT_HTTP_RETRY_DELAY_MS);
+        });
+        continue;
+      }
+
       defaultRuntime.error(`Embedded agent failed before reply: ${message}`);
-      const trimmedMessage = message.replace(/\.\s*$/, "");
+      const safeMessage = isTransientHttp
+        ? sanitizeUserFacingText(message, { errorContext: true })
+        : message;
+      const trimmedMessage = safeMessage.replace(/\.\s*$/, "");
       const fallbackText = isContextOverflow
         ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model."
         : isRoleOrderingError
diff --git a/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts b/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts
new file mode 100644
index 00000000000..5f21a40a9cc
--- /dev/null
+++ b/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts
@@ -0,0 +1,136 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import type { TemplateContext } from "../templating.js";
+import type { FollowupRun, QueueSettings } from "./queue.js";
+import { createMockTypingController } from "./test-helpers.js";
+
+const runEmbeddedPiAgentMock = vi.fn(); // receives every runEmbeddedPiAgent call via the pi-embedded mock below
+const runtimeErrorMock = vi.fn(); // receives every defaultRuntime.error call via the runtime mock below
+// Stub fallback: call run() once with the requested provider/model and echo both back.
+vi.mock("../../agents/model-fallback.js", () => ({
+  runWithModelFallback: async ({
+    provider,
+    model,
+    run,
+  }: {
+    provider: string;
+    model: string;
+    run: (provider: string, model: string) => Promise<unknown>;
+  }) => ({
+    result: await run(provider, model),
+    provider,
+    model,
+  }),
+}));
+// The arrow wrapper reads runEmbeddedPiAgentMock only when the agent is actually invoked.
+vi.mock("../../agents/pi-embedded.js", () => ({
+  queueEmbeddedPiMessage: vi.fn().mockReturnValue(false),
+  runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params),
+}));
+// Forward defaultRuntime.error to runtimeErrorMock so the test can assert on the retry log.
+vi.mock("../../runtime.js", () => ({
+  defaultRuntime: {
+    log: vi.fn(),
+    error: (...args: unknown[]) => runtimeErrorMock(...args),
+    exit: vi.fn(),
+  },
+}));
+// Keep the real queue module but stub enqueueFollowupRun and scheduleFollowupDrain.
+vi.mock("./queue.js", async () => {
+  const actual = await vi.importActual<typeof import("./queue.js")>("./queue.js");
+  return {
+    ...actual,
+    enqueueFollowupRun: vi.fn(),
+    scheduleFollowupDrain: vi.fn(),
+  };
+});
+
+import { runReplyAgent } from "./agent-runner.js";
+
+// Exercises the single retry that follows a transient HTTP provider error.
+describe("runReplyAgent transient HTTP retry", () => {
+  beforeEach(() => {
+    vi.useFakeTimers();
+    runtimeErrorMock.mockReset();
+    runEmbeddedPiAgentMock.mockReset();
+  });
+
+  // Undo the fake timers installed by beforeEach.
+  afterEach(() => {
+    vi.useRealTimers();
+  });
+
+  it("retries once after transient 521 HTML failure and then succeeds", async () => {
+    // First agent call rejects with a Cloudflare 521 page; the second one resolves.
+    const cloudflare521 = new Error(
+      `521 <!DOCTYPE html><html lang="en-US"><head><title>Web server is down</title></head><body>Cloudflare</body></html>`,
+    );
+    const recovered = {
+      payloads: [{ text: "Recovered response" }],
+      meta: {},
+    };
+    runEmbeddedPiAgentMock.mockRejectedValueOnce(cloudflare521).mockResolvedValueOnce(recovered);
+
+    // Partial fixtures, cast through unknown to stand in for the project types.
+    const bashElevated = { enabled: false, allowed: false, defaultLevel: "off" };
+    const run = {
+      sessionId: "session",
+      sessionKey: "main",
+      messageProvider: "telegram",
+      sessionFile: "/tmp/session.jsonl",
+      workspaceDir: "/tmp",
+      config: {},
+      skillsSnapshot: {},
+      provider: "anthropic",
+      model: "claude",
+      thinkLevel: "low",
+      verboseLevel: "off",
+      elevatedLevel: "off",
+      bashElevated,
+      timeoutMs: 1_000,
+      blockReplyBreak: "message_end",
+    };
+    const followupRun = {
+      prompt: "hello",
+      summaryLine: "hello",
+      enqueuedAt: Date.now(),
+      run,
+    } as unknown as FollowupRun;
+    const sessionCtx = {
+      Provider: "telegram",
+      MessageSid: "msg",
+    } as unknown as TemplateContext;
+    const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings;
+
+    const pendingReply = runReplyAgent({
+      commandBody: "hello",
+      followupRun,
+      queueKey: "main",
+      resolvedQueue,
+      shouldSteer: false,
+      shouldFollowup: false,
+      isActive: false,
+      isStreaming: false,
+      typing: createMockTypingController(),
+      sessionCtx,
+      defaultModel: "anthropic/claude-opus-4-5",
+      resolvedVerboseLevel: "off",
+      isNewSession: false,
+      blockStreamingEnabled: false,
+      resolvedBlockStreamingBreak: "message_end",
+      shouldInjectGroupIntro: false,
+      typingMode: "instant",
+    });
+
+    // Move the fake clock forward 2.5s so the pending retry timer can fire.
+    await vi.advanceTimersByTimeAsync(2_500);
+    const reply = await pendingReply;
+
+    expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(2);
+    const retryLog = expect.stringContaining("Transient HTTP provider error before reply");
+    expect(runtimeErrorMock).toHaveBeenCalledWith(retryLog);
+
+    // Handles both an array of payloads and a single payload.
+    const firstPayload = Array.isArray(reply) ? reply[0] : reply;
+    expect(firstPayload?.text).toContain("Recovered response");
+  });
+});
diff --git a/src/memory/qmd-manager.test.ts b/src/memory/qmd-manager.test.ts
index 7af091b8b30..e8396802862 100644
--- a/src/memory/qmd-manager.test.ts
+++ b/src/memory/qmd-manager.test.ts
@@ -387,7 +387,7 @@ describe("QmdMemoryManager", () => {
       );
     expect(searchAndQueryCalls).toEqual([
       ["search", "test", "--json"],
-      ["query", "test", "--json", "-n", String(maxResults)],
+      ["query", "test", "--json", "-n", String(maxResults), "-c", "workspace"],
     ]);
     await manager.close();
   });