diff --git a/src/agents/model-fallback.test.ts b/src/agents/model-fallback.test.ts index 2b40307217a..9100304533d 100644 --- a/src/agents/model-fallback.test.ts +++ b/src/agents/model-fallback.test.ts @@ -59,6 +59,30 @@ describe("runWithModelFallback", () => { expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); }); + it("falls back on transient HTTP 5xx errors", async () => { + const cfg = makeCfg(); + const run = vi + .fn() + .mockRejectedValueOnce( + new Error( + "521 Web server is downCloudflare", + ), + ) + .mockResolvedValueOnce("ok"); + + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + run, + }); + + expect(result.result).toBe("ok"); + expect(run).toHaveBeenCalledTimes(2); + expect(run.mock.calls[1]?.[0]).toBe("anthropic"); + expect(run.mock.calls[1]?.[1]).toBe("claude-haiku-3-5"); + }); + it("falls back on 402 payment required", async () => { const cfg = makeCfg(); const run = vi diff --git a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts index 749a5241406..1b175e77b41 100644 --- a/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts +++ b/src/agents/pi-embedded-helpers.classifyfailoverreason.test.ts @@ -24,6 +24,11 @@ describe("classifyFailoverReason", () => { expect(classifyFailoverReason("invalid request format")).toBe("format"); expect(classifyFailoverReason("credit balance too low")).toBe("billing"); expect(classifyFailoverReason("deadline exceeded")).toBe("timeout"); + expect( + classifyFailoverReason( + "521 Web server is downCloudflare", + ), + ).toBe("timeout"); expect(classifyFailoverReason("string should match pattern")).toBe("format"); expect(classifyFailoverReason("bad request")).toBeNull(); expect( diff --git a/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts b/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts index 137bf8536e3..8fd0ed1aff8 100644 --- a/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts +++ b/src/agents/pi-embedded-helpers.formatrawassistanterrorforui.test.ts @@ -22,4 +22,16 @@ describe("formatRawAssistantErrorForUi", () => { "HTTP 500: Internal Server Error", ); }); + + it("sanitizes HTML error pages into a clean unavailable message", () => { + const htmlError = `521 + + Web server is down | example.com | Cloudflare + Ray ID: abc123 +`; + + expect(formatRawAssistantErrorForUi(htmlError)).toBe( + "The AI service is temporarily unavailable (HTTP 521). Please try again in a moment.", + ); + }); }); diff --git a/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts b/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts new file mode 100644 index 00000000000..ebdb22c6c5d --- /dev/null +++ b/src/agents/pi-embedded-helpers.iscloudflareorhtmlerrorpage.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from "vitest"; +import { isCloudflareOrHtmlErrorPage } from "./pi-embedded-helpers.js"; + +describe("isCloudflareOrHtmlErrorPage", () => { + it("detects Cloudflare 521 HTML pages", () => { + const htmlError = `521 + + Web server is down | example.com | Cloudflare +

Web server is down

+`; + + expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true); + }); + + it("detects generic 5xx HTML pages", () => { + const htmlError = `503 Service Unavailabledown`; + expect(isCloudflareOrHtmlErrorPage(htmlError)).toBe(true); + }); + + it("does not flag non-HTML status lines", () => { + expect(isCloudflareOrHtmlErrorPage("500 Internal Server Error")).toBe(false); + expect(isCloudflareOrHtmlErrorPage("429 Too Many Requests")).toBe(false); + }); + + it("does not flag quoted HTML without a closing html tag", () => { + const plainTextWithHtmlPrefix = "500 upstream responded with partial HTML text"; + expect(isCloudflareOrHtmlErrorPage(plainTextWithHtmlPrefix)).toBe(false); + }); +}); diff --git a/src/agents/pi-embedded-helpers.istransienthttperror.test.ts b/src/agents/pi-embedded-helpers.istransienthttperror.test.ts new file mode 100644 index 00000000000..faaf4a20139 --- /dev/null +++ b/src/agents/pi-embedded-helpers.istransienthttperror.test.ts @@ -0,0 +1,18 @@ +import { describe, expect, it } from "vitest"; +import { isTransientHttpError } from "./pi-embedded-helpers.js"; + +describe("isTransientHttpError", () => { + it("returns true for retryable 5xx status codes", () => { + expect(isTransientHttpError("500 Internal Server Error")).toBe(true); + expect(isTransientHttpError("502 Bad Gateway")).toBe(true); + expect(isTransientHttpError("503 Service Unavailable")).toBe(true); + expect(isTransientHttpError("521 ")).toBe(true); + expect(isTransientHttpError("529 Overloaded")).toBe(true); + }); + + it("returns false for non-retryable or non-http text", () => { + expect(isTransientHttpError("504 Gateway Timeout")).toBe(false); + expect(isTransientHttpError("429 Too Many Requests")).toBe(false); + expect(isTransientHttpError("network timeout")).toBe(false); + }); +}); diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index f8fb4f0ec5a..e468843aec6 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -17,6 +17,7 @@ export { parseApiErrorInfo, sanitizeUserFacingText, isBillingErrorMessage, + isCloudflareOrHtmlErrorPage, isCloudCodeAssistFormatError, isCompactionFailureError, isContextOverflowError, @@ -29,6 +30,7 @@ export { isRawApiErrorPayload, isRateLimitAssistantError, isRateLimitErrorMessage, + isTransientHttpError, isTimeoutErrorMessage, parseImageDimensionError, parseImageSizeError, diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 4865833cd71..12461074fa6 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -78,6 +78,10 @@ const ERROR_PREFIX_RE = const CONTEXT_OVERFLOW_ERROR_HEAD_RE = /^(?:context overflow:|request_too_large\b|request size exceeds\b|request exceeds the maximum size\b|context length exceeded\b|maximum context length\b|prompt is too long\b|exceeds model context window\b)/i; const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i; +const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i; +const HTML_ERROR_PREFIX_RE = /^\s*(?:/i.test(status.rest) + ); +} + +export function isTransientHttpError(raw: string): boolean { + const trimmed = raw.trim(); + if (!trimmed) { + return false; + } + const status = extractLeadingHttpStatus(trimmed); + if (!status) { + return false; + } + return TRANSIENT_HTTP_ERROR_CODES.has(status.code); +} + function stripFinalTagsFromText(text: string): string { if (!text) { return text; @@ -133,6 +181,9 @@ function collapseConsecutiveDuplicateBlocks(text: string): string { } function isLikelyHttpErrorText(raw: string): boolean { + if (isCloudflareOrHtmlErrorPage(raw)) { + return true; + } const match = raw.match(HTTP_STATUS_PREFIX_RE); if (!match) { return false; @@ -311,6 +362,11 @@ export function formatRawAssistantErrorForUi(raw?: string): string { return "LLM request failed with an unknown error."; } + const leadingStatus = extractLeadingHttpStatus(trimmed); + if (leadingStatus && isCloudflareOrHtmlErrorPage(trimmed)) { + return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`; + } + const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE); if (httpMatch) { const rest = httpMatch[2].trim(); @@ -641,6 +697,10 @@ export function classifyFailoverReason(raw: string): FailoverReason | null { if (isImageSizeError(raw)) { return null; } + if (isTransientHttpError(raw)) { + // Treat transient 5xx provider failures as retryable transport issues. + return "timeout"; + } if (isRateLimitErrorMessage(raw)) { return "rate_limit"; } diff --git a/src/auto-reply/reply/agent-runner-execution.ts b/src/auto-reply/reply/agent-runner-execution.ts index 0979f31ccdb..c1e1b4c66cd 100644 --- a/src/auto-reply/reply/agent-runner-execution.ts +++ b/src/auto-reply/reply/agent-runner-execution.ts @@ -14,6 +14,7 @@ import { isCompactionFailureError, isContextOverflowError, isLikelyContextOverflowError, + isTransientHttpError, sanitizeUserFacingText, } from "../../agents/pi-embedded-helpers.js"; import { runEmbeddedPiAgent } from "../../agents/pi-embedded.js"; @@ -79,6 +80,7 @@ export async function runAgentTurnWithFallback(params: { storePath?: string; resolvedVerboseLevel: VerboseLevel; }): Promise { + const TRANSIENT_HTTP_RETRY_DELAY_MS = 2_500; let didLogHeartbeatStrip = false; let autoCompactionCompleted = false; // Track payloads sent directly (not via pipeline) during tool flush to avoid duplicates. @@ -97,6 +99,7 @@ export async function runAgentTurnWithFallback(params: { let fallbackProvider = params.followupRun.run.provider; let fallbackModel = params.followupRun.run.model; let didResetAfterCompactionFailure = false; + let didRetryTransientHttpError = false; while (true) { try { @@ -506,6 +509,7 @@ export async function runAgentTurnWithFallback(params: { const isCompactionFailure = isCompactionFailureError(message); const isSessionCorruption = /function call turn comes immediately after/i.test(message); const isRoleOrderingError = /incorrect role information|roles must alternate/i.test(message); + const isTransientHttp = isTransientHttpError(message); if ( isCompactionFailure && @@ -577,8 +581,26 @@ export async function runAgentTurnWithFallback(params: { }; } + if (isTransientHttp && !didRetryTransientHttpError) { + didRetryTransientHttpError = true; + // Retry the full runWithModelFallback() cycle — transient errors + // (502/521/etc.) typically affect the whole provider, so falling + // back to an alternate model first would not help. Instead we wait + // and retry the complete primary→fallback chain. + defaultRuntime.error( + `Transient HTTP provider error before reply (${message}). Retrying once in ${TRANSIENT_HTTP_RETRY_DELAY_MS}ms.`, + ); + await new Promise((resolve) => { + setTimeout(resolve, TRANSIENT_HTTP_RETRY_DELAY_MS); + }); + continue; + } + defaultRuntime.error(`Embedded agent failed before reply: ${message}`); - const trimmedMessage = message.replace(/\.\s*$/, ""); + const safeMessage = isTransientHttp + ? sanitizeUserFacingText(message, { errorContext: true }) + : message; + const trimmedMessage = safeMessage.replace(/\.\s*$/, ""); const fallbackText = isContextOverflow ? "⚠️ Context overflow — prompt too large for this model. Try a shorter message or a larger-context model." : isRoleOrderingError diff --git a/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts b/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts new file mode 100644 index 00000000000..5f21a40a9cc --- /dev/null +++ b/src/auto-reply/reply/agent-runner.transient-http-retry.test.ts @@ -0,0 +1,136 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { TemplateContext } from "../templating.js"; +import type { FollowupRun, QueueSettings } from "./queue.js"; +import { createMockTypingController } from "./test-helpers.js"; + +const runEmbeddedPiAgentMock = vi.fn(); +const runtimeErrorMock = vi.fn(); + +vi.mock("../../agents/model-fallback.js", () => ({ + runWithModelFallback: async ({ + provider, + model, + run, + }: { + provider: string; + model: string; + run: (provider: string, model: string) => Promise; + }) => ({ + result: await run(provider, model), + provider, + model, + }), +})); + +vi.mock("../../agents/pi-embedded.js", () => ({ + queueEmbeddedPiMessage: vi.fn().mockReturnValue(false), + runEmbeddedPiAgent: (params: unknown) => runEmbeddedPiAgentMock(params), +})); + +vi.mock("../../runtime.js", () => ({ + defaultRuntime: { + log: vi.fn(), + error: (...args: unknown[]) => runtimeErrorMock(...args), + exit: vi.fn(), + }, +})); + +vi.mock("./queue.js", async () => { + const actual = await vi.importActual("./queue.js"); + return { + ...actual, + enqueueFollowupRun: vi.fn(), + scheduleFollowupDrain: vi.fn(), + }; +}); + +import { runReplyAgent } from "./agent-runner.js"; + +describe("runReplyAgent transient HTTP retry", () => { + beforeEach(() => { + runEmbeddedPiAgentMock.mockReset(); + runtimeErrorMock.mockReset(); + vi.useFakeTimers(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + it("retries once after transient 521 HTML failure and then succeeds", async () => { + runEmbeddedPiAgentMock + .mockRejectedValueOnce( + new Error( + `521 Web server is downCloudflare`, + ), + ) + .mockResolvedValueOnce({ + payloads: [{ text: "Recovered response" }], + meta: {}, + }); + + const typing = createMockTypingController(); + const sessionCtx = { + Provider: "telegram", + MessageSid: "msg", + } as unknown as TemplateContext; + const resolvedQueue = { mode: "interrupt" } as unknown as QueueSettings; + const followupRun = { + prompt: "hello", + summaryLine: "hello", + enqueuedAt: Date.now(), + run: { + sessionId: "session", + sessionKey: "main", + messageProvider: "telegram", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp", + config: {}, + skillsSnapshot: {}, + provider: "anthropic", + model: "claude", + thinkLevel: "low", + verboseLevel: "off", + elevatedLevel: "off", + bashElevated: { + enabled: false, + allowed: false, + defaultLevel: "off", + }, + timeoutMs: 1_000, + blockReplyBreak: "message_end", + }, + } as unknown as FollowupRun; + + const runPromise = runReplyAgent({ + commandBody: "hello", + followupRun, + queueKey: "main", + resolvedQueue, + shouldSteer: false, + shouldFollowup: false, + isActive: false, + isStreaming: false, + typing, + sessionCtx, + defaultModel: "anthropic/claude-opus-4-5", + resolvedVerboseLevel: "off", + isNewSession: false, + blockStreamingEnabled: false, + resolvedBlockStreamingBreak: "message_end", + shouldInjectGroupIntro: false, + typingMode: "instant", + }); + + await vi.advanceTimersByTimeAsync(2_500); + const result = await runPromise; + + expect(runEmbeddedPiAgentMock).toHaveBeenCalledTimes(2); + expect(runtimeErrorMock).toHaveBeenCalledWith( + expect.stringContaining("Transient HTTP provider error before reply"), + ); + + const payload = Array.isArray(result) ? result[0] : result; + expect(payload?.text).toContain("Recovered response"); + }); +}); diff --git a/src/memory/qmd-manager.test.ts b/src/memory/qmd-manager.test.ts index 7af091b8b30..e8396802862 100644 --- a/src/memory/qmd-manager.test.ts +++ b/src/memory/qmd-manager.test.ts @@ -387,7 +387,7 @@ describe("QmdMemoryManager", () => { ); expect(searchAndQueryCalls).toEqual([ ["search", "test", "--json"], - ["query", "test", "--json", "-n", String(maxResults)], + ["query", "test", "--json", "-n", String(maxResults), "-c", "workspace"], ]); await manager.close(); });