Files
moltbot/src/shared/assistant-error-format.ts
Chris Yau 36dd58ac2a Prevent Codex HTML challenge pages from looking like DNS failures
Cloudflare challenge pages from chatgpt.com/backend-api can arrive as raw HTML without an HTTP status prefix. The transport sanitizer scanned for generic "dns" substrings before HTML detection, so these pages could surface as DNS lookup failures instead of the existing HTML/CDN block message.

Constraint: Must preserve DNS transport classification for real ENOTFOUND/getaddrinfo failures
Rejected: Treat every bare HTML document as an upstream HTML error | too broad for arbitrary model text/errors
Confidence: high
Scope-risk: narrow
Directive: Keep standalone HTML challenge detection ahead of generic transport keyword matching so CDN block pages do not regress into DNS copy
Tested: oxfmt --check on changed files; targeted node --import tsx verification for standalone Cloudflare HTML classification and DNS control case
Not-tested: Full Vitest shard run in this environment
2026-04-16 12:47:12 -07:00

214 lines
6.0 KiB
TypeScript

// Matches a leading error label at the start of the text: "error", "api error"
// (optionally preceded by one extra word), "apierror", "openai error",
// "anthropic error", "gateway error" or "codex error"; then an optional
// whitespace-separated 3-digit code; then one or more ":", whitespace or "-"
// characters. Case-insensitive.
const ERROR_PAYLOAD_PREFIX_RE =
/^(?:error|(?:[a-z][\w-]*\s+)?api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error|codex\s*error)(?:\s+\d{3})?[:\s-]+/i;
// Separator between a status code and the text after it: a colon with optional
// surrounding whitespace, or whitespace alone.
const HTTP_STATUS_DELIMITER_RE = /(?:\s*:\s*|\s+)/;
// Optional "http", a 3-digit code (capture 1), the delimiter, then a non-empty
// remainder (capture 2). `.` is used without the `s` flag, so capture 2 cannot
// contain line terminators.
const HTTP_STATUS_PREFIX_RE = new RegExp(
`^(?:http\\s*)?(\\d{3})${HTTP_STATUS_DELIMITER_RE.source}(.+)$`,
"i",
);
// Same prefix as above, but the delimiter + remainder is optional (a bare code
// matches) and the remainder (capture 2) may span multiple lines (`[\s\S]+`).
const HTTP_STATUS_CODE_PREFIX_RE = new RegExp(
`^(?:http\\s*)?(\\d{3})(?:${HTTP_STATUS_DELIMITER_RE.source}([\\s\\S]+))?$`,
"i",
);
// Text that begins (after optional whitespace) with `<!doctype html` or `<html`.
const HTML_ERROR_PREFIX_RE = /^\s*(?:<!doctype\s+html\b|<html\b)/i;
// Text that contains a closing `</html>` tag anywhere.
const HTML_CLOSE_RE = /<\/html>/i;
// Status codes that isCloudflareOrHtmlErrorPage accepts on their own, without
// inspecting the text that follows the code.
const CLOUDFLARE_HTML_ERROR_CODES = new Set([521, 522, 523, 524, 525, 526, 530]);
// Phrases that must appear in an HTML document that has no status prefix before
// it is classified as a CDN/gateway error page. Case-insensitive.
const STANDALONE_HTML_ERROR_HINT_RE =
/\bcloudflare\b|cdn-cgi\/challenge-platform|challenge-error-text|enable javascript and cookies to continue|access denied|forbidden|service unavailable|bad gateway|web server is down|captcha|attention required/i;
// Loosely typed object produced by JSON-parsing an error body.
type ErrorPayload = Record<string, unknown>;
// Normalized fields that parseApiErrorInfo extracts from a raw error string.
export type ApiErrorInfo = {
// Leading 3-digit status code of the raw text, kept as a string.
httpCode?: string;
// Nested `error.type` (or `error.code`), falling back to the top-level `type`.
type?: string;
// Nested `error.message`, falling back to the top-level `message`.
message?: string;
// Top-level `request_id`, or `requestId` when the former is not a string.
requestId?: string;
};
/**
 * Type guard deciding whether a parsed JSON value is shaped like an API error
 * payload.
 *
 * A value qualifies when it is a non-array object and at least one of these
 * holds:
 * - its `type` is exactly `"error"`;
 * - it has a string `request_id` or `requestId`;
 * - its `error` member is a non-array object with a string `message`, `type`
 *   or `code`.
 *
 * @param payload - Any value, typically the result of `JSON.parse`.
 * @returns `true` when the value looks like an error payload.
 */
function isErrorPayloadObject(payload: unknown): payload is ErrorPayload {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  if (!isPlainObject(payload)) {
    return false;
  }

  const hasTopLevelMarker =
    payload.type === "error" ||
    typeof payload.request_id === "string" ||
    typeof payload.requestId === "string";
  if (hasTopLevelMarker) {
    return true;
  }

  // A missing `error` member reads as undefined and is rejected here as well.
  const nested = payload.error;
  if (!isPlainObject(nested)) {
    return false;
  }
  return ["message", "type", "code"].some((key) => typeof nested[key] === "string");
}
/**
 * Tries to recover a JSON error payload from raw error text.
 *
 * Two candidates are attempted in order: the trimmed text itself and, when the
 * text starts with a recognized error label (see ERROR_PAYLOAD_PREFIX_RE), the
 * text with that label removed. A candidate is only parsed when it starts with
 * `{` and ends with `}`; the first one that parses and passes
 * isErrorPayloadObject is returned.
 *
 * @param raw - Raw error text; may be undefined or empty.
 * @returns The parsed payload, or `null` when nothing usable was found.
 */
export function parseApiErrorPayload(raw?: string): ErrorPayload | null {
  const text = raw?.trim();
  if (!text) {
    return null;
  }

  const withoutLabel = ERROR_PAYLOAD_PREFIX_RE.test(text)
    ? [text.replace(ERROR_PAYLOAD_PREFIX_RE, "").trim()]
    : [];

  for (const candidate of [text, ...withoutLabel]) {
    const looksLikeJsonObject = candidate.startsWith("{") && candidate.endsWith("}");
    if (!looksLikeJsonObject) {
      continue;
    }
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Malformed JSON: fall through to the next candidate.
      continue;
    }
    if (isErrorPayloadObject(parsed)) {
      return parsed;
    }
  }
  return null;
}
/**
 * Splits text of the form `[http] NNN[<delimiter> rest]` into its status code
 * and the remaining text.
 *
 * @param raw - Text to inspect; it is matched as given, without trimming.
 * @returns The numeric code and the trimmed remainder (empty string when only
 *   a code is present), or `null` when the text has no leading status code.
 */
export function extractLeadingHttpStatus(raw: string): { code: number; rest: string } | null {
  const parts = HTTP_STATUS_CODE_PREFIX_RE.exec(raw);
  if (parts === null) {
    return null;
  }
  // The remainder group is optional in the pattern, hence the default.
  const [, digits, remainder = ""] = parts;
  const code = Number(digits);
  return Number.isFinite(code) ? { code, rest: remainder.trim() } : null;
}
/**
 * Detects error text that is really a CDN/gateway error page rather than an
 * API response.
 *
 * Returns `true` when any of the following holds for the trimmed text:
 * - it is a complete HTML document (opening doctype/html tag plus `</html>`)
 *   that also contains one of the STANDALONE_HTML_ERROR_HINT_RE phrases;
 * - it starts with a status code listed in CLOUDFLARE_HTML_ERROR_CODES;
 * - it starts with any other 5xx status code followed by a complete HTML
 *   document.
 *
 * The standalone HTML check runs before status extraction, so a page without
 * a status prefix is still recognized.
 *
 * @param raw - Raw error text.
 * @returns Whether the text should be treated as an HTML/CDN error page.
 */
export function isCloudflareOrHtmlErrorPage(raw: string): boolean {
  const text = raw.trim();
  if (text.length === 0) {
    return false;
  }

  const isHtmlDocument = (value: string): boolean =>
    HTML_ERROR_PREFIX_RE.test(value) && HTML_CLOSE_RE.test(value);

  if (isHtmlDocument(text) && STANDALONE_HTML_ERROR_HINT_RE.test(text)) {
    return true;
  }

  const status = extractLeadingHttpStatus(text);
  if (status === null || status.code < 500) {
    return false;
  }
  return (
    CLOUDFLARE_HTML_ERROR_CODES.has(status.code) ||
    (status.code < 600 && isHtmlDocument(status.rest))
  );
}
/**
 * Extracts structured error details (status code, type, message, request id)
 * from raw error text that carries a JSON error payload.
 *
 * A leading status code is recognized through extractLeadingHttpStatus, so
 * `429 {...}`, `429: {...}` and `HTTP 429: {...}` are all accepted. Previously
 * only `NNN<whitespace>{...}` was recognized, which meant the JSON bodies that
 * formatRawAssistantErrorForUi defers to this function after matching an
 * `HTTP NNN:` prefix could never be parsed.
 *
 * Field precedence:
 * - `type`: nested `error.type`, else nested `error.code`, else top-level `type`;
 * - `message`: nested `error.message`, else top-level `message`;
 * - `requestId`: top-level `request_id`, else top-level `requestId`.
 *
 * @param raw - Raw error text; may be undefined or empty.
 * @returns The extracted details, or `null` when the text is empty or holds no
 *   recognizable error payload.
 */
export function parseApiErrorInfo(raw?: string): ApiErrorInfo | null {
  const trimmed = raw?.trim();
  if (!trimmed) {
    return null;
  }

  let httpCode: string | undefined;
  let candidate = trimmed;
  const status = extractLeadingHttpStatus(trimmed);
  // Only strip the prefix when something follows it; a bare code has no payload.
  if (status && status.rest) {
    // The helper matched exactly three digits; re-pad so the string form keeps
    // any leading zero exactly as it appeared in the input.
    httpCode = String(status.code).padStart(3, "0");
    candidate = status.rest;
  }

  const payload = parseApiErrorPayload(candidate);
  if (!payload) {
    return null;
  }

  const requestId =
    typeof payload.request_id === "string"
      ? payload.request_id
      : typeof payload.requestId === "string"
        ? payload.requestId
        : undefined;
  const topType = typeof payload.type === "string" ? payload.type : undefined;
  const topMessage = typeof payload.message === "string" ? payload.message : undefined;

  let errType: string | undefined;
  let errMessage: string | undefined;
  if (payload.error && typeof payload.error === "object" && !Array.isArray(payload.error)) {
    const err = payload.error as Record<string, unknown>;
    if (typeof err.type === "string") {
      errType = err.type;
    }
    // `code` only stands in when `type` is missing or empty.
    if (typeof err.code === "string" && !errType) {
      errType = err.code;
    }
    if (typeof err.message === "string") {
      errMessage = err.message;
    }
  }

  return {
    httpCode,
    type: errType ?? topType,
    message: errMessage ?? topMessage,
    requestId,
  };
}
/**
 * Turns raw provider/transport error text into a short message suitable for
 * display to the user.
 *
 * Resolution order:
 * 1. Empty input yields a generic "unknown error" message.
 * 2. HTML/CDN error pages (see isCloudflareOrHtmlErrorPage) yield a
 *    "temporarily unavailable" message when a status code leads the text, or
 *    an explanation about HTML error pages otherwise.
 * 3. `[HTTP] NNN <text>` where the text is not JSON yields `HTTP NNN: <text>`.
 * 4. A parseable JSON error payload yields `<prefix>[ <type>]: <message>`.
 * 5. Anything else is returned as-is, truncated to 600 characters with a
 *    trailing ellipsis so the reader can tell the text was cut.
 *
 * @param raw - Raw error text; may be undefined or empty.
 * @returns A single user-facing message; never empty.
 */
export function formatRawAssistantErrorForUi(raw?: string): string {
  const trimmed = (raw ?? "").trim();
  if (!trimmed) {
    return "LLM request failed with an unknown error.";
  }

  // Classify once; both HTML-page messages depend on the same check.
  if (isCloudflareOrHtmlErrorPage(trimmed)) {
    const leadingStatus = extractLeadingHttpStatus(trimmed);
    if (leadingStatus) {
      return `The AI service is temporarily unavailable (HTTP ${leadingStatus.code}). Please try again in a moment.`;
    }
    return (
      "The provider returned an HTML error page instead of an API response. " +
      "This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. " +
      "Retry in a moment or check provider status."
    );
  }

  const httpMatch = trimmed.match(HTTP_STATUS_PREFIX_RE);
  if (httpMatch) {
    const rest = httpMatch[2].trim();
    // JSON bodies are left for the structured parser below.
    if (!rest.startsWith("{")) {
      return `HTTP ${httpMatch[1]}: ${rest}`;
    }
  }

  const info = parseApiErrorInfo(trimmed);
  if (info?.message) {
    const prefix = info.httpCode ? `HTTP ${info.httpCode}` : "LLM error";
    const type = info.type ? ` ${info.type}` : "";
    return `${prefix}${type}: ${info.message}`;
  }

  const maxLength = 600;
  if (trimmed.length <= maxLength) {
    return trimmed;
  }
  // Do not cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it as well.
  const lastUnit = trimmed.charCodeAt(maxLength - 1);
  const end = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? maxLength - 1 : maxLength;
  // \u2026 is the horizontal ellipsis, marking the text as truncated.
  return `${trimmed.slice(0, end)}\u2026`;
}