From 00a08908892d1743d1fc52e5cbd9499dd5da2fe0 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 14 Feb 2026 15:13:12 +0100 Subject: [PATCH] fix(media): bound input media payload sizes --- CHANGELOG.md | 1 + src/media/input-files.fetch-guard.test.ts | 93 +++++++++++++++++++++++ src/media/input-files.ts | 78 +++++++++++++++++-- 3 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 src/media/input-files.fetch-guard.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f231461890..82209923a72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai ### Fixes - Security/Skills: harden archive extraction for download-installed skills to prevent path traversal outside the target directory. Thanks @markmusson. +- Security/Media: stream and bound URL-backed input media fetches to prevent memory exhaustion from oversized responses. Thanks @vincentkoc. - Security/Signal: harden signal-cli archive extraction during install to prevent path traversal outside the install root. - Security/Hooks: restrict hook transform modules to `~/.openclaw/hooks/transforms` (prevents path traversal/escape module loads via config). Config note: `hooks.transformsDir` must now be within that directory. Thanks @akhmittra. - Security/Hooks: ignore hook package manifest entries that point outside the package directory (prevents out-of-tree handler loads during hook discovery). 
diff --git a/src/media/input-files.fetch-guard.test.ts b/src/media/input-files.fetch-guard.test.ts new file mode 100644 index 00000000000..e4ab10b7a9b --- /dev/null +++ b/src/media/input-files.fetch-guard.test.ts @@ -0,0 +1,93 @@ +import { describe, expect, it, vi } from "vitest"; + +const fetchWithSsrFGuardMock = vi.fn(); + +vi.mock("../infra/net/fetch-guard.js", () => ({ + fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args), +})); + +describe("fetchWithGuard", () => { + it("rejects oversized streamed payloads and cancels the stream", async () => { + let canceled = false; + let pulls = 0; + const stream = new ReadableStream({ + start(controller) { + controller.enqueue(new Uint8Array([1, 2, 3, 4])); + }, + pull(controller) { + pulls += 1; + if (pulls === 1) { + controller.enqueue(new Uint8Array([5, 6, 7, 8])); + } + // keep stream open; cancel() should stop it once maxBytes exceeded + }, + cancel() { + canceled = true; + }, + }); + + const release = vi.fn(async () => {}); + fetchWithSsrFGuardMock.mockResolvedValueOnce({ + response: new Response(stream, { + status: 200, + headers: { "content-type": "application/octet-stream" }, + }), + release, + finalUrl: "https://example.com/file.bin", + }); + + const { fetchWithGuard } = await import("./input-files.js"); + await expect( + fetchWithGuard({ + url: "https://example.com/file.bin", + maxBytes: 6, + timeoutMs: 1000, + maxRedirects: 0, + }), + ).rejects.toThrow("Content too large"); + + // Allow cancel() microtask to run. 
+ await new Promise((resolve) => setTimeout(resolve, 0)); + + expect(canceled).toBe(true); + expect(release).toHaveBeenCalledTimes(1); + }); +}); + +describe("base64 size guards", () => { + it("rejects oversized base64 images before decoding", async () => { + const data = Buffer.alloc(7).toString("base64"); + const { extractImageContentFromSource } = await import("./input-files.js"); + await expect( + extractImageContentFromSource( + { type: "base64", data, mediaType: "image/png" }, + { + allowUrl: false, + allowedMimes: new Set(["image/png"]), + maxBytes: 6, + maxRedirects: 0, + timeoutMs: 1, + }, + ), + ).rejects.toThrow("Image too large"); + }); + + it("rejects oversized base64 files before decoding", async () => { + const data = Buffer.alloc(7).toString("base64"); + const { extractFileContentFromSource } = await import("./input-files.js"); + await expect( + extractFileContentFromSource({ + source: { type: "base64", data, mediaType: "text/plain", filename: "x.txt" }, + limits: { + allowUrl: false, + allowedMimes: new Set(["text/plain"]), + maxBytes: 6, + maxChars: 100, + maxRedirects: 0, + timeoutMs: 1, + pdf: { maxPages: 1, maxPixels: 1, minTextChars: 1 }, + }, + }), + ).rejects.toThrow("File too large"); + }); +}); diff --git a/src/media/input-files.ts b/src/media/input-files.ts index 60df09cf50e..30784a99058 100644 --- a/src/media/input-files.ts +++ b/src/media/input-files.ts @@ -110,6 +110,29 @@ export const DEFAULT_INPUT_PDF_MAX_PAGES = 4; export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000; export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200; +function estimateBase64DecodedBytes(base64: string): number { + const cleaned = base64.trim().replace(/\s+/g, ""); + if (!cleaned) { + return 0; + } + const padding = cleaned.endsWith("==") ? 2 : cleaned.endsWith("=") ? 
1 : 0; + const estimated = Math.floor((cleaned.length * 3) / 4) - padding; + return Math.max(0, estimated); +} + +function rejectOversizedBase64Payload(params: { + data: string; + maxBytes: number; + label: "Image" | "File"; +}): void { + const estimated = estimateBase64DecodedBytes(params.data); + if (estimated > params.maxBytes) { + throw new Error( + `${params.label} too large: ${estimated} bytes (limit: ${params.maxBytes} bytes)`, + ); + } +} + export function normalizeMimeType(value: string | undefined): string | undefined { if (!value) { return undefined; @@ -163,18 +186,13 @@ export async function fetchWithGuard(params: { const contentLength = response.headers.get("content-length"); if (contentLength) { - const size = parseInt(contentLength, 10); - if (size > params.maxBytes) { + const size = Number(contentLength); + if (Number.isFinite(size) && size > params.maxBytes) { throw new Error(`Content too large: ${size} bytes (limit: ${params.maxBytes} bytes)`); } } - const buffer = Buffer.from(await response.arrayBuffer()); - if (buffer.byteLength > params.maxBytes) { - throw new Error( - `Content too large: ${buffer.byteLength} bytes (limit: ${params.maxBytes} bytes)`, - ); - } + const buffer = await readResponseWithLimit(response, params.maxBytes); const contentType = response.headers.get("content-type") || undefined; const parsed = parseContentType(contentType); @@ -185,6 +203,48 @@ export async function fetchWithGuard(params: { } } +async function readResponseWithLimit(res: Response, maxBytes: number): Promise<Buffer> { + const body = res.body; + if (!body || typeof body.getReader !== "function") { + const fallback = Buffer.from(await res.arrayBuffer()); + if (fallback.byteLength > maxBytes) { + throw new Error(`Content too large: ${fallback.byteLength} bytes (limit: ${maxBytes} bytes)`); + } + return fallback; + } + + const reader = body.getReader(); + const chunks: Uint8Array[] = []; + let total = 0; + try { + while (true) { + const { done, value } = await
reader.read(); + if (done) { + break; + } + if (value?.length) { + total += value.length; + if (total > maxBytes) { + try { + await reader.cancel(); + } catch {} + throw new Error(`Content too large: ${total} bytes (limit: ${maxBytes} bytes)`); + } + chunks.push(value); + } + } + } finally { + try { + reader.releaseLock(); + } catch {} + } + + return Buffer.concat( + chunks.map((chunk) => Buffer.from(chunk)), + total, + ); +} + function decodeTextContent(buffer: Buffer, charset: string | undefined): string { const encoding = charset?.trim().toLowerCase() || "utf-8"; try { @@ -268,6 +328,7 @@ export async function extractImageContentFromSource( if (!source.data) { throw new Error("input_image base64 source missing 'data' field"); } + rejectOversizedBase64Payload({ data: source.data, maxBytes: limits.maxBytes, label: "Image" }); const mimeType = normalizeMimeType(source.mediaType) ?? "image/png"; if (!limits.allowedMimes.has(mimeType)) { throw new Error(`Unsupported image MIME type: ${mimeType}`); @@ -320,6 +381,7 @@ export async function extractFileContentFromSource(params: { if (!source.data) { throw new Error("input_file base64 source missing 'data' field"); } + rejectOversizedBase64Payload({ data: source.data, maxBytes: limits.maxBytes, label: "File" }); const parsed = parseContentType(source.mediaType); mimeType = parsed.mimeType; charset = parsed.charset;