diff --git a/CHANGELOG.md b/CHANGELOG.md index 82d501fa9ec..370ed4eaaf6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -137,6 +137,7 @@ Docs: https://docs.openclaw.ai - Discord/voice messages: request upload slots with JSON fetch calls so voice message uploads no longer fail with content-type errors. Thanks @thewilloftheshadow. - Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow. - Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow. +- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc. - Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman. - Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils. - Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13. 
diff --git a/docs/gateway/openresponses-http-api.md b/docs/gateway/openresponses-http-api.md index d62cc8edb59..b5b4045ac62 100644 --- a/docs/gateway/openresponses-http-api.md +++ b/docs/gateway/openresponses-http-api.md @@ -242,7 +242,14 @@ Defaults can be tuned under `gateway.http.endpoints.responses`: images: { allowUrl: true, urlAllowlist: ["images.example.com"], - allowedMimes: ["image/jpeg", "image/png", "image/gif", "image/webp"], + allowedMimes: [ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", + "image/heic", + "image/heif", + ], maxBytes: 10485760, maxRedirects: 3, timeoutMs: 10000, @@ -268,6 +275,7 @@ Defaults when omitted: - `images.maxBytes`: 10MB - `images.maxRedirects`: 3 - `images.timeoutMs`: 10s +- HEIC/HEIF `input_image` sources are accepted and normalized to JPEG before provider delivery. Security note: diff --git a/src/gateway/open-responses.schema.ts b/src/gateway/open-responses.schema.ts index e07288610fb..ca23f8de235 100644 --- a/src/gateway/open-responses.schema.ts +++ b/src/gateway/open-responses.schema.ts @@ -35,7 +35,14 @@ export const InputImageSourceSchema = z.discriminatedUnion("type", [ }), z.object({ type: z.literal("base64"), - media_type: z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]), + media_type: z.enum([ + "image/jpeg", + "image/png", + "image/gif", + "image/webp", + "image/heic", + "image/heif", + ]), data: z.string().min(1), // base64-encoded }), ]); diff --git a/src/gateway/openresponses-parity.test.ts b/src/gateway/openresponses-parity.test.ts index 3e4b2dc535b..c69a4206754 100644 --- a/src/gateway/openresponses-parity.test.ts +++ b/src/gateway/openresponses-parity.test.ts @@ -54,6 +54,20 @@ describe("OpenResponses Feature Parity", () => { expect(result.success).toBe(true); }); + it("should validate input_image with HEIC base64 source", async () => { + const validImage = { + type: "input_image" as const, + source: { + type: "base64" as const, + media_type: "image/heic" as const, + data: 
"aGVpYy1pbWFnZQ==", + }, + }; + + const result = InputImageContentPartSchema.safeParse(validImage); + expect(result.success).toBe(true); + }); + it("should reject input_image with invalid mime type", async () => { const invalidImage = { type: "input_image" as const, diff --git a/src/media/input-files.fetch-guard.test.ts b/src/media/input-files.fetch-guard.test.ts index 64f8377bcfd..6d83738c73a 100644 --- a/src/media/input-files.fetch-guard.test.ts +++ b/src/media/input-files.fetch-guard.test.ts @@ -1,11 +1,16 @@ -import { beforeAll, describe, expect, it, vi } from "vitest"; +import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"; const fetchWithSsrFGuardMock = vi.fn(); +const convertHeicToJpegMock = vi.fn(); vi.mock("../infra/net/fetch-guard.js", () => ({ fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args), })); +vi.mock("./image-ops.js", () => ({ + convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args), +})); + async function waitForMicrotaskTurn(): Promise<void> { await new Promise<void>((resolve) => queueMicrotask(resolve)); } @@ -19,6 +24,75 @@ beforeAll(async () => { await import("./input-files.js")); }); +beforeEach(() => { + vi.clearAllMocks(); +}); + +describe("HEIC input image normalization", () => { + it("converts base64 HEIC images to JPEG before returning them", async () => { + const normalized = Buffer.from("jpeg-normalized"); + convertHeicToJpegMock.mockResolvedValueOnce(normalized); + + const image = await extractImageContentFromSource( + { + type: "base64", + data: Buffer.from("heic-source").toString("base64"), + mediaType: "image/heic", + }, + { + allowUrl: false, + allowedMimes: new Set(["image/heic", "image/jpeg"]), + maxBytes: 1024 * 1024, + maxRedirects: 0, + timeoutMs: 1, + }, + ); + + expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1); + expect(image).toEqual({ + type: "image", + data: normalized.toString("base64"), + mimeType: "image/jpeg", + }); + }); + + it("converts URL HEIC images to 
JPEG before returning them", async () => { + const release = vi.fn(async () => {}); + fetchWithSsrFGuardMock.mockResolvedValueOnce({ + response: new Response(Buffer.from("heic-url-source"), { + status: 200, + headers: { "content-type": "image/heic" }, + }), + release, + finalUrl: "https://example.com/photo.heic", + }); + const normalized = Buffer.from("jpeg-url-normalized"); + convertHeicToJpegMock.mockResolvedValueOnce(normalized); + + const image = await extractImageContentFromSource( + { + type: "url", + url: "https://example.com/photo.heic", + }, + { + allowUrl: true, + allowedMimes: new Set(["image/heic", "image/jpeg"]), + maxBytes: 1024 * 1024, + maxRedirects: 0, + timeoutMs: 1000, + }, + ); + + expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1); + expect(image).toEqual({ + type: "image", + data: normalized.toString("base64"), + mimeType: "image/jpeg", + }); + expect(release).toHaveBeenCalledTimes(1); + }); +}); + describe("fetchWithGuard", () => { it("rejects oversized streamed payloads and cancels the stream", async () => { let canceled = false; diff --git a/src/media/input-files.ts b/src/media/input-files.ts index 11e7a917857..dcf3cd5872d 100644 --- a/src/media/input-files.ts +++ b/src/media/input-files.ts @@ -2,6 +2,8 @@ import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js"; import type { SsrFPolicy } from "../infra/net/ssrf.js"; import { logWarn } from "../logger.js"; import { canonicalizeBase64, estimateBase64DecodedBytes } from "./base64.js"; +import { convertHeicToJpeg } from "./image-ops.js"; +import { detectMime } from "./mime.js"; import { extractPdfContent, type PdfExtractedImage } from "./pdf-extract.js"; import { readResponseWithLimit } from "./read-response-with-limit.js"; @@ -85,7 +87,14 @@ export type InputFetchResult = { contentType?: string; }; -export const DEFAULT_INPUT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"]; +export const DEFAULT_INPUT_IMAGE_MIMES = [ + "image/jpeg", + "image/png", + 
"image/gif", + "image/webp", + "image/heic", + "image/heif", +]; export const DEFAULT_INPUT_FILE_MIMES = [ "text/plain", "text/markdown", @@ -102,6 +111,8 @@ export const DEFAULT_INPUT_TIMEOUT_MS = 10_000; export const DEFAULT_INPUT_PDF_MAX_PAGES = 4; export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000; export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200; +const NORMALIZED_INPUT_IMAGE_MIME = "image/jpeg"; +const HEIC_INPUT_IMAGE_MIMES = new Set(["image/heic", "image/heif"]); function rejectOversizedBase64Payload(params: { data: string; @@ -218,6 +229,40 @@ function clampText(text: string, maxChars: number): string { return text.slice(0, maxChars); } +async function normalizeInputImage(params: { + buffer: Buffer; + mimeType?: string; + limits: InputImageLimits; +}): Promise<InputImageContent> { + const sourceMime = + normalizeMimeType(await detectMime({ buffer: params.buffer, headerMime: params.mimeType })) ?? + normalizeMimeType(params.mimeType) ?? + "application/octet-stream"; + if (!params.limits.allowedMimes.has(sourceMime)) { + throw new Error(`Unsupported image MIME type: ${sourceMime}`); + } + + if (!HEIC_INPUT_IMAGE_MIMES.has(sourceMime)) { + return { + type: "image", + data: params.buffer.toString("base64"), + mimeType: sourceMime, + }; + } + + const normalizedBuffer = await convertHeicToJpeg(params.buffer); + if (normalizedBuffer.byteLength > params.limits.maxBytes) { + throw new Error( + `Image too large after HEIC conversion: ${normalizedBuffer.byteLength} bytes (limit: ${params.limits.maxBytes} bytes)`, + ); + } + return { + type: "image", + data: normalizedBuffer.toString("base64"), + mimeType: NORMALIZED_INPUT_IMAGE_MIME, + }; +} + export async function extractImageContentFromSource( source: InputImageSource, limits: InputImageLimits, @@ -228,17 +273,17 @@ export async function extractImageContentFromSource( if (!canonicalData) { throw new Error("input_image base64 source has invalid 'data' field"); } - const mimeType = normalizeMimeType(source.mediaType) ?? 
"image/png"; - if (!limits.allowedMimes.has(mimeType)) { - throw new Error(`Unsupported image MIME type: ${mimeType}`); - } const buffer = Buffer.from(canonicalData, "base64"); if (buffer.byteLength > limits.maxBytes) { throw new Error( `Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`, ); } - return { type: "image", data: canonicalData, mimeType }; + return await normalizeInputImage({ + buffer, + mimeType: normalizeMimeType(source.mediaType) ?? "image/png", + limits, + }); } if (source.type === "url") { @@ -256,10 +301,11 @@ export async function extractImageContentFromSource( }, auditContext: "openresponses.input_image", }); - if (!limits.allowedMimes.has(result.mimeType)) { - throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`); - } - return { type: "image", data: result.buffer.toString("base64"), mimeType: result.mimeType }; + return await normalizeInputImage({ + buffer: result.buffer, + mimeType: result.mimeType, + limits, + }); } throw new Error(`Unsupported input_image source type: ${(source as { type: string }).type}`);