Gateway: normalize HEIC input_image sources (#38122)

* Media: normalize HEIC input images

* Gateway: accept HEIC image input schema

* Media: add HEIC input normalization tests

* Gateway: cover HEIC input schema parity

* Docs: document HEIC input image support

* Changelog: note HEIC input image fix
This commit is contained in:
Vincent Koc
2026-03-06 11:19:36 -05:00
committed by GitHub
parent 81f22ae109
commit 9aceb51379
6 changed files with 163 additions and 13 deletions

View File

@@ -137,6 +137,7 @@ Docs: https://docs.openclaw.ai
- Discord/voice messages: request upload slots with JSON fetch calls so voice message uploads no longer fail with content-type errors. Thanks @thewilloftheshadow.
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc.
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.

View File

@@ -242,7 +242,14 @@ Defaults can be tuned under `gateway.http.endpoints.responses`:
images: {
allowUrl: true,
urlAllowlist: ["images.example.com"],
allowedMimes: ["image/jpeg", "image/png", "image/gif", "image/webp"],
allowedMimes: [
"image/jpeg",
"image/png",
"image/gif",
"image/webp",
"image/heic",
"image/heif",
],
maxBytes: 10485760,
maxRedirects: 3,
timeoutMs: 10000,
@@ -268,6 +275,7 @@ Defaults when omitted:
- `images.maxBytes`: 10MB
- `images.maxRedirects`: 3
- `images.timeoutMs`: 10s
- HEIC/HEIF `input_image` sources are accepted and normalized to JPEG before provider delivery; the converted JPEG must also fit within `images.maxBytes`.
Security note:

View File

@@ -35,7 +35,14 @@ export const InputImageSourceSchema = z.discriminatedUnion("type", [
}),
z.object({
type: z.literal("base64"),
media_type: z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
media_type: z.enum([
"image/jpeg",
"image/png",
"image/gif",
"image/webp",
"image/heic",
"image/heif",
]),
data: z.string().min(1), // base64-encoded
}),
]);

View File

@@ -54,6 +54,20 @@ describe("OpenResponses Feature Parity", () => {
expect(result.success).toBe(true);
});
// The base64 payload is a placeholder (it decodes to the text "heic-image");
// only the schema's acceptance of the "image/heic" media type is under test.
it("should validate input_image with HEIC base64 source", async () => {
  const heicPart = {
    type: "input_image",
    source: {
      type: "base64",
      media_type: "image/heic",
      data: "aGVpYy1pbWFnZQ==",
    },
  } as const;

  const { success } = InputImageContentPartSchema.safeParse(heicPart);

  expect(success).toBe(true);
});
it("should reject input_image with invalid mime type", async () => {
const invalidImage = {
type: "input_image" as const,

View File

@@ -1,11 +1,16 @@
import { beforeAll, describe, expect, it, vi } from "vitest";
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
// Shared mock functions: individual tests queue results on these to control
// the guarded network fetch and the HEIC-to-JPEG conversion.
const fetchWithSsrFGuardMock = vi.fn();
const convertHeicToJpegMock = vi.fn();
// Replace the real fetch guard module. The exported function is a thin wrapper
// that forwards its arguments to the shared mock only when it is invoked.
// NOTE(review): the indirection presumably exists because vitest hoists
// vi.mock() above the const declarations — confirm against vitest docs.
vi.mock("../infra/net/fetch-guard.js", () => ({
fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
}));
// Replace the image-ops module the same way, so no real HEIC decoding runs;
// the tests decide what the conversion resolves to.
vi.mock("./image-ops.js", () => ({
convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args),
}));
/**
 * Resolves once a callback queued with `queueMicrotask` has run, letting
 * already-pending microtasks execute before the caller continues.
 */
async function waitForMicrotaskTurn(): Promise<void> {
  const microtaskTurn = new Promise<void>((done) => {
    queueMicrotask(done);
  });
  await microtaskTurn;
}
@@ -19,6 +24,75 @@ beforeAll(async () => {
await import("./input-files.js"));
});
// Start every test with clean call history on the shared mocks.
beforeEach(() => {
  vi.clearAllMocks();
});

describe("HEIC input image normalization", () => {
  // MIME types allowed in both scenarios; a fresh Set is built per test.
  const HEIC_TEST_MIMES = ["image/heic", "image/jpeg"];

  /** Image content expected once the mocked converter has produced `jpeg`. */
  const asJpegImage = (jpeg: Buffer) => ({
    type: "image",
    data: jpeg.toString("base64"),
    mimeType: "image/jpeg",
  });

  it("converts base64 HEIC images to JPEG before returning them", async () => {
    const jpegBytes = Buffer.from("jpeg-normalized");
    convertHeicToJpegMock.mockResolvedValueOnce(jpegBytes);

    const result = await extractImageContentFromSource(
      {
        type: "base64",
        data: Buffer.from("heic-source").toString("base64"),
        mediaType: "image/heic",
      },
      {
        allowUrl: false,
        allowedMimes: new Set(HEIC_TEST_MIMES),
        maxBytes: 1024 * 1024,
        maxRedirects: 0,
        timeoutMs: 1,
      },
    );

    expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1);
    expect(result).toEqual(asJpegImage(jpegBytes));
  });

  it("converts URL HEIC images to JPEG before returning them", async () => {
    // The guarded fetch resolves to a HEIC response plus a release hook that
    // must be invoked exactly once.
    const release = vi.fn(async () => {});
    const heicResponse = new Response(Buffer.from("heic-url-source"), {
      status: 200,
      headers: { "content-type": "image/heic" },
    });
    fetchWithSsrFGuardMock.mockResolvedValueOnce({
      response: heicResponse,
      release,
      finalUrl: "https://example.com/photo.heic",
    });
    const jpegBytes = Buffer.from("jpeg-url-normalized");
    convertHeicToJpegMock.mockResolvedValueOnce(jpegBytes);

    const result = await extractImageContentFromSource(
      {
        type: "url",
        url: "https://example.com/photo.heic",
      },
      {
        allowUrl: true,
        allowedMimes: new Set(HEIC_TEST_MIMES),
        maxBytes: 1024 * 1024,
        maxRedirects: 0,
        timeoutMs: 1000,
      },
    );

    expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1);
    expect(result).toEqual(asJpegImage(jpegBytes));
    expect(release).toHaveBeenCalledTimes(1);
  });
});
describe("fetchWithGuard", () => {
it("rejects oversized streamed payloads and cancels the stream", async () => {
let canceled = false;

View File

@@ -2,6 +2,8 @@ import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
import type { SsrFPolicy } from "../infra/net/ssrf.js";
import { logWarn } from "../logger.js";
import { canonicalizeBase64, estimateBase64DecodedBytes } from "./base64.js";
import { convertHeicToJpeg } from "./image-ops.js";
import { detectMime } from "./mime.js";
import { extractPdfContent, type PdfExtractedImage } from "./pdf-extract.js";
import { readResponseWithLimit } from "./read-response-with-limit.js";
@@ -85,7 +87,14 @@ export type InputFetchResult = {
contentType?: string;
};
export const DEFAULT_INPUT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"];
export const DEFAULT_INPUT_IMAGE_MIMES = [
"image/jpeg",
"image/png",
"image/gif",
"image/webp",
"image/heic",
"image/heif",
];
export const DEFAULT_INPUT_FILE_MIMES = [
"text/plain",
"text/markdown",
@@ -102,6 +111,8 @@ export const DEFAULT_INPUT_TIMEOUT_MS = 10_000;
export const DEFAULT_INPUT_PDF_MAX_PAGES = 4;
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000;
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200;
const NORMALIZED_INPUT_IMAGE_MIME = "image/jpeg";
const HEIC_INPUT_IMAGE_MIMES = new Set(["image/heic", "image/heif"]);
function rejectOversizedBase64Payload(params: {
data: string;
@@ -218,6 +229,40 @@ function clampText(text: string, maxChars: number): string {
return text.slice(0, maxChars);
}
/**
 * Checks an input image against the configured MIME allowlist and returns it
 * as base64 image content, converting HEIC/HEIF payloads to JPEG first.
 *
 * The effective MIME type is resolved in this order: the normalized result of
 * `detectMime` (called with the buffer and the supplied type as `headerMime`),
 * then the normalized supplied type, then `application/octet-stream`.
 * The allowlist is applied to that source type, before any conversion.
 *
 * @param params.buffer - Image bytes to inspect and, if needed, convert.
 * @param params.mimeType - MIME type supplied alongside the bytes, if any.
 * @param params.limits - Provides `allowedMimes` and `maxBytes`.
 * @returns Image content with base64 data; `image/jpeg` for converted
 *   HEIC/HEIF input, otherwise the resolved source MIME type.
 * @throws Error if the resolved MIME type is not in `limits.allowedMimes`, or
 *   if the converted JPEG is larger than `limits.maxBytes`.
 */
async function normalizeInputImage(params: {
  buffer: Buffer;
  mimeType?: string;
  limits: InputImageLimits;
}): Promise<InputImageContent> {
  const { buffer, mimeType, limits } = params;

  const detectedMime = await detectMime({ buffer, headerMime: mimeType });
  const sourceMime =
    normalizeMimeType(detectedMime) ?? normalizeMimeType(mimeType) ?? "application/octet-stream";

  const isAllowed = limits.allowedMimes.has(sourceMime);
  if (!isAllowed) {
    throw new Error(`Unsupported image MIME type: ${sourceMime}`);
  }

  if (HEIC_INPUT_IMAGE_MIMES.has(sourceMime)) {
    const jpegBuffer = await convertHeicToJpeg(buffer);
    const jpegSize = jpegBuffer.byteLength;
    // Conversion can change the payload size, so the byte limit is re-checked
    // on the JPEG output.
    if (jpegSize > limits.maxBytes) {
      throw new Error(
        `Image too large after HEIC conversion: ${jpegSize} bytes (limit: ${limits.maxBytes} bytes)`,
      );
    }
    return {
      type: "image",
      data: jpegBuffer.toString("base64"),
      mimeType: NORMALIZED_INPUT_IMAGE_MIME,
    };
  }

  // Non-HEIC images pass through unchanged.
  return {
    type: "image",
    data: buffer.toString("base64"),
    mimeType: sourceMime,
  };
}
export async function extractImageContentFromSource(
source: InputImageSource,
limits: InputImageLimits,
@@ -228,17 +273,17 @@ export async function extractImageContentFromSource(
if (!canonicalData) {
throw new Error("input_image base64 source has invalid 'data' field");
}
const mimeType = normalizeMimeType(source.mediaType) ?? "image/png";
if (!limits.allowedMimes.has(mimeType)) {
throw new Error(`Unsupported image MIME type: ${mimeType}`);
}
const buffer = Buffer.from(canonicalData, "base64");
if (buffer.byteLength > limits.maxBytes) {
throw new Error(
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`,
);
}
return { type: "image", data: canonicalData, mimeType };
return await normalizeInputImage({
buffer,
mimeType: normalizeMimeType(source.mediaType) ?? "image/png",
limits,
});
}
if (source.type === "url") {
@@ -256,10 +301,11 @@ export async function extractImageContentFromSource(
},
auditContext: "openresponses.input_image",
});
if (!limits.allowedMimes.has(result.mimeType)) {
throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`);
}
return { type: "image", data: result.buffer.toString("base64"), mimeType: result.mimeType };
return await normalizeInputImage({
buffer: result.buffer,
mimeType: result.mimeType,
limits,
});
}
throw new Error(`Unsupported input_image source type: ${(source as { type: string }).type}`);