mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-08 06:54:24 +00:00
Gateway: normalize HEIC input_image sources (#38122)
* Media: normalize HEIC input images
* Gateway: accept HEIC image input schema
* Media: add HEIC input normalization tests
* Gateway: cover HEIC input schema parity
* Docs: document HEIC input image support
* Changelog: note HEIC input image fix
This commit is contained in:
@@ -137,6 +137,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Discord/voice messages: request upload slots with JSON fetch calls so voice message uploads no longer fail with content-type errors. Thanks @thewilloftheshadow.
|
||||
- Discord/voice decoder fallback: drop the native Opus dependency and use opusscript for voice decoding to avoid native-opus installs. Thanks @thewilloftheshadow.
|
||||
- Discord/auto presence health signal: add runtime availability-driven presence updates plus connected-state reporting to improve health monitoring and operator visibility. (#33277) Thanks @thewilloftheshadow.
|
||||
- HEIC image inputs: accept HEIC/HEIF `input_image` sources in Gateway HTTP APIs, normalize them to JPEG before provider delivery, and document the expanded default MIME allowlist. Thanks @vincentkoc.
|
||||
- Telegram/draft-stream boundary stability: materialize DM draft previews at assistant-message/tool boundaries, serialize lane-boundary callbacks before final delivery, and scope preview cleanup to the active preview so multi-step Telegram streams no longer lose, overwrite, or leave stale preview bubbles. (#33842) Thanks @ngutman.
|
||||
- Telegram/DM draft finalization reliability: require verified final-text draft emission before treating preview finalization as delivered, and fall back to normal payload send when final draft delivery is not confirmed (preventing missing final responses and preserving media/button delivery). (#32118) Thanks @OpenCils.
|
||||
- Telegram/DM draft final delivery: materialize text-only `sendMessageDraft` previews into one permanent final message and skip duplicate final payload sends, while preserving fallback behavior when materialization fails. (#34318) Thanks @Brotherinlaw-13.
|
||||
|
||||
@@ -242,7 +242,14 @@ Defaults can be tuned under `gateway.http.endpoints.responses`:
|
||||
images: {
|
||||
allowUrl: true,
|
||||
urlAllowlist: ["images.example.com"],
|
||||
allowedMimes: ["image/jpeg", "image/png", "image/gif", "image/webp"],
|
||||
allowedMimes: [
|
||||
"image/jpeg",
|
||||
"image/png",
|
||||
"image/gif",
|
||||
"image/webp",
|
||||
"image/heic",
|
||||
"image/heif",
|
||||
],
|
||||
maxBytes: 10485760,
|
||||
maxRedirects: 3,
|
||||
timeoutMs: 10000,
|
||||
@@ -268,6 +275,7 @@ Defaults when omitted:
|
||||
- `images.maxBytes`: 10MB
|
||||
- `images.maxRedirects`: 3
|
||||
- `images.timeoutMs`: 10s
|
||||
- HEIC/HEIF `input_image` sources are accepted and normalized to JPEG before provider delivery.
|
||||
|
||||
Security note:
|
||||
|
||||
|
||||
@@ -35,7 +35,14 @@ export const InputImageSourceSchema = z.discriminatedUnion("type", [
|
||||
}),
|
||||
z.object({
|
||||
type: z.literal("base64"),
|
||||
media_type: z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
||||
media_type: z.enum([
|
||||
"image/jpeg",
|
||||
"image/png",
|
||||
"image/gif",
|
||||
"image/webp",
|
||||
"image/heic",
|
||||
"image/heif",
|
||||
]),
|
||||
data: z.string().min(1), // base64-encoded
|
||||
}),
|
||||
]);
|
||||
|
||||
@@ -54,6 +54,20 @@ describe("OpenResponses Feature Parity", () => {
|
||||
expect(result.success).toBe(true);
|
||||
});
|
||||
|
||||
// Parity check: an `input_image` content part whose base64 source declares
// `image/heic` must pass schema validation.
it("should validate input_image with HEIC base64 source", async () => {
  const validImage = {
    type: "input_image" as const,
    source: {
      type: "base64" as const,
      media_type: "image/heic" as const,
      // Base64 encoding of the text "heic-image"; only the schema is exercised,
      // so the payload does not need to be a real HEIC file.
      data: "aGVpYy1pbWFnZQ==",
    },
  };

  const result = InputImageContentPartSchema.safeParse(validImage);
  expect(result.success).toBe(true);
});
|
||||
|
||||
it("should reject input_image with invalid mime type", async () => {
|
||||
const invalidImage = {
|
||||
type: "input_image" as const,
|
||||
|
||||
@@ -1,11 +1,16 @@
|
||||
import { beforeAll, describe, expect, it, vi } from "vitest";
|
||||
import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Shared spies that individual tests script with `mockResolvedValueOnce`.
const fetchWithSsrFGuardMock = vi.fn();
const convertHeicToJpegMock = vi.fn();

// Replace the guarded fetch with a wrapper that forwards every argument to the spy.
vi.mock("../infra/net/fetch-guard.js", () => ({
  fetchWithSsrFGuard: (...args: unknown[]) => fetchWithSsrFGuardMock(...args),
}));

// Replace the HEIC converter the same way so no real image decoding runs in tests.
vi.mock("./image-ops.js", () => ({
  convertHeicToJpeg: (...args: unknown[]) => convertHeicToJpegMock(...args),
}));
|
||||
|
||||
/**
 * Resolves after one microtask has been queued and run.
 *
 * @returns A promise that settles once the callback scheduled via
 *   `queueMicrotask` has executed.
 */
async function waitForMicrotaskTurn(): Promise<void> {
  await new Promise<void>((resolve) => queueMicrotask(resolve));
}
|
||||
@@ -19,6 +24,75 @@ beforeAll(async () => {
|
||||
await import("./input-files.js"));
|
||||
});
|
||||
|
||||
// Reset recorded call history on every spy so the per-test
// `toHaveBeenCalledTimes(1)` assertions start from zero.
beforeEach(() => {
  vi.clearAllMocks();
});
|
||||
|
||||
// Covers both input paths (inline base64 and remote URL): a HEIC source must be
// passed through the mocked converter exactly once and come back as JPEG.
describe("HEIC input image normalization", () => {
  it("converts base64 HEIC images to JPEG before returning them", async () => {
    // Stand-in for the converter output; the mocked converter never decodes anything.
    const normalized = Buffer.from("jpeg-normalized");
    convertHeicToJpegMock.mockResolvedValueOnce(normalized);

    const image = await extractImageContentFromSource(
      {
        type: "base64",
        data: Buffer.from("heic-source").toString("base64"),
        mediaType: "image/heic",
      },
      {
        allowUrl: false,
        allowedMimes: new Set(["image/heic", "image/jpeg"]),
        maxBytes: 1024 * 1024,
        maxRedirects: 0,
        timeoutMs: 1,
      },
    );

    expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1);
    // The result carries the converted bytes and reports JPEG, not the source MIME.
    expect(image).toEqual({
      type: "image",
      data: normalized.toString("base64"),
      mimeType: "image/jpeg",
    });
  });

  it("converts URL HEIC images to JPEG before returning them", async () => {
    // `release` is handed back by the mocked guarded fetch; the test checks it is invoked.
    const release = vi.fn(async () => {});
    fetchWithSsrFGuardMock.mockResolvedValueOnce({
      response: new Response(Buffer.from("heic-url-source"), {
        status: 200,
        headers: { "content-type": "image/heic" },
      }),
      release,
      finalUrl: "https://example.com/photo.heic",
    });
    const normalized = Buffer.from("jpeg-url-normalized");
    convertHeicToJpegMock.mockResolvedValueOnce(normalized);

    const image = await extractImageContentFromSource(
      {
        type: "url",
        url: "https://example.com/photo.heic",
      },
      {
        allowUrl: true,
        allowedMimes: new Set(["image/heic", "image/jpeg"]),
        maxBytes: 1024 * 1024,
        maxRedirects: 0,
        timeoutMs: 1000,
      },
    );

    expect(convertHeicToJpegMock).toHaveBeenCalledTimes(1);
    expect(image).toEqual({
      type: "image",
      data: normalized.toString("base64"),
      mimeType: "image/jpeg",
    });
    // The fetch handle must be released exactly once after the body is consumed.
    expect(release).toHaveBeenCalledTimes(1);
  });
});
|
||||
|
||||
describe("fetchWithGuard", () => {
|
||||
it("rejects oversized streamed payloads and cancels the stream", async () => {
|
||||
let canceled = false;
|
||||
|
||||
@@ -2,6 +2,8 @@ import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
|
||||
import type { SsrFPolicy } from "../infra/net/ssrf.js";
|
||||
import { logWarn } from "../logger.js";
|
||||
import { canonicalizeBase64, estimateBase64DecodedBytes } from "./base64.js";
|
||||
import { convertHeicToJpeg } from "./image-ops.js";
|
||||
import { detectMime } from "./mime.js";
|
||||
import { extractPdfContent, type PdfExtractedImage } from "./pdf-extract.js";
|
||||
import { readResponseWithLimit } from "./read-response-with-limit.js";
|
||||
|
||||
@@ -85,7 +87,14 @@ export type InputFetchResult = {
|
||||
contentType?: string;
|
||||
};
|
||||
|
||||
export const DEFAULT_INPUT_IMAGE_MIMES = ["image/jpeg", "image/png", "image/gif", "image/webp"];
|
||||
/**
 * Image MIME types accepted for `input_image` sources by default.
 * Includes HEIC/HEIF alongside JPEG, PNG, GIF and WebP.
 */
export const DEFAULT_INPUT_IMAGE_MIMES = [
  "image/jpeg",
  "image/png",
  "image/gif",
  "image/webp",
  "image/heic",
  "image/heif",
];
|
||||
export const DEFAULT_INPUT_FILE_MIMES = [
|
||||
"text/plain",
|
||||
"text/markdown",
|
||||
@@ -102,6 +111,8 @@ export const DEFAULT_INPUT_TIMEOUT_MS = 10_000;
|
||||
// Default PDF extraction limits; the code that consumes them is outside this excerpt.
export const DEFAULT_INPUT_PDF_MAX_PAGES = 4;
export const DEFAULT_INPUT_PDF_MAX_PIXELS = 4_000_000;
export const DEFAULT_INPUT_PDF_MIN_TEXT_CHARS = 200;

/** MIME type reported for an image after it has been converted from HEIC/HEIF. */
const NORMALIZED_INPUT_IMAGE_MIME = "image/jpeg";

/** Source MIME types that are converted to JPEG instead of being passed through. */
const HEIC_INPUT_IMAGE_MIMES = new Set(["image/heic", "image/heif"]);
|
||||
|
||||
function rejectOversizedBase64Payload(params: {
|
||||
data: string;
|
||||
@@ -218,6 +229,40 @@ function clampText(text: string, maxChars: number): string {
|
||||
return text.slice(0, maxChars);
|
||||
}
|
||||
|
||||
/**
 * Validates an input image against the configured limits and converts
 * HEIC/HEIF sources to JPEG.
 *
 * @param params.buffer - Raw image bytes.
 * @param params.mimeType - MIME type declared by the caller or response header, if any.
 * @param params.limits - Allowed MIME set and byte ceiling to enforce.
 * @returns Base64 image content: the original bytes with their resolved MIME
 *   type for non-HEIC sources, or the converted bytes tagged `image/jpeg`.
 * @throws Error when the resolved MIME type is not in `limits.allowedMimes`,
 *   or when the converted JPEG exceeds `limits.maxBytes`.
 */
async function normalizeInputImage(params: {
  buffer: Buffer;
  mimeType?: string;
  limits: InputImageLimits;
}): Promise<InputImageContent> {
  // Prefer the MIME reported by detectMime (given the bytes and the declared
  // type), then the declared type on its own, then a generic fallback that
  // the allowlist check below can reject.
  const sourceMime =
    normalizeMimeType(await detectMime({ buffer: params.buffer, headerMime: params.mimeType })) ??
    normalizeMimeType(params.mimeType) ??
    "application/octet-stream";
  if (!params.limits.allowedMimes.has(sourceMime)) {
    throw new Error(`Unsupported image MIME type: ${sourceMime}`);
  }

  // Anything that is not HEIC/HEIF is passed through untouched.
  if (!HEIC_INPUT_IMAGE_MIMES.has(sourceMime)) {
    return {
      type: "image",
      data: params.buffer.toString("base64"),
      mimeType: sourceMime,
    };
  }

  const normalizedBuffer = await convertHeicToJpeg(params.buffer);
  // Re-check the size: the converted JPEG can be larger than the HEIC source
  // that the caller already validated.
  if (normalizedBuffer.byteLength > params.limits.maxBytes) {
    throw new Error(
      `Image too large after HEIC conversion: ${normalizedBuffer.byteLength} bytes (limit: ${params.limits.maxBytes} bytes)`,
    );
  }
  return {
    type: "image",
    data: normalizedBuffer.toString("base64"),
    mimeType: NORMALIZED_INPUT_IMAGE_MIME,
  };
}
|
||||
|
||||
export async function extractImageContentFromSource(
|
||||
source: InputImageSource,
|
||||
limits: InputImageLimits,
|
||||
@@ -228,17 +273,17 @@ export async function extractImageContentFromSource(
|
||||
if (!canonicalData) {
|
||||
throw new Error("input_image base64 source has invalid 'data' field");
|
||||
}
|
||||
const mimeType = normalizeMimeType(source.mediaType) ?? "image/png";
|
||||
if (!limits.allowedMimes.has(mimeType)) {
|
||||
throw new Error(`Unsupported image MIME type: ${mimeType}`);
|
||||
}
|
||||
const buffer = Buffer.from(canonicalData, "base64");
|
||||
if (buffer.byteLength > limits.maxBytes) {
|
||||
throw new Error(
|
||||
`Image too large: ${buffer.byteLength} bytes (limit: ${limits.maxBytes} bytes)`,
|
||||
);
|
||||
}
|
||||
return { type: "image", data: canonicalData, mimeType };
|
||||
return await normalizeInputImage({
|
||||
buffer,
|
||||
mimeType: normalizeMimeType(source.mediaType) ?? "image/png",
|
||||
limits,
|
||||
});
|
||||
}
|
||||
|
||||
if (source.type === "url") {
|
||||
@@ -256,10 +301,11 @@ export async function extractImageContentFromSource(
|
||||
},
|
||||
auditContext: "openresponses.input_image",
|
||||
});
|
||||
if (!limits.allowedMimes.has(result.mimeType)) {
|
||||
throw new Error(`Unsupported image MIME type from URL: ${result.mimeType}`);
|
||||
}
|
||||
return { type: "image", data: result.buffer.toString("base64"), mimeType: result.mimeType };
|
||||
return await normalizeInputImage({
|
||||
buffer: result.buffer,
|
||||
mimeType: result.mimeType,
|
||||
limits,
|
||||
});
|
||||
}
|
||||
|
||||
throw new Error(`Unsupported input_image source type: ${(source as { type: string }).type}`);
|
||||
|
||||
Reference in New Issue
Block a user