mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-07 22:44:16 +00:00
fix: flatten remote markdown images
This commit is contained in:
@@ -13,6 +13,8 @@ enum ChatMarkdownPreprocessor {
|
||||
"Chat history since last reply (untrusted, for context):",
|
||||
]
|
||||
|
||||
/// Regular expression for markdown image syntax `![label](target)`.
/// Capture group 1 is the label (any run of characters other than `]`, possibly empty);
/// capture group 2 is the target (one or more characters other than `)`).
private static let markdownImagePattern = #"!\[([^\]]*)\]\(([^)]+)\)"#
|
||||
|
||||
struct InlineImage: Identifiable {
|
||||
let id = UUID()
|
||||
let label: String
|
||||
@@ -27,8 +29,7 @@ enum ChatMarkdownPreprocessor {
|
||||
static func preprocess(markdown raw: String) -> Result {
|
||||
let withoutContextBlocks = self.stripInboundContextBlocks(raw)
|
||||
let withoutTimestamps = self.stripPrefixedTimestamps(withoutContextBlocks)
|
||||
let pattern = #"!\[([^\]]*)\]\((data:image\/[^;]+;base64,[^)]+)\)"#
|
||||
guard let re = try? NSRegularExpression(pattern: pattern) else {
|
||||
guard let re = try? NSRegularExpression(pattern: self.markdownImagePattern) else {
|
||||
return Result(cleaned: self.normalize(withoutTimestamps), images: [])
|
||||
}
|
||||
|
||||
@@ -44,24 +45,41 @@ enum ChatMarkdownPreprocessor {
|
||||
for match in matches.reversed() {
|
||||
guard match.numberOfRanges >= 3 else { continue }
|
||||
let label = ns.substring(with: match.range(at: 1))
|
||||
let dataURL = ns.substring(with: match.range(at: 2))
|
||||
|
||||
let image: OpenClawPlatformImage? = {
|
||||
guard let comma = dataURL.firstIndex(of: ",") else { return nil }
|
||||
let b64 = String(dataURL[dataURL.index(after: comma)...])
|
||||
guard let data = Data(base64Encoded: b64) else { return nil }
|
||||
return OpenClawPlatformImage(data: data)
|
||||
}()
|
||||
images.append(InlineImage(label: label, image: image))
|
||||
let source = ns.substring(with: match.range(at: 2))
|
||||
|
||||
let start = cleaned.index(cleaned.startIndex, offsetBy: match.range.location)
|
||||
let end = cleaned.index(start, offsetBy: match.range.length)
|
||||
cleaned.replaceSubrange(start..<end, with: "")
|
||||
if let inlineImage = self.inlineImage(label: label, source: source) {
|
||||
images.append(inlineImage)
|
||||
cleaned.replaceSubrange(start..<end, with: "")
|
||||
} else {
|
||||
cleaned.replaceSubrange(start..<end, with: self.fallbackImageLabel(label))
|
||||
}
|
||||
}
|
||||
|
||||
return Result(cleaned: self.normalize(cleaned), images: images.reversed())
|
||||
}
|
||||
|
||||
/// Builds an `InlineImage` from a markdown image target when that target is a base64
/// `data:image/...` URL.
///
/// - Parameters:
///   - label: Alt text captured from the markdown image syntax.
///   - source: Raw image target captured from the markdown image syntax.
/// - Returns: `nil` when the text before the first comma of the trimmed `source` is not a
///   `data:image/<subtype>;base64` header (matched case-insensitively). Otherwise an
///   `InlineImage` carrying `label`; its `image` is `nil` when the payload after the comma
///   is not valid base64.
private static func inlineImage(label: String, source: String) -> InlineImage? {
    let trimmed = source.trimmingCharacters(in: .whitespacesAndNewlines)

    // Everything before the first comma must be exactly the data-URL header.
    guard let comma = trimmed.firstIndex(of: ","),
          trimmed[..<comma].range(
              of: #"^data:image\/[^;]+;base64$"#,
              options: [.regularExpression, .caseInsensitive]) != nil
    else {
        return nil
    }

    // The payload is everything after the comma; decoding failures still produce an
    // `InlineImage`, just without a decoded image.
    let b64 = String(trimmed[trimmed.index(after: comma)...])
    let image = Data(base64Encoded: b64).flatMap(OpenClawPlatformImage.init(data:))
    return InlineImage(label: label, image: image)
}
|
||||
|
||||
/// Produces the plain text that stands in for a markdown image.
///
/// - Parameter label: Alt text captured from the markdown image syntax.
/// - Returns: `label` with leading and trailing whitespace and newlines removed, or
///   `"image"` when nothing remains after trimming.
private static func fallbackImageLabel(_ label: String) -> String {
    let trimmed = label.trimmingCharacters(in: .whitespacesAndNewlines)
    return trimmed.isEmpty ? "image" : trimmed
}
|
||||
|
||||
private static func stripInboundContextBlocks(_ raw: String) -> String {
|
||||
guard self.inboundContextHeaders.contains(where: raw.contains) else {
|
||||
return raw
|
||||
|
||||
@@ -18,6 +18,30 @@ struct ChatMarkdownPreprocessorTests {
|
||||
#expect(result.images.first?.image != nil)
|
||||
}
|
||||
|
||||
/// A remote markdown image is flattened to its label text, while a base64 `data:image`
/// markdown image is removed from the text and surfaced through `result.images`.
@Test func flattensRemoteMarkdownImagesIntoText() {
    let base64 = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVQIHWP4////GQAJ+wP/2hN8NwAAAABJRU5ErkJggg=="
    // NOTE(review): the two image lines of this literal were stripped from the copy this was
    // restored from (only a trailing `)` survived). They are reconstructed to satisfy the
    // assertions below; the remote URL and the "Pixel" label are placeholders — confirm
    // against the upstream test.
    let markdown = """
    ![Leak](https://example.com/collect?x=1)

    ![Pixel](data:image/png;base64,\(base64))
    """

    let result = ChatMarkdownPreprocessor.preprocess(markdown: markdown)

    #expect(result.cleaned == "Leak")
    #expect(result.images.count == 1)
    #expect(result.images.first?.image != nil)
}
|
||||
|
||||
/// A remote markdown image with an empty label is replaced by the fallback text `"image"`
/// and contributes no inline image.
@Test func usesFallbackTextForUnlabeledRemoteMarkdownImages() {
    // NOTE(review): the input literal was stripped to "" in the copy this was restored from,
    // which cannot produce "image". Reconstructed as an unlabeled remote image; the URL is a
    // placeholder — confirm against the upstream test.
    let markdown = "![](https://example.com/image.png)"

    let result = ChatMarkdownPreprocessor.preprocess(markdown: markdown)

    #expect(result.cleaned == "image")
    #expect(result.images.isEmpty)
}
|
||||
|
||||
@Test func stripsInboundUntrustedContextBlocks() {
|
||||
let markdown = """
|
||||
Conversation info (untrusted metadata):
|
||||
|
||||
@@ -1712,6 +1712,22 @@
|
||||
return text.replace(/<(?=[a-zA-Z/])/g, "<");
|
||||
}
|
||||
|
||||
// Matches the start of an inline base64 image data URL (`data:image/<subtype>;base64,`),
// case-insensitively. Anchored at the beginning of the string only.
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
|
||||
|
||||
/**
 * Normalizes the label (alt text) of a markdown image.
 *
 * @param {unknown} text - Candidate label; any non-string value is treated as empty.
 * @returns {string} The trimmed label, or "image" when the trimmed label is empty.
 */
function normalizeMarkdownImageLabel(text) {
  const trimmed = typeof text === "string" ? text.trim() : "";
  return trimmed || "image";
}
|
||||
|
||||
/**
 * Renders a markdown image token to HTML.
 *
 * Only hrefs that match INLINE_DATA_IMAGE_RE are emitted as an <img> element; every other
 * href (including a missing or non-string one) is flattened to the escaped label text, so
 * no image element is produced for it.
 *
 * @param {{ href?: unknown, text?: unknown } | null | undefined} token - Image token.
 * @returns {string} Either an <img> tag with escaped src/alt, or the escaped label.
 */
function renderMarkdownImage(token) {
  const label = normalizeMarkdownImageLabel(token?.text);
  const href = typeof token?.href === "string" ? token.href.trim() : "";
  if (!INLINE_DATA_IMAGE_RE.test(href)) {
    return escapeHtml(label);
  }
  return `<img src="${escapeHtml(href)}" alt="${escapeHtml(label)}">`;
}
|
||||
|
||||
// Configure marked with syntax highlighting and HTML escaping for text
|
||||
marked.use({
|
||||
breaks: true,
|
||||
@@ -1750,6 +1766,9 @@
|
||||
html(token) {
|
||||
return escapeHtml(token.text);
|
||||
},
|
||||
image(token) {
|
||||
return renderMarkdownImage(token);
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
@@ -250,4 +250,38 @@ describe("export html security hardening", () => {
|
||||
expect(img?.getAttribute("onerror")).toBeNull();
|
||||
expect(img?.getAttribute("src")).toBe("data:application/octet-stream;base64,AAAA");
|
||||
});
|
||||
|
||||
// Remote markdown images must not become <img> elements (only their label text is kept),
// while inline data-image markdown is still rendered as an <img>.
it("flattens remote markdown images but keeps data-image markdown", () => {
  const dataImage = "data:image/png;base64,AAAA";
  const session: SessionData = {
    header: { id: "session-4", timestamp: now() },
    entries: [
      {
        id: "1",
        parentId: null,
        timestamp: now(),
        type: "message",
        message: {
          role: "assistant",
          content: [
            {
              type: "text",
              // NOTE(review): both image markdowns were stripped from the copy this was
              // restored from (leaving `Leak:\n\n\n\n`). Reconstructed from the assertions
              // below: an https image labelled "exfil" and an image pointing at dataImage.
              // The remote URL and the "pixel" label are placeholders — confirm upstream.
              text: `Leak:\n\n![exfil](https://example.com/collect?x=1)\n\n![pixel](${dataImage})`,
            },
          ],
        },
      },
    ],
    leafId: "1",
    systemPrompt: "",
    tools: [],
  };

  const { document } = renderTemplate(session);
  const messages = document.getElementById("messages");
  expect(messages).toBeTruthy();
  expect(messages?.querySelector('img[src^="https://"]')).toBeNull();
  expect(messages?.textContent).toContain("exfil");
  expect(messages?.querySelector(`img[src="${dataImage}"]`)).toBeTruthy();
});
|
||||
});
|
||||
|
||||
@@ -30,11 +30,10 @@ describe("toSanitizedMarkdownHtml", () => {
|
||||
expect(html).toContain("console.log(1)");
|
||||
});
|
||||
|
||||
it("preserves img tags with src and alt from markdown images (#15437)", () => {
|
||||
it("flattens remote markdown images into alt text", () => {
|
||||
const html = toSanitizedMarkdownHtml("");
|
||||
expect(html).toContain("<img");
|
||||
expect(html).toContain('src="https://example.com/image.png"');
|
||||
expect(html).toContain('alt="Alt text"');
|
||||
expect(html).not.toContain("<img");
|
||||
expect(html).toContain("Alt text");
|
||||
});
|
||||
|
||||
it("preserves base64 data URI images (#15437)", () => {
|
||||
@@ -43,11 +42,17 @@ describe("toSanitizedMarkdownHtml", () => {
|
||||
expect(html).toContain("data:image/png;base64,");
|
||||
});
|
||||
|
||||
it("strips javascript image urls", () => {
|
||||
it("flattens non-data markdown image urls", () => {
|
||||
const html = toSanitizedMarkdownHtml(")");
|
||||
expect(html).toContain("<img");
|
||||
expect(html).not.toContain("<img");
|
||||
expect(html).not.toContain("javascript:");
|
||||
expect(html).not.toContain("src=");
|
||||
expect(html).toContain("X");
|
||||
});
|
||||
|
||||
// An unlabeled, non-data markdown image renders as the plain fallback label, not an <img>.
it("uses a plain fallback label for unlabeled markdown images", () => {
  // NOTE(review): the input literal was stripped to "" in the copy this was restored from.
  // Reconstructed as an unlabeled remote image; the URL is a placeholder — confirm upstream.
  const html = toSanitizedMarkdownHtml("![](https://example.com/image.png)");
  expect(html).not.toContain("<img");
  expect(html).toContain("image");
});
|
||||
|
||||
it("renders GFM markdown tables (#20410)", () => {
|
||||
|
||||
@@ -43,6 +43,7 @@ const MARKDOWN_CHAR_LIMIT = 140_000;
|
||||
const MARKDOWN_PARSE_LIMIT = 40_000;
|
||||
const MARKDOWN_CACHE_LIMIT = 200;
|
||||
const MARKDOWN_CACHE_MAX_CHARS = 50_000;
|
||||
// Matches the start of an inline base64 image data URL (`data:image/<subtype>;base64,`),
// case-insensitively. Anchored at the beginning of the string only.
const INLINE_DATA_IMAGE_RE = /^data:image\/[a-z0-9.+-]+;base64,/i;
|
||||
const markdownCache = new Map<string, string>();
|
||||
|
||||
function getCachedMarkdown(key: string): string | null {
|
||||
@@ -137,6 +138,19 @@ export function toSanitizedMarkdownHtml(markdown: string): string {
|
||||
// pages) as formatted output is confusing UX (#13937).
|
||||
const htmlEscapeRenderer = new marked.Renderer();
|
||||
htmlEscapeRenderer.html = ({ text }: { text: string }) => escapeHtml(text);
|
||||
// Image renderer: only hrefs matching INLINE_DATA_IMAGE_RE are emitted as <img>; any other
// href (or a missing one) is flattened to the escaped label text.
htmlEscapeRenderer.image = (token: { href?: string | null; text?: string | null }) => {
  const label = normalizeMarkdownImageLabel(token.text);
  const href = token.href?.trim() ?? "";
  if (!INLINE_DATA_IMAGE_RE.test(href)) {
    return escapeHtml(label);
  }
  return `<img src="${escapeHtml(href)}" alt="${escapeHtml(label)}">`;
};
|
||||
|
||||
/**
 * Normalizes the label (alt text) of a markdown image.
 *
 * @param text - Candidate label; may be missing or null.
 * @returns The trimmed label, or "image" when the label is missing or empty after trimming.
 */
function normalizeMarkdownImageLabel(text?: string | null): string {
  const trimmed = text?.trim();
  return trimmed ? trimmed : "image";
}
|
||||
|
||||
function escapeHtml(value: string): string {
|
||||
return value
|
||||
|
||||
Reference in New Issue
Block a user