diff --git a/src/markdown/ir.blockquote-spacing.test.ts b/src/markdown/ir.blockquote-spacing.test.ts new file mode 100644 index 00000000000..635b1f8e807 --- /dev/null +++ b/src/markdown/ir.blockquote-spacing.test.ts @@ -0,0 +1,202 @@ +/** + * Blockquote Spacing Tests + * + * Per CommonMark spec (§5.1 Block quotes), blockquotes are "container blocks" that + * contain other block-level elements (paragraphs, code blocks, etc.). + * + * In plaintext rendering, the expected spacing between block-level elements is + * a single blank line (double newline `\n\n`). This is the standard paragraph + * separation used throughout markdown. + * + * CORRECT behavior: + * - Blockquote content followed by paragraph: "quote\n\nparagraph" (double \n) + * - Two consecutive blockquotes: "first\n\nsecond" (double \n) + * + * BUG (current behavior): + * - Produces triple newlines: "quote\n\n\nparagraph" + * + * Root cause: + * 1. `paragraph_close` inside blockquote adds `\n\n` (correct) + * 2. `blockquote_close` adds another `\n` (incorrect) + * 3. Result: `\n\n\n` (triple newlines - incorrect) + * + * The fix: `blockquote_close` should NOT add `\n` because: + * - Blockquotes are container blocks, not leaf blocks + * - The inner content (paragraph, heading, etc.) already provides block separation + * - Container closings shouldn't add their own spacing + */ + +import { describe, it, expect } from "vitest"; +import { markdownToIR } from "./ir.js"; + +describe("blockquote spacing", () => { + describe("blockquote followed by paragraph", () => { + it("should have double newline (one blank line) between blockquote and paragraph", () => { + const input = "> quote\n\nparagraph"; + const result = markdownToIR(input); + + // CORRECT: "quote\n\nparagraph" (double newline) + // BUG: "quote\n\n\nparagraph" (triple newline) + expect(result.text).toBe("quote\n\nparagraph"); + }); + + it("should not produce triple newlines", () => { + const input = "> quote\n\nparagraph"; + const result = markdownToIR(input); + + expect(result.text).not.toContain("\n\n\n"); + }); + }); + + describe("consecutive blockquotes", () => { + it("should have double newline between two blockquotes", () => { + const input = "> first\n\n> second"; + const result = markdownToIR(input); + + expect(result.text).toBe("first\n\nsecond"); + }); + + it("should not produce triple newlines between blockquotes", () => { + const input = "> first\n\n> second"; + const result = markdownToIR(input); + + expect(result.text).not.toContain("\n\n\n"); + }); + }); + + describe("nested blockquotes", () => { + it("should handle nested blockquotes correctly", () => { + const input = "> outer\n>> inner"; + const result = markdownToIR(input); + + // Inner blockquote becomes separate paragraph + expect(result.text).toBe("outer\n\ninner"); + }); + + it("should not produce triple newlines in nested blockquotes", () => { + const input = "> outer\n>> inner\n\nparagraph"; + const result = markdownToIR(input); + + expect(result.text).not.toContain("\n\n\n"); + }); + + it("should handle deeply nested blockquotes", () => { + const input = "> level 1\n>> level 2\n>>> level 3"; + const result = markdownToIR(input); + + // Each nested level is a new paragraph + expect(result.text).not.toContain("\n\n\n"); + }); + }); + + describe("blockquote followed by other block elements", () => { + it("should have double newline between blockquote and heading", () => { + const input = "> quote\n\n# Heading"; + const result = markdownToIR(input); + + expect(result.text).toBe("quote\n\nHeading"); + expect(result.text).not.toContain("\n\n\n"); + }); + + it("should have double newline between blockquote and list", () => { + const input = "> quote\n\n- item"; + const result = markdownToIR(input); + + // The list item becomes "• item" + expect(result.text).toBe("quote\n\n• item"); + expect(result.text).not.toContain("\n\n\n"); + }); + + it("should have double newline between blockquote and code block", () => { + const input = "> quote\n\n```\ncode\n```"; + const result = markdownToIR(input); + + // Code blocks preserve their trailing newline + expect(result.text.startsWith("quote\n\ncode")).toBe(true); + expect(result.text).not.toContain("\n\n\n"); + }); + + it("should have double newline between blockquote and horizontal rule", () => { + const input = "> quote\n\n---\n\nparagraph"; + const result = markdownToIR(input); + + // HR just adds a newline in IR, but should not create triple newlines + expect(result.text).not.toContain("\n\n\n"); + }); + }); + + describe("blockquote with multi-paragraph content", () => { + it("should handle multi-paragraph blockquote followed by paragraph", () => { + const input = "> first paragraph\n>\n> second paragraph\n\nfollowing paragraph"; + const result = markdownToIR(input); + + // Multi-paragraph blockquote should have proper internal spacing + // AND proper spacing with following content + expect(result.text).toContain("first paragraph\n\nsecond paragraph"); + expect(result.text).not.toContain("\n\n\n"); + }); + }); + + describe("blockquote prefix option", () => { + it("should include prefix and maintain proper spacing", () => { + const input = "> quote\n\nparagraph"; + const result = markdownToIR(input, { blockquotePrefix: "> " }); + + // With prefix, should still have proper spacing + expect(result.text).toBe("> quote\n\nparagraph"); + expect(result.text).not.toContain("\n\n\n"); + }); + }); + + describe("edge cases", () => { + it("should handle empty blockquote followed by paragraph", () => { + const input = ">\n\nparagraph"; + const result = markdownToIR(input); + + expect(result.text).not.toContain("\n\n\n"); + }); + + it("should handle blockquote at end of document", () => { + const input = "paragraph\n\n> quote"; + const result = markdownToIR(input); + + // No trailing triple newlines + expect(result.text).not.toContain("\n\n\n"); + }); + + it("should handle multiple blockquotes with paragraphs between", () => { + const input = "> first\n\nparagraph\n\n> second"; + const result = markdownToIR(input); + + expect(result.text).toBe("first\n\nparagraph\n\nsecond"); + expect(result.text).not.toContain("\n\n\n"); + }); + }); +}); + +describe("comparison with other block elements (control group)", () => { + it("paragraphs should have double newline separation", () => { + const input = "paragraph 1\n\nparagraph 2"; + const result = markdownToIR(input); + + expect(result.text).toBe("paragraph 1\n\nparagraph 2"); + expect(result.text).not.toContain("\n\n\n"); + }); + + it("list followed by paragraph should have double newline", () => { + const input = "- item 1\n- item 2\n\nparagraph"; + const result = markdownToIR(input); + + // Lists already work correctly + expect(result.text).toContain("• item 2\n\nparagraph"); + expect(result.text).not.toContain("\n\n\n"); + }); + + it("heading followed by paragraph should have double newline", () => { + const input = "# Heading\n\nparagraph"; + const result = markdownToIR(input); + + expect(result.text).toBe("Heading\n\nparagraph"); + expect(result.text).not.toContain("\n\n\n"); + }); +}); diff --git a/src/markdown/ir.hr-spacing.test.ts b/src/markdown/ir.hr-spacing.test.ts new file mode 100644 index 00000000000..c5bb6127bad --- /dev/null +++ b/src/markdown/ir.hr-spacing.test.ts @@ -0,0 +1,173 @@ +import { describe, it, expect } from "vitest"; +import { markdownToIR } from "./ir.js"; + +/** + * HR (Thematic Break) Spacing Analysis + * ===================================== + * + * CommonMark Spec (0.31.2) Section 4.1 - Thematic Breaks: + * - Thematic breaks (---, ***, ___) produce
Foo
\nbar
" + * + * PLAIN TEXT OUTPUT DECISION: + * + * The HR element is a block-level thematic separator. In plain text output, + * we render HRs as a visible separator "───" to maintain visual distinction. + */ + +describe("hr (thematic break) spacing", () => { + describe("current behavior documentation", () => { + it("just hr alone renders as separator", () => { + const result = markdownToIR("---"); + expect(result.text).toBe("───"); + }); + + it("hr between paragraphs renders with separator", () => { + const input = `Para 1 + +--- + +Para 2`; + const result = markdownToIR(input); + expect(result.text).toBe("Para 1\n\n───\n\nPara 2"); + }); + + it("hr interrupting paragraph (setext heading case)", () => { + // Note: "Para\n---" is a setext heading in CommonMark! + // Using *** to test actual HR behavior + const input = `Para 1 +*** +Para 2`; + const result = markdownToIR(input); + // HR interrupts para, renders visibly + expect(result.text).toContain("───"); + }); + }); + + describe("expected behavior (tests assert CORRECT behavior)", () => { + it("hr between paragraphs should render with separator", () => { + const input = `Para 1 + +--- + +Para 2`; + const result = markdownToIR(input); + expect(result.text).toBe("Para 1\n\n───\n\nPara 2"); + }); + + it("hr between paragraphs using *** should render with separator", () => { + const input = `Para 1 + +*** + +Para 2`; + const result = markdownToIR(input); + expect(result.text).toBe("Para 1\n\n───\n\nPara 2"); + }); + + it("hr between paragraphs using ___ should render with separator", () => { + const input = `Para 1 + +___ + +Para 2`; + const result = markdownToIR(input); + expect(result.text).toBe("Para 1\n\n───\n\nPara 2"); + }); + + it("consecutive hrs should produce multiple separators", () => { + const input = `--- +--- +---`; + const result = markdownToIR(input); + // Each HR renders as a separator + expect(result.text).toBe("───\n\n───\n\n───"); + }); + + it("hr at document end renders separator", () => { + const input = `Para + +---`; + const result = markdownToIR(input); + expect(result.text).toBe("Para\n\n───"); + }); + + it("hr at document start renders separator", () => { + const input = `--- + +Para`; + const result = markdownToIR(input); + expect(result.text).toBe("───\n\nPara"); + }); + + it("should not produce triple newlines regardless of hr placement", () => { + const inputs = [ + "Para 1\n\n---\n\nPara 2", + "Para 1\n---\nPara 2", + "---\nPara", + "Para\n---", + "Para 1\n\n---\n\n---\n\nPara 2", + "Para 1\n\n***\n\n---\n\n___\n\nPara 2", + ]; + + for (const input of inputs) { + const result = markdownToIR(input); + expect(result.text, `Input: ${JSON.stringify(input)}`).not.toMatch(/\n{3,}/); + } + }); + + it("multiple consecutive hrs between paragraphs should each render as separator", () => { + const input = `Para 1 + +--- + +--- + +--- + +Para 2`; + const result = markdownToIR(input); + expect(result.text).toBe("Para 1\n\n───\n\n───\n\n───\n\nPara 2"); + }); + }); + + describe("edge cases", () => { + it("hr between list items renders as separator without extra spacing", () => { + const input = `- Item 1 +- --- +- Item 2`; + const result = markdownToIR(input); + expect(result.text).toBe("• Item 1\n\n───\n\n• Item 2"); + expect(result.text).not.toMatch(/\n{3,}/); + }); + + it("hr followed immediately by heading", () => { + const input = `--- + +# Heading + +Para`; + const result = markdownToIR(input); + // HR renders as separator, heading renders, para follows + expect(result.text).not.toMatch(/\n{3,}/); + expect(result.text).toContain("───"); + }); + + it("heading followed by hr", () => { + const input = `# Heading + +--- + +Para`; + const result = markdownToIR(input); + // Heading ends, HR renders, para follows + expect(result.text).not.toMatch(/\n{3,}/); + expect(result.text).toContain("───"); + }); + }); +}); diff --git a/src/markdown/ir.list-spacing.test.ts b/src/markdown/ir.list-spacing.test.ts new file mode 100644 index 00000000000..56a8b3f174c --- /dev/null +++ b/src/markdown/ir.list-spacing.test.ts @@ -0,0 +1,33 @@ +import { describe, it, expect } from "vitest"; +import { markdownToIR } from "./ir.js"; + +describe("list paragraph spacing", () => { + it("adds blank line between bullet list and following paragraph", () => { + const input = `- item 1 +- item 2 + +Paragraph after`; + const result = markdownToIR(input); + // Should have two newlines between "item 2" and "Paragraph" + expect(result.text).toContain("item 2\n\nParagraph"); + }); + + it("adds blank line between ordered list and following paragraph", () => { + const input = `1. item 1 +2. item 2 + +Paragraph after`; + const result = markdownToIR(input); + expect(result.text).toContain("item 2\n\nParagraph"); + }); + + it("does not produce triple newlines", () => { + const input = `- item 1 +- item 2 + +Paragraph after`; + const result = markdownToIR(input); + // Should NOT have three consecutive newlines + expect(result.text).not.toContain("\n\n\n"); + }); +}); diff --git a/src/markdown/ir.nested-lists.test.ts b/src/markdown/ir.nested-lists.test.ts new file mode 100644 index 00000000000..4ca4e9bd9d7 --- /dev/null +++ b/src/markdown/ir.nested-lists.test.ts @@ -0,0 +1,301 @@ +/** + * Nested List Rendering Tests + * + * This test file documents and validates the expected behavior for nested lists + * when rendering Markdown to plain text. + * + * ## Expected Plain Text Behavior + * + * Per CommonMark spec, nested lists create a hierarchical structure. When rendering + * to plain text for messaging platforms, we expect: + * + * 1. **Indentation**: Each nesting level adds 2 spaces of indentation + * 2. **Bullet markers**: Bullet lists use "•" (Unicode bullet) + * 3. **Ordered markers**: Ordered lists use "N. " format + * 4. **Line endings**: Each list item ends with a single newline + * 5. **List termination**: A trailing newline after the entire list (for top-level only) + * + * ## markdown-it Token Sequence + * + * For nested lists, markdown-it emits tokens in this order: + * - bullet_list_open (outer) + * - list_item_open + * - paragraph_open (hidden=true for tight lists) + * - inline (with text children) + * - paragraph_close + * - bullet_list_open (nested) + * - list_item_open + * - paragraph_open + * - inline + * - paragraph_close + * - list_item_close + * - bullet_list_close + * - list_item_close + * - bullet_list_close + * + * The key insight is that nested lists appear INSIDE the parent list_item, + * between the paragraph and the list_item_close. + */ + +import { describe, it, expect } from "vitest"; +import { markdownToIR } from "./ir.js"; + +describe("Nested Lists - 2 Level Nesting", () => { + it("renders bullet items nested inside bullet items with proper indentation", () => { + const input = `- Item 1 + - Nested 1.1 + - Nested 1.2 +- Item 2`; + + const result = markdownToIR(input); + + // Expected output: + // • Item 1 + // • Nested 1.1 + // • Nested 1.2 + // • Item 2 + // Note: markdownToIR trims trailing whitespace, so no final newline + const expected = `• Item 1 + • Nested 1.1 + • Nested 1.2 +• Item 2`; + + expect(result.text).toBe(expected); + }); + + it("renders ordered items nested inside bullet items", () => { + const input = `- Bullet item + 1. Ordered sub-item 1 + 2. Ordered sub-item 2 +- Another bullet`; + + const result = markdownToIR(input); + + // Expected output: + // • Bullet item + // 1. Ordered sub-item 1 + // 2. Ordered sub-item 2 + // • Another bullet + const expected = `• Bullet item + 1. Ordered sub-item 1 + 2. Ordered sub-item 2 +• Another bullet`; + + expect(result.text).toBe(expected); + }); + + it("renders bullet items nested inside ordered items", () => { + const input = `1. Ordered 1 + - Bullet sub 1 + - Bullet sub 2 +2. Ordered 2`; + + const result = markdownToIR(input); + + // Expected output: + // 1. Ordered 1 + // • Bullet sub 1 + // • Bullet sub 2 + // 2. Ordered 2 + const expected = `1. Ordered 1 + • Bullet sub 1 + • Bullet sub 2 +2. Ordered 2`; + + expect(result.text).toBe(expected); + }); + + it("renders ordered items nested inside ordered items", () => { + const input = `1. First + 1. Sub-first + 2. Sub-second +2. Second`; + + const result = markdownToIR(input); + + const expected = `1. First + 1. Sub-first + 2. Sub-second +2. Second`; + + expect(result.text).toBe(expected); + }); +}); + +describe("Nested Lists - 3+ Level Deep Nesting", () => { + it("renders 3 levels of bullet nesting", () => { + const input = `- Level 1 + - Level 2 + - Level 3 +- Back to 1`; + + const result = markdownToIR(input); + + // Expected output with progressive indentation: + // • Level 1 + // • Level 2 + // • Level 3 + // • Back to 1 + const expected = `• Level 1 + • Level 2 + • Level 3 +• Back to 1`; + + expect(result.text).toBe(expected); + }); + + it("renders 4 levels of bullet nesting", () => { + const input = `- L1 + - L2 + - L3 + - L4 +- Back`; + + const result = markdownToIR(input); + + const expected = `• L1 + • L2 + • L3 + • L4 +• Back`; + + expect(result.text).toBe(expected); + }); + + it("renders 3 levels with multiple items at each level", () => { + const input = `- A1 + - B1 + - C1 + - C2 + - B2 +- A2`; + + const result = markdownToIR(input); + + const expected = `• A1 + • B1 + • C1 + • C2 + • B2 +• A2`; + + expect(result.text).toBe(expected); + }); +}); + +describe("Nested Lists - Mixed Nesting", () => { + it("renders complex mixed nesting (bullet > ordered > bullet)", () => { + const input = `- Bullet 1 + 1. Ordered 1.1 + - Deep bullet + 2. Ordered 1.2 +- Bullet 2`; + + const result = markdownToIR(input); + + const expected = `• Bullet 1 + 1. Ordered 1.1 + • Deep bullet + 2. Ordered 1.2 +• Bullet 2`; + + expect(result.text).toBe(expected); + }); + + it("renders ordered > bullet > ordered nesting", () => { + const input = `1. First + - Sub bullet + 1. Deep ordered + - Another bullet +2. Second`; + + const result = markdownToIR(input); + + const expected = `1. First + • Sub bullet + 1. Deep ordered + • Another bullet +2. Second`; + + expect(result.text).toBe(expected); + }); +}); + +describe("Nested Lists - Newline Handling", () => { + it("does not produce triple newlines in nested lists", () => { + const input = `- Item 1 + - Nested +- Item 2`; + + const result = markdownToIR(input); + expect(result.text).not.toContain("\n\n\n"); + }); + + it("does not produce double newlines between nested items", () => { + const input = `- A + - B + - C +- D`; + + const result = markdownToIR(input); + + // Between B and C there should be exactly one newline + expect(result.text).toContain(" • B\n • C"); + expect(result.text).not.toContain(" • B\n\n • C"); + }); + + it("properly terminates top-level list (trimmed output)", () => { + const input = `- Item 1 + - Nested +- Item 2`; + + const result = markdownToIR(input); + + // markdownToIR trims trailing whitespace, so output should end with Item 2 + // (no trailing newline after trimming) + expect(result.text).toMatch(/Item 2$/); + // Should not have excessive newlines before Item 2 + expect(result.text).not.toContain("\n\n• Item 2"); + }); +}); + +describe("Nested Lists - Edge Cases", () => { + it("handles empty parent with nested items", () => { + // This is a bit of an edge case - a list item that's just a marker followed by nested content + const input = `- + - Nested only +- Normal`; + + const result = markdownToIR(input); + + // Should still render the nested item with proper indentation + expect(result.text).toContain(" • Nested only"); + }); + + it("handles nested list as first child of parent item", () => { + const input = `- Parent text + - Child +- Another parent`; + + const result = markdownToIR(input); + + // The child should appear indented under the parent + expect(result.text).toContain("• Parent text\n • Child"); + }); + + it("handles sibling nested lists at same level", () => { + const input = `- A + - A1 +- B + - B1`; + + const result = markdownToIR(input); + + const expected = `• A + • A1 +• B + • B1`; + + expect(result.text).toBe(expected); + }); +}); diff --git a/src/markdown/ir.table-code.test.ts b/src/markdown/ir.table-code.test.ts new file mode 100644 index 00000000000..5a52c3dd22c --- /dev/null +++ b/src/markdown/ir.table-code.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from "vitest"; +import { markdownToIR } from "./ir.js"; + +describe("markdownToIR tableMode code - style overlap", () => { + it("should not have overlapping styles when cell has bold text", () => { + const md = ` +| Name | Value | +|------|-------| +| **Bold** | Normal | +`.trim(); + + const ir = markdownToIR(md, { tableMode: "code" }); + + // Check for overlapping styles + const codeBlockSpan = ir.styles.find((s) => s.style === "code_block"); + const boldSpan = ir.styles.find((s) => s.style === "bold"); + + // Either: + // 1. There should be no bold spans in code mode (inner styles stripped), OR + // 2. If bold spans exist, they should not overlap with code_block span + if (codeBlockSpan && boldSpan) { + // Check for overlap + const overlaps = boldSpan.start < codeBlockSpan.end && boldSpan.end > codeBlockSpan.start; + // Overlapping styles are the bug - this should fail until fixed + expect(overlaps).toBe(false); + } + }); + + it("should not have overlapping styles when cell has italic text", () => { + const md = ` +| Name | Value | +|------|-------| +| *Italic* | Normal | +`.trim(); + + const ir = markdownToIR(md, { tableMode: "code" }); + + const codeBlockSpan = ir.styles.find((s) => s.style === "code_block"); + const italicSpan = ir.styles.find((s) => s.style === "italic"); + + if (codeBlockSpan && italicSpan) { + const overlaps = italicSpan.start < codeBlockSpan.end && italicSpan.end > codeBlockSpan.start; + expect(overlaps).toBe(false); + } + }); + + it("should not have overlapping styles when cell has inline code", () => { + const md = ` +| Name | Value | +|------|-------| +| \`code\` | Normal | +`.trim(); + + const ir = markdownToIR(md, { tableMode: "code" }); + + const codeBlockSpan = ir.styles.find((s) => s.style === "code_block"); + const codeSpan = ir.styles.find((s) => s.style === "code"); + + if (codeBlockSpan && codeSpan) { + const overlaps = codeSpan.start < codeBlockSpan.end && codeSpan.end > codeBlockSpan.start; + expect(overlaps).toBe(false); + } + }); + + it("should not have overlapping styles with multiple styled cells", () => { + const md = ` +| Name | Value | +|------|-------| +| **A** | *B* | +| _C_ | ~~D~~ | +`.trim(); + + const ir = markdownToIR(md, { tableMode: "code" }); + + const codeBlockSpan = ir.styles.find((s) => s.style === "code_block"); + if (!codeBlockSpan) { + return; + } + + // Check that no non-code_block style overlaps with code_block + for (const style of ir.styles) { + if (style.style === "code_block") { + continue; + } + const overlaps = style.start < codeBlockSpan.end && style.end > codeBlockSpan.start; + expect(overlaps).toBe(false); + } + }); +}); diff --git a/src/markdown/ir.ts b/src/markdown/ir.ts index 37c15c198ad..f72d472b67f 100644 --- a/src/markdown/ir.ts +++ b/src/markdown/ir.ts @@ -364,6 +364,14 @@ function appendCell(state: RenderState, cell: TableCell) { } } +function appendCellTextOnly(state: RenderState, cell: TableCell) { + if (!cell.text) { + return; + } + state.text += cell.text; + // Do not append styles - this is used for code blocks where inner styles would overlap +} + function renderTableAsBullets(state: RenderState) { if (!state.table) { return; @@ -474,7 +482,8 @@ function renderTableAsCode(state: RenderState) { state.text += " "; const cell = cells[i]; if (cell) { - appendCell(state, cell); + // Use text-only append to avoid overlapping styles with code_block + appendCellTextOnly(state, cell); } const pad = widths[i] - (cell?.text.length ?? 0); if (pad > 0) { @@ -589,27 +598,43 @@ function renderTokens(tokens: MarkdownToken[], state: RenderState): void { break; case "blockquote_close": closeStyle(state, "blockquote"); - state.text += "\n"; break; case "bullet_list_open": + // Add newline before nested list starts (so nested items appear on new line) + if (state.env.listStack.length > 0) { + state.text += "\n"; + } state.env.listStack.push({ type: "bullet", index: 0 }); break; case "bullet_list_close": state.env.listStack.pop(); + if (state.env.listStack.length === 0) { + state.text += "\n"; + } break; case "ordered_list_open": { + // Add newline before nested list starts (so nested items appear on new line) + if (state.env.listStack.length > 0) { + state.text += "\n"; + } const start = Number(getAttr(token, "start") ?? "1"); state.env.listStack.push({ type: "ordered", index: start - 1 }); break; } case "ordered_list_close": state.env.listStack.pop(); + if (state.env.listStack.length === 0) { + state.text += "\n"; + } break; case "list_item_open": appendListPrefix(state); break; case "list_item_close": - state.text += "\n"; + // Avoid double newlines (nested list's last item already added newline) + if (!state.text.endsWith("\n")) { + state.text += "\n"; + } break; case "code_block": case "fence": @@ -680,7 +705,8 @@ function renderTokens(tokens: MarkdownToken[], state: RenderState): void { break; case "hr": - state.text += "\n"; + // Render as a visual separator + state.text += "───\n\n"; break; default: if (token.children) { @@ -744,7 +770,13 @@ function mergeStyleSpans(spans: MarkdownStyleSpan[]): MarkdownStyleSpan[] { const merged: MarkdownStyleSpan[] = []; for (const span of sorted) { const prev = merged[merged.length - 1]; - if (prev && prev.style === span.style && span.start <= prev.end) { + if ( + prev && + prev.style === span.style && + // Blockquotes are container blocks. Adjacent blockquote spans should not merge or + // consecutive blockquotes can "style bleed" across the paragraph boundary. + (span.start < prev.end || (span.start === prev.end && span.style !== "blockquote")) + ) { prev.end = Math.max(prev.end, span.end); continue; } diff --git a/src/signal/format.chunking.test.ts b/src/signal/format.chunking.test.ts new file mode 100644 index 00000000000..ded30c842bb --- /dev/null +++ b/src/signal/format.chunking.test.ts @@ -0,0 +1,390 @@ +import { describe, expect, it } from "vitest"; +import { markdownToSignalTextChunks } from "./format.js"; + +describe("splitSignalFormattedText", () => { + // We test the internal chunking behavior via markdownToSignalTextChunks with + // pre-rendered SignalFormattedText. The helper is not exported, so we test + // it indirectly through integration tests and by constructing scenarios that + // exercise the splitting logic. + + describe("style-aware splitting - basic text", () => { + it("text with no styles splits correctly at whitespace", () => { + // Create text that exceeds limit and must be split + const limit = 20; + const markdown = "hello world this is a test"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks.length).toBeGreaterThan(1); + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + // Verify all text is preserved (joined chunks should contain all words) + const joinedText = chunks.map((c) => c.text).join(" "); + expect(joinedText).toContain("hello"); + expect(joinedText).toContain("world"); + expect(joinedText).toContain("test"); + }); + + it("empty text returns empty array", () => { + // Empty input produces no chunks (not an empty chunk) + const chunks = markdownToSignalTextChunks("", 100); + expect(chunks).toEqual([]); + }); + + it("text under limit returns single chunk unchanged", () => { + const markdown = "short text"; + const chunks = markdownToSignalTextChunks(markdown, 100); + + expect(chunks).toHaveLength(1); + expect(chunks[0].text).toBe("short text"); + }); + }); + + describe("style-aware splitting - style preservation", () => { + it("style fully within first chunk stays in first chunk", () => { + // Create a message where bold text is in the first chunk + const limit = 30; + const markdown = "**bold** word more words here that exceed limit"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks.length).toBeGreaterThan(1); + // First chunk should contain the bold style + const firstChunk = chunks[0]; + expect(firstChunk.text).toContain("bold"); + expect(firstChunk.styles.some((s) => s.style === "BOLD")).toBe(true); + // The bold style should start at position 0 in the first chunk + const boldStyle = firstChunk.styles.find((s) => s.style === "BOLD"); + expect(boldStyle).toBeDefined(); + expect(boldStyle!.start).toBe(0); + expect(boldStyle!.length).toBe(4); // "bold" + }); + + it("style fully within second chunk has offset adjusted to chunk-local position", () => { + // Create a message where the styled text is in the second chunk + const limit = 30; + const markdown = "some filler text here **bold** at the end"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks.length).toBeGreaterThan(1); + // Find the chunk containing "bold" + const chunkWithBold = chunks.find((c) => c.text.includes("bold")); + expect(chunkWithBold).toBeDefined(); + expect(chunkWithBold!.styles.some((s) => s.style === "BOLD")).toBe(true); + + // The bold style should have chunk-local offset (not original text offset) + const boldStyle = chunkWithBold!.styles.find((s) => s.style === "BOLD"); + expect(boldStyle).toBeDefined(); + // The offset should be the position within this chunk, not the original text + const boldPos = chunkWithBold!.text.indexOf("bold"); + expect(boldStyle!.start).toBe(boldPos); + expect(boldStyle!.length).toBe(4); + }); + + it("style spanning chunk boundary is split into two ranges", () => { + // Create text where a styled span crosses the chunk boundary + const limit = 15; + // "hello **bold text here** end" - the bold spans across chunk boundary + const markdown = "hello **boldtexthere** end"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks.length).toBeGreaterThan(1); + + // Both chunks should have BOLD styles if the span was split + const chunksWithBold = chunks.filter((c) => c.styles.some((s) => s.style === "BOLD")); + // At least one chunk should have the bold style + expect(chunksWithBold.length).toBeGreaterThanOrEqual(1); + + // For each chunk with bold, verify the style range is valid for that chunk + for (const chunk of chunksWithBold) { + for (const style of chunk.styles.filter((s) => s.style === "BOLD")) { + expect(style.start).toBeGreaterThanOrEqual(0); + expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length); + } + } + }); + + it("style starting exactly at split point goes entirely to second chunk", () => { + // Create text where style starts right at where we'd split + const limit = 10; + const markdown = "abcdefghi **bold**"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks.length).toBeGreaterThan(1); + + // Find chunk with bold + const chunkWithBold = chunks.find((c) => c.styles.some((s) => s.style === "BOLD")); + expect(chunkWithBold).toBeDefined(); + + // Verify the bold style is valid within its chunk + const boldStyle = chunkWithBold!.styles.find((s) => s.style === "BOLD"); + expect(boldStyle).toBeDefined(); + expect(boldStyle!.start).toBeGreaterThanOrEqual(0); + expect(boldStyle!.start + boldStyle!.length).toBeLessThanOrEqual(chunkWithBold!.text.length); + }); + + it("style ending exactly at split point stays entirely in first chunk", () => { + const limit = 10; + const markdown = "**bold** rest of text"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + // First chunk should have the complete bold style + const firstChunk = chunks[0]; + if (firstChunk.text.includes("bold")) { + const boldStyle = firstChunk.styles.find((s) => s.style === "BOLD"); + expect(boldStyle).toBeDefined(); + expect(boldStyle!.start + boldStyle!.length).toBeLessThanOrEqual(firstChunk.text.length); + } + }); + + it("multiple styles, some spanning boundary, some not", () => { + const limit = 25; + // Mix of styles: italic at start, bold spanning boundary, monospace at end + const markdown = "_italic_ some text **bold text** and `code`"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks.length).toBeGreaterThan(1); + + // Verify all style ranges are valid within their respective chunks + for (const chunk of chunks) { + for (const style of chunk.styles) { + expect(style.start).toBeGreaterThanOrEqual(0); + expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length); + expect(style.length).toBeGreaterThan(0); + } + } + + // Collect all styles across chunks + const allStyles = chunks.flatMap((c) => c.styles.map((s) => s.style)); + // We should have at least italic, bold, and monospace somewhere + expect(allStyles).toContain("ITALIC"); + expect(allStyles).toContain("BOLD"); + expect(allStyles).toContain("MONOSPACE"); + }); + }); + + describe("style-aware splitting - edge cases", () => { + it("handles zero-length text with styles gracefully", () => { + // Edge case: empty markdown produces no chunks + const chunks = markdownToSignalTextChunks("", 100); + expect(chunks).toHaveLength(0); + }); + + it("handles text that splits exactly at limit", () => { + const limit = 10; + const markdown = "1234567890"; // exactly 10 chars + const chunks = markdownToSignalTextChunks(markdown, limit); + + expect(chunks).toHaveLength(1); + expect(chunks[0].text).toBe("1234567890"); + }); + + it("preserves style through whitespace trimming", () => { + const limit = 30; + const markdown = "**bold** some text that is longer than limit"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + // Bold should be preserved in first chunk + const firstChunk = chunks[0]; + if (firstChunk.text.includes("bold")) { + expect(firstChunk.styles.some((s) => s.style === "BOLD")).toBe(true); + } + }); + + it("handles repeated substrings correctly (no indexOf fragility)", () => { + // This test exposes the fragility of using indexOf to find chunk positions. + // If the same substring appears multiple times, indexOf finds the first + // occurrence, not necessarily the correct one. + const limit = 20; + // "word" appears multiple times - indexOf("word") would always find first + const markdown = "word **bold word** word more text here to chunk"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + // Verify chunks are under limit + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + + // Find chunk(s) with bold style + const chunksWithBold = chunks.filter((c) => c.styles.some((s) => s.style === "BOLD")); + expect(chunksWithBold.length).toBeGreaterThanOrEqual(1); + + // The bold style should correctly cover "bold word" (or part of it if split) + // and NOT incorrectly point to the first "word" in the text + for (const chunk of chunksWithBold) { + for (const style of chunk.styles.filter((s) => s.style === "BOLD")) { + const styledText = chunk.text.slice(style.start, style.start + style.length); + // The styled text should be part of "bold word", not the initial "word" + expect(styledText).toMatch(/^(bold( word)?|word)$/); + expect(style.start).toBeGreaterThanOrEqual(0); + expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length); + } + } + }); + + it("handles chunk that starts with whitespace after split", () => { + // When text is split at whitespace, the next chunk might have leading + // whitespace trimmed. Styles must account for this. + const limit = 15; + const markdown = "some text **bold** at end"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + // All style ranges must be valid + for (const chunk of chunks) { + for (const style of chunk.styles) { + expect(style.start).toBeGreaterThanOrEqual(0); + expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length); + } + } + }); + + it("deterministically tracks position without indexOf fragility", () => { + // This test ensures the chunker doesn't rely on finding chunks via indexOf + // which can fail when chunkText trims whitespace or when duplicates exist. + // Create text with lots of whitespace and repeated patterns. + const limit = 25; + const markdown = "aaa **bold** aaa **bold** aaa extra text to force split"; + const chunks = markdownToSignalTextChunks(markdown, limit); + + // Multiple chunks expected + expect(chunks.length).toBeGreaterThan(1); + + // All chunks should respect limit + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + + // All style ranges must be valid within their chunks + for (const chunk of chunks) { + for (const style of chunk.styles) { + expect(style.start).toBeGreaterThanOrEqual(0); + expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length); + // The styled text at that position should actually be "bold" + if (style.style === "BOLD") { + const styledText = chunk.text.slice(style.start, style.start + style.length); + expect(styledText).toBe("bold"); + } + } + } + }); + }); +}); + +describe("markdownToSignalTextChunks", () => { + describe("link expansion chunk limit", () => { + it("does not exceed chunk limit after link expansion", () => { + // Create text that is close to limit, with a link that will expand + const limit = 100; + // Create text that's 90 chars, leaving only 10 chars of headroom + const filler = "x".repeat(80); + // This link will expand from "[link](url)" to "link (https://example.com/very/long/path)" + const markdown = `${filler} [link](https://example.com/very/long/path/that/will/exceed/limit)`; + + const chunks = markdownToSignalTextChunks(markdown, limit); + + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + }); + + it("handles multiple links near chunk boundary", () => { + const limit = 100; + const filler = "x".repeat(60); + const markdown = `${filler} [a](https://a.com) [b](https://b.com) [c](https://c.com)`; + + const chunks = markdownToSignalTextChunks(markdown, limit); + + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + }); + }); + + describe("link expansion with style preservation", () => { + it("long message with links that expand beyond limit preserves all text", () => { + const limit = 80; + const filler = "a".repeat(50); + const markdown = `${filler} [click here](https://example.com/very/long/path/to/page) more text`; + + const chunks = markdownToSignalTextChunks(markdown, limit); + + // All chunks should be under limit + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + + // Combined text should contain all original content + const combined = chunks.map((c) => c.text).join(""); + expect(combined).toContain(filler); + expect(combined).toContain("click here"); + expect(combined).toContain("example.com"); + }); + + it("styles (bold, italic) survive chunking correctly after link expansion", () => { + const limit = 60; + const markdown = + "**bold start** text [link](https://example.com/path) _italic_ more content here to force chunking"; + + const chunks = markdownToSignalTextChunks(markdown, limit); + + // Should have multiple chunks + expect(chunks.length).toBeGreaterThan(1); + + // All style ranges should be valid within their chunks + for (const chunk of chunks) { + for (const style of chunk.styles) { + expect(style.start).toBeGreaterThanOrEqual(0); + expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length); + expect(style.length).toBeGreaterThan(0); + } + } + + // Verify styles exist somewhere + const allStyles = chunks.flatMap((c) => c.styles.map((s) => s.style)); + expect(allStyles).toContain("BOLD"); + expect(allStyles).toContain("ITALIC"); + }); + + it("multiple links near chunk boundary all get properly chunked", () => { + const limit = 50; + const markdown = + "[first](https://first.com/long/path) [second](https://second.com/another/path) [third](https://third.com)"; + + const chunks = markdownToSignalTextChunks(markdown, limit); + + // All chunks should respect limit + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + + // All link labels should appear somewhere + const combined = chunks.map((c) => c.text).join(""); + expect(combined).toContain("first"); + expect(combined).toContain("second"); + expect(combined).toContain("third"); + }); + + it("preserves spoiler style through link expansion and chunking", () => { + const limit = 40; + const markdown = + "||secret content|| and [link](https://example.com/path) with more text to chunk"; + + const chunks = markdownToSignalTextChunks(markdown, limit); + + // All chunks should respect limit + for (const chunk of chunks) { + expect(chunk.text.length).toBeLessThanOrEqual(limit); + } + + // Spoiler style should exist and be valid + const chunkWithSpoiler = chunks.find((c) => c.styles.some((s) => s.style === "SPOILER")); + expect(chunkWithSpoiler).toBeDefined(); + + const spoilerStyle = chunkWithSpoiler!.styles.find((s) => s.style === "SPOILER"); + expect(spoilerStyle).toBeDefined(); + expect(spoilerStyle!.start).toBeGreaterThanOrEqual(0); + expect(spoilerStyle!.start + spoilerStyle!.length).toBeLessThanOrEqual( + chunkWithSpoiler!.text.length, + ); + }); + }); +}); diff --git a/src/signal/format.links.test.ts b/src/signal/format.links.test.ts new file mode 100644 index 00000000000..7ef77e71db5 --- /dev/null +++ b/src/signal/format.links.test.ts @@ -0,0 +1,58 @@ +import { describe, expect, it } from "vitest"; +import { markdownToSignalText } from "./format.js"; + +describe("markdownToSignalText", () => { + describe("duplicate URL display", () => { + it("does not duplicate URL when label matches URL without protocol", () => { + // [selfh.st](http://selfh.st) should render as "selfh.st" not "selfh.st (http://selfh.st)" + const res = markdownToSignalText("[selfh.st](http://selfh.st)"); + expect(res.text).toBe("selfh.st"); + }); + + it("does not duplicate URL when label matches URL without https protocol", () => { + const res = markdownToSignalText("[example.com](https://example.com)"); + expect(res.text).toBe("example.com"); + }); + + it("does not duplicate URL when label matches URL without www prefix", () => { + const res = markdownToSignalText("[www.example.com](https://example.com)"); + expect(res.text).toBe("www.example.com"); + }); + + it("does not duplicate URL when label matches URL without trailing slash", () => { + const res = markdownToSignalText("[example.com](https://example.com/)"); + expect(res.text).toBe("example.com"); + }); + + it("does not duplicate URL when label matches URL with multiple trailing slashes", () => { + const res = markdownToSignalText("[example.com](https://example.com///)"); + expect(res.text).toBe("example.com"); + }); + + it("does not duplicate URL when label includes www but URL does not", () => { + const res = markdownToSignalText("[example.com](https://www.example.com)"); + expect(res.text).toBe("example.com"); + }); + + it("handles case-insensitive domain comparison", () => { + const res = markdownToSignalText("[EXAMPLE.COM](https://example.com)"); + expect(res.text).toBe("EXAMPLE.COM"); + }); + + it("still shows URL when label is meaningfully different", () => { + const res = markdownToSignalText("[click here](https://example.com)"); + expect(res.text).toBe("click here (https://example.com)"); + }); + + it("handles URL with path - should show URL when label is just domain", () => { + // Label is just domain, URL has path - these are meaningfully different + const res = markdownToSignalText("[example.com](https://example.com/page)"); + expect(res.text).toBe("example.com (https://example.com/page)"); + }); + + it("does not duplicate when label matches full URL with path", () => { + const res = markdownToSignalText("[example.com/page](https://example.com/page)"); + expect(res.text).toBe("example.com/page"); + }); + }); +}); diff --git a/src/signal/format.test.ts b/src/signal/format.test.ts index 40e509fa891..e22a6607f99 100644 --- a/src/signal/format.test.ts +++ b/src/signal/format.test.ts @@ -21,6 +21,18 @@ describe("markdownToSignalText", () => { expect(res.styles).toEqual([]); }); + it("keeps style offsets correct with multiple expanded links", () => { + const markdown = + "[first](https://example.com/first) **bold** [second](https://example.com/second)"; + const res = markdownToSignalText(markdown); + + const expectedText = + "first (https://example.com/first) bold second (https://example.com/second)"; + + expect(res.text).toBe(expectedText); + expect(res.styles).toEqual([{ start: expectedText.indexOf("bold"), length: 4, style: "BOLD" }]); + }); + it("applies spoiler styling", () => { const res = markdownToSignalText("hello ||secret|| world"); diff --git a/src/signal/format.ts b/src/signal/format.ts index f310b75a6ee..8f35a34f2da 100644 --- a/src/signal/format.ts +++ b/src/signal/format.ts @@ -34,6 +34,17 @@ type Insertion = { length: number; }; +function normalizeUrlForComparison(url: string): string { + let normalized = url.toLowerCase(); + // Strip protocol + normalized = normalized.replace(/^https?:\/\//, ""); + // Strip www. prefix + normalized = normalized.replace(/^www\./, ""); + // Strip trailing slashes + normalized = normalized.replace(/\/+$/, ""); + return normalized; +} + function mapStyle(style: MarkdownStyle): SignalTextStyle | null { switch (style) { case "bold": @@ -100,15 +111,17 @@ function applyInsertionsToStyles( } const sortedInsertions = [...insertions].toSorted((a, b) => a.pos - b.pos); let updated = spans; + let cumulativeShift = 0; for (const insertion of sortedInsertions) { + const insertionPos = insertion.pos + cumulativeShift; const next: SignalStyleSpan[] = []; for (const span of updated) { - if (span.end <= insertion.pos) { + if (span.end <= insertionPos) { next.push(span); continue; } - if (span.start >= insertion.pos) { + if (span.start >= insertionPos) { next.push({ start: span.start + insertion.length, end: span.end + insertion.length, @@ -116,15 +129,15 @@ function applyInsertionsToStyles( }); continue; } - if (span.start < insertion.pos && span.end > insertion.pos) { - if (insertion.pos > span.start) { + if (span.start < insertionPos && span.end > insertionPos) { + if (insertionPos > span.start) { next.push({ start: span.start, - end: insertion.pos, + end: insertionPos, style: span.style, }); } - const shiftedStart = insertion.pos + insertion.length; + const shiftedStart = insertionPos + insertion.length; const shiftedEnd = span.end + insertion.length; if (shiftedEnd > shiftedStart) { next.push({ @@ -136,6 +149,7 @@ function applyInsertionsToStyles( } } updated = next; + cumulativeShift += insertion.length; } return updated; @@ -161,16 +175,26 @@ function renderSignalText(ir: MarkdownIR): SignalFormattedText { const href = link.href.trim(); const label = text.slice(link.start, link.end); const trimmedLabel = label.trim(); - const comparableHref = href.startsWith("mailto:") ? href.slice("mailto:".length) : href; if (href) { if (!trimmedLabel) { out += href; insertions.push({ pos: link.end, length: href.length }); - } else if (trimmedLabel !== href && trimmedLabel !== comparableHref) { - const addition = ` (${href})`; - out += addition; - insertions.push({ pos: link.end, length: addition.length }); + } else { + // Check if label is similar enough to URL that showing both would be redundant + const normalizedLabel = normalizeUrlForComparison(trimmedLabel); + let comparableHref = href; + if (href.startsWith("mailto:")) { + comparableHref = href.slice("mailto:".length); + } + const normalizedHref = normalizeUrlForComparison(comparableHref); + + // Only show URL if label is meaningfully different from it + if (normalizedLabel !== normalizedHref) { + const addition = ` (${href})`; + out += addition; + insertions.push({ pos: link.end, length: addition.length }); + } } } @@ -214,13 +238,136 @@ export function markdownToSignalText( const ir = markdownToIR(markdown ?? "", { linkify: true, enableSpoilers: true, - headingStyle: "none", - blockquotePrefix: "", + headingStyle: "bold", + blockquotePrefix: "> ", tableMode: options.tableMode, }); return renderSignalText(ir); } +function sliceSignalStyles( + styles: SignalTextStyleRange[], + start: number, + end: number, +): SignalTextStyleRange[] { + const sliced: SignalTextStyleRange[] = []; + for (const style of styles) { + const styleEnd = style.start + style.length; + const sliceStart = Math.max(style.start, start); + const sliceEnd = Math.min(styleEnd, end); + if (sliceEnd > sliceStart) { + sliced.push({ + start: sliceStart - start, + length: sliceEnd - sliceStart, + style: style.style, + }); + } + } + return sliced; +} + +/** + * Split Signal formatted text into chunks under the limit while preserving styles. + * + * This implementation deterministically tracks cursor position without using indexOf, + * which is fragile when chunks are trimmed or when duplicate substrings exist. + * Styles spanning chunk boundaries are split into separate ranges for each chunk. + */ +function splitSignalFormattedText( + formatted: SignalFormattedText, + limit: number, +): SignalFormattedText[] { + const { text, styles } = formatted; + + if (text.length <= limit) { + return [formatted]; + } + + const results: SignalFormattedText[] = []; + let remaining = text; + let offset = 0; // Track position in original text for style slicing + + while (remaining.length > 0) { + if (remaining.length <= limit) { + // Last chunk - take everything remaining + const trimmed = remaining.trimEnd(); + if (trimmed.length > 0) { + results.push({ + text: trimmed, + styles: mergeStyles(sliceSignalStyles(styles, offset, offset + trimmed.length)), + }); + } + break; + } + + // Find a good break point within the limit + const window = remaining.slice(0, limit); + let breakIdx = findBreakIndex(window); + + // If no good break point found, hard break at limit + if (breakIdx <= 0) { + breakIdx = limit; + } + + // Extract chunk and trim trailing whitespace + const rawChunk = remaining.slice(0, breakIdx); + const chunk = rawChunk.trimEnd(); + + if (chunk.length > 0) { + results.push({ + text: chunk, + styles: mergeStyles(sliceSignalStyles(styles, offset, offset + chunk.length)), + }); + } + + // Advance past the chunk and any whitespace separator + const brokeOnWhitespace = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]); + const nextStart = Math.min(remaining.length, breakIdx + (brokeOnWhitespace ? 1 : 0)); + + // Chunks are sent as separate messages, so we intentionally drop boundary whitespace. + // Keep `offset` in sync with the dropped characters so style slicing stays correct. + remaining = remaining.slice(nextStart).trimStart(); + offset = text.length - remaining.length; + } + + return results; +} + +/** + * Find the best break index within a text window. + * Prefers newlines over whitespace, avoids breaking inside parentheses. + */ +function findBreakIndex(window: string): number { + let lastNewline = -1; + let lastWhitespace = -1; + let parenDepth = 0; + + for (let i = 0; i < window.length; i++) { + const char = window[i]; + + if (char === "(") { + parenDepth++; + continue; + } + if (char === ")" && parenDepth > 0) { + parenDepth--; + continue; + } + + // Only consider break points outside parentheses + if (parenDepth === 0) { + if (char === "\n") { + lastNewline = i; + } else if (/\s/.test(char)) { + lastWhitespace = i; + } + } + } + + // Prefer newline break, fall back to whitespace + return lastNewline > 0 ? lastNewline : lastWhitespace; +} + export function markdownToSignalTextChunks( markdown: string, limit: number, @@ -229,10 +376,22 @@ export function markdownToSignalTextChunks( const ir = markdownToIR(markdown ?? "", { linkify: true, enableSpoilers: true, - headingStyle: "none", - blockquotePrefix: "", + headingStyle: "bold", + blockquotePrefix: "> ", tableMode: options.tableMode, }); const chunks = chunkMarkdownIR(ir, limit); - return chunks.map((chunk) => renderSignalText(chunk)); + const results: SignalFormattedText[] = []; + + for (const chunk of chunks) { + const rendered = renderSignalText(chunk); + // If link expansion caused the chunk to exceed the limit, re-chunk it + if (rendered.text.length > limit) { + results.push(...splitSignalFormattedText(rendered, limit)); + } else { + results.push(rendered); + } + } + + return results; } diff --git a/src/signal/format.visual.test.ts b/src/signal/format.visual.test.ts new file mode 100644 index 00000000000..78f913b7945 --- /dev/null +++ b/src/signal/format.visual.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from "vitest"; +import { markdownToSignalText } from "./format.js"; + +describe("markdownToSignalText", () => { + describe("headings visual distinction", () => { + it("renders headings as bold text", () => { + const res = markdownToSignalText("# Heading 1"); + expect(res.text).toBe("Heading 1"); + expect(res.styles).toContainEqual({ start: 0, length: 9, style: "BOLD" }); + }); + + it("renders h2 headings as bold text", () => { + const res = markdownToSignalText("## Heading 2"); + expect(res.text).toBe("Heading 2"); + expect(res.styles).toContainEqual({ start: 0, length: 9, style: "BOLD" }); + }); + + it("renders h3 headings as bold text", () => { + const res = markdownToSignalText("### Heading 3"); + expect(res.text).toBe("Heading 3"); + expect(res.styles).toContainEqual({ start: 0, length: 9, style: "BOLD" }); + }); + }); + + describe("blockquote visual distinction", () => { + it("renders blockquotes with a visible prefix", () => { + const res = markdownToSignalText("> This is a quote"); + // Should have some kind of prefix to distinguish it + expect(res.text).toMatch(/^[│>]/); + expect(res.text).toContain("This is a quote"); + }); + + it("renders multi-line blockquotes with prefix", () => { + const res = markdownToSignalText("> Line 1\n> Line 2"); + // Should start with the prefix + expect(res.text).toMatch(/^[│>]/); + expect(res.text).toContain("Line 1"); + expect(res.text).toContain("Line 2"); + }); + }); + + describe("horizontal rule rendering", () => { + it("renders horizontal rules as a visible separator", () => { + const res = markdownToSignalText("Para 1\n\n---\n\nPara 2"); + // Should contain some kind of visual separator like ─── + expect(res.text).toMatch(/[─—-]{3,}/); + }); + + it("renders horizontal rule between content", () => { + const res = markdownToSignalText("Above\n\n***\n\nBelow"); + expect(res.text).toContain("Above"); + expect(res.text).toContain("Below"); + // Should have a separator + expect(res.text).toMatch(/[─—-]{3,}/); + }); + }); +}); diff --git a/src/slack/format.test.ts b/src/slack/format.test.ts index 7ccda8e8758..eebb2bbf79b 100644 --- a/src/slack/format.test.ts +++ b/src/slack/format.test.ts @@ -82,10 +82,10 @@ describe("markdownToSlackMrkdwn", () => { expect(res).toBe("> Quote"); }); - it("handles adjacent list items", () => { + it("handles nested list items", () => { const res = markdownToSlackMrkdwn("- item\n - nested"); - // markdown-it treats indented items as continuation, not nesting - expect(res).toBe("• item • nested"); + // markdown-it correctly parses this as a nested list + expect(res).toBe("• item\n • nested"); }); it("handles complex message with multiple elements", () => {