mirror of
https://github.com/moltbot/moltbot.git
synced 2026-03-07 22:44:16 +00:00
fix(signal): outbound formatting and markdown IR rendering improvements (#9781)
* fix: Signal and markdown formatting improvements Markdown IR fixes: - Fix list-paragraph spacing (extra newline between list items and following paragraphs) - Fix nested list indentation and newline handling - Fix blockquote_close emitting redundant newline (inner content handles spacing) - Render horizontal rules as visible ─── separator instead of silent drop - Strip inner cell styles in code-mode tables to prevent overlapping with code_block span Signal formatting fixes: - Normalize URLs for dedup comparison (strip protocol, www., trailing slash) - Render headings as bold text (headingStyle: 'bold') - Add '> ' prefix to blockquotes for visual distinction - Re-chunk after link expansion to respect chunk size limits Tests: - 51 new tests for markdown IR (spacing, lists, blockquotes, tables, HR) - 18 new tests for Signal formatting (URL dedup, headings, blockquotes, HR, chunking) - Update Slack nested list test expectation to match corrected IR output * refactor: style-aware Signal text chunker Replace indexOf-based chunk position tracking with deterministic cursor tracking. The new splitSignalFormattedText: - Splits at whitespace/newline boundaries within the limit - Avoids breaking inside parentheses (preserves expanded link URLs) - Slices style ranges at chunk boundaries with correct local offsets - Tracks position via offset arithmetic instead of fragile indexOf Removes dependency on chunkText from auto-reply/chunk. Tests: 19 new tests covering style preservation across chunk boundaries, edge cases (empty text, under limit, exact split points), and integration with link expansion. * fix: correct Signal style offsets with multiple link expansions applyInsertionsToStyles() was using original coordinates for each insertion without tracking cumulative shift from prior insertions. This caused bold/italic/etc styles to drift to wrong text positions when multiple markdown links expanded in a single message. Added cumulative shift tracking and a regression test. * test: clean up test noise and fix ineffective assertions - Remove console.log from ir.list-spacing and ir.hr-spacing tests - Fix ir.nested-lists.test.ts: remove ineffective regex assertion - Fix ir.hr-spacing.test.ts: add actual assertions to edge case test * refactor: split Signal formatting tests (#9781) (thanks @heyhudson) --------- Co-authored-by: Hudson <258693705+hudson-rivera@users.noreply.github.com> Co-authored-by: Peter Steinberger <steipete@gmail.com>
This commit is contained in:
202
src/markdown/ir.blockquote-spacing.test.ts
Normal file
202
src/markdown/ir.blockquote-spacing.test.ts
Normal file
@@ -0,0 +1,202 @@
|
||||
/**
|
||||
* Blockquote Spacing Tests
|
||||
*
|
||||
* Per CommonMark spec (§5.1 Block quotes), blockquotes are "container blocks" that
|
||||
* contain other block-level elements (paragraphs, code blocks, etc.).
|
||||
*
|
||||
* In plaintext rendering, the expected spacing between block-level elements is
|
||||
* a single blank line (double newline `\n\n`). This is the standard paragraph
|
||||
* separation used throughout markdown.
|
||||
*
|
||||
* CORRECT behavior:
|
||||
* - Blockquote content followed by paragraph: "quote\n\nparagraph" (double \n)
|
||||
* - Two consecutive blockquotes: "first\n\nsecond" (double \n)
|
||||
*
|
||||
* BUG (current behavior):
|
||||
* - Produces triple newlines: "quote\n\n\nparagraph"
|
||||
*
|
||||
* Root cause:
|
||||
* 1. `paragraph_close` inside blockquote adds `\n\n` (correct)
|
||||
* 2. `blockquote_close` adds another `\n` (incorrect)
|
||||
* 3. Result: `\n\n\n` (triple newlines - incorrect)
|
||||
*
|
||||
* The fix: `blockquote_close` should NOT add `\n` because:
|
||||
* - Blockquotes are container blocks, not leaf blocks
|
||||
* - The inner content (paragraph, heading, etc.) already provides block separation
|
||||
* - Container closings shouldn't add their own spacing
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { markdownToIR } from "./ir.js";
|
||||
|
||||
describe("blockquote spacing", () => {
|
||||
describe("blockquote followed by paragraph", () => {
|
||||
it("should have double newline (one blank line) between blockquote and paragraph", () => {
|
||||
const input = "> quote\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// CORRECT: "quote\n\nparagraph" (double newline)
|
||||
// BUG: "quote\n\n\nparagraph" (triple newline)
|
||||
expect(result.text).toBe("quote\n\nparagraph");
|
||||
});
|
||||
|
||||
it("should not produce triple newlines", () => {
|
||||
const input = "> quote\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("consecutive blockquotes", () => {
|
||||
it("should have double newline between two blockquotes", () => {
|
||||
const input = "> first\n\n> second";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).toBe("first\n\nsecond");
|
||||
});
|
||||
|
||||
it("should not produce triple newlines between blockquotes", () => {
|
||||
const input = "> first\n\n> second";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("nested blockquotes", () => {
|
||||
it("should handle nested blockquotes correctly", () => {
|
||||
const input = "> outer\n>> inner";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Inner blockquote becomes separate paragraph
|
||||
expect(result.text).toBe("outer\n\ninner");
|
||||
});
|
||||
|
||||
it("should not produce triple newlines in nested blockquotes", () => {
|
||||
const input = "> outer\n>> inner\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("should handle deeply nested blockquotes", () => {
|
||||
const input = "> level 1\n>> level 2\n>>> level 3";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Each nested level is a new paragraph
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("blockquote followed by other block elements", () => {
|
||||
it("should have double newline between blockquote and heading", () => {
|
||||
const input = "> quote\n\n# Heading";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).toBe("quote\n\nHeading");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("should have double newline between blockquote and list", () => {
|
||||
const input = "> quote\n\n- item";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// The list item becomes "• item"
|
||||
expect(result.text).toBe("quote\n\n• item");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("should have double newline between blockquote and code block", () => {
|
||||
const input = "> quote\n\n```\ncode\n```";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Code blocks preserve their trailing newline
|
||||
expect(result.text.startsWith("quote\n\ncode")).toBe(true);
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("should have double newline between blockquote and horizontal rule", () => {
|
||||
const input = "> quote\n\n---\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// HR just adds a newline in IR, but should not create triple newlines
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("blockquote with multi-paragraph content", () => {
|
||||
it("should handle multi-paragraph blockquote followed by paragraph", () => {
|
||||
const input = "> first paragraph\n>\n> second paragraph\n\nfollowing paragraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Multi-paragraph blockquote should have proper internal spacing
|
||||
// AND proper spacing with following content
|
||||
expect(result.text).toContain("first paragraph\n\nsecond paragraph");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("blockquote prefix option", () => {
|
||||
it("should include prefix and maintain proper spacing", () => {
|
||||
const input = "> quote\n\nparagraph";
|
||||
const result = markdownToIR(input, { blockquotePrefix: "> " });
|
||||
|
||||
// With prefix, should still have proper spacing
|
||||
expect(result.text).toBe("> quote\n\nparagraph");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
|
||||
describe("edge cases", () => {
|
||||
it("should handle empty blockquote followed by paragraph", () => {
|
||||
const input = ">\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("should handle blockquote at end of document", () => {
|
||||
const input = "paragraph\n\n> quote";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// No trailing triple newlines
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("should handle multiple blockquotes with paragraphs between", () => {
|
||||
const input = "> first\n\nparagraph\n\n> second";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).toBe("first\n\nparagraph\n\nsecond");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("comparison with other block elements (control group)", () => {
|
||||
it("paragraphs should have double newline separation", () => {
|
||||
const input = "paragraph 1\n\nparagraph 2";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).toBe("paragraph 1\n\nparagraph 2");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("list followed by paragraph should have double newline", () => {
|
||||
const input = "- item 1\n- item 2\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Lists already work correctly
|
||||
expect(result.text).toContain("• item 2\n\nparagraph");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("heading followed by paragraph should have double newline", () => {
|
||||
const input = "# Heading\n\nparagraph";
|
||||
const result = markdownToIR(input);
|
||||
|
||||
expect(result.text).toBe("Heading\n\nparagraph");
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
173
src/markdown/ir.hr-spacing.test.ts
Normal file
173
src/markdown/ir.hr-spacing.test.ts
Normal file
@@ -0,0 +1,173 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { markdownToIR } from "./ir.js";
|
||||
|
||||
/**
|
||||
* HR (Thematic Break) Spacing Analysis
|
||||
* =====================================
|
||||
*
|
||||
* CommonMark Spec (0.31.2) Section 4.1 - Thematic Breaks:
|
||||
* - Thematic breaks (---, ***, ___) produce <hr /> in HTML
|
||||
* - "Thematic breaks do not need blank lines before or after"
|
||||
* - A thematic break can interrupt a paragraph
|
||||
*
|
||||
* HTML Output per spec:
|
||||
* Input: "Foo\n***\nbar"
|
||||
* HTML: "<p>Foo</p>\n<hr />\n<p>bar</p>"
|
||||
*
|
||||
* PLAIN TEXT OUTPUT DECISION:
|
||||
*
|
||||
* The HR element is a block-level thematic separator. In plain text output,
|
||||
* we render HRs as a visible separator "───" to maintain visual distinction.
|
||||
*/
|
||||
|
||||
describe("hr (thematic break) spacing", () => {
|
||||
describe("current behavior documentation", () => {
|
||||
it("just hr alone renders as separator", () => {
|
||||
const result = markdownToIR("---");
|
||||
expect(result.text).toBe("───");
|
||||
});
|
||||
|
||||
it("hr between paragraphs renders with separator", () => {
|
||||
const input = `Para 1
|
||||
|
||||
---
|
||||
|
||||
Para 2`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("Para 1\n\n───\n\nPara 2");
|
||||
});
|
||||
|
||||
it("hr interrupting paragraph (setext heading case)", () => {
|
||||
// Note: "Para\n---" is a setext heading in CommonMark!
|
||||
// Using *** to test actual HR behavior
|
||||
const input = `Para 1
|
||||
***
|
||||
Para 2`;
|
||||
const result = markdownToIR(input);
|
||||
// HR interrupts para, renders visibly
|
||||
expect(result.text).toContain("───");
|
||||
});
|
||||
});
|
||||
|
||||
describe("expected behavior (tests assert CORRECT behavior)", () => {
|
||||
it("hr between paragraphs should render with separator", () => {
|
||||
const input = `Para 1
|
||||
|
||||
---
|
||||
|
||||
Para 2`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("Para 1\n\n───\n\nPara 2");
|
||||
});
|
||||
|
||||
it("hr between paragraphs using *** should render with separator", () => {
|
||||
const input = `Para 1
|
||||
|
||||
***
|
||||
|
||||
Para 2`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("Para 1\n\n───\n\nPara 2");
|
||||
});
|
||||
|
||||
it("hr between paragraphs using ___ should render with separator", () => {
|
||||
const input = `Para 1
|
||||
|
||||
___
|
||||
|
||||
Para 2`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("Para 1\n\n───\n\nPara 2");
|
||||
});
|
||||
|
||||
it("consecutive hrs should produce multiple separators", () => {
|
||||
const input = `---
|
||||
---
|
||||
---`;
|
||||
const result = markdownToIR(input);
|
||||
// Each HR renders as a separator
|
||||
expect(result.text).toBe("───\n\n───\n\n───");
|
||||
});
|
||||
|
||||
it("hr at document end renders separator", () => {
|
||||
const input = `Para
|
||||
|
||||
---`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("Para\n\n───");
|
||||
});
|
||||
|
||||
it("hr at document start renders separator", () => {
|
||||
const input = `---
|
||||
|
||||
Para`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("───\n\nPara");
|
||||
});
|
||||
|
||||
it("should not produce triple newlines regardless of hr placement", () => {
|
||||
const inputs = [
|
||||
"Para 1\n\n---\n\nPara 2",
|
||||
"Para 1\n---\nPara 2",
|
||||
"---\nPara",
|
||||
"Para\n---",
|
||||
"Para 1\n\n---\n\n---\n\nPara 2",
|
||||
"Para 1\n\n***\n\n---\n\n___\n\nPara 2",
|
||||
];
|
||||
|
||||
for (const input of inputs) {
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text, `Input: ${JSON.stringify(input)}`).not.toMatch(/\n{3,}/);
|
||||
}
|
||||
});
|
||||
|
||||
it("multiple consecutive hrs between paragraphs should each render as separator", () => {
|
||||
const input = `Para 1
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
Para 2`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("Para 1\n\n───\n\n───\n\n───\n\nPara 2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("edge cases", () => {
|
||||
it("hr between list items renders as separator without extra spacing", () => {
|
||||
const input = `- Item 1
|
||||
- ---
|
||||
- Item 2`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toBe("• Item 1\n\n───\n\n• Item 2");
|
||||
expect(result.text).not.toMatch(/\n{3,}/);
|
||||
});
|
||||
|
||||
it("hr followed immediately by heading", () => {
|
||||
const input = `---
|
||||
|
||||
# Heading
|
||||
|
||||
Para`;
|
||||
const result = markdownToIR(input);
|
||||
// HR renders as separator, heading renders, para follows
|
||||
expect(result.text).not.toMatch(/\n{3,}/);
|
||||
expect(result.text).toContain("───");
|
||||
});
|
||||
|
||||
it("heading followed by hr", () => {
|
||||
const input = `# Heading
|
||||
|
||||
---
|
||||
|
||||
Para`;
|
||||
const result = markdownToIR(input);
|
||||
// Heading ends, HR renders, para follows
|
||||
expect(result.text).not.toMatch(/\n{3,}/);
|
||||
expect(result.text).toContain("───");
|
||||
});
|
||||
});
|
||||
});
|
||||
33
src/markdown/ir.list-spacing.test.ts
Normal file
33
src/markdown/ir.list-spacing.test.ts
Normal file
@@ -0,0 +1,33 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { markdownToIR } from "./ir.js";
|
||||
|
||||
describe("list paragraph spacing", () => {
|
||||
it("adds blank line between bullet list and following paragraph", () => {
|
||||
const input = `- item 1
|
||||
- item 2
|
||||
|
||||
Paragraph after`;
|
||||
const result = markdownToIR(input);
|
||||
// Should have two newlines between "item 2" and "Paragraph"
|
||||
expect(result.text).toContain("item 2\n\nParagraph");
|
||||
});
|
||||
|
||||
it("adds blank line between ordered list and following paragraph", () => {
|
||||
const input = `1. item 1
|
||||
2. item 2
|
||||
|
||||
Paragraph after`;
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).toContain("item 2\n\nParagraph");
|
||||
});
|
||||
|
||||
it("does not produce triple newlines", () => {
|
||||
const input = `- item 1
|
||||
- item 2
|
||||
|
||||
Paragraph after`;
|
||||
const result = markdownToIR(input);
|
||||
// Should NOT have three consecutive newlines
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
});
|
||||
301
src/markdown/ir.nested-lists.test.ts
Normal file
301
src/markdown/ir.nested-lists.test.ts
Normal file
@@ -0,0 +1,301 @@
|
||||
/**
|
||||
* Nested List Rendering Tests
|
||||
*
|
||||
* This test file documents and validates the expected behavior for nested lists
|
||||
* when rendering Markdown to plain text.
|
||||
*
|
||||
* ## Expected Plain Text Behavior
|
||||
*
|
||||
* Per CommonMark spec, nested lists create a hierarchical structure. When rendering
|
||||
* to plain text for messaging platforms, we expect:
|
||||
*
|
||||
* 1. **Indentation**: Each nesting level adds 2 spaces of indentation
|
||||
* 2. **Bullet markers**: Bullet lists use "•" (Unicode bullet)
|
||||
* 3. **Ordered markers**: Ordered lists use "N. " format
|
||||
* 4. **Line endings**: Each list item ends with a single newline
|
||||
* 5. **List termination**: A trailing newline after the entire list (for top-level only)
|
||||
*
|
||||
* ## markdown-it Token Sequence
|
||||
*
|
||||
* For nested lists, markdown-it emits tokens in this order:
|
||||
* - bullet_list_open (outer)
|
||||
* - list_item_open
|
||||
* - paragraph_open (hidden=true for tight lists)
|
||||
* - inline (with text children)
|
||||
* - paragraph_close
|
||||
* - bullet_list_open (nested)
|
||||
* - list_item_open
|
||||
* - paragraph_open
|
||||
* - inline
|
||||
* - paragraph_close
|
||||
* - list_item_close
|
||||
* - bullet_list_close
|
||||
* - list_item_close
|
||||
* - bullet_list_close
|
||||
*
|
||||
* The key insight is that nested lists appear INSIDE the parent list_item,
|
||||
* between the paragraph and the list_item_close.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { markdownToIR } from "./ir.js";
|
||||
|
||||
describe("Nested Lists - 2 Level Nesting", () => {
|
||||
it("renders bullet items nested inside bullet items with proper indentation", () => {
|
||||
const input = `- Item 1
|
||||
- Nested 1.1
|
||||
- Nested 1.2
|
||||
- Item 2`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Expected output:
|
||||
// • Item 1
|
||||
// • Nested 1.1
|
||||
// • Nested 1.2
|
||||
// • Item 2
|
||||
// Note: markdownToIR trims trailing whitespace, so no final newline
|
||||
const expected = `• Item 1
|
||||
• Nested 1.1
|
||||
• Nested 1.2
|
||||
• Item 2`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
|
||||
it("renders ordered items nested inside bullet items", () => {
|
||||
const input = `- Bullet item
|
||||
1. Ordered sub-item 1
|
||||
2. Ordered sub-item 2
|
||||
- Another bullet`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Expected output:
|
||||
// • Bullet item
|
||||
// 1. Ordered sub-item 1
|
||||
// 2. Ordered sub-item 2
|
||||
// • Another bullet
|
||||
const expected = `• Bullet item
|
||||
1. Ordered sub-item 1
|
||||
2. Ordered sub-item 2
|
||||
• Another bullet`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
|
||||
it("renders bullet items nested inside ordered items", () => {
|
||||
const input = `1. Ordered 1
|
||||
- Bullet sub 1
|
||||
- Bullet sub 2
|
||||
2. Ordered 2`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Expected output:
|
||||
// 1. Ordered 1
|
||||
// • Bullet sub 1
|
||||
// • Bullet sub 2
|
||||
// 2. Ordered 2
|
||||
const expected = `1. Ordered 1
|
||||
• Bullet sub 1
|
||||
• Bullet sub 2
|
||||
2. Ordered 2`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
|
||||
it("renders ordered items nested inside ordered items", () => {
|
||||
const input = `1. First
|
||||
1. Sub-first
|
||||
2. Sub-second
|
||||
2. Second`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
const expected = `1. First
|
||||
1. Sub-first
|
||||
2. Sub-second
|
||||
2. Second`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Nested Lists - 3+ Level Deep Nesting", () => {
|
||||
it("renders 3 levels of bullet nesting", () => {
|
||||
const input = `- Level 1
|
||||
- Level 2
|
||||
- Level 3
|
||||
- Back to 1`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Expected output with progressive indentation:
|
||||
// • Level 1
|
||||
// • Level 2
|
||||
// • Level 3
|
||||
// • Back to 1
|
||||
const expected = `• Level 1
|
||||
• Level 2
|
||||
• Level 3
|
||||
• Back to 1`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
|
||||
it("renders 4 levels of bullet nesting", () => {
|
||||
const input = `- L1
|
||||
- L2
|
||||
- L3
|
||||
- L4
|
||||
- Back`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
const expected = `• L1
|
||||
• L2
|
||||
• L3
|
||||
• L4
|
||||
• Back`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
|
||||
it("renders 3 levels with multiple items at each level", () => {
|
||||
const input = `- A1
|
||||
- B1
|
||||
- C1
|
||||
- C2
|
||||
- B2
|
||||
- A2`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
const expected = `• A1
|
||||
• B1
|
||||
• C1
|
||||
• C2
|
||||
• B2
|
||||
• A2`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Nested Lists - Mixed Nesting", () => {
|
||||
it("renders complex mixed nesting (bullet > ordered > bullet)", () => {
|
||||
const input = `- Bullet 1
|
||||
1. Ordered 1.1
|
||||
- Deep bullet
|
||||
2. Ordered 1.2
|
||||
- Bullet 2`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
const expected = `• Bullet 1
|
||||
1. Ordered 1.1
|
||||
• Deep bullet
|
||||
2. Ordered 1.2
|
||||
• Bullet 2`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
|
||||
it("renders ordered > bullet > ordered nesting", () => {
|
||||
const input = `1. First
|
||||
- Sub bullet
|
||||
1. Deep ordered
|
||||
- Another bullet
|
||||
2. Second`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
const expected = `1. First
|
||||
• Sub bullet
|
||||
1. Deep ordered
|
||||
• Another bullet
|
||||
2. Second`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Nested Lists - Newline Handling", () => {
|
||||
it("does not produce triple newlines in nested lists", () => {
|
||||
const input = `- Item 1
|
||||
- Nested
|
||||
- Item 2`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
expect(result.text).not.toContain("\n\n\n");
|
||||
});
|
||||
|
||||
it("does not produce double newlines between nested items", () => {
|
||||
const input = `- A
|
||||
- B
|
||||
- C
|
||||
- D`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Between B and C there should be exactly one newline
|
||||
expect(result.text).toContain(" • B\n • C");
|
||||
expect(result.text).not.toContain(" • B\n\n • C");
|
||||
});
|
||||
|
||||
it("properly terminates top-level list (trimmed output)", () => {
|
||||
const input = `- Item 1
|
||||
- Nested
|
||||
- Item 2`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// markdownToIR trims trailing whitespace, so output should end with Item 2
|
||||
// (no trailing newline after trimming)
|
||||
expect(result.text).toMatch(/Item 2$/);
|
||||
// Should not have excessive newlines before Item 2
|
||||
expect(result.text).not.toContain("\n\n• Item 2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Nested Lists - Edge Cases", () => {
|
||||
it("handles empty parent with nested items", () => {
|
||||
// This is a bit of an edge case - a list item that's just a marker followed by nested content
|
||||
const input = `-
|
||||
- Nested only
|
||||
- Normal`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// Should still render the nested item with proper indentation
|
||||
expect(result.text).toContain(" • Nested only");
|
||||
});
|
||||
|
||||
it("handles nested list as first child of parent item", () => {
|
||||
const input = `- Parent text
|
||||
- Child
|
||||
- Another parent`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
// The child should appear indented under the parent
|
||||
expect(result.text).toContain("• Parent text\n • Child");
|
||||
});
|
||||
|
||||
it("handles sibling nested lists at same level", () => {
|
||||
const input = `- A
|
||||
- A1
|
||||
- B
|
||||
- B1`;
|
||||
|
||||
const result = markdownToIR(input);
|
||||
|
||||
const expected = `• A
|
||||
• A1
|
||||
• B
|
||||
• B1`;
|
||||
|
||||
expect(result.text).toBe(expected);
|
||||
});
|
||||
});
|
||||
89
src/markdown/ir.table-code.test.ts
Normal file
89
src/markdown/ir.table-code.test.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { markdownToIR } from "./ir.js";
|
||||
|
||||
describe("markdownToIR tableMode code - style overlap", () => {
|
||||
it("should not have overlapping styles when cell has bold text", () => {
|
||||
const md = `
|
||||
| Name | Value |
|
||||
|------|-------|
|
||||
| **Bold** | Normal |
|
||||
`.trim();
|
||||
|
||||
const ir = markdownToIR(md, { tableMode: "code" });
|
||||
|
||||
// Check for overlapping styles
|
||||
const codeBlockSpan = ir.styles.find((s) => s.style === "code_block");
|
||||
const boldSpan = ir.styles.find((s) => s.style === "bold");
|
||||
|
||||
// Either:
|
||||
// 1. There should be no bold spans in code mode (inner styles stripped), OR
|
||||
// 2. If bold spans exist, they should not overlap with code_block span
|
||||
if (codeBlockSpan && boldSpan) {
|
||||
// Check for overlap
|
||||
const overlaps = boldSpan.start < codeBlockSpan.end && boldSpan.end > codeBlockSpan.start;
|
||||
// Overlapping styles are the bug - this should fail until fixed
|
||||
expect(overlaps).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it("should not have overlapping styles when cell has italic text", () => {
|
||||
const md = `
|
||||
| Name | Value |
|
||||
|------|-------|
|
||||
| *Italic* | Normal |
|
||||
`.trim();
|
||||
|
||||
const ir = markdownToIR(md, { tableMode: "code" });
|
||||
|
||||
const codeBlockSpan = ir.styles.find((s) => s.style === "code_block");
|
||||
const italicSpan = ir.styles.find((s) => s.style === "italic");
|
||||
|
||||
if (codeBlockSpan && italicSpan) {
|
||||
const overlaps = italicSpan.start < codeBlockSpan.end && italicSpan.end > codeBlockSpan.start;
|
||||
expect(overlaps).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it("should not have overlapping styles when cell has inline code", () => {
|
||||
const md = `
|
||||
| Name | Value |
|
||||
|------|-------|
|
||||
| \`code\` | Normal |
|
||||
`.trim();
|
||||
|
||||
const ir = markdownToIR(md, { tableMode: "code" });
|
||||
|
||||
const codeBlockSpan = ir.styles.find((s) => s.style === "code_block");
|
||||
const codeSpan = ir.styles.find((s) => s.style === "code");
|
||||
|
||||
if (codeBlockSpan && codeSpan) {
|
||||
const overlaps = codeSpan.start < codeBlockSpan.end && codeSpan.end > codeBlockSpan.start;
|
||||
expect(overlaps).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it("should not have overlapping styles with multiple styled cells", () => {
|
||||
const md = `
|
||||
| Name | Value |
|
||||
|------|-------|
|
||||
| **A** | *B* |
|
||||
| _C_ | ~~D~~ |
|
||||
`.trim();
|
||||
|
||||
const ir = markdownToIR(md, { tableMode: "code" });
|
||||
|
||||
const codeBlockSpan = ir.styles.find((s) => s.style === "code_block");
|
||||
if (!codeBlockSpan) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Check that no non-code_block style overlaps with code_block
|
||||
for (const style of ir.styles) {
|
||||
if (style.style === "code_block") {
|
||||
continue;
|
||||
}
|
||||
const overlaps = style.start < codeBlockSpan.end && style.end > codeBlockSpan.start;
|
||||
expect(overlaps).toBe(false);
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -364,6 +364,14 @@ function appendCell(state: RenderState, cell: TableCell) {
|
||||
}
|
||||
}
|
||||
|
||||
function appendCellTextOnly(state: RenderState, cell: TableCell) {
|
||||
if (!cell.text) {
|
||||
return;
|
||||
}
|
||||
state.text += cell.text;
|
||||
// Do not append styles - this is used for code blocks where inner styles would overlap
|
||||
}
|
||||
|
||||
function renderTableAsBullets(state: RenderState) {
|
||||
if (!state.table) {
|
||||
return;
|
||||
@@ -474,7 +482,8 @@ function renderTableAsCode(state: RenderState) {
|
||||
state.text += " ";
|
||||
const cell = cells[i];
|
||||
if (cell) {
|
||||
appendCell(state, cell);
|
||||
// Use text-only append to avoid overlapping styles with code_block
|
||||
appendCellTextOnly(state, cell);
|
||||
}
|
||||
const pad = widths[i] - (cell?.text.length ?? 0);
|
||||
if (pad > 0) {
|
||||
@@ -589,27 +598,43 @@ function renderTokens(tokens: MarkdownToken[], state: RenderState): void {
|
||||
break;
|
||||
case "blockquote_close":
|
||||
closeStyle(state, "blockquote");
|
||||
state.text += "\n";
|
||||
break;
|
||||
case "bullet_list_open":
|
||||
// Add newline before nested list starts (so nested items appear on new line)
|
||||
if (state.env.listStack.length > 0) {
|
||||
state.text += "\n";
|
||||
}
|
||||
state.env.listStack.push({ type: "bullet", index: 0 });
|
||||
break;
|
||||
case "bullet_list_close":
|
||||
state.env.listStack.pop();
|
||||
if (state.env.listStack.length === 0) {
|
||||
state.text += "\n";
|
||||
}
|
||||
break;
|
||||
case "ordered_list_open": {
|
||||
// Add newline before nested list starts (so nested items appear on new line)
|
||||
if (state.env.listStack.length > 0) {
|
||||
state.text += "\n";
|
||||
}
|
||||
const start = Number(getAttr(token, "start") ?? "1");
|
||||
state.env.listStack.push({ type: "ordered", index: start - 1 });
|
||||
break;
|
||||
}
|
||||
case "ordered_list_close":
|
||||
state.env.listStack.pop();
|
||||
if (state.env.listStack.length === 0) {
|
||||
state.text += "\n";
|
||||
}
|
||||
break;
|
||||
case "list_item_open":
|
||||
appendListPrefix(state);
|
||||
break;
|
||||
case "list_item_close":
|
||||
state.text += "\n";
|
||||
// Avoid double newlines (nested list's last item already added newline)
|
||||
if (!state.text.endsWith("\n")) {
|
||||
state.text += "\n";
|
||||
}
|
||||
break;
|
||||
case "code_block":
|
||||
case "fence":
|
||||
@@ -680,7 +705,8 @@ function renderTokens(tokens: MarkdownToken[], state: RenderState): void {
|
||||
break;
|
||||
|
||||
case "hr":
|
||||
state.text += "\n";
|
||||
// Render as a visual separator
|
||||
state.text += "───\n\n";
|
||||
break;
|
||||
default:
|
||||
if (token.children) {
|
||||
@@ -744,7 +770,13 @@ function mergeStyleSpans(spans: MarkdownStyleSpan[]): MarkdownStyleSpan[] {
|
||||
const merged: MarkdownStyleSpan[] = [];
|
||||
for (const span of sorted) {
|
||||
const prev = merged[merged.length - 1];
|
||||
if (prev && prev.style === span.style && span.start <= prev.end) {
|
||||
if (
|
||||
prev &&
|
||||
prev.style === span.style &&
|
||||
// Blockquotes are container blocks. Adjacent blockquote spans should not merge or
|
||||
// consecutive blockquotes can "style bleed" across the paragraph boundary.
|
||||
(span.start < prev.end || (span.start === prev.end && span.style !== "blockquote"))
|
||||
) {
|
||||
prev.end = Math.max(prev.end, span.end);
|
||||
continue;
|
||||
}
|
||||
|
||||
390
src/signal/format.chunking.test.ts
Normal file
390
src/signal/format.chunking.test.ts
Normal file
@@ -0,0 +1,390 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { markdownToSignalTextChunks } from "./format.js";
|
||||
|
||||
describe("splitSignalFormattedText", () => {
|
||||
// We test the internal chunking behavior via markdownToSignalTextChunks with
|
||||
// pre-rendered SignalFormattedText. The helper is not exported, so we test
|
||||
// it indirectly through integration tests and by constructing scenarios that
|
||||
// exercise the splitting logic.
|
||||
|
||||
describe("style-aware splitting - basic text", () => {
|
||||
it("text with no styles splits correctly at whitespace", () => {
|
||||
// Create text that exceeds limit and must be split
|
||||
const limit = 20;
|
||||
const markdown = "hello world this is a test";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
// Verify all text is preserved (joined chunks should contain all words)
|
||||
const joinedText = chunks.map((c) => c.text).join(" ");
|
||||
expect(joinedText).toContain("hello");
|
||||
expect(joinedText).toContain("world");
|
||||
expect(joinedText).toContain("test");
|
||||
});
|
||||
|
||||
it("empty text returns empty array", () => {
|
||||
// Empty input produces no chunks (not an empty chunk)
|
||||
const chunks = markdownToSignalTextChunks("", 100);
|
||||
expect(chunks).toEqual([]);
|
||||
});
|
||||
|
||||
it("text under limit returns single chunk unchanged", () => {
|
||||
const markdown = "short text";
|
||||
const chunks = markdownToSignalTextChunks(markdown, 100);
|
||||
|
||||
expect(chunks).toHaveLength(1);
|
||||
expect(chunks[0].text).toBe("short text");
|
||||
});
|
||||
});
|
||||
|
||||
describe("style-aware splitting - style preservation", () => {
|
||||
it("style fully within first chunk stays in first chunk", () => {
|
||||
// Create a message where bold text is in the first chunk
|
||||
const limit = 30;
|
||||
const markdown = "**bold** word more words here that exceed limit";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
// First chunk should contain the bold style
|
||||
const firstChunk = chunks[0];
|
||||
expect(firstChunk.text).toContain("bold");
|
||||
expect(firstChunk.styles.some((s) => s.style === "BOLD")).toBe(true);
|
||||
// The bold style should start at position 0 in the first chunk
|
||||
const boldStyle = firstChunk.styles.find((s) => s.style === "BOLD");
|
||||
expect(boldStyle).toBeDefined();
|
||||
expect(boldStyle!.start).toBe(0);
|
||||
expect(boldStyle!.length).toBe(4); // "bold"
|
||||
});
|
||||
|
||||
it("style fully within second chunk has offset adjusted to chunk-local position", () => {
|
||||
// Create a message where the styled text is in the second chunk
|
||||
const limit = 30;
|
||||
const markdown = "some filler text here **bold** at the end";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
// Find the chunk containing "bold"
|
||||
const chunkWithBold = chunks.find((c) => c.text.includes("bold"));
|
||||
expect(chunkWithBold).toBeDefined();
|
||||
expect(chunkWithBold!.styles.some((s) => s.style === "BOLD")).toBe(true);
|
||||
|
||||
// The bold style should have chunk-local offset (not original text offset)
|
||||
const boldStyle = chunkWithBold!.styles.find((s) => s.style === "BOLD");
|
||||
expect(boldStyle).toBeDefined();
|
||||
// The offset should be the position within this chunk, not the original text
|
||||
const boldPos = chunkWithBold!.text.indexOf("bold");
|
||||
expect(boldStyle!.start).toBe(boldPos);
|
||||
expect(boldStyle!.length).toBe(4);
|
||||
});
|
||||
|
||||
it("style spanning chunk boundary is split into two ranges", () => {
|
||||
// Create text where a styled span crosses the chunk boundary
|
||||
const limit = 15;
|
||||
// "hello **bold text here** end" - the bold spans across chunk boundary
|
||||
const markdown = "hello **boldtexthere** end";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
|
||||
// Both chunks should have BOLD styles if the span was split
|
||||
const chunksWithBold = chunks.filter((c) => c.styles.some((s) => s.style === "BOLD"));
|
||||
// At least one chunk should have the bold style
|
||||
expect(chunksWithBold.length).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// For each chunk with bold, verify the style range is valid for that chunk
|
||||
for (const chunk of chunksWithBold) {
|
||||
for (const style of chunk.styles.filter((s) => s.style === "BOLD")) {
|
||||
expect(style.start).toBeGreaterThanOrEqual(0);
|
||||
expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("style starting exactly at split point goes entirely to second chunk", () => {
|
||||
// Create text where style starts right at where we'd split
|
||||
const limit = 10;
|
||||
const markdown = "abcdefghi **bold**";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
|
||||
// Find chunk with bold
|
||||
const chunkWithBold = chunks.find((c) => c.styles.some((s) => s.style === "BOLD"));
|
||||
expect(chunkWithBold).toBeDefined();
|
||||
|
||||
// Verify the bold style is valid within its chunk
|
||||
const boldStyle = chunkWithBold!.styles.find((s) => s.style === "BOLD");
|
||||
expect(boldStyle).toBeDefined();
|
||||
expect(boldStyle!.start).toBeGreaterThanOrEqual(0);
|
||||
expect(boldStyle!.start + boldStyle!.length).toBeLessThanOrEqual(chunkWithBold!.text.length);
|
||||
});
|
||||
|
||||
it("style ending exactly at split point stays entirely in first chunk", () => {
|
||||
const limit = 10;
|
||||
const markdown = "**bold** rest of text";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// First chunk should have the complete bold style
|
||||
const firstChunk = chunks[0];
|
||||
if (firstChunk.text.includes("bold")) {
|
||||
const boldStyle = firstChunk.styles.find((s) => s.style === "BOLD");
|
||||
expect(boldStyle).toBeDefined();
|
||||
expect(boldStyle!.start + boldStyle!.length).toBeLessThanOrEqual(firstChunk.text.length);
|
||||
}
|
||||
});
|
||||
|
||||
it("multiple styles, some spanning boundary, some not", () => {
|
||||
const limit = 25;
|
||||
// Mix of styles: italic at start, bold spanning boundary, monospace at end
|
||||
const markdown = "_italic_ some text **bold text** and `code`";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
|
||||
// Verify all style ranges are valid within their respective chunks
|
||||
for (const chunk of chunks) {
|
||||
for (const style of chunk.styles) {
|
||||
expect(style.start).toBeGreaterThanOrEqual(0);
|
||||
expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length);
|
||||
expect(style.length).toBeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Collect all styles across chunks
|
||||
const allStyles = chunks.flatMap((c) => c.styles.map((s) => s.style));
|
||||
// We should have at least italic, bold, and monospace somewhere
|
||||
expect(allStyles).toContain("ITALIC");
|
||||
expect(allStyles).toContain("BOLD");
|
||||
expect(allStyles).toContain("MONOSPACE");
|
||||
});
|
||||
});
|
||||
|
||||
describe("style-aware splitting - edge cases", () => {
|
||||
it("handles zero-length text with styles gracefully", () => {
|
||||
// Edge case: empty markdown produces no chunks
|
||||
const chunks = markdownToSignalTextChunks("", 100);
|
||||
expect(chunks).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("handles text that splits exactly at limit", () => {
|
||||
const limit = 10;
|
||||
const markdown = "1234567890"; // exactly 10 chars
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
expect(chunks).toHaveLength(1);
|
||||
expect(chunks[0].text).toBe("1234567890");
|
||||
});
|
||||
|
||||
it("preserves style through whitespace trimming", () => {
|
||||
const limit = 30;
|
||||
const markdown = "**bold** some text that is longer than limit";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// Bold should be preserved in first chunk
|
||||
const firstChunk = chunks[0];
|
||||
if (firstChunk.text.includes("bold")) {
|
||||
expect(firstChunk.styles.some((s) => s.style === "BOLD")).toBe(true);
|
||||
}
|
||||
});
|
||||
|
||||
it("handles repeated substrings correctly (no indexOf fragility)", () => {
|
||||
// This test exposes the fragility of using indexOf to find chunk positions.
|
||||
// If the same substring appears multiple times, indexOf finds the first
|
||||
// occurrence, not necessarily the correct one.
|
||||
const limit = 20;
|
||||
// "word" appears multiple times - indexOf("word") would always find first
|
||||
const markdown = "word **bold word** word more text here to chunk";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// Verify chunks are under limit
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
|
||||
// Find chunk(s) with bold style
|
||||
const chunksWithBold = chunks.filter((c) => c.styles.some((s) => s.style === "BOLD"));
|
||||
expect(chunksWithBold.length).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// The bold style should correctly cover "bold word" (or part of it if split)
|
||||
// and NOT incorrectly point to the first "word" in the text
|
||||
for (const chunk of chunksWithBold) {
|
||||
for (const style of chunk.styles.filter((s) => s.style === "BOLD")) {
|
||||
const styledText = chunk.text.slice(style.start, style.start + style.length);
|
||||
// The styled text should be part of "bold word", not the initial "word"
|
||||
expect(styledText).toMatch(/^(bold( word)?|word)$/);
|
||||
expect(style.start).toBeGreaterThanOrEqual(0);
|
||||
expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("handles chunk that starts with whitespace after split", () => {
|
||||
// When text is split at whitespace, the next chunk might have leading
|
||||
// whitespace trimmed. Styles must account for this.
|
||||
const limit = 15;
|
||||
const markdown = "some text **bold** at end";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// All style ranges must be valid
|
||||
for (const chunk of chunks) {
|
||||
for (const style of chunk.styles) {
|
||||
expect(style.start).toBeGreaterThanOrEqual(0);
|
||||
expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("deterministically tracks position without indexOf fragility", () => {
|
||||
// This test ensures the chunker doesn't rely on finding chunks via indexOf
|
||||
// which can fail when chunkText trims whitespace or when duplicates exist.
|
||||
// Create text with lots of whitespace and repeated patterns.
|
||||
const limit = 25;
|
||||
const markdown = "aaa **bold** aaa **bold** aaa extra text to force split";
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// Multiple chunks expected
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
|
||||
// All chunks should respect limit
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
|
||||
// All style ranges must be valid within their chunks
|
||||
for (const chunk of chunks) {
|
||||
for (const style of chunk.styles) {
|
||||
expect(style.start).toBeGreaterThanOrEqual(0);
|
||||
expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length);
|
||||
// The styled text at that position should actually be "bold"
|
||||
if (style.style === "BOLD") {
|
||||
const styledText = chunk.text.slice(style.start, style.start + style.length);
|
||||
expect(styledText).toBe("bold");
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("markdownToSignalTextChunks", () => {
|
||||
describe("link expansion chunk limit", () => {
|
||||
it("does not exceed chunk limit after link expansion", () => {
|
||||
// Create text that is close to limit, with a link that will expand
|
||||
const limit = 100;
|
||||
// Create text that's 90 chars, leaving only 10 chars of headroom
|
||||
const filler = "x".repeat(80);
|
||||
// This link will expand from "[link](url)" to "link (https://example.com/very/long/path)"
|
||||
const markdown = `${filler} [link](https://example.com/very/long/path/that/will/exceed/limit)`;
|
||||
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
});
|
||||
|
||||
it("handles multiple links near chunk boundary", () => {
|
||||
const limit = 100;
|
||||
const filler = "x".repeat(60);
|
||||
const markdown = `${filler} [a](https://a.com) [b](https://b.com) [c](https://c.com)`;
|
||||
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("link expansion with style preservation", () => {
|
||||
it("long message with links that expand beyond limit preserves all text", () => {
|
||||
const limit = 80;
|
||||
const filler = "a".repeat(50);
|
||||
const markdown = `${filler} [click here](https://example.com/very/long/path/to/page) more text`;
|
||||
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// All chunks should be under limit
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
|
||||
// Combined text should contain all original content
|
||||
const combined = chunks.map((c) => c.text).join("");
|
||||
expect(combined).toContain(filler);
|
||||
expect(combined).toContain("click here");
|
||||
expect(combined).toContain("example.com");
|
||||
});
|
||||
|
||||
it("styles (bold, italic) survive chunking correctly after link expansion", () => {
|
||||
const limit = 60;
|
||||
const markdown =
|
||||
"**bold start** text [link](https://example.com/path) _italic_ more content here to force chunking";
|
||||
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// Should have multiple chunks
|
||||
expect(chunks.length).toBeGreaterThan(1);
|
||||
|
||||
// All style ranges should be valid within their chunks
|
||||
for (const chunk of chunks) {
|
||||
for (const style of chunk.styles) {
|
||||
expect(style.start).toBeGreaterThanOrEqual(0);
|
||||
expect(style.start + style.length).toBeLessThanOrEqual(chunk.text.length);
|
||||
expect(style.length).toBeGreaterThan(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify styles exist somewhere
|
||||
const allStyles = chunks.flatMap((c) => c.styles.map((s) => s.style));
|
||||
expect(allStyles).toContain("BOLD");
|
||||
expect(allStyles).toContain("ITALIC");
|
||||
});
|
||||
|
||||
it("multiple links near chunk boundary all get properly chunked", () => {
|
||||
const limit = 50;
|
||||
const markdown =
|
||||
"[first](https://first.com/long/path) [second](https://second.com/another/path) [third](https://third.com)";
|
||||
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// All chunks should respect limit
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
|
||||
// All link labels should appear somewhere
|
||||
const combined = chunks.map((c) => c.text).join("");
|
||||
expect(combined).toContain("first");
|
||||
expect(combined).toContain("second");
|
||||
expect(combined).toContain("third");
|
||||
});
|
||||
|
||||
it("preserves spoiler style through link expansion and chunking", () => {
|
||||
const limit = 40;
|
||||
const markdown =
|
||||
"||secret content|| and [link](https://example.com/path) with more text to chunk";
|
||||
|
||||
const chunks = markdownToSignalTextChunks(markdown, limit);
|
||||
|
||||
// All chunks should respect limit
|
||||
for (const chunk of chunks) {
|
||||
expect(chunk.text.length).toBeLessThanOrEqual(limit);
|
||||
}
|
||||
|
||||
// Spoiler style should exist and be valid
|
||||
const chunkWithSpoiler = chunks.find((c) => c.styles.some((s) => s.style === "SPOILER"));
|
||||
expect(chunkWithSpoiler).toBeDefined();
|
||||
|
||||
const spoilerStyle = chunkWithSpoiler!.styles.find((s) => s.style === "SPOILER");
|
||||
expect(spoilerStyle).toBeDefined();
|
||||
expect(spoilerStyle!.start).toBeGreaterThanOrEqual(0);
|
||||
expect(spoilerStyle!.start + spoilerStyle!.length).toBeLessThanOrEqual(
|
||||
chunkWithSpoiler!.text.length,
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
58
src/signal/format.links.test.ts
Normal file
58
src/signal/format.links.test.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { markdownToSignalText } from "./format.js";
|
||||
|
||||
describe("markdownToSignalText", () => {
|
||||
describe("duplicate URL display", () => {
|
||||
it("does not duplicate URL when label matches URL without protocol", () => {
|
||||
// [selfh.st](http://selfh.st) should render as "selfh.st" not "selfh.st (http://selfh.st)"
|
||||
const res = markdownToSignalText("[selfh.st](http://selfh.st)");
|
||||
expect(res.text).toBe("selfh.st");
|
||||
});
|
||||
|
||||
it("does not duplicate URL when label matches URL without https protocol", () => {
|
||||
const res = markdownToSignalText("[example.com](https://example.com)");
|
||||
expect(res.text).toBe("example.com");
|
||||
});
|
||||
|
||||
it("does not duplicate URL when label matches URL without www prefix", () => {
|
||||
const res = markdownToSignalText("[www.example.com](https://example.com)");
|
||||
expect(res.text).toBe("www.example.com");
|
||||
});
|
||||
|
||||
it("does not duplicate URL when label matches URL without trailing slash", () => {
|
||||
const res = markdownToSignalText("[example.com](https://example.com/)");
|
||||
expect(res.text).toBe("example.com");
|
||||
});
|
||||
|
||||
it("does not duplicate URL when label matches URL with multiple trailing slashes", () => {
|
||||
const res = markdownToSignalText("[example.com](https://example.com///)");
|
||||
expect(res.text).toBe("example.com");
|
||||
});
|
||||
|
||||
it("does not duplicate URL when label includes www but URL does not", () => {
|
||||
const res = markdownToSignalText("[example.com](https://www.example.com)");
|
||||
expect(res.text).toBe("example.com");
|
||||
});
|
||||
|
||||
it("handles case-insensitive domain comparison", () => {
|
||||
const res = markdownToSignalText("[EXAMPLE.COM](https://example.com)");
|
||||
expect(res.text).toBe("EXAMPLE.COM");
|
||||
});
|
||||
|
||||
it("still shows URL when label is meaningfully different", () => {
|
||||
const res = markdownToSignalText("[click here](https://example.com)");
|
||||
expect(res.text).toBe("click here (https://example.com)");
|
||||
});
|
||||
|
||||
it("handles URL with path - should show URL when label is just domain", () => {
|
||||
// Label is just domain, URL has path - these are meaningfully different
|
||||
const res = markdownToSignalText("[example.com](https://example.com/page)");
|
||||
expect(res.text).toBe("example.com (https://example.com/page)");
|
||||
});
|
||||
|
||||
it("does not duplicate when label matches full URL with path", () => {
|
||||
const res = markdownToSignalText("[example.com/page](https://example.com/page)");
|
||||
expect(res.text).toBe("example.com/page");
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -21,6 +21,18 @@ describe("markdownToSignalText", () => {
|
||||
expect(res.styles).toEqual([]);
|
||||
});
|
||||
|
||||
it("keeps style offsets correct with multiple expanded links", () => {
|
||||
const markdown =
|
||||
"[first](https://example.com/first) **bold** [second](https://example.com/second)";
|
||||
const res = markdownToSignalText(markdown);
|
||||
|
||||
const expectedText =
|
||||
"first (https://example.com/first) bold second (https://example.com/second)";
|
||||
|
||||
expect(res.text).toBe(expectedText);
|
||||
expect(res.styles).toEqual([{ start: expectedText.indexOf("bold"), length: 4, style: "BOLD" }]);
|
||||
});
|
||||
|
||||
it("applies spoiler styling", () => {
|
||||
const res = markdownToSignalText("hello ||secret|| world");
|
||||
|
||||
|
||||
@@ -34,6 +34,17 @@ type Insertion = {
|
||||
length: number;
|
||||
};
|
||||
|
||||
function normalizeUrlForComparison(url: string): string {
|
||||
let normalized = url.toLowerCase();
|
||||
// Strip protocol
|
||||
normalized = normalized.replace(/^https?:\/\//, "");
|
||||
// Strip www. prefix
|
||||
normalized = normalized.replace(/^www\./, "");
|
||||
// Strip trailing slashes
|
||||
normalized = normalized.replace(/\/+$/, "");
|
||||
return normalized;
|
||||
}
|
||||
|
||||
function mapStyle(style: MarkdownStyle): SignalTextStyle | null {
|
||||
switch (style) {
|
||||
case "bold":
|
||||
@@ -100,15 +111,17 @@ function applyInsertionsToStyles(
|
||||
}
|
||||
const sortedInsertions = [...insertions].toSorted((a, b) => a.pos - b.pos);
|
||||
let updated = spans;
|
||||
let cumulativeShift = 0;
|
||||
|
||||
for (const insertion of sortedInsertions) {
|
||||
const insertionPos = insertion.pos + cumulativeShift;
|
||||
const next: SignalStyleSpan[] = [];
|
||||
for (const span of updated) {
|
||||
if (span.end <= insertion.pos) {
|
||||
if (span.end <= insertionPos) {
|
||||
next.push(span);
|
||||
continue;
|
||||
}
|
||||
if (span.start >= insertion.pos) {
|
||||
if (span.start >= insertionPos) {
|
||||
next.push({
|
||||
start: span.start + insertion.length,
|
||||
end: span.end + insertion.length,
|
||||
@@ -116,15 +129,15 @@ function applyInsertionsToStyles(
|
||||
});
|
||||
continue;
|
||||
}
|
||||
if (span.start < insertion.pos && span.end > insertion.pos) {
|
||||
if (insertion.pos > span.start) {
|
||||
if (span.start < insertionPos && span.end > insertionPos) {
|
||||
if (insertionPos > span.start) {
|
||||
next.push({
|
||||
start: span.start,
|
||||
end: insertion.pos,
|
||||
end: insertionPos,
|
||||
style: span.style,
|
||||
});
|
||||
}
|
||||
const shiftedStart = insertion.pos + insertion.length;
|
||||
const shiftedStart = insertionPos + insertion.length;
|
||||
const shiftedEnd = span.end + insertion.length;
|
||||
if (shiftedEnd > shiftedStart) {
|
||||
next.push({
|
||||
@@ -136,6 +149,7 @@ function applyInsertionsToStyles(
|
||||
}
|
||||
}
|
||||
updated = next;
|
||||
cumulativeShift += insertion.length;
|
||||
}
|
||||
|
||||
return updated;
|
||||
@@ -161,16 +175,26 @@ function renderSignalText(ir: MarkdownIR): SignalFormattedText {
|
||||
const href = link.href.trim();
|
||||
const label = text.slice(link.start, link.end);
|
||||
const trimmedLabel = label.trim();
|
||||
const comparableHref = href.startsWith("mailto:") ? href.slice("mailto:".length) : href;
|
||||
|
||||
if (href) {
|
||||
if (!trimmedLabel) {
|
||||
out += href;
|
||||
insertions.push({ pos: link.end, length: href.length });
|
||||
} else if (trimmedLabel !== href && trimmedLabel !== comparableHref) {
|
||||
const addition = ` (${href})`;
|
||||
out += addition;
|
||||
insertions.push({ pos: link.end, length: addition.length });
|
||||
} else {
|
||||
// Check if label is similar enough to URL that showing both would be redundant
|
||||
const normalizedLabel = normalizeUrlForComparison(trimmedLabel);
|
||||
let comparableHref = href;
|
||||
if (href.startsWith("mailto:")) {
|
||||
comparableHref = href.slice("mailto:".length);
|
||||
}
|
||||
const normalizedHref = normalizeUrlForComparison(comparableHref);
|
||||
|
||||
// Only show URL if label is meaningfully different from it
|
||||
if (normalizedLabel !== normalizedHref) {
|
||||
const addition = ` (${href})`;
|
||||
out += addition;
|
||||
insertions.push({ pos: link.end, length: addition.length });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -214,13 +238,136 @@ export function markdownToSignalText(
|
||||
const ir = markdownToIR(markdown ?? "", {
|
||||
linkify: true,
|
||||
enableSpoilers: true,
|
||||
headingStyle: "none",
|
||||
blockquotePrefix: "",
|
||||
headingStyle: "bold",
|
||||
blockquotePrefix: "> ",
|
||||
tableMode: options.tableMode,
|
||||
});
|
||||
return renderSignalText(ir);
|
||||
}
|
||||
|
||||
function sliceSignalStyles(
|
||||
styles: SignalTextStyleRange[],
|
||||
start: number,
|
||||
end: number,
|
||||
): SignalTextStyleRange[] {
|
||||
const sliced: SignalTextStyleRange[] = [];
|
||||
for (const style of styles) {
|
||||
const styleEnd = style.start + style.length;
|
||||
const sliceStart = Math.max(style.start, start);
|
||||
const sliceEnd = Math.min(styleEnd, end);
|
||||
if (sliceEnd > sliceStart) {
|
||||
sliced.push({
|
||||
start: sliceStart - start,
|
||||
length: sliceEnd - sliceStart,
|
||||
style: style.style,
|
||||
});
|
||||
}
|
||||
}
|
||||
return sliced;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split Signal formatted text into chunks under the limit while preserving styles.
|
||||
*
|
||||
* This implementation deterministically tracks cursor position without using indexOf,
|
||||
* which is fragile when chunks are trimmed or when duplicate substrings exist.
|
||||
* Styles spanning chunk boundaries are split into separate ranges for each chunk.
|
||||
*/
|
||||
function splitSignalFormattedText(
|
||||
formatted: SignalFormattedText,
|
||||
limit: number,
|
||||
): SignalFormattedText[] {
|
||||
const { text, styles } = formatted;
|
||||
|
||||
if (text.length <= limit) {
|
||||
return [formatted];
|
||||
}
|
||||
|
||||
const results: SignalFormattedText[] = [];
|
||||
let remaining = text;
|
||||
let offset = 0; // Track position in original text for style slicing
|
||||
|
||||
while (remaining.length > 0) {
|
||||
if (remaining.length <= limit) {
|
||||
// Last chunk - take everything remaining
|
||||
const trimmed = remaining.trimEnd();
|
||||
if (trimmed.length > 0) {
|
||||
results.push({
|
||||
text: trimmed,
|
||||
styles: mergeStyles(sliceSignalStyles(styles, offset, offset + trimmed.length)),
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Find a good break point within the limit
|
||||
const window = remaining.slice(0, limit);
|
||||
let breakIdx = findBreakIndex(window);
|
||||
|
||||
// If no good break point found, hard break at limit
|
||||
if (breakIdx <= 0) {
|
||||
breakIdx = limit;
|
||||
}
|
||||
|
||||
// Extract chunk and trim trailing whitespace
|
||||
const rawChunk = remaining.slice(0, breakIdx);
|
||||
const chunk = rawChunk.trimEnd();
|
||||
|
||||
if (chunk.length > 0) {
|
||||
results.push({
|
||||
text: chunk,
|
||||
styles: mergeStyles(sliceSignalStyles(styles, offset, offset + chunk.length)),
|
||||
});
|
||||
}
|
||||
|
||||
// Advance past the chunk and any whitespace separator
|
||||
const brokeOnWhitespace = breakIdx < remaining.length && /\s/.test(remaining[breakIdx]);
|
||||
const nextStart = Math.min(remaining.length, breakIdx + (brokeOnWhitespace ? 1 : 0));
|
||||
|
||||
// Chunks are sent as separate messages, so we intentionally drop boundary whitespace.
|
||||
// Keep `offset` in sync with the dropped characters so style slicing stays correct.
|
||||
remaining = remaining.slice(nextStart).trimStart();
|
||||
offset = text.length - remaining.length;
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find the best break index within a text window.
|
||||
* Prefers newlines over whitespace, avoids breaking inside parentheses.
|
||||
*/
|
||||
function findBreakIndex(window: string): number {
|
||||
let lastNewline = -1;
|
||||
let lastWhitespace = -1;
|
||||
let parenDepth = 0;
|
||||
|
||||
for (let i = 0; i < window.length; i++) {
|
||||
const char = window[i];
|
||||
|
||||
if (char === "(") {
|
||||
parenDepth++;
|
||||
continue;
|
||||
}
|
||||
if (char === ")" && parenDepth > 0) {
|
||||
parenDepth--;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Only consider break points outside parentheses
|
||||
if (parenDepth === 0) {
|
||||
if (char === "\n") {
|
||||
lastNewline = i;
|
||||
} else if (/\s/.test(char)) {
|
||||
lastWhitespace = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Prefer newline break, fall back to whitespace
|
||||
return lastNewline > 0 ? lastNewline : lastWhitespace;
|
||||
}
|
||||
|
||||
export function markdownToSignalTextChunks(
|
||||
markdown: string,
|
||||
limit: number,
|
||||
@@ -229,10 +376,22 @@ export function markdownToSignalTextChunks(
|
||||
const ir = markdownToIR(markdown ?? "", {
|
||||
linkify: true,
|
||||
enableSpoilers: true,
|
||||
headingStyle: "none",
|
||||
blockquotePrefix: "",
|
||||
headingStyle: "bold",
|
||||
blockquotePrefix: "> ",
|
||||
tableMode: options.tableMode,
|
||||
});
|
||||
const chunks = chunkMarkdownIR(ir, limit);
|
||||
return chunks.map((chunk) => renderSignalText(chunk));
|
||||
const results: SignalFormattedText[] = [];
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const rendered = renderSignalText(chunk);
|
||||
// If link expansion caused the chunk to exceed the limit, re-chunk it
|
||||
if (rendered.text.length > limit) {
|
||||
results.push(...splitSignalFormattedText(rendered, limit));
|
||||
} else {
|
||||
results.push(rendered);
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
57
src/signal/format.visual.test.ts
Normal file
57
src/signal/format.visual.test.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { markdownToSignalText } from "./format.js";
|
||||
|
||||
describe("markdownToSignalText", () => {
|
||||
describe("headings visual distinction", () => {
|
||||
it("renders headings as bold text", () => {
|
||||
const res = markdownToSignalText("# Heading 1");
|
||||
expect(res.text).toBe("Heading 1");
|
||||
expect(res.styles).toContainEqual({ start: 0, length: 9, style: "BOLD" });
|
||||
});
|
||||
|
||||
it("renders h2 headings as bold text", () => {
|
||||
const res = markdownToSignalText("## Heading 2");
|
||||
expect(res.text).toBe("Heading 2");
|
||||
expect(res.styles).toContainEqual({ start: 0, length: 9, style: "BOLD" });
|
||||
});
|
||||
|
||||
it("renders h3 headings as bold text", () => {
|
||||
const res = markdownToSignalText("### Heading 3");
|
||||
expect(res.text).toBe("Heading 3");
|
||||
expect(res.styles).toContainEqual({ start: 0, length: 9, style: "BOLD" });
|
||||
});
|
||||
});
|
||||
|
||||
describe("blockquote visual distinction", () => {
|
||||
it("renders blockquotes with a visible prefix", () => {
|
||||
const res = markdownToSignalText("> This is a quote");
|
||||
// Should have some kind of prefix to distinguish it
|
||||
expect(res.text).toMatch(/^[│>]/);
|
||||
expect(res.text).toContain("This is a quote");
|
||||
});
|
||||
|
||||
it("renders multi-line blockquotes with prefix", () => {
|
||||
const res = markdownToSignalText("> Line 1\n> Line 2");
|
||||
// Should start with the prefix
|
||||
expect(res.text).toMatch(/^[│>]/);
|
||||
expect(res.text).toContain("Line 1");
|
||||
expect(res.text).toContain("Line 2");
|
||||
});
|
||||
});
|
||||
|
||||
describe("horizontal rule rendering", () => {
|
||||
it("renders horizontal rules as a visible separator", () => {
|
||||
const res = markdownToSignalText("Para 1\n\n---\n\nPara 2");
|
||||
// Should contain some kind of visual separator like ───
|
||||
expect(res.text).toMatch(/[─—-]{3,}/);
|
||||
});
|
||||
|
||||
it("renders horizontal rule between content", () => {
|
||||
const res = markdownToSignalText("Above\n\n***\n\nBelow");
|
||||
expect(res.text).toContain("Above");
|
||||
expect(res.text).toContain("Below");
|
||||
// Should have a separator
|
||||
expect(res.text).toMatch(/[─—-]{3,}/);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -82,10 +82,10 @@ describe("markdownToSlackMrkdwn", () => {
|
||||
expect(res).toBe("> Quote");
|
||||
});
|
||||
|
||||
it("handles adjacent list items", () => {
|
||||
it("handles nested list items", () => {
|
||||
const res = markdownToSlackMrkdwn("- item\n - nested");
|
||||
// markdown-it treats indented items as continuation, not nesting
|
||||
expect(res).toBe("• item • nested");
|
||||
// markdown-it correctly parses this as a nested list
|
||||
expect(res).toBe("• item\n • nested");
|
||||
});
|
||||
|
||||
it("handles complex message with multiple elements", () => {
|
||||
|
||||
Reference in New Issue
Block a user