fix(utils): harden directive block placeholders

This commit is contained in:
Peter Steinberger
2026-04-06 15:10:26 +01:00
parent af7c21f207
commit 26a5ab1c6f
2 changed files with 29 additions and 6 deletions

View File

@@ -191,6 +191,22 @@ describe("parseInlineDirectives", () => {
expect(result.audioAsVoice).toBe(true);
expect(result.text).toBe(["```bash", " echo 'hello'", " indented", "```"].join("\n"));
});
test("preserves literal sentinel-like text while restoring masked code blocks", () => {
  // U+E000, the digit "0", U+E000: literal prose shaped like a placeholder
  // that is delimited by single U+E000 characters.
  const lookalike = "\uE0000\uE000";
  const proseLine = `literal ${lookalike} text`;
  const fencedBlock = ["```ts", " const value = 1;", "```"];

  const parsed = parseInlineDirectives(
    ["[[reply_to_current]]", proseLine, ...fencedBlock].join("\n"),
  );

  // The directive tag is detected and stripped; the look-alike prose and the
  // fenced block must both come back unchanged.
  expect(parsed.hasReplyTag).toBe(true);
  expect(parsed.text).toBe([proseLine, ...fencedBlock].join("\n"));
});
});
describe("stripInlineDirectiveTagsFromMessageForDisplay", () => {

View File

@@ -25,21 +25,28 @@ function replacementPreservesWordBoundary(source: string, offset: number, length
return before && after && !/\s/u.test(before) && !/\s/u.test(after) ? " " : "";
}
// Unicode private-use sentinel that cannot appear in normal markdown text.
// Used to bracket code-block placeholders during whitespace normalization.
const BLOCK_SENTINEL = "\uE000";
const BLOCK_PLACEHOLDER_RE = new RegExp(`${BLOCK_SENTINEL}(\\d+)${BLOCK_SENTINEL}`, "g");
const BLOCK_SENTINEL_SEED = "\uE000";

/**
 * Builds a sentinel string that does not occur anywhere in `text`.
 *
 * The sentinel is a run of `BLOCK_SENTINEL_SEED` characters exactly one longer
 * than the longest run of that character already present in `text` (so a
 * single seed character when `text` contains none).
 *
 * @param text - The input that the returned sentinel must not collide with.
 * @returns The shortest run of seed characters that is absent from `text`.
 */
function createBlockSentinel(text: string): string {
  let longestRun = 0;
  let currentRun = 0;
  for (const ch of text) {
    // Track the length of the seed run ending at this character.
    currentRun = ch === BLOCK_SENTINEL_SEED ? currentRun + 1 : 0;
    if (currentRun > longestRun) {
      longestRun = currentRun;
    }
  }
  // One more seed character than any existing run cannot appear in `text`.
  return BLOCK_SENTINEL_SEED.repeat(longestRun + 1);
}
function normalizeDirectiveWhitespace(text: string): string {
// Extract → normalize prose → restore:
// Stash every code block (fenced ``` / ~~~ and indent-code 4-space/tab)
// under a sentinel-delimited placeholder so the prose regexes never touch them.
const blockSentinel = createBlockSentinel(text);
const blockPlaceholderRe = new RegExp(`${blockSentinel}(\\d+)${blockSentinel}`, "g");
const blocks: string[] = [];
const masked = text.replace(
/(`{3,}|~{3,})[^\n]*\n[\s\S]*?\n\1[^\n]*|(?:(?:^|\n)(?: |\t)[^\n]*)+/gm,
(block) => {
blocks.push(block);
return `${BLOCK_SENTINEL}${blocks.length - 1}${BLOCK_SENTINEL}`;
return `${blockSentinel}${blocks.length - 1}${blockSentinel}`;
},
);
@@ -52,7 +59,7 @@ function normalizeDirectiveWhitespace(text: string): string {
.replace(/\n{3,}/g, "\n\n")
.trimEnd();
return normalized.replace(BLOCK_PLACEHOLDER_RE, (_, i) => blocks[Number(i)]);
return normalized.replace(blockPlaceholderRe, (_, i) => blocks[Number(i)]);
}
type StripInlineDirectiveTagsResult = {