test: dedupe routing and text suites

This commit is contained in:
Peter Steinberger
2026-03-28 07:31:24 +00:00
parent 30be04cd87
commit 155915e7dc
10 changed files with 1766 additions and 1351 deletions

View File

@@ -2,35 +2,12 @@ import { describe, expect, it } from "vitest";
import { stripAssistantInternalScaffolding } from "./assistant-visible-text.js";
describe("stripAssistantInternalScaffolding", () => {
it("strips reasoning tags", () => {
const input = ["<thinking>", "secret", "</thinking>", "Visible"].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
});
/**
 * Asserts that stripping assistant-internal scaffolding from `input`
 * produces exactly `expected`.
 *
 * @param input - Raw assistant text, possibly containing scaffolding tags.
 * @param expected - The text that should remain after stripping.
 */
function expectVisibleText(input: string, expected: string) {
  const visible = stripAssistantInternalScaffolding(input);
  expect(visible).toBe(expected);
}
it("strips relevant-memories scaffolding blocks", () => {
const input = [
"<relevant-memories>",
"The following memories may be relevant to this conversation:",
"- Internal memory note",
"</relevant-memories>",
"",
"User-visible answer",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("User-visible answer");
});
it("supports relevant_memories tag variants", () => {
const input = [
"<relevant_memories>",
"Internal memory note",
"</relevant_memories>",
"Visible",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
});
it("keeps relevant-memories tags inside fenced code", () => {
const input = [
function createLiteralRelevantMemoriesCodeBlock() {
return [
"```xml",
"<relevant-memories>",
"sample",
@@ -39,43 +16,87 @@ describe("stripAssistantInternalScaffolding", () => {
"",
"Visible text",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe(input);
});
}
it("keeps relevant-memories tags inside inline code", () => {
const input = "Use `<relevant-memories>example</relevant-memories>` literally.";
expect(stripAssistantInternalScaffolding(input)).toBe(input);
});
/**
 * Asserts that `input` comes back from scaffolding stripping unchanged,
 * i.e. the expected visible text is the input itself.
 *
 * @param input - Text that must be left untouched.
 */
function expectLiteralVisibleText(input: string) {
  const unchanged = input;
  expectVisibleText(input, unchanged);
}
it("hides unfinished relevant-memories blocks", () => {
const input = ["Hello", "<relevant-memories>", "internal-only"].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Hello\n");
});
it("trims leading whitespace after stripping scaffolding", () => {
const input = [
"<thinking>",
"secret",
"</thinking>",
" ",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
" Visible",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Visible");
});
it("preserves unfinished reasoning text while still stripping memory blocks", () => {
const input = [
"Before",
"<thinking>",
"secret",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
"After",
].join("\n");
expect(stripAssistantInternalScaffolding(input)).toBe("Before\n\nsecret\n\nAfter");
it.each([
{
name: "strips reasoning tags",
input: ["<thinking>", "secret", "</thinking>", "Visible"].join("\n"),
expected: "Visible",
},
{
name: "strips relevant-memories scaffolding blocks",
input: [
"<relevant-memories>",
"The following memories may be relevant to this conversation:",
"- Internal memory note",
"</relevant-memories>",
"",
"User-visible answer",
].join("\n"),
expected: "User-visible answer",
},
{
name: "supports relevant_memories tag variants",
input: [
"<relevant_memories>",
"Internal memory note",
"</relevant_memories>",
"Visible",
].join("\n"),
expected: "Visible",
},
{
name: "hides unfinished relevant-memories blocks",
input: ["Hello", "<relevant-memories>", "internal-only"].join("\n"),
expected: "Hello\n",
},
{
name: "trims leading whitespace after stripping scaffolding",
input: [
"<thinking>",
"secret",
"</thinking>",
" ",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
" Visible",
].join("\n"),
expected: "Visible",
},
{
name: "preserves unfinished reasoning text while still stripping memory blocks",
input: [
"Before",
"<thinking>",
"secret",
"<relevant-memories>",
"internal note",
"</relevant-memories>",
"After",
].join("\n"),
expected: "Before\n\nsecret\n\nAfter",
},
{
name: "keeps relevant-memories tags inside fenced code",
input: createLiteralRelevantMemoriesCodeBlock(),
expected: undefined,
},
{
name: "keeps literal relevant-memories prose",
input: "Use `<relevant-memories>example</relevant-memories>` literally.",
expected: undefined,
},
] as const)("$name", ({ input, expected }) => {
if (expected === undefined) {
expectLiteralVisibleText(input);
return;
}
expectVisibleText(input, expected);
});
});

View File

@@ -2,50 +2,61 @@ import { describe, expect, it } from "vitest";
import { findCodeRegions, isInsideCode } from "./code-regions.js";
describe("shared/text/code-regions", () => {
it("finds fenced and inline code regions without double-counting inline code inside fences", () => {
const text = [
"before `inline` after",
"```ts",
"const a = `inside fence`;",
"```",
"tail",
].join("\n");
/**
 * Runs `findCodeRegions` on `text` and asserts that the substrings covered by
 * the returned regions equal `expectedSlices`, in order.
 *
 * @param text - Input handed to `findCodeRegions`.
 * @param expectedSlices - Expected `text.slice(start, end)` for each region.
 */
function expectCodeRegionSlices(text: string, expectedSlices: readonly string[]) {
  const found = findCodeRegions(text);
  // Check the count first so a length mismatch is reported on its own.
  expect(found).toHaveLength(expectedSlices.length);
  const actualSlices = found.map(({ start, end }) => text.slice(start, end));
  expect(actualSlices).toEqual(expectedSlices);
}
expect(regions).toHaveLength(2);
expect(text.slice(regions[0].start, regions[0].end)).toBe("`inline`");
expect(text.slice(regions[1].start, regions[1].end)).toContain("```ts");
});
it("accepts alternate fence markers and unterminated trailing fences", () => {
const text = "~~~js\nconsole.log(1)\n~~~\nplain\n```\nunterminated";
const regions = findCodeRegions(text);
expect(regions).toHaveLength(2);
expect(text.slice(regions[0].start, regions[0].end)).toContain("~~~js");
expect(text.slice(regions[1].start, regions[1].end)).toBe("```\nunterminated");
});
it("keeps adjacent inline code outside fenced regions", () => {
const text = ["```ts", "const a = 1;", "```", "after `inline` tail"].join("\n");
const regions = findCodeRegions(text);
expect(regions).toHaveLength(2);
expect(text.slice(regions[0].start, regions[0].end)).toContain("```ts");
expect(text.slice(regions[1].start, regions[1].end)).toBe("`inline`");
});
it("reports whether positions are inside discovered regions", () => {
/**
 * Checks `isInsideCode` for one probe position in the fixed fixture
 * "plain `code` done".
 *
 * @param params.positionSelector - Picks the position to probe; receives the
 *   fixture text and the `end` offset of the first discovered region.
 * @param params.expected - Expected result of `isInsideCode` at that position.
 */
function expectInsideCodeCase(params: {
  positionSelector: (text: string, regionEnd: number) => number;
  expected: boolean;
}) {
  const text = "plain `code` done";
  const regions = findCodeRegions(text);
  // Falls back to -1 when no region was discovered.
  // NOTE(review): with that fallback an "expected: false" case would still pass
  // if no region is found at all — confirm region discovery is covered elsewhere.
  const regionEnd = regions[0]?.end ?? -1;
  expect(isInsideCode(params.positionSelector(text, regionEnd), regions)).toBe(params.expected);
}
expect(isInsideCode(codeStart, regions)).toBe(true);
expect(isInsideCode(plainStart, regions)).toBe(false);
expect(isInsideCode(regionEnd, regions)).toBe(false);
it.each([
{
name: "finds fenced and inline code regions without double-counting inline code inside fences",
text: ["before `inline` after", "```ts", "const a = `inside fence`;", "```", "tail"].join(
"\n",
),
expectedSlices: ["`inline`", "```ts\nconst a = `inside fence`;\n```"],
},
{
name: "accepts alternate fence markers and unterminated trailing fences",
text: "~~~js\nconsole.log(1)\n~~~\nplain\n```\nunterminated",
expectedSlices: ["~~~js\nconsole.log(1)\n~~~", "```\nunterminated"],
},
{
name: "keeps adjacent inline code outside fenced regions",
text: ["```ts", "const a = 1;", "```", "after `inline` tail"].join("\n"),
expectedSlices: ["```ts\nconst a = 1;\n```", "`inline`"],
},
] as const)("$name", ({ text, expectedSlices }) => {
expectCodeRegionSlices(text, expectedSlices);
});
// Table of probe positions for `isInsideCode`. Each `positionSelector` is invoked by
// expectInsideCodeCase with the fixture text and the end offset of the first region,
// and returns the position whose membership is asserted against `expected`.
it.each([
{
name: "inside code",
positionSelector: (text: string) => text.indexOf("code"),
expected: true,
},
{
name: "outside code",
positionSelector: (text: string) => text.indexOf("plain"),
expected: false,
},
{
// The region's own `end` offset is expected to count as outside the region.
name: "at region end",
positionSelector: (_text: string, regionEnd: number) => regionEnd,
expected: false,
},
] as const)("reports whether positions are inside discovered regions: $name", (testCase) => {
expectInsideCodeCase(testCase);
});
});

View File

@@ -1,45 +1,51 @@
import { describe, expect, it } from "vitest";
import { concatOptionalTextSegments, joinPresentTextSegments } from "./join-segments.js";
/**
 * Asserts that `actual` is identical to `expected` using `toBe`.
 *
 * @param actual - Value produced by the code under test.
 * @param expected - Value it must be identical to.
 */
function expectTextSegmentsCase<T>(actual: T, expected: T) {
  const assertion = expect(actual);
  assertion.toBe(expected);
}
/**
 * Invokes `params.run` and asserts that its result matches `params.expected`.
 *
 * @param params.run - Thunk that calls the function under test.
 * @param params.expected - Expected return value of `run`.
 */
function expectJoinedTextSegmentsCase<T>(params: { run: () => T; expected: T }) {
  const actual = params.run();
  expectTextSegmentsCase(actual, params.expected);
}
describe("concatOptionalTextSegments", () => {
it("concatenates left and right with default separator", () => {
expect(concatOptionalTextSegments({ left: "A", right: "B" })).toBe("A\n\nB");
});
it("keeps explicit empty-string right value", () => {
expect(concatOptionalTextSegments({ left: "A", right: "" })).toBe("");
});
it("falls back to whichever side is present and honors custom separators", () => {
expect(concatOptionalTextSegments({ left: "A" })).toBe("A");
expect(concatOptionalTextSegments({ right: "B" })).toBe("B");
expect(concatOptionalTextSegments({ left: "", right: "B" })).toBe("B");
expect(concatOptionalTextSegments({ left: "" })).toBe("");
expect(concatOptionalTextSegments({ left: "A", right: "B", separator: " | " })).toBe("A | B");
it.each([
{ params: { left: "A", right: "B" }, expected: "A\n\nB" },
{ params: { left: "A", right: "" }, expected: "" },
{ params: { left: "A" }, expected: "A" },
{ params: { right: "B" }, expected: "B" },
{ params: { left: "", right: "B" }, expected: "B" },
{ params: { left: "" }, expected: "" },
{ params: { left: "A", right: "B", separator: " | " }, expected: "A | B" },
] as const)("concatenates optional segments %#", ({ params, expected }) => {
expectJoinedTextSegmentsCase({
run: () => concatOptionalTextSegments(params),
expected,
});
});
});
describe("joinPresentTextSegments", () => {
it("joins non-empty segments", () => {
expect(joinPresentTextSegments(["A", undefined, "B"])).toBe("A\n\nB");
});
it("returns undefined when all segments are empty", () => {
expect(joinPresentTextSegments(["", undefined, null])).toBeUndefined();
});
it("trims segments when requested", () => {
expect(joinPresentTextSegments([" A ", " B "], { trim: true })).toBe("A\n\nB");
});
it("keeps whitespace-only segments unless trim is enabled and supports custom separators", () => {
expect(joinPresentTextSegments(["A", " ", "B"], { separator: " | " })).toBe("A | | B");
expect(joinPresentTextSegments(["A", " ", "B"], { trim: true, separator: " | " })).toBe(
"A | B",
);
});
it("preserves segment whitespace when trim is disabled", () => {
expect(joinPresentTextSegments(["A", " B "], { separator: "|" })).toBe("A| B ");
it.each([
{ segments: ["A", undefined, "B"], options: undefined, expected: "A\n\nB" },
{ segments: ["", undefined, null], options: undefined, expected: undefined },
{ segments: [" A ", " B "], options: { trim: true }, expected: "A\n\nB" },
{
segments: ["A", " ", "B"],
options: { separator: " | " },
expected: "A | | B",
},
{
segments: ["A", " ", "B"],
options: { trim: true, separator: " | " },
expected: "A | B",
},
{ segments: ["A", " B "], options: { separator: "|" }, expected: "A| B " },
] as const)("joins present segments %#", ({ segments, options, expected }) => {
expectJoinedTextSegmentsCase({
run: () => joinPresentTextSegments(segments, options),
expected,
});
});
});

View File

@@ -2,181 +2,200 @@ import { describe, expect, it } from "vitest";
import { stripReasoningTagsFromText } from "./reasoning-tags.js";
describe("stripReasoningTagsFromText", () => {
const expectStrippedCases = (
cases: ReadonlyArray<{
input: string;
expected: string;
opts?: Parameters<typeof stripReasoningTagsFromText>[1];
name?: string;
}>,
) => {
for (const { input, expected, opts, name } of cases) {
expect(stripReasoningTagsFromText(input, opts), name).toBe(expected);
/**
 * Asserts that `stripReasoningTagsFromText(params.input, params.opts)` is
 * identical to `params.expected`.
 *
 * @param params.input - Text to strip; `null` is accepted so null-ish table rows can be passed through.
 * @param params.expected - Expected result (`null` allowed for the same reason).
 * @param params.opts - Optional second argument forwarded to `stripReasoningTagsFromText`.
 */
function expectStrippedCase(params: {
  input: string | null;
  expected: string | null;
  opts?: Parameters<typeof stripReasoningTagsFromText>[1];
}) {
  // The double cast lets a `null` input reach the function despite its `string` parameter type.
  const rawInput = params.input as unknown as string;
  const stripped = stripReasoningTagsFromText(rawInput, params.opts);
  expect(stripped).toBe(params.expected);
}
/**
 * Asserts that `stripReasoningTagsFromText`, called with default options,
 * returns `input` unchanged.
 *
 * @param input - Text that must pass through untouched.
 */
function expectPreservedReasoningTagCodeExample(input: string) {
  const output = stripReasoningTagsFromText(input);
  expect(output).toBe(input);
}
function expectReasoningCodeCase(params: { input: string; expected?: string }) {
if (params.expected === undefined) {
expectPreservedReasoningTagCodeExample(params.input);
return;
}
};
expectStrippedCase({
input: params.input,
expected: params.expected,
});
}
describe("basic functionality", () => {
it("returns text unchanged when no reasoning tags present", () => {
const input = "Hello, this is a normal message.";
expect(stripReasoningTagsFromText(input)).toBe(input);
});
it("strips reasoning-tag variants", () => {
const cases = [
{
name: "strips proper think tags",
input: "Hello <think>internal reasoning</think> world!",
expected: "Hello world!",
},
{
name: "strips thinking tags",
input: "Before <thinking>some thought</thinking> after",
expected: "Before after",
},
{ name: "strips thought tags", input: "A <thought>hmm</thought> B", expected: "A B" },
{
name: "strips antthinking tags",
input: "X <antthinking>internal</antthinking> Y",
expected: "X Y",
},
] as const;
expectStrippedCases(cases);
});
it("strips multiple reasoning blocks", () => {
const input = "<think>first</think>A<think>second</think>B";
expect(stripReasoningTagsFromText(input)).toBe("AB");
it.each([
{
name: "returns text unchanged when no reasoning tags present",
input: "Hello, this is a normal message.",
expected: "Hello, this is a normal message.",
},
{
name: "strips proper think tags",
input: "Hello <think>internal reasoning</think> world!",
expected: "Hello world!",
},
{
name: "strips thinking tags",
input: "Before <thinking>some thought</thinking> after",
expected: "Before after",
},
{ name: "strips thought tags", input: "A <thought>hmm</thought> B", expected: "A B" },
{
name: "strips antthinking tags",
input: "X <antthinking>internal</antthinking> Y",
expected: "X Y",
},
{
name: "strips multiple reasoning blocks",
input: "<think>first</think>A<think>second</think>B",
expected: "AB",
},
] as const)("$name", (testCase) => {
expectStrippedCase(testCase);
});
});
describe("code block preservation (issue #3952)", () => {
it("preserves tags inside code examples", () => {
const cases = [
"Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!",
"The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.",
"Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!",
"Use `<think>` to open and `</think>` to close.",
"Example:\n```\n<think>reasoning</think>\n```",
"Use `<final>` for final answers in code: ```\n<final>42</final>\n```",
"First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`",
] as const;
for (const input of cases) {
expect(stripReasoningTagsFromText(input)).toBe(input);
}
});
it("handles mixed code-tag and real-tag content", () => {
const cases = [
{
input: "<think>hidden</think>Visible text with `<think>` example.",
expected: "Visible text with `<think>` example.",
},
{
input: "```\n<think>code</think>\n```\n<think>real hidden</think>visible",
expected: "```\n<think>code</think>\n```\nvisible",
},
] as const;
expectStrippedCases(cases);
it.each([
{
name: "preserves plain code example",
input: "Use the tag like this:\n```\n<think>reasoning</think>\n```\nThat's it!",
},
{
name: "preserves inline literal think tag documentation",
input: "The `<think>` tag is used for reasoning. Don't forget the closing `</think>` tag.",
},
{
name: "preserves xml fenced examples",
input: "Example:\n```xml\n<think>\n <thought>nested</thought>\n</think>\n```\nDone!",
},
{
name: "preserves plain literal opening and closing tags",
input: "Use `<think>` to open and `</think>` to close.",
},
{
name: "preserves fenced think example",
input: "Example:\n```\n<think>reasoning</think>\n```",
},
{
name: "preserves final tags inside code examples",
input: "Use `<final>` for final answers in code: ```\n<final>42</final>\n```",
},
{
name: "preserves mixed literal think tags and code blocks",
input: "First `<think>` then ```\n<thinking>block</thinking>\n``` then `<thought>`",
},
{
name: "strips real tags while preserving literal think examples",
input: "<think>hidden</think>Visible text with `<think>` example.",
expected: "Visible text with `<think>` example.",
},
{
name: "strips real tags after fenced code block",
input: "```\n<think>code</think>\n```\n<think>real hidden</think>visible",
expected: "```\n<think>code</think>\n```\nvisible",
},
] as const)("$name", ({ input, expected }) => {
expectReasoningCodeCase({ input, expected });
});
});
describe("edge cases", () => {
it("handles malformed tags and null-ish inputs", () => {
const cases = [
{
input: "Here is how to use <think tags in your code",
expected: "Here is how to use <think tags in your code",
},
{
input: "You can start with <think and then close with </think>",
expected: "You can start with <think and then close with",
},
{
input: "A < think >content< /think > B",
expected: "A B",
},
{
input: "",
expected: "",
},
{
input: null as unknown as string,
expected: null,
},
] as const;
for (const { input, expected } of cases) {
expect(stripReasoningTagsFromText(input)).toBe(expected);
}
it.each([
{
input: "Here is how to use <think tags in your code",
expected: "Here is how to use <think tags in your code",
},
{
input: "You can start with <think and then close with </think>",
expected: "You can start with <think and then close with",
},
{
input: "A < think >content< /think > B",
expected: "A B",
},
{
input: "",
expected: "",
},
{
input: null as unknown as string,
expected: null,
},
] as const)("handles malformed/null-ish input %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles fenced and inline code edge behavior", () => {
const cases = [
{
input: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
expected: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
},
{
input: "Example:\n~~~js\n<think>code</think>\n~~~",
expected: "Example:\n~~~js\n<think>code</think>\n~~~",
},
{
input: "Use ``code`` with <think>hidden</think> text",
expected: "Use ``code`` with text",
},
{
input: "Before\n```\ncode\n```\nAfter with <think>hidden</think>",
expected: "Before\n```\ncode\n```\nAfter with",
},
{
input: "```\n<think>not protected\n~~~\n</think>text",
expected: "```\n<think>not protected\n~~~\n</think>text",
},
{
input: "Start `unclosed <think>hidden</think> end",
expected: "Start `unclosed end",
},
] as const;
expectStrippedCases(cases);
it.each([
{
input: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
expected: "Example:\n~~~\n<think>reasoning</think>\n~~~\nDone!",
},
{
input: "Example:\n~~~js\n<think>code</think>\n~~~",
expected: "Example:\n~~~js\n<think>code</think>\n~~~",
},
{
input: "Use ``code`` with <think>hidden</think> text",
expected: "Use ``code`` with text",
},
{
input: "Before\n```\ncode\n```\nAfter with <think>hidden</think>",
expected: "Before\n```\ncode\n```\nAfter with",
},
{
input: "```\n<think>not protected\n~~~\n</think>text",
expected: "```\n<think>not protected\n~~~\n</think>text",
},
{
input: "Start `unclosed <think>hidden</think> end",
expected: "Start `unclosed end",
},
] as const)("handles fenced/inline code edge behavior: %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles nested and final tag behavior", () => {
const cases = [
{
input: "<think>outer <think>inner</think> still outer</think>visible",
expected: "still outervisible",
},
{
input: "A<final>1</final>B<final>2</final>C",
expected: "A1B2C",
},
{
input: "`<final>` in code, <final>visible</final> outside",
expected: "`<final>` in code, visible outside",
},
{
input: "A <FINAL data-x='1'>visible</Final> B",
expected: "A visible B",
},
] as const;
expectStrippedCases(cases);
it.each([
{
input: "<think>outer <think>inner</think> still outer</think>visible",
expected: "still outervisible",
},
{
input: "A<final>1</final>B<final>2</final>C",
expected: "A1B2C",
},
{
input: "`<final>` in code, <final>visible</final> outside",
expected: "`<final>` in code, visible outside",
},
{
input: "A <FINAL data-x='1'>visible</Final> B",
expected: "A visible B",
},
] as const)("handles nested/final tag behavior: %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles unicode, attributes, and case-insensitive tag names", () => {
const cases = [
{
input: "你好 <think>思考 🤔</think> 世界",
expected: "你好 世界",
},
{
input: "A <think id='test' class=\"foo\">hidden</think> B",
expected: "A B",
},
{
input: "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B",
expected: "A B",
},
] as const;
expectStrippedCases(cases);
it.each([
{
input: "你好 <think>思考 🤔</think> 世界",
expected: "你好 世界",
},
{
input: "A <think id='test' class=\"foo\">hidden</think> B",
expected: "A B",
},
{
input: "A <THINK>hidden</THINK> <Thinking>also hidden</Thinking> B",
expected: "A B",
},
] as const)("handles unicode/attributes/case-insensitive names: %j", (testCase) => {
expectStrippedCase(testCase);
});
it("handles long content and pathological backtick patterns efficiently", () => {
@@ -192,50 +211,60 @@ describe("stripReasoningTagsFromText", () => {
});
describe("strict vs preserve mode", () => {
it("applies strict and preserve modes to unclosed tags", () => {
const input = "Before <think>unclosed content after";
const cases = [
{ mode: "strict" as const, expected: "Before" },
{ mode: "preserve" as const, expected: "Before unclosed content after" },
] as const;
for (const { mode, expected } of cases) {
expect(stripReasoningTagsFromText(input, { mode })).toBe(expected);
}
});
it("still strips fully closed reasoning blocks in preserve mode", () => {
expect(stripReasoningTagsFromText("A <think>hidden</think> B", { mode: "preserve" })).toBe(
"A B",
);
it.each([
{
name: "applies strict mode to unclosed tags",
input: "Before <think>unclosed content after",
expected: "Before",
opts: { mode: "strict" as const },
},
{
name: "applies preserve mode to unclosed tags",
input: "Before <think>unclosed content after",
expected: "Before unclosed content after",
opts: { mode: "preserve" as const },
},
{
name: "still strips fully closed reasoning blocks in preserve mode",
input: "A <think>hidden</think> B",
expected: "A B",
opts: { mode: "preserve" as const },
},
] as const)("$name", (testCase) => {
expectStrippedCase(testCase);
});
});
describe("trim options", () => {
it("applies configured trim strategies", () => {
const cases = [
{
input: " <think>x</think> result <think>y</think> ",
expected: "result",
opts: undefined,
},
{
input: " <think>x</think> result ",
expected: " result ",
opts: { trim: "none" as const },
},
{
input: " <think>x</think> result ",
expected: "result ",
opts: { trim: "start" as const },
},
] as const;
expectStrippedCases(cases);
it.each([
{
name: "applies default trim strategy",
input: " <think>x</think> result <think>y</think> ",
expected: "result",
opts: undefined,
},
{
name: "supports trim=none",
input: " <think>x</think> result ",
expected: " result ",
opts: { trim: "none" as const },
},
{
name: "supports trim=start",
input: " <think>x</think> result ",
expected: "result ",
opts: { trim: "start" as const },
},
] as const)("$name", (testCase) => {
expectStrippedCase(testCase);
});
});
it("does not leak regex state across repeated calls", () => {
expect(stripReasoningTagsFromText("A <final>1</final> B")).toBe("A 1 B");
expect(stripReasoningTagsFromText("C <final>2</final> D")).toBe("C 2 D");
expect(stripReasoningTagsFromText("E <think>x</think> F")).toBe("E F");
it.each([
{ input: "A <final>1</final> B", expected: "A 1 B" },
{ input: "C <final>2</final> D", expected: "C 2 D" },
{ input: "E <think>x</think> F", expected: "E F" },
] as const)("does not leak regex state across repeated calls: %j", (testCase) => {
expectStrippedCase(testCase);
});
});