refactor(text): share code-region parsing for reasoning tags

This commit is contained in:
Peter Steinberger
2026-02-21 22:45:32 +00:00
parent a2a19cdad2
commit a32edf423b
4 changed files with 37 additions and 64 deletions

View File

@@ -0,0 +1,31 @@
/**
 * A span of text that should be treated as code.
 *
 * Offsets are string indices into the text the region was computed from; the
 * span is half-open, i.e. it covers `start` up to but not including `end`.
 */
export interface CodeRegion {
  /** Index of the first character that belongs to the region. */
  start: number;
  /** Index one past the last character that belongs to the region. */
  end: number;
}
/**
 * Locate every Markdown code span in `text`.
 *
 * Two kinds of span are recognised:
 * - fenced blocks: a line starting with three backticks or `~~~`, running until
 *   a line consisting of exactly the same marker, or to the end of the text
 *   when the fence is never closed;
 * - inline code: a run of backticks, one or more non-backtick characters, and
 *   another run of backticks.
 *
 * @param text - The text to scan.
 * @returns Half-open `[start, end)` regions sorted by ascending `start`. An
 *   inline match that lies entirely inside an already recorded region is not
 *   reported separately.
 */
export function findCodeRegions(text: string): CodeRegion[] {
  const regions: CodeRegion[] = [];

  // `(?<![^\n])` asserts "start of text or right after a newline" WITHOUT
  // consuming that newline. The closing alternative consumes the newline that
  // follows the closing fence, so a consuming `(^|\n)` prefix would fail to see
  // a fence that opens on the very next line (back-to-back fenced blocks).
  const fencedRe = /(?<![^\n])(```|~~~)[^\n]*\n[\s\S]*?(?:\n\1(?:\n|$)|$)/g;
  for (const match of text.matchAll(fencedRe)) {
    const start = match.index ?? 0;
    regions.push({ start, end: start + match[0].length });
  }

  const inlineRe = /`+[^`]+`+/g;
  for (const match of text.matchAll(inlineRe)) {
    const start = match.index ?? 0;
    const end = start + match[0].length;
    // Backtick runs inside a fenced block are already covered by that block.
    const insideFenced = regions.some((r) => start >= r.start && end <= r.end);
    if (!insideFenced) {
      regions.push({ start, end });
    }
  }

  regions.sort((a, b) => a.start - b.start);
  return regions;
}
/**
 * Report whether a character offset falls within any of the given regions.
 *
 * @param pos - Offset to test.
 * @param regions - Regions to check; each is treated as half-open `[start, end)`.
 * @returns `true` when at least one region contains `pos`, otherwise `false`.
 */
export function isInsideCode(pos: number, regions: CodeRegion[]): boolean {
  for (const { start, end } of regions) {
    if (start <= pos && pos < end) {
      return true;
    }
  }
  return false;
}

View File

@@ -1,3 +1,4 @@
import { findCodeRegions, isInsideCode } from "./code-regions.js";
/**
 * Mode selector for reasoning-tag handling: either "strict" or "preserve".
 * NOTE(review): what each mode does is decided by code outside this excerpt — confirm there.
 */
export type ReasoningTagMode = "strict" | "preserve";
/**
 * Trim setting accepted by `applyTrim`; with "none" the value is returned unchanged.
 * NOTE(review): "start" and "both" presumably trim the leading side / both sides — confirm in `applyTrim`.
 */
export type ReasoningTagTrim = "none" | "start" | "both";
@@ -5,38 +6,6 @@ const QUICK_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking|final)\b/i;
// Matches an opening or closing `final` tag: `<`, optional whitespace, an
// optional `/`, optional whitespace, the word `final`, then any characters
// other than `<` or `>` up to the closing `>`. Case-insensitive and global.
const FINAL_TAG_RE = /<\s*\/?\s*final\b[^<>]*>/gi;
// Matches an opening or closing `think`, `thinking`, `thought` or `antthinking`
// tag with the same whitespace/attribute tolerance as above. Capture group 1
// holds the `/` for a closing tag and is empty for an opening tag.
// Case-insensitive and global.
const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^<>]*>/gi;
/**
 * Character span of a piece of code inside a larger text.
 *
 * Both fields are string offsets; the span includes `start` and excludes `end`.
 */
interface CodeRegion {
  /** Offset at which the region begins (inclusive). */
  start: number;
  /** Offset at which the region stops (exclusive). */
  end: number;
}
/**
 * Collect the Markdown code spans found in `text`.
 *
 * Fenced blocks start on a line beginning with three backticks or `~~~` and end
 * at a line made of exactly that marker (or at the end of the text if no such
 * line exists). Inline code is a backtick run, at least one non-backtick
 * character, and a closing backtick run.
 *
 * @param text - The text to scan.
 * @returns Half-open `[start, end)` regions ordered by ascending `start`;
 *   inline matches fully contained in an earlier region are omitted.
 */
function findCodeRegions(text: string): CodeRegion[] {
  const regions: CodeRegion[] = [];

  // The opening fence is anchored with a lookbehind ("no non-newline character
  // directly before") instead of a consumed `(^|\n)`. The previous match eats
  // the newline after its closing fence, so a consuming anchor would skip a
  // fence that opens on the immediately following line.
  const fencedRe = /(?<![^\n])(```|~~~)[^\n]*\n[\s\S]*?(?:\n\1(?:\n|$)|$)/g;
  for (const match of text.matchAll(fencedRe)) {
    const start = match.index ?? 0;
    regions.push({ start, end: start + match[0].length });
  }

  const inlineRe = /`+[^`]+`+/g;
  for (const match of text.matchAll(inlineRe)) {
    const start = match.index ?? 0;
    const end = start + match[0].length;
    // Skip backtick runs that an existing region already covers completely.
    const insideFenced = regions.some((r) => start >= r.start && end <= r.end);
    if (!insideFenced) {
      regions.push({ start, end });
    }
  }

  regions.sort((a, b) => a.start - b.start);
  return regions;
}
/**
 * Check whether `pos` lies inside one of `regions`.
 *
 * @param pos - Offset to look up.
 * @param regions - Candidate regions, each interpreted as `[start, end)`.
 * @returns `true` if some region satisfies `start <= pos < end`.
 */
function isInsideCode(pos: number, regions: CodeRegion[]): boolean {
  const containsPos = (region: CodeRegion): boolean => {
    const atOrAfterStart = pos >= region.start;
    const beforeEnd = pos < region.end;
    return atOrAfterStart && beforeEnd;
  };
  return regions.findIndex(containsPos) !== -1;
}
function applyTrim(value: string, mode: ReasoningTagTrim): string {
if (mode === "none") {
return value;

View File

@@ -22,4 +22,8 @@ describe("splitTelegramReasoningText", () => {
answerText: text,
});
});
// Input is a space followed by the truncated tag opener "<thi"; the result is
// expected to compare equal to an empty object.
it("does not emit partial reasoning tag prefixes", () => {
expect(splitTelegramReasoningText(" <thi")).toEqual({});
});
});

View File

@@ -1,5 +1,6 @@
import { formatReasoningMessage } from "../agents/pi-embedded-utils.js";
import type { ReplyPayload } from "../auto-reply/types.js";
import { findCodeRegions, isInsideCode } from "../shared/text/code-regions.js";
import { stripReasoningTagsFromText } from "../shared/text/reasoning-tags.js";
// The literal text "Reasoning:" followed by a newline.
// NOTE(review): presumably the leading marker of a formatted reasoning message — usage is outside this excerpt, confirm.
const REASONING_MESSAGE_PREFIX = "Reasoning:\n";
@@ -15,38 +16,6 @@ const REASONING_TAG_PREFIXES = [
];
// Matches an opening or closing `think`, `thinking`, `thought` or `antthinking`
// tag: `<`, optional whitespace, an optional `/` (capture group 1, empty for an
// opening tag), optional whitespace, the tag name, then any characters other
// than `<` or `>` up to the closing `>`. Case-insensitive and global.
const THINKING_TAG_RE = /<\s*(\/?)\s*(?:think(?:ing)?|thought|antthinking)\b[^<>]*>/gi;
/**
 * Offsets delimiting a code span within a string.
 *
 * The range is half-open: `start` is the first covered index and `end` is the
 * first index after the span.
 */
interface CodeRegion {
  /** First covered index. */
  start: number;
  /** First index past the span. */
  end: number;
}
/**
 * Find the parts of `text` that are Markdown code.
 *
 * Recognises fenced blocks (a line starting with three backticks or `~~~`,
 * closed by a line containing exactly the same marker, or left open until the
 * end of the text) and inline code (backtick run, one or more non-backtick
 * characters, backtick run).
 *
 * @param text - The text to scan.
 * @returns Regions as half-open `[start, end)` offsets, sorted by `start`.
 *   Inline matches wholly inside an already recorded region are left out.
 */
function findCodeRegions(text: string): CodeRegion[] {
  const regions: CodeRegion[] = [];

  // Line-start is checked with a zero-width lookbehind rather than by matching
  // the preceding newline: a closed block consumes the newline after its
  // closing fence, and a fence opening on the next line must still be found.
  const fencedRe = /(?<![^\n])(```|~~~)[^\n]*\n[\s\S]*?(?:\n\1(?:\n|$)|$)/g;
  for (const match of text.matchAll(fencedRe)) {
    const start = match.index ?? 0;
    regions.push({ start, end: start + match[0].length });
  }

  const inlineRe = /`+[^`]+`+/g;
  for (const match of text.matchAll(inlineRe)) {
    const start = match.index ?? 0;
    const end = start + match[0].length;
    // An inline match that sits entirely within a recorded region adds nothing.
    const insideFenced = regions.some((r) => start >= r.start && end <= r.end);
    if (!insideFenced) {
      regions.push({ start, end });
    }
  }

  regions.sort((a, b) => a.start - b.start);
  return regions;
}
/**
 * Tell whether the offset `pos` is covered by any region in `regions`.
 *
 * @param pos - Offset being queried.
 * @param regions - Regions to search; a region covers `start` through `end - 1`.
 * @returns `true` on the first region that covers `pos`, `false` if none does.
 */
function isInsideCode(pos: number, regions: CodeRegion[]): boolean {
  let covered = false;
  for (let i = 0; i < regions.length && !covered; i += 1) {
    const region = regions[i];
    covered = pos >= region.start && pos < region.end;
  }
  return covered;
}
function extractThinkingFromTaggedStreamOutsideCode(text: string): string {
if (!text) {
return "";