mirror of
https://github.com/moltbot/moltbot.git
synced 2026-04-23 22:55:24 +00:00
docs-i18n: avoid ambiguous tagged body unwrap
This commit is contained in:
@@ -222,12 +222,14 @@ func sanitizeDocChunkProtocolWrappers(source, translated string) string {
|
||||
if !hasUnexpectedTopLevelProtocolWrapper(source, trimmedTranslated) {
|
||||
return translated
|
||||
}
|
||||
_, body, err := parseTaggedDocument(trimmedTranslated)
|
||||
if err == nil {
|
||||
if strings.TrimSpace(body) == "" {
|
||||
return translated
|
||||
if !hasAmbiguousTaggedBodyClose(source, trimmedTranslated) {
|
||||
_, body, err := parseTaggedDocument(trimmedTranslated)
|
||||
if err == nil {
|
||||
if strings.TrimSpace(body) == "" {
|
||||
return translated
|
||||
}
|
||||
return body
|
||||
}
|
||||
return body
|
||||
}
|
||||
body, ok := stripBodyOnlyWrapper(trimmedTranslated)
|
||||
if !ok || strings.TrimSpace(body) == "" {
|
||||
@@ -251,6 +253,18 @@ func stripBodyOnlyWrapper(text string) (string, bool) {
|
||||
return trimTagNewlines(body), true
|
||||
}
|
||||
|
||||
func hasAmbiguousTaggedBodyClose(source, translated string) bool {
|
||||
sourceLower := strings.ToLower(source)
|
||||
if !strings.Contains(sourceLower, strings.ToLower(bodyTagStart)) && !strings.Contains(sourceLower, strings.ToLower(bodyTagEnd)) {
|
||||
return false
|
||||
}
|
||||
translatedLower := strings.ToLower(translated)
|
||||
if !strings.Contains(translatedLower, strings.ToLower(frontmatterTagStart)) {
|
||||
return false
|
||||
}
|
||||
return strings.Count(translatedLower, strings.ToLower(bodyTagEnd)) == 1
|
||||
}
|
||||
|
||||
func maskDocComponentTags(text string) (string, []string) {
|
||||
placeholders := make([]string, 0, 4)
|
||||
masked := docsComponentTagRE.ReplaceAllStringFunc(text, func(match string) string {
|
||||
|
||||
@@ -91,8 +91,8 @@ func parseTaggedDocument(text string) (string, string, error) {
|
||||
}
|
||||
bodyStart += frontEnd + len(bodyTagStart)
|
||||
|
||||
bodyEnd := strings.LastIndex(text, bodyTagEnd)
|
||||
if bodyEnd == -1 || bodyEnd < bodyStart {
|
||||
bodyEnd := findTaggedBodyEnd(text, bodyStart)
|
||||
if bodyEnd == -1 {
|
||||
return "", "", fmt.Errorf("missing %s", bodyTagEnd)
|
||||
}
|
||||
body := trimTagNewlines(text[bodyStart:bodyEnd])
|
||||
@@ -107,6 +107,31 @@ func parseTaggedDocument(text string) (string, string, error) {
|
||||
return frontMatter, body, nil
|
||||
}
|
||||
|
||||
func findTaggedBodyEnd(text string, bodyStart int) int {
|
||||
if bodyStart < 0 || bodyStart > len(text) {
|
||||
return -1
|
||||
}
|
||||
search := text[bodyStart:]
|
||||
candidate := -1
|
||||
offset := 0
|
||||
for {
|
||||
index := strings.Index(search[offset:], bodyTagEnd)
|
||||
if index == -1 {
|
||||
return candidate
|
||||
}
|
||||
index += offset
|
||||
absolute := bodyStart + index
|
||||
suffix := strings.TrimSpace(text[absolute+len(bodyTagEnd):])
|
||||
if suffix == "" {
|
||||
candidate = absolute
|
||||
}
|
||||
offset = index + len(bodyTagEnd)
|
||||
if offset >= len(search) {
|
||||
return candidate
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func trimTagNewlines(value string) string {
|
||||
value = strings.TrimPrefix(value, "\n")
|
||||
value = strings.TrimSuffix(value, "\n")
|
||||
|
||||
@@ -252,6 +252,33 @@ func TestParseTaggedDocumentRejectsTrailingTextOutsideTags(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindTaggedBodyEndSearchesFromBodyStart(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
text := strings.Join([]string{
|
||||
"<frontmatter>",
|
||||
"summary: literal </body> token in frontmatter",
|
||||
"</frontmatter>",
|
||||
"<body>",
|
||||
"Translated body",
|
||||
"</body>",
|
||||
}, "\n")
|
||||
bodyStart := strings.Index(text, bodyTagStart)
|
||||
if bodyStart == -1 {
|
||||
t.Fatal("expected body tag in test input")
|
||||
}
|
||||
bodyStart += len(bodyTagStart)
|
||||
|
||||
bodyEnd := findTaggedBodyEnd(text, bodyStart)
|
||||
if bodyEnd == -1 {
|
||||
t.Fatal("expected closing body tag to be found")
|
||||
}
|
||||
body := trimTagNewlines(text[bodyStart:bodyEnd])
|
||||
if body != "Translated body" {
|
||||
t.Fatalf("expected body slice to ignore pre-body literal token, got %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitDocBodyIntoBlocksKeepsFenceTogether(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
@@ -512,6 +539,30 @@ func TestSanitizeDocChunkProtocolWrappersKeepsLegitimateTopLevelBodyBlock(t *tes
|
||||
}
|
||||
}
|
||||
|
||||
func TestSanitizeDocChunkProtocolWrappersKeepsAmbiguousTaggedWrapperForRetry(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
source := strings.Join([]string{
|
||||
"Paragraph mentioning literal tokens `<body>` and `</body>`.",
|
||||
"",
|
||||
"Closing example:",
|
||||
"</body>",
|
||||
}, "\n")
|
||||
translated := strings.Join([]string{
|
||||
"<frontmatter>",
|
||||
"title: leaked",
|
||||
"</frontmatter>",
|
||||
"",
|
||||
"<body>",
|
||||
"提到字面量 `<body>` 和 `</body>` 的段落。",
|
||||
}, "\n")
|
||||
|
||||
got := sanitizeDocChunkProtocolWrappers(source, translated)
|
||||
if got != translated {
|
||||
t.Fatalf("expected ambiguous tagged wrapper to remain unchanged for retry\nwant:\n%s\ngot:\n%s", translated, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSplitDocBodyIntoBlocksKeepsInfoStringExampleInsideFence(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user