mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
(feat: searchResults): adding utility to preprocess markdown
This commit is contained in:
@@ -27,22 +27,169 @@ export const getOS = () => {
|
||||
return 'other';
|
||||
};
|
||||
|
||||
export const preprocessSearchResultsToHTML = (text: string, keyword: string) => {
|
||||
const md = new MarkdownIt();
|
||||
const htmlString = md.render(text);
|
||||
/**
 * One block of a markdown document that matched a search keyword.
 *
 * - `type`: the kind of block the match was found in.
 * - `content`: the text of that block.
 * - `level`: optional; the search preprocessor in this file only sets it for
 *   `'heading'` entries, from the digit in the heading tag.
 */
interface MarkdownElement {
|
||||
type: 'heading' | 'paragraph' | 'code' | 'list' | 'other';
|
||||
content: string;
|
||||
level?: number;
|
||||
}
|
||||
|
||||
// Container for processed HTML
|
||||
const filteredResults = document.createElement("div");
|
||||
filteredResults.innerHTML = htmlString;
|
||||
/**
 * One classified line (or fenced code block) produced by
 * `processMarkdownString`.
 *
 * - `content`: the text with its markdown marker removed.
 * - `tag`: the classification; `processMarkdownString` emits `'heading'`,
 *   `'bulletList'`, `'numberedList'`, `'code'` or `'text'`.
 */
interface ParsedElement {
|
||||
content: string;
|
||||
tag: string;
|
||||
}
|
||||
|
||||
if (!processNode(filteredResults, keyword.trim())) return null;
|
||||
/**
 * Splits a markdown string into a flat list of classified elements.
 *
 * The input is scanned line by line:
 * - lines between ``` fences are collected into a single `'code'` element,
 *   keeping their indentation and internal blank lines;
 * - `# …` to `###### …` become `'heading'` elements (marker removed);
 * - `- …` / `* …` become `'bulletList'` elements (marker removed);
 * - `1. …` style lines become `'numberedList'` elements (marker removed);
 * - any other non-blank line becomes a `'text'` element.
 *
 * A code block that is still open at the end of the input is emitted as well.
 *
 * @param markdown Raw markdown text.
 * @returns The elements in document order; empty when there is no content.
 */
export const processMarkdownString = (markdown: string): ParsedElement[] => {
  const result: ParsedElement[] = [];
  // Accept both LF and CRLF so raw code lines never carry a stray '\r'.
  const lines = markdown.trim().split(/\r?\n/);

  let isInCodeBlock = false;
  let codeLines: string[] = [];

  // Emits the collected code block (if it has any content) and resets the buffer.
  const flushCodeBlock = (): void => {
    const code = codeLines
      .join('\n')
      // Drop leading blank lines only, so the first line keeps its indentation.
      .replace(/^(?:[ \t]*\n)+/, '')
      .replace(/\s+$/, '');
    if (code) {
      result.push({ content: code, tag: 'code' });
    }
    codeLines = [];
  };

  for (const line of lines) {
    const trimmedLine = line.trim();

    if (trimmedLine.startsWith('```')) {
      if (isInCodeBlock) {
        flushCodeBlock();
      }
      isInCodeBlock = !isInCodeBlock;
      continue;
    }

    // Must run before the blank-line check: blank lines and indentation are
    // significant inside a code block.
    if (isInCodeBlock) {
      codeLines.push(line);
      continue;
    }

    if (!trimmedLine) continue;

    const headingMatch = trimmedLine.match(/^(#{1,6})\s+(.+)$/);
    if (headingMatch) {
      result.push({ content: headingMatch[2], tag: 'heading' });
      continue;
    }

    const bulletMatch = trimmedLine.match(/^[-*]\s+(.+)$/);
    if (bulletMatch) {
      result.push({ content: bulletMatch[1], tag: 'bulletList' });
      continue;
    }

    const numberedMatch = trimmedLine.match(/^\d+\.\s+(.+)$/);
    if (numberedMatch) {
      result.push({ content: numberedMatch[1], tag: 'numberedList' });
      continue;
    }

    result.push({ content: trimmedLine, tag: 'text' });
  }

  // Unterminated fence: still surface what was collected.
  if (isInCodeBlock) {
    flushCodeBlock();
  }

  return result;
};
|
||||
|
||||
/**
 * Finds the blocks of a markdown document that mention a keyword.
 *
 * The text is tokenized with markdown-it and each block is matched
 * case-insensitively against the trimmed keyword:
 * - headings produce `'heading'` elements with their `level` (1-6);
 * - paragraphs produce `'paragraph'` elements;
 * - fenced / indented code produces `'code'` elements;
 * - a top-level list produces one `'list'` element holding all of its item
 *   texts (nested items included) joined with `'\n'`, if any item matches;
 * - any other token with matching content produces an `'other'` element.
 *
 * Despite the name, the function returns structured elements, not HTML.
 *
 * @param text Markdown source to search.
 * @param keyword Search term; surrounding whitespace is ignored.
 * @returns Matching elements in document order, or `null` when none match.
 */
export const preprocessSearchResultsToHTML = (
  text: string,
  keyword: string,
): MarkdownElement[] | null => {
  const md = new MarkdownIt();
  const tokens = md.parse(text, {});
  const results: MarkdownElement[] = [];

  // Normalize the keyword once instead of on every token.
  const keywordLower = keyword.trim().toLowerCase();
  const matchesKeyword = (value: string): boolean =>
    value.toLowerCase().includes(keywordLower);

  // List state: item texts are buffered until the outermost list closes so a
  // (possibly nested) list is reported as a single element.
  let listDepth = 0;
  let listItems: string[] = [];

  for (let i = 0; i < tokens.length; i++) {
    const token = tokens[i];

    switch (token.type) {
      case 'bullet_list_open':
      case 'ordered_list_open':
        listDepth++;
        break;

      case 'bullet_list_close':
      case 'ordered_list_close':
        listDepth--;
        if (listDepth === 0) {
          if (listItems.some(matchesKeyword)) {
            results.push({ type: 'list', content: listItems.join('\n') });
          }
          listItems = [];
        }
        break;

      case 'inline': {
        // Block text lives on the `inline` token; the surrounding `*_open`
        // token (always empty itself) tells us what kind of block it is.
        if (!token.content) break;

        if (listDepth > 0) {
          listItems.push(token.content);
          break;
        }

        if (!matchesKeyword(token.content)) break;

        const parent = i > 0 ? tokens[i - 1] : undefined;
        if (parent?.type === 'heading_open') {
          results.push({
            type: 'heading',
            content: token.content,
            // Heading tags are 'h1'..'h6'; the digit is the level.
            level: Number.parseInt(parent.tag.slice(1), 10),
          });
        } else if (parent?.type === 'paragraph_open') {
          results.push({ type: 'paragraph', content: token.content });
        } else {
          results.push({ type: 'other', content: token.content });
        }
        break;
      }

      case 'fence':
      case 'code_block':
        // Code nested in a list item is reported on its own, ahead of the list.
        if (token.content && matchesKeyword(token.content)) {
          results.push({ type: 'code', content: token.content });
        }
        break;

      default:
        if (
          !token.type.endsWith('_close') &&
          token.content &&
          matchesKeyword(token.content)
        ) {
          results.push({ type: 'other', content: token.content });
        }
        break;
    }
  }

  return results.length > 0 ? results : null;
};
|
||||
|
||||
// Recursive function to process nodes
|
||||
const processNode = (node: Node, keyword: string): boolean => {
|
||||
|
||||
const keywordRegex = new RegExp(`(${keyword})`, "gi");
|
||||
@@ -57,7 +204,6 @@ const processNode = (node: Node, keyword: string): boolean => {
|
||||
const tempContainer = document.createElement("div");
|
||||
tempContainer.innerHTML = highlightedHTML;
|
||||
|
||||
// Replace the text node with highlighted content
|
||||
while (tempContainer.firstChild) {
|
||||
node.parentNode?.insertBefore(tempContainer.firstChild, node);
|
||||
}
|
||||
@@ -84,4 +230,27 @@ const processNode = (node: Node, keyword: string): boolean => {
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
// Sample markdown input (title, paragraph, subtitle, bullet and numbered
// items, a fenced code block and trailing text) used to exercise
// processMarkdownString below.
// NOTE(review): in this view these statements sit at module top level, so
// they appear to run — and log to the console — whenever the module is
// loaded. Confirm whether this is leftover debugging code.
const markdownString = `
|
||||
# Title
|
||||
This is a paragraph.
|
||||
|
||||
## Subtitle
|
||||
- Bullet item 1
|
||||
* Bullet item 2
|
||||
1. Numbered item 1
|
||||
2. Numbered item 2
|
||||
|
||||
\`\`\`javascript
|
||||
const hello = "world";
|
||||
console.log(hello);
|
||||
// This is a multi-line
|
||||
// code block
|
||||
\`\`\`
|
||||
|
||||
Regular text after code block
|
||||
`;
|
||||
|
||||
// Parse the sample and print the resulting elements.
const parsed = processMarkdownString(markdownString);
|
||||
console.log(parsed);
|
||||
|
||||
Reference in New Issue
Block a user