From b8fade251bb5af4750c70ff7dc79607ca501fe7f Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Tue, 24 Dec 2024 17:15:17 +0530 Subject: [PATCH] (feat: searchResults): adding utility to preprocess markdown --- extensions/react-widget/src/utils/helper.ts | 191 ++++++++++++++++++-- 1 file changed, 180 insertions(+), 11 deletions(-) diff --git a/extensions/react-widget/src/utils/helper.ts b/extensions/react-widget/src/utils/helper.ts index d9aa19c3..511b39fc 100644 --- a/extensions/react-widget/src/utils/helper.ts +++ b/extensions/react-widget/src/utils/helper.ts @@ -27,22 +27,169 @@ export const getOS = () => { return 'other'; }; -export const preprocessSearchResultsToHTML = (text: string, keyword: string) => { - const md = new MarkdownIt(); - const htmlString = md.render(text); +interface MarkdownElement { + type: 'heading' | 'paragraph' | 'code' | 'list' | 'other'; + content: string; + level?: number; +} - // Container for processed HTML - const filteredResults = document.createElement("div"); - filteredResults.innerHTML = htmlString; +interface ParsedElement { + content: string; + tag: string; +} - if (!processNode(filteredResults, keyword.trim())) return null; +export const processMarkdownString = (markdown: string): ParsedElement[] => { + const result: ParsedElement[] = []; + const lines = markdown.trim().split('\n'); + + let isInCodeBlock = false; + let currentCodeBlock = ''; - return filteredResults.innerHTML.trim() ? filteredResults.outerHTML : null; + for (let i = 0; i < lines.length; i++) { + const trimmedLine = lines[i].trim(); + if (!trimmedLine) continue; + + if (trimmedLine.startsWith('```')) { + if (isInCodeBlock) { + if (currentCodeBlock.trim()) { + result.push({ + content: currentCodeBlock.trim(), + tag: 'code' + }); + } + currentCodeBlock = ''; + isInCodeBlock = false; + } else { + isInCodeBlock = true; + } + continue; + } + + if (isInCodeBlock) { + currentCodeBlock += trimmedLine + '\n'; + continue; + } + + const headingMatch = trimmedLine.match(/^(#{1,6})\s+(.+)$/); + if (headingMatch) { + result.push({ + content: headingMatch[2], + tag: 'heading' + }); + continue; + } + + const bulletMatch = trimmedLine.match(/^[-*]\s+(.+)$/); + if (bulletMatch) { + result.push({ + content: bulletMatch[1], + tag: 'bulletList' + }); + continue; + } + + const numberedMatch = trimmedLine.match(/^\d+\.\s+(.+)$/); + if (numberedMatch) { + result.push({ + content: numberedMatch[1], + tag: 'numberedList' + }); + continue; + } + + result.push({ + content: trimmedLine, + tag: 'text' + }); + } + + if (isInCodeBlock && currentCodeBlock.trim()) { + result.push({ + content: currentCodeBlock.trim(), + tag: 'code' + }); + } + + return result; }; +export const preprocessSearchResultsToHTML = (text: string, keyword: string): MarkdownElement[] | null => { + const md = new MarkdownIt(); + const tokens = md.parse(text, {}); + const results: MarkdownElement[] = []; + + for (let i = 0; i < tokens.length; i++) { + const token = tokens[i]; + + if (token.type.endsWith('_close') || !token.content) continue; + const content = token.content.toLowerCase(); + const keywordLower = keyword.trim().toLowerCase(); + + if (!content.includes(keywordLower)) continue; + + switch (token.type) { + case 'heading_open': + const level = parseInt(token.tag.charAt(1)); + const headingContent = tokens[i + 1].content; + results.push({ + type: 'heading', + content: headingContent, + level + }); + break; + + case 'paragraph_open': + const paragraphContent = tokens[i + 1].content; + results.push({ + type: 'paragraph', + content: paragraphContent + }); + break; + + case 'fence': + case 'code_block': + results.push({ + type: 'code', + content: token.content + }); + break; + + case 'bullet_list_open': + case 'ordered_list_open': + let listItems = []; + i++; + while (i < tokens.length && !tokens[i].type.includes('list_close')) { + if (tokens[i].type === 'list_item_open') { + i++; + if (tokens[i].content) { + listItems.push(tokens[i].content); + } + } + i++; + } + if (listItems.length > 0) { + results.push({ + type: 'list', + content: listItems.join('\n') + }); + } + break; + + default: + if (token.content) { + results.push({ + type: 'other', + content: token.content + }); + } + break; + } + } + + return results.length > 0 ? results : null; +}; -// Recursive function to process nodes const processNode = (node: Node, keyword: string): boolean => { const keywordRegex = new RegExp(`(${keyword})`, "gi"); @@ -57,7 +204,6 @@ const processNode = (node: Node, keyword: string): boolean => { const tempContainer = document.createElement("div"); tempContainer.innerHTML = highlightedHTML; - // Replace the text node with highlighted content while (tempContainer.firstChild) { node.parentNode?.insertBefore(tempContainer.firstChild, node); } @@ -84,4 +230,27 @@ const processNode = (node: Node, keyword: string): boolean => { } return false; -}; \ No newline at end of file +}; + +const markdownString = ` +# Title +This is a paragraph. + +## Subtitle +- Bullet item 1 +* Bullet item 2 +1. Numbered item 1 +2. Numbered item 2 + +\`\`\`javascript +const hello = "world"; +console.log(hello); +// This is a multi-line +// code block +\`\`\` + +Regular text after code block +`; + +const parsed = processMarkdownString(markdownString); +console.log(parsed);