Frontend audit: Bug fixes and refinements (#2112)

* (fix:attachments) sep id for redux ops * (fix:ui) popups, toast, share modal * (feat:agentsPreview) stable preview, ui fixes * (fix:ui) light theme icon, sleek scroll * (chore:i18n) missing keys * (chore:i18n) missing keys * (feat:preferenceSlice) autoclear invalid source from storage * (fix:general) delete all conv close btn * (fix:tts) play one at a time * (fix:tts) gracefully unmount * (feat:tts) audio LRU cache * (feat:tts) pointer on hovered area * (feat:tts) clean text for speech --------- Co-authored-by: GH Action - Upstream Sync <action@github.com>
2026-04-26 19:26:13 +00:00 · 2025-10-29 05:17:26 +05:30
parent 94f70e6de5
commit 6a4cb617f9
40 changed files with 1805 additions and 490 deletions
--- a/application/api/user/attachments/routes.py
+++ b/application/api/user/attachments/routes.py
@@ -130,11 +130,15 @@ class TextToSpeech(Resource):
    @api.expect(tts_model)
    @api.doc(description="Synthesize audio speech from text")
    def post(self):
+        from application.utils import clean_text_for_tts
+
        data = request.get_json()
        text = data["text"]
+        cleaned_text = clean_text_for_tts(text)
+
        try:
            tts_instance = TTSCreator.create_tts(settings.TTS_PROVIDER)
-            audio_base64, detected_language = tts_instance.text_to_speech(text)
+            audio_base64, detected_language = tts_instance.text_to_speech(cleaned_text)
            return make_response(
                jsonify(
                    {
--- a/application/utils.py
+++ b/application/utils.py
@@ -187,3 +187,44 @@ def generate_image_url(image_path):
    else:
        base_url = getattr(settings, "API_URL", "http://localhost:7091")
        return f"{base_url}/api/images/{image_path}"
+
+
def _strip_unspeakable_chars(text: str) -> str:
    """Drop characters a speech engine cannot voice while keeping real text.

    ASCII is filtered exactly as before (printable characters plus newline,
    carriage return and tab). Outside ASCII, letters, digits, punctuation,
    separators and combining marks are kept so that accented and non-Latin
    text survives; symbols (which include emoji) and control/format
    characters are removed.
    """
    import unicodedata  # function-scope import keeps this block self-contained

    def keep(ch: str) -> bool:
        if ch < "\x80":
            return " " <= ch <= "~" or ch in "\n\r\t"
        if "\ufe00" <= ch <= "\ufe0f":
            # Variation selectors only modify a preceding emoji/symbol, which
            # is itself removed, so they must not be left behind.
            return False
        category = unicodedata.category(ch)
        # L* letters, N* numbers, P* punctuation, Z* separators, Mn/Mc
        # combining marks (needed for decomposed accents and many scripts).
        # Everything else -- S* symbols/emoji, C* control/format -- is dropped.
        return category[0] in "LNPZ" or category in ("Mn", "Mc")

    return "".join(ch for ch in text if keep(ch))


def clean_text_for_tts(text: str) -> str:
    """
    Clean text for Text-to-Speech processing.

    Strips Markdown/HTML markup and characters that cannot be spoken, and
    collapses whitespace, so that only the readable content is synthesized.

    Args:
        text: Raw (typically Markdown) text. ``None`` or an empty string is
            accepted and yields an empty string.

    Returns:
        A single-line, whitespace-normalized string. Fenced code blocks are
        replaced by the spoken placeholders "code block," / "flowchart,".
    """
    if not text:
        return ""

    # Fenced blocks first, so their contents are never treated as markup.
    text = re.sub(r'```mermaid[\s\S]*?```', ' flowchart, ', text)  ## ```mermaid...```
    text = re.sub(r'```[\s\S]*?```', ' code block, ', text)  ## ```code```

    # Images must be removed BEFORE links: the link pattern also matches the
    # "[alt](url)" part of "![alt](url)" and would leave "!alt" behind.
    text = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', '', text)  ## ![alt](url)
    text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)  ## [text](url)

    # Inline code and bracket-like wrappers
    text = re.sub(r'`([^`]+)`', r'\1', text)  ## `code`
    text = re.sub(r'\{([^}]*)\}', r' \1 ', text)  ## {text}
    text = re.sub(r'[{}]', ' ', text)  ## unmatched {}
    text = re.sub(r'\[([^\]]+)\]', r' \1 ', text)  ## [text]
    text = re.sub(r'[\[\]]', ' ', text)  ## unmatched []

    # Line-level markup is stripped before inline emphasis; otherwise the
    # emphasis patterns consume part of "___" / "***" rules and pair a
    # list bullet "*" with a later emphasis marker.
    text = re.sub(r'^[\*\-_]{3,}\s*$', '', text, flags=re.MULTILINE)  ## --- *** ___ rules
    text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE)  ## # headers
    text = re.sub(r'^>\s+', '', text, flags=re.MULTILINE)  ## > blockquotes
    text = re.sub(r'^[\s]*[-\*\+]\s+', '', text, flags=re.MULTILINE)  ## - * + lists
    text = re.sub(r'^[\s]*\d+\.\s+', '', text, flags=re.MULTILINE)  ## 1. numbered lists

    # Inline emphasis
    text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text)  ## **bold** __bold__
    text = re.sub(r'(\*|_)(.*?)\1', r'\2', text)  ## *italic* _italic_

    # HTML: only real comments/tags are removed. A bare "<" ... ">" pair such
    # as "x < 5 and y > 3" or "<-- a -->" is ordinary text, not a tag.
    text = re.sub(r'<!--[\s\S]*?-->', '', text)  ## <!-- comment -->
    text = re.sub(r'<[/!?]?[A-Za-z][^>]*>', '', text)  ## <html> tags

    # Remove emoji / symbols / control characters, keep letters of any script
    text = _strip_unspeakable_chars(text)

    # Replace special sequences
    text = re.sub(r'-->', ', ', text)  ## -->
    text = re.sub(r'<--', ', ', text)  ## <--
    text = re.sub(r'=>', ', ', text)  ## =>
    text = re.sub(r'::', ' ', text)  ## ::

    # Normalize whitespace (also folds non-breaking and other Unicode spaces)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()