mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-12-01 17:43:15 +00:00
Frontend audit: Bug fixes and refinements (#2112)
* (fix:attachements) sep id for redux ops * (fix:ui) popups, toast, share modal * (feat:agentsPreview) stable preview, ui fixes * (fix:ui) light theme icon, sleek scroll * (chore:i18n) missin keys * (chore:i18n) missing keys * (feat:preferrenceSlice) autoclear invalid source from storage * (fix:general) delete all conv close btn * (fix:tts) play one at a time * (fix:tts) gracefully unmount * (feat:tts) audio LRU cache * (feat:tts) pointer on hovered area * (feat:tts) clean text for speach --------- Co-authored-by: GH Action - Upstream Sync <action@github.com>
This commit is contained in:
@@ -130,11 +130,15 @@ class TextToSpeech(Resource):
|
||||
@api.expect(tts_model)
|
||||
@api.doc(description="Synthesize audio speech from text")
|
||||
def post(self):
|
||||
from application.utils import clean_text_for_tts
|
||||
|
||||
data = request.get_json()
|
||||
text = data["text"]
|
||||
cleaned_text = clean_text_for_tts(text)
|
||||
|
||||
try:
|
||||
tts_instance = TTSCreator.create_tts(settings.TTS_PROVIDER)
|
||||
audio_base64, detected_language = tts_instance.text_to_speech(text)
|
||||
audio_base64, detected_language = tts_instance.text_to_speech(cleaned_text)
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
|
||||
@@ -187,3 +187,44 @@ def generate_image_url(image_path):
|
||||
else:
|
||||
base_url = getattr(settings, "API_URL", "http://localhost:7091")
|
||||
return f"{base_url}/api/images/{image_path}"
|
||||
|
||||
|
||||
def clean_text_for_tts(text: str) -> str:
|
||||
"""
|
||||
clean text for Text-to-Speech processing.
|
||||
"""
|
||||
# Handle code blocks and links
|
||||
text = re.sub(r'```mermaid[\s\S]*?```', ' flowchart, ', text) ## ```mermaid...```
|
||||
text = re.sub(r'```[\s\S]*?```', ' code block, ', text) ## ```code```
|
||||
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text) ## [text](url)
|
||||
text = re.sub(r'!\[([^\]]*)\]\([^\)]+\)', '', text) ## 
|
||||
|
||||
# Remove markdown formatting
|
||||
text = re.sub(r'`([^`]+)`', r'\1', text) ## `code`
|
||||
text = re.sub(r'\{([^}]*)\}', r' \1 ', text) ## {text}
|
||||
text = re.sub(r'[{}]', ' ', text) ## unmatched {}
|
||||
text = re.sub(r'\[([^\]]+)\]', r' \1 ', text) ## [text]
|
||||
text = re.sub(r'[\[\]]', ' ', text) ## unmatched []
|
||||
text = re.sub(r'(\*\*|__)(.*?)\1', r'\2', text) ## **bold** __bold__
|
||||
text = re.sub(r'(\*|_)(.*?)\1', r'\2', text) ## *italic* _italic_
|
||||
text = re.sub(r'^#{1,6}\s+', '', text, flags=re.MULTILINE) ## # headers
|
||||
text = re.sub(r'^>\s+', '', text, flags=re.MULTILINE) ## > blockquotes
|
||||
text = re.sub(r'^[\s]*[-\*\+]\s+', '', text, flags=re.MULTILINE) ## - * + lists
|
||||
text = re.sub(r'^[\s]*\d+\.\s+', '', text, flags=re.MULTILINE) ## 1. numbered lists
|
||||
text = re.sub(r'^[\*\-_]{3,}\s*$', '', text, flags=re.MULTILINE) ## --- *** ___ rules
|
||||
text = re.sub(r'<[^>]*>', '', text) ## <html> tags
|
||||
|
||||
#Remove non-ASCII (emojis, special Unicode)
|
||||
text = re.sub(r'[^\x20-\x7E\n\r\t]', '', text)
|
||||
|
||||
#Replace special sequences
|
||||
text = re.sub(r'-->', ', ', text) ## -->
|
||||
text = re.sub(r'<--', ', ', text) ## <--
|
||||
text = re.sub(r'=>', ', ', text) ## =>
|
||||
text = re.sub(r'::', ' ', text) ## ::
|
||||
|
||||
#Normalize whitespace
|
||||
text = re.sub(r'\s+', ' ', text)
|
||||
text = text.strip()
|
||||
|
||||
return text
|
||||
|
||||
Reference in New Issue
Block a user