From 9ca079c95a9580df7095520650da9e5234367cbf Mon Sep 17 00:00:00 2001
From: Alex
Date: Sat, 2 Nov 2024 16:43:28 +0000
Subject: [PATCH] feat: elevenlabs tts

Add an ElevenLabs-backed TTS class and move the text argument from the
GoogleTTS constructor to text_to_speech(), so both TTS classes are
constructed without arguments and called as text_to_speech(text).

ElevenlabsTTS reads its API key from the ELEVENLABS_API_KEY environment
variable instead of passing the literal string "ELEVENLABS_API_KEY" to
the client as the key.
---
Note: the "index" line of the new elevenlabs.py diff is omitted because
its blob id changed together with the API-key fix; it is not needed to
apply a new text file.

 application/api/user/routes.py |  4 ++--
 application/tts/elevenlabs.py  | 39 +++++++++++++++++++++++++++++++++++++++
 application/tts/google_tts.py  | 10 +++++-----
 3 files changed, 46 insertions(+), 7 deletions(-)
 create mode 100644 application/tts/elevenlabs.py

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index b243179e..c33a6c84 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -1682,8 +1682,8 @@ class TextToSpeech(Resource):
         data = request.get_json()
         text = data["text"]
         try:
-            tts_instance = GoogleTTS(text)
-            audio_base64, detected_language = tts_instance.text_to_speech()
+            tts_instance = GoogleTTS()
+            audio_base64, detected_language = tts_instance.text_to_speech(text)
             return make_response(jsonify({"success": True,'audio_base64': audio_base64,'lang':detected_language}), 200)
         except Exception as err:
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
diff --git a/application/tts/elevenlabs.py b/application/tts/elevenlabs.py
new file mode 100644
--- /dev/null
+++ b/application/tts/elevenlabs.py
@@ -0,0 +1,39 @@
+import base64
+import os
+
+from application.tts.base import BaseTTS
+
+
+class ElevenlabsTTS(BaseTTS):
+    """Text-to-speech backend that calls the ElevenLabs API."""
+
+    def __init__(self):
+        """Create the ElevenLabs client.
+
+        Raises:
+            ValueError: If ELEVENLABS_API_KEY is unset or empty.
+        """
+        # Imported here rather than at module level, so importing this module
+        # does not itself require the elevenlabs package.
+        from elevenlabs.client import ElevenLabs
+
+        api_key = os.environ.get("ELEVENLABS_API_KEY")
+        if not api_key:
+            raise ValueError("ELEVENLABS_API_KEY environment variable is not set")
+        self.client = ElevenLabs(api_key=api_key)
+
+    def text_to_speech(self, text):
+        """Synthesize *text* and return ``(audio_base64, lang)``.
+
+        The chunks produced by the client are joined and base64-encoded into
+        a UTF-8 string; ``lang`` is always ``"en"``.
+        """
+        lang = "en"
+        audio = self.client.generate(
+            text=text,
+            model="eleven_multilingual_v2",
+            voice="Brian",
+        )
+        # The result is consumed as an iterable of byte chunks.
+        audio_base64 = base64.b64encode(b"".join(audio)).decode("utf-8")
+        return audio_base64, lang
diff --git a/application/tts/google_tts.py b/application/tts/google_tts.py
index 310309dc..ee70161e 100644
--- a/application/tts/google_tts.py
+++ b/application/tts/google_tts.py
@@ -5,14 +5,14 @@ from application.tts.base import BaseTTS
 
 
 class GoogleTTS(BaseTTS):
-    def __init__(self, text):
-        self.text = text
-
+    def __init__(self):
+        pass
 
-    def text_to_speech(self):
+
+    def text_to_speech(self, text):
         lang = "en"
         audio_fp = io.BytesIO()
-        tts = gTTS(text=self.text, lang=lang, slow=False)
+        tts = gTTS(text=text, lang=lang, slow=False)
         tts.write_to_fp(audio_fp)
         audio_fp.seek(0)
         audio_base64 = base64.b64encode(audio_fp.read()).decode("utf-8")