From 9ca079c95a9580df7095520650da9e5234367cbf Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sat, 2 Nov 2024 16:43:28 +0000
Subject: [PATCH] feat: elevenlabs tts

---
 application/api/user/routes.py |  4 ++--
 application/tts/elevenlabs.py  | 29 +++++++++++++++++++++++++++++
 application/tts/google_tts.py  | 10 +++++-----
 3 files changed, 36 insertions(+), 7 deletions(-)
 create mode 100644 application/tts/elevenlabs.py

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index b243179e..c33a6c84 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -1682,8 +1682,8 @@ class TextToSpeech(Resource):
         data = request.get_json()
         text = data["text"]
         try:
-            tts_instance = GoogleTTS(text)
-            audio_base64, detected_language = tts_instance.text_to_speech()
+            tts_instance = GoogleTTS()
+            audio_base64, detected_language = tts_instance.text_to_speech(text)
             return make_response(jsonify({"success": True,'audio_base64': audio_base64,'lang':detected_language}), 200)
         except Exception as err:
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
diff --git a/application/tts/elevenlabs.py b/application/tts/elevenlabs.py
new file mode 100644
index 00000000..e1b3db27
--- /dev/null
+++ b/application/tts/elevenlabs.py
@@ -0,0 +1,29 @@
+from io import BytesIO
+import base64
+from application.tts.base import BaseTTS
+
+
+class ElevenlabsTTS(BaseTTS):
+    """Text-to-speech backend that calls the ElevenLabs API."""
+
+    def __init__(self):
+        """Create the API client with the key from ELEVENLABS_API_KEY."""
+        import os
+        from elevenlabs.client import ElevenLabs
+
+        # Read the key from the environment; never hard-code a credential.
+        self.client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
+
+    def text_to_speech(self, text):
+        """Return ``(base64-encoded audio, language code)`` for ``text``."""
+        lang = "en"
+        audio = self.client.generate(
+            text=text,
+            model="eleven_multilingual_v2",
+            voice="Brian",
+        )
+        # The result is consumed as an iterable of byte chunks; buffer them all.
+        audio_data = BytesIO()
+        audio_data.writelines(audio)
+        audio_base64 = base64.b64encode(audio_data.getvalue()).decode("utf-8")
+        return audio_base64, lang
diff --git a/application/tts/google_tts.py b/application/tts/google_tts.py
index 310309dc..ee70161e 100644
--- a/application/tts/google_tts.py
+++ b/application/tts/google_tts.py
@@ -5,14 +5,14 @@ from application.tts.base import BaseTTS
 
 
 class GoogleTTS(BaseTTS):
-    def __init__(self, text):
-        self.text = text
-    
+    def __init__(self):
+        pass  # nothing to set up: text_to_speech() receives the text per call
 
-    def text_to_speech(self):
+
+    def text_to_speech(self, text):
         lang = "en"
         audio_fp = io.BytesIO()
-        tts = gTTS(text=self.text, lang=lang, slow=False)
+        tts = gTTS(text=text, lang=lang, slow=False)
         tts.write_to_fp(audio_fp)
         audio_fp.seek(0)
         audio_base64 = base64.b64encode(audio_fp.read()).decode("utf-8")