eleven labs

2026-03-02 16:21:50 +00:00 · 2025-01-16 00:41:09 -05:00
parent 96ab01b0c1
commit 00b10f17c1
2 changed files with 46 additions and 34 deletions
--- a/application/tts/elevenlabs.py
+++ b/application/tts/elevenlabs.py
@@ -8,41 +8,62 @@ from base import BaseTTS

 class ElevenlabsTTS(BaseTTS):
    def __init__(self):        
-        self.api_key = 'sk_19b72c883e8bdfcec2705be2d048f3830a40d2faa4b76b26'
-        self.model = "eleven_multilingual_v2"
-        self.voice = "Brian"
+        self.api_key = 'ELEVENLABS_API_KEY'  # placeholder: replace with your ElevenLabs API key
+        self.model = "eleven_flash_v2_5"
+        self.voice = "VOICE_ID"  # placeholder: the voice's ID string, not its display name
+        self.write_audio = 1

    def text_to_speech(self, text):
-        audio_bytes = asyncio.run(self._text_to_speech_websocket(text))
-        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
-        lang = "en"
-        return audio_base64, lang
+        asyncio.run(self._text_to_speech_websocket(text))

    async def _text_to_speech_websocket(self, text):
        uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}/stream-input?model_id={self.model}"
-
+        websocket = await websockets.connect(uri)
        payload = {
-            "text": text,
-            "model_id": self.model,
+            "text": " ",
            "voice_settings": {
-                "voice_id": self.voice
+                "stability": 0.5,
+                "similarity_boost": 0.8,
            },
-            "xi-api-key": self.api_key,
-            "Accept": "audio/mpeg"
+            "xi_api_key": self.api_key,
        }
-        audio_data = BytesIO()

-        async with websockets.connect(uri) as websocket:
-            
-            await websocket.send(json.dumps(payload))
-            
-            async for message in websocket:
-                if isinstance(message, bytes):
-                    audio_data.write(message)
-                else:
-                    print("Received a non-binary frame:", message)
+        await websocket.send(json.dumps(payload))
+        
+        async def listen():
+            while 1:
+                try:
+                    msg = await websocket.recv()
+                    data = json.loads(msg)

-        return audio_data.getvalue()
+                    if data.get("audio"):
+                        print("audio received")
+                        yield base64.b64decode(data["audio"])
+                    elif data.get("isFinal"):
+                        break
+                except websockets.exceptions.ConnectionClosed:
+                    print("websocket closed")
+                    break
+        listen_task =  asyncio.create_task(self.stream(listen()))
+        
+        await websocket.send(json.dumps({"text": text}))
+        # An empty "text" message signals the end of the input; use either this or a flush.
+        await websocket.send(json.dumps({"text": ""})) 
+
+        await listen_task
+    
+    async def stream(self, audio_stream):
+        if self.write_audio:
+            audio_bytes = BytesIO()
+            async for chunk in audio_stream:
+                if chunk:
+                    audio_bytes.write(chunk)
+            with open("output_audio.mp3", "wb") as f:
+                f.write(audio_bytes.getvalue())
+        
+        else:
+            async for chunk in audio_stream:
+                pass  # chunks are discarded here; what to do with each one depends on the streamer


 def test_elevenlabs_websocket():
@@ -54,16 +75,7 @@ def test_elevenlabs_websocket():
    tts = ElevenlabsTTS()

    # Call the method with some sample text
-    audio_base64, lang = tts.text_to_speech("Hello from ElevenLabs WebSocket!")
-
-    print(f"Received language: {lang}")
-    print(f"Base64 Audio (truncated): {audio_base64[:100]}...")
-
-    # Optional: Save the audio to a local file for manual listening.
-    # We'll assume the audio is in MP3 format based on "Accept": "audio/mpeg".
-    audio_bytes = base64.b64decode(audio_base64)
-    with open("output_audio.mp3", "wb") as f:
-        f.write(audio_bytes)
+    tts.text_to_speech("Hello from ElevenLabs WebSocket!")

    print("Saved audio to output_audio.mp3.")

--- a/application/tts/output_audio.mp3
+++ b/application/tts/output_audio.mp3