# Review notes (free text ahead of the first "diff --git" header; patch tools skip it).
#
# Purpose: pin websockets==14.1 and point ElevenlabsTTS._text_to_speech_websocket
# at the /v1/text-to-speech/{voice}/stream-input WebSocket endpoint.
#
# * API key: read from the ELEVENLABS_API_KEY environment variable. A literal
#   secret must never appear in a patch; any key that was ever committed or
#   shared in a diff has to be revoked and re-issued, not just deleted.
# * Import: the package-absolute `from application.tts.base import BaseTTS` is
#   left untouched. `from base import BaseTTS` only resolves when
#   application/tts itself is on sys.path (i.e. the module is run as a script).
# * Auth: `xi-api-key` / `Accept` stay in the `headers` dict and are sent on the
#   WebSocket handshake via `additional_headers=`, the keyword used by the
#   websockets 14 asyncio client (the legacy client called it `extra_headers=`).
#   They are not put into the JSON payload.
# * NOTE(review): `self.voice` is "Brian" and is interpolated into the URL path;
#   that segment presumably expects a voice ID rather than a display name -- confirm.
# * NOTE(review): `model_id` / `voice_settings.voice_id` in the payload are carried
#   over unchanged; verify them against the stream-input message schema.
# * NOTE(review): line breaks and indentation of this patch are collapsed. Hunk
#   counts assume the blank context lines of the original layout, and the
#   `index` blob hashes predate this revision -- regenerate with `git diff`
#   before applying.
diff --git a/application/requirements.txt b/application/requirements.txt index 72650c3e..01b4c59e 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -86,4 +86,5 @@ urllib3==2.3.0 vine==5.1.0 wcwidth==0.2.13 werkzeug==3.1.3 -yarl==1.18.3 \ No newline at end of file +yarl==1.18.3 +websockets==14.1 diff --git a/application/tts/elevenlabs.py b/application/tts/elevenlabs.py index 11b4f081..96fb1f43 100644 --- a/application/tts/elevenlabs.py +++ b/application/tts/elevenlabs.py @@ -3,12 +3,13 @@ import websockets import json import base64 +import os from io import BytesIO from application.tts.base import BaseTTS class ElevenlabsTTS(BaseTTS): def __init__(self): - self.api_key = "ELEVENLABS_API_KEY" + self.api_key = os.getenv("ELEVENLABS_API_KEY", "") self.model = "eleven_multilingual_v2" self.voice = "Brian" @@ -19,21 +20,21 @@ class ElevenlabsTTS(BaseTTS): return audio_base64, lang async def _text_to_speech_websocket(self, text): - uri = "wss://api.elevenlabs.io/v1/tts-stream" + uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}/stream-input?model_id={self.model}" headers = { "xi-api-key": self.api_key, "Accept": "audio/mpeg" } payload = { "text": text, "model_id": self.model, "voice_settings": { "voice_id": self.voice }, } audio_data = BytesIO() - async with websockets.connect(uri, extra_headers=headers) as websocket: + async with websockets.connect(uri, additional_headers=headers) as websocket: await websocket.send(json.dumps(payload))