eleven labs

This commit is contained in:
Ahmad Alghooneh
2025-01-16 00:41:09 -05:00
parent 96ab01b0c1
commit 00b10f17c1
2 changed files with 46 additions and 34 deletions

View File

@@ -8,41 +8,62 @@ from base import BaseTTS
class ElevenlabsTTS(BaseTTS):
    """Text-to-speech client for the ElevenLabs ``stream-input`` WebSocket API.

    Instance attributes (all overridable after construction):

    * ``api_key``: ElevenLabs API key, sent in the first WebSocket message.
    * ``model``: model id appended to the endpoint URL as ``model_id``.
    * ``voice``: voice *id* (the hash-like identifier, not the display name).
    * ``write_audio``: when truthy, received audio is saved to
      ``output_audio.mp3``; otherwise the audio chunks are consumed and
      discarded.
    """

    def __init__(self):
        # Local import: the file's import section is outside this block.
        import os

        # Never commit a real key. Read it from the environment and fall back
        # to the placeholder string so existing "edit the attribute" usage
        # keeps working.
        self.api_key = os.environ.get("ELEVENLABS_API_KEY", "ELEVENLABS_API_KEY")
        self.model = "eleven_flash_v2_5"
        self.voice = "VOICE_ID"  # this is the hash code for the voice, not the name!
        self.write_audio = True

    def text_to_speech(self, text):
        """Synthesize ``text`` and hand the resulting audio to :meth:`stream`.

        Blocks until the whole exchange has finished. Returns ``None``; with
        ``write_audio`` enabled the audio ends up in ``output_audio.mp3``.

        Raises:
            RuntimeError: if called from a thread that already runs an asyncio
                event loop (a restriction of ``asyncio.run``).
        """
        # NOTE(review): BaseTTS may expect an ``(audio_base64, lang)`` return
        # value; that contract is not visible here, so confirm with callers.
        asyncio.run(self._text_to_speech_websocket(text))

    async def _text_to_speech_websocket(self, text):
        """Send ``text`` over the WebSocket and stream the audio that comes back.

        The exchange is: one initial message carrying the voice settings and
        API key, one message with the text, and one empty-text message that
        signals the end of input. Incoming audio is consumed concurrently by
        :meth:`stream`.
        """
        uri = (
            f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}"
            f"/stream-input?model_id={self.model}"
        )
        payload = {
            "text": " ",
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.8,
            },
            "xi_api_key": self.api_key,
        }

        # ``async with`` guarantees the socket is closed on every exit path,
        # including a failed send or an error raised while streaming.
        async with websockets.connect(uri) as websocket:
            await websocket.send(json.dumps(payload))

            async def listen():
                """Yield decoded audio chunks until the final frame or a close."""
                while True:
                    try:
                        msg = await websocket.recv()
                    except websockets.exceptions.ConnectionClosed:
                        print("websocket closed")
                        break
                    data = json.loads(msg)
                    if data.get("audio"):
                        print("audio received")
                        yield base64.b64decode(data["audio"])
                    elif data.get("isFinal"):
                        break

            # Start consuming before the text is sent so that no audio frame
            # is left waiting while the remaining messages go out.
            listen_task = asyncio.create_task(self.stream(listen()))
            await websocket.send(json.dumps({"text": text}))
            # this is to signal the end of the text, either use this or flush
            await websocket.send(json.dumps({"text": ""}))
            await listen_task

    async def stream(self, audio_stream):
        """Consume ``audio_stream``, an async iterable of audio byte chunks.

        With ``write_audio`` enabled, the chunks are buffered and written to
        ``output_audio.mp3`` once the stream is exhausted. Otherwise the
        stream is drained and the chunks are dropped; this branch is the hook
        for a real playback/streaming backend.
        """
        if self.write_audio:
            audio_bytes = BytesIO()
            async for chunk in audio_stream:
                if chunk:
                    audio_bytes.write(chunk)
            with open("output_audio.mp3", "wb") as f:
                f.write(audio_bytes.getvalue())
        else:
            async for chunk in audio_stream:
                pass  # depends on the streamer!
def test_elevenlabs_websocket():
@@ -54,16 +75,7 @@ def test_elevenlabs_websocket():
tts = ElevenlabsTTS()
# Call the method with some sample text
audio_base64, lang = tts.text_to_speech("Hello from ElevenLabs WebSocket!")
print(f"Received language: {lang}")
print(f"Base64 Audio (truncated): {audio_base64[:100]}...")
# Optional: Save the audio to a local file for manual listening.
# We'll assume the audio is in MP3 format based on "Accept": "audio/mpeg".
audio_bytes = base64.b64decode(audio_base64)
with open("output_audio.mp3", "wb") as f:
f.write(audio_bytes)
tts.text_to_speech("Hello from ElevenLabs WebSocket!")
print("Saved audio to output_audio.mp3.")

Binary file not shown.