mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
85 lines
2.7 KiB
Python
85 lines
2.7 KiB
Python
import asyncio
|
|
import websockets
|
|
import json
|
|
import base64
|
|
from io import BytesIO
|
|
from base import BaseTTS
|
|
|
|
|
|
class ElevenlabsTTS(BaseTTS):
    """Text-to-speech client that talks to the ElevenLabs WebSocket endpoint.

    The text is sent over a ``stream-input`` WebSocket connection and the
    base64-encoded audio chunks that come back are decoded and handed to
    :meth:`stream`, which either saves them to ``output_audio.mp3`` or simply
    drains them, depending on ``write_audio``.
    """

    def __init__(
        self,
        *,
        api_key="ELEVENLABS_API_KEY",
        model="eleven_flash_v2_5",
        voice="VOICE_ID",
        write_audio=1,
    ):
        """Store the connection settings.

        Args:
            api_key: ElevenLabs API key. The default is a placeholder and
                must be replaced with a real key.
            model: Model identifier placed in the ``model_id`` query parameter.
            voice: Voice identifier used in the URL path. This is the voice's
                ID (hash), not its display name.
            write_audio: Truthy to write the received audio to
                ``output_audio.mp3``; falsy to only consume the stream.
        """
        self.api_key = api_key
        self.model = model
        self.voice = voice
        self.write_audio = write_audio

    def text_to_speech(self, text: str) -> None:
        """Synthesize ``text`` synchronously.

        Runs the WebSocket exchange with ``asyncio.run``, so it starts its own
        event loop for the duration of the call.
        """
        asyncio.run(self._text_to_speech_websocket(text))

    async def _text_to_speech_websocket(self, text: str) -> None:
        """Send ``text`` over the WebSocket and pipe the audio into ``stream``."""
        uri = (
            f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}"
            f"/stream-input?model_id={self.model}"
        )
        # First message on the socket: a single-space text together with the
        # voice settings and the API key.
        opening_message = {
            "text": " ",
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.8,
            },
            "xi_api_key": self.api_key,
        }

        # ``async with`` guarantees the connection is closed on every exit
        # path; previously the socket was opened and never closed.
        async with websockets.connect(uri) as websocket:
            await websocket.send(json.dumps(opening_message))

            async def listen():
                """Yield decoded audio chunks until the final message or close."""
                while True:
                    # Only ``recv`` can raise ConnectionClosed, so keep the
                    # ``try`` body limited to it.
                    try:
                        msg = await websocket.recv()
                    except websockets.exceptions.ConnectionClosed:
                        print("websocket closed")
                        break

                    data = json.loads(msg)
                    if data.get("audio"):
                        print("audio received")
                        yield base64.b64decode(data["audio"])
                    elif data.get("isFinal"):
                        break

            # Start consuming before the text is sent so that audio can be
            # received while the remaining messages are still going out.
            listen_task = asyncio.create_task(self.stream(listen()))
            try:
                await websocket.send(json.dumps({"text": text}))
                # this is to signal the end of the text, either use this or flush
                await websocket.send(json.dumps({"text": ""}))
                await listen_task
            finally:
                # If sending failed, do not leave the consumer task dangling.
                if not listen_task.done():
                    listen_task.cancel()

    async def stream(self, audio_stream) -> None:
        """Consume ``audio_stream``, an async iterable of audio byte chunks.

        When ``self.write_audio`` is truthy, all non-empty chunks are buffered
        in memory and then written to ``output_audio.mp3`` in the current
        working directory. Otherwise the chunks are read and discarded; this
        branch is the hook for a real audio streamer.
        """
        if self.write_audio:
            audio_bytes = BytesIO()
            async for chunk in audio_stream:
                if chunk:
                    audio_bytes.write(chunk)
            with open("output_audio.mp3", "wb") as f:
                f.write(audio_bytes.getvalue())
        else:
            async for chunk in audio_stream:
                pass  # depends on the streamer!
|
|
|
|
|
|
def test_elevenlabs_websocket():
    """Manual smoke check for ``ElevenlabsTTS.text_to_speech``.

    Builds an ``ElevenlabsTTS`` with its default settings, asks it to
    synthesize one fixed sample sentence, and then prints a confirmation
    line. Nothing is asserted; the call talks to the remote service, so a
    working API key and voice ID are presumably required for it to succeed.
    """
    sample_prompt = "Hello from ElevenLabs WebSocket!"

    # Create the client and run a single synthesis request with the sample text.
    engine = ElevenlabsTTS()
    engine.text_to_speech(sample_prompt)

    print("Saved audio to output_audio.mp3.")
|
|
|
|
|
|
# When this file is executed directly (rather than imported), run the manual
# smoke check once.
if __name__ == "__main__":
    test_elevenlabs_websocket()
|