mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-30 17:13:15 +00:00
eleven labs
This commit is contained in:
@@ -8,41 +8,62 @@ from base import BaseTTS
|
||||
|
||||
class ElevenlabsTTS(BaseTTS):
|
||||
def __init__(self):
|
||||
self.api_key = 'sk_19b72c883e8bdfcec2705be2d048f3830a40d2faa4b76b26'
|
||||
self.model = "eleven_multilingual_v2"
|
||||
self.voice = "Brian"
|
||||
self.api_key = 'ELEVENLABS_API_KEY'# here you should put your api key
|
||||
self.model = "eleven_flash_v2_5"
|
||||
self.voice = "VOICE_ID" # this is the hash code for the voice not the name!
|
||||
self.write_audio = 1
|
||||
|
||||
def text_to_speech(self, text):
|
||||
audio_bytes = asyncio.run(self._text_to_speech_websocket(text))
|
||||
audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
|
||||
lang = "en"
|
||||
return audio_base64, lang
|
||||
asyncio.run(self._text_to_speech_websocket(text))
|
||||
|
||||
async def _text_to_speech_websocket(self, text):
|
||||
uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}/stream-input?model_id={self.model}"
|
||||
|
||||
websocket = await websockets.connect(uri)
|
||||
payload = {
|
||||
"text": text,
|
||||
"model_id": self.model,
|
||||
"text": " ",
|
||||
"voice_settings": {
|
||||
"voice_id": self.voice
|
||||
"stability": 0.5,
|
||||
"similarity_boost": 0.8,
|
||||
},
|
||||
"xi-api-key": self.api_key,
|
||||
"Accept": "audio/mpeg"
|
||||
"xi_api_key": self.api_key,
|
||||
}
|
||||
audio_data = BytesIO()
|
||||
|
||||
async with websockets.connect(uri) as websocket:
|
||||
|
||||
await websocket.send(json.dumps(payload))
|
||||
|
||||
async for message in websocket:
|
||||
if isinstance(message, bytes):
|
||||
audio_data.write(message)
|
||||
else:
|
||||
print("Received a non-binary frame:", message)
|
||||
await websocket.send(json.dumps(payload))
|
||||
|
||||
async def listen():
|
||||
while 1:
|
||||
try:
|
||||
msg = await websocket.recv()
|
||||
data = json.loads(msg)
|
||||
|
||||
return audio_data.getvalue()
|
||||
if data.get("audio"):
|
||||
print("audio received")
|
||||
yield base64.b64decode(data["audio"])
|
||||
elif data.get("isFinal"):
|
||||
break
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
print("websocket closed")
|
||||
break
|
||||
listen_task = asyncio.create_task(self.stream(listen()))
|
||||
|
||||
await websocket.send(json.dumps({"text": text}))
|
||||
# this is to signal the end of the text, either use this or flush
|
||||
await websocket.send(json.dumps({"text": ""}))
|
||||
|
||||
await listen_task
|
||||
|
||||
async def stream(self, audio_stream):
|
||||
if self.write_audio:
|
||||
audio_bytes = BytesIO()
|
||||
async for chunk in audio_stream:
|
||||
if chunk:
|
||||
audio_bytes.write(chunk)
|
||||
with open("output_audio.mp3", "wb") as f:
|
||||
f.write(audio_bytes.getvalue())
|
||||
|
||||
else:
|
||||
async for chunk in audio_stream:
|
||||
pass # depends on the streamer!
|
||||
|
||||
|
||||
def test_elevenlabs_websocket():
|
||||
@@ -54,16 +75,7 @@ def test_elevenlabs_websocket():
|
||||
tts = ElevenlabsTTS()
|
||||
|
||||
# Call the method with some sample text
|
||||
audio_base64, lang = tts.text_to_speech("Hello from ElevenLabs WebSocket!")
|
||||
|
||||
print(f"Received language: {lang}")
|
||||
print(f"Base64 Audio (truncated): {audio_base64[:100]}...")
|
||||
|
||||
# Optional: Save the audio to a local file for manual listening.
|
||||
# We'll assume the audio is in MP3 format based on "Accept": "audio/mpeg".
|
||||
audio_bytes = base64.b64decode(audio_base64)
|
||||
with open("output_audio.mp3", "wb") as f:
|
||||
f.write(audio_bytes)
|
||||
tts.text_to_speech("Hello from ElevenLabs WebSocket!")
|
||||
|
||||
print("Saved audio to output_audio.mp3.")
|
||||
|
||||
|
||||
BIN
application/tts/output_audio.mp3
Normal file
BIN
application/tts/output_audio.mp3
Normal file
Binary file not shown.
Reference in New Issue
Block a user