Merge pull request #2000 from ManishMadan2882/main

Test coverage: TTS, Security and Storage layers
2026-01-20 14:00:55 +00:00 · 2025-10-02 00:11:22 +01:00
parent 4d34dc4234 80aaecb5f0
commit c2ccf2c72c
7 changed files with 918 additions and 76 deletions
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -118,6 +118,7 @@ class Settings(BaseSettings):
    # Encryption settings
    ENCRYPTION_SECRET_KEY: str = "default-docsgpt-encryption-key"

+    ELEVENLABS_API_KEY: Optional[str] = None

 path = Path(__file__).parent.parent.absolute()
 settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
--- a/application/tts/elevenlabs.py
+++ b/application/tts/elevenlabs.py
@@ -1,84 +1,30 @@
-import asyncio
-import websockets
-import json
-import base64
 from io import BytesIO
+import base64
 from application.tts.base import BaseTTS
+from application.core.settings import settings


 class ElevenlabsTTS(BaseTTS):
-    def __init__(self):        
-        self.api_key = 'ELEVENLABS_API_KEY'# here you should put your api key
-        self.model = "eleven_flash_v2_5"
-        self.voice = "VOICE_ID" # this is the hash code for the voice not the name!
-        self.write_audio = 1
+    def __init__(self):
+        from elevenlabs.client import ElevenLabs
+
+        self.client = ElevenLabs(
+            api_key=settings.ELEVENLABS_API_KEY,
+            )
+    

    def text_to_speech(self, text):
-        asyncio.run(self._text_to_speech_websocket(text))
+        lang = "en"
+        audio = self.client.generate(
+            text=text,
+            model="eleven_multilingual_v2",
+            voice="Brian",
+        )
+        audio_data = BytesIO()
+        for chunk in audio:
+            audio_data.write(chunk)
+        audio_bytes = audio_data.getvalue()

-    async def _text_to_speech_websocket(self, text):
-        uri = f"wss://api.elevenlabs.io/v1/text-to-speech/{self.voice}/stream-input?model_id={self.model}"
-        websocket = await websockets.connect(uri)
-        payload = {
-            "text": " ",
-            "voice_settings": {
-                "stability": 0.5,
-                "similarity_boost": 0.8,
-            },
-            "xi_api_key": self.api_key,
-        }
-
-        await websocket.send(json.dumps(payload))
-        
-        async def listen():
-            while 1:
-                try:
-                    msg = await websocket.recv()
-                    data = json.loads(msg)
-
-                    if data.get("audio"):
-                        print("audio received")
-                        yield base64.b64decode(data["audio"])
-                    elif data.get("isFinal"):
-                        break
-                except websockets.exceptions.ConnectionClosed:
-                    print("websocket closed")
-                    break
-        listen_task =  asyncio.create_task(self.stream(listen()))
-        
-        await websocket.send(json.dumps({"text": text}))
-        # this is to signal the end of the text, either use this or flush
-        await websocket.send(json.dumps({"text": ""})) 
-
-        await listen_task
-    
-    async def stream(self, audio_stream):
-        if self.write_audio:
-            audio_bytes = BytesIO()
-            async for chunk in audio_stream:
-                if chunk:
-                    audio_bytes.write(chunk)
-            with open("output_audio.mp3", "wb") as f:
-                f.write(audio_bytes.getvalue())
-        
-        else:
-            async for chunk in audio_stream:
-                pass # depends on the streamer!
-
-
-def test_elevenlabs_websocket():
-    """
-    Tests the ElevenlabsTTS text_to_speech method with a sample prompt.
-    Prints out the base64-encoded result and writes it to 'output_audio.mp3'.
-    """
-    # Instantiate your TTS class
-    tts = ElevenlabsTTS()
-
-    # Call the method with some sample text
-    tts.text_to_speech("Hello from ElevenLabs WebSocket!")
-
-    print("Saved audio to output_audio.mp3.")
-
-
-if __name__ == "__main__":
-    test_elevenlabs_websocket()
+        # Encode to base64
+        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
+        return audio_base64, lang