Add configurable TTS provider in settings.py and update ElevenLabs API usage (#2065) (#2074)

2025-11-29 08:33:20 +00:00 · 2025-10-22 21:37:21 +05:30
parent c4e8daf50e
commit f448e4a615
7 changed files with 112 additions and 12 deletions
--- a/application/api/user/attachments/routes.py
+++ b/application/api/user/attachments/routes.py
@@ -10,7 +10,7 @@ from application.api import api
 from application.api.user.base import agents_collection, storage
 from application.api.user.tasks import store_attachment
 from application.core.settings import settings
-from application.tts.google_tts import GoogleTTS
+from application.tts.tts_creator import TTSCreator
 from application.utils import safe_filename


@@ -133,7 +133,7 @@ class TextToSpeech(Resource):
        data = request.get_json()
        text = data["text"]
        try:
-            tts_instance = GoogleTTS()
+            tts_instance = TTSCreator.create_tts(settings.TTS_PROVIDER)
            audio_base64, detected_language = tts_instance.text_to_speech(text)
            return make_response(
                jsonify(
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -130,6 +130,7 @@ class Settings(BaseSettings):
    # Encryption settings
    ENCRYPTION_SECRET_KEY: str = "default-docsgpt-encryption-key"

+    TTS_PROVIDER: str = "google_tts" # google_tts or elevenlabs
    ELEVENLABS_API_KEY: Optional[str] = None

 path = Path(__file__).parent.parent.absolute()
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -10,6 +10,7 @@ ebooklib==0.18
 escodegen==1.0.11
 esprima==4.0.1
 esutils==1.0.1
+elevenlabs==2.17.0
 Flask==3.1.1
 faiss-cpu==1.9.0.post1
 fastmcp==2.11.0
--- a/application/tts/elevenlabs.py
+++ b/application/tts/elevenlabs.py
@@ -15,10 +15,11 @@ class ElevenlabsTTS(BaseTTS):

    def text_to_speech(self, text):
        lang = "en"
-        audio = self.client.generate(
+        audio = self.client.text_to_speech.convert(
+            voice_id="nPczCjzI2devNBz1zQrb",             
+            model_id="eleven_multilingual_v2",
            text=text,
-            model="eleven_multilingual_v2",
-            voice="Brian",
+            output_format="mp3_44100_128"
        )
        audio_data = BytesIO()
        for chunk in audio:
--- a/application/tts/tts_creator.py
+++ b/application/tts/tts_creator.py
@@ -0,0 +1,18 @@
+from application.tts.google_tts import GoogleTTS
+from application.tts.elevenlabs import ElevenlabsTTS
+from application.tts.base import BaseTTS
+
+
+
+class TTSCreator:  # Factory that builds a TTS backend instance from a provider name.
+    tts_providers = {  # Registry: lowercase provider key -> TTS implementation class.
+        "google_tts": GoogleTTS,
+        "elevenlabs": ElevenlabsTTS,
+    }
+
+    @classmethod
+    def create_tts(cls, tts_type, *args, **kwargs)-> BaseTTS:  # Extra args/kwargs are forwarded to the provider's constructor.
+        tts_class = cls.tts_providers.get(tts_type.lower())  # Input is lowercased, so the lookup is case-insensitive.
+        if not tts_class:  # No registry entry for the requested provider name.
+            raise ValueError(f"No tts class found for type {tts_type}")
+        return tts_class(*args, **kwargs)