Add configurable TTS provider in settings.py and update ElevenLabs API (#2065) (#2074)

This commit is contained in:
Nihar
2025-10-22 21:37:21 +05:30
committed by GitHub
parent c4e8daf50e
commit f448e4a615
7 changed files with 112 additions and 12 deletions

View File

@@ -10,7 +10,7 @@ from application.api import api
from application.api.user.base import agents_collection, storage
from application.api.user.tasks import store_attachment
from application.core.settings import settings
from application.tts.google_tts import GoogleTTS
from application.tts.tts_creator import TTSCreator
from application.utils import safe_filename
@@ -133,7 +133,7 @@ class TextToSpeech(Resource):
data = request.get_json()
text = data["text"]
try:
tts_instance = GoogleTTS()
tts_instance = TTSCreator.create_tts(settings.TTS_PROVIDER)
audio_base64, detected_language = tts_instance.text_to_speech(text)
return make_response(
jsonify(

View File

@@ -130,6 +130,7 @@ class Settings(BaseSettings):
# Encryption settings
ENCRYPTION_SECRET_KEY: str = "default-docsgpt-encryption-key"
TTS_PROVIDER: str = "google_tts" # google_tts or elevenlabs
ELEVENLABS_API_KEY: Optional[str] = None
path = Path(__file__).parent.parent.absolute()

View File

@@ -10,6 +10,7 @@ ebooklib==0.18
escodegen==1.0.11
esprima==4.0.1
esutils==1.0.1
elevenlabs==2.17.0
Flask==3.1.1
faiss-cpu==1.9.0.post1
fastmcp==2.11.0

View File

@@ -15,10 +15,11 @@ class ElevenlabsTTS(BaseTTS):
def text_to_speech(self, text):
lang = "en"
audio = self.client.generate(
audio = self.client.text_to_speech.convert(
voice_id="nPczCjzI2devNBz1zQrb",
model_id="eleven_multilingual_v2",
text=text,
model="eleven_multilingual_v2",
voice="Brian",
output_format="mp3_44100_128"
)
audio_data = BytesIO()
for chunk in audio:

View File

@@ -0,0 +1,18 @@
from application.tts.google_tts import GoogleTTS
from application.tts.elevenlabs import ElevenlabsTTS
from application.tts.base import BaseTTS
class TTSCreator:
    """Factory that maps a provider name to a text-to-speech backend class."""

    # Registry of supported provider names (lowercase keys) to backend classes.
    tts_providers = {
        "google_tts": GoogleTTS,
        "elevenlabs": ElevenlabsTTS,
    }

    @classmethod
    def create_tts(cls, tts_type, *args, **kwargs) -> BaseTTS:
        """Instantiate the backend registered under ``tts_type``.

        Args:
            tts_type: Provider name. Matched case-insensitively and with
                surrounding whitespace ignored.
            *args: Positional arguments forwarded to the backend constructor.
            **kwargs: Keyword arguments forwarded to the backend constructor.

        Returns:
            A new instance of the class registered for the provider.

        Raises:
            ValueError: If ``tts_type`` is not a string or does not name a
                registered provider.
        """
        # Guard against None / non-string input so callers always get the
        # ValueError below rather than an AttributeError from ``.lower()``.
        key = tts_type.strip().lower() if isinstance(tts_type, str) else None
        tts_class = cls.tts_providers.get(key)
        if not tts_class:
            supported = ", ".join(sorted(cls.tts_providers))
            raise ValueError(
                f"No tts class found for type {tts_type}"
                f" (supported: {supported})"
            )
        return tts_class(*args, **kwargs)