Merge branch 'arc53:main' into feature-TTS

This commit is contained in:
Srayash
2024-10-29 23:01:12 +05:30
committed by GitHub
5 changed files with 60 additions and 8 deletions

View File

@@ -269,9 +269,6 @@ class Stream(Resource):
"prompt_id": fields.String(
required=False, default="default", description="Prompt ID"
),
"selectedDocs": fields.String(
required=False, description="Selected documents"
),
"chunks": fields.Integer(
required=False, default=2, description="Number of chunks"
),
@@ -303,10 +300,9 @@ class Stream(Resource):
history = json.loads(history)
conversation_id = data.get("conversation_id")
prompt_id = data.get("prompt_id", "default")
if "selectedDocs" in data and data["selectedDocs"] is None:
chunks = 0
else:
chunks = int(data.get("chunks", 2))
chunks = int(data.get("chunks", 2))
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
retriever_name = data.get("retriever", "classic")
@@ -333,7 +329,8 @@ class Stream(Resource):
)
prompt = get_prompt(prompt_id)
if "isNoneDoc" in data and data["isNoneDoc"] is True:
chunks = 0
retriever = RetrieverCreator.create_retriever(
retriever_name,
question=question,

View File

@@ -17,6 +17,7 @@ from application.core.settings import settings
from application.extensions import api
from application.utils import check_required_fields
from application.vectorstore.vector_creator import VectorCreator
from application.tts.google_tts import GoogleTTS
# Module-level MongoDB client built from settings.MONGO_URI, plus a handle to
# its "docsgpt" database; both are created when this module is imported.
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
@@ -1663,3 +1664,27 @@ class ManageSync(Resource):
return make_response(jsonify({"success": False, "error": str(err)}), 400)
return make_response(jsonify({"success": True}), 200)
@user_ns.route("/api/tts")
class TextToSpeech(Resource):
    """REST resource that turns posted text into base64-encoded audio.

    Accepts a JSON object with a ``text`` field and responds with
    ``{"success": True, "audio_base64": ..., "lang": ...}`` on success or
    ``{"success": False, "error": ...}`` with status 400 on failure.
    """

    tts_model = api.model(
        "TextToSpeechModel",
        {
            "text": fields.String(
                required=True, description="Text to be synthesized as audio"
            ),
        },
    )

    @api.expect(tts_model)
    @api.doc(description="Synthesize audio speech from text")
    def post(self):
        """Synthesize the posted ``text`` and return it as base64 audio.

        Returns:
            A Flask response: 200 with the audio payload, or 400 with an
            error message when the payload is invalid or synthesis fails.
        """
        data = request.get_json()

        # Validate before use: a body that is not a JSON object, or that has
        # no usable "text", would otherwise raise outside the try block and
        # surface as an unhandled 500 instead of a client error.
        text = data.get("text") if isinstance(data, dict) else None
        if not isinstance(text, str) or not text.strip():
            return make_response(
                jsonify({"success": False, "error": "Missing required field: text"}),
                400,
            )

        try:
            tts_instance = GoogleTTS(text)
            audio_base64, detected_language = tts_instance.text_to_speech()
        except Exception as err:
            # Request boundary: report any synthesis failure to the client.
            return make_response(jsonify({"success": False, "error": str(err)}), 400)

        return make_response(
            jsonify(
                {
                    "success": True,
                    "audio_base64": audio_base64,
                    "lang": detected_language,
                }
            ),
            200,
        )

View File

@@ -85,3 +85,4 @@ vine==5.1.0
wcwidth==0.2.13
werkzeug==3.0.4
yarl==1.11.1
gTTS==2.3.2

10
application/tts/base.py Normal file
View File

@@ -0,0 +1,10 @@
from abc import ABC, abstractmethod
class BaseTTS(ABC):
    """Abstract base class for text-to-speech backends.

    Subclasses must override :meth:`text_to_speech`; the class cannot be
    instantiated until they do.
    """

    def __init__(self):
        """Initialize the base; there is no shared state to set up."""

    @abstractmethod
    def text_to_speech(self, *args, **kwargs):
        """Convert text to speech; concrete backends define the behavior.

        The base implementation does nothing and returns ``None``.
        """

View File

@@ -0,0 +1,19 @@
import io
import base64
from gtts import gTTS
from application.tts.base import BaseTTS
class GoogleTTS(BaseTTS):
    """Text-to-speech backend that delegates synthesis to ``gTTS``.

    Args:
        text: The text to synthesize.
        lang: Language code passed to ``gTTS``. Defaults to ``"en"``, the
            value that was previously hard-coded, so existing callers that
            pass only ``text`` behave as before.
    """

    def __init__(self, text, lang="en"):
        super().__init__()
        self.text = text
        self.lang = lang

    def text_to_speech(self):
        """Synthesize ``self.text`` and return the audio base64-encoded.

        Returns:
            tuple: ``(audio_base64, lang)`` where ``audio_base64`` is the
            UTF-8 string form of the base64-encoded bytes written by gTTS
            and ``lang`` is the language code used for synthesis.
        """
        audio_fp = io.BytesIO()
        tts = gTTS(text=self.text, lang=self.lang, slow=False)
        tts.write_to_fp(audio_fp)
        # getvalue() returns the full buffer regardless of cursor position,
        # so no rewind is needed before encoding.
        audio_base64 = base64.b64encode(audio_fp.getvalue()).decode("utf-8")
        return audio_base64, self.lang