mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Merge branch 'arc53:main' into fix-safari-font
This commit is contained in:
@@ -269,9 +269,6 @@ class Stream(Resource):
|
||||
"prompt_id": fields.String(
|
||||
required=False, default="default", description="Prompt ID"
|
||||
),
|
||||
"selectedDocs": fields.String(
|
||||
required=False, description="Selected documents"
|
||||
),
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
@@ -303,10 +300,9 @@ class Stream(Resource):
|
||||
history = json.loads(history)
|
||||
conversation_id = data.get("conversation_id")
|
||||
prompt_id = data.get("prompt_id", "default")
|
||||
if "selectedDocs" in data and data["selectedDocs"] is None:
|
||||
chunks = 0
|
||||
else:
|
||||
chunks = int(data.get("chunks", 2))
|
||||
|
||||
|
||||
chunks = int(data.get("chunks", 2))
|
||||
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
|
||||
retriever_name = data.get("retriever", "classic")
|
||||
|
||||
@@ -333,7 +329,8 @@ class Stream(Resource):
|
||||
)
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
if "isNoneDoc" in data and data["isNoneDoc"] is True:
|
||||
chunks = 0
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
|
||||
@@ -17,6 +17,7 @@ from application.core.settings import settings
|
||||
from application.extensions import api
|
||||
from application.utils import check_required_fields
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.tts.google_tts import GoogleTTS
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
@@ -340,6 +341,7 @@ class UploadFile(Resource):
|
||||
".epub",
|
||||
".html",
|
||||
".mdx",
|
||||
".json",
|
||||
".xlsx",
|
||||
],
|
||||
job_name,
|
||||
@@ -1662,3 +1664,27 @@ class ManageSync(Resource):
|
||||
return make_response(jsonify({"success": False, "error": str(err)}), 400)
|
||||
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
|
||||
|
||||
@user_ns.route("/api/tts")
|
||||
class TextToSpeech(Resource):
|
||||
tts_model = api.model(
|
||||
"TextToSpeechModel",
|
||||
{
|
||||
"text": fields.String(required=True, description="Text to be synthesized as audio"),
|
||||
},
|
||||
)
|
||||
|
||||
@api.expect(tts_model)
|
||||
@api.doc(description="Synthesize audio speech from text")
|
||||
def post(self):
|
||||
data = request.get_json()
|
||||
text = data["text"]
|
||||
try:
|
||||
tts_instance = GoogleTTS(text)
|
||||
audio_base64, detected_language = tts_instance.text_to_speech()
|
||||
return make_response(jsonify({"success": True,'audio_base64': audio_base64,'lang':detected_language}), 200)
|
||||
except Exception as err:
|
||||
return make_response(jsonify({"success": False, "error": str(err)}), 400)
|
||||
|
||||
|
||||
|
||||
@@ -11,6 +11,7 @@ from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
|
||||
from application.parser.file.json_parser import JSONParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
@@ -23,6 +24,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".rst": RstParser(),
|
||||
".html": HTMLParser(),
|
||||
".mdx": MarkdownParser(),
|
||||
".json":JSONParser(),
|
||||
}
|
||||
|
||||
|
||||
|
||||
57
application/parser/file/json_parser.py
Normal file
57
application/parser/file/json_parser.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import json
|
||||
from typing import Any, Dict, List, Union
|
||||
from pathlib import Path
|
||||
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
class JSONParser(BaseParser):
|
||||
r"""JSON (.json) parser.
|
||||
|
||||
Parses JSON files into a list of strings or a concatenated document.
|
||||
It handles both JSON objects (dictionaries) and arrays (lists).
|
||||
|
||||
Args:
|
||||
concat_rows (bool): Whether to concatenate all rows into one document.
|
||||
If set to False, a Document will be created for each item in the JSON.
|
||||
True by default.
|
||||
|
||||
row_joiner (str): Separator to use for joining each row.
|
||||
Only used when `concat_rows=True`.
|
||||
Set to "\n" by default.
|
||||
|
||||
json_config (dict): Options for parsing JSON. Can be used to specify options like
|
||||
custom decoding or formatting. Set to empty dict by default.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
concat_rows: bool = True,
|
||||
row_joiner: str = "\n",
|
||||
json_config: dict = {},
|
||||
**kwargs: Any
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
self._concat_rows = concat_rows
|
||||
self._row_joiner = row_joiner
|
||||
self._json_config = json_config
|
||||
|
||||
def _init_parser(self) -> Dict:
|
||||
"""Init parser."""
|
||||
return {}
|
||||
|
||||
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
|
||||
"""Parse JSON file."""
|
||||
|
||||
with open(file, 'r', encoding='utf-8') as f:
|
||||
data = json.load(f, **self._json_config)
|
||||
|
||||
if isinstance(data, dict):
|
||||
data = [data]
|
||||
|
||||
if self._concat_rows:
|
||||
return self._row_joiner.join([str(item) for item in data])
|
||||
else:
|
||||
return data
|
||||
@@ -85,3 +85,4 @@ vine==5.1.0
|
||||
wcwidth==0.2.13
|
||||
werkzeug==3.0.4
|
||||
yarl==1.11.1
|
||||
gTTS==2.3.2
|
||||
10
application/tts/base.py
Normal file
10
application/tts/base.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseTTS(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def text_to_speech(self, *args, **kwargs):
|
||||
pass
|
||||
19
application/tts/google_tts.py
Normal file
19
application/tts/google_tts.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import io
|
||||
import base64
|
||||
from gtts import gTTS
|
||||
from application.tts.base import BaseTTS
|
||||
|
||||
|
||||
class GoogleTTS(BaseTTS):
|
||||
def __init__(self, text):
|
||||
self.text = text
|
||||
|
||||
|
||||
def text_to_speech(self):
|
||||
lang = "en"
|
||||
audio_fp = io.BytesIO()
|
||||
tts = gTTS(text=self.text, lang=lang, slow=False)
|
||||
tts.write_to_fp(audio_fp)
|
||||
audio_fp.seek(0)
|
||||
audio_base64 = base64.b64encode(audio_fp.read()).decode("utf-8")
|
||||
return audio_base64, lang
|
||||
@@ -68,10 +68,8 @@ body.dark {
|
||||
.table-default td:last-child {
|
||||
@apply border-r-0; /* Ensure no right border on the last column */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */
|
||||
|
||||
/* Document
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
"start": "Start Chatting",
|
||||
"name": "Name",
|
||||
"choose": "Choose Files",
|
||||
"info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip limited to 25mb",
|
||||
"info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limited to 25mb",
|
||||
"uploadedFiles": "Uploaded Files",
|
||||
"cancel": "Cancel",
|
||||
"train": "Train",
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
"start": "Empezar a chatear",
|
||||
"name": "Nombre",
|
||||
"choose": "Seleccionar Archivos",
|
||||
"info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip limitados a 25 MB",
|
||||
"info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limitados a 25 MB",
|
||||
"uploadedFiles": "Archivos Subidos",
|
||||
"cancel": "Cancelar",
|
||||
"train": "Entrenar",
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
"start": "チャットを開始する",
|
||||
"name": "名前",
|
||||
"choose": "ファイルを選択",
|
||||
"info": ".pdf, .txt, .rst, .docx, .md, .zipファイルを25MBまでアップロードしてください",
|
||||
"info": ".pdf, .txt, .rst, .docx, .md, .json, .zipファイルを25MBまでアップロードしてください",
|
||||
"uploadedFiles": "アップロードされたファイル",
|
||||
"cancel": "キャンセル",
|
||||
"train": "トレーニング",
|
||||
|
||||
@@ -80,7 +80,7 @@
|
||||
"remote": "遠端",
|
||||
"name": "名稱",
|
||||
"choose": "選擇檔案",
|
||||
"info": "請上傳 .pdf, .txt, .rst, .docx, .md, .zip 檔案,大小限制為 25MB",
|
||||
"info": "請上傳 .pdf, .txt, .rst, .docx, .md, .json, .zip 檔案,大小限制為 25MB",
|
||||
"uploadedFiles": "已上傳的檔案",
|
||||
"cancel": "取消",
|
||||
"train": "訓練",
|
||||
|
||||
@@ -86,7 +86,7 @@
|
||||
"start": "开始聊天",
|
||||
"name": "名称",
|
||||
"choose": "选择文件",
|
||||
"info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip 文件,限 25MB",
|
||||
"info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip 文件,限 25MB",
|
||||
"uploadedFiles": "已上传文件",
|
||||
"cancel": "取消",
|
||||
"train": "训练",
|
||||
|
||||
@@ -314,6 +314,7 @@ function Upload({
|
||||
'application/zip': ['.zip'],
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
||||
['.docx'],
|
||||
'application/json': ['.json'],
|
||||
'text/csv': ['.csv'],
|
||||
'text/html': ['.html'],
|
||||
'application/epub+zip': ['.epub'],
|
||||
|
||||
Reference in New Issue
Block a user