Merge branch 'arc53:main' into fix-safari-font

2026-05-21 21:05:05 +00:00 · 2024-10-29 21:48:01 +05:30
parent 8564c2ba72 d9787e849e
commit b99b3b844a
14 changed files with 126 additions and 15 deletions
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -269,9 +269,6 @@ class Stream(Resource):
            "prompt_id": fields.String(
                required=False, default="default", description="Prompt ID"
            ),
-            "selectedDocs": fields.String(
-                required=False, description="Selected documents"
-            ),
            "chunks": fields.Integer(
                required=False, default=2, description="Number of chunks"
            ),
@@ -303,10 +300,9 @@ class Stream(Resource):
            history = json.loads(history)
            conversation_id = data.get("conversation_id")
            prompt_id = data.get("prompt_id", "default")
-            if "selectedDocs" in data and data["selectedDocs"] is None:
-                chunks = 0
-            else:
-                chunks = int(data.get("chunks", 2))
+            
+
+            chunks = int(data.get("chunks", 2))
            token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
            retriever_name = data.get("retriever", "classic")

@@ -333,7 +329,8 @@ class Stream(Resource):
            )

            prompt = get_prompt(prompt_id)
-
+            if "isNoneDoc" in data and data["isNoneDoc"] is True:
+                chunks = 0
            retriever = RetrieverCreator.create_retriever(
                retriever_name,
                question=question,
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -17,6 +17,7 @@ from application.core.settings import settings
 from application.extensions import api
 from application.utils import check_required_fields
 from application.vectorstore.vector_creator import VectorCreator
+from application.tts.google_tts import GoogleTTS

 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
@@ -340,6 +341,7 @@ class UploadFile(Resource):
                        ".epub",
                        ".html",
                        ".mdx",
+                        ".json",
                        ".xlsx",
                    ],
                    job_name,
@@ -1662,3 +1664,27 @@ class ManageSync(Resource):
            return make_response(jsonify({"success": False, "error": str(err)}), 400)

        return make_response(jsonify({"success": True}), 200)
+
+
+@user_ns.route("/api/tts")
+class TextToSpeech(Resource):
+    """REST resource that synthesizes speech audio from a text payload."""
+
+    tts_model = api.model(
+        "TextToSpeechModel",
+        {
+            "text": fields.String(
+                required=True, description="Text to be synthesized as audio"
+            ),
+        },
+    )
+
+    @api.expect(tts_model)
+    @api.doc(description="Synthesize audio speech from text")
+    def post(self):
+        """Synthesize the request's ``text`` field and return it base64-encoded.
+
+        Expects a JSON object body with a non-empty string under ``"text"``.
+
+        Returns:
+            200 with ``{"success": True, "audio_base64": ..., "lang": ...}``
+            when synthesis succeeds, or 400 with
+            ``{"success": False, "error": ...}`` when the payload is invalid
+            or the TTS backend raises.
+        """
+        # silent=True makes a missing or malformed JSON body come back as
+        # None instead of raising, so bad requests get the same JSON error
+        # envelope as every other failure rather than an unhandled exception.
+        data = request.get_json(silent=True)
+        text = data.get("text") if isinstance(data, dict) else None
+        if not isinstance(text, str) or not text.strip():
+            return make_response(
+                jsonify({"success": False, "error": "Missing required field: text"}),
+                400,
+            )
+
+        # Keep the try body limited to the calls that talk to the TTS backend.
+        try:
+            tts_instance = GoogleTTS(text)
+            audio_base64, detected_language = tts_instance.text_to_speech()
+        except Exception as err:
+            return make_response(jsonify({"success": False, "error": str(err)}), 400)
+
+        return make_response(
+            jsonify(
+                {
+                    "success": True,
+                    "audio_base64": audio_base64,
+                    "lang": detected_language,
+                }
+            ),
+            200,
+        )
+
+
--- a/application/parser/file/bulk.py
+++ b/application/parser/file/bulk.py
@@ -11,6 +11,7 @@ from application.parser.file.html_parser import HTMLParser
 from application.parser.file.markdown_parser import MarkdownParser
 from application.parser.file.rst_parser import RstParser
 from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
+from application.parser.file.json_parser import JSONParser
 from application.parser.schema.base import Document

 DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
@@ -23,6 +24,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
    ".rst": RstParser(),
    ".html": HTMLParser(),
    ".mdx": MarkdownParser(),
+    ".json":JSONParser(),
 }


--- a/application/parser/file/json_parser.py
+++ b/application/parser/file/json_parser.py
@@ -0,0 +1,57 @@
+import json
+from typing import Any, Dict, List, Union
+from pathlib import Path
+
+from application.parser.file.base_parser import BaseParser
+
+class JSONParser(BaseParser):
+    r"""JSON (.json) parser.
+
+    Parses a JSON file into a list of strings or a single concatenated string.
+    A top-level JSON array contributes one row per element; any other
+    top-level value (object, string, number, ...) is treated as a single row.
+    Rows are rendered with ``str()``, so nested objects appear in Python
+    literal form rather than as re-serialized JSON.
+
+    Args:
+        concat_rows (bool): Whether to concatenate all rows into one string.
+            If set to False, a list with one string per row is returned.
+            True by default.
+
+        row_joiner (str): Separator to use for joining each row.
+            Only used when `concat_rows=True`.
+            Set to "\n" by default.
+
+        json_config (dict): Extra keyword arguments forwarded to `json.load`
+            (for example `object_hook` or `parse_float`). None by default,
+            which is treated as an empty dict.
+
+    """
+
+    def __init__(
+            self,
+            *args: Any,
+            concat_rows: bool = True,
+            row_joiner: str = "\n",
+            json_config: Union[dict, None] = None,
+            **kwargs: Any
+    ) -> None:
+        """Init params."""
+        super().__init__(*args, **kwargs)
+        self._concat_rows = concat_rows
+        self._row_joiner = row_joiner
+        # Copy instead of sharing: a mutable default (or the caller's own
+        # dict) must not be aliased across parser instances.
+        self._json_config = dict(json_config) if json_config else {}
+
+    def _init_parser(self) -> Dict:
+        """Init parser."""
+        return {}
+
+    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
+        """Parse JSON file.
+
+        Args:
+            file (Path): Path of the JSON file to read (decoded as UTF-8).
+            errors (str): Decoding error handler passed to `open`.
+
+        Returns:
+            A single string with all rows joined by `row_joiner` when
+            `concat_rows` is True, otherwise a list with one string per row.
+
+        Raises:
+            json.JSONDecodeError: If the file content is not valid JSON.
+        """
+        with open(file, "r", encoding="utf-8", errors=errors) as f:
+            data = json.load(f, **self._json_config)
+
+        # Only a top-level array is a sequence of rows. Wrapping every other
+        # value keeps a bare string from being joined character by character
+        # and a bare number/bool/null from failing on iteration.
+        if not isinstance(data, list):
+            data = [data]
+
+        rows = [str(item) for item in data]
+        if self._concat_rows:
+            return self._row_joiner.join(rows)
+        return rows
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -85,3 +85,4 @@ vine==5.1.0
 wcwidth==0.2.13
 werkzeug==3.0.4
 yarl==1.11.1
+gTTS==2.3.2
--- a/application/tts/base.py
+++ b/application/tts/base.py
@@ -0,0 +1,10 @@
+from abc import ABC, abstractmethod
+
+
+class BaseTTS(ABC):
+    """Abstract base class for text-to-speech backends.
+
+    Concrete subclasses must override `text_to_speech`; the class cannot be
+    instantiated until they do.
+    """
+
+    def __init__(self):
+        """Initialize the backend; the base class keeps no state."""
+
+    @abstractmethod
+    def text_to_speech(self, *args, **kwargs):
+        """Synthesize speech; arguments and return value are backend-defined."""
--- a/application/tts/google_tts.py
+++ b/application/tts/google_tts.py
@@ -0,0 +1,19 @@
+import io
+import base64
+from gtts import gTTS
+from application.tts.base import BaseTTS
+
+
+class GoogleTTS(BaseTTS):
+    """Text-to-speech backend that delegates synthesis to the `gTTS` package."""
+
+    def __init__(self, text):
+        """Remember the text to synthesize.
+
+        Args:
+            text: The text that `text_to_speech` will turn into audio.
+        """
+        super().__init__()
+        self.text = text
+
+    def text_to_speech(self, lang="en"):
+        """Synthesize `self.text` and return it as base64-encoded audio.
+
+        Args:
+            lang: Language code handed to `gTTS`. Defaults to "en", which is
+                what this method always used before the parameter existed.
+
+        Returns:
+            A ``(audio_base64, lang)`` tuple: the audio bytes written by
+            `gTTS.write_to_fp`, base64-encoded and decoded to a UTF-8 `str`,
+            followed by the language code that was used.
+        """
+        audio_fp = io.BytesIO()
+        tts = gTTS(text=self.text, lang=lang, slow=False)
+        tts.write_to_fp(audio_fp)
+        # getvalue() returns the whole buffer regardless of the current
+        # stream position, so no seek(0) + read() round trip is needed.
+        audio_base64 = base64.b64encode(audio_fp.getvalue()).decode("utf-8")
+        return audio_base64, lang
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -68,10 +68,8 @@ body.dark {
  .table-default td:last-child {
    @apply border-r-0; /* Ensure no right border on the last column */
  }
-
 }

-
 /*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */

 /* Document
--- a/frontend/src/locale/en.json
+++ b/frontend/src/locale/en.json
@@ -86,7 +86,7 @@
      "start": "Start Chatting",
      "name": "Name",
      "choose": "Choose Files",
-      "info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip limited to 25mb",
+      "info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limited to 25mb",
      "uploadedFiles": "Uploaded Files",
      "cancel": "Cancel",
      "train": "Train",
--- a/frontend/src/locale/es.json
+++ b/frontend/src/locale/es.json
@@ -86,7 +86,7 @@
      "start": "Empezar a chatear",
      "name": "Nombre",
      "choose": "Seleccionar Archivos",
-      "info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip limitados a 25 MB",
+      "info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limitados a 25 MB",
      "uploadedFiles": "Archivos Subidos",
      "cancel": "Cancelar",
      "train": "Entrenar",
--- a/frontend/src/locale/jp.json
+++ b/frontend/src/locale/jp.json
@@ -86,7 +86,7 @@
      "start": "チャットを開始する",
      "name": "名前",
      "choose": "ファイルを選択",
-      "info": ".pdf, .txt, .rst, .docx, .md, .zipファイルを25MBまでアップロードしてください",
+      "info": ".pdf, .txt, .rst, .docx, .md, .json, .zipファイルを25MBまでアップロードしてください",
      "uploadedFiles": "アップロードされたファイル",
      "cancel": "キャンセル",
      "train": "トレーニング",
--- a/frontend/src/locale/zh-TW.json
+++ b/frontend/src/locale/zh-TW.json
@@ -80,7 +80,7 @@
      "remote": "遠端",
      "name": "名稱",
      "choose": "選擇檔案",
-      "info": "請上傳 .pdf, .txt, .rst, .docx, .md, .zip 檔案，大小限制為 25MB",
+      "info": "請上傳 .pdf, .txt, .rst, .docx, .md, .json, .zip 檔案，大小限制為 25MB",
      "uploadedFiles": "已上傳的檔案",
      "cancel": "取消",
      "train": "訓練",
--- a/frontend/src/locale/zh.json
+++ b/frontend/src/locale/zh.json
@@ -86,7 +86,7 @@
      "start": "开始聊天",
      "name": "名称",
      "choose": "选择文件",
-      "info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .zip 文件，限 25MB",
+      "info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip 文件，限 25MB",
      "uploadedFiles": "已上传文件",
      "cancel": "取消",
      "train": "训练",
--- a/frontend/src/upload/Upload.tsx
+++ b/frontend/src/upload/Upload.tsx
@@ -314,6 +314,7 @@ function Upload({
      'application/zip': ['.zip'],
      'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
        ['.docx'],
+      'application/json': ['.json'],
      'text/csv': ['.csv'],
      'text/html': ['.html'],
      'application/epub+zip': ['.epub'],