From 83f2fb1e62c215828b2ecd95498e886900e83571 Mon Sep 17 00:00:00 2001 From: kom-senapati Date: Fri, 18 Oct 2024 09:18:15 +0530 Subject: [PATCH 01/15] refactor: Use MongoDB singleton for connection management --- application/api/answer/routes.py | 4 ++-- application/api/internal/routes.py | 4 ++-- application/api/user/routes.py | 4 ++-- application/core/mongo_db.py | 25 +++++++++++++++++++++++++ application/usage.py | 4 ++-- application/worker.py | 4 ++-- 6 files changed, 35 insertions(+), 10 deletions(-) create mode 100644 application/core/mongo_db.py diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py index 17eb5cc3..40c4d8cb 100644 --- a/application/api/answer/routes.py +++ b/application/api/answer/routes.py @@ -11,7 +11,7 @@ from bson.objectid import ObjectId from flask import Blueprint, current_app, make_response, request, Response from flask_restx import fields, Namespace, Resource -from pymongo import MongoClient +from core.mongo_db import MongoDB from application.core.settings import settings from application.error import bad_request @@ -22,7 +22,7 @@ from application.utils import check_required_fields logger = logging.getLogger(__name__) -mongo = MongoClient(settings.MONGO_URI) +mongo = MongoDB.get_client() db = mongo["docsgpt"] conversations_collection = db["conversations"] sources_collection = db["sources"] diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py index 6ecb4346..f004cf97 100755 --- a/application/api/internal/routes.py +++ b/application/api/internal/routes.py @@ -1,13 +1,13 @@ import os import datetime from flask import Blueprint, request, send_from_directory -from pymongo import MongoClient +from core.mongo_db import MongoDB from werkzeug.utils import secure_filename from bson.objectid import ObjectId from application.core.settings import settings -mongo = MongoClient(settings.MONGO_URI) +mongo = MongoDB.get_client() db = mongo["docsgpt"] conversations_collection = 
db["conversations"] sources_collection = db["sources"] diff --git a/application/api/user/routes.py b/application/api/user/routes.py index feee91cc..3469c800 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -8,7 +8,7 @@ from bson.dbref import DBRef from bson.objectid import ObjectId from flask import Blueprint, jsonify, make_response, request from flask_restx import inputs, fields, Namespace, Resource -from pymongo import MongoClient +from core.mongo_db import MongoDB from werkzeug.utils import secure_filename from application.api.user.tasks import ingest, ingest_remote @@ -18,7 +18,7 @@ from application.extensions import api from application.utils import check_required_fields from application.vectorstore.vector_creator import VectorCreator -mongo = MongoClient(settings.MONGO_URI) +mongo = MongoDB.get_client() db = mongo["docsgpt"] conversations_collection = db["conversations"] sources_collection = db["sources"] diff --git a/application/core/mongo_db.py b/application/core/mongo_db.py new file mode 100644 index 00000000..ffb55d7f --- /dev/null +++ b/application/core/mongo_db.py @@ -0,0 +1,25 @@ +from application.core import settings +from pymongo import MongoClient +from flask import current_app, g + + +class MongoDB: + _client = None + + @classmethod + def get_client(cls): + """ + Get the MongoDB client instance, creating it if necessary. + """ + if cls._client is None: + cls._client = MongoClient(settings.MONGO_URI) + return cls._client + + @classmethod + def close_client(cls): + """ + Close the MongoDB client connection. 
+ """ + if cls._client is not None: + cls._client.close() + cls._client = None diff --git a/application/usage.py b/application/usage.py index aba0ec77..21797817 100644 --- a/application/usage.py +++ b/application/usage.py @@ -1,10 +1,10 @@ import sys -from pymongo import MongoClient +from core.mongo_db import MongoDB from datetime import datetime from application.core.settings import settings from application.utils import num_tokens_from_string -mongo = MongoClient(settings.MONGO_URI) +mongo = MongoDB.get_client() db = mongo["docsgpt"] usage_collection = db["token_usage"] diff --git a/application/worker.py b/application/worker.py index f8f38afa..fc780d61 100755 --- a/application/worker.py +++ b/application/worker.py @@ -8,7 +8,7 @@ from urllib.parse import urljoin import requests from bson.objectid import ObjectId -from pymongo import MongoClient +from core.mongo_db import MongoDB from application.core.settings import settings from application.parser.file.bulk import SimpleDirectoryReader @@ -18,7 +18,7 @@ from application.parser.schema.base import Document from application.parser.token_func import group_split from application.utils import count_tokens_docs -mongo = MongoClient(settings.MONGO_URI) +mongo = MongoDB.get_client() db = mongo["docsgpt"] sources_collection = db["sources"] From 5ba917c5e42c29e286c74d0443d88454ff6b7f46 Mon Sep 17 00:00:00 2001 From: kom-senapati Date: Fri, 18 Oct 2024 09:22:27 +0530 Subject: [PATCH 02/15] chore: remove unused imports --- application/core/mongo_db.py | 1 - application/usage.py | 1 - 2 files changed, 2 deletions(-) diff --git a/application/core/mongo_db.py b/application/core/mongo_db.py index ffb55d7f..fdb728a2 100644 --- a/application/core/mongo_db.py +++ b/application/core/mongo_db.py @@ -1,6 +1,5 @@ from application.core import settings from pymongo import MongoClient -from flask import current_app, g class MongoDB: diff --git a/application/usage.py b/application/usage.py index 21797817..31feb5f1 100644 --- 
a/application/usage.py +++ b/application/usage.py @@ -1,7 +1,6 @@ import sys from core.mongo_db import MongoDB from datetime import datetime -from application.core.settings import settings from application.utils import num_tokens_from_string mongo = MongoDB.get_client() From cbea17b4d509243851bac2e726e1ccf2bdb7d5a1 Mon Sep 17 00:00:00 2001 From: Christine Date: Mon, 21 Oct 2024 20:52:44 +0000 Subject: [PATCH 03/15] feat: add edit buttons Signed-off-by: Christine --- docs/theme.config.jsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/theme.config.jsx b/docs/theme.config.jsx index 2b868db7..777a0ed5 100644 --- a/docs/theme.config.jsx +++ b/docs/theme.config.jsx @@ -51,6 +51,9 @@ const config = { footer: { text: `MIT ${new Date().getFullYear()} © DocsGPT`, }, + editLink: { + content: 'Edit this page on GitHub', + }, logo() { return (
From e4a1730a5b9959326ef1e544dc5eb7f0215336cf Mon Sep 17 00:00:00 2001 From: kom-senapati Date: Tue, 22 Oct 2024 17:58:41 +0530 Subject: [PATCH 04/15] fix: MongoDB imports --- application/api/answer/routes.py | 2 +- application/api/internal/routes.py | 2 +- application/api/user/routes.py | 2 +- application/usage.py | 2 +- application/worker.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py index 40c4d8cb..ad434157 100644 --- a/application/api/answer/routes.py +++ b/application/api/answer/routes.py @@ -11,8 +11,8 @@ from bson.objectid import ObjectId from flask import Blueprint, current_app, make_response, request, Response from flask_restx import fields, Namespace, Resource -from core.mongo_db import MongoDB +from application.core.mongo_db import MongoDB from application.core.settings import settings from application.error import bad_request from application.extensions import api diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py index f004cf97..c8e32d11 100755 --- a/application/api/internal/routes.py +++ b/application/api/internal/routes.py @@ -1,10 +1,10 @@ import os import datetime from flask import Blueprint, request, send_from_directory -from core.mongo_db import MongoDB from werkzeug.utils import secure_filename from bson.objectid import ObjectId +from application.core.mongo_db import MongoDB from application.core.settings import settings mongo = MongoDB.get_client() diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 3469c800..ca4a051b 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -8,11 +8,11 @@ from bson.dbref import DBRef from bson.objectid import ObjectId from flask import Blueprint, jsonify, make_response, request from flask_restx import inputs, fields, Namespace, Resource -from core.mongo_db import MongoDB from werkzeug.utils import secure_filename from 
application.api.user.tasks import ingest, ingest_remote +from application.core.mongo_db import MongoDB from application.core.settings import settings from application.extensions import api from application.utils import check_required_fields diff --git a/application/usage.py b/application/usage.py index 31feb5f1..e87ebe38 100644 --- a/application/usage.py +++ b/application/usage.py @@ -1,6 +1,6 @@ import sys -from core.mongo_db import MongoDB from datetime import datetime +from application.core.mongo_db import MongoDB from application.utils import num_tokens_from_string mongo = MongoDB.get_client() diff --git a/application/worker.py b/application/worker.py index fc780d61..33cd90e5 100755 --- a/application/worker.py +++ b/application/worker.py @@ -8,8 +8,8 @@ from urllib.parse import urljoin import requests from bson.objectid import ObjectId -from core.mongo_db import MongoDB +from application.core.mongo_db import MongoDB from application.core.settings import settings from application.parser.file.bulk import SimpleDirectoryReader from application.parser.open_ai_func import call_openai_api From 3b76b3ddceadcdc8cc7742d9624017ad43896924 Mon Sep 17 00:00:00 2001 From: kom-senapati Date: Tue, 22 Oct 2024 17:58:41 +0530 Subject: [PATCH 05/15] fix: Settings import --- application/api/answer/routes.py | 2 +- application/api/internal/routes.py | 2 +- application/api/user/routes.py | 2 +- application/core/mongo_db.py | 4 ++-- application/usage.py | 2 +- application/worker.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py index 40c4d8cb..ad434157 100644 --- a/application/api/answer/routes.py +++ b/application/api/answer/routes.py @@ -11,8 +11,8 @@ from bson.objectid import ObjectId from flask import Blueprint, current_app, make_response, request, Response from flask_restx import fields, Namespace, Resource -from core.mongo_db import MongoDB +from application.core.mongo_db import MongoDB 
from application.core.settings import settings from application.error import bad_request from application.extensions import api diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py index f004cf97..c8e32d11 100755 --- a/application/api/internal/routes.py +++ b/application/api/internal/routes.py @@ -1,10 +1,10 @@ import os import datetime from flask import Blueprint, request, send_from_directory -from core.mongo_db import MongoDB from werkzeug.utils import secure_filename from bson.objectid import ObjectId +from application.core.mongo_db import MongoDB from application.core.settings import settings mongo = MongoDB.get_client() diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 3469c800..ca4a051b 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -8,11 +8,11 @@ from bson.dbref import DBRef from bson.objectid import ObjectId from flask import Blueprint, jsonify, make_response, request from flask_restx import inputs, fields, Namespace, Resource -from core.mongo_db import MongoDB from werkzeug.utils import secure_filename from application.api.user.tasks import ingest, ingest_remote +from application.core.mongo_db import MongoDB from application.core.settings import settings from application.extensions import api from application.utils import check_required_fields diff --git a/application/core/mongo_db.py b/application/core/mongo_db.py index fdb728a2..a29ca5d6 100644 --- a/application/core/mongo_db.py +++ b/application/core/mongo_db.py @@ -1,4 +1,4 @@ -from application.core import settings +from application.core.settings import Settings from pymongo import MongoClient @@ -11,7 +11,7 @@ class MongoDB: Get the MongoDB client instance, creating it if necessary. 
""" if cls._client is None: - cls._client = MongoClient(settings.MONGO_URI) + cls._client = MongoClient(Settings.MONGO_URI) return cls._client @classmethod diff --git a/application/usage.py b/application/usage.py index 31feb5f1..e87ebe38 100644 --- a/application/usage.py +++ b/application/usage.py @@ -1,6 +1,6 @@ import sys -from core.mongo_db import MongoDB from datetime import datetime +from application.core.mongo_db import MongoDB from application.utils import num_tokens_from_string mongo = MongoDB.get_client() diff --git a/application/worker.py b/application/worker.py index fc780d61..33cd90e5 100755 --- a/application/worker.py +++ b/application/worker.py @@ -8,8 +8,8 @@ from urllib.parse import urljoin import requests from bson.objectid import ObjectId -from core.mongo_db import MongoDB +from application.core.mongo_db import MongoDB from application.core.settings import settings from application.parser.file.bulk import SimpleDirectoryReader from application.parser.open_ai_func import call_openai_api From 8564c2ba72b9f9b0c5cba6fc0902756021c40046 Mon Sep 17 00:00:00 2001 From: shatanikmahanty Date: Sat, 26 Oct 2024 22:00:22 +0530 Subject: [PATCH 06/15] Fix: Fonts on Safari browser --- frontend/src/index.css | 1 + 1 file changed, 1 insertion(+) diff --git a/frontend/src/index.css b/frontend/src/index.css index 1eca983c..fe9bec3f 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -4,6 +4,7 @@ :root { --viewport-height: 100vh; + font-synthesis: none !important; } @supports (height: 100dvh) { From 45f930a9e2c2337aa23690b50a77357baad76869 Mon Sep 17 00:00:00 2001 From: Arnav Mahalpure <100768252+AranavMahalpure@users.noreply.github.com> Date: Sun, 27 Oct 2024 14:53:47 +0530 Subject: [PATCH 07/15] Update run-with-docker-compose.sh Source Environment Variables: source .env loads environment variables from the .env file, making them available within the script. 
Conditional Check for Azure Configuration: The if condition checks if all required Azure variables are set (non-empty). If they are, it runs Docker Compose with docker-compose-azure.yaml. Otherwise, it defaults to the standard configuration with docker-compose.yaml. Build and Run Services: Depending on the condition, the script either builds and runs services with Azure settings (docker-compose-azure.yaml) or the standard configuration (docker-compose.yaml). --- run-with-docker-compose.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run-with-docker-compose.sh b/run-with-docker-compose.sh index 61aab467..145b1e23 100755 --- a/run-with-docker-compose.sh +++ b/run-with-docker-compose.sh @@ -4,8 +4,8 @@ source .env if [[ -n "$OPENAI_API_BASE" ]] && [[ -n "$OPENAI_API_VERSION" ]] && [[ -n "$AZURE_DEPLOYMENT_NAME" ]] && [[ -n "$AZURE_EMBEDDINGS_DEPLOYMENT_NAME" ]]; then echo "Running Azure Configuration" - docker compose -f docker-compose-azure.yaml build && docker compose -f docker-compose-azure.yaml up + docker compose -f docker-compose-azure.yaml up --build else echo "Running Plain Configuration" - docker compose build && docker compose up + docker compose up --build fi From 18ed255f5a8488a0dcba441dceea0767d563dcb0 Mon Sep 17 00:00:00 2001 From: kom-senapati Date: Mon, 28 Oct 2024 19:30:38 +0530 Subject: [PATCH 08/15] fix: import settings object of Settings class in mongo_db.py --- application/core/mongo_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/core/mongo_db.py b/application/core/mongo_db.py index a29ca5d6..52006aa7 100644 --- a/application/core/mongo_db.py +++ b/application/core/mongo_db.py @@ -1,4 +1,4 @@ -from application.core.settings import Settings +from application.core.settings import settings from pymongo import MongoClient @@ -11,7 +11,7 @@ class MongoDB: Get the MongoDB client instance, creating it if necessary. 
""" if cls._client is None: - cls._client = MongoClient(Settings.MONGO_URI) + cls._client = MongoClient(settings.MONGO_URI) return cls._client @classmethod From 23889f7f16cec76cc0407ba959d5d24c9093d584 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 31 Oct 2024 11:32:17 +0000 Subject: [PATCH 09/15] Minor word change --- frontend/src/locale/en.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index cefb99b7..4e1da2d2 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -12,7 +12,7 @@ "cancel": "Cancel", "help": "Help", "emailUs": "Email us", - "documentation": "documentation", + "documentation": "Documentation", "demo": [ { "header": "Learn about DocsGPT", From 82189b0a3c54851692a8c4dd2d665f90539094a2 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 31 Oct 2024 11:32:39 +0000 Subject: [PATCH 10/15] capitalisation --- frontend/src/locale/es.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index 66b457e8..8cb0ff1d 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -12,7 +12,7 @@ "cancel": "Cancelar", "help": "Asistencia", "emailUs": "Envíanos un correo", - "documentation": "documentación", + "documentation": "Documentación", "demo": [ { "header": "Aprende sobre DocsGPT", From 5c756348a545167bbe5b46a28acafa4bcc541d7e Mon Sep 17 00:00:00 2001 From: JeevaRamanathan M Date: Thu, 31 Oct 2024 11:47:12 +0000 Subject: [PATCH 11/15] feat: Presentation parser implementation Signed-off-by: JeevaRamanathan M --- application/api/user/routes.py | 1 + application/parser/file/bulk.py | 2 + application/parser/file/pptx_parser.py | 75 ++++++++++++++++++++++++++ application/requirements.txt | 5 +- frontend/src/locale/en.json | 2 +- frontend/src/locale/es.json | 2 +- frontend/src/locale/jp.json | 2 +- frontend/src/locale/zh-TW.json | 2 +- frontend/src/locale/zh.json | 2 +- 
frontend/src/upload/Upload.tsx | 2 + 10 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 application/parser/file/pptx_parser.py diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 794c69d4..84a67863 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -343,6 +343,7 @@ class UploadFile(Resource): ".mdx", ".json", ".xlsx", + ".pptx", ], job_name, final_filename, diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py index bb63aa61..3b8fbca8 100644 --- a/application/parser/file/bulk.py +++ b/application/parser/file/bulk.py @@ -12,6 +12,7 @@ from application.parser.file.markdown_parser import MarkdownParser from application.parser.file.rst_parser import RstParser from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser from application.parser.file.json_parser import JSONParser +from application.parser.file.pptx_parser import PPTXParser from application.parser.schema.base import Document DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = { @@ -25,6 +26,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = { ".html": HTMLParser(), ".mdx": MarkdownParser(), ".json":JSONParser(), + ".pptx":PPTXParser(), } diff --git a/application/parser/file/pptx_parser.py b/application/parser/file/pptx_parser.py new file mode 100644 index 00000000..00cb3698 --- /dev/null +++ b/application/parser/file/pptx_parser.py @@ -0,0 +1,75 @@ +"""PPT parser. +Contains parsers for presentation (.pptx) files to extract slide text. +""" +from pathlib import Path +from typing import Any, Dict, List, Union + +from application.parser.file.base_parser import BaseParser + +class PPTXParser(BaseParser): + r"""PPTX (.pptx) parser for extracting text from PowerPoint slides. + Args: + concat_slides (bool): Specifies whether to concatenate all slide text into one document. + - If True, slide texts will be joined together as a single string. 
+ - If False, each slide's text will be stored as a separate entry in a list. + Set to True by default. + slide_separator (str): Separator used to join slides' text content. + Only used when `concat_slides=True`. Default is "\n". + Refer to https://python-pptx.readthedocs.io/en/latest/ for more information. + """ + + def __init__( + self, + *args: Any, + concat_slides: bool = True, + slide_separator: str = "\n", + **kwargs: Any + ) -> None: + """Init params.""" + super().__init__(*args, **kwargs) + self._concat_slides = concat_slides + self._slide_separator = slide_separator + + def _init_parser(self) -> Dict: + """Init parser.""" + return {} + + def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]: + r""" + Parse a .pptx file and extract text from each slide. + Args: + file (Path): Path to the .pptx file. + errors (str): Error handling policy ('ignore' by default). + Returns: + Union[str, List[str]]: Concatenated text if concat_slides is True, + otherwise a list of slide texts. 
+ """ + + try: + from pptx import Presentation + except ImportError: + raise ImportError("pptx module is required to read .PPTX files.") + + try: + presentation = Presentation(file) + slide_texts=[] + + # Iterate over each slide in the presentation + for slide in presentation.slides: + slide_text="" + + # Iterate over each shape in the slide + for shape in slide.shapes: + # Check if the shape has a 'text' attribute and append that to the slide_text + if hasattr(shape,"text"): + slide_text+=shape.text + + slide_texts.append(slide_text.strip()) + + if self._concat_slides: + return self._slide_separator.join(slide_texts) + else: + return slide_texts + + except Exception as e: + raise e \ No newline at end of file diff --git a/application/requirements.txt b/application/requirements.txt index aad629f1..5325a849 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -14,6 +14,7 @@ esutils==1.0.1 Flask==3.0.3 faiss-cpu==1.8.0.post1 flask-restx==1.3.0 +gTTS==2.3.2 gunicorn==23.0.0 html2text==2024.2.26 javalang==0.13.0 @@ -65,6 +66,7 @@ pymongo==4.8.0 pypdf2==3.0.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 +python-pptx==0.4.1 qdrant-client==1.11.0 redis==5.0.1 referencing==0.30.2 @@ -84,5 +86,4 @@ urllib3==2.2.3 vine==5.1.0 wcwidth==0.2.13 werkzeug==3.0.4 -yarl==1.11.1 -gTTS==2.3.2 \ No newline at end of file +yarl==1.11.1 \ No newline at end of file diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index a1f254ac..52d3b50e 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -86,7 +86,7 @@ "start": "Start Chatting", "name": "Name", "choose": "Choose Files", - "info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limited to 25mb", + "info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .pptx, .zip limited to 25mb", "uploadedFiles": "Uploaded Files", "cancel": "Cancel", "train": "Train", diff --git a/frontend/src/locale/es.json 
b/frontend/src/locale/es.json index 6a096ffd..44fafed7 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -86,7 +86,7 @@ "start": "Empezar a chatear", "name": "Nombre", "choose": "Seleccionar Archivos", - "info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip limitados a 25 MB", + "info": "Por favor, suba archivos .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .pptx, .zip limitados a 25 MB", "uploadedFiles": "Archivos Subidos", "cancel": "Cancelar", "train": "Entrenar", diff --git a/frontend/src/locale/jp.json b/frontend/src/locale/jp.json index 841a477b..a69ae31f 100644 --- a/frontend/src/locale/jp.json +++ b/frontend/src/locale/jp.json @@ -86,7 +86,7 @@ "start": "チャットを開始する", "name": "名前", "choose": "ファイルを選択", - "info": ".pdf, .txt, .rst, .docx, .md, .json, .zipファイルを25MBまでアップロードしてください", + "info": ".pdf, .txt, .rst, .docx, .md, .json, .pptx, .zipファイルを25MBまでアップロードしてください", "uploadedFiles": "アップロードされたファイル", "cancel": "キャンセル", "train": "トレーニング", diff --git a/frontend/src/locale/zh-TW.json b/frontend/src/locale/zh-TW.json index 35df818b..fa0638f4 100644 --- a/frontend/src/locale/zh-TW.json +++ b/frontend/src/locale/zh-TW.json @@ -80,7 +80,7 @@ "remote": "遠端", "name": "名稱", "choose": "選擇檔案", - "info": "請上傳 .pdf, .txt, .rst, .docx, .md, .json, .zip 檔案,大小限制為 25MB", + "info": "請上傳 .pdf, .txt, .rst, .docx, .md, .json, .pptx, .zip 檔案,大小限制為 25MB", "uploadedFiles": "已上傳的檔案", "cancel": "取消", "train": "訓練", diff --git a/frontend/src/locale/zh.json b/frontend/src/locale/zh.json index 710c5e3e..51f8bfe9 100644 --- a/frontend/src/locale/zh.json +++ b/frontend/src/locale/zh.json @@ -86,7 +86,7 @@ "start": "开始聊天", "name": "名称", "choose": "选择文件", - "info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .zip 文件,限 25MB", + "info": "请上传 .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .html, .epub, .json, .pptx, .zip 文件,限 25MB", "uploadedFiles": "已上传文件", "cancel": "取消", "train": 
"训练", diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index 2da284c3..81ce9f2b 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -321,6 +321,8 @@ function Upload({ 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': [ '.xlsx', ], + 'application/vnd.openxmlformats-officedocument.presentationml.presentation': + ['.pptx'], }, }); From 78c819f976fc534224b4aaee88d4a6f7dac3b1e4 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 31 Oct 2024 17:33:09 +0000 Subject: [PATCH 12/15] fix: history bug --- application/retriever/brave_search.py | 1 - application/retriever/classic_rag.py | 2 -- application/retriever/duckduck_search.py | 1 - 3 files changed, 4 deletions(-) diff --git a/application/retriever/brave_search.py b/application/retriever/brave_search.py index 29666a57..1fd844b2 100644 --- a/application/retriever/brave_search.py +++ b/application/retriever/brave_search.py @@ -75,7 +75,6 @@ class BraveRetSearch(BaseRetriever): if len(self.chat_history) > 1: tokens_current_history = 0 # count tokens in history - self.chat_history.reverse() for i in self.chat_history: if "prompt" in i and "response" in i: tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string( diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py index b87b5852..6a67cb38 100644 --- a/application/retriever/classic_rag.py +++ b/application/retriever/classic_rag.py @@ -78,7 +78,6 @@ class ClassicRAG(BaseRetriever): if len(self.chat_history) > 1: tokens_current_history = 0 # count tokens in history - self.chat_history.reverse() for i in self.chat_history: if "prompt" in i and "response" in i: tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string( @@ -97,7 +96,6 @@ class ClassicRAG(BaseRetriever): llm = LLMCreator.create_llm( settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key ) - completion = llm.gen_stream(model=self.gpt_model, 
messages=messages_combine) for line in completion: yield {"answer": str(line)} diff --git a/application/retriever/duckduck_search.py b/application/retriever/duckduck_search.py index d746ecaa..6ae56226 100644 --- a/application/retriever/duckduck_search.py +++ b/application/retriever/duckduck_search.py @@ -92,7 +92,6 @@ class DuckDuckSearch(BaseRetriever): if len(self.chat_history) > 1: tokens_current_history = 0 # count tokens in history - self.chat_history.reverse() for i in self.chat_history: if "prompt" in i and "response" in i: tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string( From 0b7be94d1359ca0ff578ac9abec8db321478e761 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 31 Oct 2024 17:56:32 +0000 Subject: [PATCH 13/15] fix: dependency version --- application/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/requirements.txt b/application/requirements.txt index 5325a849..2f28c2ea 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -66,7 +66,7 @@ pymongo==4.8.0 pypdf2==3.0.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 -python-pptx==0.4.1 +python-pptx==1.0.2 qdrant-client==1.11.0 redis==5.0.1 referencing==0.30.2 From 0684449c2a00ea655024f59946861dca68cf8ec7 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 31 Oct 2024 22:24:19 +0000 Subject: [PATCH 14/15] Update lexeu-competition.md --- lexeu-competition.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lexeu-competition.md b/lexeu-competition.md index 1077de29..e8824438 100644 --- a/lexeu-competition.md +++ b/lexeu-competition.md @@ -12,8 +12,8 @@ Welcome to the LLM Document Analysis by [LexEU](https://www.lexeu.ai/) competiti ### 📆 Timeline: - **Competition Announcement:** 1st October -- **Deadline for Submissions:** 27th October -- **Results Announcement:** Early November/ Late October +- **Deadline for Submissions:** 8th November +- **Results Announcement:** Early November ## 📜 How to
Participate: From 0bb014c965e60a607be84952521d3e36b800bfb5 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 31 Oct 2024 22:24:42 +0000 Subject: [PATCH 15/15] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 8f5897fa..eeecb598 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,6 @@ Say goodbye to time-consuming manual searches, and let