class ExcelParser(BaseParser):
    r"""Excel (.xlsx) parser.

    Parses Excel files using the Pandas `read_excel` function.
    If special parameters are required, use the `pandas_config` dict.

    Every row is rendered as text by converting its cells to `str` and
    joining them with `col_joiner`.

    Args:
        concat_rows (bool): whether to concatenate all rows into one document.
            If set to False, a Document will be created for each row.
            True by default.

        col_joiner (str): Separator to use for joining cols per row.
            Set to ", " by default.

        row_joiner (str): Separator to use for joining each row.
            Only used when `concat_rows=True`.
            Set to "\n" by default.

        pandas_config (dict): Options for the `pandas.read_excel` function call.
            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
            for more information.
            Defaults to None, which is treated as an empty dict; this means
            pandas will try to figure out the table structure on its own.
            If `sheet_name` is given as None or a list, the rows of every
            returned sheet are emitted in the order pandas returns the sheets.

    """

    def __init__(
        self,
        *args: Any,
        concat_rows: bool = True,
        col_joiner: str = ", ",
        row_joiner: str = "\n",
        pandas_config: Union[dict, None] = None,
        **kwargs: Any
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows
        self._col_joiner = col_joiner
        self._row_joiner = row_joiner
        # A literal `{}` default would be one dict object shared by every
        # instance; build a fresh one per parser instead.
        self._pandas_config = {} if pandas_config is None else pandas_config

    def _init_parser(self) -> Dict:
        """Init parser. Nothing is pre-loaded, so the config is empty."""
        return {}

    def _rows_as_text(self, frame: Any) -> List[str]:
        """Render each row of one DataFrame as a `col_joiner`-separated string."""
        return frame.apply(
            lambda row: self._col_joiner.join(row.astype(str).tolist()), axis=1
        ).tolist()

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        """Parse an Excel file into text.

        Args:
            file (Path): Path of the workbook to read.
            errors (str): Unused by this parser; kept so the signature matches
                the other parsers' `parse_file`.

        Returns:
            A single string with all rows joined by `row_joiner` when
            `concat_rows` is True, otherwise a list with one string per row.

        Raises:
            ValueError: If pandas is not installed.
        """
        try:
            import pandas as pd
        except ImportError as err:
            # Chain the cause so the underlying import failure stays visible.
            raise ValueError("pandas module is required to read Excel files.") from err

        loaded = pd.read_excel(file, **self._pandas_config)

        # `sheet_name=None` or a list of sheets makes read_excel return a dict
        # of DataFrames rather than a single DataFrame.
        frames = loaded.values() if isinstance(loaded, dict) else [loaded]

        text_list: List[str] = []
        for frame in frames:
            text_list.extend(self._rows_as_text(frame))

        if self._concat_rows:
            return self._row_joiner.join(text_list)
        return text_list
a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -9,7 +9,6 @@ "version": "0.0.0", "dependencies": { "@reduxjs/toolkit": "^2.2.7", - "@vercel/analytics": "^1.3.1", "chart.js": "^4.4.4", "i18next": "^23.15.1", "i18next-browser-languagedetector": "^8.0.0", @@ -2089,26 +2088,6 @@ "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==" }, - "node_modules/@vercel/analytics": { - "version": "1.3.1", - "resolved": "https://registry.npmjs.org/@vercel/analytics/-/analytics-1.3.1.tgz", - "integrity": "sha512-xhSlYgAuJ6Q4WQGkzYTLmXwhYl39sWjoMA3nHxfkvG+WdBT25c563a7QhwwKivEOZtPJXifYHR1m2ihoisbWyA==", - "dependencies": { - "server-only": "^0.0.1" - }, - "peerDependencies": { - "next": ">= 13", - "react": "^18 || ^19" - }, - "peerDependenciesMeta": { - "next": { - "optional": true - }, - "react": { - "optional": true - } - } - }, "node_modules/@vitejs/plugin-react": { "version": "4.3.1", "resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.1.tgz", @@ -8451,11 +8430,6 @@ "semver": "bin/semver.js" } }, - "node_modules/server-only": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/server-only/-/server-only-0.0.1.tgz", - "integrity": "sha512-qepMx2JxAa5jjfzxG79yPPq+8BuFToHd1hm7kI+Z4zAq1ftQiP7HcxMhDDItrbtwVeLg/cY2JnKnrcFkmiswNA==" - }, "node_modules/set-function-length": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index 176c4fd9..83d531d6 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -20,7 +20,6 @@ }, "dependencies": { "@reduxjs/toolkit": "^2.2.7", - "@vercel/analytics": "^1.3.1", "chart.js": "^4.4.4", "i18next": "^23.15.1", "i18next-browser-languagedetector": "^8.0.0", diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 
0537e695..e1157141 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -3,7 +3,6 @@ import Navigation from './Navigation'; import Conversation from './conversation/Conversation'; import About from './About'; import PageNotFound from './PageNotFound'; -import { inject } from '@vercel/analytics'; import { useMediaQuery } from './hooks'; import { useState } from 'react'; import Setting from './settings'; @@ -11,7 +10,6 @@ import './locale/i18n'; import { Outlet } from 'react-router-dom'; import { SharedConversation } from './conversation/SharedConversation'; import { useDarkTheme } from './hooks'; -inject(); function MainLayout() { const { isMobile } = useMediaQuery(); diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx index 63cdf04b..ceae6716 100644 --- a/frontend/src/Navigation.tsx +++ b/frontend/src/Navigation.tsx @@ -227,8 +227,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { setNavOpen(!navOpen); } }}> - -

DocsGPT

+ + +

DocsGPT

+
+ +
+ {tabs.map((tab, index) => ( + + ))} +
+
+ +
+ + ); +}; + +export default SettingsBar; diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index c9b599bf..f5b48d75 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -79,7 +79,7 @@ "remote": "Remote", "name": "Name", "choose": "Choose Files", - "info": "Please upload .pdf, .txt, .rst, .csv, .docx, .md, .zip limited to 25mb", + "info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .zip limited to 25mb", "uploadedFiles": "Uploaded Files", "cancel": "Cancel", "train": "Train", diff --git a/frontend/src/settings/index.tsx b/frontend/src/settings/index.tsx index ef5434aa..ea3d4428 100644 --- a/frontend/src/settings/index.tsx +++ b/frontend/src/settings/index.tsx @@ -3,8 +3,7 @@ import { useTranslation } from 'react-i18next'; import { useDispatch, useSelector } from 'react-redux'; import userService from '../api/services/userService'; -import ArrowLeft from '../assets/arrow-left.svg'; -import ArrowRight from '../assets/arrow-right.svg'; +import SettingsBar from '../components/SettingsBar'; import i18n from '../locale/i18n'; import { Doc } from '../models/misc'; import { @@ -21,13 +20,6 @@ import Widgets from './Widgets'; export default function Settings() { const dispatch = useDispatch(); const { t } = useTranslation(); - const tabs = [ - t('settings.general.label'), - t('settings.documents.label'), - t('settings.apiKeys.label'), - t('settings.analytics.label'), - t('settings.logs.label'), - ]; const [activeTab, setActiveTab] = React.useState(t('settings.general.label')); const [widgetScreenshot, setWidgetScreenshot] = React.useState( null, @@ -61,39 +53,7 @@ export default function Settings() {

{t('settings.label')}

-
-
- -
-
- {tabs.map((tab, index) => ( - - ))} -
-
- -
-
+ {renderActiveTab()} {/* {activeTab === 'Widgets' && ( @@ -105,13 +65,6 @@ export default function Settings() { ); - function scrollTabs(direction: number) { - const container = document.querySelector('.flex-nowrap'); - if (container) { - container.scrollLeft += direction * 100; // Adjust the scroll amount as needed - } - } - function renderActiveTab() { switch (activeTab) { case t('settings.general.label'): diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index 50a6d357..c09bab53 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -275,6 +275,7 @@ function Upload({ 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], 'text/csv': ['.csv'], + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], }, });