Merge remote-tracking branch 'origin/main' into issues/1226

This commit is contained in:
shatanikmahanty
2024-10-07 10:11:05 +05:30
12 changed files with 173 additions and 83 deletions

View File

@@ -10,13 +10,14 @@ from application.parser.file.epub_parser import EpubParser
from application.parser.file.html_parser import HTMLParser
from application.parser.file.markdown_parser import MarkdownParser
from application.parser.file.rst_parser import RstParser
from application.parser.file.tabular_parser import PandasCSVParser
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
from application.parser.schema.base import Document
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
".docx": DocxParser(),
".csv": PandasCSVParser(),
".xlsx":ExcelParser(),
".epub": EpubParser(),
".md": MarkdownParser(),
".rst": RstParser(),

View File

@@ -113,3 +113,68 @@ class PandasCSVParser(BaseParser):
return (self._row_joiner).join(text_list)
else:
return text_list
class ExcelParser(BaseParser):
r"""Excel (.xlsx) parser.
Parses Excel files using Pandas `read_excel` function.
If special parameters are required, use the `pandas_config` dict.
Args:
concat_rows (bool): whether to concatenate all rows into one document.
If set to False, a Document will be created for each row.
True by default.
col_joiner (str): Separator to use for joining cols per row.
Set to ", " by default.
row_joiner (str): Separator to use for joining each row.
Only used when `concat_rows=True`.
Set to "\n" by default.
pandas_config (dict): Options for the `pandas.read_excel` function call.
Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
for more information.
Set to empty dict by default, this means pandas will try to figure
out the table structure on its own.
"""
def __init__(
self,
*args: Any,
concat_rows: bool = True,
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
**kwargs: Any
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._concat_rows = concat_rows
self._col_joiner = col_joiner
self._row_joiner = row_joiner
self._pandas_config = pandas_config
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
"""Parse file."""
try:
import pandas as pd
except ImportError:
raise ValueError("pandas module is required to read Excel files.")
df = pd.read_excel(file, **self._pandas_config)
text_list = df.apply(
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
).tolist()
if self._concat_rows:
return (self._row_joiner).join(text_list)
else:
return text_list

View File

@@ -49,6 +49,7 @@ openapi3-parser==1.1.18
orjson==3.10.7
packaging==24.1
pandas==2.2.3
openpyxl==3.1.5
pathable==0.4.3
pillow==10.4.0
portalocker==2.10.1

View File

@@ -22,7 +22,7 @@ class FaissStore(BaseVectorStore):
else:
self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True)
except Exception:
raise # Just re-raise the exception without assigning to e
raise
self.assert_embedding_dimensions(embeddings)

View File

@@ -9,7 +9,6 @@
"version": "0.0.0",
"dependencies": {
"@reduxjs/toolkit": "^2.2.7",
"@vercel/analytics": "^1.3.1",
"chart.js": "^4.4.4",
"i18next": "^23.15.1",
"i18next-browser-languagedetector": "^8.0.0",
@@ -2089,26 +2088,6 @@
"resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz",
"integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ=="
},
"node_modules/@vercel/analytics": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/@vercel/analytics/-/analytics-1.3.1.tgz",
"integrity": "sha512-xhSlYgAuJ6Q4WQGkzYTLmXwhYl39sWjoMA3nHxfkvG+WdBT25c563a7QhwwKivEOZtPJXifYHR1m2ihoisbWyA==",
"dependencies": {
"server-only": "^0.0.1"
},
"peerDependencies": {
"next": ">= 13",
"react": "^18 || ^19"
},
"peerDependenciesMeta": {
"next": {
"optional": true
},
"react": {
"optional": true
}
}
},
"node_modules/@vitejs/plugin-react": {
"version": "4.3.1",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.1.tgz",
@@ -8451,11 +8430,6 @@
"semver": "bin/semver.js"
}
},
"node_modules/server-only": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/server-only/-/server-only-0.0.1.tgz",
"integrity": "sha512-qepMx2JxAa5jjfzxG79yPPq+8BuFToHd1hm7kI+Z4zAq1ftQiP7HcxMhDDItrbtwVeLg/cY2JnKnrcFkmiswNA=="
},
"node_modules/set-function-length": {
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",

View File

@@ -20,7 +20,6 @@
},
"dependencies": {
"@reduxjs/toolkit": "^2.2.7",
"@vercel/analytics": "^1.3.1",
"chart.js": "^4.4.4",
"i18next": "^23.15.1",
"i18next-browser-languagedetector": "^8.0.0",

View File

@@ -3,7 +3,6 @@ import Navigation from './Navigation';
import Conversation from './conversation/Conversation';
import About from './About';
import PageNotFound from './PageNotFound';
import { inject } from '@vercel/analytics';
import { useMediaQuery } from './hooks';
import { useState } from 'react';
import Setting from './settings';
@@ -11,7 +10,6 @@ import './locale/i18n';
import { Outlet } from 'react-router-dom';
import { SharedConversation } from './conversation/SharedConversation';
import { useDarkTheme } from './hooks';
inject();
function MainLayout() {
const { isMobile } = useMediaQuery();

View File

@@ -227,8 +227,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
setNavOpen(!navOpen);
}
}}>
<img className="mb-2 h-10" src={DocsGPT3} alt="" />
<p className="my-auto text-2xl font-semibold">DocsGPT</p>
<a href="/" className="flex gap-1.5">
<img className="mb-2 h-10" src={DocsGPT3} alt="" />
<p className="my-auto text-2xl font-semibold">DocsGPT</p>
</a>
</div>
<button
className="float-right mr-5"

View File

@@ -0,0 +1,96 @@
import React, { useCallback, useRef, useState } from 'react';
import ArrowLeft from '../assets/arrow-left.svg';
import ArrowRight from '../assets/arrow-right.svg';
import { useTranslation } from 'react-i18next';
type HiddenGradientType = 'left' | 'right' | undefined;
const useTabs = () => {
const { t } = useTranslation();
const tabs = [
t('settings.general.label'),
t('settings.documents.label'),
t('settings.apiKeys.label'),
t('settings.analytics.label'),
t('settings.logs.label'),
];
return tabs;
};
interface SettingsBarProps {
setActiveTab: React.Dispatch<React.SetStateAction<string>>;
activeTab: string;
}
const SettingsBar = ({ setActiveTab, activeTab }: SettingsBarProps) => {
const [hiddenGradient, setHiddenGradient] =
useState<HiddenGradientType>('left');
const containerRef = useRef<null | HTMLDivElement>(null);
const tabs = useTabs();
const scrollTabs = useCallback(
(direction: number) => {
if (containerRef.current) {
const container = containerRef.current;
container.scrollLeft += direction * 100; // Adjust the scroll amount as needed
if (container.scrollLeft === 0) {
setHiddenGradient('left');
} else if (
container.scrollLeft + container.offsetWidth ===
container.scrollWidth
) {
setHiddenGradient('right');
} else {
setHiddenGradient(undefined);
}
}
},
[containerRef.current],
);
return (
<div className="relative mt-6 flex flex-row items-center space-x-1 md:space-x-0 overflow-auto">
<div
className={`${hiddenGradient === 'left' ? 'hidden' : ''} md:hidden absolute inset-y-0 left-6 w-14 bg-gradient-to-r from-white dark:from-raisin-black pointer-events-none`}
></div>
<div
className={`${hiddenGradient === 'right' ? 'hidden' : ''} md:hidden absolute inset-y-0 right-6 w-14 bg-gradient-to-l from-white dark:from-raisin-black pointer-events-none`}
></div>
<div className="md:hidden z-10">
<button
onClick={() => scrollTabs(-1)}
className="flex h-6 w-6 items-center rounded-full justify-center transition-all hover:bg-gray-100"
>
<img src={ArrowLeft} alt="left-arrow" className="h-3" />
</button>
</div>
<div
ref={containerRef}
className="flex flex-nowrap overflow-x-auto no-scrollbar md:space-x-4 scroll-smooth snap-x"
>
{tabs.map((tab, index) => (
<button
key={index}
onClick={() => setActiveTab(tab)}
className={`snap-start h-9 rounded-3xl px-4 font-bold ${
activeTab === tab
? 'bg-purple-3000 text-purple-30 dark:bg-dark-charcoal'
: 'text-gray-6000'
}`}
>
{tab}
</button>
))}
</div>
<div className="md:hidden z-10">
<button
onClick={() => scrollTabs(1)}
className="flex h-6 w-6 rounded-full items-center justify-center hover:bg-gray-100"
>
<img src={ArrowRight} alt="right-arrow" className="h-3" />
</button>
</div>
</div>
);
};
export default SettingsBar;

View File

@@ -79,7 +79,7 @@
"remote": "Remote",
"name": "Name",
"choose": "Choose Files",
"info": "Please upload .pdf, .txt, .rst, .csv, .docx, .md, .zip limited to 25mb",
"info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .zip limited to 25mb",
"uploadedFiles": "Uploaded Files",
"cancel": "Cancel",
"train": "Train",

View File

@@ -3,8 +3,7 @@ import { useTranslation } from 'react-i18next';
import { useDispatch, useSelector } from 'react-redux';
import userService from '../api/services/userService';
import ArrowLeft from '../assets/arrow-left.svg';
import ArrowRight from '../assets/arrow-right.svg';
import SettingsBar from '../components/SettingsBar';
import i18n from '../locale/i18n';
import { Doc } from '../models/misc';
import {
@@ -21,13 +20,6 @@ import Widgets from './Widgets';
export default function Settings() {
const dispatch = useDispatch();
const { t } = useTranslation();
const tabs = [
t('settings.general.label'),
t('settings.documents.label'),
t('settings.apiKeys.label'),
t('settings.analytics.label'),
t('settings.logs.label'),
];
const [activeTab, setActiveTab] = React.useState(t('settings.general.label'));
const [widgetScreenshot, setWidgetScreenshot] = React.useState<File | null>(
null,
@@ -61,39 +53,7 @@ export default function Settings() {
<p className="text-2xl font-bold text-eerie-black dark:text-bright-gray">
{t('settings.label')}
</p>
<div className="mt-6 flex flex-row items-center space-x-4 overflow-auto md:space-x-8 ">
<div className="md:hidden">
<button
onClick={() => scrollTabs(-1)}
className="flex h-8 w-8 items-center justify-center rounded-full border-2 border-purple-30 transition-all hover:bg-gray-100"
>
<img src={ArrowLeft} alt="left-arrow" className="h-6 w-6" />
</button>
</div>
<div className="flex flex-nowrap space-x-4 overflow-x-auto no-scrollbar md:space-x-8">
{tabs.map((tab, index) => (
<button
key={index}
onClick={() => setActiveTab(tab)}
className={`h-9 rounded-3xl px-4 font-bold ${
activeTab === tab
? 'bg-purple-3000 text-purple-30 dark:bg-dark-charcoal'
: 'text-gray-6000'
}`}
>
{tab}
</button>
))}
</div>
<div className="md:hidden">
<button
onClick={() => scrollTabs(1)}
className="flex h-8 w-8 items-center justify-center rounded-full border-2 border-purple-30 hover:bg-gray-100"
>
<img src={ArrowRight} alt="right-arrow" className="h-6 w-6" />
</button>
</div>
</div>
<SettingsBar activeTab={activeTab} setActiveTab={setActiveTab} />
{renderActiveTab()}
{/* {activeTab === 'Widgets' && (
@@ -105,13 +65,6 @@ export default function Settings() {
</div>
);
function scrollTabs(direction: number) {
const container = document.querySelector('.flex-nowrap');
if (container) {
container.scrollLeft += direction * 100; // Adjust the scroll amount as needed
}
}
function renderActiveTab() {
switch (activeTab) {
case t('settings.general.label'):

View File

@@ -275,6 +275,7 @@ function Upload({
'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
['.docx'],
'text/csv': ['.csv'],
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
},
});