mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-12-02 10:03:15 +00:00
Merge remote-tracking branch 'origin/main' into issues/1226
This commit is contained in:
@@ -10,13 +10,14 @@ from application.parser.file.epub_parser import EpubParser
|
||||
from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
".docx": DocxParser(),
|
||||
".csv": PandasCSVParser(),
|
||||
".xlsx":ExcelParser(),
|
||||
".epub": EpubParser(),
|
||||
".md": MarkdownParser(),
|
||||
".rst": RstParser(),
|
||||
|
||||
@@ -113,3 +113,68 @@ class PandasCSVParser(BaseParser):
|
||||
return (self._row_joiner).join(text_list)
|
||||
else:
|
||||
return text_list
|
||||
|
||||
|
||||
class ExcelParser(BaseParser):
|
||||
r"""Excel (.xlsx) parser.
|
||||
|
||||
Parses Excel files using Pandas `read_excel` function.
|
||||
If special parameters are required, use the `pandas_config` dict.
|
||||
|
||||
Args:
|
||||
concat_rows (bool): whether to concatenate all rows into one document.
|
||||
If set to False, a Document will be created for each row.
|
||||
True by default.
|
||||
|
||||
col_joiner (str): Separator to use for joining cols per row.
|
||||
Set to ", " by default.
|
||||
|
||||
row_joiner (str): Separator to use for joining each row.
|
||||
Only used when `concat_rows=True`.
|
||||
Set to "\n" by default.
|
||||
|
||||
pandas_config (dict): Options for the `pandas.read_excel` function call.
|
||||
Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
|
||||
for more information.
|
||||
Set to empty dict by default, this means pandas will try to figure
|
||||
out the table structure on its own.
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
concat_rows: bool = True,
|
||||
col_joiner: str = ", ",
|
||||
row_joiner: str = "\n",
|
||||
pandas_config: dict = {},
|
||||
**kwargs: Any
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
self._concat_rows = concat_rows
|
||||
self._col_joiner = col_joiner
|
||||
self._row_joiner = row_joiner
|
||||
self._pandas_config = pandas_config
|
||||
|
||||
def _init_parser(self) -> Dict:
|
||||
"""Init parser."""
|
||||
return {}
|
||||
|
||||
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
|
||||
"""Parse file."""
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
raise ValueError("pandas module is required to read Excel files.")
|
||||
|
||||
df = pd.read_excel(file, **self._pandas_config)
|
||||
|
||||
text_list = df.apply(
|
||||
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
|
||||
).tolist()
|
||||
|
||||
if self._concat_rows:
|
||||
return (self._row_joiner).join(text_list)
|
||||
else:
|
||||
return text_list
|
||||
@@ -49,6 +49,7 @@ openapi3-parser==1.1.18
|
||||
orjson==3.10.7
|
||||
packaging==24.1
|
||||
pandas==2.2.3
|
||||
openpyxl==3.1.5
|
||||
pathable==0.4.3
|
||||
pillow==10.4.0
|
||||
portalocker==2.10.1
|
||||
|
||||
@@ -22,7 +22,7 @@ class FaissStore(BaseVectorStore):
|
||||
else:
|
||||
self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True)
|
||||
except Exception:
|
||||
raise # Just re-raise the exception without assigning to e
|
||||
raise
|
||||
|
||||
self.assert_embedding_dimensions(embeddings)
|
||||
|
||||
|
||||
26
frontend/package-lock.json
generated
26
frontend/package-lock.json
generated
@@ -9,7 +9,6 @@
|
||||
"version": "0.0.0",
|
||||
"dependencies": {
|
||||
"@reduxjs/toolkit": "^2.2.7",
|
||||
"@vercel/analytics": "^1.3.1",
|
||||
"chart.js": "^4.4.4",
|
||||
"i18next": "^23.15.1",
|
||||
"i18next-browser-languagedetector": "^8.0.0",
|
||||
@@ -2089,26 +2088,6 @@
|
||||
"resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz",
|
||||
"integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ=="
|
||||
},
|
||||
"node_modules/@vercel/analytics": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@vercel/analytics/-/analytics-1.3.1.tgz",
|
||||
"integrity": "sha512-xhSlYgAuJ6Q4WQGkzYTLmXwhYl39sWjoMA3nHxfkvG+WdBT25c563a7QhwwKivEOZtPJXifYHR1m2ihoisbWyA==",
|
||||
"dependencies": {
|
||||
"server-only": "^0.0.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"next": ">= 13",
|
||||
"react": "^18 || ^19"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"next": {
|
||||
"optional": true
|
||||
},
|
||||
"react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@vitejs/plugin-react": {
|
||||
"version": "4.3.1",
|
||||
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-4.3.1.tgz",
|
||||
@@ -8451,11 +8430,6 @@
|
||||
"semver": "bin/semver.js"
|
||||
}
|
||||
},
|
||||
"node_modules/server-only": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/server-only/-/server-only-0.0.1.tgz",
|
||||
"integrity": "sha512-qepMx2JxAa5jjfzxG79yPPq+8BuFToHd1hm7kI+Z4zAq1ftQiP7HcxMhDDItrbtwVeLg/cY2JnKnrcFkmiswNA=="
|
||||
},
|
||||
"node_modules/set-function-length": {
|
||||
"version": "1.2.2",
|
||||
"resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz",
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@reduxjs/toolkit": "^2.2.7",
|
||||
"@vercel/analytics": "^1.3.1",
|
||||
"chart.js": "^4.4.4",
|
||||
"i18next": "^23.15.1",
|
||||
"i18next-browser-languagedetector": "^8.0.0",
|
||||
|
||||
@@ -3,7 +3,6 @@ import Navigation from './Navigation';
|
||||
import Conversation from './conversation/Conversation';
|
||||
import About from './About';
|
||||
import PageNotFound from './PageNotFound';
|
||||
import { inject } from '@vercel/analytics';
|
||||
import { useMediaQuery } from './hooks';
|
||||
import { useState } from 'react';
|
||||
import Setting from './settings';
|
||||
@@ -11,7 +10,6 @@ import './locale/i18n';
|
||||
import { Outlet } from 'react-router-dom';
|
||||
import { SharedConversation } from './conversation/SharedConversation';
|
||||
import { useDarkTheme } from './hooks';
|
||||
inject();
|
||||
|
||||
function MainLayout() {
|
||||
const { isMobile } = useMediaQuery();
|
||||
|
||||
@@ -227,8 +227,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
|
||||
setNavOpen(!navOpen);
|
||||
}
|
||||
}}>
|
||||
<img className="mb-2 h-10" src={DocsGPT3} alt="" />
|
||||
<p className="my-auto text-2xl font-semibold">DocsGPT</p>
|
||||
<a href="/" className="flex gap-1.5">
|
||||
<img className="mb-2 h-10" src={DocsGPT3} alt="" />
|
||||
<p className="my-auto text-2xl font-semibold">DocsGPT</p>
|
||||
</a>
|
||||
</div>
|
||||
<button
|
||||
className="float-right mr-5"
|
||||
|
||||
96
frontend/src/components/SettingsBar.tsx
Normal file
96
frontend/src/components/SettingsBar.tsx
Normal file
@@ -0,0 +1,96 @@
|
||||
import React, { useCallback, useRef, useState } from 'react';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ArrowRight from '../assets/arrow-right.svg';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
|
||||
type HiddenGradientType = 'left' | 'right' | undefined;
|
||||
|
||||
const useTabs = () => {
|
||||
const { t } = useTranslation();
|
||||
const tabs = [
|
||||
t('settings.general.label'),
|
||||
t('settings.documents.label'),
|
||||
t('settings.apiKeys.label'),
|
||||
t('settings.analytics.label'),
|
||||
t('settings.logs.label'),
|
||||
];
|
||||
return tabs;
|
||||
};
|
||||
|
||||
interface SettingsBarProps {
|
||||
setActiveTab: React.Dispatch<React.SetStateAction<string>>;
|
||||
activeTab: string;
|
||||
}
|
||||
|
||||
const SettingsBar = ({ setActiveTab, activeTab }: SettingsBarProps) => {
|
||||
const [hiddenGradient, setHiddenGradient] =
|
||||
useState<HiddenGradientType>('left');
|
||||
const containerRef = useRef<null | HTMLDivElement>(null);
|
||||
const tabs = useTabs();
|
||||
const scrollTabs = useCallback(
|
||||
(direction: number) => {
|
||||
if (containerRef.current) {
|
||||
const container = containerRef.current;
|
||||
container.scrollLeft += direction * 100; // Adjust the scroll amount as needed
|
||||
if (container.scrollLeft === 0) {
|
||||
setHiddenGradient('left');
|
||||
} else if (
|
||||
container.scrollLeft + container.offsetWidth ===
|
||||
container.scrollWidth
|
||||
) {
|
||||
setHiddenGradient('right');
|
||||
} else {
|
||||
setHiddenGradient(undefined);
|
||||
}
|
||||
}
|
||||
},
|
||||
[containerRef.current],
|
||||
);
|
||||
return (
|
||||
<div className="relative mt-6 flex flex-row items-center space-x-1 md:space-x-0 overflow-auto">
|
||||
<div
|
||||
className={`${hiddenGradient === 'left' ? 'hidden' : ''} md:hidden absolute inset-y-0 left-6 w-14 bg-gradient-to-r from-white dark:from-raisin-black pointer-events-none`}
|
||||
></div>
|
||||
<div
|
||||
className={`${hiddenGradient === 'right' ? 'hidden' : ''} md:hidden absolute inset-y-0 right-6 w-14 bg-gradient-to-l from-white dark:from-raisin-black pointer-events-none`}
|
||||
></div>
|
||||
|
||||
<div className="md:hidden z-10">
|
||||
<button
|
||||
onClick={() => scrollTabs(-1)}
|
||||
className="flex h-6 w-6 items-center rounded-full justify-center transition-all hover:bg-gray-100"
|
||||
>
|
||||
<img src={ArrowLeft} alt="left-arrow" className="h-3" />
|
||||
</button>
|
||||
</div>
|
||||
<div
|
||||
ref={containerRef}
|
||||
className="flex flex-nowrap overflow-x-auto no-scrollbar md:space-x-4 scroll-smooth snap-x"
|
||||
>
|
||||
{tabs.map((tab, index) => (
|
||||
<button
|
||||
key={index}
|
||||
onClick={() => setActiveTab(tab)}
|
||||
className={`snap-start h-9 rounded-3xl px-4 font-bold ${
|
||||
activeTab === tab
|
||||
? 'bg-purple-3000 text-purple-30 dark:bg-dark-charcoal'
|
||||
: 'text-gray-6000'
|
||||
}`}
|
||||
>
|
||||
{tab}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
<div className="md:hidden z-10">
|
||||
<button
|
||||
onClick={() => scrollTabs(1)}
|
||||
className="flex h-6 w-6 rounded-full items-center justify-center hover:bg-gray-100"
|
||||
>
|
||||
<img src={ArrowRight} alt="right-arrow" className="h-3" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default SettingsBar;
|
||||
@@ -79,7 +79,7 @@
|
||||
"remote": "Remote",
|
||||
"name": "Name",
|
||||
"choose": "Choose Files",
|
||||
"info": "Please upload .pdf, .txt, .rst, .csv, .docx, .md, .zip limited to 25mb",
|
||||
"info": "Please upload .pdf, .txt, .rst, .csv, .xlsx, .docx, .md, .zip limited to 25mb",
|
||||
"uploadedFiles": "Uploaded Files",
|
||||
"cancel": "Cancel",
|
||||
"train": "Train",
|
||||
|
||||
@@ -3,8 +3,7 @@ import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
import userService from '../api/services/userService';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ArrowRight from '../assets/arrow-right.svg';
|
||||
import SettingsBar from '../components/SettingsBar';
|
||||
import i18n from '../locale/i18n';
|
||||
import { Doc } from '../models/misc';
|
||||
import {
|
||||
@@ -21,13 +20,6 @@ import Widgets from './Widgets';
|
||||
export default function Settings() {
|
||||
const dispatch = useDispatch();
|
||||
const { t } = useTranslation();
|
||||
const tabs = [
|
||||
t('settings.general.label'),
|
||||
t('settings.documents.label'),
|
||||
t('settings.apiKeys.label'),
|
||||
t('settings.analytics.label'),
|
||||
t('settings.logs.label'),
|
||||
];
|
||||
const [activeTab, setActiveTab] = React.useState(t('settings.general.label'));
|
||||
const [widgetScreenshot, setWidgetScreenshot] = React.useState<File | null>(
|
||||
null,
|
||||
@@ -61,39 +53,7 @@ export default function Settings() {
|
||||
<p className="text-2xl font-bold text-eerie-black dark:text-bright-gray">
|
||||
{t('settings.label')}
|
||||
</p>
|
||||
<div className="mt-6 flex flex-row items-center space-x-4 overflow-auto md:space-x-8 ">
|
||||
<div className="md:hidden">
|
||||
<button
|
||||
onClick={() => scrollTabs(-1)}
|
||||
className="flex h-8 w-8 items-center justify-center rounded-full border-2 border-purple-30 transition-all hover:bg-gray-100"
|
||||
>
|
||||
<img src={ArrowLeft} alt="left-arrow" className="h-6 w-6" />
|
||||
</button>
|
||||
</div>
|
||||
<div className="flex flex-nowrap space-x-4 overflow-x-auto no-scrollbar md:space-x-8">
|
||||
{tabs.map((tab, index) => (
|
||||
<button
|
||||
key={index}
|
||||
onClick={() => setActiveTab(tab)}
|
||||
className={`h-9 rounded-3xl px-4 font-bold ${
|
||||
activeTab === tab
|
||||
? 'bg-purple-3000 text-purple-30 dark:bg-dark-charcoal'
|
||||
: 'text-gray-6000'
|
||||
}`}
|
||||
>
|
||||
{tab}
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
<div className="md:hidden">
|
||||
<button
|
||||
onClick={() => scrollTabs(1)}
|
||||
className="flex h-8 w-8 items-center justify-center rounded-full border-2 border-purple-30 hover:bg-gray-100"
|
||||
>
|
||||
<img src={ArrowRight} alt="right-arrow" className="h-6 w-6" />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<SettingsBar activeTab={activeTab} setActiveTab={setActiveTab} />
|
||||
{renderActiveTab()}
|
||||
|
||||
{/* {activeTab === 'Widgets' && (
|
||||
@@ -105,13 +65,6 @@ export default function Settings() {
|
||||
</div>
|
||||
);
|
||||
|
||||
function scrollTabs(direction: number) {
|
||||
const container = document.querySelector('.flex-nowrap');
|
||||
if (container) {
|
||||
container.scrollLeft += direction * 100; // Adjust the scroll amount as needed
|
||||
}
|
||||
}
|
||||
|
||||
function renderActiveTab() {
|
||||
switch (activeTab) {
|
||||
case t('settings.general.label'):
|
||||
|
||||
@@ -275,6 +275,7 @@ function Upload({
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
|
||||
['.docx'],
|
||||
'text/csv': ['.csv'],
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user