Merge branch 'main' into ExcelParser

This commit is contained in:
Devendra Parihar
2024-10-02 22:25:09 +05:30
committed by GitHub
12 changed files with 508 additions and 81 deletions

View File

@@ -7,7 +7,7 @@ All contributors with accepted PRs will receive a cool Holopin! 🤩 (Watch out
### 🏆 Top 50 contributors will recieve a special T-shirt
### 🏆 [LLM Document analysis by LexEU competition](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md):
A separate competition is available for those sumbit best new retrieval / workflow method that will analyze a Document using EU laws.
A separate competition is available for those who sumbit new retrieval / workflow method that will analyze a Document using EU laws.
With 200$, 100$, 50$ prize for 1st, 2nd and 3rd place respectively.
You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md)
@@ -15,7 +15,11 @@ You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/
```text
🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs.
🧩 API extention: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent.
🧩 API extension: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent.
They can be a completely separate repo.
For example:
https://github.com/arc53/tg-bot-docsgpt-extenstion or
https://github.com/arc53/DocsGPT-cli
Non-Code Contributions:

View File

@@ -31,8 +31,11 @@ Say goodbye to time-consuming manual searches, and let <strong><a href="https://
We're eager to provide personalized assistance when deploying your DocsGPT to a live environment.
- [Book Enterprise / teams Demo :wave:](https://cal.com/arc53/docsgpt-demo-b2b?date=2024-09-27&month=2024-09)
- [Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions)
<a href ="https://cal.com/arc53/docsgpt-demo-b2b">
<img alt="Let's chat" src="https://cal.com/book-with-cal-dark.svg" />
</a>
[Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions)
![video-example-of-docs-gpt](https://d3dg1063dc54p9.cloudfront.net/videos/demov3.gif)

View File

@@ -22,22 +22,23 @@ mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
sources_collection = db["sources"]
# Constants
MIN_TOKENS = 150
MAX_TOKENS = 1250
RECURSION_DEPTH = 2
# Define a function to extract metadata from a given filename.
def metadata_from_filename(title):
return {"title": title}
# Define a function to generate a random string of a given length.
def generate_random_string(length):
return "".join([string.ascii_letters[i % 52] for i in range(length)])
current_dir = os.path.dirname(
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
)
def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
"""
Recursively extract zip files with a limit on recursion depth.
@@ -52,9 +53,13 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
logging.warning(f"Reached maximum recursion depth of {max_depth}")
return
with zipfile.ZipFile(zip_path, "r") as zip_ref:
zip_ref.extractall(extract_to)
os.remove(zip_path) # Remove the zip file after extracting
try:
with zipfile.ZipFile(zip_path, "r") as zip_ref:
zip_ref.extractall(extract_to)
os.remove(zip_path) # Remove the zip file after extracting
except Exception as e:
logging.error(f"Error extracting zip file {zip_path}: {e}")
return
# Check for nested zip files and extract them
for root, dirs, files in os.walk(extract_to):
@@ -64,6 +69,38 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
file_path = os.path.join(root, file)
extract_zip_recursive(file_path, root, current_depth + 1, max_depth)
def download_file(url, params, dest_path):
try:
response = requests.get(url, params=params)
response.raise_for_status()
with open(dest_path, "wb") as f:
f.write(response.content)
except requests.RequestException as e:
logging.error(f"Error downloading file: {e}")
raise
def upload_index(full_path, file_data):
try:
if settings.VECTOR_STORE == "faiss":
files = {
"file_faiss": open(full_path + "/index.faiss", "rb"),
"file_pkl": open(full_path + "/index.pkl", "rb"),
}
response = requests.post(
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
)
else:
response = requests.post(
urljoin(settings.API_URL, "/api/upload_index"), data=file_data
)
response.raise_for_status()
except requests.RequestException as e:
logging.error(f"Error uploading index: {e}")
raise
finally:
if settings.VECTOR_STORE == "faiss":
for file in files.values():
file.close()
# Define the main function for ingesting and processing documents.
def ingest_worker(
@@ -84,39 +121,25 @@ def ingest_worker(
Returns:
dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
"""
# directory = 'inputs' or 'temp'
# formats = [".rst", ".md"]
input_files = None
recursive = True
limit = None
exclude = True
# name_job = 'job1'
# filename = 'install.rst'
# user = 'local'
sample = False
token_check = True
min_tokens = 150
max_tokens = 1250
recursion_depth = 2
full_path = os.path.join(directory, user, name_job)
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
# check if API_URL env variable is set
file_data = {"name": name_job, "file": filename, "user": user}
response = requests.get(
urljoin(settings.API_URL, "/api/download"), params=file_data
)
file = response.content
download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename))
if not os.path.exists(full_path):
os.makedirs(full_path)
with open(os.path.join(full_path, filename), "wb") as f:
f.write(file)
# check if file is .zip and extract it
if filename.endswith(".zip"):
extract_zip_recursive(
os.path.join(full_path, filename), full_path, 0, recursion_depth
os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
)
self.update_state(state="PROGRESS", meta={"current": 1})
@@ -132,8 +155,8 @@ def ingest_worker(
).load_data()
raw_docs = group_split(
documents=raw_docs,
min_tokens=min_tokens,
max_tokens=max_tokens,
min_tokens=MIN_TOKENS,
max_tokens=MAX_TOKENS,
token_check=token_check,
)
@@ -148,28 +171,13 @@ def ingest_worker(
for i in range(min(5, len(raw_docs))):
logging.info(f"Sample document {i}: {raw_docs[i]}")
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
# and send them to the server (provide user and name in form)
file_data = {
"name": name_job,
"user": user,
file_data.update({
"tokens": tokens,
"retriever": retriever,
"id": str(id),
"type": "local",
}
if settings.VECTOR_STORE == "faiss":
files = {
"file_faiss": open(full_path + "/index.faiss", "rb"),
"file_pkl": open(full_path + "/index.pkl", "rb"),
}
response = requests.post(
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
)
else:
response = requests.post(
urljoin(settings.API_URL, "/api/upload_index"), data=file_data
)
})
upload_index(full_path, file_data)
# delete local
shutil.rmtree(full_path)
@@ -183,7 +191,6 @@ def ingest_worker(
"limited": False,
}
def remote_worker(
self,
source_data,
@@ -197,16 +204,14 @@ def remote_worker(
doc_id=None,
):
token_check = True
min_tokens = 150
max_tokens = 1250
full_path = directory + "/" + user + "/" + name_job
full_path = os.path.join(directory, user, name_job)
if not os.path.exists(full_path):
os.makedirs(full_path)
self.update_state(state="PROGRESS", meta={"current": 1})
logging.info(
f"Remote job: {full_path}",
extra={"user": user, "job": name_job, source_data: source_data},
extra={"user": user, "job": name_job, "source_data": source_data},
)
remote_loader = RemoteCreator.create_loader(loader)
@@ -214,11 +219,10 @@ def remote_worker(
docs = group_split(
documents=raw_docs,
min_tokens=min_tokens,
max_tokens=max_tokens,
min_tokens=MIN_TOKENS,
max_tokens=MAX_TOKENS,
token_check=token_check,
)
# docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
tokens = count_tokens_docs(docs)
if operation_mode == "upload":
id = ObjectId()
@@ -230,7 +234,6 @@ def remote_worker(
call_openai_api(docs, full_path, id, self)
self.update_state(state="PROGRESS", meta={"current": 100})
# Proceed with uploading and cleaning as in the original function
file_data = {
"name": name_job,
"user": user,
@@ -241,23 +244,12 @@ def remote_worker(
"remote_data": source_data,
"sync_frequency": sync_frequency,
}
if settings.VECTOR_STORE == "faiss":
files = {
"file_faiss": open(full_path + "/index.faiss", "rb"),
"file_pkl": open(full_path + "/index.pkl", "rb"),
}
requests.post(
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
)
else:
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
upload_index(full_path, file_data)
shutil.rmtree(full_path)
return {"urls": source_data, "name_job": name_job, "user": user, "limited": False}
def sync(
self,
source_data,
@@ -283,10 +275,10 @@ def sync(
doc_id,
)
except Exception as e:
logging.error(f"Error during sync: {e}")
return {"status": "error", "error": str(e)}
return {"status": "success"}
def sync_worker(self, frequency):
sync_counts = Counter()
sources = sources_collection.find()

View File

@@ -24,7 +24,9 @@
"react-redux": "^8.0.5",
"react-router-dom": "^6.8.1",
"react-syntax-highlighter": "^15.5.0",
"remark-gfm": "^4.0.0"
"rehype-katex": "^7.0.1",
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0"
},
"devDependencies": {
"@types/react": "^18.0.27",
@@ -1636,6 +1638,12 @@
"integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==",
"dev": true
},
"node_modules/@types/katex": {
"version": "0.16.7",
"resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.7.tgz",
"integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==",
"license": "MIT"
},
"node_modules/@types/mdast": {
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz",
@@ -3104,7 +3112,6 @@
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"dev": true,
"engines": {
"node": ">=0.12"
},
@@ -4582,6 +4589,193 @@
"node": ">= 0.4"
}
},
"node_modules/hast-util-from-dom": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.0.tgz",
"integrity": "sha512-d6235voAp/XR3Hh5uy7aGLbM3S4KamdW0WEgOaU1YoewnuYw4HXb5eRtv9g65m/RFGEfUY1Mw4UqCc5Y8L4Stg==",
"license": "ISC",
"dependencies": {
"@types/hast": "^3.0.0",
"hastscript": "^8.0.0",
"web-namespaces": "^2.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-dom/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/hast-util-from-dom/node_modules/hast-util-parse-selector": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz",
"integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-dom/node_modules/hastscript": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz",
"integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"comma-separated-tokens": "^2.0.0",
"hast-util-parse-selector": "^4.0.0",
"property-information": "^6.0.0",
"space-separated-tokens": "^2.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-html": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz",
"integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"devlop": "^1.1.0",
"hast-util-from-parse5": "^8.0.0",
"parse5": "^7.0.0",
"vfile": "^6.0.0",
"vfile-message": "^4.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-html-isomorphic": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/hast-util-from-html-isomorphic/-/hast-util-from-html-isomorphic-2.0.0.tgz",
"integrity": "sha512-zJfpXq44yff2hmE0XmwEOzdWin5xwH+QIhMLOScpX91e/NSGPsAzNCvLQDIEPyO2TXi+lBmU6hjLIhV8MwP2kw==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"hast-util-from-dom": "^5.0.0",
"hast-util-from-html": "^2.0.0",
"unist-util-remove-position": "^5.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-html-isomorphic/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/hast-util-from-html/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/hast-util-from-parse5": {
"version": "8.0.1",
"resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.1.tgz",
"integrity": "sha512-Er/Iixbc7IEa7r/XLtuG52zoqn/b3Xng/w6aZQ0xGVxzhw5xUFxcRqdPzP6yFi/4HBYRaifaI5fQ1RH8n0ZeOQ==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"@types/unist": "^3.0.0",
"devlop": "^1.0.0",
"hastscript": "^8.0.0",
"property-information": "^6.0.0",
"vfile": "^6.0.0",
"vfile-location": "^5.0.0",
"web-namespaces": "^2.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-parse5/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/hast-util-from-parse5/node_modules/hast-util-parse-selector": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz",
"integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-from-parse5/node_modules/hastscript": {
"version": "8.0.0",
"resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz",
"integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"comma-separated-tokens": "^2.0.0",
"hast-util-parse-selector": "^4.0.0",
"property-information": "^6.0.0",
"space-separated-tokens": "^2.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-is-element": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-3.0.0.tgz",
"integrity": "sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-is-element/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/hast-util-parse-selector": {
"version": "2.2.5",
"resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-2.2.5.tgz",
@@ -4625,6 +4819,31 @@
"@types/unist": "*"
}
},
"node_modules/hast-util-to-text": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz",
"integrity": "sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"@types/unist": "^3.0.0",
"hast-util-is-element": "^3.0.0",
"unist-util-find-after": "^5.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/hast-util-to-text/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/hast-util-whitespace": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz",
@@ -5421,6 +5640,31 @@
"node": ">=4.0"
}
},
"node_modules/katex": {
"version": "0.16.11",
"resolved": "https://registry.npmjs.org/katex/-/katex-0.16.11.tgz",
"integrity": "sha512-RQrI8rlHY92OLf3rho/Ts8i/XvjgguEjOkO1BEXcU3N8BqPpSzBNwV/G0Ukr+P/l3ivvJUE/Fa/CwbS6HesGNQ==",
"funding": [
"https://opencollective.com/katex",
"https://github.com/sponsors/katex"
],
"license": "MIT",
"dependencies": {
"commander": "^8.3.0"
},
"bin": {
"katex": "cli.js"
}
},
"node_modules/katex/node_modules/commander": {
"version": "8.3.0",
"resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz",
"integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==",
"license": "MIT",
"engines": {
"node": ">= 12"
}
},
"node_modules/levn": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@@ -5837,6 +6081,34 @@
"url": "https://opencollective.com/unified"
}
},
"node_modules/mdast-util-math": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/mdast-util-math/-/mdast-util-math-3.0.0.tgz",
"integrity": "sha512-Tl9GBNeG/AhJnQM221bJR2HPvLOSnLE/T9cJI9tlc6zwQk2nPk/4f0cHkOdEixQPC/j8UtKDdITswvLAy1OZ1w==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"@types/mdast": "^4.0.0",
"devlop": "^1.0.0",
"longest-streak": "^3.0.0",
"mdast-util-from-markdown": "^2.0.0",
"mdast-util-to-markdown": "^2.1.0",
"unist-util-remove-position": "^5.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/mdast-util-math/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/mdast-util-mdx-expression": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.0.tgz",
@@ -6269,6 +6541,25 @@
"url": "https://opencollective.com/unified"
}
},
"node_modules/micromark-extension-math": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/micromark-extension-math/-/micromark-extension-math-3.1.0.tgz",
"integrity": "sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==",
"license": "MIT",
"dependencies": {
"@types/katex": "^0.16.0",
"devlop": "^1.0.0",
"katex": "^0.16.0",
"micromark-factory-space": "^2.0.0",
"micromark-util-character": "^2.0.0",
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/micromark-factory-destination": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.0.tgz",
@@ -7055,6 +7346,18 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"license": "MIT",
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/path-exists": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz",
@@ -7831,6 +8134,34 @@
"url": "https://github.com/sponsors/mysticatea"
}
},
"node_modules/rehype-katex": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/rehype-katex/-/rehype-katex-7.0.1.tgz",
"integrity": "sha512-OiM2wrZ/wuhKkigASodFoo8wimG3H12LWQaH8qSPVJn9apWKFSH3YOCtbKpBorTVw/eI7cuT21XBbvwEswbIOA==",
"license": "MIT",
"dependencies": {
"@types/hast": "^3.0.0",
"@types/katex": "^0.16.0",
"hast-util-from-html-isomorphic": "^2.0.0",
"hast-util-to-text": "^4.0.0",
"katex": "^0.16.0",
"unist-util-visit-parents": "^6.0.0",
"vfile": "^6.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/rehype-katex/node_modules/@types/hast": {
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz",
"integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "*"
}
},
"node_modules/remark-gfm": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.0.tgz",
@@ -7848,6 +8179,22 @@
"url": "https://opencollective.com/unified"
}
},
"node_modules/remark-math": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz",
"integrity": "sha512-MMqgnP74Igy+S3WwnhQ7kqGlEerTETXMvJhrUzDikVZ2/uogJCb+WHUg97hK9/jcfc0dkD73s3LN8zU49cTEtA==",
"license": "MIT",
"dependencies": {
"@types/mdast": "^4.0.0",
"mdast-util-math": "^3.0.0",
"micromark-extension-math": "^3.0.0",
"unified": "^11.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/remark-parse": {
"version": "11.0.0",
"resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz",
@@ -8962,6 +9309,20 @@
"url": "https://opencollective.com/unified"
}
},
"node_modules/unist-util-find-after": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-5.0.0.tgz",
"integrity": "sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==",
"license": "MIT",
"dependencies": {
"@types/unist": "^3.0.0",
"unist-util-is": "^6.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/unist-util-is": {
"version": "6.0.0",
"resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz",
@@ -9105,6 +9466,20 @@
"url": "https://opencollective.com/unified"
}
},
"node_modules/vfile-location": {
"version": "5.0.3",
"resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz",
"integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==",
"license": "MIT",
"dependencies": {
"@types/unist": "^3.0.0",
"vfile": "^6.0.0"
},
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
}
},
"node_modules/vfile-message": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz",
@@ -9200,6 +9575,16 @@
"node": ">=0.10.0"
}
},
"node_modules/web-namespaces": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz",
"integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==",
"license": "MIT",
"funding": {
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/which": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",

View File

@@ -35,7 +35,9 @@
"react-redux": "^8.0.5",
"react-router-dom": "^6.8.1",
"react-syntax-highlighter": "^15.5.0",
"remark-gfm": "^4.0.0"
"rehype-katex": "^7.0.1",
"remark-gfm": "^4.0.0",
"remark-math": "^6.0.0"
},
"devDependencies": {
"@types/react": "^18.0.27",

View File

@@ -4,6 +4,9 @@ import { useSelector } from 'react-redux';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';
import remarkGfm from 'remark-gfm';
import remarkMath from 'remark-math';
import rehypeKatex from 'rehype-katex';
import 'katex/dist/katex.min.css';
import Alert from '../assets/alert.svg';
import DocsGPT3 from '../assets/cute_docsgpt3.svg';
@@ -62,6 +65,21 @@ const ConversationBubble = forwardRef<
</div>
);
} else {
const preprocessLaTeX = (content: string) => {
// Replace block-level LaTeX delimiters \[ \] with $$ $$
const blockProcessedContent = content.replace(
/\\\[(.*?)\\\]/gs,
(_, equation) => `$$${equation}$$`,
);
// Replace inline LaTeX delimiters \( \) with $ $
const inlineProcessedContent = blockProcessedContent.replace(
/\\\((.*?)\\\)/gs,
(_, equation) => `$${equation}$`,
);
return inlineProcessedContent;
};
bubble = (
<div
ref={ref}
@@ -225,7 +243,8 @@ const ConversationBubble = forwardRef<
)}
<ReactMarkdown
className="whitespace-pre-wrap break-normal leading-normal"
remarkPlugins={[remarkGfm]}
remarkPlugins={[remarkGfm, remarkMath]}
rehypePlugins={[rehypeKatex]}
components={{
code(props) {
const { children, className, node, ref, ...rest } = props;
@@ -308,7 +327,7 @@ const ConversationBubble = forwardRef<
},
}}
>
{message}
{preprocessLaTeX(message)}
</ReactMarkdown>
</div>
</div>

View File

@@ -54,14 +54,16 @@
"name": "Document Name",
"date": "Vector Date",
"type": "Type",
"tokenUsage": "Token Usage"
"tokenUsage": "Token Usage",
"noData": "No existing Documents"
},
"apiKeys": {
"label": "Chatbots",
"name": "Name",
"key": "API Key",
"sourceDoc": "Source Document",
"createNew": "Create New"
"createNew": "Create New",
"noData": "No existing Chatbots"
},
"analytics": {
"label": "Analytics"

View File

@@ -54,14 +54,16 @@
"name": "Nombre del Documento",
"date": "Fecha Vector",
"type": "Tipo",
"tokenUsage": "Uso de Tokens"
"tokenUsage": "Uso de Tokens",
"noData": "No hay documentos existentes"
},
"apiKeys": {
"label": "Chatbots",
"name": "Nombre",
"key": "Clave de API",
"sourceDoc": "Documento Fuente",
"createNew": "Crear Nuevo"
"createNew": "Crear Nuevo",
"noData": "No hay chatbots existentes"
},
"analytics": {
"label": "Analítica"

View File

@@ -54,14 +54,16 @@
"name": "ドキュメント名",
"date": "ベクトル日付",
"type": "タイプ",
"tokenUsage": "トークン使用量"
"tokenUsage": "トークン使用量",
"noData": "既存のドキュメントはありません"
},
"apiKeys": {
"label": "チャットボット",
"name": "名前",
"key": "APIキー",
"sourceDoc": "ソースドキュメント",
"createNew": "新規作成"
"createNew": "新規作成",
"noData": "既存のチャットボットはありません"
},
"analytics": {
"label": "分析"

View File

@@ -54,14 +54,16 @@
"name": "文件名称",
"date": "向量日期",
"type": "类型",
"tokenUsage": "令牌使用"
"tokenUsage": "令牌使用",
"noData": "没有现有的文档"
},
"apiKeys": {
"label": "聊天机器人",
"name": "名称",
"key": "API 密钥",
"sourceDoc": "源文档",
"createNew": "创建新的"
"createNew": "创建新的",
"noData": "没有现有的聊天机器人"
},
"analytics": {
"label": "分析"

View File

@@ -116,6 +116,13 @@ export default function APIKeys() {
</tr>
</thead>
<tbody>
{!apiKeys?.length && (
<tr>
<td colSpan={4} className="border-t p-4">
{t('settings.apiKeys.noData')}
</td>
</tr>
)}
{apiKeys?.map((element, index) => (
<tr key={index}>
<td className="border-r border-t p-4">{element.name}</td>

View File

@@ -74,6 +74,13 @@ const Documents: React.FC<DocumentsProps> = ({
</tr>
</thead>
<tbody>
{!documents?.length && (
<tr>
<td colSpan={5} className="border-t p-4">
{t('settings.documents.noData')}
</td>
</tr>
)}
{documents &&
documents.map((document, index) => (
<tr key={index}>