diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index a00cd334..dd0799c6 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -13,3 +13,7 @@ updates:
directory: "/frontend" # Location of package manifests
schedule:
interval: "weekly"
+ - package-ecosystem: "github-actions"
+ directory: "/"
+ schedule:
+ interval: "weekly"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2ea8961f..be0263ff 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -12,22 +12,22 @@ jobs:
contents: read
packages: write
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v1
+ uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
diff --git a/.github/workflows/cife.yml b/.github/workflows/cife.yml
index 73a97755..4b1cbf3b 100644
--- a/.github/workflows/cife.yml
+++ b/.github/workflows/cife.yml
@@ -12,22 +12,22 @@ jobs:
contents: read
packages: write
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v1
+ uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
diff --git a/.github/workflows/docker-develop-build.yml b/.github/workflows/docker-develop-build.yml
index 5edc69d7..0bfc7e70 100644
--- a/.github/workflows/docker-develop-build.yml
+++ b/.github/workflows/docker-develop-build.yml
@@ -14,22 +14,22 @@ jobs:
contents: read
packages: write
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v1
+ uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
diff --git a/.github/workflows/docker-develop-fe-build.yml b/.github/workflows/docker-develop-fe-build.yml
index 29ad4524..14dbccc5 100644
--- a/.github/workflows/docker-develop-fe-build.yml
+++ b/.github/workflows/docker-develop-fe-build.yml
@@ -14,22 +14,22 @@ jobs:
contents: read
packages: write
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v1
+ uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 7ee31ebe..a36f529b 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -11,7 +11,7 @@ jobs:
ruff:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Lint with Ruff
uses: chartboost/ruff-action@v1
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index c6615e56..b858a0f7 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -8,9 +8,9 @@ jobs:
matrix:
python-version: ["3.11"]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v4
+ uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
@@ -24,7 +24,7 @@ jobs:
python -m pytest --cov=application --cov-report=xml
- name: Upload coverage reports to Codecov
if: github.event_name == 'pull_request' && matrix.python-version == '3.11'
- uses: codecov/codecov-action@v3
+ uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/sync_fork.yaml b/.github/workflows/sync_fork.yaml
index 81f222bb..a108daf6 100644
--- a/.github/workflows/sync_fork.yaml
+++ b/.github/workflows/sync_fork.yaml
@@ -17,7 +17,7 @@ jobs:
steps:
# Step 1: run a standard checkout action
- name: Checkout target repo
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Step 2: run the sync action
- name: Sync upstream changes
diff --git a/README.md b/README.md
index ee9a1af6..8f5897fa 100644
--- a/README.md
+++ b/README.md
@@ -203,4 +203,9 @@ We as members, contributors, and leaders, pledge to make participation in our co
The source code license is [MIT](https://opensource.org/license/mit/), as described in the [LICENSE](LICENSE) file.
-Built with [:bird: :link: LangChain](https://github.com/hwchase17/langchain)
+
This project is supported by:
+
+
+
+
+
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 2ead8ef1..3f1a7218 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -340,6 +340,8 @@ class UploadFile(Resource):
".epub",
".html",
".mdx",
+ ".json",
+ ".xlsx",
],
job_name,
final_filename,
diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py
index 79fc2c45..bb63aa61 100644
--- a/application/parser/file/bulk.py
+++ b/application/parser/file/bulk.py
@@ -11,6 +11,7 @@ from application.parser.file.html_parser import HTMLParser
from application.parser.file.markdown_parser import MarkdownParser
from application.parser.file.rst_parser import RstParser
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
+from application.parser.file.json_parser import JSONParser
from application.parser.schema.base import Document
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
@@ -23,6 +24,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".rst": RstParser(),
".html": HTMLParser(),
".mdx": MarkdownParser(),
+    ".json": JSONParser(),
}
diff --git a/application/parser/file/json_parser.py b/application/parser/file/json_parser.py
new file mode 100644
index 00000000..0201b420
--- /dev/null
+++ b/application/parser/file/json_parser.py
@@ -0,0 +1,57 @@
+import json
+from typing import Any, Dict, List, Union
+from pathlib import Path
+
+from application.parser.file.base_parser import BaseParser
+
+class JSONParser(BaseParser):
+ r"""JSON (.json) parser.
+
+ Parses JSON files into a list of strings or a concatenated document.
+ It handles both JSON objects (dictionaries) and arrays (lists).
+
+ Args:
+ concat_rows (bool): Whether to concatenate all rows into one document.
+ If set to False, a Document will be created for each item in the JSON.
+ True by default.
+
+ row_joiner (str): Separator to use for joining each row.
+ Only used when `concat_rows=True`.
+ Set to "\n" by default.
+
+ json_config (dict): Options for parsing JSON. Can be used to specify options like
+ custom decoding or formatting. Set to empty dict by default.
+
+ """
+
+ def __init__(
+ self,
+ *args: Any,
+ concat_rows: bool = True,
+ row_joiner: str = "\n",
+        json_config: dict | None = None,
+        **kwargs: Any
+    ) -> None:
+        """Init params."""
+        super().__init__(*args, **kwargs)
+        self._concat_rows = concat_rows
+        self._row_joiner = row_joiner
+        self._json_config = json_config or {}
+
+ def _init_parser(self) -> Dict:
+ """Init parser."""
+ return {}
+
+ def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
+ """Parse JSON file."""
+
+ with open(file, 'r', encoding='utf-8') as f:
+ data = json.load(f, **self._json_config)
+
+ if isinstance(data, dict):
+ data = [data]
+
+        if self._concat_rows:
+            return self._row_joiner.join([json.dumps(item) for item in data])
+        else:
+            return [json.dumps(item) for item in data]
diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py
index 49f0ae9c..8f805056 100644
--- a/application/parser/remote/github_loader.py
+++ b/application/parser/remote/github_loader.py
@@ -3,6 +3,7 @@ import requests
from typing import List
from application.parser.remote.base import BaseRemote
from langchain_core.documents import Document
+import mimetypes
class GitHubLoader(BaseRemote):
def __init__(self):
@@ -18,13 +19,17 @@ class GitHubLoader(BaseRemote):
if response.status_code == 200:
content = response.json()
+ mime_type, _ = mimetypes.guess_type(file_path) # Guess the MIME type based on the file extension
+
if content.get("encoding") == "base64":
- try:
- decoded_content = base64.b64decode(content["content"]).decode("utf-8")
- return f"Filename: {file_path}\n\n{decoded_content}"
- except Exception as e:
- print(f"Error decoding content for {file_path}: {e}")
- raise
+                if mime_type is None or mime_type.startswith("text") or mime_type == "application/json":  # Attempt decode for text-like or unknown types
+                    try:
+                        decoded_content = base64.b64decode(content["content"]).decode("utf-8")
+                        return f"Filename: {file_path}\n\n{decoded_content}"
+                    except UnicodeDecodeError:
+                        return f"Filename: {file_path} is a binary file and was skipped."
+                else:
+                    return f"Filename: {file_path} is a binary file and was skipped."
else:
return f"Filename: {file_path}\n\n{content['content']}"
else:
diff --git a/docs/package-lock.json b/docs/package-lock.json
index 24295c1c..78206570 100644
--- a/docs/package-lock.json
+++ b/docs/package-lock.json
@@ -1170,6 +1170,58 @@
"node": ">=8"
}
},
+ "node_modules/@parcel/core": {
+ "version": "2.12.0",
+ "resolved": "https://registry.npmjs.org/@parcel/core/-/core-2.12.0.tgz",
+ "integrity": "sha512-s+6pwEj+GfKf7vqGUzN9iSEPueUssCCQrCBUlcAfKrJe0a22hTUCjewpB0I7lNrCIULt8dkndD+sMdOrXsRl6Q==",
+ "peer": true,
+ "dependencies": {
+ "@mischnic/json-sourcemap": "^0.1.0",
+ "@parcel/cache": "2.12.0",
+ "@parcel/diagnostic": "2.12.0",
+ "@parcel/events": "2.12.0",
+ "@parcel/fs": "2.12.0",
+ "@parcel/graph": "3.2.0",
+ "@parcel/logger": "2.12.0",
+ "@parcel/package-manager": "2.12.0",
+ "@parcel/plugin": "2.12.0",
+ "@parcel/profiler": "2.12.0",
+ "@parcel/rust": "2.12.0",
+ "@parcel/source-map": "^2.1.1",
+ "@parcel/types": "2.12.0",
+ "@parcel/utils": "2.12.0",
+ "@parcel/workers": "2.12.0",
+ "abortcontroller-polyfill": "^1.1.9",
+ "base-x": "^3.0.8",
+ "browserslist": "^4.6.6",
+ "clone": "^2.1.1",
+ "dotenv": "^7.0.0",
+ "dotenv-expand": "^5.1.0",
+ "json5": "^2.2.0",
+ "msgpackr": "^1.9.9",
+ "nullthrows": "^1.1.1",
+ "semver": "^7.5.2"
+ },
+ "engines": {
+ "node": ">= 12.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/parcel"
+ }
+ },
+ "node_modules/@parcel/core/node_modules/semver": {
+ "version": "7.6.3",
+ "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz",
+ "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==",
+ "peer": true,
+ "bin": {
+ "semver": "bin/semver.js"
+ },
+ "engines": {
+ "node": ">=10"
+ }
+ },
"node_modules/@parcel/diagnostic": {
"version": "2.12.0",
"resolved": "https://registry.npmjs.org/@parcel/diagnostic/-/diagnostic-2.12.0.tgz",
@@ -1220,6 +1272,22 @@
"@parcel/core": "^2.12.0"
}
},
+ "node_modules/@parcel/graph": {
+ "version": "3.2.0",
+ "resolved": "https://registry.npmjs.org/@parcel/graph/-/graph-3.2.0.tgz",
+ "integrity": "sha512-xlrmCPqy58D4Fg5umV7bpwDx5Vyt7MlnQPxW68vae5+BA4GSWetfZt+Cs5dtotMG2oCHzZxhIPt7YZ7NRyQzLA==",
+ "peer": true,
+ "dependencies": {
+ "nullthrows": "^1.1.1"
+ },
+ "engines": {
+ "node": ">= 12.0.0"
+ },
+ "funding": {
+ "type": "opencollective",
+ "url": "https://opencollective.com/parcel"
+ }
+ },
"node_modules/@parcel/logger": {
"version": "2.12.0",
"resolved": "https://registry.npmjs.org/@parcel/logger/-/logger-2.12.0.tgz",
@@ -2644,6 +2712,12 @@
"server-only": "^0.0.1"
}
},
+ "node_modules/abortcontroller-polyfill": {
+ "version": "1.7.5",
+ "resolved": "https://registry.npmjs.org/abortcontroller-polyfill/-/abortcontroller-polyfill-1.7.5.tgz",
+ "integrity": "sha512-JMJ5soJWP18htbbxJjG7bG6yuI6pRhgJ0scHHTfkUjf6wjP912xZWvM+A4sJK3gqd9E8fcPbDnOefbA9Th/FIQ==",
+ "peer": true
+ },
"node_modules/acorn": {
"version": "8.11.3",
"resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz",
@@ -2723,6 +2797,15 @@
"url": "https://github.com/sponsors/wooorm"
}
},
+ "node_modules/base-x": {
+ "version": "3.0.10",
+ "resolved": "https://registry.npmjs.org/base-x/-/base-x-3.0.10.tgz",
+ "integrity": "sha512-7d0s06rR9rYaIWHkpfLIFICM/tkSVdoPC9qYAQRpxn9DdKNWNsKC0uk++akckyLq16Tx2WIinnZ6WRriAt6njQ==",
+ "peer": true,
+ "dependencies": {
+ "safe-buffer": "^5.0.1"
+ }
+ },
"node_modules/boolbase": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz",
@@ -2937,6 +3020,15 @@
"node": ">=4"
}
},
+ "node_modules/clone": {
+ "version": "2.1.2",
+ "resolved": "https://registry.npmjs.org/clone/-/clone-2.1.2.tgz",
+ "integrity": "sha512-3Pe/CF1Nn94hyhIYpjtiLhdCoEoz0DqQ+988E9gmeEdQZlojxnOb74wctFyuwWQHzqyf9X7C7MG8juUpqBJT8w==",
+ "peer": true,
+ "engines": {
+ "node": ">=0.8"
+ }
+ },
"node_modules/clsx": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.0.tgz",
@@ -3065,30 +3157,6 @@
"cytoscape": "^3.2.0"
}
},
- "node_modules/cytoscape-fcose": {
- "version": "2.2.0",
- "resolved": "https://registry.npmjs.org/cytoscape-fcose/-/cytoscape-fcose-2.2.0.tgz",
- "integrity": "sha512-ki1/VuRIHFCzxWNrsshHYPs6L7TvLu3DL+TyIGEsRcvVERmxokbf5Gdk7mFxZnTdiGtnA4cfSmjZJMviqSuZrQ==",
- "dependencies": {
- "cose-base": "^2.2.0"
- },
- "peerDependencies": {
- "cytoscape": "^3.2.0"
- }
- },
- "node_modules/cytoscape-fcose/node_modules/cose-base": {
- "version": "2.2.0",
- "resolved": "https://registry.npmjs.org/cose-base/-/cose-base-2.2.0.tgz",
- "integrity": "sha512-AzlgcsCbUMymkADOJtQm3wO9S3ltPfYOFD5033keQn9NJzIbtnZj+UdBJe7DYml/8TdbtHJW3j58SOnKhWY/5g==",
- "dependencies": {
- "layout-base": "^2.0.0"
- }
- },
- "node_modules/cytoscape-fcose/node_modules/layout-base": {
- "version": "2.0.1",
- "resolved": "https://registry.npmjs.org/layout-base/-/layout-base-2.0.1.tgz",
- "integrity": "sha512-dp3s92+uNI1hWIpPGH3jK2kxE2lMjdXdr+DH8ynZHpd6PUlH6x6cbuXnoMmiNumznqaNO31xu9e79F0uuZ0JFg=="
- },
"node_modules/d3": {
"version": "7.8.5",
"resolved": "https://registry.npmjs.org/d3/-/d3-7.8.5.tgz",
@@ -3689,15 +3757,30 @@
"url": "https://github.com/fb55/domutils?sponsor=1"
}
},
+ "node_modules/dotenv": {
+ "version": "7.0.0",
+ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-7.0.0.tgz",
+ "integrity": "sha512-M3NhsLbV1i6HuGzBUH8vXrtxOk+tWmzWKDMbAVSUp3Zsjm7ywFeuwrUXhmhQyRK1q5B5GGy7hcXPbj3bnfZg2g==",
+ "peer": true,
+ "engines": {
+ "node": ">=6"
+ }
+ },
+ "node_modules/dotenv-expand": {
+ "version": "5.1.0",
+ "resolved": "https://registry.npmjs.org/dotenv-expand/-/dotenv-expand-5.1.0.tgz",
+ "integrity": "sha512-YXQl1DSa4/PQyRfgrv6aoNjhasp/p4qs9FjJ4q4cQk+8m4r6k4ZSiEyytKG8f8W9gi8WsQtIObNmKd+tMzNTmA==",
+ "peer": true
+ },
"node_modules/electron-to-chromium": {
"version": "1.4.693",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.693.tgz",
"integrity": "sha512-/if4Ueg0GUQlhCrW2ZlXwDAm40ipuKo+OgeHInlL8sbjt+hzISxZK949fZeJaVsheamrzANXvw1zQTvbxTvSHw=="
},
"node_modules/elkjs": {
- "version": "0.8.2",
- "resolved": "https://registry.npmjs.org/elkjs/-/elkjs-0.8.2.tgz",
- "integrity": "sha512-L6uRgvZTH+4OF5NE/MBbzQx/WYpru1xCBE9respNj6qznEewGUIfhzmm7horWWxbNO2M0WckQypGctR8lH79xQ=="
+ "version": "0.9.3",
+ "resolved": "https://registry.npmjs.org/elkjs/-/elkjs-0.9.3.tgz",
+ "integrity": "sha512-f/ZeWvW/BCXbhGEf1Ujp29EASo/lk1FDnETgNKwJrsVvGZhUWCZyg3xLJjAsxfOmt8KjswHmI5EwCQcPMpOYhQ=="
},
"node_modules/entities": {
"version": "4.5.0",
@@ -5414,22 +5497,22 @@
"license": "MIT"
},
"node_modules/mermaid": {
- "version": "10.6.1",
- "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.6.1.tgz",
- "integrity": "sha512-Hky0/RpOw/1il9X8AvzOEChfJtVvmXm+y7JML5C//ePYMy0/9jCEmW1E1g86x9oDfW9+iVEdTV/i+M6KWRNs4A==",
+ "version": "10.9.3",
+ "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.9.3.tgz",
+ "integrity": "sha512-V80X1isSEvAewIL3xhmz/rVmc27CVljcsbWxkxlWJWY/1kQa4XOABqpDl2qQLGKzpKm6WbTfUEKImBlUfFYArw==",
"dependencies": {
"@braintree/sanitize-url": "^6.0.1",
"@types/d3-scale": "^4.0.3",
"@types/d3-scale-chromatic": "^3.0.0",
- "cytoscape": "^3.23.0",
+ "cytoscape": "^3.28.1",
"cytoscape-cose-bilkent": "^4.1.0",
- "cytoscape-fcose": "^2.1.0",
"d3": "^7.4.0",
"d3-sankey": "^0.12.3",
"dagre-d3-es": "7.0.10",
"dayjs": "^1.11.7",
- "dompurify": "^3.0.5",
- "elkjs": "^0.8.2",
+ "dompurify": "^3.0.5 <3.1.7",
+ "elkjs": "^0.9.0",
+ "katex": "^0.16.9",
"khroma": "^2.0.0",
"lodash-es": "^4.17.21",
"mdast-util-from-markdown": "^1.3.0",
@@ -9622,6 +9705,26 @@
"node": ">=6"
}
},
+ "node_modules/safe-buffer": {
+ "version": "5.2.1",
+ "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
+ "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==",
+ "funding": [
+ {
+ "type": "github",
+ "url": "https://github.com/sponsors/feross"
+ },
+ {
+ "type": "patreon",
+ "url": "https://www.patreon.com/feross"
+ },
+ {
+ "type": "consulting",
+ "url": "https://feross.org/support"
+ }
+ ],
+ "peer": true
+ },
"node_modules/safer-buffer": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
@@ -9966,6 +10069,19 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
+ "node_modules/typescript": {
+ "version": "5.6.3",
+ "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz",
+ "integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==",
+ "peer": true,
+ "bin": {
+ "tsc": "bin/tsc",
+ "tsserver": "bin/tsserver"
+ },
+ "engines": {
+ "node": ">=14.17"
+ }
+ },
"node_modules/uc.micro": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz",
diff --git a/extensions/slack-bot/.gitignore b/extensions/slack-bot/.gitignore
new file mode 100644
index 00000000..1d8e58b2
--- /dev/null
+++ b/extensions/slack-bot/.gitignore
@@ -0,0 +1,3 @@
+.env
+.venv/
+get-pip.py
\ No newline at end of file
diff --git a/extensions/slack-bot/Readme.md b/extensions/slack-bot/Readme.md
new file mode 100644
index 00000000..704184a2
--- /dev/null
+++ b/extensions/slack-bot/Readme.md
@@ -0,0 +1,84 @@
+
+# Slack Bot Configuration Guide
+
+> **Note:** The following guidelines must be followed on the [Slack API website](https://api.slack.com/) for setting up your Slack app and generating the necessary tokens.
+
+## Step-by-Step Instructions
+
+### 1. Navigate to Your Apps
+- Go to the Slack API page for apps and select **Create an App** from the “From Scratch” option.
+
+### 2. App Creation
+- Name your app and choose the workspace where you wish to add the assistant.
+
+### 3. Enabling Socket Mode
+- Navigate to **Settings > Socket Mode** and enable **Socket Mode**.
+- This action will generate an App-level token. Select the `connections:write` scope and copy the App-level token for future use.
+
+### 4. Socket Naming
+- Assign a name to your socket as per your preference.
+
+### 5. Basic Information Setup
+- Go to **Basic Information** (under **Settings**) and configure the following:
+ - Assistant name
+ - App icon
+ - Background color
+
+### 6. Bot Token and Permissions
+- In the **OAuth & Permissions** option found under the **Features** section, retrieve the Bot Token. Save it for future usage.
+- You will also need to add specific bot token scopes:
+ - `app_mentions:read`
+ - `assistant:write`
+ - `chat:write`
+ - `chat:write.public`
+ - `im:history`
+
+### 7. Enable Events
+- From **Event Subscriptions**, enable events and add the following Bot User events:
+ - `app_mention`
+ - `assistant_thread_context_changed`
+ - `assistant_thread_started`
+ - `message.im`
+
+### 8. Agent/Assistant Toggle
+- In the **Features > Agent & Assistants** section, toggle on the Agent or Assistant option.
+- In the **Suggested Prompts** setting, leave it as `dynamic` (this is the default setting).
+
+---
+
+## Code-Side Configuration Guide
+
+This section focuses on generating and setting up the necessary tokens in the `.env` file, using the `.env-example` as a template.
+
+### Step 1: Generating Required Keys
+
+1. **SLACK_APP_TOKEN**
+ - Navigate to **Settings > Socket Mode** in the Slack API and enable **Socket Mode**.
+ - Copy the App-level token generated (usually starts with `xapp-`).
+
+2. **SLACK_BOT_TOKEN**
+ - Go to **OAuth & Permissions** (under the **Features** section in Slack API).
+ - Retrieve the **Bot Token** (starts with `xoxb-`).
+
+3. **DOCSGPT_API_KEY**
+ - Go to the **DocsGPT website**.
+ - Navigate to **Settings > Chatbots > Create New** to generate a DocsGPT API Key.
+ - Copy the generated key for use.
+
+### Step 2: Creating and Updating the `.env` File
+
+1. Create a new `.env` file in the root of your project (if it doesn’t already exist).
+2. Use the `.env-example` as a reference and update the file with the following keys and values:
+
+```bash
+# .env file
+SLACK_APP_TOKEN=xapp-your-generated-app-token
+SLACK_BOT_TOKEN=xoxb-your-generated-bot-token
+DOCSGPT_API_KEY=your-docsgpt-generated-api-key
+```
+
+Replace the placeholder values with the actual tokens generated from the Slack API and DocsGPT as per the steps outlined above.
+
+---
+
+This concludes the guide for both setting up the Slack API and configuring the `.env` file on the code side.
diff --git a/extensions/slack-bot/app.py b/extensions/slack-bot/app.py
new file mode 100644
index 00000000..d4f522fd
--- /dev/null
+++ b/extensions/slack-bot/app.py
@@ -0,0 +1,112 @@
+import os
+import hashlib
+import httpx
+import re
+from slack_bolt.async_app import AsyncApp
+from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
+from dotenv import load_dotenv
+
+load_dotenv()
+API_BASE = os.getenv("API_BASE", "https://gptcloud.arc53.com")
+API_URL = API_BASE + "/api/answer"
+
+# Slack bot token and signing secret
+SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN")
+SLACK_APP_TOKEN = os.getenv("SLACK_APP_TOKEN")
+
+# OpenAI API key for DocsGPT (replace this with your actual API key)
+DOCSGPT_API_KEY = os.getenv("DOCSGPT_API_KEY")
+
+# Initialize Slack app
+app = AsyncApp(token=SLACK_BOT_TOKEN)
+
+def encode_conversation_id(conversation_id: str) -> str:
+ """
+    Encodes a Slack conversation/thread id to a 24-character hex id
+    Args:
+        conversation_id (str): The Slack conversation/thread timestamp id.
+ Returns:
+ str: Hashed id.
+ """
+ # Create a SHA-256 hash of the string
+ hashed_id = hashlib.sha256(conversation_id.encode()).hexdigest()
+
+ # Take the first 24 characters of the hash
+ hashed_24_char_id = hashed_id[:24]
+ return hashed_24_char_id
+
+async def generate_answer(question: str, messages: list, conversation_id: str | None) -> dict:
+ """Generates an answer using the external API."""
+ payload = {
+ "question": question,
+ "api_key": DOCSGPT_API_KEY,
+ "history": messages,
+ "conversation_id": conversation_id,
+ }
+ headers = {
+ "Content-Type": "application/json; charset=utf-8"
+ }
+ timeout = 60.0
+ async with httpx.AsyncClient() as client:
+ response = await client.post(API_URL, json=payload, headers=headers, timeout=timeout)
+
+ if response.status_code == 200:
+ data = response.json()
+ conversation_id = data.get("conversation_id")
+ answer = data.get("answer", "Sorry, I couldn't find an answer.")
+ return {"answer": answer, "conversation_id": conversation_id}
+ else:
+ print(response.json())
+ return {"answer": "Sorry, I couldn't find an answer.", "conversation_id": None}
+
+@app.message(".*")
+async def message_docs(message, say):
+ client = app.client
+ channel = message['channel']
+    thread_ts = message.get('thread_ts', message['ts'])
+ user_query = message['text']
+ await client.assistant_threads_setStatus(
+ channel_id = channel,
+ thread_ts = thread_ts,
+ status = "is generating your answer...",
+ )
+
+ docs_gpt_channel_id = encode_conversation_id(thread_ts)
+
+ # Get response from DocsGPT
+ response = await generate_answer(user_query,[], docs_gpt_channel_id)
+ answer = convert_to_slack_markdown(response['answer'])
+
+ # Respond in Slack
+    await client.chat_postMessage(text = answer, mrkdwn= True, channel= channel,
+                            thread_ts = thread_ts,)
+
+def convert_to_slack_markdown(markdown_text: str):
+ # Convert bold **text** to *text* for Slack
+ slack_text = re.sub(r'\*\*(.*?)\*\*', r'*\1*', markdown_text) # **text** to *text*
+
+ # Convert italics _text_ to _text_ for Slack
+ slack_text = re.sub(r'_(.*?)_', r'_\1_', slack_text) # _text_ to _text_
+
+ # Convert inline code `code` to `code` (Slack supports backticks for inline code)
+ slack_text = re.sub(r'`(.*?)`', r'`\1`', slack_text)
+
+ # Convert bullet points with single or no spaces to filled bullets (•)
+ slack_text = re.sub(r'^\s{0,1}[-*]\s+', ' • ', slack_text, flags=re.MULTILINE)
+
+ # Convert bullet points with multiple spaces to hollow bullets (◦)
+ slack_text = re.sub(r'^\s{2,}[-*]\s+', '\t◦ ', slack_text, flags=re.MULTILINE)
+
+ # Convert headers (##) to bold in Slack
+ slack_text = re.sub(r'^\s*#{1,6}\s*(.*?)$', r'*\1*', slack_text, flags=re.MULTILINE)
+
+ return slack_text
+
+async def main():
+ handler = AsyncSocketModeHandler(app, os.environ["SLACK_APP_TOKEN"])
+ await handler.start_async()
+
+# Start the app
+if __name__ == "__main__":
+ import asyncio
+ asyncio.run(main())
\ No newline at end of file
diff --git a/extensions/slack-bot/requirements.txt b/extensions/slack-bot/requirements.txt
new file mode 100644
index 00000000..0c588b43
--- /dev/null
+++ b/extensions/slack-bot/requirements.txt
@@ -0,0 +1,10 @@
+aiohttp>=3,<4
+certifi==2024.7.4
+h11==0.14.0
+httpcore==1.0.5
+httpx==0.27.0
+idna==3.7
+python-dotenv==1.0.1
+sniffio==1.3.1
+slack-bolt==1.21.0
+# 'bson' intentionally omitted: unused by app.py, and the standalone bson package conflicts with pymongo
diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx
index ca12df54..b38ade53 100644
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -11,7 +11,6 @@ import DocsGPT3 from './assets/cute_docsgpt3.svg';
import Discord from './assets/discord.svg';
import Expand from './assets/expand.svg';
import Github from './assets/github.svg';
-import Info from './assets/info.svg';
import SettingGear from './assets/settingGear.svg';
import Twitter from './assets/TwitterX.svg';
import UploadIcon from './assets/upload.svg';
@@ -42,10 +41,8 @@ import {
} from './preferences/preferenceSlice';
import { selectQueries } from './conversation/conversationSlice';
import Upload from './upload/Upload';
-import ShareButton from './components/ShareButton';
import Help from './components/Help';
-
interface NavigationProps {
navOpen: boolean;
setNavOpen: React.Dispatch>;
@@ -392,10 +389,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
-
+
-
+
\ No newline at end of file
diff --git a/frontend/src/assets/discord.svg b/frontend/src/assets/discord.svg
index ca54be77..8164bca4 100644
--- a/frontend/src/assets/discord.svg
+++ b/frontend/src/assets/discord.svg
@@ -1 +1,3 @@
-
\ No newline at end of file
+
diff --git a/frontend/src/conversation/ConversationBubble.tsx b/frontend/src/conversation/ConversationBubble.tsx
index a3dec8be..2ccf1ca3 100644
--- a/frontend/src/conversation/ConversationBubble.tsx
+++ b/frontend/src/conversation/ConversationBubble.tsx
@@ -354,7 +354,7 @@ const ConversationBubble = forwardRef<
>
{
@@ -35,7 +38,14 @@ const Documents: React.FC
= ({
}) => {
const { t } = useTranslation();
const dispatch = useDispatch();
+
+ // State for search input
+ const [searchTerm, setSearchTerm] = useState('');
+ // State for modal: active/inactive
+ const [modalState, setModalState] = useState('INACTIVE'); // Initialize with inactive state
+ const [isOnboarding, setIsOnboarding] = useState(false); // State for onboarding flag
const [loading, setLoading] = useState(false);
+
const syncOptions = [
{ label: 'Never', value: 'never' },
{ label: 'Daily', value: 'daily' },
@@ -59,10 +69,37 @@ const Documents: React.FC = ({
});
};
+ // Filter documents based on the search term
+ const filteredDocuments = documents?.filter((document) =>
+ document.name.toLowerCase().includes(searchTerm.toLowerCase()),
+ );
+
return (
+
+
+ setSearchTerm(e.target.value)} // Handle search input change
+ />
+
+
+
{loading ? (
) : (
@@ -70,22 +107,37 @@ const Documents: React.FC
= ({
| {t('settings.documents.name')} |
- {t('settings.documents.date')} |
- {t('settings.documents.tokenUsage')} |
- {t('settings.documents.type')} |
+
+
+ {t('settings.documents.date')}
+ 
+
+ |
+
+
+ {t('settings.documents.tokenUsage')}
+ 
+
+ |
+
+
+ {t('settings.documents.type')}
+ 
+
+ |
|
- {!documents?.length && (
+ {!filteredDocuments?.length && (
|
{t('settings.documents.noData')}
|
)}
- {documents &&
- documents.map((document, index) => (
+ {filteredDocuments &&
+ filteredDocuments.map((document, index) => (
| {document.name} |
{document.date} |
@@ -101,7 +153,7 @@ const Documents: React.FC = ({
{
event.stopPropagation();
@@ -130,6 +182,19 @@ const Documents: React.FC = ({
)}
+ {/* Conditionally render the Upload modal based on modalState */}
+ {modalState === 'ACTIVE' && (
+
+
+ {/* Your Upload component */}
+
+
+
+ )}
);
diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx
index 37a1fc0c..2da284c3 100644
--- a/frontend/src/upload/Upload.tsx
+++ b/frontend/src/upload/Upload.tsx
@@ -314,7 +314,10 @@ function Upload({
'application/zip': ['.zip'],
'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
['.docx'],
+ 'application/json': ['.json'],
'text/csv': ['.csv'],
+ 'text/html': ['.html'],
+ 'application/epub+zip': ['.epub'],
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': [
'.xlsx',
],