From b5dc7281f99c76ed33483f66ac3131b8df6e044d Mon Sep 17 00:00:00 2001 From: Anas Khafaga Date: Tue, 1 Oct 2024 15:29:58 +0300 Subject: [PATCH 01/23] feat: add a row for no document state --- frontend/src/locale/en.json | 3 ++- frontend/src/locale/es.json | 3 ++- frontend/src/locale/jp.json | 3 ++- frontend/src/locale/zh.json | 3 ++- frontend/src/settings/Documents.tsx | 7 +++++++ 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index 645703a2..dfd1cdef 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -54,7 +54,8 @@ "name": "Document Name", "date": "Vector Date", "type": "Type", - "tokenUsage": "Token Usage" + "tokenUsage": "Token Usage", + "noData": "No existing Documents" }, "apiKeys": { "label": "Chatbots", diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index 49aa5d53..8e0db8b3 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -54,7 +54,8 @@ "name": "Nombre del Documento", "date": "Fecha Vector", "type": "Tipo", - "tokenUsage": "Uso de Tokens" + "tokenUsage": "Uso de Tokens", + "noData": "No hay documentos existentes" }, "apiKeys": { "label": "Chatbots", diff --git a/frontend/src/locale/jp.json b/frontend/src/locale/jp.json index 9e367330..d7ec1d3c 100644 --- a/frontend/src/locale/jp.json +++ b/frontend/src/locale/jp.json @@ -54,7 +54,8 @@ "name": "ドキュメント名", "date": "ベクトル日付", "type": "タイプ", - "tokenUsage": "トークン使用量" + "tokenUsage": "トークン使用量", + "noData": "既存のドキュメントはありません" }, "apiKeys": { "label": "チャットボット", diff --git a/frontend/src/locale/zh.json b/frontend/src/locale/zh.json index 81eff996..fb40541e 100644 --- a/frontend/src/locale/zh.json +++ b/frontend/src/locale/zh.json @@ -54,7 +54,8 @@ "name": "文件名称", "date": "向量日期", "type": "类型", - "tokenUsage": "令牌使用" + "tokenUsage": "令牌使用", + "noData": "没有现有的文档" }, "apiKeys": { "label": "聊天机器人", diff --git a/frontend/src/settings/Documents.tsx b/frontend/src/settings/Documents.tsx index ee88a98f..8dafb6d3 100644 --- a/frontend/src/settings/Documents.tsx +++ b/frontend/src/settings/Documents.tsx @@ -74,6 +74,13 @@ const Documents: React.FC = ({ + {!documents?.length && ( + + + {t('settings.documents.noData')} + + + )} {documents && documents.map((document, index) => ( From 09ba14b8ca797de8d182d9ee6014b9d7b739adda Mon Sep 17 00:00:00 2001 From: Anas Khafaga Date: Tue, 1 Oct 2024 15:30:29 +0300 Subject: [PATCH 02/23] feat: add a row for no chatbot state --- frontend/src/locale/en.json | 3 ++- frontend/src/locale/es.json | 3 ++- frontend/src/locale/jp.json | 3 ++- frontend/src/locale/zh.json | 3 ++- frontend/src/settings/APIKeys.tsx | 7 +++++++ 5 files changed, 15 insertions(+), 4 deletions(-) diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index dfd1cdef..fa2cac3c 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -62,7 +62,8 @@ "name": "Name", "key": "API Key", "sourceDoc": "Source Document", - "createNew": "Create New" + "createNew": "Create New", + "noData": "No existing Chatbots" }, "analytics": { "label": "Analytics" diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index 8e0db8b3..7b7dbec0 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -62,7 +62,8 @@ "name": "Nombre", "key": "Clave de API", "sourceDoc": "Documento Fuente", - "createNew": "Crear Nuevo" + "createNew": "Crear Nuevo", + "noData": "No hay chatbots existentes" }, "analytics": { "label": "Analítica" diff --git a/frontend/src/locale/jp.json b/frontend/src/locale/jp.json index d7ec1d3c..fa61c291 100644 --- a/frontend/src/locale/jp.json +++ b/frontend/src/locale/jp.json @@ -62,7 +62,8 @@ "name": "名前", "key": "APIキー", "sourceDoc": "ソースドキュメント", - "createNew": "新規作成" + "createNew": "新規作成", + "noData": "既存のチャットボットはありません" }, "analytics": { "label": "分析" diff --git a/frontend/src/locale/zh.json b/frontend/src/locale/zh.json index fb40541e..080c4ee3 100644 --- a/frontend/src/locale/zh.json +++ b/frontend/src/locale/zh.json @@ -62,7 +62,8 @@ "name": "名称", "key": "API 密钥", "sourceDoc": "源文档", - "createNew": "创建新的" + "createNew": "创建新的", + "noData": "没有现有的聊天机器人" }, "analytics": { "label": "分析" diff --git a/frontend/src/settings/APIKeys.tsx b/frontend/src/settings/APIKeys.tsx index ebb32268..f224d016 100644 --- a/frontend/src/settings/APIKeys.tsx +++ b/frontend/src/settings/APIKeys.tsx @@ -116,6 +116,13 @@ export default function APIKeys() { + {!apiKeys?.length && ( + + + {t('settings.apiKeys.noData')} + + + )} {apiKeys?.map((element, index) => ( {element.name} From aa7f59f88c0dbc6fe8efd23e31b4d08c8452784e Mon Sep 17 00:00:00 2001 From: Devendra Parihar <54232149+Devparihar5@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:10:06 +0000 Subject: [PATCH 03/23] fix: Refactor FaissStore to enhance error handling, improve type hints, and document methods for better maintainability and usability --- application/vectorstore/faiss.py | 43 +++++++++++++------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index a8839cd2..ee74b971 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -3,30 +3,27 @@ from application.vectorstore.base import BaseVectorStore from application.core.settings import settings import os -def get_vectorstore(path): +def get_vectorstore(path: str) -> str: if path: - vectorstore = "indexes/"+path - vectorstore = os.path.join("application", vectorstore) + vectorstore = os.path.join("application", "indexes", path) else: vectorstore = os.path.join("application") - return vectorstore class FaissStore(BaseVectorStore): - - def __init__(self, source_id, embeddings_key, docs_init=None): + def __init__(self, source_id: str, embeddings_key: str, docs_init=None): super().__init__() self.path = get_vectorstore(source_id) embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key) - if docs_init: - self.docsearch = FAISS.from_documents( - docs_init, embeddings - ) - else: - self.docsearch = FAISS.load_local( - self.path, embeddings, - allow_dangerous_deserialization=True - ) + + try: + if docs_init: + self.docsearch = FAISS.from_documents(docs_init, embeddings) + else: + self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True) + except Exception as e: + raise + self.assert_embedding_dimensions(embeddings) def search(self, *args, **kwargs): @@ -42,16 +39,12 @@ class FaissStore(BaseVectorStore): return self.docsearch.delete(*args, **kwargs) def assert_embedding_dimensions(self, embeddings): - """ - Check that the word embedding dimension of the docsearch index matches - the dimension of the word embeddings used - """ + """Check that the word embedding dimension of the docsearch index matches the dimension of the word embeddings used.""" if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2": - try: - word_embedding_dimension = embeddings.dimension - except AttributeError as e: - raise AttributeError("'dimension' attribute not found in embeddings instance. Make sure the embeddings object is properly initialized.") from e + word_embedding_dimension = getattr(embeddings, 'dimension', None) + if word_embedding_dimension is None: + raise AttributeError("'dimension' attribute not found in embeddings instance.") + docsearch_index_dimension = self.docsearch.index.d if word_embedding_dimension != docsearch_index_dimension: - raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) " + - f"!= docsearch index dimension ({docsearch_index_dimension})") \ No newline at end of file + raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) != docsearch index dimension ({docsearch_index_dimension})") From ef6ec3fcb8bc0a63183a1d56558b9fe1a7fa317b Mon Sep 17 00:00:00 2001 From: Devendra Parihar <54232149+Devparihar5@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:32:08 +0000 Subject: [PATCH 04/23] fix: Fix unused exception variable in FaissStore. --- application/vectorstore/faiss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index ee74b971..e6c13bcd 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -21,8 +21,8 @@ class FaissStore(BaseVectorStore): self.docsearch = FAISS.from_documents(docs_init, embeddings) else: self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True) - except Exception as e: - raise + except Exception: + raise # Just re-raise the exception without assigning to e self.assert_embedding_dimensions(embeddings) From 19315f72a0fe2dd0a9ebce2286ae154cf0c76396 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 1 Oct 2024 23:15:33 +0100 Subject: [PATCH 05/23] Update README.md --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7ceb75b9..a88f2fc5 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,11 @@ Say goodbye to time-consuming manual searches, and let + Let's chat + + +[Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions) ![video-example-of-docs-gpt](https://d3dg1063dc54p9.cloudfront.net/videos/demov3.gif) From ab77c4e616fc2a20928f1d2121d02d0b8c7b0315 Mon Sep 17 00:00:00 2001 From: Kom Senapati Date: Wed, 2 Oct 2024 03:59:24 +0000 Subject: [PATCH 06/23] feat: Add support for mathematical equations in Markdown --- frontend/package-lock.json | 389 +++++++++++++++++- frontend/package.json | 4 +- .../src/conversation/ConversationBubble.tsx | 23 +- 3 files changed, 411 insertions(+), 5 deletions(-) diff --git a/frontend/package-lock.json b/frontend/package-lock.json index a09e14c6..1a6e0ce3 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -24,7 +24,9 @@ "react-redux": "^8.0.5", "react-router-dom": "^6.8.1", "react-syntax-highlighter": "^15.5.0", - "remark-gfm": "^4.0.0" + "rehype-katex": "^7.0.1", + "remark-gfm": "^4.0.0", + "remark-math": "^6.0.0" }, "devDependencies": { "@types/react": "^18.0.27", @@ -1636,6 +1638,12 @@ "integrity": "sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==", "dev": true }, + "node_modules/@types/katex": { + "version": "0.16.7", + "resolved": "https://registry.npmjs.org/@types/katex/-/katex-0.16.7.tgz", + "integrity": "sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==", + "license": "MIT" + }, "node_modules/@types/mdast": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/@types/mdast/-/mdast-4.0.4.tgz", @@ -3104,7 +3112,6 @@ "version": "4.5.0", "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", - "dev": true, "engines": { "node": ">=0.12" }, @@ -4582,6 +4589,193 @@ "node": ">= 0.4" } }, + "node_modules/hast-util-from-dom": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/hast-util-from-dom/-/hast-util-from-dom-5.0.0.tgz", + "integrity": "sha512-d6235voAp/XR3Hh5uy7aGLbM3S4KamdW0WEgOaU1YoewnuYw4HXb5eRtv9g65m/RFGEfUY1Mw4UqCc5Y8L4Stg==", + "license": "ISC", + "dependencies": { + "@types/hast": "^3.0.0", + "hastscript": "^8.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-dom/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-dom/node_modules/hast-util-parse-selector": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", + "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-dom/node_modules/hastscript": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz", + "integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^4.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/hast-util-from-html/-/hast-util-from-html-2.0.3.tgz", + "integrity": "sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "devlop": "^1.1.0", + "hast-util-from-parse5": "^8.0.0", + "parse5": "^7.0.0", + "vfile": "^6.0.0", + "vfile-message": "^4.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html-isomorphic": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/hast-util-from-html-isomorphic/-/hast-util-from-html-isomorphic-2.0.0.tgz", + "integrity": "sha512-zJfpXq44yff2hmE0XmwEOzdWin5xwH+QIhMLOScpX91e/NSGPsAzNCvLQDIEPyO2TXi+lBmU6hjLIhV8MwP2kw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "hast-util-from-dom": "^5.0.0", + "hast-util-from-html": "^2.0.0", + "unist-util-remove-position": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-html-isomorphic/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-html/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-parse5": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/hast-util-from-parse5/-/hast-util-from-parse5-8.0.1.tgz", + "integrity": "sha512-Er/Iixbc7IEa7r/XLtuG52zoqn/b3Xng/w6aZQ0xGVxzhw5xUFxcRqdPzP6yFi/4HBYRaifaI5fQ1RH8n0ZeOQ==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "devlop": "^1.0.0", + "hastscript": "^8.0.0", + "property-information": "^6.0.0", + "vfile": "^6.0.0", + "vfile-location": "^5.0.0", + "web-namespaces": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, + "node_modules/hast-util-from-parse5/node_modules/hast-util-parse-selector": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-4.0.0.tgz", + "integrity": "sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-from-parse5/node_modules/hastscript": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/hastscript/-/hastscript-8.0.0.tgz", + "integrity": "sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "comma-separated-tokens": "^2.0.0", + "hast-util-parse-selector": "^4.0.0", + "property-information": "^6.0.0", + "space-separated-tokens": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-is-element": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/hast-util-is-element/-/hast-util-is-element-3.0.0.tgz", + "integrity": "sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-is-element/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/hast-util-parse-selector": { "version": "2.2.5", "resolved": "https://registry.npmjs.org/hast-util-parse-selector/-/hast-util-parse-selector-2.2.5.tgz", @@ -4625,6 +4819,31 @@ "@types/unist": "*" } }, + "node_modules/hast-util-to-text": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-4.0.2.tgz", + "integrity": "sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/unist": "^3.0.0", + "hast-util-is-element": "^3.0.0", + "unist-util-find-after": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/hast-util-to-text/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/hast-util-whitespace": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/hast-util-whitespace/-/hast-util-whitespace-3.0.0.tgz", @@ -5421,6 +5640,31 @@ "node": ">=4.0" } }, + "node_modules/katex": { + "version": "0.16.11", + "resolved": "https://registry.npmjs.org/katex/-/katex-0.16.11.tgz", + "integrity": "sha512-RQrI8rlHY92OLf3rho/Ts8i/XvjgguEjOkO1BEXcU3N8BqPpSzBNwV/G0Ukr+P/l3ivvJUE/Fa/CwbS6HesGNQ==", + "funding": [ + "https://opencollective.com/katex", + "https://github.com/sponsors/katex" + ], + "license": "MIT", + "dependencies": { + "commander": "^8.3.0" + }, + "bin": { + "katex": "cli.js" + } + }, + "node_modules/katex/node_modules/commander": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-8.3.0.tgz", + "integrity": "sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, "node_modules/levn": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", @@ -5837,6 +6081,34 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/mdast-util-math": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-math/-/mdast-util-math-3.0.0.tgz", + "integrity": "sha512-Tl9GBNeG/AhJnQM221bJR2HPvLOSnLE/T9cJI9tlc6zwQk2nPk/4f0cHkOdEixQPC/j8UtKDdITswvLAy1OZ1w==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "longest-streak": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.1.0", + "unist-util-remove-position": "^5.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-math/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/mdast-util-mdx-expression": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/mdast-util-mdx-expression/-/mdast-util-mdx-expression-2.0.0.tgz", @@ -6269,6 +6541,25 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/micromark-extension-math": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-math/-/micromark-extension-math-3.1.0.tgz", + "integrity": "sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==", + "license": "MIT", + "dependencies": { + "@types/katex": "^0.16.0", + "devlop": "^1.0.0", + "katex": "^0.16.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/micromark-factory-destination": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.0.tgz", @@ -7055,6 +7346,18 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/parse5": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz", + "integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==", + "license": "MIT", + "dependencies": { + "entities": "^4.4.0" + }, + "funding": { + "url": "https://github.com/inikulin/parse5?sponsor=1" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -7831,6 +8134,34 @@ "url": "https://github.com/sponsors/mysticatea" } }, + "node_modules/rehype-katex": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/rehype-katex/-/rehype-katex-7.0.1.tgz", + "integrity": "sha512-OiM2wrZ/wuhKkigASodFoo8wimG3H12LWQaH8qSPVJn9apWKFSH3YOCtbKpBorTVw/eI7cuT21XBbvwEswbIOA==", + "license": "MIT", + "dependencies": { + "@types/hast": "^3.0.0", + "@types/katex": "^0.16.0", + "hast-util-from-html-isomorphic": "^2.0.0", + "hast-util-to-text": "^4.0.0", + "katex": "^0.16.0", + "unist-util-visit-parents": "^6.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/rehype-katex/node_modules/@types/hast": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/hast/-/hast-3.0.4.tgz", + "integrity": "sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "*" + } + }, "node_modules/remark-gfm": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.0.tgz", @@ -7848,6 +8179,22 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/remark-math": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/remark-math/-/remark-math-6.0.0.tgz", + "integrity": "sha512-MMqgnP74Igy+S3WwnhQ7kqGlEerTETXMvJhrUzDikVZ2/uogJCb+WHUg97hK9/jcfc0dkD73s3LN8zU49cTEtA==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-math": "^3.0.0", + "micromark-extension-math": "^3.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remark-parse": { "version": "11.0.0", "resolved": "https://registry.npmjs.org/remark-parse/-/remark-parse-11.0.0.tgz", @@ -8962,6 +9309,20 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/unist-util-find-after": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-5.0.0.tgz", + "integrity": "sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "unist-util-is": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/unist-util-is": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-6.0.0.tgz", @@ -9105,6 +9466,20 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/vfile-location": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/vfile-location/-/vfile-location-5.0.3.tgz", + "integrity": "sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==", + "license": "MIT", + "dependencies": { + "@types/unist": "^3.0.0", + "vfile": "^6.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/vfile-message": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/vfile-message/-/vfile-message-4.0.2.tgz", @@ -9200,6 +9575,16 @@ "node": ">=0.10.0" } }, + "node_modules/web-namespaces": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", + "integrity": "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/frontend/package.json b/frontend/package.json index 4619fd2e..176c4fd9 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -35,7 +35,9 @@ "react-redux": "^8.0.5", "react-router-dom": "^6.8.1", "react-syntax-highlighter": "^15.5.0", - "remark-gfm": "^4.0.0" + "rehype-katex": "^7.0.1", + "remark-gfm": "^4.0.0", + "remark-math": "^6.0.0" }, "devDependencies": { "@types/react": "^18.0.27", diff --git a/frontend/src/conversation/ConversationBubble.tsx b/frontend/src/conversation/ConversationBubble.tsx index 3741bfa1..543699ed 100644 --- a/frontend/src/conversation/ConversationBubble.tsx +++ b/frontend/src/conversation/ConversationBubble.tsx @@ -4,6 +4,9 @@ import { useSelector } from 'react-redux'; import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'; import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism'; import remarkGfm from 'remark-gfm'; +import remarkMath from 'remark-math'; +import rehypeKatex from 'rehype-katex'; +import 'katex/dist/katex.min.css'; import Alert from '../assets/alert.svg'; import DocsGPT3 from '../assets/cute_docsgpt3.svg'; @@ -62,6 +65,21 @@ const ConversationBubble = forwardRef< ); } else { + const preprocessLaTeX = (content: string) => { + // Replace block-level LaTeX delimiters \[ \] with $$ $$ + const blockProcessedContent = content.replace( + /\\\[(.*?)\\\]/gs, + (_, equation) => `$$${equation}$$`, + ); + + // Replace inline LaTeX delimiters \( \) with $ $ + const inlineProcessedContent = blockProcessedContent.replace( + /\\\((.*?)\\\)/gs, + (_, equation) => `$${equation}$`, + ); + + return inlineProcessedContent; + }; bubble = (
- {message} + {preprocessLaTeX(message)}
From a85f214fdbe15a903aa2b27a7665c3b25d97d541 Mon Sep 17 00:00:00 2001 From: Anas Khafaga Date: Wed, 2 Oct 2024 07:01:27 +0300 Subject: [PATCH 07/23] fix: sharp corner of the cell --- frontend/src/settings/APIKeys.tsx | 2 +- frontend/src/settings/Documents.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/frontend/src/settings/APIKeys.tsx b/frontend/src/settings/APIKeys.tsx index f224d016..e27f5696 100644 --- a/frontend/src/settings/APIKeys.tsx +++ b/frontend/src/settings/APIKeys.tsx @@ -118,7 +118,7 @@ export default function APIKeys() { {!apiKeys?.length && ( - + {t('settings.apiKeys.noData')} diff --git a/frontend/src/settings/Documents.tsx b/frontend/src/settings/Documents.tsx index 8dafb6d3..28816062 100644 --- a/frontend/src/settings/Documents.tsx +++ b/frontend/src/settings/Documents.tsx @@ -76,7 +76,7 @@ const Documents: React.FC = ({ {!documents?.length && ( - + {t('settings.documents.noData')} From 43018840d16cc57ecb409e380f386cc5aae6aa63 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 2 Oct 2024 10:35:00 +0100 Subject: [PATCH 08/23] Update HACKTOBERFEST.md --- HACKTOBERFEST.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/HACKTOBERFEST.md b/HACKTOBERFEST.md index d3bed0fc..6db40db6 100644 --- a/HACKTOBERFEST.md +++ b/HACKTOBERFEST.md @@ -7,7 +7,7 @@ All contributors with accepted PRs will receive a cool Holopin! 🤩 (Watch out ### 🏆 Top 50 contributors will recieve a special T-shirt ### 🏆 [LLM Document analysis by LexEU competition](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md): -A separate competition is available for those sumbit best new retrieval / workflow method that will analyze a Document using EU laws. +A separate competition is available for those who sumbit new retrieval / workflow method that will analyze a Document using EU laws. With 200$, 100$, 50$ prize for 1st, 2nd and 3rd place respectively. You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md) @@ -15,7 +15,8 @@ You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/ ```text 🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs. -🧩 API extention: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent. +🧩 API extension: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent. +They can be a completely separate repo. For example [Telegram bot](https://github.com/arc53/tg-bot-docsgpt-extenstion) or [CLI extension](https://github.com/arc53/DocsGPT-cli) Non-Code Contributions: From 350c91889e13e058cd60b879703d478c9a2ea6cc Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 2 Oct 2024 10:35:31 +0100 Subject: [PATCH 09/23] Update HACKTOBERFEST.md --- HACKTOBERFEST.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/HACKTOBERFEST.md b/HACKTOBERFEST.md index 6db40db6..631f73ba 100644 --- a/HACKTOBERFEST.md +++ b/HACKTOBERFEST.md @@ -16,7 +16,10 @@ You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/ 🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs. 🧩 API extension: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent. -They can be a completely separate repo. For example [Telegram bot](https://github.com/arc53/tg-bot-docsgpt-extenstion) or [CLI extension](https://github.com/arc53/DocsGPT-cli) +They can be a completely separate repo. +For example: +https://github.com/arc53/tg-bot-docsgpt-extenstion or +https://github.com/arc53/DocsGPT-cli Non-Code Contributions: From 2989be47ccdd1f46e5f0324caf6eb015c0ee4743 Mon Sep 17 00:00:00 2001 From: YASH <139299779+Yash-2707@users.noreply.github.com> Date: Wed, 2 Oct 2024 22:38:43 +0530 Subject: [PATCH 10/23] Update migrate_to_v1_vectorstore.py Enhancement made in the code by error handling and logging batch processing and new functionalities like backup , progress tracking --- scripts/migrate_to_v1_vectorstore.py | 72 ++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/scripts/migrate_to_v1_vectorstore.py b/scripts/migrate_to_v1_vectorstore.py index 9a709795..da83c4c1 100644 --- a/scripts/migrate_to_v1_vectorstore.py +++ b/scripts/migrate_to_v1_vectorstore.py @@ -1,13 +1,35 @@ import pymongo import os +import shutil +import logging +from tqdm import tqdm + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger() + +# Configuration +MONGO_URI = "mongodb://localhost:27017/" +MONGO_ATLAS_URI = "mongodb+srv://:@/?retryWrites=true&w=majority" +DB_NAME = "docsgpt" + +def backup_collection(collection, backup_collection_name): + logger.info(f"Backing up collection {collection.name} to {backup_collection_name}") + collection.aggregate([{"$out": backup_collection_name}]) + logger.info("Backup completed") def migrate_to_v1_vectorstore_mongo(): - client = pymongo.MongoClient("mongodb://localhost:27017/") - db = client["docsgpt"] + client = pymongo.MongoClient(MONGO_URI) + db = client[DB_NAME] vectors_collection = db["vectors"] sources_collection = db["sources"] - for vector in vectors_collection.find(): + # Backup collections before migration + backup_collection(vectors_collection, "vectors_backup") + backup_collection(sources_collection, "sources_backup") + + vectors = list(vectors_collection.find()) + for vector in tqdm(vectors, desc="Updating vectors"): if "location" in vector: del vector["location"] if "retriever" not in vector: @@ -15,41 +37,53 @@ def migrate_to_v1_vectorstore_mongo(): vector["remote_data"] = None vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector}) - # move data from vectors_collection to sources_collection - for vector in vectors_collection.find(): + # Move data from vectors_collection to sources_collection + for vector in tqdm(vectors, desc="Moving to sources"): sources_collection.insert_one(vector) vectors_collection.drop() - client.close() + logger.info("Migration completed") def migrate_faiss_to_v1_vectorstore(): - client = pymongo.MongoClient("mongodb://localhost:27017/") - db = client["docsgpt"] + client = pymongo.MongoClient(MONGO_URI) + db = client[DB_NAME] vectors_collection = db["vectors"] - for vector in vectors_collection.find(): + vectors = list(vectors_collection.find()) + for vector in tqdm(vectors, desc="Migrating FAISS vectors"): old_path = f"./application/indexes/{vector['user']}/{vector['name']}" new_path = f"./application/indexes/{vector['_id']}" try: - os.rename(old_path, new_path) + os.makedirs(os.path.dirname(new_path), exist_ok=True) + shutil.move(old_path, new_path) except OSError as e: - print(f"Error moving {old_path} to {new_path}: {e}") + logger.error(f"Error moving {old_path} to {new_path}: {e}") client.close() + logger.info("FAISS migration completed") def migrate_mongo_atlas_vector_to_v1_vectorstore(): - client = pymongo.MongoClient("mongodb+srv://:@/?retryWrites=true&w=majority") - db = client["docsgpt"] + client = pymongo.MongoClient(MONGO_ATLAS_URI) + db = client[DB_NAME] vectors_collection = db["vectors"] - - # mongodb atlas collection documents_collection = db["documents"] - for vector in vectors_collection.find(): - documents_collection.update_many({"store": vector["user"] + "/" + vector["name"]}, {"$set": {"source_id": str(vector["_id"])}}) + # Backup collections before migration + backup_collection(vectors_collection, "vectors_backup") + backup_collection(documents_collection, "documents_backup") + + vectors = list(vectors_collection.find()) + for vector in tqdm(vectors, desc="Updating Mongo Atlas vectors"): + documents_collection.update_many( + {"store": vector["user"] + "/" + vector["name"]}, + {"$set": {"source_id": str(vector["_id"])}} + ) client.close() + logger.info("Mongo Atlas migration completed") -migrate_faiss_to_v1_vectorstore() -migrate_to_v1_vectorstore_mongo() \ No newline at end of file +if __name__ == "__main__": + migrate_faiss_to_v1_vectorstore() + migrate_to_v1_vectorstore_mongo() + migrate_mongo_atlas_vector_to_v1_vectorstore() From 2611550ffd849e12568379cba230ab9bd947be67 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 2 Oct 2024 23:44:29 +0100 Subject: [PATCH 11/23] --- application/api/user/routes.py | 12 ++++- application/parser/remote/github_loader.py | 49 +++++++++++++++++++++ application/parser/remote/remote_creator.py | 2 + frontend/src/upload/Upload.tsx | 34 +++++++++++++- 4 files changed, 94 insertions(+), 3 deletions(-) diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 340d020a..c409e69a 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -363,6 +363,7 @@ class UploadRemote(Resource): ), "name": fields.String(required=True, description="Job name"), "data": fields.String(required=True, description="Data to process"), + "repo_url": fields.String(description="GitHub repository URL"), }, ) ) @@ -377,11 +378,18 @@ class UploadRemote(Resource): return missing_fields try: + if "repo_url" in data: + source_data = data["repo_url"] + loader = "github" + else: + source_data = data["data"] + loader = data["source"] + task = ingest_remote.delay( - source_data=data["data"], + source_data=source_data, job_name=data["name"], user=data["user"], - loader=data["source"], + loader=loader, ) except Exception as err: return make_response(jsonify({"success": False, "error": str(err)}), 400) diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index e69de29b..2839f48d 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -0,0 +1,49 @@ +import os +import base64 +import requests +from typing import List +from application.parser.remote.base import BaseRemote +from application.parser.schema.base import Document + +class GitHubLoader(BaseRemote): + def __init__(self, access_token: str): + self.access_token = access_token + + def fetch_file_content(self, repo_url: str, file_path: str) -> str: + url = f"https://api.github.com/repos/{repo_url}/contents/{file_path}" + headers = { + "Authorization": f"token {self.access_token}", + "Accept": "application/vnd.github.v3.raw" + } + response = requests.get(url, headers=headers) + response.raise_for_status() + content = response.json() + if content.get("encoding") == "base64": + return base64.b64decode(content["content"]).decode("utf-8") + return content["content"] + + def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]: + url = f"https://api.github.com/repos/{repo_url}/contents/{path}" + headers = { + "Authorization": f"token {self.access_token}", + "Accept": "application/vnd.github.v3.raw" + } + response = requests.get(url, headers=headers) + response.raise_for_status() + contents = response.json() + files = [] + for item in contents: + if item["type"] == "file": + files.append(item["path"]) + elif item["type"] == "dir": + files.extend(self.fetch_repo_files(repo_url, item["path"])) + return files + + def load_data(self, repo_url: str) -> List[Document]: + repo_name = repo_url.split("github.com/")[-1] + files = self.fetch_repo_files(repo_name) + documents = [] + for file_path in files: + content = self.fetch_file_content(repo_name, file_path) + documents.append(Document(content=content, metadata={"file_path": file_path})) + return documents diff --git a/application/parser/remote/remote_creator.py b/application/parser/remote/remote_creator.py index d2a58f8d..026abd76 100644 --- a/application/parser/remote/remote_creator.py +++ b/application/parser/remote/remote_creator.py @@ -2,6 +2,7 @@ from application.parser.remote.sitemap_loader import SitemapLoader from application.parser.remote.crawler_loader import CrawlerLoader from application.parser.remote.web_loader import WebLoader from application.parser.remote.reddit_loader import RedditPostsLoaderRemote +from application.parser.remote.github_loader import GitHubLoader class RemoteCreator: @@ -10,6 +11,7 @@ class RemoteCreator: "sitemap": SitemapLoader, "crawler": CrawlerLoader, "reddit": RedditPostsLoaderRemote, + "github": GitHubLoader, } @classmethod diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index b898e4b6..50a6d357 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -24,6 +24,7 @@ function Upload({ const [docName, setDocName] = useState(''); const [urlName, setUrlName] = useState(''); const [url, setUrl] = useState(''); + const [repoUrl, setRepoUrl] = useState(''); // P3f93 const [redditData, setRedditData] = useState({ client_id: '', client_secret: '', @@ -48,6 +49,7 @@ function Upload({ // { label: 'Sitemap', value: 'sitemap' }, { label: 'Link', value: 'url' }, { label: 'Reddit', value: 'reddit' }, + { label: 'GitHub', value: 'github' }, // P3f93 ]; const [urlType, setUrlType] = useState<{ label: string; value: string }>({ @@ -238,6 +240,9 @@ function Upload({ formData.set('name', 'other'); formData.set('data', JSON.stringify(redditData)); } + if (urlType.value === 'github') { + formData.append('repo_url', repoUrl); // Pdeac + } const apiHost = import.meta.env.VITE_API_HOST; const xhr = new XMLHttpRequest(); xhr.upload.addEventListener('progress', (event) => { @@ -376,7 +381,7 @@ function Upload({ size="w-full" rounded="3xl" /> - {urlType.label !== 'Reddit' ? ( + {urlType.label !== 'Reddit' && urlType.label !== 'GitHub' ? ( <> + ) : urlType.label === 'GitHub' ? ( // P3f93 + <> + setUrlName(e.target.value)} + borderVariant="thin" + > +
+ + {t('modals.uploadDoc.name')} + +
+ setRepoUrl(e.target.value)} + borderVariant="thin" + > +
+ + {t('modals.uploadDoc.repoUrl')} + +
+ ) : (
From 1616124fa20efe656cf5bfa25983cd01ff9a3100 Mon Sep 17 00:00:00 2001 From: negativenagesh Date: Thu, 3 Oct 2024 16:22:54 +0530 Subject: [PATCH 12/23] Documentation error in Hacktoberfest.md --- HACKTOBERFEST.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HACKTOBERFEST.md b/HACKTOBERFEST.md index 631f73ba..47679960 100644 --- a/HACKTOBERFEST.md +++ b/HACKTOBERFEST.md @@ -7,7 +7,7 @@ All contributors with accepted PRs will receive a cool Holopin! 🤩 (Watch out ### 🏆 Top 50 contributors will recieve a special T-shirt ### 🏆 [LLM Document analysis by LexEU competition](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md): -A separate competition is available for those who sumbit new retrieval / workflow method that will analyze a Document using EU laws. +A separate competition is available for those who submit new retrieval / workflow method that will analyze a Document using EU laws. With 200$, 100$, 50$ prize for 1st, 2nd and 3rd place respectively. You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md) From 03adfd4898d78ba9d68db17fd23bae90dd2cafc4 Mon Sep 17 00:00:00 2001 From: JeevaRamanathan Date: Thu, 3 Oct 2024 23:34:56 +0530 Subject: [PATCH 13/23] fix:navigation and deletion issues in conversations Signed-off-by: JeevaRamanathan --- frontend/src/Navigation.tsx | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx index 87cbbe51..7d3333ee 100644 --- a/frontend/src/Navigation.tsx +++ b/frontend/src/Navigation.tsx @@ -119,6 +119,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { .delete(id, {}) .then(() => { fetchConversations(); + resetConversation(); }) .catch((error) => console.error(error)); }; @@ -155,6 +156,15 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { }); }; + const resetConversation = () => { + dispatch(setConversation([])); + dispatch( + updateConversationId({ + query: { conversationId: null }, + }), + ); + }; + async function updateConversationName(updatedConversation: { name: string; id: string; @@ -235,14 +245,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
{ - dispatch(setConversation([])); - dispatch( - updateConversationId({ - query: { conversationId: null }, - }), - ); - }} + onClick={resetConversation} className={({ isActive }) => `${ isActive ? 'bg-gray-3000 dark:bg-transparent' : '' @@ -310,6 +313,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { isActive ? 'bg-gray-3000 dark:bg-transparent' : '' }` } + onClick={resetConversation} > Date: Thu, 3 Oct 2024 21:42:39 +0100 Subject: [PATCH 14/23] Update README.md --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index a88f2fc5..f1942dc1 100644 --- a/README.md +++ b/README.md @@ -25,8 +25,6 @@ Say goodbye to time-consuming manual searches, and let

{demo.header} From d71675f3d20a169588ad5cfa4fc262d82e9e0ab6 Mon Sep 17 00:00:00 2001 From: Anas Khafaga Date: Fri, 4 Oct 2024 23:07:05 +0300 Subject: [PATCH 16/23] feat: create a custom global tailwind component (table) styles --- frontend/src/index.css | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/frontend/src/index.css b/frontend/src/index.css index 805f0cfa..5bcc7683 100644 --- a/frontend/src/index.css +++ b/frontend/src/index.css @@ -47,6 +47,28 @@ body.dark { } } +@layer components { + .table-default { + @apply block w-max table-auto content-center justify-center rounded-xl border border-silver dark:border-silver/40 text-center dark:text-bright-gray; + } + + .table-default th { + @apply border-r border-silver dark:border-silver/40 p-4 w-[244px]; + } + + .table-default th:last-child { + @apply w-[auto] border-r-0; + } + + .table-default td { + @apply border-r border-t border-silver dark:border-silver/40 px-4 py-2; + } + + .table-default td:last-child { + @apply border-r-0; + } +} + /*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */ /* Document From aad12aa22734808f8cb6e44ce74652c5de473d60 Mon Sep 17 00:00:00 2001 From: Anas Khafaga Date: Fri, 4 Oct 2024 23:07:17 +0300 Subject: [PATCH 17/23] fix: update tables styles --- frontend/src/settings/APIKeys.tsx | 26 ++++++++------------- frontend/src/settings/Documents.tsx | 36 ++++++++++------------------- 2 files changed, 22 insertions(+), 40 deletions(-) diff --git a/frontend/src/settings/APIKeys.tsx b/frontend/src/settings/APIKeys.tsx index e27f5696..4517e647 100644 --- a/frontend/src/settings/APIKeys.tsx +++ b/frontend/src/settings/APIKeys.tsx @@ -100,35 +100,29 @@ export default function APIKeys() { )}

- +
- - - - + + + + {!apiKeys?.length && ( - )} {apiKeys?.map((element, index) => ( - - - - + + +
- {t('settings.apiKeys.name')} - - {t('settings.apiKeys.sourceDoc')} - - {t('settings.apiKeys.key')} - {t('settings.apiKeys.name')}{t('settings.apiKeys.sourceDoc')}{t('settings.apiKeys.key')}
+ {t('settings.apiKeys.noData')}
{element.name}{element.source}{element.key} + {element.name}{element.source}{element.key} Delete = ({
- +
- - - - - + + + + + {!documents?.length && ( - @@ -84,19 +76,15 @@ const Documents: React.FC = ({ {documents && documents.map((document, index) => ( - - - + + - -
- {t('settings.documents.name')} - - {t('settings.documents.date')} - - {t('settings.documents.tokenUsage')} - - {t('settings.documents.type')} - {t('settings.documents.name')}{t('settings.documents.date')}{t('settings.documents.tokenUsage')}{t('settings.documents.type')}
+ {t('settings.documents.noData')}
- {document.name} - - {document.date} - + {document.name}{document.date} {document.tokens ? formatTokens(+document.tokens) : ''} + {document.type === 'remote' ? 'Pre-loaded' : 'Private'} +
{document.type !== 'remote' && ( Date: Sat, 5 Oct 2024 02:46:32 +0530 Subject: [PATCH 18/23] fix(stream): while updating conv id --- frontend/src/conversation/Conversation.tsx | 4 ---- frontend/src/conversation/conversationSlice.ts | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index d5908ca3..7bcfbbf6 100644 --- a/frontend/src/conversation/Conversation.tsx +++ b/frontend/src/conversation/Conversation.tsx @@ -54,10 +54,6 @@ export default function Conversation() { } }, []); - useEffect(() => { - fetchStream.current && fetchStream.current.abort(); - }, [conversationId]); - useEffect(() => { if (queries.length) { queries[queries.length - 1].error && setLastQueryReturnedErr(true); diff --git a/frontend/src/conversation/conversationSlice.ts b/frontend/src/conversation/conversationSlice.ts index bf375311..1b7e9d41 100644 --- a/frontend/src/conversation/conversationSlice.ts +++ b/frontend/src/conversation/conversationSlice.ts @@ -151,6 +151,7 @@ export const conversationSlice = createSlice({ state, action: PayloadAction<{ index: number; query: Partial }>, ) { + if (state.status === 'idle') return; const { index, query } = action.payload; if (query.response != undefined) { state.queries[index].response = @@ -167,6 +168,7 @@ export const conversationSlice = createSlice({ action: PayloadAction<{ query: Partial }>, ) { state.conversationId = action.payload.query.conversationId ?? null; + state.status = 'idle'; }, updateStreamingSource( state, From 8fa88175c1fe9ecba31c74aa9f17bf59f1aaddc7 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 5 Oct 2024 21:33:58 +0100 Subject: [PATCH 19/23] fix: translation + auth --- application/parser/remote/github_loader.py | 43 ++++++++++++---------- frontend/src/locale/en.json | 1 + frontend/src/locale/es.json | 1 + frontend/src/locale/jp.json | 1 + frontend/src/locale/zh.json | 1 + 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index 2839f48d..f72d5278 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -1,34 +1,37 @@ -import os import base64 import requests from typing import List from application.parser.remote.base import BaseRemote -from application.parser.schema.base import Document +from langchain_core.documents import Document class GitHubLoader(BaseRemote): - def __init__(self, access_token: str): - self.access_token = access_token + def __init__(self): + self.access_token = None + self.headers = { + "Authorization": f"token {self.access_token}" + } if self.access_token else {} + return def fetch_file_content(self, repo_url: str, file_path: str) -> str: url = f"https://api.github.com/repos/{repo_url}/contents/{file_path}" - headers = { - "Authorization": f"token {self.access_token}", - "Accept": "application/vnd.github.v3.raw" - } - response = requests.get(url, headers=headers) - response.raise_for_status() - content = response.json() - if content.get("encoding") == "base64": - return base64.b64decode(content["content"]).decode("utf-8") - return content["content"] + response = requests.get(url, headers=self.headers) + + if response.status_code == 200: + content = response.json() + if content.get("encoding") == "base64": + try: + decoded_content = base64.b64decode(content["content"]).decode("utf-8") + return decoded_content + except Exception as e: + raise + else: + return content["content"] + else: + response.raise_for_status() def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]: url = f"https://api.github.com/repos/{repo_url}/contents/{path}" - headers = { - "Authorization": f"token {self.access_token}", - "Accept": "application/vnd.github.v3.raw" - } - response = requests.get(url, headers=headers) + response = requests.get(url, headers=self.headers) response.raise_for_status() contents = response.json() files = [] @@ -45,5 +48,5 @@ class GitHubLoader(BaseRemote): documents = [] for file_path in files: content = self.fetch_file_content(repo_name, file_path) - documents.append(Document(content=content, metadata={"file_path": file_path})) + documents.append(Document(page_content=content, metadata={"file_path": file_path})) return documents diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index fa2cac3c..c9b599bf 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -85,6 +85,7 @@ "train": "Train", "link": "Link", "urlLink": "URL Link", + "repoUrl": "Repository URL", "reddit": { "id": "Client ID", "secret": "Client Secret", diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index 7b7dbec0..98b38d7c 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -85,6 +85,7 @@ "train": "Entrenar", "link": "Enlace", "urlLink": "Enlace URL", + "repoUrl": "URL del Repositorio", "reddit": { "id": "ID de Cliente", "secret": "Secreto de Cliente", diff --git a/frontend/src/locale/jp.json b/frontend/src/locale/jp.json index fa61c291..b34cc5e5 100644 --- a/frontend/src/locale/jp.json +++ b/frontend/src/locale/jp.json @@ -85,6 +85,7 @@ "train": "トレーニング", "link": "リンク", "urlLink": "URLリンク", + "repoUrl": "リポジトリURL", "reddit": { "id": "クライアントID", "secret": "クライアントシークレット", diff --git a/frontend/src/locale/zh.json b/frontend/src/locale/zh.json index 080c4ee3..7decdefe 100644 --- a/frontend/src/locale/zh.json +++ b/frontend/src/locale/zh.json @@ -85,6 +85,7 @@ "train": "训练", "link": "链接", "urlLink": "URL 链接", + "repoUrl": "存储库 URL", "reddit": { "id": "客户端 ID", "secret": "客户端密钥", From 1ad82c22d977cdd40dd27ca2afe8b4a5e7b37d26 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 5 Oct 2024 21:36:04 +0100 Subject: [PATCH 20/23] fix: headers --- application/parser/remote/github_loader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index f72d5278..8ffa5af1 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -31,8 +31,7 @@ class GitHubLoader(BaseRemote): def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]: url = f"https://api.github.com/repos/{repo_url}/contents/{path}" - response = requests.get(url, headers=self.headers) - response.raise_for_status() + response = requests.get(url, headers={**self.headers, "Accept": "application/vnd.github.v3.raw"}) contents = response.json() files = [] for item in contents: From 7717242112b7ed4752eb73d3ded5761a7d170a10 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 5 Oct 2024 21:37:55 +0100 Subject: [PATCH 21/23] fix(lint): ruff var --- application/parser/remote/github_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index 8ffa5af1..2137e62a 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -23,6 +23,7 @@ class GitHubLoader(BaseRemote): decoded_content = base64.b64decode(content["content"]).decode("utf-8") return decoded_content except Exception as e: + print(f"Error decoding content for {file_path}: {e}") raise else: return content["content"] From c04687fdd1cc797121bd030d0f6eafde5b18d5d2 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 5 Oct 2024 21:53:30 +0100 Subject: [PATCH 22/23] fix: github loader metadata clickable --- application/parser/remote/github_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index 2137e62a..d35da20d 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -48,5 +48,6 @@ class GitHubLoader(BaseRemote): documents = [] for file_path in files: content = self.fetch_file_content(repo_name, file_path) - documents.append(Document(page_content=content, metadata={"file_path": file_path})) + documents.append(Document(page_content=content, metadata={"title": file_path, + "source": f"https://github.com/{repo_name}/blob/main/{file_path}"})) return documents From 6932c7e3e9e9231b8a1bbe8bceeffdc4ece3388d Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 5 Oct 2024 21:56:47 +0100 Subject: [PATCH 23/23] feat: add filename to the top --- application/parser/remote/github_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index d35da20d..49f0ae9c 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -21,12 +21,12 @@ class GitHubLoader(BaseRemote): if content.get("encoding") == "base64": try: decoded_content = base64.b64decode(content["content"]).decode("utf-8") - return decoded_content + return f"Filename: {file_path}\n\n{decoded_content}" except Exception as e: print(f"Error decoding content for {file_path}: {e}") raise else: - return content["content"] + return f"Filename: {file_path}\n\n{content['content']}" else: response.raise_for_status()