diff --git a/.github/dependabot.yml b/.github/dependabot.yml index a00cd334..dd0799c6 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -13,3 +13,7 @@ updates: directory: "/frontend" # Location of package manifests schedule: interval: "weekly" + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/holopin.yml b/.github/holopin.yml index d7487e9a..cc313f22 100644 --- a/.github/holopin.yml +++ b/.github/holopin.yml @@ -1,5 +1,11 @@ -organization: arc53 -defaultSticker: clqmdf0ed34290glbvqh0kzxd +organization: docsgpt +defaultSticker: cm1ulwkkl180570cl82rtzympu stickers: - - id: clqmdf0ed34290glbvqh0kzxd - alias: festive + - id: cm1ulwkkl180570cl82rtzympu + alias: contributor2024 + - id: cm1ureg8o130450cl8c1po6mil + alias: api + - id: cm1urhmag148240cl8yvqxkthx + alias: lpc + - id: cm1urlcpq622090cl2tvu4w71y + alias: lexeu diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ea8961f..be0263ff 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,22 +12,22 @@ jobs: contents: read packages: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v1 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Login to ghcr.io - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} diff --git a/.github/workflows/cife.yml b/.github/workflows/cife.yml index 73a97755..4b1cbf3b 100644 --- a/.github/workflows/cife.yml +++ b/.github/workflows/cife.yml @@ -12,22 +12,22 @@ jobs: contents: read packages: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up QEMU uses: 
docker/setup-qemu-action@v1 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Login to ghcr.io - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} diff --git a/.github/workflows/docker-develop-build.yml b/.github/workflows/docker-develop-build.yml index 5edc69d7..0bfc7e70 100644 --- a/.github/workflows/docker-develop-build.yml +++ b/.github/workflows/docker-develop-build.yml @@ -14,22 +14,22 @@ jobs: contents: read packages: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v1 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Login to ghcr.io - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} diff --git a/.github/workflows/docker-develop-fe-build.yml b/.github/workflows/docker-develop-fe-build.yml index 29ad4524..14dbccc5 100644 --- a/.github/workflows/docker-develop-fe-build.yml +++ b/.github/workflows/docker-develop-fe-build.yml @@ -14,22 +14,22 @@ jobs: contents: read packages: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up QEMU uses: docker/setup-qemu-action@v1 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v3 - name: Login to DockerHub - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ 
secrets.DOCKER_PASSWORD }} - name: Login to ghcr.io - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.repository_owner }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7ee31ebe..a36f529b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -11,7 +11,7 @@ jobs: ruff: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Lint with Ruff uses: chartboost/ruff-action@v1 diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c6615e56..b858a0f7 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -8,9 +8,9 @@ jobs: matrix: python-version: ["3.11"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies @@ -24,7 +24,7 @@ jobs: python -m pytest --cov=application --cov-report=xml - name: Upload coverage reports to Codecov if: github.event_name == 'pull_request' && matrix.python-version == '3.11' - uses: codecov/codecov-action@v3 + uses: codecov/codecov-action@v4 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/sync_fork.yaml b/.github/workflows/sync_fork.yaml index 81f222bb..a108daf6 100644 --- a/.github/workflows/sync_fork.yaml +++ b/.github/workflows/sync_fork.yaml @@ -17,7 +17,7 @@ jobs: steps: # Step 1: run a standard checkout action - name: Checkout target repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Step 2: run the sync action - name: Sync upstream changes diff --git a/application/api/user/routes.py b/application/api/user/routes.py index feee91cc..2ead8ef1 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -939,7 +939,7 @@ class ShareConversation(Resource): "conversation_id": DBRef( 
"conversations", ObjectId(conversation_id) ), - "isPromptable": not is_promptable, + "isPromptable": is_promptable, "first_n_queries": current_n_queries, "user": user, } @@ -962,7 +962,7 @@ class ShareConversation(Resource): "$ref": "conversations", "$id": ObjectId(conversation_id), }, - "isPromptable": not is_promptable, + "isPromptable": is_promptable, "first_n_queries": current_n_queries, "user": user, } diff --git a/application/core/settings.py b/application/core/settings.py index 7346da08..d4b02481 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -18,7 +18,7 @@ class Settings(BaseSettings): DEFAULT_MAX_HISTORY: int = 150 MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5} UPLOAD_FOLDER: str = "inputs" - VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" + VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb" RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search # LLM Cache @@ -70,6 +70,9 @@ class Settings(BaseSettings): MILVUS_URI: Optional[str] = "./milvus_local.db" # milvus lite version as default MILVUS_TOKEN: Optional[str] = "" + # LanceDB vectorstore config + LANCEDB_PATH: str = "/tmp/lancedb" # Path where LanceDB stores its local data + LANCEDB_TABLE_NAME: Optional[str] = "docsgpts" # Name of the table to use for storing vectors BRAVE_SEARCH_API_KEY: Optional[str] = None FLASK_DEBUG_MODE: bool = False diff --git a/application/vectorstore/lancedb.py b/application/vectorstore/lancedb.py new file mode 100644 index 00000000..25d62318 --- /dev/null +++ b/application/vectorstore/lancedb.py @@ -0,0 +1,119 @@ +from typing import List, Optional +import importlib +from application.vectorstore.base import BaseVectorStore +from application.core.settings import settings + +class LanceDBVectorStore(BaseVectorStore): + """Class for LanceDB Vector Store integration.""" + + def __init__(self, path: str = settings.LANCEDB_PATH, + 
table_name_prefix: str = settings.LANCEDB_TABLE_NAME, + source_id: str = None, + embeddings_key: str = "embeddings"): + """Initialize the LanceDB vector store.""" + super().__init__() + self.path = path + self.table_name = f"{table_name_prefix}_{source_id}" if source_id else table_name_prefix + self.embeddings_key = embeddings_key + self._lance_db = None + self.docsearch = None + self._pa = None # PyArrow (pa) will be lazy loaded + + @property + def pa(self): + """Lazy load pyarrow module.""" + if self._pa is None: + self._pa = importlib.import_module("pyarrow") + return self._pa + + @property + def lancedb(self): + """Lazy load lancedb module.""" + if not hasattr(self, "_lancedb_module"): + self._lancedb_module = importlib.import_module("lancedb") + return self._lancedb_module + + @property + def lance_db(self): + """Lazy load the LanceDB connection.""" + if self._lance_db is None: + self._lance_db = self.lancedb.connect(self.path) + return self._lance_db + + @property + def table(self): + """Lazy load the LanceDB table.""" + if self.docsearch is None: + if self.table_name in self.lance_db.table_names(): + self.docsearch = self.lance_db.open_table(self.table_name) + else: + self.docsearch = None + return self.docsearch + + def ensure_table_exists(self): + """Ensure the table exists before performing operations.""" + if self.table is None: + embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key) + schema = self.pa.schema([ + self.pa.field("vector", self.pa.list_(self.pa.float32(), list_size=embeddings.dimension)), + self.pa.field("text", self.pa.string()), + self.pa.field("metadata", self.pa.struct([ + self.pa.field("key", self.pa.string()), + self.pa.field("value", self.pa.string()) + ])) + ]) + self.docsearch = self.lance_db.create_table(self.table_name, schema=schema) + + def add_texts(self, texts: List[str], metadatas: Optional[List[dict]] = None, source_id: str = None): + """Add texts with metadata and their embeddings to the LanceDB 
table.""" + embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key).embed_documents(texts) + vectors = [] + for embedding, text, metadata in zip(embeddings, texts, metadatas or [{}] * len(texts)): + if source_id: + metadata["source_id"] = source_id + metadata_struct = [{"key": k, "value": str(v)} for k, v in metadata.items()] + vectors.append({ + "vector": embedding, + "text": text, + "metadata": metadata_struct + }) + self.ensure_table_exists() + self.docsearch.add(vectors) + + def search(self, query: str, k: int = 2, *args, **kwargs): + """Search LanceDB for the top k most similar vectors.""" + self.ensure_table_exists() + query_embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key).embed_query(query) + results = self.docsearch.search(query_embedding).limit(k).to_list() + return [(result["_distance"], result["text"], result["metadata"]) for result in results] + + def delete_index(self): + """Delete the entire LanceDB index (table).""" + if self.table: + self.lance_db.drop_table(self.table_name) + + def assert_embedding_dimensions(self, embeddings): + """Ensure that embedding dimensions match the table index dimensions.""" + word_embedding_dimension = embeddings.dimension + if self.table: + table_index_dimension = len(self.docsearch.schema["vector"].type.value_type) + if word_embedding_dimension != table_index_dimension: + raise ValueError( + f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) " + f"!= table index dimension ({table_index_dimension})" + ) + + def filter_documents(self, filter_condition: dict) -> List[dict]: + """Filter documents based on certain conditions.""" + self.ensure_table_exists() + + # Ensure source_id exists in the filter condition + if 'source_id' not in filter_condition: + raise ValueError("filter_condition must contain 'source_id'") + + source_id = filter_condition["source_id"] + + # Use LanceDB's native filtering if supported, otherwise filter manually + 
filtered_data = self.docsearch.filter(lambda x: x.metadata and x.metadata.get("source_id") == source_id).to_list() + + return filtered_data \ No newline at end of file diff --git a/docs/package-lock.json b/docs/package-lock.json index 99836cc6..78206570 100644 --- a/docs/package-lock.json +++ b/docs/package-lock.json @@ -7,7 +7,7 @@ "license": "MIT", "dependencies": { "@vercel/analytics": "^1.1.1", - "docsgpt": "^0.4.1", + "docsgpt": "^0.4.3", "next": "^14.2.12", "nextra": "^2.13.2", "nextra-theme-docs": "^2.13.2", @@ -422,11 +422,6 @@ "node": ">=6.9.0" } }, - "node_modules/@bpmn-io/snarkdown": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/@bpmn-io/snarkdown/-/snarkdown-2.2.0.tgz", - "integrity": "sha512-bVD7FIoaBDZeCJkMRgnBPDeptPlto87wt2qaCjf5t8iLaevDmTPaREd6FpBEGsHlUdHFFZWRk4qAoEC5So2M0Q==" - }, "node_modules/@braintree/sanitize-url": { "version": "6.0.4", "resolved": "https://registry.npmjs.org/@braintree/sanitize-url/-/sanitize-url-6.0.4.tgz", @@ -3162,30 +3157,6 @@ "cytoscape": "^3.2.0" } }, - "node_modules/cytoscape-fcose": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/cytoscape-fcose/-/cytoscape-fcose-2.2.0.tgz", - "integrity": "sha512-ki1/VuRIHFCzxWNrsshHYPs6L7TvLu3DL+TyIGEsRcvVERmxokbf5Gdk7mFxZnTdiGtnA4cfSmjZJMviqSuZrQ==", - "dependencies": { - "cose-base": "^2.2.0" - }, - "peerDependencies": { - "cytoscape": "^3.2.0" - } - }, - "node_modules/cytoscape-fcose/node_modules/cose-base": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/cose-base/-/cose-base-2.2.0.tgz", - "integrity": "sha512-AzlgcsCbUMymkADOJtQm3wO9S3ltPfYOFD5033keQn9NJzIbtnZj+UdBJe7DYml/8TdbtHJW3j58SOnKhWY/5g==", - "dependencies": { - "layout-base": "^2.0.0" - } - }, - "node_modules/cytoscape-fcose/node_modules/layout-base": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/layout-base/-/layout-base-2.0.1.tgz", - "integrity": "sha512-dp3s92+uNI1hWIpPGH3jK2kxE2lMjdXdr+DH8ynZHpd6PUlH6x6cbuXnoMmiNumznqaNO31xu9e79F0uuZ0JFg==" 
- }, "node_modules/d3": { "version": "7.8.5", "resolved": "https://registry.npmjs.org/d3/-/d3-7.8.5.tgz", @@ -3697,13 +3668,12 @@ } }, "node_modules/docsgpt": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/docsgpt/-/docsgpt-0.4.1.tgz", - "integrity": "sha512-9oH638vIg8I+zsjLV5Rp21yYniAtiTcyuBSByqWl2KoBdF/8vDSmr491l8n+ikbaTLiCW4uRU0p0r3BvRizy2Q==", + "version": "0.4.3", + "resolved": "https://registry.npmjs.org/docsgpt/-/docsgpt-0.4.3.tgz", + "integrity": "sha512-svLM6xEg4iUtb7HuR1qwC95K4ctvTky8gXRXgqtDIUC5Fg4zeHwivbmaFkBbP3N+bcqWVWCJK9DfJfW+OjTeuA==", "license": "Apache-2.0", "dependencies": { "@babel/plugin-transform-flow-strip-types": "^7.23.3", - "@bpmn-io/snarkdown": "^2.2.0", "@parcel/resolver-glob": "^2.12.0", "@parcel/transformer-svg-react": "^2.12.0", "@parcel/transformer-typescript-tsc": "^2.12.0", @@ -3715,6 +3685,7 @@ "flow-bin": "^0.229.2", "i": "^0.3.7", "install": "^0.13.0", + "markdown-it": "^14.1.0", "npm": "^10.5.0", "react": "^18.2.0", "react-dom": "^18.2.0", @@ -3807,9 +3778,9 @@ "integrity": "sha512-/if4Ueg0GUQlhCrW2ZlXwDAm40ipuKo+OgeHInlL8sbjt+hzISxZK949fZeJaVsheamrzANXvw1zQTvbxTvSHw==" }, "node_modules/elkjs": { - "version": "0.8.2", - "resolved": "https://registry.npmjs.org/elkjs/-/elkjs-0.8.2.tgz", - "integrity": "sha512-L6uRgvZTH+4OF5NE/MBbzQx/WYpru1xCBE9respNj6qznEewGUIfhzmm7horWWxbNO2M0WckQypGctR8lH79xQ==" + "version": "0.9.3", + "resolved": "https://registry.npmjs.org/elkjs/-/elkjs-0.9.3.tgz", + "integrity": "sha512-f/ZeWvW/BCXbhGEf1Ujp29EASo/lk1FDnETgNKwJrsVvGZhUWCZyg3xLJjAsxfOmt8KjswHmI5EwCQcPMpOYhQ==" }, "node_modules/entities": { "version": "4.5.0", @@ -4859,6 +4830,15 @@ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" }, + "node_modules/linkify-it": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/linkify-it/-/linkify-it-5.0.0.tgz", + "integrity": 
"sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==", + "license": "MIT", + "dependencies": { + "uc.micro": "^2.0.0" + } + }, "node_modules/lmdb": { "version": "2.8.5", "resolved": "https://registry.npmjs.org/lmdb/-/lmdb-2.8.5.tgz", @@ -4944,6 +4924,29 @@ "node": ">=0.10.0" } }, + "node_modules/markdown-it": { + "version": "14.1.0", + "resolved": "https://registry.npmjs.org/markdown-it/-/markdown-it-14.1.0.tgz", + "integrity": "sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==", + "license": "MIT", + "dependencies": { + "argparse": "^2.0.1", + "entities": "^4.4.0", + "linkify-it": "^5.0.0", + "mdurl": "^2.0.0", + "punycode.js": "^2.3.1", + "uc.micro": "^2.1.0" + }, + "bin": { + "markdown-it": "bin/markdown-it.mjs" + } + }, + "node_modules/markdown-it/node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==", + "license": "Python-2.0" + }, "node_modules/markdown-table": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.3.tgz", @@ -5487,23 +5490,29 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/mdurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdurl/-/mdurl-2.0.0.tgz", + "integrity": "sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==", + "license": "MIT" + }, "node_modules/mermaid": { - "version": "10.6.1", - "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.6.1.tgz", - "integrity": "sha512-Hky0/RpOw/1il9X8AvzOEChfJtVvmXm+y7JML5C//ePYMy0/9jCEmW1E1g86x9oDfW9+iVEdTV/i+M6KWRNs4A==", + "version": "10.9.3", + "resolved": "https://registry.npmjs.org/mermaid/-/mermaid-10.9.3.tgz", + "integrity": 
"sha512-V80X1isSEvAewIL3xhmz/rVmc27CVljcsbWxkxlWJWY/1kQa4XOABqpDl2qQLGKzpKm6WbTfUEKImBlUfFYArw==", "dependencies": { "@braintree/sanitize-url": "^6.0.1", "@types/d3-scale": "^4.0.3", "@types/d3-scale-chromatic": "^3.0.0", - "cytoscape": "^3.23.0", + "cytoscape": "^3.28.1", "cytoscape-cose-bilkent": "^4.1.0", - "cytoscape-fcose": "^2.1.0", "d3": "^7.4.0", "d3-sankey": "^0.12.3", "dagre-d3-es": "7.0.10", "dayjs": "^1.11.7", - "dompurify": "^3.0.5", - "elkjs": "^0.8.2", + "dompurify": "^3.0.5 <3.1.7", + "elkjs": "^0.9.0", + "katex": "^0.16.9", "khroma": "^2.0.0", "lodash-es": "^4.17.21", "mdast-util-from-markdown": "^1.3.0", @@ -9338,6 +9347,15 @@ "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", "integrity": "sha512-b/YwNhb8lk1Zz2+bXXpS/LK9OisiZZ1SNsSLxN1x2OXVEhW2Ckr/7mWE5vrC1ZTiJlD9g19jWszTmJsB+oEpFQ==" }, + "node_modules/punycode.js": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode.js/-/punycode.js-2.3.1.tgz", + "integrity": "sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==", + "license": "MIT", + "engines": { + "node": ">=6" + } + }, "node_modules/react": { "version": "18.2.0", "resolved": "https://registry.npmjs.org/react/-/react-18.2.0.tgz", @@ -10052,9 +10070,9 @@ } }, "node_modules/typescript": { - "version": "5.6.2", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.2.tgz", - "integrity": "sha512-NW8ByodCSNCwZeghjN3o+JX5OFH0Ojg6sadjEKY4huZ52TqbJTJnDo5+Tw98lSy63NZvi4n+ez5m2u5d4PkZyw==", + "version": "5.6.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.6.3.tgz", + "integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==", "peer": true, "bin": { "tsc": "bin/tsc", @@ -10064,6 +10082,12 @@ "node": ">=14.17" } }, + "node_modules/uc.micro": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/uc.micro/-/uc.micro-2.1.0.tgz", + "integrity": 
"sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==", + "license": "MIT" + }, "node_modules/unified": { "version": "10.1.2", "resolved": "https://registry.npmjs.org/unified/-/unified-10.1.2.tgz", diff --git a/docs/package.json b/docs/package.json index 9acf9b2a..ddd06c03 100644 --- a/docs/package.json +++ b/docs/package.json @@ -7,7 +7,7 @@ "license": "MIT", "dependencies": { "@vercel/analytics": "^1.1.1", - "docsgpt": "^0.4.1", + "docsgpt": "^0.4.3", "next": "^14.2.12", "nextra": "^2.13.2", "nextra-theme-docs": "^2.13.2", diff --git a/extensions/react-widget/package.json b/extensions/react-widget/package.json index d449d0a3..baf62aca 100644 --- a/extensions/react-widget/package.json +++ b/extensions/react-widget/package.json @@ -1,6 +1,6 @@ { "name": "docsgpt", - "version": "0.4.2", + "version": "0.4.3", "private": false, "description": "DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖.", "source": "./src/index.html", diff --git a/extensions/react-widget/src/components/DocsGPTWidget.tsx b/extensions/react-widget/src/components/DocsGPTWidget.tsx index 83defbcf..01861274 100644 --- a/extensions/react-widget/src/components/DocsGPTWidget.tsx +++ b/extensions/react-widget/src/components/DocsGPTWidget.tsx @@ -453,8 +453,11 @@ export const DocsGPTWidget = ({ setQueries(updatedQueries); setStatus('idle') } + else if (data.type === 'source') { + // handle the case where data type === 'source' + } else { - const result = data.answer; + const result = data.answer ? data.answer : ''; //Fallback to an empty string if data.answer is undefined const streamingResponse = queries[queries.length - 1].response ? 
queries[queries.length - 1].response : ''; const updatedQueries = [...queries]; updatedQueries[updatedQueries.length - 1].response = streamingResponse + result; diff --git a/extensions/slack-bot/.gitignore b/extensions/slack-bot/.gitignore new file mode 100644 index 00000000..1d8e58b2 --- /dev/null +++ b/extensions/slack-bot/.gitignore @@ -0,0 +1,3 @@ +.env +.venv/ +get-pip.py \ No newline at end of file diff --git a/extensions/slack-bot/Readme.md b/extensions/slack-bot/Readme.md new file mode 100644 index 00000000..704184a2 --- /dev/null +++ b/extensions/slack-bot/Readme.md @@ -0,0 +1,84 @@ + +# Slack Bot Configuration Guide + +> **Note:** The following guidelines must be followed on the [Slack API website](https://api.slack.com/) for setting up your Slack app and generating the necessary tokens. + +## Step-by-Step Instructions + +### 1. Navigate to Your Apps +- Go to the Slack API page for apps and select **Create an App** from the “From Scratch” option. + +### 2. App Creation +- Name your app and choose the workspace where you wish to add the assistant. + +### 3. Enabling Socket Mode +- Navigate to **Settings > Socket Mode** and enable **Socket Mode**. +- This action will generate an App-level token. Select the `connections:write` scope and copy the App-level token for future use. + +### 4. Socket Naming +- Assign a name to your socket as per your preference. + +### 5. Basic Information Setup +- Go to **Basic Information** (under **Settings**) and configure the following: + - Assistant name + - App icon + - Background color + +### 6. Bot Token and Permissions +- In the **OAuth & Permissions** option found under the **Features** section, retrieve the Bot Token. Save it for future usage. +- You will also need to add specific bot token scopes: + - `app_mentions:read` + - `assistant:write` + - `chat:write` + - `chat:write.public` + - `im:history` + +### 7. 
Enable Events +- From **Event Subscriptions**, enable events and add the following Bot User events: + - `app_mention` + - `assistant_thread_context_changed` + - `assistant_thread_started` + - `message.im` + +### 8. Agent/Assistant Toggle +- In the **Features > Agent & Assistants** section, toggle on the Agent or Assistant option. +- In the **Suggested Prompts** setting, leave it as `dynamic` (this is the default setting). + +--- + +## Code-Side Configuration Guide + +This section focuses on generating and setting up the necessary tokens in the `.env` file, using the `.env-example` as a template. + +### Step 1: Generating Required Keys + +1. **SLACK_APP_TOKEN** + - Navigate to **Settings > Socket Mode** in the Slack API and enable **Socket Mode**. + - Copy the App-level token generated (usually starts with `xapp-`). + +2. **SLACK_BOT_TOKEN** + - Go to **OAuth & Permissions** (under the **Features** section in Slack API). + - Retrieve the **Bot Token** (starts with `xoxb-`). + +3. **DOCSGPT_API_KEY** + - Go to the **DocsGPT website**. + - Navigate to **Settings > Chatbots > Create New** to generate a DocsGPT API Key. + - Copy the generated key for use. + +### Step 2: Creating and Updating the `.env` File + +1. Create a new `.env` file in the root of your project (if it doesn’t already exist). +2. Use the `.env-example` as a reference and update the file with the following keys and values: + +```bash +# .env file +SLACK_APP_TOKEN=xapp-your-generated-app-token +SLACK_BOT_TOKEN=xoxb-your-generated-bot-token +DOCSGPT_API_KEY=your-docsgpt-generated-api-key +``` + +Replace the placeholder values with the actual tokens generated from the Slack API and DocsGPT as per the steps outlined above. + +--- + +This concludes the guide for both setting up the Slack API and configuring the `.env` file on the code side. 
"""Slack bot that forwards direct messages to the DocsGPT answer API.

Runs in Socket Mode: every incoming message is sent to ``API_URL`` and the
answer is posted back into the same Slack thread.
"""
import asyncio
import hashlib
import os
import re

import httpx
from dotenv import load_dotenv
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
from slack_bolt.async_app import AsyncApp

load_dotenv()
API_BASE = os.getenv("API_BASE", "https://gptcloud.arc53.com")
API_URL = API_BASE + "/api/answer"

# Slack bot token and app-level token (the latter is used for Socket Mode)
SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN")
SLACK_APP_TOKEN = os.getenv("SLACK_APP_TOKEN")

# DocsGPT API key sent with every request to the answer API
DOCSGPT_API_KEY = os.getenv("DOCSGPT_API_KEY")

# Answer used whenever the DocsGPT API cannot be reached or returns nothing
FALLBACK_ANSWER = "Sorry, I couldn't find an answer."

# Initialize Slack app
app = AsyncApp(token=SLACK_BOT_TOKEN)


def encode_conversation_id(conversation_id: str) -> str:
    """Derive a stable 24-character hex id from a Slack identifier.

    Args:
        conversation_id (str): The Slack identifier to encode (the caller
            passes the thread timestamp).
    Returns:
        str: The first 24 hex characters of the SHA-256 digest of the input.
    """
    hashed_id = hashlib.sha256(conversation_id.encode()).hexdigest()
    return hashed_id[:24]


async def generate_answer(question: str, messages: list, conversation_id: str | None) -> dict:
    """Ask the DocsGPT answer API a question.

    Args:
        question: The user's question.
        messages: Conversation history forwarded as ``history``.
        conversation_id: Conversation id forwarded to the API, or ``None``.

    Returns:
        A dict with ``"answer"`` and ``"conversation_id"`` keys. On any
        transport error, non-200 status or unparsable body the answer is
        ``FALLBACK_ANSWER`` and the conversation id is ``None``.
    """
    payload = {
        "question": question,
        "api_key": DOCSGPT_API_KEY,
        "history": messages,
        "conversation_id": conversation_id,
    }
    headers = {
        "Content-Type": "application/json; charset=utf-8"
    }
    timeout = 60.0
    failure = {"answer": FALLBACK_ANSWER, "conversation_id": None}

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(API_URL, json=payload, headers=headers, timeout=timeout)
    except httpx.HTTPError as exc:
        # Timeouts / connection errors must not crash the Slack event handler.
        print(f"DocsGPT request failed: {exc!r}")
        return failure

    if response.status_code != 200:
        # Use the raw text: an error body is not guaranteed to be JSON.
        print(f"DocsGPT API returned {response.status_code}: {response.text}")
        return failure

    try:
        data = response.json()
    except ValueError:
        print(f"DocsGPT API returned a non-JSON body: {response.text}")
        return failure

    return {
        # `or` also covers an explicit null answer in the response.
        "answer": data.get("answer") or FALLBACK_ANSWER,
        "conversation_id": data.get("conversation_id"),
    }


@app.message(".*")
async def message_docs(message, say):
    """Answer an incoming Slack message with a DocsGPT response in its thread."""
    client = app.client
    channel = message['channel']
    # Messages that do not belong to a thread carry no `thread_ts`; fall back
    # to the message's own timestamp so the reply starts a thread on it.
    thread_ts = message.get('thread_ts') or message['ts']
    user_query = message.get('text') or ''
    await client.assistant_threads_setStatus(
        channel_id = channel,
        thread_ts = thread_ts,
        status = "is generating your answer...",
    )

    docs_gpt_channel_id = encode_conversation_id(thread_ts)

    # Get response from DocsGPT
    response = await generate_answer(user_query, [], docs_gpt_channel_id)
    answer = convert_to_slack_markdown(response['answer'])

    # Respond in Slack
    await client.chat_postMessage(text = answer, mrkdwn = True, channel = channel,
                                  thread_ts = thread_ts,)


def _header_to_bold(match: re.Match) -> str:
    """Render a Markdown header line as a single bold Slack line."""
    title = match.group(1).strip()
    # A header that was already bold (`## **Title**`) arrives here as `*Title*`;
    # drop that layer so the result is `*Title*` rather than `**Title**`.
    if len(title) > 2 and title.startswith('*') and title.endswith('*'):
        title = title[1:-1]
    return f"*{title}*"


def convert_to_slack_markdown(markdown_text: str):
    """Convert common Markdown constructs to Slack mrkdwn.

    Handles bold, one- and two-level bullet lists and headers. Italics
    (``_text_``) and inline code (`` `code` ``) are left untouched because
    the previous substitutions for them rewrote the text to itself.
    All line-anchored patterns match spaces/tabs only (never newlines), so
    blank lines are preserved and cannot change a bullet's nesting level.
    """
    # Convert bold **text** to *text* for Slack
    slack_text = re.sub(r'\*\*(.*?)\*\*', r'*\1*', markdown_text)

    # Convert bullet points with single or no leading space to filled bullets (•)
    slack_text = re.sub(r'^[ \t]?[-*][ \t]+', ' • ', slack_text, flags=re.MULTILINE)

    # Convert bullet points with two or more leading spaces to hollow bullets (◦)
    slack_text = re.sub(r'^[ \t]{2,}[-*][ \t]+', '\t◦ ', slack_text, flags=re.MULTILINE)

    # Convert headers (# .. ######) to bold in Slack
    slack_text = re.sub(r'^[ \t]*#{1,6}[ \t]*(.+?)[ \t]*$', _header_to_bold, slack_text, flags=re.MULTILINE)

    return slack_text


async def main():
    """Start the Socket Mode handler and serve events until interrupted."""
    handler = AsyncSocketModeHandler(app, os.environ["SLACK_APP_TOKEN"])
    await handler.start_async()


# Start the app
if __name__ == "__main__":
    asyncio.run(main())
'./assets/info.svg'; import SettingGear from './assets/settingGear.svg'; import Twitter from './assets/TwitterX.svg'; import UploadIcon from './assets/upload.svg'; @@ -39,6 +42,7 @@ import { } from './preferences/preferenceSlice'; import Spinner from './assets/spinner.svg'; import SpinnerDark from './assets/spinner-dark.svg'; +import { selectQueries } from './conversation/conversationSlice'; import Upload from './upload/Upload'; import ShareButton from './components/ShareButton'; import Help from './components/Help'; @@ -64,6 +68,7 @@ NavImage.propTypes = { }; */ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { const dispatch = useDispatch(); + const queries = useSelector(selectQueries); const docs = useSelector(selectSourceDocs); const selectedDocs = useSelector(selectSelectedDocs); const conversations = useSelector(selectConversations); @@ -93,6 +98,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { if (!conversations?.data) { fetchConversations(); } + if (queries.length === 0) { + resetConversation(); + } }, [conversations?.data, dispatch]); async function fetchConversations() { @@ -166,7 +174,11 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { }), ); }; - + const newChat = () => { + if (queries && queries?.length > 0) { + resetConversation(); + } + }; async function updateConversationName(updatedConversation: { name: string; id: string; @@ -203,26 +215,45 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { return ( <> {!navOpen && ( - +