From c0ed54406ffc9dc5c72ae7dbc592fe410d77dc8b Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 9 Feb 2024 18:04:24 +0530 Subject: [PATCH 01/14] fix(settings): delete button --- frontend/src/Setting.tsx | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/frontend/src/Setting.tsx b/frontend/src/Setting.tsx index b04cb18e..a8b76dd8 100644 --- a/frontend/src/Setting.tsx +++ b/frontend/src/Setting.tsx @@ -8,10 +8,10 @@ import { selectPrompt, setPrompt, selectSourceDocs, + setSourceDocs, } from './preferences/preferenceSlice'; import { Doc } from './preferences/preferenceApi'; import { useDarkTheme } from './hooks'; -import { Light } from 'react-syntax-highlighter'; type PromptProps = { prompts: { name: string; id: string; type: string }[]; selectedPrompt: { name: string; id: string; type: string }; @@ -86,13 +86,11 @@ const Setting: React.FC = () => { fetch(`${apiHost}/api/delete_old?path=${docPath}`, { method: 'GET', }) - .then(() => { - // remove the image element from the DOM - const imageElement = document.querySelector( - `#img-${index}`, - ) as HTMLElement; - const parentElement = imageElement.parentNode as HTMLElement; - parentElement.parentNode?.removeChild(parentElement); + .then((response) => { + if(response.ok && documents){ + const updatedDocuments = [...documents.slice(0, index), ...documents.slice(index + 1)]; + dispatch(setSourceDocs(updatedDocuments)); + } }) .catch((error) => console.error(error)); }; From 9129f7fb33dffe8f8acddace1ab8d9279c72f3e6 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 9 Feb 2024 19:12:48 +0530 Subject: [PATCH 02/14] fix(Conversation): input box UI --- frontend/src/conversation/Conversation.tsx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index 6813ed0f..eb5a9aaf 100644 --- a/frontend/src/conversation/Conversation.tsx +++ 
b/frontend/src/conversation/Conversation.tsx @@ -140,12 +140,12 @@ export default function Conversation() { )} {queries.length > 0 && ( -
+
{queries.map((query, index) => { return ( )} -
+
{ if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); @@ -200,7 +200,7 @@ export default function Conversation() {
)}
-

+

This is a chatbot that uses the GPT-3, Faiss and LangChain to answer questions.

From 8826f0ff3c3381b461a7ef91725885dde1ac4000 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 9 Feb 2024 19:17:26 +0530 Subject: [PATCH 03/14] slight UI improvements in input box --- frontend/src/conversation/Conversation.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index eb5a9aaf..ba311446 100644 --- a/frontend/src/conversation/Conversation.tsx +++ b/frontend/src/conversation/Conversation.tsx @@ -160,7 +160,7 @@ export default function Conversation() { {queries.length === 0 && ( )} -
+
Date: Tue, 13 Feb 2024 14:08:55 +0000 Subject: [PATCH 04/14] Add PremAI LLM implementation --- application/core/settings.py | 3 +++ application/llm/llm_creator.py | 4 +++- application/llm/premai.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 application/llm/premai.py diff --git a/application/core/settings.py b/application/core/settings.py index 42dea0ff..d9b68ed7 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -39,6 +39,9 @@ class Settings(BaseSettings): SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key + # prem ai project id + PREMAI_PROJECT_ID: Optional[str] = None + path = Path(__file__).parent.parent.absolute() settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8") diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py index d0d6ae3f..b4fdaebf 100644 --- a/application/llm/llm_creator.py +++ b/application/llm/llm_creator.py @@ -4,6 +4,7 @@ from application.llm.huggingface import HuggingFaceLLM from application.llm.llama_cpp import LlamaCpp from application.llm.anthropic import AnthropicLLM from application.llm.docsgpt_provider import DocsGPTAPILLM +from application.llm.premai import PremAILLM @@ -15,7 +16,8 @@ class LLMCreator: 'huggingface': HuggingFaceLLM, 'llama.cpp': LlamaCpp, 'anthropic': AnthropicLLM, - 'docsgpt': DocsGPTAPILLM + 'docsgpt': DocsGPTAPILLM, + 'premai': PremAILLM, } @classmethod diff --git a/application/llm/premai.py b/application/llm/premai.py new file mode 100644 index 00000000..4bc8a898 --- /dev/null +++ b/application/llm/premai.py @@ -0,0 +1,33 @@ +from application.llm.base import BaseLLM +from application.core.settings import settings + +class PremAILLM(BaseLLM): + + def __init__(self, api_key): + from premai import Prem + + self.client = Prem( + api_key=api_key + ) + self.api_key = api_key + self.project_id = 
settings.PREMAI_PROJECT_ID + + def gen(self, model, engine, messages, stream=False, **kwargs): + response = self.client.chat.completions.create(model=model, + project_id=self.project_id, + messages=messages, + stream=stream, + **kwargs) + + return response.choices[0].message["content"] + + def gen_stream(self, model, engine, messages, stream=True, **kwargs): + response = self.client.chat.completions.create(model=model, + project_id=self.project_id, + messages=messages, + stream=stream, + **kwargs) + + for line in response: + if line.choices[0].delta["content"] is not None: + yield line.choices[0].delta["content"] From ee06fa85f1be78178abced0f52ec9d00279f2a14 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 13 Feb 2024 15:06:52 +0000 Subject: [PATCH 05/14] fix: docsgpt provider --- application/llm/docsgpt_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/llm/docsgpt_provider.py b/application/llm/docsgpt_provider.py index b7d6a5ad..e0c5dbad 100644 --- a/application/llm/docsgpt_provider.py +++ b/application/llm/docsgpt_provider.py @@ -20,7 +20,7 @@ class DocsGPTAPILLM(BaseLLM): "max_new_tokens": 30 } ) - response_clean = response.json()['a'].split("###")[0] + response_clean = response.json()['a'].replace("###", "") return response_clean From 7a005ef1267e0ca1615a5f401f08432c3ec0ce7e Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Wed, 14 Feb 2024 18:39:21 +0530 Subject: [PATCH 06/14] streamed the sample response /stream --- mock-backend/package-lock.json | 1 + mock-backend/package.json | 1 + mock-backend/src/server.js | 47 ++++++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/mock-backend/package-lock.json b/mock-backend/package-lock.json index 5a3378fc..2d070b79 100644 --- a/mock-backend/package-lock.json +++ b/mock-backend/package-lock.json @@ -9,6 +9,7 @@ "version": "1.0.0", "license": "ISC", "dependencies": { + "cors": "^2.8.5", "json-server": "^0.17.4", "uuid": "^9.0.1" }, diff 
--git a/mock-backend/package.json b/mock-backend/package.json index 7cfab8c3..9540fa0a 100644 --- a/mock-backend/package.json +++ b/mock-backend/package.json @@ -12,6 +12,7 @@ "author": "", "license": "ISC", "dependencies": { + "cors": "^2.8.5", "json-server": "^0.17.4", "uuid": "^9.0.1" }, diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index f37b5d9e..ad65d9a7 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -1,7 +1,7 @@ import jsonServer from "json-server"; import routes from "./mocks/routes.json" assert { type: "json" }; import { v4 as uuid } from "uuid"; - +import cors from 'cors' const server = jsonServer.create(); const router = jsonServer.router("./src/mocks/db.json"); const middlewares = jsonServer.defaults(); @@ -9,7 +9,7 @@ const middlewares = jsonServer.defaults(); const localStorage = []; server.use(middlewares); - +server.use(cors({ origin: '*' })) server.use(jsonServer.rewriter(routes)); server.use((req, res, next) => { @@ -49,16 +49,41 @@ router.render = (req, res) => { } else { res.status(404).jsonp({}); } - } else if (req.url === "/stream") { - res.status(200).jsonp({ - data: "The answer is 42", - sources: [ - "https://en.wikipedia.org/wiki/42_(number)", - "https://en.wikipedia.org/wiki/42_(number)", - ], - conversation_id: "1234", + } else if (req.url === "/stream" && req.method === "POST") { + console.log('pinged !') + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive' }); - } else { + const message = ('Hi, How are you today?').split(' '); + let index = 0; + const interval = setInterval(() => { + if (index < message.length) { + res.write(`data: {"answer": "${message[index++]} "}\n`); + } else { + res.write(`data: {"type": "id", "id": "65cbc39d11f077b9eeb06d26"}\n`) + res.write(`data: {"type": "end"}\n`) + clearInterval(interval); // Stop the interval once the message is fully streamed + res.end(); // End the response + } + }, 
500); // Send a word every 1 second + } + else if (req.url === '/search' && req.method === 'POST') { + res.status(200).json( + [ + { + "text": "\n\n/api/answer\nIt's a POST request that sends a JSON in body with 4 values. It will receive an answer for a user provided question.\n", + "title": "API-docs.md" + }, + { + "text": "\n\nOur Standards\n\nExamples of behavior that contribute to a positive environment for our\ncommunity include:\n* Demonstrating empathy and kindness towards other people\n", + "title": "How-to-use-different-LLM.md" + } + ] + ) + } + else { res.status(res.statusCode).jsonp(res.locals.data); } }; From 44f27d91a05a9b8770f6982ecba2d54605fcdbb0 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Wed, 14 Feb 2024 18:48:43 +0530 Subject: [PATCH 07/14] purge console logs --- mock-backend/src/server.js | 1 - 1 file changed, 1 deletion(-) diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index ad65d9a7..f78cce10 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -50,7 +50,6 @@ router.render = (req, res) => { res.status(404).jsonp({}); } } else if (req.url === "/stream" && req.method === "POST") { - console.log('pinged !') res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', From 5685f831a789084589f82ae891ee8bbba771f269 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Thu, 15 Feb 2024 05:35:34 +0530 Subject: [PATCH 08/14] (mock) adding prompt routes --- mock-backend/src/server.js | 39 +++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index f78cce10..64f331fd 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -9,7 +9,7 @@ const middlewares = jsonServer.defaults(); const localStorage = []; server.use(middlewares); -server.use(cors({ origin: '*' })) +server.use(cors({ origin: ['*'] })) server.use(jsonServer.rewriter(routes)); server.use((req, res, 
next) => { @@ -82,6 +82,43 @@ router.render = (req, res) => { ] ) } + else if (req.url === '/get_prompts' && req.method === 'GET') { + res.status(200).json([ + { + "id": "default", + "name": "default", + "type": "public" + }, + { + "id": "creative", + "name": "creative", + "type": "public" + }, + { + "id": "strict", + "name": "strict", + "type": "public" + } + ]); + } + else if (req.url.startsWith('/get_single_prompt') && req.method==='GET') { + const id = req.query.id; + console.log('hre'); + if (id === 'creative') + res.status(200).json({ + "content": "You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible." + }) + else if (id === 'strict') { + res.status(200).json({ + "content": "You are an AI Assistant, DocsGPT, adept at offering document assistance. \nYour expertise lies in providing answer on top of provided context." + }) + } + else { + res.status(200).json({ + "content": "You are a helpful AI assistant, DocsGPT, specializing in document assistance, designed to offer detailed and informative responses." 
+ }) + } + } else { res.status(res.statusCode).jsonp(res.locals.data); } From 4375215baa4937998329652c9f11a6144dfd06aa Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 18 Feb 2024 19:12:58 +0000 Subject: [PATCH 09/14] Update port number in Dockerfile and server.js --- mock-backend/.gitignore | 5 +++++ mock-backend/Dockerfile | 2 +- mock-backend/src/server.js | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 mock-backend/.gitignore diff --git a/mock-backend/.gitignore b/mock-backend/.gitignore new file mode 100644 index 00000000..bca646a7 --- /dev/null +++ b/mock-backend/.gitignore @@ -0,0 +1,5 @@ + +# Elastic Beanstalk Files +.elasticbeanstalk/* +!.elasticbeanstalk/*.cfg.yml +!.elasticbeanstalk/*.global.yml diff --git a/mock-backend/Dockerfile b/mock-backend/Dockerfile index 5903b27e..588636a9 100644 --- a/mock-backend/Dockerfile +++ b/mock-backend/Dockerfile @@ -6,6 +6,6 @@ COPY package*.json ./ RUN npm install COPY . . -EXPOSE 7091 +EXPOSE 8080 CMD [ "npm", "run", "start"] diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index 64f331fd..93c326b1 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -126,6 +126,6 @@ router.render = (req, res) => { server.use(router); -server.listen(7091, () => { +server.listen(8080, () => { console.log("JSON Server is running"); }); From 007cd6cff1a0d6a1084632da72b5b722dfa5a97f Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 18 Feb 2024 19:33:45 +0000 Subject: [PATCH 10/14] Add conversations to db.json --- mock-backend/src/mocks/db.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mock-backend/src/mocks/db.json b/mock-backend/src/mocks/db.json index 4be7d3f9..36947158 100644 --- a/mock-backend/src/mocks/db.json +++ b/mock-backend/src/mocks/db.json @@ -225,7 +225,19 @@ "version": "0.1.0" } ], - "conversations": [], + "conversations": [ + { + "id": "65cf39c936523eea21ebe117", + "name": "Request clarification" + }, + { + "id": 
"65cf39ba36523eea21ebe116", + "name": "Clarification request" + }, + { + "id": "65cf37e97d527c332bbac933", + "name": "Greetings, assistance inquiry." + }], "docs_check": { "status": "loaded" } From 2b644dbb015676fc25d13056a83a3f0fdd1568a1 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 23 Feb 2024 21:15:26 +0000 Subject: [PATCH 11/14] Add Rust toolchain and download mpnet-base-v2.zip model --- application/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/application/Dockerfile b/application/Dockerfile index 81ed570a..7ea15ff7 100644 --- a/application/Dockerfile +++ b/application/Dockerfile @@ -2,15 +2,16 @@ FROM python:3.11-slim-bullseye as builder # Tiktoken requires Rust toolchain, so build it in a separate stage RUN apt-get update && apt-get install -y gcc curl +RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip +RUN unzip mpnet-base-v2.zip -d model +RUN rm mpnet-base-v2.zip RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y ENV PATH="/root/.cargo/bin:${PATH}" RUN pip install --upgrade pip && pip install tiktoken==0.5.2 COPY requirements.txt . 
RUN pip install -r requirements.txt RUN apt-get install -y wget unzip -RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip -RUN unzip mpnet-base-v2.zip -d model -RUN rm mpnet-base-v2.zip + FROM python:3.11-slim-bullseye From ee3ea7a970fdf38ac45d48538ec91e4a1ee43202 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 23 Feb 2024 21:19:04 +0000 Subject: [PATCH 12/14] Add wget and unzip packages to Dockerfile --- application/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/application/Dockerfile b/application/Dockerfile index 7ea15ff7..92860c20 100644 --- a/application/Dockerfile +++ b/application/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.11-slim-bullseye as builder # Tiktoken requires Rust toolchain, so build it in a separate stage RUN apt-get update && apt-get install -y gcc curl +RUN apt-get install -y wget unzip RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip RUN unzip mpnet-base-v2.zip -d model RUN rm mpnet-base-v2.zip @@ -10,7 +11,7 @@ ENV PATH="/root/.cargo/bin:${PATH}" RUN pip install --upgrade pip && pip install tiktoken==0.5.2 COPY requirements.txt . RUN pip install -r requirements.txt -RUN apt-get install -y wget unzip + FROM python:3.11-slim-bullseye From 4216671ea21d941ff495e51f331d0a00448b21a4 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 24 Feb 2024 12:28:31 +0000 Subject: [PATCH 13/14] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f1b864d7..43b11b06 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ docker compose -f docker-compose-dev.yaml up -d > [!Note] > Make sure you have Python 3.10 or 3.11 installed. -1. Export required environment variables or prepare a `.env` file in the `/application` folder: +1. 
Export required environment variables or prepare a `.env` file in the project folder: - Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`. (check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.) @@ -152,11 +152,12 @@ You can use the script below, or download it manually from [here](https://d3dg10 wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip unzip mpnet-base-v2.zip -d model rm mpnet-base-v2.zip +``` -4. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend: +4. Install dependencies for the backend: ```commandline -pip install -r requirements.txt +pip install -r application/requirements.txt ``` 5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`. From c8d8a8d0b5c3b32838ecfa59e2f646130df7c71f Mon Sep 17 00:00:00 2001 From: Pavel Date: Sun, 25 Feb 2024 16:03:18 +0300 Subject: [PATCH 14/14] Fixing ingestion metadata grouping --- .gitignore | 1 + application/parser/file/bulk.py | 22 +++++++++++++++++----- application/parser/token_func.py | 15 +++++++-------- frontend/src/conversation/Conversation.tsx | 3 +-- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 053e5793..d7747efb 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ application/vectors/ node_modules/ .vscode/settings.json models/ +model/ diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py index af17193d..aec6c8c1 100644 --- a/application/parser/file/bulk.py +++ b/application/parser/file/bulk.py @@ -147,12 +147,24 @@ class SimpleDirectoryReader(BaseReader): # do standard read with open(input_file, "r", errors=self.errors) as f: data = f.read() - if isinstance(data, List): - data_list.extend(data) - else: - data_list.append(str(data)) + # Prepare metadata for this file if self.file_metadata is not 
None: - metadata_list.append(self.file_metadata(str(input_file))) + file_metadata = self.file_metadata(str(input_file)) + else: + # Provide a default empty metadata + file_metadata = {'title': '', 'store': ''} + # TODO: Find a case with no metadata and check if breaks anything + + if isinstance(data, List): + # Extend data_list with each item in the data list + data_list.extend([str(d) for d in data]) + # For each item in the data list, add the file's metadata to metadata_list + metadata_list.extend([file_metadata for _ in data]) + else: + # Add the single piece of data to data_list + data_list.append(str(data)) + # Add the file's metadata to metadata_list + metadata_list.append(file_metadata) if concatenate: return [Document("\n".join(data_list))] diff --git a/application/parser/token_func.py b/application/parser/token_func.py index 14b231fc..36ae7e56 100644 --- a/application/parser/token_func.py +++ b/application/parser/token_func.py @@ -21,16 +21,15 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) for doc in documents: doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text)) - if current_group is None: - current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding, - extra_info=doc.extra_info) - elif len(tiktoken.get_encoding("cl100k_base").encode( - current_group.text)) + doc_len < max_tokens and doc_len < min_tokens: - current_group.text += " " + doc.text + # Check if current group is empty or if the document can be added based on token count and matching metadata + if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info): + if current_group is None: + current_group = doc # Use the document directly to retain its metadata + else: + current_group.text += " " + doc.text # Append text to the current group else: docs.append(current_group) - current_group = 
Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding, - extra_info=doc.extra_info) + current_group = doc # Start a new group with the current document if current_group is not None: docs.append(current_group) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index ba311446..5ed43d93 100644 --- a/frontend/src/conversation/Conversation.tsx +++ b/frontend/src/conversation/Conversation.tsx @@ -201,8 +201,7 @@ export default function Conversation() { )}

- This is a chatbot that uses the GPT-3, Faiss and LangChain to answer - questions. + DocsGPT uses GenAI, please review critical information using sources.