From c0ed54406ffc9dc5c72ae7dbc592fe410d77dc8b Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 9 Feb 2024 18:04:24 +0530 Subject: [PATCH 01/14] fix(settings): delete button --- frontend/src/Setting.tsx | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/frontend/src/Setting.tsx b/frontend/src/Setting.tsx index b04cb18e..a8b76dd8 100644 --- a/frontend/src/Setting.tsx +++ b/frontend/src/Setting.tsx @@ -8,10 +8,10 @@ import { selectPrompt, setPrompt, selectSourceDocs, + setSourceDocs, } from './preferences/preferenceSlice'; import { Doc } from './preferences/preferenceApi'; import { useDarkTheme } from './hooks'; -import { Light } from 'react-syntax-highlighter'; type PromptProps = { prompts: { name: string; id: string; type: string }[]; selectedPrompt: { name: string; id: string; type: string }; @@ -86,13 +86,11 @@ const Setting: React.FC = () => { fetch(`${apiHost}/api/delete_old?path=${docPath}`, { method: 'GET', }) - .then(() => { - // remove the image element from the DOM - const imageElement = document.querySelector( - `#img-${index}`, - ) as HTMLElement; - const parentElement = imageElement.parentNode as HTMLElement; - parentElement.parentNode?.removeChild(parentElement); + .then((response) => { + if(response.ok && documents){ + const updatedDocuments = [...documents.slice(0, index), ...documents.slice(index + 1)]; + dispatch(setSourceDocs(updatedDocuments)); + } }) .catch((error) => console.error(error)); }; From 9129f7fb33dffe8f8acddace1ab8d9279c72f3e6 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 9 Feb 2024 19:12:48 +0530 Subject: [PATCH 02/14] fix(Conversation): input box UI --- frontend/src/conversation/Conversation.tsx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index 6813ed0f..eb5a9aaf 100644 --- a/frontend/src/conversation/Conversation.tsx +++ 
b/frontend/src/conversation/Conversation.tsx @@ -140,12 +140,12 @@ export default function Conversation() { )} {queries.length > 0 && ( -
+
{queries.map((query, index) => { return ( )} -
+
{ if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); @@ -200,7 +200,7 @@ export default function Conversation() {
)}
-

+

This is a chatbot that uses the GPT-3, Faiss and LangChain to answer questions.

From 8826f0ff3c3381b461a7ef91725885dde1ac4000 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 9 Feb 2024 19:17:26 +0530 Subject: [PATCH 03/14] slight UI improvements in input box --- frontend/src/conversation/Conversation.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index eb5a9aaf..ba311446 100644 --- a/frontend/src/conversation/Conversation.tsx +++ b/frontend/src/conversation/Conversation.tsx @@ -160,7 +160,7 @@ export default function Conversation() { {queries.length === 0 && ( )} -
+
Date: Tue, 13 Feb 2024 14:08:55 +0000 Subject: [PATCH 04/14] Add PremAI LLM implementation --- application/core/settings.py | 3 +++ application/llm/llm_creator.py | 4 +++- application/llm/premai.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 application/llm/premai.py diff --git a/application/core/settings.py b/application/core/settings.py index 42dea0ff..d9b68ed7 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -39,6 +39,9 @@ class Settings(BaseSettings): SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key + # prem ai project id + PREMAI_PROJECT_ID: Optional[str] = None + path = Path(__file__).parent.parent.absolute() settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8") diff --git a/application/llm/llm_creator.py b/application/llm/llm_creator.py index d0d6ae3f..b4fdaebf 100644 --- a/application/llm/llm_creator.py +++ b/application/llm/llm_creator.py @@ -4,6 +4,7 @@ from application.llm.huggingface import HuggingFaceLLM from application.llm.llama_cpp import LlamaCpp from application.llm.anthropic import AnthropicLLM from application.llm.docsgpt_provider import DocsGPTAPILLM +from application.llm.premai import PremAILLM @@ -15,7 +16,8 @@ class LLMCreator: 'huggingface': HuggingFaceLLM, 'llama.cpp': LlamaCpp, 'anthropic': AnthropicLLM, - 'docsgpt': DocsGPTAPILLM + 'docsgpt': DocsGPTAPILLM, + 'premai': PremAILLM, } @classmethod diff --git a/application/llm/premai.py b/application/llm/premai.py new file mode 100644 index 00000000..4bc8a898 --- /dev/null +++ b/application/llm/premai.py @@ -0,0 +1,33 @@ +from application.llm.base import BaseLLM +from application.core.settings import settings + +class PremAILLM(BaseLLM): + + def __init__(self, api_key): + from premai import Prem + + self.client = Prem( + api_key=api_key + ) + self.api_key = api_key + self.project_id = 
settings.PREMAI_PROJECT_ID + + def gen(self, model, engine, messages, stream=False, **kwargs): + response = self.client.chat.completions.create(model=model, + project_id=self.project_id, + messages=messages, + stream=stream, + **kwargs) + + return response.choices[0].message["content"] + + def gen_stream(self, model, engine, messages, stream=True, **kwargs): + response = self.client.chat.completions.create(model=model, + project_id=self.project_id, + messages=messages, + stream=stream, + **kwargs) + + for line in response: + if line.choices[0].delta["content"] is not None: + yield line.choices[0].delta["content"] From ee06fa85f1be78178abced0f52ec9d00279f2a14 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 13 Feb 2024 15:06:52 +0000 Subject: [PATCH 05/14] fix: docsgpt provider --- application/llm/docsgpt_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/application/llm/docsgpt_provider.py b/application/llm/docsgpt_provider.py index b7d6a5ad..e0c5dbad 100644 --- a/application/llm/docsgpt_provider.py +++ b/application/llm/docsgpt_provider.py @@ -20,7 +20,7 @@ class DocsGPTAPILLM(BaseLLM): "max_new_tokens": 30 } ) - response_clean = response.json()['a'].split("###")[0] + response_clean = response.json()['a'].replace("###", "") return response_clean From 7a005ef1267e0ca1615a5f401f08432c3ec0ce7e Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Wed, 14 Feb 2024 18:39:21 +0530 Subject: [PATCH 06/14] streamed the sample response /stream --- mock-backend/package-lock.json | 1 + mock-backend/package.json | 1 + mock-backend/src/server.js | 47 ++++++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/mock-backend/package-lock.json b/mock-backend/package-lock.json index 5a3378fc..2d070b79 100644 --- a/mock-backend/package-lock.json +++ b/mock-backend/package-lock.json @@ -9,6 +9,7 @@ "version": "1.0.0", "license": "ISC", "dependencies": { + "cors": "^2.8.5", "json-server": "^0.17.4", "uuid": "^9.0.1" }, diff 
--git a/mock-backend/package.json b/mock-backend/package.json index 7cfab8c3..9540fa0a 100644 --- a/mock-backend/package.json +++ b/mock-backend/package.json @@ -12,6 +12,7 @@ "author": "", "license": "ISC", "dependencies": { + "cors": "^2.8.5", "json-server": "^0.17.4", "uuid": "^9.0.1" }, diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index f37b5d9e..ad65d9a7 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -1,7 +1,7 @@ import jsonServer from "json-server"; import routes from "./mocks/routes.json" assert { type: "json" }; import { v4 as uuid } from "uuid"; - +import cors from 'cors' const server = jsonServer.create(); const router = jsonServer.router("./src/mocks/db.json"); const middlewares = jsonServer.defaults(); @@ -9,7 +9,7 @@ const middlewares = jsonServer.defaults(); const localStorage = []; server.use(middlewares); - +server.use(cors({ origin: '*' })) server.use(jsonServer.rewriter(routes)); server.use((req, res, next) => { @@ -49,16 +49,41 @@ router.render = (req, res) => { } else { res.status(404).jsonp({}); } - } else if (req.url === "/stream") { - res.status(200).jsonp({ - data: "The answer is 42", - sources: [ - "https://en.wikipedia.org/wiki/42_(number)", - "https://en.wikipedia.org/wiki/42_(number)", - ], - conversation_id: "1234", + } else if (req.url === "/stream" && req.method === "POST") { + console.log('pinged !') + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive' }); - } else { + const message = ('Hi, How are you today?').split(' '); + let index = 0; + const interval = setInterval(() => { + if (index < message.length) { + res.write(`data: {"answer": "${message[index++]} "}\n`); + } else { + res.write(`data: {"type": "id", "id": "65cbc39d11f077b9eeb06d26"}\n`) + res.write(`data: {"type": "end"}\n`) + clearInterval(interval); // Stop the interval once the message is fully streamed + res.end(); // End the response + } + }, 
500); // Send a word every 1 second + } + else if (req.url === '/search' && req.method === 'POST') { + res.status(200).json( + [ + { + "text": "\n\n/api/answer\nIt's a POST request that sends a JSON in body with 4 values. It will receive an answer for a user provided question.\n", + "title": "API-docs.md" + }, + { + "text": "\n\nOur Standards\n\nExamples of behavior that contribute to a positive environment for our\ncommunity include:\n* Demonstrating empathy and kindness towards other people\n", + "title": "How-to-use-different-LLM.md" + } + ] + ) + } + else { res.status(res.statusCode).jsonp(res.locals.data); } }; From 44f27d91a05a9b8770f6982ecba2d54605fcdbb0 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Wed, 14 Feb 2024 18:48:43 +0530 Subject: [PATCH 07/14] purge console logs --- mock-backend/src/server.js | 1 - 1 file changed, 1 deletion(-) diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index ad65d9a7..f78cce10 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -50,7 +50,6 @@ router.render = (req, res) => { res.status(404).jsonp({}); } } else if (req.url === "/stream" && req.method === "POST") { - console.log('pinged !') res.writeHead(200, { 'Content-Type': 'text/event-stream', 'Cache-Control': 'no-cache', From 5685f831a789084589f82ae891ee8bbba771f269 Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Thu, 15 Feb 2024 05:35:34 +0530 Subject: [PATCH 08/14] (mock) adding prompt routes --- mock-backend/src/server.js | 39 +++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index f78cce10..64f331fd 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -9,7 +9,7 @@ const middlewares = jsonServer.defaults(); const localStorage = []; server.use(middlewares); -server.use(cors({ origin: '*' })) +server.use(cors({ origin: ['*'] })) server.use(jsonServer.rewriter(routes)); server.use((req, res, 
next) => { @@ -82,6 +82,43 @@ router.render = (req, res) => { ] ) } + else if (req.url === '/get_prompts' && req.method === 'GET') { + res.status(200).json([ + { + "id": "default", + "name": "default", + "type": "public" + }, + { + "id": "creative", + "name": "creative", + "type": "public" + }, + { + "id": "strict", + "name": "strict", + "type": "public" + } + ]); + } + else if (req.url.startsWith('/get_single_prompt') && req.method==='GET') { + const id = req.query.id; + console.log('hre'); + if (id === 'creative') + res.status(200).json({ + "content": "You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible." + }) + else if (id === 'strict') { + res.status(200).json({ + "content": "You are an AI Assistant, DocsGPT, adept at offering document assistance. \nYour expertise lies in providing answer on top of provided context." + }) + } + else { + res.status(200).json({ + "content": "You are a helpful AI assistant, DocsGPT, specializing in document assistance, designed to offer detailed and informative responses." 
+ }) + } + } else { res.status(res.statusCode).jsonp(res.locals.data); } From 4375215baa4937998329652c9f11a6144dfd06aa Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 18 Feb 2024 19:12:58 +0000 Subject: [PATCH 09/14] Update port number in Dockerfile and server.js --- mock-backend/.gitignore | 5 +++++ mock-backend/Dockerfile | 2 +- mock-backend/src/server.js | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 mock-backend/.gitignore diff --git a/mock-backend/.gitignore b/mock-backend/.gitignore new file mode 100644 index 00000000..bca646a7 --- /dev/null +++ b/mock-backend/.gitignore @@ -0,0 +1,5 @@ + +# Elastic Beanstalk Files +.elasticbeanstalk/* +!.elasticbeanstalk/*.cfg.yml +!.elasticbeanstalk/*.global.yml diff --git a/mock-backend/Dockerfile b/mock-backend/Dockerfile index 5903b27e..588636a9 100644 --- a/mock-backend/Dockerfile +++ b/mock-backend/Dockerfile @@ -6,6 +6,6 @@ COPY package*.json ./ RUN npm install COPY . . -EXPOSE 7091 +EXPOSE 8080 CMD [ "npm", "run", "start"] diff --git a/mock-backend/src/server.js b/mock-backend/src/server.js index 64f331fd..93c326b1 100644 --- a/mock-backend/src/server.js +++ b/mock-backend/src/server.js @@ -126,6 +126,6 @@ router.render = (req, res) => { server.use(router); -server.listen(7091, () => { +server.listen(8080, () => { console.log("JSON Server is running"); }); From 007cd6cff1a0d6a1084632da72b5b722dfa5a97f Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 18 Feb 2024 19:33:45 +0000 Subject: [PATCH 10/14] Add conversations to db.json --- mock-backend/src/mocks/db.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/mock-backend/src/mocks/db.json b/mock-backend/src/mocks/db.json index 4be7d3f9..36947158 100644 --- a/mock-backend/src/mocks/db.json +++ b/mock-backend/src/mocks/db.json @@ -225,7 +225,19 @@ "version": "0.1.0" } ], - "conversations": [], + "conversations": [ + { + "id": "65cf39c936523eea21ebe117", + "name": "Request clarification" + }, + { + "id": 
"65cf39ba36523eea21ebe116", + "name": "Clarification request" + }, + { + "id": "65cf37e97d527c332bbac933", + "name": "Greetings, assistance inquiry." + }], "docs_check": { "status": "loaded" } From 2b644dbb015676fc25d13056a83a3f0fdd1568a1 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 23 Feb 2024 21:15:26 +0000 Subject: [PATCH 11/14] Add Rust toolchain and download mpnet-base-v2.zip model --- application/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/application/Dockerfile b/application/Dockerfile index 81ed570a..7ea15ff7 100644 --- a/application/Dockerfile +++ b/application/Dockerfile @@ -2,15 +2,16 @@ FROM python:3.11-slim-bullseye as builder # Tiktoken requires Rust toolchain, so build it in a separate stage RUN apt-get update && apt-get install -y gcc curl +RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip +RUN unzip mpnet-base-v2.zip -d model +RUN rm mpnet-base-v2.zip RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y ENV PATH="/root/.cargo/bin:${PATH}" RUN pip install --upgrade pip && pip install tiktoken==0.5.2 COPY requirements.txt . 
RUN pip install -r requirements.txt RUN apt-get install -y wget unzip -RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip -RUN unzip mpnet-base-v2.zip -d model -RUN rm mpnet-base-v2.zip + FROM python:3.11-slim-bullseye From ee3ea7a970fdf38ac45d48538ec91e4a1ee43202 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 23 Feb 2024 21:19:04 +0000 Subject: [PATCH 12/14] Add wget and unzip packages to Dockerfile --- application/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/application/Dockerfile b/application/Dockerfile index 7ea15ff7..92860c20 100644 --- a/application/Dockerfile +++ b/application/Dockerfile @@ -2,6 +2,7 @@ FROM python:3.11-slim-bullseye as builder # Tiktoken requires Rust toolchain, so build it in a separate stage RUN apt-get update && apt-get install -y gcc curl +RUN apt-get install -y wget unzip RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip RUN unzip mpnet-base-v2.zip -d model RUN rm mpnet-base-v2.zip @@ -10,7 +11,7 @@ ENV PATH="/root/.cargo/bin:${PATH}" RUN pip install --upgrade pip && pip install tiktoken==0.5.2 COPY requirements.txt . RUN pip install -r requirements.txt -RUN apt-get install -y wget unzip + FROM python:3.11-slim-bullseye From 4216671ea21d941ff495e51f331d0a00448b21a4 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 24 Feb 2024 12:28:31 +0000 Subject: [PATCH 13/14] Update README.md --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f1b864d7..43b11b06 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ docker compose -f docker-compose-dev.yaml up -d > [!Note] > Make sure you have Python 3.10 or 3.11 installed. -1. Export required environment variables or prepare a `.env` file in the `/application` folder: +1. 
Export required environment variables or prepare a `.env` file in the project folder: - Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`. (check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.) @@ -152,11 +152,12 @@ You can use the script below, or download it manually from [here](https://d3dg10 wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip unzip mpnet-base-v2.zip -d model rm mpnet-base-v2.zip +``` -4. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend: +4. Install dependencies for the backend: ```commandline -pip install -r requirements.txt +pip install -r application/requirements.txt ``` 5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`. From c8d8a8d0b5c3b32838ecfa59e2f646130df7c71f Mon Sep 17 00:00:00 2001 From: Pavel Date: Sun, 25 Feb 2024 16:03:18 +0300 Subject: [PATCH 14/14] Fixing ingestion metadata grouping --- .gitignore | 1 + application/parser/file/bulk.py | 22 +++++++++++++++++----- application/parser/token_func.py | 15 +++++++-------- frontend/src/conversation/Conversation.tsx | 3 +-- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/.gitignore b/.gitignore index 053e5793..d7747efb 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,4 @@ application/vectors/ node_modules/ .vscode/settings.json models/ +model/ diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py index af17193d..aec6c8c1 100644 --- a/application/parser/file/bulk.py +++ b/application/parser/file/bulk.py @@ -147,12 +147,24 @@ class SimpleDirectoryReader(BaseReader): # do standard read with open(input_file, "r", errors=self.errors) as f: data = f.read() - if isinstance(data, List): - data_list.extend(data) - else: - data_list.append(str(data)) + # Prepare metadata for this file if self.file_metadata is not 
None: - metadata_list.append(self.file_metadata(str(input_file))) + file_metadata = self.file_metadata(str(input_file)) + else: + # Provide a default empty metadata + file_metadata = {'title': '', 'store': ''} + # TODO: Find a case with no metadata and check if breaks anything + + if isinstance(data, List): + # Extend data_list with each item in the data list + data_list.extend([str(d) for d in data]) + # For each item in the data list, add the file's metadata to metadata_list + metadata_list.extend([file_metadata for _ in data]) + else: + # Add the single piece of data to data_list + data_list.append(str(data)) + # Add the file's metadata to metadata_list + metadata_list.append(file_metadata) if concatenate: return [Document("\n".join(data_list))] diff --git a/application/parser/token_func.py b/application/parser/token_func.py index 14b231fc..36ae7e56 100644 --- a/application/parser/token_func.py +++ b/application/parser/token_func.py @@ -21,16 +21,15 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) for doc in documents: doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text)) - if current_group is None: - current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding, - extra_info=doc.extra_info) - elif len(tiktoken.get_encoding("cl100k_base").encode( - current_group.text)) + doc_len < max_tokens and doc_len < min_tokens: - current_group.text += " " + doc.text + # Check if current group is empty or if the document can be added based on token count and matching metadata + if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info): + if current_group is None: + current_group = doc # Use the document directly to retain its metadata + else: + current_group.text += " " + doc.text # Append text to the current group else: docs.append(current_group) - current_group = 
Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding, - extra_info=doc.extra_info) + current_group = doc # Start a new group with the current document if current_group is not None: docs.append(current_group) diff --git a/frontend/src/conversation/Conversation.tsx b/frontend/src/conversation/Conversation.tsx index ba311446..5ed43d93 100644 --- a/frontend/src/conversation/Conversation.tsx +++ b/frontend/src/conversation/Conversation.tsx @@ -201,8 +201,7 @@ export default function Conversation() { )}

- This is a chatbot that uses the GPT-3, Faiss and LangChain to answer - questions. + DocsGPT uses GenAI, please review critical information using sources.