mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Merge branch 'main' into feature/remote-loads
This commit is contained in:
@@ -123,7 +123,7 @@ docker compose -f docker-compose-dev.yaml up -d
|
||||
> [!Note]
|
||||
> Make sure you have Python 3.10 or 3.11 installed.
|
||||
|
||||
1. Export required environment variables or prepare a `.env` file in the `/application` folder:
|
||||
1. Export required environment variables or prepare a `.env` file in the project folder:
|
||||
- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`.
|
||||
|
||||
(check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.)
|
||||
@@ -152,11 +152,12 @@ You can use the script below, or download it manually from [here](https://d3dg10
|
||||
wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
|
||||
unzip mpnet-base-v2.zip -d model
|
||||
rm mpnet-base-v2.zip
|
||||
```
|
||||
|
||||
4. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
|
||||
4. Install dependencies for the backend:
|
||||
|
||||
```commandline
|
||||
pip install -r requirements.txt
|
||||
pip install -r application/requirements.txt
|
||||
```
|
||||
|
||||
5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
|
||||
|
||||
@@ -2,15 +2,17 @@ FROM python:3.11-slim-bullseye as builder
|
||||
|
||||
# Tiktoken requires Rust toolchain, so build it in a separate stage
|
||||
RUN apt-get update && apt-get install -y gcc curl
|
||||
RUN apt-get install -y wget unzip
|
||||
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
|
||||
RUN unzip mpnet-base-v2.zip -d model
|
||||
RUN rm mpnet-base-v2.zip
|
||||
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
RUN apt-get install -y wget unzip
|
||||
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
|
||||
RUN unzip mpnet-base-v2.zip -d model
|
||||
RUN rm mpnet-base-v2.zip
|
||||
|
||||
|
||||
|
||||
FROM python:3.11-slim-bullseye
|
||||
|
||||
|
||||
@@ -39,6 +39,9 @@ class Settings(BaseSettings):
|
||||
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
|
||||
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
|
||||
|
||||
# prem ai project id
|
||||
PREMAI_PROJECT_ID: Optional[str] = None
|
||||
|
||||
|
||||
path = Path(__file__).parent.parent.absolute()
|
||||
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
|
||||
|
||||
@@ -20,7 +20,7 @@ class DocsGPTAPILLM(BaseLLM):
|
||||
"max_new_tokens": 30
|
||||
}
|
||||
)
|
||||
response_clean = response.json()['a'].split("###")[0]
|
||||
response_clean = response.json()['a'].replace("###", "")
|
||||
|
||||
return response_clean
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ from application.llm.huggingface import HuggingFaceLLM
|
||||
from application.llm.llama_cpp import LlamaCpp
|
||||
from application.llm.anthropic import AnthropicLLM
|
||||
from application.llm.docsgpt_provider import DocsGPTAPILLM
|
||||
from application.llm.premai import PremAILLM
|
||||
|
||||
|
||||
|
||||
@@ -15,7 +16,8 @@ class LLMCreator:
|
||||
'huggingface': HuggingFaceLLM,
|
||||
'llama.cpp': LlamaCpp,
|
||||
'anthropic': AnthropicLLM,
|
||||
'docsgpt': DocsGPTAPILLM
|
||||
'docsgpt': DocsGPTAPILLM,
|
||||
'premai': PremAILLM,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
33
application/llm/premai.py
Normal file
33
application/llm/premai.py
Normal file
@@ -0,0 +1,33 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
class PremAILLM(BaseLLM):
    """LLM backend that sends chat-completion requests through the Prem client."""

    def __init__(self, api_key):
        """Build the Prem client for ``api_key`` and record the project id.

        ``premai`` is imported here rather than at module level, so the
        package is only required when this backend is instantiated.
        """
        from premai import Prem

        self.client = Prem(api_key=api_key)
        self.api_key = api_key
        # The project id comes from application settings, not from the caller.
        self.project_id = settings.PREMAI_PROJECT_ID

    def gen(self, model, engine, messages, stream=False, **kwargs):
        """Request a completion and return the content of the first choice.

        ``engine`` is accepted but not used by this backend. Extra keyword
        arguments are forwarded unchanged to the client call.
        """
        completion = self.client.chat.completions.create(
            model=model,
            project_id=self.project_id,
            messages=messages,
            stream=stream,
            **kwargs,
        )
        first_choice = completion.choices[0]
        return first_choice.message["content"]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        """Request a completion and yield the content of each received chunk.

        Chunks whose delta content is ``None`` are skipped. ``engine`` is
        accepted but not used by this backend.
        """
        chunks = self.client.chat.completions.create(
            model=model,
            project_id=self.project_id,
            messages=messages,
            stream=stream,
            **kwargs,
        )
        for chunk in chunks:
            piece = chunk.choices[0].delta["content"]
            if piece is not None:
                yield piece
|
||||
@@ -147,12 +147,24 @@ class SimpleDirectoryReader(BaseReader):
|
||||
# do standard read
|
||||
with open(input_file, "r", errors=self.errors) as f:
|
||||
data = f.read()
|
||||
if isinstance(data, List):
|
||||
data_list.extend(data)
|
||||
else:
|
||||
data_list.append(str(data))
|
||||
# Prepare metadata for this file
|
||||
if self.file_metadata is not None:
|
||||
metadata_list.append(self.file_metadata(str(input_file)))
|
||||
file_metadata = self.file_metadata(str(input_file))
|
||||
else:
|
||||
# Provide a default empty metadata
|
||||
file_metadata = {'title': '', 'store': ''}
|
||||
# TODO: Find a case with no metadata and check if it breaks anything
|
||||
|
||||
if isinstance(data, List):
|
||||
# Extend data_list with each item in the data list
|
||||
data_list.extend([str(d) for d in data])
|
||||
# For each item in the data list, add the file's metadata to metadata_list
|
||||
metadata_list.extend([file_metadata for _ in data])
|
||||
else:
|
||||
# Add the single piece of data to data_list
|
||||
data_list.append(str(data))
|
||||
# Add the file's metadata to metadata_list
|
||||
metadata_list.append(file_metadata)
|
||||
|
||||
if concatenate:
|
||||
return [Document("\n".join(data_list))]
|
||||
|
||||
@@ -21,16 +21,15 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
for doc in documents:
|
||||
doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
|
||||
|
||||
if current_group is None:
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(
|
||||
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
|
||||
current_group.text += " " + doc.text
|
||||
# Check if current group is empty or if the document can be added based on token count and matching metadata
|
||||
if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
|
||||
if current_group is None:
|
||||
current_group = doc # Use the document directly to retain its metadata
|
||||
else:
|
||||
current_group.text += " " + doc.text # Append text to the current group
|
||||
else:
|
||||
docs.append(current_group)
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
current_group = doc # Start a new group with the current document
|
||||
|
||||
if current_group is not None:
|
||||
docs.append(current_group)
|
||||
|
||||
@@ -8,10 +8,10 @@ import {
|
||||
selectPrompt,
|
||||
setPrompt,
|
||||
selectSourceDocs,
|
||||
setSourceDocs,
|
||||
} from './preferences/preferenceSlice';
|
||||
import { Doc } from './preferences/preferenceApi';
|
||||
import { useDarkTheme } from './hooks';
|
||||
import { Light } from 'react-syntax-highlighter';
|
||||
type PromptProps = {
|
||||
prompts: { name: string; id: string; type: string }[];
|
||||
selectedPrompt: { name: string; id: string; type: string };
|
||||
@@ -86,13 +86,11 @@ const Setting: React.FC = () => {
|
||||
fetch(`${apiHost}/api/delete_old?path=${docPath}`, {
|
||||
method: 'GET',
|
||||
})
|
||||
.then(() => {
|
||||
// remove the image element from the DOM
|
||||
const imageElement = document.querySelector(
|
||||
`#img-${index}`,
|
||||
) as HTMLElement;
|
||||
const parentElement = imageElement.parentNode as HTMLElement;
|
||||
parentElement.parentNode?.removeChild(parentElement);
|
||||
.then((response) => {
|
||||
if(response.ok && documents){
|
||||
const updatedDocuments = [...documents.slice(0, index), ...documents.slice(index + 1)];
|
||||
dispatch(setSourceDocs(updatedDocuments));
|
||||
}
|
||||
})
|
||||
.catch((error) => console.error(error));
|
||||
};
|
||||
|
||||
@@ -140,12 +140,12 @@ export default function Conversation() {
|
||||
)}
|
||||
|
||||
{queries.length > 0 && (
|
||||
<div className="mt-20 flex flex-col transition-all md:w-3/4">
|
||||
<div className="mt-20 mb-9 flex flex-col transition-all md:w-3/4">
|
||||
{queries.map((query, index) => {
|
||||
return (
|
||||
<Fragment key={index}>
|
||||
<ConversationBubble
|
||||
className={'last:mb-27 mb-7'}
|
||||
className={'last:mb-28 mb-7'}
|
||||
key={`${index}QUESTION`}
|
||||
message={query.prompt}
|
||||
type="QUESTION"
|
||||
@@ -160,7 +160,7 @@ export default function Conversation() {
|
||||
{queries.length === 0 && (
|
||||
<Hero className="mt-24 h-[100vh] md:mt-52"></Hero>
|
||||
)}
|
||||
<div className="relative bottom-0 flex w-10/12 flex-col items-end self-center bg-white dark:bg-raisin-black pt-3 md:fixed md:w-[65%]">
|
||||
<div className="absolute bottom-0 flex w-11/12 md:w-[65%] flex-col items-end self-center bg-white dark:bg-raisin-black pt-4 md:fixed">
|
||||
<div className="flex h-full w-full">
|
||||
<div
|
||||
id="inputbox"
|
||||
@@ -169,7 +169,7 @@ export default function Conversation() {
|
||||
placeholder="Type your message here..."
|
||||
contentEditable
|
||||
onPaste={handlePaste}
|
||||
className={`border-000000 overflow-x-hidden; max-h-24 min-h-[2.6rem] w-full overflow-y-auto whitespace-pre-wrap rounded-3xl border bg-white dark:bg-transparent dark:text-bright-gray py-2 pl-4 pr-9 text-base leading-7 opacity-100 focus:outline-none`}
|
||||
className={`border-000000 overflow-x-hidden max-h-24 min-h-[2.6rem] w-full overflow-y-auto whitespace-pre-wrap rounded-3xl border bg-white dark:bg-raisin-black dark:text-bright-gray py-2 pl-4 pr-9 text-base leading-7 opacity-100 focus:outline-none`}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
e.preventDefault();
|
||||
@@ -200,9 +200,8 @@ export default function Conversation() {
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<p className="text-gray-595959 dark:text-bright-gray w-[100vw] self-center bg-transparent p-5 text-center text-xs md:w-full">
|
||||
This is a chatbot that uses the GPT-3, Faiss and LangChain to answer
|
||||
questions.
|
||||
<p className="text-gray-595959 dark:text-bright-gray bg-white dark:bg-raisin-black w-[100vw] self-center bg-transparent p-5 text-center text-xs md:w-full">
|
||||
DocsGPT uses GenAI, please review critical information using sources.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
5
mock-backend/.gitignore
vendored
Normal file
5
mock-backend/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
# Elastic Beanstalk Files
|
||||
.elasticbeanstalk/*
|
||||
!.elasticbeanstalk/*.cfg.yml
|
||||
!.elasticbeanstalk/*.global.yml
|
||||
@@ -6,6 +6,6 @@ COPY package*.json ./
|
||||
RUN npm install
|
||||
COPY . .
|
||||
|
||||
EXPOSE 7091
|
||||
EXPOSE 8080
|
||||
|
||||
CMD [ "npm", "run", "start"]
|
||||
|
||||
1
mock-backend/package-lock.json
generated
1
mock-backend/package-lock.json
generated
@@ -9,6 +9,7 @@
|
||||
"version": "1.0.0",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"cors": "^2.8.5",
|
||||
"json-server": "^0.17.4",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"cors": "^2.8.5",
|
||||
"json-server": "^0.17.4",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
|
||||
@@ -225,7 +225,19 @@
|
||||
"version": "0.1.0"
|
||||
}
|
||||
],
|
||||
"conversations": [],
|
||||
"conversations": [
|
||||
{
|
||||
"id": "65cf39c936523eea21ebe117",
|
||||
"name": "Request clarification"
|
||||
},
|
||||
{
|
||||
"id": "65cf39ba36523eea21ebe116",
|
||||
"name": "Clarification request"
|
||||
},
|
||||
{
|
||||
"id": "65cf37e97d527c332bbac933",
|
||||
"name": "Greetings, assistance inquiry."
|
||||
}],
|
||||
"docs_check": {
|
||||
"status": "loaded"
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import jsonServer from "json-server";
|
||||
import routes from "./mocks/routes.json" assert { type: "json" };
|
||||
import { v4 as uuid } from "uuid";
|
||||
|
||||
import cors from 'cors'
|
||||
const server = jsonServer.create();
|
||||
const router = jsonServer.router("./src/mocks/db.json");
|
||||
const middlewares = jsonServer.defaults();
|
||||
@@ -9,7 +9,7 @@ const middlewares = jsonServer.defaults();
|
||||
const localStorage = [];
|
||||
|
||||
server.use(middlewares);
|
||||
|
||||
server.use(cors({ origin: ['*'] }))
|
||||
server.use(jsonServer.rewriter(routes));
|
||||
|
||||
server.use((req, res, next) => {
|
||||
@@ -49,22 +49,83 @@ router.render = (req, res) => {
|
||||
} else {
|
||||
res.status(404).jsonp({});
|
||||
}
|
||||
} else if (req.url === "/stream") {
|
||||
res.status(200).jsonp({
|
||||
data: "The answer is 42",
|
||||
sources: [
|
||||
"https://en.wikipedia.org/wiki/42_(number)",
|
||||
"https://en.wikipedia.org/wiki/42_(number)",
|
||||
],
|
||||
conversation_id: "1234",
|
||||
} else if (req.url === "/stream" && req.method === "POST") {
|
||||
res.writeHead(200, {
|
||||
'Content-Type': 'text/event-stream',
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive'
|
||||
});
|
||||
} else {
|
||||
const message = ('Hi, How are you today?').split(' ');
|
||||
let index = 0;
|
||||
const interval = setInterval(() => {
|
||||
if (index < message.length) {
|
||||
res.write(`data: {"answer": "${message[index++]} "}\n`);
|
||||
} else {
|
||||
res.write(`data: {"type": "id", "id": "65cbc39d11f077b9eeb06d26"}\n`)
|
||||
res.write(`data: {"type": "end"}\n`)
|
||||
clearInterval(interval); // Stop the interval once the message is fully streamed
|
||||
res.end(); // End the response
|
||||
}
|
||||
}, 500); // Send a word every 500 ms
|
||||
}
|
||||
else if (req.url === '/search' && req.method === 'POST') {
|
||||
res.status(200).json(
|
||||
[
|
||||
{
|
||||
"text": "\n\n/api/answer\nIt's a POST request that sends a JSON in body with 4 values. It will receive an answer for a user provided question.\n",
|
||||
"title": "API-docs.md"
|
||||
},
|
||||
{
|
||||
"text": "\n\nOur Standards\n\nExamples of behavior that contribute to a positive environment for our\ncommunity include:\n* Demonstrating empathy and kindness towards other people\n",
|
||||
"title": "How-to-use-different-LLM.md"
|
||||
}
|
||||
]
|
||||
)
|
||||
}
|
||||
else if (req.url === '/get_prompts' && req.method === 'GET') {
|
||||
res.status(200).json([
|
||||
{
|
||||
"id": "default",
|
||||
"name": "default",
|
||||
"type": "public"
|
||||
},
|
||||
{
|
||||
"id": "creative",
|
||||
"name": "creative",
|
||||
"type": "public"
|
||||
},
|
||||
{
|
||||
"id": "strict",
|
||||
"name": "strict",
|
||||
"type": "public"
|
||||
}
|
||||
]);
|
||||
}
|
||||
else if (req.url.startsWith('/get_single_prompt') && req.method==='GET') {
|
||||
const id = req.query.id;
|
||||
console.log('hre');
|
||||
if (id === 'creative')
|
||||
res.status(200).json({
|
||||
"content": "You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible."
|
||||
})
|
||||
else if (id === 'strict') {
|
||||
res.status(200).json({
|
||||
"content": "You are an AI Assistant, DocsGPT, adept at offering document assistance. \nYour expertise lies in providing answer on top of provided context."
|
||||
})
|
||||
}
|
||||
else {
|
||||
res.status(200).json({
|
||||
"content": "You are a helpful AI assistant, DocsGPT, specializing in document assistance, designed to offer detailed and informative responses."
|
||||
})
|
||||
}
|
||||
}
|
||||
else {
|
||||
res.status(res.statusCode).jsonp(res.locals.data);
|
||||
}
|
||||
};
|
||||
|
||||
server.use(router);
|
||||
|
||||
server.listen(7091, () => {
|
||||
server.listen(8080, () => {
|
||||
console.log("JSON Server is running");
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user