Merge pull request #825 from arc53/feat/mongodb

Public LLM
This commit is contained in:
Alex
2024-01-09 14:31:40 +00:00
committed by GitHub
13 changed files with 141 additions and 37 deletions

View File

@@ -1,4 +1,5 @@
API_KEY=<LLM api key (for example, open ai key)>
+LLM_NAME=docsgpt
VITE_API_STREAMING=true
#For Azure (you can delete it if you don't use Azure)

View File

@@ -86,17 +86,18 @@ On Mac OS or Linux, write:
`./setup.sh`
-It will install all the dependencies and allow you to download the local model or use OpenAI.
+It will install all the dependencies and allow you to download the local model, use OpenAI, or use our LLM API.
Otherwise, refer to this Guide:
1. Download and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
-2. Create a `.env` file in your root directory and set the env variable `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys) and `VITE_API_STREAMING` to true or false, depending on whether you want streaming answers or not.
+2. Create a `.env` file in your root directory and set the env variables, including `VITE_API_STREAMING` set to true or false, depending on whether you want streaming answers or not.
It should look like this inside:
```
-API_KEY=Yourkey
+LLM_NAME=[docsgpt or openai or others]
VITE_API_STREAMING=true
+API_KEY=[if LLM_NAME is openai]
```
See optional environment variables in the [/.env-template](https://github.com/arc53/DocsGPT/blob/main/.env-template) and [/application/.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) files.
@@ -126,7 +127,7 @@ docker compose -f docker-compose-dev.yaml up -d
> Make sure you have Python 3.10 or 3.11 installed.
1. Export required environment variables or prepare a `.env` file in the `/application` folder:
-- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env` with your OpenAI API token for the `API_KEY` and `EMBEDDINGS_KEY` fields.
+- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`.
(check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.)
@@ -147,14 +148,22 @@ python -m venv venv
venv/Scripts/activate
```
-3. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
+3. Download the embedding model and save it in the `model/` folder:
+You can use the script below, or download it manually from [here](https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip), unzip it, and save it in the `model/` folder.
```commandline
-pip install -r application/requirements.txt
+wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
+unzip mpnet-base-v2.zip -d model
+rm mpnet-base-v2.zip
+```
+4. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
+```commandline
+pip install -r requirements.txt
```
-4. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
-5. Start worker with `celery -A application.app.celery worker -l INFO`.
+5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
+6. Start worker with `celery -A application.app.celery worker -l INFO`.
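Before starting the app, it can help to sanity-check the downloaded model. A minimal sketch, assuming `sentence-transformers` is available in the backend environment and the zip unpacks to `model/all-mpnet-base-v2` (the path the vector store expects):
```python
# Quick check that the local embedding model loads and emits 768-dim vectors.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("./model/all-mpnet-base-v2")
emb = model.encode(["hello world"])
print(emb.shape)  # expected: (1, 768) for all-mpnet-base-v2
```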
### Start Frontend

View File

@@ -7,6 +7,10 @@ ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
COPY requirements.txt .
RUN pip install -r requirements.txt
+RUN apt-get install -y wget unzip
+RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
+RUN unzip mpnet-base-v2.zip -d model
+RUN rm mpnet-base-v2.zip
FROM python:3.11-slim-bullseye
@@ -14,6 +18,8 @@ FROM python:3.11-slim-bullseye
COPY --from=builder /usr/local/ /usr/local/
WORKDIR /app
+COPY --from=builder /model /app/model
COPY . /app/application
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true

View File

@@ -7,8 +7,8 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
class Settings(BaseSettings):
-    LLM_NAME: str = "openai"
-    EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
+    LLM_NAME: str = "docsgpt"
+    EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
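Since `Settings` extends pydantic's `BaseSettings`, each of these defaults can be overridden per deployment via environment variables or the `.env` file. A minimal standalone sketch of that mechanism (field names mirror the hunk above; the import assumes pydantic v1-style `BaseSettings`):
```python
import os
from pydantic import BaseSettings  # in pydantic v2 this moves to pydantic-settings


class Settings(BaseSettings):
    LLM_NAME: str = "docsgpt"
    EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"


os.environ["LLM_NAME"] = "openai"  # e.g. exported in the shell or set in .env
print(Settings().LLM_NAME)         # -> "openai": the env var overrides the default
```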

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,49 @@
+from application.llm.base import BaseLLM
+import json
+import requests
+
+
+class DocsGPTAPILLM(BaseLLM):
+
+    def __init__(self, *args, **kwargs):
+        self.endpoint = "https://llm.docsgpt.co.uk"
+
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        response = requests.post(
+            f"{self.endpoint}/answer",
+            json={
+                "prompt": prompt,
+                "max_new_tokens": 30
+            }
+        )
+        response_clean = response.json()['a'].split("###")[0]
+
+        return response_clean
+
+    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        # send prompt to endpoint /stream
+        response = requests.post(
+            f"{self.endpoint}/stream",
+            json={
+                "prompt": prompt,
+                "max_new_tokens": 256
+            },
+            stream=True
+        )
+
+        for line in response.iter_lines():
+            if line:
+                # data = json.loads(line)
+                data_str = line.decode('utf-8')
+                if data_str.startswith("data: "):
+                    data = json.loads(data_str[6:])
+                    yield data['a']
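A hedged usage sketch of the new provider: the message shape mirrors what `gen`/`gen_stream` index into (retrieved context first, the user's question last), and `model`/`engine` are unused by this provider. Note that the non-streaming `/answer` call requests only `max_new_tokens: 30`, so `gen` returns a short completion, while `/stream` allows 256.
```python
from application.llm.docsgpt_provider import DocsGPTAPILLM

llm = DocsGPTAPILLM()
messages = [
    {"role": "system", "content": "…retrieved documentation chunks…"},
    {"role": "user", "content": "How do I run the backend?"},
]

print(llm.gen(model=None, engine=None, messages=messages))       # one-shot answer
for token in llm.gen_stream(model=None, engine=None, messages=messages):
    print(token, end="")                                          # streamed tokens
```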

View File

@@ -3,6 +3,7 @@ from application.llm.sagemaker import SagemakerAPILLM
from application.llm.huggingface import HuggingFaceLLM
from application.llm.llama_cpp import LlamaCpp
from application.llm.anthropic import AnthropicLLM
+from application.llm.docsgpt_provider import DocsGPTAPILLM
@@ -13,7 +14,8 @@ class LLMCreator:
        'sagemaker': SagemakerAPILLM,
        'huggingface': HuggingFaceLLM,
        'llama.cpp': LlamaCpp,
-        'anthropic': AnthropicLLM
+        'anthropic': AnthropicLLM,
+        'docsgpt': DocsGPTAPILLM
    }
    @classmethod
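With the registry entry in place, the new `LLM_NAME = "docsgpt"` default selects this provider. A sketch, assuming `create_llm` resolves the key in the `llms` dict above (its exact signature sits outside this hunk, and the module path is inferred from the imports):
```python
from application.core.settings import settings
from application.llm.llm_creator import LLMCreator

# Assumption: create_llm(type, *args, **kwargs) looks up cls.llms[type]
# and instantiates it; the signature is not shown in this diff.
llm = LLMCreator.create_llm(settings.LLM_NAME)  # 'docsgpt' -> DocsGPTAPILLM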

View File

@@ -44,6 +44,11 @@ class BaseVectorStore(ABC):
            embedding_instance = embeddings_factory[embeddings_name](
                cohere_api_key=embeddings_key
            )
+        elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
+            embedding_instance = embeddings_factory[embeddings_name](
+                model_name="./model/all-mpnet-base-v2",
+                model_kwargs={"device": "cpu"},
+            )
        else:
            embedding_instance = embeddings_factory[embeddings_name]()
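The new branch passes a local path instead of an API key, so embeddings are computed on-device. Assuming the factory maps this name to LangChain's `HuggingFaceEmbeddings` (which the `model_name`/`model_kwargs` arguments suggest), the branch resolves to roughly:
```python
from langchain.embeddings import HuggingFaceEmbeddings  # assumed factory target

embeddings = HuggingFaceEmbeddings(
    model_name="./model/all-mpnet-base-v2",  # unzipped by setup.sh / the Dockerfile
    model_kwargs={"device": "cpu"},
)
vec = embeddings.embed_query("hello world")
print(len(vec))  # all-mpnet-base-v2 produces 768-dim vectors
```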

View File

@@ -8,7 +8,7 @@ Just run the following command:
./setup.sh
```
-This command will install all the necessary dependencies and provide you with an option to download the local model or use OpenAI.
+This command will install all the necessary dependencies and provide you with an option to use our LLM API, download the local model, or use OpenAI.
If you prefer to follow manual steps, refer to this guide:
@@ -16,7 +16,7 @@ If you prefer to follow manual steps, refer to this guide:
```bash
git clone https://github.com/arc53/DocsGPT.git
```
-2. Create a `.env` file in your root directory and set your `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys).
+2. Create a `.env` file in your root directory and set your `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys) (optional; only needed if you want to use OpenAI).
3. Run the following commands:
```bash
docker-compose build && docker-compose up

View File

@@ -64,7 +64,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
const navRef = useRef(null);
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
const embeddingsName =
-import.meta.env.VITE_EMBEDDINGS_NAME || 'openai_text-embedding-ada-002';
+import.meta.env.VITE_EMBEDDINGS_NAME ||
+'huggingface_sentence-transformers/all-mpnet-base-v2';
const navigate = useNavigate();
@@ -181,15 +182,17 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Expand}
alt="menu toggle"
-className={`${!navOpen ? 'rotate-180' : 'rotate-0'
-} m-auto transition-all duration-200`}
+className={`${
+!navOpen ? 'rotate-180' : 'rotate-0'
+} m-auto transition-all duration-200`}
/>
</button>
)}
<div
ref={navRef}
-className={`${!navOpen && '-ml-96 md:-ml-[18rem]'
-} duration-20 fixed top-0 z-20 flex h-full w-72 flex-col border-r-2 bg-white transition-all`}
+className={`${
+!navOpen && '-ml-96 md:-ml-[18rem]'
+} duration-20 fixed top-0 z-20 flex h-full w-72 flex-col border-r-2 bg-white transition-all`}
>
<div
className={'visible mt-2 flex h-[6vh] w-full justify-between md:h-12'}
@@ -207,8 +210,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Expand}
alt="menu toggle"
-className={`${!navOpen ? 'rotate-180' : 'rotate-0'
-} m-auto transition-all duration-200`}
+className={`${
+!navOpen ? 'rotate-180' : 'rotate-0'
+} m-auto transition-all duration-200`}
/>
</button>
</div>
@@ -223,8 +227,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
);
}}
className={({ isActive }) =>
-`${isActive ? 'bg-gray-3000' : ''
-} group mx-4 mt-4 sticky flex cursor-pointer gap-2.5 rounded-3xl border border-silver p-3 hover:border-rainy-gray hover:bg-gray-3000`
+`${
+isActive ? 'bg-gray-3000' : ''
+} group sticky mx-4 mt-4 flex cursor-pointer gap-2.5 rounded-3xl border border-silver p-3 hover:border-rainy-gray hover:bg-gray-3000`
}
>
<img
@@ -237,12 +242,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
</p>
</NavLink>
<div className="mb-auto h-[56vh] overflow-x-hidden overflow-y-scroll">
{conversations && (
<div>
<p className="ml-6 mt-3 text-sm font-semibold">Chats</p>
<div className="conversations-container">
{conversations?.map((conversation) => (
<ConversationTile
key={conversation.id}
@@ -259,7 +262,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
)}
</div>
-<div className='h-auto flex-col flex justify-end'>
+<div className="flex h-auto flex-col justify-end">
<div className="flex flex-col-reverse border-b-2">
<div className="relative my-4 flex gap-2 px-2">
<div
@@ -274,8 +277,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Arrow2}
alt="arrow"
-className={`${!isDocsListOpen ? 'rotate-0' : 'rotate-180'
-} ml-auto mr-3 w-3 transition-all`}
+className={`${
+!isDocsListOpen ? 'rotate-0' : 'rotate-180'
+} ml-auto mr-3 w-3 transition-all`}
/>
</div>
<img
@@ -330,11 +334,16 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<NavLink
to="/settings"
className={({ isActive }) =>
-`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${isActive ? 'bg-gray-3000' : ''
+`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${
+isActive ? 'bg-gray-3000' : ''
}`
}
>
-<img src={SettingGear} alt="settings" className="ml-2 w-5 opacity-60" />
+<img
+src={SettingGear}
+alt="settings"
+className="ml-2 w-5 opacity-60"
+/>
<p className="my-auto text-sm text-eerie-black">Settings</p>
</NavLink>
</div>
@@ -343,7 +352,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<NavLink
to="/about"
className={({ isActive }) =>
-`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${isActive ? 'bg-gray-3000' : ''
+`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${
+isActive ? 'bg-gray-3000' : ''
}`
}
>
@@ -357,7 +367,11 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
rel="noreferrer"
className="my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100"
>
-<img src={Documentation} alt="documentation" className="ml-2 w-5" />
+<img
+src={Documentation}
+alt="documentation"
+className="ml-2 w-5"
+/>
<p className="my-auto text-sm text-eerie-black">Documentation</p>
</a>
<a
@@ -379,7 +393,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
className="my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100"
>
<img src={Github} alt="github-link" className="ml-2 w-5" />
<p className="my-auto text-sm text-eerie-black">Visit our Github</p>
<p className="my-auto text-sm text-eerie-black">
Visit our Github
</p>
</a>
</div>
</div>

View File

@@ -3,9 +3,10 @@
# Function to prompt the user for their choice
prompt_user() {
  echo "Do you want to:"
-  echo "1. Download the language model locally (12GB)"
-  echo "2. Use the OpenAI API"
-  read -p "Enter your choice (1/2): " choice
+  echo "1. Use DocsGPT public API (simple and free)"
+  echo "2. Download the language model locally (12GB)"
+  echo "3. Use the OpenAI API (requires an API key)"
+  read -p "Enter your choice (1, 2 or 3): " choice
}
# Function to handle the choice to download the model locally
@@ -67,15 +68,30 @@ use_openai() {
echo "docker-compose down"
}
+use_docsgpt() {
+  echo "LLM_NAME=docsgpt" > .env
+  echo "VITE_API_STREAMING=true" >> .env
+  echo "The .env file has been created with LLM_NAME set to docsgpt."
+  docker-compose build && docker-compose up -d
+  echo "The application will run on http://localhost:5173"
+  echo "You can stop the application by running the following command:"
+  echo "docker-compose down"
+}
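For reference, the `.env` written by `use_docsgpt` above contains exactly:
```
LLM_NAME=docsgpt
VITE_API_STREAMING=true
```
No API key is written or required in this mode, since the provider calls the public endpoint.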
# Prompt the user for their choice
prompt_user
# Handle the user's choice
case $choice in
  1)
-    download_locally
+    use_docsgpt
    ;;
  2)
+    download_locally
+    ;;
+  3)
    use_openai
    ;;
*)

View File

@@ -14,6 +14,6 @@ def test_init_local_faiss_store_huggingface():
    index.faiss file in the application/ folder results in a
    dimension mismatch error.
    """
-    settings.EMBEDDINGS_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2"
+    settings.EMBEDDINGS_NAME = "openai_text-embedding-ada-002"
    with pytest.raises(ValueError):
        FaissStore("application/", "", None)