Merge pull request #825 from arc53/feat/mongodb

Public LLM
This commit is contained in:
Alex
2024-01-09 14:31:40 +00:00
committed by GitHub
13 changed files with 141 additions and 37 deletions

View File

@@ -1,4 +1,5 @@
API_KEY=<LLM api key (for example, open ai key)>
+LLM_NAME=docsgpt
VITE_API_STREAMING=true
#For Azure (you can delete it if you don't use Azure)

View File

@@ -86,17 +86,18 @@ On Mac OS or Linux, write:
`./setup.sh`
-It will install all the dependencies and allow you to download the local model or use OpenAI.
+It will install all the dependencies and allow you to download the local model, use OpenAI, or use our LLM API.
Otherwise, refer to this Guide:
1. Download and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
-2. Create a `.env` file in your root directory and set the env variable `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys) and `VITE_API_STREAMING` to true or false, depending on whether you want streaming answers or not.
+2. Create a `.env` file in your root directory and set the env variables, including `VITE_API_STREAMING` set to true or false, depending on whether you want streaming answers or not.
It should look like this inside:
```
-API_KEY=Yourkey
+LLM_NAME=[docsgpt or openai or others]
VITE_API_STREAMING=true
+API_KEY=[if LLM_NAME is openai]
```
See optional environment variables in the [/.env-template](https://github.com/arc53/DocsGPT/blob/main/.env-template) and [/application/.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) files.
@@ -126,7 +127,7 @@ docker compose -f docker-compose-dev.yaml up -d
> Make sure you have Python 3.10 or 3.11 installed.
1. Export required environment variables or prepare a `.env` file in the `/application` folder:
-- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env` with your OpenAI API token for the `API_KEY` and `EMBEDDINGS_KEY` fields.
+- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`.
(check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.)
@@ -147,14 +148,22 @@ python -m venv venv
venv/Scripts/activate
```
-3. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
+3. Download the embedding model and save it in the `model/` folder:
+You can use the script below, or download it manually from [here](https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip), unzip it, and save it in the `model/` folder.
```commandline
-pip install -r application/requirements.txt
+wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
+unzip mpnet-base-v2.zip -d model
+rm mpnet-base-v2.zip
+```
+4. Change to the `application/` subdir by the command `cd application/` and install dependencies for the backend:
+```commandline
+pip install -r requirements.txt
```
-4. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
-5. Start worker with `celery -A application.app.celery worker -l INFO`.
+5. Run the app using `flask --app application/app.py run --host=0.0.0.0 --port=7091`.
+6. Start worker with `celery -A application.app.celery worker -l INFO`.
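Before starting the app, it can help to sanity-check the downloaded model. A minimal sketch, assuming `sentence-transformers` is available in the backend environment and the zip unpacks to `model/all-mpnet-base-v2` (the path the vector store expects):
```python
# Quick check that the local embedding model loads and emits 768-dim vectors.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("./model/all-mpnet-base-v2")
emb = model.encode(["hello world"])
print(emb.shape)  # expected: (1, 768) for all-mpnet-base-v2
```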
### Start Frontend

View File

@@ -7,6 +7,10 @@ ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
COPY requirements.txt .
RUN pip install -r requirements.txt
+RUN apt-get install -y wget unzip
+RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
+RUN unzip mpnet-base-v2.zip -d model
+RUN rm mpnet-base-v2.zip
FROM python:3.11-slim-bullseye
@@ -14,6 +18,8 @@ FROM python:3.11-slim-bullseye
COPY --from=builder /usr/local/ /usr/local/
WORKDIR /app
+COPY --from=builder /model /app/model
COPY . /app/application
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true

View File

@@ -7,8 +7,8 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
class Settings(BaseSettings):
-    LLM_NAME: str = "openai"
-    EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
+    LLM_NAME: str = "docsgpt"
+    EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
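Since `Settings` extends pydantic's `BaseSettings`, each of these defaults can be overridden per deployment via environment variables or the `.env` file. A minimal standalone sketch of that mechanism (field names mirror the hunk above; the import assumes pydantic v1-style `BaseSettings`):
```python
import os
from pydantic import BaseSettings  # in pydantic v2 this moves to pydantic-settings


class Settings(BaseSettings):
    LLM_NAME: str = "docsgpt"
    EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"


os.environ["LLM_NAME"] = "openai"  # e.g. exported in the shell or set in .env
print(Settings().LLM_NAME)         # -> "openai": the env var overrides the default
```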

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,49 @@
+from application.llm.base import BaseLLM
+import json
+import requests
+
+
+class DocsGPTAPILLM(BaseLLM):
+
+    def __init__(self, *args, **kwargs):
+        self.endpoint = "https://llm.docsgpt.co.uk"
+
+    def gen(self, model, engine, messages, stream=False, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        response = requests.post(
+            f"{self.endpoint}/answer",
+            json={
+                "prompt": prompt,
+                "max_new_tokens": 30
+            }
+        )
+        response_clean = response.json()['a'].split("###")[0]
+
+        return response_clean
+
+    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
+        context = messages[0]['content']
+        user_question = messages[-1]['content']
+        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
+
+        # send prompt to endpoint /stream
+        response = requests.post(
+            f"{self.endpoint}/stream",
+            json={
+                "prompt": prompt,
+                "max_new_tokens": 256
+            },
+            stream=True
+        )
+
+        for line in response.iter_lines():
+            if line:
+                # data = json.loads(line)
+                data_str = line.decode('utf-8')
+                if data_str.startswith("data: "):
+                    data = json.loads(data_str[6:])
+                    yield data['a']
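A hedged usage sketch of the new provider: the message shape mirrors what `gen`/`gen_stream` index into (retrieved context first, the user's question last), and `model`/`engine` are unused by this provider. Note that the non-streaming `/answer` call requests only `max_new_tokens: 30`, so `gen` returns a short completion, while `/stream` allows 256.
```python
from application.llm.docsgpt_provider import DocsGPTAPILLM

llm = DocsGPTAPILLM()
messages = [
    {"role": "system", "content": "…retrieved documentation chunks…"},
    {"role": "user", "content": "How do I run the backend?"},
]

print(llm.gen(model=None, engine=None, messages=messages))       # one-shot answer
for token in llm.gen_stream(model=None, engine=None, messages=messages):
    print(token, end="")                                          # streamed tokens
```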

View File

@@ -3,6 +3,7 @@ from application.llm.sagemaker import SagemakerAPILLM
from application.llm.huggingface import HuggingFaceLLM
from application.llm.llama_cpp import LlamaCpp
from application.llm.anthropic import AnthropicLLM
+from application.llm.docsgpt_provider import DocsGPTAPILLM
@@ -13,7 +14,8 @@ class LLMCreator:
        'sagemaker': SagemakerAPILLM,
        'huggingface': HuggingFaceLLM,
        'llama.cpp': LlamaCpp,
-        'anthropic': AnthropicLLM
+        'anthropic': AnthropicLLM,
+        'docsgpt': DocsGPTAPILLM
    }
    @classmethod
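With the registry entry in place, the new `LLM_NAME = "docsgpt"` default selects this provider. A sketch, assuming `create_llm` resolves the key in the `llms` dict above (its exact signature sits outside this hunk, and the module path is inferred from the imports):
```python
from application.core.settings import settings
from application.llm.llm_creator import LLMCreator

# Assumption: create_llm(type, *args, **kwargs) looks up cls.llms[type]
# and instantiates it; the signature is not shown in this diff.
llm = LLMCreator.create_llm(settings.LLM_NAME)  # 'docsgpt' -> DocsGPTAPILLM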

View File

@@ -44,6 +44,11 @@ class BaseVectorStore(ABC):
            embedding_instance = embeddings_factory[embeddings_name](
                cohere_api_key=embeddings_key
            )
+        elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
+            embedding_instance = embeddings_factory[embeddings_name](
+                model_name="./model/all-mpnet-base-v2",
+                model_kwargs={"device": "cpu"},
+            )
        else:
            embedding_instance = embeddings_factory[embeddings_name]()
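The new branch passes a local path instead of an API key, so embeddings are computed on-device. Assuming the factory maps this name to LangChain's `HuggingFaceEmbeddings` (which the `model_name`/`model_kwargs` arguments suggest), the branch resolves to roughly:
```python
from langchain.embeddings import HuggingFaceEmbeddings  # assumed factory target

embeddings = HuggingFaceEmbeddings(
    model_name="./model/all-mpnet-base-v2",  # unzipped by setup.sh / the Dockerfile
    model_kwargs={"device": "cpu"},
)
vec = embeddings.embed_query("hello world")
print(len(vec))  # all-mpnet-base-v2 produces 768-dim vectors
```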

View File

@@ -8,7 +8,7 @@ Just run the following command:
./setup.sh
```
-This command will install all the necessary dependencies and provide you with an option to download the local model or use OpenAI.
+This command will install all the necessary dependencies and provide you with an option to use our LLM API, download the local model, or use OpenAI.
If you prefer to follow manual steps, refer to this guide:
@@ -16,7 +16,7 @@ If you prefer to follow manual steps, refer to this guide:
```bash
git clone https://github.com/arc53/DocsGPT.git
```
-2. Create a `.env` file in your root directory and set your `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys).
+2. Create a `.env` file in your root directory and set your `API_KEY` with your [OpenAI API key](https://platform.openai.com/account/api-keys) (optional; only needed if you want to use OpenAI).
3. Run the following commands:
```bash
docker-compose build && docker-compose up

View File

@@ -64,7 +64,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
const navRef = useRef(null);
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
const embeddingsName =
-import.meta.env.VITE_EMBEDDINGS_NAME || 'openai_text-embedding-ada-002';
+import.meta.env.VITE_EMBEDDINGS_NAME ||
+'huggingface_sentence-transformers/all-mpnet-base-v2';
const navigate = useNavigate();
@@ -181,15 +182,17 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Expand}
alt="menu toggle"
-className={`${!navOpen ? 'rotate-180' : 'rotate-0'
-} m-auto transition-all duration-200`}
+className={`${
+!navOpen ? 'rotate-180' : 'rotate-0'
+} m-auto transition-all duration-200`}
/>
</button>
)}
<div
ref={navRef}
-className={`${!navOpen && '-ml-96 md:-ml-[18rem]'
-} duration-20 fixed top-0 z-20 flex h-full w-72 flex-col border-r-2 bg-white transition-all`}
+className={`${
+!navOpen && '-ml-96 md:-ml-[18rem]'
+} duration-20 fixed top-0 z-20 flex h-full w-72 flex-col border-r-2 bg-white transition-all`}
>
<div
className={'visible mt-2 flex h-[6vh] w-full justify-between md:h-12'}
@@ -207,8 +210,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Expand}
alt="menu toggle"
-className={`${!navOpen ? 'rotate-180' : 'rotate-0'
-} m-auto transition-all duration-200`}
+className={`${
+!navOpen ? 'rotate-180' : 'rotate-0'
+} m-auto transition-all duration-200`}
/>
</button>
</div>
@@ -223,8 +227,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
);
}}
className={({ isActive }) =>
-`${isActive ? 'bg-gray-3000' : ''
-} group mx-4 mt-4 sticky flex cursor-pointer gap-2.5 rounded-3xl border border-silver p-3 hover:border-rainy-gray hover:bg-gray-3000`
+`${
+isActive ? 'bg-gray-3000' : ''
+} group sticky mx-4 mt-4 flex cursor-pointer gap-2.5 rounded-3xl border border-silver p-3 hover:border-rainy-gray hover:bg-gray-3000`
}
>
<img
@@ -237,12 +242,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
</p>
</NavLink>
<div className="mb-auto h-[56vh] overflow-x-hidden overflow-y-scroll">
{conversations && (
<div>
<p className="ml-6 mt-3 text-sm font-semibold">Chats</p>
<div className="conversations-container">
{conversations?.map((conversation) => (
<ConversationTile
key={conversation.id}
@@ -259,7 +262,7 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
)}
</div>
-<div className='h-auto flex-col flex justify-end'>
+<div className="flex h-auto flex-col justify-end">
<div className="flex flex-col-reverse border-b-2">
<div className="relative my-4 flex gap-2 px-2">
<div
@@ -274,8 +277,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<img
src={Arrow2}
alt="arrow"
-className={`${!isDocsListOpen ? 'rotate-0' : 'rotate-180'
-} ml-auto mr-3 w-3 transition-all`}
+className={`${
+!isDocsListOpen ? 'rotate-0' : 'rotate-180'
+} ml-auto mr-3 w-3 transition-all`}
/>
</div>
<img
@@ -330,11 +334,16 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<NavLink
to="/settings"
className={({ isActive }) =>
-`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${isActive ? 'bg-gray-3000' : ''
+`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${
+isActive ? 'bg-gray-3000' : ''
}`
}
>
-<img src={SettingGear} alt="settings" className="ml-2 w-5 opacity-60" />
+<img
+src={SettingGear}
+alt="settings"
+className="ml-2 w-5 opacity-60"
+/>
<p className="my-auto text-sm text-eerie-black">Settings</p>
</NavLink>
</div>
@@ -343,7 +352,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
<NavLink
to="/about"
className={({ isActive }) =>
-`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${isActive ? 'bg-gray-3000' : ''
+`my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100 ${
+isActive ? 'bg-gray-3000' : ''
}`
}
>
@@ -357,7 +367,11 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
rel="noreferrer"
className="my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100"
>
-<img src={Documentation} alt="documentation" className="ml-2 w-5" />
+<img
+src={Documentation}
+alt="documentation"
+className="ml-2 w-5"
+/>
<p className="my-auto text-sm text-eerie-black">Documentation</p>
</a>
<a
@@ -379,7 +393,9 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
className="my-auto mx-4 flex h-9 cursor-pointer gap-4 rounded-3xl hover:bg-gray-100"
>
<img src={Github} alt="github-link" className="ml-2 w-5" />
<p className="my-auto text-sm text-eerie-black">Visit our Github</p>
<p className="my-auto text-sm text-eerie-black">
Visit our Github
</p>
</a>
</div>
</div>

View File

@@ -3,9 +3,10 @@
# Function to prompt the user for their choice
prompt_user() {
  echo "Do you want to:"
-  echo "1. Download the language model locally (12GB)"
-  echo "2. Use the OpenAI API"
-  read -p "Enter your choice (1/2): " choice
+  echo "1. Use DocsGPT public API (simple and free)"
+  echo "2. Download the language model locally (12GB)"
+  echo "3. Use the OpenAI API (requires an API key)"
+  read -p "Enter your choice (1, 2 or 3): " choice
}
# Function to handle the choice to download the model locally
@@ -67,15 +68,30 @@ use_openai() {
echo "docker-compose down"
}
+use_docsgpt() {
+  echo "LLM_NAME=docsgpt" > .env
+  echo "VITE_API_STREAMING=true" >> .env
+  echo "The .env file has been created with LLM_NAME set to docsgpt."
+  docker-compose build && docker-compose up -d
+  echo "The application will run on http://localhost:5173"
+  echo "You can stop the application by running the following command:"
+  echo "docker-compose down"
+}
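For reference, the `.env` written by `use_docsgpt` above contains exactly:
```
LLM_NAME=docsgpt
VITE_API_STREAMING=true
```
No API key is written or required in this mode, since the provider calls the public endpoint.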
# Prompt the user for their choice
prompt_user
# Handle the user's choice
case $choice in
  1)
-    download_locally
+    use_docsgpt
    ;;
  2)
+    download_locally
+    ;;
+  3)
    use_openai
    ;;
*)

View File

@@ -14,6 +14,6 @@ def test_init_local_faiss_store_huggingface():
    index.faiss file in the application/ folder results in a
    dimension mismatch error.
    """
-    settings.EMBEDDINGS_NAME = "huggingface_sentence-transformers/all-mpnet-base-v2"
+    settings.EMBEDDINGS_NAME = "openai_text-embedding-ada-002"
    with pytest.raises(ValueError):
        FaissStore("application/", "", None)