mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
Compare commits
108 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
890a20edba | ||
|
|
e6f48c9403 | ||
|
|
909f0afa69 | ||
|
|
5ed2b99b8c | ||
|
|
7848751fd8 | ||
|
|
e593241d75 | ||
|
|
fcdc7b7aeb | ||
|
|
c3c7878f28 | ||
|
|
85f9ae5a0a | ||
|
|
98a97f34f5 | ||
|
|
98d647a3fe | ||
|
|
9a393b4f74 | ||
|
|
88d74235e1 | ||
|
|
36fa470348 | ||
|
|
33dce10bc3 | ||
|
|
feed0b288f | ||
|
|
1b7dc8a509 | ||
|
|
87cc3cf168 | ||
|
|
eac7b1e9f2 | ||
|
|
bb1a42df91 | ||
|
|
ac5ac3e9f1 | ||
|
|
bed25b317c | ||
|
|
1687e6682a | ||
|
|
22572c8ed1 | ||
|
|
8187a339f0 | ||
|
|
382c3930a2 | ||
|
|
a64a30c088 | ||
|
|
dac76a867f | ||
|
|
b2e86e105d | ||
|
|
b8e57c9b6f | ||
|
|
486a1bc9de | ||
|
|
b1b610f4b5 | ||
|
|
68447a6009 | ||
|
|
a55280b941 | ||
|
|
830462d525 | ||
|
|
ce8b29e9d0 | ||
|
|
6ab15f8eb1 | ||
|
|
96eb68e042 | ||
|
|
bf78bdd6d4 | ||
|
|
d998815847 | ||
|
|
00ba7b78ca | ||
|
|
0b735d94f1 | ||
|
|
301989540f | ||
|
|
e26b95a26f | ||
|
|
049c1ddb48 | ||
|
|
2f1c3075a2 | ||
|
|
b1a5068fd6 | ||
|
|
01fbd5d702 | ||
|
|
5916f92f1a | ||
|
|
5e45268f68 | ||
|
|
b8e28e0c12 | ||
|
|
04f824ea36 | ||
|
|
c216bea031 | ||
|
|
e72ef478dc | ||
|
|
897b4ef2cd | ||
|
|
2404899e28 | ||
|
|
a2dfc2cbdc | ||
|
|
92373b25a9 | ||
|
|
ce1840a9ae | ||
|
|
c4f4bdd789 | ||
|
|
ec5068e85b | ||
|
|
1d9d0ddf27 | ||
|
|
e393be90dd | ||
|
|
e633df06e4 | ||
|
|
0ff5f408d6 | ||
|
|
5eda42ff31 | ||
|
|
84168e22d0 | ||
|
|
b722845aff | ||
|
|
fd54682c02 | ||
|
|
f5e287ffa6 | ||
|
|
fb10a546d6 | ||
|
|
006897f1c0 | ||
|
|
968849e52b | ||
|
|
8bee47dc50 | ||
|
|
08250120d1 | ||
|
|
8892b70785 | ||
|
|
534e4cb591 | ||
|
|
489abdcb0b | ||
|
|
f6b6c2e9a3 | ||
|
|
43c016f024 | ||
|
|
c0e7d9cd8b | ||
|
|
5f687a31f8 | ||
|
|
f2d2478dee | ||
|
|
8a98789be1 | ||
|
|
87a5c8894a | ||
|
|
7e92ed4501 | ||
|
|
a57cdfff1e | ||
|
|
d4ff6d4d7a | ||
|
|
63d99d6a57 | ||
|
|
fce7d34171 | ||
|
|
e7df7f69b3 | ||
|
|
94cc18bd71 | ||
|
|
39024ce2ac | ||
|
|
7ac4f45e7b | ||
|
|
f209eebaf8 | ||
|
|
4889db78c9 | ||
|
|
bff200fede | ||
|
|
af6f783043 | ||
|
|
610adcbefc | ||
|
|
1d3631fa04 | ||
|
|
0630504664 | ||
|
|
6d5b698c39 | ||
|
|
dd9f1abcea | ||
|
|
b4bd34fb96 | ||
|
|
014971262d | ||
|
|
36ed69b07e | ||
|
|
ec4fc17e3a | ||
|
|
78b85fb664 |
@@ -1,2 +1,8 @@
|
||||
OPENAI_API_KEY=<LLM api key (for example, open ai key)>
|
||||
EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
|
||||
|
||||
#For Azure
|
||||
OPENAI_API_BASE=
|
||||
OPENAI_API_VERSION=
|
||||
AZURE_DEPLOYMENT_NAME=
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
|
||||
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
27
.github/workflows/pytest.yml
vendored
Normal file
27
.github/workflows/pytest.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
name: Run python tests with pytest
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest
|
||||
cd application
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
python -m pytest
|
||||
2
.github/workflows/sync_fork.yaml
vendored
2
.github/workflows/sync_fork.yaml
vendored
@@ -5,7 +5,7 @@ permissions:
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 * * * *" # every hour
|
||||
- cron: "0 0 * * *" # every day at midnight (UTC)
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
116
README.md
116
README.md
@@ -23,82 +23,114 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
|
||||
|
||||

|
||||
|
||||
## Roadmap
|
||||
|
||||
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here. Please don't hesitate to contribute or create issues; it helps us make DocsGPT better!
|
||||
|
||||
## Our open source models optimised for DocsGPT:
|
||||
|
||||
| Name | Base Model | Requirements (or similar) |
|
||||
|-------------------|------------|----------------------------------------------------------|
|
||||
| [Docsgpt-7b-falcon](https://huggingface.co/Arc53/docsgpt-7b-falcon) | Falcon-7b | 1xA10G gpu |
|
||||
| [Docsgpt-14b](https://huggingface.co/Arc53/docsgpt-14b) | llama-2-14b | 2xA10 gpu's |
|
||||
| [Docsgpt-40b](https://huggingface.co/Arc53/docsgpt-40b-falcon) | falcon-40b | 8xA10G gpu's |
|
||||
|
||||
|
||||
If you don't have enough resources to run it, you can use bitsandbytes to quantize the model.
|
||||
|
||||
|
||||
## Features
|
||||
|
||||

|
||||
|
||||
|
||||
## Useful links
|
||||
[Live preview](https://docsgpt.arc53.com/)
|
||||
|
||||
[Join Our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
|
||||
[Guides](https://github.com/arc53/docsgpt/wiki)
|
||||
|
||||
## Roadmap
|
||||
[Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better!
|
||||
[How to use any other documentation](https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation)
|
||||
|
||||
|
||||
|
||||
## [Live preview](https://docsgpt.arc53.com/)
|
||||
|
||||
## [Join Our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
[How to host it locally (so all data will stay on-premises)](https://github.com/arc53/DocsGPT/wiki/How-to-use-different-LLM's#hosting-everything-locally)
|
||||
|
||||
|
||||
## Project structure
|
||||
- Application - flask app (main application)
|
||||
- Application - Flask app (main application)
|
||||
|
||||
- Extensions - chrome extension
|
||||
- Extensions - Chrome extension
|
||||
|
||||
- Scripts - script that creates similarity search index and store for other libraries.
|
||||
- Scripts - Script that creates similarity search index and store for other libraries.
|
||||
|
||||
- frontend - frontend in vite and
|
||||
- Frontend - Frontend uses Vite and React
|
||||
|
||||
## QuickStart
|
||||
|
||||
Note: Make sure you have docker installed
|
||||
|
||||
1. Open dowload this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create .env file in your root directory and set your OPENAI_API_KEY with your openai api key and VITE_API_STREAMING to true or false if you dont want streaming answers
|
||||
3. Run `docker-compose build && docker-compose up`
|
||||
1. Download and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create a .env file in your root directory and set the env variable OPENAI_API_KEY to your OpenAI API key and VITE_API_STREAMING to true or false, depending on whether you want streaming answers.
|
||||
It should look like this inside:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=Yourkey
|
||||
VITE_API_STREAMING=true
|
||||
```
|
||||
3. Run `./run-with-docker-compose.sh`
|
||||
4. Navigate to http://localhost:5173/
|
||||
|
||||
To stop, just press Ctrl + C
|
||||
|
||||
## Development environments
|
||||
|
||||
Spin up only 2 containers from docker-compose.yaml (by deleting all services except for redis and mongo)
|
||||
### Spin up mongo and redis
|
||||
For development only 2 containers are used from docker-compose.yaml (by deleting all services except for redis and mongo).
|
||||
See file [docker-compose-dev.yaml](./docker-compose-dev.yaml).
|
||||
|
||||
Make sure you have python 3.10 or 3.11 installed
|
||||
Run
|
||||
```
|
||||
docker compose -f docker-compose-dev.yaml build
|
||||
docker compose -f docker-compose-dev.yaml up -d
|
||||
```
|
||||
|
||||
1. Navigate to `/application` folder
|
||||
2. Run `docker-compose -f docker-compose-dev.yaml build && docker-compose -f docker-compose-dev.yaml up -d`
|
||||
3. Export required variables
|
||||
`export CELERY_BROKER_URL=redis://localhost:6379/0`
|
||||
`export CELERY_RESULT_BACKEND=redis://localhost:6379/1`
|
||||
`export MONGO_URI=mongodb://localhost:27017/docsgpt`
|
||||
4. Install dependencies
|
||||
`pip install -r requirements.txt`
|
||||
5. Prepare .env file
|
||||
Copy .env_sample and create .env with your openai api token
|
||||
6. Run the app
|
||||
`python wsgi.py`
|
||||
7. Start worker with `celery -A app.celery worker -l INFO`
|
||||
### Run the backend
|
||||
|
||||
Make sure you have Python 3.10 or 3.11 installed.
|
||||
|
||||
1. Export required environment variables
|
||||
```commandline
|
||||
export CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
export CELERY_RESULT_BACKEND=redis://localhost:6379/1
|
||||
export MONGO_URI=mongodb://localhost:27017/docsgpt
|
||||
```
|
||||
2. Prepare .env file
|
||||
Copy `.env_sample` and create `.env` with your OpenAI API token
|
||||
3. (optional) Create a python virtual environment
|
||||
```commandline
|
||||
python -m venv venv
|
||||
. venv/bin/activate
|
||||
```
|
||||
4. Change to `application/` subdir and install dependencies for the backend
|
||||
```commandline
|
||||
cd application/
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
5. Run the app `python wsgi.py`
|
||||
6. Start worker with `celery -A app.celery worker -l INFO`
|
||||
|
||||
### Start frontend
|
||||
Make sure you have Node version 16 or higher.
|
||||
|
||||
To start frontend
|
||||
1. Navigate to `/frontend` folder
|
||||
2. Install dependencies
|
||||
`npm install`
|
||||
3. Run the app
|
||||
4. `npm run dev`
|
||||
3. Run the app
|
||||
`npm run dev`
|
||||
|
||||
|
||||
[How to install the Chrome extension](https://github.com/arc53/docsgpt/wiki#launch-chrome-extension)
|
||||
|
||||
|
||||
## [Guides](https://github.com/arc53/docsgpt/wiki)
|
||||
|
||||
## [Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
## [How to use any other documentation](https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation)
|
||||
|
||||
## [How to host it locally (so all data will stay on-premises)](https://github.com/arc53/DocsGPT/wiki/How-to-use-different-LLM's#hosting-everything-locally)
|
||||
|
||||
Built with [🦜️🔗 LangChain](https://github.com/hwchase17/langchain)
|
||||
|
||||
|
||||
@@ -3,4 +3,10 @@ EMBEDDINGS_KEY=your_api_key
|
||||
CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
CELERY_RESULT_BACKEND=redis://localhost:6379/1
|
||||
MONGO_URI=mongodb://localhost:27017/docsgpt
|
||||
API_URL=http://localhost:5001
|
||||
API_URL=http://localhost:7091
|
||||
|
||||
#For OPENAI on Azure
|
||||
OPENAI_API_BASE=
|
||||
OPENAI_API_VERSION=
|
||||
AZURE_DEPLOYMENT_NAME=
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
|
||||
@@ -8,18 +8,16 @@ RUN pip install --upgrade pip && pip install tiktoken==0.3.3
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
|
||||
FROM python:3.10-slim-bullseye
|
||||
# Copy pre-built packages from builder stage
|
||||
COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/
|
||||
RUN pip install gunicorn==20.1.0
|
||||
RUN pip install celery==5.2.7
|
||||
|
||||
# Copy pre-built packages and binaries from builder stage
|
||||
COPY --from=builder /usr/local/ /usr/local/
|
||||
|
||||
WORKDIR /app
|
||||
COPY . /app
|
||||
COPY . /app/application
|
||||
ENV FLASK_APP=app.py
|
||||
ENV FLASK_DEBUG=true
|
||||
|
||||
EXPOSE 7091
|
||||
|
||||
EXPOSE 5001
|
||||
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
|
||||
|
||||
0
application/__init__.py
Normal file
0
application/__init__.py
Normal file
@@ -2,23 +2,29 @@ import asyncio
|
||||
import datetime
|
||||
import http.client
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
|
||||
import openai
|
||||
import dotenv
|
||||
import openai
|
||||
import requests
|
||||
from celery import Celery
|
||||
from celery.result import AsyncResult
|
||||
from flask import Flask, request, render_template, send_from_directory, jsonify, Response
|
||||
from langchain import FAISS
|
||||
from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
|
||||
from langchain import VectorDBQA, Cohere, OpenAI
|
||||
from langchain.chains import LLMChain, ConversationalRetrievalChain
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
|
||||
HuggingFaceInstructEmbeddings
|
||||
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain.embeddings import (
|
||||
OpenAIEmbeddings,
|
||||
HuggingFaceHubEmbeddings,
|
||||
CohereEmbeddings,
|
||||
HuggingFaceInstructEmbeddings,
|
||||
)
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
@@ -26,27 +32,39 @@ from langchain.prompts.chat import (
|
||||
HumanMessagePromptTemplate,
|
||||
AIMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import HumanMessage, AIMessage
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
from langchain.llms import GPT4All
|
||||
|
||||
from core.settings import settings
|
||||
from error import bad_request
|
||||
from worker import ingest_worker
|
||||
from application.core.settings import settings
|
||||
from application.error import bad_request
|
||||
from application.worker import ingest_worker
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
|
||||
if settings.LLM_NAME == "manifest":
|
||||
from manifest import Manifest
|
||||
from langchain.llms.manifest import ManifestWrapper
|
||||
logger = logging.getLogger(__name__)
|
||||
if settings.LLM_NAME == "gpt4":
|
||||
gpt_model = 'gpt-4'
|
||||
else:
|
||||
gpt_model = 'gpt-3.5-turbo'
|
||||
|
||||
manifest = Manifest(
|
||||
client_name="huggingface",
|
||||
client_connection="http://127.0.0.1:5000"
|
||||
|
||||
if settings.SELF_HOSTED_MODEL:
|
||||
from langchain.llms import HuggingFacePipeline
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
||||
|
||||
model_id = settings.LLM_NAME # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
pipe = pipeline(
|
||||
"text-generation", model=model,
|
||||
tokenizer=tokenizer, max_new_tokens=2000,
|
||||
device_map="auto", eos_token_id=tokenizer.eos_token_id
|
||||
)
|
||||
hf = HuggingFacePipeline(pipeline=pipe)
|
||||
|
||||
# Redirect PosixPath to WindowsPath on Windows
|
||||
import platform
|
||||
|
||||
if platform.system() == "Windows":
|
||||
import pathlib
|
||||
@@ -58,40 +76,36 @@ if platform.system() == "Windows":
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# load the prompts
|
||||
with open("prompts/combine_prompt.txt", "r") as f:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
|
||||
template = f.read()
|
||||
|
||||
with open("prompts/combine_prompt_hist.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
|
||||
template_hist = f.read()
|
||||
|
||||
with open("prompts/question_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
|
||||
template_quest = f.read()
|
||||
|
||||
with open("prompts/chat_combine_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
with open("prompts/chat_reduce_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
|
||||
chat_reduce_template = f.read()
|
||||
|
||||
if settings.API_KEY is not None:
|
||||
api_key_set = True
|
||||
else:
|
||||
api_key_set = False
|
||||
if settings.EMBEDDINGS_KEY is not None:
|
||||
embeddings_key_set = True
|
||||
else:
|
||||
embeddings_key_set = False
|
||||
api_key_set = settings.API_KEY is not None
|
||||
embeddings_key_set = settings.EMBEDDINGS_KEY is not None
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
|
||||
app.config['CELERY_BROKER_URL'] = settings.CELERY_BROKER_URL
|
||||
app.config['CELERY_RESULT_BACKEND'] = settings.CELERY_RESULT_BACKEND
|
||||
app.config['MONGO_URI'] = settings.MONGO_URI
|
||||
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER = "inputs"
|
||||
app.config["CELERY_BROKER_URL"] = settings.CELERY_BROKER_URL
|
||||
app.config["CELERY_RESULT_BACKEND"] = settings.CELERY_RESULT_BACKEND
|
||||
app.config["MONGO_URI"] = settings.MONGO_URI
|
||||
celery = Celery()
|
||||
celery.config_from_object('celeryconfig')
|
||||
mongo = MongoClient(app.config['MONGO_URI'])
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
mongo = MongoClient(app.config["MONGO_URI"])
|
||||
db = mongo["docsgpt"]
|
||||
vectors_collection = db["vectors"]
|
||||
conversations_collection = db["conversations"]
|
||||
|
||||
|
||||
async def async_generate(chain, question, chat_history):
|
||||
@@ -120,15 +134,22 @@ def get_vectorstore(data):
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data['active_docs'] == "default":
|
||||
if data["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
|
||||
|
||||
def get_docsearch(vectorstore, embeddings_key):
|
||||
if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
|
||||
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
|
||||
if is_azure_configured():
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME)
|
||||
else:
|
||||
openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key)
|
||||
docsearch = FAISS.load_local(vectorstore, openai_embeddings)
|
||||
elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
|
||||
@@ -146,17 +167,42 @@ def ingest(self, directory, formats, name_job, filename, user):
|
||||
|
||||
@app.route("/")
|
||||
def home():
|
||||
return render_template("index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME,
|
||||
embeddings_choice=settings.EMBEDDINGS_NAME)
|
||||
return render_template(
|
||||
"index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME, embeddings_choice=settings.EMBEDDINGS_NAME
|
||||
)
|
||||
|
||||
def complete_stream(question, docsearch, chat_history, api_key):
|
||||
|
||||
def complete_stream(question, docsearch, chat_history, api_key, conversation_id):
|
||||
openai.api_key = api_key
|
||||
llm = ChatOpenAI(openai_api_key=api_key)
|
||||
if is_azure_configured():
|
||||
logger.debug("in Azure")
|
||||
openai.api_type = "azure"
|
||||
openai.api_version = settings.OPENAI_API_VERSION
|
||||
openai.api_base = settings.OPENAI_API_BASE
|
||||
llm = AzureChatOpenAI(
|
||||
openai_api_key=api_key,
|
||||
openai_api_base=settings.OPENAI_API_BASE,
|
||||
openai_api_version=settings.OPENAI_API_VERSION,
|
||||
deployment_name=settings.AZURE_DEPLOYMENT_NAME,
|
||||
)
|
||||
else:
|
||||
logger.debug("plain OpenAI")
|
||||
llm = ChatOpenAI(openai_api_key=api_key)
|
||||
docs = docsearch.similarity_search(question, k=2)
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
data = json.dumps({"type": "source", "doc": doc.page_content, "metadata": doc.metadata})
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
data = json.dumps({"type": "source", "doc": doc.page_content})
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
yield f"data:{data}\n\n"
|
||||
|
||||
if len(chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
@@ -169,45 +215,85 @@ def complete_stream(question, docsearch, chat_history, api_key):
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
|
||||
completion = openai.ChatCompletion.create(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_combine, stream=True, max_tokens=500, temperature=0)
|
||||
|
||||
reponse_full = ""
|
||||
for line in completion:
|
||||
if 'content' in line['choices'][0]['delta']:
|
||||
if "content" in line["choices"][0]["delta"]:
|
||||
# check if the delta contains content
|
||||
data = json.dumps({"answer": str(line['choices'][0]['delta']['content'])})
|
||||
data = json.dumps({"answer": str(line["choices"][0]["delta"]["content"])})
|
||||
reponse_full += str(line["choices"][0]["delta"]["content"])
|
||||
yield f"data: {data}\n\n"
|
||||
# save conversation to database
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question, "response": reponse_full, "sources": source_log_docs}}},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: " + question + "\n\n" +
|
||||
"AI: " +
|
||||
reponse_full},
|
||||
{"role": "user", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the "
|
||||
"system"}]
|
||||
completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_summary, max_tokens=30, temperature=0)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion["choices"][0]["message"]["content"],
|
||||
"queries": [{"prompt": question, "response": reponse_full, "sources": source_log_docs}]}
|
||||
).inserted_id
|
||||
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
@app.route("/stream", methods=['POST', 'GET'])
|
||||
|
||||
|
||||
@app.route("/stream", methods=["POST"])
|
||||
def stream():
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = request.args.get('question')
|
||||
history = request.args.get('history')
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
# history to json object from string
|
||||
history = json.loads(history)
|
||||
conversation_id = data["conversation_id"]
|
||||
|
||||
# check if active_docs is set
|
||||
|
||||
if not api_key_set:
|
||||
api_key = request.args.get("api_key")
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = request.args.get("embeddings_key")
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if "active_docs" in request.args:
|
||||
vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
|
||||
if "active_docs" in data:
|
||||
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
|
||||
else:
|
||||
vectorstore = ""
|
||||
docsearch = get_docsearch(vectorstore, embeddings_key)
|
||||
|
||||
# question = "Hi"
|
||||
return Response(
|
||||
complete_stream(question, docsearch,
|
||||
chat_history=history, api_key=api_key,
|
||||
conversation_id=conversation_id), mimetype="text/event-stream"
|
||||
)
|
||||
|
||||
#question = "Hi"
|
||||
return Response(complete_stream(question, docsearch,
|
||||
chat_history= history, api_key=api_key), mimetype='text/event-stream')
|
||||
|
||||
def is_azure_configured():
    """Report whether the Azure OpenAI settings are all populated.

    Checks ``settings.OPENAI_API_BASE``, ``settings.OPENAI_API_VERSION`` and
    ``settings.AZURE_DEPLOYMENT_NAME`` in that order.

    Returns:
        The first of those settings that is falsy, or the value of
        ``settings.AZURE_DEPLOYMENT_NAME`` when the first two are truthy.
        Callers are expected to use the result only for its truthiness.
    """
    api_base = settings.OPENAI_API_BASE
    if not api_base:
        return api_base

    api_version = settings.OPENAI_API_VERSION
    if not api_version:
        return api_version

    # Both prerequisites are set; the deployment name decides the outcome.
    return settings.AZURE_DEPLOYMENT_NAME
|
||||
|
||||
|
||||
@app.route("/api/answer", methods=["POST"])
|
||||
@@ -215,7 +301,11 @@ def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
print('-' * 5)
|
||||
if "conversation_id" not in data:
|
||||
conversation_id = None
|
||||
else:
|
||||
conversation_id = data["conversation_id"]
|
||||
print("-" * 5)
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
@@ -233,14 +323,25 @@ def api_answer():
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
docsearch = get_docsearch(vectorstore, embeddings_key)
|
||||
|
||||
q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
|
||||
template_format="jinja2")
|
||||
q_prompt = PromptTemplate(
|
||||
input_variables=["context", "question"], template=template_quest, template_format="jinja2"
|
||||
)
|
||||
if settings.LLM_NAME == "openai_chat":
|
||||
llm = ChatOpenAI(openai_api_key=api_key) # optional parameter: model_name="gpt-4"
|
||||
if is_azure_configured():
|
||||
logger.debug("in Azure")
|
||||
llm = AzureChatOpenAI(
|
||||
openai_api_key=api_key,
|
||||
openai_api_base=settings.OPENAI_API_BASE,
|
||||
openai_api_version=settings.OPENAI_API_VERSION,
|
||||
deployment_name=settings.AZURE_DEPLOYMENT_NAME,
|
||||
)
|
||||
else:
|
||||
logger.debug("plain OpenAI")
|
||||
llm = ChatOpenAI(openai_api_key=api_key, model_name=gpt_model) # optional parameter: model_name="gpt-4"
|
||||
messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
|
||||
if history:
|
||||
tokens_current_history = 0
|
||||
#count tokens in history
|
||||
# count tokens in history
|
||||
history.reverse()
|
||||
for i in history:
|
||||
if "prompt" in i and "response" in i:
|
||||
@@ -250,19 +351,13 @@ def api_answer():
|
||||
messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
|
||||
messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
|
||||
messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
|
||||
import sys
|
||||
print(messages_combine, file=sys.stderr)
|
||||
p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
|
||||
elif settings.LLM_NAME == "openai":
|
||||
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
||||
elif settings.LLM_NAME == "manifest":
|
||||
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
||||
elif settings.LLM_NAME == "huggingface":
|
||||
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
||||
elif settings.SELF_HOSTED_MODEL:
|
||||
llm = hf
|
||||
elif settings.LLM_NAME == "cohere":
|
||||
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
||||
elif settings.LLM_NAME == "gpt4all":
|
||||
llm = GPT4All(model=settings.MODEL_PATH)
|
||||
else:
|
||||
raise ValueError("unknown LLM model")
|
||||
|
||||
@@ -278,7 +373,7 @@ def api_answer():
|
||||
# result = chain({"question": question, "chat_history": chat_history})
|
||||
# generate async with async generate method
|
||||
result = run_async_chain(chain, question, chat_history)
|
||||
elif settings.LLM_NAME == "gpt4all":
|
||||
elif settings.SELF_HOSTED_MODEL:
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
|
||||
chain = ConversationalRetrievalChain(
|
||||
@@ -292,8 +387,9 @@ def api_answer():
|
||||
result = run_async_chain(chain, question, chat_history)
|
||||
|
||||
else:
|
||||
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
|
||||
combine_prompt=chat_combine_template, question_prompt=q_prompt)
|
||||
qa_chain = load_qa_chain(
|
||||
llm=llm, chain_type="map_reduce", combine_prompt=chat_combine_template, question_prompt=q_prompt
|
||||
)
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
|
||||
result = chain({"query": question})
|
||||
|
||||
@@ -301,13 +397,54 @@ def api_answer():
|
||||
|
||||
# some formatting for the frontend
|
||||
if "result" in result:
|
||||
result['answer'] = result['result']
|
||||
result['answer'] = result['answer'].replace("\\n", "\n")
|
||||
result["answer"] = result["result"]
|
||||
result["answer"] = result["answer"].replace("\\n", "\n")
|
||||
try:
|
||||
result['answer'] = result['answer'].split("SOURCES:")[0]
|
||||
result["answer"] = result["answer"].split("SOURCES:")[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
sources = docsearch.similarity_search(question, k=2)
|
||||
sources_doc = []
|
||||
for doc in sources:
|
||||
if doc.metadata:
|
||||
sources_doc.append({'title': doc.metadata['title'], 'text': doc.page_content})
|
||||
else:
|
||||
sources_doc.append({'title': doc.page_content, 'text': doc.page_content})
|
||||
result['sources'] = sources_doc
|
||||
|
||||
# generate conversationId
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question,
|
||||
"response": result["answer"], "sources": result['sources']}}},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [AIMessage(content="Summarise following conversation in no more than 3 " +
|
||||
"words, respond ONLY with the summary, use the same " +
|
||||
"language as the system \n\nUser: " + question + "\n\nAI: " +
|
||||
result["answer"]),
|
||||
HumanMessage(content="Summarise following conversation in no more than 3 words, " +
|
||||
"respond ONLY with the summary, use the same language as the " +
|
||||
"system")]
|
||||
|
||||
|
||||
# completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
# messages=messages_summary, max_tokens=30, temperature=0)
|
||||
completion = llm.predict_messages(messages_summary)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion.content,
|
||||
"queries": [{"prompt": question, "response": result["answer"], "sources": result['sources']}]}
|
||||
).inserted_id
|
||||
|
||||
result["conversation_id"] = str(conversation_id)
|
||||
|
||||
# mock result
|
||||
# result = {
|
||||
# "answer": "The answer is 42",
|
||||
@@ -327,16 +464,16 @@ def check_docs():
|
||||
data = request.get_json()
|
||||
# split docs on / and take first part
|
||||
if data["docs"].split("/")[0] == "local":
|
||||
return {"status": 'exists'}
|
||||
return {"status": "exists"}
|
||||
vectorstore = "vectors/" + data["docs"]
|
||||
base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
|
||||
base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
|
||||
if os.path.exists(vectorstore) or data["docs"] == "default":
|
||||
return {"status": 'exists'}
|
||||
return {"status": "exists"}
|
||||
else:
|
||||
r = requests.get(base_path + vectorstore + "index.faiss")
|
||||
|
||||
if r.status_code != 200:
|
||||
return {"status": 'null'}
|
||||
return {"status": "null"}
|
||||
else:
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
@@ -348,7 +485,7 @@ def check_docs():
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
return {"status": 'loaded'}
|
||||
return {"status": "loaded"}
|
||||
|
||||
|
||||
@app.route("/api/feedback", methods=["POST"])
|
||||
@@ -358,189 +495,225 @@ def api_feedback():
|
||||
answer = data["answer"]
|
||||
feedback = data["feedback"]
|
||||
|
||||
print('-' * 5)
|
||||
print("-" * 5)
|
||||
print("Question: " + question)
|
||||
print("Answer: " + answer)
|
||||
print("Feedback: " + feedback)
|
||||
print('-' * 5)
|
||||
print("-" * 5)
|
||||
response = requests.post(
|
||||
url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
|
||||
headers={
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
},
|
||||
data=json.dumps({
|
||||
"answer": answer,
|
||||
"question": question,
|
||||
"feedback": feedback
|
||||
})
|
||||
data=json.dumps({"answer": answer, "question": question, "feedback": feedback}),
|
||||
)
|
||||
return {"status": http.client.responses.get(response.status_code, 'ok')}
|
||||
return {"status": http.client.responses.get(response.status_code, "ok")}
|
||||
|
||||
|
||||
@app.route('/api/combine', methods=['GET'])
|
||||
@app.route("/api/combine", methods=["GET"])
|
||||
def combined_json():
|
||||
user = 'local'
|
||||
user = "local"
|
||||
"""Provide json file with combined available indexes."""
|
||||
# get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
|
||||
|
||||
data = [{
|
||||
"name": 'default',
|
||||
"language": 'default',
|
||||
"version": '',
|
||||
"description": 'default',
|
||||
"fullName": 'default',
|
||||
"date": 'default',
|
||||
"docLink": 'default',
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local"
|
||||
}]
|
||||
data = [
|
||||
{
|
||||
"name": "default",
|
||||
"language": "default",
|
||||
"version": "",
|
||||
"description": "default",
|
||||
"fullName": "default",
|
||||
"date": "default",
|
||||
"docLink": "default",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
}
|
||||
]
|
||||
# structure: name, language, version, description, fullName, date, docLink
|
||||
# append data from vectors_collection
|
||||
for index in vectors_collection.find({'user': user}):
|
||||
data.append({
|
||||
"name": index['name'],
|
||||
"language": index['language'],
|
||||
"version": '',
|
||||
"description": index['name'],
|
||||
"fullName": index['name'],
|
||||
"date": index['date'],
|
||||
"docLink": index['location'],
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local"
|
||||
})
|
||||
for index in vectors_collection.find({"user": user}):
|
||||
data.append(
|
||||
{
|
||||
"name": index["name"],
|
||||
"language": index["language"],
|
||||
"version": "",
|
||||
"description": index["name"],
|
||||
"fullName": index["name"],
|
||||
"date": index["date"],
|
||||
"docLink": index["location"],
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
}
|
||||
)
|
||||
|
||||
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
|
||||
for index in data_remote:
|
||||
index['location'] = "remote"
|
||||
index["location"] = "remote"
|
||||
data.append(index)
|
||||
|
||||
return jsonify(data)
|
||||
|
||||
|
||||
@app.route('/api/upload', methods=['POST'])
|
||||
@app.route("/api/upload", methods=["POST"])
|
||||
def upload_file():
|
||||
"""Upload a file to get vectorized and indexed."""
|
||||
if 'user' not in request.form:
|
||||
return {"status": 'no user'}
|
||||
user = secure_filename(request.form['user'])
|
||||
if 'name' not in request.form:
|
||||
return {"status": 'no name'}
|
||||
job_name = secure_filename(request.form['name'])
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = secure_filename(request.form["user"])
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
# check if the post request has the file part
|
||||
if 'file' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file = request.files['file']
|
||||
if file.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if "file" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file = request.files["file"]
|
||||
if file.filename == "":
|
||||
return {"status": "no file name"}
|
||||
|
||||
if file:
|
||||
filename = secure_filename(file.filename)
|
||||
# save dir
|
||||
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
|
||||
save_dir = os.path.join(app.config["UPLOAD_FOLDER"], user, job_name)
|
||||
# create dir if not exists
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
|
||||
file.save(os.path.join(save_dir, filename))
|
||||
task = ingest.delay('temp', [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
|
||||
task = ingest.delay("temp", [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
|
||||
# task id
|
||||
task_id = task.id
|
||||
return {"status": 'ok', "task_id": task_id}
|
||||
return {"status": "ok", "task_id": task_id}
|
||||
else:
|
||||
return {"status": 'error'}
|
||||
return {"status": "error"}
|
||||
|
||||
|
||||
@app.route('/api/task_status', methods=['GET'])
|
||||
@app.route("/api/task_status", methods=["GET"])
|
||||
def task_status():
|
||||
"""Get celery job status."""
|
||||
task_id = request.args.get('task_id')
|
||||
task_id = request.args.get("task_id")
|
||||
task = AsyncResult(task_id)
|
||||
task_meta = task.info
|
||||
return {"status": task.status, "result": task_meta}
|
||||
|
||||
|
||||
### Background task api
|
||||
@app.route('/api/upload_index', methods=['POST'])
|
||||
@app.route("/api/upload_index", methods=["POST"])
|
||||
def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
if 'user' not in request.form:
|
||||
return {"status": 'no user'}
|
||||
user = secure_filename(request.form['user'])
|
||||
if 'name' not in request.form:
|
||||
return {"status": 'no name'}
|
||||
job_name = secure_filename(request.form['name'])
|
||||
if 'file_faiss' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file_faiss = request.files['file_faiss']
|
||||
if file_faiss.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if 'file_pkl' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file_pkl = request.files['file_pkl']
|
||||
if file_pkl.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = secure_filename(request.form["user"])
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
if "file_faiss" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file_faiss = request.files["file_faiss"]
|
||||
if file_faiss.filename == "":
|
||||
return {"status": "no file name"}
|
||||
if "file_pkl" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file_pkl = request.files["file_pkl"]
|
||||
if file_pkl.filename == "":
|
||||
return {"status": "no file name"}
|
||||
|
||||
# saves index files
|
||||
save_dir = os.path.join('indexes', user, job_name)
|
||||
save_dir = os.path.join("indexes", user, job_name)
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, 'index.faiss'))
|
||||
file_pkl.save(os.path.join(save_dir, 'index.pkl'))
|
||||
file_faiss.save(os.path.join(save_dir, "index.faiss"))
|
||||
file_pkl.save(os.path.join(save_dir, "index.pkl"))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one({
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local"
|
||||
})
|
||||
return {"status": 'ok'}
|
||||
vectors_collection.insert_one(
|
||||
{
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local",
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.route('/api/download', methods=['get'])
|
||||
@app.route("/api/download", methods=["get"])
|
||||
def download_file():
|
||||
user = secure_filename(request.args.get('user'))
|
||||
job_name = secure_filename(request.args.get('name'))
|
||||
filename = secure_filename(request.args.get('file'))
|
||||
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
|
||||
user = secure_filename(request.args.get("user"))
|
||||
job_name = secure_filename(request.args.get("name"))
|
||||
filename = secure_filename(request.args.get("file"))
|
||||
save_dir = os.path.join(app.config["UPLOAD_FOLDER"], user, job_name)
|
||||
return send_from_directory(save_dir, filename, as_attachment=True)
|
||||
|
||||
|
||||
@app.route('/api/delete_old', methods=['get'])
|
||||
@app.route("/api/delete_old", methods=["get"])
|
||||
def delete_old():
|
||||
"""Delete old indexes."""
|
||||
import shutil
|
||||
path = request.args.get('path')
|
||||
dirs = path.split('/')
|
||||
|
||||
path = request.args.get("path")
|
||||
dirs = path.split("/")
|
||||
dirs_clean = []
|
||||
for i in range(1, len(dirs)):
|
||||
dirs_clean.append(secure_filename(dirs[i]))
|
||||
# check that path starts with indexes or vectors
|
||||
if dirs[0] not in ['indexes', 'vectors']:
|
||||
return {"status": 'error'}
|
||||
path_clean = '/'.join(dirs)
|
||||
vectors_collection.delete_one({'location': path})
|
||||
if dirs[0] not in ["indexes", "vectors"]:
|
||||
return {"status": "error"}
|
||||
path_clean = "/".join(dirs)
|
||||
vectors_collection.delete_one({"location": path})
|
||||
try:
|
||||
shutil.rmtree(path_clean)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return {"status": 'ok'}
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.route("/api/get_conversations", methods=["get"])
|
||||
def get_conversations():
|
||||
# provides a list of conversations
|
||||
conversations = conversations_collection.find().sort("date", -1)
|
||||
list_conversations = []
|
||||
for conversation in conversations:
|
||||
list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
|
||||
|
||||
#list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
|
||||
|
||||
return jsonify(list_conversations)
|
||||
|
||||
@app.route("/api/get_single_conversation", methods=["get"])
|
||||
def get_single_conversation():
|
||||
# provides data for a conversation
|
||||
conversation_id = request.args.get("id")
|
||||
conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
|
||||
return jsonify(conversation['queries'])
|
||||
|
||||
@app.route("/api/delete_conversation", methods=["POST"])
|
||||
def delete_conversation():
|
||||
# deletes a conversation from the database
|
||||
conversation_id = request.args.get("id")
|
||||
# write to mongodb
|
||||
conversations_collection.delete_one(
|
||||
{
|
||||
"_id": ObjectId(conversation_id),
|
||||
}
|
||||
)
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# handling CORS
|
||||
@app.after_request
|
||||
def after_request(response):
|
||||
response.headers.add('Access-Control-Allow-Origin', '*')
|
||||
response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
|
||||
response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
|
||||
response.headers.add('Access-Control-Allow-Credentials', 'true')
|
||||
response.headers.add("Access-Control-Allow-Origin", "*")
|
||||
response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
|
||||
response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
|
||||
response.headers.add("Access-Control-Allow-Credentials", "true")
|
||||
return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=5001)
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
@@ -11,11 +11,16 @@ class Settings(BaseSettings):
|
||||
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
|
||||
MODEL_PATH: str = "./models/gpt4all-model.bin"
|
||||
TOKENS_MAX_HISTORY: int = 150
|
||||
SELF_HOSTED_MODEL: bool = False
|
||||
|
||||
API_URL: str = "http://localhost:5001" # backend url for celery worker
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
|
||||
API_KEY: str = None # LLM api key
|
||||
EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY)
|
||||
OPENAI_API_BASE: str = None # azure openai api base url
|
||||
OPENAI_API_VERSION: str = None # azure openai api version
|
||||
AZURE_DEPLOYMENT_NAME: str = None # azure deployment name for answering
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: str = None # azure deployment name for embeddings
|
||||
|
||||
|
||||
path = Path(__file__).parent.parent.absolute()
|
||||
|
||||
1
application/parser/file/__init__.py
Normal file
1
application/parser/file/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -3,7 +3,7 @@ from abc import abstractmethod
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseReader:
|
||||
|
||||
@@ -3,15 +3,15 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Union
|
||||
|
||||
from parser.file.base import BaseReader
|
||||
from parser.file.base_parser import BaseParser
|
||||
from parser.file.docs_parser import DocxParser, PDFParser
|
||||
from parser.file.epub_parser import EpubParser
|
||||
from parser.file.html_parser import HTMLParser
|
||||
from parser.file.markdown_parser import MarkdownParser
|
||||
from parser.file.rst_parser import RstParser
|
||||
from parser.file.tabular_parser import PandasCSVParser
|
||||
from parser.schema.base import Document
|
||||
from application.parser.file.base import BaseReader
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
from application.parser.file.docs_parser import DocxParser, PDFParser
|
||||
from application.parser.file.epub_parser import EpubParser
|
||||
from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for docx, pdf files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class PDFParser(BaseParser):
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for epub files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class EpubParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
|
||||
|
||||
@@ -8,7 +8,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
import tiktoken
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class MarkdownParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class RstParser(BaseParser):
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for tabular data files.
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class CSVParser(BaseParser):
|
||||
|
||||
1
application/parser/schema/__init__.py
Normal file
1
application/parser/schema/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from parser.schema.schema import BaseDocument
|
||||
from application.parser.schema.schema import BaseDocument
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -3,7 +3,7 @@ from math import ceil
|
||||
from typing import List
|
||||
|
||||
import tiktoken
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
def separate_header_and_body(text):
|
||||
@@ -25,7 +25,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(
|
||||
current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
|
||||
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
|
||||
current_group.text += " " + doc.text
|
||||
else:
|
||||
docs.append(current_group)
|
||||
@@ -46,6 +46,9 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(doc)
|
||||
else:
|
||||
header, body = separate_header_and_body(doc.text)
|
||||
if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
|
||||
body = doc.text
|
||||
header = ""
|
||||
num_body_parts = ceil(token_length / max_tokens)
|
||||
part_length = ceil(len(body) / num_body_parts)
|
||||
body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.4
|
||||
aiohttp==3.8.5
|
||||
aiohttp-retry==2.8.3
|
||||
aiosignal==1.3.1
|
||||
aleph-alpha-client==2.16.1
|
||||
@@ -8,40 +8,39 @@ async-timeout==4.0.2
|
||||
attrs==22.2.0
|
||||
billiard==3.6.4.0
|
||||
blobfile==2.0.1
|
||||
boto3==1.26.102
|
||||
botocore==1.29.102
|
||||
boto3==1.28.20
|
||||
celery==5.2.7
|
||||
cffi==1.15.1
|
||||
charset-normalizer==3.1.0
|
||||
click==8.1.3
|
||||
click-didyoumean==0.3.0
|
||||
click-plugins==1.1.1
|
||||
click-repl==0.2.0
|
||||
cryptography==39.0.2
|
||||
cryptography==41.0.3
|
||||
dataclasses-json==0.5.7
|
||||
decorator==5.1.1
|
||||
deeplake==3.2.13
|
||||
dill==0.3.6
|
||||
dnspython==2.3.0
|
||||
ecdsa==0.18.0
|
||||
entrypoints==0.4
|
||||
faiss-cpu==1.7.3
|
||||
filelock==3.9.0
|
||||
Flask==2.2.3
|
||||
Flask==2.2.5
|
||||
Flask-Cors==3.0.10
|
||||
frozenlist==1.3.3
|
||||
geojson==2.5.0
|
||||
gunicorn==20.1.0
|
||||
greenlet==2.0.2
|
||||
gpt4all==0.1.7
|
||||
hub==3.0.1
|
||||
huggingface-hub==0.12.1
|
||||
humbug==0.2.8
|
||||
huggingface-hub==0.15.1
|
||||
humbug==0.3.2
|
||||
idna==3.4
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.2
|
||||
jmespath==1.0.1
|
||||
joblib==1.2.0
|
||||
kombu==5.2.4
|
||||
langchain==0.0.179
|
||||
langchain==0.0.263
|
||||
loguru==0.6.0
|
||||
lxml==4.9.2
|
||||
MarkupSafe==2.1.2
|
||||
@@ -52,10 +51,11 @@ multidict==6.0.4
|
||||
multiprocess==0.70.14
|
||||
mypy-extensions==1.0.0
|
||||
networkx==3.0
|
||||
npx
|
||||
nltk==3.8.1
|
||||
numcodecs==0.11.0
|
||||
numpy==1.24.2
|
||||
openai==0.27.0
|
||||
openai==0.27.8
|
||||
packaging==23.0
|
||||
pathos==0.3.0
|
||||
Pillow==9.4.0
|
||||
@@ -73,6 +73,7 @@ pymongo==4.3.3
|
||||
pyowm==3.3.0
|
||||
PyPDF2==3.0.1
|
||||
PySocks==1.7.1
|
||||
pytest
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
python-jose==3.3.0
|
||||
@@ -80,23 +81,20 @@ pytz==2022.7.1
|
||||
PyYAML==6.0
|
||||
redis==4.5.4
|
||||
regex==2022.10.31
|
||||
requests==2.28.2
|
||||
requests==2.31.0
|
||||
retry==0.9.2
|
||||
rsa==4.9
|
||||
s3transfer==0.6.0
|
||||
scikit-learn==1.2.2
|
||||
scipy==1.10.1
|
||||
sentence-transformers==2.2.2
|
||||
sentencepiece==0.1.97
|
||||
sentencepiece
|
||||
six==1.16.0
|
||||
SQLAlchemy==1.4.46
|
||||
sympy==1.11.1
|
||||
tenacity==8.2.2
|
||||
threadpoolctl==3.1.0
|
||||
torch==2.0.0
|
||||
torchvision==0.15.1
|
||||
tiktoken
|
||||
tqdm==4.65.0
|
||||
transformers==4.27.2
|
||||
transformers==4.30.0
|
||||
typer==0.7.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.5.0
|
||||
|
||||
@@ -7,11 +7,11 @@ from urllib.parse import urljoin
|
||||
import nltk
|
||||
import requests
|
||||
|
||||
from core.settings import settings
|
||||
from parser.file.bulk import SimpleDirectoryReader
|
||||
from parser.open_ai_func import call_openai_api
|
||||
from parser.schema.base import Document
|
||||
from parser.token_func import group_split
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
|
||||
try:
|
||||
nltk.download('punkt', quiet=True)
|
||||
@@ -19,9 +19,11 @@ try:
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
|
||||
def metadata_from_filename(title):
|
||||
return {'title': title}
|
||||
|
||||
|
||||
def generate_random_string(length):
|
||||
return ''.join([string.ascii_letters[i % 52] for i in range(length)])
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app import app
|
||||
from application.app import app
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=5001)
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
71
docker-compose-azure.yaml
Normal file
71
docker-compose-azure.yaml
Normal file
@@ -0,0 +1,71 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
depends_on:
|
||||
- backend
|
||||
|
||||
backend:
|
||||
build: ./application
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- OPENAI_API_KEY=$OPENAI_API_KEY
|
||||
- OPENAI_API_BASE=$OPENAI_API_BASE
|
||||
- OPENAI_API_VERSION=$OPENAI_API_VERSION
|
||||
- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
|
||||
- AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:7091
|
||||
- OPENAI_API_KEY=$OPENAI_API_KEY
|
||||
- OPENAI_API_BASE=$OPENAI_API_BASE
|
||||
- OPENAI_API_VERSION=$OPENAI_API_VERSION
|
||||
- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
|
||||
- AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
|
||||
mongo:
|
||||
image: mongo:6
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
@@ -4,7 +4,7 @@ services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:5001
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
@@ -19,29 +19,30 @@ services:
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- SELF_HOSTED_MODEL=$SELF_HOSTED_MODEL
|
||||
ports:
|
||||
- "5001:5001"
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/indexes
|
||||
- ./application/inputs:/app/inputs
|
||||
- ./application/vectors:/app/vectors
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:5001
|
||||
- API_URL=http://backend:7091
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
@@ -55,7 +56,5 @@ services:
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
mongodb_data_container:
|
||||
|
||||
@@ -21,7 +21,7 @@ document.getElementById("message-form").addEventListener("submit", function(even
|
||||
}
|
||||
|
||||
// send post request to server http://127.0.0.1:5000/ with message in json body
|
||||
fetch('http://127.0.0.1:5001/api/answer', {
|
||||
fetch('http://127.0.0.1:7091/api/answer', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
@@ -11,7 +11,7 @@ dotenv.load_dotenv()
|
||||
# Replace 'YOUR_BOT_TOKEN' with your bot's token
|
||||
TOKEN = os.getenv("DISCORD_TOKEN")
|
||||
PREFIX = '@DocsGPT'
|
||||
BASE_API_URL = 'http://localhost:5001'
|
||||
BASE_API_URL = 'http://localhost:7091'
|
||||
|
||||
intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
const API_ENDPOINT = "http://localhost:5001/api/answer"; // Replace with your API endpoint
|
||||
const API_ENDPOINT = "http://localhost:7091/api/answer"; // Replace with your API endpoint
|
||||
|
||||
const widgetInitMessage = document.getElementById("docsgpt-init-message");
|
||||
const widgetAnswerMessage = document.getElementById("docsgpt-answer");
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
# Please put appropriate value
|
||||
VITE_API_HOST=http://localhost:5001
|
||||
VITE_API_HOST=http://localhost:7091
|
||||
14
frontend/package-lock.json
generated
14
frontend/package-lock.json
generated
@@ -42,7 +42,7 @@
|
||||
"prettier-plugin-tailwindcss": "^0.2.2",
|
||||
"tailwindcss": "^3.2.4",
|
||||
"typescript": "^4.9.5",
|
||||
"vite": "^4.1.0",
|
||||
"vite": "^4.1.5",
|
||||
"vite-plugin-svgr": "^2.4.0"
|
||||
}
|
||||
},
|
||||
@@ -7207,9 +7207,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "4.1.4",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-4.1.4.tgz",
|
||||
"integrity": "sha512-3knk/HsbSTKEin43zHu7jTwYWv81f8kgAL99G5NWBcA1LKvtvcVAC4JjBH1arBunO9kQka+1oGbrMKOjk4ZrBg==",
|
||||
"version": "4.1.5",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-4.1.5.tgz",
|
||||
"integrity": "sha512-zJ0RiVkf61kpd7O+VtU6r766xgnTaIknP/lR6sJTZq3HtVJ3HGnTo5DaJhTUtYoTyS/CQwZ6yEVdc/lrmQT7dQ==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"esbuild": "^0.16.14",
|
||||
@@ -7320,9 +7320,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/word-wrap": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.3.tgz",
|
||||
"integrity": "sha512-Hz/mrNwitNRh/HUAtM/VT/5VH+ygD6DV7mYKZAtHOrbs8U7lvPS6xf7EJKMF0uW1KJCl0H701g3ZGus+muE5vQ==",
|
||||
"version": "1.2.4",
|
||||
"resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.4.tgz",
|
||||
"integrity": "sha512-2V81OA4ugVo5pRo46hAoD2ivUJx8jXmWXfUkY4KFNw0hEptvN0QfH3K4nHiwzGeKl5rFKedV48QVoqYavy4YpA==",
|
||||
"dev": true,
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
|
||||
@@ -53,7 +53,7 @@
|
||||
"prettier-plugin-tailwindcss": "^0.2.2",
|
||||
"tailwindcss": "^3.2.4",
|
||||
"typescript": "^4.9.5",
|
||||
"vite": "^4.1.0",
|
||||
"vite": "^4.1.5",
|
||||
"vite-plugin-svgr": "^2.4.0"
|
||||
}
|
||||
}
|
||||
|
||||
47
frontend/src/Modal/index.tsx
Normal file
47
frontend/src/Modal/index.tsx
Normal file
@@ -0,0 +1,47 @@
|
||||
import * as React from 'react';
|
||||
|
||||
interface ModalProps {
|
||||
handleSubmit: () => void;
|
||||
isCancellable: boolean;
|
||||
handleCancel?: () => void;
|
||||
render: () => JSX.Element;
|
||||
modalState: string;
|
||||
isError: boolean;
|
||||
errorMessage?: string;
|
||||
}
|
||||
const Modal = (props: ModalProps) => {
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
props.modalState === 'ACTIVE' ? 'visible' : 'hidden'
|
||||
} absolute z-30 h-screen w-screen bg-gray-alpha`}
|
||||
>
|
||||
{props.render()}
|
||||
<div className=" mx-auto flex w-[90vw] max-w-lg flex-row-reverse rounded-lg bg-white pb-5 pr-5 shadow-lg">
|
||||
<div>
|
||||
<button
|
||||
onClick={() => props.handleSubmit()}
|
||||
className="ml-auto h-10 w-20 rounded-lg bg-violet-800 text-white transition-all hover:bg-violet-700"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
{props.isCancellable && (
|
||||
<button
|
||||
onClick={() => props.handleCancel && props.handleCancel()}
|
||||
className="ml-5 h-10 w-20 rounded-lg border border-violet-700 bg-white text-violet-800 transition-all hover:bg-violet-700 hover:text-white"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{props.isError && (
|
||||
<p className="mx-auto mt-2 mr-auto text-sm text-red-500">
|
||||
{props.errorMessage}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default Modal;
|
||||
@@ -19,10 +19,17 @@ import {
|
||||
selectSelectedDocsStatus,
|
||||
selectSourceDocs,
|
||||
setSelectedDocs,
|
||||
selectConversations,
|
||||
setConversations,
|
||||
selectConversationId,
|
||||
} from './preferences/preferenceSlice';
|
||||
import {
|
||||
setConversation,
|
||||
updateConversationId,
|
||||
} from './conversation/conversationSlice';
|
||||
import { useOutsideAlerter } from './hooks';
|
||||
import Upload from './upload/Upload';
|
||||
import { Doc } from './preferences/preferenceApi';
|
||||
import { Doc, getConversations } from './preferences/preferenceApi';
|
||||
|
||||
export default function Navigation({
|
||||
navState,
|
||||
@@ -34,6 +41,8 @@ export default function Navigation({
|
||||
const dispatch = useDispatch();
|
||||
const docs = useSelector(selectSourceDocs);
|
||||
const selectedDocs = useSelector(selectSelectedDocs);
|
||||
const conversations = useSelector(selectConversations);
|
||||
const conversationId = useSelector(selectConversationId);
|
||||
|
||||
const [isDocsListOpen, setIsDocsListOpen] = useState(false);
|
||||
|
||||
@@ -51,6 +60,33 @@ export default function Navigation({
|
||||
const navRef = useRef(null);
|
||||
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
|
||||
|
||||
useEffect(() => {
|
||||
if (!conversations) {
|
||||
getConversations()
|
||||
.then((fetchedConversations) => {
|
||||
dispatch(setConversations(fetchedConversations));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Failed to fetch conversations: ', error);
|
||||
});
|
||||
}
|
||||
}, [conversations, dispatch]);
|
||||
|
||||
const handleDeleteConversation = (id: string) => {
|
||||
fetch(`${apiHost}/api/delete_conversation?id=${id}`, {
|
||||
method: 'POST',
|
||||
})
|
||||
.then(() => {
|
||||
// remove the image element from the DOM
|
||||
const imageElement = document.querySelector(
|
||||
`#img-${id}`,
|
||||
) as HTMLElement;
|
||||
const parentElement = imageElement.parentNode as HTMLElement;
|
||||
parentElement.parentNode?.removeChild(parentElement);
|
||||
})
|
||||
.catch((error) => console.error(error));
|
||||
};
|
||||
|
||||
const handleDeleteClick = (index: number, doc: Doc) => {
|
||||
const docPath = 'indexes/' + 'local' + '/' + doc.name;
|
||||
|
||||
@@ -67,6 +103,22 @@ export default function Navigation({
|
||||
})
|
||||
.catch((error) => console.error(error));
|
||||
};
|
||||
|
||||
const handleConversationClick = (index: string) => {
|
||||
// fetch the conversation from the server and setConversation in the store
|
||||
fetch(`${apiHost}/api/get_single_conversation?id=${index}`, {
|
||||
method: 'GET',
|
||||
})
|
||||
.then((response) => response.json())
|
||||
.then((data) => {
|
||||
dispatch(setConversation(data));
|
||||
dispatch(
|
||||
updateConversationId({
|
||||
query: { conversationId: index },
|
||||
}),
|
||||
);
|
||||
});
|
||||
};
|
||||
useOutsideAlerter(
|
||||
navRef,
|
||||
() => {
|
||||
@@ -121,15 +173,56 @@ export default function Navigation({
|
||||
</div>
|
||||
<NavLink
|
||||
to={'/'}
|
||||
onClick={() => {
|
||||
dispatch(setConversation([]));
|
||||
dispatch(updateConversationId({ query: { conversationId: null } }));
|
||||
}}
|
||||
className={({ isActive }) =>
|
||||
`${
|
||||
isActive ? 'bg-gray-3000' : ''
|
||||
isActive && conversationId === null ? 'bg-gray-3000' : ''
|
||||
} my-auto mx-4 mt-4 flex h-12 cursor-pointer gap-4 rounded-md hover:bg-gray-100`
|
||||
}
|
||||
>
|
||||
<img src={Message} className="ml-2 w-5"></img>
|
||||
<p className="my-auto text-eerie-black">Chat</p>
|
||||
<p className="my-auto text-eerie-black">New Chat</p>
|
||||
</NavLink>
|
||||
<div className="conversations-container max-h-[25rem] overflow-y-auto">
|
||||
{conversations
|
||||
? conversations.map((conversation) => {
|
||||
return (
|
||||
<div
|
||||
key={conversation.id}
|
||||
onClick={() => {
|
||||
handleConversationClick(conversation.id);
|
||||
}}
|
||||
className={`my-auto mx-4 mt-4 flex h-12 cursor-pointer items-center justify-between gap-4 rounded-md hover:bg-gray-100 ${
|
||||
conversationId === conversation.id ? 'bg-gray-100' : ''
|
||||
}`}
|
||||
>
|
||||
<div className="flex gap-4">
|
||||
<img src={Message} className="ml-2 w-5"></img>
|
||||
<p className="my-auto text-eerie-black">
|
||||
{conversation.name}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{conversationId === conversation.id ? (
|
||||
<img
|
||||
src={Exit}
|
||||
alt="Exit"
|
||||
className="mr-4 h-3 w-3 cursor-pointer hover:opacity-50"
|
||||
id={`img-${conversation.id}`}
|
||||
onClick={(event) => {
|
||||
event.stopPropagation();
|
||||
handleDeleteConversation(conversation.id);
|
||||
}}
|
||||
/>
|
||||
) : null}
|
||||
</div>
|
||||
);
|
||||
})
|
||||
: null}
|
||||
</div>
|
||||
|
||||
<div className="flex-grow border-b-2 border-gray-100"></div>
|
||||
<div className="flex flex-col-reverse border-b-2">
|
||||
|
||||
@@ -60,6 +60,7 @@ export default function Conversation() {
|
||||
key={`${index}ANSWER`}
|
||||
message={query.response}
|
||||
type={'ANSWER'}
|
||||
sources={query.sources}
|
||||
feedback={query.feedback}
|
||||
handleFeedback={(feedback: FEEDBACK) =>
|
||||
handleFeedback(query, feedback, index)
|
||||
@@ -70,6 +71,12 @@ export default function Conversation() {
|
||||
return responseView;
|
||||
};
|
||||
|
||||
const handlePaste = (e: React.ClipboardEvent) => {
|
||||
e.preventDefault();
|
||||
const text = e.clipboardData.getData('text/plain');
|
||||
document.execCommand('insertText', false, text);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-center p-4">
|
||||
{queries.length > 0 && (
|
||||
@@ -83,6 +90,7 @@ export default function Conversation() {
|
||||
key={`${index}QUESTION`}
|
||||
message={query.prompt}
|
||||
type="QUESTION"
|
||||
sources={query.sources}
|
||||
></ConversationBubble>
|
||||
{prepResponseView(query, index)}
|
||||
</Fragment>
|
||||
@@ -96,6 +104,7 @@ export default function Conversation() {
|
||||
<div
|
||||
ref={inputRef}
|
||||
contentEditable
|
||||
onPaste={handlePaste}
|
||||
className={`border-000000 overflow-x-hidden; max-h-24 min-h-[2.6rem] w-full overflow-y-auto whitespace-pre-wrap rounded-xl border bg-white py-2 pl-4 pr-9 leading-7 opacity-100 focus:outline-none`}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
|
||||
@@ -8,6 +8,8 @@ import ReactMarkdown from 'react-markdown';
|
||||
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
|
||||
import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';
|
||||
|
||||
const DisableSourceFE = import.meta.env.VITE_DISABLE_SOURCE_FE || false;
|
||||
|
||||
const ConversationBubble = forwardRef<
|
||||
HTMLDivElement,
|
||||
{
|
||||
@@ -16,12 +18,14 @@ const ConversationBubble = forwardRef<
|
||||
className?: string;
|
||||
feedback?: FEEDBACK;
|
||||
handleFeedback?: (feedback: FEEDBACK) => void;
|
||||
sources?: { title: string; text: string }[];
|
||||
}
|
||||
>(function ConversationBubble(
|
||||
{ message, type, className, feedback, handleFeedback },
|
||||
{ message, type, className, feedback, handleFeedback, sources },
|
||||
ref,
|
||||
) {
|
||||
const [showFeedback, setShowFeedback] = useState(false);
|
||||
const [openSource, setOpenSource] = useState<number | null>(null);
|
||||
const List = ({
|
||||
ordered,
|
||||
children,
|
||||
@@ -37,7 +41,7 @@ const ConversationBubble = forwardRef<
|
||||
if (type === 'QUESTION') {
|
||||
bubble = (
|
||||
<div ref={ref} className={`flex flex-row-reverse self-end ${className}`}>
|
||||
<Avatar className="mt-4 text-2xl" avatar="🧑💻"></Avatar>
|
||||
<Avatar className="mt-2 text-2xl" avatar="🧑💻"></Avatar>
|
||||
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 p-3.5 text-white">
|
||||
<ReactMarkdown className="whitespace-pre-wrap break-words">
|
||||
{message}
|
||||
@@ -49,85 +53,118 @@ const ConversationBubble = forwardRef<
|
||||
bubble = (
|
||||
<div
|
||||
ref={ref}
|
||||
className={`flex self-start ${className}`}
|
||||
className={`flex self-start ${className} flex-col`}
|
||||
onMouseEnter={() => setShowFeedback(true)}
|
||||
onMouseLeave={() => setShowFeedback(false)}
|
||||
>
|
||||
<Avatar className="mt-4 text-2xl" avatar="🦖"></Avatar>
|
||||
<div
|
||||
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
|
||||
type === 'ERROR'
|
||||
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
{type === 'ERROR' && (
|
||||
<img src={Alert} alt="alert" className="mr-2 inline" />
|
||||
)}
|
||||
<ReactMarkdown
|
||||
className="whitespace-pre-wrap break-words"
|
||||
components={{
|
||||
code({ node, inline, className, children, ...props }) {
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
|
||||
return !inline && match ? (
|
||||
<SyntaxHighlighter
|
||||
PreTag="div"
|
||||
language={match[1]}
|
||||
{...props}
|
||||
style={vscDarkPlus}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code className={className ? className : ''} {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
ul({ node, children }) {
|
||||
return <List>{children}</List>;
|
||||
},
|
||||
ol({ node, children }) {
|
||||
return <List ordered>{children}</List>;
|
||||
},
|
||||
}}
|
||||
<div className="flex self-start">
|
||||
<Avatar className="mt-2 text-2xl" avatar="🦖"></Avatar>
|
||||
<div
|
||||
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
|
||||
type === 'ERROR'
|
||||
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
{message}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-2 flex items-center justify-center ${
|
||||
feedback === 'LIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
>
|
||||
<Like
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'LIKE'
|
||||
? 'fill-blue-1000 stroke-blue-1000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
{type === 'ERROR' && (
|
||||
<img src={Alert} alt="alert" className="mr-2 inline" />
|
||||
)}
|
||||
<ReactMarkdown
|
||||
className="whitespace-pre-wrap break-words"
|
||||
components={{
|
||||
code({ node, inline, className, children, ...props }) {
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
|
||||
return !inline && match ? (
|
||||
<SyntaxHighlighter
|
||||
PreTag="div"
|
||||
language={match[1]}
|
||||
{...props}
|
||||
style={vscDarkPlus}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code className={className ? className : ''} {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
ul({ node, children }) {
|
||||
return <List>{children}</List>;
|
||||
},
|
||||
ol({ node, children }) {
|
||||
return <List ordered>{children}</List>;
|
||||
},
|
||||
}}
|
||||
>
|
||||
{message}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-2 flex items-center justify-center ${
|
||||
feedback === 'LIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('LIKE')}
|
||||
></Like>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-10 flex items-center justify-center ${
|
||||
feedback === 'DISLIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
>
|
||||
<Dislike
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'DISLIKE'
|
||||
? 'fill-red-2000 stroke-red-2000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
>
|
||||
<Like
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'LIKE'
|
||||
? 'fill-blue-1000 stroke-blue-1000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('LIKE')}
|
||||
></Like>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-10 flex items-center justify-center ${
|
||||
feedback === 'DISLIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('DISLIKE')}
|
||||
></Dislike>
|
||||
>
|
||||
<Dislike
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'DISLIKE'
|
||||
? 'fill-red-2000 stroke-red-2000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('DISLIKE')}
|
||||
></Dislike>
|
||||
</div>
|
||||
</div>
|
||||
<div className="ml-8 mt-2 grid w-1/2 grid-cols-3 gap-2">
|
||||
{DisableSourceFE
|
||||
? null
|
||||
: sources?.map((source, index) => (
|
||||
<div
|
||||
key={index}
|
||||
className="w-26 cursor-pointer rounded-xl border border-gray-200 py-1 px-2 hover:bg-gray-100"
|
||||
onClick={() =>
|
||||
setOpenSource(openSource === index ? null : index)
|
||||
}
|
||||
>
|
||||
<p className="truncate text-xs text-gray-500">
|
||||
{index + 1}. {source.title}
|
||||
</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{sources && openSource !== null && sources[openSource] && (
|
||||
<div className="ml-8 mt-2 w-3/4 rounded-xl bg-blue-200 p-2">
|
||||
<p className="w-3/4 truncate text-xs text-gray-500">
|
||||
Source: {sources[openSource].title}
|
||||
</p>
|
||||
|
||||
<div className="rounded-xl border-2 border-gray-200 bg-white p-2">
|
||||
<p className="text-xs text-gray-500 ">
|
||||
{sources[openSource].text}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -8,7 +8,24 @@ export function fetchAnswerApi(
|
||||
apiKey: string,
|
||||
selectedDocs: Doc,
|
||||
history: Array<any> = [],
|
||||
): Promise<Answer> {
|
||||
conversationId: string | null,
|
||||
): Promise<
|
||||
| {
|
||||
result: any;
|
||||
answer: any;
|
||||
sources: any;
|
||||
conversationId: any;
|
||||
query: string;
|
||||
}
|
||||
| {
|
||||
result: any;
|
||||
answer: any;
|
||||
sources: any;
|
||||
query: string;
|
||||
conversationId: any;
|
||||
title: any;
|
||||
}
|
||||
> {
|
||||
let namePath = selectedDocs.name;
|
||||
if (selectedDocs.language === namePath) {
|
||||
namePath = '.project';
|
||||
@@ -28,6 +45,10 @@ export function fetchAnswerApi(
|
||||
selectedDocs.model +
|
||||
'/';
|
||||
}
|
||||
//in history array remove all keys except prompt and response
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
|
||||
return fetch(apiHost + '/api/answer', {
|
||||
method: 'POST',
|
||||
@@ -40,6 +61,7 @@ export function fetchAnswerApi(
|
||||
embeddings_key: apiKey,
|
||||
history: history,
|
||||
active_docs: docPath,
|
||||
conversation_id: conversationId,
|
||||
}),
|
||||
})
|
||||
.then((response) => {
|
||||
@@ -51,7 +73,13 @@ export function fetchAnswerApi(
|
||||
})
|
||||
.then((data) => {
|
||||
const result = data.answer;
|
||||
return { answer: result, query: question, result };
|
||||
return {
|
||||
answer: result,
|
||||
query: question,
|
||||
result,
|
||||
sources: data.sources,
|
||||
conversationId: data.conversation_id,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
@@ -60,6 +88,7 @@ export function fetchAnswerSteaming(
|
||||
apiKey: string,
|
||||
selectedDocs: Doc,
|
||||
history: Array<any> = [],
|
||||
conversationId: string | null,
|
||||
onEvent: (event: MessageEvent) => void,
|
||||
): Promise<Answer> {
|
||||
let namePath = selectedDocs.name;
|
||||
@@ -82,22 +111,72 @@ export function fetchAnswerSteaming(
|
||||
'/';
|
||||
}
|
||||
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
|
||||
return new Promise<Answer>((resolve, reject) => {
|
||||
const url = new URL(apiHost + '/stream');
|
||||
url.searchParams.append('question', question);
|
||||
url.searchParams.append('api_key', apiKey);
|
||||
url.searchParams.append('embeddings_key', apiKey);
|
||||
url.searchParams.append('active_docs', docPath);
|
||||
url.searchParams.append('history', JSON.stringify(history));
|
||||
|
||||
const eventSource = new EventSource(url.href);
|
||||
|
||||
eventSource.onmessage = onEvent;
|
||||
|
||||
eventSource.onerror = (error) => {
|
||||
console.log('Connection failed.');
|
||||
eventSource.close();
|
||||
const body = {
|
||||
question: question,
|
||||
api_key: apiKey,
|
||||
embeddings_key: apiKey,
|
||||
active_docs: docPath,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
};
|
||||
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.body) throw Error('No response body');
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
let counterrr = 0;
|
||||
const processStream = ({
|
||||
done,
|
||||
value,
|
||||
}: ReadableStreamReadResult<Uint8Array>) => {
|
||||
if (done) {
|
||||
console.log(counterrr);
|
||||
return;
|
||||
}
|
||||
|
||||
counterrr += 1;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (let line of lines) {
|
||||
if (line.trim() == '') {
|
||||
continue;
|
||||
}
|
||||
if (line.startsWith('data:')) {
|
||||
line = line.substring(5);
|
||||
}
|
||||
|
||||
const messageEvent: MessageEvent = new MessageEvent('message', {
|
||||
data: line,
|
||||
});
|
||||
|
||||
onEvent(messageEvent); // handle each message
|
||||
}
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
};
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Connection failed:', error);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -10,12 +10,16 @@ export interface Message {
|
||||
export interface ConversationState {
|
||||
queries: Query[];
|
||||
status: Status;
|
||||
conversationId: string | null;
|
||||
}
|
||||
|
||||
export interface Answer {
|
||||
answer: string;
|
||||
query: string;
|
||||
result: string;
|
||||
sources: { title: string; text: string }[];
|
||||
conversationId: string | null;
|
||||
title: string | null;
|
||||
}
|
||||
|
||||
export interface Query {
|
||||
@@ -23,4 +27,7 @@ export interface Query {
|
||||
response?: string;
|
||||
feedback?: FEEDBACK;
|
||||
error?: string;
|
||||
sources?: { title: string; text: string }[];
|
||||
conversationId?: string | null;
|
||||
title?: string | null;
|
||||
}
|
||||
|
||||
@@ -2,10 +2,13 @@ import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
|
||||
import store from '../store';
|
||||
import { fetchAnswerApi, fetchAnswerSteaming } from './conversationApi';
|
||||
import { Answer, ConversationState, Query, Status } from './conversationModels';
|
||||
import { getConversations } from '../preferences/preferenceApi';
|
||||
import { setConversations } from '../preferences/preferenceSlice';
|
||||
|
||||
const initialState: ConversationState = {
|
||||
queries: [],
|
||||
status: 'idle',
|
||||
conversationId: null,
|
||||
};
|
||||
|
||||
const API_STREAMING = import.meta.env.VITE_API_STREAMING === 'true';
|
||||
@@ -21,6 +24,7 @@ export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
state.conversation.queries,
|
||||
state.conversation.conversationId,
|
||||
(event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
|
||||
@@ -28,6 +32,37 @@ export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
|
||||
if (data.type === 'end') {
|
||||
// set status to 'idle'
|
||||
dispatch(conversationSlice.actions.setStatus('idle'));
|
||||
getConversations()
|
||||
.then((fetchedConversations) => {
|
||||
dispatch(setConversations(fetchedConversations));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Failed to fetch conversations: ', error);
|
||||
});
|
||||
} else if (data.type === 'source') {
|
||||
// check if data.metadata exists
|
||||
let result;
|
||||
if (data.metadata && data.metadata.title) {
|
||||
const titleParts = data.metadata.title.split('/');
|
||||
result = {
|
||||
title: titleParts[titleParts.length - 1],
|
||||
text: data.doc,
|
||||
};
|
||||
} else {
|
||||
result = { title: data.doc, text: data.doc };
|
||||
}
|
||||
dispatch(
|
||||
updateStreamingSource({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { sources: [result] },
|
||||
}),
|
||||
);
|
||||
} else if (data.type === 'id') {
|
||||
dispatch(
|
||||
updateConversationId({
|
||||
query: { conversationId: data.id },
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
const result = data.answer;
|
||||
dispatch(
|
||||
@@ -45,19 +80,51 @@ export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
state.conversation.queries,
|
||||
state.conversation.conversationId,
|
||||
);
|
||||
if (answer) {
|
||||
let sourcesPrepped = [];
|
||||
sourcesPrepped = answer.sources.map((source: { title: string }) => {
|
||||
if (source && source.title) {
|
||||
const titleParts = source.title.split('/');
|
||||
return {
|
||||
...source,
|
||||
title: titleParts[titleParts.length - 1],
|
||||
};
|
||||
}
|
||||
return source;
|
||||
});
|
||||
|
||||
dispatch(
|
||||
updateQuery({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { response: answer.answer },
|
||||
query: { response: answer.answer, sources: sourcesPrepped },
|
||||
}),
|
||||
);
|
||||
dispatch(
|
||||
updateConversationId({
|
||||
query: { conversationId: answer.conversationId },
|
||||
}),
|
||||
);
|
||||
dispatch(conversationSlice.actions.setStatus('idle'));
|
||||
getConversations()
|
||||
.then((fetchedConversations) => {
|
||||
dispatch(setConversations(fetchedConversations));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Failed to fetch conversations: ', error);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return { answer: '', query: question, result: '' };
|
||||
return {
|
||||
conversationId: null,
|
||||
title: null,
|
||||
answer: '',
|
||||
query: question,
|
||||
result: '',
|
||||
sources: [],
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
@@ -68,6 +135,9 @@ export const conversationSlice = createSlice({
|
||||
addQuery(state, action: PayloadAction<Query>) {
|
||||
state.queries.push(action.payload);
|
||||
},
|
||||
setConversation(state, action: PayloadAction<Query[]>) {
|
||||
state.queries = action.payload;
|
||||
},
|
||||
updateStreamingQuery(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
@@ -83,6 +153,23 @@ export const conversationSlice = createSlice({
|
||||
};
|
||||
}
|
||||
},
|
||||
updateConversationId(
|
||||
state,
|
||||
action: PayloadAction<{ query: Partial<Query> }>,
|
||||
) {
|
||||
state.conversationId = action.payload.query.conversationId ?? null;
|
||||
},
|
||||
updateStreamingSource(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
) {
|
||||
const index = action.payload.index;
|
||||
if (!state.queries[index].sources) {
|
||||
state.queries[index].sources = [action.payload.query.sources![0]];
|
||||
} else {
|
||||
state.queries[index].sources!.push(action.payload.query.sources![0]);
|
||||
}
|
||||
},
|
||||
updateQuery(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
@@ -116,6 +203,12 @@ export const selectQueries = (state: RootState) => state.conversation.queries;
|
||||
|
||||
export const selectStatus = (state: RootState) => state.conversation.status;
|
||||
|
||||
export const { addQuery, updateQuery, updateStreamingQuery } =
|
||||
conversationSlice.actions;
|
||||
export const {
|
||||
addQuery,
|
||||
updateQuery,
|
||||
updateStreamingQuery,
|
||||
updateConversationId,
|
||||
updateStreamingSource,
|
||||
setConversation,
|
||||
} = conversationSlice.actions;
|
||||
export default conversationSlice.reducer;
|
||||
|
||||
@@ -3,6 +3,7 @@ import { useDispatch, useSelector } from 'react-redux';
|
||||
import { ActiveState } from '../models/misc';
|
||||
import { selectApiKey, setApiKey } from './preferenceSlice';
|
||||
import { useOutsideAlerter } from './../hooks';
|
||||
import Modal from '../Modal';
|
||||
|
||||
export default function APIKeyModal({
|
||||
modalState,
|
||||
@@ -49,53 +50,35 @@ export default function APIKeyModal({
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
modalState === 'ACTIVE' ? 'visible' : 'hidden'
|
||||
} absolute z-30 h-screen w-screen bg-gray-alpha`}
|
||||
>
|
||||
<article
|
||||
ref={modalRef}
|
||||
className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg"
|
||||
>
|
||||
<p className="text-xl text-jet">OpenAI API Key</p>
|
||||
<p className="text-md leading-6 text-gray-500">
|
||||
Before you can start using DocsGPT we need you to provide an API key
|
||||
for llm. Currently, we support only OpenAI but soon many more. You can
|
||||
find it here.
|
||||
</p>
|
||||
<input
|
||||
type="text"
|
||||
className="h-10 w-full border-b-2 border-jet focus:outline-none"
|
||||
value={key}
|
||||
maxLength={100}
|
||||
placeholder="API Key"
|
||||
onChange={(e) => setKey(e.target.value)}
|
||||
/>
|
||||
<div className="flex flex-row-reverse">
|
||||
<div>
|
||||
<button
|
||||
onClick={() => handleSubmit()}
|
||||
className="ml-auto h-10 w-20 rounded-lg bg-violet-800 text-white transition-all hover:bg-violet-700"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
{isCancellable && (
|
||||
<button
|
||||
onClick={() => handleCancel()}
|
||||
className="ml-5 h-10 w-20 rounded-lg border border-violet-700 bg-white text-violet-800 transition-all hover:bg-violet-700 hover:text-white"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{isError && (
|
||||
<p className="mr-auto text-sm text-red-500">
|
||||
Please enter a valid API key
|
||||
<Modal
|
||||
handleCancel={handleCancel}
|
||||
isError={isError}
|
||||
modalState={modalState}
|
||||
isCancellable={isCancellable}
|
||||
handleSubmit={handleSubmit}
|
||||
render={() => {
|
||||
return (
|
||||
<article
|
||||
ref={modalRef}
|
||||
className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg"
|
||||
>
|
||||
<p className="text-xl text-jet">OpenAI API Key</p>
|
||||
<p className="text-md leading-6 text-gray-500">
|
||||
Before you can start using DocsGPT we need you to provide an API
|
||||
key for llm. Currently, we support only OpenAI but soon many more.
|
||||
You can find it here.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
<input
|
||||
type="text"
|
||||
className="h-10 w-full border-b-2 border-jet focus:outline-none"
|
||||
value={key}
|
||||
maxLength={100}
|
||||
placeholder="API Key"
|
||||
onChange={(e) => setKey(e.target.value)}
|
||||
/>
|
||||
</article>
|
||||
);
|
||||
}}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
import { ActiveState } from '../models/misc';
|
||||
import Modal from '../Modal';
|
||||
import {
|
||||
setSelectedDocs,
|
||||
setSourceDocs,
|
||||
@@ -50,85 +51,67 @@ export default function APIKeyModal({
|
||||
|
||||
requestDocs();
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
modalState === 'ACTIVE' ? 'visible' : 'hidden'
|
||||
} absolute z-30 h-screen w-screen bg-gray-alpha`}
|
||||
>
|
||||
<article className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg">
|
||||
<p className="text-xl text-jet">Select Source Documentation</p>
|
||||
<p className="text-lg leading-5 text-gray-500">
|
||||
Please select the library of documentation that you would like to use
|
||||
with our app.
|
||||
</p>
|
||||
<div className="relative">
|
||||
<div
|
||||
className="h-10 w-full cursor-pointer border-b-2"
|
||||
onClick={() => setIsDocsListOpen(!isDocsListOpen)}
|
||||
>
|
||||
{!localSelectedDocs ? (
|
||||
<p className="py-3 text-gray-500">Select</p>
|
||||
) : (
|
||||
<p className="py-3">
|
||||
{localSelectedDocs.name} {localSelectedDocs.version}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
{isDocsListOpen && (
|
||||
<div className="absolute top-10 left-0 max-h-52 w-full overflow-y-scroll bg-white">
|
||||
{docs ? (
|
||||
docs.map((doc, index) => {
|
||||
if (doc.model) {
|
||||
return (
|
||||
<div
|
||||
key={index}
|
||||
onClick={() => {
|
||||
setLocalSelectedDocs(doc);
|
||||
setIsDocsListOpen(false);
|
||||
}}
|
||||
className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
|
||||
>
|
||||
<p className="ml-5 py-3">
|
||||
{doc.name} {doc.version}
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
})
|
||||
) : (
|
||||
<div className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100">
|
||||
<p className="ml-5 py-3">No default documentation.</p>
|
||||
<Modal
|
||||
handleSubmit={handleSubmit}
|
||||
isCancellable={isCancellable}
|
||||
handleCancel={handleCancel}
|
||||
modalState={modalState}
|
||||
errorMessage="Please select Source Documentation"
|
||||
isError={isError}
|
||||
render={() => {
|
||||
return (
|
||||
<article className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg">
|
||||
<p className="text-xl text-jet">Select Source Documentation</p>
|
||||
<p className="text-lg leading-5 text-gray-500">
|
||||
Please select the library of documentation that you would like to
|
||||
use with our app.
|
||||
</p>
|
||||
<div className="relative">
|
||||
<div
|
||||
className="h-10 w-full cursor-pointer border-b-2"
|
||||
onClick={() => setIsDocsListOpen(!isDocsListOpen)}
|
||||
>
|
||||
{!localSelectedDocs ? (
|
||||
<p className="py-3 text-gray-500">Select</p>
|
||||
) : (
|
||||
<p className="py-3">
|
||||
{localSelectedDocs.name} {localSelectedDocs.version}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
{isDocsListOpen && (
|
||||
<div className="absolute top-10 left-0 max-h-52 w-full overflow-y-scroll bg-white">
|
||||
{docs ? (
|
||||
docs.map((doc, index) => {
|
||||
if (doc.model) {
|
||||
return (
|
||||
<div
|
||||
key={index}
|
||||
onClick={() => {
|
||||
setLocalSelectedDocs(doc);
|
||||
setIsDocsListOpen(false);
|
||||
}}
|
||||
className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
|
||||
>
|
||||
<p className="ml-5 py-3">
|
||||
{doc.name} {doc.version}
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
})
|
||||
) : (
|
||||
<div className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100">
|
||||
<p className="ml-5 py-3">No default documentation.</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex flex-row-reverse">
|
||||
{isCancellable && (
|
||||
<button
|
||||
onClick={() => handleCancel()}
|
||||
className="ml-5 h-10 w-20 rounded-lg border border-violet-700 bg-white text-violet-800 transition-all hover:bg-violet-700 hover:text-white"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
onClick={() => {
|
||||
handleSubmit();
|
||||
}}
|
||||
className="ml-auto h-10 w-20 rounded-lg bg-violet-800 text-white transition-all hover:bg-violet-700"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
{isError && (
|
||||
<p className="mr-auto text-sm text-red-500">
|
||||
Please select source documentation.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</article>
|
||||
);
|
||||
}}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -33,6 +33,29 @@ export async function getDocs(): Promise<Doc[] | null> {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fetches the list of conversations from the backend.
 *
 * Issues a GET request to `/api/get_conversations` on the host taken from
 * `VITE_API_HOST`, falling back to `https://docsapi.arc53.com` when that
 * variable is unset or empty.
 *
 * @returns The conversations as `{ name, id }` records, or `null` when the
 *   request fails, the server replies with a non-2xx status, or the response
 *   body is not a JSON array. Failures are logged to the console.
 */
export async function getConversations(): Promise<
  { name: string; id: string }[] | null
> {
  try {
    const apiHost =
      import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';

    const response = await fetch(apiHost + '/api/get_conversations');
    // fetch() only rejects on network failure; HTTP error statuses resolve
    // normally, so they have to be turned into failures explicitly.
    if (!response.ok) {
      throw new Error(
        `get_conversations request failed with status ${response.status}`,
      );
    }

    const data: unknown = await response.json();
    // Anything other than an array cannot be a conversation list; fail with
    // a clear message instead of an opaque TypeError.
    if (!Array.isArray(data)) {
      throw new Error('get_conversations returned a non-array payload');
    }

    // Elements are cast, not validated: the backend is trusted to send
    // objects carrying `name` and `id`.
    return data.map(
      (conversation) => conversation as { name: string; id: string },
    );
  } catch (error) {
    console.log(error);
    return null;
  }
}
|
||||
|
||||
export function getLocalApiKey(): string | null {
|
||||
const key = localStorage.getItem('DocsGPTApiKey');
|
||||
return key;
|
||||
|
||||
@@ -10,6 +10,7 @@ interface Preference {
|
||||
apiKey: string;
|
||||
selectedDocs: Doc | null;
|
||||
sourceDocs: Doc[] | null;
|
||||
conversations: { name: string; id: string }[] | null;
|
||||
}
|
||||
|
||||
const initialState: Preference = {
|
||||
@@ -26,6 +27,7 @@ const initialState: Preference = {
|
||||
model: 'openai_text-embedding-ada-002',
|
||||
} as Doc,
|
||||
sourceDocs: null,
|
||||
conversations: null,
|
||||
};
|
||||
|
||||
export const prefSlice = createSlice({
|
||||
@@ -41,10 +43,14 @@ export const prefSlice = createSlice({
|
||||
setSourceDocs: (state, action) => {
|
||||
state.sourceDocs = action.payload;
|
||||
},
|
||||
setConversations: (state, action) => {
|
||||
state.conversations = action.payload;
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
export const { setApiKey, setSelectedDocs, setSourceDocs } = prefSlice.actions;
|
||||
export const { setApiKey, setSelectedDocs, setSourceDocs, setConversations } =
|
||||
prefSlice.actions;
|
||||
export default prefSlice.reducer;
|
||||
|
||||
export const prefListenerMiddleware = createListenerMiddleware();
|
||||
@@ -74,3 +80,7 @@ export const selectSourceDocs = (state: RootState) =>
|
||||
state.preference.sourceDocs;
|
||||
/** Selector returning `selectedDocs` from the `preference` slice. */
export const selectSelectedDocs = (state: RootState) => {
  const { selectedDocs } = state.preference;
  return selectedDocs;
};
|
||||
/** Selector returning `conversations` from the `preference` slice. */
export const selectConversations = (state: RootState) => {
  const { conversations } = state.preference;
  return conversations;
};
|
||||
/** Selector returning `conversationId` from the `conversation` slice. */
export const selectConversationId = (state: RootState) => {
  const { conversationId } = state.conversation;
  return conversationId;
};
|
||||
|
||||
@@ -13,6 +13,7 @@ const store = configureStore({
|
||||
preference: {
|
||||
apiKey: key ?? '',
|
||||
selectedDocs: doc !== null ? JSON.parse(doc) : null,
|
||||
conversations: null,
|
||||
sourceDocs: [
|
||||
{
|
||||
location: '',
|
||||
|
||||
11
run-with-docker-compose.sh
Executable file
11
run-with-docker-compose.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash

# Build and start the stack with docker-compose, picking the compose file
# from the variables defined in .env.
source .env

# The Azure compose file is used only when all four Azure-related variables
# are non-empty; otherwise docker-compose falls back to its default file.
if [[ -n "$OPENAI_API_BASE" && -n "$OPENAI_API_VERSION" && -n "$AZURE_DEPLOYMENT_NAME" && -n "$AZURE_EMBEDDINGS_DEPLOYMENT_NAME" ]]; then
  echo "Running Azure Configuration"
  compose_args=(-f docker-compose-azure.yaml)
else
  echo "Running Plain Configuration"
  compose_args=()
fi

# "up" runs only if "build" succeeded.
docker-compose "${compose_args[@]}" build && docker-compose "${compose_args[@]}" up
|
||||
@@ -119,8 +119,12 @@ class MarkdownParser(BaseParser):
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> List[Tuple[Optional[str], str]]:
|
||||
"""Parse file into tuples."""
|
||||
with open(filepath, "r") as f:
|
||||
content = f.read()
|
||||
with open(filepath, "r", encoding='utf8') as f:
|
||||
try:
|
||||
content = f.read()
|
||||
except (Exception,) as e:
|
||||
print(f'Error a file: "{filepath}"')
|
||||
raise e
|
||||
if self._remove_hyperlinks:
|
||||
content = self.remove_hyperlinks(content)
|
||||
if self._remove_images:
|
||||
|
||||
@@ -11,11 +11,11 @@ from retry import retry
|
||||
# from langchain.embeddings import CohereEmbeddings
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> tuple[int, float]:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = ((num_tokens / 1000) * 0.0004)
|
||||
total_price = (num_tokens / 1000) * 0.0004
|
||||
return num_tokens, total_price
|
||||
|
||||
|
||||
@@ -33,6 +33,7 @@ def call_openai_api(docs, folder_name):
|
||||
os.makedirs(f"outputs/{folder_name}")
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
docs_test = [docs[0]]
|
||||
# remove the first element from docs
|
||||
docs.pop(0)
|
||||
@@ -44,15 +45,26 @@ def call_openai_api(docs, folder_name):
|
||||
# environment="us-east1-gcp" # next to api key in console
|
||||
# )
|
||||
# index_name = "pandas"
|
||||
store = FAISS.from_documents(docs_test, OpenAIEmbeddings())
|
||||
if ( # azure
|
||||
os.environ.get("OPENAI_API_BASE")
|
||||
and os.environ.get("OPENAI_API_VERSION")
|
||||
and os.environ.get("AZURE_DEPLOYMENT_NAME")
|
||||
and os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME")
|
||||
):
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
openai_embeddings = OpenAIEmbeddings(model=os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME"))
|
||||
else:
|
||||
openai_embeddings = OpenAIEmbeddings()
|
||||
store = FAISS.from_documents(docs_test, openai_embeddings)
|
||||
# store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
|
||||
|
||||
# Uncomment for MPNet embeddings
|
||||
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
# store = FAISS.from_documents(docs_test, hf)
|
||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
|
||||
bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||
for i in tqdm(
|
||||
docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format="{l_bar}{bar}| Time Left: {remaining}"
|
||||
):
|
||||
try:
|
||||
store_add_texts_with_retry(store, i)
|
||||
except Exception as e:
|
||||
|
||||
@@ -5,7 +5,6 @@ from typing import List
|
||||
import tiktoken
|
||||
from parser.schema.base import Document
|
||||
|
||||
|
||||
def separate_header_and_body(text):
|
||||
header_pattern = r"^(.*?\n){3}"
|
||||
match = re.match(header_pattern, text)
|
||||
@@ -25,7 +24,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(
|
||||
current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
|
||||
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
|
||||
current_group.text += " " + doc.text
|
||||
else:
|
||||
docs.append(current_group)
|
||||
@@ -46,6 +45,9 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(doc)
|
||||
else:
|
||||
header, body = separate_header_and_body(doc.text)
|
||||
if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
|
||||
body = doc.text
|
||||
header = ""
|
||||
num_body_parts = ceil(token_length / max_tokens)
|
||||
part_length = ceil(len(body) / num_body_parts)
|
||||
body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
|
||||
|
||||
@@ -1,130 +1,122 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.3
|
||||
aiohttp==3.8.5
|
||||
aiohttp-retry==2.8.3
|
||||
aiosignal==1.3.1
|
||||
alabaster==0.7.13
|
||||
aleph-alpha-client==2.16.0
|
||||
anyio==3.6.2
|
||||
argilla==1.3.0
|
||||
aleph-alpha-client==3.2.0
|
||||
anyio==3.7.1
|
||||
async-timeout==4.0.2
|
||||
attrs==22.2.0
|
||||
Babel==2.11.0
|
||||
attrs==23.1.0
|
||||
Babel==2.12.1
|
||||
backoff==2.2.1
|
||||
blobfile==2.0.1
|
||||
boto3==1.26.82
|
||||
botocore==1.29.82
|
||||
blobfile==2.0.2
|
||||
boto3==1.28.20
|
||||
cffi==1.15.1
|
||||
charset-normalizer==2.1.1
|
||||
click==8.1.3
|
||||
cohere==3.4.0
|
||||
cryptography==39.0.2
|
||||
dataclasses-json==0.5.7
|
||||
charset-normalizer==3.2.0
|
||||
click==8.1.6
|
||||
cohere==4.19.2
|
||||
cryptography==41.0.3
|
||||
dataclasses-json==0.5.14
|
||||
decorator==5.1.1
|
||||
deeplake==3.2.12
|
||||
Deprecated==1.2.13
|
||||
dill==0.3.6
|
||||
docutils==0.19
|
||||
Deprecated==1.2.14
|
||||
dill==0.3.7
|
||||
docutils==0.20.1
|
||||
docx2txt==0.8
|
||||
ecdsa==0.18.0
|
||||
entrypoints==0.4
|
||||
escodegen==1.0.10
|
||||
escodegen==1.0.11
|
||||
esprima==4.0.1
|
||||
esutils==1.0.1
|
||||
et-xmlfile==1.1.0
|
||||
faiss-cpu==1.7.3
|
||||
filelock==3.9.0
|
||||
Flask==2.2.5
|
||||
frozenlist==1.3.3
|
||||
faiss-cpu==1.7.4
|
||||
filelock==3.12.2
|
||||
Flask==2.3.2
|
||||
frozenlist==1.4.0
|
||||
greenlet==2.0.2
|
||||
gunicorn==20.1.0
|
||||
gunicorn==21.2.0
|
||||
h11==0.14.0
|
||||
httpcore==0.16.3
|
||||
httpx==0.23.3
|
||||
httpcore==0.17.3
|
||||
httpx==0.24.1
|
||||
hub==3.0.1
|
||||
huggingface-hub==0.12.0
|
||||
humbug==0.2.8
|
||||
huggingface-hub==0.16.4
|
||||
humbug==0.3.2
|
||||
idna==3.4
|
||||
imagesize==1.4.1
|
||||
itsdangerous==2.1.2
|
||||
javalang==0.13.0
|
||||
Jinja2==3.1.2
|
||||
jmespath==1.0.1
|
||||
joblib==1.2.0
|
||||
langchain==0.0.103
|
||||
lxml==4.9.2
|
||||
manifest-ml==0.1.1
|
||||
MarkupSafe==2.1.2
|
||||
marshmallow==3.19.0
|
||||
joblib==1.3.1
|
||||
langchain==0.0.252
|
||||
lxml==4.9.3
|
||||
manifest-ml==0.1.8
|
||||
MarkupSafe==2.1.3
|
||||
marshmallow==3.20.1
|
||||
marshmallow-enum==1.5.1
|
||||
monotonic==1.6
|
||||
multidict==6.0.4
|
||||
multiprocess==0.70.14
|
||||
mypy-extensions==0.4.3
|
||||
multiprocess==0.70.15
|
||||
mypy-extensions==1.0.0
|
||||
nltk==3.8.1
|
||||
numcodecs==0.11.0
|
||||
numpy==1.23.5
|
||||
openai==0.27.0
|
||||
openpyxl==3.1.1
|
||||
packaging==23.0
|
||||
pandas==1.5.3
|
||||
pathos==0.3.0
|
||||
Pillow==9.4.0
|
||||
pox==0.3.2
|
||||
ppft==1.7.6.6
|
||||
numpy==1.25.2
|
||||
openai==0.27.8
|
||||
openpyxl==3.1.2
|
||||
packaging==23.1
|
||||
pandas==2.0.3
|
||||
pathos==0.3.1
|
||||
Pillow==10.0.0
|
||||
pox==0.3.3
|
||||
ppft==1.7.6.7
|
||||
py==1.11.0
|
||||
pyasn1==0.4.8
|
||||
pyasn1==0.5.0
|
||||
pycares==4.3.0
|
||||
pycparser==2.21
|
||||
pycryptodomex==3.17
|
||||
pydantic==1.10.4
|
||||
Pygments==2.14.0
|
||||
PyJWT==2.6.0
|
||||
pycryptodomex==3.18.0
|
||||
Pygments==2.15.1
|
||||
PyJWT==2.8.0
|
||||
PyPDF2==3.0.1
|
||||
python-dateutil==2.8.2
|
||||
python-docx==0.8.11
|
||||
python-dotenv==0.21.1
|
||||
python-dotenv==1.0.0
|
||||
python-jose==3.3.0
|
||||
python-magic==0.4.27
|
||||
python-pptx==0.6.21
|
||||
pytz==2022.7.1
|
||||
PyYAML==6.0
|
||||
redis==4.5.4
|
||||
regex==2022.10.31
|
||||
requests==2.28.2
|
||||
pytz==2023.3
|
||||
PyYAML==6.0.1
|
||||
redis==4.6.0
|
||||
regex==2023.6.3
|
||||
requests==2.31.0
|
||||
retry==0.9.2
|
||||
rfc3986==1.5.0
|
||||
rfc3986==2.0.0
|
||||
rsa==4.9
|
||||
s3transfer==0.6.0
|
||||
scikit-learn==1.2.1
|
||||
scipy==1.10.0
|
||||
sentence-transformers==2.2.2
|
||||
sentencepiece==0.1.97
|
||||
scikit-learn==1.3.0
|
||||
scipy==1.11.1
|
||||
sentence-transformers
|
||||
sentencepiece==0.1.99
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
snowballstemmer==2.2.0
|
||||
Sphinx==6.1.3
|
||||
Sphinx==7.1.2
|
||||
sphinxcontrib-applehelp==1.0.4
|
||||
sphinxcontrib-devhelp==1.0.2
|
||||
sphinxcontrib-htmlhelp==2.0.1
|
||||
sphinxcontrib-jsmath==1.0.1
|
||||
sphinxcontrib-qthelp==1.0.3
|
||||
sphinxcontrib-serializinghtml==1.1.5
|
||||
SQLAlchemy==1.4.46
|
||||
SQLAlchemy==2.0.19
|
||||
sqlitedict==2.1.0
|
||||
tenacity==8.2.1
|
||||
threadpoolctl==3.1.0
|
||||
tiktoken==0.1.2
|
||||
tokenizers==0.13.2
|
||||
torch==1.13.1
|
||||
torchvision==0.14.1
|
||||
tqdm==4.64.1
|
||||
transformers==4.26.0
|
||||
typer==0.7.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.4.0
|
||||
unstructured==0.4.11
|
||||
urllib3==1.26.14
|
||||
wrapt==1.14.1
|
||||
XlsxWriter==3.0.8
|
||||
xxhash==3.2.0
|
||||
yarl==1.8.2
|
||||
tenacity==8.2.2
|
||||
threadpoolctl==3.2.0
|
||||
tiktoken==0.4.0
|
||||
tokenizers==0.13.3
|
||||
tqdm==4.65.0
|
||||
transformers==4.31.0
|
||||
typer==0.9.0
|
||||
typing-inspect==0.9.0
|
||||
typing_extensions==4.7.1
|
||||
unstructured==0.9.0
|
||||
wrapt==1.15.0
|
||||
XlsxWriter==3.1.2
|
||||
xxhash==3.3.0
|
||||
yarl==1.9.2
|
||||
|
||||
6
setup.sh
6
setup.sh
@@ -15,7 +15,7 @@ docker run -d --name redis -p 6379:6379 redis:6-alpine
|
||||
docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
|
||||
|
||||
# Run backend and worker services
|
||||
docker run -d --name backend -p 5001:5001 \
|
||||
docker run -d --name backend -p 7091:7091 \
|
||||
--link redis:redis --link mongo:mongo \
|
||||
-v $(pwd)/application/indexes:/app/indexes \
|
||||
-v $(pwd)/application/inputs:/app/inputs \
|
||||
@@ -34,12 +34,12 @@ docker run -d --name worker \
|
||||
-e CELERY_BROKER_URL=redis://redis:6379/0 \
|
||||
-e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
|
||||
-e MONGO_URI=mongodb://mongo:27017/docsgpt \
|
||||
-e API_URL=http://backend:5001 \
|
||||
-e API_URL=http://backend:7091 \
|
||||
backend_image \
|
||||
celery -A app.celery worker -l INFO
|
||||
|
||||
# Run frontend service
|
||||
docker run -d --name frontend -p 5173:5173 \
|
||||
-e VITE_API_HOST=http://localhost:5001 \
|
||||
-e VITE_API_HOST=http://localhost:7091 \
|
||||
frontend_image
|
||||
|
||||
|
||||
28
tests/test_app.py
Normal file
28
tests/test_app.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from application.app import get_vectorstore
|
||||
import os
|
||||
|
||||
|
||||
# Test cases for get_vectorstore function
|
||||
def test_no_active_docs():
    """An empty dict maps to ``os.path.join("application", "")``."""
    expected_path = os.path.join("application", "")
    assert get_vectorstore({}) == expected_path
|
||||
|
||||
|
||||
def test_local_default_active_docs():
    """``"local/default"`` maps to ``os.path.join("application", "")``."""
    payload = {"active_docs": "local/default"}
    expected_path = os.path.join("application", "")
    assert get_vectorstore(payload) == expected_path
|
||||
|
||||
|
||||
def test_local_non_default_active_docs():
    """A non-default ``local/`` entry maps to its path under ``indexes``."""
    payload = {"active_docs": "local/something"}
    expected_path = os.path.join("application", "indexes/local/something")
    assert get_vectorstore(payload) == expected_path
|
||||
|
||||
|
||||
def test_default_active_docs():
    """``"default"`` maps to ``os.path.join("application", "")``."""
    payload = {"active_docs": "default"}
    expected_path = os.path.join("application", "")
    assert get_vectorstore(payload) == expected_path
|
||||
|
||||
|
||||
def test_complex_active_docs():
    """A multi-segment ``local/`` entry maps to its path under ``indexes``."""
    payload = {"active_docs": "local/other/path"}
    expected_path = os.path.join("application", "indexes/local/other/path")
    assert get_vectorstore(payload) == expected_path
|
||||
Reference in New Issue
Block a user