Compare commits
211 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95fdedf12e | ||
|
|
c73dd776db | ||
|
|
891e5fea3f | ||
|
|
bb2f6f23b5 | ||
|
|
cd9b03bdb9 | ||
|
|
a619269502 | ||
|
|
9a33bf2210 | ||
|
|
34b4cd2231 | ||
|
|
6045cbbc62 | ||
|
|
9bbf4044e0 | ||
|
|
fcf8a64d91 | ||
|
|
2c6ab18e41 | ||
|
|
2fea294b13 | ||
|
|
b47ecab1a9 | ||
|
|
b86c294250 | ||
|
|
3eacfb91aa | ||
|
|
94164c2a71 | ||
|
|
d85eb83ea2 | ||
|
|
b2002639db | ||
|
|
347cfe253f | ||
|
|
833e1836e1 | ||
|
|
e4be38b9f7 | ||
|
|
783e7f6939 | ||
|
|
c1c54f4848 | ||
|
|
86be6be2d2 | ||
|
|
35a63e867a | ||
|
|
9c12a417ee | ||
|
|
32a019c0d6 | ||
|
|
b7e4a3c99e | ||
|
|
039062d071 | ||
|
|
83ae3e8371 | ||
|
|
852de8bdfc | ||
|
|
b8acb860aa | ||
|
|
e6849b85d1 | ||
|
|
8fa9657ba6 | ||
|
|
04b038960b | ||
|
|
52507a5a95 | ||
|
|
d8505ba2ab | ||
|
|
fa26c0997e | ||
|
|
5a0aadd2ae | ||
|
|
025549ebf8 | ||
|
|
e85a583f0a | ||
|
|
f7244ddb7a | ||
|
|
d983a519e3 | ||
|
|
ae01070b8f | ||
|
|
b2118602d9 | ||
|
|
9303f3b47b | ||
|
|
e5c43cfc4b | ||
|
|
45fc08e221 | ||
|
|
67e8511106 | ||
|
|
4f7fd0a62b | ||
|
|
88fe454962 | ||
|
|
26f7a9be0a | ||
|
|
9256926bb7 | ||
|
|
2a83318739 | ||
|
|
d6e2535a5e | ||
|
|
2bffb7e22c | ||
|
|
24a162cf86 | ||
|
|
f3104f3bc4 | ||
|
|
45f1bf6709 | ||
|
|
40b2590815 | ||
|
|
dd9ab46b5c | ||
|
|
c2aeadae33 | ||
|
|
1bd9759ab7 | ||
|
|
dcdbb05168 | ||
|
|
ae117c47e9 | ||
|
|
7f7856f0e4 | ||
|
|
aa7b7c8619 | ||
|
|
ee0cbff245 | ||
|
|
c2c18b25d2 | ||
|
|
816c7c95ed | ||
|
|
cb5d65d11a | ||
|
|
75f3f43ba0 | ||
|
|
9a521355ed | ||
|
|
47bfdf0710 | ||
|
|
e1b49c3fb4 | ||
|
|
374dffc5fa | ||
|
|
4f735a5d11 | ||
|
|
94738d8fc4 | ||
|
|
adb4bfa10b | ||
|
|
48e6bbdc97 | ||
|
|
b54d6fea44 | ||
|
|
4462e6339d | ||
|
|
c1581b69f4 | ||
|
|
14284e0cc7 | ||
|
|
de40e733ec | ||
|
|
9d91b6f780 | ||
|
|
6a8b49f9c4 | ||
|
|
445a8a5647 | ||
|
|
83ce4a538a | ||
|
|
35a19d2007 | ||
|
|
505e12c5ea | ||
|
|
b2bfd7f23a | ||
|
|
cdb96e715d | ||
|
|
b3e5f09e3b | ||
|
|
db542d668a | ||
|
|
a8a79a55a4 | ||
|
|
47f62a87a7 | ||
|
|
44f353861a | ||
|
|
a2ef84a4a0 | ||
|
|
12ac20ec43 | ||
|
|
ecfbc7b9fd | ||
|
|
ba2fe0fb1f | ||
|
|
890a20edba | ||
|
|
e6f48c9403 | ||
|
|
909f0afa69 | ||
|
|
5ed2b99b8c | ||
|
|
7848751fd8 | ||
|
|
e593241d75 | ||
|
|
fcdc7b7aeb | ||
|
|
c3c7878f28 | ||
|
|
85f9ae5a0a | ||
|
|
98a97f34f5 | ||
|
|
98d647a3fe | ||
|
|
9a393b4f74 | ||
|
|
88d74235e1 | ||
|
|
36fa470348 | ||
|
|
33dce10bc3 | ||
|
|
feed0b288f | ||
|
|
1b7dc8a509 | ||
|
|
87cc3cf168 | ||
|
|
eac7b1e9f2 | ||
|
|
bb1a42df91 | ||
|
|
ac5ac3e9f1 | ||
|
|
bed25b317c | ||
|
|
1687e6682a | ||
|
|
22572c8ed1 | ||
|
|
8187a339f0 | ||
|
|
382c3930a2 | ||
|
|
a64a30c088 | ||
|
|
dac76a867f | ||
|
|
b2e86e105d | ||
|
|
b8e57c9b6f | ||
|
|
486a1bc9de | ||
|
|
b1b610f4b5 | ||
|
|
68447a6009 | ||
|
|
a55280b941 | ||
|
|
830462d525 | ||
|
|
ce8b29e9d0 | ||
|
|
6ab15f8eb1 | ||
|
|
96eb68e042 | ||
|
|
bf78bdd6d4 | ||
|
|
d998815847 | ||
|
|
00ba7b78ca | ||
|
|
0b735d94f1 | ||
|
|
301989540f | ||
|
|
e26b95a26f | ||
|
|
049c1ddb48 | ||
|
|
2f1c3075a2 | ||
|
|
b1a5068fd6 | ||
|
|
01fbd5d702 | ||
|
|
5916f92f1a | ||
|
|
5e45268f68 | ||
|
|
b8e28e0c12 | ||
|
|
04f824ea36 | ||
|
|
c216bea031 | ||
|
|
e72ef478dc | ||
|
|
897b4ef2cd | ||
|
|
2404899e28 | ||
|
|
a2dfc2cbdc | ||
|
|
92373b25a9 | ||
|
|
ce1840a9ae | ||
|
|
c4f4bdd789 | ||
|
|
ec5068e85b | ||
|
|
1d9d0ddf27 | ||
|
|
e393be90dd | ||
|
|
e633df06e4 | ||
|
|
0ff5f408d6 | ||
|
|
5eda42ff31 | ||
|
|
84168e22d0 | ||
|
|
b722845aff | ||
|
|
fd54682c02 | ||
|
|
f5e287ffa6 | ||
|
|
fb10a546d6 | ||
|
|
006897f1c0 | ||
|
|
968849e52b | ||
|
|
8bee47dc50 | ||
|
|
08250120d1 | ||
|
|
8892b70785 | ||
|
|
534e4cb591 | ||
|
|
489abdcb0b | ||
|
|
f6b6c2e9a3 | ||
|
|
43c016f024 | ||
|
|
c0e7d9cd8b | ||
|
|
5f687a31f8 | ||
|
|
f2d2478dee | ||
|
|
8a98789be1 | ||
|
|
87a5c8894a | ||
|
|
7e92ed4501 | ||
|
|
a57cdfff1e | ||
|
|
d4ff6d4d7a | ||
|
|
63d99d6a57 | ||
|
|
fce7d34171 | ||
|
|
e7df7f69b3 | ||
|
|
94cc18bd71 | ||
|
|
39024ce2ac | ||
|
|
7ac4f45e7b | ||
|
|
f209eebaf8 | ||
|
|
4889db78c9 | ||
|
|
bff200fede | ||
|
|
af6f783043 | ||
|
|
610adcbefc | ||
|
|
1d3631fa04 | ||
|
|
0630504664 | ||
|
|
6d5b698c39 | ||
|
|
dd9f1abcea | ||
|
|
b4bd34fb96 | ||
|
|
014971262d | ||
|
|
36ed69b07e | ||
|
|
ec4fc17e3a | ||
|
|
78b85fb664 |
@@ -1,2 +1,9 @@
|
||||
OPENAI_API_KEY=<LLM api key (for example, open ai key)>
|
||||
EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
|
||||
SELF_HOSTED_MODEL=false
|
||||
VITE_API_STREAMING=true
|
||||
|
||||
#For Azure
|
||||
OPENAI_API_BASE=
|
||||
OPENAI_API_VERSION=
|
||||
AZURE_DEPLOYMENT_NAME=
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
|
||||
1
.github/workflows/ci.yml
vendored
@@ -8,6 +8,7 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
30
.github/workflows/pytest.yml
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
name: Run python tests with pytest
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
pytest_and_coverage:
|
||||
name: Run tests and count coverage
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest pytest-cov
|
||||
cd application
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Test with pytest and generate coverage report
|
||||
run: |
|
||||
python -m pytest --cov=application --cov=scripts --cov=extensions --cov-report=xml
|
||||
- name: Upload coverage reports to Codecov
|
||||
if: github.event_name == 'pull_request' && matrix.python-version == '3.11'
|
||||
uses: codecov/codecov-action@v3
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
2
.github/workflows/sync_fork.yaml
vendored
@@ -5,7 +5,7 @@ permissions:
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 * * * *" # every hour
|
||||
- cron: "0 0 * * *" # every hour
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
||||
6
.gitignore
vendored
@@ -5,7 +5,7 @@ __pycache__/
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
*.next
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
@@ -169,4 +169,6 @@ application/vectors/
|
||||
|
||||
**/yarn.lock
|
||||
|
||||
node_modules/
|
||||
node_modules/
|
||||
.vscode/settings.json
|
||||
models/
|
||||
|
||||
@@ -6,33 +6,39 @@ Thank you for choosing this project to contribute to, we are all very grateful!
|
||||
|
||||
📣 Discussions - where you can start a new topic or answer some questions
|
||||
|
||||
🐞 Issues - Is how we track tasks, sometimes its bugs that need fixing, sometimes its new features
|
||||
🐞 Issues - This is how we track tasks, sometimes it is bugs that need fixing, and sometimes it is new features
|
||||
|
||||
🛠️ Pull requests - Is how you can suggest changes to our repository, to work on existing issue or to add new features
|
||||
🛠️ Pull requests - This is how you can suggest changes to our repository, to work on existing issues or add new features
|
||||
|
||||
📚 Wiki - where we have our documentation
|
||||
|
||||
|
||||
## 🐞 Issues and Pull requests
|
||||
|
||||
We value contributions to our issues in form of discussion or suggestion, we recommend that you check out existing issues and our [Roadmap](https://github.com/orgs/arc53/projects/2)
|
||||
We value contributions to our issues in the form of discussion or suggestion, we recommend that you check out existing issues and our [Roadmap](https://github.com/orgs/arc53/projects/2)
|
||||
|
||||
If you want to contribute by writing code there are few things that you should know before doing it:
|
||||
If you want to contribute by writing code there are a few things that you should know before doing it:
|
||||
We have frontend (React, Vite) and Backend (python)
|
||||
|
||||
### If you are looking to contribute to Frontend (⚛️React, Vite):
|
||||
Current frontend is being migrated from /application to /frontend with a new design, so please contribute to the new on. Check out this [Milestone](https://github.com/arc53/DocsGPT/milestone/1) and its issues also [Figma](https://www.figma.com/file/OXLtrl1EAy885to6S69554/DocsGPT?node-id=0%3A1&t=hjWVuxRg9yi5YkJ9-1)
|
||||
Please try to follow guidelines
|
||||
|
||||
The current frontend is being migrated from /application to /frontend with a new design, so please contribute to the new one. Check out this [Milestone](https://github.com/arc53/DocsGPT/milestone/1) and its issues also [Figma](https://www.figma.com/file/OXLtrl1EAy885to6S69554/DocsGPT?node-id=0%3A1&t=hjWVuxRg9yi5YkJ9-1)
|
||||
Please try to follow the guidelines.
|
||||
|
||||
### If you are looking to contribute to Backend (🐍Python):
|
||||
Check out our issues, and contribute to /application or /scripts (ignore old ingest_rst.py ingest_rst_sphinx.py files, they will be deprecated soon)
|
||||
Currently we don't have any tests(which would be useful😉) but before submitting you PR make sure that after you ingested some test data its queryable
|
||||
* Check out our issues, and contribute to /application or /scripts (ignore old ingest_rst.py ingest_rst_sphinx.py files, they will be deprecated soon)
|
||||
* All new code should be covered with unit tests ([pytest](https://github.com/pytest-dev/pytest)). Please find tests under [/tests](https://github.com/arc53/DocsGPT/tree/main/tests) folder.
|
||||
* Before submitting your PR make sure that after you ingested some test data it is queryable.
|
||||
|
||||
### Testing
|
||||
To run unit tests, from the root of the repository execute:
|
||||
```
|
||||
python -m pytest
|
||||
```
|
||||
|
||||
### Workflow:
|
||||
Create a fork, make changes on your forked repository, submit changes in a form of pull request
|
||||
Create a fork, make changes on your forked repository, and submit changes in the form of a pull request.
|
||||
|
||||
## Questions / collaboration
|
||||
## Questions/collaboration
|
||||
Please join our [Discord](https://discord.gg/n5BX8dh8rU) don't hesitate, we are very friendly and welcoming to new contributors.
|
||||
|
||||
# Thank you so much for considering to contribute to DocsGPT!🙏
|
||||
# Thank you so much for considering contributing to DocsGPT!🙏
|
||||
|
||||
31
HACKTOBERFEST.md
Normal file
@@ -0,0 +1,31 @@
|
||||
🎉 Join the Hacktoberfest with DocsGPT and Earn a Free T-shirt! 🎉
|
||||
|
||||
Welcome, contributors! We're excited to announce that DocsGPT is participating in Hacktoberfest. Get involved by submitting a **meaningful** pull request, and earn a free shirt in return!
|
||||
📜 Here's How to Contribute:
|
||||
|
||||
🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs.
|
||||
📚 Wiki: Improve our documentation, Create a guide or change existing documentation.
|
||||
🖥️ Design: Improve the UI/UX, or design a new feature.
|
||||
|
||||
📝 Guidelines for Pull Requests:
|
||||
|
||||
Familiarize yourself with the current contributions and our [Roadmap](https://github.com/orgs/arc53/projects/2).
|
||||
|
||||
Deciding to contribute with code? Here are some insights based on the area of your interest:
|
||||
|
||||
Frontend (⚛️React, Vite):
|
||||
Most of the code is located in /frontend folder. You can also check out our React extension in /extensions/react-widget.
|
||||
For design references, here's the [Figma](https://www.figma.com/file/OXLtrl1EAy885to6S69554/DocsGPT?node-id=0%3A1&t=hjWVuxRg9yi5YkJ9-1).
|
||||
Ensure you adhere to the established guidelines.
|
||||
|
||||
Backend (🐍Python):
|
||||
Focus on /application or /scripts. However, avoid the files ingest_rst.py and ingest_rst_sphinx.py as they are soon to be deprecated.
|
||||
Newly added code should come with relevant unit tests (pytest).
|
||||
Refer to the /tests folder for test suites.
|
||||
|
||||
Check out [Contributing Guidelines](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
|
||||
Don't be shy! Hop into our [Discord](https://discord.gg/n5BX8dh8rU) Server. We're a friendly bunch and eager to assist newcomers.
|
||||
|
||||
Big thanks for considering contributing to DocsGPT during Hacktoberfest! 🙏 Your effort can earn you a swanky new t-shirt. 🎁 Let's code together! 🚀
|
||||
138
README.md
@@ -18,87 +18,139 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
|
||||
<a href="https://discord.gg/n5BX8dh8rU"></a>
|
||||
<a href="https://discord.gg/n5BX8dh8rU"></a>
|
||||
<a href="https://discord.gg/n5BX8dh8rU"></a>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
### Enterprise Solutions:
|
||||
|
||||
When deploying your DocsGPT to a live environment, we're eager to provide personalized assistance. Reach out to us via email [here]( mailto:contact@arc53.com?subject=DocsGPT%20Enterprise&body=Hi%20we%20are%20%3CCompany%20name%3E%20and%20we%20want%20to%20build%20%3CSolution%3E%20with%20DocsGPT) to discuss your project further, and our team will connect with you shortly.
|
||||
|
||||
### [🎉 Join the Hacktoberfest with DocsGPT and Earn a Free T-shirt! 🎉](https://github.com/arc53/DocsGPT/blob/main/HACKTOBERFEST.md)
|
||||
|
||||

|
||||
|
||||
|
||||
## Roadmap
|
||||
|
||||
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here. Please don't hesitate to contribute or create issues, it helps us make DocsGPT better!
|
||||
|
||||
## Our open source models optimised for DocsGPT:
|
||||
|
||||
| Name | Base Model | Requirements (or similar) |
|
||||
|-------------------|------------|----------------------------------------------------------|
|
||||
| [Docsgpt-7b-falcon](https://huggingface.co/Arc53/docsgpt-7b-falcon) | Falcon-7b | 1xA10G gpu |
|
||||
| [Docsgpt-14b](https://huggingface.co/Arc53/docsgpt-14b) | llama-2-14b | 2xA10 gpu's |
|
||||
| [Docsgpt-40b-falcon](https://huggingface.co/Arc53/docsgpt-40b-falcon) | falcon-40b | 8xA10G gpu's |
|
||||
|
||||
|
||||
If you don't have enough resources to run it you can use bitsnbytes to quantize
|
||||
|
||||
|
||||
## Features
|
||||
|
||||

|
||||
|
||||
|
||||
## Useful links
|
||||
[Live preview](https://docsgpt.arc53.com/)
|
||||
|
||||
[Join Our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
|
||||
[Guides](https://docs.docsgpt.co.uk/)
|
||||
|
||||
## Roadmap
|
||||
[Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better!
|
||||
[How to use any other documentation](https://docs.docsgpt.co.uk/Guides/How-to-train-on-other-documentation)
|
||||
|
||||
|
||||
|
||||
## [Live preview](https://docsgpt.arc53.com/)
|
||||
|
||||
## [Join Our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
[How to host it locally (so all data will stay on-premises)](https://docs.docsgpt.co.uk/Guides/How-to-use-different-LLM)
|
||||
|
||||
|
||||
## Project structure
|
||||
- Application - flask app (main application)
|
||||
- Application - Flask app (main application)
|
||||
|
||||
- Extensions - chrome extension
|
||||
- Extensions - Chrome extension
|
||||
|
||||
- Scripts - script that creates similarity search index and store for other libraries.
|
||||
- Scripts - Script that creates similarity search index and store for other libraries.
|
||||
|
||||
- frontend - frontend in vite and
|
||||
- Frontend - Frontend uses Vite and React
|
||||
|
||||
## QuickStart
|
||||
|
||||
Note: Make sure you have docker installed
|
||||
Note: Make sure you have Docker installed
|
||||
|
||||
1. Open dowload this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create .env file in your root directory and set your OPENAI_API_KEY with your openai api key and VITE_API_STREAMING to true or false if you dont want streaming answers
|
||||
3. Run `docker-compose build && docker-compose up`
|
||||
On Mac OS or Linux just write:
|
||||
|
||||
`./setup.sh`
|
||||
|
||||
It will install all the dependencies and give you an option to download local model or use OpenAI
|
||||
|
||||
Otherwise refer to this Guide:
|
||||
|
||||
1. Download and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create a .env file in your root directory and set the env variable OPENAI_API_KEY with your OpenAI API key and VITE_API_STREAMING to true or false, depending on if you want streaming answers or not
|
||||
It should look like this inside:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=Yourkey
|
||||
VITE_API_STREAMING=true
|
||||
SELF_HOSTED_MODEL=false
|
||||
```
|
||||
See optional environment variables in the `/.env-template` and `/application/.env_sample` files.
|
||||
3. Run `./run-with-docker-compose.sh`
|
||||
4. Navigate to http://localhost:5173/
|
||||
|
||||
To stop just run Ctrl + C
|
||||
|
||||
## Development environments
|
||||
|
||||
Spin up only 2 containers from docker-compose.yaml (by deleting all services except for redis and mongo)
|
||||
### Spin up mongo and redis
|
||||
For development only 2 containers are used from docker-compose.yaml (by deleting all services except for Redis and Mongo).
|
||||
See file [docker-compose-dev.yaml](./docker-compose-dev.yaml).
|
||||
|
||||
Make sure you have python 3.10 or 3.11 installed
|
||||
Run
|
||||
```
|
||||
docker compose -f docker-compose-dev.yaml build
|
||||
docker compose -f docker-compose-dev.yaml up -d
|
||||
```
|
||||
|
||||
1. Navigate to `/application` folder
|
||||
2. Run `docker-compose -f docker-compose-dev.yaml build && docker-compose -f docker-compose-dev.yaml up -d`
|
||||
3. Export required variables
|
||||
`export CELERY_BROKER_URL=redis://localhost:6379/0`
|
||||
`export CELERY_RESULT_BACKEND=redis://localhost:6379/1`
|
||||
`export MONGO_URI=mongodb://localhost:27017/docsgpt`
|
||||
4. Install dependencies
|
||||
`pip install -r requirements.txt`
|
||||
5. Prepare .env file
|
||||
Copy .env_sample and create .env with your openai api token
|
||||
6. Run the app
|
||||
`python wsgi.py`
|
||||
7. Start worker with `celery -A app.celery worker -l INFO`
|
||||
### Run the backend
|
||||
|
||||
Make sure you have Python 3.10 or 3.11 installed.
|
||||
|
||||
1. Export required environment variables
|
||||
```commandline
|
||||
export CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
export CELERY_RESULT_BACKEND=redis://localhost:6379/1
|
||||
export MONGO_URI=mongodb://localhost:27017/docsgpt
|
||||
export FLASK_APP=application/app.py
|
||||
export FLASK_DEBUG=true
|
||||
```
|
||||
2. Prepare .env file
|
||||
Copy `.env_sample` and create `.env` with your OpenAI API token
|
||||
3. (optional) Create a Python virtual environment
|
||||
```commandline
|
||||
python -m venv venv
|
||||
. venv/bin/activate
|
||||
```
|
||||
4. Change to `application/` subdir and install dependencies for the backend
|
||||
```commandline
|
||||
pip install -r application/requirements.txt
|
||||
```
|
||||
5. Run the app `flask run --host=0.0.0.0 --port=7091`
|
||||
6. Start worker with `celery -A application.app.celery worker -l INFO`
|
||||
|
||||
### Start frontend
|
||||
Make sure you have Node version 16 or higher.
|
||||
|
||||
To start frontend
|
||||
1. Navigate to `/frontend` folder
|
||||
2. Install dependencies
|
||||
`npm install`
|
||||
3. Run the app
|
||||
4. `npm run dev`
|
||||
3. Run the app
|
||||
`npm run dev`
|
||||
|
||||
|
||||
[How to install the Chrome extension](https://github.com/arc53/docsgpt/wiki#launch-chrome-extension)
|
||||
|
||||
|
||||
## [Guides](https://github.com/arc53/docsgpt/wiki)
|
||||
|
||||
## [Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
## [How to use any other documentation](https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation)
|
||||
|
||||
## [How to host it locally (so all data will stay on-premises)](https://github.com/arc53/DocsGPT/wiki/How-to-use-different-LLM's#hosting-everything-locally)
|
||||
|
||||
Built with [🦜️🔗 LangChain](https://github.com/hwchase17/langchain)
|
||||
|
||||
|
||||
@@ -3,4 +3,10 @@ EMBEDDINGS_KEY=your_api_key
|
||||
CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
CELERY_RESULT_BACKEND=redis://localhost:6379/1
|
||||
MONGO_URI=mongodb://localhost:27017/docsgpt
|
||||
API_URL=http://localhost:5001
|
||||
API_URL=http://localhost:7091
|
||||
|
||||
#For OPENAI on Azure
|
||||
OPENAI_API_BASE=
|
||||
OPENAI_API_VERSION=
|
||||
AZURE_DEPLOYMENT_NAME=
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
|
||||
@@ -8,18 +8,16 @@ RUN pip install --upgrade pip && pip install tiktoken==0.3.3
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
|
||||
FROM python:3.10-slim-bullseye
|
||||
# Copy pre-built packages from builder stage
|
||||
COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/
|
||||
RUN pip install gunicorn==20.1.0
|
||||
RUN pip install celery==5.2.7
|
||||
|
||||
# Copy pre-built packages and binaries from builder stage
|
||||
COPY --from=builder /usr/local/ /usr/local/
|
||||
|
||||
WORKDIR /app
|
||||
COPY . /app
|
||||
COPY . /app/application
|
||||
ENV FLASK_APP=app.py
|
||||
ENV FLASK_DEBUG=true
|
||||
|
||||
EXPOSE 7091
|
||||
|
||||
EXPOSE 5001
|
||||
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
|
||||
|
||||
0
application/__init__.py
Normal file
0
application/api/__init__.py
Normal file
0
application/api/answer/__init__.py
Normal file
337
application/api/answer/routes.py
Normal file
@@ -0,0 +1,337 @@
|
||||
import asyncio
|
||||
import os
|
||||
from flask import Blueprint, request, Response
|
||||
import json
|
||||
import datetime
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson.objectid import ObjectId
|
||||
from transformers import GPT2TokenizerFast
|
||||
|
||||
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.error import bad_request
|
||||
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
answer = Blueprint('answer', __name__)
|
||||
|
||||
if settings.LLM_NAME == "gpt4":
|
||||
gpt_model = 'gpt-4'
|
||||
else:
|
||||
gpt_model = 'gpt-3.5-turbo'
|
||||
|
||||
# load the prompts
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
|
||||
template = f.read()
|
||||
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
|
||||
template_hist = f.read()
|
||||
|
||||
with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
|
||||
template_quest = f.read()
|
||||
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
|
||||
chat_reduce_template = f.read()
|
||||
|
||||
api_key_set = settings.API_KEY is not None
|
||||
embeddings_key_set = settings.EMBEDDINGS_KEY is not None
|
||||
|
||||
|
||||
async def async_generate(chain, question, chat_history):
|
||||
result = await chain.arun({"question": question, "chat_history": chat_history})
|
||||
return result
|
||||
|
||||
|
||||
def count_tokens(string):
|
||||
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
|
||||
return len(tokenizer(string)['input_ids'])
|
||||
|
||||
|
||||
def run_async_chain(chain, question, chat_history):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
result = {}
|
||||
try:
|
||||
answer = loop.run_until_complete(async_generate(chain, question, chat_history))
|
||||
finally:
|
||||
loop.close()
|
||||
result["answer"] = answer
|
||||
return result
|
||||
|
||||
|
||||
def get_vectorstore(data):
|
||||
if "active_docs" in data:
|
||||
if data["active_docs"].split("/")[0] == "local":
|
||||
if data["active_docs"].split("/")[1] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
|
||||
|
||||
# def get_docsearch(vectorstore, embeddings_key):
|
||||
# if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
|
||||
# if is_azure_configured():
|
||||
# os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
# openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME)
|
||||
# else:
|
||||
# openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key)
|
||||
# docsearch = FAISS.load_local(vectorstore, openai_embeddings)
|
||||
# elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
# docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
# elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
|
||||
# docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||
# elif settings.EMBEDDINGS_NAME == "cohere_medium":
|
||||
# docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||
# return docsearch
|
||||
|
||||
|
||||
def is_azure_configured():
|
||||
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
|
||||
|
||||
|
||||
def complete_stream(question, docsearch, chat_history, api_key, conversation_id):
|
||||
llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
|
||||
|
||||
|
||||
docs = docsearch.search(question, k=2)
|
||||
if settings.LLM_NAME == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
data = json.dumps({"type": "source", "doc": doc.page_content, "metadata": doc.metadata})
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
data = json.dumps({"type": "source", "doc": doc.page_content})
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
yield f"data:{data}\n\n"
|
||||
|
||||
if len(chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
chat_history.reverse()
|
||||
for i in chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
|
||||
response_full = ""
|
||||
completion = llm.gen_stream(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_combine)
|
||||
for line in completion:
|
||||
data = json.dumps({"answer": str(line)})
|
||||
response_full += str(line)
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
# save conversation to database
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question, "response": response_full, "sources": source_log_docs}}},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: " + question + "\n\n" +
|
||||
"AI: " +
|
||||
response_full},
|
||||
{"role": "user", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the "
|
||||
"system"}]
|
||||
|
||||
completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_summary, max_tokens=30)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion,
|
||||
"queries": [{"prompt": question, "response": response_full, "sources": source_log_docs}]}
|
||||
).inserted_id
|
||||
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
|
||||
@answer.route("/stream", methods=["POST"])
|
||||
def stream():
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
# history to json object from string
|
||||
history = json.loads(history)
|
||||
conversation_id = data["conversation_id"]
|
||||
|
||||
# check if active_docs is set
|
||||
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if "active_docs" in data:
|
||||
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
|
||||
else:
|
||||
vectorstore = ""
|
||||
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
|
||||
|
||||
return Response(
|
||||
complete_stream(question, docsearch,
|
||||
chat_history=history, api_key=api_key,
|
||||
conversation_id=conversation_id), mimetype="text/event-stream"
|
||||
)
|
||||
|
||||
|
||||
@answer.route("/api/answer", methods=["POST"])
|
||||
def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
if "conversation_id" not in data:
|
||||
conversation_id = None
|
||||
else:
|
||||
conversation_id = data["conversation_id"]
|
||||
print("-" * 5)
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
|
||||
# use try and except to check for exception
|
||||
try:
|
||||
# check if the vectorstore is set
|
||||
vectorstore = get_vectorstore(data)
|
||||
# loading the index and the store and the prompt template
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
|
||||
|
||||
|
||||
llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
|
||||
|
||||
|
||||
|
||||
docs = docsearch.search(question, k=2)
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
# join all page_content together with a newline
|
||||
|
||||
|
||||
if len(history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
history.reverse()
|
||||
for i in history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
|
||||
|
||||
completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_combine)
|
||||
|
||||
|
||||
result = {"answer": completion, "sources": source_log_docs}
|
||||
logger.debug(result)
|
||||
|
||||
# generate conversationId
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question,
|
||||
"response": result["answer"], "sources": result['sources']}}},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [
|
||||
{"role": "assistant", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the system \n\n"
|
||||
"User: " + question + "\n\n" + "AI: " + result["answer"]},
|
||||
{"role": "user", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the system"}
|
||||
]
|
||||
|
||||
completion = llm.gen(
|
||||
model=gpt_model,
|
||||
engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_summary,
|
||||
max_tokens=30
|
||||
)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion,
|
||||
"queries": [{"prompt": question, "response": result["answer"], "sources": source_log_docs}]}
|
||||
).inserted_id
|
||||
|
||||
result["conversation_id"] = str(conversation_id)
|
||||
|
||||
# mock result
|
||||
# result = {
|
||||
# "answer": "The answer is 42",
|
||||
# "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
|
||||
# }
|
||||
return result
|
||||
except Exception as e:
|
||||
# print whole traceback
|
||||
traceback.print_exc()
|
||||
print(str(e))
|
||||
return bad_request(500, str(e))
|
||||
0
application/api/internal/__init__.py
Normal file
69
application/api/internal/routes.py
Normal file
@@ -0,0 +1,69 @@
|
||||
import os
|
||||
import datetime
|
||||
from flask import Blueprint, request, send_from_directory
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
|
||||
from application.core.settings import settings
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
internal = Blueprint('internal', __name__)
|
||||
@internal.route("/api/download", methods=["get"])
|
||||
def download_file():
|
||||
user = secure_filename(request.args.get("user"))
|
||||
job_name = secure_filename(request.args.get("name"))
|
||||
filename = secure_filename(request.args.get("file"))
|
||||
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
|
||||
return send_from_directory(save_dir, filename, as_attachment=True)
|
||||
|
||||
|
||||
|
||||
@internal.route("/api/upload_index", methods=["POST"])
|
||||
def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = secure_filename(request.form["user"])
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
save_dir = os.path.join(current_dir, "indexes", user, job_name)
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
if "file_faiss" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file_faiss = request.files["file_faiss"]
|
||||
if file_faiss.filename == "":
|
||||
return {"status": "no file name"}
|
||||
if "file_pkl" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file_pkl = request.files["file_pkl"]
|
||||
if file_pkl.filename == "":
|
||||
return {"status": "no file name"}
|
||||
# saves index files
|
||||
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, "index.faiss"))
|
||||
file_pkl.save(os.path.join(save_dir, "index.pkl"))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one(
|
||||
{
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local",
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
0
application/api/user/__init__.py
Normal file
226
application/api/user/routes.py
Normal file
@@ -0,0 +1,226 @@
|
||||
import os
|
||||
from flask import Blueprint, request, jsonify
|
||||
import requests
|
||||
import json
|
||||
from pymongo import MongoClient
|
||||
from bson.objectid import ObjectId
|
||||
from werkzeug.utils import secure_filename
|
||||
import http.client
|
||||
|
||||
from application.api.user.tasks import ingest
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
user = Blueprint('user', __name__)
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
@user.route("/api/delete_conversation", methods=["POST"])
|
||||
def delete_conversation():
|
||||
# deletes a conversation from the database
|
||||
conversation_id = request.args.get("id")
|
||||
# write to mongodb
|
||||
conversations_collection.delete_one(
|
||||
{
|
||||
"_id": ObjectId(conversation_id),
|
||||
}
|
||||
)
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
@user.route("/api/get_conversations", methods=["get"])
|
||||
def get_conversations():
|
||||
# provides a list of conversations
|
||||
conversations = conversations_collection.find().sort("date", -1)
|
||||
list_conversations = []
|
||||
for conversation in conversations:
|
||||
list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
|
||||
|
||||
#list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
|
||||
|
||||
return jsonify(list_conversations)
|
||||
|
||||
|
||||
@user.route("/api/get_single_conversation", methods=["get"])
|
||||
def get_single_conversation():
|
||||
# provides data for a conversation
|
||||
conversation_id = request.args.get("id")
|
||||
conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
|
||||
return jsonify(conversation['queries'])
|
||||
|
||||
|
||||
@user.route("/api/feedback", methods=["POST"])
|
||||
def api_feedback():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
answer = data["answer"]
|
||||
feedback = data["feedback"]
|
||||
|
||||
print("-" * 5)
|
||||
print("Question: " + question)
|
||||
print("Answer: " + answer)
|
||||
print("Feedback: " + feedback)
|
||||
print("-" * 5)
|
||||
response = requests.post(
|
||||
url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
|
||||
headers={
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
},
|
||||
data=json.dumps({"answer": answer, "question": question, "feedback": feedback}),
|
||||
)
|
||||
return {"status": http.client.responses.get(response.status_code, "ok")}
|
||||
|
||||
|
||||
@user.route("/api/delete_old", methods=["get"])
|
||||
def delete_old():
|
||||
"""Delete old indexes."""
|
||||
import shutil
|
||||
|
||||
path = request.args.get("path")
|
||||
dirs = path.split("/")
|
||||
dirs_clean = []
|
||||
for i in range(1, len(dirs)):
|
||||
dirs_clean.append(secure_filename(dirs[i]))
|
||||
# check that path strats with indexes or vectors
|
||||
if dirs[0] not in ["indexes", "vectors"]:
|
||||
return {"status": "error"}
|
||||
path_clean = "/".join(dirs)
|
||||
vectors_collection.delete_one({"location": path})
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
try:
|
||||
shutil.rmtree(os.path.join(current_dir, path_clean))
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
else:
|
||||
vetorstore = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
|
||||
)
|
||||
vetorstore.delete_index()
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
@user.route("/api/upload", methods=["POST"])
|
||||
def upload_file():
|
||||
"""Upload a file to get vectorized and indexed."""
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = secure_filename(request.form["user"])
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
# check if the post request has the file part
|
||||
if "file" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file = request.files["file"]
|
||||
if file.filename == "":
|
||||
return {"status": "no file name"}
|
||||
|
||||
if file:
|
||||
filename = secure_filename(file.filename)
|
||||
# save dir
|
||||
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
|
||||
# create dir if not exists
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
|
||||
file.save(os.path.join(save_dir, filename))
|
||||
task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
|
||||
# task id
|
||||
task_id = task.id
|
||||
return {"status": "ok", "task_id": task_id}
|
||||
else:
|
||||
return {"status": "error"}
|
||||
|
||||
@user.route("/api/task_status", methods=["GET"])
|
||||
def task_status():
|
||||
"""Get celery job status."""
|
||||
task_id = request.args.get("task_id")
|
||||
from application.celery import celery
|
||||
task = celery.AsyncResult(task_id)
|
||||
task_meta = task.info
|
||||
return {"status": task.status, "result": task_meta}
|
||||
|
||||
|
||||
@user.route("/api/combine", methods=["GET"])
|
||||
def combined_json():
|
||||
user = "local"
|
||||
"""Provide json file with combined available indexes."""
|
||||
# get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
|
||||
|
||||
data = [
|
||||
{
|
||||
"name": "default",
|
||||
"language": "default",
|
||||
"version": "",
|
||||
"description": "default",
|
||||
"fullName": "default",
|
||||
"date": "default",
|
||||
"docLink": "default",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
}
|
||||
]
|
||||
# structure: name, language, version, description, fullName, date, docLink
|
||||
# append data from vectors_collection
|
||||
for index in vectors_collection.find({"user": user}):
|
||||
data.append(
|
||||
{
|
||||
"name": index["name"],
|
||||
"language": index["language"],
|
||||
"version": "",
|
||||
"description": index["name"],
|
||||
"fullName": index["name"],
|
||||
"date": index["date"],
|
||||
"docLink": index["location"],
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
}
|
||||
)
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
|
||||
for index in data_remote:
|
||||
index["location"] = "remote"
|
||||
data.append(index)
|
||||
|
||||
return jsonify(data)
|
||||
|
||||
|
||||
@user.route("/api/docs_check", methods=["POST"])
|
||||
def check_docs():
|
||||
# check if docs exist in a vectorstore folder
|
||||
data = request.get_json()
|
||||
# split docs on / and take first part
|
||||
if data["docs"].split("/")[0] == "local":
|
||||
return {"status": "exists"}
|
||||
vectorstore = "vectors/" + data["docs"]
|
||||
base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
|
||||
if os.path.exists(vectorstore) or data["docs"] == "default":
|
||||
return {"status": "exists"}
|
||||
else:
|
||||
r = requests.get(base_path + vectorstore + "index.faiss")
|
||||
|
||||
if r.status_code != 200:
|
||||
return {"status": "null"}
|
||||
else:
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
with open(vectorstore + "index.faiss", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
# download the store
|
||||
r = requests.get(base_path + vectorstore + "index.pkl")
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
return {"status": "loaded"}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
7
application/api/user/tasks.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from application.worker import ingest_worker
|
||||
from application.celery import celery
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest(self, directory, formats, name_job, filename, user):
|
||||
resp = ingest_worker(self, directory, formats, name_job, filename, user)
|
||||
return resp
|
||||
@@ -1,52 +1,19 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import http.client
|
||||
import json
|
||||
import os
|
||||
import traceback
|
||||
import platform
|
||||
|
||||
|
||||
import openai
|
||||
import dotenv
|
||||
import requests
|
||||
from celery import Celery
|
||||
from celery.result import AsyncResult
|
||||
from flask import Flask, request, render_template, send_from_directory, jsonify, Response
|
||||
from langchain import FAISS
|
||||
from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
|
||||
from langchain.chains import LLMChain, ConversationalRetrievalChain
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
|
||||
HuggingFaceInstructEmbeddings
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
AIMessagePromptTemplate,
|
||||
)
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
from langchain.llms import GPT4All
|
||||
from application.celery import celery
|
||||
from flask import Flask, request, redirect
|
||||
|
||||
from core.settings import settings
|
||||
from error import bad_request
|
||||
from worker import ingest_worker
|
||||
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
from application.core.settings import settings
|
||||
from application.api.user.routes import user
|
||||
from application.api.answer.routes import answer
|
||||
from application.api.internal.routes import internal
|
||||
|
||||
if settings.LLM_NAME == "manifest":
|
||||
from manifest import Manifest
|
||||
from langchain.llms.manifest import ManifestWrapper
|
||||
|
||||
manifest = Manifest(
|
||||
client_name="huggingface",
|
||||
client_connection="http://127.0.0.1:5000"
|
||||
)
|
||||
|
||||
# Redirect PosixPath to WindowsPath on Windows
|
||||
import platform
|
||||
|
||||
if platform.system() == "Windows":
|
||||
import pathlib
|
||||
@@ -57,490 +24,45 @@ if platform.system() == "Windows":
|
||||
# loading the .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# load the prompts
|
||||
with open("prompts/combine_prompt.txt", "r") as f:
|
||||
template = f.read()
|
||||
|
||||
with open("prompts/combine_prompt_hist.txt", "r") as f:
|
||||
template_hist = f.read()
|
||||
|
||||
with open("prompts/question_prompt.txt", "r") as f:
|
||||
template_quest = f.read()
|
||||
|
||||
with open("prompts/chat_combine_prompt.txt", "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
with open("prompts/chat_reduce_prompt.txt", "r") as f:
|
||||
chat_reduce_template = f.read()
|
||||
|
||||
if settings.API_KEY is not None:
|
||||
api_key_set = True
|
||||
else:
|
||||
api_key_set = False
|
||||
if settings.EMBEDDINGS_KEY is not None:
|
||||
embeddings_key_set = True
|
||||
else:
|
||||
embeddings_key_set = False
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
|
||||
app.config['CELERY_BROKER_URL'] = settings.CELERY_BROKER_URL
|
||||
app.config['CELERY_RESULT_BACKEND'] = settings.CELERY_RESULT_BACKEND
|
||||
app.config['MONGO_URI'] = settings.MONGO_URI
|
||||
celery = Celery()
|
||||
celery.config_from_object('celeryconfig')
|
||||
mongo = MongoClient(app.config['MONGO_URI'])
|
||||
db = mongo["docsgpt"]
|
||||
vectors_collection = db["vectors"]
|
||||
app.register_blueprint(user)
|
||||
app.register_blueprint(answer)
|
||||
app.register_blueprint(internal)
|
||||
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER = "inputs"
|
||||
app.config["CELERY_BROKER_URL"] = settings.CELERY_BROKER_URL
|
||||
app.config["CELERY_RESULT_BACKEND"] = settings.CELERY_RESULT_BACKEND
|
||||
app.config["MONGO_URI"] = settings.MONGO_URI
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
|
||||
|
||||
async def async_generate(chain, question, chat_history):
|
||||
result = await chain.arun({"question": question, "chat_history": chat_history})
|
||||
return result
|
||||
|
||||
|
||||
def run_async_chain(chain, question, chat_history):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
result = {}
|
||||
try:
|
||||
answer = loop.run_until_complete(async_generate(chain, question, chat_history))
|
||||
finally:
|
||||
loop.close()
|
||||
result["answer"] = answer
|
||||
return result
|
||||
|
||||
|
||||
def get_vectorstore(data):
|
||||
if "active_docs" in data:
|
||||
if data["active_docs"].split("/")[0] == "local":
|
||||
if data["active_docs"].split("/")[1] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data['active_docs'] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
return vectorstore
|
||||
|
||||
def get_docsearch(vectorstore, embeddings_key):
|
||||
if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
|
||||
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
|
||||
elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||
elif settings.EMBEDDINGS_NAME == "cohere_medium":
|
||||
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||
return docsearch
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest(self, directory, formats, name_job, filename, user):
|
||||
resp = ingest_worker(self, directory, formats, name_job, filename, user)
|
||||
return resp
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def home():
|
||||
return render_template("index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME,
|
||||
embeddings_choice=settings.EMBEDDINGS_NAME)
|
||||
|
||||
def complete_stream(question, docsearch, chat_history, api_key):
|
||||
openai.api_key = api_key
|
||||
llm = ChatOpenAI(openai_api_key=api_key)
|
||||
docs = docsearch.similarity_search(question, k=2)
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
if len(chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
chat_history.reverse()
|
||||
for i in chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
|
||||
messages=messages_combine, stream=True, max_tokens=500, temperature=0)
|
||||
|
||||
for line in completion:
|
||||
if 'content' in line['choices'][0]['delta']:
|
||||
# check if the delta contains content
|
||||
data = json.dumps({"answer": str(line['choices'][0]['delta']['content'])})
|
||||
yield f"data: {data}\n\n"
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
@app.route("/stream", methods=['POST', 'GET'])
|
||||
def stream():
|
||||
# get parameter from url question
|
||||
question = request.args.get('question')
|
||||
history = request.args.get('history')
|
||||
# history to json object from string
|
||||
history = json.loads(history)
|
||||
|
||||
# check if active_docs is set
|
||||
|
||||
if not api_key_set:
|
||||
api_key = request.args.get("api_key")
|
||||
"""
|
||||
The frontend source code lives in the /frontend directory of the repository.
|
||||
"""
|
||||
if request.remote_addr in ('0.0.0.0', '127.0.0.1', 'localhost', '172.18.0.1'):
|
||||
# If users locally try to access DocsGPT running in Docker,
|
||||
# they will be redirected to the Frontend application.
|
||||
return redirect('http://localhost:5173')
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = request.args.get("embeddings_key")
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if "active_docs" in request.args:
|
||||
vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
|
||||
else:
|
||||
vectorstore = ""
|
||||
docsearch = get_docsearch(vectorstore, embeddings_key)
|
||||
# Handle other cases or render the default page
|
||||
return 'Welcome to DocsGPT Backend!'
|
||||
|
||||
|
||||
#question = "Hi"
|
||||
return Response(complete_stream(question, docsearch,
|
||||
chat_history= history, api_key=api_key), mimetype='text/event-stream')
|
||||
|
||||
|
||||
@app.route("/api/answer", methods=["POST"])
|
||||
def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
print('-' * 5)
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
|
||||
# use try and except to check for exception
|
||||
try:
|
||||
# check if the vectorstore is set
|
||||
vectorstore = get_vectorstore(data)
|
||||
# loading the index and the store and the prompt template
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
docsearch = get_docsearch(vectorstore, embeddings_key)
|
||||
|
||||
q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
|
||||
template_format="jinja2")
|
||||
if settings.LLM_NAME == "openai_chat":
|
||||
llm = ChatOpenAI(openai_api_key=api_key) # optional parameter: model_name="gpt-4"
|
||||
messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
|
||||
if history:
|
||||
tokens_current_history = 0
|
||||
#count tokens in history
|
||||
history.reverse()
|
||||
for i in history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
|
||||
messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
|
||||
messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
|
||||
import sys
|
||||
print(messages_combine, file=sys.stderr)
|
||||
p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
|
||||
elif settings.LLM_NAME == "openai":
|
||||
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
||||
elif settings.LLM_NAME == "manifest":
|
||||
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
||||
elif settings.LLM_NAME == "huggingface":
|
||||
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
||||
elif settings.LLM_NAME == "cohere":
|
||||
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
||||
elif settings.LLM_NAME == "gpt4all":
|
||||
llm = GPT4All(model=settings.MODEL_PATH)
|
||||
else:
|
||||
raise ValueError("unknown LLM model")
|
||||
|
||||
if settings.LLM_NAME == "openai_chat":
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
|
||||
chain = ConversationalRetrievalChain(
|
||||
retriever=docsearch.as_retriever(k=2),
|
||||
question_generator=question_generator,
|
||||
combine_docs_chain=doc_chain,
|
||||
)
|
||||
chat_history = []
|
||||
# result = chain({"question": question, "chat_history": chat_history})
|
||||
# generate async with async generate method
|
||||
result = run_async_chain(chain, question, chat_history)
|
||||
elif settings.LLM_NAME == "gpt4all":
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
|
||||
chain = ConversationalRetrievalChain(
|
||||
retriever=docsearch.as_retriever(k=2),
|
||||
question_generator=question_generator,
|
||||
combine_docs_chain=doc_chain,
|
||||
)
|
||||
chat_history = []
|
||||
# result = chain({"question": question, "chat_history": chat_history})
|
||||
# generate async with async generate method
|
||||
result = run_async_chain(chain, question, chat_history)
|
||||
|
||||
else:
|
||||
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
|
||||
combine_prompt=chat_combine_template, question_prompt=q_prompt)
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
|
||||
result = chain({"query": question})
|
||||
|
||||
print(result)
|
||||
|
||||
# some formatting for the frontend
|
||||
if "result" in result:
|
||||
result['answer'] = result['result']
|
||||
result['answer'] = result['answer'].replace("\\n", "\n")
|
||||
try:
|
||||
result['answer'] = result['answer'].split("SOURCES:")[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# mock result
|
||||
# result = {
|
||||
# "answer": "The answer is 42",
|
||||
# "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
|
||||
# }
|
||||
return result
|
||||
except Exception as e:
|
||||
# print whole traceback
|
||||
traceback.print_exc()
|
||||
print(str(e))
|
||||
return bad_request(500, str(e))
|
||||
|
||||
|
||||
@app.route("/api/docs_check", methods=["POST"])
|
||||
def check_docs():
|
||||
# check if docs exist in a vectorstore folder
|
||||
data = request.get_json()
|
||||
# split docs on / and take first part
|
||||
if data["docs"].split("/")[0] == "local":
|
||||
return {"status": 'exists'}
|
||||
vectorstore = "vectors/" + data["docs"]
|
||||
base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
|
||||
if os.path.exists(vectorstore) or data["docs"] == "default":
|
||||
return {"status": 'exists'}
|
||||
else:
|
||||
r = requests.get(base_path + vectorstore + "index.faiss")
|
||||
|
||||
if r.status_code != 200:
|
||||
return {"status": 'null'}
|
||||
else:
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
with open(vectorstore + "index.faiss", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
# download the store
|
||||
r = requests.get(base_path + vectorstore + "index.pkl")
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
return {"status": 'loaded'}
|
||||
|
||||
|
||||
@app.route("/api/feedback", methods=["POST"])
|
||||
def api_feedback():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
answer = data["answer"]
|
||||
feedback = data["feedback"]
|
||||
|
||||
print('-' * 5)
|
||||
print("Question: " + question)
|
||||
print("Answer: " + answer)
|
||||
print("Feedback: " + feedback)
|
||||
print('-' * 5)
|
||||
response = requests.post(
|
||||
url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
|
||||
headers={
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
},
|
||||
data=json.dumps({
|
||||
"answer": answer,
|
||||
"question": question,
|
||||
"feedback": feedback
|
||||
})
|
||||
)
|
||||
return {"status": http.client.responses.get(response.status_code, 'ok')}
|
||||
|
||||
|
||||
@app.route('/api/combine', methods=['GET'])
|
||||
def combined_json():
|
||||
user = 'local'
|
||||
"""Provide json file with combined available indexes."""
|
||||
# get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
|
||||
|
||||
data = [{
|
||||
"name": 'default',
|
||||
"language": 'default',
|
||||
"version": '',
|
||||
"description": 'default',
|
||||
"fullName": 'default',
|
||||
"date": 'default',
|
||||
"docLink": 'default',
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local"
|
||||
}]
|
||||
# structure: name, language, version, description, fullName, date, docLink
|
||||
# append data from vectors_collection
|
||||
for index in vectors_collection.find({'user': user}):
|
||||
data.append({
|
||||
"name": index['name'],
|
||||
"language": index['language'],
|
||||
"version": '',
|
||||
"description": index['name'],
|
||||
"fullName": index['name'],
|
||||
"date": index['date'],
|
||||
"docLink": index['location'],
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local"
|
||||
})
|
||||
|
||||
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
|
||||
for index in data_remote:
|
||||
index['location'] = "remote"
|
||||
data.append(index)
|
||||
|
||||
return jsonify(data)
|
||||
|
||||
|
||||
@app.route('/api/upload', methods=['POST'])
|
||||
def upload_file():
|
||||
"""Upload a file to get vectorized and indexed."""
|
||||
if 'user' not in request.form:
|
||||
return {"status": 'no user'}
|
||||
user = secure_filename(request.form['user'])
|
||||
if 'name' not in request.form:
|
||||
return {"status": 'no name'}
|
||||
job_name = secure_filename(request.form['name'])
|
||||
# check if the post request has the file part
|
||||
if 'file' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file = request.files['file']
|
||||
if file.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
|
||||
if file:
|
||||
filename = secure_filename(file.filename)
|
||||
# save dir
|
||||
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
|
||||
# create dir if not exists
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
|
||||
file.save(os.path.join(save_dir, filename))
|
||||
task = ingest.delay('temp', [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
|
||||
# task id
|
||||
task_id = task.id
|
||||
return {"status": 'ok', "task_id": task_id}
|
||||
else:
|
||||
return {"status": 'error'}
|
||||
|
||||
|
||||
@app.route('/api/task_status', methods=['GET'])
|
||||
def task_status():
|
||||
"""Get celery job status."""
|
||||
task_id = request.args.get('task_id')
|
||||
task = AsyncResult(task_id)
|
||||
task_meta = task.info
|
||||
return {"status": task.status, "result": task_meta}
|
||||
|
||||
|
||||
### Backgound task api
|
||||
@app.route('/api/upload_index', methods=['POST'])
|
||||
def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
if 'user' not in request.form:
|
||||
return {"status": 'no user'}
|
||||
user = secure_filename(request.form['user'])
|
||||
if 'name' not in request.form:
|
||||
return {"status": 'no name'}
|
||||
job_name = secure_filename(request.form['name'])
|
||||
if 'file_faiss' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file_faiss = request.files['file_faiss']
|
||||
if file_faiss.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if 'file_pkl' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file_pkl = request.files['file_pkl']
|
||||
if file_pkl.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
|
||||
# saves index files
|
||||
save_dir = os.path.join('indexes', user, job_name)
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, 'index.faiss'))
|
||||
file_pkl.save(os.path.join(save_dir, 'index.pkl'))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one({
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local"
|
||||
})
|
||||
return {"status": 'ok'}
|
||||
|
||||
|
||||
@app.route('/api/download', methods=['get'])
|
||||
def download_file():
|
||||
user = secure_filename(request.args.get('user'))
|
||||
job_name = secure_filename(request.args.get('name'))
|
||||
filename = secure_filename(request.args.get('file'))
|
||||
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
|
||||
return send_from_directory(save_dir, filename, as_attachment=True)
|
||||
|
||||
|
||||
@app.route('/api/delete_old', methods=['get'])
|
||||
def delete_old():
|
||||
"""Delete old indexes."""
|
||||
import shutil
|
||||
path = request.args.get('path')
|
||||
dirs = path.split('/')
|
||||
dirs_clean = []
|
||||
for i in range(1, len(dirs)):
|
||||
dirs_clean.append(secure_filename(dirs[i]))
|
||||
# check that path strats with indexes or vectors
|
||||
if dirs[0] not in ['indexes', 'vectors']:
|
||||
return {"status": 'error'}
|
||||
path_clean = '/'.join(dirs)
|
||||
vectors_collection.delete_one({'location': path})
|
||||
try:
|
||||
shutil.rmtree(path_clean)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return {"status": 'ok'}
|
||||
|
||||
|
||||
# handling CORS
|
||||
@app.after_request
|
||||
def after_request(response):
|
||||
response.headers.add('Access-Control-Allow-Origin', '*')
|
||||
response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
|
||||
response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
|
||||
response.headers.add('Access-Control-Allow-Credentials', 'true')
|
||||
response.headers.add("Access-Control-Allow-Origin", "*")
|
||||
response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
|
||||
response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
|
||||
# response.headers.add("Access-Control-Allow-Credentials", "true")
|
||||
return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=5001)
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
9
application/celery.py
Normal file
@@ -0,0 +1,9 @@
|
||||
from celery import Celery
|
||||
from application.core.settings import settings
|
||||
|
||||
def make_celery(app_name=__name__):
|
||||
celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
|
||||
celery.conf.update(settings)
|
||||
return celery
|
||||
|
||||
celery = make_celery()
|
||||
@@ -1,21 +1,36 @@
|
||||
from pathlib import Path
|
||||
import os
|
||||
|
||||
from pydantic import BaseSettings
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
LLM_NAME: str = "openai_chat"
|
||||
LLM_NAME: str = "openai"
|
||||
EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
|
||||
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
|
||||
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
|
||||
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
|
||||
MODEL_PATH: str = "./models/gpt4all-model.bin"
|
||||
MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
|
||||
TOKENS_MAX_HISTORY: int = 150
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch"
|
||||
|
||||
API_URL: str = "http://localhost:5001" # backend url for celery worker
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
|
||||
API_KEY: str = None # LLM api key
|
||||
EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY
|
||||
OPENAI_API_BASE: str = None # azure openai api base url
|
||||
OPENAI_API_VERSION: str = None # azure openai api version
|
||||
AZURE_DEPLOYMENT_NAME: str = None # azure deployment name for answering
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: str = None # azure deployment name for embeddings
|
||||
|
||||
# elasticsearch
|
||||
ELASTIC_CLOUD_ID: str = None # cloud id for elasticsearch
|
||||
ELASTIC_USERNAME: str = None # username for elasticsearch
|
||||
ELASTIC_PASSWORD: str = None # password for elasticsearch
|
||||
ELASTIC_URL: str = None # url for elasticsearch
|
||||
ELASTIC_INDEX: str = "docsgpt" # index name for elasticsearch
|
||||
|
||||
|
||||
path = Path(__file__).parent.parent.absolute()
|
||||
|
||||
0
application/llm/__init__.py
Normal file
14
application/llm/base.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseLLM(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def gen(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def gen_stream(self, *args, **kwargs):
|
||||
pass
|
||||
31
application/llm/huggingface.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from application.llm.base import BaseLLM
|
||||
|
||||
class HuggingFaceLLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B'):
|
||||
global hf
|
||||
|
||||
from langchain.llms import HuggingFacePipeline
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
||||
tokenizer = AutoTokenizer.from_pretrained(llm_name)
|
||||
model = AutoModelForCausalLM.from_pretrained(llm_name)
|
||||
pipe = pipeline(
|
||||
"text-generation", model=model,
|
||||
tokenizer=tokenizer, max_new_tokens=2000,
|
||||
device_map="auto", eos_token_id=tokenizer.eos_token_id
|
||||
)
|
||||
hf = HuggingFacePipeline(pipeline=pipe)
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
result = hf(prompt)
|
||||
|
||||
return result.content
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
|
||||
raise NotImplementedError("HuggingFaceLLM Streaming is not implemented yet.")
|
||||
|
||||
39
application/llm/llama_cpp.py
Normal file
@@ -0,0 +1,39 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
class LlamaCpp(BaseLLM):
|
||||
|
||||
def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
|
||||
global llama
|
||||
try:
|
||||
from llama_cpp import Llama
|
||||
except ImportError:
|
||||
raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
|
||||
|
||||
llama = Llama(model_path=llm_name, n_ctx=2048)
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
result = llama(prompt, max_tokens=150, echo=False)
|
||||
|
||||
# import sys
|
||||
# print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
|
||||
|
||||
return result['choices'][0]['text'].split('### Answer \n')[-1]
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
result = llama(prompt, max_tokens=150, echo=False, stream=stream)
|
||||
|
||||
# import sys
|
||||
# print(list(result), file=sys.stderr)
|
||||
|
||||
for item in result:
|
||||
for choice in item['choices']:
|
||||
yield choice['text']
|
||||
22
application/llm/llm_creator.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from application.llm.openai import OpenAILLM, AzureOpenAILLM
|
||||
from application.llm.sagemaker import SagemakerAPILLM
|
||||
from application.llm.huggingface import HuggingFaceLLM
|
||||
from application.llm.llama_cpp import LlamaCpp
|
||||
|
||||
|
||||
|
||||
class LLMCreator:
|
||||
llms = {
|
||||
'openai': OpenAILLM,
|
||||
'azure_openai': AzureOpenAILLM,
|
||||
'sagemaker': SagemakerAPILLM,
|
||||
'huggingface': HuggingFaceLLM,
|
||||
'llama.cpp': LlamaCpp
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_llm(cls, type, *args, **kwargs):
|
||||
llm_class = cls.llms.get(type.lower())
|
||||
if not llm_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
return llm_class(*args, **kwargs)
|
||||
57
application/llm/openai.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
class OpenAILLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key):
|
||||
global openai
|
||||
import openai
|
||||
openai.api_key = api_key
|
||||
self.api_key = api_key # Save the API key to be used later
|
||||
|
||||
def _get_openai(self):
|
||||
# Import openai when needed
|
||||
import openai
|
||||
# Set the API key every time you import openai
|
||||
openai.api_key = self.api_key
|
||||
return openai
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
response = openai.ChatCompletion.create(
|
||||
model=model,
|
||||
engine=engine,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
return response["choices"][0]["message"]["content"]
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
response = openai.ChatCompletion.create(
|
||||
model=model,
|
||||
engine=engine,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
for line in response:
|
||||
if "content" in line["choices"][0]["delta"]:
|
||||
yield line["choices"][0]["delta"]["content"]
|
||||
|
||||
|
||||
class AzureOpenAILLM(OpenAILLM):
|
||||
|
||||
def __init__(self, openai_api_key, openai_api_base, openai_api_version, deployment_name):
|
||||
super().__init__(openai_api_key)
|
||||
self.api_base = settings.OPENAI_API_BASE,
|
||||
self.api_version = settings.OPENAI_API_VERSION,
|
||||
self.deployment_name = settings.AZURE_DEPLOYMENT_NAME,
|
||||
|
||||
def _get_openai(self):
|
||||
openai = super()._get_openai()
|
||||
openai.api_base = self.api_base
|
||||
openai.api_version = self.api_version
|
||||
openai.api_type = "azure"
|
||||
return openai
|
||||
27
application/llm/sagemaker.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
import requests
|
||||
import json
|
||||
|
||||
class SagemakerAPILLM(BaseLLM):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.url = settings.SAGEMAKER_API_URL
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
response = requests.post(
|
||||
url=self.url,
|
||||
headers={
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
},
|
||||
data=json.dumps({"input": prompt})
|
||||
)
|
||||
|
||||
return response.json()['answer']
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
raise NotImplementedError("Sagemaker does not support streaming")
|
||||
1
application/parser/file/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -3,7 +3,7 @@ from abc import abstractmethod
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseReader:
|
||||
|
||||
@@ -3,15 +3,15 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Union
|
||||
|
||||
from parser.file.base import BaseReader
|
||||
from parser.file.base_parser import BaseParser
|
||||
from parser.file.docs_parser import DocxParser, PDFParser
|
||||
from parser.file.epub_parser import EpubParser
|
||||
from parser.file.html_parser import HTMLParser
|
||||
from parser.file.markdown_parser import MarkdownParser
|
||||
from parser.file.rst_parser import RstParser
|
||||
from parser.file.tabular_parser import PandasCSVParser
|
||||
from parser.schema.base import Document
|
||||
from application.parser.file.base import BaseReader
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
from application.parser.file.docs_parser import DocxParser, PDFParser
|
||||
from application.parser.file.epub_parser import EpubParser
|
||||
from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for docx, pdf files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class PDFParser(BaseParser):
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for epub files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class EpubParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
|
||||
@@ -69,10 +69,10 @@ class HTMLParser(BaseParser):
|
||||
Chunks.append([])
|
||||
Chunks[-1].append(isd_el['text'])
|
||||
|
||||
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25
|
||||
# Removing all the chunks with sum of length of all the strings in the chunk < 25
|
||||
# TODO: This value can be an user defined variable
|
||||
for chunk in Chunks:
|
||||
# sum of lenth of all the strings in the chunk
|
||||
# sum of length of all the strings in the chunk
|
||||
sum = 0
|
||||
sum += len(str(chunk))
|
||||
if sum < 25:
|
||||
|
||||
@@ -8,7 +8,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
import tiktoken
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class MarkdownParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class RstParser(BaseParser):
|
||||
@@ -27,7 +27,7 @@ class RstParser(BaseParser):
|
||||
remove_interpreters: bool = True,
|
||||
remove_directives: bool = True,
|
||||
remove_whitespaces_excess: bool = True,
|
||||
# Be carefull with remove_characters_excess, might cause data loss
|
||||
# Be careful with remove_characters_excess, might cause data loss
|
||||
remove_characters_excess: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for tabular data files.
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class CSVParser(BaseParser):
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
import os
|
||||
|
||||
import tiktoken
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.vectorstores import FAISS
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.core.settings import settings
|
||||
from retry import retry
|
||||
|
||||
|
||||
@@ -33,12 +33,23 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
os.makedirs(f"{folder_name}")
|
||||
|
||||
from tqdm import tqdm
|
||||
docs_test = [docs[0]]
|
||||
docs.pop(0)
|
||||
c1 = 0
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
docs_init = [docs[0]]
|
||||
docs.pop(0)
|
||||
|
||||
store = FAISS.from_documents(docs_test, OpenAIEmbeddings(openai_api_key=os.getenv("EMBEDDINGS_KEY")))
|
||||
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
docs_init = docs_init,
|
||||
path=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY")
|
||||
)
|
||||
else:
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
path=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY")
|
||||
)
|
||||
# Uncomment for MPNet embeddings
|
||||
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
@@ -57,7 +68,8 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
store.save_local(f"{folder_name}")
|
||||
break
|
||||
c1 += 1
|
||||
store.save_local(f"{folder_name}")
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
store.save_local(f"{folder_name}")
|
||||
|
||||
|
||||
def get_user_permission(docs, folder_name):
|
||||
|
||||
1
application/parser/schema/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from parser.schema.schema import BaseDocument
|
||||
from application.parser.schema.schema import BaseDocument
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -3,7 +3,7 @@ from math import ceil
|
||||
from typing import List
|
||||
|
||||
import tiktoken
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
def separate_header_and_body(text):
|
||||
@@ -25,7 +25,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(
|
||||
current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
|
||||
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
|
||||
current_group.text += " " + doc.text
|
||||
else:
|
||||
docs.append(current_group)
|
||||
@@ -46,6 +46,9 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(doc)
|
||||
else:
|
||||
header, body = separate_header_and_body(doc.text)
|
||||
if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
|
||||
body = doc.text
|
||||
header = ""
|
||||
num_body_parts = ceil(token_length / max_tokens)
|
||||
part_length = ceil(len(body) / num_body_parts)
|
||||
body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.4
|
||||
aiohttp==3.8.5
|
||||
aiohttp-retry==2.8.3
|
||||
aiosignal==1.3.1
|
||||
aleph-alpha-client==2.16.1
|
||||
@@ -8,40 +8,40 @@ async-timeout==4.0.2
|
||||
attrs==22.2.0
|
||||
billiard==3.6.4.0
|
||||
blobfile==2.0.1
|
||||
boto3==1.26.102
|
||||
botocore==1.29.102
|
||||
boto3==1.28.20
|
||||
celery==5.2.7
|
||||
cffi==1.15.1
|
||||
charset-normalizer==3.1.0
|
||||
click==8.1.3
|
||||
click-didyoumean==0.3.0
|
||||
click-plugins==1.1.1
|
||||
click-repl==0.2.0
|
||||
cryptography==39.0.2
|
||||
cryptography==41.0.3
|
||||
dataclasses-json==0.5.7
|
||||
decorator==5.1.1
|
||||
deeplake==3.2.13
|
||||
dill==0.3.6
|
||||
dnspython==2.3.0
|
||||
ecdsa==0.18.0
|
||||
elasticsearch==8.9.0
|
||||
entrypoints==0.4
|
||||
faiss-cpu==1.7.3
|
||||
filelock==3.9.0
|
||||
Flask==2.2.3
|
||||
Flask==2.2.5
|
||||
Flask-Cors==3.0.10
|
||||
frozenlist==1.3.3
|
||||
geojson==2.5.0
|
||||
gunicorn==20.1.0
|
||||
greenlet==2.0.2
|
||||
gpt4all==0.1.7
|
||||
hub==3.0.1
|
||||
huggingface-hub==0.12.1
|
||||
humbug==0.2.8
|
||||
huggingface-hub==0.15.1
|
||||
humbug==0.3.2
|
||||
idna==3.4
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.2
|
||||
jmespath==1.0.1
|
||||
joblib==1.2.0
|
||||
kombu==5.2.4
|
||||
langchain==0.0.179
|
||||
langchain==0.0.263
|
||||
loguru==0.6.0
|
||||
lxml==4.9.2
|
||||
MarkupSafe==2.1.2
|
||||
@@ -52,10 +52,11 @@ multidict==6.0.4
|
||||
multiprocess==0.70.14
|
||||
mypy-extensions==1.0.0
|
||||
networkx==3.0
|
||||
npx
|
||||
nltk==3.8.1
|
||||
numcodecs==0.11.0
|
||||
numpy==1.24.2
|
||||
openai==0.27.0
|
||||
openai==0.27.8
|
||||
packaging==23.0
|
||||
pathos==0.3.0
|
||||
Pillow==9.4.0
|
||||
@@ -67,12 +68,14 @@ pyasn1==0.4.8
|
||||
pycares==4.3.0
|
||||
pycparser==2.21
|
||||
pycryptodomex==3.17
|
||||
pycryptodome==3.19.0
|
||||
pydantic==1.10.5
|
||||
PyJWT==2.6.0
|
||||
pymongo==4.3.3
|
||||
pyowm==3.3.0
|
||||
PyPDF2==3.0.1
|
||||
PySocks==1.7.1
|
||||
pytest
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
python-jose==3.3.0
|
||||
@@ -80,23 +83,20 @@ pytz==2022.7.1
|
||||
PyYAML==6.0
|
||||
redis==4.5.4
|
||||
regex==2022.10.31
|
||||
requests==2.28.2
|
||||
requests==2.31.0
|
||||
retry==0.9.2
|
||||
rsa==4.9
|
||||
s3transfer==0.6.0
|
||||
scikit-learn==1.2.2
|
||||
scipy==1.10.1
|
||||
sentence-transformers==2.2.2
|
||||
sentencepiece==0.1.97
|
||||
sentencepiece
|
||||
six==1.16.0
|
||||
SQLAlchemy==1.4.46
|
||||
sympy==1.11.1
|
||||
tenacity==8.2.2
|
||||
threadpoolctl==3.1.0
|
||||
torch==2.0.0
|
||||
torchvision==0.15.1
|
||||
tiktoken
|
||||
tqdm==4.65.0
|
||||
transformers==4.27.2
|
||||
transformers==4.30.0
|
||||
typer==0.7.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.5.0
|
||||
|
||||
|
Before Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 352 KiB |
|
Before Width: | Height: | Size: 34 KiB |
|
Before Width: | Height: | Size: 631 B |
|
Before Width: | Height: | Size: 1.7 KiB |
|
Before Width: | Height: | Size: 15 KiB |
@@ -1 +0,0 @@
|
||||
{"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"}
|
||||
@@ -1,19 +0,0 @@
|
||||
function resetApiKey() {
|
||||
const modal = document.getElementById("modal");
|
||||
modal.classList.toggle("hidden");
|
||||
}
|
||||
|
||||
const apiKeyForm = document.getElementById("api-key-form");
|
||||
if (apiKeyForm) {
|
||||
apiKeyForm.addEventListener("submit", function(event) {
|
||||
event.preventDefault();
|
||||
|
||||
const apiKeyInput = document.getElementById("api-key-input");
|
||||
const apiKey = apiKeyInput.value;
|
||||
|
||||
localStorage.setItem("apiKey", apiKey);
|
||||
|
||||
apiKeyInput.value = "";
|
||||
modal.classList.toggle("hidden");
|
||||
});
|
||||
}
|
||||
@@ -1,76 +0,0 @@
|
||||
var form = document.getElementById('message-form');
|
||||
var errorModal = document.getElementById('error-alert')
|
||||
document.getElementById('close').addEventListener('click',()=>{
|
||||
errorModal.classList.toggle('hidden')
|
||||
})
|
||||
|
||||
|
||||
function submitForm(event){
|
||||
event.preventDefault()
|
||||
var message = document.getElementById("message-input").value;
|
||||
console.log(message.length)
|
||||
if(message.length === 0){
|
||||
return
|
||||
}
|
||||
msg_html = '<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end"><p class="text-sm">'
|
||||
msg_html += message
|
||||
msg_html += '</p></div>'
|
||||
document.getElementById("messages").innerHTML += msg_html;
|
||||
let chatWindow = document.getElementById("messages-container");
|
||||
chatWindow.scrollTop = chatWindow.scrollHeight;
|
||||
document.getElementById("message-input").value = "";
|
||||
document.getElementById("button-submit").innerHTML = '<i class="fa fa-circle-o-notch fa-spin"></i> Thinking...';
|
||||
document.getElementById("button-submit").disabled = true;
|
||||
if (localStorage.getItem('activeDocs') == null) {
|
||||
localStorage.setItem('activeDocs', 'default')
|
||||
}
|
||||
|
||||
|
||||
fetch('/api/answer', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
|
||||
body: JSON.stringify({question: message,
|
||||
api_key: localStorage.getItem('apiKey'),
|
||||
embeddings_key: localStorage.getItem('apiKey'),
|
||||
history: localStorage.getItem('chatHistory'),
|
||||
active_docs: localStorage.getItem('activeDocs')}),
|
||||
}).then((response)=> response.json())
|
||||
.then(data => {
|
||||
console.log('Success:', data);
|
||||
if(data.error){
|
||||
document.getElementById('text-error').textContent = `Error : ${JSON.stringify(data.message)}`
|
||||
errorModal.classList.toggle('hidden')
|
||||
}
|
||||
if(data.answer){
|
||||
msg_html = '<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start"><code class="text-sm">'
|
||||
data.answer = data.answer.replace(/\n/g, "<br>");
|
||||
msg_html += data.answer
|
||||
msg_html += '</code></div>'
|
||||
document.getElementById("messages").innerHTML += msg_html;
|
||||
let chatWindow = document.getElementById("messages-container");
|
||||
chatWindow.scrollTop = chatWindow.scrollHeight;
|
||||
}
|
||||
document.getElementById("button-submit").innerHTML = 'Send';
|
||||
document.getElementById("button-submit").disabled = false;
|
||||
let chatHistory = [message, data.answer || ''];
|
||||
localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
|
||||
|
||||
|
||||
|
||||
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Error:', error);
|
||||
// console.log(error);
|
||||
// document.getElementById("button-submit").innerHTML = 'Send';
|
||||
// document.getElementById("button-submit").disabled = false;
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
//window.addEventListener('submit',submitForm)
|
||||
// rewrite using id = button-submit
|
||||
document.getElementById("button-submit").addEventListener('click',submitForm)
|
||||
@@ -1,15 +0,0 @@
|
||||
document.getElementById("select-docs").addEventListener("change", function() {
|
||||
localStorage.setItem('activeDocs', this.value)
|
||||
fetch('/api/docs_check', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({docs: this.value}),
|
||||
}).then(response => response.json()).then(
|
||||
data => {
|
||||
console.log('Success:', data);
|
||||
}
|
||||
)
|
||||
});
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
|
||||
|
||||
|
||||
@media screen and (max-width: 1024px) {
|
||||
.text-lg {
|
||||
font-size: 3.125rem;
|
||||
margin: 2rem;
|
||||
line-height: inherit;
|
||||
}
|
||||
.text-sm {
|
||||
font-size: 2.5rem;
|
||||
margin: 1.5rem;
|
||||
line-height: inherit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
.loader {
|
||||
border: 16px solid #f3f3f3; /* Light grey */
|
||||
border-top: 16px solid #3498db; /* Blue */
|
||||
border-radius: 50%;
|
||||
width: 120px;
|
||||
height: 120px;
|
||||
animation: spin 2s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
|
||||
@@ -1,215 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>DocsGPT 🦖 Preview</title>
|
||||
<link href="{{url_for('static',filename='dist/css/output.css')}}" rel="stylesheet">
|
||||
<link rel="favicon" href="{{ url_for('static', filename='favicon/favicon.ico') }}">
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="{{ url_for('static', filename='favicon/apple-touch-icon.png') }}">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="{{ url_for('static', filename='favicon/favicon-32x32.png') }}">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="{{ url_for('static', filename='favicon/favicon-16x16.png') }}">
|
||||
<link rel="manifest" href="{{ url_for('static', filename='favicon//site.webmanifest') }}">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
|
||||
|
||||
|
||||
|
||||
</head>
|
||||
|
||||
|
||||
<body>
|
||||
|
||||
|
||||
|
||||
<header class="bg-white p-2 flex justify-between items-center">
|
||||
<h1 class="text-lg font-medium">DocsGPT 🦖 Preview</h1>
|
||||
<div>
|
||||
<a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800 text-sm">About</a>
|
||||
{% if not api_key_set %}
|
||||
<button class="text-sm text-yellow-500 hover:text-yellow-800" onclick="resetApiKey()">Reset Key</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
</header>
|
||||
|
||||
|
||||
<!-- Alert Info -->
|
||||
<div class="border flex justify-between
|
||||
w-auto px-4 py-3 rounded relative
|
||||
hidden" style="background-color: rgb(197, 51, 51);color: white;" id="error-alert" role="alert">
|
||||
<span class="block sm:inline" id="text-error"></span>
|
||||
<strong class="text-xl align-center alert-del" style="cursor: pointer;" id="close">×</strong>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="lg:flex ml-2 mr-2">
|
||||
<div class="lg:w-3/4 min-h-screen max-h-screen">
|
||||
<div class="w-full flex flex-col h-5/6">
|
||||
<div id="messages-container" style="overflow: auto;" class="sm:max-lg:mb-[12rem]">
|
||||
|
||||
<div id="messages" class="w-full flex flex-col mt-2" >
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Hello, ask me anything about this library. Im here to help</p>
|
||||
</div>
|
||||
<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end">
|
||||
<p class="text-sm">How to merge tables?</p>
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is:<br>
|
||||
pd.merge(left, right, on, how)<br>
|
||||
where left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform.<br>
|
||||
For example, to merge the two tables df1 and df2 on the column 'key', you can use:<br>
|
||||
pd.merge(df1, df2, on='key', how='left')<br>
|
||||
This will return a new DataFrame with all the columns from both tables, and only the rows that match the 'key' column. </p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="fixed bottom-0 w-full mt-4 mb-2 lg:w-3/4">
|
||||
<form id="message-form" autocomplete="off" class="flex items-stretch">
|
||||
<input autocomplete="off" id="message-input" class="bg-white p-2 rounded-lg ml-2 text-sm w-full" type="text" placeholder="Type your message here...">
|
||||
<button id="button-submit" class="bg-blue-500 text-white p-2 rounded-lg ml-2 mr-2 text-sm sm:max-lg:p-5" type="submit">Send</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="lg:w-1/4 p-2 sm:max-lg:hidden">
|
||||
<p class="text-sm">This is a chatbot that uses the GPT-3, Faiss and <a href="https://github.com/hwchase17/langchain" class="text-blue-500 hover:text-blue-800">LangChain</a> to answer questions</p>
|
||||
<br>
|
||||
<p class="text-sm">The source code is available on <a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">Github</a></p><br>
|
||||
<p class="text-sm">Currently It uses python pandas documentation, so it will respond to information relevant to pandas. If you want to train it on different documentation - <a href="https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation" class="text-blue-500 hover:text-blue-800"> please follow this guide </a></p><br>
|
||||
<p class="text-sm">If you want to launch it on your own server - <a href="https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation" class="text-blue-500 hover:text-blue-800"> follow this guide </a></p><br>
|
||||
<label class="block mb-2 text-sm font-medium text-gray-900">Select documentation from DocsHUB</label>
|
||||
<select id="select-docs" class="bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5">
|
||||
<option selected value="default">Choose documentation</option>
|
||||
<option value="default">Default</option>
|
||||
</select>
|
||||
<form action="/api/upload" method="post" enctype="multipart/form-data" class="mt-2">
|
||||
<input type="file" name="file" class="py-4" id="file-upload">
|
||||
<input type="text" name="user" value="local" hidden>
|
||||
<input type="text" name="name" placeholder="Name:">
|
||||
|
||||
|
||||
<button type="submit" class="py-2 px-4 text-white bg-blue-500 rounded-md hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500">
|
||||
Upload
|
||||
</button>
|
||||
</form>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="flex items-center justify-center h-full">
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
|
||||
{% if not api_key_set %}
|
||||
|
||||
<div class="fixed z-10 overflow-y-auto top-0 w-full left-0 show" id="modal">
|
||||
<div class="flex items-center justify-center min-height-100vh pt-4 px-4 pb-20 text-center sm:block sm:p-0">
|
||||
<div class="fixed inset-0 transition-opacity">
|
||||
<div class="absolute inset-0 bg-gray-900 opacity-75" />
|
||||
</div>
|
||||
<span class="hidden sm:inline-block sm:align-middle sm:h-screen">​</span>
|
||||
<div class=" text-sm inline-block align-center bg-white rounded-lg text-left overflow-hidden shadow-xl transform transition-all sm:my-8 sm:align-middle sm:max-w-lg sm:w-full" role="dialog" aria-modal="true" aria-labelledby="modal-headline">
|
||||
<form id="api-key-form">
|
||||
<div class="bg-white px-4 pt-5 pb-4 sm:p-6 sm:pb-4">
|
||||
<h2>Before you can start using DocsGPT we need you to provide an API key for llm. Currently, we support only OpenAI but soon many more. You can find it <a class="text-blue-500 hover:text-blue-800" href="https://platform.openai.com/account/api-keys">here</a></h2><br>
|
||||
<label>OpenAI API key:</label>
|
||||
|
||||
<input id="api-key-input" type="password" class="w-full bg-gray-100 p-2 mt-2 mb-3" placeholder="Paste you Api Key here">
|
||||
|
||||
</div>
|
||||
<div class="bg-gray-200 px-4 py-3 text-right">
|
||||
<button type="submit" class="py-2 px-4 bg-blue-500 text-white rounded hover:bg-blue-700 mr-2">Save</button>
|
||||
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
function docsIndex() {
|
||||
// loads latest index from https://raw.githubusercontent.com/arc53/DocsHUB/main/combined.json
|
||||
// and stores it in localStorage
|
||||
fetch('/api/combine')
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
localStorage.setItem("docsIndex", JSON.stringify(data));
|
||||
localStorage.setItem("docsIndexDate", Date.now());
|
||||
generateOptions()
|
||||
}
|
||||
|
||||
)
|
||||
|
||||
}
|
||||
function generateOptions(){
|
||||
docsIndex = localStorage.getItem('docsIndex')
|
||||
// create option on select with id select-docs
|
||||
var select = document.getElementById("select-docs");
|
||||
// convert docsIndex to json
|
||||
docsIndex = JSON.parse(docsIndex)
|
||||
// create option for each key in docsIndex
|
||||
for (var key in docsIndex) {
|
||||
var option = document.createElement("option");
|
||||
if (docsIndex[key].location == 'docshub'){
|
||||
if (docsIndex[key].name == docsIndex[key].language) {
|
||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
else {
|
||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
option.text = docsIndex[key].name;
|
||||
option.value = docsIndex[key].location + "/" + docsIndex[key].name;
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
{% if not api_key_set %}
|
||||
if (localStorage.getItem('apiKey') === null) {
|
||||
console.log("apiKey is not set")
|
||||
document.getElementById('modal').classList.toggle('hidden')
|
||||
}
|
||||
{% endif %}
|
||||
if (localStorage.getItem('docsIndex') === null) {
|
||||
console.log("docsIndex is not set")
|
||||
docsIndex()
|
||||
}
|
||||
else if (localStorage.getItem("docsIndexDate") < Date.now() - 900000) {
|
||||
console.log("docsIndex is older than 15 minutes")
|
||||
docsIndex()
|
||||
}
|
||||
|
||||
generateOptions()
|
||||
|
||||
</script>
|
||||
{% if not api_key_set %}
|
||||
<script src="{{url_for('static',filename='src/authapi.js')}}"></script>
|
||||
{% endif %}
|
||||
<script src="{{url_for('static',filename='src/chat.js')}}"></script>
|
||||
<script src="{{url_for('static',filename='src/choiceChange.js')}}"></script>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
0
application/vectorstore/__init__.py
Normal file
51
application/vectorstore/base.py
Normal file
@@ -0,0 +1,51 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
from langchain.embeddings import (
|
||||
OpenAIEmbeddings,
|
||||
HuggingFaceEmbeddings,
|
||||
CohereEmbeddings,
|
||||
HuggingFaceInstructEmbeddings,
|
||||
)
|
||||
from application.core.settings import settings
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def search(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def is_azure_configured(self):
|
||||
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
|
||||
|
||||
def _get_embeddings(self, embeddings_name, embeddings_key=None):
|
||||
embeddings_factory = {
|
||||
"openai_text-embedding-ada-002": OpenAIEmbeddings,
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
|
||||
"huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
|
||||
"cohere_medium": CohereEmbeddings
|
||||
}
|
||||
|
||||
if embeddings_name not in embeddings_factory:
|
||||
raise ValueError(f"Invalid embeddings_name: {embeddings_name}")
|
||||
|
||||
if embeddings_name == "openai_text-embedding-ada-002":
|
||||
if self.is_azure_configured():
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
)
|
||||
else:
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
openai_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "cohere_medium":
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
cohere_api_key=embeddings_key
|
||||
)
|
||||
else:
|
||||
embedding_instance = embeddings_factory[embeddings_name]()
|
||||
|
||||
return embedding_instance
|
||||
|
||||
221
application/vectorstore/elasticsearch.py
Normal file
@@ -0,0 +1,221 @@
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
import elasticsearch
|
||||
|
||||
class Document(str):
|
||||
"""Class for storing a piece of text and associated metadata."""
|
||||
|
||||
def __new__(cls, page_content: str, metadata: dict):
|
||||
instance = super().__new__(cls, page_content)
|
||||
instance.page_content = page_content
|
||||
instance.metadata = metadata
|
||||
return instance
|
||||
|
||||
|
||||
|
||||
|
||||
class ElasticsearchStore(BaseVectorStore):
|
||||
_es_connection = None # Class attribute to hold the Elasticsearch connection
|
||||
|
||||
def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
super().__init__()
|
||||
self.path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self.embeddings_key = embeddings_key
|
||||
self.index_name = index_name
|
||||
|
||||
if ElasticsearchStore._es_connection is None:
|
||||
connection_params = {}
|
||||
if settings.ELASTIC_URL:
|
||||
connection_params["hosts"] = [settings.ELASTIC_URL]
|
||||
connection_params["http_auth"] = (settings.ELASTIC_USERNAME, settings.ELASTIC_PASSWORD)
|
||||
elif settings.ELASTIC_CLOUD_ID:
|
||||
connection_params["cloud_id"] = settings.ELASTIC_CLOUD_ID
|
||||
connection_params["basic_auth"] = (settings.ELASTIC_USERNAME, settings.ELASTIC_PASSWORD)
|
||||
else:
|
||||
raise ValueError("Please provide either elasticsearch_url or cloud_id.")
|
||||
|
||||
|
||||
|
||||
ElasticsearchStore._es_connection = elasticsearch.Elasticsearch(**connection_params)
|
||||
|
||||
self.docsearch = ElasticsearchStore._es_connection
|
||||
|
||||
def connect_to_elasticsearch(
|
||||
*,
|
||||
es_url = None,
|
||||
cloud_id = None,
|
||||
api_key = None,
|
||||
username = None,
|
||||
password = None,
|
||||
):
|
||||
try:
|
||||
import elasticsearch
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import elasticsearch python package. "
|
||||
"Please install it with `pip install elasticsearch`."
|
||||
)
|
||||
|
||||
if es_url and cloud_id:
|
||||
raise ValueError(
|
||||
"Both es_url and cloud_id are defined. Please provide only one."
|
||||
)
|
||||
|
||||
connection_params = {}
|
||||
|
||||
if es_url:
|
||||
connection_params["hosts"] = [es_url]
|
||||
elif cloud_id:
|
||||
connection_params["cloud_id"] = cloud_id
|
||||
else:
|
||||
raise ValueError("Please provide either elasticsearch_url or cloud_id.")
|
||||
|
||||
if api_key:
|
||||
connection_params["api_key"] = api_key
|
||||
elif username and password:
|
||||
connection_params["basic_auth"] = (username, password)
|
||||
|
||||
es_client = elasticsearch.Elasticsearch(
|
||||
**connection_params,
|
||||
)
|
||||
try:
|
||||
es_client.info()
|
||||
except Exception as e:
|
||||
raise e
|
||||
|
||||
return es_client
|
||||
|
||||
def search(self, question, k=2, index_name=settings.ELASTIC_INDEX, *args, **kwargs):
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
|
||||
vector = embeddings.embed_query(question)
|
||||
knn = {
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"field": "vector",
|
||||
"k": k,
|
||||
"num_candidates": 100,
|
||||
"query_vector": vector,
|
||||
}
|
||||
full_query = {
|
||||
"knn": knn,
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"match": {
|
||||
"text": {
|
||||
"query": question,
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
}
|
||||
},
|
||||
"rank": {"rrf": {}},
|
||||
}
|
||||
resp = self.docsearch.search(index=self.index_name, query=full_query['query'], size=k, knn=full_query['knn'])
|
||||
# create Documnets objects from the results page_content ['_source']['text'], metadata ['_source']['metadata']
|
||||
doc_list = []
|
||||
for hit in resp['hits']['hits']:
|
||||
|
||||
doc_list.append(Document(page_content = hit['_source']['text'], metadata = hit['_source']['metadata']))
|
||||
return doc_list
|
||||
|
||||
def _create_index_if_not_exists(
|
||||
self, index_name, dims_length
|
||||
):
|
||||
|
||||
if self._es_connection.indices.exists(index=index_name):
|
||||
print(f"Index {index_name} already exists.")
|
||||
|
||||
else:
|
||||
|
||||
indexSettings = self.index(
|
||||
dims_length=dims_length,
|
||||
)
|
||||
self._es_connection.indices.create(index=index_name, **indexSettings)
|
||||
|
||||
def index(
|
||||
self,
|
||||
dims_length,
|
||||
):
|
||||
return {
|
||||
"mappings": {
|
||||
"properties": {
|
||||
"vector": {
|
||||
"type": "dense_vector",
|
||||
"dims": dims_length,
|
||||
"index": True,
|
||||
"similarity": "cosine",
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts,
|
||||
metadatas = None,
|
||||
ids = None,
|
||||
refresh_indices = True,
|
||||
create_index_if_not_exists = True,
|
||||
bulk_kwargs = None,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
from elasticsearch.helpers import BulkIndexError, bulk
|
||||
|
||||
bulk_kwargs = bulk_kwargs or {}
|
||||
import uuid
|
||||
embeddings = []
|
||||
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
||||
requests = []
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
|
||||
|
||||
vectors = embeddings.embed_documents(list(texts))
|
||||
|
||||
dims_length = len(vectors[0])
|
||||
|
||||
if create_index_if_not_exists:
|
||||
self._create_index_if_not_exists(
|
||||
index_name=self.index_name, dims_length=dims_length
|
||||
)
|
||||
|
||||
for i, (text, vector) in enumerate(zip(texts, vectors)):
|
||||
metadata = metadatas[i] if metadatas else {}
|
||||
|
||||
requests.append(
|
||||
{
|
||||
"_op_type": "index",
|
||||
"_index": self.index_name,
|
||||
"text": text,
|
||||
"vector": vector,
|
||||
"metadata": metadata,
|
||||
"_id": ids[i],
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
if len(requests) > 0:
|
||||
try:
|
||||
success, failed = bulk(
|
||||
self._es_connection,
|
||||
requests,
|
||||
stats_only=True,
|
||||
refresh=refresh_indices,
|
||||
**bulk_kwargs,
|
||||
)
|
||||
return ids
|
||||
except BulkIndexError as e:
|
||||
print(f"Error adding texts: {e}")
|
||||
firstError = e.errors[0].get("index", {}).get("error", {})
|
||||
print(f"First error reason: {firstError.get('reason')}")
|
||||
raise e
|
||||
|
||||
else:
|
||||
return []
|
||||
|
||||
def delete_index(self):
|
||||
self._es_connection.delete_by_query(index=self.index_name, query={"match": {
|
||||
"metadata.store.keyword": self.path}},)
|
||||
|
||||
26
application/vectorstore/faiss.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from langchain import FAISS
|
||||
from application.core.settings import settings
|
||||
|
||||
class FaissStore(BaseVectorStore):
|
||||
|
||||
def __init__(self, path, embeddings_key, docs_init=None):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
if docs_init:
|
||||
self.docsearch = FAISS.from_documents(
|
||||
docs_init, self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
)
|
||||
else:
|
||||
self.docsearch = FAISS.load_local(
|
||||
self.path, self._get_embeddings(settings.EMBEDDINGS_NAME, settings.EMBEDDINGS_KEY)
|
||||
)
|
||||
|
||||
def search(self, *args, **kwargs):
|
||||
return self.docsearch.similarity_search(*args, **kwargs)
|
||||
|
||||
def add_texts(self, *args, **kwargs):
|
||||
return self.docsearch.add_texts(*args, **kwargs)
|
||||
|
||||
def save_local(self, *args, **kwargs):
|
||||
return self.docsearch.save_local(*args, **kwargs)
|
||||
16
application/vectorstore/vector_creator.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from application.vectorstore.faiss import FaissStore
|
||||
from application.vectorstore.elasticsearch import ElasticsearchStore
|
||||
|
||||
|
||||
class VectorCreator:
|
||||
vectorstores = {
|
||||
'faiss': FaissStore,
|
||||
'elasticsearch':ElasticsearchStore
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_vectorstore(cls, type, *args, **kwargs):
|
||||
vectorstore_class = cls.vectorstores.get(type.lower())
|
||||
if not vectorstore_class:
|
||||
raise ValueError(f"No vectorstore class found for type {type}")
|
||||
return vectorstore_class(*args, **kwargs)
|
||||
@@ -7,11 +7,11 @@ from urllib.parse import urljoin
|
||||
import nltk
|
||||
import requests
|
||||
|
||||
from core.settings import settings
|
||||
from parser.file.bulk import SimpleDirectoryReader
|
||||
from parser.open_ai_func import call_openai_api
|
||||
from parser.schema.base import Document
|
||||
from parser.token_func import group_split
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
|
||||
try:
|
||||
nltk.download('punkt', quiet=True)
|
||||
@@ -19,12 +19,17 @@ try:
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
|
||||
def metadata_from_filename(title):
|
||||
return {'title': title}
|
||||
store = title.split('/')
|
||||
store = store[1] + '/' + store[2]
|
||||
return {'title': title, 'store': store}
|
||||
|
||||
|
||||
def generate_random_string(length):
|
||||
return ''.join([string.ascii_letters[i % 52] for i in range(length)])
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
# directory = 'inputs' or 'temp'
|
||||
@@ -41,9 +46,13 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
full_path = directory + '/' + user + '/' + name_job
|
||||
import sys
|
||||
print(full_path, file=sys.stderr)
|
||||
# check if API_URL env variable is set
|
||||
file_data = {'name': name_job, 'file': filename, 'user': user}
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
|
||||
# check if file is in the response
|
||||
print(response, file=sys.stderr)
|
||||
file = response.content
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
@@ -76,11 +85,15 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
||||
# and send them to the server (provide user and name in form)
|
||||
file_data = {'name': name_job, 'user': user}
|
||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
else:
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path="))
|
||||
|
||||
# delete local
|
||||
shutil.rmtree(full_path)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app import app
|
||||
from application.app import app
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=5001)
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
2
codecov.yml
Normal file
@@ -0,0 +1,2 @@
|
||||
ignore:
|
||||
- "*/tests/*”
|
||||
71
docker-compose-azure.yaml
Normal file
@@ -0,0 +1,71 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
depends_on:
|
||||
- backend
|
||||
|
||||
backend:
|
||||
build: ./application
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- OPENAI_API_KEY=$OPENAI_API_KEY
|
||||
- OPENAI_API_BASE=$OPENAI_API_BASE
|
||||
- OPENAI_API_VERSION=$OPENAI_API_VERSION
|
||||
- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
|
||||
- AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:7091
|
||||
- OPENAI_API_KEY=$OPENAI_API_KEY
|
||||
- OPENAI_API_BASE=$OPENAI_API_BASE
|
||||
- OPENAI_API_VERSION=$OPENAI_API_VERSION
|
||||
- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
|
||||
- AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
|
||||
mongo:
|
||||
image: mongo:6
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
26
docker-compose-local.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
- VITE_EMBEDDINGS_NAME=$EMBEDDINGS_NAME
|
||||
ports:
|
||||
- "5173:5173"
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
|
||||
mongo:
|
||||
image: mongo:6
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
@@ -4,7 +4,7 @@ services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:5001
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
@@ -19,29 +19,30 @@ services:
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- SELF_HOSTED_MODEL=$SELF_HOSTED_MODEL
|
||||
ports:
|
||||
- "5001:5001"
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/indexes
|
||||
- ./application/inputs:/app/inputs
|
||||
- ./application/vectors:/app/vectors
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:5001
|
||||
- API_URL=http://backend:7091
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
@@ -55,7 +56,5 @@ services:
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
mongodb_data_container:
|
||||
|
||||
1
docs/README.md
Normal file
@@ -0,0 +1 @@
|
||||
# nextra-docsgpt
|
||||
9
docs/next.config.js
Normal file
@@ -0,0 +1,9 @@
|
||||
const withNextra = require('nextra')({
|
||||
theme: 'nextra-theme-docs',
|
||||
themeConfig: './theme.config.jsx'
|
||||
})
|
||||
|
||||
module.exports = withNextra()
|
||||
|
||||
// If you have other Next.js configurations, you can pass them as the parameter:
|
||||
// module.exports = withNextra({ /* other next.js config */ })
|
||||
5975
docs/package-lock.json
generated
Normal file
11
docs/package.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"@vercel/analytics": "^1.0.2",
|
||||
"docsgpt": "^0.2.4",
|
||||
"next": "^13.4.19",
|
||||
"nextra": "^2.12.3",
|
||||
"nextra-theme-docs": "^2.12.3",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0"
|
||||
}
|
||||
}
|
||||
112
docs/pages/Deploying/Hosting-the-app.md
Normal file
@@ -0,0 +1,112 @@
|
||||
# Self-hosting DocsGPT on Amazon Lightsail
|
||||
|
||||
Here's a step-by-step guide on how to setup an Amazon Lightsail instance to host DocsGPT.
|
||||
|
||||
## Configuring your instance
|
||||
|
||||
(If you know how to create a Lightsail instance, you can skip to the recommended configuration part by clicking here)
|
||||
|
||||
### 1. Create an account or login to https://lightsail.aws.amazon.com
|
||||
|
||||
### 2. Click on "Create instance"
|
||||
|
||||
### 3. Create your instance
|
||||
|
||||
The first step is to select the "Instance location". In most cases there's no need to switch locations as the default one will work well.
|
||||
|
||||
After that it is time to pick your Instance Image. We recommend using "Linux/Unix" as the image and "Ubuntu 20.04 LTS" for Operating System.
|
||||
|
||||
As for instance plan, it'll vary depending on your unique demands, but a "1 GB, 1vCPU, 40GB SSD and 2TB transfer" setup should cover most scenarios.
|
||||
|
||||
Lastly, Identify your instance by giving it a unique name and then hit "Create instance".
|
||||
|
||||
PS: Once you create your instance, it'll likely take a few minutes for the setup to be completed.
|
||||
|
||||
#### The recommended configuration is as follows:
|
||||
|
||||
- Ubuntu 20.04 LTS
|
||||
- 1GB RAM
|
||||
- 1vCPU
|
||||
- 40GB SSD Hard Drive
|
||||
- 2TB transfer
|
||||
|
||||
### Connecting to your the newly created instance
|
||||
|
||||
Your instance will be ready for use a few minutes after being created. To access, just open it up and click on "Connect using SSH".
|
||||
|
||||
#### Clone the repository
|
||||
|
||||
A terminal window will pop up, and the first step will be to clone DocsGPT git repository.
|
||||
|
||||
`git clone https://github.com/arc53/DocsGPT.git`
|
||||
|
||||
#### Download the package information
|
||||
|
||||
Once it has finished cloning the repository, it is time to download the package information from all sources. To do so simply enter the following command:
|
||||
|
||||
`sudo apt update`
|
||||
|
||||
#### Install Docker and Docker Compose
|
||||
|
||||
DocsGPT backend and worker use python, Frontend is written on React and the whole application is containerized using Docker. To install Docker and Docker Compose, enter the following commands:
|
||||
|
||||
`sudo apt install docker.io`
|
||||
|
||||
And now install docker-compose:
|
||||
|
||||
`sudo apt install docker-compose`
|
||||
|
||||
#### Access the DocsGPT folder
|
||||
|
||||
Enter the following command to access the folder in which DocsGPT docker-compose file is.
|
||||
|
||||
`cd DocsGPT/`
|
||||
|
||||
#### Prepare the environment
|
||||
|
||||
Inside the DocsGPT folder create a .env file and copy the contents of .env_sample into it.
|
||||
|
||||
`nano .env`
|
||||
|
||||
Make sure your .env file looks like this:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=(Your OpenAI API key)
|
||||
VITE_API_STREAMING=true
|
||||
SELF_HOSTED_MODEL=false
|
||||
```
|
||||
|
||||
To save the file, press CTRL+X, then Y and then ENTER.
|
||||
|
||||
Next we need to set a correct IP for our Backend. To do so, open the docker-compose.yml file:
|
||||
|
||||
`nano docker-compose.yml`
|
||||
|
||||
And change this line 7 `VITE_API_HOST=http://localhost:7091`
|
||||
to this `VITE_API_HOST=http://<your instance public IP>:7091`
|
||||
|
||||
This will allow the frontend to connect to the backend.
|
||||
|
||||
#### Running the app
|
||||
|
||||
You're almost there! Now that all the necessary bits and pieces have been installed, it is time to run the application. To do so, use the following command:
|
||||
|
||||
`sudo docker-compose up -d`
|
||||
|
||||
If you launch it for the first time it will take a few minutes to download all the necessary dependencies and build.
|
||||
|
||||
Once this is done you can go ahead and close the terminal window.
|
||||
|
||||
#### Enabling ports
|
||||
|
||||
Before you being able to access your live instance, you must first enable the port which it is using.
|
||||
|
||||
Open your Lightsail instance and head to "Networking".
|
||||
|
||||
Then click on "Add rule" under "IPv4 Firewall", enter 5173 as your your port and hit "Create".
|
||||
Repeat the process for port 7091.
|
||||
|
||||
#### Access your instance
|
||||
|
||||
Your instance will now be available under your Public IP Address and port 5173. Enjoy!
|
||||
|
||||
23
docs/pages/Deploying/Quickstart.md
Normal file
@@ -0,0 +1,23 @@
|
||||
## Launching Web App
|
||||
Note: Make sure you have docker installed
|
||||
|
||||
1. Open download this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create .env file in your root directory and set your `OPENAI_API_KEY` with your openai api key
|
||||
3. Run `docker-compose build && docker-compose up`
|
||||
4. Navigate to `http://localhost:5173/`
|
||||
|
||||
To stop just run Ctrl + C
|
||||
|
||||
### Chrome Extension
|
||||
|
||||
To install the Chrome extension:
|
||||
|
||||
1. In the DocsGPT GitHub repository, click on the "Code" button and select Download ZIP
|
||||
2. Unzip the downloaded file to a location you can easily access
|
||||
3. Open the Google Chrome browser and click on the three dots menu (upper right corner)
|
||||
4. Select "More Tools" and then "Extensions"
|
||||
5. Turn on the "Developer mode" switch in the top right corner of the Extensions page
|
||||
6. Click on the "Load unpacked" button
|
||||
7. Select the "Chrome" folder where the DocsGPT files have been unzipped (docsgpt-main > extensions > chrome)
|
||||
8. The extension should now be added to Google Chrome and can be managed on the Extensions page
|
||||
9. To disable or remove the extension, simply turn off the toggle switch on the extension card or click the "Remove" button.
|
||||
10
docs/pages/Deploying/_meta.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"Hosting-the-app": {
|
||||
"title": "☁️ Hosting DocsGPT",
|
||||
"href": "/Deploying/Hosting-the-app"
|
||||
},
|
||||
"Quickstart": {
|
||||
"title": "⚡️Quickstart",
|
||||
"href": "/Deploying/Quickstart"
|
||||
}
|
||||
}
|
||||
153
docs/pages/Developing/API-docs.md
Normal file
@@ -0,0 +1,153 @@
|
||||
App currently has two main api endpoints:
|
||||
|
||||
### /api/answer
|
||||
Its a POST request that sends a JSON in body with 4 values. Here is a JavaScript fetch example
|
||||
It will receive an answer for a user provided question
|
||||
|
||||
```js
|
||||
// answer (POST http://127.0.0.1:5000/api/answer)
|
||||
fetch("http://127.0.0.1:5000/api/answer", {
|
||||
"method": "POST",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
"body": JSON.stringify({"question":"Hi","history":null,"api_key":"OPENAI_API_KEY","embeddings_key":"OPENAI_API_KEY",
|
||||
"active_docs": "javascript/.project/ES2015/openai_text-embedding-ada-002/"})
|
||||
})
|
||||
.then((res) => res.text())
|
||||
.then(console.log.bind(console))
|
||||
```
|
||||
|
||||
In response you will get a json document like this one:
|
||||
|
||||
```json
|
||||
{
|
||||
"answer": " Hi there! How can I help you?\n",
|
||||
"query": "Hi",
|
||||
"result": " Hi there! How can I help you?\nSOURCES:"
|
||||
}
|
||||
```
|
||||
|
||||
### /api/docs_check
|
||||
It will make sure documentation is loaded on a server (just run it every time user is switching between libraries (documentations)
|
||||
Its a POST request that sends a JSON in body with 1 value. Here is a JavaScript fetch example
|
||||
|
||||
```js
|
||||
// answer (POST http://127.0.0.1:5000/api/docs_check)
|
||||
fetch("http://127.0.0.1:5000/api/docs_check", {
|
||||
"method": "POST",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
"body": JSON.stringify({"docs":"javascript/.project/ES2015/openai_text-embedding-ada-002/"})
|
||||
})
|
||||
.then((res) => res.text())
|
||||
.then(console.log.bind(console))
|
||||
```
|
||||
|
||||
In response you will get a json document like this one:
|
||||
```json
|
||||
{
|
||||
"status": "exists"
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
### /api/combine
|
||||
Provides json that tells UI which vectors are available and where they are located with a simple get request
|
||||
|
||||
Respsonse will include:
|
||||
date, description, docLink, fullName, language, location (local or docshub), model, name, version
|
||||
|
||||
Example of json in Docshub and local:
|
||||
<img width="295" alt="image" src="https://user-images.githubusercontent.com/15183589/224714085-f09f51a4-7a9a-4efb-bd39-798029bb4273.png">
|
||||
|
||||
|
||||
### /api/upload
|
||||
Uploads file that needs to be trained, response is json with task id, which can be used to check on tasks progress
|
||||
HTML example:
|
||||
|
||||
```html
|
||||
<form action="/api/upload" method="post" enctype="multipart/form-data" class="mt-2">
|
||||
<input type="file" name="file" class="py-4" id="file-upload">
|
||||
<input type="text" name="user" value="local" hidden>
|
||||
<input type="text" name="name" placeholder="Name:">
|
||||
|
||||
|
||||
<button type="submit" class="py-2 px-4 text-white bg-blue-500 rounded-md hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500">
|
||||
Upload
|
||||
</button>
|
||||
</form>
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"task_id": "b2684988-9047-428b-bd47-08518679103c"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### /api/task_status
|
||||
Gets task status (task_id) from /api/upload
|
||||
```js
|
||||
// Task status (Get http://127.0.0.1:5000/api/task_status)
|
||||
fetch("http://localhost:5001/api/task_status?task_id=b2d2a0f4-387c-44fd-a443-e4fe2e7454d1", {
|
||||
"method": "GET",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
})
|
||||
.then((res) => res.text())
|
||||
.then(console.log.bind(console))
|
||||
```
|
||||
|
||||
Responses:
|
||||
There are two types of responses:
|
||||
1. while task it still running, where "current" will show progress from 0 - 100
|
||||
```json
|
||||
{
|
||||
"result": {
|
||||
"current": 1
|
||||
},
|
||||
"status": "PROGRESS"
|
||||
}
|
||||
```
|
||||
|
||||
2. When task is completed
|
||||
```json
|
||||
{
|
||||
"result": {
|
||||
"directory": "temp",
|
||||
"filename": "install.rst",
|
||||
"formats": [
|
||||
".rst",
|
||||
".md",
|
||||
".pdf"
|
||||
],
|
||||
"name_job": "somename",
|
||||
"user": "local"
|
||||
},
|
||||
"status": "SUCCESS"
|
||||
}
|
||||
```
|
||||
|
||||
### /api/delete_old
|
||||
deletes old vecotstores
|
||||
```js
|
||||
// Task status (GET http://127.0.0.1:5000/api/docs_check)
|
||||
fetch("http://localhost:5001/api/task_status?task_id=b2d2a0f4-387c-44fd-a443-e4fe2e7454d1", {
|
||||
"method": "GET",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
})
|
||||
.then((res) => res.text())
|
||||
.then(console.log.bind(console))
|
||||
```
|
||||
response:
|
||||
|
||||
```json
|
||||
{ "status": "ok" }
|
||||
```
|
||||
6
docs/pages/Developing/_meta.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"API-docs": {
|
||||
"title": "🗂️️ API-docs",
|
||||
"href": "/Developing/API-docs"
|
||||
}
|
||||
}
|
||||
29
docs/pages/Extensions/Chatwoot-extension.md
Normal file
@@ -0,0 +1,29 @@
|
||||
### To start chatwoot extension:
|
||||
1. Prepare and start the DocsGPT itself (load your documentation too)
|
||||
Follow our [wiki](https://github.com/arc53/DocsGPT/wiki) to start it and to [ingest](https://github.com/arc53/DocsGPT/wiki/How-to-train-on-other-documentation) data
|
||||
2. Go to chatwoot, Navigate to your profile (bottom left), click on profile settings, scroll to the bottom and copy Access Token
|
||||
2. Navigate to `/extensions/chatwoot`. Copy .env_sample and create .env file
|
||||
3. Fill in the values
|
||||
|
||||
```
|
||||
docsgpt_url=<docsgpt_api_url>
|
||||
chatwoot_url=<chatwoot_url>
|
||||
docsgpt_key=<openai_api_key or other llm key>
|
||||
chatwoot_token=<from part 2>
|
||||
```
|
||||
|
||||
4. start with `flask run` command
|
||||
|
||||
If you want for bot to stop responding to questions for a specific user or session just add label `human-requested` in your conversation
|
||||
|
||||
|
||||
### Optional (extra validation)
|
||||
In app.py uncomment lines 12-13 and 71-75
|
||||
|
||||
in your .env file add:
|
||||
|
||||
`account_id=(optional) 1 `
|
||||
|
||||
`assignee_id=(optional) 1`
|
||||
|
||||
Those are chatwoot values and will allow you to check if you are responding to correct widget and responding to questions assigned to specific user
|
||||
10
docs/pages/Extensions/_meta.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"Chatwoot-extension": {
|
||||
"title": "💬️ Chatwoot Extension",
|
||||
"href": "/Extensions/Chatwoot-extension"
|
||||
},
|
||||
"react-widget": {
|
||||
"title": "🏗️ Widget setup",
|
||||
"href": "/Extensions/react-widget"
|
||||
}
|
||||
}
|
||||
37
docs/pages/Extensions/react-widget.md
Normal file
@@ -0,0 +1,37 @@
|
||||
### How to set up react docsGPT widget on your website:
|
||||
|
||||
### Installation
|
||||
Got to your project and install a new dependency: `npm install docsgpt`
|
||||
|
||||
### Usage
|
||||
Go to your project and in the file where you want to use the widget import it:
|
||||
```js
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
import "docsgpt/dist/style.css";
|
||||
```
|
||||
|
||||
|
||||
Then you can use it like this: `<DocsGPTWidget />`
|
||||
|
||||
DocsGPTWidget takes 3 props:
|
||||
- `apiHost` - url of your DocsGPT API
|
||||
- `selectDocs` - documentation that you want to use for your widget (eg. `default` or `local/docs1.zip`)
|
||||
- `apiKey` - usually its empty
|
||||
|
||||
### How to use DocsGPTWidget with [Nextra](https://nextra.site/) (Next.js + MDX)
|
||||
Install you widget as described above and then go to your `pages/` folder and create a new file `_app.js` with the following content:
|
||||
```js
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
import "docsgpt/dist/style.css";
|
||||
|
||||
export default function MyApp({ Component, pageProps }) {
|
||||
return (
|
||||
<>
|
||||
<Component {...pageProps} />
|
||||
<DocsGPTWidget selectDocs="local/docsgpt-sep.zip/"/>
|
||||
</>
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
4
docs/pages/Guides/Customising-prompts.md
Normal file
@@ -0,0 +1,4 @@
|
||||
## To customise a main prompt navigate to `/application/prompt/combine_prompt.txt`
|
||||
|
||||
You can try editing it to see how the model responds.
|
||||
|
||||
60
docs/pages/Guides/How-to-train-on-other-documentation.md
Normal file
@@ -0,0 +1,60 @@
|
||||
## How to train on other documentation
|
||||
This AI can use any documentation, but first it needs to be prepared for similarity search.
|
||||
|
||||

|
||||
|
||||
Start by going to
|
||||
`/scripts/` folder
|
||||
|
||||
If you open this file you will see that it uses RST files from the folder to create a `index.faiss` and `index.pkl`.
|
||||
|
||||
It currently uses OPEN_AI to create vector store, so make sure your documentation is not too big. Pandas cost me around 3-4$
|
||||
|
||||
You can usually find documentation on github in docs/ folder for most open-source projects.
|
||||
|
||||
### 1. Find documentation in .rst/.md and create a folder with it in your scripts directory
|
||||
Name it `inputs/`
|
||||
Put all your .rst/.md files in there
|
||||
The search is recursive, so you don't need to flatten them
|
||||
|
||||
If there are no .rst/.md files just convert whatever you find to txt and feed it. (don't forget to change the extension in script)
|
||||
|
||||
### 2. Create .env file in `scripts/` folder
|
||||
And write your OpenAI API key inside
|
||||
`OPENAI_API_KEY=<your-api-key>`
|
||||
|
||||
### 3. Run scripts/ingest.py
|
||||
|
||||
`python ingest.py ingest`
|
||||
|
||||
It will tell you how much it will cost
|
||||
|
||||
### 4. Move `index.faiss` and `index.pkl` generated in `scripts/output` to `application/` folder.
|
||||
|
||||
|
||||
### 5. Run web app
|
||||
Once you run it will use new context that is relevant to your documentation
|
||||
Make sure you select default in the dropdown in the UI
|
||||
|
||||
## Customisation
|
||||
You can learn more about options while running ingest.py by running:
|
||||
|
||||
`python ingest.py --help`
|
||||
| Options | |
|
||||
|:--------------------------------:|:------------------------------------------------------------------------------------------------------------------------------:|
|
||||
| **ingest** | Runs 'ingest' function converting documentation to to Faiss plus Index format |
|
||||
| --dir TEXT | List of paths to directory for index creation. E.g. --dir inputs --dir inputs2 [default: inputs] |
|
||||
| --file TEXT | File paths to use (Optional; overrides directory) E.g. --files inputs/1.md --files inputs/2.md |
|
||||
| --recursive / --no-recursive | Whether to recursively search in subdirectories [default: recursive] |
|
||||
| --limit INTEGER | Maximum number of files to read |
|
||||
| --formats TEXT | List of required extensions (list with .) Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html [default: .rst, .md] |
|
||||
| --exclude / --no-exclude | Whether to exclude hidden files (dotfiles) [default: exclude] |
|
||||
| -y, --yes | Whether to skip price confirmation |
|
||||
| --sample / --no-sample | Whether to output sample of the first 5 split documents. [default: no-sample] |
|
||||
| --token-check / --no-token-check | Whether to group small documents and split large. Improves semantics. [default: token-check] |
|
||||
| --min_tokens INTEGER | Minimum number of tokens to not group. [default: 150] |
|
||||
| --max_tokens INTEGER | Maximum number of tokens to not split. [default: 2000] |
|
||||
| | |
|
||||
| **convert** | Creates documentation in .md format from source code |
|
||||
| --dir TEXT | Path to a directory with source code. E.g. --dir inputs [default: inputs] |
|
||||
| --formats TEXT | Source code language from which to create documentation. Supports py, js and java. E.g. --formats py [default: py] |
|
||||
32
docs/pages/Guides/How-to-use-different-LLM.md
Normal file
@@ -0,0 +1,32 @@
|
||||
Fortunately there are many providers for LLM's and some of them can even be ran locally
|
||||
|
||||
There are two models used in the app:
|
||||
1. Embeddings
|
||||
2. Text generation
|
||||
|
||||
By default we use OpenAI's models but if you want to change it or even run it locally, its very simple!
|
||||
|
||||
### Go to .env file or set environment variables:
|
||||
|
||||
`LLM_NAME=<your Text generation>`
|
||||
|
||||
`API_KEY=<api_key for Text generation>`
|
||||
|
||||
`EMBEDDINGS_NAME=<llm for embeddings>`
|
||||
|
||||
`EMBEDDINGS_KEY=<api_key for embeddings>`
|
||||
|
||||
`VITE_API_STREAMING=<true or false (true if using openai, false for all others)>`
|
||||
|
||||
You dont need to provide keys if you are happy with users providing theirs, so make sure you set LLM_NAME and EMBEDDINGS_NAME
|
||||
|
||||
Options:
|
||||
LLM_NAME (openai, manifest, cohere, Arc53/docsgpt-14b, Arc53/docsgpt-7b-falcon)
|
||||
EMBEDDINGS_NAME (openai_text-embedding-ada-002, huggingface_sentence-transformers/all-mpnet-base-v2, huggingface_hkunlp/instructor-large, cohere_medium)
|
||||
|
||||
That's it!
|
||||
|
||||
### Hosting everything locally and privately (for using our optimised open-source models)
|
||||
If you are working with important data and dont want anything to leave your premises.
|
||||
|
||||
Make sure you set SELF_HOSTED_MODEL as true in you .env variable and for your LLM_NAME you can use anything that's on Huggingface
|
||||
@@ -0,0 +1,19 @@
|
||||
If your AI uses external knowledge and is not explicit enough it is ok, because we try to make docsgpt friendly.
|
||||
|
||||
But if you want to adjust it, here is a simple way.
|
||||
|
||||
Got to `application/prompts/chat_combine_prompt.txt`
|
||||
|
||||
And change it to
|
||||
|
||||
|
||||
```
|
||||
|
||||
You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples, if possible.
|
||||
Write an answer for the question below based on the provided context.
|
||||
If the context provides insufficient information, reply "I cannot answer".
|
||||
You have access to chat history and can use it to help answer the question.
|
||||
----------------
|
||||
{summaries}
|
||||
|
||||
```
|
||||
18
docs/pages/Guides/_meta.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"Customising-prompts": {
|
||||
"title": "🏗️️ Customising Prompts",
|
||||
"href": "/Guides/Customising-prompts"
|
||||
},
|
||||
"How-to-train-on-other-documentation": {
|
||||
"title": "📥 Training on docs",
|
||||
"href": "/Guides/How-to-train-on-other-documentation"
|
||||
},
|
||||
"How-to-use-different-LLM": {
|
||||
"title": "⚙️️ How to use different LLM's",
|
||||
"href": "/Guides/How-to-use-different-LLM"
|
||||
},
|
||||
"My-AI-answers-questions-using-external-knowledge": {
|
||||
"title": "💭️ Avoiding hallucinations",
|
||||
"href": "/Guides/My-AI-answers-questions-using-external-knowledge"
|
||||
}
|
||||
}
|
||||
11
docs/pages/_app.js
Normal file
@@ -0,0 +1,11 @@
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
import "docsgpt/dist/style.css";
|
||||
|
||||
export default function MyApp({ Component, pageProps }) {
|
||||
return (
|
||||
<>
|
||||
<Component {...pageProps} />
|
||||
<DocsGPTWidget selectDocs="local/docsgpt-sep.zip/"/>
|
||||
</>
|
||||
)
|
||||
}
|
||||
37
docs/pages/index.mdx
Normal file
@@ -0,0 +1,37 @@
|
||||
---
|
||||
title: 'Home'
|
||||
---
|
||||
import { Cards, Card } from 'nextra/components'
|
||||
import deployingGuides from './Deploying/_meta.json';
|
||||
import developingGuides from './Developing/_meta.json';
|
||||
import extensionGuides from './Extensions/_meta.json';
|
||||
import mainGuides from './Guides/_meta.json';
|
||||
|
||||
|
||||
|
||||
|
||||
export const allGuides = {
|
||||
...mainGuides,
|
||||
...developingGuides,
|
||||
...deployingGuides,
|
||||
...extensionGuides,
|
||||
};
|
||||
|
||||
### **DocsGPT 🦖**
|
||||
|
||||
DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖. Eliminate lengthy manual searches 🔍 and enhance your documentation experience with DocsGPT, and consider contributing to its AI-powered future 🚀.
|
||||
|
||||
Our demo: [https://docsgpt.arc53.com/](https://docsgpt.arc53.com/)
|
||||
|
||||
Want to earn a cool shirt by submitting a **meaningful** PR, check out [Hacktoberfest](https://github.com/arc53/DocsGPT/blob/main/HACKTOBERFEST.md) guide.
|
||||
|
||||
<Cards
|
||||
num={3}
|
||||
children={Object.keys(allGuides).map((key, i) => (
|
||||
<Card
|
||||
key={i}
|
||||
title={allGuides[key].title}
|
||||
href={allGuides[key].href}
|
||||
/>
|
||||
))}
|
||||
/>
|
||||
BIN
docs/public/cute-docsgpt.png
Normal file
|
After Width: | Height: | Size: 191 KiB |
BIN
docs/public/favicons/apple-touch-icon.png
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
docs/public/favicons/favicon-16x16.png
Normal file
|
After Width: | Height: | Size: 1.1 KiB |
BIN
docs/public/favicons/favicon-32x32.png
Normal file
|
After Width: | Height: | Size: 1.9 KiB |
19
docs/public/favicons/site.webmanifest
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"name": "",
|
||||
"short_name": "",
|
||||
"icons": [
|
||||
{
|
||||
"src": "/android-chrome-192x192.png",
|
||||
"sizes": "192x192",
|
||||
"type": "image/png"
|
||||
},
|
||||
{
|
||||
"src": "/android-chrome-512x512.png",
|
||||
"sizes": "512x512",
|
||||
"type": "image/png"
|
||||
}
|
||||
],
|
||||
"theme_color": "#ffffff",
|
||||
"background_color": "#ffffff",
|
||||
"display": "standalone"
|
||||
}
|
||||
143
docs/theme.config.jsx
Normal file
@@ -0,0 +1,143 @@
|
||||
import Image from 'next/image'
|
||||
import { Analytics } from '@vercel/analytics/react';
|
||||
|
||||
const github = 'https://github.com/arc53/DocsGPT';
|
||||
|
||||
|
||||
|
||||
|
||||
import { useConfig, useTheme } from 'nextra-theme-docs';
|
||||
import CuteLogo from './public/cute-docsgpt.png';
|
||||
const Logo = ({ height, width }) => {
|
||||
const { theme } = useTheme();
|
||||
return (
|
||||
<div style={{ alignItems: 'center', display: 'flex', gap: '8px' }}>
|
||||
<Image src={CuteLogo} alt="DocsGPT logo" width={width} height={height} />
|
||||
|
||||
<span style={{ fontWeight: 'bold', fontSize: 18 }}>DocsGPT Docs</span>
|
||||
|
||||
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const config = {
|
||||
docsRepositoryBase: `${github}/blob/main/docs`,
|
||||
chat: {
|
||||
link: 'https://discord.com/invite/n5BX8dh8rU',
|
||||
},
|
||||
banner: {
|
||||
key: 'docs-launch',
|
||||
text: (
|
||||
<div className="flex justify-center items-center gap-2">
|
||||
Welcome to the new DocsGPT 🦖 docs! 👋
|
||||
</div>
|
||||
),
|
||||
},
|
||||
toc: {
|
||||
float: true,
|
||||
},
|
||||
project: {
|
||||
link: github,
|
||||
},
|
||||
darkMode: true,
|
||||
nextThemes: {
|
||||
defaultTheme: 'dark',
|
||||
},
|
||||
primaryHue: {
|
||||
dark: 207,
|
||||
light: 212,
|
||||
},
|
||||
footer: {
|
||||
text: `MIT ${new Date().getFullYear()} © DocsGPT`,
|
||||
},
|
||||
logo() {
|
||||
return (
|
||||
<div className="flex items-center gap-2">
|
||||
<Logo width={28} height={28} />
|
||||
</div>
|
||||
);
|
||||
},
|
||||
useNextSeoProps() {
|
||||
return {
|
||||
titleTemplate: `%s - DocsGPT Documentation`,
|
||||
};
|
||||
},
|
||||
|
||||
head() {
|
||||
const { frontMatter } = useConfig();
|
||||
const { theme } = useTheme();
|
||||
const title = frontMatter?.title || 'Chat with your data with DocsGPT';
|
||||
const description =
|
||||
frontMatter?.description ||
|
||||
'Use DocsGPT to chat with your data. DocsGPT is a GPT powered chatbot that can answer questions about your data.'
|
||||
const image = '/cute-docsgpt.png';
|
||||
|
||||
const composedTitle = `${title} – DocsGPT Documentation`;
|
||||
|
||||
return (
|
||||
<>
|
||||
<link
|
||||
rel="apple-touch-icon"
|
||||
sizes="180x180"
|
||||
href={`/favicons/apple-touch-icon.png`}
|
||||
/>
|
||||
<link
|
||||
rel="icon"
|
||||
type="image/png"
|
||||
sizes="32x32"
|
||||
href={`/favicons/favicon-32x32.png`}
|
||||
/>
|
||||
<link
|
||||
rel="icon"
|
||||
type="image/png"
|
||||
sizes="16x16"
|
||||
href={`/favicons/favicon-16x16.png`}
|
||||
/>
|
||||
<meta name="theme-color" content="#ffffff" />
|
||||
<meta name="msapplication-TileColor" content="#00a300" />
|
||||
<link rel="manifest" href={`/favicons/site.webmanifest`} />
|
||||
<meta httpEquiv="Content-Language" content="en" />
|
||||
<meta name="title" content={composedTitle} />
|
||||
<meta name="description" content={description} />
|
||||
|
||||
<meta name="twitter:card" content="summary_large_image" />
|
||||
<meta name="twitter:site" content="@ATushynski" />
|
||||
<meta name="twitter:image" content={image} />
|
||||
|
||||
<meta property="og:description" content={description} />
|
||||
<meta property="og:title" content={composedTitle} />
|
||||
<meta property="og:image" content={image} />
|
||||
<meta property="og:type" content="website" />
|
||||
<meta
|
||||
name="apple-mobile-web-app-title"
|
||||
content="DocsGPT Documentation"
|
||||
/>
|
||||
|
||||
</>
|
||||
);
|
||||
},
|
||||
sidebar: {
|
||||
defaultMenuCollapseLevel: 1,
|
||||
titleComponent: ({ title, type }) =>
|
||||
type === 'separator' ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<Logo height={10} width={10} />
|
||||
{title}
|
||||
<Analytics />
|
||||
</div>
|
||||
|
||||
) : (
|
||||
<>{title}
|
||||
<Analytics />
|
||||
</>
|
||||
|
||||
),
|
||||
},
|
||||
|
||||
gitTimestamp: ({ timestamp }) => (
|
||||
<>Last updated on {timestamp.toLocaleDateString()}</>
|
||||
),
|
||||
};
|
||||
|
||||
export default config;
|
||||
0
extensions/chatwoot/__init__.py
Normal file
@@ -21,7 +21,7 @@ document.getElementById("message-form").addEventListener("submit", function(even
|
||||
}
|
||||
|
||||
// send post request to server http://127.0.0.1:5000/ with message in json body
|
||||
fetch('http://127.0.0.1:5001/api/answer', {
|
||||
fetch('http://127.0.0.1:7091/api/answer', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
0
extensions/discord/__init__.py
Normal file
@@ -11,7 +11,7 @@ dotenv.load_dotenv()
|
||||
# Replace 'YOUR_BOT_TOKEN' with your bot's token
|
||||
TOKEN = os.getenv("DISCORD_TOKEN")
|
||||
PREFIX = '@DocsGPT'
|
||||
BASE_API_URL = 'http://localhost:5001'
|
||||
BASE_API_URL = 'http://localhost:7091'
|
||||
|
||||
intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
|
||||
40
extensions/react-widget/README.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# DocsGPT react widget
|
||||
|
||||
|
||||
THis widget will allow you to embed a DocsGPT assistant in your react app.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
npm install docsgpt
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```javascript
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
import "docsgpt/dist/style.css";
|
||||
|
||||
const App = () => {
|
||||
return <DocsGPTWidget />;
|
||||
};
|
||||
```
|
||||
|
||||
To link the widget to your api and your documents you can pass parameters to the <DocsGPTWidget /> component.
|
||||
|
||||
```javascript
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
import "docsgpt/dist/style.css";
|
||||
|
||||
const App = () => {
|
||||
return <DocsGPTWidget apiHost="http://localhost:7001" selectDocs='default' apiKey=''/>;
|
||||
};
|
||||
```
|
||||
|
||||
|
||||
## Our github
|
||||
|
||||
[DocsGPT](https://github.com/arc53/DocsGPT)
|
||||
|
||||
You can find the source code in the extensions/react-widget folder.
|
||||
|
||||
1
extensions/react-widget/dist/index.d.ts
vendored
Normal file
@@ -0,0 +1 @@
|
||||
export { DocsGPTWidget } from "./src/components/DocsGPTWidget";
|
||||
832
extensions/react-widget/dist/index.es.js
vendored
Normal file
@@ -0,0 +1,832 @@
|
||||
import Ne, { useState as ke, useRef as ur, useEffect as Pe } from "react";
|
||||
var ne = { exports: {} }, Y = {};
|
||||
/**
|
||||
* @license React
|
||||
* react-jsx-runtime.production.min.js
|
||||
*
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This source code is licensed under the MIT license found in the
|
||||
* LICENSE file in the root directory of this source tree.
|
||||
*/
|
||||
var Ce;
|
||||
function cr() {
|
||||
if (Ce)
|
||||
return Y;
|
||||
Ce = 1;
|
||||
var N = Ne, w = Symbol.for("react.element"), C = Symbol.for("react.fragment"), u = Object.prototype.hasOwnProperty, E = N.__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED.ReactCurrentOwner, S = { key: !0, ref: !0, __self: !0, __source: !0 };
|
||||
function T(b, d, v) {
|
||||
var m, h = {}, x = null, p = null;
|
||||
v !== void 0 && (x = "" + v), d.key !== void 0 && (x = "" + d.key), d.ref !== void 0 && (p = d.ref);
|
||||
for (m in d)
|
||||
u.call(d, m) && !S.hasOwnProperty(m) && (h[m] = d[m]);
|
||||
if (b && b.defaultProps)
|
||||
for (m in d = b.defaultProps, d)
|
||||
h[m] === void 0 && (h[m] = d[m]);
|
||||
return { $$typeof: w, type: b, key: x, ref: p, props: h, _owner: E.current };
|
||||
}
|
||||
return Y.Fragment = C, Y.jsx = T, Y.jsxs = T, Y;
|
||||
}
|
||||
var L = {};
|
||||
/**
|
||||
* @license React
|
||||
* react-jsx-runtime.development.js
|
||||
*
|
||||
* Copyright (c) Facebook, Inc. and its affiliates.
|
||||
*
|
||||
* This source code is licensed under the MIT license found in the
|
||||
* LICENSE file in the root directory of this source tree.
|
||||
*/
|
||||
var Oe;
|
||||
function fr() {
|
||||
return Oe || (Oe = 1, process.env.NODE_ENV !== "production" && function() {
|
||||
var N = Ne, w = Symbol.for("react.element"), C = Symbol.for("react.portal"), u = Symbol.for("react.fragment"), E = Symbol.for("react.strict_mode"), S = Symbol.for("react.profiler"), T = Symbol.for("react.provider"), b = Symbol.for("react.context"), d = Symbol.for("react.forward_ref"), v = Symbol.for("react.suspense"), m = Symbol.for("react.suspense_list"), h = Symbol.for("react.memo"), x = Symbol.for("react.lazy"), p = Symbol.for("react.offscreen"), R = Symbol.iterator, j = "@@iterator";
|
||||
function J(e) {
|
||||
if (e === null || typeof e != "object")
|
||||
return null;
|
||||
var r = R && e[R] || e[j];
|
||||
return typeof r == "function" ? r : null;
|
||||
}
|
||||
var O = N.__SECRET_INTERNALS_DO_NOT_USE_OR_YOU_WILL_BE_FIRED;
|
||||
function g(e) {
|
||||
{
|
||||
for (var r = arguments.length, t = new Array(r > 1 ? r - 1 : 0), n = 1; n < r; n++)
|
||||
t[n - 1] = arguments[n];
|
||||
B("error", e, t);
|
||||
}
|
||||
}
|
||||
function B(e, r, t) {
|
||||
{
|
||||
var n = O.ReactDebugCurrentFrame, o = n.getStackAddendum();
|
||||
o !== "" && (r += "%s", t = t.concat([o]));
|
||||
var s = t.map(function(i) {
|
||||
return String(i);
|
||||
});
|
||||
s.unshift("Warning: " + r), Function.prototype.apply.call(console[e], console, s);
|
||||
}
|
||||
}
|
||||
var D = !1, z = !1, De = !1, Ae = !1, Fe = !1, ae;
|
||||
ae = Symbol.for("react.module.reference");
|
||||
function Ie(e) {
|
||||
return !!(typeof e == "string" || typeof e == "function" || e === u || e === S || Fe || e === E || e === v || e === m || Ae || e === p || D || z || De || typeof e == "object" && e !== null && (e.$$typeof === x || e.$$typeof === h || e.$$typeof === T || e.$$typeof === b || e.$$typeof === d || // This needs to include all possible module reference object
|
||||
// types supported by any Flight configuration anywhere since
|
||||
// we don't know which Flight build this will end up being used
|
||||
// with.
|
||||
e.$$typeof === ae || e.getModuleId !== void 0));
|
||||
}
|
||||
function $e(e, r, t) {
|
||||
var n = e.displayName;
|
||||
if (n)
|
||||
return n;
|
||||
var o = r.displayName || r.name || "";
|
||||
return o !== "" ? t + "(" + o + ")" : t;
|
||||
}
|
||||
function ie(e) {
|
||||
return e.displayName || "Context";
|
||||
}
|
||||
function k(e) {
|
||||
if (e == null)
|
||||
return null;
|
||||
if (typeof e.tag == "number" && g("Received an unexpected object in getComponentNameFromType(). This is likely a bug in React. Please file an issue."), typeof e == "function")
|
||||
return e.displayName || e.name || null;
|
||||
if (typeof e == "string")
|
||||
return e;
|
||||
switch (e) {
|
||||
case u:
|
||||
return "Fragment";
|
||||
case C:
|
||||
return "Portal";
|
||||
case S:
|
||||
return "Profiler";
|
||||
case E:
|
||||
return "StrictMode";
|
||||
case v:
|
||||
return "Suspense";
|
||||
case m:
|
||||
return "SuspenseList";
|
||||
}
|
||||
if (typeof e == "object")
|
||||
switch (e.$$typeof) {
|
||||
case b:
|
||||
var r = e;
|
||||
return ie(r) + ".Consumer";
|
||||
case T:
|
||||
var t = e;
|
||||
return ie(t._context) + ".Provider";
|
||||
case d:
|
||||
return $e(e, e.render, "ForwardRef");
|
||||
case h:
|
||||
var n = e.displayName || null;
|
||||
return n !== null ? n : k(e.type) || "Memo";
|
||||
case x: {
|
||||
var o = e, s = o._payload, i = o._init;
|
||||
try {
|
||||
return k(i(s));
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
var A = Object.assign, $ = 0, oe, se, le, ue, ce, fe, de;
|
||||
function ve() {
|
||||
}
|
||||
ve.__reactDisabledLog = !0;
|
||||
function We() {
|
||||
{
|
||||
if ($ === 0) {
|
||||
oe = console.log, se = console.info, le = console.warn, ue = console.error, ce = console.group, fe = console.groupCollapsed, de = console.groupEnd;
|
||||
var e = {
|
||||
configurable: !0,
|
||||
enumerable: !0,
|
||||
value: ve,
|
||||
writable: !0
|
||||
};
|
||||
Object.defineProperties(console, {
|
||||
info: e,
|
||||
log: e,
|
||||
warn: e,
|
||||
error: e,
|
||||
group: e,
|
||||
groupCollapsed: e,
|
||||
groupEnd: e
|
||||
});
|
||||
}
|
||||
$++;
|
||||
}
|
||||
}
|
||||
function Ye() {
|
||||
{
|
||||
if ($--, $ === 0) {
|
||||
var e = {
|
||||
configurable: !0,
|
||||
enumerable: !0,
|
||||
writable: !0
|
||||
};
|
||||
Object.defineProperties(console, {
|
||||
log: A({}, e, {
|
||||
value: oe
|
||||
}),
|
||||
info: A({}, e, {
|
||||
value: se
|
||||
}),
|
||||
warn: A({}, e, {
|
||||
value: le
|
||||
}),
|
||||
error: A({}, e, {
|
||||
value: ue
|
||||
}),
|
||||
group: A({}, e, {
|
||||
value: ce
|
||||
}),
|
||||
groupCollapsed: A({}, e, {
|
||||
value: fe
|
||||
}),
|
||||
groupEnd: A({}, e, {
|
||||
value: de
|
||||
})
|
||||
});
|
||||
}
|
||||
$ < 0 && g("disabledDepth fell below zero. This is a bug in React. Please file an issue.");
|
||||
}
|
||||
}
|
||||
var H = O.ReactCurrentDispatcher, K;
|
||||
function V(e, r, t) {
|
||||
{
|
||||
if (K === void 0)
|
||||
try {
|
||||
throw Error();
|
||||
} catch (o) {
|
||||
var n = o.stack.trim().match(/\n( *(at )?)/);
|
||||
K = n && n[1] || "";
|
||||
}
|
||||
return `
|
||||
` + K + e;
|
||||
}
|
||||
}
|
||||
var X = !1, M;
|
||||
{
|
||||
var Le = typeof WeakMap == "function" ? WeakMap : Map;
|
||||
M = new Le();
|
||||
}
|
||||
function pe(e, r) {
|
||||
if (!e || X)
|
||||
return "";
|
||||
{
|
||||
var t = M.get(e);
|
||||
if (t !== void 0)
|
||||
return t;
|
||||
}
|
||||
var n;
|
||||
X = !0;
|
||||
var o = Error.prepareStackTrace;
|
||||
Error.prepareStackTrace = void 0;
|
||||
var s;
|
||||
s = H.current, H.current = null, We();
|
||||
try {
|
||||
if (r) {
|
||||
var i = function() {
|
||||
throw Error();
|
||||
};
|
||||
if (Object.defineProperty(i.prototype, "props", {
|
||||
set: function() {
|
||||
throw Error();
|
||||
}
|
||||
}), typeof Reflect == "object" && Reflect.construct) {
|
||||
try {
|
||||
Reflect.construct(i, []);
|
||||
} catch (P) {
|
||||
n = P;
|
||||
}
|
||||
Reflect.construct(e, [], i);
|
||||
} else {
|
||||
try {
|
||||
i.call();
|
||||
} catch (P) {
|
||||
n = P;
|
||||
}
|
||||
e.call(i.prototype);
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
throw Error();
|
||||
} catch (P) {
|
||||
n = P;
|
||||
}
|
||||
e();
|
||||
}
|
||||
} catch (P) {
|
||||
if (P && n && typeof P.stack == "string") {
|
||||
for (var a = P.stack.split(`
|
||||
`), y = n.stack.split(`
|
||||
`), c = a.length - 1, f = y.length - 1; c >= 1 && f >= 0 && a[c] !== y[f]; )
|
||||
f--;
|
||||
for (; c >= 1 && f >= 0; c--, f--)
|
||||
if (a[c] !== y[f]) {
|
||||
if (c !== 1 || f !== 1)
|
||||
do
|
||||
if (c--, f--, f < 0 || a[c] !== y[f]) {
|
||||
var _ = `
|
||||
` + a[c].replace(" at new ", " at ");
|
||||
return e.displayName && _.includes("<anonymous>") && (_ = _.replace("<anonymous>", e.displayName)), typeof e == "function" && M.set(e, _), _;
|
||||
}
|
||||
while (c >= 1 && f >= 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
X = !1, H.current = s, Ye(), Error.prepareStackTrace = o;
|
||||
}
|
||||
var I = e ? e.displayName || e.name : "", je = I ? V(I) : "";
|
||||
return typeof e == "function" && M.set(e, je), je;
|
||||
}
|
||||
function Ve(e, r, t) {
|
||||
return pe(e, !1);
|
||||
}
|
||||
function Me(e) {
|
||||
var r = e.prototype;
|
||||
return !!(r && r.isReactComponent);
|
||||
}
|
||||
function U(e, r, t) {
|
||||
if (e == null)
|
||||
return "";
|
||||
if (typeof e == "function")
|
||||
return pe(e, Me(e));
|
||||
if (typeof e == "string")
|
||||
return V(e);
|
||||
switch (e) {
|
||||
case v:
|
||||
return V("Suspense");
|
||||
case m:
|
||||
return V("SuspenseList");
|
||||
}
|
||||
if (typeof e == "object")
|
||||
switch (e.$$typeof) {
|
||||
case d:
|
||||
return Ve(e.render);
|
||||
case h:
|
||||
return U(e.type, r, t);
|
||||
case x: {
|
||||
var n = e, o = n._payload, s = n._init;
|
||||
try {
|
||||
return U(s(o), r, t);
|
||||
} catch {
|
||||
}
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
var G = Object.prototype.hasOwnProperty, he = {}, me = O.ReactDebugCurrentFrame;
|
||||
function q(e) {
|
||||
if (e) {
|
||||
var r = e._owner, t = U(e.type, e._source, r ? r.type : null);
|
||||
me.setExtraStackFrame(t);
|
||||
} else
|
||||
me.setExtraStackFrame(null);
|
||||
}
|
||||
function Ue(e, r, t, n, o) {
|
||||
{
|
||||
var s = Function.call.bind(G);
|
||||
for (var i in e)
|
||||
if (s(e, i)) {
|
||||
var a = void 0;
|
||||
try {
|
||||
if (typeof e[i] != "function") {
|
||||
var y = Error((n || "React class") + ": " + t + " type `" + i + "` is invalid; it must be a function, usually from the `prop-types` package, but received `" + typeof e[i] + "`.This often happens because of typos such as `PropTypes.function` instead of `PropTypes.func`.");
|
||||
throw y.name = "Invariant Violation", y;
|
||||
}
|
||||
a = e[i](r, i, n, t, null, "SECRET_DO_NOT_PASS_THIS_OR_YOU_WILL_BE_FIRED");
|
||||
} catch (c) {
|
||||
a = c;
|
||||
}
|
||||
a && !(a instanceof Error) && (q(o), g("%s: type specification of %s `%s` is invalid; the type checker function must return `null` or an `Error` but returned a %s. You may have forgotten to pass an argument to the type checker creator (arrayOf, instanceOf, objectOf, oneOf, oneOfType, and shape all require an argument).", n || "React class", t, i, typeof a), q(null)), a instanceof Error && !(a.message in he) && (he[a.message] = !0, q(o), g("Failed %s type: %s", t, a.message), q(null));
|
||||
}
|
||||
}
|
||||
}
|
||||
var Ge = Array.isArray;
|
||||
function Z(e) {
|
||||
return Ge(e);
|
||||
}
|
||||
function qe(e) {
|
||||
{
|
||||
var r = typeof Symbol == "function" && Symbol.toStringTag, t = r && e[Symbol.toStringTag] || e.constructor.name || "Object";
|
||||
return t;
|
||||
}
|
||||
}
|
||||
function Je(e) {
|
||||
try {
|
||||
return ge(e), !1;
|
||||
} catch {
|
||||
return !0;
|
||||
}
|
||||
}
|
||||
function ge(e) {
|
||||
return "" + e;
|
||||
}
|
||||
function ye(e) {
|
||||
if (Je(e))
|
||||
return g("The provided key is an unsupported type %s. This value must be coerced to a string before before using it here.", qe(e)), ge(e);
|
||||
}
|
||||
var W = O.ReactCurrentOwner, Be = {
|
||||
key: !0,
|
||||
ref: !0,
|
||||
__self: !0,
|
||||
__source: !0
|
||||
}, be, Ee, Q;
|
||||
Q = {};
|
||||
function ze(e) {
|
||||
if (G.call(e, "ref")) {
|
||||
var r = Object.getOwnPropertyDescriptor(e, "ref").get;
|
||||
if (r && r.isReactWarning)
|
||||
return !1;
|
||||
}
|
||||
return e.ref !== void 0;
|
||||
}
|
||||
function He(e) {
|
||||
if (G.call(e, "key")) {
|
||||
var r = Object.getOwnPropertyDescriptor(e, "key").get;
|
||||
if (r && r.isReactWarning)
|
||||
return !1;
|
||||
}
|
||||
return e.key !== void 0;
|
||||
}
|
||||
function Ke(e, r) {
|
||||
if (typeof e.ref == "string" && W.current && r && W.current.stateNode !== r) {
|
||||
var t = k(W.current.type);
|
||||
Q[t] || (g('Component "%s" contains the string ref "%s". Support for string refs will be removed in a future major release. This case cannot be automatically converted to an arrow function. We ask you to manually fix this case by using useRef() or createRef() instead. Learn more about using refs safely here: https://reactjs.org/link/strict-mode-string-ref', k(W.current.type), e.ref), Q[t] = !0);
|
||||
}
|
||||
}
|
||||
function Xe(e, r) {
|
||||
{
|
||||
var t = function() {
|
||||
be || (be = !0, g("%s: `key` is not a prop. Trying to access it will result in `undefined` being returned. If you need to access the same value within the child component, you should pass it as a different prop. (https://reactjs.org/link/special-props)", r));
|
||||
};
|
||||
t.isReactWarning = !0, Object.defineProperty(e, "key", {
|
||||
get: t,
|
||||
configurable: !0
|
||||
});
|
||||
}
|
||||
}
|
||||
function Ze(e, r) {
|
||||
{
|
||||
var t = function() {
|
||||
Ee || (Ee = !0, g("%s: `ref` is not a prop. Trying to access it will result in `undefined` being returned. If you need to access the same value within the child component, you should pass it as a different prop. (https://reactjs.org/link/special-props)", r));
|
||||
};
|
||||
t.isReactWarning = !0, Object.defineProperty(e, "ref", {
|
||||
get: t,
|
||||
configurable: !0
|
||||
});
|
||||
}
|
||||
}
|
||||
var Qe = function(e, r, t, n, o, s, i) {
|
||||
var a = {
|
||||
// This tag allows us to uniquely identify this as a React Element
|
||||
$$typeof: w,
|
||||
// Built-in properties that belong on the element
|
||||
type: e,
|
||||
key: r,
|
||||
ref: t,
|
||||
props: i,
|
||||
// Record the component responsible for creating this element.
|
||||
_owner: s
|
||||
};
|
||||
return a._store = {}, Object.defineProperty(a._store, "validated", {
|
||||
configurable: !1,
|
||||
enumerable: !1,
|
||||
writable: !0,
|
||||
value: !1
|
||||
}), Object.defineProperty(a, "_self", {
|
||||
configurable: !1,
|
||||
enumerable: !1,
|
||||
writable: !1,
|
||||
value: n
|
||||
}), Object.defineProperty(a, "_source", {
|
||||
configurable: !1,
|
||||
enumerable: !1,
|
||||
writable: !1,
|
||||
value: o
|
||||
}), Object.freeze && (Object.freeze(a.props), Object.freeze(a)), a;
|
||||
};
|
||||
function er(e, r, t, n, o) {
|
||||
{
|
||||
var s, i = {}, a = null, y = null;
|
||||
t !== void 0 && (ye(t), a = "" + t), He(r) && (ye(r.key), a = "" + r.key), ze(r) && (y = r.ref, Ke(r, o));
|
||||
for (s in r)
|
||||
G.call(r, s) && !Be.hasOwnProperty(s) && (i[s] = r[s]);
|
||||
if (e && e.defaultProps) {
|
||||
var c = e.defaultProps;
|
||||
for (s in c)
|
||||
i[s] === void 0 && (i[s] = c[s]);
|
||||
}
|
||||
if (a || y) {
|
||||
var f = typeof e == "function" ? e.displayName || e.name || "Unknown" : e;
|
||||
a && Xe(i, f), y && Ze(i, f);
|
||||
}
|
||||
return Qe(e, a, y, o, n, W.current, i);
|
||||
}
|
||||
}
|
||||
var ee = O.ReactCurrentOwner, xe = O.ReactDebugCurrentFrame;
|
||||
function F(e) {
|
||||
if (e) {
|
||||
var r = e._owner, t = U(e.type, e._source, r ? r.type : null);
|
||||
xe.setExtraStackFrame(t);
|
||||
} else
|
||||
xe.setExtraStackFrame(null);
|
||||
}
|
||||
var re;
|
||||
re = !1;
|
||||
function te(e) {
|
||||
return typeof e == "object" && e !== null && e.$$typeof === w;
|
||||
}
|
||||
function _e() {
|
||||
{
|
||||
if (ee.current) {
|
||||
var e = k(ee.current.type);
|
||||
if (e)
|
||||
return `
|
||||
|
||||
Check the render method of \`` + e + "`.";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
function rr(e) {
|
||||
{
|
||||
if (e !== void 0) {
|
||||
var r = e.fileName.replace(/^.*[\\\/]/, ""), t = e.lineNumber;
|
||||
return `
|
||||
|
||||
Check your code at ` + r + ":" + t + ".";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
}
|
||||
var Re = {};
|
||||
function tr(e) {
|
||||
{
|
||||
var r = _e();
|
||||
if (!r) {
|
||||
var t = typeof e == "string" ? e : e.displayName || e.name;
|
||||
t && (r = `
|
||||
|
||||
Check the top-level render call using <` + t + ">.");
|
||||
}
|
||||
return r;
|
||||
}
|
||||
}
|
||||
function we(e, r) {
|
||||
{
|
||||
if (!e._store || e._store.validated || e.key != null)
|
||||
return;
|
||||
e._store.validated = !0;
|
||||
var t = tr(r);
|
||||
if (Re[t])
|
||||
return;
|
||||
Re[t] = !0;
|
||||
var n = "";
|
||||
e && e._owner && e._owner !== ee.current && (n = " It was passed a child from " + k(e._owner.type) + "."), F(e), g('Each child in a list should have a unique "key" prop.%s%s See https://reactjs.org/link/warning-keys for more information.', t, n), F(null);
|
||||
}
|
||||
}
|
||||
function Te(e, r) {
|
||||
{
|
||||
if (typeof e != "object")
|
||||
return;
|
||||
if (Z(e))
|
||||
for (var t = 0; t < e.length; t++) {
|
||||
var n = e[t];
|
||||
te(n) && we(n, r);
|
||||
}
|
||||
else if (te(e))
|
||||
e._store && (e._store.validated = !0);
|
||||
else if (e) {
|
||||
var o = J(e);
|
||||
if (typeof o == "function" && o !== e.entries)
|
||||
for (var s = o.call(e), i; !(i = s.next()).done; )
|
||||
te(i.value) && we(i.value, r);
|
||||
}
|
||||
}
|
||||
}
|
||||
function nr(e) {
|
||||
{
|
||||
var r = e.type;
|
||||
if (r == null || typeof r == "string")
|
||||
return;
|
||||
var t;
|
||||
if (typeof r == "function")
|
||||
t = r.propTypes;
|
||||
else if (typeof r == "object" && (r.$$typeof === d || // Note: Memo only checks outer props here.
|
||||
// Inner props are checked in the reconciler.
|
||||
r.$$typeof === h))
|
||||
t = r.propTypes;
|
||||
else
|
||||
return;
|
||||
if (t) {
|
||||
var n = k(r);
|
||||
Ue(t, e.props, "prop", n, e);
|
||||
} else if (r.PropTypes !== void 0 && !re) {
|
||||
re = !0;
|
||||
var o = k(r);
|
||||
g("Component %s declared `PropTypes` instead of `propTypes`. Did you misspell the property assignment?", o || "Unknown");
|
||||
}
|
||||
typeof r.getDefaultProps == "function" && !r.getDefaultProps.isReactClassApproved && g("getDefaultProps is only used on classic React.createClass definitions. Use a static property named `defaultProps` instead.");
|
||||
}
|
||||
}
|
||||
function ar(e) {
|
||||
{
|
||||
for (var r = Object.keys(e.props), t = 0; t < r.length; t++) {
|
||||
var n = r[t];
|
||||
if (n !== "children" && n !== "key") {
|
||||
F(e), g("Invalid prop `%s` supplied to `React.Fragment`. React.Fragment can only have `key` and `children` props.", n), F(null);
|
||||
break;
|
||||
}
|
||||
}
|
||||
e.ref !== null && (F(e), g("Invalid attribute `ref` supplied to `React.Fragment`."), F(null));
|
||||
}
|
||||
}
|
||||
function Se(e, r, t, n, o, s) {
|
||||
{
|
||||
var i = Ie(e);
|
||||
if (!i) {
|
||||
var a = "";
|
||||
(e === void 0 || typeof e == "object" && e !== null && Object.keys(e).length === 0) && (a += " You likely forgot to export your component from the file it's defined in, or you might have mixed up default and named imports.");
|
||||
var y = rr(o);
|
||||
y ? a += y : a += _e();
|
||||
var c;
|
||||
e === null ? c = "null" : Z(e) ? c = "array" : e !== void 0 && e.$$typeof === w ? (c = "<" + (k(e.type) || "Unknown") + " />", a = " Did you accidentally export a JSX literal instead of a component?") : c = typeof e, g("React.jsx: type is invalid -- expected a string (for built-in components) or a class/function (for composite components) but got: %s.%s", c, a);
|
||||
}
|
||||
var f = er(e, r, t, o, s);
|
||||
if (f == null)
|
||||
return f;
|
||||
if (i) {
|
||||
var _ = r.children;
|
||||
if (_ !== void 0)
|
||||
if (n)
|
||||
if (Z(_)) {
|
||||
for (var I = 0; I < _.length; I++)
|
||||
Te(_[I], e);
|
||||
Object.freeze && Object.freeze(_);
|
||||
} else
|
||||
g("React.jsx: Static children should always be an array. You are likely explicitly calling React.jsxs or React.jsxDEV. Use the Babel transform instead.");
|
||||
else
|
||||
Te(_, e);
|
||||
}
|
||||
return e === u ? ar(f) : nr(f), f;
|
||||
}
|
||||
}
|
||||
function ir(e, r, t) {
|
||||
return Se(e, r, t, !0);
|
||||
}
|
||||
function or(e, r, t) {
|
||||
return Se(e, r, t, !1);
|
||||
}
|
||||
var sr = or, lr = ir;
|
||||
L.Fragment = u, L.jsx = sr, L.jsxs = lr;
|
||||
}()), L;
|
||||
}
|
||||
process.env.NODE_ENV === "production" ? ne.exports = cr() : ne.exports = fr();
|
||||
var l = ne.exports;
|
||||
function dr({
|
||||
question: N = "",
|
||||
apiKey: w = "",
|
||||
selectedDocs: C = "",
|
||||
history: u = [],
|
||||
conversationId: E = null,
|
||||
apiHost: S = "",
|
||||
onEvent: T = () => {
|
||||
console.log("Event triggered, but no handler provided.");
|
||||
}
|
||||
}) {
|
||||
let b = "default";
|
||||
return C && (b = C), new Promise((d, v) => {
|
||||
const m = {
|
||||
question: N,
|
||||
api_key: w,
|
||||
embeddings_key: w,
|
||||
active_docs: b,
|
||||
history: JSON.stringify(u),
|
||||
conversation_id: E,
|
||||
model: "default"
|
||||
};
|
||||
fetch(S + "/stream", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json"
|
||||
},
|
||||
body: JSON.stringify(m)
|
||||
}).then((h) => {
|
||||
if (!h.body)
|
||||
throw Error("No response body");
|
||||
const x = h.body.getReader(), p = new TextDecoder("utf-8");
|
||||
let R = 0;
|
||||
const j = ({
|
||||
done: J,
|
||||
value: O
|
||||
}) => {
|
||||
if (J) {
|
||||
console.log(R), d();
|
||||
return;
|
||||
}
|
||||
R += 1;
|
||||
const B = p.decode(O).split(`
|
||||
`);
|
||||
for (let D of B) {
|
||||
if (D.trim() == "")
|
||||
continue;
|
||||
D.startsWith("data:") && (D = D.substring(5));
|
||||
const z = new MessageEvent("message", {
|
||||
data: D
|
||||
});
|
||||
T(z);
|
||||
}
|
||||
x.read().then(j).catch(v);
|
||||
};
|
||||
x.read().then(j).catch(v);
|
||||
}).catch((h) => {
|
||||
console.error("Connection failed:", h), v(h);
|
||||
});
|
||||
});
|
||||
}
|
||||
const pr = ({ apiHost: N = "https://gptcloud.arc53.com", selectDocs: w = "default", apiKey: C = "docsgpt-public" }) => {
|
||||
const [u, E] = ke(() => typeof window < "u" && localStorage.getItem("docsGPTChatState") || "init"), [S, T] = ke(""), b = ur(null);
|
||||
Pe(() => {
|
||||
if (b.current) {
|
||||
const v = b.current;
|
||||
v.scrollTop = v.scrollHeight;
|
||||
}
|
||||
}, [S]), Pe(() => {
|
||||
localStorage.setItem("docsGPTChatState", u);
|
||||
}, [u]);
|
||||
const d = (v) => {
|
||||
T(""), v.preventDefault(), E(
|
||||
"processing"
|
||||
/* Processing */
|
||||
), setTimeout(() => {
|
||||
E(
|
||||
"answer"
|
||||
/* Answer */
|
||||
);
|
||||
}, 800);
|
||||
const h = v.currentTarget[0].value;
|
||||
dr({
|
||||
question: h,
|
||||
apiKey: C,
|
||||
selectedDocs: w,
|
||||
history: [],
|
||||
conversationId: null,
|
||||
apiHost: N,
|
||||
onEvent: (x) => {
|
||||
const p = JSON.parse(x.data);
|
||||
if (p.type === "end")
|
||||
E(
|
||||
"answer"
|
||||
/* Answer */
|
||||
);
|
||||
else if (p.type === "source") {
|
||||
let R;
|
||||
if (p.metadata && p.metadata.title) {
|
||||
const j = p.metadata.title.split("/");
|
||||
R = {
|
||||
title: j[j.length - 1],
|
||||
text: p.doc
|
||||
};
|
||||
} else
|
||||
R = { title: p.doc, text: p.doc };
|
||||
console.log(R);
|
||||
} else if (p.type === "id")
|
||||
console.log(p.id);
|
||||
else {
|
||||
const R = p.answer;
|
||||
T((j) => j + R);
|
||||
}
|
||||
}
|
||||
});
|
||||
};
|
||||
return /* @__PURE__ */ l.jsx(l.Fragment, { children: /* @__PURE__ */ l.jsxs("div", { className: "dark widget-container", children: [
|
||||
/* @__PURE__ */ l.jsx(
|
||||
"div",
|
||||
{
|
||||
onClick: () => E(
|
||||
"init"
|
||||
/* Init */
|
||||
),
|
||||
className: `${u !== "minimized" ? "hidden" : ""} cursor-pointer`,
|
||||
children: /* @__PURE__ */ l.jsx("div", { className: "mr-2 mb-2 w-20 h-20 rounded-full overflow-hidden dark:divide-gray-700 border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm flex items-center justify-center", children: /* @__PURE__ */ l.jsx(
|
||||
"img",
|
||||
{
|
||||
src: "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png",
|
||||
alt: "DocsGPT",
|
||||
className: "cursor-pointer hover:opacity-50 h-14"
|
||||
}
|
||||
) })
|
||||
}
|
||||
),
|
||||
/* @__PURE__ */ l.jsxs("div", { className: ` ${u !== "minimized" ? "" : "hidden"} divide-y dark:divide-gray-700 rounded-md border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm`, style: { width: "18rem", transform: "translateY(0%) translateZ(0px)" }, children: [
|
||||
/* @__PURE__ */ l.jsxs("div", { children: [
|
||||
/* @__PURE__ */ l.jsx(
|
||||
"img",
|
||||
{
|
||||
src: "https://d3dg1063dc54p9.cloudfront.net/exit.svg",
|
||||
alt: "Exit",
|
||||
className: "cursor-pointer hover:opacity-50 h-2 absolute top-0 right-0 m-2 white-filter",
|
||||
onClick: (v) => {
|
||||
v.stopPropagation(), E(
|
||||
"minimized"
|
||||
/* Minimized */
|
||||
);
|
||||
}
|
||||
}
|
||||
),
|
||||
/* @__PURE__ */ l.jsxs("div", { className: "flex items-center gap-2 p-3", children: [
|
||||
/* @__PURE__ */ l.jsxs("div", { className: `${u === "init" || u === "processing" || u === "typing" ? "" : "hidden"} flex-1`, children: [
|
||||
/* @__PURE__ */ l.jsx("h3", { className: "text-sm font-bold text-gray-700 dark:text-gray-200", children: "Need help with documentation?" }),
|
||||
/* @__PURE__ */ l.jsx("p", { className: "mt-1 text-xs text-gray-400 dark:text-gray-500", children: "DocsGPT AI assistant will help you with docs" })
|
||||
] }),
|
||||
/* @__PURE__ */ l.jsx("div", { id: "docsgpt-answer", ref: b, className: `${u !== "answer" ? "hidden" : ""}`, children: /* @__PURE__ */ l.jsx("p", { className: "mt-1 text-sm text-gray-600 dark:text-white text-left", children: S }) })
|
||||
] })
|
||||
] }),
|
||||
/* @__PURE__ */ l.jsxs("div", { className: "w-full", children: [
|
||||
/* @__PURE__ */ l.jsx(
|
||||
"button",
|
||||
{
|
||||
onClick: () => E(
|
||||
"typing"
|
||||
/* Typing */
|
||||
),
|
||||
className: `flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 hover:bg-gray-100 rounded-b dark:hover:bg-gray-800/70 ${u !== "init" ? "hidden" : ""}`,
|
||||
children: "Ask DocsGPT"
|
||||
}
|
||||
),
|
||||
(u === "typing" || u === "answer") && /* @__PURE__ */ l.jsxs(
|
||||
"form",
|
||||
{
|
||||
onSubmit: d,
|
||||
className: "relative w-full m-0",
|
||||
style: { opacity: 1 },
|
||||
children: [
|
||||
/* @__PURE__ */ l.jsx(
|
||||
"input",
|
||||
{
|
||||
type: "text",
|
||||
className: "w-full bg-transparent px-5 py-3 pr-8 text-sm text-gray-700 dark:text-white focus:outline-none",
|
||||
placeholder: "What do you want to do?"
|
||||
}
|
||||
),
|
||||
/* @__PURE__ */ l.jsx("button", { className: "absolute text-gray-400 dark:text-gray-500 text-sm inset-y-0 right-2 -mx-2 px-2", type: "submit", children: "Sumbit" })
|
||||
]
|
||||
}
|
||||
),
|
||||
/* @__PURE__ */ l.jsxs("p", { className: `${u !== "processing" ? "hidden" : ""} flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 rounded-b`, children: [
|
||||
"Processing",
|
||||
/* @__PURE__ */ l.jsx("span", { className: "dot-animation", children: "." }),
|
||||
/* @__PURE__ */ l.jsx("span", { className: "dot-animation delay-200", children: "." }),
|
||||
/* @__PURE__ */ l.jsx("span", { className: "dot-animation delay-400", children: "." })
|
||||
] })
|
||||
] })
|
||||
] })
|
||||
] }) });
|
||||
};
|
||||
export {
|
||||
pr as DocsGPTWidget
|
||||
};
|
||||
//# sourceMappingURL=index.es.js.map
|
||||