mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-30 09:03:15 +00:00
Compare commits
216 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
890a20edba | ||
|
|
e6f48c9403 | ||
|
|
909f0afa69 | ||
|
|
5ed2b99b8c | ||
|
|
7848751fd8 | ||
|
|
e593241d75 | ||
|
|
fcdc7b7aeb | ||
|
|
c3c7878f28 | ||
|
|
85f9ae5a0a | ||
|
|
98a97f34f5 | ||
|
|
98d647a3fe | ||
|
|
9a393b4f74 | ||
|
|
88d74235e1 | ||
|
|
36fa470348 | ||
|
|
33dce10bc3 | ||
|
|
feed0b288f | ||
|
|
1b7dc8a509 | ||
|
|
87cc3cf168 | ||
|
|
eac7b1e9f2 | ||
|
|
bb1a42df91 | ||
|
|
ac5ac3e9f1 | ||
|
|
bed25b317c | ||
|
|
1687e6682a | ||
|
|
22572c8ed1 | ||
|
|
8187a339f0 | ||
|
|
382c3930a2 | ||
|
|
a64a30c088 | ||
|
|
dac76a867f | ||
|
|
b2e86e105d | ||
|
|
b8e57c9b6f | ||
|
|
486a1bc9de | ||
|
|
b1b610f4b5 | ||
|
|
68447a6009 | ||
|
|
a55280b941 | ||
|
|
830462d525 | ||
|
|
ce8b29e9d0 | ||
|
|
6ab15f8eb1 | ||
|
|
96eb68e042 | ||
|
|
bf78bdd6d4 | ||
|
|
d998815847 | ||
|
|
00ba7b78ca | ||
|
|
0b735d94f1 | ||
|
|
301989540f | ||
|
|
e26b95a26f | ||
|
|
049c1ddb48 | ||
|
|
2f1c3075a2 | ||
|
|
b1a5068fd6 | ||
|
|
01fbd5d702 | ||
|
|
5916f92f1a | ||
|
|
5e45268f68 | ||
|
|
b8e28e0c12 | ||
|
|
04f824ea36 | ||
|
|
c216bea031 | ||
|
|
e72ef478dc | ||
|
|
897b4ef2cd | ||
|
|
2404899e28 | ||
|
|
a2dfc2cbdc | ||
|
|
92373b25a9 | ||
|
|
ce1840a9ae | ||
|
|
c4f4bdd789 | ||
|
|
ec5068e85b | ||
|
|
1d9d0ddf27 | ||
|
|
e393be90dd | ||
|
|
e633df06e4 | ||
|
|
0ff5f408d6 | ||
|
|
5eda42ff31 | ||
|
|
84168e22d0 | ||
|
|
b722845aff | ||
|
|
fd54682c02 | ||
|
|
f5e287ffa6 | ||
|
|
fb10a546d6 | ||
|
|
006897f1c0 | ||
|
|
968849e52b | ||
|
|
8bee47dc50 | ||
|
|
08250120d1 | ||
|
|
8892b70785 | ||
|
|
534e4cb591 | ||
|
|
489abdcb0b | ||
|
|
f6b6c2e9a3 | ||
|
|
43c016f024 | ||
|
|
c0e7d9cd8b | ||
|
|
5f687a31f8 | ||
|
|
f2d2478dee | ||
|
|
8a98789be1 | ||
|
|
87a5c8894a | ||
|
|
7e92ed4501 | ||
|
|
a57cdfff1e | ||
|
|
d4ff6d4d7a | ||
|
|
63d99d6a57 | ||
|
|
fce7d34171 | ||
|
|
e7df7f69b3 | ||
|
|
94cc18bd71 | ||
|
|
39024ce2ac | ||
|
|
7ac4f45e7b | ||
|
|
f209eebaf8 | ||
|
|
4889db78c9 | ||
|
|
bff200fede | ||
|
|
af6f783043 | ||
|
|
610adcbefc | ||
|
|
1d3631fa04 | ||
|
|
0630504664 | ||
|
|
577d58c92b | ||
|
|
899777632b | ||
|
|
6d5b698c39 | ||
|
|
dd9f1abcea | ||
|
|
b4bd34fb96 | ||
|
|
014971262d | ||
|
|
36ed69b07e | ||
|
|
bbf55ca46e | ||
|
|
3f88b04c4a | ||
|
|
f8910ba136 | ||
|
|
6c95d8b13e | ||
|
|
e6bccaaf4e | ||
|
|
3b8039a580 | ||
|
|
fae3f55010 | ||
|
|
20c877f75b | ||
|
|
8380858a82 | ||
|
|
d2358c399d | ||
|
|
c3af8a77af | ||
|
|
bc5a0b030b | ||
|
|
0b94f1717f | ||
|
|
aaa1249a41 | ||
|
|
ffaa22c49b | ||
|
|
0b78480977 | ||
|
|
ec4fc17e3a | ||
|
|
78b85fb664 | ||
|
|
6b6737613a | ||
|
|
da5d62cc1c | ||
|
|
6a68b63192 | ||
|
|
ff2e79fe7b | ||
|
|
1800e51b19 | ||
|
|
ba9c505249 | ||
|
|
bc9f1c17ed | ||
|
|
74845aed64 | ||
|
|
e49dd0cc6a | ||
|
|
27c45ae24a | ||
|
|
364a14adaf | ||
|
|
5c560b1dd5 | ||
|
|
28b8b88332 | ||
|
|
e39ef0cc9e | ||
|
|
8098d3fec8 | ||
|
|
059ffe09ea | ||
|
|
36a845c29e | ||
|
|
ce6f0dab56 | ||
|
|
f200ab10a4 | ||
|
|
3001688e0e | ||
|
|
a73774099e | ||
|
|
b28676d52c | ||
|
|
eef012b4d1 | ||
|
|
1417a1c020 | ||
|
|
962becb9a5 | ||
|
|
168648e789 | ||
|
|
7f56f57778 | ||
|
|
6cadddc2fc | ||
|
|
15fd54eac4 | ||
|
|
31350e6302 | ||
|
|
8742cdae0a | ||
|
|
4efcb388ff | ||
|
|
2d92e95c8a | ||
|
|
47e5d5684a | ||
|
|
b723e14d98 | ||
|
|
c9d24b8f42 | ||
|
|
43622e7ab1 | ||
|
|
5cfc185ba5 | ||
|
|
4be2635fbe | ||
|
|
0beafb8391 | ||
|
|
1d2654b9fa | ||
|
|
a4bc3673e7 | ||
|
|
fa080537e8 | ||
|
|
bdf67a7db7 | ||
|
|
db4cdc901c | ||
|
|
16a540b89b | ||
|
|
e00ec9ac3f | ||
|
|
fc760afdfc | ||
|
|
cb47bcdb0e | ||
|
|
8d62559ca8 | ||
|
|
dbe9c4dc18 | ||
|
|
1609b4562d | ||
|
|
b6cadb1d65 | ||
|
|
7aafac5b5e | ||
|
|
36f0aacb19 | ||
|
|
0c1a6a918d | ||
|
|
d1f5ff4dba | ||
|
|
77e6df2a1c | ||
|
|
119c037f24 | ||
|
|
97fe1abfd8 | ||
|
|
3a0163f0fb | ||
|
|
d3fab69155 | ||
|
|
9395d2c091 | ||
|
|
b9efb98280 | ||
|
|
60bb264663 | ||
|
|
316dd2f165 | ||
|
|
8a0f700563 | ||
|
|
3d0c6eafec | ||
|
|
46e055833b | ||
|
|
80dfdd1cb9 | ||
|
|
db21678b74 | ||
|
|
09c7fe0565 | ||
|
|
b6dfb2c856 | ||
|
|
ab46ba521f | ||
|
|
4a7670f2aa | ||
|
|
9ba86bc174 | ||
|
|
2ebe5e051c | ||
|
|
24e98abd15 | ||
|
|
b7f1a94ba4 | ||
|
|
70bc7465c9 | ||
|
|
65c2568427 | ||
|
|
186e7bf402 | ||
|
|
e6f1c7d0c3 | ||
|
|
87ad9a3190 | ||
|
|
0ed45f8754 | ||
|
|
116e4401c4 | ||
|
|
c3c0e643d2 | ||
|
|
d5522e7c08 | ||
|
|
658b14ba26 | ||
|
|
38f8469d0b |
8
.env-template
Normal file
8
.env-template
Normal file
@@ -0,0 +1,8 @@
|
||||
OPENAI_API_KEY=<LLM api key (for example, open ai key)>
|
||||
EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
|
||||
|
||||
#For Azure
|
||||
OPENAI_API_BASE=
|
||||
OPENAI_API_VERSION=
|
||||
AZURE_DEPLOYMENT_NAME=
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
|
||||
11
.github/workflows/ci.yml
vendored
11
.github/workflows/ci.yml
vendored
@@ -8,7 +8,12 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -23,17 +28,17 @@ jobs:
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GHCR_TOKEN }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Runs a single command using the runners shell
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v2
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
file: './application/Dockerfile'
|
||||
platforms: linux/amd64
|
||||
|
||||
10
.github/workflows/cife.yml
vendored
10
.github/workflows/cife.yml
vendored
@@ -9,6 +9,10 @@ on:
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -23,17 +27,17 @@ jobs:
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GHCR_TOKEN }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Runs a single command using the runners shell
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v2
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
file: './frontend/Dockerfile'
|
||||
platforms: linux/amd64
|
||||
|
||||
17
.github/workflows/lint.yml
vendored
Normal file
17
.github/workflows/lint.yml
vendored
Normal file
@@ -0,0 +1,17 @@
|
||||
name: Python linting
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- '*'
|
||||
pull_request:
|
||||
types: [ opened, synchronize ]
|
||||
|
||||
jobs:
|
||||
ruff:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Lint with Ruff
|
||||
uses: chartboost/ruff-action@v1
|
||||
27
.github/workflows/pytest.yml
vendored
Normal file
27
.github/workflows/pytest.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
name: Run python tests with pytest
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install pytest
|
||||
cd application
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Test with pytest
|
||||
run: |
|
||||
python -m pytest
|
||||
41
.github/workflows/sync_fork.yaml
vendored
Normal file
41
.github/workflows/sync_fork.yaml
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
name: Upstream Sync
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * *" # every hour
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
sync_latest_from_upstream:
|
||||
name: Sync latest commits from upstream repo
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ github.event.repository.fork }}
|
||||
|
||||
steps:
|
||||
# Step 1: run a standard checkout action
|
||||
- name: Checkout target repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
# Step 2: run the sync action
|
||||
- name: Sync upstream changes
|
||||
id: sync
|
||||
uses: aormsby/Fork-Sync-With-Upstream-action@v3.4
|
||||
with:
|
||||
# set your upstream repo and branch
|
||||
upstream_sync_repo: arc53/DocsGPT
|
||||
upstream_sync_branch: main
|
||||
target_sync_branch: main
|
||||
target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set
|
||||
|
||||
# Set test_mode true to run tests instead of the true action!!
|
||||
test_mode: false
|
||||
|
||||
- name: Sync check
|
||||
if: failure()
|
||||
run: |
|
||||
echo "::error::由于权限不足,导致同步失败(这是预期的行为),请前往仓库首页手动执行[Sync fork]。"
|
||||
echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]."
|
||||
exit 1
|
||||
2
.ruff.toml
Normal file
2
.ruff.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
# Allow lines to be as long as 120 characters.
|
||||
line-length = 120
|
||||
113
README.md
113
README.md
@@ -23,77 +23,114 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
|
||||
|
||||

|
||||
|
||||
## Roadmap
|
||||
|
||||
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better!
|
||||
|
||||
## Our open source models optimised for DocsGPT:
|
||||
|
||||
| Name | Base Model | Requirements (or similar) |
|
||||
|-------------------|------------|----------------------------------------------------------|
|
||||
| [Docsgpt-7b-falcon](https://huggingface.co/Arc53/docsgpt-7b-falcon) | Falcon-7b | 1xA10G gpu |
|
||||
| [Docsgpt-14b](https://huggingface.co/Arc53/docsgpt-14b) | llama-2-14b | 2xA10 gpu's |
|
||||
| [Docsgpt-40b](https://huggingface.co/Arc53/docsgpt-40b-falcon) | falcon-40b | 8xA10G gpu's |
|
||||
|
||||
|
||||
If you don't have enough resources to run it you can use bitsnbytes to quantize
|
||||
|
||||
|
||||
## Features
|
||||
|
||||

|
||||
|
||||
|
||||
## Useful links
|
||||
[Live preview](https://docsgpt.arc53.com/)
|
||||
|
||||
[Join Our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
|
||||
[Guides](https://github.com/arc53/docsgpt/wiki)
|
||||
|
||||
## Roadmap
|
||||
[Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better!
|
||||
[How to use any other documentation](https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation)
|
||||
|
||||
|
||||
|
||||
## [Live preview](https://docsgpt.arc53.com/)
|
||||
|
||||
## [Join Our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
[How to host it locally (so all data will stay on-premises)](https://github.com/arc53/DocsGPT/wiki/How-to-use-different-LLM's#hosting-everything-locally)
|
||||
|
||||
|
||||
## Project structure
|
||||
- Application - flask app (main application)
|
||||
- Application - Flask app (main application)
|
||||
|
||||
- Extensions - chrome extension
|
||||
- Extensions - Chrome extension
|
||||
|
||||
- Scripts - script that creates similarity search index and store for other libraries.
|
||||
- Scripts - Script that creates similarity search index and store for other libraries.
|
||||
|
||||
- frontend - frontend in vite and
|
||||
- Frontend - Frontend uses Vite and React
|
||||
|
||||
## QuickStart
|
||||
|
||||
Note: Make sure you have docker installed
|
||||
|
||||
1. Open dowload this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Open docker-compose.yaml and replace <your_api_key> with your OpenAI's key (there are 4 places)
|
||||
3. Run `docker-compose build && docker-compose up`
|
||||
1. Dowload and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create an .env file in your root directory and set the env variable OPENAI_API_KEY with your openai api key and VITE_API_STREAMING to true or false, depending on if you want streaming answers or not
|
||||
It should look like this inside:
|
||||
|
||||
```
|
||||
OPENAI_API_KEY=Yourkey
|
||||
VITE_API_STREAMING=true
|
||||
```
|
||||
3. Run `./run-with-docker-compose.sh`
|
||||
4. Navigate to http://localhost:5173/
|
||||
|
||||
To stop just run Ctrl + C
|
||||
|
||||
## Development environments
|
||||
|
||||
Spin up only 2 containers from docker-compose.yaml (by deleting all services except for redis and mongo)
|
||||
### Spin up mongo and redis
|
||||
For development only 2 containers are used from docker-compose.yaml (by deleting all services except for redis and mongo).
|
||||
See file [docker-compose-dev.yaml](./docker-compose-dev.yaml).
|
||||
|
||||
Make sure you have python 3.10 or 3.11 installed
|
||||
Run
|
||||
```
|
||||
docker compose -f docker-compose-dev.yaml build
|
||||
docker compose -f docker-compose-dev.yaml up -d
|
||||
```
|
||||
|
||||
1. Navigate to `/application` folder
|
||||
2. Install dependencies
|
||||
`pip install -r requirements.txt`
|
||||
3. Prepare .env file
|
||||
Copy .env_sample and create .env with your openai api token
|
||||
4. Run the app
|
||||
`python app.py`
|
||||
5. Start worker with `celery -A app.celery worker -l INFO`
|
||||
### Run the backend
|
||||
|
||||
Make sure you have Python 3.10 or 3.11 installed.
|
||||
|
||||
1. Export required environment variables
|
||||
```commandline
|
||||
export CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
export CELERY_RESULT_BACKEND=redis://localhost:6379/1
|
||||
export MONGO_URI=mongodb://localhost:27017/docsgpt
|
||||
```
|
||||
2. Prepare .env file
|
||||
Copy `.env_sample` and create `.env` with your OpenAI API token
|
||||
3. (optional) Create a python virtual environment
|
||||
```commandline
|
||||
python -m venv venv
|
||||
. venv/bin/activate
|
||||
```
|
||||
4. Change to `application/` subdir and install dependencies for the backend
|
||||
```commandline
|
||||
cd application/
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
5. Run the app `python wsgi.py`
|
||||
6. Start worker with `celery -A app.celery worker -l INFO`
|
||||
|
||||
### Start frontend
|
||||
Make sure you have Node version 16 or higher.
|
||||
|
||||
To start frontend
|
||||
1. Navigate to `/frontend` folder
|
||||
2. Install dependencies
|
||||
`npm install`
|
||||
3. In the file `.env.development` instead of `VITE_API_HOST = https://docsapi.arc53.com` use `VITE_API_HOST=http://localhost:5001`
|
||||
3. Run the app
|
||||
4. `npm run dev`
|
||||
3. Run the app
|
||||
`npm run dev`
|
||||
|
||||
|
||||
[How to install the Chrome extension](https://github.com/arc53/docsgpt/wiki#launch-chrome-extension)
|
||||
|
||||
|
||||
## [Guides](https://github.com/arc53/docsgpt/wiki)
|
||||
|
||||
## [Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
## [How to use any other documentation](https://github.com/arc53/docsgpt/wiki/How-to-train-on-other-documentation)
|
||||
|
||||
## [How to host it locally (so all data will stay on-premises)](https://github.com/arc53/DocsGPT/wiki/How-to-use-different-LLM's#hosting-everything-locally)
|
||||
|
||||
Built with [🦜️🔗 LangChain](https://github.com/hwchase17/langchain)
|
||||
|
||||
|
||||
@@ -3,4 +3,10 @@ EMBEDDINGS_KEY=your_api_key
|
||||
CELERY_BROKER_URL=redis://localhost:6379/0
|
||||
CELERY_RESULT_BACKEND=redis://localhost:6379/1
|
||||
MONGO_URI=mongodb://localhost:27017/docsgpt
|
||||
API_URL=http://localhost:5001
|
||||
API_URL=http://localhost:7091
|
||||
|
||||
#For OPENAI on Azure
|
||||
OPENAI_API_BASE=
|
||||
OPENAI_API_VERSION=
|
||||
AZURE_DEPLOYMENT_NAME=
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
|
||||
@@ -4,22 +4,20 @@ FROM python:3.10-slim-bullseye as builder
|
||||
RUN apt-get update && apt-get install -y gcc curl
|
||||
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip && pip install tiktoken==0.1.2
|
||||
RUN pip install --upgrade pip && pip install tiktoken==0.3.3
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
|
||||
FROM python:3.10-slim-bullseye
|
||||
# Copy pre-built packages from builder stage
|
||||
COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/
|
||||
RUN pip install gunicorn==20.1.0
|
||||
RUN pip install celery==5.2.7
|
||||
|
||||
# Copy pre-built packages and binaries from builder stage
|
||||
COPY --from=builder /usr/local/ /usr/local/
|
||||
|
||||
WORKDIR /app
|
||||
COPY . /app
|
||||
COPY . /app/application
|
||||
ENV FLASK_APP=app.py
|
||||
ENV FLASK_DEBUG=true
|
||||
|
||||
EXPOSE 7091
|
||||
|
||||
EXPOSE 5001
|
||||
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
|
||||
|
||||
0
application/__init__.py
Normal file
0
application/__init__.py
Normal file
@@ -1,55 +1,70 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import http.client
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import platform
|
||||
import traceback
|
||||
|
||||
import dotenv
|
||||
import openai
|
||||
import requests
|
||||
from celery import Celery
|
||||
from celery.result import AsyncResult
|
||||
from flask import Flask, request, render_template, send_from_directory, jsonify
|
||||
from flask import Flask, request, render_template, send_from_directory, jsonify, Response
|
||||
from langchain import FAISS
|
||||
from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
|
||||
from langchain.chains import ChatVectorDBChain
|
||||
from langchain import VectorDBQA, Cohere, OpenAI
|
||||
from langchain.chains import LLMChain, ConversationalRetrievalChain
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
|
||||
HuggingFaceInstructEmbeddings
|
||||
from langchain.chat_models import ChatOpenAI, AzureChatOpenAI
|
||||
from langchain.embeddings import (
|
||||
OpenAIEmbeddings,
|
||||
HuggingFaceHubEmbeddings,
|
||||
CohereEmbeddings,
|
||||
HuggingFaceInstructEmbeddings,
|
||||
)
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.chat import (
|
||||
ChatPromptTemplate,
|
||||
SystemMessagePromptTemplate,
|
||||
HumanMessagePromptTemplate,
|
||||
AIMessagePromptTemplate,
|
||||
)
|
||||
from langchain.schema import HumanMessage, AIMessage
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from error import bad_request
|
||||
from worker import ingest_worker
|
||||
from application.core.settings import settings
|
||||
from application.error import bad_request
|
||||
from application.worker import ingest_worker
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
|
||||
if os.getenv("LLM_NAME") is not None:
|
||||
llm_choice = os.getenv("LLM_NAME")
|
||||
logger = logging.getLogger(__name__)
|
||||
if settings.LLM_NAME == "gpt4":
|
||||
gpt_model = 'gpt-4'
|
||||
else:
|
||||
llm_choice = "openai_chat"
|
||||
gpt_model = 'gpt-3.5-turbo'
|
||||
|
||||
if os.getenv("EMBEDDINGS_NAME") is not None:
|
||||
embeddings_choice = os.getenv("EMBEDDINGS_NAME")
|
||||
else:
|
||||
embeddings_choice = "openai_text-embedding-ada-002"
|
||||
|
||||
if llm_choice == "manifest":
|
||||
from manifest import Manifest
|
||||
from langchain.llms.manifest import ManifestWrapper
|
||||
if settings.SELF_HOSTED_MODEL:
|
||||
from langchain.llms import HuggingFacePipeline
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
||||
|
||||
manifest = Manifest(
|
||||
client_name="huggingface",
|
||||
client_connection="http://127.0.0.1:5000"
|
||||
model_id = settings.LLM_NAME # hf model id (Arc53/docsgpt-7b-falcon, Arc53/docsgpt-14b)
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
pipe = pipeline(
|
||||
"text-generation", model=model,
|
||||
tokenizer=tokenizer, max_new_tokens=2000,
|
||||
device_map="auto", eos_token_id=tokenizer.eos_token_id
|
||||
)
|
||||
hf = HuggingFacePipeline(pipeline=pipe)
|
||||
|
||||
# Redirect PosixPath to WindowsPath on Windows
|
||||
import platform
|
||||
|
||||
if platform.system() == "Windows":
|
||||
import pathlib
|
||||
@@ -61,40 +76,87 @@ if platform.system() == "Windows":
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# load the prompts
|
||||
with open("prompts/combine_prompt.txt", "r") as f:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
|
||||
template = f.read()
|
||||
|
||||
with open("prompts/combine_prompt_hist.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
|
||||
template_hist = f.read()
|
||||
|
||||
with open("prompts/question_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
|
||||
template_quest = f.read()
|
||||
|
||||
with open("prompts/chat_combine_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
with open("prompts/chat_reduce_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
|
||||
chat_reduce_template = f.read()
|
||||
|
||||
if os.getenv("API_KEY") is not None:
|
||||
api_key_set = True
|
||||
else:
|
||||
api_key_set = False
|
||||
if os.getenv("EMBEDDINGS_KEY") is not None:
|
||||
embeddings_key_set = True
|
||||
else:
|
||||
embeddings_key_set = False
|
||||
api_key_set = settings.API_KEY is not None
|
||||
embeddings_key_set = settings.EMBEDDINGS_KEY is not None
|
||||
|
||||
app = Flask(__name__)
|
||||
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
|
||||
app.config['CELERY_BROKER_URL'] = os.getenv("CELERY_BROKER_URL")
|
||||
app.config['CELERY_RESULT_BACKEND'] = os.getenv("CELERY_RESULT_BACKEND")
|
||||
app.config['MONGO_URI'] = os.getenv("MONGO_URI")
|
||||
celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'], backend=app.config['CELERY_RESULT_BACKEND'])
|
||||
celery.conf.update(app.config)
|
||||
mongo = MongoClient(app.config['MONGO_URI'])
|
||||
app.config["UPLOAD_FOLDER"] = UPLOAD_FOLDER = "inputs"
|
||||
app.config["CELERY_BROKER_URL"] = settings.CELERY_BROKER_URL
|
||||
app.config["CELERY_RESULT_BACKEND"] = settings.CELERY_RESULT_BACKEND
|
||||
app.config["MONGO_URI"] = settings.MONGO_URI
|
||||
celery = Celery()
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
mongo = MongoClient(app.config["MONGO_URI"])
|
||||
db = mongo["docsgpt"]
|
||||
vectors_collection = db["vectors"]
|
||||
conversations_collection = db["conversations"]
|
||||
|
||||
|
||||
async def async_generate(chain, question, chat_history):
|
||||
result = await chain.arun({"question": question, "chat_history": chat_history})
|
||||
return result
|
||||
|
||||
|
||||
def run_async_chain(chain, question, chat_history):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
result = {}
|
||||
try:
|
||||
answer = loop.run_until_complete(async_generate(chain, question, chat_history))
|
||||
finally:
|
||||
loop.close()
|
||||
result["answer"] = answer
|
||||
return result
|
||||
|
||||
|
||||
def get_vectorstore(data):
|
||||
if "active_docs" in data:
|
||||
if data["active_docs"].split("/")[0] == "local":
|
||||
if data["active_docs"].split("/")[1] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
|
||||
|
||||
def get_docsearch(vectorstore, embeddings_key):
|
||||
if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
|
||||
if is_azure_configured():
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME)
|
||||
else:
|
||||
openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key)
|
||||
docsearch = FAISS.load_local(vectorstore, openai_embeddings)
|
||||
elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||
elif settings.EMBEDDINGS_NAME == "cohere_medium":
|
||||
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||
return docsearch
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
@@ -105,8 +167,133 @@ def ingest(self, directory, formats, name_job, filename, user):
|
||||
|
||||
@app.route("/")
|
||||
def home():
|
||||
return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
|
||||
embeddings_choice=embeddings_choice)
|
||||
return render_template(
|
||||
"index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME, embeddings_choice=settings.EMBEDDINGS_NAME
|
||||
)
|
||||
|
||||
|
||||
def complete_stream(question, docsearch, chat_history, api_key, conversation_id):
|
||||
openai.api_key = api_key
|
||||
if is_azure_configured():
|
||||
logger.debug("in Azure")
|
||||
openai.api_type = "azure"
|
||||
openai.api_version = settings.OPENAI_API_VERSION
|
||||
openai.api_base = settings.OPENAI_API_BASE
|
||||
llm = AzureChatOpenAI(
|
||||
openai_api_key=api_key,
|
||||
openai_api_base=settings.OPENAI_API_BASE,
|
||||
openai_api_version=settings.OPENAI_API_VERSION,
|
||||
deployment_name=settings.AZURE_DEPLOYMENT_NAME,
|
||||
)
|
||||
else:
|
||||
logger.debug("plain OpenAI")
|
||||
llm = ChatOpenAI(openai_api_key=api_key)
|
||||
docs = docsearch.similarity_search(question, k=2)
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
data = json.dumps({"type": "source", "doc": doc.page_content, "metadata": doc.metadata})
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
data = json.dumps({"type": "source", "doc": doc.page_content})
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
yield f"data:{data}\n\n"
|
||||
|
||||
if len(chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
chat_history.reverse()
|
||||
for i in chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
completion = openai.ChatCompletion.create(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_combine, stream=True, max_tokens=500, temperature=0)
|
||||
reponse_full = ""
|
||||
for line in completion:
|
||||
if "content" in line["choices"][0]["delta"]:
|
||||
# check if the delta contains content
|
||||
data = json.dumps({"answer": str(line["choices"][0]["delta"]["content"])})
|
||||
reponse_full += str(line["choices"][0]["delta"]["content"])
|
||||
yield f"data: {data}\n\n"
|
||||
# save conversation to database
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question, "response": reponse_full, "sources": source_log_docs}}},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: " + question + "\n\n" +
|
||||
"AI: " +
|
||||
reponse_full},
|
||||
{"role": "user", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the "
|
||||
"system"}]
|
||||
completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_summary, max_tokens=30, temperature=0)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion["choices"][0]["message"]["content"],
|
||||
"queries": [{"prompt": question, "response": reponse_full, "sources": source_log_docs}]}
|
||||
).inserted_id
|
||||
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
|
||||
@app.route("/stream", methods=["POST"])
|
||||
def stream():
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
# history to json object from string
|
||||
history = json.loads(history)
|
||||
conversation_id = data["conversation_id"]
|
||||
|
||||
# check if active_docs is set
|
||||
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if "active_docs" in data:
|
||||
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
|
||||
else:
|
||||
vectorstore = ""
|
||||
docsearch = get_docsearch(vectorstore, embeddings_key)
|
||||
|
||||
# question = "Hi"
|
||||
return Response(
|
||||
complete_stream(question, docsearch,
|
||||
chat_history=history, api_key=api_key,
|
||||
conversation_id=conversation_id), mimetype="text/event-stream"
|
||||
)
|
||||
|
||||
|
||||
def is_azure_configured():
|
||||
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
|
||||
|
||||
|
||||
@app.route("/api/answer", methods=["POST"])
|
||||
@@ -114,102 +301,150 @@ def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
print('-' * 5)
|
||||
if "conversation_id" not in data:
|
||||
conversation_id = None
|
||||
else:
|
||||
conversation_id = data["conversation_id"]
|
||||
print("-" * 5)
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = os.getenv("API_KEY")
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = os.getenv("EMBEDDINGS_KEY")
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
|
||||
# use try and except to check for exception
|
||||
try:
|
||||
# check if the vectorstore is set
|
||||
if "active_docs" in data:
|
||||
if data["active_docs"].split("/")[0] == "local":
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data['active_docs'] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
print(vectorstore)
|
||||
# vectorstore = "outputs/inputs/"
|
||||
vectorstore = get_vectorstore(data)
|
||||
# loading the index and the store and the prompt template
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
if embeddings_choice == "openai_text-embedding-ada-002":
|
||||
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
|
||||
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||
elif embeddings_choice == "cohere_medium":
|
||||
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||
docsearch = get_docsearch(vectorstore, embeddings_key)
|
||||
|
||||
# create a prompt template
|
||||
if history:
|
||||
history = json.loads(history)
|
||||
template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}",
|
||||
history[1])
|
||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp,
|
||||
template_format="jinja2")
|
||||
else:
|
||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
|
||||
template_format="jinja2")
|
||||
|
||||
q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
|
||||
template_format="jinja2")
|
||||
if llm_choice == "openai_chat":
|
||||
# llm = ChatOpenAI(openai_api_key=api_key, model_name="gpt-4")
|
||||
llm = ChatOpenAI(openai_api_key=api_key)
|
||||
messages_combine = [
|
||||
SystemMessagePromptTemplate.from_template(chat_combine_template),
|
||||
HumanMessagePromptTemplate.from_template("{question}")
|
||||
]
|
||||
q_prompt = PromptTemplate(
|
||||
input_variables=["context", "question"], template=template_quest, template_format="jinja2"
|
||||
)
|
||||
if settings.LLM_NAME == "openai_chat":
|
||||
if is_azure_configured():
|
||||
logger.debug("in Azure")
|
||||
llm = AzureChatOpenAI(
|
||||
openai_api_key=api_key,
|
||||
openai_api_base=settings.OPENAI_API_BASE,
|
||||
openai_api_version=settings.OPENAI_API_VERSION,
|
||||
deployment_name=settings.AZURE_DEPLOYMENT_NAME,
|
||||
)
|
||||
else:
|
||||
logger.debug("plain OpenAI")
|
||||
llm = ChatOpenAI(openai_api_key=api_key, model_name=gpt_model) # optional parameter: model_name="gpt-4"
|
||||
messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
|
||||
if history:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
history.reverse()
|
||||
for i in history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
|
||||
messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
|
||||
messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
|
||||
p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
|
||||
messages_reduce = [
|
||||
SystemMessagePromptTemplate.from_template(chat_reduce_template),
|
||||
HumanMessagePromptTemplate.from_template("{question}")
|
||||
]
|
||||
p_chat_reduce = ChatPromptTemplate.from_messages(messages_reduce)
|
||||
elif llm_choice == "openai":
|
||||
elif settings.LLM_NAME == "openai":
|
||||
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
||||
elif llm_choice == "manifest":
|
||||
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
||||
elif llm_choice == "huggingface":
|
||||
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
||||
elif llm_choice == "cohere":
|
||||
elif settings.SELF_HOSTED_MODEL:
|
||||
llm = hf
|
||||
elif settings.LLM_NAME == "cohere":
|
||||
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
||||
|
||||
if llm_choice == "openai_chat":
|
||||
chain = ChatVectorDBChain.from_llm(
|
||||
llm=llm,
|
||||
vectorstore=docsearch,
|
||||
prompt=p_chat_combine,
|
||||
qa_prompt=p_chat_reduce,
|
||||
top_k_docs_for_context=3,
|
||||
return_source_documents=False)
|
||||
result = chain({"question": question, "chat_history": []})
|
||||
else:
|
||||
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
|
||||
combine_prompt=c_prompt, question_prompt=q_prompt)
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
|
||||
raise ValueError("unknown LLM model")
|
||||
|
||||
if settings.LLM_NAME == "openai_chat":
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
|
||||
chain = ConversationalRetrievalChain(
|
||||
retriever=docsearch.as_retriever(k=2),
|
||||
question_generator=question_generator,
|
||||
combine_docs_chain=doc_chain,
|
||||
)
|
||||
chat_history = []
|
||||
# result = chain({"question": question, "chat_history": chat_history})
|
||||
# generate async with async generate method
|
||||
result = run_async_chain(chain, question, chat_history)
|
||||
elif settings.SELF_HOSTED_MODEL:
|
||||
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
|
||||
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
|
||||
chain = ConversationalRetrievalChain(
|
||||
retriever=docsearch.as_retriever(k=2),
|
||||
question_generator=question_generator,
|
||||
combine_docs_chain=doc_chain,
|
||||
)
|
||||
chat_history = []
|
||||
# result = chain({"question": question, "chat_history": chat_history})
|
||||
# generate async with async generate method
|
||||
result = run_async_chain(chain, question, chat_history)
|
||||
|
||||
else:
|
||||
qa_chain = load_qa_chain(
|
||||
llm=llm, chain_type="map_reduce", combine_prompt=chat_combine_template, question_prompt=q_prompt
|
||||
)
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
|
||||
result = chain({"query": question})
|
||||
|
||||
print(result)
|
||||
|
||||
# some formatting for the frontend
|
||||
if "result" in result:
|
||||
result['answer'] = result['result']
|
||||
result['answer'] = result['answer'].replace("\\n", "\n")
|
||||
result["answer"] = result["result"]
|
||||
result["answer"] = result["answer"].replace("\\n", "\n")
|
||||
try:
|
||||
result['answer'] = result['answer'].split("SOURCES:")[0]
|
||||
except:
|
||||
result["answer"] = result["answer"].split("SOURCES:")[0]
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
sources = docsearch.similarity_search(question, k=2)
|
||||
sources_doc = []
|
||||
for doc in sources:
|
||||
if doc.metadata:
|
||||
sources_doc.append({'title': doc.metadata['title'], 'text': doc.page_content})
|
||||
else:
|
||||
sources_doc.append({'title': doc.page_content, 'text': doc.page_content})
|
||||
result['sources'] = sources_doc
|
||||
|
||||
# generate conversationId
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question,
|
||||
"response": result["answer"], "sources": result['sources']}}},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [AIMessage(content="Summarise following conversation in no more than 3 " +
|
||||
"words, respond ONLY with the summary, use the same " +
|
||||
"language as the system \n\nUser: " + question + "\n\nAI: " +
|
||||
result["answer"]),
|
||||
HumanMessage(content="Summarise following conversation in no more than 3 words, " +
|
||||
"respond ONLY with the summary, use the same language as the " +
|
||||
"system")]
|
||||
|
||||
|
||||
# completion = openai.ChatCompletion.create(model='gpt-3.5-turbo', engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
# messages=messages_summary, max_tokens=30, temperature=0)
|
||||
completion = llm.predict_messages(messages_summary)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion.content,
|
||||
"queries": [{"prompt": question, "response": result["answer"], "sources": result['sources']}]}
|
||||
).inserted_id
|
||||
|
||||
result["conversation_id"] = str(conversation_id)
|
||||
|
||||
# mock result
|
||||
# result = {
|
||||
# "answer": "The answer is 42",
|
||||
@@ -229,16 +464,16 @@ def check_docs():
|
||||
data = request.get_json()
|
||||
# split docs on / and take first part
|
||||
if data["docs"].split("/")[0] == "local":
|
||||
return {"status": 'exists'}
|
||||
return {"status": "exists"}
|
||||
vectorstore = "vectors/" + data["docs"]
|
||||
base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
|
||||
base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
|
||||
if os.path.exists(vectorstore) or data["docs"] == "default":
|
||||
return {"status": 'exists'}
|
||||
return {"status": "exists"}
|
||||
else:
|
||||
r = requests.get(base_path + vectorstore + "index.faiss")
|
||||
|
||||
if r.status_code != 200:
|
||||
return {"status": 'null'}
|
||||
return {"status": "null"}
|
||||
else:
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
@@ -250,7 +485,7 @@ def check_docs():
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
return {"status": 'loaded'}
|
||||
return {"status": "loaded"}
|
||||
|
||||
|
||||
@app.route("/api/feedback", methods=["POST"])
|
||||
@@ -260,179 +495,225 @@ def api_feedback():
|
||||
answer = data["answer"]
|
||||
feedback = data["feedback"]
|
||||
|
||||
print('-' * 5)
|
||||
print("-" * 5)
|
||||
print("Question: " + question)
|
||||
print("Answer: " + answer)
|
||||
print("Feedback: " + feedback)
|
||||
print('-' * 5)
|
||||
print("-" * 5)
|
||||
response = requests.post(
|
||||
url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
|
||||
headers={
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
},
|
||||
data=json.dumps({
|
||||
"answer": answer,
|
||||
"question": question,
|
||||
"feedback": feedback
|
||||
})
|
||||
data=json.dumps({"answer": answer, "question": question, "feedback": feedback}),
|
||||
)
|
||||
return {"status": 'ok'}
|
||||
return {"status": http.client.responses.get(response.status_code, "ok")}
|
||||
|
||||
|
||||
@app.route('/api/combine', methods=['GET'])
|
||||
@app.route("/api/combine", methods=["GET"])
|
||||
def combined_json():
|
||||
user = 'local'
|
||||
user = "local"
|
||||
"""Provide json file with combined available indexes."""
|
||||
# get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
|
||||
|
||||
data = []
|
||||
data = [
|
||||
{
|
||||
"name": "default",
|
||||
"language": "default",
|
||||
"version": "",
|
||||
"description": "default",
|
||||
"fullName": "default",
|
||||
"date": "default",
|
||||
"docLink": "default",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
}
|
||||
]
|
||||
# structure: name, language, version, description, fullName, date, docLink
|
||||
# append data from vectors_collection
|
||||
for index in vectors_collection.find({'user': user}):
|
||||
data.append({
|
||||
"name": index['name'],
|
||||
"language": index['language'],
|
||||
"version": '',
|
||||
"description": index['name'],
|
||||
"fullName": index['name'],
|
||||
"date": index['date'],
|
||||
"docLink": index['location'],
|
||||
"model": embeddings_choice,
|
||||
"location": "local"
|
||||
})
|
||||
for index in vectors_collection.find({"user": user}):
|
||||
data.append(
|
||||
{
|
||||
"name": index["name"],
|
||||
"language": index["language"],
|
||||
"version": "",
|
||||
"description": index["name"],
|
||||
"fullName": index["name"],
|
||||
"date": index["date"],
|
||||
"docLink": index["location"],
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
}
|
||||
)
|
||||
|
||||
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
|
||||
for index in data_remote:
|
||||
index['location'] = "remote"
|
||||
index["location"] = "remote"
|
||||
data.append(index)
|
||||
|
||||
return jsonify(data)
|
||||
|
||||
|
||||
@app.route('/api/upload', methods=['POST'])
|
||||
@app.route("/api/upload", methods=["POST"])
|
||||
def upload_file():
|
||||
"""Upload a file to get vectorized and indexed."""
|
||||
if 'user' not in request.form:
|
||||
return {"status": 'no user'}
|
||||
user = secure_filename(request.form['user'])
|
||||
if 'name' not in request.form:
|
||||
return {"status": 'no name'}
|
||||
job_name = secure_filename(request.form['name'])
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = secure_filename(request.form["user"])
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
# check if the post request has the file part
|
||||
if 'file' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file = request.files['file']
|
||||
if file.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if "file" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file = request.files["file"]
|
||||
if file.filename == "":
|
||||
return {"status": "no file name"}
|
||||
|
||||
if file:
|
||||
filename = secure_filename(file.filename)
|
||||
# save dir
|
||||
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
|
||||
save_dir = os.path.join(app.config["UPLOAD_FOLDER"], user, job_name)
|
||||
# create dir if not exists
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
|
||||
file.save(os.path.join(save_dir, filename))
|
||||
task = ingest.delay('temp', [".rst", ".md", ".pdf"], job_name, filename, user)
|
||||
task = ingest.delay("temp", [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
|
||||
# task id
|
||||
task_id = task.id
|
||||
return {"status": 'ok', "task_id": task_id}
|
||||
return {"status": "ok", "task_id": task_id}
|
||||
else:
|
||||
return {"status": 'error'}
|
||||
return {"status": "error"}
|
||||
|
||||
|
||||
@app.route('/api/task_status', methods=['GET'])
|
||||
@app.route("/api/task_status", methods=["GET"])
|
||||
def task_status():
|
||||
"""Get celery job status."""
|
||||
task_id = request.args.get('task_id')
|
||||
task_id = request.args.get("task_id")
|
||||
task = AsyncResult(task_id)
|
||||
task_meta = task.info
|
||||
return {"status": task.status, "result": task_meta}
|
||||
|
||||
|
||||
### Backgound task api
|
||||
@app.route('/api/upload_index', methods=['POST'])
|
||||
@app.route("/api/upload_index", methods=["POST"])
|
||||
def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
if 'user' not in request.form:
|
||||
return {"status": 'no user'}
|
||||
user = secure_filename(request.form['user'])
|
||||
if 'name' not in request.form:
|
||||
return {"status": 'no name'}
|
||||
job_name = secure_filename(request.form['name'])
|
||||
if 'file_faiss' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file_faiss = request.files['file_faiss']
|
||||
if file_faiss.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if 'file_pkl' not in request.files:
|
||||
print('No file part')
|
||||
return {"status": 'no file'}
|
||||
file_pkl = request.files['file_pkl']
|
||||
if file_pkl.filename == '':
|
||||
return {"status": 'no file name'}
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = secure_filename(request.form["user"])
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
if "file_faiss" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file_faiss = request.files["file_faiss"]
|
||||
if file_faiss.filename == "":
|
||||
return {"status": "no file name"}
|
||||
if "file_pkl" not in request.files:
|
||||
print("No file part")
|
||||
return {"status": "no file"}
|
||||
file_pkl = request.files["file_pkl"]
|
||||
if file_pkl.filename == "":
|
||||
return {"status": "no file name"}
|
||||
|
||||
# saves index files
|
||||
save_dir = os.path.join('indexes', user, job_name)
|
||||
save_dir = os.path.join("indexes", user, job_name)
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, 'index.faiss'))
|
||||
file_pkl.save(os.path.join(save_dir, 'index.pkl'))
|
||||
file_faiss.save(os.path.join(save_dir, "index.faiss"))
|
||||
file_pkl.save(os.path.join(save_dir, "index.pkl"))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one({
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": embeddings_choice,
|
||||
"type": "local"
|
||||
})
|
||||
return {"status": 'ok'}
|
||||
vectors_collection.insert_one(
|
||||
{
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local",
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.route('/api/download', methods=['get'])
|
||||
@app.route("/api/download", methods=["get"])
|
||||
def download_file():
|
||||
user = secure_filename(request.args.get('user'))
|
||||
job_name = secure_filename(request.args.get('name'))
|
||||
filename = secure_filename(request.args.get('file'))
|
||||
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
|
||||
user = secure_filename(request.args.get("user"))
|
||||
job_name = secure_filename(request.args.get("name"))
|
||||
filename = secure_filename(request.args.get("file"))
|
||||
save_dir = os.path.join(app.config["UPLOAD_FOLDER"], user, job_name)
|
||||
return send_from_directory(save_dir, filename, as_attachment=True)
|
||||
|
||||
|
||||
@app.route('/api/delete_old', methods=['get'])
|
||||
@app.route("/api/delete_old", methods=["get"])
|
||||
def delete_old():
|
||||
"""Delete old indexes."""
|
||||
import shutil
|
||||
path = request.args.get('path')
|
||||
dirs = path.split('/')
|
||||
|
||||
path = request.args.get("path")
|
||||
dirs = path.split("/")
|
||||
dirs_clean = []
|
||||
for i in range(1, len(dirs)):
|
||||
dirs_clean.append(secure_filename(dirs[i]))
|
||||
# check that path strats with indexes or vectors
|
||||
if dirs[0] not in ['indexes', 'vectors']:
|
||||
return {"status": 'error'}
|
||||
path_clean = '/'.join(dirs)
|
||||
vectors_collection.delete_one({'location': path})
|
||||
if dirs[0] not in ["indexes", "vectors"]:
|
||||
return {"status": "error"}
|
||||
path_clean = "/".join(dirs)
|
||||
vectors_collection.delete_one({"location": path})
|
||||
try:
|
||||
shutil.rmtree(path_clean)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return {"status": 'ok'}
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@app.route("/api/get_conversations", methods=["get"])
|
||||
def get_conversations():
|
||||
# provides a list of conversations
|
||||
conversations = conversations_collection.find().sort("date", -1)
|
||||
list_conversations = []
|
||||
for conversation in conversations:
|
||||
list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
|
||||
|
||||
#list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
|
||||
|
||||
return jsonify(list_conversations)
|
||||
|
||||
@app.route("/api/get_single_conversation", methods=["get"])
|
||||
def get_single_conversation():
|
||||
# provides data for a conversation
|
||||
conversation_id = request.args.get("id")
|
||||
conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
|
||||
return jsonify(conversation['queries'])
|
||||
|
||||
@app.route("/api/delete_conversation", methods=["POST"])
|
||||
def delete_conversation():
|
||||
# deletes a conversation from the database
|
||||
conversation_id = request.args.get("id")
|
||||
# write to mongodb
|
||||
conversations_collection.delete_one(
|
||||
{
|
||||
"_id": ObjectId(conversation_id),
|
||||
}
|
||||
)
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
# handling CORS
|
||||
@app.after_request
|
||||
def after_request(response):
|
||||
response.headers.add('Access-Control-Allow-Origin', '*')
|
||||
response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
|
||||
response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
|
||||
response.headers.add('Access-Control-Allow-Credentials', 'true')
|
||||
response.headers.add("Access-Control-Allow-Origin", "*")
|
||||
response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
|
||||
response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
|
||||
response.headers.add("Access-Control-Allow-Credentials", "true")
|
||||
return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=5001)
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
8
application/celeryconfig.py
Normal file
8
application/celeryconfig.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import os
|
||||
|
||||
broker_url = os.getenv("CELERY_BROKER_URL")
|
||||
result_backend = os.getenv("CELERY_RESULT_BACKEND")
|
||||
|
||||
task_serializer = 'json'
|
||||
result_serializer = 'json'
|
||||
accept_content = ['json']
|
||||
0
application/core/__init__.py
Normal file
0
application/core/__init__.py
Normal file
27
application/core/settings.py
Normal file
27
application/core/settings.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import BaseSettings
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
LLM_NAME: str = "openai_chat"
|
||||
EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
|
||||
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
|
||||
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
|
||||
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
|
||||
MODEL_PATH: str = "./models/gpt4all-model.bin"
|
||||
TOKENS_MAX_HISTORY: int = 150
|
||||
SELF_HOSTED_MODEL: bool = False
|
||||
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
|
||||
API_KEY: str = None # LLM api key
|
||||
EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY
|
||||
OPENAI_API_BASE: str = None # azure openai api base url
|
||||
OPENAI_API_VERSION: str = None # azure openai api version
|
||||
AZURE_DEPLOYMENT_NAME: str = None # azure deployment name for answering
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: str = None # azure deployment name for embeddings
|
||||
|
||||
|
||||
path = Path(__file__).parent.parent.absolute()
|
||||
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
|
||||
@@ -1,13 +1,15 @@
|
||||
from flask import jsonify
|
||||
from werkzeug.http import HTTP_STATUS_CODES
|
||||
|
||||
def response_error(code_status,message=None):
|
||||
payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
|
||||
|
||||
def response_error(code_status, message=None):
|
||||
payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
|
||||
if message:
|
||||
payload['message'] = message
|
||||
response = jsonify(payload)
|
||||
response.status_code = code_status
|
||||
return response
|
||||
|
||||
def bad_request(status_code=400,message=''):
|
||||
return response_error(code_status=status_code,message=message)
|
||||
|
||||
def bad_request(status_code=400, message=''):
|
||||
return response_error(code_status=status_code, message=message)
|
||||
|
||||
1
application/parser/file/__init__.py
Normal file
1
application/parser/file/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -3,8 +3,7 @@ from abc import abstractmethod
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseReader:
|
||||
|
||||
@@ -3,15 +3,15 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Union
|
||||
|
||||
from parser.file.base import BaseReader
|
||||
from parser.file.base_parser import BaseParser
|
||||
from parser.file.docs_parser import DocxParser, PDFParser
|
||||
from parser.file.epub_parser import EpubParser
|
||||
from parser.file.html_parser import HTMLParser
|
||||
from parser.file.markdown_parser import MarkdownParser
|
||||
from parser.file.rst_parser import RstParser
|
||||
from parser.file.tabular_parser import PandasCSVParser
|
||||
from parser.schema.base import Document
|
||||
from application.parser.file.base import BaseReader
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
from application.parser.file.docs_parser import DocxParser, PDFParser
|
||||
from application.parser.file.epub_parser import EpubParser
|
||||
from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
@@ -52,17 +52,17 @@ class SimpleDirectoryReader(BaseReader):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_dir: Optional[str] = None,
|
||||
input_files: Optional[List] = None,
|
||||
exclude_hidden: bool = True,
|
||||
errors: str = "ignore",
|
||||
recursive: bool = True,
|
||||
required_exts: Optional[List[str]] = None,
|
||||
file_extractor: Optional[Dict[str, BaseParser]] = None,
|
||||
num_files_limit: Optional[int] = None,
|
||||
file_metadata: Optional[Callable[[str], Dict]] = None,
|
||||
chunk_size_max: int = 2048,
|
||||
self,
|
||||
input_dir: Optional[str] = None,
|
||||
input_files: Optional[List] = None,
|
||||
exclude_hidden: bool = True,
|
||||
errors: str = "ignore",
|
||||
recursive: bool = True,
|
||||
required_exts: Optional[List[str]] = None,
|
||||
file_extractor: Optional[Dict[str, BaseParser]] = None,
|
||||
num_files_limit: Optional[int] = None,
|
||||
file_metadata: Optional[Callable[[str], Dict]] = None,
|
||||
chunk_size_max: int = 2048,
|
||||
) -> None:
|
||||
"""Initialize with parameters."""
|
||||
super().__init__()
|
||||
@@ -102,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
|
||||
elif self.exclude_hidden and input_file.name.startswith("."):
|
||||
continue
|
||||
elif (
|
||||
self.required_exts is not None
|
||||
and input_file.suffix not in self.required_exts
|
||||
self.required_exts is not None
|
||||
and input_file.suffix not in self.required_exts
|
||||
):
|
||||
continue
|
||||
else:
|
||||
@@ -114,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
|
||||
new_input_files.extend(sub_input_files)
|
||||
|
||||
if self.num_files_limit is not None and self.num_files_limit > 0:
|
||||
new_input_files = new_input_files[0 : self.num_files_limit]
|
||||
new_input_files = new_input_files[0: self.num_files_limit]
|
||||
|
||||
# print total number of files added
|
||||
logging.debug(
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for docx, pdf files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class PDFParser(BaseParser):
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for epub files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class EpubParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,8 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
|
||||
"""HTML parser."""
|
||||
@@ -23,38 +24,37 @@ class HTMLParser(BaseParser):
|
||||
Union[str, List[str]]: a string or a List of strings.
|
||||
"""
|
||||
try:
|
||||
import unstructured
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
except ImportError:
|
||||
raise ValueError("unstructured package is required to parse HTML files.")
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
|
||||
# Using the unstructured library to convert the html to isd format
|
||||
# isd sample : isd = [
|
||||
# {"text": "My Title", "type": "Title"},
|
||||
# {"text": "My Narrative", "type": "NarrativeText"}
|
||||
# ]
|
||||
# {"text": "My Title", "type": "Title"},
|
||||
# {"text": "My Narrative", "type": "NarrativeText"}
|
||||
# ]
|
||||
with open(file, "r", encoding="utf-8") as fp:
|
||||
elements = partition_html(file=fp)
|
||||
isd = convert_to_isd(elements)
|
||||
isd = convert_to_isd(elements)
|
||||
|
||||
# Removing non ascii charactwers from isd_el['text']
|
||||
# Removing non ascii charactwers from isd_el['text']
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()
|
||||
|
||||
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
|
||||
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
|
||||
isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
|
||||
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
|
||||
# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
|
||||
for isd_el in isd:
|
||||
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
|
||||
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
|
||||
|
||||
# Creating a list of all the indexes of isd_el['type'] = 'Title'
|
||||
title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
|
||||
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
|
||||
|
||||
# Creating 'Chunks' - List of lists of strings
|
||||
# each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
|
||||
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
|
||||
Chunks = [[]]
|
||||
final_chunks = list(list())
|
||||
|
||||
for i,isd_el in enumerate(isd):
|
||||
for i, isd_el in enumerate(isd):
|
||||
if i in title_indexes:
|
||||
Chunks.append([])
|
||||
Chunks[-1].append(isd_el['text'])
|
||||
|
||||
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
|
||||
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25
|
||||
# TODO: This value can be an user defined variable
|
||||
for chunk in Chunks:
|
||||
# sum of lenth of all the strings in the chunk
|
||||
sum = 0
|
||||
sum += len(str(chunk))
|
||||
if sum < 25:
|
||||
Chunks.remove(chunk)
|
||||
else :
|
||||
else:
|
||||
# appending all the approved chunks to final_chunks as a single string
|
||||
final_chunks.append(" ".join([str(item) for item in chunk]))
|
||||
return final_chunks
|
||||
|
||||
@@ -7,8 +7,8 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
import tiktoken
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class MarkdownParser(BaseParser):
|
||||
@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
max_tokens: int = 2048,
|
||||
# remove_tables: bool = True,
|
||||
**kwargs: Any,
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
max_tokens: int = 2048,
|
||||
# remove_tables: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
|
||||
self._max_tokens = max_tokens
|
||||
# self._remove_tables = remove_tables
|
||||
|
||||
|
||||
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
|
||||
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
|
||||
current_text: str):
|
||||
"""Append to tups chunk."""
|
||||
num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
|
||||
if num_tokens > self._max_tokens:
|
||||
@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
|
||||
else:
|
||||
tups.append((current_header, current_text))
|
||||
return tups
|
||||
|
||||
def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
|
||||
"""Convert a markdown file to a dictionary.
|
||||
|
||||
@@ -115,7 +116,7 @@ class MarkdownParser(BaseParser):
|
||||
return {}
|
||||
|
||||
def parse_tups(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> List[Tuple[Optional[str], str]]:
|
||||
"""Parse file into tuples."""
|
||||
with open(filepath, "r") as f:
|
||||
@@ -130,7 +131,7 @@ class MarkdownParser(BaseParser):
|
||||
return markdown_tups
|
||||
|
||||
def parse_file(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> Union[str, List[str]]:
|
||||
"""Parse file into string."""
|
||||
tups = self.parse_tups(filepath, errors=errors)
|
||||
|
||||
@@ -5,10 +5,10 @@ Contains parser for md files.
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
import tiktoken
|
||||
|
||||
class RstParser(BaseParser):
|
||||
"""reStructuredText parser.
|
||||
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
remove_table_excess: bool = True,
|
||||
remove_interpreters: bool = True,
|
||||
remove_directives: bool = True,
|
||||
remove_whitespaces_excess: bool = True,
|
||||
#Be carefull with remove_characters_excess, might cause data loss
|
||||
remove_characters_excess: bool = True,
|
||||
**kwargs: Any,
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
remove_table_excess: bool = True,
|
||||
remove_interpreters: bool = True,
|
||||
remove_directives: bool = True,
|
||||
remove_whitespaces_excess: bool = True,
|
||||
# Be carefull with remove_characters_excess, might cause data loss
|
||||
remove_characters_excess: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -41,7 +41,6 @@ class RstParser(BaseParser):
|
||||
self._remove_whitespaces_excess = remove_whitespaces_excess
|
||||
self._remove_characters_excess = remove_characters_excess
|
||||
|
||||
|
||||
def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
|
||||
"""Convert a reStructuredText file to a dictionary.
|
||||
|
||||
@@ -56,7 +55,8 @@ class RstParser(BaseParser):
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
|
||||
if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
|
||||
if header_match and i > 0 and (
|
||||
len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
|
||||
if current_header is not None:
|
||||
if current_text == "" or None:
|
||||
continue
|
||||
@@ -72,7 +72,7 @@ class RstParser(BaseParser):
|
||||
|
||||
rst_tups.append((current_header, current_text))
|
||||
|
||||
#TODO: Format for rst
|
||||
# TODO: Format for rst
|
||||
#
|
||||
# if current_header is not None:
|
||||
# # pass linting, assert keys are defined
|
||||
@@ -136,7 +136,7 @@ class RstParser(BaseParser):
|
||||
return {}
|
||||
|
||||
def parse_tups(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> List[Tuple[Optional[str], str]]:
|
||||
"""Parse file into tuples."""
|
||||
with open(filepath, "r") as f:
|
||||
@@ -159,7 +159,7 @@ class RstParser(BaseParser):
|
||||
return rst_tups
|
||||
|
||||
def parse_file(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> Union[str, List[str]]:
|
||||
"""Parse file into string."""
|
||||
tups = self.parse_tups(filepath, errors=errors)
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for tabular data files.
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class CSVParser(BaseParser):
|
||||
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
concat_rows: bool = True,
|
||||
col_joiner: str = ", ",
|
||||
row_joiner: str = "\n",
|
||||
pandas_config: dict = {},
|
||||
**kwargs: Any
|
||||
self,
|
||||
*args: Any,
|
||||
concat_rows: bool = True,
|
||||
col_joiner: str = ", ",
|
||||
row_joiner: str = "\n",
|
||||
pandas_config: dict = {},
|
||||
**kwargs: Any
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import os
|
||||
|
||||
import javalang
|
||||
|
||||
|
||||
def find_files(directory):
|
||||
files_list = []
|
||||
for root, dirs, files in os.walk(directory):
|
||||
@@ -9,6 +11,7 @@ def find_files(directory):
|
||||
files_list.append(os.path.join(root, file))
|
||||
return files_list
|
||||
|
||||
|
||||
def extract_functions(file_path):
|
||||
with open(file_path, "r") as file:
|
||||
java_code = file.read()
|
||||
@@ -28,6 +31,7 @@ def extract_functions(file_path):
|
||||
methods[method_name] = method_source_code
|
||||
return methods
|
||||
|
||||
|
||||
def extract_classes(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -47,6 +51,7 @@ def extract_classes(file_path):
|
||||
classes[class_name] = class_string
|
||||
return classes
|
||||
|
||||
|
||||
def extract_functions_and_classes(directory):
|
||||
files = find_files(directory)
|
||||
functions_dict = {}
|
||||
@@ -58,4 +63,4 @@ def extract_functions_and_classes(directory):
|
||||
classes = extract_classes(file)
|
||||
if classes:
|
||||
classes_dict[file] = classes
|
||||
return functions_dict, classes_dict
|
||||
return functions_dict, classes_dict
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import esprima
|
||||
|
||||
import escodegen
|
||||
import esprima
|
||||
|
||||
|
||||
def find_files(directory):
|
||||
@@ -11,6 +12,7 @@ def find_files(directory):
|
||||
files_list.append(os.path.join(root, file))
|
||||
return files_list
|
||||
|
||||
|
||||
def extract_functions(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -26,7 +28,6 @@ def extract_functions(file_path):
|
||||
func_name = declaration.id.name if declaration.id else '<anonymous>'
|
||||
functions[func_name] = escodegen.generate(declaration.init)
|
||||
elif node.type == 'ClassDeclaration':
|
||||
class_name = node.id.name
|
||||
for subnode in node.body.body:
|
||||
if subnode.type == 'MethodDefinition':
|
||||
func_name = subnode.key.name
|
||||
@@ -38,6 +39,7 @@ def extract_functions(file_path):
|
||||
functions[func_name] = escodegen.generate(declaration.init)
|
||||
return functions
|
||||
|
||||
|
||||
def extract_classes(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -53,6 +55,7 @@ def extract_classes(file_path):
|
||||
classes[class_name] = ", ".join(function_names)
|
||||
return classes
|
||||
|
||||
|
||||
def extract_functions_and_classes(directory):
|
||||
files = find_files(directory)
|
||||
functions_dict = {}
|
||||
|
||||
@@ -1,32 +1,32 @@
|
||||
import os
|
||||
import faiss
|
||||
import pickle
|
||||
|
||||
import tiktoken
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
|
||||
#from langchain.embeddings import HuggingFaceEmbeddings
|
||||
#from langchain.embeddings import HuggingFaceInstructEmbeddings
|
||||
#from langchain.embeddings import CohereEmbeddings
|
||||
|
||||
from langchain.vectorstores import FAISS
|
||||
from retry import retry
|
||||
|
||||
|
||||
# from langchain.embeddings import HuggingFaceEmbeddings
|
||||
# from langchain.embeddings import HuggingFaceInstructEmbeddings
|
||||
# from langchain.embeddings import CohereEmbeddings
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = ((num_tokens/1000) * 0.0004)
|
||||
total_price = ((num_tokens / 1000) * 0.0004)
|
||||
return num_tokens, total_price
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def store_add_texts_with_retry(store, i):
|
||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
#store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
|
||||
|
||||
def call_openai_api(docs, folder_name, task_status):
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
|
||||
# create output folder if it doesn't exist
|
||||
if not os.path.exists(f"{folder_name}"):
|
||||
@@ -44,7 +44,8 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
# store = FAISS.from_documents(docs_test, hf)
|
||||
s1 = len(docs)
|
||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
|
||||
bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||
try:
|
||||
task_status.update_state(state='PROGRESS', meta={'current': int((c1 / s1) * 100)})
|
||||
store_add_texts_with_retry(store, i)
|
||||
@@ -58,20 +59,20 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
c1 += 1
|
||||
store.save_local(f"{folder_name}")
|
||||
|
||||
|
||||
def get_user_permission(docs, folder_name):
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
|
||||
#docs_content = (" ".join(docs))
|
||||
# docs_content = (" ".join(docs))
|
||||
docs_content = ""
|
||||
for doc in docs:
|
||||
docs_content += doc.page_content
|
||||
|
||||
|
||||
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
|
||||
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
|
||||
print(f"Number of Tokens = {format(tokens, ',d')}")
|
||||
print(f"Approx Cost = ${format(total_price, ',.2f')}")
|
||||
#Here we check for user permission before calling the API.
|
||||
# Here we check for user permission before calling the API.
|
||||
user_input = input("Price Okay? (Y/N) \n").lower()
|
||||
if user_input == "y":
|
||||
call_openai_api(docs, folder_name)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import os
|
||||
import ast
|
||||
import tiktoken
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import tiktoken
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
|
||||
def find_files(directory):
|
||||
files_list = []
|
||||
for root, dirs, files in os.walk(directory):
|
||||
@@ -13,6 +15,7 @@ def find_files(directory):
|
||||
files_list.append(os.path.join(root, file))
|
||||
return files_list
|
||||
|
||||
|
||||
def extract_functions(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -25,6 +28,7 @@ def extract_functions(file_path):
|
||||
functions[func_name] = func_def
|
||||
return functions
|
||||
|
||||
|
||||
def extract_classes(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -40,6 +44,7 @@ def extract_classes(file_path):
|
||||
classes[class_name] = ", ".join(function_names)
|
||||
return classes
|
||||
|
||||
|
||||
def extract_functions_and_classes(directory):
|
||||
files = find_files(directory)
|
||||
functions_dict = {}
|
||||
@@ -53,11 +58,12 @@ def extract_functions_and_classes(directory):
|
||||
classes_dict[file] = classes
|
||||
return functions_dict, classes_dict
|
||||
|
||||
|
||||
def parse_functions(functions_dict, formats, dir):
|
||||
c1 = len(functions_dict)
|
||||
for i, (source, functions) in enumerate(functions_dict.items(), start=1):
|
||||
print(f"Processing file {i}/{c1}")
|
||||
source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
|
||||
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
|
||||
subfolders = "/".join(source_w.split("/")[:-1])
|
||||
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
|
||||
for j, (name, function) in enumerate(functions.items(), start=1):
|
||||
@@ -70,18 +76,19 @@ def parse_functions(functions_dict, formats, dir):
|
||||
response = llm(prompt.format(code=function))
|
||||
mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
|
||||
with open(f"outputs/{source_w}", mode) as f:
|
||||
f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
|
||||
f.write(
|
||||
f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
def parse_classes(classes_dict, formats, dir):
|
||||
c1 = len(classes_dict)
|
||||
for i, (source, classes) in enumerate(classes_dict.items()):
|
||||
print(f"Processing file {i+1}/{c1}")
|
||||
source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
|
||||
print(f"Processing file {i + 1}/{c1}")
|
||||
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
|
||||
subfolders = "/".join(source_w.split("/")[:-1])
|
||||
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
|
||||
for name, function_names in classes.items():
|
||||
print(f"Processing Class {i+1}/{c1}")
|
||||
print(f"Processing Class {i + 1}/{c1}")
|
||||
prompt = PromptTemplate(
|
||||
input_variables=["class_name", "functions_names"],
|
||||
template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
|
||||
@@ -92,6 +99,7 @@ def parse_classes(classes_dict, formats, dir):
|
||||
with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
|
||||
f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
def transform_to_docs(functions_dict, classes_dict, formats, dir):
|
||||
docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
|
||||
docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
|
||||
@@ -110,4 +118,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
|
||||
parse_classes(classes_dict, formats, dir)
|
||||
print("All done!")
|
||||
else:
|
||||
print("The API was not called. No money was spent.")
|
||||
print("The API was not called. No money was spent.")
|
||||
|
||||
1
application/parser/schema/__init__.py
Normal file
1
application/parser/schema/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -2,8 +2,7 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
|
||||
from parser.schema.schema import BaseDocument
|
||||
from application.parser.schema.schema import BaseDocument
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import re
|
||||
import tiktoken
|
||||
|
||||
from typing import List
|
||||
from parser.schema.base import Document
|
||||
from math import ceil
|
||||
from typing import List
|
||||
|
||||
import tiktoken
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
def separate_header_and_body(text):
|
||||
@@ -13,6 +13,7 @@ def separate_header_and_body(text):
|
||||
body = text[len(header):]
|
||||
return header, body
|
||||
|
||||
|
||||
def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
|
||||
docs = []
|
||||
current_group = None
|
||||
@@ -23,7 +24,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
if current_group is None:
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(
|
||||
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
|
||||
current_group.text += " " + doc.text
|
||||
else:
|
||||
docs.append(current_group)
|
||||
@@ -35,6 +37,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
|
||||
return docs
|
||||
|
||||
|
||||
def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
|
||||
docs = []
|
||||
for doc in documents:
|
||||
@@ -43,6 +46,9 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(doc)
|
||||
else:
|
||||
header, body = separate_header_and_body(doc.text)
|
||||
if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
|
||||
body = doc.text
|
||||
header = ""
|
||||
num_body_parts = ceil(token_length / max_tokens)
|
||||
part_length = ceil(len(body) / num_body_parts)
|
||||
body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
|
||||
@@ -54,17 +60,18 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(new_doc)
|
||||
return docs
|
||||
|
||||
|
||||
def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
|
||||
if token_check == False:
|
||||
if not token_check:
|
||||
return documents
|
||||
print("Grouping small documents")
|
||||
try:
|
||||
documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
|
||||
except:
|
||||
except Exception:
|
||||
print("Grouping failed, try running without token_check")
|
||||
print("Separating large documents")
|
||||
try:
|
||||
documents = split_documents(documents=documents, max_tokens=max_tokens)
|
||||
except:
|
||||
except Exception:
|
||||
print("Grouping failed, try running without token_check")
|
||||
return documents
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
|
||||
Use the following pieces of context to help answer the users question.
|
||||
Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses.
|
||||
You have access to chat history, and can use it to help answer the question.
|
||||
When using code examples, use the following format:
|
||||
```(language)
|
||||
(code)
|
||||
```
|
||||
----------------
|
||||
{summaries}
|
||||
@@ -1,3 +1,3 @@
|
||||
Use the following portion of a long document to see if any of the text is relevant to answer the question.
|
||||
{context}
|
||||
Provide all relevant text to the question verbatim. Summarize if needed. If nothing relevant return "-".
|
||||
Use the following pieces of context to help answer the users question. If its not relevant to the question, respond with "-"
|
||||
----------------
|
||||
{context}
|
||||
@@ -1,5 +1,5 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.4
|
||||
aiohttp==3.8.5
|
||||
aiohttp-retry==2.8.3
|
||||
aiosignal==1.3.1
|
||||
aleph-alpha-client==2.16.1
|
||||
@@ -8,48 +8,54 @@ async-timeout==4.0.2
|
||||
attrs==22.2.0
|
||||
billiard==3.6.4.0
|
||||
blobfile==2.0.1
|
||||
boto3==1.26.84
|
||||
botocore==1.29.84
|
||||
boto3==1.28.20
|
||||
celery==5.2.7
|
||||
cffi==1.15.1
|
||||
charset-normalizer==3.1.0
|
||||
click==8.1.3
|
||||
click-didyoumean==0.3.0
|
||||
click-plugins==1.1.1
|
||||
click-repl==0.2.0
|
||||
cryptography==39.0.2
|
||||
cryptography==41.0.3
|
||||
dataclasses-json==0.5.7
|
||||
decorator==5.1.1
|
||||
deeplake==3.2.13
|
||||
dill==0.3.6
|
||||
dnspython==2.3.0
|
||||
ecdsa==0.18.0
|
||||
entrypoints==0.4
|
||||
faiss-cpu==1.7.3
|
||||
filelock==3.9.0
|
||||
Flask==2.2.3
|
||||
Flask==2.2.5
|
||||
Flask-Cors==3.0.10
|
||||
frozenlist==1.3.3
|
||||
geojson==2.5.0
|
||||
gunicorn==20.1.0
|
||||
greenlet==2.0.2
|
||||
hub==3.0.1
|
||||
huggingface-hub==0.12.1
|
||||
humbug==0.2.8
|
||||
gpt4all==0.1.7
|
||||
huggingface-hub==0.15.1
|
||||
humbug==0.3.2
|
||||
idna==3.4
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.2
|
||||
jmespath==1.0.1
|
||||
joblib==1.2.0
|
||||
kombu==5.2.4
|
||||
langchain==0.0.118
|
||||
langchain==0.0.263
|
||||
loguru==0.6.0
|
||||
lxml==4.9.2
|
||||
MarkupSafe==2.1.2
|
||||
marshmallow==3.19.0
|
||||
marshmallow-enum==1.5.1
|
||||
mpmath==1.3.0
|
||||
multidict==6.0.4
|
||||
multiprocess==0.70.14
|
||||
mypy-extensions==1.0.0
|
||||
networkx==3.0
|
||||
npx
|
||||
nltk==3.8.1
|
||||
numcodecs==0.11.0
|
||||
numpy==1.24.2
|
||||
openai==0.27.0
|
||||
openai==0.27.8
|
||||
packaging==23.0
|
||||
pathos==0.3.0
|
||||
Pillow==9.4.0
|
||||
@@ -64,29 +70,35 @@ pycryptodomex==3.17
|
||||
pydantic==1.10.5
|
||||
PyJWT==2.6.0
|
||||
pymongo==4.3.3
|
||||
pyowm==3.3.0
|
||||
PyPDF2==3.0.1
|
||||
PySocks==1.7.1
|
||||
pytest
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
python-jose==3.3.0
|
||||
pytz==2022.7.1
|
||||
PyYAML==6.0
|
||||
redis==4.5.2
|
||||
redis==4.5.4
|
||||
regex==2022.10.31
|
||||
requests==2.28.2
|
||||
requests==2.31.0
|
||||
retry==0.9.2
|
||||
rsa==4.9
|
||||
s3transfer==0.6.0
|
||||
scikit-learn==1.2.2
|
||||
scipy==1.10.1
|
||||
sentencepiece
|
||||
six==1.16.0
|
||||
SQLAlchemy==1.4.46
|
||||
sympy==1.11.1
|
||||
tenacity==8.2.2
|
||||
tiktoken==0.3.0
|
||||
tokenizers==0.13.2
|
||||
threadpoolctl==3.1.0
|
||||
tiktoken
|
||||
tqdm==4.65.0
|
||||
transformers==4.26.1
|
||||
transformers==4.30.0
|
||||
typer==0.7.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.5.0
|
||||
urllib3==1.26.14
|
||||
vine==5.0.0
|
||||
wcwidth==0.2.6
|
||||
Werkzeug==2.2.3
|
||||
yarl==1.8.2
|
||||
|
||||
@@ -1,28 +1,33 @@
|
||||
import requests
|
||||
import nltk
|
||||
import os
|
||||
|
||||
from parser.file.bulk import SimpleDirectoryReader
|
||||
from parser.schema.base import Document
|
||||
from parser.open_ai_func import call_openai_api
|
||||
from parser.token_func import group_split
|
||||
from celery import current_task
|
||||
|
||||
|
||||
import shutil
|
||||
import string
|
||||
import zipfile
|
||||
import shutil
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import nltk
|
||||
import requests
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
|
||||
try:
|
||||
nltk.download('punkt', quiet=True)
|
||||
nltk.download('averaged_perceptron_tagger', quiet=True)
|
||||
except FileExistsError:
|
||||
pass
|
||||
|
||||
|
||||
def metadata_from_filename(title):
|
||||
return {'title': title}
|
||||
|
||||
|
||||
def generate_random_string(length):
|
||||
return ''.join([string.ascii_letters[i % 52] for i in range(length)])
|
||||
|
||||
|
||||
|
||||
def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
# directory = 'inputs' or 'temp'
|
||||
# formats = [".rst", ".md"]
|
||||
@@ -39,12 +44,8 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
max_tokens = 1250
|
||||
full_path = directory + '/' + user + '/' + name_job
|
||||
# check if API_URL env variable is set
|
||||
if not os.environ.get('API_URL'):
|
||||
url = 'http://localhost:5001/api/download'
|
||||
else:
|
||||
url = os.environ.get('API_URL') + '/api/download'
|
||||
file_data = {'name': name_job, 'file': filename, 'user': user}
|
||||
response = requests.get(url, params=file_data)
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
|
||||
file = response.content
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
@@ -52,19 +53,17 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
with open(full_path + '/' + filename, 'wb') as f:
|
||||
f.write(file)
|
||||
|
||||
#check if file is .zip and extract it
|
||||
# check if file is .zip and extract it
|
||||
if filename.endswith('.zip'):
|
||||
with zipfile.ZipFile(full_path + '/' + filename, 'r') as zip_ref:
|
||||
zip_ref.extractall(full_path)
|
||||
os.remove(full_path + '/' + filename)
|
||||
|
||||
|
||||
import time
|
||||
self.update_state(state='PROGRESS', meta={'current': 1})
|
||||
|
||||
raw_docs = SimpleDirectoryReader(input_dir=full_path, input_files=input_files, recursive=recursive,
|
||||
required_exts=formats, num_files_limit=limit,
|
||||
exclude_hidden=exclude).load_data()
|
||||
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
|
||||
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
@@ -72,28 +71,26 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
call_openai_api(docs, full_path, self)
|
||||
self.update_state(state='PROGRESS', meta={'current': 100})
|
||||
|
||||
if sample == True:
|
||||
if sample:
|
||||
for i in range(min(5, len(raw_docs))):
|
||||
print(raw_docs[i].text)
|
||||
|
||||
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
||||
# and send them to the server (provide user and name in form)
|
||||
if not os.environ.get('API_URL'):
|
||||
url = 'http://localhost:5001/api/upload_index'
|
||||
else:
|
||||
url = os.environ.get('API_URL') + '/api/upload_index'
|
||||
file_data = {'name': name_job, 'user': user}
|
||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||
response = requests.post(url, files=files, data=file_data)
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
|
||||
#deletes remote
|
||||
if not os.environ.get('API_URL'):
|
||||
url = 'http://localhost:5001/api/delete_old?path=' + 'inputs/' + user + '/' + name_job
|
||||
else:
|
||||
url = os.environ.get('API_URL') + '/api/delete_old?path=' + 'inputs/' + user + '/' + name_job
|
||||
response = requests.get(url)
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path="))
|
||||
# delete local
|
||||
shutil.rmtree(full_path)
|
||||
|
||||
return {'directory': directory, 'formats': formats, 'name_job': name_job, 'filename': filename, 'user': user}
|
||||
return {
|
||||
'directory': directory,
|
||||
'formats': formats,
|
||||
'name_job': name_job,
|
||||
'filename': filename,
|
||||
'user': user,
|
||||
'limited': False
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app import app
|
||||
from application.app import app
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run()
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
71
docker-compose-azure.yaml
Normal file
71
docker-compose-azure.yaml
Normal file
@@ -0,0 +1,71 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
depends_on:
|
||||
- backend
|
||||
|
||||
backend:
|
||||
build: ./application
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- OPENAI_API_KEY=$OPENAI_API_KEY
|
||||
- OPENAI_API_BASE=$OPENAI_API_BASE
|
||||
- OPENAI_API_VERSION=$OPENAI_API_VERSION
|
||||
- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
|
||||
- AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:7091
|
||||
- OPENAI_API_KEY=$OPENAI_API_KEY
|
||||
- OPENAI_API_BASE=$OPENAI_API_BASE
|
||||
- OPENAI_API_VERSION=$OPENAI_API_VERSION
|
||||
- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
|
||||
- AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
|
||||
mongo:
|
||||
image: mongo:6
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
20
docker-compose-dev.yaml
Normal file
20
docker-compose-dev.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
|
||||
mongo:
|
||||
image: mongo:6
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
@@ -4,41 +4,45 @@ services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:5001
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
depends_on:
|
||||
- backend
|
||||
- backend
|
||||
|
||||
backend:
|
||||
build: ./application
|
||||
environment:
|
||||
- API_KEY=<your_api_key>
|
||||
- EMBEDDINGS_KEY=<your_api_key>
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- SELF_HOSTED_MODEL=$SELF_HOSTED_MODEL
|
||||
ports:
|
||||
- "5001:5001"
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- app_data_container:/app
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=<your_api_key>
|
||||
- EMBEDDINGS_KEY=<your_api_key>
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:5001
|
||||
- API_URL=http://backend:7091
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
@@ -52,8 +56,5 @@ services:
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
app_data_container:
|
||||
@@ -1,18 +1,20 @@
|
||||
import requests
|
||||
import dotenv
|
||||
import os
|
||||
import json
|
||||
import pprint
|
||||
|
||||
import dotenv
|
||||
import requests
|
||||
from flask import Flask, request
|
||||
|
||||
dotenv.load_dotenv()
|
||||
docsgpt_url = os.getenv("docsgpt_url")
|
||||
chatwoot_url = os.getenv("chatwoot_url")
|
||||
docsgpt_key = os.getenv("docsgpt_key")
|
||||
chatwoot_token = os.getenv("chatwoot_token")
|
||||
#account_id = os.getenv("account_id")
|
||||
#assignee_id = os.getenv("assignee_id")
|
||||
# account_id = os.getenv("account_id")
|
||||
# assignee_id = os.getenv("assignee_id")
|
||||
label_stop = "human-requested"
|
||||
|
||||
|
||||
def send_to_bot(sender, message):
|
||||
data = {
|
||||
'sender': sender,
|
||||
@@ -43,7 +45,6 @@ def send_to_chatwoot(account, conversation, message):
|
||||
return r.json()
|
||||
|
||||
|
||||
from flask import Flask, request
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@@ -74,7 +75,7 @@ def docsgpt():
|
||||
# elif str(assignee) != str(assignee_id):
|
||||
# return "Not the right assignee"
|
||||
|
||||
if(message_type == "incoming"):
|
||||
if (message_type == "incoming"):
|
||||
bot_response = send_to_bot(contact, message)
|
||||
create_message = send_to_chatwoot(
|
||||
account, conversation, bot_response)
|
||||
@@ -83,5 +84,6 @@ def docsgpt():
|
||||
|
||||
return create_message
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
app.run(host='0.0.0.0', port=80)
|
||||
app.run(host='0.0.0.0', port=80)
|
||||
|
||||
@@ -21,7 +21,7 @@ document.getElementById("message-form").addEventListener("submit", function(even
|
||||
}
|
||||
|
||||
// send post request to server http://127.0.0.1:5000/ with message in json body
|
||||
fetch('http://127.0.0.1:5001/api/answer', {
|
||||
fetch('http://127.0.0.1:7091/api/answer', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
|
||||
@@ -10,8 +10,8 @@ dotenv.load_dotenv()
|
||||
|
||||
# Replace 'YOUR_BOT_TOKEN' with your bot's token
|
||||
TOKEN = os.getenv("DISCORD_TOKEN")
|
||||
PREFIX = '@docsgpt '
|
||||
BASE_API_URL = 'http://localhost:5001'
|
||||
PREFIX = '@DocsGPT'
|
||||
BASE_API_URL = 'http://localhost:7091'
|
||||
|
||||
intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
@@ -20,13 +20,11 @@ bot = commands.Bot(command_prefix=PREFIX, intents=intents)
|
||||
|
||||
|
||||
def split_string(input_str):
|
||||
pattern = r'<(.*?)>'
|
||||
match = re.search(pattern, input_str)
|
||||
|
||||
pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
|
||||
match = re.match(pattern, input_str)
|
||||
if match:
|
||||
content = match.group(1)
|
||||
rest = input_str[:match.start()] + input_str[match.end():]
|
||||
return content, rest.strip()
|
||||
content = input_str[match.end():].strip()
|
||||
return str(bot.user.id), content
|
||||
return None, input_str
|
||||
|
||||
|
||||
@@ -59,8 +57,8 @@ async def on_message(message):
|
||||
if prefix is None:
|
||||
return
|
||||
|
||||
part_prefix = "@"
|
||||
if part_prefix in prefix:
|
||||
part_prefix = str(bot.user.id)
|
||||
if part_prefix == prefix:
|
||||
answer = await fetch_answer(content)
|
||||
await message.channel.send(answer)
|
||||
|
||||
|
||||
25
extensions/web-widget/README.md
Normal file
25
extensions/web-widget/README.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# Chat Widget
|
||||
|
||||
A simple chat widget that can be easily integrated into any website.
|
||||
|
||||
## Installation
|
||||
|
||||
1. Host the `widget.html`, `styles.css`, and `script.js` files from the `src` folder on your own server or a Content Delivery Network (CDN). Make sure to note the URLs for these files.
|
||||
|
||||
2. Update the URLs in the `dist/chat-widget.js` file to match the locations of your hosted files:
|
||||
|
||||
```javascript
|
||||
fetch("https://your-server-or-cdn.com/path/to/widget.html"),
|
||||
fetch("https://your-server-or-cdn.com/path/to/styles.css"),
|
||||
fetch("https://your-server-or-cdn.com/path/to/script.js"),
|
||||
```
|
||||
|
||||
3. Host the `dist/chat-widget.js` file on your own server or a Content Delivery Network (CDN). Make sure to note the URL for this file.
|
||||
|
||||
|
||||
##Integration
|
||||
|
||||
To integrate the chat widget into a website, add the following script tag to the HTML file, replacing URL_TO_CHAT_WIDGET_JS with the actual URL of your hosted chat-widget.js file:
|
||||
```javascript
|
||||
<script src="URL_TO_CHAT_WIDGET_JS"></script>
|
||||
```
|
||||
41
extensions/web-widget/dist/chat-widget.js
vendored
Normal file
41
extensions/web-widget/dist/chat-widget.js
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
(async function () {
|
||||
// Fetch the HTML, CSS, and JavaScript from your server or CDN
|
||||
const [htmlRes, jsRes] = await Promise.all([
|
||||
fetch("https://s3-eu-west-2.amazonaws.com/arc53data/widget.html"),
|
||||
// fetch("https://s3-eu-west-2.amazonaws.com/arc53data/tailwind.css"),
|
||||
fetch("https://s3-eu-west-2.amazonaws.com/arc53data/script.js"),
|
||||
]);
|
||||
|
||||
const html = await htmlRes.text();
|
||||
//const css = await cssRes.text();
|
||||
const js = await jsRes.text();
|
||||
|
||||
// create a new link element
|
||||
const link = document.createElement("link");
|
||||
|
||||
//set the rel, href, type, and integrity attributes
|
||||
link.rel = "stylesheet";
|
||||
link.href = "https://cdn.tailwindcss.com/";
|
||||
link.type = "text/css";
|
||||
link.integrity = "sha384-PDOmVviaTm8N1W35y1NSmo80w6GPaGhbDuOBAF/5hRffaeGc6yOwIo1qAt4gqLGA%";
|
||||
|
||||
// get the document head and append the link element to it
|
||||
// document.head.appendChild(link);
|
||||
|
||||
|
||||
|
||||
// Create a style element for the CSS
|
||||
// const style = document.createElement("style");
|
||||
// style.innerHTML = css;
|
||||
// document.head.appendChild(style);
|
||||
|
||||
// Create a container for the chat widget and inject the HTML
|
||||
const chatWidgetContainer = document.createElement("div");
|
||||
chatWidgetContainer.innerHTML = html;
|
||||
document.body.appendChild(chatWidgetContainer);
|
||||
|
||||
// Execute the JavaScript code
|
||||
const script = document.createElement("script");
|
||||
script.innerHTML = js;
|
||||
document.body.appendChild(script);
|
||||
})();
|
||||
807
extensions/web-widget/dist/output.css
vendored
Normal file
807
extensions/web-widget/dist/output.css
vendored
Normal file
@@ -0,0 +1,807 @@
|
||||
/*
|
||||
! tailwindcss v3.3.1 | MIT License | https://tailwindcss.com
|
||||
*/
|
||||
|
||||
/*
|
||||
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
|
||||
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
|
||||
*/
|
||||
|
||||
*,
|
||||
::before,
|
||||
::after {
|
||||
box-sizing: border-box;
|
||||
/* 1 */
|
||||
border-width: 0;
|
||||
/* 2 */
|
||||
border-style: solid;
|
||||
/* 2 */
|
||||
border-color: #e5e7eb;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
::before,
|
||||
::after {
|
||||
--tw-content: '';
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use a consistent sensible line-height in all browsers.
|
||||
2. Prevent adjustments of font size after orientation changes in iOS.
|
||||
3. Use a more readable tab size.
|
||||
4. Use the user's configured `sans` font-family by default.
|
||||
5. Use the user's configured `sans` font-feature-settings by default.
|
||||
6. Use the user's configured `sans` font-variation-settings by default.
|
||||
*/
|
||||
|
||||
html {
|
||||
line-height: 1.5;
|
||||
/* 1 */
|
||||
-webkit-text-size-adjust: 100%;
|
||||
/* 2 */
|
||||
-moz-tab-size: 4;
|
||||
/* 3 */
|
||||
-o-tab-size: 4;
|
||||
tab-size: 4;
|
||||
/* 3 */
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
||||
/* 4 */
|
||||
font-feature-settings: normal;
|
||||
/* 5 */
|
||||
font-variation-settings: normal;
|
||||
/* 6 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove the margin in all browsers.
|
||||
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
|
||||
*/
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Add the correct height in Firefox.
|
||||
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
|
||||
3. Ensure horizontal rules are visible by default.
|
||||
*/
|
||||
|
||||
hr {
|
||||
height: 0;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 2 */
|
||||
border-top-width: 1px;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct text decoration in Chrome, Edge, and Safari.
|
||||
*/
|
||||
|
||||
abbr:where([title]) {
|
||||
-webkit-text-decoration: underline dotted;
|
||||
text-decoration: underline dotted;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the default font size and weight for headings.
|
||||
*/
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
font-size: inherit;
|
||||
font-weight: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Reset links to optimize for opt-in styling instead of opt-out.
|
||||
*/
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font weight in Edge and Safari.
|
||||
*/
|
||||
|
||||
b,
|
||||
strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use the user's configured `mono` font family by default.
|
||||
2. Correct the odd `em` font sizing in all browsers.
|
||||
*/
|
||||
|
||||
code,
|
||||
kbd,
|
||||
samp,
|
||||
pre {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
/* 1 */
|
||||
font-size: 1em;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font size in all browsers.
|
||||
*/
|
||||
|
||||
small {
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
|
||||
*/
|
||||
|
||||
sub,
|
||||
sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
|
||||
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
|
||||
3. Remove gaps between table borders by default.
|
||||
*/
|
||||
|
||||
table {
|
||||
text-indent: 0;
|
||||
/* 1 */
|
||||
border-color: inherit;
|
||||
/* 2 */
|
||||
border-collapse: collapse;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Change the font styles in all browsers.
|
||||
2. Remove the margin in Firefox and Safari.
|
||||
3. Remove default padding in all browsers.
|
||||
*/
|
||||
|
||||
button,
|
||||
input,
|
||||
optgroup,
|
||||
select,
|
||||
textarea {
|
||||
font-family: inherit;
|
||||
/* 1 */
|
||||
font-size: 100%;
|
||||
/* 1 */
|
||||
font-weight: inherit;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 1 */
|
||||
margin: 0;
|
||||
/* 2 */
|
||||
padding: 0;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inheritance of text transform in Edge and Firefox.
|
||||
*/
|
||||
|
||||
button,
|
||||
select {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Remove default button styles.
|
||||
*/
|
||||
|
||||
button,
|
||||
[type='button'],
|
||||
[type='reset'],
|
||||
[type='submit'] {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
background-color: transparent;
|
||||
/* 2 */
|
||||
background-image: none;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Use the modern Firefox focus style for all focusable elements.
|
||||
*/
|
||||
|
||||
:-moz-focusring {
|
||||
outline: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
|
||||
*/
|
||||
|
||||
:-moz-ui-invalid {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct vertical alignment in Chrome and Firefox.
|
||||
*/
|
||||
|
||||
progress {
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
/*
|
||||
Correct the cursor style of increment and decrement buttons in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-inner-spin-button,
|
||||
::-webkit-outer-spin-button {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the odd appearance in Chrome and Safari.
|
||||
2. Correct the outline style in Safari.
|
||||
*/
|
||||
|
||||
[type='search'] {
|
||||
-webkit-appearance: textfield;
|
||||
/* 1 */
|
||||
outline-offset: -2px;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inner padding in Chrome and Safari on macOS.
|
||||
*/
|
||||
|
||||
::-webkit-search-decoration {
|
||||
-webkit-appearance: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Change font properties to `inherit` in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-file-upload-button {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
font: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct display in Chrome and Safari.
|
||||
*/
|
||||
|
||||
summary {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/*
|
||||
Removes the default spacing and border for appropriate elements.
|
||||
*/
|
||||
|
||||
blockquote,
|
||||
dl,
|
||||
dd,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
hr,
|
||||
figure,
|
||||
p,
|
||||
pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
fieldset {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
legend {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ol,
|
||||
ul,
|
||||
menu {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent resizing textareas horizontally by default.
|
||||
*/
|
||||
|
||||
textarea {
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
|
||||
2. Set the default placeholder color to the user's configured gray 400 color.
|
||||
*/
|
||||
|
||||
input::-moz-placeholder, textarea::-moz-placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
input::placeholder,
|
||||
textarea::placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Set the default cursor for buttons.
|
||||
*/
|
||||
|
||||
button,
|
||||
[role="button"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure disabled buttons don't get the pointer cursor.
|
||||
*/
|
||||
|
||||
:disabled {
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block;
|
||||
/* 1 */
|
||||
vertical-align: middle;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Make elements with the HTML hidden attribute stay hidden by default */
|
||||
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
*, ::before, ::after {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
::backdrop {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
.fixed {
|
||||
position: fixed;
|
||||
}
|
||||
|
||||
.absolute {
|
||||
position: absolute;
|
||||
}
|
||||
|
||||
.relative {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.inset-y-0 {
|
||||
top: 0px;
|
||||
bottom: 0px;
|
||||
}
|
||||
|
||||
.bottom-5 {
|
||||
bottom: 1.25rem;
|
||||
}
|
||||
|
||||
.left-5 {
|
||||
left: 1.25rem;
|
||||
}
|
||||
|
||||
.right-2 {
|
||||
right: 0.5rem;
|
||||
}
|
||||
|
||||
.z-50 {
|
||||
z-index: 50;
|
||||
}
|
||||
|
||||
.m-0 {
|
||||
margin: 0px;
|
||||
}
|
||||
|
||||
.-mx-2 {
|
||||
margin-left: -0.5rem;
|
||||
margin-right: -0.5rem;
|
||||
}
|
||||
|
||||
.mt-1 {
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.flex-1 {
|
||||
flex: 1 1 0%;
|
||||
}
|
||||
|
||||
.transform {
|
||||
transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
|
||||
}
|
||||
|
||||
.items-center {
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.justify-center {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.gap-2 {
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.divide-y > :not([hidden]) ~ :not([hidden]) {
|
||||
--tw-divide-y-reverse: 0;
|
||||
border-top-width: calc(1px * calc(1 - var(--tw-divide-y-reverse)));
|
||||
border-bottom-width: calc(1px * var(--tw-divide-y-reverse));
|
||||
}
|
||||
|
||||
.rounded-md {
|
||||
border-radius: 0.375rem;
|
||||
}
|
||||
|
||||
.rounded-b {
|
||||
border-bottom-right-radius: 0.25rem;
|
||||
border-bottom-left-radius: 0.25rem;
|
||||
}
|
||||
|
||||
.border {
|
||||
border-width: 1px;
|
||||
}
|
||||
|
||||
.bg-transparent {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.bg-gradient-to-br {
|
||||
background-image: linear-gradient(to bottom right, var(--tw-gradient-stops));
|
||||
}
|
||||
|
||||
.from-gray-100\/80 {
|
||||
--tw-gradient-from: rgb(243 244 246 / 0.8) var(--tw-gradient-from-position);
|
||||
--tw-gradient-from-position: ;
|
||||
--tw-gradient-to: rgb(243 244 246 / 0) var(--tw-gradient-from-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.via-white {
|
||||
--tw-gradient-via-position: ;
|
||||
--tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), #fff var(--tw-gradient-via-position), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.to-white {
|
||||
--tw-gradient-to: #fff var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
}
|
||||
|
||||
.p-3 {
|
||||
padding: 0.75rem;
|
||||
}
|
||||
|
||||
.px-2 {
|
||||
padding-left: 0.5rem;
|
||||
padding-right: 0.5rem;
|
||||
}
|
||||
|
||||
.px-5 {
|
||||
padding-left: 1.25rem;
|
||||
padding-right: 1.25rem;
|
||||
}
|
||||
|
||||
.py-3 {
|
||||
padding-top: 0.75rem;
|
||||
padding-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
.pl-5 {
|
||||
padding-left: 1.25rem;
|
||||
}
|
||||
|
||||
.pr-8 {
|
||||
padding-right: 2rem;
|
||||
}
|
||||
|
||||
.font-sans {
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.25rem;
|
||||
}
|
||||
|
||||
.text-xs {
|
||||
font-size: 0.75rem;
|
||||
line-height: 1rem;
|
||||
}
|
||||
|
||||
.font-bold {
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.text-gray-400 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(156 163 175 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-600 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(75 85 99 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(55 65 81 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-800 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(31 41 55 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.shadow {
|
||||
--tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
|
||||
--tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
|
||||
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
|
||||
}
|
||||
|
||||
.backdrop-blur-sm {
|
||||
--tw-backdrop-blur: blur(4px);
|
||||
-webkit-backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
|
||||
backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
|
||||
}
|
||||
|
||||
.transition {
|
||||
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, -webkit-backdrop-filter;
|
||||
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter;
|
||||
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter, -webkit-backdrop-filter;
|
||||
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
|
||||
transition-duration: 150ms;
|
||||
}
|
||||
|
||||
.delay-200 {
|
||||
transition-delay: 200ms;
|
||||
}
|
||||
|
||||
.duration-300 {
|
||||
transition-duration: 300ms;
|
||||
}
|
||||
|
||||
.hover\:bg-gray-100:hover {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(243 244 246 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.focus\:outline-none:focus {
|
||||
outline: 2px solid transparent;
|
||||
outline-offset: 2px;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
.dark\:divide-gray-700 > :not([hidden]) ~ :not([hidden]) {
|
||||
--tw-divide-opacity: 1;
|
||||
border-color: rgb(55 65 81 / var(--tw-divide-opacity));
|
||||
}
|
||||
|
||||
.dark\:border-gray-700 {
|
||||
--tw-border-opacity: 1;
|
||||
border-color: rgb(55 65 81 / var(--tw-border-opacity));
|
||||
}
|
||||
|
||||
.dark\:from-gray-900\/80 {
|
||||
--tw-gradient-from: rgb(17 24 39 / 0.8) var(--tw-gradient-from-position);
|
||||
--tw-gradient-from-position: ;
|
||||
--tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-from-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.dark\:via-gray-900 {
|
||||
--tw-gradient-via-position: ;
|
||||
--tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), #111827 var(--tw-gradient-via-position), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.dark\:to-gray-900 {
|
||||
--tw-gradient-to: #111827 var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
}
|
||||
|
||||
.dark\:text-gray-200 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(229 231 235 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:text-gray-300 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(209 213 219 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:text-gray-500 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(107 114 128 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:text-white {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(255 255 255 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:hover\:bg-gray-800\/70:hover {
|
||||
background-color: rgb(31 41 55 / 0.7);
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
.md\:pl-0 {
|
||||
padding-left: 0px;
|
||||
}
|
||||
}
|
||||
12
extensions/web-widget/index.html
Normal file
12
extensions/web-widget/index.html
Normal file
@@ -0,0 +1,12 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Chat Widget Test</title>
|
||||
<link href="dist/output.css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<script src="dist/chat-widget.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
1002
extensions/web-widget/package-lock.json
generated
Normal file
1002
extensions/web-widget/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
15
extensions/web-widget/package.json
Normal file
15
extensions/web-widget/package.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "web-widget",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"keywords": [],
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"devDependencies": {
|
||||
"tailwindcss": "^3.3.1"
|
||||
}
|
||||
}
|
||||
58
extensions/web-widget/src/html/widget.html
Normal file
58
extensions/web-widget/src/html/widget.html
Normal file
@@ -0,0 +1,58 @@
|
||||
<div id="docsgpt-widget" class="dark fixed bottom-5 left-5 pl-5 md:pl-0 z-50">
|
||||
<style>
|
||||
@keyframes dotBounce {
|
||||
0%, 80%, 100% {
|
||||
transform: translateY(0);
|
||||
}
|
||||
40% {
|
||||
transform: translateY(-5px);
|
||||
}
|
||||
}
|
||||
|
||||
.dot-animation {
|
||||
display: inline-block;
|
||||
animation: dotBounce 1s infinite ease-in-out;
|
||||
}
|
||||
|
||||
.delay-200 {
|
||||
animation-delay: 200ms;
|
||||
}
|
||||
|
||||
.delay-400 {
|
||||
animation-delay: 400ms;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<div class="divide-y dark:divide-gray-700 rounded-md border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm" style="width: 18rem; transform: translateY(0%) translateZ(0px);"><div>
|
||||
<div class="flex items-center gap-2 p-3">
|
||||
<div id="docsgpt-init-message" class="flex-1">
|
||||
<h3 class="text-sm font-bold text-gray-700 dark:text-gray-200">Looking for help with documentation?</h3>
|
||||
<p class="mt-1 text-xs text-gray-400 dark:text-gray-500">DocsGPT AI assistant will help you with docs</p>
|
||||
</div>
|
||||
<div id="docsgpt-answer" class="hidden">
|
||||
<p class="mt-1 text-xs text-gray-600 dark:text-gray-300">Come cool answer</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="w-full">
|
||||
<button id="ask-docsgpt" class="flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 hover:bg-gray-100 rounded-b dark:hover:bg-gray-800/70">
|
||||
Ask DocsGPT
|
||||
</button>
|
||||
|
||||
<form id="docsgpt-chat-form" class="relative w-full m-0 hidden" style="opacity: 1;" data-projection-id="1">
|
||||
<input id="docsgpt-chat-input" type="text" class="w-full bg-transparent px-5 py-3 pr-8 text-sm text-gray-700 dark:text-white focus:outline-none" placeholder="What do you want to do?" value="">
|
||||
<button class="absolute inset-y-0 right-2 -mx-2 px-2" type="submit" style="opacity: 0;" data-projection-id="2">
|
||||
|
||||
</button>
|
||||
</form>
|
||||
<p id="docsgpt-chat-processing" class="hidden flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 rounded-b animate-fadeIn animate-2s">
|
||||
Processing<span class="dot-animation">.</span><span class="dot-animation delay-200">.</span><span class="dot-animation delay-400">.</span>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
3
extensions/web-widget/src/input.css
Normal file
3
extensions/web-widget/src/input.css
Normal file
@@ -0,0 +1,3 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
56
extensions/web-widget/src/js/script.js
Normal file
56
extensions/web-widget/src/js/script.js
Normal file
@@ -0,0 +1,56 @@
|
||||
const API_ENDPOINT = "http://localhost:7091/api/answer"; // Replace with your API endpoint
|
||||
|
||||
const widgetInitMessage = document.getElementById("docsgpt-init-message");
|
||||
const widgetAnswerMessage = document.getElementById("docsgpt-answer");
|
||||
const widgetAnswerMessageP = widgetAnswerMessage.querySelector("p");
|
||||
const askDocsGPTButton = document.getElementById("ask-docsgpt");
|
||||
const chatInput = document.getElementById("docsgpt-chat-input");
|
||||
const chatForm = document.getElementById("docsgpt-chat-form");
|
||||
const chatProcessing = document.getElementById("docsgpt-chat-processing");
|
||||
|
||||
async function sendMessage(message) {
|
||||
const requestData = {
|
||||
"question": message,
|
||||
"active_docs": "default",
|
||||
"api_key": "token",
|
||||
"embeddings_key": "token",
|
||||
"model": "default",
|
||||
"history": null,
|
||||
}
|
||||
const response = await fetch(API_ENDPOINT, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
body: JSON.stringify(requestData),
|
||||
});
|
||||
const data = await response.json();
|
||||
return data.answer;
|
||||
}
|
||||
|
||||
askDocsGPTButton.addEventListener("click", () => {
|
||||
askDocsGPTButton.classList.add("hidden");
|
||||
chatForm.classList.remove("hidden");
|
||||
chatForm.focus();
|
||||
widgetInitMessage.classList.remove("hidden");
|
||||
widgetAnswerMessage.classList.add("hidden");
|
||||
|
||||
|
||||
});
|
||||
|
||||
chatForm.addEventListener("submit", async (e) => {
|
||||
e.preventDefault();
|
||||
const message = chatInput.value.trim();
|
||||
if (!message) return;
|
||||
|
||||
chatInput.value = "";
|
||||
chatForm.classList.add("hidden");
|
||||
chatProcessing.classList.remove("hidden");
|
||||
|
||||
const reply = await sendMessage(message);
|
||||
chatProcessing.classList.add("hidden");
|
||||
|
||||
// inside <p> tag
|
||||
widgetAnswerMessageP.innerHTML = reply;
|
||||
widgetAnswerMessage.classList.remove("hidden");
|
||||
widgetInitMessage.classList.add("hidden");
|
||||
askDocsGPTButton.classList.remove("hidden");
|
||||
});
|
||||
10
extensions/web-widget/tailwind.config.js
Normal file
10
extensions/web-widget/tailwind.config.js
Normal file
@@ -0,0 +1,10 @@
|
||||
/** @type {import('tailwindcss').Config} */
|
||||
module.exports = {
|
||||
content: ["./src/**/*.{html,js}"],
|
||||
theme: {
|
||||
extend: {},
|
||||
},
|
||||
plugins: [],
|
||||
}
|
||||
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
# Please put appropriate value
|
||||
VITE_API_HOST = http://localhost:5001
|
||||
VITE_API_HOST=http://localhost:7091
|
||||
@@ -1 +1 @@
|
||||
VITE_API_HOST = https://docsapi.arc53.com
|
||||
VITE_API_HOST = https://gptcloud.arc53.com
|
||||
6023
frontend/package-lock.json
generated
6023
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -24,12 +24,15 @@
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-dropzone": "^14.2.3",
|
||||
"react-markdown": "^8.0.7",
|
||||
"react-redux": "^8.0.5",
|
||||
"react-router-dom": "^6.8.1"
|
||||
"react-router-dom": "^6.8.1",
|
||||
"react-syntax-highlighter": "^15.5.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react": "^18.0.27",
|
||||
"@types/react-dom": "^18.0.10",
|
||||
"@types/react-syntax-highlighter": "^15.5.6",
|
||||
"@typescript-eslint/eslint-plugin": "^5.51.0",
|
||||
"@typescript-eslint/parser": "^5.51.0",
|
||||
"@vitejs/plugin-react": "^3.1.0",
|
||||
@@ -50,7 +53,7 @@
|
||||
"prettier-plugin-tailwindcss": "^0.2.2",
|
||||
"tailwindcss": "^3.2.4",
|
||||
"typescript": "^4.9.5",
|
||||
"vite": "^4.1.0",
|
||||
"vite": "^4.1.5",
|
||||
"vite-plugin-svgr": "^2.4.0"
|
||||
}
|
||||
}
|
||||
|
||||
47
frontend/src/Modal/index.tsx
Normal file
47
frontend/src/Modal/index.tsx
Normal file
@@ -0,0 +1,47 @@
|
||||
import * as React from 'react';
|
||||
|
||||
interface ModalProps {
|
||||
handleSubmit: () => void;
|
||||
isCancellable: boolean;
|
||||
handleCancel?: () => void;
|
||||
render: () => JSX.Element;
|
||||
modalState: string;
|
||||
isError: boolean;
|
||||
errorMessage?: string;
|
||||
}
|
||||
const Modal = (props: ModalProps) => {
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
props.modalState === 'ACTIVE' ? 'visible' : 'hidden'
|
||||
} absolute z-30 h-screen w-screen bg-gray-alpha`}
|
||||
>
|
||||
{props.render()}
|
||||
<div className=" mx-auto flex w-[90vw] max-w-lg flex-row-reverse rounded-lg bg-white pb-5 pr-5 shadow-lg">
|
||||
<div>
|
||||
<button
|
||||
onClick={() => props.handleSubmit()}
|
||||
className="ml-auto h-10 w-20 rounded-lg bg-violet-800 text-white transition-all hover:bg-violet-700"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
{props.isCancellable && (
|
||||
<button
|
||||
onClick={() => props.handleCancel && props.handleCancel()}
|
||||
className="ml-5 h-10 w-20 rounded-lg border border-violet-700 bg-white text-violet-800 transition-all hover:bg-violet-700 hover:text-white"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{props.isError && (
|
||||
<p className="mx-auto mt-2 mr-auto text-sm text-red-500">
|
||||
{props.errorMessage}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default Modal;
|
||||
@@ -19,10 +19,17 @@ import {
|
||||
selectSelectedDocsStatus,
|
||||
selectSourceDocs,
|
||||
setSelectedDocs,
|
||||
selectConversations,
|
||||
setConversations,
|
||||
selectConversationId,
|
||||
} from './preferences/preferenceSlice';
|
||||
import {
|
||||
setConversation,
|
||||
updateConversationId,
|
||||
} from './conversation/conversationSlice';
|
||||
import { useOutsideAlerter } from './hooks';
|
||||
import Upload from './upload/Upload';
|
||||
import { Doc } from './preferences/preferenceApi';
|
||||
import { Doc, getConversations } from './preferences/preferenceApi';
|
||||
|
||||
export default function Navigation({
|
||||
navState,
|
||||
@@ -34,13 +41,14 @@ export default function Navigation({
|
||||
const dispatch = useDispatch();
|
||||
const docs = useSelector(selectSourceDocs);
|
||||
const selectedDocs = useSelector(selectSelectedDocs);
|
||||
const conversations = useSelector(selectConversations);
|
||||
const conversationId = useSelector(selectConversationId);
|
||||
|
||||
const [isDocsListOpen, setIsDocsListOpen] = useState(false);
|
||||
|
||||
const isApiKeySet = useSelector(selectApiKeyStatus);
|
||||
const [apiKeyModalState, setApiKeyModalState] = useState<ActiveState>(
|
||||
isApiKeySet ? 'INACTIVE' : 'ACTIVE',
|
||||
);
|
||||
const [apiKeyModalState, setApiKeyModalState] =
|
||||
useState<ActiveState>('INACTIVE');
|
||||
|
||||
const isSelectedDocsSet = useSelector(selectSelectedDocsStatus);
|
||||
const [selectedDocsModalState, setSelectedDocsModalState] =
|
||||
@@ -52,6 +60,33 @@ export default function Navigation({
|
||||
const navRef = useRef(null);
|
||||
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
|
||||
|
||||
useEffect(() => {
|
||||
if (!conversations) {
|
||||
getConversations()
|
||||
.then((fetchedConversations) => {
|
||||
dispatch(setConversations(fetchedConversations));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Failed to fetch conversations: ', error);
|
||||
});
|
||||
}
|
||||
}, [conversations, dispatch]);
|
||||
|
||||
const handleDeleteConversation = (id: string) => {
|
||||
fetch(`${apiHost}/api/delete_conversation?id=${id}`, {
|
||||
method: 'POST',
|
||||
})
|
||||
.then(() => {
|
||||
// remove the image element from the DOM
|
||||
const imageElement = document.querySelector(
|
||||
`#img-${id}`,
|
||||
) as HTMLElement;
|
||||
const parentElement = imageElement.parentNode as HTMLElement;
|
||||
parentElement.parentNode?.removeChild(parentElement);
|
||||
})
|
||||
.catch((error) => console.error(error));
|
||||
};
|
||||
|
||||
const handleDeleteClick = (index: number, doc: Doc) => {
|
||||
const docPath = 'indexes/' + 'local' + '/' + doc.name;
|
||||
|
||||
@@ -68,6 +103,22 @@ export default function Navigation({
|
||||
})
|
||||
.catch((error) => console.error(error));
|
||||
};
|
||||
|
||||
const handleConversationClick = (index: string) => {
|
||||
// fetch the conversation from the server and setConversation in the store
|
||||
fetch(`${apiHost}/api/get_single_conversation?id=${index}`, {
|
||||
method: 'GET',
|
||||
})
|
||||
.then((response) => response.json())
|
||||
.then((data) => {
|
||||
dispatch(setConversation(data));
|
||||
dispatch(
|
||||
updateConversationId({
|
||||
query: { conversationId: index },
|
||||
}),
|
||||
);
|
||||
});
|
||||
};
|
||||
useOutsideAlerter(
|
||||
navRef,
|
||||
() => {
|
||||
@@ -122,15 +173,56 @@ export default function Navigation({
|
||||
</div>
|
||||
<NavLink
|
||||
to={'/'}
|
||||
onClick={() => {
|
||||
dispatch(setConversation([]));
|
||||
dispatch(updateConversationId({ query: { conversationId: null } }));
|
||||
}}
|
||||
className={({ isActive }) =>
|
||||
`${
|
||||
isActive ? 'bg-gray-3000' : ''
|
||||
isActive && conversationId === null ? 'bg-gray-3000' : ''
|
||||
} my-auto mx-4 mt-4 flex h-12 cursor-pointer gap-4 rounded-md hover:bg-gray-100`
|
||||
}
|
||||
>
|
||||
<img src={Message} className="ml-2 w-5"></img>
|
||||
<p className="my-auto text-eerie-black">Chat</p>
|
||||
<p className="my-auto text-eerie-black">New Chat</p>
|
||||
</NavLink>
|
||||
<div className="conversations-container max-h-[25rem] overflow-y-auto">
|
||||
{conversations
|
||||
? conversations.map((conversation) => {
|
||||
return (
|
||||
<div
|
||||
key={conversation.id}
|
||||
onClick={() => {
|
||||
handleConversationClick(conversation.id);
|
||||
}}
|
||||
className={`my-auto mx-4 mt-4 flex h-12 cursor-pointer items-center justify-between gap-4 rounded-md hover:bg-gray-100 ${
|
||||
conversationId === conversation.id ? 'bg-gray-100' : ''
|
||||
}`}
|
||||
>
|
||||
<div className="flex gap-4">
|
||||
<img src={Message} className="ml-2 w-5"></img>
|
||||
<p className="my-auto text-eerie-black">
|
||||
{conversation.name}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{conversationId === conversation.id ? (
|
||||
<img
|
||||
src={Exit}
|
||||
alt="Exit"
|
||||
className="mr-4 h-3 w-3 cursor-pointer hover:opacity-50"
|
||||
id={`img-${conversation.id}`}
|
||||
onClick={(event) => {
|
||||
event.stopPropagation();
|
||||
handleDeleteConversation(conversation.id);
|
||||
}}
|
||||
/>
|
||||
) : null}
|
||||
</div>
|
||||
);
|
||||
})
|
||||
: null}
|
||||
</div>
|
||||
|
||||
<div className="flex-grow border-b-2 border-gray-100"></div>
|
||||
<div className="flex flex-col-reverse border-b-2">
|
||||
@@ -148,7 +240,7 @@ export default function Navigation({
|
||||
src={Arrow2}
|
||||
alt="arrow"
|
||||
className={`${
|
||||
isDocsListOpen ? 'rotate-0' : '-rotate-90'
|
||||
isDocsListOpen ? 'rotate-0' : 'rotate-180'
|
||||
} mr-3 w-3 transition-all`}
|
||||
/>
|
||||
</div>
|
||||
|
||||
@@ -60,6 +60,7 @@ export default function Conversation() {
|
||||
key={`${index}ANSWER`}
|
||||
message={query.response}
|
||||
type={'ANSWER'}
|
||||
sources={query.sources}
|
||||
feedback={query.feedback}
|
||||
handleFeedback={(feedback: FEEDBACK) =>
|
||||
handleFeedback(query, feedback, index)
|
||||
@@ -70,23 +71,26 @@ export default function Conversation() {
|
||||
return responseView;
|
||||
};
|
||||
|
||||
const handlePaste = (e: React.ClipboardEvent) => {
|
||||
e.preventDefault();
|
||||
const text = e.clipboardData.getData('text/plain');
|
||||
document.execCommand('insertText', false, text);
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-center p-6">
|
||||
<div className="flex justify-center p-4">
|
||||
{queries.length > 0 && (
|
||||
<div className="mt-20 flex w-10/12 flex-col transition-all md:w-3/4">
|
||||
<div className="mt-20 flex flex-col transition-all md:w-3/4">
|
||||
{queries.map((query, index) => {
|
||||
return (
|
||||
<Fragment key={index}>
|
||||
<ConversationBubble
|
||||
ref={endMessageRef}
|
||||
className={`${
|
||||
index === queries.length - 1 && status === 'loading'
|
||||
? 'mb-24'
|
||||
: 'mb-7'
|
||||
}`}
|
||||
className={'mb-7'}
|
||||
key={`${index}QUESTION`}
|
||||
message={query.prompt}
|
||||
type="QUESTION"
|
||||
sources={query.sources}
|
||||
></ConversationBubble>
|
||||
{prepResponseView(query, index)}
|
||||
</Fragment>
|
||||
@@ -100,6 +104,7 @@ export default function Conversation() {
|
||||
<div
|
||||
ref={inputRef}
|
||||
contentEditable
|
||||
onPaste={handlePaste}
|
||||
className={`border-000000 overflow-x-hidden; max-h-24 min-h-[2.6rem] w-full overflow-y-auto whitespace-pre-wrap rounded-xl border bg-white py-2 pl-4 pr-9 leading-7 opacity-100 focus:outline-none`}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === 'Enter' && !e.shiftKey) {
|
||||
|
||||
@@ -4,6 +4,11 @@ import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
|
||||
import Alert from './../assets/alert.svg';
|
||||
import { ReactComponent as Like } from './../assets/like.svg';
|
||||
import { ReactComponent as Dislike } from './../assets/dislike.svg';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
|
||||
import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';
|
||||
|
||||
const DisableSourceFE = import.meta.env.VITE_DISABLE_SOURCE_FE || false;
|
||||
|
||||
const ConversationBubble = forwardRef<
|
||||
HTMLDivElement,
|
||||
@@ -13,20 +18,34 @@ const ConversationBubble = forwardRef<
|
||||
className?: string;
|
||||
feedback?: FEEDBACK;
|
||||
handleFeedback?: (feedback: FEEDBACK) => void;
|
||||
sources?: { title: string; text: string }[];
|
||||
}
|
||||
>(function ConversationBubble(
|
||||
{ message, type, className, feedback, handleFeedback },
|
||||
{ message, type, className, feedback, handleFeedback, sources },
|
||||
ref,
|
||||
) {
|
||||
const [showFeedback, setShowFeedback] = useState(false);
|
||||
const [openSource, setOpenSource] = useState<number | null>(null);
|
||||
const List = ({
|
||||
ordered,
|
||||
children,
|
||||
}: {
|
||||
ordered?: boolean;
|
||||
children: React.ReactNode;
|
||||
}) => {
|
||||
const Tag = ordered ? 'ol' : 'ul';
|
||||
return <Tag className="list-inside list-disc">{children}</Tag>;
|
||||
};
|
||||
let bubble;
|
||||
|
||||
if (type === 'QUESTION') {
|
||||
bubble = (
|
||||
<div ref={ref} className={`flex flex-row-reverse self-end ${className}`}>
|
||||
<Avatar className="mt-4 text-2xl" avatar="🧑💻"></Avatar>
|
||||
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 py-5 px-5 text-white">
|
||||
<p className="whitespace-pre-wrap break-words">{message}</p>
|
||||
<Avatar className="mt-2 text-2xl" avatar="🧑💻"></Avatar>
|
||||
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 p-3.5 text-white">
|
||||
<ReactMarkdown className="whitespace-pre-wrap break-words">
|
||||
{message}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -34,55 +53,118 @@ const ConversationBubble = forwardRef<
|
||||
bubble = (
|
||||
<div
|
||||
ref={ref}
|
||||
className={`flex self-start ${className}`}
|
||||
className={`flex self-start ${className} flex-col`}
|
||||
onMouseEnter={() => setShowFeedback(true)}
|
||||
onMouseLeave={() => setShowFeedback(false)}
|
||||
>
|
||||
<Avatar className="mt-4 text-2xl" avatar="🦖"></Avatar>
|
||||
<div
|
||||
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 py-5 px-5 ${
|
||||
type === 'ERROR'
|
||||
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
{type === 'ERROR' && (
|
||||
<img src={Alert} alt="alert" className="mr-2 inline" />
|
||||
)}
|
||||
<p className="whitespace-pre-wrap break-words">{message}</p>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-2 flex items-center justify-center ${
|
||||
feedback === 'LIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
>
|
||||
<Like
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'LIKE'
|
||||
? 'fill-blue-1000 stroke-blue-1000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
<div className="flex self-start">
|
||||
<Avatar className="mt-2 text-2xl" avatar="🦖"></Avatar>
|
||||
<div
|
||||
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
|
||||
type === 'ERROR'
|
||||
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
|
||||
: ''
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('LIKE')}
|
||||
></Like>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-10 flex items-center justify-center ${
|
||||
feedback === 'DISLIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
>
|
||||
<Dislike
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'DISLIKE'
|
||||
? 'fill-red-2000 stroke-red-2000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
>
|
||||
{type === 'ERROR' && (
|
||||
<img src={Alert} alt="alert" className="mr-2 inline" />
|
||||
)}
|
||||
<ReactMarkdown
|
||||
className="whitespace-pre-wrap break-words"
|
||||
components={{
|
||||
code({ node, inline, className, children, ...props }) {
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
|
||||
return !inline && match ? (
|
||||
<SyntaxHighlighter
|
||||
PreTag="div"
|
||||
language={match[1]}
|
||||
{...props}
|
||||
style={vscDarkPlus}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code className={className ? className : ''} {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
ul({ node, children }) {
|
||||
return <List>{children}</List>;
|
||||
},
|
||||
ol({ node, children }) {
|
||||
return <List ordered>{children}</List>;
|
||||
},
|
||||
}}
|
||||
>
|
||||
{message}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-2 flex items-center justify-center ${
|
||||
feedback === 'LIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('DISLIKE')}
|
||||
></Dislike>
|
||||
>
|
||||
<Like
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'LIKE'
|
||||
? 'fill-blue-1000 stroke-blue-1000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('LIKE')}
|
||||
></Like>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-10 flex items-center justify-center ${
|
||||
feedback === 'DISLIKE' || (type !== 'ERROR' && showFeedback)
|
||||
? ''
|
||||
: 'md:invisible'
|
||||
}`}
|
||||
>
|
||||
<Dislike
|
||||
className={`cursor-pointer ${
|
||||
feedback === 'DISLIKE'
|
||||
? 'fill-red-2000 stroke-red-2000'
|
||||
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
|
||||
}`}
|
||||
onClick={() => handleFeedback?.('DISLIKE')}
|
||||
></Dislike>
|
||||
</div>
|
||||
</div>
|
||||
<div className="ml-8 mt-2 grid w-1/2 grid-cols-3 gap-2">
|
||||
{DisableSourceFE
|
||||
? null
|
||||
: sources?.map((source, index) => (
|
||||
<div
|
||||
key={index}
|
||||
className="w-26 cursor-pointer rounded-xl border border-gray-200 py-1 px-2 hover:bg-gray-100"
|
||||
onClick={() =>
|
||||
setOpenSource(openSource === index ? null : index)
|
||||
}
|
||||
>
|
||||
<p className="truncate text-xs text-gray-500">
|
||||
{index + 1}. {source.title}
|
||||
</p>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
{sources && openSource !== null && sources[openSource] && (
|
||||
<div className="ml-8 mt-2 w-3/4 rounded-xl bg-blue-200 p-2">
|
||||
<p className="w-3/4 truncate text-xs text-gray-500">
|
||||
Source: {sources[openSource].title}
|
||||
</p>
|
||||
|
||||
<div className="rounded-xl border-2 border-gray-200 bg-white p-2">
|
||||
<p className="text-xs text-gray-500 ">
|
||||
{sources[openSource].text}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -7,6 +7,89 @@ export function fetchAnswerApi(
|
||||
question: string,
|
||||
apiKey: string,
|
||||
selectedDocs: Doc,
|
||||
history: Array<any> = [],
|
||||
conversationId: string | null,
|
||||
): Promise<
|
||||
| {
|
||||
result: any;
|
||||
answer: any;
|
||||
sources: any;
|
||||
conversationId: any;
|
||||
query: string;
|
||||
}
|
||||
| {
|
||||
result: any;
|
||||
answer: any;
|
||||
sources: any;
|
||||
query: string;
|
||||
conversationId: any;
|
||||
title: any;
|
||||
}
|
||||
> {
|
||||
let namePath = selectedDocs.name;
|
||||
if (selectedDocs.language === namePath) {
|
||||
namePath = '.project';
|
||||
}
|
||||
|
||||
let docPath = 'default';
|
||||
if (selectedDocs.location === 'local') {
|
||||
docPath = 'local' + '/' + selectedDocs.name + '/';
|
||||
} else if (selectedDocs.location === 'remote') {
|
||||
docPath =
|
||||
selectedDocs.language +
|
||||
'/' +
|
||||
namePath +
|
||||
'/' +
|
||||
selectedDocs.version +
|
||||
'/' +
|
||||
selectedDocs.model +
|
||||
'/';
|
||||
}
|
||||
//in history array remove all keys except prompt and response
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
|
||||
return fetch(apiHost + '/api/answer', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
question: question,
|
||||
api_key: apiKey,
|
||||
embeddings_key: apiKey,
|
||||
history: history,
|
||||
active_docs: docPath,
|
||||
conversation_id: conversationId,
|
||||
}),
|
||||
})
|
||||
.then((response) => {
|
||||
if (response.ok) {
|
||||
return response.json();
|
||||
} else {
|
||||
return Promise.reject(new Error(response.statusText));
|
||||
}
|
||||
})
|
||||
.then((data) => {
|
||||
const result = data.answer;
|
||||
return {
|
||||
answer: result,
|
||||
query: question,
|
||||
result,
|
||||
sources: data.sources,
|
||||
conversationId: data.conversation_id,
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
export function fetchAnswerSteaming(
|
||||
question: string,
|
||||
apiKey: string,
|
||||
selectedDocs: Doc,
|
||||
history: Array<any> = [],
|
||||
conversationId: string | null,
|
||||
onEvent: (event: MessageEvent) => void,
|
||||
): Promise<Answer> {
|
||||
let namePath = selectedDocs.name;
|
||||
if (selectedDocs.language === namePath) {
|
||||
@@ -28,30 +111,73 @@ export function fetchAnswerApi(
|
||||
'/';
|
||||
}
|
||||
|
||||
return fetch(apiHost + '/api/answer', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify({
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
|
||||
return new Promise<Answer>((resolve, reject) => {
|
||||
const body = {
|
||||
question: question,
|
||||
api_key: apiKey,
|
||||
embeddings_key: apiKey,
|
||||
history: localStorage.getItem('chatHistory'),
|
||||
active_docs: docPath,
|
||||
}),
|
||||
})
|
||||
.then((response) => {
|
||||
if (response.ok) {
|
||||
return response.json();
|
||||
} else {
|
||||
Promise.reject(response);
|
||||
}
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
};
|
||||
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
})
|
||||
.then((data) => {
|
||||
const result = data.answer;
|
||||
return { answer: result, query: question, result };
|
||||
});
|
||||
.then((response) => {
|
||||
if (!response.body) throw Error('No response body');
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
let counterrr = 0;
|
||||
const processStream = ({
|
||||
done,
|
||||
value,
|
||||
}: ReadableStreamReadResult<Uint8Array>) => {
|
||||
if (done) {
|
||||
console.log(counterrr);
|
||||
return;
|
||||
}
|
||||
|
||||
counterrr += 1;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (let line of lines) {
|
||||
if (line.trim() == '') {
|
||||
continue;
|
||||
}
|
||||
if (line.startsWith('data:')) {
|
||||
line = line.substring(5);
|
||||
}
|
||||
|
||||
const messageEvent: MessageEvent = new MessageEvent('message', {
|
||||
data: line,
|
||||
});
|
||||
|
||||
onEvent(messageEvent); // handle each message
|
||||
}
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
};
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Connection failed:', error);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
export function sendFeedback(
|
||||
|
||||
@@ -10,12 +10,16 @@ export interface Message {
|
||||
export interface ConversationState {
|
||||
queries: Query[];
|
||||
status: Status;
|
||||
conversationId: string | null;
|
||||
}
|
||||
|
||||
export interface Answer {
|
||||
answer: string;
|
||||
query: string;
|
||||
result: string;
|
||||
sources: { title: string; text: string }[];
|
||||
conversationId: string | null;
|
||||
title: string | null;
|
||||
}
|
||||
|
||||
export interface Query {
|
||||
@@ -23,4 +27,7 @@ export interface Query {
|
||||
response?: string;
|
||||
feedback?: FEEDBACK;
|
||||
error?: string;
|
||||
sources?: { title: string; text: string }[];
|
||||
conversationId?: string | null;
|
||||
title?: string | null;
|
||||
}
|
||||
|
||||
@@ -1,27 +1,132 @@
|
||||
import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
|
||||
import store from '../store';
|
||||
import { fetchAnswerApi } from './conversationApi';
|
||||
import { Answer, ConversationState, Query } from './conversationModels';
|
||||
import { fetchAnswerApi, fetchAnswerSteaming } from './conversationApi';
|
||||
import { Answer, ConversationState, Query, Status } from './conversationModels';
|
||||
import { getConversations } from '../preferences/preferenceApi';
|
||||
import { setConversations } from '../preferences/preferenceSlice';
|
||||
|
||||
const initialState: ConversationState = {
|
||||
queries: [],
|
||||
status: 'idle',
|
||||
conversationId: null,
|
||||
};
|
||||
|
||||
export const fetchAnswer = createAsyncThunk<
|
||||
Answer,
|
||||
{ question: string },
|
||||
{ state: RootState }
|
||||
>('fetchAnswer', async ({ question }, { getState }) => {
|
||||
const state = getState();
|
||||
const API_STREAMING = import.meta.env.VITE_API_STREAMING === 'true';
|
||||
|
||||
const answer = await fetchAnswerApi(
|
||||
question,
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
);
|
||||
return answer;
|
||||
});
|
||||
export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
|
||||
'fetchAnswer',
|
||||
async ({ question }, { dispatch, getState }) => {
|
||||
const state = getState() as RootState;
|
||||
if (state.preference) {
|
||||
if (API_STREAMING) {
|
||||
await fetchAnswerSteaming(
|
||||
question,
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
state.conversation.queries,
|
||||
state.conversation.conversationId,
|
||||
(event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
|
||||
// check if the 'end' event has been received
|
||||
if (data.type === 'end') {
|
||||
// set status to 'idle'
|
||||
dispatch(conversationSlice.actions.setStatus('idle'));
|
||||
getConversations()
|
||||
.then((fetchedConversations) => {
|
||||
dispatch(setConversations(fetchedConversations));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Failed to fetch conversations: ', error);
|
||||
});
|
||||
} else if (data.type === 'source') {
|
||||
// check if data.metadata exists
|
||||
let result;
|
||||
if (data.metadata && data.metadata.title) {
|
||||
const titleParts = data.metadata.title.split('/');
|
||||
result = {
|
||||
title: titleParts[titleParts.length - 1],
|
||||
text: data.doc,
|
||||
};
|
||||
} else {
|
||||
result = { title: data.doc, text: data.doc };
|
||||
}
|
||||
dispatch(
|
||||
updateStreamingSource({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { sources: [result] },
|
||||
}),
|
||||
);
|
||||
} else if (data.type === 'id') {
|
||||
dispatch(
|
||||
updateConversationId({
|
||||
query: { conversationId: data.id },
|
||||
}),
|
||||
);
|
||||
} else {
|
||||
const result = data.answer;
|
||||
dispatch(
|
||||
updateStreamingQuery({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { response: result },
|
||||
}),
|
||||
);
|
||||
}
|
||||
},
|
||||
);
|
||||
} else {
|
||||
const answer = await fetchAnswerApi(
|
||||
question,
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
state.conversation.queries,
|
||||
state.conversation.conversationId,
|
||||
);
|
||||
if (answer) {
|
||||
let sourcesPrepped = [];
|
||||
sourcesPrepped = answer.sources.map((source: { title: string }) => {
|
||||
if (source && source.title) {
|
||||
const titleParts = source.title.split('/');
|
||||
return {
|
||||
...source,
|
||||
title: titleParts[titleParts.length - 1],
|
||||
};
|
||||
}
|
||||
return source;
|
||||
});
|
||||
|
||||
dispatch(
|
||||
updateQuery({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { response: answer.answer, sources: sourcesPrepped },
|
||||
}),
|
||||
);
|
||||
dispatch(
|
||||
updateConversationId({
|
||||
query: { conversationId: answer.conversationId },
|
||||
}),
|
||||
);
|
||||
dispatch(conversationSlice.actions.setStatus('idle'));
|
||||
getConversations()
|
||||
.then((fetchedConversations) => {
|
||||
dispatch(setConversations(fetchedConversations));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Failed to fetch conversations: ', error);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return {
|
||||
conversationId: null,
|
||||
title: null,
|
||||
answer: '',
|
||||
query: question,
|
||||
result: '',
|
||||
sources: [],
|
||||
};
|
||||
},
|
||||
);
|
||||
|
||||
export const conversationSlice = createSlice({
|
||||
name: 'conversation',
|
||||
@@ -30,6 +135,41 @@ export const conversationSlice = createSlice({
|
||||
addQuery(state, action: PayloadAction<Query>) {
|
||||
state.queries.push(action.payload);
|
||||
},
|
||||
setConversation(state, action: PayloadAction<Query[]>) {
|
||||
state.queries = action.payload;
|
||||
},
|
||||
updateStreamingQuery(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
) {
|
||||
const index = action.payload.index;
|
||||
if (action.payload.query.response) {
|
||||
state.queries[index].response =
|
||||
(state.queries[index].response || '') + action.payload.query.response;
|
||||
} else {
|
||||
state.queries[index] = {
|
||||
...state.queries[index],
|
||||
...action.payload.query,
|
||||
};
|
||||
}
|
||||
},
|
||||
updateConversationId(
|
||||
state,
|
||||
action: PayloadAction<{ query: Partial<Query> }>,
|
||||
) {
|
||||
state.conversationId = action.payload.query.conversationId ?? null;
|
||||
},
|
||||
updateStreamingSource(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
) {
|
||||
const index = action.payload.index;
|
||||
if (!state.queries[index].sources) {
|
||||
state.queries[index].sources = [action.payload.query.sources![0]];
|
||||
} else {
|
||||
state.queries[index].sources!.push(action.payload.query.sources![0]);
|
||||
}
|
||||
},
|
||||
updateQuery(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
@@ -40,17 +180,15 @@ export const conversationSlice = createSlice({
|
||||
...action.payload.query,
|
||||
};
|
||||
},
|
||||
setStatus(state, action: PayloadAction<Status>) {
|
||||
state.status = action.payload;
|
||||
},
|
||||
},
|
||||
extraReducers(builder) {
|
||||
builder
|
||||
.addCase(fetchAnswer.pending, (state) => {
|
||||
state.status = 'loading';
|
||||
})
|
||||
.addCase(fetchAnswer.fulfilled, (state, action) => {
|
||||
state.status = 'idle';
|
||||
state.queries[state.queries.length - 1].response =
|
||||
action.payload.answer;
|
||||
})
|
||||
.addCase(fetchAnswer.rejected, (state, action) => {
|
||||
state.status = 'failed';
|
||||
state.queries[state.queries.length - 1].error =
|
||||
@@ -65,5 +203,12 @@ export const selectQueries = (state: RootState) => state.conversation.queries;
|
||||
|
||||
export const selectStatus = (state: RootState) => state.conversation.status;
|
||||
|
||||
export const { addQuery, updateQuery } = conversationSlice.actions;
|
||||
export const {
|
||||
addQuery,
|
||||
updateQuery,
|
||||
updateStreamingQuery,
|
||||
updateConversationId,
|
||||
updateStreamingSource,
|
||||
setConversation,
|
||||
} = conversationSlice.actions;
|
||||
export default conversationSlice.reducer;
|
||||
|
||||
@@ -3,6 +3,7 @@ import { useDispatch, useSelector } from 'react-redux';
|
||||
import { ActiveState } from '../models/misc';
|
||||
import { selectApiKey, setApiKey } from './preferenceSlice';
|
||||
import { useOutsideAlerter } from './../hooks';
|
||||
import Modal from '../Modal';
|
||||
|
||||
export default function APIKeyModal({
|
||||
modalState,
|
||||
@@ -49,53 +50,35 @@ export default function APIKeyModal({
|
||||
}
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
modalState === 'ACTIVE' ? 'visible' : 'hidden'
|
||||
} absolute z-30 h-screen w-screen bg-gray-alpha`}
|
||||
>
|
||||
<article
|
||||
ref={modalRef}
|
||||
className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg"
|
||||
>
|
||||
<p className="text-xl text-jet">OpenAI API Key</p>
|
||||
<p className="text-md leading-6 text-gray-500">
|
||||
Before you can start using DocsGPT we need you to provide an API key
|
||||
for llm. Currently, we support only OpenAI but soon many more. You can
|
||||
find it here.
|
||||
</p>
|
||||
<input
|
||||
type="text"
|
||||
className="h-10 w-full border-b-2 border-jet focus:outline-none"
|
||||
value={key}
|
||||
maxLength={100}
|
||||
placeholder="API Key"
|
||||
onChange={(e) => setKey(e.target.value)}
|
||||
/>
|
||||
<div className="flex flex-row-reverse">
|
||||
<div>
|
||||
<button
|
||||
onClick={() => handleSubmit()}
|
||||
className="ml-auto h-10 w-20 rounded-lg bg-violet-800 text-white transition-all hover:bg-violet-700"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
{isCancellable && (
|
||||
<button
|
||||
onClick={() => handleCancel()}
|
||||
className="ml-5 h-10 w-20 rounded-lg border border-violet-700 bg-white text-violet-800 transition-all hover:bg-violet-700 hover:text-white"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{isError && (
|
||||
<p className="mr-auto text-sm text-red-500">
|
||||
Please enter a valid API key
|
||||
<Modal
|
||||
handleCancel={handleCancel}
|
||||
isError={isError}
|
||||
modalState={modalState}
|
||||
isCancellable={isCancellable}
|
||||
handleSubmit={handleSubmit}
|
||||
render={() => {
|
||||
return (
|
||||
<article
|
||||
ref={modalRef}
|
||||
className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg"
|
||||
>
|
||||
<p className="text-xl text-jet">OpenAI API Key</p>
|
||||
<p className="text-md leading-6 text-gray-500">
|
||||
Before you can start using DocsGPT we need you to provide an API
|
||||
key for llm. Currently, we support only OpenAI but soon many more.
|
||||
You can find it here.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
<input
|
||||
type="text"
|
||||
className="h-10 w-full border-b-2 border-jet focus:outline-none"
|
||||
value={key}
|
||||
maxLength={100}
|
||||
placeholder="API Key"
|
||||
onChange={(e) => setKey(e.target.value)}
|
||||
/>
|
||||
</article>
|
||||
);
|
||||
}}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { useEffect, useState } from 'react';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
import { ActiveState } from '../models/misc';
|
||||
import Modal from '../Modal';
|
||||
import {
|
||||
setSelectedDocs,
|
||||
setSourceDocs,
|
||||
@@ -50,85 +51,67 @@ export default function APIKeyModal({
|
||||
|
||||
requestDocs();
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<div
|
||||
className={`${
|
||||
modalState === 'ACTIVE' ? 'visible' : 'hidden'
|
||||
} absolute z-30 h-screen w-screen bg-gray-alpha`}
|
||||
>
|
||||
<article className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg">
|
||||
<p className="text-xl text-jet">Select Source Documentation</p>
|
||||
<p className="text-lg leading-5 text-gray-500">
|
||||
Please select the library of documentation that you would like to use
|
||||
with our app.
|
||||
</p>
|
||||
<div className="relative">
|
||||
<div
|
||||
className="h-10 w-full cursor-pointer border-b-2"
|
||||
onClick={() => setIsDocsListOpen(!isDocsListOpen)}
|
||||
>
|
||||
{!localSelectedDocs ? (
|
||||
<p className="py-3 text-gray-500">Select</p>
|
||||
) : (
|
||||
<p className="py-3">
|
||||
{localSelectedDocs.name} {localSelectedDocs.version}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
{isDocsListOpen && (
|
||||
<div className="absolute top-10 left-0 max-h-52 w-full overflow-y-scroll bg-white">
|
||||
{docs ? (
|
||||
docs.map((doc, index) => {
|
||||
if (doc.model) {
|
||||
return (
|
||||
<div
|
||||
key={index}
|
||||
onClick={() => {
|
||||
setLocalSelectedDocs(doc);
|
||||
setIsDocsListOpen(false);
|
||||
}}
|
||||
className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
|
||||
>
|
||||
<p className="ml-5 py-3">
|
||||
{doc.name} {doc.version}
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
})
|
||||
) : (
|
||||
<div className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100">
|
||||
<p className="ml-5 py-3">No default documentation.</p>
|
||||
<Modal
|
||||
handleSubmit={handleSubmit}
|
||||
isCancellable={isCancellable}
|
||||
handleCancel={handleCancel}
|
||||
modalState={modalState}
|
||||
errorMessage="Please select Source Documentation"
|
||||
isError={isError}
|
||||
render={() => {
|
||||
return (
|
||||
<article className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg">
|
||||
<p className="text-xl text-jet">Select Source Documentation</p>
|
||||
<p className="text-lg leading-5 text-gray-500">
|
||||
Please select the library of documentation that you would like to
|
||||
use with our app.
|
||||
</p>
|
||||
<div className="relative">
|
||||
<div
|
||||
className="h-10 w-full cursor-pointer border-b-2"
|
||||
onClick={() => setIsDocsListOpen(!isDocsListOpen)}
|
||||
>
|
||||
{!localSelectedDocs ? (
|
||||
<p className="py-3 text-gray-500">Select</p>
|
||||
) : (
|
||||
<p className="py-3">
|
||||
{localSelectedDocs.name} {localSelectedDocs.version}
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
{isDocsListOpen && (
|
||||
<div className="absolute top-10 left-0 max-h-52 w-full overflow-y-scroll bg-white">
|
||||
{docs ? (
|
||||
docs.map((doc, index) => {
|
||||
if (doc.model) {
|
||||
return (
|
||||
<div
|
||||
key={index}
|
||||
onClick={() => {
|
||||
setLocalSelectedDocs(doc);
|
||||
setIsDocsListOpen(false);
|
||||
}}
|
||||
className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
|
||||
>
|
||||
<p className="ml-5 py-3">
|
||||
{doc.name} {doc.version}
|
||||
</p>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
})
|
||||
) : (
|
||||
<div className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100">
|
||||
<p className="ml-5 py-3">No default documentation.</p>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
<div className="flex flex-row-reverse">
|
||||
{isCancellable && (
|
||||
<button
|
||||
onClick={() => handleCancel()}
|
||||
className="ml-5 h-10 w-20 rounded-lg border border-violet-700 bg-white text-violet-800 transition-all hover:bg-violet-700 hover:text-white"
|
||||
>
|
||||
Cancel
|
||||
</button>
|
||||
)}
|
||||
<button
|
||||
onClick={() => {
|
||||
handleSubmit();
|
||||
}}
|
||||
className="ml-auto h-10 w-20 rounded-lg bg-violet-800 text-white transition-all hover:bg-violet-700"
|
||||
>
|
||||
Save
|
||||
</button>
|
||||
{isError && (
|
||||
<p className="mr-auto text-sm text-red-500">
|
||||
Please select source documentation.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</article>
|
||||
</div>
|
||||
</article>
|
||||
);
|
||||
}}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -33,6 +33,29 @@ export async function getDocs(): Promise<Doc[] | null> {
|
||||
}
|
||||
}
|
||||
|
||||
export async function getConversations(): Promise<
|
||||
{ name: string; id: string }[] | null
|
||||
> {
|
||||
try {
|
||||
const apiHost =
|
||||
import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
|
||||
|
||||
const response = await fetch(apiHost + '/api/get_conversations');
|
||||
const data = await response.json();
|
||||
|
||||
const conversations: { name: string; id: string }[] = [];
|
||||
|
||||
data.forEach((conversation: object) => {
|
||||
conversations.push(conversation as { name: string; id: string });
|
||||
});
|
||||
|
||||
return conversations;
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export function getLocalApiKey(): string | null {
|
||||
const key = localStorage.getItem('DocsGPTApiKey');
|
||||
return key;
|
||||
|
||||
@@ -10,12 +10,24 @@ interface Preference {
|
||||
apiKey: string;
|
||||
selectedDocs: Doc | null;
|
||||
sourceDocs: Doc[] | null;
|
||||
conversations: { name: string; id: string }[] | null;
|
||||
}
|
||||
|
||||
const initialState: Preference = {
|
||||
apiKey: '',
|
||||
selectedDocs: null,
|
||||
apiKey: 'xxx',
|
||||
selectedDocs: {
|
||||
name: 'default',
|
||||
language: 'default',
|
||||
location: 'default',
|
||||
version: 'default',
|
||||
description: 'default',
|
||||
fullName: 'default',
|
||||
dat: 'default',
|
||||
docLink: 'default',
|
||||
model: 'openai_text-embedding-ada-002',
|
||||
} as Doc,
|
||||
sourceDocs: null,
|
||||
conversations: null,
|
||||
};
|
||||
|
||||
export const prefSlice = createSlice({
|
||||
@@ -29,12 +41,16 @@ export const prefSlice = createSlice({
|
||||
state.selectedDocs = action.payload;
|
||||
},
|
||||
setSourceDocs: (state, action) => {
|
||||
state.sourceDocs?.push(...action.payload);
|
||||
state.sourceDocs = action.payload;
|
||||
},
|
||||
setConversations: (state, action) => {
|
||||
state.conversations = action.payload;
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
export const { setApiKey, setSelectedDocs, setSourceDocs } = prefSlice.actions;
|
||||
export const { setApiKey, setSelectedDocs, setSourceDocs, setConversations } =
|
||||
prefSlice.actions;
|
||||
export default prefSlice.reducer;
|
||||
|
||||
export const prefListenerMiddleware = createListenerMiddleware();
|
||||
@@ -64,3 +80,7 @@ export const selectSourceDocs = (state: RootState) =>
|
||||
state.preference.sourceDocs;
|
||||
export const selectSelectedDocs = (state: RootState) =>
|
||||
state.preference.selectedDocs;
|
||||
export const selectConversations = (state: RootState) =>
|
||||
state.preference.conversations;
|
||||
export const selectConversationId = (state: RootState) =>
|
||||
state.conversation.conversationId;
|
||||
|
||||
@@ -13,6 +13,7 @@ const store = configureStore({
|
||||
preference: {
|
||||
apiKey: key ?? '',
|
||||
selectedDocs: doc !== null ? JSON.parse(doc) : null,
|
||||
conversations: null,
|
||||
sourceDocs: [
|
||||
{
|
||||
location: '',
|
||||
|
||||
@@ -19,20 +19,27 @@ export default function Upload({
|
||||
type: 'UPLOAD' | 'TRAINIING';
|
||||
percentage: number;
|
||||
taskId?: string;
|
||||
failed?: boolean;
|
||||
}>();
|
||||
|
||||
function Progress({
|
||||
title,
|
||||
isCancellable = false,
|
||||
isFailed = false,
|
||||
}: {
|
||||
title: string;
|
||||
isCancellable?: boolean;
|
||||
isFailed?: boolean;
|
||||
}) {
|
||||
return (
|
||||
<div className="mt-5 flex flex-col items-center gap-2">
|
||||
<p className="text-xl tracking-[0.15px]">{title}...</p>
|
||||
<p className="text-sm text-gray-2000">This may take several minutes</p>
|
||||
<p className={`ml-5 text-xl text-red-400 ${isFailed ? '' : 'hidden'}`}>
|
||||
Over the token limit, please consider uploading smaller document
|
||||
</p>
|
||||
<p className="mt-10 text-2xl">{progress?.percentage || 0}%</p>
|
||||
|
||||
<div className="mb-10 w-[50%]">
|
||||
<div className="h-1 w-[100%] bg-blue-4000"></div>
|
||||
<div
|
||||
@@ -40,6 +47,7 @@ export default function Upload({
|
||||
style={{ width: `${progress?.percentage || 0}%` }}
|
||||
></div>
|
||||
</div>
|
||||
|
||||
<button
|
||||
onClick={() => {
|
||||
setDocName('');
|
||||
@@ -71,11 +79,28 @@ export default function Upload({
|
||||
.then((data) => data.json())
|
||||
.then((data) => {
|
||||
if (data.status == 'SUCCESS') {
|
||||
getDocs().then((data) => dispatch(setSourceDocs(data)));
|
||||
setProgress(
|
||||
(progress) => progress && { ...progress, percentage: 100 },
|
||||
);
|
||||
} else {
|
||||
if (data.result.limited === true) {
|
||||
getDocs().then((data) => dispatch(setSourceDocs(data)));
|
||||
setProgress(
|
||||
(progress) =>
|
||||
progress && {
|
||||
...progress,
|
||||
percentage: 100,
|
||||
failed: true,
|
||||
},
|
||||
);
|
||||
} else {
|
||||
getDocs().then((data) => dispatch(setSourceDocs(data)));
|
||||
setProgress(
|
||||
(progress) =>
|
||||
progress && {
|
||||
...progress,
|
||||
percentage: 100,
|
||||
failed: false,
|
||||
},
|
||||
);
|
||||
}
|
||||
} else if (data.status == 'PROGRESS') {
|
||||
setProgress(
|
||||
(progress) =>
|
||||
progress && {
|
||||
@@ -91,6 +116,7 @@ export default function Upload({
|
||||
<Progress
|
||||
title="Training is in progress"
|
||||
isCancellable={progress?.percentage === 100}
|
||||
isFailed={progress?.failed === true}
|
||||
></Progress>
|
||||
);
|
||||
}
|
||||
@@ -125,10 +151,18 @@ export default function Upload({
|
||||
|
||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||
onDrop,
|
||||
multiple: true,
|
||||
multiple: false,
|
||||
onDragEnter: doNothing,
|
||||
onDragOver: doNothing,
|
||||
onDragLeave: doNothing,
|
||||
maxSize: 25000000,
|
||||
accept: {
|
||||
'application/pdf': ['.pdf'],
|
||||
'text/plain': ['.txt'],
|
||||
'text/x-rst': ['.rst'],
|
||||
'text/x-markdown': ['.md'],
|
||||
'application/zip': ['.zip'],
|
||||
},
|
||||
});
|
||||
|
||||
let view;
|
||||
@@ -139,7 +173,10 @@ export default function Upload({
|
||||
} else {
|
||||
view = (
|
||||
<>
|
||||
<p className="mb-7 text-xl text-jet">Upload New Documentation</p>
|
||||
<p className="text-xl text-jet">Upload New Documentation</p>
|
||||
<p className="mb-3 text-xs text-gray-4000">
|
||||
Please upload .pdf, .txt, .rst, .md, .zip limited to 25mb
|
||||
</p>
|
||||
<input
|
||||
type="text"
|
||||
className="h-10 w-[60%] rounded-md border-2 border-gray-5000 px-3 outline-none"
|
||||
|
||||
11
run-with-docker-compose.sh
Executable file
11
run-with-docker-compose.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
|
||||
|
||||
source .env
|
||||
|
||||
if [[ -n "$OPENAI_API_BASE" ]] && [[ -n "$OPENAI_API_VERSION" ]] && [[ -n "$AZURE_DEPLOYMENT_NAME" ]] && [[ -n "$AZURE_EMBEDDINGS_DEPLOYMENT_NAME" ]]; then
|
||||
echo "Running Azure Configuration"
|
||||
docker-compose -f docker-compose-azure.yaml build && docker-compose -f docker-compose-azure.yaml up
|
||||
else
|
||||
echo "Running Plain Configuration"
|
||||
docker-compose build && docker-compose up
|
||||
fi
|
||||
@@ -1,20 +1,13 @@
|
||||
import ast
|
||||
import json
|
||||
from pathlib import Path
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
import faiss
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
|
||||
import dotenv
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.prompts import PromptTemplate
|
||||
import pickle
|
||||
import dotenv
|
||||
import tiktoken
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
import ast
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
ps = list(Path("inputs").glob("**/*.py"))
|
||||
data = []
|
||||
sources = []
|
||||
@@ -24,13 +17,6 @@ for p in ps:
|
||||
sources.append(p)
|
||||
|
||||
|
||||
|
||||
# with open('inputs/client.py', 'r') as f:
|
||||
# tree = ast.parse(f.read())
|
||||
|
||||
# print(tree)
|
||||
|
||||
|
||||
def get_functions_in_class(node):
|
||||
functions = []
|
||||
functions_code = []
|
||||
@@ -64,21 +50,9 @@ for code in data:
|
||||
c1 += 1
|
||||
|
||||
# save the structure dict as json
|
||||
import json
|
||||
with open('structure_dict.json', 'w') as f:
|
||||
json.dump(structure_dict, f)
|
||||
|
||||
|
||||
# llm = OpenAI(temperature=0)
|
||||
# prompt = PromptTemplate(
|
||||
# input_variables=["code"],
|
||||
# template="Code: {code}, Documentation: ",
|
||||
# )
|
||||
#
|
||||
# print(prompt.format(code="print('hello world')"))
|
||||
# print(llm(prompt.format(code="print('hello world')")))
|
||||
|
||||
|
||||
if not Path("outputs").exists():
|
||||
Path("outputs").mkdir()
|
||||
|
||||
@@ -119,8 +93,3 @@ for source, classes in structure_dict.items():
|
||||
else:
|
||||
with open(f"outputs/{source_w}", "a") as f:
|
||||
f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,21 +1,20 @@
|
||||
import os
|
||||
import sys
|
||||
import nltk
|
||||
import dotenv
|
||||
import typer
|
||||
|
||||
from collections import defaultdict
|
||||
from typing import List, Optional
|
||||
|
||||
from parser.file.bulk import SimpleDirectoryReader
|
||||
from parser.schema.base import Document
|
||||
from parser.open_ai_func import call_openai_api, get_user_permission
|
||||
from parser.py2doc import transform_to_docs
|
||||
from parser.py2doc import extract_functions_and_classes as extract_py
|
||||
from parser.js2doc import extract_functions_and_classes as extract_js
|
||||
from parser.java2doc import extract_functions_and_classes as extract_java
|
||||
from parser.token_func import group_split
|
||||
import dotenv
|
||||
import nltk
|
||||
import typer
|
||||
|
||||
from parser.file.bulk import SimpleDirectoryReader
|
||||
from parser.java2doc import extract_functions_and_classes as extract_java
|
||||
from parser.js2doc import extract_functions_and_classes as extract_js
|
||||
from parser.open_ai_func import call_openai_api, get_user_permission
|
||||
from parser.py2doc import extract_functions_and_classes as extract_py
|
||||
from parser.py2doc import transform_to_docs
|
||||
from parser.schema.base import Document
|
||||
from parser.token_func import group_split
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
@@ -25,28 +24,32 @@ nltk.download('punkt', quiet=True)
|
||||
nltk.download('averaged_perceptron_tagger', quiet=True)
|
||||
|
||||
|
||||
#Splits all files in specified folder to documents
|
||||
def metadata_from_filename(title):
|
||||
return {'title': title}
|
||||
|
||||
# Splits all files in specified folder to documents
|
||||
@app.command()
|
||||
def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
|
||||
help="Whether to skip price confirmation"),
|
||||
help="Whether to skip price confirmation"),
|
||||
dir: Optional[List[str]] = typer.Option(["inputs"],
|
||||
help="""List of paths to directory for index creation.
|
||||
E.g. --dir inputs --dir inputs2"""),
|
||||
file: Optional[List[str]] = typer.Option(None,
|
||||
help="""File paths to use (Optional; overrides dir).
|
||||
help="""File paths to use (Optional; overrides dir).
|
||||
E.g. --file inputs/1.md --file inputs/2.md"""),
|
||||
recursive: Optional[bool] = typer.Option(True, help="Whether to recursively search in subdirectories."),
|
||||
limit: Optional[int] = typer.Option(None, help="Maximum number of files to read."),
|
||||
formats: Optional[List[str]] = typer.Option([".rst", ".md"],
|
||||
help="""List of required extensions (list with .)
|
||||
Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
|
||||
help="""List of required extensions (list with .)
|
||||
Currently supported:
|
||||
.rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
|
||||
exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles)."),
|
||||
sample: Optional[bool] = typer.Option(False, help="Whether to output sample of the first 5 split documents."),
|
||||
sample: Optional[bool] = typer.Option(False,
|
||||
help="Whether to output sample of the first 5 split documents."),
|
||||
token_check: Optional[bool] = typer.Option(True, help="Whether to group small documents and split large."),
|
||||
min_tokens: Optional[int] = typer.Option(150, help="Minimum number of tokens to not group."),
|
||||
max_tokens: Optional[int] = typer.Option(2000, help="Maximum number of tokens to not split."),
|
||||
):
|
||||
|
||||
"""
|
||||
Creates index from specified location or files.
|
||||
By default /inputs folder is used, .rst and .md are parsed.
|
||||
@@ -55,23 +58,23 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
|
||||
def process_one_docs(directory, folder_name):
|
||||
raw_docs = SimpleDirectoryReader(input_dir=directory, input_files=file, recursive=recursive,
|
||||
required_exts=formats, num_files_limit=limit,
|
||||
exclude_hidden=exclude).load_data()
|
||||
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
|
||||
|
||||
# Here we split the documents, as needed, into smaller chunks.
|
||||
# We do this due to the context limits of the LLMs.
|
||||
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
||||
#Old method
|
||||
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens,
|
||||
token_check=token_check)
|
||||
# Old method
|
||||
# text_splitter = RecursiveCharacterTextSplitter()
|
||||
# docs = text_splitter.split_documents(raw_docs)
|
||||
|
||||
#Sample feature
|
||||
if sample == True:
|
||||
# Sample feature
|
||||
if sample:
|
||||
for i in range(min(5, len(raw_docs))):
|
||||
print(raw_docs[i].text)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
|
||||
|
||||
# Here we check for command line arguments for bot calls.
|
||||
# If no argument exists or the yes is not True, then the
|
||||
# user permission is requested to call the API.
|
||||
@@ -98,12 +101,11 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
|
||||
|
||||
@app.command()
|
||||
def convert(dir: Optional[str] = typer.Option("inputs",
|
||||
help="""Path to directory to make documentation for.
|
||||
help="""Path to directory to make documentation for.
|
||||
E.g. --dir inputs """),
|
||||
formats: Optional[str] = typer.Option("py",
|
||||
help="""Required language.
|
||||
help="""Required language.
|
||||
py, js, java supported for now""")):
|
||||
|
||||
"""
|
||||
Creates documentation linked to original functions from specified location.
|
||||
By default /inputs folder is used, .py is parsed.
|
||||
@@ -117,7 +119,7 @@ def convert(dir: Optional[str] = typer.Option("inputs",
|
||||
else:
|
||||
raise Exception("Sorry, language not supported yet")
|
||||
transform_to_docs(functions_dict, classes_dict, formats, dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app()
|
||||
|
||||
|
||||
app()
|
||||
|
||||
@@ -1,38 +1,42 @@
|
||||
from pathlib import Path
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
import faiss
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
import pickle
|
||||
import dotenv
|
||||
import tiktoken
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
import dotenv
|
||||
import faiss
|
||||
import tiktoken
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.vectorstores import FAISS
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = ((num_tokens/1000) * 0.0004)
|
||||
total_price = ((num_tokens / 1000) * 0.0004)
|
||||
return num_tokens, total_price
|
||||
|
||||
|
||||
def call_openai_api():
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
|
||||
faiss.write_index(store.index, "docs.index")
|
||||
store.index = None
|
||||
with open("faiss_store.pkl", "wb") as f:
|
||||
pickle.dump(store, f)
|
||||
|
||||
|
||||
def get_user_permission():
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
|
||||
docs_content = (" ".join(docs))
|
||||
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
|
||||
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
|
||||
print(f"Number of Tokens = {format(tokens, ',d')}")
|
||||
print(f"Approx Cost = ${format(total_price, ',.2f')}")
|
||||
#Here we check for user permission before calling the API.
|
||||
# Here we check for user permission before calling the API.
|
||||
user_input = input("Price Okay? (Y/N) \n").lower()
|
||||
if user_input == "y":
|
||||
call_openai_api()
|
||||
@@ -41,7 +45,8 @@ def get_user_permission():
|
||||
else:
|
||||
print("The API was not called. No money was spent.")
|
||||
|
||||
#Load .env file
|
||||
|
||||
# Load .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
ap = ArgumentParser("Script for training DocsGPT on .rst documentation files.")
|
||||
|
||||
@@ -1,71 +1,75 @@
|
||||
import os
|
||||
import pickle
|
||||
import dotenv
|
||||
import tiktoken
|
||||
import sys
|
||||
import faiss
|
||||
import shutil
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
from langchain.vectorstores import FAISS
|
||||
|
||||
import dotenv
|
||||
import faiss
|
||||
import tiktoken
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
from langchain.vectorstores import FAISS
|
||||
from sphinx.cmd.build import main as sphinx_main
|
||||
from argparse import ArgumentParser
|
||||
|
||||
|
||||
def convert_rst_to_txt(src_dir, dst_dir):
|
||||
# Check if the source directory exists
|
||||
if not os.path.exists(src_dir):
|
||||
raise Exception("Source directory does not exist")
|
||||
# Walk through the source directory
|
||||
for root, dirs, files in os.walk(src_dir):
|
||||
for file in files:
|
||||
# Check if the file has .rst extension
|
||||
if file.endswith(".rst"):
|
||||
# Construct the full path of the file
|
||||
src_file = os.path.join(root, file.replace(".rst", ""))
|
||||
# Convert the .rst file to .txt file using sphinx-build
|
||||
args = f". -b text -D extensions=sphinx.ext.autodoc " \
|
||||
f"-D master_doc={src_file} " \
|
||||
f"-D source_suffix=.rst " \
|
||||
f"-C {dst_dir} "
|
||||
sphinx_main(args.split())
|
||||
elif file.endswith(".md"):
|
||||
# Rename the .md file to .rst file
|
||||
src_file = os.path.join(root, file)
|
||||
dst_file = os.path.join(root, file.replace(".md", ".rst"))
|
||||
os.rename(src_file, dst_file)
|
||||
# Convert the .rst file to .txt file using sphinx-build
|
||||
args = f". -b text -D extensions=sphinx.ext.autodoc " \
|
||||
f"-D master_doc={dst_file} " \
|
||||
f"-D source_suffix=.rst " \
|
||||
f"-C {dst_dir} "
|
||||
sphinx_main(args.split())
|
||||
# Check if the source directory exists
|
||||
if not os.path.exists(src_dir):
|
||||
raise Exception("Source directory does not exist")
|
||||
# Walk through the source directory
|
||||
for root, dirs, files in os.walk(src_dir):
|
||||
for file in files:
|
||||
# Check if the file has .rst extension
|
||||
if file.endswith(".rst"):
|
||||
# Construct the full path of the file
|
||||
src_file = os.path.join(root, file.replace(".rst", ""))
|
||||
# Convert the .rst file to .txt file using sphinx-build
|
||||
args = f". -b text -D extensions=sphinx.ext.autodoc " \
|
||||
f"-D master_doc={src_file} " \
|
||||
f"-D source_suffix=.rst " \
|
||||
f"-C {dst_dir} "
|
||||
sphinx_main(args.split())
|
||||
elif file.endswith(".md"):
|
||||
# Rename the .md file to .rst file
|
||||
src_file = os.path.join(root, file)
|
||||
dst_file = os.path.join(root, file.replace(".md", ".rst"))
|
||||
os.rename(src_file, dst_file)
|
||||
# Convert the .rst file to .txt file using sphinx-build
|
||||
args = f". -b text -D extensions=sphinx.ext.autodoc " \
|
||||
f"-D master_doc={dst_file} " \
|
||||
f"-D source_suffix=.rst " \
|
||||
f"-C {dst_dir} "
|
||||
sphinx_main(args.split())
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = ((num_tokens/1000) * 0.0004)
|
||||
total_price = ((num_tokens / 1000) * 0.0004)
|
||||
return num_tokens, total_price
|
||||
|
||||
|
||||
def call_openai_api():
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
|
||||
faiss.write_index(store.index, "docs.index")
|
||||
store.index = None
|
||||
with open("faiss_store.pkl", "wb") as f:
|
||||
pickle.dump(store, f)
|
||||
|
||||
|
||||
def get_user_permission():
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
|
||||
docs_content = (" ".join(docs))
|
||||
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
|
||||
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
|
||||
print(f"Number of Tokens = {format(tokens, ',d')}")
|
||||
print(f"Approx Cost = ${format(total_price, ',.2f')}")
|
||||
#Here we check for user permission before calling the API.
|
||||
# Here we check for user permission before calling the API.
|
||||
user_input = input("Price Okay? (Y/N) \n").lower()
|
||||
if user_input == "y":
|
||||
call_openai_api()
|
||||
@@ -74,6 +78,7 @@ def get_user_permission():
|
||||
else:
|
||||
print("The API was not called. No money was spent.")
|
||||
|
||||
|
||||
ap = ArgumentParser("Script for training DocsGPT on Sphinx documentation")
|
||||
ap.add_argument("-i", "--inputs",
|
||||
type=str,
|
||||
@@ -81,17 +86,17 @@ ap.add_argument("-i", "--inputs",
|
||||
help="Directory containing documentation files")
|
||||
args = ap.parse_args()
|
||||
|
||||
#Load .env file
|
||||
# Load .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
#Directory to vector
|
||||
# Directory to vector
|
||||
src_dir = args.inputs
|
||||
dst_dir = "tmp"
|
||||
|
||||
convert_rst_to_txt(src_dir, dst_dir)
|
||||
|
||||
# Here we load in the data in the format that Notion exports it in.
|
||||
ps = list(Path("tmp/"+ src_dir).glob("**/*.txt"))
|
||||
ps = list(Path("tmp/" + src_dir).glob("**/*.txt"))
|
||||
|
||||
# parse all child directories
|
||||
data = []
|
||||
|
||||
@@ -3,7 +3,6 @@ from abc import abstractmethod
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
|
||||
from parser.schema.base import Document
|
||||
|
||||
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
"""Simple reader that reads files of different formats from a directory."""
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Union
|
||||
|
||||
from parser.file.base import BaseReader
|
||||
from parser.file.base_parser import BaseParser
|
||||
from parser.file.docs_parser import DocxParser, PDFParser
|
||||
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
|
||||
from parser.file.rst_parser import RstParser
|
||||
from parser.file.tabular_parser import PandasCSVParser
|
||||
from parser.schema.base import Document
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Union
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
@@ -52,17 +51,17 @@ class SimpleDirectoryReader(BaseReader):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
input_dir: Optional[str] = None,
|
||||
input_files: Optional[List] = None,
|
||||
exclude_hidden: bool = True,
|
||||
errors: str = "ignore",
|
||||
recursive: bool = True,
|
||||
required_exts: Optional[List[str]] = None,
|
||||
file_extractor: Optional[Dict[str, BaseParser]] = None,
|
||||
num_files_limit: Optional[int] = None,
|
||||
file_metadata: Optional[Callable[[str], Dict]] = None,
|
||||
chunk_size_max: int = 2048,
|
||||
self,
|
||||
input_dir: Optional[str] = None,
|
||||
input_files: Optional[List] = None,
|
||||
exclude_hidden: bool = True,
|
||||
errors: str = "ignore",
|
||||
recursive: bool = True,
|
||||
required_exts: Optional[List[str]] = None,
|
||||
file_extractor: Optional[Dict[str, BaseParser]] = None,
|
||||
num_files_limit: Optional[int] = None,
|
||||
file_metadata: Optional[Callable[[str], Dict]] = None,
|
||||
chunk_size_max: int = 2048,
|
||||
) -> None:
|
||||
"""Initialize with parameters."""
|
||||
super().__init__()
|
||||
@@ -103,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
|
||||
elif self.exclude_hidden and input_file.name.startswith("."):
|
||||
continue
|
||||
elif (
|
||||
self.required_exts is not None
|
||||
and input_file.suffix not in self.required_exts
|
||||
self.required_exts is not None
|
||||
and input_file.suffix not in self.required_exts
|
||||
):
|
||||
continue
|
||||
else:
|
||||
@@ -115,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
|
||||
new_input_files.extend(sub_input_files)
|
||||
|
||||
if self.num_files_limit is not None and self.num_files_limit > 0:
|
||||
new_input_files = new_input_files[0 : self.num_files_limit]
|
||||
new_input_files = new_input_files[0: self.num_files_limit]
|
||||
|
||||
# print total number of files added
|
||||
logging.debug(
|
||||
@@ -151,10 +150,15 @@ class SimpleDirectoryReader(BaseReader):
|
||||
data = f.read()
|
||||
if isinstance(data, List):
|
||||
data_list.extend(data)
|
||||
if self.file_metadata is not None:
|
||||
for _ in range(len(data)):
|
||||
metadata_list.append(self.file_metadata(str(input_file)))
|
||||
else:
|
||||
data_list.append(str(data))
|
||||
if self.file_metadata is not None:
|
||||
metadata_list.append(self.file_metadata(str(input_file)))
|
||||
if self.file_metadata is not None:
|
||||
metadata_list.append(self.file_metadata(str(input_file)))
|
||||
|
||||
|
||||
|
||||
if concatenate:
|
||||
return [Document("\n".join(data_list))]
|
||||
|
||||
@@ -9,6 +9,7 @@ from typing import Dict, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
|
||||
"""HTML parser."""
|
||||
|
||||
@@ -23,21 +24,20 @@ class HTMLParser(BaseParser):
|
||||
Union[str, List[str]]: a string or a List of strings.
|
||||
"""
|
||||
try:
|
||||
import unstructured
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
except ImportError:
|
||||
raise ValueError("unstructured package is required to parse HTML files.")
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
|
||||
# Using the unstructured library to convert the html to isd format
|
||||
# isd sample : isd = [
|
||||
# {"text": "My Title", "type": "Title"},
|
||||
# {"text": "My Narrative", "type": "NarrativeText"}
|
||||
# ]
|
||||
# {"text": "My Title", "type": "Title"},
|
||||
# {"text": "My Narrative", "type": "NarrativeText"}
|
||||
# ]
|
||||
with open(file, "r", encoding="utf-8") as fp:
|
||||
elements = partition_html(file=fp)
|
||||
isd = convert_to_isd(elements)
|
||||
isd = convert_to_isd(elements)
|
||||
|
||||
# Removing non ascii charactwers from isd_el['text']
|
||||
for isd_el in isd:
|
||||
@@ -46,15 +46,15 @@ class HTMLParser(BaseParser):
|
||||
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
|
||||
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
|
||||
isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
|
||||
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
|
||||
# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
|
||||
for isd_el in isd:
|
||||
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
|
||||
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
|
||||
|
||||
# Creating a list of all the indexes of isd_el['type'] = 'Title'
|
||||
title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
|
||||
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
|
||||
|
||||
# Creating 'Chunks' - List of lists of strings
|
||||
# each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
|
||||
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
|
||||
Chunks = [[]]
|
||||
final_chunks = list(list())
|
||||
|
||||
for i,isd_el in enumerate(isd):
|
||||
for i, isd_el in enumerate(isd):
|
||||
if i in title_indexes:
|
||||
Chunks.append([])
|
||||
Chunks[-1].append(isd_el['text'])
|
||||
|
||||
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
|
||||
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25
|
||||
# TODO: This value can be a user defined variable
|
||||
for chunk in Chunks:
|
||||
# sum of lenth of all the strings in the chunk
|
||||
sum = 0
|
||||
sum += len(str(chunk))
|
||||
if sum < 25:
|
||||
Chunks.remove(chunk)
|
||||
else :
|
||||
else:
|
||||
# appending all the approved chunks to final_chunks as a single string
|
||||
final_chunks.append(" ".join([str(item) for item in chunk]))
|
||||
return final_chunks
|
||||
|
||||
@@ -7,8 +7,8 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
import tiktoken
|
||||
from parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class MarkdownParser(BaseParser):
|
||||
@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
max_tokens: int = 2048,
|
||||
# remove_tables: bool = True,
|
||||
**kwargs: Any,
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
max_tokens: int = 2048,
|
||||
# remove_tables: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
|
||||
self._max_tokens = max_tokens
|
||||
# self._remove_tables = remove_tables
|
||||
|
||||
|
||||
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
|
||||
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
|
||||
current_text: str):
|
||||
"""Append to tups chunk."""
|
||||
num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
|
||||
if num_tokens > self._max_tokens:
|
||||
@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
|
||||
else:
|
||||
tups.append((current_header, current_text))
|
||||
return tups
|
||||
|
||||
def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
|
||||
"""Convert a markdown file to a dictionary.
|
||||
|
||||
@@ -115,11 +116,15 @@ class MarkdownParser(BaseParser):
|
||||
return {}
|
||||
|
||||
def parse_tups(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> List[Tuple[Optional[str], str]]:
|
||||
"""Parse file into tuples."""
|
||||
with open(filepath, "r") as f:
|
||||
content = f.read()
|
||||
with open(filepath, "r", encoding='utf8') as f:
|
||||
try:
|
||||
content = f.read()
|
||||
except (Exception,) as e:
|
||||
print(f'Error a file: "{filepath}"')
|
||||
raise e
|
||||
if self._remove_hyperlinks:
|
||||
content = self.remove_hyperlinks(content)
|
||||
if self._remove_images:
|
||||
@@ -130,7 +135,7 @@ class MarkdownParser(BaseParser):
|
||||
return markdown_tups
|
||||
|
||||
def parse_file(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> Union[str, List[str]]:
|
||||
"""Parse file into string."""
|
||||
tups = self.parse_tups(filepath, errors=errors)
|
||||
|
||||
@@ -5,10 +5,10 @@ Contains parser for md files.
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
import tiktoken
|
||||
|
||||
|
||||
class RstParser(BaseParser):
|
||||
"""reStructuredText parser.
|
||||
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
remove_table_excess: bool = True,
|
||||
remove_interpreters: bool = True,
|
||||
remove_directives: bool = True,
|
||||
remove_whitespaces_excess: bool = True,
|
||||
#Be carefull with remove_characters_excess, might cause data loss
|
||||
remove_characters_excess: bool = True,
|
||||
**kwargs: Any,
|
||||
self,
|
||||
*args: Any,
|
||||
remove_hyperlinks: bool = True,
|
||||
remove_images: bool = True,
|
||||
remove_table_excess: bool = True,
|
||||
remove_interpreters: bool = True,
|
||||
remove_directives: bool = True,
|
||||
remove_whitespaces_excess: bool = True,
|
||||
# Be carefull with remove_characters_excess, might cause data loss
|
||||
remove_characters_excess: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -41,7 +41,6 @@ class RstParser(BaseParser):
|
||||
self._remove_whitespaces_excess = remove_whitespaces_excess
|
||||
self._remove_characters_excess = remove_characters_excess
|
||||
|
||||
|
||||
def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
|
||||
"""Convert a reStructuredText file to a dictionary.
|
||||
|
||||
@@ -56,7 +55,8 @@ class RstParser(BaseParser):
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
|
||||
if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
|
||||
if header_match and i > 0 and (
|
||||
len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
|
||||
if current_header is not None:
|
||||
if current_text == "" or None:
|
||||
continue
|
||||
@@ -72,7 +72,7 @@ class RstParser(BaseParser):
|
||||
|
||||
rst_tups.append((current_header, current_text))
|
||||
|
||||
#TODO: Format for rst
|
||||
# TODO: Format for rst
|
||||
#
|
||||
# if current_header is not None:
|
||||
# # pass linting, assert keys are defined
|
||||
@@ -136,7 +136,7 @@ class RstParser(BaseParser):
|
||||
return {}
|
||||
|
||||
def parse_tups(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> List[Tuple[Optional[str], str]]:
|
||||
"""Parse file into tuples."""
|
||||
with open(filepath, "r") as f:
|
||||
@@ -159,7 +159,7 @@ class RstParser(BaseParser):
|
||||
return rst_tups
|
||||
|
||||
def parse_file(
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
self, filepath: Path, errors: str = "ignore"
|
||||
) -> Union[str, List[str]]:
|
||||
"""Parse file into string."""
|
||||
tups = self.parse_tups(filepath, errors=errors)
|
||||
|
||||
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*args: Any,
|
||||
concat_rows: bool = True,
|
||||
col_joiner: str = ", ",
|
||||
row_joiner: str = "\n",
|
||||
pandas_config: dict = {},
|
||||
**kwargs: Any
|
||||
self,
|
||||
*args: Any,
|
||||
concat_rows: bool = True,
|
||||
col_joiner: str = ", ",
|
||||
row_joiner: str = "\n",
|
||||
pandas_config: dict = {},
|
||||
**kwargs: Any
|
||||
) -> None:
|
||||
"""Init params."""
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import os
|
||||
|
||||
import javalang
|
||||
|
||||
|
||||
def find_files(directory):
|
||||
files_list = []
|
||||
for root, dirs, files in os.walk(directory):
|
||||
@@ -9,6 +11,7 @@ def find_files(directory):
|
||||
files_list.append(os.path.join(root, file))
|
||||
return files_list
|
||||
|
||||
|
||||
def extract_functions(file_path):
|
||||
with open(file_path, "r") as file:
|
||||
java_code = file.read()
|
||||
@@ -28,6 +31,7 @@ def extract_functions(file_path):
|
||||
methods[method_name] = method_source_code
|
||||
return methods
|
||||
|
||||
|
||||
def extract_classes(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -47,6 +51,7 @@ def extract_classes(file_path):
|
||||
classes[class_name] = class_string
|
||||
return classes
|
||||
|
||||
|
||||
def extract_functions_and_classes(directory):
|
||||
files = find_files(directory)
|
||||
functions_dict = {}
|
||||
@@ -58,4 +63,4 @@ def extract_functions_and_classes(directory):
|
||||
classes = extract_classes(file)
|
||||
if classes:
|
||||
classes_dict[file] = classes
|
||||
return functions_dict, classes_dict
|
||||
return functions_dict, classes_dict
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import os
|
||||
import esprima
|
||||
|
||||
import escodegen
|
||||
import esprima
|
||||
|
||||
|
||||
def find_files(directory):
|
||||
@@ -11,6 +12,7 @@ def find_files(directory):
|
||||
files_list.append(os.path.join(root, file))
|
||||
return files_list
|
||||
|
||||
|
||||
def extract_functions(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -26,7 +28,6 @@ def extract_functions(file_path):
|
||||
func_name = declaration.id.name if declaration.id else '<anonymous>'
|
||||
functions[func_name] = escodegen.generate(declaration.init)
|
||||
elif node.type == 'ClassDeclaration':
|
||||
class_name = node.id.name
|
||||
for subnode in node.body.body:
|
||||
if subnode.type == 'MethodDefinition':
|
||||
func_name = subnode.key.name
|
||||
@@ -38,6 +39,7 @@ def extract_functions(file_path):
|
||||
functions[func_name] = escodegen.generate(declaration.init)
|
||||
return functions
|
||||
|
||||
|
||||
def extract_classes(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -53,6 +55,7 @@ def extract_classes(file_path):
|
||||
classes[class_name] = ", ".join(function_names)
|
||||
return classes
|
||||
|
||||
|
||||
def extract_functions_and_classes(directory):
|
||||
files = find_files(directory)
|
||||
functions_dict = {}
|
||||
|
||||
@@ -1,57 +1,70 @@
|
||||
import os
|
||||
import faiss
|
||||
import pickle
|
||||
|
||||
import tiktoken
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
|
||||
#from langchain.embeddings import HuggingFaceEmbeddings
|
||||
#from langchain.embeddings import HuggingFaceInstructEmbeddings
|
||||
#from langchain.embeddings import CohereEmbeddings
|
||||
|
||||
from langchain.vectorstores import FAISS
|
||||
from retry import retry
|
||||
|
||||
|
||||
# from langchain.embeddings import HuggingFaceEmbeddings
|
||||
# from langchain.embeddings import HuggingFaceInstructEmbeddings
|
||||
# from langchain.embeddings import CohereEmbeddings
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> tuple[int, float]:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = ((num_tokens/1000) * 0.0004)
|
||||
total_price = (num_tokens / 1000) * 0.0004
|
||||
return num_tokens, total_price
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def store_add_texts_with_retry(store, i):
|
||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
#store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
|
||||
|
||||
def call_openai_api(docs, folder_name):
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
|
||||
# create output folder if it doesn't exist
|
||||
if not os.path.exists(f"outputs/{folder_name}"):
|
||||
os.makedirs(f"outputs/{folder_name}")
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
docs_test = [docs[0]]
|
||||
# remove the first element from docs
|
||||
docs.pop(0)
|
||||
# cut first n docs if you want to restart
|
||||
#docs = docs[:n]
|
||||
# docs = docs[:n]
|
||||
c1 = 0
|
||||
# pinecone.init(
|
||||
# api_key="", # find at app.pinecone.io
|
||||
# environment="us-east1-gcp" # next to api key in console
|
||||
# )
|
||||
#index_name = "pandas"
|
||||
store = FAISS.from_documents(docs_test, OpenAIEmbeddings())
|
||||
#store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
|
||||
# index_name = "pandas"
|
||||
if ( # azure
|
||||
os.environ.get("OPENAI_API_BASE")
|
||||
and os.environ.get("OPENAI_API_VERSION")
|
||||
and os.environ.get("AZURE_DEPLOYMENT_NAME")
|
||||
and os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME")
|
||||
):
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
openai_embeddings = OpenAIEmbeddings(model=os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME"))
|
||||
else:
|
||||
openai_embeddings = OpenAIEmbeddings()
|
||||
store = FAISS.from_documents(docs_test, openai_embeddings)
|
||||
# store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
|
||||
|
||||
# Uncomment for MPNet embeddings
|
||||
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
# store = FAISS.from_documents(docs_test, hf)
|
||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||
for i in tqdm(
|
||||
docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format="{l_bar}{bar}| Time Left: {remaining}"
|
||||
):
|
||||
try:
|
||||
store_add_texts_with_retry(store, i)
|
||||
except Exception as e:
|
||||
@@ -64,20 +77,20 @@ def call_openai_api(docs, folder_name):
|
||||
c1 += 1
|
||||
store.save_local(f"outputs/{folder_name}")
|
||||
|
||||
|
||||
def get_user_permission(docs, folder_name):
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
|
||||
#docs_content = (" ".join(docs))
|
||||
# docs_content = (" ".join(docs))
|
||||
docs_content = ""
|
||||
for doc in docs:
|
||||
docs_content += doc.page_content
|
||||
|
||||
|
||||
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
|
||||
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
|
||||
print(f"Number of Tokens = {format(tokens, ',d')}")
|
||||
print(f"Approx Cost = ${format(total_price, ',.2f')}")
|
||||
#Here we check for user permission before calling the API.
|
||||
# Here we check for user permission before calling the API.
|
||||
user_input = input("Price Okay? (Y/N) \n").lower()
|
||||
if user_input == "y":
|
||||
call_openai_api(docs, folder_name)
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import os
|
||||
import ast
|
||||
import tiktoken
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import tiktoken
|
||||
from langchain.llms import OpenAI
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
|
||||
def find_files(directory):
|
||||
files_list = []
|
||||
for root, dirs, files in os.walk(directory):
|
||||
@@ -13,6 +15,7 @@ def find_files(directory):
|
||||
files_list.append(os.path.join(root, file))
|
||||
return files_list
|
||||
|
||||
|
||||
def extract_functions(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -25,6 +28,7 @@ def extract_functions(file_path):
|
||||
functions[func_name] = func_def
|
||||
return functions
|
||||
|
||||
|
||||
def extract_classes(file_path):
|
||||
with open(file_path, 'r') as file:
|
||||
source_code = file.read()
|
||||
@@ -40,6 +44,7 @@ def extract_classes(file_path):
|
||||
classes[class_name] = ", ".join(function_names)
|
||||
return classes
|
||||
|
||||
|
||||
def extract_functions_and_classes(directory):
|
||||
files = find_files(directory)
|
||||
functions_dict = {}
|
||||
@@ -53,11 +58,12 @@ def extract_functions_and_classes(directory):
|
||||
classes_dict[file] = classes
|
||||
return functions_dict, classes_dict
|
||||
|
||||
|
||||
def parse_functions(functions_dict, formats, dir):
|
||||
c1 = len(functions_dict)
|
||||
for i, (source, functions) in enumerate(functions_dict.items(), start=1):
|
||||
print(f"Processing file {i}/{c1}")
|
||||
source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
|
||||
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
|
||||
subfolders = "/".join(source_w.split("/")[:-1])
|
||||
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
|
||||
for j, (name, function) in enumerate(functions.items(), start=1):
|
||||
@@ -70,18 +76,19 @@ def parse_functions(functions_dict, formats, dir):
|
||||
response = llm(prompt.format(code=function))
|
||||
mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
|
||||
with open(f"outputs/{source_w}", mode) as f:
|
||||
f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
|
||||
f.write(
|
||||
f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
def parse_classes(classes_dict, formats, dir):
|
||||
c1 = len(classes_dict)
|
||||
for i, (source, classes) in enumerate(classes_dict.items()):
|
||||
print(f"Processing file {i+1}/{c1}")
|
||||
source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
|
||||
print(f"Processing file {i + 1}/{c1}")
|
||||
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
|
||||
subfolders = "/".join(source_w.split("/")[:-1])
|
||||
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
|
||||
for name, function_names in classes.items():
|
||||
print(f"Processing Class {i+1}/{c1}")
|
||||
print(f"Processing Class {i + 1}/{c1}")
|
||||
prompt = PromptTemplate(
|
||||
input_variables=["class_name", "functions_names"],
|
||||
template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
|
||||
@@ -92,6 +99,7 @@ def parse_classes(classes_dict, formats, dir):
|
||||
with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
|
||||
f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")
|
||||
|
||||
|
||||
def transform_to_docs(functions_dict, classes_dict, formats, dir):
|
||||
docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
|
||||
docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
|
||||
@@ -110,4 +118,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
|
||||
parse_classes(classes_dict, formats, dir)
|
||||
print("All done!")
|
||||
else:
|
||||
print("The API was not called. No money was spent.")
|
||||
print("The API was not called. No money was spent.")
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
|
||||
from parser.schema.schema import BaseDocument
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import re
|
||||
import tiktoken
|
||||
|
||||
from typing import List
|
||||
from parser.schema.base import Document
|
||||
from math import ceil
|
||||
from typing import List
|
||||
|
||||
import tiktoken
|
||||
from parser.schema.base import Document
|
||||
|
||||
def separate_header_and_body(text):
|
||||
header_pattern = r"^(.*?\n){3}"
|
||||
@@ -13,6 +12,7 @@ def separate_header_and_body(text):
|
||||
body = text[len(header):]
|
||||
return header, body
|
||||
|
||||
|
||||
def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
|
||||
docs = []
|
||||
current_group = None
|
||||
@@ -23,7 +23,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
if current_group is None:
|
||||
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
|
||||
extra_info=doc.extra_info)
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
|
||||
elif len(tiktoken.get_encoding("cl100k_base").encode(
|
||||
current_group.text)) + doc_len < max_tokens and doc_len < min_tokens:
|
||||
current_group.text += " " + doc.text
|
||||
else:
|
||||
docs.append(current_group)
|
||||
@@ -35,6 +36,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
|
||||
return docs
|
||||
|
||||
|
||||
def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
|
||||
docs = []
|
||||
for doc in documents:
|
||||
@@ -43,6 +45,9 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(doc)
|
||||
else:
|
||||
header, body = separate_header_and_body(doc.text)
|
||||
if len(tiktoken.get_encoding("cl100k_base").encode(header)) > max_tokens:
|
||||
body = doc.text
|
||||
header = ""
|
||||
num_body_parts = ceil(token_length / max_tokens)
|
||||
part_length = ceil(len(body) / num_body_parts)
|
||||
body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
|
||||
@@ -54,17 +59,18 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
|
||||
docs.append(new_doc)
|
||||
return docs
|
||||
|
||||
|
||||
def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
|
||||
if token_check == False:
|
||||
if not token_check:
|
||||
return documents
|
||||
print("Grouping small documents")
|
||||
try:
|
||||
documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
|
||||
except:
|
||||
except Exception:
|
||||
print("Grouping failed, try running without token_check")
|
||||
print("Separating large documents")
|
||||
try:
|
||||
documents = split_documents(documents=documents, max_tokens=max_tokens)
|
||||
except:
|
||||
except Exception:
|
||||
print("Grouping failed, try running without token_check")
|
||||
return documents
|
||||
|
||||
@@ -1,131 +1,122 @@
|
||||
aiodns==3.0.0
|
||||
aiohttp==3.8.3
|
||||
aiohttp==3.8.5
|
||||
aiohttp-retry==2.8.3
|
||||
aiosignal==1.3.1
|
||||
alabaster==0.7.13
|
||||
aleph-alpha-client==2.16.0
|
||||
anyio==3.6.2
|
||||
argilla==1.3.0
|
||||
aleph-alpha-client==3.2.0
|
||||
anyio==3.7.1
|
||||
async-timeout==4.0.2
|
||||
attrs==22.2.0
|
||||
Babel==2.11.0
|
||||
attrs==23.1.0
|
||||
Babel==2.12.1
|
||||
backoff==2.2.1
|
||||
blobfile==2.0.1
|
||||
boto3==1.26.82
|
||||
botocore==1.29.82
|
||||
blobfile==2.0.2
|
||||
boto3==1.28.20
|
||||
cffi==1.15.1
|
||||
charset-normalizer==2.1.1
|
||||
click==8.1.3
|
||||
cohere==3.4.0
|
||||
cryptography==39.0.2
|
||||
dataclasses-json==0.5.7
|
||||
charset-normalizer==3.2.0
|
||||
click==8.1.6
|
||||
cohere==4.19.2
|
||||
cryptography==41.0.3
|
||||
dataclasses-json==0.5.14
|
||||
decorator==5.1.1
|
||||
deeplake==3.2.12
|
||||
Deprecated==1.2.13
|
||||
dill==0.3.6
|
||||
docutils==0.19
|
||||
Deprecated==1.2.14
|
||||
dill==0.3.7
|
||||
docutils==0.20.1
|
||||
docx2txt==0.8
|
||||
ecdsa==0.18.0
|
||||
entrypoints==0.4
|
||||
escodegen==1.0.10
|
||||
escodegen==1.0.11
|
||||
esprima==4.0.1
|
||||
esutils==1.0.1
|
||||
et-xmlfile==1.1.0
|
||||
faiss-cpu==1.7.3
|
||||
filelock==3.9.0
|
||||
Flask==2.2.2
|
||||
frozenlist==1.3.3
|
||||
faiss-cpu==1.7.4
|
||||
filelock==3.12.2
|
||||
Flask==2.3.2
|
||||
frozenlist==1.4.0
|
||||
greenlet==2.0.2
|
||||
gunicorn==20.1.0
|
||||
gunicorn==21.2.0
|
||||
h11==0.14.0
|
||||
httpcore==0.16.3
|
||||
httpx==0.23.3
|
||||
httpcore==0.17.3
|
||||
httpx==0.24.1
|
||||
hub==3.0.1
|
||||
huggingface-hub==0.12.0
|
||||
humbug==0.2.8
|
||||
huggingface-hub==0.16.4
|
||||
humbug==0.3.2
|
||||
idna==3.4
|
||||
imagesize==1.4.1
|
||||
itsdangerous==2.1.2
|
||||
javalang==0.13.0
|
||||
Jinja2==3.1.2
|
||||
jmespath==1.0.1
|
||||
joblib==1.2.0
|
||||
langchain==0.0.103
|
||||
lxml==4.9.2
|
||||
manifest-ml==0.1.1
|
||||
MarkupSafe==2.1.2
|
||||
marshmallow==3.19.0
|
||||
joblib==1.3.1
|
||||
langchain==0.0.252
|
||||
lxml==4.9.3
|
||||
manifest-ml==0.1.8
|
||||
MarkupSafe==2.1.3
|
||||
marshmallow==3.20.1
|
||||
marshmallow-enum==1.5.1
|
||||
monotonic==1.6
|
||||
multidict==6.0.4
|
||||
multiprocess==0.70.14
|
||||
mypy-extensions==0.4.3
|
||||
multiprocess==0.70.15
|
||||
mypy-extensions==1.0.0
|
||||
nltk==3.8.1
|
||||
numcodecs==0.11.0
|
||||
numpy==1.23.5
|
||||
openai==0.27.0
|
||||
openpyxl==3.1.1
|
||||
packaging==23.0
|
||||
pandas==1.5.3
|
||||
pathos==0.3.0
|
||||
Pillow==9.4.0
|
||||
pox==0.3.2
|
||||
ppft==1.7.6.6
|
||||
numpy==1.25.2
|
||||
openai==0.27.8
|
||||
openpyxl==3.1.2
|
||||
packaging==23.1
|
||||
pandas==2.0.3
|
||||
pathos==0.3.1
|
||||
Pillow==10.0.0
|
||||
pox==0.3.3
|
||||
ppft==1.7.6.7
|
||||
py==1.11.0
|
||||
pyasn1==0.4.8
|
||||
pyasn1==0.5.0
|
||||
pycares==4.3.0
|
||||
pycparser==2.21
|
||||
pycryptodomex==3.17
|
||||
pydantic==1.10.4
|
||||
Pygments==2.14.0
|
||||
PyJWT==2.6.0
|
||||
pycryptodomex==3.18.0
|
||||
Pygments==2.15.1
|
||||
PyJWT==2.8.0
|
||||
PyPDF2==3.0.1
|
||||
python-dateutil==2.8.2
|
||||
python-docx==0.8.11
|
||||
python-dotenv==0.21.1
|
||||
python-dotenv==1.0.0
|
||||
python-jose==3.3.0
|
||||
python-magic==0.4.27
|
||||
python-pptx==0.6.21
|
||||
pytz==2022.7.1
|
||||
PyYAML==6.0
|
||||
redis==4.5.1
|
||||
regex==2022.10.31
|
||||
requests==2.28.2
|
||||
pytz==2023.3
|
||||
PyYAML==6.0.1
|
||||
redis==4.6.0
|
||||
regex==2023.6.3
|
||||
requests==2.31.0
|
||||
retry==0.9.2
|
||||
rfc3986==1.5.0
|
||||
rfc3986==2.0.0
|
||||
rsa==4.9
|
||||
s3transfer==0.6.0
|
||||
scikit-learn==1.2.1
|
||||
scipy==1.10.0
|
||||
sentence-transformers==2.2.2
|
||||
sentencepiece==0.1.97
|
||||
scikit-learn==1.3.0
|
||||
scipy==1.11.1
|
||||
sentence-transformers
|
||||
sentencepiece==0.1.99
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
snowballstemmer==2.2.0
|
||||
Sphinx==6.1.3
|
||||
Sphinx==7.1.2
|
||||
sphinxcontrib-applehelp==1.0.4
|
||||
sphinxcontrib-devhelp==1.0.2
|
||||
sphinxcontrib-htmlhelp==2.0.1
|
||||
sphinxcontrib-jsmath==1.0.1
|
||||
sphinxcontrib-qthelp==1.0.3
|
||||
sphinxcontrib-serializinghtml==1.1.5
|
||||
SQLAlchemy==1.4.46
|
||||
SQLAlchemy==2.0.19
|
||||
sqlitedict==2.1.0
|
||||
tenacity==8.2.1
|
||||
threadpoolctl==3.1.0
|
||||
tiktoken==0.1.2
|
||||
tokenizers==0.13.2
|
||||
torch==1.13.1
|
||||
torchvision==0.14.1
|
||||
tqdm==4.64.1
|
||||
transformers==4.26.0
|
||||
typer==0.7.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.4.0
|
||||
unstructured==0.4.11
|
||||
urllib3==1.26.14
|
||||
Werkzeug==2.2.3
|
||||
wrapt==1.14.1
|
||||
XlsxWriter==3.0.8
|
||||
xxhash==3.2.0
|
||||
yarl==1.8.2
|
||||
tenacity==8.2.2
|
||||
threadpoolctl==3.2.0
|
||||
tiktoken==0.4.0
|
||||
tokenizers==0.13.3
|
||||
tqdm==4.65.0
|
||||
transformers==4.31.0
|
||||
typer==0.9.0
|
||||
typing-inspect==0.9.0
|
||||
typing_extensions==4.7.1
|
||||
unstructured==0.9.0
|
||||
wrapt==1.15.0
|
||||
XlsxWriter==3.1.2
|
||||
xxhash==3.3.0
|
||||
yarl==1.9.2
|
||||
|
||||
45
setup.sh
Executable file
45
setup.sh
Executable file
@@ -0,0 +1,45 @@
|
||||
#!/bin/bash
|
||||
cd "$(dirname "$0")" || exit
|
||||
|
||||
# Create the required directories on the host machine if they don't exist
|
||||
[ ! -d "./application/indexes" ] && mkdir -p ./application/indexes
|
||||
[ ! -d "./application/inputs" ] && mkdir -p ./application/inputs
|
||||
[ ! -d "./application/vectors" ] && mkdir -p ./application/vectors
|
||||
|
||||
# Build frontend and backend images
|
||||
docker build -t frontend_image ./frontend
|
||||
docker build -t backend_image ./application
|
||||
|
||||
# Run redis and mongo services
|
||||
docker run -d --name redis -p 6379:6379 redis:6-alpine
|
||||
docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
|
||||
|
||||
# Run backend and worker services
|
||||
docker run -d --name backend -p 7091:7091 \
|
||||
--link redis:redis --link mongo:mongo \
|
||||
-v $(pwd)/application/indexes:/app/indexes \
|
||||
-v $(pwd)/application/inputs:/app/inputs \
|
||||
-v $(pwd)/application/vectors:/app/vectors \
|
||||
-e API_KEY=$OPENAI_API_KEY \
|
||||
-e EMBEDDINGS_KEY=$OPENAI_API_KEY \
|
||||
-e CELERY_BROKER_URL=redis://redis:6379/0 \
|
||||
-e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
|
||||
-e MONGO_URI=mongodb://mongo:27017/docsgpt \
|
||||
backend_image
|
||||
|
||||
docker run -d --name worker \
|
||||
--link redis:redis --link mongo:mongo \
|
||||
-e API_KEY=$OPENAI_API_KEY \
|
||||
-e EMBEDDINGS_KEY=$OPENAI_API_KEY \
|
||||
-e CELERY_BROKER_URL=redis://redis:6379/0 \
|
||||
-e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
|
||||
-e MONGO_URI=mongodb://mongo:27017/docsgpt \
|
||||
-e API_URL=http://backend:7091 \
|
||||
backend_image \
|
||||
celery -A app.celery worker -l INFO
|
||||
|
||||
# Run frontend service
|
||||
docker run -d --name frontend -p 5173:5173 \
|
||||
-e VITE_API_HOST=http://localhost:7091 \
|
||||
frontend_image
|
||||
|
||||
28
tests/test_app.py
Normal file
28
tests/test_app.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from application.app import get_vectorstore
|
||||
import os
|
||||
|
||||
|
||||
# Test cases for get_vectorstore function
|
||||
def test_no_active_docs():
|
||||
data = {}
|
||||
assert get_vectorstore(data) == os.path.join("application", "")
|
||||
|
||||
|
||||
def test_local_default_active_docs():
|
||||
data = {"active_docs": "local/default"}
|
||||
assert get_vectorstore(data) == os.path.join("application", "")
|
||||
|
||||
|
||||
def test_local_non_default_active_docs():
|
||||
data = {"active_docs": "local/something"}
|
||||
assert get_vectorstore(data) == os.path.join("application", "indexes/local/something")
|
||||
|
||||
|
||||
def test_default_active_docs():
|
||||
data = {"active_docs": "default"}
|
||||
assert get_vectorstore(data) == os.path.join("application", "")
|
||||
|
||||
|
||||
def test_complex_active_docs():
|
||||
data = {"active_docs": "local/other/path"}
|
||||
assert get_vectorstore(data) == os.path.join("application", "indexes/local/other/path")
|
||||
Reference in New Issue
Block a user