Compare commits

...

202 Commits
0.1.0 ... 0.3.0

Author SHA1 Message Date
Alex
577d58c92b less token less issues 2023-06-03 16:31:10 +01:00
Alex
899777632b Update README.md 2023-06-03 16:09:10 +01:00
Alex
bbf55ca46e Merge pull request #250 from tardigrde/main 2023-06-01 14:56:48 +01:00
Alex
3f88b04c4a Update app.py 2023-05-31 23:49:41 +01:00
Alex
f8910ba136 Added history in streaming convo + fixed little bug with message margins on loading state 2023-05-31 23:47:16 +01:00
Alex
6c95d8b13e Merge pull request #251 from arc53/feature/streaming
Feature/streaming
2023-05-31 22:30:57 +01:00
Alex
e6bccaaf4e Update app.py 2023-05-31 22:20:47 +01:00
Alex
3b8039a580 Merge branch 'main' into feature/streaming 2023-05-31 22:15:53 +01:00
Alex
fae3f55010 Working streaming 2023-05-31 17:44:20 +01:00
Alex
20c877f75b working fe 2023-05-31 15:42:17 +01:00
Alex
8380858a82 some fixes 2023-05-30 20:00:41 +01:00
Alex
d2358c399d working version 2023-05-30 19:43:06 +01:00
Alex
c3af8a77af working streams 2023-05-29 17:55:43 +01:00
Levente Csőke
bc5a0b030b Update .env-template to OPENAI_API_KEY 2023-05-26 08:57:11 +02:00
Alex
0b94f1717f Merge pull request #246 from arc53/feature/gpt4all
Feature/gpt4all
2023-05-25 19:42:20 +01:00
Alex
aaa1249a41 model fix + env var 2023-05-25 19:33:37 +01:00
Alex
ffaa22c49b reverse history order to use latest history firts
Co-Authored-By: Pavel <32868631+pabik@users.noreply.github.com>
2023-05-25 16:40:11 +01:00
Alex
0b78480977 init 2023-05-25 15:14:47 +01:00
Alex
6b6737613a Merge pull request #243 from nazihkalo/main
updating the bulk ingest file metadata logic
2023-05-20 16:02:32 +01:00
Nazih Kalo
da5d62cc1c updating the bulk ingest file metadata to account for parsers that output lists 2023-05-19 10:29:18 -07:00
Alex
6a68b63192 history fix 2023-05-19 13:09:41 +01:00
Alex
ff2e79fe7b streaming experiments 2023-05-18 23:52:59 +01:00
Alex
1800e51b19 Merge pull request #241 from arc53/feature/history
Feature/history
2023-05-18 18:50:35 +01:00
Alex
ba9c505249 accidentaly deleted frontend container 2023-05-18 18:45:15 +01:00
Alex
bc9f1c17ed History
Co-Authored-By: riccardofresi <89981746+riccardofresi@users.noreply.github.com>
2023-05-18 18:42:23 +01:00
Alex
74845aed64 history init 2023-05-18 14:27:13 +01:00
Alex
e49dd0cc6a metadata on ingestion 2023-05-17 21:41:24 +01:00
Alex
27c45ae24a Merge pull request #236 from larinam/fixbuild_github_token
fix workflow: upgrade "build and push" action version to the latest
2023-05-16 12:04:27 +01:00
Anton Larin
364a14adaf fix workflow: upgrade "build and push" action version to the latest 2023-05-16 08:02:13 +02:00
Alex
5c560b1dd5 Merge pull request #235 from larinam/fixbuild_github_token
fix workflow: adjust permissions according to documentation
2023-05-15 23:17:53 +01:00
Anton Larin
28b8b88332 fix workflow: adjust permissions according to documentation
https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions#publishing-a-package-using-an-action
2023-05-15 21:22:06 +02:00
Alex
e39ef0cc9e Merge pull request #234 from larinam/fixbuild_github_token
fix workflow: login to GHCR according to the GH documentation
2023-05-15 18:17:48 +01:00
Anton Larin
8098d3fec8 fix workflow nad login to GHCR according to the GH documentation
https://docs.github.com/en/packages/managing-github-packages-using-github-actions-workflows/publishing-and-installing-a-package-with-github-actions
2023-05-15 18:55:40 +02:00
Alex
059ffe09ea Merge pull request #232 from larinam/lint
Lint
2023-05-15 13:53:09 +01:00
Alex
36a845c29e Merge pull request #231 from larinam/main
Proper PEP8 formatting
2023-05-15 13:45:52 +01:00
GH Action - Upstream Sync
ce6f0dab56 Merge branch 'main' of https://github.com/arc53/DocsGPT 2023-05-15 12:05:18 +00:00
Alex
f200ab10a4 Merge pull request #233 from arc53/dependabot/pip/scripts/flask-2.2.5
Bump flask from 2.2.2 to 2.2.5 in /scripts
2023-05-15 12:50:30 +01:00
Alex
3001688e0e Update requirements.txt 2023-05-15 12:46:39 +01:00
GH Action - Upstream Sync
a73774099e Merge branch 'main' of https://github.com/arc53/DocsGPT 2023-05-15 11:03:45 +00:00
dependabot[bot]
b28676d52c Bump flask from 2.2.2 to 2.2.5 in /scripts
Bumps [flask](https://github.com/pallets/flask) from 2.2.2 to 2.2.5.
- [Release notes](https://github.com/pallets/flask/releases)
- [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/flask/compare/2.2.2...2.2.5)

---
updated-dependencies:
- dependency-name: flask
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-05-15 11:00:19 +00:00
Alex
eef012b4d1 Merge pull request #225 from arc53/dependabot/pip/application/flask-2.3.2
Bump flask from 2.2.3 to 2.3.2 in /application
2023-05-15 11:58:54 +01:00
Alex
1417a1c020 Update requirements.txt 2023-05-15 11:49:41 +01:00
Anton Larin
962becb9a5 Linting
* validate python formatting on every build with Ruff
* fix lint warnings
2023-05-13 10:36:17 +02:00
Anton Larin
168648e789 Proper PEP8 formatting 2023-05-12 12:02:25 +02:00
Alex
7f56f57778 better markdown styling 2023-05-06 15:22:23 +01:00
Alex
6cadddc2fc Merge pull request #223 from Zillibub/main
Moved env variables to the pydantic settings file
2023-05-02 11:07:52 +01:00
dependabot[bot]
15fd54eac4 Bump flask from 2.2.3 to 2.3.2 in /application
Bumps [flask](https://github.com/pallets/flask) from 2.2.3 to 2.3.2.
- [Release notes](https://github.com/pallets/flask/releases)
- [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst)
- [Commits](https://github.com/pallets/flask/compare/2.2.3...2.3.2)

---
updated-dependencies:
- dependency-name: flask
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-05-02 00:27:05 +00:00
Serj
31350e6302 Set celery and mongo urls as default 2023-04-30 11:03:09 +01:00
Serj
8742cdae0a Refactored url join 2023-04-30 10:46:52 +01:00
Serj
4efcb388ff Added settings usage to the worker 2023-04-29 15:58:02 +01:00
Serj
2d92e95c8a Added settings usage to the worker 2023-04-29 15:56:32 +01:00
Serj
47e5d5684a Replace other env variables in the file 2023-04-29 15:50:02 +01:00
Serj
b723e14d98 Added embeddings name variable 2023-04-29 15:46:09 +01:00
Serj
c9d24b8f42 Added llm model variable 2023-04-29 15:44:47 +01:00
Serj
43622e7ab1 Added settings file 2023-04-29 15:40:55 +01:00
Serge Kozloff
5cfc185ba5 Merge pull request #2 from arc53/main
d
2023-04-29 15:39:46 +01:00
Alex
4be2635fbe Merge pull request #221 from darth-pika-hu/main
Create setup.sh
2023-04-27 21:36:29 +01:00
Darth Pika
0beafb8391 Update setup.sh
This script includes the necessary changes to use container linking and updated environment variables for the `backend` and `worker` containers.

Make sure you have the `./frontend` and `./application` directories in the correct locations before running the script.
2023-04-27 12:39:03 -07:00
Darth Pika
1d2654b9fa Update setup.sh
Create required directories on the host machine if they don't exist.
2023-04-27 12:02:11 -07:00
Darth Pika
a4bc3673e7 Create setup.sh
Added a bash script to help with installation issues.
2023-04-27 11:40:25 -07:00
Alex
fa080537e8 Merge pull request #220 from Zillibub/main
Updated readme for development run
2023-04-27 12:27:20 +01:00
Serj
bdf67a7db7 Added dev docker compose file 2023-04-26 19:05:50 +01:00
Serge Kozloff
db4cdc901c Merge pull request #1 from arc53/main
t
2023-04-26 18:55:39 +01:00
Serj
16a540b89b Expand readme and added port in wsgi 2023-04-26 18:54:59 +01:00
Alex
e00ec9ac3f Update chat_combine_prompt.txt 2023-04-26 15:01:46 +01:00
Alex
fc760afdfc Update chat_combine_prompt.txt 2023-04-26 14:54:26 +01:00
Alex
cb47bcdb0e Update ConversationBubble.tsx 2023-04-26 13:35:05 +01:00
Alex
8d62559ca8 Merge pull request #219 from arc53/feature/code-highlighting
code highlighting
2023-04-26 10:30:39 +01:00
Alex
dbe9c4dc18 init 2023-04-25 17:01:44 +01:00
Serj
1609b4562d Added mongo db start 2023-04-24 19:22:42 +01:00
Serj
b6cadb1d65 Removed spaces 2023-04-24 18:46:05 +01:00
Serj
7aafac5b5e Expanded developer start a little bit 2023-04-24 18:39:53 +01:00
Pavel
36f0aacb19 Merge pull request #218 from arc53/feature/web-widget
web widget
2023-04-23 15:12:18 +01:00
Alex
0c1a6a918d web widget 2023-04-23 15:07:55 +01:00
Alex
d1f5ff4dba Merge pull request #214 from SAMZONG/main 2023-04-18 15:12:54 +01:00
samzong
77e6df2a1c add auto sync fork for workflow
Signed-off-by: samzong <samzong.lu@gmail.com>
2023-04-18 04:24:10 +00:00
Alex
119c037f24 Merge pull request #209 from arc53/dot-env
.env
2023-04-11 22:50:19 +01:00
Alex
97fe1abfd8 .env
Co-Authored-By: Subhadip N <subhadip@get-deck.com>
2023-04-11 22:49:47 +01:00
Alex
3a0163f0fb Merge pull request #202 from yuchen9/feat/ui-enhancement
feat: ui enhancement
2023-04-07 11:17:12 +01:00
Chen
d3fab69155 feat: ui enhancement 2023-04-06 23:54:16 +08:00
Alex
9395d2c091 celery load 2023-04-06 12:16:30 +01:00
Alex
b9efb98280 Update README.md 2023-04-04 14:12:35 +01:00
Alex
60bb264663 async calls 2023-04-03 14:37:09 +01:00
Alex
316dd2f165 Merge pull request #197 from arc53/dependabot/pip/application/redis-4.5.4
Bump redis from 4.5.3 to 4.5.4 in /application
2023-04-03 13:01:13 +01:00
dependabot[bot]
8a0f700563 Bump redis from 4.5.3 to 4.5.4 in /application
Bumps [redis](https://github.com/redis/redis-py) from 4.5.3 to 4.5.4.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v4.5.3...v4.5.4)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-31 14:31:03 +00:00
Alex
3d0c6eafec gpt4- compatable 2023-03-31 10:45:40 +01:00
Alex
46e055833b Merge pull request #196 from arc53/dependabot/pip/scripts/redis-4.5.4
Bump redis from 4.5.3 to 4.5.4 in /scripts
2023-03-30 12:52:15 +01:00
dependabot[bot]
80dfdd1cb9 Bump redis from 4.5.3 to 4.5.4 in /scripts
Bumps [redis](https://github.com/redis/redis-py) from 4.5.3 to 4.5.4.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v4.5.3...v4.5.4)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-30 11:49:21 +00:00
Alex
db21678b74 Merge pull request #192 from arc53/dependabot/pip/scripts/redis-4.5.3
Bump redis from 4.5.1 to 4.5.3 in /scripts
2023-03-30 12:48:58 +01:00
Alex
09c7fe0565 Merge pull request #193 from arc53/dependabot/pip/application/redis-4.5.3
Bump redis from 4.5.2 to 4.5.3 in /application
2023-03-30 12:48:35 +01:00
Alex
b6dfb2c856 map_reduce 2023-03-30 12:44:25 +01:00
Alex
ab46ba521f different prompts 2023-03-29 18:36:58 +01:00
Alex
4a7670f2aa Update app.py 2023-03-29 17:32:00 +01:00
Alex
9ba86bc174 Update preferenceSlice.ts 2023-03-28 10:19:42 +01:00
Pavel
2ebe5e051c discord bot fix
Stop random answers
2023-03-28 01:51:54 +04:00
dependabot[bot]
24e98abd15 Bump redis from 4.5.2 to 4.5.3 in /application
Bumps [redis](https://github.com/redis/redis-py) from 4.5.2 to 4.5.3.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v4.5.2...v4.5.3)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-27 21:36:39 +00:00
dependabot[bot]
b7f1a94ba4 Bump redis from 4.5.1 to 4.5.3 in /scripts
Bumps [redis](https://github.com/redis/redis-py) from 4.5.1 to 4.5.3.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/v4.5.1...v4.5.3)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2023-03-27 21:34:50 +00:00
Alex
70bc7465c9 Merge pull request #191 from arc53/features/little-fixes
Features/little fixes
2023-03-27 22:28:55 +01:00
Alex
65c2568427 Update app.py 2023-03-27 22:23:36 +01:00
Alex
186e7bf402 update for better runs + storage sync 2023-03-27 22:07:26 +01:00
Alex
e6f1c7d0c3 mobile more space 2023-03-27 21:50:54 +01:00
Alex
87ad9a3190 Update Upload.tsx 2023-03-27 21:48:44 +01:00
Alex
0ed45f8754 fix pending status 2023-03-27 21:48:16 +01:00
Alex
116e4401c4 Update .env.production 2023-03-27 21:44:22 +01:00
Alex
c3c0e643d2 Update chat_combine_prompt.txt 2023-03-27 21:42:06 +01:00
Alex
d5522e7c08 prep things 2023-03-27 19:29:10 +01:00
Alex
658b14ba26 failed upload 2023-03-27 19:22:06 +01:00
Alex
38f8469d0b Update Navigation.tsx 2023-03-27 19:11:57 +01:00
Alex
ce6750be15 Update README.md 2023-03-24 09:32:40 +00:00
Alex
f2e600cff9 Merge branch 'main' of https://github.com/arc53/docsgpt 2023-03-23 15:52:59 +00:00
Alex
3dab4b0b1e Update .env_sample 2023-03-23 15:52:47 +00:00
Alex
5d47aaff29 Update README.md 2023-03-23 15:51:12 +00:00
Alex
625448fcd1 Update Navigation.tsx 2023-03-22 17:16:53 +00:00
Alex
e0258cfc51 Goodbye annoying braket 2023-03-21 22:30:30 +00:00
Alex
92993ee105 Small fixes + polishing 2023-03-21 22:16:09 +00:00
Alex
ce579293fb Merge pull request #185 from arc53/feature/upload 2023-03-20 15:53:36 +00:00
Alex
d56db14fc7 Merge pull request #184 from arc53/feature/upload-exit-btn
Feature/upload exit btn
2023-03-20 14:36:21 +00:00
Alex
3e98f9e6bd Button working now 2023-03-20 14:34:51 +00:00
Alex
e7bd9b6323 Update Navigation.tsx 2023-03-20 14:19:12 +00:00
Alex
13699f5c02 Merge pull request #181 from arc53/feature/training-modal
Feature/training modal
2023-03-19 14:47:17 +00:00
Alex
f1f8341d25 fixes + combined new + path 2023-03-19 14:44:17 +00:00
Alex
796b4899aa Fixed progress + new path + combined new
Co-Authored-By: Ajay Thapliyal <ajaythapliyal1703@gmail.com>
2023-03-19 14:39:21 +00:00
ajaythapliyal
7dcbed644a avoid opening the upload window twice 2023-03-19 09:17:08 +05:30
ajaythapliyal
e6fe01876b some fixes 2023-03-19 09:10:53 +05:30
ajaythapliyal
9b75524d43 memoize the training model modal 2023-03-18 18:38:10 +05:30
ajaythapliyal
d98c876f82 adds training modals 2023-03-18 18:25:23 +05:30
Alex
be7b2fa0a4 Update README.md 2023-03-18 01:58:53 +00:00
Alex
28719e534c Merge pull request #177 from arc53/feature/discord
Botichello
2023-03-17 18:45:09 +00:00
Alex
620a7c6db2 Botichello 2023-03-17 18:44:12 +00:00
Alex
26450aca3a Merge pull request #176 from arc53/feature/upload-request
api-request
2023-03-17 13:46:04 +00:00
Alex
b1a3ff6cb1 api-request 2023-03-17 11:56:21 +00:00
Alex
ae2efc7f7b Merge pull request #174 from arc53/feature/upload-init
Adds upload modal UI
2023-03-17 11:20:44 +00:00
Alex
2523d039fb Little cloud - right size and location 2023-03-17 11:19:09 +00:00
ajaythapliyal
6dd13c6845 adds upload modal visibility 2023-03-17 13:25:21 +05:30
ajaythapliyal
dbaa116fe0 reads uploaded files and adds icon to launch upload feature 2023-03-17 13:05:44 +05:30
Alex
476071fc1b Merge pull request #173 from arc53/feature/chatwoot
chatwoot, label + id checks
2023-03-16 23:22:10 +00:00
Alex
66332ccf76 Update app.py 2023-03-16 23:19:02 +00:00
Alex
b3e9bb9ddb Update .env_sample 2023-03-16 23:13:13 +00:00
Alex
dff87b5fa3 chatwoot, label + id checks 2023-03-16 23:02:54 +00:00
Alex
7a00df65d0 Merge pull request #171 from genie88/feat-extention
feat(extension): migrate chrome extension v2 to v3
2023-03-16 13:43:20 +00:00
Alex
60cc1d8ee5 Update popup.js 2023-03-16 13:42:28 +00:00
genie
b67ade3610 feat(extension): migrate chrome extension v2 to v3 2023-03-16 13:58:11 +08:00
ajaythapliyal
97f47f5415 adds react dropzone to add upload functionality 2023-03-15 09:41:44 +05:30
ajaythapliyal
ddef31ecdf margin 2023-03-15 08:16:05 +05:30
ajaythapliyal
fa31f1ee26 modal ui done 2023-03-15 08:10:44 +05:30
Alex
8e477c9d16 update worker 2023-03-15 00:23:51 +00:00
Pavel
ce8f0ef9e1 Merge pull request #168 from arc53/feature/backend-uploads
Feature/backend uploads
2023-03-14 19:09:37 +04:00
Alex
4f64738f9e Update app.py 2023-03-14 14:36:40 +00:00
Alex
c4464455a1 cors + dependencies 2023-03-14 14:29:36 +00:00
Alex
254a6c2916 Merge pull request #169 from arc53/min-max-tokens
Min max tokens
2023-03-14 14:02:06 +00:00
Pavel
c9e1c326f5 - index.plk 2023-03-14 17:56:42 +04:00
Pavel
4532b6cd8c print minus 2023-03-14 17:49:57 +04:00
Pavel
53424a5c19 Added cli commands 2023-03-14 17:33:19 +04:00
Alex
bfb47da398 security things 2023-03-14 11:34:55 +00:00
Alex
cb96d90563 Update .gitignore 2023-03-14 10:36:27 +00:00
Pavel
b6c02c850a token ingeest 2023-03-14 13:32:29 +04:00
Alex
c297e076e6 folders 2023-03-13 21:56:09 +00:00
Alex
20a0800aa7 Create test_ingestion.py 2023-03-13 17:37:01 +00:00
Pavel
bac25112b7 v1 2023-03-13 19:14:33 +04:00
Alex
1d2162705d uploads backend first 2023-03-13 14:20:03 +00:00
ajaythapliyal
1a1f66d2a0 adds upload modal 2023-03-13 09:59:29 +05:30
Alex
a44cde33ed favicon 2023-03-10 11:48:52 +00:00
Alex
a9afd84787 Update README.md 2023-03-08 23:42:50 +00:00
Alex
ac0224b687 mdx format 2023-03-08 23:16:20 +00:00
Alex
8be2992c9a Update requirements.txt 2023-03-08 18:33:49 +00:00
Alex
e3ed23a0d4 Update requirements.txt 2023-03-08 18:20:37 +00:00
Pavel
377070e3a9 Merge pull request #163 from arc53/chat-prompts
chat prompts
2023-03-08 22:07:08 +04:00
Alex
6d959051e2 chat prompts 2023-03-08 17:50:07 +00:00
Alex
0799728000 Create requirements.txt 2023-03-08 11:44:02 +00:00
Alex
1f02f3b376 Update rst_parser.py 2023-03-08 11:32:44 +00:00
Alex
f7d7244588 chunks rst 2023-03-08 00:07:53 +00:00
Alex
352703827c Update README.md 2023-03-07 22:58:44 +00:00
Alex
5f4f55269e dependecies + default values 2023-03-07 18:37:21 +00:00
Alex
19e27e8403 Merge pull request #161 from arc53/feature/feedback
Feature/feedback
2023-03-07 13:24:40 +00:00
Alex
b41b960ef0 Merge pull request #160 from arc53/feature/feedback-cleanup-ux
Makes neutral state of feedback icon visible for mobile & makes outline color same as fill
2023-03-07 13:16:54 +00:00
ajaythapliyal
d1cc91dd6f makes feedback icon always visible for mobile 2023-03-07 09:44:53 +05:30
ajaythapliyal
b0b12856d5 makes the outline of feedback icon consistent with the inner color 2023-03-07 09:27:26 +05:30
ajaythapliyal
fdb19f8c49 Merge branch 'main' of github.com:arc53/DocsGPT into feature/feedback 2023-03-07 08:45:28 +05:30
Alex
b8a935ce3d Merge pull request #158 from arc53/feature/call-feedback-api
wires up feedback event handler with redux store
2023-03-06 19:43:53 +00:00
Alex
ec61b80fd3 Update Dockerfile 2023-03-06 19:28:22 +00:00
Alex
133863e601 Update Dockerfile 2023-03-06 19:23:56 +00:00
Alex
3767b85958 Update Dockerfile 2023-03-06 19:10:20 +00:00
Alex
a9672bc4a2 Merge pull request #159 from arc53/slimmer-conatiner
slimming
2023-03-06 18:38:30 +00:00
Alex
4f1e86d269 slimming 2023-03-06 18:34:48 +00:00
ajaythapliyal
ae36ff9394 flattens the params 2023-03-06 23:05:21 +05:30
ajaythapliyal
a888f38afb wires up the feedback event handler with redux storie 2023-03-06 23:02:15 +05:30
Alex
07e51dc8c6 sending data to aws 2023-03-06 10:23:52 +00:00
Alex
986648479a Merge pull request #155 from arc53/feedback-ui
Feedback UI
2023-03-06 09:49:26 +00:00
ajaythapliyal
533053f66f adds click event to the like/dislike icon 2023-03-06 08:32:57 +05:30
Alex
7a74c60fd1 updated backend 2023-03-06 01:39:23 +00:00
ajaythapliyal
d4d663de38 feedback UI 2023-03-05 22:50:24 +05:30
ajaythapliyal
37ae24b879 adds like dislike 2023-03-05 22:36:57 +05:30
ajaythapliyal
787a06d06e refactors bubble logic 2023-03-05 19:22:03 +05:30
Alex
43ca879f83 Merge pull request #154 from arc53/feature/feedback-api
Refactors singular Message model to Query model & adds api request to post feedbacks
2023-03-05 12:36:45 +00:00
ajaythapliyal
19f807b6c4 refactors the conversation model from singular messages to queries
consisting of prompt and response
2023-03-05 16:29:10 +05:30
ajaythapliyal
780f5893de adds api call and introduces model for conversations 2023-03-05 14:25:21 +05:30
Alex
54eea0ff04 Merge branch 'main' of https://github.com/arc53/docsgpt 2023-03-04 11:35:30 +00:00
Alex
480d91e818 Update cife.yml 2023-03-04 11:35:03 +00:00
Alex
820871329d Merge pull request #153 from arc53/feature/docker-compose
docker
2023-03-04 11:32:50 +00:00
Alex
f7d31fe615 docker 2023-03-04 11:28:36 +00:00
Alex
8d9648391b Merge pull request #150 from arc53/feature/taylor-fixes
two fixes
2023-03-04 10:22:05 +00:00
TaylorS15
d14438bf54 navigation changes/bugfix
fixed chats being visible under text below chat box
2023-03-03 16:15:35 -05:00
108 changed files with 11187 additions and 1905 deletions

2
.env-template Normal file
View File

@@ -0,0 +1,2 @@
OPENAI_API_KEY=<LLM api key (for example, open ai key)>
EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>

View File

@@ -9,6 +9,10 @@ on:
jobs:
deploy:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v3
@@ -23,17 +27,17 @@ jobs:
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
password: ${{ secrets.GITHUB_TOKEN }}
# Runs a single command using the runners shell
- name: Build and push Docker images to docker.io and ghcr.io
uses: docker/build-push-action@v2
uses: docker/build-push-action@v4
with:
file: './application/Dockerfile'
platforms: linux/amd64

48
.github/workflows/cife.yml vendored Normal file
View File

@@ -0,0 +1,48 @@
name: Build and push DocsGPT-FE Docker image
on:
workflow_dispatch:
push:
branches:
- main
jobs:
deploy:
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v3
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to ghcr.io
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
# Runs a single command using the runners shell
- name: Build and push Docker images to docker.io and ghcr.io
uses: docker/build-push-action@v4
with:
file: './frontend/Dockerfile'
platforms: linux/amd64
context: ./frontend
push: true
tags: |
${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
ghcr.io/${{ github.repository_owner }}/docsgpt-fe:latest

17
.github/workflows/lint.yml vendored Normal file
View File

@@ -0,0 +1,17 @@
name: Python linting
on:
push:
branches:
- '*'
pull_request:
types: [ opened, synchronize ]
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Lint with Ruff
uses: chartboost/ruff-action@v1

41
.github/workflows/sync_fork.yaml vendored Normal file
View File

@@ -0,0 +1,41 @@
name: Upstream Sync
permissions:
contents: write
on:
schedule:
- cron: "0 * * * *" # every hour
workflow_dispatch:
jobs:
sync_latest_from_upstream:
name: Sync latest commits from upstream repo
runs-on: ubuntu-latest
if: ${{ github.event.repository.fork }}
steps:
# Step 1: run a standard checkout action
- name: Checkout target repo
uses: actions/checkout@v3
# Step 2: run the sync action
- name: Sync upstream changes
id: sync
uses: aormsby/Fork-Sync-With-Upstream-action@v3.4
with:
# set your upstream repo and branch
upstream_sync_repo: arc53/DocsGPT
upstream_sync_branch: main
target_sync_branch: main
target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set
# Set test_mode true to run tests instead of the true action!!
test_mode: false
- name: Sync check
if: failure()
run: |
echo "::error::由于权限不足,导致同步失败(这是预期的行为),请前往仓库首页手动执行[Sync fork]。"
echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]."
exit 1

8
.gitignore vendored
View File

@@ -162,3 +162,11 @@ frontend/*.sw?
application/vectors/
**/inputs
**/indexes
**/temp
**/yarn.lock
node_modules/

2
.ruff.toml Normal file
View File

@@ -0,0 +1,2 @@
# Allow lines to be as long as 120 characters.
line-length = 120

View File

@@ -21,6 +21,11 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
</div>
![video-example-of-docs-gpt](https://d3dg1063dc54p9.cloudfront.net/videos/demov3.gif)
## Features
![Group 9](https://user-images.githubusercontent.com/17906039/220427472-2644cff4-7666-46a5-819f-fc4a521f63c7.png)
@@ -29,8 +34,7 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better!
## Preview
![video-example-of-docs-gpt](https://d3dg1063dc54p9.cloudfront.net/videos/demov2.gif)
## [Live preview](https://docsgpt.arc53.com/)
@@ -44,16 +48,45 @@ You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, pleas
- Scripts - script that creates similarity search index and store for other libraries.
- frontend - frontend in vite and
## QuickStart
Please note: current vector database uses pandas Python documentation, thus responses will be related to it, if you want to use other docs please follow a guide below
Note: Make sure you have docker installed
1. Open dowload this repository with `git clone https://github.com/arc53/DocsGPT.git`
2. Create .env file in your root directory and set your OPENAI_API_KEY with your openai api key and VITE_API_STREAMING to true or false if you dont want streaming answers
3. Run `docker-compose build && docker-compose up`
4. Navigate to http://localhost:5173/
To stop just run Ctrl + C
## Development environments
Spin up only 2 containers from docker-compose.yaml (by deleting all services except for redis and mongo)
Make sure you have python 3.10 or 3.11 installed
1. Navigate to `/application` folder
2. Install dependencies
2. Run `docker-compose -f docker-compose-dev.yaml build && docker-compose -f docker-compose-dev.yaml up -d`
3. Export required variables
`export CELERY_BROKER_URL=redis://localhost:6379/0`
`export CELERY_RESULT_BACKEND=redis://localhost:6379/1`
`export MONGO_URI=mongodb://localhost:27017/docsgpt`
4. Install dependencies
`pip install -r requirements.txt`
3. Prepare .env file
5. Prepare .env file
Copy .env_sample and create .env with your openai api token
4. Run the app
`python app.py`
6. Run the app
`python wsgi.py`
7. Start worker with `celery -A app.celery worker -l INFO`
To start frontend
1. Navigate to `/frontend` folder
2. Install dependencies
`npm install`
3. Run the app
4. `npm run dev`
[How to install the Chrome extension](https://github.com/arc53/docsgpt/wiki#launch-chrome-extension)

View File

@@ -1 +1,6 @@
OPENAI_API_KEY=your_api_key
API_KEY=your_api_key
EMBEDDINGS_KEY=your_api_key
CELERY_BROKER_URL=redis://localhost:6379/0
CELERY_RESULT_BACKEND=redis://localhost:6379/1
MONGO_URI=mongodb://localhost:27017/docsgpt
API_URL=http://localhost:5001

View File

@@ -4,17 +4,22 @@ FROM python:3.10-slim-bullseye as builder
RUN apt-get update && apt-get install -y gcc curl
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip && pip install tiktoken==0.1.2
RUN pip install --upgrade pip && pip install tiktoken==0.3.3
COPY requirements.txt .
RUN pip install -r requirements.txt
FROM python:3.10-slim-bullseye
# Copy pre-built packages from builder stage
COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/
RUN pip install gunicorn==20.1.0
RUN pip install celery==5.2.7
WORKDIR /app
COPY . /app
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true
RUN pip install -r requirements.txt
EXPOSE 5001
CMD ["gunicorn", "-w", "6", "--bind", "0.0.0.0:5001", "wsgi:app"]
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]

View File

@@ -1,32 +1,42 @@
import os
import asyncio
import datetime
import http.client
import json
import os
import traceback
import openai
import dotenv
import requests
from flask import Flask, request, render_template
from celery import Celery
from celery.result import AsyncResult
from flask import Flask, request, render_template, send_from_directory, jsonify, Response
from langchain import FAISS
from langchain.llms import OpenAIChat
from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
from langchain.chains import LLMChain, ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from langchain.prompts.chat import (
ChatPromptTemplate,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
AIMessagePromptTemplate,
)
from pymongo import MongoClient
from werkzeug.utils import secure_filename
from langchain.llms import GPT4All
from core.settings import settings
from error import bad_request
from worker import ingest_worker
os.environ["LANGCHAIN_HANDLER"] = "langchain"
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
if os.getenv("LLM_NAME") is not None:
llm_choice = os.getenv("LLM_NAME")
else:
llm_choice = "openai"
if os.getenv("EMBEDDINGS_NAME") is not None:
embeddings_choice = os.getenv("EMBEDDINGS_NAME")
else:
embeddings_choice = "openai_text-embedding-ada-002"
if llm_choice == "manifest":
if settings.LLM_NAME == "manifest":
from manifest import Manifest
from langchain.llms.manifest import ManifestWrapper
@@ -47,31 +57,157 @@ if platform.system() == "Windows":
# loading the .env file
dotenv.load_dotenv()
with open("combine_prompt.txt", "r") as f:
# load the prompts
with open("prompts/combine_prompt.txt", "r") as f:
template = f.read()
with open("combine_prompt_hist.txt", "r") as f:
with open("prompts/combine_prompt_hist.txt", "r") as f:
template_hist = f.read()
with open("question_prompt.txt", "r") as f:
with open("prompts/question_prompt.txt", "r") as f:
template_quest = f.read()
if os.getenv("API_KEY") is not None:
with open("prompts/chat_combine_prompt.txt", "r") as f:
chat_combine_template = f.read()
with open("prompts/chat_reduce_prompt.txt", "r") as f:
chat_reduce_template = f.read()
if settings.API_KEY is not None:
api_key_set = True
else:
api_key_set = False
if os.getenv("EMBEDDINGS_KEY") is not None:
if settings.EMBEDDINGS_KEY is not None:
embeddings_key_set = True
else:
embeddings_key_set = False
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
app.config['CELERY_BROKER_URL'] = settings.CELERY_BROKER_URL
app.config['CELERY_RESULT_BACKEND'] = settings.CELERY_RESULT_BACKEND
app.config['MONGO_URI'] = settings.MONGO_URI
celery = Celery()
celery.config_from_object('celeryconfig')
mongo = MongoClient(app.config['MONGO_URI'])
db = mongo["docsgpt"]
vectors_collection = db["vectors"]
async def async_generate(chain, question, chat_history):
result = await chain.arun({"question": question, "chat_history": chat_history})
return result
def run_async_chain(chain, question, chat_history):
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
result = {}
try:
answer = loop.run_until_complete(async_generate(chain, question, chat_history))
finally:
loop.close()
result["answer"] = answer
return result
def get_vectorstore(data):
if "active_docs" in data:
if data["active_docs"].split("/")[0] == "local":
if data["active_docs"].split("/")[1] == "default":
vectorstore = ""
else:
vectorstore = "indexes/" + data["active_docs"]
else:
vectorstore = "vectors/" + data["active_docs"]
if data['active_docs'] == "default":
vectorstore = ""
else:
vectorstore = ""
return vectorstore
def get_docsearch(vectorstore, embeddings_key):
if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
elif settings.EMBEDDINGS_NAME == "cohere_medium":
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
return docsearch
@celery.task(bind=True)
def ingest(self, directory, formats, name_job, filename, user):
resp = ingest_worker(self, directory, formats, name_job, filename, user)
return resp
@app.route("/")
def home():
return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
embeddings_choice=embeddings_choice)
return render_template("index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME,
embeddings_choice=settings.EMBEDDINGS_NAME)
def complete_stream(question, docsearch, chat_history, api_key):
openai.api_key = api_key
llm = ChatOpenAI(openai_api_key=api_key)
docs = docsearch.similarity_search(question, k=2)
# join all page_content together with a newline
docs_together = "\n".join([doc.page_content for doc in docs])
p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
messages_combine = [{"role": "system", "content": p_chat_combine}]
if len(chat_history) > 1:
tokens_current_history = 0
# count tokens in history
chat_history.reverse()
for i in chat_history:
if "prompt" in i and "response" in i:
tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
tokens_current_history += tokens_batch
messages_combine.append({"role": "user", "content": i["prompt"]})
messages_combine.append({"role": "system", "content": i["response"]})
messages_combine.append({"role": "user", "content": question})
completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
messages=messages_combine, stream=True, max_tokens=500, temperature=0)
for line in completion:
if 'content' in line['choices'][0]['delta']:
# check if the delta contains content
data = json.dumps({"answer": str(line['choices'][0]['delta']['content'])})
yield f"data: {data}\n\n"
# send data.type = "end" to indicate that the stream has ended as json
data = json.dumps({"type": "end"})
yield f"data: {data}\n\n"
@app.route("/stream", methods=['POST', 'GET'])
def stream():
# get parameter from url question
question = request.args.get('question')
history = request.args.get('history')
# history to json object from string
history = json.loads(history)
# check if active_docs is set
if not api_key_set:
api_key = request.args.get("api_key")
else:
api_key = settings.API_KEY
if not embeddings_key_set:
embeddings_key = request.args.get("embeddings_key")
else:
embeddings_key = settings.EMBEDDINGS_KEY
if "active_docs" in request.args:
vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
else:
vectorstore = ""
docsearch = get_docsearch(vectorstore, embeddings_key)
#question = "Hi"
return Response(complete_stream(question, docsearch,
chat_history= history, api_key=api_key), mimetype='text/event-stream')
@app.route("/api/answer", methods=["POST"])
@@ -83,70 +219,93 @@ def api_answer():
if not api_key_set:
api_key = data["api_key"]
else:
api_key = os.getenv("API_KEY")
api_key = settings.API_KEY
if not embeddings_key_set:
embeddings_key = data["embeddings_key"]
else:
embeddings_key = os.getenv("EMBEDDINGS_KEY")
embeddings_key = settings.EMBEDDINGS_KEY
# use try and except to check for exception
try:
# check if the vectorstore is set
if "active_docs" in data:
vectorstore = "vectors/" + data["active_docs"]
if data['active_docs'] == "default":
vectorstore = ""
else:
vectorstore = ""
#vectorstore = "outputs/inputs/"
vectorstore = get_vectorstore(data)
# loading the index and the store and the prompt template
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
if embeddings_choice == "openai_text-embedding-ada-002":
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
elif embeddings_choice == "cohere_medium":
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
# create a prompt template
if history:
history = json.loads(history)
template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}",
history[1])
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp,
template_format="jinja2")
else:
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
template_format="jinja2")
docsearch = get_docsearch(vectorstore, embeddings_key)
q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
template_format="jinja2")
if llm_choice == "openai":
llm = OpenAIChat(openai_api_key=api_key, temperature=0)
#llm = OpenAI(openai_api_key=api_key, temperature=0)
elif llm_choice == "manifest":
if settings.LLM_NAME == "openai_chat":
llm = ChatOpenAI(openai_api_key=api_key) # optional parameter: model_name="gpt-4"
messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
if history:
tokens_current_history = 0
#count tokens in history
history.reverse()
for i in history:
if "prompt" in i and "response" in i:
tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
tokens_current_history += tokens_batch
messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
import sys
print(messages_combine, file=sys.stderr)
p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
elif settings.LLM_NAME == "openai":
llm = OpenAI(openai_api_key=api_key, temperature=0)
elif settings.LLM_NAME == "manifest":
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
elif llm_choice == "huggingface":
elif settings.LLM_NAME == "huggingface":
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
elif llm_choice == "cohere":
elif settings.LLM_NAME == "cohere":
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
elif settings.LLM_NAME == "gpt4all":
llm = GPT4All(model=settings.MODEL_PATH)
else:
raise ValueError("unknown LLM model")
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
combine_prompt=c_prompt, question_prompt=q_prompt)
if settings.LLM_NAME == "openai_chat":
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
chain = ConversationalRetrievalChain(
retriever=docsearch.as_retriever(k=2),
question_generator=question_generator,
combine_docs_chain=doc_chain,
)
chat_history = []
# result = chain({"question": question, "chat_history": chat_history})
# generate async with async generate method
result = run_async_chain(chain, question, chat_history)
elif settings.LLM_NAME == "gpt4all":
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
chain = ConversationalRetrievalChain(
retriever=docsearch.as_retriever(k=2),
question_generator=question_generator,
combine_docs_chain=doc_chain,
)
chat_history = []
# result = chain({"question": question, "chat_history": chat_history})
# generate async with async generate method
result = run_async_chain(chain, question, chat_history)
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=10)
else:
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
combine_prompt=chat_combine_template, question_prompt=q_prompt)
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
result = chain({"query": question})
# fetch the answer
result = chain({"query": question})
print(result)
# some formatting for the frontend
result['answer'] = result['result']
if "result" in result:
result['answer'] = result['result']
result['answer'] = result['answer'].replace("\\n", "\n")
try:
result['answer'] = result['answer'].split("SOURCES:")[0]
except:
except Exception:
pass
# mock result
@@ -166,6 +325,9 @@ def api_answer():
def check_docs():
# check if docs exist in a vectorstore folder
data = request.get_json()
# split docs on / and take first part
if data["docs"].split("/")[0] == "local":
return {"status": 'exists'}
vectorstore = "vectors/" + data["docs"]
base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
if os.path.exists(vectorstore) or data["docs"] == "default":
@@ -189,12 +351,194 @@ def check_docs():
return {"status": 'loaded'}
@app.route("/api/feedback", methods=["POST"])
def api_feedback():
data = request.get_json()
question = data["question"]
answer = data["answer"]
feedback = data["feedback"]
print('-' * 5)
print("Question: " + question)
print("Answer: " + answer)
print("Feedback: " + feedback)
print('-' * 5)
response = requests.post(
url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
headers={
"Content-Type": "application/json; charset=utf-8",
},
data=json.dumps({
"answer": answer,
"question": question,
"feedback": feedback
})
)
return {"status": http.client.responses.get(response.status_code, 'ok')}
@app.route('/api/combine', methods=['GET'])
def combined_json():
user = 'local'
"""Provide json file with combined available indexes."""
# get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
data = [{
"name": 'default',
"language": 'default',
"version": '',
"description": 'default',
"fullName": 'default',
"date": 'default',
"docLink": 'default',
"model": settings.EMBEDDINGS_NAME,
"location": "local"
}]
# structure: name, language, version, description, fullName, date, docLink
# append data from vectors_collection
for index in vectors_collection.find({'user': user}):
data.append({
"name": index['name'],
"language": index['language'],
"version": '',
"description": index['name'],
"fullName": index['name'],
"date": index['date'],
"docLink": index['location'],
"model": settings.EMBEDDINGS_NAME,
"location": "local"
})
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
for index in data_remote:
index['location'] = "remote"
data.append(index)
return jsonify(data)
@app.route('/api/upload', methods=['POST'])
def upload_file():
"""Upload a file to get vectorized and indexed."""
if 'user' not in request.form:
return {"status": 'no user'}
user = secure_filename(request.form['user'])
if 'name' not in request.form:
return {"status": 'no name'}
job_name = secure_filename(request.form['name'])
# check if the post request has the file part
if 'file' not in request.files:
print('No file part')
return {"status": 'no file'}
file = request.files['file']
if file.filename == '':
return {"status": 'no file name'}
if file:
filename = secure_filename(file.filename)
# save dir
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
# create dir if not exists
if not os.path.exists(save_dir):
os.makedirs(save_dir)
file.save(os.path.join(save_dir, filename))
task = ingest.delay('temp', [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
# task id
task_id = task.id
return {"status": 'ok', "task_id": task_id}
else:
return {"status": 'error'}
@app.route('/api/task_status', methods=['GET'])
def task_status():
"""Get celery job status."""
task_id = request.args.get('task_id')
task = AsyncResult(task_id)
task_meta = task.info
return {"status": task.status, "result": task_meta}
### Backgound task api
@app.route('/api/upload_index', methods=['POST'])
def upload_index_files():
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
if 'user' not in request.form:
return {"status": 'no user'}
user = secure_filename(request.form['user'])
if 'name' not in request.form:
return {"status": 'no name'}
job_name = secure_filename(request.form['name'])
if 'file_faiss' not in request.files:
print('No file part')
return {"status": 'no file'}
file_faiss = request.files['file_faiss']
if file_faiss.filename == '':
return {"status": 'no file name'}
if 'file_pkl' not in request.files:
print('No file part')
return {"status": 'no file'}
file_pkl = request.files['file_pkl']
if file_pkl.filename == '':
return {"status": 'no file name'}
# saves index files
save_dir = os.path.join('indexes', user, job_name)
if not os.path.exists(save_dir):
os.makedirs(save_dir)
file_faiss.save(os.path.join(save_dir, 'index.faiss'))
file_pkl.save(os.path.join(save_dir, 'index.pkl'))
# create entry in vectors_collection
vectors_collection.insert_one({
"user": user,
"name": job_name,
"language": job_name,
"location": save_dir,
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
"model": settings.EMBEDDINGS_NAME,
"type": "local"
})
return {"status": 'ok'}
@app.route('/api/download', methods=['get'])
def download_file():
user = secure_filename(request.args.get('user'))
job_name = secure_filename(request.args.get('name'))
filename = secure_filename(request.args.get('file'))
save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
return send_from_directory(save_dir, filename, as_attachment=True)
@app.route('/api/delete_old', methods=['get'])
def delete_old():
"""Delete old indexes."""
import shutil
path = request.args.get('path')
dirs = path.split('/')
dirs_clean = []
for i in range(1, len(dirs)):
dirs_clean.append(secure_filename(dirs[i]))
# check that path strats with indexes or vectors
if dirs[0] not in ['indexes', 'vectors']:
return {"status": 'error'}
path_clean = '/'.join(dirs)
vectors_collection.delete_one({'location': path})
try:
shutil.rmtree(path_clean)
except FileNotFoundError:
pass
return {"status": 'ok'}
# handling CORS
@app.after_request
def after_request(response):
response.headers.add('Access-Control-Allow-Origin', '*')
response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
response.headers.add('Access-Control-Allow-Credentials', 'true')
return response

View File

@@ -0,0 +1,8 @@
import os
broker_url = os.getenv("CELERY_BROKER_URL")
result_backend = os.getenv("CELERY_RESULT_BACKEND")
task_serializer = 'json'
result_serializer = 'json'
accept_content = ['json']

View File

View File

@@ -0,0 +1,22 @@
from pathlib import Path
from pydantic import BaseSettings
class Settings(BaseSettings):
LLM_NAME: str = "openai_chat"
EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
MODEL_PATH: str = "./models/gpt4all-model.bin"
TOKENS_MAX_HISTORY: int = 150
API_URL: str = "http://localhost:5001" # backend url for celery worker
API_KEY: str = None # LLM api key
EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY
path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")

View File

@@ -1,13 +1,15 @@
from flask import jsonify
from werkzeug.http import HTTP_STATUS_CODES
def response_error(code_status,message=None):
payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
def response_error(code_status, message=None):
payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
if message:
payload['message'] = message
response = jsonify(payload)
response.status_code = code_status
return response
def bad_request(status_code=400,message=''):
return response_error(code_status=status_code,message=message)
def bad_request(status_code=400, message=''):
return response_error(code_status=status_code, message=message)

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1,19 @@
"""Base reader class."""
from abc import abstractmethod
from typing import Any, List
from langchain.docstore.document import Document as LCDocument
from parser.schema.base import Document
class BaseReader:
"""Utilities for loading data from a directory."""
@abstractmethod
def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
"""Load data from the input directory."""
def load_langchain_documents(self, **load_kwargs: Any) -> List[LCDocument]:
"""Load data in LangChain document format."""
docs = self.load_data(**load_kwargs)
return [d.to_langchain_format() for d in docs]

View File

@@ -0,0 +1,38 @@
"""Base parser and config class."""
from abc import abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Union
class BaseParser:
"""Base class for all parsers."""
def __init__(self, parser_config: Optional[Dict] = None):
"""Init params."""
self._parser_config = parser_config
def init_parser(self) -> None:
"""Init parser and store it."""
parser_config = self._init_parser()
self._parser_config = parser_config
@property
def parser_config_set(self) -> bool:
"""Check if parser config is set."""
return self._parser_config is not None
@property
def parser_config(self) -> Dict:
"""Check if parser config is set."""
if self._parser_config is None:
raise ValueError("Parser config not set.")
return self._parser_config
@abstractmethod
def _init_parser(self) -> Dict:
"""Initialize the parser with the config."""
@abstractmethod
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
"""Parse file."""

View File

@@ -0,0 +1,163 @@
"""Simple reader that reads files of different formats from a directory."""
import logging
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union
from parser.file.base import BaseReader
from parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser
from parser.file.epub_parser import EpubParser
from parser.file.html_parser import HTMLParser
from parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
".docx": DocxParser(),
".csv": PandasCSVParser(),
".epub": EpubParser(),
".md": MarkdownParser(),
".rst": RstParser(),
".html": HTMLParser(),
".mdx": MarkdownParser(),
}
class SimpleDirectoryReader(BaseReader):
"""Simple directory reader.
Can read files into separate documents, or concatenates
files into one document text.
Args:
input_dir (str): Path to the directory.
input_files (List): List of file paths to read (Optional; overrides input_dir)
exclude_hidden (bool): Whether to exclude hidden files (dotfiles).
errors (str): how encoding and decoding errors are to be handled,
see https://docs.python.org/3/library/functions.html#open
recursive (bool): Whether to recursively search in subdirectories.
False by default.
required_exts (Optional[List[str]]): List of required extensions.
Default is None.
file_extractor (Optional[Dict[str, BaseParser]]): A mapping of file
extension to a BaseParser class that specifies how to convert that file
to text. See DEFAULT_FILE_EXTRACTOR.
num_files_limit (Optional[int]): Maximum number of files to read.
Default is None.
file_metadata (Optional[Callable[str, Dict]]): A function that takes
in a filename and returns a Dict of metadata for the Document.
Default is None.
"""
def __init__(
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
chunk_size_max: int = 2048,
) -> None:
"""Initialize with parameters."""
super().__init__()
if not input_dir and not input_files:
raise ValueError("Must provide either `input_dir` or `input_files`.")
self.errors = errors
self.recursive = recursive
self.exclude_hidden = exclude_hidden
self.required_exts = required_exts
self.num_files_limit = num_files_limit
if input_files:
self.input_files = []
for path in input_files:
print(path)
input_file = Path(path)
self.input_files.append(input_file)
elif input_dir:
self.input_dir = Path(input_dir)
self.input_files = self._add_files(self.input_dir)
self.file_extractor = file_extractor or DEFAULT_FILE_EXTRACTOR
self.file_metadata = file_metadata
def _add_files(self, input_dir: Path) -> List[Path]:
"""Add files."""
input_files = sorted(input_dir.iterdir())
new_input_files = []
dirs_to_explore = []
for input_file in input_files:
if input_file.is_dir():
if self.recursive:
dirs_to_explore.append(input_file)
elif self.exclude_hidden and input_file.name.startswith("."):
continue
elif (
self.required_exts is not None
and input_file.suffix not in self.required_exts
):
continue
else:
new_input_files.append(input_file)
for dir_to_explore in dirs_to_explore:
sub_input_files = self._add_files(dir_to_explore)
new_input_files.extend(sub_input_files)
if self.num_files_limit is not None and self.num_files_limit > 0:
new_input_files = new_input_files[0: self.num_files_limit]
# print total number of files added
logging.debug(
f"> [SimpleDirectoryReader] Total files added: {len(new_input_files)}"
)
return new_input_files
def load_data(self, concatenate: bool = False) -> List[Document]:
"""Load data from the input directory.
Args:
concatenate (bool): whether to concatenate all files into one document.
If set to True, file metadata is ignored.
False by default.
Returns:
List[Document]: A list of documents.
"""
data: Union[str, List[str]] = ""
data_list: List[str] = []
metadata_list = []
for input_file in self.input_files:
if input_file.suffix in self.file_extractor:
parser = self.file_extractor[input_file.suffix]
if not parser.parser_config_set:
parser.init_parser()
data = parser.parse_file(input_file, errors=self.errors)
else:
# do standard read
with open(input_file, "r", errors=self.errors) as f:
data = f.read()
if isinstance(data, List):
data_list.extend(data)
else:
data_list.append(str(data))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))
if concatenate:
return [Document("\n".join(data_list))]
elif self.file_metadata is not None:
return [Document(d, extra_info=m) for d, m in zip(data_list, metadata_list)]
else:
return [Document(d) for d in data_list]

View File

@@ -0,0 +1,59 @@
"""Docs parser.
Contains parsers for docx, pdf files.
"""
from pathlib import Path
from typing import Dict
from parser.file.base_parser import BaseParser
class PDFParser(BaseParser):
"""PDF parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
try:
import PyPDF2
except ImportError:
raise ValueError("PyPDF2 is required to read PDF files.")
text_list = []
with open(file, "rb") as fp:
# Create a PDF object
pdf = PyPDF2.PdfReader(fp)
# Get the number of pages in the PDF document
num_pages = len(pdf.pages)
# Iterate over every page
for page in range(num_pages):
# Extract the text from the page
page_text = pdf.pages[page].extract_text()
text_list.append(page_text)
text = "\n".join(text_list)
return text
class DocxParser(BaseParser):
"""Docx parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
try:
import docx2txt
except ImportError:
raise ValueError("docx2txt is required to read Microsoft Word files.")
text = docx2txt.process(file)
return text

View File

@@ -0,0 +1,43 @@
"""Epub parser.
Contains parsers for epub files.
"""
from pathlib import Path
from typing import Dict
from parser.file.base_parser import BaseParser
class EpubParser(BaseParser):
"""Epub Parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> str:
"""Parse file."""
try:
import ebooklib
from ebooklib import epub
except ImportError:
raise ValueError("`EbookLib` is required to read Epub files.")
try:
import html2text
except ImportError:
raise ValueError("`html2text` is required to parse Epub files.")
text_list = []
book = epub.read_epub(file, options={"ignore_ncx": True})
# Iterate through all chapters.
for item in book.get_items():
# Chapters are typically located in epub documents items.
if item.get_type() == ebooklib.ITEM_DOCUMENT:
text_list.append(
html2text.html2text(item.get_content().decode("utf-8"))
)
text = "\n".join(text_list)
return text

View File

@@ -0,0 +1,83 @@
"""HTML parser.
Contains parser for html files.
"""
import re
from pathlib import Path
from typing import Dict, Union
from parser.file.base_parser import BaseParser
class HTMLParser(BaseParser):
"""HTML parser."""
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
"""Parse file.
Returns:
Union[str, List[str]]: a string or a List of strings.
"""
try:
from unstructured.partition.html import partition_html
from unstructured.staging.base import convert_to_isd
from unstructured.cleaners.core import clean
except ImportError:
raise ValueError("unstructured package is required to parse HTML files.")
# Using the unstructured library to convert the html to isd format
# isd sample : isd = [
# {"text": "My Title", "type": "Title"},
# {"text": "My Narrative", "type": "NarrativeText"}
# ]
with open(file, "r", encoding="utf-8") as fp:
elements = partition_html(file=fp)
isd = convert_to_isd(elements)
# Removing non ascii charactwers from isd_el['text']
for isd_el in isd:
isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
for isd_el in isd:
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
for isd_el in isd:
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
# Creating a list of all the indexes of isd_el['type'] = 'Title'
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
# Creating 'Chunks' - List of lists of strings
# each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
# Each Chunk can be thought of as an individual set of data, which can be sent to the model
# Where Each Title is grouped together with the data under it
Chunks = [[]]
final_chunks = list(list())
for i, isd_el in enumerate(isd):
if i in title_indexes:
Chunks.append([])
Chunks[-1].append(isd_el['text'])
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25
# TODO: This value can be an user defined variable
for chunk in Chunks:
# sum of lenth of all the strings in the chunk
sum = 0
sum += len(str(chunk))
if sum < 25:
Chunks.remove(chunk)
else:
# appending all the approved chunks to final_chunks as a single string
final_chunks.append(" ".join([str(item) for item in chunk]))
return final_chunks

View File

@@ -0,0 +1,145 @@
"""Markdown parser.
Contains parser for md files.
"""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import tiktoken
from parser.file.base_parser import BaseParser
class MarkdownParser(BaseParser):
"""Markdown parser.
Extract text from markdown files.
Returns dictionary with keys as headers and values as the text between headers.
"""
def __init__(
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
max_tokens: int = 2048,
# remove_tables: bool = True,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._remove_hyperlinks = remove_hyperlinks
self._remove_images = remove_images
self._max_tokens = max_tokens
# self._remove_tables = remove_tables
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
current_text: str):
"""Append to tups chunk."""
num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
if num_tokens > self._max_tokens:
chunks = [current_text[i:i + self._max_tokens] for i in range(0, len(current_text), self._max_tokens)]
for chunk in chunks:
tups.append((current_header, chunk))
else:
tups.append((current_header, current_text))
return tups
def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
"""Convert a markdown file to a dictionary.
The keys are the headers and the values are the text under each header.
"""
markdown_tups: List[Tuple[Optional[str], str]] = []
lines = markdown_text.split("\n")
current_header = None
current_text = ""
for line in lines:
header_match = re.match(r"^#+\s", line)
if header_match:
if current_header is not None:
if current_text == "" or None:
continue
markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)
current_header = line
current_text = ""
else:
current_text += line + "\n"
markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)
if current_header is not None:
# pass linting, assert keys are defined
markdown_tups = [
(re.sub(r"#", "", cast(str, key)).strip(), re.sub(r"<.*?>", "", value))
for key, value in markdown_tups
]
else:
markdown_tups = [
(key, re.sub("\n", "", value)) for key, value in markdown_tups
]
return markdown_tups
def remove_images(self, content: str) -> str:
"""Get a dictionary of a markdown file from its path."""
pattern = r"!{1}\[\[(.*)\]\]"
content = re.sub(pattern, "", content)
return content
# def remove_tables(self, content: str) -> List[List[str]]:
# """Convert markdown tables to nested lists."""
# table_rows_pattern = r"((\r?\n){2}|^)([^\r\n]*\|[^\r\n]*(\r?\n)?)+(?=(\r?\n){2}|$)"
# table_cells_pattern = r"([^\|\r\n]*)\|"
#
# table_rows = re.findall(table_rows_pattern, content, re.MULTILINE)
# table_lists = []
# for row in table_rows:
# cells = re.findall(table_cells_pattern, row[2])
# cells = [cell.strip() for cell in cells if cell.strip()]
# table_lists.append(cells)
# return str(table_lists)
def remove_hyperlinks(self, content: str) -> str:
"""Get a dictionary of a markdown file from its path."""
pattern = r"\[(.*?)\]\((.*?)\)"
content = re.sub(pattern, r"\1", content)
return content
def _init_parser(self) -> Dict:
"""Initialize the parser with the config."""
return {}
def parse_tups(
self, filepath: Path, errors: str = "ignore"
) -> List[Tuple[Optional[str], str]]:
"""Parse file into tuples."""
with open(filepath, "r") as f:
content = f.read()
if self._remove_hyperlinks:
content = self.remove_hyperlinks(content)
if self._remove_images:
content = self.remove_images(content)
# if self._remove_tables:
# content = self.remove_tables(content)
markdown_tups = self.markdown_to_tups(content)
return markdown_tups
def parse_file(
self, filepath: Path, errors: str = "ignore"
) -> Union[str, List[str]]:
"""Parse file into string."""
tups = self.parse_tups(filepath, errors=errors)
results = []
# TODO: don't include headers right now
for header, value in tups:
if header is None:
results.append(value)
else:
results.append(f"\n\n{header}\n{value}")
return results

View File

@@ -0,0 +1,173 @@
"""reStructuredText parser.
Contains parser for md files.
"""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
from parser.file.base_parser import BaseParser
class RstParser(BaseParser):
"""reStructuredText parser.
Extract text from .rst files.
Returns dictionary with keys as headers and values as the text between headers.
"""
def __init__(
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
remove_table_excess: bool = True,
remove_interpreters: bool = True,
remove_directives: bool = True,
remove_whitespaces_excess: bool = True,
# Be carefull with remove_characters_excess, might cause data loss
remove_characters_excess: bool = True,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._remove_hyperlinks = remove_hyperlinks
self._remove_images = remove_images
self._remove_table_excess = remove_table_excess
self._remove_interpreters = remove_interpreters
self._remove_directives = remove_directives
self._remove_whitespaces_excess = remove_whitespaces_excess
self._remove_characters_excess = remove_characters_excess
def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
"""Convert a reStructuredText file to a dictionary.
The keys are the headers and the values are the text under each header.
"""
rst_tups: List[Tuple[Optional[str], str]] = []
lines = rst_text.split("\n")
current_header = None
current_text = ""
for i, line in enumerate(lines):
header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
if header_match and i > 0 and (
len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
if current_header is not None:
if current_text == "" or None:
continue
# removes the next heading from current Document
if current_text.endswith(lines[i - 1] + "\n"):
current_text = current_text[:len(current_text) - len(lines[i - 1] + "\n")]
rst_tups.append((current_header, current_text))
current_header = lines[i - 1]
current_text = ""
else:
current_text += line + "\n"
rst_tups.append((current_header, current_text))
# TODO: Format for rst
#
# if current_header is not None:
# # pass linting, assert keys are defined
# rst_tups = [
# (re.sub(r"#", "", cast(str, key)).strip(), re.sub(r"<.*?>", "", value))
# for key, value in rst_tups
# ]
# else:
# rst_tups = [
# (key, re.sub("\n", "", value)) for key, value in rst_tups
# ]
if current_header is None:
rst_tups = [
(key, re.sub("\n", "", value)) for key, value in rst_tups
]
return rst_tups
def remove_images(self, content: str) -> str:
pattern = r"\.\. image:: (.*)"
content = re.sub(pattern, "", content)
return content
def remove_hyperlinks(self, content: str) -> str:
pattern = r"`(.*?) <(.*?)>`_"
content = re.sub(pattern, r"\1", content)
return content
def remove_directives(self, content: str) -> str:
"""Removes reStructuredText Directives"""
pattern = r"`\.\.([^:]+)::"
content = re.sub(pattern, "", content)
return content
def remove_interpreters(self, content: str) -> str:
"""Removes reStructuredText Interpreted Text Roles"""
pattern = r":(\w+):"
content = re.sub(pattern, "", content)
return content
def remove_table_excess(self, content: str) -> str:
"""Pattern to remove grid table separators"""
pattern = r"^\+[-]+\+[-]+\+$"
content = re.sub(pattern, "", content, flags=re.MULTILINE)
return content
def remove_whitespaces_excess(self, content: List[Tuple[str, Any]]) -> List[Tuple[str, Any]]:
"""Pattern to match 2 or more consecutive whitespaces"""
pattern = r"\s{2,}"
content = [(key, re.sub(pattern, " ", value)) for key, value in content]
return content
def remove_characters_excess(self, content: List[Tuple[str, Any]]) -> List[Tuple[str, Any]]:
"""Pattern to match 2 or more consecutive characters"""
pattern = r"(\S)\1{2,}"
content = [(key, re.sub(pattern, r"\1\1\1", value, flags=re.MULTILINE)) for key, value in content]
return content
def _init_parser(self) -> Dict:
"""Initialize the parser with the config."""
return {}
def parse_tups(
self, filepath: Path, errors: str = "ignore"
) -> List[Tuple[Optional[str], str]]:
"""Parse file into tuples."""
with open(filepath, "r") as f:
content = f.read()
if self._remove_hyperlinks:
content = self.remove_hyperlinks(content)
if self._remove_images:
content = self.remove_images(content)
if self._remove_table_excess:
content = self.remove_table_excess(content)
if self._remove_directives:
content = self.remove_directives(content)
if self._remove_interpreters:
content = self.remove_interpreters(content)
rst_tups = self.rst_to_tups(content)
if self._remove_whitespaces_excess:
rst_tups = self.remove_whitespaces_excess(rst_tups)
if self._remove_characters_excess:
rst_tups = self.remove_characters_excess(rst_tups)
return rst_tups
def parse_file(
self, filepath: Path, errors: str = "ignore"
) -> Union[str, List[str]]:
"""Parse file into string."""
tups = self.parse_tups(filepath, errors=errors)
results = []
# TODO: don't include headers right now
for header, value in tups:
if header is None:
results.append(value)
else:
results.append(f"\n\n{header}\n{value}")
return results

View File

@@ -0,0 +1,115 @@
"""Tabular parser.
Contains parsers for tabular data files.
"""
from pathlib import Path
from typing import Any, Dict, List, Union
from parser.file.base_parser import BaseParser
class CSVParser(BaseParser):
"""CSV parser.
Args:
concat_rows (bool): whether to concatenate all rows into one document.
If set to False, a Document will be created for each row.
True by default.
"""
def __init__(self, *args: Any, concat_rows: bool = True, **kwargs: Any) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._concat_rows = concat_rows
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
"""Parse file.
Returns:
Union[str, List[str]]: a string or a List of strings.
"""
try:
import csv
except ImportError:
raise ValueError("csv module is required to read CSV files.")
text_list = []
with open(file, "r") as fp:
csv_reader = csv.reader(fp)
for row in csv_reader:
text_list.append(", ".join(row))
if self._concat_rows:
return "\n".join(text_list)
else:
return text_list
class PandasCSVParser(BaseParser):
r"""Pandas-based CSV parser.
Parses CSVs using the separator detection from Pandas `read_csv`function.
If special parameters are required, use the `pandas_config` dict.
Args:
concat_rows (bool): whether to concatenate all rows into one document.
If set to False, a Document will be created for each row.
True by default.
col_joiner (str): Separator to use for joining cols per row.
Set to ", " by default.
row_joiner (str): Separator to use for joining each row.
Only used when `concat_rows=True`.
Set to "\n" by default.
pandas_config (dict): Options for the `pandas.read_csv` function call.
Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
for more information.
Set to empty dict by default, this means pandas will try to figure
out the separators, table head, etc. on its own.
"""
def __init__(
self,
*args: Any,
concat_rows: bool = True,
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
**kwargs: Any
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._concat_rows = concat_rows
self._col_joiner = col_joiner
self._row_joiner = row_joiner
self._pandas_config = pandas_config
def _init_parser(self) -> Dict:
"""Init parser."""
return {}
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
"""Parse file."""
try:
import pandas as pd
except ImportError:
raise ValueError("pandas module is required to read CSV files.")
df = pd.read_csv(file, **self._pandas_config)
text_list = df.apply(
lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
).tolist()
if self._concat_rows:
return (self._row_joiner).join(text_list)
else:
return text_list

View File

@@ -0,0 +1,66 @@
import os
import javalang
def find_files(directory):
files_list = []
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.java'):
files_list.append(os.path.join(root, file))
return files_list
def extract_functions(file_path):
with open(file_path, "r") as file:
java_code = file.read()
methods = {}
tree = javalang.parse.parse(java_code)
for _, node in tree.filter(javalang.tree.MethodDeclaration):
method_name = node.name
start_line = node.position.line - 1
end_line = start_line
brace_count = 0
for line in java_code.splitlines()[start_line:]:
end_line += 1
brace_count += line.count("{") - line.count("}")
if brace_count == 0:
break
method_source_code = "\n".join(java_code.splitlines()[start_line:end_line])
methods[method_name] = method_source_code
return methods
def extract_classes(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
classes = {}
tree = javalang.parse.parse(source_code)
for class_decl in tree.types:
class_name = class_decl.name
declarations = []
methods = []
for field_decl in class_decl.fields:
field_name = field_decl.declarators[0].name
field_type = field_decl.type.name
declarations.append(f"{field_type} {field_name}")
for method_decl in class_decl.methods:
methods.append(method_decl.name)
class_string = "Declarations: " + ", ".join(declarations) + "\n Method name: " + ", ".join(methods)
classes[class_name] = class_string
return classes
def extract_functions_and_classes(directory):
files = find_files(directory)
functions_dict = {}
classes_dict = {}
for file in files:
functions = extract_functions(file)
if functions:
functions_dict[file] = functions
classes = extract_classes(file)
if classes:
classes_dict[file] = classes
return functions_dict, classes_dict

View File

@@ -0,0 +1,70 @@
import os
import escodegen
import esprima
def find_files(directory):
files_list = []
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.js'):
files_list.append(os.path.join(root, file))
return files_list
def extract_functions(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
functions = {}
tree = esprima.parseScript(source_code)
for node in tree.body:
if node.type == 'FunctionDeclaration':
func_name = node.id.name if node.id else '<anonymous>'
functions[func_name] = escodegen.generate(node)
elif node.type == 'VariableDeclaration':
for declaration in node.declarations:
if declaration.init and declaration.init.type == 'FunctionExpression':
func_name = declaration.id.name if declaration.id else '<anonymous>'
functions[func_name] = escodegen.generate(declaration.init)
elif node.type == 'ClassDeclaration':
for subnode in node.body.body:
if subnode.type == 'MethodDefinition':
func_name = subnode.key.name
functions[func_name] = escodegen.generate(subnode.value)
elif subnode.type == 'VariableDeclaration':
for declaration in subnode.declarations:
if declaration.init and declaration.init.type == 'FunctionExpression':
func_name = declaration.id.name if declaration.id else '<anonymous>'
functions[func_name] = escodegen.generate(declaration.init)
return functions
def extract_classes(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
classes = {}
tree = esprima.parseScript(source_code)
for node in tree.body:
if node.type == 'ClassDeclaration':
class_name = node.id.name
function_names = []
for subnode in node.body.body:
if subnode.type == 'MethodDefinition':
function_names.append(subnode.key.name)
classes[class_name] = ", ".join(function_names)
return classes
def extract_functions_and_classes(directory):
files = find_files(directory)
functions_dict = {}
classes_dict = {}
for file in files:
functions = extract_functions(file)
if functions:
functions_dict[file] = functions
classes = extract_classes(file)
if classes:
classes_dict[file] = classes
return functions_dict, classes_dict

View File

@@ -0,0 +1,82 @@
import os
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from retry import retry
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.embeddings import HuggingFaceInstructEmbeddings
# from langchain.embeddings import CohereEmbeddings
def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price
@retry(tries=10, delay=60)
def store_add_texts_with_retry(store, i):
store.add_texts([i.page_content], metadatas=[i.metadata])
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
def call_openai_api(docs, folder_name, task_status):
# Function to create a vector store from the documents and save it to disk.
# create output folder if it doesn't exist
if not os.path.exists(f"{folder_name}"):
os.makedirs(f"{folder_name}")
from tqdm import tqdm
docs_test = [docs[0]]
docs.pop(0)
c1 = 0
store = FAISS.from_documents(docs_test, OpenAIEmbeddings(openai_api_key=os.getenv("EMBEDDINGS_KEY")))
# Uncomment for MPNet embeddings
# model_name = "sentence-transformers/all-mpnet-base-v2"
# hf = HuggingFaceEmbeddings(model_name=model_name)
# store = FAISS.from_documents(docs_test, hf)
s1 = len(docs)
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
bar_format='{l_bar}{bar}| Time Left: {remaining}'):
try:
task_status.update_state(state='PROGRESS', meta={'current': int((c1 / s1) * 100)})
store_add_texts_with_retry(store, i)
except Exception as e:
print(e)
print("Error on ", i)
print("Saving progress")
print(f"stopped at {c1} out of {len(docs)}")
store.save_local(f"{folder_name}")
break
c1 += 1
store.save_local(f"{folder_name}")
def get_user_permission(docs, folder_name):
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
# docs_content = (" ".join(docs))
docs_content = ""
for doc in docs:
docs_content += doc.page_content
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api(docs, folder_name)
elif user_input == "":
call_openai_api(docs, folder_name)
else:
print("The API was not called. No money was spent.")

View File

@@ -0,0 +1,121 @@
import ast
import os
from pathlib import Path
import tiktoken
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
def find_files(directory):
files_list = []
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.py'):
files_list.append(os.path.join(root, file))
return files_list
def extract_functions(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
functions = {}
tree = ast.parse(source_code)
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef):
func_name = node.name
func_def = ast.get_source_segment(source_code, node)
functions[func_name] = func_def
return functions
def extract_classes(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
classes = {}
tree = ast.parse(source_code)
for node in ast.walk(tree):
if isinstance(node, ast.ClassDef):
class_name = node.name
function_names = []
for subnode in ast.walk(node):
if isinstance(subnode, ast.FunctionDef):
function_names.append(subnode.name)
classes[class_name] = ", ".join(function_names)
return classes
def extract_functions_and_classes(directory):
files = find_files(directory)
functions_dict = {}
classes_dict = {}
for file in files:
functions = extract_functions(file)
if functions:
functions_dict[file] = functions
classes = extract_classes(file)
if classes:
classes_dict[file] = classes
return functions_dict, classes_dict
def parse_functions(functions_dict, formats, dir):
c1 = len(functions_dict)
for i, (source, functions) in enumerate(functions_dict.items(), start=1):
print(f"Processing file {i}/{c1}")
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
subfolders = "/".join(source_w.split("/")[:-1])
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
for j, (name, function) in enumerate(functions.items(), start=1):
print(f"Processing function {j}/{len(functions)}")
prompt = PromptTemplate(
input_variables=["code"],
template="Code: \n{code}, \nDocumentation: ",
)
llm = OpenAI(temperature=0)
response = llm(prompt.format(code=function))
mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
with open(f"outputs/{source_w}", mode) as f:
f.write(
f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
def parse_classes(classes_dict, formats, dir):
c1 = len(classes_dict)
for i, (source, classes) in enumerate(classes_dict.items()):
print(f"Processing file {i + 1}/{c1}")
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
subfolders = "/".join(source_w.split("/")[:-1])
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
for name, function_names in classes.items():
print(f"Processing Class {i + 1}/{c1}")
prompt = PromptTemplate(
input_variables=["class_name", "functions_names"],
template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
)
llm = OpenAI(temperature=0)
response = llm(prompt.format(class_name=name, functions_names=function_names))
with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")
def transform_to_docs(functions_dict, classes_dict, formats, dir):
docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(docs_content))
total_price = ((num_tokens / 1000) * 0.02)
print(f"Number of Tokens = {num_tokens:,d}")
print(f"Approx Cost = ${total_price:,.2f}")
user_input = input("Price Okay? (Y/N)\n").lower()
if user_input == "y" or user_input == "":
if not Path("outputs").exists():
Path("outputs").mkdir()
parse_functions(functions_dict, formats, dir)
parse_classes(classes_dict, formats, dir)
print("All done!")
else:
print("The API was not called. No money was spent.")

View File

@@ -0,0 +1,34 @@
"""Base schema for readers."""
from dataclasses import dataclass
from langchain.docstore.document import Document as LCDocument
from parser.schema.schema import BaseDocument
@dataclass
class Document(BaseDocument):
"""Generic interface for a data document.
This document connects to data sources.
"""
def __post_init__(self) -> None:
"""Post init."""
if self.text is None:
raise ValueError("text field not set.")
@classmethod
def get_type(cls) -> str:
"""Get Document type."""
return "Document"
def to_langchain_format(self) -> LCDocument:
"""Convert struct to LangChain document format."""
metadata = self.extra_info or {}
return LCDocument(page_content=self.text, metadata=metadata)
@classmethod
def from_langchain_format(cls, doc: LCDocument) -> "Document":
"""Convert struct from LangChain document format."""
return cls(text=doc.page_content, extra_info=doc.metadata)

View File

@@ -0,0 +1,64 @@
"""Base schema for data structures."""
from abc import abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from dataclasses_json import DataClassJsonMixin
@dataclass
class BaseDocument(DataClassJsonMixin):
"""Base document.
Generic abstract interfaces that captures both index structs
as well as documents.
"""
# TODO: consolidate fields from Document/IndexStruct into base class
text: Optional[str] = None
doc_id: Optional[str] = None
embedding: Optional[List[float]] = None
# extra fields
extra_info: Optional[Dict[str, Any]] = None
@classmethod
@abstractmethod
def get_type(cls) -> str:
"""Get Document type."""
def get_text(self) -> str:
"""Get text."""
if self.text is None:
raise ValueError("text field not set.")
return self.text
def get_doc_id(self) -> str:
"""Get doc_id."""
if self.doc_id is None:
raise ValueError("doc_id not set.")
return self.doc_id
@property
def is_doc_id_none(self) -> bool:
"""Check if doc_id is None."""
return self.doc_id is None
def get_embedding(self) -> List[float]:
"""Get embedding.
Errors if embedding is None.
"""
if self.embedding is None:
raise ValueError("embedding not set.")
return self.embedding
@property
def extra_info_str(self) -> Optional[str]:
"""Extra info string."""
if self.extra_info is None:
return None
return "\n".join([f"{k}: {str(v)}" for k, v in self.extra_info.items()])

View File

@@ -0,0 +1,74 @@
import re
from math import ceil
from typing import List
import tiktoken
from parser.schema.base import Document
def separate_header_and_body(text):
header_pattern = r"^(.*?\n){3}"
match = re.match(header_pattern, text)
header = match.group(0)
body = text[len(header):]
return header, body
def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
docs = []
current_group = None
for doc in documents:
doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
if current_group is None:
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
extra_info=doc.extra_info)
elif len(tiktoken.get_encoding("cl100k_base").encode(
current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
current_group.text += " " + doc.text
else:
docs.append(current_group)
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
extra_info=doc.extra_info)
if current_group is not None:
docs.append(current_group)
return docs
def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
docs = []
for doc in documents:
token_length = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
if token_length <= max_tokens:
docs.append(doc)
else:
header, body = separate_header_and_body(doc.text)
num_body_parts = ceil(token_length / max_tokens)
part_length = ceil(len(body) / num_body_parts)
body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
for i, body_part in enumerate(body_parts):
new_doc = Document(text=header + body_part.strip(),
doc_id=f"{doc.doc_id}-{i}",
embedding=doc.embedding,
extra_info=doc.extra_info)
docs.append(new_doc)
return docs
def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
if not token_check:
return documents
print("Grouping small documents")
try:
documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
except Exception:
print("Grouping failed, try running without token_check")
print("Separating large documents")
try:
documents = split_documents(documents=documents, max_tokens=max_tokens)
except Exception:
print("Grouping failed, try running without token_check")
return documents

View File

@@ -0,0 +1,9 @@
You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses.
You have access to chat history, and can use it to help answer the question.
When using code examples, use the following format:
```(language)
(code)
```
----------------
{summaries}

View File

@@ -0,0 +1,3 @@
Use the following pieces of context to help answer the users question. If its not relevant to the question, respond with "-"
----------------
{context}

View File

@@ -1,126 +1,106 @@
aiodns==3.0.0
aiohttp==3.8.3
aiohttp==3.8.4
aiohttp-retry==2.8.3
aiosignal==1.3.1
alabaster==0.7.13
aleph-alpha-client==2.16.0
anyio==3.6.2
argilla==1.3.0
aleph-alpha-client==2.16.1
amqp==5.1.1
async-timeout==4.0.2
attrs==22.2.0
Babel==2.11.0
backoff==2.2.1
billiard==3.6.4.0
blobfile==2.0.1
boto3==1.26.82
botocore==1.29.82
boto3==1.26.102
botocore==1.29.102
cffi==1.15.1
charset-normalizer==2.1.1
charset-normalizer==3.1.0
click==8.1.3
cohere==3.4.0
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
cryptography==39.0.2
dataclasses-json==0.5.7
decorator==5.1.1
deeplake==3.2.12
Deprecated==1.2.13
deeplake==3.2.13
dill==0.3.6
docutils==0.19
docx2txt==0.8
dnspython==2.3.0
ecdsa==0.18.0
entrypoints==0.4
escodegen==1.0.10
esprima==4.0.1
esutils==1.0.1
et-xmlfile==1.1.0
faiss-cpu==1.7.3
filelock==3.9.0
Flask==2.2.2
Flask==2.2.3
Flask-Cors==3.0.10
frozenlist==1.3.3
geojson==2.5.0
greenlet==2.0.2
gunicorn==20.1.0
h11==0.14.0
httpcore==0.16.3
httpx==0.23.3
gpt4all==0.1.7
hub==3.0.1
huggingface-hub==0.12.0
huggingface-hub==0.12.1
humbug==0.2.8
idna==3.4
imagesize==1.4.1
itsdangerous==2.1.2
javalang==0.13.0
Jinja2==3.1.2
jmespath==1.0.1
joblib==1.2.0
langchain==0.0.98
kombu==5.2.4
langchain==0.0.179
loguru==0.6.0
lxml==4.9.2
manifest-ml==0.1.1
MarkupSafe==2.1.2
marshmallow==3.19.0
marshmallow-enum==1.5.1
monotonic==1.6
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.14
mypy-extensions==0.4.3
mypy-extensions==1.0.0
networkx==3.0
nltk==3.8.1
numcodecs==0.11.0
numpy==1.23.5
numpy==1.24.2
openai==0.27.0
openpyxl==3.1.1
packaging==23.0
pandas==1.5.3
pathos==0.3.0
Pillow==9.4.0
pox==0.3.2
ppft==1.7.6.6
prompt-toolkit==3.0.38
py==1.11.0
pyasn1==0.4.8
pycares==4.3.0
pycparser==2.21
pycryptodomex==3.17
pydantic==1.10.4
Pygments==2.14.0
pydantic==1.10.5
PyJWT==2.6.0
pymongo==4.3.3
pyowm==3.3.0
PyPDF2==3.0.1
PySocks==1.7.1
python-dateutil==2.8.2
python-docx==0.8.11
python-dotenv==0.21.1
python-magic==0.4.27
python-pptx==0.6.21
python-dotenv==1.0.0
python-jose==3.3.0
pytz==2022.7.1
PyYAML==6.0
redis==4.5.1
redis==4.5.4
regex==2022.10.31
requests==2.28.2
retry==0.9.2
rfc3986==1.5.0
rsa==4.9
s3transfer==0.6.0
scikit-learn==1.2.1
scipy==1.10.0
scikit-learn==1.2.2
scipy==1.10.1
sentence-transformers==2.2.2
sentencepiece==0.1.97
six==1.16.0
sniffio==1.3.0
snowballstemmer==2.2.0
Sphinx==6.1.3
sphinxcontrib-applehelp==1.0.4
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==2.0.1
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.5
SQLAlchemy==1.4.46
sqlitedict==2.1.0
tenacity==8.2.1
sympy==1.11.1
tenacity==8.2.2
threadpoolctl==3.1.0
tiktoken==0.1.2
tokenizers==0.13.2
torch==1.13.1
torchvision==0.14.1
tqdm==4.64.1
transformers==4.26.0
torch==2.0.0
torchvision==0.15.1
tqdm==4.65.0
transformers==4.27.2
typer==0.7.0
typing-inspect==0.8.0
typing_extensions==4.4.0
unstructured==0.4.11
typing_extensions==4.5.0
urllib3==1.26.14
Werkzeug==2.2.3
wrapt==1.14.1
XlsxWriter==3.0.8
xxhash==3.2.0
vine==5.0.0
wcwidth==0.2.6
yarl==1.8.2

View File

@@ -525,6 +525,10 @@ video {
position: absolute;
}
.relative {
position: relative;
}
.inset-0 {
top: 0px;
right: 0px;
@@ -604,6 +608,10 @@ video {
min-height: 100vh;
}
.w-auto {
width: auto;
}
.w-full {
width: 100%;
}
@@ -648,12 +656,16 @@ video {
overflow-y: auto;
}
.rounded {
border-radius: 0.25rem;
}
.rounded-lg {
border-radius: 0.5rem;
}
.rounded {
border-radius: 0.25rem;
.rounded-md {
border-radius: 0.375rem;
}
.border {
@@ -723,6 +735,11 @@ video {
padding-bottom: 0.5rem;
}
.py-4 {
padding-top: 1rem;
padding-bottom: 1rem;
}
.pt-4 {
padding-top: 1rem;
}
@@ -761,6 +778,11 @@ video {
line-height: 1.25rem;
}
.text-xl {
font-size: 1.25rem;
line-height: 1.75rem;
}
.font-medium {
font-weight: 500;
}
@@ -842,6 +864,11 @@ video {
}
}
.hover\:bg-blue-600:hover {
--tw-bg-opacity: 1;
background-color: rgb(37 99 235 / var(--tw-bg-opacity));
}
.hover\:bg-blue-700:hover {
--tw-bg-opacity: 1;
background-color: rgb(29 78 216 / var(--tw-bg-opacity));
@@ -862,11 +889,26 @@ video {
border-color: rgb(59 130 246 / var(--tw-border-opacity));
}
.focus\:outline-none:focus {
outline: 2px solid transparent;
outline-offset: 2px;
}
.focus\:ring-2:focus {
--tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
--tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) var(--tw-ring-color);
box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
}
.focus\:ring-blue-500:focus {
--tw-ring-opacity: 1;
--tw-ring-color: rgb(59 130 246 / var(--tw-ring-opacity));
}
.focus\:ring-offset-2:focus {
--tw-ring-offset-width: 2px;
}
@media (min-width: 640px) {
.sm\:my-8 {
margin-top: 2rem;
@@ -881,6 +923,10 @@ video {
display: inline-block;
}
.sm\:inline {
display: inline;
}
.sm\:h-screen {
height: 100vh;
}

View File

@@ -71,4 +71,6 @@ function submitForm(event){
});
}
window.addEventListener('submit',submitForm)
//window.addEventListener('submit',submitForm)
// rewrite using id = button-submit
document.getElementById("button-submit").addEventListener('click',submitForm)

View File

@@ -86,6 +86,19 @@ This will return a new DataFrame with all the columns from both tables, and only
<option selected value="default">Choose documentation</option>
<option value="default">Default</option>
</select>
<form action="/api/upload" method="post" enctype="multipart/form-data" class="mt-2">
<input type="file" name="file" class="py-4" id="file-upload">
<input type="text" name="user" value="local" hidden>
<input type="text" name="name" placeholder="Name:">
<button type="submit" class="py-2 px-4 text-white bg-blue-500 rounded-md hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500">
Upload
</button>
</form>
</div>
</div>
@@ -130,7 +143,7 @@ This will return a new DataFrame with all the columns from both tables, and only
function docsIndex() {
// loads latest index from https://raw.githubusercontent.com/arc53/DocsHUB/main/combined.json
// and stores it in localStorage
fetch('https://d3dg1063dc54p9.cloudfront.net/combined.json')
fetch('/api/combine')
.then(response => response.json())
.then(data => {
localStorage.setItem("docsIndex", JSON.stringify(data));
@@ -150,19 +163,26 @@ This will return a new DataFrame with all the columns from both tables, and only
// create option for each key in docsIndex
for (var key in docsIndex) {
var option = document.createElement("option");
if (docsIndex[key].name == docsIndex[key].language) {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
if (docsIndex[key].location == 'docshub'){
if (docsIndex[key].name == docsIndex[key].language) {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
}
else {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
}
}
else {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
option.text = docsIndex[key].name;
option.value = docsIndex[key].location + "/" + docsIndex[key].name;
select.add(option);
}
}

94
application/worker.py Normal file
View File

@@ -0,0 +1,94 @@
import os
import shutil
import string
import zipfile
from urllib.parse import urljoin
import nltk
import requests
from core.settings import settings
from parser.file.bulk import SimpleDirectoryReader
from parser.open_ai_func import call_openai_api
from parser.schema.base import Document
from parser.token_func import group_split
try:
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
except FileExistsError:
pass
def metadata_from_filename(title):
return {'title': title}
def generate_random_string(length):
return ''.join([string.ascii_letters[i % 52] for i in range(length)])
def ingest_worker(self, directory, formats, name_job, filename, user):
# directory = 'inputs' or 'temp'
# formats = [".rst", ".md"]
input_files = None
recursive = True
limit = None
exclude = True
# name_job = 'job1'
# filename = 'install.rst'
# user = 'local'
sample = False
token_check = True
min_tokens = 150
max_tokens = 1250
full_path = directory + '/' + user + '/' + name_job
# check if API_URL env variable is set
file_data = {'name': name_job, 'file': filename, 'user': user}
response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
file = response.content
if not os.path.exists(full_path):
os.makedirs(full_path)
with open(full_path + '/' + filename, 'wb') as f:
f.write(file)
# check if file is .zip and extract it
if filename.endswith('.zip'):
with zipfile.ZipFile(full_path + '/' + filename, 'r') as zip_ref:
zip_ref.extractall(full_path)
os.remove(full_path + '/' + filename)
self.update_state(state='PROGRESS', meta={'current': 1})
raw_docs = SimpleDirectoryReader(input_dir=full_path, input_files=input_files, recursive=recursive,
required_exts=formats, num_files_limit=limit,
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
call_openai_api(docs, full_path, self)
self.update_state(state='PROGRESS', meta={'current': 100})
if sample:
for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text)
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
# and send them to the server (provide user and name in form)
file_data = {'name': name_job, 'user': user}
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
'file_pkl': open(full_path + '/index.pkl', 'rb')}
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path="))
# delete local
shutil.rmtree(full_path)
return {
'directory': directory,
'formats': formats,
'name_job': name_job,
'filename': filename,
'user': user,
'limited': False
}

View File

@@ -1,4 +1,4 @@
from app import app
if __name__ == "__main__":
app.run()
app.run(debug=True, port=5001)

20
docker-compose-dev.yaml Normal file
View File

@@ -0,0 +1,20 @@
version: "3.9"
services:
redis:
image: redis:6-alpine
ports:
- 6379:6379
mongo:
image: mongo:6
ports:
- 27017:27017
volumes:
- mongodb_data_container:/data/db
volumes:
mongodb_data_container:

View File

@@ -4,12 +4,58 @@ services:
frontend:
build: ./frontend
environment:
- API_HOST=http://backend:5001
- VITE_API_HOST=http://localhost:5001
- VITE_API_STREAMING=$VITE_API_STREAMING
ports:
- "5173:5173"
depends_on:
- backend
backend:
build: ./application
environment:
- API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
ports:
- "5001:5001"
volumes:
- ./application/indexes:/app/indexes
- ./application/inputs:/app/inputs
- ./application/vectors:/app/vectors
depends_on:
- redis
- mongo
worker:
build: ./application
command: celery -A app.celery worker -l INFO
environment:
- API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
- API_URL=http://backend:5001
depends_on:
- redis
- mongo
redis:
image: redis:6-alpine
ports:
- 6379:6379
mongo:
image: mongo:6
ports:
- 27017:27017
volumes:
- mongodb_data_container:/data/db
volumes:
mongodb_data_container:

View File

@@ -0,0 +1,6 @@
docsgpt_url=<docsgpt_api_url>
chatwoot_url=<chatwoot_url>
docsgpt_key=<openai_api_key or other llm>
chatwoot_token=xxxxx
account_id=(optional) 1
assignee_id=(optional) 1

View File

@@ -1,13 +1,18 @@
import requests
import dotenv
import os
import json
import pprint
import dotenv
import requests
from flask import Flask, request
dotenv.load_dotenv()
docsgpt_url = os.getenv("docsgpt_url")
chatwoot_url = os.getenv("chatwoot_url")
docsgpt_key = os.getenv("docsgpt_key")
chatwoot_token = os.getenv("chatwoot_token")
# account_id = os.getenv("account_id")
# assignee_id = os.getenv("assignee_id")
label_stop = "human-requested"
def send_to_bot(sender, message):
@@ -40,38 +45,45 @@ def send_to_chatwoot(account, conversation, message):
return r.json()
from flask import Flask, request
app = Flask(__name__)
@app.route('/docsgpt', methods=['POST'])
def docsgpt():
data = request.get_json()
message_type = data['message_type']
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(data)
try:
message_type = data['message_type']
except KeyError:
return "Not a message"
message = data['content']
conversation = data['conversation']['id']
contact = data['sender']['id']
account = data['account']['id']
assignee = data['conversation']['meta']['assignee']['id']
print(account)
print(label_stop)
print(data['conversation']['labels'])
print(assignee)
if(message_type == "incoming"):
if label_stop in data['conversation']['labels']:
return "Label stop"
# elif str(account) != str(account_id):
# return "Not the right account"
# elif str(assignee) != str(assignee_id):
# return "Not the right assignee"
if (message_type == "incoming"):
bot_response = send_to_bot(contact, message)
create_message = send_to_chatwoot(
account, conversation, bot_response)
response = requests.post(
url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-logs",
headers={
"Content-Type": "application/json; charset=utf-8",
},
data=json.dumps({
"answer": str(bot_response),
"question": str(message),
"source": "chatwoot"
})
)
else:
return "Not an incoming message"
return create_message
if __name__ == '__main__':
app.run(debug=True, host='0.0.0.0')
app.run(host='0.0.0.0', port=80)

678
extensions/chrome/dist/output.css vendored Normal file
View File

@@ -0,0 +1,678 @@
/*
! tailwindcss v3.2.7 | MIT License | https://tailwindcss.com
*/
/*
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
*/
*,
::before,
::after {
box-sizing: border-box;
/* 1 */
border-width: 0;
/* 2 */
border-style: solid;
/* 2 */
border-color: #e5e7eb;
/* 2 */
}
::before,
::after {
--tw-content: '';
}
/*
1. Use a consistent sensible line-height in all browsers.
2. Prevent adjustments of font size after orientation changes in iOS.
3. Use a more readable tab size.
4. Use the user's configured `sans` font-family by default.
5. Use the user's configured `sans` font-feature-settings by default.
*/
html {
line-height: 1.5;
/* 1 */
-webkit-text-size-adjust: 100%;
/* 2 */
-moz-tab-size: 4;
/* 3 */
-o-tab-size: 4;
tab-size: 4;
/* 3 */
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
/* 4 */
font-feature-settings: normal;
/* 5 */
}
/*
1. Remove the margin in all browsers.
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
*/
body {
margin: 0;
/* 1 */
line-height: inherit;
/* 2 */
}
/*
1. Add the correct height in Firefox.
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
3. Ensure horizontal rules are visible by default.
*/
hr {
height: 0;
/* 1 */
color: inherit;
/* 2 */
border-top-width: 1px;
/* 3 */
}
/*
Add the correct text decoration in Chrome, Edge, and Safari.
*/
abbr:where([title]) {
-webkit-text-decoration: underline dotted;
text-decoration: underline dotted;
}
/*
Remove the default font size and weight for headings.
*/
h1,
h2,
h3,
h4,
h5,
h6 {
font-size: inherit;
font-weight: inherit;
}
/*
Reset links to optimize for opt-in styling instead of opt-out.
*/
a {
color: inherit;
text-decoration: inherit;
}
/*
Add the correct font weight in Edge and Safari.
*/
b,
strong {
font-weight: bolder;
}
/*
1. Use the user's configured `mono` font family by default.
2. Correct the odd `em` font sizing in all browsers.
*/
code,
kbd,
samp,
pre {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
/* 1 */
font-size: 1em;
/* 2 */
}
/*
Add the correct font size in all browsers.
*/
small {
font-size: 80%;
}
/*
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
*/
sub,
sup {
font-size: 75%;
line-height: 0;
position: relative;
vertical-align: baseline;
}
sub {
bottom: -0.25em;
}
sup {
top: -0.5em;
}
/*
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
3. Remove gaps between table borders by default.
*/
table {
text-indent: 0;
/* 1 */
border-color: inherit;
/* 2 */
border-collapse: collapse;
/* 3 */
}
/*
1. Change the font styles in all browsers.
2. Remove the margin in Firefox and Safari.
3. Remove default padding in all browsers.
*/
button,
input,
optgroup,
select,
textarea {
font-family: inherit;
/* 1 */
font-size: 100%;
/* 1 */
font-weight: inherit;
/* 1 */
line-height: inherit;
/* 1 */
color: inherit;
/* 1 */
margin: 0;
/* 2 */
padding: 0;
/* 3 */
}
/*
Remove the inheritance of text transform in Edge and Firefox.
*/
button,
select {
text-transform: none;
}
/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Remove default button styles.
*/
button,
[type='button'],
[type='reset'],
[type='submit'] {
-webkit-appearance: button;
/* 1 */
background-color: transparent;
/* 2 */
background-image: none;
/* 2 */
}
/*
Use the modern Firefox focus style for all focusable elements.
*/
:-moz-focusring {
outline: auto;
}
/*
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
*/
:-moz-ui-invalid {
box-shadow: none;
}
/*
Add the correct vertical alignment in Chrome and Firefox.
*/
progress {
vertical-align: baseline;
}
/*
Correct the cursor style of increment and decrement buttons in Safari.
*/
::-webkit-inner-spin-button,
::-webkit-outer-spin-button {
height: auto;
}
/*
1. Correct the odd appearance in Chrome and Safari.
2. Correct the outline style in Safari.
*/
[type='search'] {
-webkit-appearance: textfield;
/* 1 */
outline-offset: -2px;
/* 2 */
}
/*
Remove the inner padding in Chrome and Safari on macOS.
*/
::-webkit-search-decoration {
-webkit-appearance: none;
}
/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Change font properties to `inherit` in Safari.
*/
::-webkit-file-upload-button {
-webkit-appearance: button;
/* 1 */
font: inherit;
/* 2 */
}
/*
Add the correct display in Chrome and Safari.
*/
summary {
display: list-item;
}
/*
Removes the default spacing and border for appropriate elements.
*/
blockquote,
dl,
dd,
h1,
h2,
h3,
h4,
h5,
h6,
hr,
figure,
p,
pre {
margin: 0;
}
fieldset {
margin: 0;
padding: 0;
}
legend {
padding: 0;
}
ol,
ul,
menu {
list-style: none;
margin: 0;
padding: 0;
}
/*
Prevent resizing textareas horizontally by default.
*/
textarea {
resize: vertical;
}
/*
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
2. Set the default placeholder color to the user's configured gray 400 color.
*/
input::-moz-placeholder, textarea::-moz-placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
input::placeholder,
textarea::placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
/*
Set the default cursor for buttons.
*/
button,
[role="button"] {
cursor: pointer;
}
/*
Make sure disabled buttons don't get the pointer cursor.
*/
:disabled {
cursor: default;
}
/*
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
This can trigger a poorly considered lint error in some tools but is included by design.
*/
img,
svg,
video,
canvas,
audio,
iframe,
embed,
object {
display: block;
/* 1 */
vertical-align: middle;
/* 2 */
}
/*
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
*/
img,
video {
max-width: 100%;
height: auto;
}
/* Make elements with the HTML hidden attribute stay hidden by default */
[hidden] {
display: none;
}
*, ::before, ::after {
--tw-border-spacing-x: 0;
--tw-border-spacing-y: 0;
--tw-translate-x: 0;
--tw-translate-y: 0;
--tw-rotate: 0;
--tw-skew-x: 0;
--tw-skew-y: 0;
--tw-scale-x: 1;
--tw-scale-y: 1;
--tw-pan-x: ;
--tw-pan-y: ;
--tw-pinch-zoom: ;
--tw-scroll-snap-strictness: proximity;
--tw-ordinal: ;
--tw-slashed-zero: ;
--tw-numeric-figure: ;
--tw-numeric-spacing: ;
--tw-numeric-fraction: ;
--tw-ring-inset: ;
--tw-ring-offset-width: 0px;
--tw-ring-offset-color: #fff;
--tw-ring-color: rgb(59 130 246 / 0.5);
--tw-ring-offset-shadow: 0 0 #0000;
--tw-ring-shadow: 0 0 #0000;
--tw-shadow: 0 0 #0000;
--tw-shadow-colored: 0 0 #0000;
--tw-blur: ;
--tw-brightness: ;
--tw-contrast: ;
--tw-grayscale: ;
--tw-hue-rotate: ;
--tw-invert: ;
--tw-saturate: ;
--tw-sepia: ;
--tw-drop-shadow: ;
--tw-backdrop-blur: ;
--tw-backdrop-brightness: ;
--tw-backdrop-contrast: ;
--tw-backdrop-grayscale: ;
--tw-backdrop-hue-rotate: ;
--tw-backdrop-invert: ;
--tw-backdrop-opacity: ;
--tw-backdrop-saturate: ;
--tw-backdrop-sepia: ;
}
::backdrop {
--tw-border-spacing-x: 0;
--tw-border-spacing-y: 0;
--tw-translate-x: 0;
--tw-translate-y: 0;
--tw-rotate: 0;
--tw-skew-x: 0;
--tw-skew-y: 0;
--tw-scale-x: 1;
--tw-scale-y: 1;
--tw-pan-x: ;
--tw-pan-y: ;
--tw-pinch-zoom: ;
--tw-scroll-snap-strictness: proximity;
--tw-ordinal: ;
--tw-slashed-zero: ;
--tw-numeric-figure: ;
--tw-numeric-spacing: ;
--tw-numeric-fraction: ;
--tw-ring-inset: ;
--tw-ring-offset-width: 0px;
--tw-ring-offset-color: #fff;
--tw-ring-color: rgb(59 130 246 / 0.5);
--tw-ring-offset-shadow: 0 0 #0000;
--tw-ring-shadow: 0 0 #0000;
--tw-shadow: 0 0 #0000;
--tw-shadow-colored: 0 0 #0000;
--tw-blur: ;
--tw-brightness: ;
--tw-contrast: ;
--tw-grayscale: ;
--tw-hue-rotate: ;
--tw-invert: ;
--tw-saturate: ;
--tw-sepia: ;
--tw-drop-shadow: ;
--tw-backdrop-blur: ;
--tw-backdrop-brightness: ;
--tw-backdrop-contrast: ;
--tw-backdrop-grayscale: ;
--tw-backdrop-hue-rotate: ;
--tw-backdrop-invert: ;
--tw-backdrop-opacity: ;
--tw-backdrop-saturate: ;
--tw-backdrop-sepia: ;
}
.mb-2 {
margin-bottom: 0.5rem;
}
.ml-2 {
margin-left: 0.5rem;
}
.mr-2 {
margin-right: 0.5rem;
}
.mt-4 {
margin-top: 1rem;
}
.flex {
display: flex;
}
.w-\[26rem\] {
width: 26rem;
}
.w-full {
width: 100%;
}
.flex-col {
flex-direction: column;
}
.items-center {
align-items: center;
}
.justify-between {
justify-content: space-between;
}
.self-start {
align-self: flex-start;
}
.self-end {
align-self: flex-end;
}
.rounded-lg {
border-radius: 0.5rem;
}
.bg-blue-500 {
--tw-bg-opacity: 1;
background-color: rgb(59 130 246 / var(--tw-bg-opacity));
}
.bg-gray-200 {
--tw-bg-opacity: 1;
background-color: rgb(229 231 235 / var(--tw-bg-opacity));
}
.bg-gray-900 {
--tw-bg-opacity: 1;
background-color: rgb(17 24 39 / var(--tw-bg-opacity));
}
.bg-indigo-500 {
--tw-bg-opacity: 1;
background-color: rgb(99 102 241 / var(--tw-bg-opacity));
}
.bg-white {
--tw-bg-opacity: 1;
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
}
.p-2 {
padding: 0.5rem;
}
.p-4 {
padding: 1rem;
}
.text-lg {
font-size: 1.125rem;
line-height: 1.75rem;
}
.text-sm {
font-size: 0.875rem;
line-height: 1.25rem;
}
.font-medium {
font-weight: 500;
}
.text-blue-500 {
--tw-text-opacity: 1;
color: rgb(59 130 246 / var(--tw-text-opacity));
}
.text-gray-700 {
--tw-text-opacity: 1;
color: rgb(55 65 81 / var(--tw-text-opacity));
}
.text-white {
--tw-text-opacity: 1;
color: rgb(255 255 255 / var(--tw-text-opacity));
}
.shadow {
--tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
--tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
}
#chat-container {
width: 500px;
height: 450px;
background-color: white;
padding: 10px;
overflow: auto;
}
.bg-gray-200 {
background-color: #edf2f7;
}
.bg-gray-900 {
background-color: #1a202c;
}
.rounded-lg {
border-radius: 0.5rem;
}
.shadow {
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
}
.text-gray-700 {
color: #4a5568;
}
.text-sm {
font-size: 0.875rem;
}
.p-4 {
padding: 1.5rem;
}
.hover\:text-blue-800:hover {
--tw-text-opacity: 1;
color: rgb(30 64 175 / var(--tw-text-opacity));
}

View File

@@ -1,7 +1,7 @@
{
"name": "DocsGPT - Documentation AI butler",
"version": "0.0.1",
"manifest_version": 2,
"manifest_version": 3,
"description": "AI assistant for developers, that helps you answer your questions about the documentation you are reading.",
"icons": {
"16": "icons/icon16.png",
@@ -10,12 +10,19 @@
},
"default_locale": "en",
"background": {
"page": "src/bg/background.html",
"persistent": true
"service_worker": "src/bg/service-worker.js"
},
"action": {
"default_title": "DocsGPT - Documentation AI butler",
"default_popup": "popup.html"
},
"permissions": ["activeTab", "storage"],
"browser_action": {
"default_popup": "popup.html"
}
"host_permissions": [
"*://*/*"
],
"content_scripts": [{
"js": ["popup.js"],
"matches": ["https://github.com/*"]
}]
}

View File

@@ -1,4 +1,19 @@
{
"name": "docsgpt-chrome-extension",
"version": "0.0.1",
"description": "DocsGPT - Documentation AI butler",
"main": "popup.js",
"author": "",
"license": "MIT",
"scripts": {
"dev": "npx tailwindcss -i ./styles.css -o ./dist/output.css --watch"
},
"keywords": [
"DocsGPT",
"Documentation",
"Chrome",
"extension"
],
"devDependencies": {
"tailwindcss": "^3.2.4"
}

View File

@@ -21,12 +21,12 @@ document.getElementById("message-form").addEventListener("submit", function(even
}
// send post request to server http://127.0.0.1:5000/ with message in json body
fetch('http://127.0.0.1:5000/api/answer', {
fetch('http://127.0.0.1:5001/api/answer', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({question: message}),
body: JSON.stringify({question: message, history: null}),
})
.then(response => response.json())
.then(data => {

View File

@@ -1,6 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<script type="text/javascript" src="background.js"></script>
</head>
</html>

View File

@@ -1,22 +0,0 @@
// if you checked "fancy-settings" in extensionizr.com, uncomment this lines
// var settings = new Store("settings", {
// "sample_setting": "This is how you use Store.js to remember values"
// });
//example of using a message handler from the inject scripts
chrome.extension.onMessage.addListener(
function(request, sender, sendResponse) {
chrome.pageAction.show(sender.tab.id);
sendResponse();
});
chrome.runtime.onMessage.addListener(
function(request, sender, sendResponse) {
if (request.msg === "sendMessage") {
sendResponse({response: "Message received"});
}
});

View File

@@ -0,0 +1,12 @@
// This is the service worker script, which executes in its own context
// when the extension is installed or refreshed (or when you access its console).
// It would correspond to the background script in chrome extensions v2.
console.log("This prints to the console of the service worker (background script)");
chrome.runtime.onMessage.addListener(
function(request, sender, sendResponse) {
if (request.msg === "sendMessage") {
sendResponse({response: "Message received"});
}
}
);

68
extensions/discord/bot.py Normal file
View File

@@ -0,0 +1,68 @@
import os
import re
import discord
import requests
from discord.ext import commands
import dotenv
dotenv.load_dotenv()
# Replace 'YOUR_BOT_TOKEN' with your bot's token
TOKEN = os.getenv("DISCORD_TOKEN")
PREFIX = '@DocsGPT'
BASE_API_URL = 'http://localhost:5001'
intents = discord.Intents.default()
intents.message_content = True
bot = commands.Bot(command_prefix=PREFIX, intents=intents)
def split_string(input_str):
pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
match = re.match(pattern, input_str)
if match:
content = input_str[match.end():].strip()
return str(bot.user.id), content
return None, input_str
@bot.event
async def on_ready():
print(f'{bot.user.name} has connected to Discord!')
async def fetch_answer(question):
data = {
'sender': 'discord',
'question': question,
'history': ''
}
headers = {"Content-Type": "application/json",
"Accept": "application/json"}
response = requests.post(BASE_API_URL + '/api/answer', json=data, headers=headers)
if response.status_code == 200:
return response.json()['answer']
return 'Sorry, I could not fetch the answer.'
@bot.event
async def on_message(message):
if message.author == bot.user:
return
content = message.content.strip()
prefix, content = split_string(content)
if prefix is None:
return
part_prefix = str(bot.user.id)
if part_prefix == prefix:
answer = await fetch_answer(content)
await message.channel.send(answer)
await bot.process_commands(message)
bot.run(TOKEN)

View File

@@ -0,0 +1,25 @@
# Chat Widget
A simple chat widget that can be easily integrated into any website.
## Installation
1. Host the `widget.html`, `styles.css`, and `script.js` files from the `src` folder on your own server or a Content Delivery Network (CDN). Make sure to note the URLs for these files.
2. Update the URLs in the `dist/chat-widget.js` file to match the locations of your hosted files:
```javascript
fetch("https://your-server-or-cdn.com/path/to/widget.html"),
fetch("https://your-server-or-cdn.com/path/to/styles.css"),
fetch("https://your-server-or-cdn.com/path/to/script.js"),
```
3. Host the `dist/chat-widget.js` file on your own server or a Content Delivery Network (CDN). Make sure to note the URL for this file.
##Integration
To integrate the chat widget into a website, add the following script tag to the HTML file, replacing URL_TO_CHAT_WIDGET_JS with the actual URL of your hosted chat-widget.js file:
```javascript
<script src="URL_TO_CHAT_WIDGET_JS"></script>
```

View File

@@ -0,0 +1,41 @@
(async function () {
// Fetch the HTML, CSS, and JavaScript from your server or CDN
const [htmlRes, jsRes] = await Promise.all([
fetch("https://s3-eu-west-2.amazonaws.com/arc53data/widget.html"),
// fetch("https://s3-eu-west-2.amazonaws.com/arc53data/tailwind.css"),
fetch("https://s3-eu-west-2.amazonaws.com/arc53data/script.js"),
]);
const html = await htmlRes.text();
//const css = await cssRes.text();
const js = await jsRes.text();
// create a new link element
const link = document.createElement("link");
//set the rel, href, type, and integrity attributes
link.rel = "stylesheet";
link.href = "https://cdn.tailwindcss.com/";
link.type = "text/css";
link.integrity = "sha384-PDOmVviaTm8N1W35y1NSmo80w6GPaGhbDuOBAF/5hRffaeGc6yOwIo1qAt4gqLGA%";
// get the document head and append the link element to it
// document.head.appendChild(link);
// Create a style element for the CSS
// const style = document.createElement("style");
// style.innerHTML = css;
// document.head.appendChild(style);
// Create a container for the chat widget and inject the HTML
const chatWidgetContainer = document.createElement("div");
chatWidgetContainer.innerHTML = html;
document.body.appendChild(chatWidgetContainer);
// Execute the JavaScript code
const script = document.createElement("script");
script.innerHTML = js;
document.body.appendChild(script);
})();

807
extensions/web-widget/dist/output.css vendored Normal file
View File

@@ -0,0 +1,807 @@
/*
! tailwindcss v3.3.1 | MIT License | https://tailwindcss.com
*/
/*
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
*/
*,
::before,
::after {
box-sizing: border-box;
/* 1 */
border-width: 0;
/* 2 */
border-style: solid;
/* 2 */
border-color: #e5e7eb;
/* 2 */
}
::before,
::after {
--tw-content: '';
}
/*
1. Use a consistent sensible line-height in all browsers.
2. Prevent adjustments of font size after orientation changes in iOS.
3. Use a more readable tab size.
4. Use the user's configured `sans` font-family by default.
5. Use the user's configured `sans` font-feature-settings by default.
6. Use the user's configured `sans` font-variation-settings by default.
*/
html {
line-height: 1.5;
/* 1 */
-webkit-text-size-adjust: 100%;
/* 2 */
-moz-tab-size: 4;
/* 3 */
-o-tab-size: 4;
tab-size: 4;
/* 3 */
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
/* 4 */
font-feature-settings: normal;
/* 5 */
font-variation-settings: normal;
/* 6 */
}
/*
1. Remove the margin in all browsers.
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
*/
body {
margin: 0;
/* 1 */
line-height: inherit;
/* 2 */
}
/*
1. Add the correct height in Firefox.
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
3. Ensure horizontal rules are visible by default.
*/
hr {
height: 0;
/* 1 */
color: inherit;
/* 2 */
border-top-width: 1px;
/* 3 */
}
/*
Add the correct text decoration in Chrome, Edge, and Safari.
*/
abbr:where([title]) {
-webkit-text-decoration: underline dotted;
text-decoration: underline dotted;
}
/*
Remove the default font size and weight for headings.
*/
h1,
h2,
h3,
h4,
h5,
h6 {
font-size: inherit;
font-weight: inherit;
}
/*
Reset links to optimize for opt-in styling instead of opt-out.
*/
a {
color: inherit;
text-decoration: inherit;
}
/*
Add the correct font weight in Edge and Safari.
*/
b,
strong {
font-weight: bolder;
}
/*
1. Use the user's configured `mono` font family by default.
2. Correct the odd `em` font sizing in all browsers.
*/
code,
kbd,
samp,
pre {
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
/* 1 */
font-size: 1em;
/* 2 */
}
/*
Add the correct font size in all browsers.
*/
small {
font-size: 80%;
}
/*
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
*/
sub,
sup {
font-size: 75%;
line-height: 0;
position: relative;
vertical-align: baseline;
}
sub {
bottom: -0.25em;
}
sup {
top: -0.5em;
}
/*
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
3. Remove gaps between table borders by default.
*/
table {
text-indent: 0;
/* 1 */
border-color: inherit;
/* 2 */
border-collapse: collapse;
/* 3 */
}
/*
1. Change the font styles in all browsers.
2. Remove the margin in Firefox and Safari.
3. Remove default padding in all browsers.
*/
button,
input,
optgroup,
select,
textarea {
font-family: inherit;
/* 1 */
font-size: 100%;
/* 1 */
font-weight: inherit;
/* 1 */
line-height: inherit;
/* 1 */
color: inherit;
/* 1 */
margin: 0;
/* 2 */
padding: 0;
/* 3 */
}
/*
Remove the inheritance of text transform in Edge and Firefox.
*/
button,
select {
text-transform: none;
}
/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Remove default button styles.
*/
button,
[type='button'],
[type='reset'],
[type='submit'] {
-webkit-appearance: button;
/* 1 */
background-color: transparent;
/* 2 */
background-image: none;
/* 2 */
}
/*
Use the modern Firefox focus style for all focusable elements.
*/
:-moz-focusring {
outline: auto;
}
/*
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
*/
:-moz-ui-invalid {
box-shadow: none;
}
/*
Add the correct vertical alignment in Chrome and Firefox.
*/
progress {
vertical-align: baseline;
}
/*
Correct the cursor style of increment and decrement buttons in Safari.
*/
::-webkit-inner-spin-button,
::-webkit-outer-spin-button {
height: auto;
}
/*
1. Correct the odd appearance in Chrome and Safari.
2. Correct the outline style in Safari.
*/
[type='search'] {
-webkit-appearance: textfield;
/* 1 */
outline-offset: -2px;
/* 2 */
}
/*
Remove the inner padding in Chrome and Safari on macOS.
*/
::-webkit-search-decoration {
-webkit-appearance: none;
}
/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Change font properties to `inherit` in Safari.
*/
::-webkit-file-upload-button {
-webkit-appearance: button;
/* 1 */
font: inherit;
/* 2 */
}
/*
Add the correct display in Chrome and Safari.
*/
summary {
display: list-item;
}
/*
Removes the default spacing and border for appropriate elements.
*/
blockquote,
dl,
dd,
h1,
h2,
h3,
h4,
h5,
h6,
hr,
figure,
p,
pre {
margin: 0;
}
fieldset {
margin: 0;
padding: 0;
}
legend {
padding: 0;
}
ol,
ul,
menu {
list-style: none;
margin: 0;
padding: 0;
}
/*
Prevent resizing textareas horizontally by default.
*/
textarea {
resize: vertical;
}
/*
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
2. Set the default placeholder color to the user's configured gray 400 color.
*/
input::-moz-placeholder, textarea::-moz-placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
input::placeholder,
textarea::placeholder {
opacity: 1;
/* 1 */
color: #9ca3af;
/* 2 */
}
/*
Set the default cursor for buttons.
*/
button,
[role="button"] {
cursor: pointer;
}
/*
Make sure disabled buttons don't get the pointer cursor.
*/
:disabled {
cursor: default;
}
/*
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
This can trigger a poorly considered lint error in some tools but is included by design.
*/
img,
svg,
video,
canvas,
audio,
iframe,
embed,
object {
display: block;
/* 1 */
vertical-align: middle;
/* 2 */
}
/*
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
*/
img,
video {
max-width: 100%;
height: auto;
}
/* Make elements with the HTML hidden attribute stay hidden by default */
[hidden] {
display: none;
}
*, ::before, ::after {
--tw-border-spacing-x: 0;
--tw-border-spacing-y: 0;
--tw-translate-x: 0;
--tw-translate-y: 0;
--tw-rotate: 0;
--tw-skew-x: 0;
--tw-skew-y: 0;
--tw-scale-x: 1;
--tw-scale-y: 1;
--tw-pan-x: ;
--tw-pan-y: ;
--tw-pinch-zoom: ;
--tw-scroll-snap-strictness: proximity;
--tw-ordinal: ;
--tw-slashed-zero: ;
--tw-numeric-figure: ;
--tw-numeric-spacing: ;
--tw-numeric-fraction: ;
--tw-ring-inset: ;
--tw-ring-offset-width: 0px;
--tw-ring-offset-color: #fff;
--tw-ring-color: rgb(59 130 246 / 0.5);
--tw-ring-offset-shadow: 0 0 #0000;
--tw-ring-shadow: 0 0 #0000;
--tw-shadow: 0 0 #0000;
--tw-shadow-colored: 0 0 #0000;
--tw-blur: ;
--tw-brightness: ;
--tw-contrast: ;
--tw-grayscale: ;
--tw-hue-rotate: ;
--tw-invert: ;
--tw-saturate: ;
--tw-sepia: ;
--tw-drop-shadow: ;
--tw-backdrop-blur: ;
--tw-backdrop-brightness: ;
--tw-backdrop-contrast: ;
--tw-backdrop-grayscale: ;
--tw-backdrop-hue-rotate: ;
--tw-backdrop-invert: ;
--tw-backdrop-opacity: ;
--tw-backdrop-saturate: ;
--tw-backdrop-sepia: ;
}
::backdrop {
--tw-border-spacing-x: 0;
--tw-border-spacing-y: 0;
--tw-translate-x: 0;
--tw-translate-y: 0;
--tw-rotate: 0;
--tw-skew-x: 0;
--tw-skew-y: 0;
--tw-scale-x: 1;
--tw-scale-y: 1;
--tw-pan-x: ;
--tw-pan-y: ;
--tw-pinch-zoom: ;
--tw-scroll-snap-strictness: proximity;
--tw-ordinal: ;
--tw-slashed-zero: ;
--tw-numeric-figure: ;
--tw-numeric-spacing: ;
--tw-numeric-fraction: ;
--tw-ring-inset: ;
--tw-ring-offset-width: 0px;
--tw-ring-offset-color: #fff;
--tw-ring-color: rgb(59 130 246 / 0.5);
--tw-ring-offset-shadow: 0 0 #0000;
--tw-ring-shadow: 0 0 #0000;
--tw-shadow: 0 0 #0000;
--tw-shadow-colored: 0 0 #0000;
--tw-blur: ;
--tw-brightness: ;
--tw-contrast: ;
--tw-grayscale: ;
--tw-hue-rotate: ;
--tw-invert: ;
--tw-saturate: ;
--tw-sepia: ;
--tw-drop-shadow: ;
--tw-backdrop-blur: ;
--tw-backdrop-brightness: ;
--tw-backdrop-contrast: ;
--tw-backdrop-grayscale: ;
--tw-backdrop-hue-rotate: ;
--tw-backdrop-invert: ;
--tw-backdrop-opacity: ;
--tw-backdrop-saturate: ;
--tw-backdrop-sepia: ;
}
.fixed {
position: fixed;
}
.absolute {
position: absolute;
}
.relative {
position: relative;
}
.inset-y-0 {
top: 0px;
bottom: 0px;
}
.bottom-5 {
bottom: 1.25rem;
}
.left-5 {
left: 1.25rem;
}
.right-2 {
right: 0.5rem;
}
.z-50 {
z-index: 50;
}
.m-0 {
margin: 0px;
}
.-mx-2 {
margin-left: -0.5rem;
margin-right: -0.5rem;
}
.mt-1 {
margin-top: 0.25rem;
}
.flex {
display: flex;
}
.hidden {
display: none;
}
.w-full {
width: 100%;
}
.flex-1 {
flex: 1 1 0%;
}
.transform {
transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
}
.items-center {
align-items: center;
}
.justify-center {
justify-content: center;
}
.gap-2 {
gap: 0.5rem;
}
.divide-y > :not([hidden]) ~ :not([hidden]) {
--tw-divide-y-reverse: 0;
border-top-width: calc(1px * calc(1 - var(--tw-divide-y-reverse)));
border-bottom-width: calc(1px * var(--tw-divide-y-reverse));
}
.rounded-md {
border-radius: 0.375rem;
}
.rounded-b {
border-bottom-right-radius: 0.25rem;
border-bottom-left-radius: 0.25rem;
}
.border {
border-width: 1px;
}
.bg-transparent {
background-color: transparent;
}
.bg-gradient-to-br {
background-image: linear-gradient(to bottom right, var(--tw-gradient-stops));
}
.from-gray-100\/80 {
--tw-gradient-from: rgb(243 244 246 / 0.8) var(--tw-gradient-from-position);
--tw-gradient-from-position: ;
--tw-gradient-to: rgb(243 244 246 / 0) var(--tw-gradient-from-position);
--tw-gradient-to-position: ;
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
}
.via-white {
--tw-gradient-via-position: ;
--tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);
--tw-gradient-to-position: ;
--tw-gradient-stops: var(--tw-gradient-from), #fff var(--tw-gradient-via-position), var(--tw-gradient-to);
}
.to-white {
--tw-gradient-to: #fff var(--tw-gradient-to-position);
--tw-gradient-to-position: ;
}
.p-3 {
padding: 0.75rem;
}
.px-2 {
padding-left: 0.5rem;
padding-right: 0.5rem;
}
.px-5 {
padding-left: 1.25rem;
padding-right: 1.25rem;
}
.py-3 {
padding-top: 0.75rem;
padding-bottom: 0.75rem;
}
.pl-5 {
padding-left: 1.25rem;
}
.pr-8 {
padding-right: 2rem;
}
.font-sans {
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
}
.text-sm {
font-size: 0.875rem;
line-height: 1.25rem;
}
.text-xs {
font-size: 0.75rem;
line-height: 1rem;
}
.font-bold {
font-weight: 700;
}
.text-gray-400 {
--tw-text-opacity: 1;
color: rgb(156 163 175 / var(--tw-text-opacity));
}
.text-gray-600 {
--tw-text-opacity: 1;
color: rgb(75 85 99 / var(--tw-text-opacity));
}
.text-gray-700 {
--tw-text-opacity: 1;
color: rgb(55 65 81 / var(--tw-text-opacity));
}
.text-gray-800 {
--tw-text-opacity: 1;
color: rgb(31 41 55 / var(--tw-text-opacity));
}
.shadow {
--tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
--tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
}
.backdrop-blur-sm {
--tw-backdrop-blur: blur(4px);
-webkit-backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
}
.transition {
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, -webkit-backdrop-filter;
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter;
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter, -webkit-backdrop-filter;
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
transition-duration: 150ms;
}
.delay-200 {
transition-delay: 200ms;
}
.duration-300 {
transition-duration: 300ms;
}
.hover\:bg-gray-100:hover {
--tw-bg-opacity: 1;
background-color: rgb(243 244 246 / var(--tw-bg-opacity));
}
.focus\:outline-none:focus {
outline: 2px solid transparent;
outline-offset: 2px;
}
@media (prefers-color-scheme: dark) {
.dark\:divide-gray-700 > :not([hidden]) ~ :not([hidden]) {
--tw-divide-opacity: 1;
border-color: rgb(55 65 81 / var(--tw-divide-opacity));
}
.dark\:border-gray-700 {
--tw-border-opacity: 1;
border-color: rgb(55 65 81 / var(--tw-border-opacity));
}
.dark\:from-gray-900\/80 {
--tw-gradient-from: rgb(17 24 39 / 0.8) var(--tw-gradient-from-position);
--tw-gradient-from-position: ;
--tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-from-position);
--tw-gradient-to-position: ;
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
}
.dark\:via-gray-900 {
--tw-gradient-via-position: ;
--tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-to-position);
--tw-gradient-to-position: ;
--tw-gradient-stops: var(--tw-gradient-from), #111827 var(--tw-gradient-via-position), var(--tw-gradient-to);
}
.dark\:to-gray-900 {
--tw-gradient-to: #111827 var(--tw-gradient-to-position);
--tw-gradient-to-position: ;
}
.dark\:text-gray-200 {
--tw-text-opacity: 1;
color: rgb(229 231 235 / var(--tw-text-opacity));
}
.dark\:text-gray-300 {
--tw-text-opacity: 1;
color: rgb(209 213 219 / var(--tw-text-opacity));
}
.dark\:text-gray-500 {
--tw-text-opacity: 1;
color: rgb(107 114 128 / var(--tw-text-opacity));
}
.dark\:text-white {
--tw-text-opacity: 1;
color: rgb(255 255 255 / var(--tw-text-opacity));
}
.dark\:hover\:bg-gray-800\/70:hover {
background-color: rgb(31 41 55 / 0.7);
}
}
@media (min-width: 768px) {
.md\:pl-0 {
padding-left: 0px;
}
}

View File

@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chat Widget Test</title>
<link href="dist/output.css" rel="stylesheet">
</head>
<body>
<script src="dist/chat-widget.js"></script>
</body>
</html>

1002
extensions/web-widget/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,15 @@
{
"name": "web-widget",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"tailwindcss": "^3.3.1"
}
}

View File

@@ -0,0 +1,58 @@
<div id="docsgpt-widget" class="dark fixed bottom-5 left-5 pl-5 md:pl-0 z-50">
<style>
@keyframes dotBounce {
0%, 80%, 100% {
transform: translateY(0);
}
40% {
transform: translateY(-5px);
}
}
.dot-animation {
display: inline-block;
animation: dotBounce 1s infinite ease-in-out;
}
.delay-200 {
animation-delay: 200ms;
}
.delay-400 {
animation-delay: 400ms;
}
</style>
<div class="divide-y dark:divide-gray-700 rounded-md border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm" style="width: 18rem; transform: translateY(0%) translateZ(0px);"><div>
<div class="flex items-center gap-2 p-3">
<div id="docsgpt-init-message" class="flex-1">
<h3 class="text-sm font-bold text-gray-700 dark:text-gray-200">Looking for help with documentation?</h3>
<p class="mt-1 text-xs text-gray-400 dark:text-gray-500">DocsGPT AI assistant will help you with docs</p>
</div>
<div id="docsgpt-answer" class="hidden">
<p class="mt-1 text-xs text-gray-600 dark:text-gray-300">Come cool answer</p>
</div>
</div>
</div>
<div class="w-full">
<button id="ask-docsgpt" class="flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 hover:bg-gray-100 rounded-b dark:hover:bg-gray-800/70">
Ask DocsGPT
</button>
<form id="docsgpt-chat-form" class="relative w-full m-0 hidden" style="opacity: 1;" data-projection-id="1">
<input id="docsgpt-chat-input" type="text" class="w-full bg-transparent px-5 py-3 pr-8 text-sm text-gray-700 dark:text-white focus:outline-none" placeholder="What do you want to do?" value="">
<button class="absolute inset-y-0 right-2 -mx-2 px-2" type="submit" style="opacity: 0;" data-projection-id="2">
</button>
</form>
<p id="docsgpt-chat-processing" class="hidden flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 rounded-b animate-fadeIn animate-2s">
Processing<span class="dot-animation">.</span><span class="dot-animation delay-200">.</span><span class="dot-animation delay-400">.</span>
</p>
</div>
</div>
</div>

View File

@@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;

View File

@@ -0,0 +1,56 @@
const API_ENDPOINT = "http://localhost:5001/api/answer"; // Replace with your API endpoint
const widgetInitMessage = document.getElementById("docsgpt-init-message");
const widgetAnswerMessage = document.getElementById("docsgpt-answer");
const widgetAnswerMessageP = widgetAnswerMessage.querySelector("p");
const askDocsGPTButton = document.getElementById("ask-docsgpt");
const chatInput = document.getElementById("docsgpt-chat-input");
const chatForm = document.getElementById("docsgpt-chat-form");
const chatProcessing = document.getElementById("docsgpt-chat-processing");
async function sendMessage(message) {
const requestData = {
"question": message,
"active_docs": "default",
"api_key": "token",
"embeddings_key": "token",
"model": "default",
"history": null,
}
const response = await fetch(API_ENDPOINT, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(requestData),
});
const data = await response.json();
return data.answer;
}
askDocsGPTButton.addEventListener("click", () => {
askDocsGPTButton.classList.add("hidden");
chatForm.classList.remove("hidden");
chatForm.focus();
widgetInitMessage.classList.remove("hidden");
widgetAnswerMessage.classList.add("hidden");
});
chatForm.addEventListener("submit", async (e) => {
e.preventDefault();
const message = chatInput.value.trim();
if (!message) return;
chatInput.value = "";
chatForm.classList.add("hidden");
chatProcessing.classList.remove("hidden");
const reply = await sendMessage(message);
chatProcessing.classList.add("hidden");
// inside <p> tag
widgetAnswerMessageP.innerHTML = reply;
widgetAnswerMessage.classList.remove("hidden");
widgetInitMessage.classList.add("hidden");
askDocsGPTButton.classList.remove("hidden");
});

View File

@@ -0,0 +1,10 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
content: ["./src/**/*.{html,js}"],
theme: {
extend: {},
},
plugins: [],
}

View File

@@ -0,0 +1,2 @@
# Please put appropriate value
VITE_API_HOST=http://localhost:5001

1
frontend/.env.production Normal file
View File

@@ -0,0 +1 @@
VITE_API_HOST = https://gptcloud.arc53.com

View File

@@ -4,6 +4,7 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>DocsGPT 🦖</title>
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
</head>
<body>
<div id="root" class="h-screen"></div>

File diff suppressed because it is too large Load Diff

View File

@@ -23,12 +23,16 @@
"@vercel/analytics": "^0.1.10",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-dropzone": "^14.2.3",
"react-markdown": "^8.0.7",
"react-redux": "^8.0.5",
"react-router-dom": "^6.8.1"
"react-router-dom": "^6.8.1",
"react-syntax-highlighter": "^15.5.0"
},
"devDependencies": {
"@types/react": "^18.0.27",
"@types/react-dom": "^18.0.10",
"@types/react-syntax-highlighter": "^15.5.6",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.51.0",
"@vitejs/plugin-react": "^3.1.0",
@@ -49,6 +53,7 @@
"prettier-plugin-tailwindcss": "^0.2.2",
"tailwindcss": "^3.2.4",
"typescript": "^4.9.5",
"vite": "^4.1.0"
"vite": "^4.1.0",
"vite-plugin-svgr": "^2.4.0"
}
}

BIN
frontend/public/favicon.ico Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

View File

@@ -1,12 +1,14 @@
import { useRef, useState } from 'react';
import { useEffect, useRef, useState } from 'react';
import { NavLink } from 'react-router-dom';
import Arrow1 from './assets/arrow.svg';
import Arrow2 from './assets/dropdown-arrow.svg';
import Exit from './assets/exit.svg';
import Message from './assets/message.svg';
import Hamburger from './assets/hamburger.svg';
import Key from './assets/key.svg';
import Info from './assets/info.svg';
import Link from './assets/link.svg';
import UploadIcon from './assets/upload.svg';
import { ActiveState } from './models/misc';
import APIKeyModal from './preferences/APIKeyModal';
import SelectDocsModal from './preferences/SelectDocsModal';
@@ -19,6 +21,8 @@ import {
setSelectedDocs,
} from './preferences/preferenceSlice';
import { useOutsideAlerter } from './hooks';
import Upload from './upload/Upload';
import { Doc } from './preferences/preferenceApi';
export default function Navigation({
navState,
@@ -34,15 +38,35 @@ export default function Navigation({
const [isDocsListOpen, setIsDocsListOpen] = useState(false);
const isApiKeySet = useSelector(selectApiKeyStatus);
const [apiKeyModalState, setApiKeyModalState] = useState<ActiveState>(
isApiKeySet ? 'INACTIVE' : 'ACTIVE',
);
const [apiKeyModalState, setApiKeyModalState] =
useState<ActiveState>('INACTIVE');
const isSelectedDocsSet = useSelector(selectSelectedDocsStatus);
const [selectedDocsModalState, setSelectedDocsModalState] =
useState<ActiveState>(isSelectedDocsSet ? 'INACTIVE' : 'ACTIVE');
const [uploadModalState, setUploadModalState] =
useState<ActiveState>('INACTIVE');
const navRef = useRef(null);
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
const handleDeleteClick = (index: number, doc: Doc) => {
const docPath = 'indexes/' + 'local' + '/' + doc.name;
fetch(`${apiHost}/api/delete_old?path=${docPath}`, {
method: 'GET',
})
.then(() => {
// remove the image element from the DOM
const imageElement = document.querySelector(
`#img-${index}`,
) as HTMLElement;
const parentElement = imageElement.parentNode as HTMLElement;
parentElement.parentNode?.removeChild(parentElement);
})
.catch((error) => console.error(error));
};
useOutsideAlerter(
navRef,
() => {
@@ -58,6 +82,19 @@ export default function Navigation({
[navState, isDocsListOpen, apiKeyModalState],
);
/*
Needed to fix bug where if mobile nav was closed and then window was resized to desktop, nav would still be closed but the button to open would be gone, as per #1 on issue #146
*/
useEffect(() => {
window.addEventListener('resize', () => {
if (window.matchMedia('(min-width: 768px)').matches) {
setNavState('ACTIVE');
} else {
setNavState('INACTIVE');
}
});
}, []);
return (
<>
<div
@@ -66,7 +103,7 @@ export default function Navigation({
navState === 'INACTIVE' && '-ml-96 md:-ml-[14rem]'
} duration-20 fixed z-20 flex h-full w-72 flex-col border-r-2 bg-gray-50 transition-all`}
>
<div className={'h-16 w-full border-b-2'}>
<div className={'visible h-16 w-full border-b-2 md:hidden'}>
<button
className="float-right mr-5 mt-5 h-5 w-5"
onClick={() =>
@@ -95,8 +132,8 @@ export default function Navigation({
</NavLink>
<div className="flex-grow border-b-2 border-gray-100"></div>
<div className="flex flex-grow flex-col-reverse border-b-2">
<div className="relative my-4 px-6">
<div className="flex flex-col-reverse border-b-2">
<div className="relative my-4 flex gap-2 px-2">
<div
className="flex h-12 w-full cursor-pointer justify-between rounded-md border-2 bg-white"
onClick={() => setIsDocsListOpen(!isDocsListOpen)}
@@ -110,15 +147,20 @@ export default function Navigation({
src={Arrow2}
alt="arrow"
className={`${
isDocsListOpen ? 'rotate-0' : '-rotate-90'
isDocsListOpen ? 'rotate-0' : 'rotate-180'
} mr-3 w-3 transition-all`}
/>
</div>
<img
className="mt-2 h-9 w-9 hover:cursor-pointer"
src={UploadIcon}
onClick={() => setUploadModalState('ACTIVE')}
></img>
{isDocsListOpen && (
<div className="absolute top-12 left-0 right-0 mx-6 max-h-52 overflow-y-scroll bg-white shadow-lg">
<div className="absolute top-12 left-0 right-6 ml-2 mr-4 max-h-52 overflow-y-scroll bg-white shadow-lg">
{docs ? (
docs.map((doc, index) => {
if (doc.model) {
if (doc.model === 'openai_text-embedding-ada-002') {
return (
<div
key={index}
@@ -126,11 +168,23 @@ export default function Navigation({
dispatch(setSelectedDocs(doc));
setIsDocsListOpen(false);
}}
className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
className="flex h-10 w-full cursor-pointer items-center justify-between border-x-2 border-b-2 hover:bg-gray-100"
>
<p className="ml-5 py-3">
<p className="ml-5 flex-1 overflow-hidden overflow-ellipsis whitespace-nowrap py-3">
{doc.name} {doc.version}
</p>
{doc.location === 'local' ? (
<img
src={Exit}
alt="Exit"
className="mr-4 h-3 w-3 cursor-pointer hover:opacity-50"
id={`img-${index}`}
onClick={(event) => {
event.stopPropagation();
handleDeleteClick(index, doc);
}}
/>
) : null}
</div>
);
}
@@ -143,7 +197,7 @@ export default function Navigation({
</div>
)}
</div>
<p className="ml-6 font-bold text-jet">Source Docs</p>
<p className="ml-6 mt-3 font-bold text-jet">Source Docs</p>
</div>
<div className="flex flex-col gap-2 border-b-2 py-2">
<div
@@ -209,6 +263,10 @@ export default function Navigation({
setModalState={setApiKeyModalState}
isCancellable={isApiKeySet}
/>
<Upload
modalState={uploadModalState}
setModalState={setUploadModalState}
></Upload>
</>
);
}

View File

@@ -0,0 +1,4 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
<path d="M6.37776 10.1001V12.9C6.37776 13.457 6.599 13.9911 6.99282 14.3849C7.38664 14.7788 7.92077 15 8.47772 15L11.2777 8.70011V1.00025H3.38181C3.04419 0.996436 2.71656 1.11477 2.45929 1.33344C2.20203 1.55212 2.03246 1.8564 1.98184 2.19023L1.01585 8.49012C0.985398 8.69076 0.998931 8.89563 1.05551 9.09053C1.1121 9.28543 1.21038 9.46569 1.34355 9.61884C1.47671 9.77198 1.64159 9.89434 1.82674 9.97744C2.01189 10.0605 2.2129 10.1024 2.41583 10.1001H6.37776ZM11.2777 1.00025H13.1466C13.5428 0.993247 13.9277 1.13195 14.2284 1.39002C14.5291 1.64809 14.7245 2.00758 14.7776 2.40023V7.30014C14.7245 7.69279 14.5291 8.05227 14.2284 8.31035C13.9277 8.56842 13.5428 8.70712 13.1466 8.70011H11.2777" fill="white"/>
<path d="M11.2777 8.70011L8.47772 15C7.92077 15 7.38664 14.7788 6.99282 14.3849C6.599 13.9911 6.37776 13.457 6.37776 12.9V10.1001H2.41583C2.2129 10.1024 2.01189 10.0605 1.82674 9.97744C1.64159 9.89434 1.47671 9.77198 1.34355 9.61884C1.21038 9.46569 1.1121 9.28543 1.05551 9.09053C0.998931 8.89563 0.985398 8.69076 1.01585 8.49012L1.98184 2.19023C2.03246 1.8564 2.20203 1.55212 2.45929 1.33344C2.71656 1.11477 3.04419 0.996436 3.38181 1.00025H11.2777M11.2777 8.70011V1.00025M11.2777 8.70011H13.1466C13.5428 8.70712 13.9277 8.56842 14.2284 8.31035C14.5291 8.05227 14.7245 7.69279 14.7776 7.30014V2.40023C14.7245 2.00758 14.5291 1.64809 14.2284 1.39002C13.9277 1.13195 13.5428 0.993247 13.1466 1.00025H11.2777" stroke="current" stroke-width="1.4" stroke-linecap="round" stroke-linejoin="round"/>
</svg>

After

Width:  |  Height:  |  Size: 1.6 KiB

View File

@@ -0,0 +1,4 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
<path d="M9.39995 5.89997V3.09999C9.39995 2.54304 9.1787 2.0089 8.78487 1.61507C8.39105 1.22125 7.85691 1 7.29996 1L4.49998 7.29996V14.9999H12.3959C12.7336 15.0037 13.0612 14.8854 13.3185 14.6667C13.5757 14.448 13.7453 14.1437 13.7959 13.8099L14.7619 7.50996C14.7924 7.30931 14.7788 7.10444 14.7222 6.90954C14.6657 6.71464 14.5674 6.53437 14.4342 6.38123C14.301 6.22808 14.1362 6.10572 13.951 6.02262C13.7659 5.93952 13.5649 5.89767 13.3619 5.89997H9.39995ZM4.49998 14.9999H2.39999C2.02869 14.9999 1.6726 14.8524 1.41005 14.5899C1.1475 14.3273 1 13.9712 1 13.5999V8.69995C1 8.32865 1.1475 7.97256 1.41005 7.71001C1.6726 7.44746 2.02869 7.29996 2.39999 7.29996H4.49998" fill="white"/>
<path d="M4.49998 7.29996L7.29996 1C7.85691 1 8.39105 1.22125 8.78487 1.61507C9.1787 2.0089 9.39995 2.54304 9.39995 3.09999V5.89997H13.3619C13.5649 5.89767 13.7659 5.93952 13.951 6.02262C14.1362 6.10572 14.301 6.22808 14.4342 6.38123C14.5674 6.53437 14.6657 6.71464 14.7223 6.90954C14.7788 7.10444 14.7924 7.30931 14.7619 7.50996L13.7959 13.8099C13.7453 14.1437 13.5757 14.448 13.3185 14.6667C13.0612 14.8854 12.7336 15.0037 12.3959 14.9999H4.49998M4.49998 7.29996V14.9999M4.49998 7.29996H2.39999C2.02869 7.29996 1.6726 7.44746 1.41005 7.71001C1.1475 7.97256 1 8.32865 1 8.69995V13.5999C1 13.9712 1.1475 14.3273 1.41005 14.5899C1.6726 14.8524 2.02869 14.9999 2.39999 14.9999H4.49998" stroke="current" stroke-width="1.39999" stroke-linecap="round" stroke-linejoin="round"/>
</svg>

After

Width:  |  Height:  |  Size: 1.5 KiB

View File

@@ -0,0 +1,3 @@
<svg width="24" height="16" viewBox="0 0 24 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M19.35 6.04C18.67 2.59 15.64 0 12 0C9.11 0 6.6 1.64 5.35 4.04C2.34 4.36 0 6.91 0 10C0 13.31 2.69 16 6 16H19C21.76 16 24 13.76 24 11C24 8.36 21.95 6.22 19.35 6.04ZM14 9V13H10V9H7L12 4L17 9H14Z" fill="black" fill-opacity="0.54"/>
</svg>

After

Width:  |  Height:  |  Size: 340 B

View File

@@ -1,19 +1,22 @@
import { useEffect, useRef } from 'react';
import { Fragment, useEffect, useRef } from 'react';
import { useDispatch, useSelector } from 'react-redux';
import Hero from '../Hero';
import { AppDispatch } from '../store';
import ConversationBubble from './ConversationBubble';
import {
addMessage,
addQuery,
fetchAnswer,
selectConversation,
selectQueries,
selectStatus,
updateQuery,
} from './conversationSlice';
import Send from './../assets/send.svg';
import Spinner from './../assets/spinner.svg';
import { FEEDBACK, Query } from './conversationModels';
import { sendFeedback } from './conversationApi';
export default function Conversation() {
const messages = useSelector(selectConversation);
const queries = useSelector(selectQueries);
const status = useSelector(selectStatus);
const dispatch = useDispatch<AppDispatch>();
const endMessageRef = useRef<HTMLDivElement>(null);
@@ -21,35 +24,74 @@ export default function Conversation() {
useEffect(
() => endMessageRef?.current?.scrollIntoView({ behavior: 'smooth' }),
[messages],
[queries.length, queries[queries.length - 1]],
);
const handleQuestion = (question: string) => {
dispatch(addMessage({ text: question, type: 'QUESTION' }));
dispatch(addQuery({ prompt: question }));
dispatch(fetchAnswer({ question }));
};
const handleFeedback = (query: Query, feedback: FEEDBACK, index: number) => {
const prevFeedback = query.feedback;
dispatch(updateQuery({ index, query: { feedback } }));
sendFeedback(query.prompt, query.response!, feedback).catch(() =>
dispatch(updateQuery({ index, query: { feedback: prevFeedback } })),
);
};
const prepResponseView = (query: Query, index: number) => {
let responseView;
if (query.error) {
responseView = (
<ConversationBubble
ref={endMessageRef}
className={`${index === queries.length - 1 ? 'mb-24' : 'mb-7'}`}
key={`${index}ERROR`}
message={query.error}
type="ERROR"
></ConversationBubble>
);
} else if (query.response) {
responseView = (
<ConversationBubble
ref={endMessageRef}
className={`${index === queries.length - 1 ? 'mb-24' : 'mb-7'}`}
key={`${index}ANSWER`}
message={query.response}
type={'ANSWER'}
feedback={query.feedback}
handleFeedback={(feedback: FEEDBACK) =>
handleFeedback(query, feedback, index)
}
></ConversationBubble>
);
}
return responseView;
};
return (
<div className="flex justify-center p-6">
{messages.length > 0 && (
<div className="mt-20 flex w-10/12 flex-col transition-all md:w-1/2">
{messages.map((message, index) => {
<div className="flex justify-center p-4">
{queries.length > 0 && (
<div className="mt-20 flex flex-col transition-all md:w-3/4">
{queries.map((query, index) => {
return (
<ConversationBubble
ref={index === messages.length - 1 ? endMessageRef : null}
className={`${
index === messages.length - 1 ? 'mb-24' : 'mb-7'
}`}
key={index}
message={message.text}
type={message.type}
></ConversationBubble>
<Fragment key={index}>
<ConversationBubble
ref={endMessageRef}
className={'mb-7'}
key={`${index}QUESTION`}
message={query.prompt}
type="QUESTION"
></ConversationBubble>
{prepResponseView(query, index)}
</Fragment>
);
})}
</div>
)}
{messages.length === 0 && <Hero className="mt-24 md:mt-52"></Hero>}
<div className="fixed bottom-6 flex w-10/12 flex-col items-end self-center md:w-[50%]">
{queries.length === 0 && <Hero className="mt-24 md:mt-52"></Hero>}
<div className="fixed bottom-0 flex w-10/12 flex-col items-end self-center md:w-[50%]">
<div className="flex w-full">
<div
ref={inputRef}
@@ -85,7 +127,7 @@ export default function Conversation() {
</div>
)}
</div>
<p className="mt-3 w-10/12 self-center text-center text-xs text-gray-2000">
<p className="w-[100vw] self-center bg-white p-5 text-center text-xs text-gray-2000">
This is a chatbot that uses the GPT-3, Faiss and LangChain to answer
questions.
</p>

View File

@@ -1,51 +1,137 @@
import { forwardRef } from 'react';
import { forwardRef, useState } from 'react';
import Avatar from '../Avatar';
import { MESSAGE_TYPE } from './conversationModels';
import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
import Alert from './../assets/alert.svg';
import { ReactComponent as Like } from './../assets/like.svg';
import { ReactComponent as Dislike } from './../assets/dislike.svg';
import ReactMarkdown from 'react-markdown';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';
const ConversationBubble = forwardRef<
HTMLDivElement,
{
message: string;
type: MESSAGE_TYPE;
className: string;
className?: string;
feedback?: FEEDBACK;
handleFeedback?: (feedback: FEEDBACK) => void;
}
>(function ConversationBubble({ message, type, className }, ref) {
return (
<div
ref={ref}
className={`flex ${
type === 'ANSWER'
? 'self-start '
: type === 'ERROR'
? 'self-start'
: 'flex-row-reverse self-end '
} ${className}`}
>
<Avatar
className="mt-4 text-2xl"
avatar={type === 'QUESTION' ? '🧑‍💻' : '🦖'}
></Avatar>
<div
className={`${
type === 'QUESTION'
? ' mr-2 ml-10 bg-blue-1000 py-5 px-5 text-white'
: ' ml-2 mr-10 bg-gray-1000 py-5 px-5'
} flex items-center rounded-3xl ${
type === 'ERROR'
? 'rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
: ''
}`}
>
{type === 'ERROR' && (
<img src={Alert} alt="alert" className="mr-2 inline" />
)}
<p className="whitespace-pre-wrap break-words">{message}</p>
>(function ConversationBubble(
{ message, type, className, feedback, handleFeedback },
ref,
) {
const [showFeedback, setShowFeedback] = useState(false);
const List = ({
ordered,
children,
}: {
ordered?: boolean;
children: React.ReactNode;
}) => {
const Tag = ordered ? 'ol' : 'ul';
return <Tag className="list-inside list-disc">{children}</Tag>;
};
let bubble;
if (type === 'QUESTION') {
bubble = (
<div ref={ref} className={`flex flex-row-reverse self-end ${className}`}>
<Avatar className="mt-4 text-2xl" avatar="🧑‍💻"></Avatar>
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 p-3.5 text-white">
<ReactMarkdown className="whitespace-pre-wrap break-words">
{message}
</ReactMarkdown>
</div>
</div>
</div>
);
);
} else {
bubble = (
<div
ref={ref}
className={`flex self-start ${className}`}
onMouseEnter={() => setShowFeedback(true)}
onMouseLeave={() => setShowFeedback(false)}
>
<Avatar className="mt-4 text-2xl" avatar="🦖"></Avatar>
<div
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
type === 'ERROR'
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
: ''
}`}
>
{type === 'ERROR' && (
<img src={Alert} alt="alert" className="mr-2 inline" />
)}
<ReactMarkdown
className="whitespace-pre-wrap break-words"
components={{
code({ node, inline, className, children, ...props }) {
const match = /language-(\w+)/.exec(className || '');
return !inline && match ? (
<SyntaxHighlighter
PreTag="div"
language={match[1]}
{...props}
style={vscDarkPlus}
>
{String(children).replace(/\n$/, '')}
</SyntaxHighlighter>
) : (
<code className={className ? className : ''} {...props}>
{children}
</code>
);
},
ul({ node, children }) {
return <List>{children}</List>;
},
ol({ node, children }) {
return <List ordered>{children}</List>;
},
}}
>
{message}
</ReactMarkdown>
</div>
<div
className={`mr-2 flex items-center justify-center ${
feedback === 'LIKE' || (type !== 'ERROR' && showFeedback)
? ''
: 'md:invisible'
}`}
>
<Like
className={`cursor-pointer ${
feedback === 'LIKE'
? 'fill-blue-1000 stroke-blue-1000'
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
}`}
onClick={() => handleFeedback?.('LIKE')}
></Like>
</div>
<div
className={`mr-10 flex items-center justify-center ${
feedback === 'DISLIKE' || (type !== 'ERROR' && showFeedback)
? ''
: 'md:invisible'
}`}
>
<Dislike
className={`cursor-pointer ${
feedback === 'DISLIKE'
? 'fill-red-2000 stroke-red-2000'
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
}`}
onClick={() => handleFeedback?.('DISLIKE')}
></Dislike>
</div>
</div>
);
}
return bubble;
});
export default ConversationBubble;
// TODO : split question and answer into two diff JSX

View File

@@ -1,29 +1,33 @@
import { Answer } from './conversationModels';
import { Answer, FEEDBACK } from './conversationModels';
import { Doc } from '../preferences/preferenceApi';
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
export function fetchAnswerApi(
question: string,
apiKey: string,
selectedDocs: Doc,
history: Array<any> = [],
): Promise<Answer> {
let namePath = selectedDocs.name;
if (selectedDocs.language === namePath) {
namePath = '.project';
}
const docPath =
selectedDocs.name === 'default'
? 'default'
: selectedDocs.language +
'/' +
namePath +
'/' +
selectedDocs.version +
'/' +
selectedDocs.model +
'/';
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
let docPath = 'default';
if (selectedDocs.location === 'local') {
docPath = 'local' + '/' + selectedDocs.name + '/';
} else if (selectedDocs.location === 'remote') {
docPath =
selectedDocs.language +
'/' +
namePath +
'/' +
selectedDocs.version +
'/' +
selectedDocs.model +
'/';
}
return fetch(apiHost + '/api/answer', {
method: 'POST',
@@ -34,7 +38,7 @@ export function fetchAnswerApi(
question: question,
api_key: apiKey,
embeddings_key: apiKey,
history: localStorage.getItem('chatHistory'),
history: history,
active_docs: docPath,
}),
})
@@ -42,7 +46,7 @@ export function fetchAnswerApi(
if (response.ok) {
return response.json();
} else {
Promise.reject(response);
return Promise.reject(new Error(response.statusText));
}
})
.then((data) => {
@@ -51,8 +55,72 @@ export function fetchAnswerApi(
});
}
function getRandomInt(min: number, max: number) {
min = Math.ceil(min);
max = Math.floor(max);
return Math.floor(Math.random() * (max - min) + min); // The maximum is exclusive and the minimum is inclusive
export function fetchAnswerSteaming(
question: string,
apiKey: string,
selectedDocs: Doc,
history: Array<any> = [],
onEvent: (event: MessageEvent) => void,
): Promise<Answer> {
let namePath = selectedDocs.name;
if (selectedDocs.language === namePath) {
namePath = '.project';
}
let docPath = 'default';
if (selectedDocs.location === 'local') {
docPath = 'local' + '/' + selectedDocs.name + '/';
} else if (selectedDocs.location === 'remote') {
docPath =
selectedDocs.language +
'/' +
namePath +
'/' +
selectedDocs.version +
'/' +
selectedDocs.model +
'/';
}
return new Promise<Answer>((resolve, reject) => {
const url = new URL(apiHost + '/stream');
url.searchParams.append('question', question);
url.searchParams.append('api_key', apiKey);
url.searchParams.append('embeddings_key', apiKey);
url.searchParams.append('active_docs', docPath);
url.searchParams.append('history', JSON.stringify(history));
const eventSource = new EventSource(url.href);
eventSource.onmessage = onEvent;
eventSource.onerror = (error) => {
console.log('Connection failed.');
eventSource.close();
};
});
}
export function sendFeedback(
prompt: string,
response: string,
feedback: FEEDBACK,
) {
return fetch(`${apiHost}/api/feedback`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
question: prompt,
answer: response,
feedback: feedback,
}),
}).then((response) => {
if (response.ok) {
return Promise.resolve();
} else {
return Promise.reject();
}
});
}

View File

@@ -1,5 +1,6 @@
export type MESSAGE_TYPE = 'QUESTION' | 'ANSWER' | 'ERROR';
export type Status = 'idle' | 'loading' | 'failed';
export type FEEDBACK = 'LIKE' | 'DISLIKE';
export interface Message {
text: string;
@@ -7,7 +8,7 @@ export interface Message {
}
export interface ConversationState {
conversation: Message[];
queries: Query[];
status: Status;
}
@@ -16,3 +17,10 @@ export interface Answer {
query: string;
result: string;
}
export interface Query {
prompt: string;
response?: string;
feedback?: FEEDBACK;
error?: string;
}

View File

@@ -1,34 +1,100 @@
import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
import store from '../store';
import { fetchAnswerApi } from './conversationApi';
import { Answer, ConversationState, Message } from './conversationModels';
import { fetchAnswerApi, fetchAnswerSteaming } from './conversationApi';
import { Answer, ConversationState, Query, Status } from './conversationModels';
const initialState: ConversationState = {
conversation: [],
queries: [],
status: 'idle',
};
export const fetchAnswer = createAsyncThunk<
Answer,
{ question: string },
{ state: RootState }
>('fetchAnswer', async ({ question }, { getState }) => {
const state = getState();
const API_STREAMING = import.meta.env.VITE_API_STREAMING === 'true';
const answer = await fetchAnswerApi(
question,
state.preference.apiKey,
state.preference.selectedDocs!,
);
return answer;
});
export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
'fetchAnswer',
async ({ question }, { dispatch, getState }) => {
const state = getState() as RootState;
if (state.preference) {
if (API_STREAMING) {
await fetchAnswerSteaming(
question,
state.preference.apiKey,
state.preference.selectedDocs!,
state.conversation.queries,
(event) => {
const data = JSON.parse(event.data);
// check if the 'end' event has been received
if (data.type === 'end') {
// set status to 'idle'
dispatch(conversationSlice.actions.setStatus('idle'));
} else {
const result = data.answer;
dispatch(
updateStreamingQuery({
index: state.conversation.queries.length - 1,
query: { response: result },
}),
);
}
},
);
} else {
const answer = await fetchAnswerApi(
question,
state.preference.apiKey,
state.preference.selectedDocs!,
state.conversation.queries,
);
if (answer) {
dispatch(
updateQuery({
index: state.conversation.queries.length - 1,
query: { response: answer.answer },
}),
);
dispatch(conversationSlice.actions.setStatus('idle'));
}
}
}
return { answer: '', query: question, result: '' };
},
);
export const conversationSlice = createSlice({
name: 'conversation',
initialState,
reducers: {
addMessage(state, action: PayloadAction<Message>) {
state.conversation.push(action.payload);
addQuery(state, action: PayloadAction<Query>) {
state.queries.push(action.payload);
},
updateStreamingQuery(
state,
action: PayloadAction<{ index: number; query: Partial<Query> }>,
) {
const index = action.payload.index;
if (action.payload.query.response) {
state.queries[index].response =
(state.queries[index].response || '') + action.payload.query.response;
} else {
state.queries[index] = {
...state.queries[index],
...action.payload.query,
};
}
},
updateQuery(
state,
action: PayloadAction<{ index: number; query: Partial<Query> }>,
) {
const index = action.payload.index;
state.queries[index] = {
...state.queries[index],
...action.payload.query,
};
},
setStatus(state, action: PayloadAction<Status>) {
state.status = action.payload;
},
},
extraReducers(builder) {
@@ -36,29 +102,20 @@ export const conversationSlice = createSlice({
.addCase(fetchAnswer.pending, (state) => {
state.status = 'loading';
})
.addCase(fetchAnswer.fulfilled, (state, action) => {
state.status = 'idle';
state.conversation.push({
text: action.payload.answer,
type: 'ANSWER',
});
})
.addCase(fetchAnswer.rejected, (state, action) => {
state.status = 'failed';
state.conversation.push({
text: 'Something went wrong. Please try again later.',
type: 'ERROR',
});
state.queries[state.queries.length - 1].error =
'Something went wrong. Please try again later.';
});
},
});
type RootState = ReturnType<typeof store.getState>;
export const selectConversation = (state: RootState) =>
state.conversation.conversation;
export const selectQueries = (state: RootState) => state.conversation.queries;
export const selectStatus = (state: RootState) => state.conversation.status;
export const { addMessage } = conversationSlice.actions;
export const { addQuery, updateQuery, updateStreamingQuery } =
conversationSlice.actions;
export default conversationSlice.reducer;

View File

@@ -1,5 +1,6 @@
// not all properties in Doc are going to be present. Make some optional
export type Doc = {
location: string;
name: string;
language: string;
version: string;
@@ -13,9 +14,10 @@ export type Doc = {
//Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
export async function getDocs(): Promise<Doc[] | null> {
try {
const response = await fetch(
'https://d3dg1063dc54p9.cloudfront.net/combined.json',
);
const apiHost =
import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
const response = await fetch(apiHost + '/api/combine');
const data = await response.json();
const docs: Doc[] = [];
@@ -52,17 +54,13 @@ export function setLocalRecentDocs(doc: Doc): void {
namePath = '.project';
}
const docPath =
doc.name === 'default'
? 'default'
: doc.language +
'/' +
namePath +
'/' +
doc.version +
'/' +
doc.model +
'/';
let docPath = 'default';
if (doc.location === 'local') {
docPath = 'local' + '/' + doc.name + '/';
} else if (doc.location === 'remote') {
docPath =
doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
}
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
fetch(apiHost + '/api/docs_check', {
method: 'POST',

View File

@@ -13,8 +13,18 @@ interface Preference {
}
const initialState: Preference = {
apiKey: '',
selectedDocs: null,
apiKey: 'xxx',
selectedDocs: {
name: 'default',
language: 'default',
location: 'default',
version: 'default',
description: 'default',
fullName: 'default',
dat: 'default',
docLink: 'default',
model: 'openai_text-embedding-ada-002',
} as Doc,
sourceDocs: null,
};
@@ -29,7 +39,7 @@ export const prefSlice = createSlice({
state.selectedDocs = action.payload;
},
setSourceDocs: (state, action) => {
state.sourceDocs?.push(...action.payload);
state.sourceDocs = action.payload;
},
},
});

View File

@@ -15,6 +15,7 @@ const store = configureStore({
selectedDocs: doc !== null ? JSON.parse(doc) : null,
sourceDocs: [
{
location: '',
language: '',
name: 'default',
version: '',

View File

@@ -0,0 +1,238 @@
import React from 'react';
import { useCallback, useEffect, useState } from 'react';
import { useDropzone } from 'react-dropzone';
import { useDispatch } from 'react-redux';
import { ActiveState } from '../models/misc';
import { getDocs } from '../preferences/preferenceApi';
import { setSourceDocs } from '../preferences/preferenceSlice';
export default function Upload({
modalState,
setModalState,
}: {
modalState: ActiveState;
setModalState: (state: ActiveState) => void;
}) {
const [docName, setDocName] = useState('');
const [files, setfiles] = useState<File[]>([]);
const [progress, setProgress] = useState<{
type: 'UPLOAD' | 'TRAINIING';
percentage: number;
taskId?: string;
failed?: boolean;
}>();
function Progress({
title,
isCancellable = false,
isFailed = false,
}: {
title: string;
isCancellable?: boolean;
isFailed?: boolean;
}) {
return (
<div className="mt-5 flex flex-col items-center gap-2">
<p className="text-xl tracking-[0.15px]">{title}...</p>
<p className="text-sm text-gray-2000">This may take several minutes</p>
<p className={`ml-5 text-xl text-red-400 ${isFailed ? '' : 'hidden'}`}>
Over the token limit, please consider uploading smaller document
</p>
<p className="mt-10 text-2xl">{progress?.percentage || 0}%</p>
<div className="mb-10 w-[50%]">
<div className="h-1 w-[100%] bg-blue-4000"></div>
<div
className={`relative bottom-1 h-1 bg-blue-5000 transition-all`}
style={{ width: `${progress?.percentage || 0}%` }}
></div>
</div>
<button
onClick={() => {
setDocName('');
setfiles([]);
setProgress(undefined);
setModalState('INACTIVE');
}}
className={`rounded-md bg-blue-3000 px-4 py-2 text-sm font-medium text-white ${
isCancellable ? '' : 'hidden'
}`}
>
Finish
</button>
</div>
);
}
function UploadProgress() {
return <Progress title="Upload is in progress"></Progress>;
}
function TrainingProgress() {
const dispatch = useDispatch();
useEffect(() => {
(progress?.percentage ?? 0) < 100 &&
setTimeout(() => {
const apiHost = import.meta.env.VITE_API_HOST;
fetch(`${apiHost}/api/task_status?task_id=${progress?.taskId}`)
.then((data) => data.json())
.then((data) => {
if (data.status == 'SUCCESS') {
if (data.result.limited === true) {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) =>
progress && {
...progress,
percentage: 100,
failed: true,
},
);
} else {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) =>
progress && {
...progress,
percentage: 100,
failed: false,
},
);
}
} else if (data.status == 'PROGRESS') {
setProgress(
(progress) =>
progress && {
...progress,
percentage: data.result.current,
},
);
}
});
}, 5000);
}, [progress, dispatch]);
return (
<Progress
title="Training is in progress"
isCancellable={progress?.percentage === 100}
isFailed={progress?.failed === true}
></Progress>
);
}
const onDrop = useCallback((acceptedFiles: File[]) => {
setfiles(acceptedFiles);
setDocName(acceptedFiles[0]?.name);
}, []);
const doNothing = () => undefined;
const uploadFile = () => {
const formData = new FormData();
files.forEach((file) => {
formData.append('file', file);
});
formData.append('name', docName);
formData.append('user', 'local');
const apiHost = import.meta.env.VITE_API_HOST;
const xhr = new XMLHttpRequest();
xhr.upload.addEventListener('progress', (event) => {
const progress = +((event.loaded / event.total) * 100).toFixed(2);
setProgress({ type: 'UPLOAD', percentage: progress });
});
xhr.onload = () => {
const { task_id } = JSON.parse(xhr.responseText);
setProgress({ type: 'TRAINIING', percentage: 0, taskId: task_id });
};
xhr.open('POST', `${apiHost + '/api/upload'}`);
xhr.send(formData);
};
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
multiple: false,
onDragEnter: doNothing,
onDragOver: doNothing,
onDragLeave: doNothing,
maxSize: 25000000,
accept: {
'application/pdf': ['.pdf'],
'text/plain': ['.txt'],
'text/x-rst': ['.rst'],
'text/x-markdown': ['.md'],
'application/zip': ['.zip'],
},
});
let view;
if (progress?.type === 'UPLOAD') {
view = <UploadProgress></UploadProgress>;
} else if (progress?.type === 'TRAINIING') {
view = <TrainingProgress></TrainingProgress>;
} else {
view = (
<>
<p className="text-xl text-jet">Upload New Documentation</p>
<p className="mb-3 text-xs text-gray-4000">
Please upload .pdf, .txt, .rst, .md, .zip limited to 25mb
</p>
<input
type="text"
className="h-10 w-[60%] rounded-md border-2 border-gray-5000 px-3 outline-none"
value={docName}
onChange={(e) => setDocName(e.target.value)}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-gray-4000">Name</span>
</div>
<div {...getRootProps()}>
<span className="rounded-md border border-blue-2000 px-4 py-2 font-medium text-blue-2000 hover:cursor-pointer">
<input type="button" {...getInputProps()} />
Choose Files
</span>
</div>
<div className="mt-9">
<p className="mb-5 font-medium text-eerie-black">Uploaded Files</p>
{files.map((file) => (
<p key={file.name} className="text-gray-6000">
{file.name}
</p>
))}
{files.length === 0 && <p className="text-gray-6000">None</p>}
</div>
<div className="flex flex-row-reverse">
<button
onClick={uploadFile}
className="ml-6 rounded-md bg-blue-3000 py-2 px-6 text-white"
>
Train
</button>
<button
onClick={() => {
setDocName('');
setfiles([]);
setModalState('INACTIVE');
}}
className="font-medium"
>
Cancel
</button>
</div>
</>
);
}
return (
<article
className={`${
modalState === 'ACTIVE' ? 'visible' : 'hidden'
} absolute z-30 h-screen w-screen bg-gray-alpha`}
>
<article className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg">
{view}
</article>
</article>
);
}
// TODO: sanitize all inputs

7
frontend/svg.d.ts vendored Normal file
View File

@@ -0,0 +1,7 @@
declare module '*.svg' {
import * as React from 'react';
export const ReactComponent: React.FunctionComponent<
React.SVGProps<SVGSVGElement> & { title?: string }
>;
}

View File

@@ -15,10 +15,17 @@ module.exports = {
'gray-1000': '#F6F6F6',
'gray-2000': 'rgba(0, 0, 0, 0.5)',
'gray-3000': 'rgba(243, 243, 243, 1)',
'gray-4000': '#949494',
'gray-5000': '#BBBBBB',
'gray-6000': '#757575',
'red-1000': 'rgb(254, 202, 202)',
'red-2000': '#F44336',
'red-3000': '#621B16',
'blue-1000': '#7D54D1',
'blue-2000': '#002B49',
'blue-3000': '#4B02E2',
'blue-4000': 'rgba(0, 125, 255, 0.36)',
'blue-5000': 'rgba(0, 125, 255)',
},
},
},

View File

@@ -16,6 +16,6 @@
"noEmit": true,
"jsx": "react-jsx"
},
"include": ["src"],
"include": ["src", "svg.d.ts"],
"references": [{ "path": "./tsconfig.node.json" }]
}

View File

@@ -1,7 +1,8 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import svgr from 'vite-plugin-svgr';
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
plugins: [react(), svgr()],
});

View File

@@ -1,20 +1,13 @@
import ast
import json
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
import ast
dotenv.load_dotenv()
ps = list(Path("inputs").glob("**/*.py"))
data = []
sources = []
@@ -24,13 +17,6 @@ for p in ps:
sources.append(p)
# with open('inputs/client.py', 'r') as f:
# tree = ast.parse(f.read())
# print(tree)
def get_functions_in_class(node):
functions = []
functions_code = []
@@ -64,21 +50,9 @@ for code in data:
c1 += 1
# save the structure dict as json
import json
with open('structure_dict.json', 'w') as f:
json.dump(structure_dict, f)
# llm = OpenAI(temperature=0)
# prompt = PromptTemplate(
# input_variables=["code"],
# template="Code: {code}, Documentation: ",
# )
#
# print(prompt.format(code="print('hello world')"))
# print(llm(prompt.format(code="print('hello world')")))
if not Path("outputs").exists():
Path("outputs").mkdir()
@@ -119,8 +93,3 @@ for source, classes in structure_dict.items():
else:
with open(f"outputs/{source_w}", "a") as f:
f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")

View File

@@ -1,24 +1,20 @@
import os
import sys
import nltk
import dotenv
import typer
import ast
from collections import defaultdict
from pathlib import Path
from typing import List, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
import dotenv
import nltk
import typer
from parser.file.bulk import SimpleDirectoryReader
from parser.schema.base import Document
from parser.open_ai_func import call_openai_api, get_user_permission
from parser.py2doc import transform_to_docs
from parser.py2doc import extract_functions_and_classes as extract_py
from parser.js2doc import extract_functions_and_classes as extract_js
from parser.java2doc import extract_functions_and_classes as extract_java
from parser.js2doc import extract_functions_and_classes as extract_js
from parser.open_ai_func import call_openai_api, get_user_permission
from parser.py2doc import extract_functions_and_classes as extract_py
from parser.py2doc import transform_to_docs
from parser.schema.base import Document
from parser.token_func import group_split
dotenv.load_dotenv()
@@ -28,25 +24,32 @@ nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
#Splits all files in specified folder to documents
def metadata_from_filename(title):
return {'title': title}
# Splits all files in specified folder to documents
@app.command()
def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
help="Whether to skip price confirmation"),
help="Whether to skip price confirmation"),
dir: Optional[List[str]] = typer.Option(["inputs"],
help="""List of paths to directory for index creation.
E.g. --dir inputs --dir inputs2"""),
file: Optional[List[str]] = typer.Option(None,
help="""File paths to use (Optional; overrides dir).
help="""File paths to use (Optional; overrides dir).
E.g. --file inputs/1.md --file inputs/2.md"""),
recursive: Optional[bool] = typer.Option(True,
help="Whether to recursively search in subdirectories."),
limit: Optional[int] = typer.Option(None,
help="Maximum number of files to read."),
recursive: Optional[bool] = typer.Option(True, help="Whether to recursively search in subdirectories."),
limit: Optional[int] = typer.Option(None, help="Maximum number of files to read."),
formats: Optional[List[str]] = typer.Option([".rst", ".md"],
help="""List of required extensions (list with .)
Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html"""),
exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles).")):
help="""List of required extensions (list with .)
Currently supported:
.rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles)."),
sample: Optional[bool] = typer.Option(False,
help="Whether to output sample of the first 5 split documents."),
token_check: Optional[bool] = typer.Option(True, help="Whether to group small documents and split large."),
min_tokens: Optional[int] = typer.Option(150, help="Minimum number of tokens to not group."),
max_tokens: Optional[int] = typer.Option(2000, help="Maximum number of tokens to not split."),
):
"""
Creates index from specified location or files.
By default /inputs folder is used, .rst and .md are parsed.
@@ -55,12 +58,22 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
def process_one_docs(directory, folder_name):
raw_docs = SimpleDirectoryReader(input_dir=directory, input_files=file, recursive=recursive,
required_exts=formats, num_files_limit=limit,
exclude_hidden=exclude).load_data()
raw_docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
# Here we split the documents, as needed, into smaller chunks.
# We do this due to the context limits of the LLMs.
text_splitter = RecursiveCharacterTextSplitter()
docs = text_splitter.split_documents(raw_docs)
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens,
token_check=token_check)
# Old method
# text_splitter = RecursiveCharacterTextSplitter()
# docs = text_splitter.split_documents(raw_docs)
# Sample feature
if sample:
for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text)
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
# Here we check for command line arguments for bot calls.
# If no argument exists or the yes is not True, then the
@@ -88,12 +101,11 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
@app.command()
def convert(dir: Optional[str] = typer.Option("inputs",
help="""Path to directory to make documentation for.
help="""Path to directory to make documentation for.
E.g. --dir inputs """),
formats: Optional[str] = typer.Option("py",
help="""Required language.
help="""Required language.
py, js, java supported for now""")):
"""
Creates documentation linked to original functions from specified location.
By default /inputs folder is used, .py is parsed.
@@ -107,5 +119,7 @@ def convert(dir: Optional[str] = typer.Option("inputs",
else:
raise Exception("Sorry, language not supported yet")
transform_to_docs(functions_dict, classes_dict, formats, dir)
if __name__ == "__main__":
app()
app()

View File

@@ -1,38 +1,42 @@
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
from pathlib import Path
import dotenv
import faiss
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price
def call_openai_api():
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
pickle.dump(store, f)
def get_user_permission():
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
docs_content = (" ".join(docs))
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api()
@@ -41,7 +45,8 @@ def get_user_permission():
else:
print("The API was not called. No money was spent.")
#Load .env file
# Load .env file
dotenv.load_dotenv()
ap = ArgumentParser("Script for training DocsGPT on .rst documentation files.")

View File

@@ -1,71 +1,75 @@
import os
import pickle
import dotenv
import tiktoken
import sys
import faiss
import shutil
import sys
from argparse import ArgumentParser
from pathlib import Path
from langchain.vectorstores import FAISS
import dotenv
import faiss
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from sphinx.cmd.build import main as sphinx_main
from argparse import ArgumentParser
def convert_rst_to_txt(src_dir, dst_dir):
# Check if the source directory exists
if not os.path.exists(src_dir):
raise Exception("Source directory does not exist")
# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
for file in files:
# Check if the file has .rst extension
if file.endswith(".rst"):
# Construct the full path of the file
src_file = os.path.join(root, file.replace(".rst", ""))
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={src_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
elif file.endswith(".md"):
# Rename the .md file to .rst file
src_file = os.path.join(root, file)
dst_file = os.path.join(root, file.replace(".md", ".rst"))
os.rename(src_file, dst_file)
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={dst_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
# Check if the source directory exists
if not os.path.exists(src_dir):
raise Exception("Source directory does not exist")
# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
for file in files:
# Check if the file has .rst extension
if file.endswith(".rst"):
# Construct the full path of the file
src_file = os.path.join(root, file.replace(".rst", ""))
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={src_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
elif file.endswith(".md"):
# Rename the .md file to .rst file
src_file = os.path.join(root, file)
dst_file = os.path.join(root, file.replace(".md", ".rst"))
os.rename(src_file, dst_file)
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={dst_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price
def call_openai_api():
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
pickle.dump(store, f)
def get_user_permission():
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
docs_content = (" ".join(docs))
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api()
@@ -74,6 +78,7 @@ def get_user_permission():
else:
print("The API was not called. No money was spent.")
ap = ArgumentParser("Script for training DocsGPT on Sphinx documentation")
ap.add_argument("-i", "--inputs",
type=str,
@@ -81,17 +86,17 @@ ap.add_argument("-i", "--inputs",
help="Directory containing documentation files")
args = ap.parse_args()
#Load .env file
# Load .env file
dotenv.load_dotenv()
#Directory to vector
# Directory to vector
src_dir = args.inputs
dst_dir = "tmp"
convert_rst_to_txt(src_dir, dst_dir)
# Here we load in the data in the format that Notion exports it in.
ps = list(Path("tmp/"+ src_dir).glob("**/*.txt"))
ps = list(Path("tmp/" + src_dir).glob("**/*.txt"))
# parse all child directories
data = []

View File

@@ -3,7 +3,6 @@ from abc import abstractmethod
from typing import Any, List
from langchain.docstore.document import Document as LCDocument
from parser.schema.base import Document

View File

@@ -1,8 +1,5 @@
"""Simple reader that reads files of different formats from a directory."""
import logging
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union
from parser.file.base import BaseReader
from parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
@@ -21,6 +20,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".md": MarkdownParser(),
".rst": RstParser(),
".html": HTMLParser(),
".mdx": MarkdownParser(),
}
@@ -51,16 +51,17 @@ class SimpleDirectoryReader(BaseReader):
"""
def __init__(
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
chunk_size_max: int = 2048,
) -> None:
"""Initialize with parameters."""
super().__init__()
@@ -74,6 +75,8 @@ class SimpleDirectoryReader(BaseReader):
self.exclude_hidden = exclude_hidden
self.required_exts = required_exts
self.num_files_limit = num_files_limit
print("input_files")
print(input_files)
if input_files:
self.input_files = []
@@ -99,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
elif self.exclude_hidden and input_file.name.startswith("."):
continue
elif (
self.required_exts is not None
and input_file.suffix not in self.required_exts
self.required_exts is not None
and input_file.suffix not in self.required_exts
):
continue
else:
@@ -111,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
new_input_files.extend(sub_input_files)
if self.num_files_limit is not None and self.num_files_limit > 0:
new_input_files = new_input_files[0 : self.num_files_limit]
new_input_files = new_input_files[0: self.num_files_limit]
# print total number of files added
logging.debug(
@@ -147,10 +150,15 @@ class SimpleDirectoryReader(BaseReader):
data = f.read()
if isinstance(data, List):
data_list.extend(data)
if self.file_metadata is not None:
for _ in range(len(data)):
metadata_list.append(self.file_metadata(str(input_file)))
else:
data_list.append(str(data))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))
if concatenate:
return [Document("\n".join(data_list))]

View File

@@ -9,6 +9,7 @@ from typing import Dict, Union
from parser.file.base_parser import BaseParser
class HTMLParser(BaseParser):
"""HTML parser."""
@@ -23,21 +24,20 @@ class HTMLParser(BaseParser):
Union[str, List[str]]: a string or a List of strings.
"""
try:
import unstructured
from unstructured.partition.html import partition_html
from unstructured.staging.base import convert_to_isd
from unstructured.cleaners.core import clean
except ImportError:
raise ValueError("unstructured package is required to parse HTML files.")
from unstructured.partition.html import partition_html
from unstructured.staging.base import convert_to_isd
from unstructured.cleaners.core import clean
# Using the unstructured library to convert the html to isd format
# isd sample : isd = [
# {"text": "My Title", "type": "Title"},
# {"text": "My Narrative", "type": "NarrativeText"}
# ]
# {"text": "My Title", "type": "Title"},
# {"text": "My Narrative", "type": "NarrativeText"}
# ]
with open(file, "r", encoding="utf-8") as fp:
elements = partition_html(file=fp)
isd = convert_to_isd(elements)
isd = convert_to_isd(elements)
# Removing non ascii charactwers from isd_el['text']
for isd_el in isd:
@@ -46,15 +46,15 @@ class HTMLParser(BaseParser):
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
for isd_el in isd:
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
for isd_el in isd:
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
# Creating a list of all the indexes of isd_el['type'] = 'Title'
title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
# Creating 'Chunks' - List of lists of strings
# each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
Chunks = [[]]
final_chunks = list(list())
for i,isd_el in enumerate(isd):
for i, isd_el in enumerate(isd):
if i in title_indexes:
Chunks.append([])
Chunks[-1].append(isd_el['text'])
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25
# TODO: This value can be a user defined variable
for chunk in Chunks:
# sum of lenth of all the strings in the chunk
sum = 0
sum += len(str(chunk))
if sum < 25:
Chunks.remove(chunk)
else :
else:
# appending all the approved chunks to final_chunks as a single string
final_chunks.append(" ".join([str(item) for item in chunk]))
return final_chunks

View File

@@ -7,6 +7,7 @@ import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import tiktoken
from parser.file.base_parser import BaseParser
@@ -19,19 +20,33 @@ class MarkdownParser(BaseParser):
"""
def __init__(
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
# remove_tables: bool = True,
**kwargs: Any,
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
max_tokens: int = 2048,
# remove_tables: bool = True,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
self._remove_hyperlinks = remove_hyperlinks
self._remove_images = remove_images
self._max_tokens = max_tokens
# self._remove_tables = remove_tables
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
current_text: str):
"""Append to tups chunk."""
num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
if num_tokens > self._max_tokens:
chunks = [current_text[i:i + self._max_tokens] for i in range(0, len(current_text), self._max_tokens)]
for chunk in chunks:
tups.append((current_header, chunk))
else:
tups.append((current_header, current_text))
return tups
def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
"""Convert a markdown file to a dictionary.
@@ -50,13 +65,13 @@ class MarkdownParser(BaseParser):
if current_header is not None:
if current_text == "" or None:
continue
markdown_tups.append((current_header, current_text))
markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)
current_header = line
current_text = ""
else:
current_text += line + "\n"
markdown_tups.append((current_header, current_text))
markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)
if current_header is not None:
# pass linting, assert keys are defined
@@ -101,7 +116,7 @@ class MarkdownParser(BaseParser):
return {}
def parse_tups(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> List[Tuple[Optional[str], str]]:
"""Parse file into tuples."""
with open(filepath, "r") as f:
@@ -116,7 +131,7 @@ class MarkdownParser(BaseParser):
return markdown_tups
def parse_file(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> Union[str, List[str]]:
"""Parse file into string."""
tups = self.parse_tups(filepath, errors=errors)

View File

@@ -5,7 +5,7 @@ Contains parser for md files.
"""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from typing import Any, Dict, List, Optional, Tuple, Union
from parser.file.base_parser import BaseParser
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
"""
def __init__(
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
remove_table_excess: bool = True,
remove_interpreters: bool = True,
remove_directives: bool = True,
remove_whitespaces_excess: bool = True,
#Be carefull with remove_characters_excess, might cause data loss
remove_characters_excess: bool = True,
**kwargs: Any,
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
remove_table_excess: bool = True,
remove_interpreters: bool = True,
remove_directives: bool = True,
remove_whitespaces_excess: bool = True,
# Be carefull with remove_characters_excess, might cause data loss
remove_characters_excess: bool = True,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
@@ -55,7 +55,8 @@ class RstParser(BaseParser):
for i, line in enumerate(lines):
header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
if header_match and i > 0 and (
len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
if current_header is not None:
if current_text == "" or None:
continue
@@ -68,9 +69,10 @@ class RstParser(BaseParser):
current_text = ""
else:
current_text += line + "\n"
rst_tups.append((current_header, current_text))
#TODO: Format for rst
# TODO: Format for rst
#
# if current_header is not None:
# # pass linting, assert keys are defined
@@ -134,7 +136,7 @@ class RstParser(BaseParser):
return {}
def parse_tups(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> List[Tuple[Optional[str], str]]:
"""Parse file into tuples."""
with open(filepath, "r") as f:
@@ -157,7 +159,7 @@ class RstParser(BaseParser):
return rst_tups
def parse_file(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> Union[str, List[str]]:
"""Parse file into string."""
tups = self.parse_tups(filepath, errors=errors)

View File

@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
"""
def __init__(
self,
*args: Any,
concat_rows: bool = True,
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
**kwargs: Any
self,
*args: Any,
concat_rows: bool = True,
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
**kwargs: Any
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)

Some files were not shown because too many files have changed in this diff Show More