mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
Compare commits
712 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bed4939652 | ||
|
|
ebf6109219 | ||
|
|
ad602f22c8 | ||
|
|
70f44fcaca | ||
|
|
1f32e7cf82 | ||
|
|
00390200ec | ||
|
|
0a11a3afee | ||
|
|
9f77b03643 | ||
|
|
c6dc1675d8 | ||
|
|
e475a4cc7c | ||
|
|
dfc3cdd5d4 | ||
|
|
6974db5fd8 | ||
|
|
32c67c2a02 | ||
|
|
6c585de6d3 | ||
|
|
1056c943d3 | ||
|
|
839f0a3b95 | ||
|
|
b19e9cae23 | ||
|
|
84a15ef54d | ||
|
|
d4b409e166 | ||
|
|
ba1c0ab6fb | ||
|
|
eddafcfdfb | ||
|
|
e00c6f2c14 | ||
|
|
0837295bd3 | ||
|
|
f3a005a667 | ||
|
|
d59ffaf0bd | ||
|
|
e133c29b2c | ||
|
|
f64bf7daa0 | ||
|
|
ef24318c17 | ||
|
|
33fe0ffc93 | ||
|
|
243b036ae7 | ||
|
|
06518c209a | ||
|
|
3482474265 | ||
|
|
5debb48265 | ||
|
|
84377eed07 | ||
|
|
dd9589b37a | ||
|
|
7c00099919 | ||
|
|
4429755c09 | ||
|
|
a2967afb55 | ||
|
|
3d03826db5 | ||
|
|
7ff86a2aee | ||
|
|
2a68cc9989 | ||
|
|
855365fba6 | ||
|
|
928303f27b | ||
|
|
df2f69e85f | ||
|
|
ec3407df7e | ||
|
|
5dae074c95 | ||
|
|
a35be6ae57 | ||
|
|
101935ae46 | ||
|
|
d68e731ffd | ||
|
|
bf0dd6946e | ||
|
|
41cbcbc07f | ||
|
|
73f93946b0 | ||
|
|
181d2504e5 | ||
|
|
b0423d987e | ||
|
|
9157fe7323 | ||
|
|
e02718947a | ||
|
|
ebbd47c9cb | ||
|
|
ad810b3740 | ||
|
|
c3e85d747a | ||
|
|
8e092cbe1c | ||
|
|
9a35609bc7 | ||
|
|
7fed92d6b3 | ||
|
|
1f52461cd9 | ||
|
|
7fd8e57bdc | ||
|
|
ed6cd9890a | ||
|
|
f876f9e20e | ||
|
|
62b15f2d6f | ||
|
|
89529f4df5 | ||
|
|
042519005c | ||
|
|
7e24995afe | ||
|
|
877b165a9a | ||
|
|
64f72ada28 | ||
|
|
171916e1a4 | ||
|
|
dbfc1bb68f | ||
|
|
5d4c067d80 | ||
|
|
3f10a775ba | ||
|
|
2f05a47de3 | ||
|
|
9ca079c95a | ||
|
|
0b890e1d70 | ||
|
|
0bb014c965 | ||
|
|
0684449c2a | ||
|
|
a23806d16a | ||
|
|
0b7be94d13 | ||
|
|
4ac996cfe6 | ||
|
|
78c819f976 | ||
|
|
365537f74e | ||
|
|
5c756348a5 | ||
|
|
ed12c2d527 | ||
|
|
82189b0a3c | ||
|
|
23889f7f16 | ||
|
|
45e14bc2f5 | ||
|
|
0746f30645 | ||
|
|
daedfc0a57 | ||
|
|
2cc3372b86 | ||
|
|
e024452610 | ||
|
|
06e4a05e41 | ||
|
|
256514fefc | ||
|
|
3f64ff8194 | ||
|
|
af2cef1bfc | ||
|
|
3be74b1fdd | ||
|
|
e2a705806a | ||
|
|
5c99615edf | ||
|
|
605f168c7e | ||
|
|
b223cf05d9 | ||
|
|
419b98b50f | ||
|
|
b99b3b844a | ||
|
|
d9787e849e | ||
|
|
631e77ce65 | ||
|
|
7ff3a31e72 | ||
|
|
331dfdbab4 | ||
|
|
c83ff2237c | ||
|
|
9972435525 | ||
|
|
409d0e4084 | ||
|
|
18ed255f5a | ||
|
|
4a8e9bf04e | ||
|
|
0b1a302995 | ||
|
|
6978e7439f | ||
|
|
fcb6bec474 | ||
|
|
91690ff99a | ||
|
|
45f930a9e2 | ||
|
|
09a1879f3e | ||
|
|
1627d424e7 | ||
|
|
0aa9da39a9 | ||
|
|
8564c2ba72 | ||
|
|
1c791f240a | ||
|
|
bea0ccaa6c | ||
|
|
05f756963c | ||
|
|
54ad6ad1c7 | ||
|
|
c44ff77e09 | ||
|
|
c77d415893 | ||
|
|
92c9612dee | ||
|
|
b40417fcfe | ||
|
|
13c890b212 | ||
|
|
741ab6e43c | ||
|
|
3db46ecd68 | ||
|
|
a972bb8827 | ||
|
|
2e4e080329 | ||
|
|
5fe4c40ec1 | ||
|
|
d18cb373fc | ||
|
|
1b1771e4eb | ||
|
|
51e450cc4b | ||
|
|
67a97a7e51 | ||
|
|
1e88c86378 | ||
|
|
fcb5f946dd | ||
|
|
2e69e9bef3 | ||
|
|
88623754cf | ||
|
|
886bcef7b0 | ||
|
|
cc414da744 | ||
|
|
6e88ecc2da | ||
|
|
88d2420163 | ||
|
|
7e71ee1aae | ||
|
|
c6d78e27c6 | ||
|
|
b6d06dcfc3 | ||
|
|
756c46c026 | ||
|
|
5afba6e30f | ||
|
|
02a23a65e7 | ||
|
|
e7de16833d | ||
|
|
990c6d1c64 | ||
|
|
f78a8c6fea | ||
|
|
5580d19b75 | ||
|
|
c035e3c7c6 | ||
|
|
1502adfb85 | ||
|
|
3b76b3ddce | ||
|
|
e4a1730a5b | ||
|
|
4aa66170c5 | ||
|
|
24352b56af | ||
|
|
cbea17b4d5 | ||
|
|
4dd0d65db4 | ||
|
|
cdfdcc7d03 | ||
|
|
b4082b2cfa | ||
|
|
fae3cfc58f | ||
|
|
ad038784cc | ||
|
|
7e69dd5ff0 | ||
|
|
8720ec65ab | ||
|
|
3098f99eba | ||
|
|
98b90b82c4 | ||
|
|
01268a37e3 | ||
|
|
0c46f3c205 | ||
|
|
b442cc186a | ||
|
|
2353276aa7 | ||
|
|
8c034d3e78 | ||
|
|
2c25f4a4c0 | ||
|
|
d2a988a715 | ||
|
|
bd12eac145 | ||
|
|
ebc44273c9 | ||
|
|
b781d78cc6 | ||
|
|
3f7c8bdba5 | ||
|
|
fd8e277530 | ||
|
|
6a024b0ced | ||
|
|
a185b2a12a | ||
|
|
2366c2cd94 | ||
|
|
567c01e302 | ||
|
|
b3d2c1a5d1 | ||
|
|
7b3ecb5c2f | ||
|
|
f4abed43ba | ||
|
|
5854202f22 | ||
|
|
d3238de8ab | ||
|
|
09a2705311 | ||
|
|
a4c0861cf4 | ||
|
|
5ba917c5e4 | ||
|
|
83f2fb1e62 | ||
|
|
7bf79675c1 | ||
|
|
f33aa9c71b | ||
|
|
ac1c21c784 | ||
|
|
1757ce23af | ||
|
|
3f1bae3044 | ||
|
|
20bd7ca2cc | ||
|
|
a2b0204a95 | ||
|
|
f7063d03f1 | ||
|
|
9be3043e0f | ||
|
|
4aeeaf185c | ||
|
|
a25d5d98a4 | ||
|
|
973304e0d7 | ||
|
|
590a735f99 | ||
|
|
42185a011b | ||
|
|
e7b872a5df | ||
|
|
2bf75c36e4 | ||
|
|
bcd9005b53 | ||
|
|
cca6297a64 | ||
|
|
39e94d4a5e | ||
|
|
204b1b1963 | ||
|
|
b2e45e8af3 | ||
|
|
27797581ba | ||
|
|
a797801d4c | ||
|
|
d7c09e3493 | ||
|
|
085542c861 | ||
|
|
7ccd74b022 | ||
|
|
047afeebb6 | ||
|
|
a6eab324b8 | ||
|
|
74a11da9bd | ||
|
|
473cd79af1 | ||
|
|
b3c075714c | ||
|
|
f5661b3b1e | ||
|
|
8ce1fd561d | ||
|
|
adb2cf35d4 | ||
|
|
3e32724729 | ||
|
|
b59aa2f3e7 | ||
|
|
44405b250c | ||
|
|
2b547f71f4 | ||
|
|
56e3a1c3b2 | ||
|
|
644a66c983 | ||
|
|
5f62c0d57a | ||
|
|
c440e6f8fa | ||
|
|
706bd6126a | ||
|
|
f8544cf14b | ||
|
|
a8bb992569 | ||
|
|
ddafb96eba | ||
|
|
1dfb5d29c3 | ||
|
|
3c64abceb8 | ||
|
|
3c43b87e9f | ||
|
|
d7fe1150dc | ||
|
|
3e55be910b | ||
|
|
1ae0af56cc | ||
|
|
b329ede52a | ||
|
|
a9f6a06446 | ||
|
|
24383997ef | ||
|
|
48ec0ae44c | ||
|
|
ea49095b0a | ||
|
|
f3ff3920d9 | ||
|
|
3e20934e20 | ||
|
|
ec5db5d2c7 | ||
|
|
cc25b5e856 | ||
|
|
c06f232589 | ||
|
|
e1a8184c2e | ||
|
|
fc24eb08cb | ||
|
|
345ab8ea9e | ||
|
|
65547bad87 | ||
|
|
4402442e54 | ||
|
|
5bfd7d5e6c | ||
|
|
09537ec0dd | ||
|
|
5ad4ba6abd | ||
|
|
4decb34f99 | ||
|
|
947014945f | ||
|
|
b6710beadc | ||
|
|
c2c5f07ffa | ||
|
|
95a60adfcc | ||
|
|
59eeb73b60 | ||
|
|
0173a4d7fa | ||
|
|
e2e7ee1893 | ||
|
|
58dfc58622 | ||
|
|
c8bfc52fab | ||
|
|
3e50fe1aa9 | ||
|
|
e7c760e68b | ||
|
|
fa9ca221b4 | ||
|
|
cff76c672c | ||
|
|
2395785337 | ||
|
|
862ea5fa36 | ||
|
|
aa0964d99f | ||
|
|
5b5281e50c | ||
|
|
8bcf7bdade | ||
|
|
57cfafeb34 | ||
|
|
5ea8706ba9 | ||
|
|
68f497517d | ||
|
|
8308bd0039 | ||
|
|
b7f00324bc | ||
|
|
77f5328ab9 | ||
|
|
bb82b9a9d3 | ||
|
|
355fd2b5d7 | ||
|
|
1e6cc95f09 | ||
|
|
bb8b4cae79 | ||
|
|
faef061d74 | ||
|
|
62af7549c7 | ||
|
|
04b4db763c | ||
|
|
c95b4d1305 | ||
|
|
40b8aa42cc | ||
|
|
e66535c572 | ||
|
|
81c8511316 | ||
|
|
b59170078d | ||
|
|
453c653975 | ||
|
|
976e7b6765 | ||
|
|
94a8c42311 | ||
|
|
534b650c10 | ||
|
|
f01c04bb24 | ||
|
|
fcc0449076 | ||
|
|
565b0c5a9c | ||
|
|
c204d37ac7 | ||
|
|
c5adea6993 | ||
|
|
c8648101a7 | ||
|
|
1f9c167bd2 | ||
|
|
cbae7bd500 | ||
|
|
27f97bc55d | ||
|
|
601ba40cb0 | ||
|
|
4632531f2d | ||
|
|
c9e95a9146 | ||
|
|
c48d4ae7df | ||
|
|
4895d389e4 | ||
|
|
92916e42c1 | ||
|
|
1a8499cf26 | ||
|
|
81a912c93f | ||
|
|
989d6eee0f | ||
|
|
a8d371045b | ||
|
|
b80726e942 | ||
|
|
0fc48ea03f | ||
|
|
afc86efe28 | ||
|
|
ab1ebeb7e0 | ||
|
|
6932c7e3e9 | ||
|
|
c04687fdd1 | ||
|
|
7717242112 | ||
|
|
1ad82c22d9 | ||
|
|
8fa88175c1 | ||
|
|
b05fec93bb | ||
|
|
802311a06a | ||
|
|
dc0f26d3d8 | ||
|
|
36c32fd968 | ||
|
|
0001963a04 | ||
|
|
6a264a45e2 | ||
|
|
d897315355 | ||
|
|
d536b9d8c6 | ||
|
|
6af7d4e6e8 | ||
|
|
67dddcb224 | ||
|
|
aad12aa227 | ||
|
|
d71675f3d2 | ||
|
|
88fb35552a | ||
|
|
8a084a05c9 | ||
|
|
90ef7ddacb | ||
|
|
034dfffb85 | ||
|
|
09a15e2e59 | ||
|
|
7859a0d001 | ||
|
|
085e1b6c41 | ||
|
|
5b01664d53 | ||
|
|
03adfd4898 | ||
|
|
1616124fa2 | ||
|
|
2611550ffd | ||
|
|
2989be47cc | ||
|
|
9b7a346380 | ||
|
|
8c8bf8702f | ||
|
|
eb8c2a9277 | ||
|
|
350c91889e | ||
|
|
43018840d1 | ||
|
|
4daf443db8 | ||
|
|
a85f214fdb | ||
|
|
ab77c4e616 | ||
|
|
19315f72a0 | ||
|
|
3683d4a759 | ||
|
|
e9a7722915 | ||
|
|
7794129929 | ||
|
|
2ee2067634 | ||
|
|
ef6ec3fcb8 | ||
|
|
c57ff3ae51 | ||
|
|
aa7f59f88c | ||
|
|
3b4a95ef33 | ||
|
|
09ba14b8ca | ||
|
|
b5dc7281f9 | ||
|
|
deca7726f4 | ||
|
|
058e0279b4 | ||
|
|
5e604950c5 | ||
|
|
af1b81097f | ||
|
|
08e713d381 | ||
|
|
a2c228c09f | ||
|
|
4e6afebf9e | ||
|
|
13d274202e | ||
|
|
f56acac656 | ||
|
|
68d325b5b5 | ||
|
|
40907a2584 | ||
|
|
f4f5d99ec9 | ||
|
|
0475e55518 | ||
|
|
872b390cfa | ||
|
|
dc4c539607 | ||
|
|
69f8c76ce2 | ||
|
|
cbdcca7fd8 | ||
|
|
1e7fa988da | ||
|
|
3cfe771b8b | ||
|
|
8b4d6d0868 | ||
|
|
ba35e1422b | ||
|
|
8db9b09c96 | ||
|
|
78e99d1171 | ||
|
|
6a12f96fdc | ||
|
|
b79a20151c | ||
|
|
c9976020dd | ||
|
|
e8988e82d0 | ||
|
|
ff95570da6 | ||
|
|
6698a000e6 | ||
|
|
b084e3074d | ||
|
|
bc4f9c3442 | ||
|
|
ecf88e7ea1 | ||
|
|
d6cb66cd2f | ||
|
|
bc2241f67a | ||
|
|
3d292aa485 | ||
|
|
21f46a8aea | ||
|
|
ba6d61cc35 | ||
|
|
27b43c11bd | ||
|
|
a5292c4473 | ||
|
|
f3923488a5 | ||
|
|
d964ebfff8 | ||
|
|
e420eeece4 | ||
|
|
f34713545e | ||
|
|
2b513c7d87 | ||
|
|
7e28893613 | ||
|
|
822674ca78 | ||
|
|
35ebf97fdf | ||
|
|
b456c0ca9f | ||
|
|
bae49891be | ||
|
|
dfb4a67d87 | ||
|
|
8e727a3253 | ||
|
|
d6f03d7a07 | ||
|
|
5ada5bf1a0 | ||
|
|
ae9c935c5c | ||
|
|
95618001aa | ||
|
|
40c361968e | ||
|
|
757abda654 | ||
|
|
862807e863 | ||
|
|
d188db887c | ||
|
|
59b6c56262 | ||
|
|
f92658de82 | ||
|
|
f2b3402c17 | ||
|
|
24badf65a4 | ||
|
|
86442f212f | ||
|
|
9b6d6ecc32 | ||
|
|
1f51277004 | ||
|
|
68cc646a3e | ||
|
|
420ca1b3b4 | ||
|
|
a83e68815a | ||
|
|
d87aebb718 | ||
|
|
a9a4d14e8a | ||
|
|
9ed2fc7359 | ||
|
|
caad382a95 | ||
|
|
ea39fc9c48 | ||
|
|
bf7cce52db | ||
|
|
63a15a3359 | ||
|
|
db34392210 | ||
|
|
cc4e0ba6c1 | ||
|
|
38989f9c68 | ||
|
|
c78c92e539 | ||
|
|
31e694f50d | ||
|
|
5368199517 | ||
|
|
6bbb9176fc | ||
|
|
4209eee2f8 | ||
|
|
f65ebb6b71 | ||
|
|
ef8107e56a | ||
|
|
2293a30f19 | ||
|
|
d7678fd355 | ||
|
|
27d8a5cf99 | ||
|
|
03f6c58ac6 | ||
|
|
4fb52dc6fc | ||
|
|
0232ba3f25 | ||
|
|
5987e4c8e1 | ||
|
|
18ce7c8f2f | ||
|
|
177c9da8b5 | ||
|
|
b5f1a8e90f | ||
|
|
494c3dd1bd | ||
|
|
ad8f78d51e | ||
|
|
5112801c37 | ||
|
|
226adfdba2 | ||
|
|
22c0375dca | ||
|
|
66ebfef619 | ||
|
|
7e75513151 | ||
|
|
e77f6c9f6f | ||
|
|
5bd7c0ab8b | ||
|
|
97f7f6f7d2 | ||
|
|
d65fc70f07 | ||
|
|
dcae85eae8 | ||
|
|
686a48298b | ||
|
|
8ca590559d | ||
|
|
70251222cc | ||
|
|
e55c68d27e | ||
|
|
da4f2ef6b3 | ||
|
|
dbf2cabd38 | ||
|
|
72e68a163c | ||
|
|
919a87bf47 | ||
|
|
bea0bbfcdb | ||
|
|
c12d7a8d82 | ||
|
|
711ad1750a | ||
|
|
825567d449 | ||
|
|
800439d29e | ||
|
|
e3517dde13 | ||
|
|
f2da8473a4 | ||
|
|
9cc9a6e9b4 | ||
|
|
873406732f | ||
|
|
14ab950a6c | ||
|
|
6cd8e71f4f | ||
|
|
4aeaec9dc7 | ||
|
|
e318228a08 | ||
|
|
d22efbf745 | ||
|
|
90309d5552 | ||
|
|
72842ecd7a | ||
|
|
a1b32ffca9 | ||
|
|
44d225e6ca | ||
|
|
37ab5f9d7a | ||
|
|
61fdcec511 | ||
|
|
45cc4fd97a | ||
|
|
3228b88312 | ||
|
|
a1d3592d08 | ||
|
|
c686d950d0 | ||
|
|
ca779bb0af | ||
|
|
90f64e2527 | ||
|
|
444d50f751 | ||
|
|
2f9c72c1cf | ||
|
|
1bb81614a5 | ||
|
|
888e13e198 | ||
|
|
8166642ff9 | ||
|
|
51c42790b7 | ||
|
|
f105fd1b2c | ||
|
|
fe78e9a336 | ||
|
|
2fce25b0c8 | ||
|
|
6c0da2ea94 | ||
|
|
a353e69648 | ||
|
|
ac930d5504 | ||
|
|
d4cf8037b7 | ||
|
|
fb1fd851b0 | ||
|
|
2ff8c0b128 | ||
|
|
d232229abf | ||
|
|
490e58fb52 | ||
|
|
a8582be54d | ||
|
|
30bb8449e9 | ||
|
|
adb7132e02 | ||
|
|
4a1e488bd7 | ||
|
|
d200db0eeb | ||
|
|
28e06fa684 | ||
|
|
c4cb9b07cb | ||
|
|
817fc5d4b3 | ||
|
|
2de1e5f71a | ||
|
|
5246d85f11 | ||
|
|
9526ed0258 | ||
|
|
736add031c | ||
|
|
d4042ebaa2 | ||
|
|
54e31be3b2 | ||
|
|
b630be8c8a | ||
|
|
0aca41f9a6 | ||
|
|
a3fed0f84b | ||
|
|
1414ad6d50 | ||
|
|
ed6b4dabf8 | ||
|
|
d9309ebc6e | ||
|
|
c49b7613e0 | ||
|
|
4f88b6dc71 | ||
|
|
5c9e6404cc | ||
|
|
80df494787 | ||
|
|
c0886c2785 | ||
|
|
a83a56eecd | ||
|
|
130eb56d09 | ||
|
|
b60b473e02 | ||
|
|
e0504eb957 | ||
|
|
3886e41e94 | ||
|
|
edc54c7120 | ||
|
|
cef1167ef1 | ||
|
|
f456500f3a | ||
|
|
59328ea44d | ||
|
|
0dc840dc8e | ||
|
|
6700028bd1 | ||
|
|
213b1d1d0d | ||
|
|
feab64b09a | ||
|
|
f9f096cca8 | ||
|
|
535d174c2b | ||
|
|
11d2401970 | ||
|
|
232b36d4ae | ||
|
|
b38b159f4e | ||
|
|
606bf1ff58 | ||
|
|
46cec638dd | ||
|
|
8637397c86 | ||
|
|
7502e1881f | ||
|
|
734d5e50c5 | ||
|
|
052ff6727b | ||
|
|
2962dbd6b8 | ||
|
|
392afd6f33 | ||
|
|
fc3f4dff10 | ||
|
|
24d6889b24 | ||
|
|
27e3e22703 | ||
|
|
7b7f609c47 | ||
|
|
5389c8858a | ||
|
|
c2d2fbba96 | ||
|
|
cfc039dae1 | ||
|
|
edf24dc992 | ||
|
|
97710296ac | ||
|
|
acbfc0bb81 | ||
|
|
e1fe2fb093 | ||
|
|
dd5c1ec9ed | ||
|
|
c7f7614646 | ||
|
|
d604398642 | ||
|
|
d40b1d8937 | ||
|
|
49b4b476dc | ||
|
|
c0ec689be9 | ||
|
|
8e26decb5b | ||
|
|
921efcbf4b | ||
|
|
705ab58bfb | ||
|
|
b42e32a955 | ||
|
|
dec60a0fdd | ||
|
|
fa84d5c502 | ||
|
|
34c763caf5 | ||
|
|
e5709dfabc | ||
|
|
5f4f4a8ab9 | ||
|
|
ca9e71087b | ||
|
|
6da483b3ef | ||
|
|
e300145263 | ||
|
|
eb3f0035fe | ||
|
|
6be3a2a142 | ||
|
|
e23893a419 | ||
|
|
7b4c1dcde0 | ||
|
|
4b7cb2a22a | ||
|
|
344a8a3887 | ||
|
|
0afda5dc27 | ||
|
|
0891ef6d0a | ||
|
|
cdb64ecb19 | ||
|
|
b05ac4f2a4 | ||
|
|
411189a076 | ||
|
|
63cf4d46c9 | ||
|
|
3c683f2192 | ||
|
|
d46e59bcd4 | ||
|
|
c45e76ec31 | ||
|
|
44828707ea | ||
|
|
74c047d249 | ||
|
|
50abfb98fe | ||
|
|
6104657970 | ||
|
|
02116d4c05 | ||
|
|
23f993bb54 | ||
|
|
16aedd61da | ||
|
|
5a2f3ad616 | ||
|
|
54971104f8 | ||
|
|
deeffbf77d | ||
|
|
7e8dd6bba8 | ||
|
|
dc4078d744 | ||
|
|
1eb168be55 | ||
|
|
3c6fd365fb | ||
|
|
53c9184057 | ||
|
|
16c9872571 | ||
|
|
17160bc467 | ||
|
|
3b39e58cf3 | ||
|
|
7db055116c | ||
|
|
0262ff1aac | ||
|
|
14def09ce3 | ||
|
|
96e59da6bc | ||
|
|
8f75b0a0c0 | ||
|
|
70a40cfc45 | ||
|
|
ef6b8b9ebc | ||
|
|
f9dbaa9407 | ||
|
|
ed338668d1 | ||
|
|
1e5d94a958 | ||
|
|
51553c565f | ||
|
|
ae36805aa1 | ||
|
|
22b7445ac4 | ||
|
|
9c278d7d0b | ||
|
|
3a1592692e | ||
|
|
bdec956708 | ||
|
|
d0d8a8a3af | ||
|
|
f787962be8 | ||
|
|
57b9b369b7 | ||
|
|
ccda5bdb7e | ||
|
|
bd5fa83fe0 | ||
|
|
59caf381f7 | ||
|
|
181cb1b1bd | ||
|
|
ba2dd2d872 | ||
|
|
4dc5acd68e | ||
|
|
f57116afbe | ||
|
|
99c41c7e34 | ||
|
|
d7b38d9513 | ||
|
|
2c7aad1dcd | ||
|
|
593dba72a8 | ||
|
|
09f2f2a9e7 | ||
|
|
6c583eedb9 | ||
|
|
1c4d7a6ad1 | ||
|
|
f75ed4bc66 | ||
|
|
f487deb7b9 | ||
|
|
ad30a8476c | ||
|
|
60db807443 | ||
|
|
d1dedff9ca | ||
|
|
9a04506b0d | ||
|
|
a58369fbb1 | ||
|
|
d63b5d71a1 | ||
|
|
360d790282 | ||
|
|
a0dd8f8e0f | ||
|
|
db7c001076 | ||
|
|
c96f905d2b | ||
|
|
4b3f04083b | ||
|
|
8276b6c9a9 | ||
|
|
43d6e788dc | ||
|
|
0c062a8485 | ||
|
|
99b649f24e | ||
|
|
7c6532f145 | ||
|
|
052669a0b0 | ||
|
|
0cf86d3bbc | ||
|
|
56a16b862a | ||
|
|
b2fffb2e23 | ||
|
|
3f7a27cdbb | ||
|
|
58e30b8c88 |
19
.github/dependabot.yml
vendored
Normal file
19
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip" # See documentation for possible values
|
||||
directory: "/application" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "npm" # See documentation for possible values
|
||||
directory: "/frontend" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
14
.github/holopin.yml
vendored
14
.github/holopin.yml
vendored
@@ -1,5 +1,11 @@
|
||||
organization: arc53
|
||||
defaultSticker: clqmdf0ed34290glbvqh0kzxd
|
||||
organization: docsgpt
|
||||
defaultSticker: cm1ulwkkl180570cl82rtzympu
|
||||
stickers:
|
||||
- id: clqmdf0ed34290glbvqh0kzxd
|
||||
alias: festive
|
||||
- id: cm1ulwkkl180570cl82rtzympu
|
||||
alias: contributor2024
|
||||
- id: cm1ureg8o130450cl8c1po6mil
|
||||
alias: api
|
||||
- id: cm1urhmag148240cl8yvqxkthx
|
||||
alias: lpc
|
||||
- id: cm1urlcpq622090cl2tvu4w71y
|
||||
alias: lexeu
|
||||
|
||||
24
.github/labeler.yml
vendored
24
.github/labeler.yml
vendored
@@ -1,23 +1,31 @@
|
||||
repo:
|
||||
- '*'
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: '*'
|
||||
|
||||
github:
|
||||
- .github/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: '.github/**/*'
|
||||
|
||||
application:
|
||||
- application/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'application/**/*'
|
||||
|
||||
docs:
|
||||
- docs/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'docs/**/*'
|
||||
|
||||
extensions:
|
||||
- extensions/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'extensions/**/*'
|
||||
|
||||
frontend:
|
||||
- frontend/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'frontend/**/*'
|
||||
|
||||
scripts:
|
||||
- scripts/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'scripts/**/*'
|
||||
|
||||
tests:
|
||||
- tests/**/*
|
||||
- changed-files:
|
||||
- any-glob-to-any-file: 'tests/**/*'
|
||||
|
||||
24
.github/workflows/ci.yml
vendored
24
.github/workflows/ci.yml
vendored
@@ -1,10 +1,8 @@
|
||||
name: Build and push DocsGPT Docker image
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
@@ -14,34 +12,36 @@ jobs:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: './application/Dockerfile'
|
||||
platforms: linux/amd64
|
||||
context: ./application
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
|
||||
ghcr.io/${{ github.repository_owner }}/docsgpt:latest
|
||||
${{ secrets.DOCKER_USERNAME }}/docsgpt:${{ github.event.release.tag_name }},${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
|
||||
ghcr.io/${{ github.repository_owner }}/docsgpt:${{ github.event.release.tag_name }},ghcr.io/${{ github.repository_owner }}/docsgpt:latest
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
|
||||
cache-to: type=inline
|
||||
|
||||
24
.github/workflows/cife.yml
vendored
24
.github/workflows/cife.yml
vendored
@@ -1,10 +1,8 @@
|
||||
name: Build and push DocsGPT-FE Docker image
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
@@ -14,22 +12,22 @@ jobs:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
@@ -37,12 +35,14 @@ jobs:
|
||||
|
||||
# Runs a single command using the runners shell
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: './frontend/Dockerfile'
|
||||
platforms: linux/amd64, linux/arm64
|
||||
context: ./frontend
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
|
||||
ghcr.io/${{ github.repository_owner }}/docsgpt-fe:latest
|
||||
${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:${{ github.event.release.tag_name }},${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
|
||||
ghcr.io/${{ github.repository_owner }}/docsgpt-fe:${{ github.event.release.tag_name }},ghcr.io/${{ github.repository_owner }}/docsgpt-fe:latest
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
|
||||
cache-to: type=inline
|
||||
|
||||
49
.github/workflows/docker-develop-build.yml
vendored
Normal file
49
.github/workflows/docker-develop-build.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: Build and push DocsGPT Docker image for development
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: './application/Dockerfile'
|
||||
platforms: linux/amd64
|
||||
context: ./application
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_USERNAME }}/docsgpt:develop
|
||||
ghcr.io/${{ github.repository_owner }}/docsgpt:develop
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt:develop
|
||||
cache-to: type=inline
|
||||
49
.github/workflows/docker-develop-fe-build.yml
vendored
Normal file
49
.github/workflows/docker-develop-fe-build.yml
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
name: Build and push DocsGPT FE Docker image for development
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
file: './frontend/Dockerfile'
|
||||
platforms: linux/amd64
|
||||
context: ./frontend
|
||||
push: true
|
||||
tags: |
|
||||
${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop
|
||||
ghcr.io/${{ github.repository_owner }}/docsgpt-fe:develop
|
||||
cache-from: type=registry,ref=${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:develop
|
||||
cache-to: type=inline
|
||||
3
.github/workflows/labeler.yml
vendored
3
.github/workflows/labeler.yml
vendored
@@ -4,12 +4,13 @@ on:
|
||||
- pull_request_target
|
||||
jobs:
|
||||
triage:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/labeler@v4
|
||||
- uses: actions/labeler@v5
|
||||
with:
|
||||
repo-token: "${{ secrets.GITHUB_TOKEN }}"
|
||||
sync-labels: true
|
||||
|
||||
2
.github/workflows/lint.yml
vendored
2
.github/workflows/lint.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
ruff:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Lint with Ruff
|
||||
uses: chartboost/ruff-action@v1
|
||||
|
||||
10
.github/workflows/pytest.yml
vendored
10
.github/workflows/pytest.yml
vendored
@@ -6,11 +6,11 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
@@ -21,10 +21,10 @@ jobs:
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Test with pytest and generate coverage report
|
||||
run: |
|
||||
python -m pytest --cov=application --cov=scripts --cov=extensions --cov-report=xml
|
||||
python -m pytest --cov=application --cov-report=xml
|
||||
- name: Upload coverage reports to Codecov
|
||||
if: github.event_name == 'pull_request' && matrix.python-version == '3.11'
|
||||
uses: codecov/codecov-action@v3
|
||||
uses: codecov/codecov-action@v4
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
|
||||
2
.github/workflows/sync_fork.yaml
vendored
2
.github/workflows/sync_fork.yaml
vendored
@@ -17,7 +17,7 @@ jobs:
|
||||
steps:
|
||||
# Step 1: run a standard checkout action
|
||||
- name: Checkout target repo
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# Step 2: run the sync action
|
||||
- name: Sync upstream changes
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -172,5 +172,5 @@ application/vectors/
|
||||
|
||||
node_modules/
|
||||
.vscode/settings.json
|
||||
models/
|
||||
/models/
|
||||
model/
|
||||
|
||||
16
.vscode/launch.json
vendored
Normal file
16
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Docker Debug Frontend",
|
||||
"request": "launch",
|
||||
"type": "chrome",
|
||||
"preLaunchTask": "docker-compose: debug:frontend",
|
||||
"url": "http://127.0.0.1:5173",
|
||||
"webRoot": "${workspaceFolder}/frontend",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
21
.vscode/tasks.json
vendored
Normal file
21
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "docker-compose",
|
||||
"label": "docker-compose: debug:frontend",
|
||||
"dockerCompose": {
|
||||
"up": {
|
||||
"detached": true,
|
||||
"services": [
|
||||
"frontend"
|
||||
],
|
||||
"build": true
|
||||
},
|
||||
"files": [
|
||||
"${workspaceFolder}/docker-compose.yaml"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -6,7 +6,7 @@ Thank you for choosing to contribute to DocsGPT! We are all very grateful!
|
||||
|
||||
📣 **Discussions** - Engage in conversations, start new topics, or help answer questions.
|
||||
|
||||
🐞 **Issues** - This is where we keep track of tasks. It could be bugs,fixes or suggestions for new features.
|
||||
🐞 **Issues** - This is where we keep track of tasks. It could be bugs, fixes or suggestions for new features.
|
||||
|
||||
🛠️ **Pull requests** - Suggest changes to our repository, either by working on existing issues or adding new features.
|
||||
|
||||
@@ -21,8 +21,9 @@ Thank you for choosing to contribute to DocsGPT! We are all very grateful!
|
||||
- If you're interested in contributing code, here are some important things to know:
|
||||
|
||||
- We have a frontend built on React (Vite) and a backend in Python.
|
||||
=======
|
||||
Before creating issues, please check out how the latest version of our app looks and works by launching it via [Quickstart](https://github.com/arc53/DocsGPT#quickstart) the version on our live demo is slightly modified with login. Your issues should relate to the version that you can launch via [Quickstart](https://github.com/arc53/DocsGPT#quickstart).
|
||||
|
||||
|
||||
Before creating issues, please check out how the latest version of our app looks and works by launching it via [Quickstart](https://github.com/arc53/DocsGPT#quickstart); the version on our live demo is slightly modified with login. Your issues should relate to the version you can launch via [Quickstart](https://github.com/arc53/DocsGPT#quickstart).
|
||||
|
||||
### 👨💻 If you're interested in contributing code, here are some important things to know:
|
||||
|
||||
@@ -43,7 +44,7 @@ Please try to follow the guidelines.
|
||||
|
||||
### 🖥 If you are looking to contribute to Backend (🐍 Python):
|
||||
|
||||
- Review our issues and contribute to [`/application`](https://github.com/arc53/DocsGPT/tree/main/application) or [`/scripts`](https://github.com/arc53/DocsGPT/tree/main/scripts) (please disregard old [`ingest_rst.py`](https://github.com/arc53/DocsGPT/blob/main/scripts/old/ingest_rst.py) [`ingest_rst_sphinx.py`](https://github.com/arc53/DocsGPT/blob/main/scripts/old/ingest_rst_sphinx.py) files; they will be deprecated soon).
|
||||
- Review our issues and contribute to [`/application`](https://github.com/arc53/DocsGPT/tree/main/application) or [`/scripts`](https://github.com/arc53/DocsGPT/tree/main/scripts) (please disregard old [`ingest_rst.py`](https://github.com/arc53/DocsGPT/blob/main/scripts/old/ingest_rst.py) [`ingest_rst_sphinx.py`](https://github.com/arc53/DocsGPT/blob/main/scripts/old/ingest_rst_sphinx.py) files; these will be deprecated soon).
|
||||
- All new code should be covered with unit tests ([pytest](https://github.com/pytest-dev/pytest)). Please find tests under [`/tests`](https://github.com/arc53/DocsGPT/tree/main/tests) folder.
|
||||
- Before submitting your Pull Request, ensure it can be queried after ingesting some test data.
|
||||
|
||||
@@ -125,4 +126,4 @@ Thank you for considering contributing to DocsGPT! 🙏
|
||||
|
||||
## Questions/collaboration
|
||||
Feel free to join our [Discord](https://discord.gg/n5BX8dh8rU). We're very friendly and welcoming to new contributors, so don't hesitate to reach out.
|
||||
# Thank you so much for considering to contribute DocsGPT!🙏
|
||||
# Thank you so much for considering contributing to DocsGPT!🙏
|
||||
|
||||
41
HACKTOBERFEST.md
Normal file
41
HACKTOBERFEST.md
Normal file
@@ -0,0 +1,41 @@
|
||||
# **🎉 Join the Hacktoberfest with DocsGPT and win a Free T-shirt and other prizes! 🎉**
|
||||
|
||||
Welcome, contributors! We're excited to announce that DocsGPT is participating in Hacktoberfest. Get involved by submitting meaningful pull requests.
|
||||
|
||||
All contributors with accepted PRs will receive a cool Holopin! 🤩 (Watch out for a reply in your PR to collect it).
|
||||
|
||||
### 🏆 Top 50 contributors will receive a special T-shirt
|
||||
|
||||
### 🏆 [LLM Document analysis by LexEU competition](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md):
|
||||
A separate competition is available for those who submit a new retrieval / workflow method that analyzes a document using EU laws.
|
||||
Prizes of $200, $100, and $50 will be awarded for 1st, 2nd, and 3rd place, respectively.
|
||||
You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md)
|
||||
|
||||
## 📜 Here's How to Contribute:
|
||||
```text
|
||||
🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs.
|
||||
|
||||
🧩 API extension: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent.
|
||||
They can be completely separate repos.
|
||||
For example:
|
||||
https://github.com/arc53/tg-bot-docsgpt-extenstion or
|
||||
https://github.com/arc53/DocsGPT-cli
|
||||
|
||||
Non-Code Contributions:
|
||||
|
||||
📚 Wiki: Improve our documentation, create a guide or change existing documentation.
|
||||
|
||||
🖥️ Design: Improve the UI/UX or design a new feature.
|
||||
|
||||
📝 Blogging or Content Creation: Write articles or create videos to showcase DocsGPT or highlight your contributions!
|
||||
```
|
||||
|
||||
### 📝 Guidelines for Pull Requests:
|
||||
- Familiarize yourself with the current contributions and our [Roadmap](https://github.com/orgs/arc53/projects/2).
|
||||
- Before contributing we highly advise that you check existing [issues](https://github.com/arc53/DocsGPT/issues) or [create](https://github.com/arc53/DocsGPT/issues/new/choose) an issue and wait to get assigned.
|
||||
- Once you are finished with your contribution, please fill in this [form](https://airtable.com/appikMaJwdHhC1SDP/pagoblCJ9W29wf6Hf/form).
|
||||
- Refer to the [Documentation](https://docs.docsgpt.cloud/).
|
||||
- Feel free to join our [Discord](https://discord.gg/n5BX8dh8rU) server. We're here to help newcomers, so don't hesitate to jump in! Join us [here](https://discord.gg/n5BX8dh8rU).
|
||||
|
||||
Thank you very much for considering contributing to DocsGPT during Hacktoberfest! 🙏 Your contributions (not just simple typos) could earn you a stylish new t-shirt and other prizes as a token of our appreciation. 🎁 Join us, and let's code together! 🚀
|
||||
|
||||
23
README.md
23
README.md
@@ -27,10 +27,14 @@ Say goodbye to time-consuming manual searches, and let <strong><a href="https://
|
||||
|
||||
We're eager to provide personalized assistance when deploying your DocsGPT to a live environment.
|
||||
|
||||
- [Get Enterprise / teams Demo :wave:](https://www.docsgpt.cloud/contact)
|
||||
- [Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions)
|
||||
<a href ="https://cal.com/arc53/docsgpt-demo-b2b">
|
||||
<img alt="Let's chat" src="https://cal.com/book-with-cal-dark.svg" />
|
||||
</a>
|
||||
|
||||

|
||||
[Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions)
|
||||
|
||||
|
||||
<img src="https://github.com/user-attachments/assets/9a1f21de-7a15-4e42-9424-70d22ba5a913" alt="video-example-of-docs-gpt" width="1000" height="500">
|
||||
|
||||
## Roadmap
|
||||
|
||||
@@ -46,9 +50,9 @@ You can find our roadmap [here](https://github.com/orgs/arc53/projects/2). Pleas
|
||||
|
||||
If you don't have enough resources to run it, you can use bitsandbytes to quantize.
|
||||
|
||||
## Features
|
||||
## End to End AI Framework for Information Retrieval
|
||||
|
||||

|
||||

|
||||
|
||||
## Useful Links
|
||||
|
||||
@@ -124,7 +128,7 @@ docker compose -f docker-compose-dev.yaml up -d
|
||||
> Make sure you have Python 3.10 or 3.11 installed.
|
||||
|
||||
1. Export required environment variables or prepare a `.env` file in the project folder:
|
||||
- Copy [.env_sample](https://github.com/arc53/DocsGPT/blob/main/application/.env_sample) and create `.env`.
|
||||
- Copy [.env-template](https://github.com/arc53/DocsGPT/blob/main/application/.env-template) and create `.env`.
|
||||
|
||||
(check out [`application/core/settings.py`](application/core/settings.py) if you want to see more config options.)
|
||||
|
||||
@@ -197,4 +201,9 @@ We as members, contributors, and leaders, pledge to make participation in our co
|
||||
|
||||
The source code license is [MIT](https://opensource.org/license/mit/), as described in the [LICENSE](LICENSE) file.
|
||||
|
||||
Built with [:bird: :link: LangChain](https://github.com/hwchase17/langchain)
|
||||
<p>This project is supported by:</p>
|
||||
<p>
|
||||
<a href="https://www.digitalocean.com/?utm_medium=opensource&utm_source=DocsGPT">
|
||||
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/SVG/DO_Logo_horizontal_blue.svg" width="201px">
|
||||
</a>
|
||||
</p>
|
||||
|
||||
@@ -4,14 +4,11 @@ FROM ubuntu:24.04 as builder
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common
|
||||
|
||||
RUN add-apt-repository ppa:deadsnakes/ppa
|
||||
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:deadsnakes/ppa && \
|
||||
# Install necessary packages and Python
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends gcc curl wget unzip libc6-dev python3.11 python3.11-distutils python3.11-venv && \
|
||||
apt-get clean && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends gcc wget unzip libc6-dev python3.11 python3.11-distutils python3.11-venv && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Verify Python installation and setup symlink
|
||||
@@ -27,7 +24,7 @@ RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.z
|
||||
rm mpnet-base-v2.zip
|
||||
|
||||
# Install Rust
|
||||
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
|
||||
RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y
|
||||
|
||||
# Clean up to reduce container size
|
||||
RUN apt-get remove --purge -y wget unzip && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*
|
||||
@@ -50,12 +47,10 @@ RUN pip install --no-cache-dir --upgrade pip && \
|
||||
FROM ubuntu:24.04 as final
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common
|
||||
|
||||
RUN add-apt-repository ppa:deadsnakes/ppa
|
||||
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:deadsnakes/ppa && \
|
||||
# Install Python
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends python3.11 && \
|
||||
apt-get update && apt-get install -y --no-install-recommends python3.11 && \
|
||||
ln -s /usr/bin/python3.11 /usr/bin/python && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
@@ -1,29 +1,38 @@
|
||||
import asyncio
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from flask import Blueprint, request, Response
|
||||
import json
|
||||
import datetime
|
||||
import logging
|
||||
import traceback
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson.dbref import DBRef
|
||||
from bson.objectid import ObjectId
|
||||
from flask import Blueprint, current_app, make_response, request, Response
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.error import bad_request
|
||||
from application.extensions import api
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.retriever.retriever_creator import RetrieverCreator
|
||||
from application.error import bad_request
|
||||
from application.utils import check_required_fields
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
prompts_collection = db["prompts"]
|
||||
api_key_collection = db["api_keys"]
|
||||
user_logs_collection = db["user_logs"]
|
||||
|
||||
answer = Blueprint("answer", __name__)
|
||||
answer_ns = Namespace("answer", description="Answer related operations", path="/")
|
||||
api.add_namespace(answer_ns)
|
||||
|
||||
gpt_model = ""
|
||||
# to have some kind of default behaviour
|
||||
@@ -31,6 +40,8 @@ if settings.LLM_NAME == "openai":
|
||||
gpt_model = "gpt-3.5-turbo"
|
||||
elif settings.LLM_NAME == "anthropic":
|
||||
gpt_model = "claude-2"
|
||||
elif settings.LLM_NAME == "groq":
|
||||
gpt_model = "llama3-8b-8192"
|
||||
|
||||
if settings.MODEL_NAME: # in case there is particular model name configured
|
||||
gpt_model = settings.MODEL_NAME
|
||||
@@ -74,27 +85,29 @@ def run_async_chain(chain, question, chat_history):
|
||||
|
||||
def get_data_from_api_key(api_key):
|
||||
data = api_key_collection.find_one({"key": api_key})
|
||||
|
||||
# # Raise custom exception if the API key is not found
|
||||
if data is None:
|
||||
raise Exception("Invalid API Key, please generate new key", 401)
|
||||
|
||||
if "retriever" not in data:
|
||||
data["retriever"] = None
|
||||
|
||||
if "source" in data and isinstance(data["source"], DBRef):
|
||||
source_doc = db.dereference(data["source"])
|
||||
data["source"] = str(source_doc["_id"])
|
||||
if "retriever" in source_doc:
|
||||
data["retriever"] = source_doc["retriever"]
|
||||
else:
|
||||
data["source"] = {}
|
||||
return data
|
||||
|
||||
|
||||
def get_vectorstore(data):
|
||||
if "active_docs" in data:
|
||||
if data["active_docs"].split("/")[0] == "default":
|
||||
vectorstore = ""
|
||||
elif data["active_docs"].split("/")[0] == "local":
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
def get_retriever(source_id: str):
|
||||
doc = sources_collection.find_one({"_id": ObjectId(source_id)})
|
||||
if doc is None:
|
||||
raise Exception("Source document does not exist", 404)
|
||||
retriever_name = None if "retriever" not in doc else doc["retriever"]
|
||||
return retriever_name
|
||||
|
||||
|
||||
def is_azure_configured():
|
||||
@@ -129,10 +142,10 @@ def save_conversation(conversation_id, question, response, source_log_docs, llm)
|
||||
"content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: "
|
||||
+question
|
||||
+"\n\n"
|
||||
+"AI: "
|
||||
+response,
|
||||
+ question
|
||||
+ "\n\n"
|
||||
+ "AI: "
|
||||
+ response,
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
@@ -172,12 +185,21 @@ def get_prompt(prompt_id):
|
||||
return prompt
|
||||
|
||||
|
||||
def complete_stream(question, retriever, conversation_id, user_api_key):
|
||||
def complete_stream(
|
||||
question, retriever, conversation_id, user_api_key, isNoneDoc=False
|
||||
):
|
||||
|
||||
try:
|
||||
response_full = ""
|
||||
source_log_docs = []
|
||||
answer = retriever.gen()
|
||||
sources = retriever.search()
|
||||
for source in sources:
|
||||
if "text" in source:
|
||||
source["text"] = source["text"][:100].strip() + "..."
|
||||
if len(sources) > 0:
|
||||
data = json.dumps({"type": "source", "source": sources})
|
||||
yield f"data: {data}\n\n"
|
||||
for line in answer:
|
||||
if "answer" in line:
|
||||
response_full += str(line["answer"])
|
||||
@@ -186,254 +208,411 @@ def complete_stream(question, retriever, conversation_id, user_api_key):
|
||||
elif "source" in line:
|
||||
source_log_docs.append(line["source"])
|
||||
|
||||
if isNoneDoc:
|
||||
for doc in source_log_docs:
|
||||
doc["source"] = "None"
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=user_api_key
|
||||
)
|
||||
conversation_id = save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
)
|
||||
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
if user_api_key is None:
|
||||
conversation_id = save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
)
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
retriever_params = retriever.get_params()
|
||||
user_logs_collection.insert_one(
|
||||
{
|
||||
"action": "stream_answer",
|
||||
"level": "info",
|
||||
"user": "local",
|
||||
"api_key": user_api_key,
|
||||
"question": question,
|
||||
"response": response_full,
|
||||
"sources": source_log_docs,
|
||||
"retriever_params": retriever_params,
|
||||
"timestamp": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
)
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
except Exception as e:
|
||||
print("\033[91merr", str(e), file=sys.stderr)
|
||||
data = json.dumps({"type": "error","error":"Please try again later. We apologize for any inconvenience.",
|
||||
"error_exception": str(e)})
|
||||
data = json.dumps(
|
||||
{
|
||||
"type": "error",
|
||||
"error": "Please try again later. We apologize for any inconvenience.",
|
||||
"error_exception": str(e),
|
||||
}
|
||||
)
|
||||
yield f"data: {data}\n\n"
|
||||
return
|
||||
return
|
||||
|
||||
@answer.route("/stream", methods=["POST"])
|
||||
def stream():
|
||||
try:
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = data["question"]
|
||||
if "history" not in data:
|
||||
history = []
|
||||
else:
|
||||
history = data["history"]
|
||||
history = json.loads(history)
|
||||
if "conversation_id" not in data:
|
||||
conversation_id = None
|
||||
else:
|
||||
conversation_id = data["conversation_id"]
|
||||
if "prompt_id" in data:
|
||||
prompt_id = data["prompt_id"]
|
||||
else:
|
||||
prompt_id = "default"
|
||||
if "selectedDocs" in data and data["selectedDocs"] is None:
|
||||
chunks = 0
|
||||
elif "chunks" in data:
|
||||
chunks = int(data["chunks"])
|
||||
else:
|
||||
chunks = 2
|
||||
if "token_limit" in data:
|
||||
token_limit = data["token_limit"]
|
||||
else:
|
||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||
|
||||
# check if active_docs or api_key is set
|
||||
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key["chunks"])
|
||||
prompt_id = data_key["prompt_id"]
|
||||
source = {"active_docs": data_key["source"]}
|
||||
user_api_key = data["api_key"]
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
user_api_key = None
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
if (
|
||||
source["active_docs"].split("/")[0] == "default"
|
||||
or source["active_docs"].split("/")[0] == "local"
|
||||
):
|
||||
retriever_name = "classic"
|
||||
else:
|
||||
retriever_name = source["active_docs"]
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=history,
|
||||
prompt=prompt,
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
@answer_ns.route("/stream")
|
||||
class Stream(Resource):
|
||||
stream_model = api.model(
|
||||
"StreamModel",
|
||||
{
|
||||
"question": fields.String(
|
||||
required=True, description="Question to be asked"
|
||||
),
|
||||
"history": fields.List(
|
||||
fields.String, required=False, description="Chat history"
|
||||
),
|
||||
"conversation_id": fields.String(
|
||||
required=False, description="Conversation ID"
|
||||
),
|
||||
"prompt_id": fields.String(
|
||||
required=False, default="default", description="Prompt ID"
|
||||
),
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"token_limit": fields.Integer(required=False, description="Token limit"),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"api_key": fields.String(required=False, description="API key"),
|
||||
"active_docs": fields.String(
|
||||
required=False, description="Active documents"
|
||||
),
|
||||
"isNoneDoc": fields.Boolean(
|
||||
required=False, description="Flag indicating if no document is used"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
return Response(
|
||||
complete_stream(
|
||||
question=question,
|
||||
retriever=retriever,
|
||||
conversation_id=conversation_id,
|
||||
user_api_key=user_api_key,
|
||||
),
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
|
||||
except ValueError:
|
||||
message = "Malformed request body"
|
||||
print("\033[91merr", str(message), file=sys.stderr)
|
||||
return Response(
|
||||
error_stream_generate(message),
|
||||
status=400,
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
except Exception as e:
|
||||
print("\033[91merr", str(e), file=sys.stderr)
|
||||
message = e.args[0]
|
||||
status_code = 400
|
||||
# # Custom exceptions with two arguments, index 1 as status code
|
||||
if(len(e.args) >= 2):
|
||||
status_code = e.args[1]
|
||||
return Response(
|
||||
error_stream_generate(message),
|
||||
status=status_code,
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
@api.expect(stream_model)
|
||||
@api.doc(description="Stream a response based on the question and retriever")
|
||||
def post(self):
|
||||
data = request.get_json()
|
||||
required_fields = ["question"]
|
||||
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
|
||||
try:
|
||||
question = data["question"]
|
||||
history = data.get("history", [])
|
||||
history = json.loads(history)
|
||||
conversation_id = data.get("conversation_id")
|
||||
prompt_id = data.get("prompt_id", "default")
|
||||
|
||||
|
||||
chunks = int(data.get("chunks", 2))
|
||||
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
|
||||
retriever_name = data.get("retriever", "classic")
|
||||
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key.get("chunks", 2))
|
||||
prompt_id = data_key.get("prompt_id", "default")
|
||||
source = {"active_docs": data_key.get("source")}
|
||||
retriever_name = data_key.get("retriever", retriever_name)
|
||||
user_api_key = data["api_key"]
|
||||
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
retriever_name = get_retriever(data["active_docs"]) or retriever_name
|
||||
user_api_key = None
|
||||
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
current_app.logger.info(
|
||||
f"/stream - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})},
|
||||
)
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
if "isNoneDoc" in data and data["isNoneDoc"] is True:
|
||||
chunks = 0
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=history,
|
||||
prompt=prompt,
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
)
|
||||
|
||||
return Response(
|
||||
complete_stream(
|
||||
question=question,
|
||||
retriever=retriever,
|
||||
conversation_id=conversation_id,
|
||||
user_api_key=user_api_key,
|
||||
isNoneDoc=data.get("isNoneDoc"),
|
||||
),
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
|
||||
except ValueError:
|
||||
message = "Malformed request body"
|
||||
print("\033[91merr", str(message), file=sys.stderr)
|
||||
return Response(
|
||||
error_stream_generate(message),
|
||||
status=400,
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||
extra={"error": str(e), "traceback": traceback.format_exc()},
|
||||
)
|
||||
message = e.args[0]
|
||||
status_code = 400
|
||||
# Custom exceptions with two arguments, index 1 as status code
|
||||
if len(e.args) >= 2:
|
||||
status_code = e.args[1]
|
||||
return Response(
|
||||
error_stream_generate(message),
|
||||
status=status_code,
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
|
||||
|
||||
def error_stream_generate(err_response):
|
||||
data = json.dumps({"type": "error", "error":err_response})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
@answer.route("/api/answer", methods=["POST"])
|
||||
def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
if "history" not in data:
|
||||
history = []
|
||||
else:
|
||||
history = data["history"]
|
||||
if "conversation_id" not in data:
|
||||
conversation_id = None
|
||||
else:
|
||||
conversation_id = data["conversation_id"]
|
||||
print("-" * 5)
|
||||
if "prompt_id" in data:
|
||||
prompt_id = data["prompt_id"]
|
||||
else:
|
||||
prompt_id = "default"
|
||||
if "chunks" in data:
|
||||
chunks = int(data["chunks"])
|
||||
else:
|
||||
chunks = 2
|
||||
if "token_limit" in data:
|
||||
token_limit = data["token_limit"]
|
||||
else:
|
||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||
|
||||
# use try and except to check for exception
|
||||
try:
|
||||
# check if the vectorstore is set
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key["chunks"])
|
||||
prompt_id = data_key["prompt_id"]
|
||||
source = {"active_docs": data_key["source"]}
|
||||
user_api_key = data["api_key"]
|
||||
else:
|
||||
source = data
|
||||
user_api_key = None
|
||||
|
||||
if (
|
||||
source["active_docs"].split("/")[0] == "default"
|
||||
or source["active_docs"].split("/")[0] == "local"
|
||||
):
|
||||
retriever_name = "classic"
|
||||
else:
|
||||
retriever_name = source["active_docs"]
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=history,
|
||||
prompt=prompt,
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
)
|
||||
source_log_docs = []
|
||||
response_full = ""
|
||||
for line in retriever.gen():
|
||||
if "source" in line:
|
||||
source_log_docs.append(line["source"])
|
||||
elif "answer" in line:
|
||||
response_full += line["answer"]
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=user_api_key
|
||||
)
|
||||
|
||||
result = {"answer": response_full, "sources": source_log_docs}
|
||||
result["conversation_id"] = save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
)
|
||||
|
||||
return result
|
||||
except Exception as e:
|
||||
# print whole traceback
|
||||
traceback.print_exc()
|
||||
print(str(e))
|
||||
return bad_request(500, str(e))
|
||||
data = json.dumps({"type": "error", "error": err_response})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
|
||||
@answer.route("/api/search", methods=["POST"])
|
||||
def api_search():
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = data["question"]
|
||||
if "chunks" in data:
|
||||
chunks = int(data["chunks"])
|
||||
else:
|
||||
chunks = 2
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key["chunks"])
|
||||
source = {"active_docs": data_key["source"]}
|
||||
user_api_key = data["api_key"]
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
user_api_key = None
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
if (
|
||||
source["active_docs"].split("/")[0] == "default"
|
||||
or source["active_docs"].split("/")[0] == "local"
|
||||
):
|
||||
retriever_name = "classic"
|
||||
else:
|
||||
retriever_name = source["active_docs"]
|
||||
if "token_limit" in data:
|
||||
token_limit = data["token_limit"]
|
||||
else:
|
||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=[],
|
||||
prompt="default",
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
@answer_ns.route("/api/answer")
|
||||
class Answer(Resource):
|
||||
answer_model = api.model(
|
||||
"AnswerModel",
|
||||
{
|
||||
"question": fields.String(
|
||||
required=True, description="The question to answer"
|
||||
),
|
||||
"history": fields.List(
|
||||
fields.String, required=False, description="Conversation history"
|
||||
),
|
||||
"conversation_id": fields.String(
|
||||
required=False, description="Conversation ID"
|
||||
),
|
||||
"prompt_id": fields.String(
|
||||
required=False, default="default", description="Prompt ID"
|
||||
),
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"token_limit": fields.Integer(required=False, description="Token limit"),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"api_key": fields.String(required=False, description="API key"),
|
||||
"active_docs": fields.String(
|
||||
required=False, description="Active documents"
|
||||
),
|
||||
"isNoneDoc": fields.Boolean(
|
||||
required=False, description="Flag indicating if no document is used"
|
||||
),
|
||||
},
|
||||
)
|
||||
docs = retriever.search()
|
||||
return docs
|
||||
|
||||
@api.expect(answer_model)
|
||||
@api.doc(description="Provide an answer based on the question and retriever")
|
||||
def post(self):
|
||||
data = request.get_json()
|
||||
required_fields = ["question"]
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
|
||||
try:
|
||||
question = data["question"]
|
||||
history = data.get("history", [])
|
||||
conversation_id = data.get("conversation_id")
|
||||
prompt_id = data.get("prompt_id", "default")
|
||||
chunks = int(data.get("chunks", 2))
|
||||
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
|
||||
retriever_name = data.get("retriever", "classic")
|
||||
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key.get("chunks", 2))
|
||||
prompt_id = data_key.get("prompt_id", "default")
|
||||
source = {"active_docs": data_key.get("source")}
|
||||
retriever_name = data_key.get("retriever", retriever_name)
|
||||
user_api_key = data["api_key"]
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
retriever_name = get_retriever(data["active_docs"]) or retriever_name
|
||||
user_api_key = None
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
current_app.logger.info(
|
||||
f"/api/answer - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})},
|
||||
)
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=history,
|
||||
prompt=prompt,
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
)
|
||||
|
||||
source_log_docs = []
|
||||
response_full = ""
|
||||
for line in retriever.gen():
|
||||
if "source" in line:
|
||||
source_log_docs.append(line["source"])
|
||||
elif "answer" in line:
|
||||
response_full += line["answer"]
|
||||
|
||||
if data.get("isNoneDoc"):
|
||||
for doc in source_log_docs:
|
||||
doc["source"] = "None"
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=user_api_key
|
||||
)
|
||||
|
||||
result = {"answer": response_full, "sources": source_log_docs}
|
||||
result["conversation_id"] = str(
|
||||
save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
)
|
||||
)
|
||||
retriever_params = retriever.get_params()
|
||||
user_logs_collection.insert_one(
|
||||
{
|
||||
"action": "api_answer",
|
||||
"level": "info",
|
||||
"user": "local",
|
||||
"api_key": user_api_key,
|
||||
"question": question,
|
||||
"response": response_full,
|
||||
"sources": source_log_docs,
|
||||
"retriever_params": retriever_params,
|
||||
"timestamp": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||
extra={"error": str(e), "traceback": traceback.format_exc()},
|
||||
)
|
||||
return bad_request(500, str(e))
|
||||
|
||||
return make_response(result, 200)
|
||||
|
||||
|
||||
@answer_ns.route("/api/search")
class Search(Resource):
    """REST resource that runs a retriever for a question and returns the matching documents."""

    # Request payload schema advertised for POST /api/search.
    search_model = api.model(
        "SearchModel",
        {
            "question": fields.String(
                required=True, description="The question to search"
            ),
            "chunks": fields.Integer(
                required=False, default=2, description="Number of chunks"
            ),
            "api_key": fields.String(
                required=False, description="API key for authentication"
            ),
            "active_docs": fields.String(
                required=False, description="Active documents for retrieval"
            ),
            "retriever": fields.String(required=False, description="Retriever type"),
            "token_limit": fields.Integer(
                required=False, description="Limit for tokens"
            ),
            "isNoneDoc": fields.Boolean(
                required=False, description="Flag indicating if no document is used"
            ),
        },
    )

    @api.expect(search_model)
    @api.doc(
        description="Search for relevant documents based on the question and retriever"
    )
    def post(self):
        """Search the configured source for documents relevant to ``question``.

        The JSON body must contain ``question``. When ``api_key`` is present,
        the source and chunk count come from the data stored for that key
        (overriding any ``chunks`` value in the request); otherwise
        ``active_docs`` selects the source, and with neither the source is
        empty. Each search is recorded in ``user_logs_collection``.

        Returns:
            The retrieved documents with status 200, the result of
            ``check_required_fields`` when a required field is missing, or
            ``bad_request(500, ...)`` if anything raises while searching.
        """
        data = request.get_json()
        required_fields = ["question"]
        missing_fields = check_required_fields(data, required_fields)
        if missing_fields:
            return missing_fields

        try:
            question = data["question"]
            chunks = int(data.get("chunks", 2))
            token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
            retriever_name = data.get("retriever", "classic")

            if "api_key" in data:
                # Settings stored for the API key take precedence over the request.
                data_key = get_data_from_api_key(data["api_key"])
                chunks = int(data_key.get("chunks", 2))
                source = {"active_docs": data_key.get("source")}
                user_api_key = data["api_key"]
            elif "active_docs" in data:
                source = {"active_docs": data["active_docs"]}
                user_api_key = None
            else:
                source = {}
                user_api_key = None

            # Label the entry with this endpoint; it previously said
            # "/api/answer", which made search requests look like answer requests.
            current_app.logger.info(
                f"/api/search - request_data: {data}, source: {source}",
                extra={"data": json.dumps({"request_data": data, "source": source})},
            )

            retriever = RetrieverCreator.create_retriever(
                retriever_name,
                question=question,
                source=source,
                chat_history=[],
                prompt="default",
                chunks=chunks,
                token_limit=token_limit,
                gpt_model=gpt_model,
                user_api_key=user_api_key,
            )

            docs = retriever.search()
            retriever_params = retriever.get_params()

            user_logs_collection.insert_one(
                {
                    "action": "api_search",
                    "level": "info",
                    "user": "local",
                    "api_key": user_api_key,
                    "question": question,
                    "sources": docs,
                    "retriever_params": retriever_params,
                    "timestamp": datetime.datetime.now(datetime.timezone.utc),
                }
            )

            # With isNoneDoc set, the source of every returned document is
            # replaced by the literal string "None" (after the log entry is written).
            if data.get("isNoneDoc"):
                for doc in docs:
                    doc["source"] = "None"

        except Exception as e:
            current_app.logger.error(
                f"/api/search - error: {str(e)} - traceback: {traceback.format_exc()}",
                extra={"error": str(e), "traceback": traceback.format_exc()},
            )
            return bad_request(500, str(e))

        return make_response(docs, 200)
|
||||
|
||||
@@ -1,20 +1,25 @@
|
||||
import os
|
||||
import datetime
|
||||
from flask import Blueprint, request, send_from_directory
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
|
||||
|
||||
internal = Blueprint("internal", __name__)
|
||||
|
||||
|
||||
internal = Blueprint('internal', __name__)
|
||||
@internal.route("/api/download", methods=["get"])
|
||||
def download_file():
|
||||
user = secure_filename(request.args.get("user"))
|
||||
@@ -24,7 +29,6 @@ def download_file():
|
||||
return send_from_directory(save_dir, filename, as_attachment=True)
|
||||
|
||||
|
||||
|
||||
@internal.route("/api/upload_index", methods=["POST"])
|
||||
def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
@@ -35,7 +39,13 @@ def upload_index_files():
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
tokens = secure_filename(request.form["tokens"])
|
||||
save_dir = os.path.join(current_dir, "indexes", user, job_name)
|
||||
retriever = secure_filename(request.form["retriever"])
|
||||
id = secure_filename(request.form["id"])
|
||||
type = secure_filename(request.form["type"])
|
||||
remote_data = request.form["remote_data"] if "remote_data" in request.form else None
|
||||
sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
|
||||
|
||||
save_dir = os.path.join(current_dir, "indexes", str(id))
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
if "file_faiss" not in request.files:
|
||||
print("No file part")
|
||||
@@ -50,22 +60,45 @@ def upload_index_files():
|
||||
if file_pkl.filename == "":
|
||||
return {"status": "no file name"}
|
||||
# saves index files
|
||||
|
||||
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, "index.faiss"))
|
||||
file_pkl.save(os.path.join(save_dir, "index.pkl"))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one(
|
||||
{
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local",
|
||||
"tokens": tokens
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
|
||||
if existing_entry:
|
||||
sources_collection.update_one(
|
||||
{"_id": ObjectId(id)},
|
||||
{
|
||||
"$set": {
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"date": datetime.datetime.now(),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": type,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"remote_data": remote_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
}
|
||||
},
|
||||
)
|
||||
else:
|
||||
sources_collection.insert_one(
|
||||
{
|
||||
"_id": ObjectId(id),
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"date": datetime.datetime.now(),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": type,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"remote_data": remote_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,12 +1,38 @@
|
||||
from application.worker import ingest_worker, remote_worker
|
||||
from datetime import timedelta
|
||||
|
||||
from application.celery_init import celery
|
||||
from application.worker import ingest_worker, remote_worker, sync_worker
|
||||
|
||||
|
||||
@celery.task(bind=True)
def ingest(self, directory, formats, name_job, filename, user):
    """Celery task that forwards its arguments, plus the bound task, to ``ingest_worker``."""
    return ingest_worker(self, directory, formats, name_job, filename, user)
|
||||
|
||||
|
||||
@celery.task(bind=True)
def ingest_remote(self, source_data, job_name, user, loader):
    """Celery task that forwards its arguments, plus the bound task, to ``remote_worker``."""
    return remote_worker(self, source_data, job_name, user, loader)
|
||||
|
||||
|
||||
@celery.task(bind=True)
def schedule_syncs(self, frequency):
    """Celery task that calls ``sync_worker`` with the bound task and ``frequency``."""
    return sync_worker(self, frequency)
|
||||
|
||||
|
||||
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the periodic ``schedule_syncs`` runs once Celery is configured.

    Three schedules are added on ``sender``, in this order: every day
    ("daily"), every week ("weekly") and every 30 days ("monthly").
    """
    schedule = (
        (timedelta(days=1), "daily"),
        (timedelta(weeks=1), "weekly"),
        (timedelta(days=30), "monthly"),
    )
    for interval, frequency in schedule:
        sender.add_periodic_task(interval, schedule_syncs.s(frequency))
|
||||
|
||||
@@ -1,17 +1,23 @@
|
||||
import platform
|
||||
|
||||
import dotenv
|
||||
from application.celery_init import celery
|
||||
from flask import Flask, request, redirect
|
||||
from application.core.settings import settings
|
||||
from application.api.user.routes import user
|
||||
from flask import Flask, redirect, request
|
||||
|
||||
from application.api.answer.routes import answer
|
||||
from application.api.internal.routes import internal
|
||||
from application.api.user.routes import user
|
||||
from application.celery_init import celery
|
||||
from application.core.logging_config import setup_logging
|
||||
from application.core.settings import settings
|
||||
from application.extensions import api
|
||||
|
||||
if platform.system() == "Windows":
|
||||
import pathlib
|
||||
|
||||
pathlib.PosixPath = pathlib.WindowsPath
|
||||
|
||||
dotenv.load_dotenv()
|
||||
setup_logging()
|
||||
|
||||
app = Flask(__name__)
|
||||
app.register_blueprint(user)
|
||||
@@ -21,16 +27,19 @@ app.config.update(
|
||||
UPLOAD_FOLDER="inputs",
|
||||
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
|
||||
CELERY_RESULT_BACKEND=settings.CELERY_RESULT_BACKEND,
|
||||
MONGO_URI=settings.MONGO_URI
|
||||
MONGO_URI=settings.MONGO_URI,
|
||||
)
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
api.init_app(app)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def home():
|
||||
if request.remote_addr in ('0.0.0.0', '127.0.0.1', 'localhost', '172.18.0.1'):
|
||||
return redirect('http://localhost:5173')
|
||||
if request.remote_addr in ("0.0.0.0", "127.0.0.1", "localhost", "172.18.0.1"):
|
||||
return redirect("http://localhost:5173")
|
||||
else:
|
||||
return 'Welcome to DocsGPT Backend!'
|
||||
return "Welcome to DocsGPT Backend!"
|
||||
|
||||
|
||||
@app.after_request
|
||||
def after_request(response):
|
||||
@@ -39,6 +48,6 @@ def after_request(response):
|
||||
response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
|
||||
return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=settings.FLASK_DEBUG_MODE, port=7091)
|
||||
|
||||
|
||||
93
application/cache.py
Normal file
93
application/cache.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import redis
|
||||
import time
|
||||
import json
|
||||
import logging
|
||||
from threading import Lock
|
||||
from application.core.settings import settings
|
||||
from application.utils import get_hash
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_redis_instance = None
|
||||
_instance_lock = Lock()
|
||||
|
||||
def get_redis_instance():
    """Return the module-wide Redis client, creating it on first use.

    The client is built from ``settings.CACHE_REDIS_URL`` with a 2 second
    socket connect timeout. Creation happens while holding ``_instance_lock``
    and the cached value is re-checked under the lock. If building the client
    raises ``redis.ConnectionError`` the error is logged and ``None`` is
    returned, so a later call tries again.
    """
    global _redis_instance

    # Fast path: a client already exists, no need to take the lock.
    if _redis_instance is not None:
        return _redis_instance

    with _instance_lock:
        if _redis_instance is None:
            try:
                _redis_instance = redis.Redis.from_url(
                    settings.CACHE_REDIS_URL, socket_connect_timeout=2
                )
            except redis.ConnectionError as err:
                logger.error(f"Redis connection error: {err}")
                _redis_instance = None
        return _redis_instance
|
||||
|
||||
def gen_cache_key(*messages, model="docgpt"):
    """Build a cache key from a model name and a sequence of message dicts.

    Args:
        *messages: Message dictionaries; each one must be a ``dict``.
        model: Model name mixed into the key. Defaults to ``"docgpt"``.

    Returns:
        The result of ``get_hash`` applied to ``"<model>_<json of messages>"``,
        where the messages are serialised with sorted keys.

    Raises:
        ValueError: If any message is not a dictionary.
    """
    for message in messages:
        if not isinstance(message, dict):
            raise ValueError("All messages must be dictionaries.")

    serialized = json.dumps(list(messages), sort_keys=True)
    return get_hash(f"{model}_{serialized}")
|
||||
|
||||
def gen_cache(func):
    """Decorator that caches the result of a non-streaming generation call in Redis.

    The wrapped callable is invoked as ``func(self, model, messages, *args,
    **kwargs)``. Results are stored for 1800 seconds under a key derived from
    the model name and the messages. Cache failures never prevent generation:
    any Redis error is logged and the call proceeds uncached.

    Returns:
        A wrapper that returns the cached response (decoded as UTF-8) on a
        hit, otherwise the fresh result of ``func``. If the messages cannot
        be turned into a cache key, an error string is returned instead.
    """

    def wrapper(self, model, messages, *args, **kwargs):
        # Only key generation is guarded by ValueError handling, so a
        # ValueError raised by the generation call itself is not misreported
        # as a cache-key problem.
        try:
            # Include the model so different models never share a cache entry.
            cache_key = gen_cache_key(*messages, model=model)
        except ValueError as e:
            logger.error(e)
            return "Error: No user message found in the conversation to generate a cache key."

        redis_client = get_redis_instance()
        if redis_client:
            try:
                cached_response = redis_client.get(cache_key)
                if cached_response:
                    return cached_response.decode('utf-8')
            # RedisError also covers timeouts, which are not ConnectionError.
            except redis.RedisError as e:
                logger.error(f"Redis connection error: {e}")

        result = func(self, model, messages, *args, **kwargs)

        if redis_client:
            try:
                redis_client.set(cache_key, result, ex=1800)
            except redis.RedisError as e:
                logger.error(f"Redis connection error: {e}")

        return result

    return wrapper
|
||||
|
||||
def stream_cache(func):
    """Decorator that caches the chunks of a streaming generation call in Redis.

    The wrapped callable is invoked as ``func(self, model, messages, stream,
    *args, **kwargs)`` and must return an iterable of chunks. On a cache hit
    the stored chunks are replayed with a 0.03 second pause after each one.
    On a miss the chunks are yielded as produced and, once the stream has
    been fully consumed, stored as a JSON list for 1800 seconds. Redis errors
    are logged and never interrupt the stream.

    Raises:
        ValueError: From ``gen_cache_key`` when a message is not a dict
            (raised when the generator is first advanced).
    """

    def wrapper(self, model, messages, stream, *args, **kwargs):
        # Include the model so different models never share a cache entry.
        cache_key = gen_cache_key(*messages, model=model)
        logger.info(f"Stream cache key: {cache_key}")

        redis_client = get_redis_instance()

        # Look up the cache first; replay happens outside the try block so
        # that only the Redis call is covered by the Redis error handler.
        cached_chunks = None
        if redis_client:
            try:
                cached_response = redis_client.get(cache_key)
                if cached_response:
                    logger.info(f"Cache hit for stream key: {cache_key}")
                    cached_chunks = json.loads(cached_response.decode('utf-8'))
            # RedisError also covers timeouts, which are not ConnectionError.
            except redis.RedisError as e:
                logger.error(f"Redis connection error: {e}")

        if cached_chunks is not None:
            for chunk in cached_chunks:
                yield chunk
                time.sleep(0.03)
            return

        result = func(self, model, messages, stream, *args, **kwargs)
        stream_cache_data = []

        for chunk in result:
            stream_cache_data.append(chunk)
            yield chunk

        if redis_client:
            try:
                redis_client.set(cache_key, json.dumps(stream_cache_data), ex=1800)
                logger.info(f"Stream cache saved for key: {cache_key}")
            except redis.RedisError as e:
                logger.error(f"Redis connection error: {e}")

    return wrapper
|
||||
@@ -1,9 +1,15 @@
|
||||
from celery import Celery
|
||||
from application.core.settings import settings
|
||||
from celery.signals import setup_logging
|
||||
|
||||
def make_celery(app_name=__name__):
    """Create a Celery application using the broker and result backend from ``settings``.

    Args:
        app_name: Name given to the Celery app. Defaults to this module's name.

    Returns:
        The Celery instance, with its configuration updated from ``settings``.
    """
    app = Celery(
        app_name,
        broker=settings.CELERY_BROKER_URL,
        backend=settings.CELERY_RESULT_BACKEND,
    )
    app.conf.update(settings)
    return app
|
||||
|
||||
@setup_logging.connect
def config_loggers(*args, **kwargs):
    """Handler for Celery's ``setup_logging`` signal that applies the application's logging config."""
    # Imported here under an alias: at module level the name ``setup_logging``
    # is the Celery signal used by the decorator above.
    from application.core.logging_config import setup_logging as apply_logging_config

    apply_logging_config()
|
||||
|
||||
celery = make_celery()
|
||||
|
||||
22
application/core/logging_config.py
Normal file
22
application/core/logging_config.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from logging.config import dictConfig
|
||||
|
||||
def setup_logging():
    """Configure the root logger to write INFO-and-above records to stdout.

    Installs a single console handler on the root logger using the format
    ``[time] LEVEL in module: message``.
    """
    default_formatter = {
        'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
    }
    console_handler = {
        "class": "logging.StreamHandler",
        "stream": "ext://sys.stdout",
        "formatter": "default",
    }
    root_logger = {
        'level': 'INFO',
        'handlers': ['console'],
    }

    dictConfig(
        {
            'version': 1,
            'formatters': {'default': default_formatter},
            "handlers": {"console": console_handler},
            'root': root_logger,
        }
    )
|
||||
24
application/core/mongo_db.py
Normal file
24
application/core/mongo_db.py
Normal file
@@ -0,0 +1,24 @@
|
||||
from application.core.settings import settings
|
||||
from pymongo import MongoClient
|
||||
|
||||
|
||||
class MongoDB:
    """Holder for a single ``MongoClient`` stored on the class."""

    # The client instance, or None when none has been created (or after closing).
    _client = None

    @classmethod
    def get_client(cls):
        """
        Return the stored MongoDB client, creating it from
        ``settings.MONGO_URI`` when none exists yet.
        """
        client = cls._client
        if client is None:
            client = MongoClient(settings.MONGO_URI)
            cls._client = client
        return client

    @classmethod
    def close_client(cls):
        """
        Close the stored MongoDB client, if any, and forget it so the next
        ``get_client`` call creates a new one.
        """
        if cls._client is None:
            return
        cls._client.close()
        cls._client = None
|
||||
@@ -18,9 +18,12 @@ class Settings(BaseSettings):
|
||||
DEFAULT_MAX_HISTORY: int = 150
|
||||
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
|
||||
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
|
||||
|
||||
# LLM Cache
|
||||
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
|
||||
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
|
||||
API_KEY: Optional[str] = None # LLM api key
|
||||
@@ -29,6 +32,7 @@ class Settings(BaseSettings):
|
||||
OPENAI_API_VERSION: Optional[str] = None # azure openai api version
|
||||
AZURE_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for answering
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for embeddings
|
||||
OPENAI_BASE_URL: Optional[str] = None # openai base url for open ai compatable models
|
||||
|
||||
# elasticsearch
|
||||
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
|
||||
@@ -61,6 +65,14 @@ class Settings(BaseSettings):
|
||||
QDRANT_PATH: Optional[str] = None
|
||||
QDRANT_DISTANCE_FUNC: str = "Cosine"
|
||||
|
||||
# Milvus vectorstore config
|
||||
MILVUS_COLLECTION_NAME: Optional[str] = "docsgpt"
|
||||
MILVUS_URI: Optional[str] = "./milvus_local.db" # milvus lite version as default
|
||||
MILVUS_TOKEN: Optional[str] = ""
|
||||
|
||||
# LanceDB vectorstore config
|
||||
LANCEDB_PATH: str = "/tmp/lancedb" # Path where LanceDB stores its local data
|
||||
LANCEDB_TABLE_NAME: Optional[str] = "docsgpts" # Name of the table to use for storing vectors
|
||||
BRAVE_SEARCH_API_KEY: Optional[str] = None
|
||||
|
||||
FLASK_DEBUG_MODE: bool = False
|
||||
|
||||
7
application/extensions.py
Normal file
7
application/extensions.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from flask_restx import Api
|
||||
|
||||
api = Api(
|
||||
version="1.0",
|
||||
title="DocsGPT API",
|
||||
description="API for DocsGPT",
|
||||
)
|
||||
@@ -1,28 +1,29 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from application.usage import gen_token_usage, stream_token_usage
|
||||
from application.cache import stream_cache, gen_cache
|
||||
|
||||
|
||||
class BaseLLM(ABC):
|
||||
def __init__(self):
|
||||
self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
|
||||
|
||||
def _apply_decorator(self, method, decorator, *args, **kwargs):
|
||||
return decorator(method, *args, **kwargs)
|
||||
def _apply_decorator(self, method, decorators, *args, **kwargs):
|
||||
for decorator in decorators:
|
||||
method = decorator(method)
|
||||
return method(self, *args, **kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def _raw_gen(self, model, messages, stream, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def gen(self, model, messages, stream=False, *args, **kwargs):
|
||||
return self._apply_decorator(self._raw_gen, gen_token_usage)(
|
||||
self, model=model, messages=messages, stream=stream, *args, **kwargs
|
||||
)
|
||||
decorators = [gen_token_usage, gen_cache]
|
||||
return self._apply_decorator(self._raw_gen, decorators=decorators, model=model, messages=messages, stream=stream, *args, **kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def _raw_gen_stream(self, model, messages, stream, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def gen_stream(self, model, messages, stream=True, *args, **kwargs):
|
||||
return self._apply_decorator(self._raw_gen_stream, stream_token_usage)(
|
||||
self, model=model, messages=messages, stream=stream, *args, **kwargs
|
||||
)
|
||||
decorators = [stream_cache, stream_token_usage]
|
||||
return self._apply_decorator(self._raw_gen_stream, decorators=decorators, model=model, messages=messages, stream=stream, *args, **kwargs)
|
||||
45
application/llm/groq.py
Normal file
45
application/llm/groq.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from application.llm.base import BaseLLM
|
||||
|
||||
|
||||
|
||||
class GroqLLM(BaseLLM):
    """LLM backend that uses the OpenAI client pointed at Groq's OpenAI-compatible endpoint."""

    def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
        """Create the client for ``https://api.groq.com/openai/v1`` and store both keys.

        Args:
            api_key: Key passed to the OpenAI client.
            user_api_key: Stored on the instance as ``user_api_key``.
        """
        # The OpenAI package is imported only when a GroqLLM is constructed.
        from openai import OpenAI

        super().__init__(*args, **kwargs)
        self.client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
        self.api_key = api_key
        self.user_api_key = user_api_key

    def _raw_gen(
        self,
        baseself,
        model,
        messages,
        stream=False,
        **kwargs
    ):
        """Request a chat completion and return the content of its first choice."""
        completion = self.client.chat.completions.create(
            model=model, messages=messages, stream=stream, **kwargs
        )
        first_choice = completion.choices[0]
        return first_choice.message.content

    def _raw_gen_stream(
        self,
        baseself,
        model,
        messages,
        stream=True,
        **kwargs
    ):
        """Request a chat completion and yield each chunk's delta content, skipping ``None``."""
        chunks = self.client.chat.completions.create(
            model=model, messages=messages, stream=stream, **kwargs
        )

        for chunk in chunks:
            if chunk.choices[0].delta.content is None:
                continue
            yield chunk.choices[0].delta.content
|
||||
@@ -1,3 +1,4 @@
|
||||
from application.llm.groq import GroqLLM
|
||||
from application.llm.openai import OpenAILLM, AzureOpenAILLM
|
||||
from application.llm.sagemaker import SagemakerAPILLM
|
||||
from application.llm.huggingface import HuggingFaceLLM
|
||||
@@ -17,6 +18,7 @@ class LLMCreator:
|
||||
"anthropic": AnthropicLLM,
|
||||
"docsgpt": DocsGPTAPILLM,
|
||||
"premai": PremAILLM,
|
||||
"groq": GroqLLM
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -2,25 +2,23 @@ from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
|
||||
|
||||
class OpenAILLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
|
||||
global openai
|
||||
from openai import OpenAI
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
self.client = OpenAI(
|
||||
api_key=api_key,
|
||||
)
|
||||
if settings.OPENAI_BASE_URL:
|
||||
self.client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=settings.OPENAI_BASE_URL
|
||||
)
|
||||
else:
|
||||
self.client = OpenAI(api_key=api_key)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _get_openai(self):
|
||||
# Import openai when needed
|
||||
import openai
|
||||
|
||||
return openai
|
||||
|
||||
def _raw_gen(
|
||||
self,
|
||||
baseself,
|
||||
@@ -29,7 +27,7 @@ class OpenAILLM(BaseLLM):
|
||||
stream=False,
|
||||
engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
**kwargs
|
||||
):
|
||||
):
|
||||
response = self.client.chat.completions.create(
|
||||
model=model, messages=messages, stream=stream, **kwargs
|
||||
)
|
||||
@@ -44,7 +42,7 @@ class OpenAILLM(BaseLLM):
|
||||
stream=True,
|
||||
engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
**kwargs
|
||||
):
|
||||
):
|
||||
response = self.client.chat.completions.create(
|
||||
model=model, messages=messages, stream=stream, **kwargs
|
||||
)
|
||||
@@ -73,8 +71,3 @@ class AzureOpenAILLM(OpenAILLM):
|
||||
api_base=settings.OPENAI_API_BASE,
|
||||
deployment_name=settings.AZURE_DEPLOYMENT_NAME,
|
||||
)
|
||||
|
||||
def _get_openai(self):
|
||||
openai = super()._get_openai()
|
||||
|
||||
return openai
|
||||
|
||||
@@ -10,18 +10,23 @@ from application.parser.file.epub_parser import EpubParser
|
||||
from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
|
||||
from application.parser.file.json_parser import JSONParser
|
||||
from application.parser.file.pptx_parser import PPTXParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
".docx": DocxParser(),
|
||||
".csv": PandasCSVParser(),
|
||||
".xlsx":ExcelParser(),
|
||||
".epub": EpubParser(),
|
||||
".md": MarkdownParser(),
|
||||
".rst": RstParser(),
|
||||
".html": HTMLParser(),
|
||||
".mdx": MarkdownParser(),
|
||||
".json":JSONParser(),
|
||||
".pptx":PPTXParser(),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
Contains parser for html files.
|
||||
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
@@ -18,66 +17,8 @@ class HTMLParser(BaseParser):
|
||||
return {}
|
||||
|
||||
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
|
||||
"""Parse file.
|
||||
from langchain_community.document_loaders import BSHTMLLoader
|
||||
|
||||
Returns:
|
||||
Union[str, List[str]]: a string or a List of strings.
|
||||
"""
|
||||
try:
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
except ImportError:
|
||||
raise ValueError("unstructured package is required to parse HTML files.")
|
||||
|
||||
# Using the unstructured library to convert the html to isd format
|
||||
# isd sample : isd = [
|
||||
# {"text": "My Title", "type": "Title"},
|
||||
# {"text": "My Narrative", "type": "NarrativeText"}
|
||||
# ]
|
||||
with open(file, "r", encoding="utf-8") as fp:
|
||||
elements = partition_html(file=fp)
|
||||
isd = convert_to_isd(elements)
|
||||
|
||||
# Removing non ascii charactwers from isd_el['text']
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()
|
||||
|
||||
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
|
||||
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
|
||||
# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
|
||||
for isd_el in isd:
|
||||
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
|
||||
|
||||
# Creating a list of all the indexes of isd_el['type'] = 'Title'
|
||||
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
|
||||
|
||||
# Creating 'Chunks' - List of lists of strings
|
||||
# each list starting with isd_el['type'] = 'Title' and all the data till the next 'Title'
|
||||
# Each Chunk can be thought of as an individual set of data, which can be sent to the model
|
||||
# Where Each Title is grouped together with the data under it
|
||||
|
||||
Chunks = [[]]
|
||||
final_chunks = list(list())
|
||||
|
||||
for i, isd_el in enumerate(isd):
|
||||
if i in title_indexes:
|
||||
Chunks.append([])
|
||||
Chunks[-1].append(isd_el['text'])
|
||||
|
||||
# Removing all the chunks with sum of length of all the strings in the chunk < 25
|
||||
# TODO: This value can be an user defined variable
|
||||
for chunk in Chunks:
|
||||
# sum of length of all the strings in the chunk
|
||||
sum = 0
|
||||
sum += len(str(chunk))
|
||||
if sum < 25:
|
||||
Chunks.remove(chunk)
|
||||
else:
|
||||
# appending all the approved chunks to final_chunks as a single string
|
||||
final_chunks.append(" ".join([str(item) for item in chunk]))
|
||||
return final_chunks
|
||||
loader = BSHTMLLoader(file)
|
||||
data = loader.load()
|
||||
return data
|
||||
|
||||
57
application/parser/file/json_parser.py
Normal file
57
application/parser/file/json_parser.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import json
|
||||
from typing import Any, Dict, List, Union
|
||||
from pathlib import Path
|
||||
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
class JSONParser(BaseParser):
    r"""JSON (.json) parser.

    Parses JSON files into a list of items or a concatenated document.
    A top-level array is used as-is; any other top-level value (object,
    string, number, boolean, null) is treated as a single item.

    Args:
        concat_rows (bool): Whether to concatenate all rows into one document.
            If set to False, the list of parsed items is returned instead.
            True by default.

        row_joiner (str): Separator to use for joining each row.
            Only used when `concat_rows=True`.
            Set to "\n" by default.

        json_config (dict): Keyword arguments forwarded to `json.load`.
            Defaults to no extra arguments.

    """

    def __init__(
        self,
        *args: Any,
        concat_rows: bool = True,
        row_joiner: str = "\n",
        json_config: Union[dict, None] = None,
        **kwargs: Any
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows
        self._row_joiner = row_joiner
        # A None default plus a copy avoids sharing one mutable dict between
        # every parser instance.
        self._json_config = dict(json_config) if json_config else {}

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        """Parse a JSON file.

        Args:
            file (Path): Path of the JSON file, read as UTF-8.
            errors (str): Decoding error policy passed to `open`
                ('ignore' by default).

        Returns:
            The `str()` of every item joined with `row_joiner` when
            `concat_rows` is True, otherwise the list of parsed items.
        """
        # Honour the caller's decoding policy instead of silently ignoring it.
        with open(file, 'r', encoding='utf-8', errors=errors) as f:
            data = json.load(f, **self._json_config)

        # Wrap every non-array value, not just objects: a bare string would
        # otherwise be split into characters and a number would raise.
        if not isinstance(data, list):
            data = [data]

        if self._concat_rows:
            return self._row_joiner.join([str(item) for item in data])
        else:
            return data
|
||||
75
application/parser/file/pptx_parser.py
Normal file
75
application/parser/file/pptx_parser.py
Normal file
@@ -0,0 +1,75 @@
|
||||
"""PPT parser.
|
||||
Contains parsers for presentation (.pptx) files to extract slide text.
|
||||
"""
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
class PPTXParser(BaseParser):
    r"""PPTX (.pptx) parser that extracts the text found on PowerPoint slides.

    Args:
        concat_slides (bool): When True (the default) the text of all slides
            is joined into one string; when False a list with one entry per
            slide is returned.
        slide_separator (str): String placed between slides when
            `concat_slides=True`. Default is "\n".
    See https://python-pptx.readthedocs.io/en/latest/ for the underlying library.
    """

    def __init__(
        self,
        *args: Any,
        concat_slides: bool = True,
        slide_separator: str = "\n",
        **kwargs: Any
    ) -> None:
        """Store the slide-joining options."""
        super().__init__(*args, **kwargs)
        self._slide_separator = slide_separator
        self._concat_slides = concat_slides

    def _init_parser(self) -> Dict:
        """Return an empty parser configuration."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        r"""
        Read a .pptx file and collect the text of each slide.

        For every slide, the `text` of each shape that has such an attribute
        is concatenated (no separator between shapes) and the result is
        stripped of surrounding whitespace.

        Args:
            file (Path): Path to the .pptx file.
            errors (str): Accepted for interface compatibility; not used here.
        Returns:
            Union[str, List[str]]: The slide texts joined with the slide
            separator if concat_slides is True, otherwise the list of slide texts.
        Raises:
            ImportError: If the `pptx` module cannot be imported.
        """
        try:
            from pptx import Presentation
        except ImportError:
            raise ImportError("pptx module is required to read .PPTX files.")

        deck = Presentation(file)

        # One stripped string per slide, built from every shape exposing `text`.
        per_slide = [
            "".join(
                shape.text for shape in slide.shapes if hasattr(shape, "text")
            ).strip()
            for slide in deck.slides
        ]

        if not self._concat_slides:
            return per_slide
        return self._slide_separator.join(per_slide)
|
||||
@@ -113,3 +113,68 @@ class PandasCSVParser(BaseParser):
|
||||
return (self._row_joiner).join(text_list)
|
||||
else:
|
||||
return text_list
|
||||
|
||||
|
||||
class ExcelParser(BaseParser):
    r"""Excel (.xlsx) parser.

    Parses Excel files using Pandas `read_excel` function.
    If special parameters are required, use the `pandas_config` dict.

    Args:
        concat_rows (bool): whether to concatenate all rows into one document.
            If set to False, a Document will be created for each row.
            True by default.

        col_joiner (str): Separator to use for joining cols per row.
            Set to ", " by default.

        row_joiner (str): Separator to use for joining each row.
            Only used when `concat_rows=True`.
            Set to "\n" by default.

        pandas_config (dict): Options for the `pandas.read_excel` function call.
            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
            for more information.
            Defaults to no extra options, this means pandas will try to figure
            out the table structure on its own.

    """

    def __init__(
        self,
        *args: Any,
        concat_rows: bool = True,
        col_joiner: str = ", ",
        row_joiner: str = "\n",
        pandas_config: Union[dict, None] = None,
        **kwargs: Any
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows
        self._col_joiner = col_joiner
        self._row_joiner = row_joiner
        # Keep a private copy: a `{}` default in the signature would be one dict
        # shared by every instance, and the caller's dict should not be aliased.
        self._pandas_config = dict(pandas_config) if pandas_config else {}

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        """Parse file.

        Reads `file` with `pandas.read_excel`, renders every row as its cell
        values converted to `str` and joined with the column joiner.

        Args:
            file (Path): Path of the Excel file to read.
            errors (str): Accepted for interface compatibility; not read here.

        Returns:
            Union[str, List[str]]: All rows joined with the row joiner when
                `concat_rows` is true, otherwise the list of row strings.

        Raises:
            ValueError: If pandas cannot be imported.
        """
        try:
            import pandas as pd
        except ImportError as exc:
            # ValueError is kept (rather than ImportError) so existing callers
            # that catch it keep working; the original error is chained.
            raise ValueError("pandas module is required to read Excel files.") from exc

        df = pd.read_excel(file, **self._pandas_config)

        text_list = df.apply(
            lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
        ).tolist()

        if self._concat_rows:
            return (self._row_joiner).join(text_list)
        return text_list
|
||||
@@ -1,9 +1,11 @@
|
||||
import os
|
||||
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.core.settings import settings
|
||||
from retry import retry
|
||||
|
||||
from application.core.settings import settings
|
||||
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
|
||||
# from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
# from langchain_community.embeddings import HuggingFaceInstructEmbeddings
|
||||
@@ -11,12 +13,14 @@ from retry import retry
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def store_add_texts_with_retry(store, i):
|
||||
def store_add_texts_with_retry(store, i, id):
|
||||
# add source_id to the metadata
|
||||
i.metadata["source_id"] = str(id)
|
||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
|
||||
|
||||
def call_openai_api(docs, folder_name, task_status):
|
||||
def call_openai_api(docs, folder_name, id, task_status):
|
||||
# Function to create a vector store from the documents and save it to disk
|
||||
|
||||
if not os.path.exists(f"{folder_name}"):
|
||||
@@ -32,15 +36,16 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
docs_init=docs_init,
|
||||
path=f"{folder_name}",
|
||||
source_id=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
else:
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
path=f"{folder_name}",
|
||||
source_id=str(id),
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
store.delete_index()
|
||||
# Uncomment for MPNet embeddings
|
||||
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
@@ -57,7 +62,7 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
task_status.update_state(
|
||||
state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
|
||||
)
|
||||
store_add_texts_with_retry(store, i)
|
||||
store_add_texts_with_retry(store, i, id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Error on ", i)
|
||||
@@ -68,5 +73,3 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
c1 += 1
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
store.save_local(f"{folder_name}")
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ from application.parser.remote.base import BaseRemote
|
||||
|
||||
class CrawlerLoader(BaseRemote):
|
||||
def __init__(self, limit=10):
|
||||
from langchain.document_loaders import WebBaseLoader
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
self.loader = WebBaseLoader # Initialize the document loader
|
||||
self.limit = limit # Set the limit for the number of pages to scrape
|
||||
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
import base64
|
||||
import requests
|
||||
from typing import List
|
||||
from application.parser.remote.base import BaseRemote
|
||||
from langchain_core.documents import Document
|
||||
import mimetypes
|
||||
|
||||
class GitHubLoader(BaseRemote):
    """Remote loader that reads the files of a GitHub repository via api.github.com."""

    def __init__(self):
        # No token is set here, so `headers` starts out empty (unauthenticated requests).
        self.access_token = None
        self.headers = {
            "Authorization": f"token {self.access_token}"
        } if self.access_token else {}

    def fetch_file_content(self, repo_url: str, file_path: str) -> str:
        """Fetch one file and return it as text prefixed with its filename.

        Args:
            repo_url (str): Repository in "owner/name" form.
            file_path (str): Path of the file inside the repository.

        Returns:
            str: "Filename: <path>" followed by the decoded content, or a
                notice that the file was skipped when its guessed MIME type
                is not text.

        Raises:
            requests.HTTPError: If the response status is not 200.
            UnicodeDecodeError: If a text file is not valid UTF-8.
        """
        url = f"https://api.github.com/repos/{repo_url}/contents/{file_path}"
        response = requests.get(url, headers=self.headers)

        if response.status_code != 200:
            # 4xx/5xx raise here; any other non-200 status is raised explicitly
            # so this method never falls through and returns None.
            response.raise_for_status()
            raise requests.HTTPError(
                f"Unexpected status {response.status_code} for {url}", response=response
            )

        content = response.json()
        # Guess the MIME type based on the file extension
        mime_type, _ = mimetypes.guess_type(file_path)

        if content.get("encoding") != "base64":
            return f"Filename: {file_path}\n\n{content['content']}"

        # Handle only text files
        if mime_type and mime_type.startswith("text"):
            decoded_content = base64.b64decode(content["content"]).decode("utf-8")
            return f"Filename: {file_path}\n\n{decoded_content}"
        return f"Filename: {file_path} is a binary file and was skipped."

    def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]:
        """Recursively list the paths of all files under `path`.

        Args:
            repo_url (str): Repository in "owner/name" form.
            path (str): Directory inside the repository; "" is the root.

        Returns:
            List[str]: Paths of every entry whose type is "file", descending
                into entries whose type is "dir".

        Raises:
            requests.HTTPError: If the listing request returns an error status.
        """
        url = f"https://api.github.com/repos/{repo_url}/contents/{path}"
        response = requests.get(url, headers={**self.headers, "Accept": "application/vnd.github.v3.raw"})
        # Fail with the HTTP error instead of iterating over an error payload.
        response.raise_for_status()
        contents = response.json()
        files = []
        for item in contents:
            if item["type"] == "file":
                files.append(item["path"])
            elif item["type"] == "dir":
                files.extend(self.fetch_repo_files(repo_url, item["path"]))
        return files

    def load_data(self, repo_url: str) -> List[Document]:
        """Load every file of a repository as a `Document`.

        Args:
            repo_url (str): Repository URL containing "github.com/", or the
                bare "owner/name" form. A trailing "/" or ".git" is ignored.

        Returns:
            List[Document]: One document per file, with the file path as
                "title" and a github.com blob link as "source".
        """
        repo_name = repo_url.split("github.com/")[-1].strip("/")
        if repo_name.endswith(".git"):
            repo_name = repo_name[: -len(".git")]
        files = self.fetch_repo_files(repo_name)
        documents = []
        for file_path in files:
            content = self.fetch_file_content(repo_name, file_path)
            # NOTE(review): the source link hard-codes the "main" branch.
            documents.append(Document(page_content=content, metadata={"title": file_path,
                                                                     "source": f"https://github.com/{repo_name}/blob/main/{file_path}"}))
        return documents
|
||||
|
||||
@@ -2,6 +2,7 @@ from application.parser.remote.sitemap_loader import SitemapLoader
|
||||
from application.parser.remote.crawler_loader import CrawlerLoader
|
||||
from application.parser.remote.web_loader import WebLoader
|
||||
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
|
||||
from application.parser.remote.github_loader import GitHubLoader
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
@@ -10,6 +11,7 @@ class RemoteCreator:
|
||||
"sitemap": SitemapLoader,
|
||||
"crawler": CrawlerLoader,
|
||||
"reddit": RedditPostsLoaderRemote,
|
||||
"github": GitHubLoader,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -5,7 +5,7 @@ from application.parser.remote.base import BaseRemote
|
||||
|
||||
class SitemapLoader(BaseRemote):
|
||||
def __init__(self, limit=20):
|
||||
from langchain.document_loaders import WebBaseLoader
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
self.loader = WebBaseLoader
|
||||
self.limit = limit # Adding limit to control the number of URLs to process
|
||||
|
||||
|
||||
@@ -1,34 +1,89 @@
|
||||
anthropic==0.12.0
|
||||
boto3==1.34.6
|
||||
anthropic==0.34.2
|
||||
boto3==1.34.153
|
||||
beautifulsoup4==4.12.3
|
||||
celery==5.3.6
|
||||
dataclasses_json==0.6.3
|
||||
dataclasses-json==0.6.7
|
||||
docx2txt==0.8
|
||||
duckduckgo-search==5.3.0
|
||||
EbookLib==0.18
|
||||
elasticsearch==8.12.0
|
||||
duckduckgo-search==6.3.0
|
||||
ebooklib==0.18
|
||||
elastic-transport==8.15.0
|
||||
elasticsearch==8.15.1
|
||||
escodegen==1.0.11
|
||||
esprima==4.0.1
|
||||
faiss-cpu==1.7.4
|
||||
Flask==3.0.1
|
||||
gunicorn==22.0.0
|
||||
html2text==2020.1.16
|
||||
esutils==1.0.1
|
||||
Flask==3.0.3
|
||||
faiss-cpu==1.8.0.post1
|
||||
flask-restx==1.3.0
|
||||
gTTS==2.3.2
|
||||
gunicorn==23.0.0
|
||||
html2text==2024.2.26
|
||||
javalang==0.13.0
|
||||
langchain==0.1.4
|
||||
langchain-openai==0.0.5
|
||||
openapi3_parser==1.1.16
|
||||
pandas==2.2.0
|
||||
pydantic_settings==2.1.0
|
||||
pymongo==4.6.3
|
||||
PyPDF2==3.0.1
|
||||
jinja2==3.1.4
|
||||
jiter==0.5.0
|
||||
jmespath==1.0.1
|
||||
joblib==1.4.2
|
||||
jsonpatch==1.33
|
||||
jsonpointer==3.0.0
|
||||
jsonschema==4.23.0
|
||||
jsonschema-spec==0.2.4
|
||||
jsonschema-specifications==2023.7.1
|
||||
kombu==5.4.2
|
||||
langchain==0.3.0
|
||||
langchain-community==0.3.0
|
||||
langchain-core==0.3.2
|
||||
langchain-openai==0.2.0
|
||||
langchain-text-splitters==0.3.0
|
||||
langsmith==0.1.125
|
||||
lazy-object-proxy==1.10.0
|
||||
lxml==5.3.0
|
||||
markupsafe==2.1.5
|
||||
marshmallow==3.22.0
|
||||
mpmath==1.3.0
|
||||
multidict==6.1.0
|
||||
mypy-extensions==1.0.0
|
||||
networkx==3.3
|
||||
numpy==1.26.4
|
||||
openai==1.46.1
|
||||
openapi-schema-validator==0.6.2
|
||||
openapi-spec-validator==0.6.0
|
||||
openapi3-parser==1.1.18
|
||||
orjson==3.10.7
|
||||
packaging==24.1
|
||||
pandas==2.2.3
|
||||
openpyxl==3.1.5
|
||||
pathable==0.4.3
|
||||
pillow==10.4.0
|
||||
portalocker==2.10.1
|
||||
prance==23.6.21.0
|
||||
primp==0.6.3
|
||||
prompt-toolkit==3.0.47
|
||||
protobuf==5.28.2
|
||||
py==1.11.0
|
||||
pydantic==2.9.2
|
||||
pydantic-core==2.23.4
|
||||
pydantic-settings==2.4.0
|
||||
pymongo==4.8.0
|
||||
pypdf2==3.0.1
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
qdrant-client==1.9.0
|
||||
python-pptx==1.0.2
|
||||
qdrant-client==1.11.0
|
||||
redis==5.0.1
|
||||
Requests==2.32.0
|
||||
referencing==0.30.2
|
||||
regex==2024.9.11
|
||||
requests==2.32.3
|
||||
retry==0.9.2
|
||||
sentence-transformers
|
||||
tiktoken
|
||||
torch
|
||||
tqdm==4.66.3
|
||||
transformers==4.36.2
|
||||
unstructured==0.12.2
|
||||
Werkzeug==3.0.3
|
||||
sentence-transformers==3.0.1
|
||||
tiktoken==0.7.0
|
||||
tokenizers==0.19.1
|
||||
torch==2.4.1
|
||||
tqdm==4.66.5
|
||||
transformers==4.44.2
|
||||
typing-extensions==4.12.2
|
||||
typing-inspect==0.9.0
|
||||
tzdata==2024.2
|
||||
urllib3==2.2.3
|
||||
vine==5.1.0
|
||||
wcwidth==0.2.13
|
||||
werkzeug==3.0.4
|
||||
yarl==1.11.1
|
||||
@@ -12,3 +12,7 @@ class BaseRetriever(ABC):
|
||||
@abstractmethod
|
||||
def search(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_params(self):
|
||||
pass
|
||||
|
||||
@@ -2,7 +2,7 @@ import json
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.utils import count_tokens
|
||||
from application.utils import num_tokens_from_string
|
||||
from langchain_community.tools import BraveSearch
|
||||
|
||||
|
||||
@@ -75,10 +75,9 @@ class BraveRetSearch(BaseRetriever):
|
||||
if len(self.chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
self.chat_history.reverse()
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(
|
||||
tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
|
||||
i["response"]
|
||||
)
|
||||
if tokens_current_history + tokens_batch < self.token_limit:
|
||||
@@ -101,3 +100,15 @@ class BraveRetSearch(BaseRetriever):
|
||||
|
||||
def search(self):
    """Return the result of `self._get_data()`."""
    results = self._get_data()
    return results
|
||||
|
||||
def get_params(self):
    """Return the retriever's stored settings as a dict keyed by attribute name."""
    exported = (
        "question",
        "source",
        "chat_history",
        "prompt",
        "chunks",
        "token_limit",
        "gpt_model",
        "user_api_key",
    )
    return {name: getattr(self, name) for name in exported}
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import os
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
|
||||
from application.utils import count_tokens
|
||||
from application.utils import num_tokens_from_string
|
||||
|
||||
|
||||
class ClassicRAG(BaseRetriever):
|
||||
@@ -21,7 +20,7 @@ class ClassicRAG(BaseRetriever):
|
||||
user_api_key=None,
|
||||
):
|
||||
self.question = question
|
||||
self.vectorstore = self._get_vectorstore(source=source)
|
||||
self.vectorstore = source['active_docs'] if 'active_docs' in source else None
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
@@ -38,21 +37,6 @@ class ClassicRAG(BaseRetriever):
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _get_vectorstore(self, source):
|
||||
if "active_docs" in source:
|
||||
if source["active_docs"].split("/")[0] == "default":
|
||||
vectorstore = ""
|
||||
elif source["active_docs"].split("/")[0] == "local":
|
||||
vectorstore = "indexes/" + source["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + source["active_docs"]
|
||||
if source["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
@@ -61,13 +45,12 @@ class ClassicRAG(BaseRetriever):
|
||||
settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY
|
||||
)
|
||||
docs_temp = docsearch.search(self.question, k=self.chunks)
|
||||
print(docs_temp)
|
||||
docs = [
|
||||
{
|
||||
"title": (
|
||||
i.metadata["title"].split("/")[-1]
|
||||
if i.metadata
|
||||
else i.page_content
|
||||
),
|
||||
"title": i.metadata.get(
|
||||
"title", i.metadata.get("post_title", i.page_content)
|
||||
).split("/")[-1],
|
||||
"text": i.page_content,
|
||||
"source": (
|
||||
i.metadata.get("source")
|
||||
@@ -95,10 +78,9 @@ class ClassicRAG(BaseRetriever):
|
||||
if len(self.chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
self.chat_history.reverse()
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(
|
||||
tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
|
||||
i["response"]
|
||||
)
|
||||
if tokens_current_history + tokens_batch < self.token_limit:
|
||||
@@ -114,10 +96,21 @@ class ClassicRAG(BaseRetriever):
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
|
||||
)
|
||||
|
||||
completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
|
||||
for line in completion:
|
||||
yield {"answer": str(line)}
|
||||
|
||||
def search(self):
    """Delegate to `self._get_data()` and hand back its return value."""
    found = self._get_data()
    return found
|
||||
|
||||
def get_params(self):
    """Return the retriever's stored settings as a dict.

    The "source" entry is taken from `self.vectorstore`; every other key
    mirrors the attribute of the same name.
    """
    key_to_attr = (
        ("question", "question"),
        ("source", "vectorstore"),
        ("chat_history", "chat_history"),
        ("prompt", "prompt"),
        ("chunks", "chunks"),
        ("token_limit", "token_limit"),
        ("gpt_model", "gpt_model"),
        ("user_api_key", "user_api_key"),
    )
    return {key: getattr(self, attr) for key, attr in key_to_attr}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.utils import count_tokens
|
||||
from application.utils import num_tokens_from_string
|
||||
from langchain_community.tools import DuckDuckGoSearchResults
|
||||
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
||||
|
||||
@@ -92,10 +92,9 @@ class DuckDuckSearch(BaseRetriever):
|
||||
if len(self.chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
self.chat_history.reverse()
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(
|
||||
tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
|
||||
i["response"]
|
||||
)
|
||||
if tokens_current_history + tokens_batch < self.token_limit:
|
||||
@@ -118,3 +117,15 @@ class DuckDuckSearch(BaseRetriever):
|
||||
|
||||
def search(self):
    """Call `self._get_data()` and return what it produces."""
    data = self._get_data()
    return data
|
||||
|
||||
def get_params(self):
    """Collect the retriever's stored settings into a dict keyed by attribute name."""
    params = {}
    for field in (
        "question",
        "source",
        "chat_history",
        "prompt",
        "chunks",
        "token_limit",
        "gpt_model",
        "user_api_key",
    ):
        params[field] = getattr(self, field)
    return params
|
||||
|
||||
@@ -5,15 +5,16 @@ from application.retriever.brave_search import BraveRetSearch
|
||||
|
||||
|
||||
class RetrieverCreator:
|
||||
retievers = {
|
||||
retrievers = {
|
||||
'classic': ClassicRAG,
|
||||
'duckduck_search': DuckDuckSearch,
|
||||
'brave_search': BraveRetSearch
|
||||
'brave_search': BraveRetSearch,
|
||||
'default': ClassicRAG
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_retriever(cls, type, *args, **kwargs):
|
||||
retiever_class = cls.retievers.get(type.lower())
|
||||
retiever_class = cls.retrievers.get(type.lower())
|
||||
if not retiever_class:
|
||||
raise ValueError(f"No retievers class found for type {type}")
|
||||
return retiever_class(*args, **kwargs)
|
||||
10
application/tts/base.py
Normal file
10
application/tts/base.py
Normal file
@@ -0,0 +1,10 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseTTS(ABC):
    """Abstract base class declaring the `text_to_speech` method."""

    def __init__(self):
        """Set up nothing; present so subclasses can call it."""
        return None

    @abstractmethod
    def text_to_speech(self, *args, **kwargs):
        """Abstract hook; this base implementation does nothing and returns None."""
        return None
|
||||
29
application/tts/elevenlabs.py
Normal file
29
application/tts/elevenlabs.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from io import BytesIO
|
||||
import base64
|
||||
from application.tts.base import BaseTTS
|
||||
|
||||
|
||||
class ElevenlabsTTS(BaseTTS):
    """Text-to-speech implementation that uses the ElevenLabs client."""

    def __init__(self):
        # Imported here so the modules are only needed when this class is used.
        import os
        from elevenlabs.client import ElevenLabs

        # Read the key from the ELEVENLABS_API_KEY environment variable rather
        # than sending the literal placeholder string "ELEVENLABS_API_KEY".
        self.client = ElevenLabs(
            api_key=os.getenv("ELEVENLABS_API_KEY"),
        )

    def text_to_speech(self, text):
        """Generate speech for `text`.

        Args:
            text: Text passed to the client's `generate` call.

        Returns:
            tuple: (base64-encoded audio as a str, the language code "en").
        """
        lang = "en"
        audio = self.client.generate(
            text=text,
            model="eleven_multilingual_v2",
            voice="Brian",
        )
        # `audio` is iterated chunk by chunk; the chunks are concatenated into one bytes object.
        audio_bytes = b"".join(audio)

        # Encode to base64
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")
        return audio_base64, lang
|
||||
19
application/tts/google_tts.py
Normal file
19
application/tts/google_tts.py
Normal file
@@ -0,0 +1,19 @@
|
||||
import io
|
||||
import base64
|
||||
from gtts import gTTS
|
||||
from application.tts.base import BaseTTS
|
||||
|
||||
|
||||
class GoogleTTS(BaseTTS):
    """Text-to-speech implementation that uses gTTS."""

    def __init__(self):
        """No state is set up."""
        return None

    def text_to_speech(self, text):
        """Synthesize `text` with gTTS and return (base64 audio str, "en")."""
        lang = "en"
        buffer = io.BytesIO()
        gTTS(text=text, lang=lang, slow=False).write_to_fp(buffer)
        # Take everything written to the in-memory buffer and base64-encode it.
        encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
        return encoded, lang
|
||||
@@ -1,10 +1,9 @@
|
||||
import sys
|
||||
from pymongo import MongoClient
|
||||
from datetime import datetime
|
||||
from application.core.settings import settings
|
||||
from application.utils import count_tokens
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.utils import num_tokens_from_string
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo["docsgpt"]
|
||||
usage_collection = db["token_usage"]
|
||||
|
||||
@@ -24,9 +23,9 @@ def update_token_usage(user_api_key, token_usage):
|
||||
def gen_token_usage(func):
|
||||
def wrapper(self, model, messages, stream, **kwargs):
|
||||
for message in messages:
|
||||
self.token_usage["prompt_tokens"] += count_tokens(message["content"])
|
||||
self.token_usage["prompt_tokens"] += num_tokens_from_string(message["content"])
|
||||
result = func(self, model, messages, stream, **kwargs)
|
||||
self.token_usage["generated_tokens"] += count_tokens(result)
|
||||
self.token_usage["generated_tokens"] += num_tokens_from_string(result)
|
||||
update_token_usage(self.user_api_key, self.token_usage)
|
||||
return result
|
||||
|
||||
@@ -36,14 +35,14 @@ def gen_token_usage(func):
|
||||
def stream_token_usage(func):
|
||||
def wrapper(self, model, messages, stream, **kwargs):
|
||||
for message in messages:
|
||||
self.token_usage["prompt_tokens"] += count_tokens(message["content"])
|
||||
self.token_usage["prompt_tokens"] += num_tokens_from_string(message["content"])
|
||||
batch = []
|
||||
result = func(self, model, messages, stream, **kwargs)
|
||||
for r in result:
|
||||
batch.append(r)
|
||||
yield r
|
||||
for line in batch:
|
||||
self.token_usage["generated_tokens"] += count_tokens(line)
|
||||
self.token_usage["generated_tokens"] += num_tokens_from_string(line)
|
||||
update_token_usage(self.user_api_key, self.token_usage)
|
||||
|
||||
return wrapper
|
||||
|
||||
@@ -1,6 +1,48 @@
|
||||
from transformers import GPT2TokenizerFast
|
||||
import tiktoken
|
||||
import hashlib
|
||||
from flask import jsonify, make_response
|
||||
|
||||
|
||||
_encoding = None
|
||||
|
||||
|
||||
def get_encoding():
    """Return the module-level "cl100k_base" tiktoken encoding, creating it on first call."""
    global _encoding
    if _encoding is not None:
        return _encoding
    _encoding = tiktoken.get_encoding("cl100k_base")
    return _encoding
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str) -> int:
    """Return how many tokens `string` encodes to with the encoding from `get_encoding()`."""
    return len(get_encoding().encode(string))
|
||||
|
||||
|
||||
def count_tokens_docs(docs):
    """Return the token count of all `page_content` values in `docs`, concatenated in order."""
    combined = "".join(doc.page_content for doc in docs)
    return num_tokens_from_string(combined)
|
||||
|
||||
|
||||
def check_required_fields(data, required_fields):
    """Check that every name in `required_fields` is present in `data`.

    Returns:
        None when nothing is missing; otherwise a response built with
        `make_response(jsonify(...), 400)` whose message lists the missing names.
    """
    absent = []
    for name in required_fields:
        if name not in data:
            absent.append(name)

    if not absent:
        return None

    payload = {
        "success": False,
        "message": f"Missing fields: {', '.join(absent)}",
    }
    return make_response(jsonify(payload), 400)
|
||||
|
||||
|
||||
def get_hash(data):
    """Return the hexadecimal MD5 digest of `data.encode()`."""
    digest = hashlib.md5()
    digest.update(data.encode())
    return digest.hexdigest()
|
||||
|
||||
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
|
||||
tokenizer.model_max_length = 100000
|
||||
def count_tokens(string):
|
||||
return len(tokenizer(string)['input_ids'])
|
||||
@@ -1,13 +1,30 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
from langchain_community.embeddings import (
|
||||
HuggingFaceEmbeddings,
|
||||
CohereEmbeddings,
|
||||
HuggingFaceInstructEmbeddings,
|
||||
)
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from application.core.settings import settings
|
||||
|
||||
class EmbeddingsWrapper:
    """Wrap a `SentenceTransformer` model behind `embed_query` / `embed_documents`."""

    def __init__(self, model_name, *args, **kwargs):
        """Create the model and record its sentence embedding dimension.

        Extra positional and keyword arguments are forwarded to `SentenceTransformer`.
        """
        extra_config = {'allow_dangerous_deserialization': True}
        self.model = SentenceTransformer(model_name, *args, config_kwargs=extra_config, **kwargs)
        self.dimension = self.model.get_sentence_embedding_dimension()

    def embed_query(self, query: str):
        """Encode a single query and return the result of `.tolist()`."""
        encoded = self.model.encode(query)
        return encoded.tolist()

    def embed_documents(self, documents: list):
        """Encode a list of documents and return the result of `.tolist()`."""
        encoded = self.model.encode(documents)
        return encoded.tolist()

    def __call__(self, text):
        """Dispatch to `embed_query` for a str or `embed_documents` for a list."""
        if isinstance(text, str):
            return self.embed_query(text)
        if isinstance(text, list):
            return self.embed_documents(text)
        raise ValueError("Input must be a string or a list of strings")
|
||||
|
||||
|
||||
|
||||
class EmbeddingsSingleton:
|
||||
_instances = {}
|
||||
|
||||
@@ -23,16 +40,15 @@ class EmbeddingsSingleton:
|
||||
def _create_instance(embeddings_name, *args, **kwargs):
|
||||
embeddings_factory = {
|
||||
"openai_text-embedding-ada-002": OpenAIEmbeddings,
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": HuggingFaceEmbeddings,
|
||||
"huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
|
||||
"cohere_medium": CohereEmbeddings
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
|
||||
}
|
||||
|
||||
if embeddings_name not in embeddings_factory:
|
||||
raise ValueError(f"Invalid embeddings_name: {embeddings_name}")
|
||||
|
||||
return embeddings_factory[embeddings_name](*args, **kwargs)
|
||||
if embeddings_name in embeddings_factory:
|
||||
return embeddings_factory[embeddings_name](*args, **kwargs)
|
||||
else:
|
||||
return EmbeddingsWrapper(embeddings_name, *args, **kwargs)
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
def __init__(self):
|
||||
@@ -58,22 +74,14 @@ class BaseVectorStore(ABC):
|
||||
embeddings_name,
|
||||
openai_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "cohere_medium":
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
cohere_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
if os.path.exists("./model/all-mpnet-base-v2"):
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
model_name="./model/all-mpnet-base-v2",
|
||||
model_kwargs={"device": "cpu"}
|
||||
embeddings_name="./model/all-mpnet-base-v2",
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
model_kwargs={"device": "cpu"}
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
|
||||
|
||||
@@ -9,9 +9,9 @@ import elasticsearch
|
||||
class ElasticsearchStore(BaseVectorStore):
|
||||
_es_connection = None # Class attribute to hold the Elasticsearch connection
|
||||
|
||||
def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
super().__init__()
|
||||
self.path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
self.embeddings_key = embeddings_key
|
||||
self.index_name = index_name
|
||||
|
||||
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
|
||||
vector = embeddings.embed_query(question)
|
||||
knn = {
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
|
||||
"field": "vector",
|
||||
"k": k,
|
||||
"num_candidates": 100,
|
||||
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
|
||||
}
|
||||
},
|
||||
"rank": {"rrf": {}},
|
||||
@@ -209,5 +209,4 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
|
||||
def delete_index(self):
|
||||
self._es_connection.delete_by_query(index=self.index_name, query={"match": {
|
||||
"metadata.store.keyword": self.path}},)
|
||||
|
||||
"metadata.source_id.keyword": self.source_id}},)
|
||||
|
||||
@@ -1,21 +1,29 @@
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
import os
|
||||
|
||||
def get_vectorstore(path: str) -> str:
    """Return "application/indexes/<path>" for a truthy `path`, otherwise "application".

    Components are combined with `os.path.join`.
    """
    parts = ["application"]
    if path:
        parts += ["indexes", path]
    return os.path.join(*parts)
|
||||
|
||||
class FaissStore(BaseVectorStore):
|
||||
|
||||
def __init__(self, path, embeddings_key, docs_init=None):
|
||||
def __init__(self, source_id: str, embeddings_key: str, docs_init=None):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
self.path = get_vectorstore(source_id)
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
if docs_init:
|
||||
self.docsearch = FAISS.from_documents(
|
||||
docs_init, embeddings
|
||||
)
|
||||
else:
|
||||
self.docsearch = FAISS.load_local(
|
||||
self.path, embeddings
|
||||
)
|
||||
|
||||
try:
|
||||
if docs_init:
|
||||
self.docsearch = FAISS.from_documents(docs_init, embeddings)
|
||||
else:
|
||||
self.docsearch = FAISS.load_local(self.path, embeddings, allow_dangerous_deserialization=True)
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
self.assert_embedding_dimensions(embeddings)
|
||||
|
||||
def search(self, *args, **kwargs):
|
||||
@@ -31,16 +39,12 @@ class FaissStore(BaseVectorStore):
|
||||
return self.docsearch.delete(*args, **kwargs)
|
||||
|
||||
def assert_embedding_dimensions(self, embeddings):
|
||||
"""
|
||||
Check that the word embedding dimension of the docsearch index matches
|
||||
the dimension of the word embeddings used
|
||||
"""
|
||||
"""Check that the word embedding dimension of the docsearch index matches the dimension of the word embeddings used."""
|
||||
if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
try:
|
||||
word_embedding_dimension = embeddings.client[1].word_embedding_dimension
|
||||
except AttributeError as e:
|
||||
raise AttributeError("word_embedding_dimension not found in embeddings.client[1]") from e
|
||||
word_embedding_dimension = getattr(embeddings, 'dimension', None)
|
||||
if word_embedding_dimension is None:
|
||||
raise AttributeError("'dimension' attribute not found in embeddings instance.")
|
||||
|
||||
docsearch_index_dimension = self.docsearch.index.d
|
||||
if word_embedding_dimension != docsearch_index_dimension:
|
||||
raise ValueError(f"word_embedding_dimension ({word_embedding_dimension}) " +
|
||||
f"!= docsearch_index_word_embedding_dimension ({docsearch_index_dimension})")
|
||||
raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) != docsearch index dimension ({docsearch_index_dimension})")
|
||||
|
||||
119
application/vectorstore/lancedb.py
Normal file
119
application/vectorstore/lancedb.py
Normal file
@@ -0,0 +1,119 @@
|
||||
from typing import List, Optional
|
||||
import importlib
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
|
||||
class LanceDBVectorStore(BaseVectorStore):
    """Vector store backed by a LanceDB table.

    The ``pyarrow`` and ``lancedb`` modules, the database connection and the
    table handle are all resolved lazily on first use, so constructing the
    store does not import or connect to anything.
    """

    def __init__(self, path: str = settings.LANCEDB_PATH,
                 table_name_prefix: str = settings.LANCEDB_TABLE_NAME,
                 source_id: str = None,
                 embeddings_key: str = "embeddings"):
        """Initialize the LanceDB vector store.

        Args:
            path: Location passed to ``lancedb.connect``.
            table_name_prefix: Table name, or its prefix when ``source_id`` is given.
            source_id: Optional suffix; the table is then named
                ``{table_name_prefix}_{source_id}``.
            embeddings_key: Key forwarded to ``_get_embeddings``.
        """
        super().__init__()
        self.path = path
        self.table_name = f"{table_name_prefix}_{source_id}" if source_id else table_name_prefix
        self.embeddings_key = embeddings_key
        self._lance_db = None  # LanceDB connection, opened lazily
        self.docsearch = None  # LanceDB table handle, opened lazily
        self._pa = None  # PyArrow (pa) will be lazy loaded
        self._lancedb_module = None  # lancedb module, lazy loaded

    @property
    def pa(self):
        """Lazy load pyarrow module."""
        if self._pa is None:
            self._pa = importlib.import_module("pyarrow")
        return self._pa

    @property
    def lancedb(self):
        """Lazy load lancedb module."""
        # getattr keeps this safe for instances built without __init__.
        if getattr(self, "_lancedb_module", None) is None:
            self._lancedb_module = importlib.import_module("lancedb")
        return self._lancedb_module

    @property
    def lance_db(self):
        """Lazy load the LanceDB connection."""
        if self._lance_db is None:
            self._lance_db = self.lancedb.connect(self.path)
        return self._lance_db

    @property
    def table(self):
        """Return the table handle, opening it on first access.

        Returns ``None`` when the table does not exist in the database yet.
        """
        if self.docsearch is None and self.table_name in self.lance_db.table_names():
            self.docsearch = self.lance_db.open_table(self.table_name)
        return self.docsearch

    def ensure_table_exists(self):
        """Create the table with the expected schema if it does not exist yet."""
        if self.table is None:
            embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
            # NOTE(review): "metadata" is declared as a single key/value struct,
            # while add_texts writes a list of key/value dicts per row - confirm
            # the intended shape against LanceDB's schema handling.
            schema = self.pa.schema([
                self.pa.field("vector", self.pa.list_(self.pa.float32(), list_size=embeddings.dimension)),
                self.pa.field("text", self.pa.string()),
                self.pa.field("metadata", self.pa.struct([
                    self.pa.field("key", self.pa.string()),
                    self.pa.field("value", self.pa.string())
                ]))
            ])
            self.docsearch = self.lance_db.create_table(self.table_name, schema=schema)

    def add_texts(self, texts: List[str], metadatas: Optional[List[dict]] = None, source_id: str = None):
        """Add texts with metadata and their embeddings to the LanceDB table.

        Args:
            texts: Texts to embed and store.
            metadatas: Optional per-text metadata dicts; missing/empty means
                every text gets empty metadata.
            source_id: When given, stored under the ``source_id`` metadata key
                of every row.
        """
        embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key).embed_documents(texts)
        if not metadatas:
            metadatas = [{} for _ in texts]
        vectors = []
        for embedding, text, metadata in zip(embeddings, texts, metadatas):
            # Work on a copy so the caller's dicts are never mutated.
            metadata = dict(metadata)
            if source_id:
                metadata["source_id"] = source_id
            metadata_struct = [{"key": k, "value": str(v)} for k, v in metadata.items()]
            vectors.append({
                "vector": embedding,
                "text": text,
                "metadata": metadata_struct
            })
        self.ensure_table_exists()
        self.docsearch.add(vectors)

    def search(self, query: str, k: int = 2, *args, **kwargs):
        """Search LanceDB for the top k most similar vectors.

        Returns:
            A list of ``(_distance, text, metadata)`` tuples, one per hit.
        """
        self.ensure_table_exists()
        query_embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key).embed_query(query)
        results = self.docsearch.search(query_embedding).limit(k).to_list()
        return [(result["_distance"], result["text"], result["metadata"]) for result in results]

    def delete_index(self):
        """Delete the entire LanceDB index (table)."""
        # Compare against None explicitly: truthiness of a table object may
        # depend on its row count, which would skip dropping an empty table.
        if self.table is not None:
            self.lance_db.drop_table(self.table_name)
            # Forget the handle so later calls do not use a dropped table.
            self.docsearch = None

    def assert_embedding_dimensions(self, embeddings):
        """Ensure that embedding dimensions match the table index dimensions.

        Raises:
            ValueError: If the table exists and its vector width differs from
                ``embeddings.dimension``.
        """
        word_embedding_dimension = embeddings.dimension
        if self.table is not None:
            # The vector column is a fixed-size list; its width is ``list_size``.
            table_index_dimension = self.docsearch.schema["vector"].type.list_size
            if word_embedding_dimension != table_index_dimension:
                raise ValueError(
                    f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) "
                    f"!= table index dimension ({table_index_dimension})"
                )

    def filter_documents(self, filter_condition: dict) -> List[dict]:
        """Return the rows whose metadata matches ``filter_condition["source_id"]``.

        Raises:
            ValueError: If ``filter_condition`` has no ``source_id`` key.
        """
        self.ensure_table_exists()

        # Ensure source_id exists in the filter condition
        if 'source_id' not in filter_condition:
            raise ValueError("filter_condition must contain 'source_id'")

        source_id = filter_condition["source_id"]

        # NOTE(review): confirm that the LanceDB table object exposes a
        # ``filter`` method accepting a Python callable; this call is kept
        # exactly as originally written.
        filtered_data = self.docsearch.filter(lambda x: x.metadata and x.metadata.get("source_id") == source_id).to_list()

        return filtered_data
|
||||
38
application/vectorstore/milvus.py
Normal file
38
application/vectorstore/milvus.py
Normal file
@@ -0,0 +1,38 @@
|
||||
from typing import List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
|
||||
|
||||
class MilvusStore(BaseVectorStore):
    """Vector store adapter that delegates to a ``langchain_milvus.Milvus`` collection."""

    def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
        """Connect to the configured Milvus collection.

        Args:
            source_id: Value used to restrict searches to one source.
            embeddings_key: Key forwarded to ``_get_embeddings``.
        """
        super().__init__()
        # Imported inside the constructor, as in the original code.
        from langchain_milvus import Milvus

        milvus_connection = dict(uri=settings.MILVUS_URI, token=settings.MILVUS_TOKEN)
        embedder = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
        self._docsearch = Milvus(
            embedding_function=embedder,
            collection_name=settings.MILVUS_COLLECTION_NAME,
            connection_args=milvus_connection,
        )
        self._source_id = source_id

    def search(self, question, k=2, *args, **kwargs):
        """Run a similarity search limited to documents of this store's source."""
        source_filter = f"source_id == '{self._source_id}'"
        return self._docsearch.similarity_search(
            *args, query=question, k=k, expr=source_filter, **kwargs
        )

    def add_texts(self, texts: List[str], metadatas: Optional[List[dict]], *args, **kwargs):
        """Insert texts with freshly generated UUID ids and return the underlying result."""
        generated_ids = []
        for _ in range(len(texts)):
            generated_ids.append(str(uuid4()))

        return self._docsearch.add_texts(
            *args, texts=texts, metadatas=metadatas, ids=generated_ids, **kwargs
        )

    def save_local(self, *args, **kwargs):
        """No-op: this store does nothing on save."""
        return None

    def delete_index(self, *args, **kwargs):
        """No-op: this store does nothing on index deletion."""
        return None
|
||||
@@ -1,11 +1,12 @@
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.vectorstore.document_class import Document
|
||||
|
||||
|
||||
class MongoDBVectorStore(BaseVectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
path: str = "",
|
||||
source_id: str = "",
|
||||
embeddings_key: str = "embeddings",
|
||||
collection: str = "documents",
|
||||
index_name: str = "vector_search_index",
|
||||
@@ -18,7 +19,7 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
self._embedding_key = embedding_key
|
||||
self._embeddings_key = embeddings_key
|
||||
self._mongo_uri = settings.MONGO_URI
|
||||
self._path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
|
||||
try:
|
||||
@@ -33,27 +34,24 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
self._database = self._client[database]
|
||||
self._collection = self._database[collection]
|
||||
|
||||
|
||||
def search(self, question, k=2, *args, **kwargs):
|
||||
query_vector = self._embedding.embed_query(question)
|
||||
|
||||
pipeline = [
|
||||
{
|
||||
"$vectorSearch": {
|
||||
"queryVector": query_vector,
|
||||
"queryVector": query_vector,
|
||||
"path": self._embedding_key,
|
||||
"limit": k,
|
||||
"numCandidates": k * 10,
|
||||
"limit": k,
|
||||
"numCandidates": k * 10,
|
||||
"index": self._index_name,
|
||||
"filter": {
|
||||
"store": {"$eq": self._path}
|
||||
}
|
||||
"filter": {"source_id": {"$eq": self._source_id}},
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
cursor = self._collection.aggregate(pipeline)
|
||||
|
||||
|
||||
results = []
|
||||
for doc in cursor:
|
||||
text = doc[self._text_key]
|
||||
@@ -63,30 +61,32 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
metadata = doc
|
||||
results.append(Document(text, metadata))
|
||||
return results
|
||||
|
||||
|
||||
def _insert_texts(self, texts, metadatas):
|
||||
if not texts:
|
||||
return []
|
||||
embeddings = self._embedding.embed_documents(texts)
|
||||
|
||||
to_insert = [
|
||||
{self._text_key: t, self._embedding_key: embedding, **m}
|
||||
for t, m, embedding in zip(texts, metadatas, embeddings)
|
||||
]
|
||||
# insert the documents in MongoDB Atlas
|
||||
|
||||
insert_result = self._collection.insert_many(to_insert)
|
||||
return insert_result.inserted_ids
|
||||
|
||||
def add_texts(self,
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts,
|
||||
metadatas = None,
|
||||
ids = None,
|
||||
refresh_indices = True,
|
||||
create_index_if_not_exists = True,
|
||||
bulk_kwargs = None,
|
||||
**kwargs,):
|
||||
metadatas=None,
|
||||
ids=None,
|
||||
refresh_indices=True,
|
||||
create_index_if_not_exists=True,
|
||||
bulk_kwargs=None,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
|
||||
#dims = self._embedding.client[1].word_embedding_dimension
|
||||
# dims = self._embedding.client[1].word_embedding_dimension
|
||||
# # check if index exists
|
||||
# if create_index_if_not_exists:
|
||||
# # check if index exists
|
||||
@@ -121,6 +121,6 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
if texts_batch:
|
||||
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
|
||||
return result_ids
|
||||
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
self._collection.delete_many({"store": self._path})
|
||||
self._collection.delete_many({"source_id": self._source_id})
|
||||
|
||||
@@ -5,12 +5,12 @@ from qdrant_client import models
|
||||
|
||||
|
||||
class QdrantStore(BaseVectorStore):
|
||||
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
|
||||
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
|
||||
self._filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
key="metadata.store",
|
||||
match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
|
||||
key="metadata.source_id",
|
||||
match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from application.vectorstore.faiss import FaissStore
|
||||
from application.vectorstore.elasticsearch import ElasticsearchStore
|
||||
from application.vectorstore.milvus import MilvusStore
|
||||
from application.vectorstore.mongodb import MongoDBVectorStore
|
||||
from application.vectorstore.qdrant import QdrantStore
|
||||
|
||||
@@ -10,6 +11,7 @@ class VectorCreator:
|
||||
"elasticsearch": ElasticsearchStore,
|
||||
"mongodb": MongoDBVectorStore,
|
||||
"qdrant": QdrantStore,
|
||||
"milvus": MilvusStore,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -1,35 +1,44 @@
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import string
|
||||
import zipfile
|
||||
import tiktoken
|
||||
from collections import Counter
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import requests
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
from application.utils import count_tokens_docs
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo["docsgpt"]
|
||||
sources_collection = db["sources"]
|
||||
|
||||
# Constants
|
||||
MIN_TOKENS = 150
|
||||
MAX_TOKENS = 1250
|
||||
RECURSION_DEPTH = 2
|
||||
|
||||
# Define a function to extract metadata from a given filename.
|
||||
def metadata_from_filename(title):
|
||||
store = "/".join(title.split("/")[1:3])
|
||||
return {"title": title, "store": store}
|
||||
|
||||
return {"title": title}
|
||||
|
||||
# Define a function to generate a random string of a given length.
|
||||
def generate_random_string(length):
    """Return ``length`` characters cycling through ``string.ascii_letters``.

    Despite the name the output is deterministic: position ``i`` always maps
    to letter ``i % 52`` of the ASCII alphabet (lowercase, then uppercase).
    """
    alphabet = string.ascii_letters
    picked = []
    for position in range(length):
        picked.append(alphabet[position % 52])
    return "".join(picked)
|
||||
|
||||
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
|
||||
|
||||
def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
||||
"""
|
||||
Recursively extract zip files with a limit on recursion depth.
|
||||
@@ -41,12 +50,16 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
||||
max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
|
||||
"""
|
||||
if current_depth > max_depth:
|
||||
print(f"Reached maximum recursion depth of {max_depth}")
|
||||
logging.warning(f"Reached maximum recursion depth of {max_depth}")
|
||||
return
|
||||
|
||||
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
zip_ref.extractall(extract_to)
|
||||
os.remove(zip_path) # Remove the zip file after extracting
|
||||
try:
|
||||
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
zip_ref.extractall(extract_to)
|
||||
os.remove(zip_path) # Remove the zip file after extracting
|
||||
except Exception as e:
|
||||
logging.error(f"Error extracting zip file {zip_path}: {e}")
|
||||
return
|
||||
|
||||
# Check for nested zip files and extract them
|
||||
for root, dirs, files in os.walk(extract_to):
|
||||
@@ -56,9 +69,43 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
||||
file_path = os.path.join(root, file)
|
||||
extract_zip_recursive(file_path, root, current_depth + 1, max_depth)
|
||||
|
||||
def download_file(url, params, dest_path):
    """Fetch ``url`` with the given query ``params`` and write the body to ``dest_path``.

    The destination file is only written after the response status has been
    checked. Request failures are logged and re-raised.
    """
    try:
        reply = requests.get(url, params=params)
        reply.raise_for_status()
        with open(dest_path, "wb") as destination:
            destination.write(reply.content)
    except requests.RequestException as err:
        logging.error(f"Error downloading file: {err}")
        raise
|
||||
|
||||
def upload_index(full_path, file_data):
    """POST the index metadata (and, for FAISS, the index files) to the API.

    Args:
        full_path: Directory containing ``index.faiss`` and ``index.pkl`` when
            the configured vector store is FAISS.
        file_data: Form fields sent along with the request.

    Raises:
        requests.RequestException: On transport errors or a non-2xx response
            (logged before being re-raised).
        OSError: If a FAISS index file cannot be opened.
    """
    # Local import: only this function needs ExitStack.
    from contextlib import ExitStack

    try:
        # ExitStack closes whatever was opened, even when the second open()
        # fails, and never touches an unbound name on the error path.
        with ExitStack() as stack:
            if settings.VECTOR_STORE == "faiss":
                files = {
                    "file_faiss": stack.enter_context(open(full_path + "/index.faiss", "rb")),
                    "file_pkl": stack.enter_context(open(full_path + "/index.pkl", "rb")),
                }
                response = requests.post(
                    urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
                )
            else:
                response = requests.post(
                    urljoin(settings.API_URL, "/api/upload_index"), data=file_data
                )
            response.raise_for_status()
    except requests.RequestException as e:
        logging.error(f"Error uploading index: {e}")
        raise
|
||||
|
||||
# Define the main function for ingesting and processing documents.
|
||||
def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
def ingest_worker(
|
||||
self, directory, formats, name_job, filename, user, retriever="classic"
|
||||
):
|
||||
"""
|
||||
Ingest and process documents.
|
||||
|
||||
@@ -69,46 +116,30 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
name_job (str): Name of the job for this ingestion task.
|
||||
filename (str): Name of the file to be ingested.
|
||||
user (str): Identifier for the user initiating the ingestion.
|
||||
retriever (str): Type of retriever to use for processing the documents.
|
||||
|
||||
Returns:
|
||||
dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
|
||||
"""
|
||||
# directory = 'inputs' or 'temp'
|
||||
# formats = [".rst", ".md"]
|
||||
input_files = None
|
||||
recursive = True
|
||||
limit = None
|
||||
exclude = True
|
||||
# name_job = 'job1'
|
||||
# filename = 'install.rst'
|
||||
# user = 'local'
|
||||
sample = False
|
||||
token_check = True
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
recursion_depth = 2
|
||||
full_path = os.path.join(directory, user, name_job)
|
||||
import sys
|
||||
|
||||
print(full_path, file=sys.stderr)
|
||||
# check if API_URL env variable is set
|
||||
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
|
||||
file_data = {"name": name_job, "file": filename, "user": user}
|
||||
response = requests.get(
|
||||
urljoin(settings.API_URL, "/api/download"), params=file_data
|
||||
)
|
||||
# check if file is in the response
|
||||
print(response, file=sys.stderr)
|
||||
file = response.content
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
os.makedirs(full_path)
|
||||
with open(os.path.join(full_path, filename), "wb") as f:
|
||||
f.write(file)
|
||||
download_file(urljoin(settings.API_URL, "/api/download"), file_data, os.path.join(full_path, filename))
|
||||
|
||||
# check if file is .zip and extract it
|
||||
if filename.endswith(".zip"):
|
||||
extract_zip_recursive(
|
||||
os.path.join(full_path, filename), full_path, 0, recursion_depth
|
||||
os.path.join(full_path, filename), full_path, 0, RECURSION_DEPTH
|
||||
)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
@@ -124,39 +155,29 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
).load_data()
|
||||
raw_docs = group_split(
|
||||
documents=raw_docs,
|
||||
min_tokens=min_tokens,
|
||||
max_tokens=max_tokens,
|
||||
min_tokens=MIN_TOKENS,
|
||||
max_tokens=MAX_TOKENS,
|
||||
token_check=token_check,
|
||||
)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
id = ObjectId()
|
||||
|
||||
call_openai_api(docs, full_path, self)
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
tokens = count_tokens_docs(docs)
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
if sample:
|
||||
for i in range(min(5, len(raw_docs))):
|
||||
print(raw_docs[i].text)
|
||||
logging.info(f"Sample document {i}: {raw_docs[i]}")
|
||||
|
||||
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
||||
# and send them to the server (provide user and name in form)
|
||||
file_data = {"name": name_job, "user": user, "tokens":tokens}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {
|
||||
"file_faiss": open(full_path + "/index.faiss", "rb"),
|
||||
"file_pkl": open(full_path + "/index.pkl", "rb"),
|
||||
}
|
||||
response = requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
|
||||
)
|
||||
response = requests.get(
|
||||
urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)
|
||||
)
|
||||
else:
|
||||
response = requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), data=file_data
|
||||
)
|
||||
file_data.update({
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": "local",
|
||||
})
|
||||
upload_index(full_path, file_data)
|
||||
|
||||
# delete local
|
||||
shutil.rmtree(full_path)
|
||||
@@ -170,68 +191,114 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
"limited": False,
|
||||
}
|
||||
|
||||
|
||||
def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
|
||||
def remote_worker(
|
||||
self,
|
||||
source_data,
|
||||
name_job,
|
||||
user,
|
||||
loader,
|
||||
directory="temp",
|
||||
retriever="classic",
|
||||
sync_frequency="never",
|
||||
operation_mode="upload",
|
||||
doc_id=None,
|
||||
):
|
||||
token_check = True
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
full_path = directory + "/" + user + "/" + name_job
|
||||
full_path = os.path.join(directory, user, name_job)
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
os.makedirs(full_path)
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
logging.info(
|
||||
f"Remote job: {full_path}",
|
||||
extra={"user": user, "job": name_job, "source_data": source_data},
|
||||
)
|
||||
|
||||
remote_loader = RemoteCreator.create_loader(loader)
|
||||
raw_docs = remote_loader.load_data(source_data)
|
||||
|
||||
docs = group_split(
|
||||
documents=raw_docs,
|
||||
min_tokens=min_tokens,
|
||||
max_tokens=max_tokens,
|
||||
min_tokens=MIN_TOKENS,
|
||||
max_tokens=MAX_TOKENS,
|
||||
token_check=token_check,
|
||||
)
|
||||
# docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
call_openai_api(docs, full_path, self)
|
||||
tokens = count_tokens_docs(docs)
|
||||
if operation_mode == "upload":
|
||||
id = ObjectId()
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
elif operation_mode == "sync":
|
||||
if not doc_id or not ObjectId.is_valid(doc_id):
|
||||
raise ValueError("doc_id must be provided for sync operation.")
|
||||
id = ObjectId(doc_id)
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
# Proceed with uploading and cleaning as in the original function
|
||||
file_data = {"name": name_job, "user": user, "tokens":tokens}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {
|
||||
"file_faiss": open(full_path + "/index.faiss", "rb"),
|
||||
"file_pkl": open(full_path + "/index.pkl", "rb"),
|
||||
}
|
||||
|
||||
requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
|
||||
)
|
||||
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
else:
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
file_data = {
|
||||
"name": name_job,
|
||||
"user": user,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": loader,
|
||||
"remote_data": source_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
}
|
||||
upload_index(full_path, file_data)
|
||||
|
||||
shutil.rmtree(full_path)
|
||||
|
||||
return {"urls": source_data, "name_job": name_job, "user": user, "limited": False}
|
||||
|
||||
def sync(
    self,
    source_data,
    name_job,
    user,
    loader,
    sync_frequency,
    retriever,
    doc_id=None,
    directory="temp",
):
    """Re-run ``remote_worker`` in ``"sync"`` mode and report the outcome.

    Any exception raised by the worker is logged and converted into an error
    result instead of propagating.

    Returns:
        ``{"status": "success"}`` or ``{"status": "error", "error": <message>}``.
    """
    try:
        remote_worker(
            self,
            source_data,
            name_job,
            user,
            loader,
            directory,
            retriever,
            sync_frequency,
            "sync",
            doc_id,
        )
    except Exception as failure:
        logging.error(f"Error during sync: {failure}")
        outcome = {"status": "error", "error": str(failure)}
    else:
        outcome = {"status": "success"}
    return outcome
|
||||
|
||||
def count_tokens_docs(docs):
|
||||
# Here we convert the docs list to a string and calculate the number of tokens the string represents.
|
||||
# docs_content = (" ".join(docs))
|
||||
docs_content = ""
|
||||
for doc in docs:
|
||||
docs_content += doc.page_content
|
||||
def sync_worker(self, frequency):
|
||||
sync_counts = Counter()
|
||||
sources = sources_collection.find()
|
||||
for doc in sources:
|
||||
if doc.get("sync_frequency") == frequency:
|
||||
name = doc.get("name")
|
||||
user = doc.get("user")
|
||||
source_type = doc.get("type")
|
||||
source_data = doc.get("remote_data")
|
||||
retriever = doc.get("retriever")
|
||||
doc_id = str(doc.get("_id"))
|
||||
resp = sync(
|
||||
self, source_data, name, user, source_type, frequency, retriever, doc_id
|
||||
)
|
||||
sync_counts["total_sync_count"] += 1
|
||||
sync_counts[
|
||||
"sync_success" if resp["status"] == "success" else "sync_failure"
|
||||
] += 1
|
||||
|
||||
tokens, total_price = num_tokens_from_string(
|
||||
string=docs_content, encoding_name="cl100k_base"
|
||||
)
|
||||
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
|
||||
return tokens
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = (num_tokens / 1000) * 0.0004
|
||||
return num_tokens, total_price
|
||||
return {
|
||||
key: sync_counts[key]
|
||||
for key in ["total_sync_count", "sync_success", "sync_failure"]
|
||||
}
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
|
||||
redis:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
volumes:
|
||||
- ./frontend/src:/app/src
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
volumes:
|
||||
- ./frontend/src:/app/src
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
@@ -20,6 +20,7 @@ services:
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- CACHE_REDIS_URL=redis://redis:6379/2
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
@@ -32,7 +33,7 @@ services:
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO -B
|
||||
environment:
|
||||
- API_KEY=$API_KEY
|
||||
- EMBEDDINGS_KEY=$API_KEY
|
||||
@@ -41,6 +42,7 @@ services:
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:7091
|
||||
- CACHE_REDIS_URL=redis://redis:6379/2
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
@@ -46,6 +46,6 @@ yarn install
|
||||
yarn dev
|
||||
```
|
||||
|
||||
- Now, you should be able to view the docs on your local environment by visiting `http://localhost:5000`. You can explore the different markdown files and make changes as you see fit.
|
||||
- Now, you should be able to view the docs on your local environment by visiting `http://localhost:3000`. You can explore the different markdown files and make changes as you see fit.
|
||||
|
||||
- **Footnotes:** This guide assumes you have Node.js and npm installed. The guide involves running a local server using yarn, and viewing the documentation offline. If you encounter any issues, it may be worth verifying your Node.js and npm installations and whether you have installed yarn correctly.
|
||||
|
||||
2029
docs/package-lock.json
generated
2029
docs/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -7,8 +7,8 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@vercel/analytics": "^1.1.1",
|
||||
"docsgpt": "^0.3.7",
|
||||
"next": "^14.1.1",
|
||||
"docsgpt": "^0.4.7",
|
||||
"next": "^14.2.12",
|
||||
"nextra": "^2.13.2",
|
||||
"nextra-theme-docs": "^2.13.2",
|
||||
"react": "^18.2.0",
|
||||
|
||||
@@ -67,46 +67,3 @@ To run the setup on Windows, you have two options: using the Windows Subsystem f
|
||||
|
||||
These steps should help you set up and run the project on Windows using either WSL or Git Bash/Command Prompt.
|
||||
**Important:** Ensure that Docker is installed and properly configured on your Windows system for these steps to work.
|
||||
|
||||
|
||||
For WINDOWS:
|
||||
|
||||
To run the given setup on Windows, you can use the Windows Subsystem for Linux (WSL) or a Git Bash terminal to execute similar commands. Here are the steps adapted for Windows:
|
||||
|
||||
Option 1: Using Windows Subsystem for Linux (WSL):
|
||||
|
||||
1. Install WSL if you haven't already. You can follow the official Microsoft documentation for installation: (https://learn.microsoft.com/en-us/windows/wsl/install).
|
||||
2. After setting up WSL, open the WSL terminal.
|
||||
3. Clone the repository and create the `.env` file:
|
||||
```bash
|
||||
git clone https://github.com/arc53/DocsGPT.git
|
||||
cd DocsGPT
|
||||
echo "API_KEY=Yourkey" > .env
|
||||
echo "VITE_API_STREAMING=true" >> .env
|
||||
```
|
||||
4. Run the following command to start the setup with Docker Compose:
|
||||
```bash
|
||||
./run-with-docker-compose.sh
|
||||
```
|
||||
5. Open your web browser and navigate to http://localhost:5173/.
|
||||
6. To stop the setup, just press **Ctrl + C** in the WSL terminal.
|
||||
|
||||
Option 2: Using Git Bash or Command Prompt (CMD):
|
||||
|
||||
1. Install Git for Windows if you haven't already. You can download it from the official website: (https://gitforwindows.org/).
|
||||
2. Open Git Bash or Command Prompt.
|
||||
3. Clone the repository and create the `.env` file:
|
||||
```bash
|
||||
git clone https://github.com/arc53/DocsGPT.git
|
||||
cd DocsGPT
|
||||
echo "API_KEY=Yourkey" > .env
|
||||
echo "VITE_API_STREAMING=true" >> .env
|
||||
```
|
||||
4. Run the following command to start the setup with Docker Compose:
|
||||
```bash
|
||||
./run-with-docker-compose.sh
|
||||
```
|
||||
5. Open your web browser and navigate to http://localhost:5173/.
|
||||
6. To stop the setup, just press **Ctrl + C** in the Git Bash or Command Prompt terminal.
|
||||
|
||||
These steps should help you set up and run the project on Windows using either WSL or Git Bash/Command Prompt. Make sure you have Docker installed and properly configured on your Windows system for this to work.
|
||||
|
||||
@@ -17,25 +17,31 @@ Now, you can use the widget in your component like this :
|
||||
```jsx
|
||||
<DocsGPTWidget
|
||||
apiHost="https://your-docsgpt-api.com"
|
||||
selectDocs="local/docs.zip"
|
||||
apiKey=""
|
||||
avatar = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png",
|
||||
title = "Get AI assistance",
|
||||
description = "DocsGPT's AI Chatbot is here to help",
|
||||
heroTitle = "Welcome to DocsGPT !",
|
||||
avatar = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png"
|
||||
title = "Get AI assistance"
|
||||
description = "DocsGPT's AI Chatbot is here to help"
|
||||
heroTitle = "Welcome to DocsGPT !"
|
||||
heroDescription="This chatbot is built with DocsGPT and utilises GenAI,
|
||||
please review important information using sources."
|
||||
theme = "dark"
|
||||
buttonIcon = "https://your-icon"
|
||||
buttonBg = "#222327"
|
||||
/>
|
||||
```
|
||||
DocsGPTWidget takes 8 **props** with default fallback values:
|
||||
To tailor the widget to your needs, you can configure the following props in your component:
|
||||
1. `apiHost` — The URL of your DocsGPT API.
|
||||
2. `selectDocs` — The documentation source that you want to use for your widget (e.g. `default` or `local/docs1.zip`).
|
||||
2. `theme` — Selects the widget theme (`dark` or `light`).
|
||||
3. `apiKey` — Usually, it's empty.
|
||||
4. `avatar`: Specifies the URL of the avatar or image representing the chatbot.
|
||||
5. `title`: Sets the title text displayed in the chatbot interface.
|
||||
6. `description`: Provides a brief description of the chatbot's purpose or functionality.
|
||||
7. `heroTitle`: Displays a welcome title when users interact with the chatbot.
|
||||
8. `heroDescription`: Provides additional introductory text or information about the chatbot's capabilities.
|
||||
9. `buttonIcon`: Specifies the URL of the icon image for the widget button.
|
||||
10. `buttonBg`: Sets the background color of the widget button.
|
||||
11. `size`: Sets the size of the widget (`small` or `medium`).
|
||||
|
||||
|
||||
### How to use DocsGPTWidget with [Nextra](https://nextra.site/) (Next.js + MDX)
|
||||
Install your widget as described above and then go to your `pages/` folder and create a new file `_app.js` with the following content:
|
||||
@@ -55,22 +61,30 @@ export default function MyApp({ Component, pageProps }) {
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DocsGPT Widget</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app');
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
|
||||
<title>HTML + CSS</title>
|
||||
<link rel="stylesheet" href="styles.css" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>This is a simple HTML + CSS template!</h1>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script
|
||||
src="https://unpkg.com/docsgpt/dist/modern/main.js"
|
||||
type="module"
|
||||
></script>
|
||||
<script type="module">
|
||||
window.onload = function () {
|
||||
renderDocsGPTWidget("app", {
|
||||
apiKey: "",
|
||||
size: "medium",
|
||||
});
|
||||
};
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
To link the widget to your api and your documents you can pass parameters to the renderDocsGPTWidget('div id', { parameters }).
|
||||
@@ -82,22 +96,24 @@ To link the widget to your api and your documents you can pass parameters to the
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DocsGPT Widget</title>
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app', {
|
||||
apiHost: 'http://localhost:7001',
|
||||
selectDocs: 'default',
|
||||
apiKey: '',
|
||||
apiKey:"",
|
||||
avatar: 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title: 'Get AI assistance',
|
||||
description: "DocsGPT's AI Chatbot is here to help",
|
||||
heroTitle: 'Welcome to DocsGPT!',
|
||||
heroDescription: 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.'
|
||||
heroDescription: 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.',
|
||||
theme:"dark",
|
||||
buttonIcon:"https://your-icon",
|
||||
buttonBg:"#222327"
|
||||
});
|
||||
}
|
||||
</script>
|
||||
|
||||
@@ -28,15 +28,15 @@ Navigate to the sidebar where you will find `Source Docs` option,here you will f
|
||||
|
||||
|
||||
### Step 2
|
||||
Click on the `Upload icon` just beside the source docs options,now borwse and upload the document which you want to train on or select the `remote` option if you have to insert the link of the documentation.
|
||||
Click on the `Upload icon` just beside the source docs options,now browse and upload the document which you want to train on or select the `remote` option if you have to insert the link of the documentation.
|
||||
|
||||
|
||||
### Step 3
|
||||
Now you will be able to see the name of the file uploaded under the Uploaded Files ,now click on `Train`,once you click on train it might take some time to train on the document. You will be able to see the `Training progress` and once the training is completed you can click the `finish` button and there you go your docuemnt is uploaded.
|
||||
Now you will be able to see the name of the file uploaded under the Uploaded Files ,now click on `Train`,once you click on train it might take some time to train on the document. You will be able to see the `Training progress` and once the training is completed you can click the `finish` button and there you go your document is uploaded.
|
||||
|
||||
|
||||
### Step 4
|
||||
Go to `New chat` and from the side bar select the document you uploaded under the `Source Docs` and go ahead with your chat, now you can ask qestions regarding the document you uploaded and you will get the effective answer based on it.
|
||||
Go to `New chat` and from the side bar select the document you uploaded under the `Source Docs` and go ahead with your chat, now you can ask questions regarding the document you uploaded and you will get the effective answer based on it.
|
||||
|
||||
</Steps>
|
||||
|
||||
|
||||
@@ -33,9 +33,17 @@ For open source you have to edit .env file with LLM_NAME with their desired LLM
|
||||
All the supported LLM providers are here application/llm and you can check what env variable are needed for each
|
||||
List of latest supported LLMs are https://github.com/arc53/DocsGPT/blob/main/application/llm/llm_creator.py
|
||||
### Step 3
|
||||
Visit application/llm and select the file of your selected llm and there you will find the speicifc requirements needed to be filled in order to use it,i.e API key of that llm.
|
||||
Visit application/llm and select the file of your selected llm and there you will find the specific requirements needed to be filled in order to use it,i.e API key of that llm.
|
||||
</Steps>
|
||||
|
||||
### For OpenAI-Compatible Endpoints:
|
||||
DocsGPT supports the use of OpenAI-compatible endpoints through base URL substitution. This feature allows you to use alternative AI models or services that implement the OpenAI API interface.
|
||||
|
||||
|
||||
Set the OPENAI_BASE_URL in your environment. You can change .env file with OPENAI_BASE_URL with the desired base URL or docker-compose.yml file and add the environment variable to the backend container.
|
||||
|
||||
> Make sure you have the right API_KEY and correct LLM_NAME.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ export default function MyApp({ Component, pageProps }) {
|
||||
return (
|
||||
<>
|
||||
<Component {...pageProps} />
|
||||
<DocsGPTWidget apiKey="d61a020c-ac8f-4f23-bb98-458e4da3c240" />
|
||||
<DocsGPTWidget apiKey="d61a020c-ac8f-4f23-bb98-458e4da3c240" theme="dark" size="medium" />
|
||||
</>
|
||||
)
|
||||
}
|
||||
@@ -51,6 +51,9 @@ const config = {
|
||||
footer: {
|
||||
text: `MIT ${new Date().getFullYear()} © DocsGPT`,
|
||||
},
|
||||
editLink: {
|
||||
content: 'Edit this page on GitHub',
|
||||
},
|
||||
logo() {
|
||||
return (
|
||||
<div className="flex items-center gap-2">
|
||||
|
||||
28
extensions/chrome/package-lock.json
generated
28
extensions/chrome/package-lock.json
generated
@@ -107,12 +107,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
@@ -260,9 +260,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
@@ -884,12 +884,12 @@
|
||||
"dev": true
|
||||
},
|
||||
"braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
}
|
||||
},
|
||||
"camelcase-css": {
|
||||
@@ -1000,9 +1000,9 @@
|
||||
}
|
||||
},
|
||||
"fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
|
||||
@@ -1,25 +1,60 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
import logging
|
||||
import aiohttp
|
||||
import discord
|
||||
import requests
|
||||
from discord.ext import commands
|
||||
import dotenv
|
||||
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# Replace 'YOUR_BOT_TOKEN' with your bot's token
|
||||
# Enable logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Bot configuration
|
||||
TOKEN = os.getenv("DISCORD_TOKEN")
|
||||
PREFIX = '@DocsGPT'
|
||||
BASE_API_URL = 'http://localhost:7091'
|
||||
PREFIX = '!' # Command prefix
|
||||
BASE_API_URL = os.getenv("API_BASE", "https://gptcloud.arc53.com")
|
||||
API_URL = BASE_API_URL + "/api/answer"
|
||||
API_KEY = os.getenv("API_KEY")
|
||||
|
||||
intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
|
||||
bot = commands.Bot(command_prefix=PREFIX, intents=intents)
|
||||
|
||||
# Store conversation history per user
|
||||
conversation_histories = {}
|
||||
|
||||
def chunk_string(text, max_length=2000):
|
||||
"""Splits a string into chunks of a specified maximum length."""
|
||||
# Create list to store the split strings
|
||||
chunks = []
|
||||
# Loop through the text, create substrings with max_length
|
||||
while len(text) > max_length:
|
||||
# Find last space within the limit
|
||||
idx = text.rfind(' ', 0, max_length)
|
||||
# Ensure we don't have an empty part
|
||||
if idx == -1:
|
||||
# If no spaces, just take chunk
|
||||
chunks.append(text[:max_length])
|
||||
text = text[max_length:]
|
||||
else:
|
||||
# Push whatever we've got up to the last space
|
||||
chunks.append(text[:idx])
|
||||
text = text[idx+1:]
|
||||
# Catches the remaining part
|
||||
chunks.append(text)
|
||||
return chunks
|
||||
|
||||
def escape_markdown(text):
|
||||
"""Escapes Discord markdown characters."""
|
||||
escape_chars = r'\*_$$$$()~>#+-=|{}.!'
|
||||
return re.sub(f'([{re.escape(escape_chars)}])', r'\\\1', text)
|
||||
|
||||
def split_string(input_str):
|
||||
"""Splits the input string to detect bot mentions."""
|
||||
pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
|
||||
match = re.match(pattern, input_str)
|
||||
if match:
|
||||
@@ -27,42 +62,97 @@ def split_string(input_str):
|
||||
return str(bot.user.id), content
|
||||
return None, input_str
|
||||
|
||||
|
||||
@bot.event
|
||||
async def on_ready():
|
||||
print(f'{bot.user.name} has connected to Discord!')
|
||||
|
||||
|
||||
async def fetch_answer(question):
|
||||
data = {
|
||||
'sender': 'discord',
|
||||
'question': question,
|
||||
'history': ''
|
||||
async def generate_answer(question, messages, conversation_id):
|
||||
"""Generates an answer using the external API."""
|
||||
payload = {
|
||||
"question": question,
|
||||
"api_key": API_KEY,
|
||||
"history": messages,
|
||||
"conversation_id": conversation_id
|
||||
}
|
||||
headers = {"Content-Type": "application/json",
|
||||
"Accept": "application/json"}
|
||||
response = requests.post(BASE_API_URL + '/api/answer', json=data, headers=headers)
|
||||
if response.status_code == 200:
|
||||
return response.json()['answer']
|
||||
return 'Sorry, I could not fetch the answer.'
|
||||
headers = {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
}
|
||||
timeout = aiohttp.ClientTimeout(total=60)
|
||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||
async with session.post(API_URL, json=payload, headers=headers) as resp:
|
||||
if resp.status == 200:
|
||||
data = await resp.json()
|
||||
conversation_id = data.get("conversation_id")
|
||||
answer = data.get("answer", "Sorry, I couldn't find an answer.")
|
||||
return {"answer": answer, "conversation_id": conversation_id}
|
||||
else:
|
||||
return {"answer": "Sorry, I couldn't find an answer.", "conversation_id": None}
|
||||
|
||||
@bot.command(name="start")
|
||||
async def start(ctx):
|
||||
"""Handles the /start command."""
|
||||
await ctx.send(f"Hi {ctx.author.mention}! How can I assist you today?")
|
||||
|
||||
@bot.command(name="custom_help")
|
||||
async def custom_help_command(ctx):
|
||||
"""Handles the /custom_help command."""
|
||||
help_text = (
|
||||
"Here are the available commands:\n"
|
||||
"`!start` - Begin a new conversation with the bot\n"
|
||||
"`!help` - Display this help message\n\n"
|
||||
"You can also mention me or send a direct message to ask a question!"
|
||||
)
|
||||
await ctx.send(help_text)
|
||||
|
||||
@bot.event
|
||||
async def on_message(message):
|
||||
if message.author == bot.user:
|
||||
return
|
||||
|
||||
content = message.content.strip()
|
||||
prefix, content = split_string(content)
|
||||
if prefix is None:
|
||||
return
|
||||
|
||||
part_prefix = str(bot.user.id)
|
||||
if part_prefix == prefix:
|
||||
answer = await fetch_answer(content)
|
||||
await message.channel.send(answer)
|
||||
|
||||
# Process commands first
|
||||
await bot.process_commands(message)
|
||||
|
||||
# Check if the message is in a DM channel
|
||||
if isinstance(message.channel, discord.DMChannel):
|
||||
content = message.content.strip()
|
||||
else:
|
||||
# In guild channels, check if the message mentions the bot at the start
|
||||
content = message.content.strip()
|
||||
prefix, content = split_string(content)
|
||||
if prefix is None:
|
||||
return
|
||||
part_prefix = str(bot.user.id)
|
||||
if part_prefix != prefix:
|
||||
return # Bot not mentioned at the start, so do not process
|
||||
|
||||
bot.run(TOKEN)
|
||||
# Now process the message
|
||||
user_id = message.author.id
|
||||
if user_id not in conversation_histories:
|
||||
conversation_histories[user_id] = {
|
||||
"history": [],
|
||||
"conversation_id": None
|
||||
}
|
||||
|
||||
conversation = conversation_histories[user_id]
|
||||
conversation["history"].append({"prompt": content})
|
||||
|
||||
# Generate the answer
|
||||
response_doc = await generate_answer(
|
||||
content,
|
||||
conversation["history"],
|
||||
conversation["conversation_id"]
|
||||
)
|
||||
answer = response_doc["answer"]
|
||||
conversation_id = response_doc["conversation_id"]
|
||||
|
||||
answer_chunks = chunk_string(answer)
|
||||
for chunk in answer_chunks:
|
||||
await message.channel.send(chunk)
|
||||
|
||||
conversation["history"][-1]["response"] = answer
|
||||
conversation["conversation_id"] = conversation_id
|
||||
|
||||
# Keep conversation history to last 10 exchanges
|
||||
conversation["history"] = conversation["history"][-10:]
|
||||
|
||||
bot.run(TOKEN)
|
||||
@@ -27,15 +27,18 @@ To link the widget to your api and your documents you can pass parameters to the
|
||||
|
||||
const App = () => {
|
||||
return <DocsGPTWidget
|
||||
apiHost = 'http://localhost:7001',
|
||||
selectDocs = 'default',
|
||||
apiKey = '',
|
||||
avatar = 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title = 'Get AI assistance',
|
||||
description = 'DocsGPT\'s AI Chatbot is here to help',
|
||||
heroTitle = 'Welcome to DocsGPT !',
|
||||
heroDescription='This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.'
|
||||
/>;
|
||||
apiHost="https://your-docsgpt-api.com"
|
||||
apiKey=""
|
||||
avatar = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png"
|
||||
title = "Get AI assistance"
|
||||
description = "DocsGPT's AI Chatbot is here to help"
|
||||
heroTitle = "Welcome to DocsGPT !"
|
||||
heroDescription="This chatbot is built with DocsGPT and utilises GenAI,
|
||||
please review important information using sources."
|
||||
theme = "dark"
|
||||
buttonIcon = "https://your-icon"
|
||||
buttonBg = "#222327"
|
||||
/>;
|
||||
};
|
||||
```
|
||||
|
||||
@@ -80,15 +83,17 @@ To link the widget to your api and your documents you can pass parameters to the
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app', , {
|
||||
renderDocsGPTWidget('app', {
|
||||
apiHost: 'http://localhost:7001',
|
||||
selectDocs: 'default',
|
||||
apiKey: '',
|
||||
apiKey:"",
|
||||
avatar: 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title: 'Get AI assistance',
|
||||
description: "DocsGPT's AI Chatbot is here to help",
|
||||
heroTitle: 'Welcome to DocsGPT !',
|
||||
heroDescription: 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.'
|
||||
heroTitle: 'Welcome to DocsGPT!',
|
||||
heroDescription: 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.',
|
||||
theme:"dark",
|
||||
buttonIcon:"https://your-icon.svg",
|
||||
buttonBg:"#222327"
|
||||
});
|
||||
}
|
||||
</script>
|
||||
|
||||
913
extensions/react-widget/package-lock.json
generated
913
extensions/react-widget/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docsgpt",
|
||||
"version": "0.3.9",
|
||||
"version": "0.4.7",
|
||||
"private": false,
|
||||
"description": "DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖.",
|
||||
"source": "./src/index.html",
|
||||
@@ -31,6 +31,7 @@
|
||||
},
|
||||
"scripts": {
|
||||
"build": "parcel build src/main.tsx --public-url ./",
|
||||
"build:react": "parcel build src/index.ts",
|
||||
"dev": "parcel src/index.html -p 3000",
|
||||
"test": "jest",
|
||||
"lint": "eslint",
|
||||
@@ -39,7 +40,6 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@babel/plugin-transform-flow-strip-types": "^7.23.3",
|
||||
"@bpmn-io/snarkdown": "^2.2.0",
|
||||
"@parcel/resolver-glob": "^2.12.0",
|
||||
"@parcel/transformer-svg-react": "^2.12.0",
|
||||
"@parcel/transformer-typescript-tsc": "^2.12.0",
|
||||
@@ -51,6 +51,7 @@
|
||||
"flow-bin": "^0.229.2",
|
||||
"i": "^0.3.7",
|
||||
"install": "^0.13.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
"npm": "^10.5.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
@@ -63,6 +64,7 @@
|
||||
"@parcel/packager-ts": "^2.12.0",
|
||||
"@parcel/transformer-typescript-types": "^2.12.0",
|
||||
"@types/dompurify": "^3.0.5",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/react": "^18.3.3",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"babel-loader": "^8.0.4",
|
||||
|
||||
43
extensions/react-widget/publish.sh
Executable file
43
extensions/react-widget/publish.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
|
||||
## chmod +x publish.sh - to upgrade ownership
|
||||
set -e
|
||||
cat package.json >> package_copy.json
|
||||
cat package-lock.json >> package-lock_copy.json
|
||||
publish_package() {
|
||||
PACKAGE_NAME=$1
|
||||
BUILD_COMMAND=$2
|
||||
# Update package name in package.json
|
||||
jq --arg name "$PACKAGE_NAME" '.name=$name' package.json > temp.json && mv temp.json package.json
|
||||
|
||||
# Remove 'target' key if the package name is 'docsgpt-react'
|
||||
if [ "$PACKAGE_NAME" = "docsgpt-react" ]; then
|
||||
jq 'del(.targets)' package.json > temp.json && mv temp.json package.json
|
||||
fi
|
||||
|
||||
if [ -d "dist" ]; then
|
||||
echo "Deleting existing dist directory..."
|
||||
rm -rf dist
|
||||
fi
|
||||
|
||||
npm version patch
|
||||
|
||||
npm run "$BUILD_COMMAND"
|
||||
|
||||
# Publish to npm
|
||||
npm publish
|
||||
# Clean up
|
||||
mv package_copy.json package.json
|
||||
mv package-lock_copy.json package-lock.json
|
||||
echo "Published ${PACKAGE_NAME}"
|
||||
}
|
||||
|
||||
# Publish docsgpt package
|
||||
publish_package "docsgpt" "build"
|
||||
|
||||
# Publish docsgpt-react package
|
||||
publish_package "docsgpt-react" "build:react"
|
||||
|
||||
|
||||
rm -rf package_copy.json
|
||||
rm -rf package-lock_copy.json
|
||||
echo "---Process completed---"
|
||||
4
extensions/react-widget/src/assets/dislike.svg
Normal file
4
extensions/react-widget/src/assets/dislike.svg
Normal file
@@ -0,0 +1,4 @@
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M6.37776 10.1001V12.9C6.37776 13.457 6.599 13.9911 6.99282 14.3849C7.38664 14.7788 7.92077 15 8.47772 15L11.2777 8.70011V1.00025H3.38181C3.04419 0.996436 2.71656 1.11477 2.45929 1.33344C2.20203 1.55212 2.03246 1.8564 1.98184 2.19023L1.01585 8.49012C0.985398 8.69076 0.998931 8.89563 1.05551 9.09053C1.1121 9.28543 1.21038 9.46569 1.34355 9.61884C1.47671 9.77198 1.64159 9.89434 1.82674 9.97744C2.01189 10.0605 2.2129 10.1024 2.41583 10.1001H6.37776ZM11.2777 1.00025H13.1466C13.5428 0.993247 13.9277 1.13195 14.2284 1.39002C14.5291 1.64809 14.7245 2.00758 14.7776 2.40023V7.30014C14.7245 7.69279 14.5291 8.05227 14.2284 8.31035C13.9277 8.56842 13.5428 8.70712 13.1466 8.70011H11.2777" fill="none"/>
|
||||
<path d="M11.2777 8.70011L8.47772 15C7.92077 15 7.38664 14.7788 6.99282 14.3849C6.599 13.9911 6.37776 13.457 6.37776 12.9V10.1001H2.41583C2.2129 10.1024 2.01189 10.0605 1.82674 9.97744C1.64159 9.89434 1.47671 9.77198 1.34355 9.61884C1.21038 9.46569 1.1121 9.28543 1.05551 9.09053C0.998931 8.89563 0.985398 8.69076 1.01585 8.49012L1.98184 2.19023C2.03246 1.8564 2.20203 1.55212 2.45929 1.33344C2.71656 1.11477 3.04419 0.996436 3.38181 1.00025H11.2777M11.2777 8.70011V1.00025M11.2777 8.70011H13.1466C13.5428 8.70712 13.9277 8.56842 14.2284 8.31035C14.5291 8.05227 14.7245 7.69279 14.7776 7.30014V2.40023C14.7245 2.00758 14.5291 1.64809 14.2284 1.39002C13.9277 1.13195 13.5428 0.993247 13.1466 1.00025H11.2777" stroke="current" stroke-width="1.4" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.6 KiB |
4
extensions/react-widget/src/assets/like.svg
Normal file
4
extensions/react-widget/src/assets/like.svg
Normal file
@@ -0,0 +1,4 @@
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M9.39995 5.89997V3.09999C9.39995 2.54304 9.1787 2.0089 8.78487 1.61507C8.39105 1.22125 7.85691 1 7.29996 1L4.49998 7.29996V14.9999H12.3959C12.7336 15.0037 13.0612 14.8854 13.3185 14.6667C13.5757 14.448 13.7453 14.1437 13.7959 13.8099L14.7619 7.50996C14.7924 7.30931 14.7788 7.10444 14.7222 6.90954C14.6657 6.71464 14.5674 6.53437 14.4342 6.38123C14.301 6.22808 14.1362 6.10572 13.951 6.02262C13.7659 5.93952 13.5649 5.89767 13.3619 5.89997H9.39995ZM4.49998 14.9999H2.39999C2.02869 14.9999 1.6726 14.8524 1.41005 14.5899C1.1475 14.3273 1 13.9712 1 13.5999V8.69995C1 8.32865 1.1475 7.97256 1.41005 7.71001C1.6726 7.44746 2.02869 7.29996 2.39999 7.29996H4.49998" fill="none"/>
|
||||
<path d="M4.49998 7.29996L7.29996 1C7.85691 1 8.39105 1.22125 8.78487 1.61507C9.1787 2.0089 9.39995 2.54304 9.39995 3.09999V5.89997H13.3619C13.5649 5.89767 13.7659 5.93952 13.951 6.02262C14.1362 6.10572 14.301 6.22808 14.4342 6.38123C14.5674 6.53437 14.6657 6.71464 14.7223 6.90954C14.7788 7.10444 14.7924 7.30931 14.7619 7.50996L13.7959 13.8099C13.7453 14.1437 13.5757 14.448 13.3185 14.6667C13.0612 14.8854 12.7336 15.0037 12.3959 14.9999H4.49998M4.49998 7.29996V14.9999M4.49998 7.29996H2.39999C2.02869 7.29996 1.6726 7.44746 1.41005 7.71001C1.1475 7.97256 1 8.32865 1 8.69995V13.5999C1 13.9712 1.1475 14.3273 1.41005 14.5899C1.6726 14.8524 2.02869 14.9999 2.39999 14.9999H4.49998" stroke="current" stroke-width="1.39999" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
@@ -1,7 +0,0 @@
|
||||
<svg width="36" height="36" viewBox="0 0 18 18" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M4.37891 9.44824H7.75821" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M11.1377 9.44824H12.8273" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M4.37891 6.06934H6.06856" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M9.44824 6.06934H12.8276" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M16.2069 11.1379C16.2069 11.5861 16.0289 12.0158 15.712 12.3327C15.3951 12.6496 14.9654 12.8276 14.5172 12.8276H4.37931L1 16.2069V2.68965C1 2.24153 1.17802 1.81176 1.49489 1.49489C1.81176 1.17802 2.24153 1 2.68965 1H14.5172C14.9654 1 15.3951 1.17802 15.712 1.49489C16.0289 1.81176 16.2069 2.24153 16.2069 2.68965V11.1379Z" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1009 B |
@@ -1,81 +1,219 @@
|
||||
"use client";
|
||||
import React from 'react'
|
||||
import React, { useRef } from 'react'
|
||||
import DOMPurify from 'dompurify';
|
||||
import snarkdown from '@bpmn-io/snarkdown';
|
||||
import styled, { keyframes, createGlobalStyle } from 'styled-components';
|
||||
import { PaperPlaneIcon, RocketIcon, ExclamationTriangleIcon, Cross2Icon } from '@radix-ui/react-icons';
|
||||
import MessageIcon from '../assets/message.svg';
|
||||
import { MESSAGE_TYPE, Query, Status } from '../types/index';
|
||||
import { fetchAnswerStreaming } from '../requests/streamingApi';
|
||||
import { FEEDBACK, MESSAGE_TYPE, Query, Status, WidgetProps } from '../types/index';
|
||||
import { fetchAnswerStreaming, sendFeedback } from '../requests/streamingApi';
|
||||
import { ThemeProvider } from 'styled-components';
|
||||
import Like from "../assets/like.svg"
|
||||
import Dislike from "../assets/dislike.svg"
|
||||
import MarkdownIt from 'markdown-it';
|
||||
|
||||
const GlobalStyles = createGlobalStyle`
|
||||
.response pre {
|
||||
padding: 8px;
|
||||
width: 90%;
|
||||
font-size: 12px;
|
||||
border-radius: 6px;
|
||||
overflow-x: auto;
|
||||
background-color: #1B1C1F;
|
||||
const themes = {
|
||||
dark: {
|
||||
bg: '#222327',
|
||||
text: '#fff',
|
||||
primary: {
|
||||
text: "#FAFAFA",
|
||||
bg: '#222327'
|
||||
},
|
||||
secondary: {
|
||||
text: "#A1A1AA",
|
||||
bg: "#38383b"
|
||||
}
|
||||
},
|
||||
light: {
|
||||
bg: '#fff',
|
||||
text: '#000',
|
||||
primary: {
|
||||
text: "#222327",
|
||||
bg: "#fff"
|
||||
},
|
||||
secondary: {
|
||||
text: "#A1A1AA",
|
||||
bg: "#F6F6F6"
|
||||
}
|
||||
}
|
||||
}
|
||||
.response h1{
|
||||
font-size: 20px;
|
||||
}
|
||||
.response h2{
|
||||
font-size: 18px;
|
||||
}
|
||||
.response h3{
|
||||
font-size: 16px;
|
||||
}
|
||||
.response code:not(pre code){
|
||||
border-radius: 6px;
|
||||
padding: 1px 3px 1px 3px;
|
||||
font-size: 12px;
|
||||
display: inline-block;
|
||||
background-color: #646464;
|
||||
}
|
||||
`;
|
||||
const WidgetContainer = styled.div`
|
||||
display: block;
|
||||
|
||||
const sizesConfig = {
|
||||
small: { size: 'small', width: '320px', height: '400px' },
|
||||
medium: { size: 'medium', width: '400px', height: '80vh' },
|
||||
large: { size: 'large', width: '666px', height: '75vh' },
|
||||
getCustom: (custom: { width: string; height: string; maxWidth?: string; maxHeight?: string }) => ({
|
||||
size: 'custom',
|
||||
width: custom.width,
|
||||
height: custom.height,
|
||||
maxWidth: custom.maxWidth || '968px',
|
||||
maxHeight: custom.maxHeight || '70vh',
|
||||
}),
|
||||
};
|
||||
|
||||
const Overlay = styled.div`
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background-color: rgba(0, 0, 0, 0.5);
|
||||
z-index: 999;
|
||||
transition: opacity 0.5s;
|
||||
`
|
||||
const WidgetContainer = styled.div<{ modal?: boolean, isOpen?: boolean }>`
|
||||
all: initial;
|
||||
position: fixed;
|
||||
right: 10px;
|
||||
bottom: 10px;
|
||||
right: ${props => props.modal ? '50%' : '10px'};
|
||||
bottom: ${props => props.modal ? '50%' : '10px'};
|
||||
z-index: 1000;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
display: none;
|
||||
transform-origin:100% 100%;
|
||||
&.open {
|
||||
animation: createBox 250ms cubic-bezier(0.25, 0.1, 0.25, 1) forwards;
|
||||
}
|
||||
&.close {
|
||||
animation: closeBox 250ms cubic-bezier(0.25, 0.1, 0.25, 1) forwards;
|
||||
}
|
||||
${props => props.modal &&
|
||||
"transform : translate(50%,50%);"
|
||||
}
|
||||
align-items: center;
|
||||
text-align: left;
|
||||
@keyframes createBox {
|
||||
0% {
|
||||
transform: scale(0.6);
|
||||
}
|
||||
90% {
|
||||
transform: scale(1.02);
|
||||
}
|
||||
100% {
|
||||
transform: scale(1);
|
||||
}
|
||||
}
|
||||
|
||||
@keyframes closeBox {
|
||||
0% {
|
||||
transform: scale(1);
|
||||
}
|
||||
10% {
|
||||
transform: scale(1.02);
|
||||
}
|
||||
100% {
|
||||
transform: scale(0.6);
|
||||
}
|
||||
}
|
||||
`;
|
||||
const StyledContainer = styled.div`
|
||||
display: block;
|
||||
const StyledContainer = styled.div<{ isOpen: boolean }>`
|
||||
all: initial;
|
||||
max-height: ${(props) => props.theme.dimensions.maxHeight};
|
||||
max-width: ${(props) => props.theme.dimensions.maxWidth};
|
||||
position: relative;
|
||||
flex-direction: column;
|
||||
justify-content: space-between;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
width: 352px;
|
||||
height: 407px;
|
||||
max-height: 407px;
|
||||
border-radius: 0.75rem;
|
||||
background-color: #222327;
|
||||
background-color: ${(props) => props.theme.primary.bg};
|
||||
font-family: sans-serif;
|
||||
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05), 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
transition: visibility 0.3s, opacity 0.3s;
|
||||
`;
|
||||
const FloatingButton = styled.div`
|
||||
position: fixed;
|
||||
display: flex;
|
||||
border-radius: 12px;
|
||||
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05), 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
padding: 26px 26px 0px 26px;
|
||||
animation: ${({ isOpen, theme }) =>
|
||||
theme.dimensions.size === 'large'
|
||||
? isOpen
|
||||
? 'fadeIn 150ms ease-in forwards'
|
||||
: 'fadeOut 150ms ease-in forwards'
|
||||
: isOpen
|
||||
? 'openContainer 150ms ease-in forwards'
|
||||
: 'closeContainer 250ms ease-in forwards'};
|
||||
@keyframes openContainer {
|
||||
0% {
|
||||
width: 200px;
|
||||
height: 100px;
|
||||
}
|
||||
100% {
|
||||
width: ${(props) => props.theme.dimensions.width};
|
||||
height: ${(props) => props.theme.dimensions.height};
|
||||
border-radius: 12px;
|
||||
}
|
||||
}
|
||||
@keyframes closeContainer {
|
||||
0% {
|
||||
width: ${(props) => props.theme.dimensions.width};
|
||||
height: ${(props) => props.theme.dimensions.height};
|
||||
border-radius: 12px;
|
||||
}
|
||||
100% {
|
||||
width: 200px;
|
||||
height: 100px;
|
||||
}
|
||||
}
|
||||
@keyframes fadeIn {
|
||||
from {
|
||||
opacity: 0;
|
||||
width: ${(props) => props.theme.dimensions.width};
|
||||
height: ${(props) => props.theme.dimensions.height};
|
||||
transform: scale(0.9);
|
||||
}
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: scale(1);
|
||||
width: ${(props) => props.theme.dimensions.width};
|
||||
height: ${(props) => props.theme.dimensions.height};
|
||||
}
|
||||
}
|
||||
@keyframes fadeOut {
|
||||
from {
|
||||
opacity: 1;
|
||||
width: ${(props) => props.theme.dimensions.width};
|
||||
height: ${(props) => props.theme.dimensions.height};
|
||||
}
|
||||
to {
|
||||
opacity: 0;
|
||||
transform: scale(0.9);
|
||||
width: ${(props) => props.theme.dimensions.width};
|
||||
height: ${(props) => props.theme.dimensions.height};
|
||||
}
|
||||
}
|
||||
@media only screen and (max-width: 768px) {
|
||||
max-height: 100vh;
|
||||
max-width: 80vw;
|
||||
overflow: auto;
|
||||
}
|
||||
`;
|
||||
const FloatingButton = styled.div<{ bgcolor: string, hidden: boolean, isAnimatingButton: boolean }>`
|
||||
position: fixed;
|
||||
display: ${props => props.hidden ? "none" : "flex"};
|
||||
z-index: 500;
|
||||
justify-content: center;
|
||||
gap: 8px;
|
||||
padding: 14px;
|
||||
align-items: center;
|
||||
bottom: 1rem;
|
||||
right: 1rem;
|
||||
width: 5rem;
|
||||
height: 5rem;
|
||||
bottom: 16px;
|
||||
color: white;
|
||||
font-family: sans-serif;
|
||||
right: 16px;
|
||||
font-weight: 500;
|
||||
border-radius: 9999px;
|
||||
background-image: linear-gradient(to bottom right, #5AF0EC, #E80D9D);
|
||||
background: ${props => props.bgcolor};
|
||||
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
||||
cursor: pointer;
|
||||
animation: ${props => props.isAnimatingButton ? 'scaleAnimation 200ms forwards' : 'none'};
|
||||
&:hover {
|
||||
transform: scale(1.1);
|
||||
transition: transform 0.2s ease-in-out;
|
||||
transform: scale(1.1);
|
||||
transition: transform 0.2s ease-in-out;
|
||||
}
|
||||
&:not(:hover) {
|
||||
transition: transform 0.2s ease-in-out;
|
||||
}
|
||||
|
||||
@keyframes scaleAnimation {
|
||||
from {
|
||||
transform: scale(1.2);
|
||||
}
|
||||
to {
|
||||
transform: scale(1);
|
||||
}
|
||||
}
|
||||
`;
|
||||
const CancelButton = styled.button`
|
||||
@@ -83,7 +221,7 @@ const CancelButton = styled.button`
|
||||
position: absolute;
|
||||
top: 0;
|
||||
right: 0;
|
||||
margin: 0.5rem;
|
||||
margin: 8px;
|
||||
width: 30px;
|
||||
padding: 0;
|
||||
background-color: transparent;
|
||||
@@ -102,74 +240,129 @@ const CancelButton = styled.button`
|
||||
|
||||
const Header = styled.div`
|
||||
display: flex;
|
||||
align-items: center;
|
||||
padding-inline: 0.75rem;
|
||||
padding-top: 1rem;
|
||||
padding-bottom: 0.5rem;
|
||||
`;
|
||||
|
||||
const IconWrapper = styled.div`
|
||||
padding: 0.5rem;
|
||||
align-items: flex-start;
|
||||
`;
|
||||
|
||||
const ContentWrapper = styled.div`
|
||||
flex: 1;
|
||||
margin-left: 0.5rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap:2px;
|
||||
margin-left: 8px;
|
||||
`;
|
||||
|
||||
const Title = styled.h3`
|
||||
font-size: 1rem;
|
||||
font-size: 14px;
|
||||
font-weight: normal;
|
||||
color: #FAFAFA;
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.25rem;
|
||||
color: ${props => props.theme.primary.text};
|
||||
margin: 0;
|
||||
`;
|
||||
|
||||
const Description = styled.p`
|
||||
font-size: 0.85rem;
|
||||
color: #A1A1AA;
|
||||
margin-top: 0;
|
||||
`;
|
||||
const Conversation = styled.div`
|
||||
height: 16rem;
|
||||
padding-inline: 0.5rem;
|
||||
border-radius: 0.375rem;
|
||||
text-align: left;
|
||||
overflow-y: auto;
|
||||
scrollbar-width: thin;
|
||||
scrollbar-color: #4a4a4a transparent; /* thumb color track color */
|
||||
font-size: 13.75px;
|
||||
color: ${props => props.theme.secondary.text};
|
||||
margin: 0 ;
|
||||
padding: 0 ;
|
||||
`;
|
||||
|
||||
const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
display: flex;
|
||||
font-size: 16px;
|
||||
justify-content: ${props => props.type === 'QUESTION' ? 'flex-end' : 'flex-start'};
|
||||
margin: 0.5rem;
|
||||
const Conversation = styled.div`
|
||||
height: 70%;
|
||||
border-radius: 6px;
|
||||
text-align: left;
|
||||
overflow-y: auto;
|
||||
scrollbar-width: thin;
|
||||
scrollbar-color: #4a4a4a transparent; /* thumb color track color */
|
||||
`;
|
||||
const Message = styled.p<{ type: MESSAGE_TYPE }>`
|
||||
const Feedback = styled.div`
|
||||
background-color: transparent;
|
||||
font-weight: normal;
|
||||
gap: 12px;
|
||||
display: flex;
|
||||
padding: 6px;
|
||||
clear: both;
|
||||
`;
|
||||
const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
display: block;
|
||||
font-size: 16px;
|
||||
position: relative;
|
||||
width: 100%;;
|
||||
float: right;
|
||||
margin: 0px;
|
||||
&:hover ${Feedback} * {
|
||||
visibility: visible ;
|
||||
}
|
||||
`;
|
||||
const Message = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
background: ${props => props.type === 'QUESTION' ?
|
||||
'linear-gradient(to bottom right, #8860DB, #6D42C5)' :
|
||||
'#38383b'};
|
||||
color: #ffff;
|
||||
props.theme.secondary.bg};
|
||||
color: ${props => props.type === 'ANSWER' ? props.theme.primary.text : '#fff'};
|
||||
border: none;
|
||||
max-width: 80%;
|
||||
float: ${props => props.type === 'QUESTION' ? 'right' : 'left'};
|
||||
max-width: ${props => props.type === 'ANSWER' ? '100%' : '80'};
|
||||
overflow: auto;
|
||||
margin: 4px;
|
||||
display: block;
|
||||
line-height: 1.5;
|
||||
padding: 0.75rem;
|
||||
border-radius: 0.375rem;
|
||||
padding: 12px;
|
||||
border-radius: 6px;
|
||||
`;
|
||||
const Markdown = styled.div`
|
||||
pre {
|
||||
padding: 8px;
|
||||
width: 90%;
|
||||
font-size: 12px;
|
||||
border-radius: 6px;
|
||||
overflow-x: auto;
|
||||
background-color: #1B1C1F;
|
||||
color: #fff ;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 16px;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 0px;
|
||||
}
|
||||
|
||||
code:not(pre code) {
|
||||
border-radius: 6px;
|
||||
padding: 1px 3px;
|
||||
font-size: 12px;
|
||||
display: inline-block;
|
||||
background-color: #646464;
|
||||
color: #fff ;
|
||||
}
|
||||
|
||||
code {
|
||||
white-space: pre-wrap ;
|
||||
overflow-wrap: break-word;
|
||||
word-break: break-all;
|
||||
}
|
||||
|
||||
ul{
|
||||
padding:0px;
|
||||
list-style-position: inside;
|
||||
}
|
||||
`
|
||||
const ErrorAlert = styled.div`
|
||||
color: #b91c1c;
|
||||
border:0.1px solid #b91c1c;
|
||||
display: flex;
|
||||
padding:4px;
|
||||
margin:0.7rem;
|
||||
margin:11.2px;
|
||||
opacity: 90%;
|
||||
max-width: 70%;
|
||||
font-weight: 400;
|
||||
border-radius: 0.375rem;
|
||||
border-radius: 6px;
|
||||
justify-content: space-evenly;
|
||||
`
|
||||
//dot loading animation
|
||||
@@ -192,23 +385,18 @@ const Delay = styled(DotAnimation) <{ delay: number }>`
|
||||
`;
|
||||
const PromptContainer = styled.form`
|
||||
background-color: transparent;
|
||||
height: 36px;
|
||||
position: absolute;
|
||||
bottom: 25px;
|
||||
left: 24px;
|
||||
right: 24px;
|
||||
height: ${props => props.theme.dimensions.size == 'large' ? '60px' : '40px'};
|
||||
display: flex;
|
||||
justify-content: space-evenly;
|
||||
`;
|
||||
const StyledInput = styled.input`
|
||||
width: 260px;
|
||||
height: 36px;
|
||||
width: 100%;
|
||||
border: 1px solid #686877;
|
||||
padding-left: 12px;
|
||||
background-color: transparent;
|
||||
font-size: 16px;
|
||||
border-radius: 6px;
|
||||
color: #ffff;
|
||||
color: ${props => props.theme.text};
|
||||
outline: none;
|
||||
`;
|
||||
const StyledButton = styled.button`
|
||||
@@ -216,11 +404,13 @@ const StyledButton = styled.button`
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
background-image: linear-gradient(to bottom right, #5AF0EC, #E80D9D);
|
||||
background-color: rgba(0, 0, 0, 0.3);
|
||||
border-radius: 6px;
|
||||
width: 36px;
|
||||
height: 36px;
|
||||
min-width: ${props => props.theme.dimensions.size === 'large' ? '60px' : '40px'};
|
||||
height: ${props => props.theme.dimensions.size === 'large' ? '60px' : '40px'};
|
||||
margin-left:8px;
|
||||
padding: 0px;
|
||||
|
||||
border: none;
|
||||
cursor: pointer;
|
||||
outline: none;
|
||||
@@ -228,78 +418,95 @@ const StyledButton = styled.button`
|
||||
opacity: 90%;
|
||||
}
|
||||
&:disabled {
|
||||
opacity: 60%;
|
||||
background-image: linear-gradient(to bottom right, #2d938f, #b31877);
|
||||
}`;
|
||||
const HeroContainer = styled.div`
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
left: 50%;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: middle;
|
||||
transform: translate(-50%, -50%);
|
||||
width: 80%;
|
||||
position: relative;
|
||||
width: 90%;
|
||||
max-width: 500px;
|
||||
background-image: linear-gradient(to bottom right, #5AF0EC, #ff1bf4);
|
||||
border-radius: 10px;
|
||||
margin: 0 auto;
|
||||
margin: 16px auto;
|
||||
padding: 2px;
|
||||
`;
|
||||
const HeroWrapper = styled.div`
|
||||
background-color: #222327;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: flex-start;
|
||||
gap: 8px;
|
||||
align-items: middle;
|
||||
background-color: ${props => props.theme.primary.bg};
|
||||
border-radius: 10px;
|
||||
font-weight: normal;
|
||||
padding: 6px;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
padding: 12px;
|
||||
`
|
||||
const HeroTitle = styled.h3`
|
||||
color: #fff;
|
||||
font-size: 17px;
|
||||
margin-bottom: 5px;
|
||||
padding: 2px;
|
||||
color: ${props => props.theme.text};
|
||||
font-size: 16px;
|
||||
margin:0px ;
|
||||
padding: 0px;
|
||||
`;
|
||||
const HeroDescription = styled.p`
|
||||
color: #fff;
|
||||
font-size: 14px;
|
||||
color: ${props => props.theme.text};
|
||||
font-size: 12px;
|
||||
line-height: 1.5;
|
||||
margin: 0px;
|
||||
padding: 0px;
|
||||
`;
|
||||
const Hero = ({ title, description }: { title: string, description: string }) => {
|
||||
const Hyperlink = styled.a`
|
||||
color: #9971EC;
|
||||
text-decoration: none;
|
||||
`;
|
||||
const Tagline = styled.div`
|
||||
text-align: center;
|
||||
display: block;
|
||||
color: ${props => props.theme.secondary.text};
|
||||
padding: 12px ;
|
||||
font-size: 12px;
|
||||
`;
|
||||
|
||||
|
||||
|
||||
const Hero = ({ title, description, theme }: { title: string, description: string, theme: string }) => {
|
||||
return (
|
||||
<>
|
||||
<HeroContainer>
|
||||
<HeroWrapper>
|
||||
<IconWrapper style={{ marginTop: '8px' }}>
|
||||
<RocketIcon color='white' width={20} height={20} />
|
||||
</IconWrapper>
|
||||
<div>
|
||||
<HeroTitle>{title}</HeroTitle>
|
||||
<HeroDescription>
|
||||
{description}
|
||||
</HeroDescription>
|
||||
</div>
|
||||
</HeroWrapper>
|
||||
</HeroContainer>
|
||||
</>
|
||||
<HeroContainer>
|
||||
<HeroWrapper>
|
||||
<RocketIcon color={theme === 'light' ? 'black' : 'white'} width={24} height={24} />
|
||||
<HeroTitle>{title}</HeroTitle>
|
||||
<HeroDescription>{description}</HeroDescription>
|
||||
</HeroWrapper>
|
||||
</HeroContainer>
|
||||
);
|
||||
};
|
||||
export const DocsGPTWidget = ({
|
||||
apiHost = 'https://gptcloud.arc53.com',
|
||||
selectDocs = 'default',
|
||||
apiKey = '82962c9a-aa77-4152-94e5-a4f84fd44c6a',
|
||||
avatar = 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title = 'Get AI assistance',
|
||||
description = 'DocsGPT\'s AI Chatbot is here to help',
|
||||
heroTitle = 'Welcome to DocsGPT !',
|
||||
heroDescription = 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.'
|
||||
}) => {
|
||||
|
||||
heroDescription = 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.',
|
||||
size = 'small',
|
||||
theme = 'dark',
|
||||
buttonIcon = 'https://d3dg1063dc54p9.cloudfront.net/widget/chat.svg',
|
||||
buttonText = 'Ask a question',
|
||||
buttonBg = 'linear-gradient(to bottom right, #5AF0EC, #E80D9D)',
|
||||
collectFeedback = true,
|
||||
deafultOpen = false
|
||||
}: WidgetProps) => {
|
||||
const [prompt, setPrompt] = React.useState('');
|
||||
const [status, setStatus] = React.useState<Status>('idle');
|
||||
const [queries, setQueries] = React.useState<Query[]>([])
|
||||
const [conversationId, setConversationId] = React.useState<string | null>(null)
|
||||
const [open, setOpen] = React.useState<boolean>(false)
|
||||
const [open, setOpen] = React.useState<boolean>(deafultOpen)
|
||||
const [eventInterrupt, setEventInterrupt] = React.useState<boolean>(false); //click or scroll by user while autoScrolling
|
||||
const [isAnimatingButton, setIsAnimatingButton] = React.useState(false);
|
||||
const [isFloatingButtonVisible, setIsFloatingButtonVisible] = React.useState(true);
|
||||
const isBubbleHovered = useRef<boolean>(false)
|
||||
const widgetRef = useRef<HTMLDivElement>(null)
|
||||
const endMessageRef = React.useRef<HTMLDivElement | null>(null);
|
||||
const md = new MarkdownIt();
|
||||
|
||||
const handleUserInterrupt = () => {
|
||||
(status === 'loading') && setEventInterrupt(true);
|
||||
}
|
||||
@@ -316,11 +523,40 @@ export const DocsGPTWidget = ({
|
||||
const lastChild = element?.children?.[element.children.length - 1]
|
||||
lastChild && scrollToBottom(lastChild)
|
||||
};
|
||||
|
||||
React.useEffect(() => {
|
||||
!eventInterrupt && scrollToBottom(endMessageRef.current);
|
||||
}, [queries.length, queries[queries.length - 1]?.response]);
|
||||
|
||||
/**
 * Toggles the LIKE/DISLIKE rating of the query at `index`.
 *
 * - If the query has no response yet, nothing happens.
 * - If the query already carries this exact rating, the rating is removed
 *   locally; no request is sent in that case.
 * - Otherwise the rating is posted with `sendFeedback`, and it is stored on
 *   the query only when the server answers with status 200. A rejected
 *   request is logged and otherwise ignored.
 *
 * NOTE(review): the query object taken from `queries` is mutated in place and
 * then re-published through `setQueries`. The streaming handler in this
 * component also writes to these same objects, so replacing the mutation with
 * copies needs care.
 *
 * @param feedback rating chosen by the user
 * @param index    position of the rated query inside `queries`
 */
async function handleFeedback(feedback: FEEDBACK, index: number) {
  const target = queries[index];
  if (!target.response) {
    return;
  }

  // Puts `target` back at its position so the state array gets a new identity.
  const publishTarget = () => {
    setQueries((prev: Query[]) => {
      return prev.map((entry, position) => (position === index ? target : entry));
    });
  };

  if (target.feedback == feedback) {
    // Same rating clicked again: clear it.
    delete target.feedback;
    publishTarget();
    return;
  }

  sendFeedback({
    question: target.prompt,
    answer: target.response,
    feedback: feedback,
    apikey: apiKey
  }, apiHost)
    .then((res) => {
      if (res.status == 200) {
        target.feedback = feedback;
        publishTarget();
      }
    })
    .catch((err) => console.log("Connection failed", err));
}
|
||||
|
||||
async function stream(question: string) {
|
||||
setStatus('loading')
|
||||
try {
|
||||
@@ -329,20 +565,28 @@ export const DocsGPTWidget = ({
|
||||
question: question,
|
||||
apiKey: apiKey,
|
||||
apiHost: apiHost,
|
||||
selectedDocs: selectDocs,
|
||||
history: queries,
|
||||
conversationId: conversationId,
|
||||
onEvent: (event: MessageEvent) => {
|
||||
const data = JSON.parse(event.data);
|
||||
// check if the 'end' event has been received
|
||||
if (data.type === 'end') {
|
||||
// set status to 'idle'
|
||||
setStatus('idle');
|
||||
|
||||
} else if (data.type === 'id') {
|
||||
}
|
||||
else if (data.type === 'id') {
|
||||
setConversationId(data.id)
|
||||
} else {
|
||||
const result = data.answer;
|
||||
}
|
||||
else if (data.type === 'error') {
|
||||
const updatedQueries = [...queries];
|
||||
updatedQueries[updatedQueries.length - 1].error = data.error;
|
||||
setQueries(updatedQueries);
|
||||
setStatus('idle')
|
||||
}
|
||||
else if (data.type === 'source') {
|
||||
// handle the case where data type === 'source'
|
||||
}
|
||||
else {
|
||||
const result = data.answer ? data.answer : ''; //Fallback to an empty string if data.answer is undefined
|
||||
const streamingResponse = queries[queries.length - 1].response ? queries[queries.length - 1].response : '';
|
||||
const updatedQueries = [...queries];
|
||||
updatedQueries[updatedQueries.length - 1].response = streamingResponse + result;
|
||||
@@ -353,7 +597,7 @@ export const DocsGPTWidget = ({
|
||||
);
|
||||
} catch (error) {
|
||||
const updatedQueries = [...queries];
|
||||
updatedQueries[updatedQueries.length - 1].error = 'error'
|
||||
updatedQueries[updatedQueries.length - 1].error = 'Something went wrong !'
|
||||
setQueries(updatedQueries);
|
||||
setStatus('idle')
|
||||
//setEventInterrupt(false)
|
||||
@@ -371,22 +615,43 @@ export const DocsGPTWidget = ({
|
||||
const handleImageError = (event: React.SyntheticEvent<HTMLImageElement, Event>) => {
|
||||
event.currentTarget.src = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png";
|
||||
};
|
||||
// Closes the widget: flips `open` off immediately, then, after a delay,
// hides the widget element and brings the floating button back with a short
// scale animation.
const handleClose = () => {
  const finishClosing = () => {
    const widgetElement = widgetRef.current;
    if (widgetElement) {
      widgetElement.style.display = "none";
    }
    setIsFloatingButtonVisible(true);
    setIsAnimatingButton(true);
    // 200 ms equals the `scaleAnimation 200ms` duration used by the floating
    // button styles; the flag is cleared once that time has passed.
    setTimeout(() => {
      setIsAnimatingButton(false);
    }, 200);
  };

  setOpen(false);
  // 250 ms equals the `closeContainer 250ms` animation declared in the styles
  // (presumably so the element is hidden only after it has played; confirm).
  setTimeout(finishClosing, 250);
};
|
||||
// Opens the widget: marks it open, hides the floating button and makes the
// widget element visible again (handleClose sets its display to "none").
const handleOpen = () => {
  setOpen(true);
  setIsFloatingButtonVisible(false);

  const widgetElement = widgetRef.current;
  if (widgetElement) {
    widgetElement.style.display = 'block';
  }
};
|
||||
const dimensions =
|
||||
typeof size === 'object' && 'custom' in size
|
||||
? sizesConfig.getCustom(size.custom)
|
||||
: sizesConfig[size];
|
||||
|
||||
return (
|
||||
<>
|
||||
<WidgetContainer>
|
||||
<GlobalStyles />
|
||||
{!open && <FloatingButton onClick={() => setOpen(true)} hidden={open}>
|
||||
<MessageIcon style={{ marginTop: '8px' }} />
|
||||
</FloatingButton>}
|
||||
{open && <StyledContainer>
|
||||
<ThemeProvider theme={{ ...themes[theme], dimensions }}>
|
||||
{open && size === 'large' &&
|
||||
<Overlay onClick={handleClose} />
|
||||
}
|
||||
<FloatingButton bgcolor={buttonBg} onClick={handleOpen} hidden={!isFloatingButtonVisible} isAnimatingButton={isAnimatingButton}>
|
||||
<img width={24} src={buttonIcon} />
|
||||
<span>{buttonText}</span>
|
||||
</FloatingButton>
|
||||
<WidgetContainer ref={widgetRef} className={`${size != "large" && (open ? "open" : "close")}`} modal={size == 'large'}>
|
||||
{<StyledContainer isOpen={open}>
|
||||
<div>
|
||||
<CancelButton onClick={() => setOpen(false)}>
|
||||
<Cross2Icon width={24} height={24} color='white' />
|
||||
<CancelButton onClick={handleClose}>
|
||||
<Cross2Icon width={24} height={24} color={theme === 'light' ? 'black' : 'white'} />
|
||||
</CancelButton>
|
||||
<Header>
|
||||
<IconWrapper>
|
||||
<img style={{ maxWidth: "42px", maxHeight: "42px" }} onError={handleImageError} src={avatar} alt='docs-gpt' />
|
||||
</IconWrapper>
|
||||
<img style={{ transform: 'translateY(-5px)', maxWidth: "42px", maxHeight: "42px" }} onError={handleImageError} src={avatar} alt='docs-gpt' />
|
||||
<ContentWrapper>
|
||||
<Title>{title}</Title>
|
||||
<Description>{description}</Description>
|
||||
@@ -408,23 +673,42 @@ export const DocsGPTWidget = ({
|
||||
</MessageBubble>
|
||||
}
|
||||
{
|
||||
query.response ? <MessageBubble type='ANSWER'>
|
||||
query.response ? <MessageBubble onMouseOver={() => { isBubbleHovered.current = true }} type='ANSWER'>
|
||||
<Message
|
||||
type='ANSWER'
|
||||
ref={(index === queries.length - 1) ? endMessageRef : null}
|
||||
>
|
||||
<div className="response" dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(snarkdown(query.response)) }} />
|
||||
<Markdown
|
||||
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(md.render(query.response)) }}
|
||||
/>
|
||||
</Message>
|
||||
|
||||
{collectFeedback &&
|
||||
<Feedback>
|
||||
<Like
|
||||
style={{
|
||||
stroke: query.feedback == 'LIKE' ? '#8860DB' : '#c0c0c0',
|
||||
visibility: query.feedback == 'LIKE' ? 'visible' : 'hidden'
|
||||
}}
|
||||
fill='none'
|
||||
onClick={() => handleFeedback("LIKE", index)} />
|
||||
<Dislike
|
||||
style={{
|
||||
stroke: query.feedback == 'DISLIKE' ? '#ed8085' : '#c0c0c0',
|
||||
visibility: query.feedback == 'DISLIKE' ? 'visible' : 'hidden'
|
||||
}}
|
||||
fill='none'
|
||||
onClick={() => handleFeedback("DISLIKE", index)} />
|
||||
</Feedback>}
|
||||
</MessageBubble>
|
||||
: <div>
|
||||
{
|
||||
query.error ? <ErrorAlert>
|
||||
<IconWrapper>
|
||||
<ExclamationTriangleIcon style={{ marginTop: '4px' }} width={22} height={22} color='#b91c1c' />
|
||||
</IconWrapper>
|
||||
|
||||
<ExclamationTriangleIcon width={22} height={22} color='#b91c1c' />
|
||||
<div>
|
||||
<h5 style={{ margin: 2 }}>Network Error</h5>
|
||||
<span style={{ margin: 2, fontSize: '13px' }}>Something went wrong !</span>
|
||||
<span style={{ margin: 2, fontSize: '13px' }}>{query.error}</span>
|
||||
</div>
|
||||
</ErrorAlert>
|
||||
: <MessageBubble type='ANSWER'>
|
||||
@@ -439,22 +723,27 @@ export const DocsGPTWidget = ({
|
||||
}
|
||||
</React.Fragment>)
|
||||
})
|
||||
: <Hero title={heroTitle} description={heroDescription} />
|
||||
: <Hero title={heroTitle} description={heroDescription} theme={theme} />
|
||||
}
|
||||
</Conversation>
|
||||
|
||||
<PromptContainer
|
||||
onSubmit={handleSubmit}>
|
||||
<StyledInput
|
||||
value={prompt} onChange={(event) => setPrompt(event.target.value)}
|
||||
type='text' placeholder="What do you want to do?" />
|
||||
<StyledButton
|
||||
disabled={prompt.length == 0 || status !== 'idle'}>
|
||||
<PaperPlaneIcon width={15} height={15} color='white' />
|
||||
</StyledButton>
|
||||
</PromptContainer>
|
||||
<div>
|
||||
<PromptContainer
|
||||
onSubmit={handleSubmit}>
|
||||
<StyledInput
|
||||
value={prompt} onChange={(event) => setPrompt(event.target.value)}
|
||||
type='text' placeholder="Ask your question" />
|
||||
<StyledButton
|
||||
disabled={prompt.trim().length == 0 || status !== 'idle'}>
|
||||
<PaperPlaneIcon width={18} height={18} color='white' />
|
||||
</StyledButton>
|
||||
</PromptContainer>
|
||||
<Tagline>
|
||||
Powered by
|
||||
<Hyperlink target='_blank' href='https://www.docsgpt.cloud/'>DocsGPT</Hyperlink>
|
||||
</Tagline>
|
||||
</div>
|
||||
</StyledContainer>}
|
||||
</WidgetContainer>
|
||||
</>
|
||||
</ThemeProvider>
|
||||
)
|
||||
}
|
||||
@@ -2,10 +2,11 @@ import React from 'react';
|
||||
import { createRoot } from 'react-dom/client';
|
||||
import { DocsGPTWidget } from './components/DocsGPTWidget';
|
||||
|
||||
const renderWidget = (elementId: string, props = {}) => {
|
||||
const root = createRoot(document.getElementById(elementId) as HTMLElement);
|
||||
root.render(<DocsGPTWidget {...props} />);
|
||||
};
|
||||
|
||||
(window as any).renderDocsGPTWidget = renderWidget;
|
||||
if (typeof window !== 'undefined') {
|
||||
const renderWidget = (elementId: string, props = {}) => {
|
||||
const root = createRoot(document.getElementById(elementId) as HTMLElement);
|
||||
root.render(<DocsGPTWidget {...props} />);
|
||||
};
|
||||
(window as any).renderDocsGPTWidget = renderWidget;
|
||||
}
|
||||
export { DocsGPTWidget };
|
||||
@@ -1,92 +1,106 @@
|
||||
import { FEEDBACK } from "@/types";
|
||||
interface HistoryItem {
|
||||
prompt: string;
|
||||
response?: string;
|
||||
}
|
||||
prompt: string;
|
||||
response?: string;
|
||||
}
|
||||
interface FetchAnswerStreamingProps {
|
||||
question?: string;
|
||||
apiKey?: string;
|
||||
selectedDocs?: string;
|
||||
history?: HistoryItem[];
|
||||
conversationId?: string | null;
|
||||
apiHost?: string;
|
||||
onEvent?: (event: MessageEvent) => void;
|
||||
}
|
||||
question?: string;
|
||||
apiKey?: string;
|
||||
selectedDocs?: string;
|
||||
history?: HistoryItem[];
|
||||
conversationId?: string | null;
|
||||
apiHost?: string;
|
||||
onEvent?: (event: MessageEvent) => void;
|
||||
}
|
||||
interface FeedbackPayload {
|
||||
question: string;
|
||||
answer: string;
|
||||
apikey: string;
|
||||
feedback: FEEDBACK;
|
||||
}
|
||||
export function fetchAnswerStreaming({
|
||||
question = '',
|
||||
apiKey = '',
|
||||
selectedDocs = '',
|
||||
history = [],
|
||||
conversationId = null,
|
||||
apiHost = '',
|
||||
onEvent = () => {console.log("Event triggered, but no handler provided.");}
|
||||
}: FetchAnswerStreamingProps): Promise<void> {
|
||||
let docPath = 'default';
|
||||
if (selectedDocs) {
|
||||
docPath = selectedDocs;
|
||||
}
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const body = {
|
||||
question: question,
|
||||
api_key: apiKey,
|
||||
embeddings_key: apiKey,
|
||||
active_docs: docPath,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
model: 'default'
|
||||
};
|
||||
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.body) throw Error('No response body');
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
let counterrr = 0;
|
||||
const processStream = ({
|
||||
done,
|
||||
value,
|
||||
}: ReadableStreamReadResult<Uint8Array>) => {
|
||||
if (done) {
|
||||
resolve();
|
||||
return;
|
||||
question = '',
|
||||
apiKey = '',
|
||||
history = [],
|
||||
conversationId = null,
|
||||
apiHost = '',
|
||||
onEvent = () => { console.log("Event triggered, but no handler provided."); }
|
||||
}: FetchAnswerStreamingProps): Promise<void> {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const body = {
|
||||
question: question,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
model: 'default',
|
||||
api_key: apiKey
|
||||
};
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.body) throw Error('No response body');
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
let counterrr = 0;
|
||||
const processStream = ({
|
||||
done,
|
||||
value,
|
||||
}: ReadableStreamReadResult<Uint8Array>) => {
|
||||
if (done) {
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
counterrr += 1;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (let line of lines) {
|
||||
if (line.trim() == '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
counterrr += 1;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (let line of lines) {
|
||||
if (line.trim() == '') {
|
||||
continue;
|
||||
}
|
||||
if (line.startsWith('data:')) {
|
||||
line = line.substring(5);
|
||||
}
|
||||
|
||||
const messageEvent = new MessageEvent('message', {
|
||||
data: line,
|
||||
});
|
||||
|
||||
onEvent(messageEvent); // handle each message
|
||||
if (line.startsWith('data:')) {
|
||||
line = line.substring(5);
|
||||
}
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
};
|
||||
|
||||
|
||||
const messageEvent = new MessageEvent('message', {
|
||||
data: line,
|
||||
});
|
||||
|
||||
onEvent(messageEvent); // handle each message
|
||||
}
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Connection failed:', error);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Connection failed:', error);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
 * Posts one feedback record to `${apiHost}/api/feedback` as JSON.
 *
 * The payload's `apikey` field is sent under the key `api_key`; the other
 * fields keep their names. The response is returned untouched, so callers
 * are responsible for checking its status.
 *
 * @param payload question, answer, rating and API key to submit
 * @param apiHost base URL of the API, without a trailing slash
 * @returns the promise produced by `fetch`
 */
export const sendFeedback = (payload: FeedbackPayload, apiHost: string): Promise<Response> => {
  const endpoint = `${apiHost}/api/feedback`;
  const { question, answer, feedback, apikey } = payload;
  const requestBody = JSON.stringify({
    question,
    answer,
    feedback,
    api_key: apikey,
  });

  return fetch(endpoint, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: requestBody,
  });
};
|
||||
@@ -1,7 +1,7 @@
|
||||
export type MESSAGE_TYPE = 'QUESTION' | 'ANSWER' | 'ERROR';
|
||||
export type Status = 'idle' | 'loading' | 'failed';
|
||||
export type FEEDBACK = 'LIKE' | 'DISLIKE';
|
||||
|
||||
export type THEME = 'light' | 'dark';
|
||||
export interface Query {
|
||||
prompt: string;
|
||||
response?: string;
|
||||
@@ -10,4 +10,27 @@ export interface Query {
|
||||
sources?: { title: string; text: string }[];
|
||||
conversationId?: string | null;
|
||||
title?: string | null;
|
||||
}
|
||||
export interface WidgetProps {
|
||||
apiHost?: string;
|
||||
apiKey?: string;
|
||||
avatar?: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
heroTitle?: string;
|
||||
heroDescription?: string;
|
||||
size?: 'small' | 'medium' | 'large' | {
|
||||
custom: {
|
||||
width: string;
|
||||
height: string;
|
||||
maxWidth?: string;
|
||||
maxHeight?: string;
|
||||
};
|
||||
};
|
||||
theme?:THEME,
|
||||
buttonIcon?:string;
|
||||
buttonText?:string;
|
||||
buttonBg?:string;
|
||||
collectFeedback?:boolean;
|
||||
deafultOpen?: boolean;
|
||||
}
|
||||
3
extensions/slack-bot/.gitignore
vendored
Normal file
3
extensions/slack-bot/.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
.env
|
||||
.venv/
|
||||
get-pip.py
|
||||
84
extensions/slack-bot/Readme.md
Normal file
84
extensions/slack-bot/Readme.md
Normal file
@@ -0,0 +1,84 @@
|
||||
|
||||
# Slack Bot Configuration Guide
|
||||
|
||||
> **Note:** The following guidelines must be followed on the [Slack API website](https://api.slack.com/) for setting up your Slack app and generating the necessary tokens.
|
||||
|
||||
## Step-by-Step Instructions
|
||||
|
||||
### 1. Navigate to Your Apps
|
||||
- Go to the Slack API page for apps, select **Create an App**, and choose the **From scratch** option.
|
||||
|
||||
### 2. App Creation
|
||||
- Name your app and choose the workspace where you wish to add the assistant.
|
||||
|
||||
### 3. Enabling Socket Mode
|
||||
- Navigate to **Settings > Socket Mode** and enable **Socket Mode**.
|
||||
- This action will generate an App-level token. Select the `connections:write` scope and copy the App-level token for future use.
|
||||
|
||||
### 4. Socket Naming
|
||||
- Assign a name to your socket as per your preference.
|
||||
|
||||
### 5. Basic Information Setup
|
||||
- Go to **Basic Information** (under **Settings**) and configure the following:
|
||||
- Assistant name
|
||||
- App icon
|
||||
- Background color
|
||||
|
||||
### 6. Bot Token and Permissions
|
||||
- In the **OAuth & Permissions** option found under the **Features** section, retrieve the Bot Token. Save it for future usage.
|
||||
- You will also need to add specific bot token scopes:
|
||||
- `app_mentions:read`
|
||||
- `assistant:write`
|
||||
- `chat:write`
|
||||
- `chat:write.public`
|
||||
- `im:history`
|
||||
|
||||
### 7. Enable Events
|
||||
- From **Event Subscriptions**, enable events and add the following Bot User events:
|
||||
- `app_mention`
|
||||
- `assistant_thread_context_changed`
|
||||
- `assistant_thread_started`
|
||||
- `message.im`
|
||||
|
||||
### 8. Agent/Assistant Toggle
|
||||
- In the **Features > Agent & Assistants** section, toggle on the Agent or Assistant option.
|
||||
- In the **Suggested Prompts** setting, leave it as `dynamic` (this is the default setting).
|
||||
|
||||
---
|
||||
|
||||
## Code-Side Configuration Guide
|
||||
|
||||
This section focuses on generating and setting up the necessary tokens in the `.env` file, using the `.env-example` as a template.
|
||||
|
||||
### Step 1: Generating Required Keys
|
||||
|
||||
1. **SLACK_APP_TOKEN**
|
||||
- Navigate to **Settings > Socket Mode** in the Slack API and enable **Socket Mode**.
|
||||
- Copy the App-level token generated (usually starts with `xapp-`).
|
||||
|
||||
2. **SLACK_BOT_TOKEN**
|
||||
- Go to **OAuth & Permissions** (under the **Features** section in Slack API).
|
||||
- Retrieve the **Bot Token** (starts with `xoxb-`).
|
||||
|
||||
3. **DOCSGPT_API_KEY**
|
||||
- Go to the **DocsGPT website**.
|
||||
- Navigate to **Settings > Chatbots > Create New** to generate a DocsGPT API Key.
|
||||
- Copy the generated key for use.
|
||||
|
||||
### Step 2: Creating and Updating the `.env` File
|
||||
|
||||
1. Create a new `.env` file in the root of your project (if it doesn’t already exist).
|
||||
2. Use the `.env-example` as a reference and update the file with the following keys and values:
|
||||
|
||||
```bash
|
||||
# .env file
|
||||
SLACK_APP_TOKEN=xapp-your-generated-app-token
|
||||
SLACK_BOT_TOKEN=xoxb-your-generated-bot-token
|
||||
DOCSGPT_API_KEY=your-docsgpt-generated-api-key
|
||||
```
|
||||
|
||||
Replace the placeholder values with the actual tokens generated from the Slack API and DocsGPT as per the steps outlined above.
|
||||
|
||||
---
|
||||
|
||||
This concludes the guide for both setting up the Slack API and configuring the `.env` file on the code side.
|
||||
112
extensions/slack-bot/app.py
Normal file
112
extensions/slack-bot/app.py
Normal file
@@ -0,0 +1,112 @@
|
||||
import os
|
||||
import hashlib
|
||||
import httpx
|
||||
import re
|
||||
from slack_bolt.async_app import AsyncApp
|
||||
from slack_bolt.adapter.socket_mode.async_handler import AsyncSocketModeHandler
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
API_BASE = os.getenv("API_BASE", "https://gptcloud.arc53.com")
|
||||
API_URL = API_BASE + "/api/answer"
|
||||
|
||||
# Slack bot token and app-level (Socket Mode) token
|
||||
SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN")
|
||||
SLACK_APP_TOKEN = os.getenv("SLACK_APP_TOKEN")
|
||||
|
||||
# DocsGPT API key (set DOCSGPT_API_KEY in your .env file)
|
||||
DOCSGPT_API_KEY = os.getenv("DOCSGPT_API_KEY")
|
||||
|
||||
# Initialize Slack app
|
||||
app = AsyncApp(token=SLACK_BOT_TOKEN)
|
||||
|
||||
def encode_conversation_id(conversation_id: str) -> str:
    """
    Derive a stable 24-character hexadecimal id from a Slack identifier.

    The input is hashed with SHA-256 and the first 24 hex characters of the
    digest are returned, so the same input always yields the same id.

    Args:
        conversation_id (str): Any string identifying the Slack conversation
            (the message handler in this file passes the thread timestamp).

    Returns:
        str: The first 24 lowercase hex characters of the SHA-256 digest.
    """
    # NOTE(review): 24 hex characters presumably matches the id format the
    # DocsGPT backend expects for conversation ids -- confirm against the API.
    return hashlib.sha256(conversation_id.encode()).hexdigest()[:24]
|
||||
|
||||
async def generate_answer(question: str, messages: list, conversation_id: str | None) -> dict:
    """Ask the DocsGPT answer endpoint a question and return its reply.

    Args:
        question (str): The user's question, sent as ``question``.
        messages (list): Prior conversation turns, sent as ``history``.
        conversation_id (str | None): Conversation id forwarded to the API.

    Returns:
        dict: ``{"answer": ..., "conversation_id": ...}`` taken from the API
        response. On any failure (transport error or timeout, non-200 status,
        or a body that is not valid JSON) the answer is a generic apology and
        ``conversation_id`` is ``None``.
    """
    fallback = {"answer": "Sorry, I couldn't find an answer.", "conversation_id": None}
    payload = {
        "question": question,
        "api_key": DOCSGPT_API_KEY,
        "history": messages,
        "conversation_id": conversation_id,
    }
    headers = {
        "Content-Type": "application/json; charset=utf-8"
    }
    timeout = 60.0

    try:
        async with httpx.AsyncClient() as client:
            response = await client.post(API_URL, json=payload, headers=headers, timeout=timeout)
    except httpx.HTTPError as exc:
        # Connection failures and timeouts must not crash the Slack handler.
        print(f"DocsGPT request failed: {exc!r}")
        return fallback

    if response.status_code != 200:
        # Error bodies are not guaranteed to be JSON, so log the raw text
        # instead of parsing it.
        print(response.text)
        return fallback

    try:
        data = response.json()
    except ValueError:
        print(response.text)
        return fallback

    return {
        "answer": data.get("answer", "Sorry, I couldn't find an answer."),
        "conversation_id": data.get("conversation_id"),
    }
|
||||
|
||||
@app.message(".*")
async def message_docs(message, say):
    """Answer an incoming Slack message with a DocsGPT reply in its thread.

    Args:
        message (dict): The Slack message event; ``channel``, ``text`` and
            ``ts`` are read, plus ``thread_ts`` when present.
        say: Injected by Slack Bolt; unused because replies are posted through
            ``app.client`` so the thread can be targeted explicitly.
    """
    client = app.client
    channel = message['channel']
    user_query = message['text']

    # Only threaded messages carry ``thread_ts``; fall back to the message's
    # own timestamp so a non-threaded message does not raise KeyError and the
    # reply starts a thread under it.
    thread_ts = message.get('thread_ts')
    reply_ts = thread_ts or message['ts']

    if thread_ts:
        # The status indicator is tied to an existing thread, so it is only
        # set when the event actually arrived inside one.
        await client.assistant_threads_setStatus(
            channel_id=channel,
            thread_ts=thread_ts,
            status="is generating your answer...",
        )

    docs_gpt_channel_id = encode_conversation_id(reply_ts)

    # Get response from DocsGPT
    response = await generate_answer(user_query, [], docs_gpt_channel_id)
    answer = convert_to_slack_markdown(response['answer'])

    # Respond in Slack
    await client.chat_postMessage(
        text=answer,
        mrkdwn=True,
        channel=channel,
        thread_ts=reply_ts,
    )
|
||||
|
||||
def convert_to_slack_markdown(markdown_text: str):
    """Rewrite common Markdown constructs into Slack's mrkdwn equivalents.

    Conversions applied, in order:
      * ``**bold**`` becomes ``*bold*``.
      * A ``-``/``*`` bullet with at most one leading space or tab becomes `` • ``.
      * A ``-``/``*`` bullet indented by two or more spaces/tabs becomes ``\\t◦ ``.
      * A ``#`` to ``######`` header line becomes a bold line.

    ``_italic_`` and inline `` `code` `` are left untouched because the text
    is already written the same way in both syntaxes.

    Args:
        markdown_text (str): Markdown text to convert.

    Returns:
        str: The converted text.
    """
    # Convert bold **text** to *text* for Slack
    slack_text = re.sub(r'\*\*(.*?)\*\*', r'*\1*', markdown_text)

    # The line-anchored patterns use [ \t] rather than \s: \s also matches
    # newlines, which let a match start on a preceding blank line and swallow it.

    # Convert bullet points with single or no spaces to filled bullets (•)
    slack_text = re.sub(r'^[ \t]?[-*][ \t]+', ' • ', slack_text, flags=re.MULTILINE)

    # Convert bullet points with multiple spaces to hollow bullets (◦)
    slack_text = re.sub(r'^[ \t]{2,}[-*][ \t]+', '\t◦ ', slack_text, flags=re.MULTILINE)

    # Convert headers (##) to bold in Slack
    slack_text = re.sub(r'^[ \t]*#{1,6}[ \t]*(.*?)$', r'*\1*', slack_text, flags=re.MULTILINE)

    return slack_text
|
||||
|
||||
async def main():
    """Start the Socket Mode handler for the Slack app.

    Raises:
        KeyError: If the ``SLACK_APP_TOKEN`` environment variable is not set.
    """
    app_token = os.environ["SLACK_APP_TOKEN"]
    socket_handler = AsyncSocketModeHandler(app, app_token)
    await socket_handler.start_async()


# Start the app
if __name__ == "__main__":
    import asyncio

    asyncio.run(main())
|
||||
10
extensions/slack-bot/requirements.txt
Normal file
10
extensions/slack-bot/requirements.txt
Normal file
@@ -0,0 +1,10 @@
|
||||
aiohttp>=3,<4
|
||||
certifi==2024.7.4
|
||||
h11==0.14.0
|
||||
httpcore==1.0.5
|
||||
httpx==0.27.0
|
||||
idna==3.7
|
||||
python-dotenv==1.0.1
|
||||
sniffio==1.3.1
|
||||
slack-bolt==1.21.0
|
||||
bson==0.5.10
|
||||
5348
frontend/package-lock.json
generated
5348
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -19,47 +19,50 @@
|
||||
]
|
||||
},
|
||||
"dependencies": {
|
||||
"@reduxjs/toolkit": "^1.9.2",
|
||||
"@vercel/analytics": "^0.1.10",
|
||||
"i18next": "^23.11.5",
|
||||
"@reduxjs/toolkit": "^2.2.7",
|
||||
"chart.js": "^4.4.4",
|
||||
"i18next": "^23.15.1",
|
||||
"i18next-browser-languagedetector": "^8.0.0",
|
||||
"prop-types": "^15.8.1",
|
||||
"react": "^18.2.0",
|
||||
"react-chartjs-2": "^5.2.0",
|
||||
"react-copy-to-clipboard": "^5.1.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-dropzone": "^14.2.3",
|
||||
"react-i18next": "^14.1.2",
|
||||
"react-markdown": "^8.0.7",
|
||||
"react-i18next": "^15.0.2",
|
||||
"react-markdown": "^9.0.1",
|
||||
"react-redux": "^8.0.5",
|
||||
"react-router-dom": "^6.8.1",
|
||||
"react-syntax-highlighter": "^15.5.0",
|
||||
"remark-gfm": "^3.0.0"
|
||||
"rehype-katex": "^7.0.1",
|
||||
"remark-gfm": "^4.0.0",
|
||||
"remark-math": "^6.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/react": "^18.0.27",
|
||||
"@types/react-dom": "^18.0.10",
|
||||
"@types/react-syntax-highlighter": "^15.5.6",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"@types/react-syntax-highlighter": "^15.5.13",
|
||||
"@typescript-eslint/eslint-plugin": "^5.51.0",
|
||||
"@typescript-eslint/parser": "^5.51.0",
|
||||
"@vitejs/plugin-react": "^4.2.1",
|
||||
"@typescript-eslint/parser": "^5.62.0",
|
||||
"@vitejs/plugin-react": "^4.3.1",
|
||||
"autoprefixer": "^10.4.13",
|
||||
"eslint": "^8.33.0",
|
||||
"eslint-config-prettier": "^8.6.0",
|
||||
"eslint": "^8.57.1",
|
||||
"eslint-config-prettier": "^9.1.0",
|
||||
"eslint-config-standard-with-typescript": "^34.0.0",
|
||||
"eslint-plugin-import": "^2.27.5",
|
||||
"eslint-plugin-n": "^15.6.1",
|
||||
"eslint-plugin-prettier": "^4.2.1",
|
||||
"eslint-plugin-promise": "^6.1.1",
|
||||
"eslint-plugin-react": "^7.32.2",
|
||||
"eslint-plugin-unused-imports": "^2.0.0",
|
||||
"eslint-plugin-import": "^2.30.0",
|
||||
"eslint-plugin-n": "^15.7.0",
|
||||
"eslint-plugin-prettier": "^5.2.1",
|
||||
"eslint-plugin-promise": "^6.6.0",
|
||||
"eslint-plugin-react": "^7.35.0",
|
||||
"eslint-plugin-unused-imports": "^4.1.4",
|
||||
"husky": "^8.0.0",
|
||||
"lint-staged": "^13.1.1",
|
||||
"postcss": "^8.4.31",
|
||||
"prettier": "^2.8.4",
|
||||
"prettier-plugin-tailwindcss": "^0.2.2",
|
||||
"tailwindcss": "^3.2.4",
|
||||
"typescript": "^4.9.5",
|
||||
"vite": "^5.0.13",
|
||||
"lint-staged": "^15.2.10",
|
||||
"postcss": "^8.4.41",
|
||||
"prettier": "^3.3.3",
|
||||
"prettier-plugin-tailwindcss": "^0.6.8",
|
||||
"tailwindcss": "^3.4.11",
|
||||
"typescript": "^5.6.2",
|
||||
"vite": "^5.4.6",
|
||||
"vite-plugin-svgr": "^4.2.0"
|
||||
}
|
||||
}
|
||||
|
||||
BIN
frontend/public/fonts/IBMPlexMono-Medium.ttf
Normal file
BIN
frontend/public/fonts/IBMPlexMono-Medium.ttf
Normal file
Binary file not shown.
BIN
frontend/signal-desktop-keyring.gpg
Normal file
BIN
frontend/signal-desktop-keyring.gpg
Normal file
Binary file not shown.
@@ -1,27 +1,25 @@
|
||||
import { Routes, Route } from 'react-router-dom';
|
||||
import { useEffect } from 'react';
|
||||
import Navigation from './Navigation';
|
||||
import Conversation from './conversation/Conversation';
|
||||
import About from './About';
|
||||
import PageNotFound from './PageNotFound';
|
||||
import { inject } from '@vercel/analytics';
|
||||
import { useMediaQuery } from './hooks';
|
||||
import { useState } from 'react';
|
||||
import Setting from './settings';
|
||||
import './locale/i18n';
|
||||
import { Outlet } from 'react-router-dom';
|
||||
import SharedConversation from './conversation/SharedConversation';
|
||||
import { SharedConversation } from './conversation/SharedConversation';
|
||||
import { useDarkTheme } from './hooks';
|
||||
inject();
|
||||
|
||||
function MainLayout() {
|
||||
const { isMobile } = useMediaQuery();
|
||||
const [navOpen, setNavOpen] = useState(!isMobile);
|
||||
|
||||
return (
|
||||
<div className="dark:bg-raisin-black">
|
||||
<div className="dark:bg-raisin-black relative h-screen overflow-auto">
|
||||
<Navigation navOpen={navOpen} setNavOpen={setNavOpen} />
|
||||
<div
|
||||
className={`min-h-screen ${
|
||||
className={`h-[calc(100dvh-64px)] md:h-screen ${
|
||||
!isMobile
|
||||
? `ml-0 ${!navOpen ? 'md:mx-auto lg:mx-auto' : 'md:ml-72'}`
|
||||
: 'ml-0 md:ml-16'
|
||||
@@ -34,19 +32,12 @@ function MainLayout() {
|
||||
}
|
||||
|
||||
export default function App() {
|
||||
const [isDarkTheme] = useDarkTheme();
|
||||
useEffect(() => {
|
||||
localStorage.setItem('selectedTheme', isDarkTheme ? 'Dark' : 'Light');
|
||||
if (isDarkTheme) {
|
||||
document
|
||||
.getElementById('root')
|
||||
?.classList.add('dark', 'dark:bg-raisin-black');
|
||||
} else {
|
||||
document.getElementById('root')?.classList.remove('dark');
|
||||
}
|
||||
}, [isDarkTheme]);
|
||||
const [, , componentMounted] = useDarkTheme();
|
||||
if (!componentMounted) {
|
||||
return <div />;
|
||||
}
|
||||
return (
|
||||
<>
|
||||
<div className="h-full relative overflow-auto">
|
||||
<Routes>
|
||||
<Route element={<MainLayout />}>
|
||||
<Route index element={<Conversation />} />
|
||||
@@ -56,6 +47,6 @@ export default function App() {
|
||||
<Route path="/share/:identifier" element={<SharedConversation />} />
|
||||
<Route path="/*" element={<PageNotFound />} />
|
||||
</Routes>
|
||||
</>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ export default function Hero({
|
||||
}>;
|
||||
return (
|
||||
<div
|
||||
className={`mt-16 mb-4 flex w-full flex-col justify-end text-black-1000 dark:text-bright-gray sm:w-full lg:mt-6`}
|
||||
className={`pt-20 sm:pt-0 pb-6 sm:pb-12 flex h-full w-full flex-col text-black-1000 dark:text-bright-gray sm:w-full px-2 sm:px-0`}
|
||||
>
|
||||
<div className="flex h-full w-full flex-col items-center justify-center">
|
||||
<div className="flex items-center">
|
||||
@@ -37,7 +37,7 @@ export default function Hero({
|
||||
<Fragment key={key}>
|
||||
<button
|
||||
onClick={() => handleQuestion({ question: demo.query })}
|
||||
className="w-full rounded-full border-2 border-silver px-6 py-4 text-left hover:border-gray-4000 dark:hover:border-gray-3000 xl:min-w-[24vw]"
|
||||
className="w-full rounded-full border border-silver px-6 py-4 text-left hover:border-gray-4000 dark:hover:border-gray-3000 xl:min-w-[24vw]"
|
||||
>
|
||||
<p className="mb-1 font-semibold text-black dark:text-silver">
|
||||
{demo.header}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user