Compare commits
666 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0475e55518 | ||
|
|
872b390cfa | ||
|
|
dc4c539607 | ||
|
|
69f8c76ce2 | ||
|
|
cbdcca7fd8 | ||
|
|
1e7fa988da | ||
|
|
ba35e1422b | ||
|
|
78e99d1171 | ||
|
|
6a12f96fdc | ||
|
|
b79a20151c | ||
|
|
c9976020dd | ||
|
|
e8988e82d0 | ||
|
|
ff95570da6 | ||
|
|
b084e3074d | ||
|
|
bc4f9c3442 | ||
|
|
ecf88e7ea1 | ||
|
|
d6cb66cd2f | ||
|
|
bc2241f67a | ||
|
|
3d292aa485 | ||
|
|
21f46a8aea | ||
|
|
ba6d61cc35 | ||
|
|
27b43c11bd | ||
|
|
a5292c4473 | ||
|
|
f3923488a5 | ||
|
|
d964ebfff8 | ||
|
|
e420eeece4 | ||
|
|
f34713545e | ||
|
|
2b513c7d87 | ||
|
|
7e28893613 | ||
|
|
822674ca78 | ||
|
|
35ebf97fdf | ||
|
|
b456c0ca9f | ||
|
|
bae49891be | ||
|
|
dfb4a67d87 | ||
|
|
8e727a3253 | ||
|
|
d6f03d7a07 | ||
|
|
5ada5bf1a0 | ||
|
|
ae9c935c5c | ||
|
|
95618001aa | ||
|
|
40c361968e | ||
|
|
757abda654 | ||
|
|
862807e863 | ||
|
|
d188db887c | ||
|
|
59b6c56262 | ||
|
|
f92658de82 | ||
|
|
f2b3402c17 | ||
|
|
24badf65a4 | ||
|
|
86442f212f | ||
|
|
9b6d6ecc32 | ||
|
|
1f51277004 | ||
|
|
68cc646a3e | ||
|
|
420ca1b3b4 | ||
|
|
a83e68815a | ||
|
|
d87aebb718 | ||
|
|
a9a4d14e8a | ||
|
|
9ed2fc7359 | ||
|
|
caad382a95 | ||
|
|
ea39fc9c48 | ||
|
|
bf7cce52db | ||
|
|
63a15a3359 | ||
|
|
db34392210 | ||
|
|
cc4e0ba6c1 | ||
|
|
38989f9c68 | ||
|
|
c78c92e539 | ||
|
|
31e694f50d | ||
|
|
5368199517 | ||
|
|
6bbb9176fc | ||
|
|
4209eee2f8 | ||
|
|
f65ebb6b71 | ||
|
|
ef8107e56a | ||
|
|
2293a30f19 | ||
|
|
d7678fd355 | ||
|
|
27d8a5cf99 | ||
|
|
03f6c58ac6 | ||
|
|
4fb52dc6fc | ||
|
|
0232ba3f25 | ||
|
|
5987e4c8e1 | ||
|
|
18ce7c8f2f | ||
|
|
177c9da8b5 | ||
|
|
b5f1a8e90f | ||
|
|
494c3dd1bd | ||
|
|
ad8f78d51e | ||
|
|
5112801c37 | ||
|
|
226adfdba2 | ||
|
|
22c0375dca | ||
|
|
e77f6c9f6f | ||
|
|
5bd7c0ab8b | ||
|
|
97f7f6f7d2 | ||
|
|
d65fc70f07 | ||
|
|
dcae85eae8 | ||
|
|
686a48298b | ||
|
|
8ca590559d | ||
|
|
70251222cc | ||
|
|
e55c68d27e | ||
|
|
da4f2ef6b3 | ||
|
|
dbf2cabd38 | ||
|
|
72e68a163c | ||
|
|
919a87bf47 | ||
|
|
bea0bbfcdb | ||
|
|
c12d7a8d82 | ||
|
|
711ad1750a | ||
|
|
825567d449 | ||
|
|
800439d29e | ||
|
|
e3517dde13 | ||
|
|
f2da8473a4 | ||
|
|
9cc9a6e9b4 | ||
|
|
873406732f | ||
|
|
14ab950a6c | ||
|
|
6cd8e71f4f | ||
|
|
4aeaec9dc7 | ||
|
|
e318228a08 | ||
|
|
d22efbf745 | ||
|
|
90309d5552 | ||
|
|
72842ecd7a | ||
|
|
a1b32ffca9 | ||
|
|
44d225e6ca | ||
|
|
37ab5f9d7a | ||
|
|
61fdcec511 | ||
|
|
45cc4fd97a | ||
|
|
3228b88312 | ||
|
|
a1d3592d08 | ||
|
|
c686d950d0 | ||
|
|
ca779bb0af | ||
|
|
90f64e2527 | ||
|
|
444d50f751 | ||
|
|
2f9c72c1cf | ||
|
|
1bb81614a5 | ||
|
|
888e13e198 | ||
|
|
8166642ff9 | ||
|
|
51c42790b7 | ||
|
|
f105fd1b2c | ||
|
|
fe78e9a336 | ||
|
|
2fce25b0c8 | ||
|
|
6c0da2ea94 | ||
|
|
a353e69648 | ||
|
|
ac930d5504 | ||
|
|
d4cf8037b7 | ||
|
|
fb1fd851b0 | ||
|
|
2ff8c0b128 | ||
|
|
d232229abf | ||
|
|
490e58fb52 | ||
|
|
a8582be54d | ||
|
|
30bb8449e9 | ||
|
|
adb7132e02 | ||
|
|
4a1e488bd7 | ||
|
|
d200db0eeb | ||
|
|
28e06fa684 | ||
|
|
c4cb9b07cb | ||
|
|
817fc5d4b3 | ||
|
|
2de1e5f71a | ||
|
|
5246d85f11 | ||
|
|
9526ed0258 | ||
|
|
736add031c | ||
|
|
d4042ebaa2 | ||
|
|
54e31be3b2 | ||
|
|
b630be8c8a | ||
|
|
0aca41f9a6 | ||
|
|
a3fed0f84b | ||
|
|
1414ad6d50 | ||
|
|
ed6b4dabf8 | ||
|
|
d9309ebc6e | ||
|
|
c49b7613e0 | ||
|
|
4f88b6dc71 | ||
|
|
5c9e6404cc | ||
|
|
80df494787 | ||
|
|
c0886c2785 | ||
|
|
a83a56eecd | ||
|
|
130eb56d09 | ||
|
|
b60b473e02 | ||
|
|
e0504eb957 | ||
|
|
3886e41e94 | ||
|
|
edc54c7120 | ||
|
|
cef1167ef1 | ||
|
|
f456500f3a | ||
|
|
59328ea44d | ||
|
|
0dc840dc8e | ||
|
|
6700028bd1 | ||
|
|
213b1d1d0d | ||
|
|
feab64b09a | ||
|
|
f9f096cca8 | ||
|
|
535d174c2b | ||
|
|
11d2401970 | ||
|
|
232b36d4ae | ||
|
|
b38b159f4e | ||
|
|
606bf1ff58 | ||
|
|
46cec638dd | ||
|
|
8637397c86 | ||
|
|
7502e1881f | ||
|
|
734d5e50c5 | ||
|
|
052ff6727b | ||
|
|
2962dbd6b8 | ||
|
|
392afd6f33 | ||
|
|
fc3f4dff10 | ||
|
|
24d6889b24 | ||
|
|
27e3e22703 | ||
|
|
7b7f609c47 | ||
|
|
5389c8858a | ||
|
|
c2d2fbba96 | ||
|
|
cfc039dae1 | ||
|
|
edf24dc992 | ||
|
|
97710296ac | ||
|
|
acbfc0bb81 | ||
|
|
e1fe2fb093 | ||
|
|
dd5c1ec9ed | ||
|
|
c7f7614646 | ||
|
|
d604398642 | ||
|
|
d40b1d8937 | ||
|
|
49b4b476dc | ||
|
|
c0ec689be9 | ||
|
|
8e26decb5b | ||
|
|
921efcbf4b | ||
|
|
705ab58bfb | ||
|
|
b42e32a955 | ||
|
|
dec60a0fdd | ||
|
|
fa84d5c502 | ||
|
|
34c763caf5 | ||
|
|
e5709dfabc | ||
|
|
5f4f4a8ab9 | ||
|
|
ca9e71087b | ||
|
|
6da483b3ef | ||
|
|
e300145263 | ||
|
|
eb3f0035fe | ||
|
|
6be3a2a142 | ||
|
|
e23893a419 | ||
|
|
7b4c1dcde0 | ||
|
|
4b7cb2a22a | ||
|
|
344a8a3887 | ||
|
|
0afda5dc27 | ||
|
|
0891ef6d0a | ||
|
|
cdb64ecb19 | ||
|
|
b05ac4f2a4 | ||
|
|
411189a076 | ||
|
|
63cf4d46c9 | ||
|
|
3c683f2192 | ||
|
|
d46e59bcd4 | ||
|
|
c45e76ec31 | ||
|
|
44828707ea | ||
|
|
74c047d249 | ||
|
|
50abfb98fe | ||
|
|
6104657970 | ||
|
|
02116d4c05 | ||
|
|
23f993bb54 | ||
|
|
16aedd61da | ||
|
|
5a2f3ad616 | ||
|
|
54971104f8 | ||
|
|
deeffbf77d | ||
|
|
7e8dd6bba8 | ||
|
|
dc4078d744 | ||
|
|
1eb168be55 | ||
|
|
3c6fd365fb | ||
|
|
53c9184057 | ||
|
|
16c9872571 | ||
|
|
17160bc467 | ||
|
|
3b39e58cf3 | ||
|
|
7db055116c | ||
|
|
0262ff1aac | ||
|
|
14def09ce3 | ||
|
|
96e59da6bc | ||
|
|
8f75b0a0c0 | ||
|
|
70a40cfc45 | ||
|
|
ef6b8b9ebc | ||
|
|
f9dbaa9407 | ||
|
|
ed338668d1 | ||
|
|
1e5d94a958 | ||
|
|
51553c565f | ||
|
|
ae36805aa1 | ||
|
|
22b7445ac4 | ||
|
|
9c278d7d0b | ||
|
|
3a1592692e | ||
|
|
bdec956708 | ||
|
|
d0d8a8a3af | ||
|
|
f787962be8 | ||
|
|
57b9b369b7 | ||
|
|
ccda5bdb7e | ||
|
|
bd5fa83fe0 | ||
|
|
59caf381f7 | ||
|
|
181cb1b1bd | ||
|
|
ba2dd2d872 | ||
|
|
4dc5acd68e | ||
|
|
f57116afbe | ||
|
|
99c41c7e34 | ||
|
|
d7b38d9513 | ||
|
|
2c7aad1dcd | ||
|
|
593dba72a8 | ||
|
|
09f2f2a9e7 | ||
|
|
6c583eedb9 | ||
|
|
1c4d7a6ad1 | ||
|
|
f75ed4bc66 | ||
|
|
f487deb7b9 | ||
|
|
ad30a8476c | ||
|
|
60db807443 | ||
|
|
d1dedff9ca | ||
|
|
9a04506b0d | ||
|
|
a58369fbb1 | ||
|
|
d63b5d71a1 | ||
|
|
360d790282 | ||
|
|
a0dd8f8e0f | ||
|
|
db7c001076 | ||
|
|
c96f905d2b | ||
|
|
4b3f04083b | ||
|
|
8276b6c9a9 | ||
|
|
43d6e788dc | ||
|
|
0c062a8485 | ||
|
|
99b649f24e | ||
|
|
7c6532f145 | ||
|
|
052669a0b0 | ||
|
|
0cf86d3bbc | ||
|
|
56a16b862a | ||
|
|
b2fffb2e23 | ||
|
|
3f7a27cdbb | ||
|
|
58e30b8c88 | ||
|
|
4025e55b95 | ||
|
|
e1e63ebd64 | ||
|
|
8279df48bf | ||
|
|
d86a06fab0 | ||
|
|
90b24dd915 | ||
|
|
bacd2a6893 | ||
|
|
0f059f247d | ||
|
|
e2b76d9c29 | ||
|
|
1107a2f2bc | ||
|
|
efd43013da | ||
|
|
7b8458b47d | ||
|
|
84eed09a17 | ||
|
|
35b1a40d49 | ||
|
|
81d7fe3fdb | ||
|
|
02187fed4e | ||
|
|
019bf013ac | ||
|
|
d6e59a6a0a | ||
|
|
46aa862943 | ||
|
|
0413cab0d9 | ||
|
|
3357ce8f33 | ||
|
|
1776f6e7fd | ||
|
|
edfe5e1156 | ||
|
|
0768992848 | ||
|
|
1224f94879 | ||
|
|
b58c5344b8 | ||
|
|
7175bc0595 | ||
|
|
b7a6f5696d | ||
|
|
abf5b89c28 | ||
|
|
d554444b0e | ||
|
|
16ae0725e6 | ||
|
|
61feced541 | ||
|
|
a1d4db2f1e | ||
|
|
357e9af627 | ||
|
|
a41519be63 | ||
|
|
870e6b07c8 | ||
|
|
6f41759519 | ||
|
|
6727c42f18 | ||
|
|
90c367842f | ||
|
|
a0bb6e370e | ||
|
|
f2910ab9d1 | ||
|
|
b4bfed2ccb | ||
|
|
2fcde61b6d | ||
|
|
ffddf10de5 | ||
|
|
6e3bd5e6f3 | ||
|
|
b21230c4d6 | ||
|
|
0a533b64e1 | ||
|
|
15b0e321bd | ||
|
|
4d749340a2 | ||
|
|
0ef6ffa452 | ||
|
|
d7b1310ba3 | ||
|
|
7408454a75 | ||
|
|
07b71468cc | ||
|
|
522e966194 | ||
|
|
937c60c9cf | ||
|
|
bbb1e22163 | ||
|
|
a16e83200a | ||
|
|
d437521710 | ||
|
|
5cbf4cf352 | ||
|
|
2985e3b75b | ||
|
|
f34a75fc5b | ||
|
|
5aa88714b8 | ||
|
|
ce56a414e0 | ||
|
|
ba4a7dcd45 | ||
|
|
85c648da6c | ||
|
|
483f8eb690 | ||
|
|
93c868d698 | ||
|
|
a14e70e3f4 | ||
|
|
a6ff606cae | ||
|
|
651eb3374c | ||
|
|
68c71adc5a | ||
|
|
0c4ca9c94d | ||
|
|
8c04f5b3f1 | ||
|
|
35b29a0a1e | ||
|
|
d289f432b1 | ||
|
|
e16e269775 | ||
|
|
4e5d0c2e84 | ||
|
|
c9a2034936 | ||
|
|
b70fc1151d | ||
|
|
c11034edcd | ||
|
|
804d9b42a5 | ||
|
|
b1bb4e6758 | ||
|
|
76ed8f0ba2 | ||
|
|
4dde7eaea1 | ||
|
|
2e2149c110 | ||
|
|
70bb9477c5 | ||
|
|
ec5363e9c1 | ||
|
|
dba3b1c559 | ||
|
|
9606e3f80c | ||
|
|
7bc7b500f5 | ||
|
|
c6e804fa10 | ||
|
|
1cbaf9bd9d | ||
|
|
45145685d5 | ||
|
|
2fbec6f21f | ||
|
|
ad29d2765f | ||
|
|
e47e751142 | ||
|
|
c63d4ccf3e | ||
|
|
e5c30cf841 | ||
|
|
c80678aac5 | ||
|
|
1754570057 | ||
|
|
d87b411193 | ||
|
|
8fc6284317 | ||
|
|
eae49d2367 | ||
|
|
69287c5198 | ||
|
|
e6b3984f78 | ||
|
|
547fe888d4 | ||
|
|
3454309cbc | ||
|
|
544c46cd44 | ||
|
|
2c100825cc | ||
|
|
558ecd84a6 | ||
|
|
df24cfff4f | ||
|
|
bd5d93a964 | ||
|
|
ae2ded119f | ||
|
|
abdb80a6be | ||
|
|
2f9cbe2bf1 | ||
|
|
2cca7d60d5 | ||
|
|
3df745d1d2 | ||
|
|
9862083e0b | ||
|
|
7a4976c470 | ||
|
|
8834a19743 | ||
|
|
6e15403f60 | ||
|
|
7e1cf10cb2 | ||
|
|
ee762c3c68 | ||
|
|
32c06414c5 | ||
|
|
e97e1ba4bc | ||
|
|
2f580f7800 | ||
|
|
1ce1459455 | ||
|
|
c26573482e | ||
|
|
414ec08dee | ||
|
|
1cc78191eb | ||
|
|
75c6c6081a | ||
|
|
8d2ebe9718 | ||
|
|
eed974b883 | ||
|
|
ae846dac4d | ||
|
|
0b09c00b50 | ||
|
|
f7a1874cb3 | ||
|
|
28fb04eb7b | ||
|
|
34310cf420 | ||
|
|
e1d61d7190 | ||
|
|
9c14ac84cb | ||
|
|
1d1ea7b6f2 | ||
|
|
92401f5b7c | ||
|
|
38ac9218ec | ||
|
|
48497c749a | ||
|
|
72a1892058 | ||
|
|
f2c328d212 | ||
|
|
e9eafc40a7 | ||
|
|
933ca1bf81 | ||
|
|
b4fc9aa7eb | ||
|
|
dcc475bbef | ||
|
|
1fe35ad0cd | ||
|
|
f1ed1e0f14 | ||
|
|
fcc746fb98 | ||
|
|
95934a5b7a | ||
|
|
d38b101820 | ||
|
|
91d730a7bc | ||
|
|
0cfa77b628 | ||
|
|
ca4881ad51 | ||
|
|
8c2c064fe2 | ||
|
|
10646b9b86 | ||
|
|
967b195946 | ||
|
|
1ae7771290 | ||
|
|
a585fe4d54 | ||
|
|
fa3a9fe70e | ||
|
|
99952a393f | ||
|
|
920a41e3ca | ||
|
|
e5bec957a1 | ||
|
|
41cb765255 | ||
|
|
2d12a3cd7a | ||
|
|
df4fe0176c | ||
|
|
4fcc80719e | ||
|
|
f6c66f6ee4 | ||
|
|
220d137e66 | ||
|
|
425803a1b6 | ||
|
|
c794ea614a | ||
|
|
9000838aab | ||
|
|
2790bda1e9 | ||
|
|
e13d4daa9a | ||
|
|
2f504a4e03 | ||
|
|
598a50a133 | ||
|
|
1b06a5a3e0 | ||
|
|
9f1d3b0269 | ||
|
|
a09543d38b | ||
|
|
2ab3539925 | ||
|
|
23ddf53abe | ||
|
|
d8720d0849 | ||
|
|
6753b55160 | ||
|
|
7f7f48ad56 | ||
|
|
149ca01029 | ||
|
|
5c8133a810 | ||
|
|
2adccdd1b0 | ||
|
|
b91068d658 | ||
|
|
4534cafd3f | ||
|
|
405e79d729 | ||
|
|
4df2349e9d | ||
|
|
a9b61d3e13 | ||
|
|
3767d14e5c | ||
|
|
889a050f25 | ||
|
|
0701fac807 | ||
|
|
9fba91069a | ||
|
|
4f9ce70ff8 | ||
|
|
5e00d4ded7 | ||
|
|
95cd9ee5bb | ||
|
|
40f16f8ef1 | ||
|
|
3d9288f82f | ||
|
|
c51f12f88b | ||
|
|
0618153390 | ||
|
|
a7c066291b | ||
|
|
a69ac372fa | ||
|
|
16b2a54981 | ||
|
|
3f68e0d66f | ||
|
|
12d483fde6 | ||
|
|
96034a9712 | ||
|
|
d2def4479b | ||
|
|
afbbb913e7 | ||
|
|
ad76f239a3 | ||
|
|
e6b096c9e0 | ||
|
|
6e26b4e6c7 | ||
|
|
ea79494b6d | ||
|
|
afb18a3e4d | ||
|
|
f9c9853102 | ||
|
|
b3eb9fb6fa | ||
|
|
d3b97bf51a | ||
|
|
7a2e491199 | ||
|
|
25efaf08b7 | ||
|
|
f893ea6b98 | ||
|
|
500745b62c | ||
|
|
9ebe5bf1a7 | ||
|
|
4aecb86daa | ||
|
|
6924dd6df6 | ||
|
|
431755144e | ||
|
|
d182f81754 | ||
|
|
de0193fffc | ||
|
|
53e86205ad | ||
|
|
aa670efe3a | ||
|
|
e693fe49a7 | ||
|
|
7eaa32d85f | ||
|
|
ab40d2c37a | ||
|
|
784206b39b | ||
|
|
7c8264e221 | ||
|
|
db7195aa30 | ||
|
|
eb7bbc1612 | ||
|
|
ee3792181d | ||
|
|
9804965a20 | ||
|
|
b84842df3d | ||
|
|
fc170d3033 | ||
|
|
8fa4ec7ad8 | ||
|
|
480825ddd7 | ||
|
|
260e328cc1 | ||
|
|
8873428b4b | ||
|
|
ab43c20b8f | ||
|
|
88d9d4f4a3 | ||
|
|
d4840f85c0 | ||
|
|
6f9ddeaed0 | ||
|
|
af5e73c8cb | ||
|
|
333b6e60e1 | ||
|
|
1b61337b75 | ||
|
|
77991896b4 | ||
|
|
60a670ce29 | ||
|
|
c1c69ed22b | ||
|
|
d71c74c6fb | ||
|
|
590aa8b43f | ||
|
|
607e0166f6 | ||
|
|
130c83ee92 | ||
|
|
fd5e418abf | ||
|
|
262d160314 | ||
|
|
9146827590 | ||
|
|
062b108259 | ||
|
|
ba796b6be1 | ||
|
|
3d763235e1 | ||
|
|
c30c6d9f10 | ||
|
|
311716ed18 | ||
|
|
19bb1b4aa4 | ||
|
|
b8749e36b9 | ||
|
|
00b6639155 | ||
|
|
71d7daaef3 | ||
|
|
8654c5d471 | ||
|
|
02124b3d38 | ||
|
|
340dcfb70d | ||
|
|
a37b92223a | ||
|
|
7d2b8cb4fc | ||
|
|
8d7a134cb4 | ||
|
|
4b849d7201 | ||
|
|
e03e185d30 | ||
|
|
7a02df5588 | ||
|
|
19494685ba | ||
|
|
1e26943c3e | ||
|
|
83fa850142 | ||
|
|
968a116d14 | ||
|
|
fb55b494d7 | ||
|
|
59b6a83d7d | ||
|
|
aabc4f0d7b | ||
|
|
391f686173 | ||
|
|
8e6f6d46ec | ||
|
|
2ba7a55439 | ||
|
|
e07df29ab9 | ||
|
|
abf24fe60f | ||
|
|
fad5f5b81f | ||
|
|
6961f49a0c | ||
|
|
6911f8652a | ||
|
|
6658cec6a0 | ||
|
|
14011b9d84 | ||
|
|
bd2d0b6790 | ||
|
|
d36f58230a | ||
|
|
018f950ca3 | ||
|
|
db8db9fae9 | ||
|
|
79ce8d6563 | ||
|
|
13eaa9a35a | ||
|
|
39f0d76b4b | ||
|
|
0a5832ec75 | ||
|
|
6e147b3ed2 | ||
|
|
c162f79daa | ||
|
|
87585be687 | ||
|
|
ea08d6413c | ||
|
|
879905edf6 | ||
|
|
6fd80a5582 | ||
|
|
0dc7333563 | ||
|
|
f61c3168d2 | ||
|
|
9cadd74a96 | ||
|
|
729fa2352b | ||
|
|
b673aaf9f0 | ||
|
|
3132cc6005 | ||
|
|
ac994d3077 | ||
|
|
02d4f7f2da | ||
|
|
d99569f005 | ||
|
|
ec5166249a | ||
|
|
dadd12adb3 | ||
|
|
88b4fb8c2a | ||
|
|
afecae3786 | ||
|
|
d18598bc33 | ||
|
|
794fc05ada | ||
|
|
5daeb7f876 | ||
|
|
53e71c545e | ||
|
|
959a55e36c | ||
|
|
64572b0024 | ||
|
|
9a0c1caa43 | ||
|
|
eed6723147 | ||
|
|
97fabf51b8 | ||
|
|
5e5e2b8aee | ||
|
|
e01071426f | ||
|
|
eed1bfbe50 | ||
|
|
0c3970a266 | ||
|
|
267cfb621e | ||
|
|
0e90febab2 | ||
|
|
31d947837f | ||
|
|
017b11fbba | ||
|
|
3c492062a9 | ||
|
|
b26b49d0ca | ||
|
|
ed08123550 | ||
|
|
add2db5b7a | ||
|
|
f272d7121a | ||
|
|
577556678c | ||
|
|
e146922367 | ||
|
|
6f1548b7f8 | ||
|
|
9e6fe47b44 | ||
|
|
60cfea1126 |
15
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
# To get started with Dependabot version updates, you'll need to specify which
|
||||
# package ecosystems to update and where the package manifests are located.
|
||||
# Please see the documentation for all configuration options:
|
||||
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
||||
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "pip" # See documentation for possible values
|
||||
directory: "/application" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
- package-ecosystem: "npm" # See documentation for possible values
|
||||
directory: "/frontend" # Location of package manifests
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
2
.github/workflows/ci.yml
vendored
@@ -13,7 +13,6 @@ jobs:
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -36,7 +35,6 @@ jobs:
|
||||
username: ${{ github.repository_owner }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
# Runs a single command using the runners shell
|
||||
- name: Build and push Docker images to docker.io and ghcr.io
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
|
||||
4
.github/workflows/cife.yml
vendored
@@ -8,11 +8,11 @@ on:
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
uses: docker/build-push-action@v4
|
||||
with:
|
||||
file: './frontend/Dockerfile'
|
||||
platforms: linux/amd64
|
||||
platforms: linux/amd64, linux/arm64
|
||||
context: ./frontend
|
||||
push: true
|
||||
tags: |
|
||||
|
||||
1
.github/workflows/labeler.yml
vendored
@@ -4,6 +4,7 @@ on:
|
||||
- pull_request_target
|
||||
jobs:
|
||||
triage:
|
||||
if: github.repository == 'arc53/DocsGPT'
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
|
||||
4
.github/workflows/pytest.yml
vendored
@@ -6,7 +6,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9", "3.10", "3.11"]
|
||||
python-version: ["3.11"]
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
@@ -21,7 +21,7 @@ jobs:
|
||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||
- name: Test with pytest and generate coverage report
|
||||
run: |
|
||||
python -m pytest --cov=application --cov=scripts --cov=extensions --cov-report=xml
|
||||
python -m pytest --cov=application --cov-report=xml
|
||||
- name: Upload coverage reports to Codecov
|
||||
if: github.event_name == 'pull_request' && matrix.python-version == '3.11'
|
||||
uses: codecov/codecov-action@v3
|
||||
|
||||
3
.gitignore
vendored
@@ -75,6 +75,7 @@ target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
**/*.ipynb
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
@@ -171,5 +172,5 @@ application/vectors/
|
||||
|
||||
node_modules/
|
||||
.vscode/settings.json
|
||||
models/
|
||||
/models/
|
||||
model/
|
||||
|
||||
16
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Docker Debug Frontend",
|
||||
"request": "launch",
|
||||
"type": "chrome",
|
||||
"preLaunchTask": "docker-compose: debug:frontend",
|
||||
"url": "http://127.0.0.1:5173",
|
||||
"webRoot": "${workspaceFolder}/frontend",
|
||||
"skipFiles": [
|
||||
"<node_internals>/**"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
21
.vscode/tasks.json
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"type": "docker-compose",
|
||||
"label": "docker-compose: debug:frontend",
|
||||
"dockerCompose": {
|
||||
"up": {
|
||||
"detached": true,
|
||||
"services": [
|
||||
"frontend"
|
||||
],
|
||||
"build": true
|
||||
},
|
||||
"files": [
|
||||
"${workspaceFolder}/docker-compose.yaml"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
37
HACKTOBERFEST.md
Normal file
@@ -0,0 +1,37 @@
|
||||
# **🎉 Join the Hacktoberfest with DocsGPT and win a Free T-shirt and other prizes! 🎉**
|
||||
|
||||
Welcome, contributors! We're excited to announce that DocsGPT is participating in Hacktoberfest. Get involved by submitting meaningful pull requests.
|
||||
|
||||
All contributors with accepted PRs will receive a cool Holopin! 🤩 (Watch out for a reply in your PR to collect it).
|
||||
|
||||
### 🏆 Top 50 contributors will recieve a special T-shirt
|
||||
|
||||
### 🏆 [LLM Document analysis by LexEU competition](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md):
|
||||
A separate competition is available for those sumbit best new retrieval / workflow method that will analyze a Document using EU laws.
|
||||
With 200$, 100$, 50$ prize for 1st, 2nd and 3rd place respectively.
|
||||
You can find more information [here](https://github.com/arc53/DocsGPT/blob/main/lexeu-competition.md)
|
||||
|
||||
## 📜 Here's How to Contribute:
|
||||
```text
|
||||
🛠️ Code: This is the golden ticket! Make meaningful contributions through PRs.
|
||||
|
||||
🧩 API extention: Build an app utilising DocsGPT API. We prefer submissions that showcase original ideas and turn the API into an AI agent.
|
||||
|
||||
Non-Code Contributions:
|
||||
|
||||
📚 Wiki: Improve our documentation, Create a guide or change existing documentation.
|
||||
|
||||
🖥️ Design: Improve the UI/UX or design a new feature.
|
||||
|
||||
📝 Blogging or Content Creation: Write articles or create videos to showcase DocsGPT or highlight your contributions!
|
||||
```
|
||||
|
||||
### 📝 Guidelines for Pull Requests:
|
||||
- Familiarize yourself with the current contributions and our [Roadmap](https://github.com/orgs/arc53/projects/2).
|
||||
- Before contributing we highly advise that you check existing [issues](https://github.com/arc53/DocsGPT/issues) or [create](https://github.com/arc53/DocsGPT/issues/new/choose) an issue and wait to get assigned.
|
||||
- Once you are finished with your contribution, please fill in this [form](https://airtable.com/appikMaJwdHhC1SDP/pagoblCJ9W29wf6Hf/form).
|
||||
- Refer to the [Documentation](https://docs.docsgpt.cloud/).
|
||||
- Feel free to join our [Discord](https://discord.gg/n5BX8dh8rU) server. We're here to help newcomers, so don't hesitate to jump in! Join us [here](https://discord.gg/n5BX8dh8rU).
|
||||
|
||||
Thank you very much for considering contributing to DocsGPT during Hacktoberfest! 🙏 Your contributions (not just simple typo) could earn you a stylish new t-shirt and other prizes as a token of our appreciation. 🎁 Join us, and let's code together! 🚀
|
||||
|
||||
24
README.md
@@ -7,9 +7,9 @@
|
||||
</p>
|
||||
|
||||
<p align="left">
|
||||
<strong><a href="https://docsgpt.arc53.com/">DocsGPT</a></strong> is a cutting-edge open-source solution that streamlines the process of finding information in the project documentation. With its integration of the powerful <strong>GPT</strong> models, developers can easily ask questions about a project and receive accurate answers.
|
||||
<strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> is a cutting-edge open-source solution that streamlines the process of finding information in the project documentation. With its integration of the powerful <strong>GPT</strong> models, developers can easily ask questions about a project and receive accurate answers.
|
||||
|
||||
Say goodbye to time-consuming manual searches, and let <strong><a href="https://docsgpt.arc53.com/">DocsGPT</a></strong> help you quickly find the information you need. Try it out and see how it revolutionizes your project documentation experience. Contribute to its development and be a part of the future of AI-powered assistance.
|
||||
Say goodbye to time-consuming manual searches, and let <strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> help you quickly find the information you need. Try it out and see how it revolutionizes your project documentation experience. Contribute to its development and be a part of the future of AI-powered assistance.
|
||||
</p>
|
||||
|
||||
<div align="center">
|
||||
@@ -23,11 +23,15 @@ Say goodbye to time-consuming manual searches, and let <strong><a href="https://
|
||||
|
||||
</div>
|
||||
|
||||
### 🎃 [Hacktoberfest Prizes, Rules & Q&A](https://github.com/arc53/DocsGPT/blob/main/HACKTOBERFEST.md) 🎃
|
||||
|
||||
### Our [Livestream to Dive into Hacktoberfest! Prizes, Rules & Q&A 🎉](https://www.youtube.com/watch?v=5QQaFFu9BC8) on 3rd of October
|
||||
|
||||
### Production Support / Help for Companies:
|
||||
|
||||
We're eager to provide personalized assistance when deploying your DocsGPT to a live environment.
|
||||
|
||||
- [Book Demo :wave:](https://airtable.com/appdeaL0F1qV8Bl2C/shrrJF1Ll7btCJRbP)
|
||||
- [Book Enterprise / teams Demo :wave:](https://cal.com/arc53/docsgpt-demo-b2b?date=2024-09-27&month=2024-09)
|
||||
- [Send Email :email:](mailto:contact@arc53.com?subject=DocsGPT%20support%2Fsolutions)
|
||||
|
||||

|
||||
@@ -46,23 +50,23 @@ You can find our roadmap [here](https://github.com/orgs/arc53/projects/2). Pleas
|
||||
|
||||
If you don't have enough resources to run it, you can use bitsnbytes to quantize.
|
||||
|
||||
## Features
|
||||
## End to End AI Framework for Information Retrieval
|
||||
|
||||

|
||||

|
||||
|
||||
## Useful Links
|
||||
|
||||
- :mag: :fire: [Live preview](https://docsgpt.arc53.com/)
|
||||
- :mag: :fire: [Cloud Version](https://app.docsgpt.cloud/)
|
||||
|
||||
- :speech_balloon: :tada: [Join our Discord](https://discord.gg/n5BX8dh8rU)
|
||||
|
||||
- :books: :sunglasses: [Guides](https://docs.docsgpt.co.uk/)
|
||||
- :books: :sunglasses: [Guides](https://docs.docsgpt.cloud/)
|
||||
|
||||
- :couple: [Interested in contributing?](https://github.com/arc53/DocsGPT/blob/main/CONTRIBUTING.md)
|
||||
|
||||
- :file_folder: :rocket: [How to use any other documentation](https://docs.docsgpt.co.uk/Guides/How-to-train-on-other-documentation)
|
||||
- :file_folder: :rocket: [How to use any other documentation](https://docs.docsgpt.cloud/Guides/How-to-train-on-other-documentation)
|
||||
|
||||
- :house: :closed_lock_with_key: [How to host it locally (so all data will stay on-premises)](https://docs.docsgpt.co.uk/Guides/How-to-use-different-LLM)
|
||||
- :house: :closed_lock_with_key: [How to host it locally (so all data will stay on-premises)](https://docs.docsgpt.cloud/Guides/How-to-use-different-LLM)
|
||||
|
||||
## Project Structure
|
||||
|
||||
@@ -85,7 +89,7 @@ On Mac OS or Linux, write:
|
||||
|
||||
It will install all the dependencies and allow you to download the local model, use OpenAI or use our LLM API.
|
||||
|
||||
Otherwise, refer to this Guide:
|
||||
Otherwise, refer to this Guide for Windows:
|
||||
|
||||
1. Download and open this repository with `git clone https://github.com/arc53/DocsGPT.git`
|
||||
2. Create a `.env` file in your root directory and set the env variables and `VITE_API_STREAMING` to true or false, depending on whether you want streaming answers or not.
|
||||
|
||||
14
SECURITY.md
Normal file
@@ -0,0 +1,14 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
Supported Versions:
|
||||
|
||||
Currently, we support security patches by committing changes and bumping the version published on Github.
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
Found a vulnerability? Please email us:
|
||||
|
||||
security@arc53.com
|
||||
|
||||
@@ -1,31 +1,88 @@
|
||||
FROM python:3.11-slim-bullseye as builder
|
||||
# Builder Stage
|
||||
FROM ubuntu:24.04 as builder
|
||||
|
||||
# Tiktoken requires Rust toolchain, so build it in a separate stage
|
||||
RUN apt-get update && apt-get install -y gcc curl
|
||||
RUN apt-get install -y wget unzip
|
||||
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
|
||||
RUN unzip mpnet-base-v2.zip -d model
|
||||
RUN rm mpnet-base-v2.zip
|
||||
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:deadsnakes/ppa && \
|
||||
# Install necessary packages and Python
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends gcc wget unzip libc6-dev python3.11 python3.11-distutils python3.11-venv && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Verify Python installation and setup symlink
|
||||
RUN if [ -f /usr/bin/python3.11 ]; then \
|
||||
ln -s /usr/bin/python3.11 /usr/bin/python; \
|
||||
else \
|
||||
echo "Python 3.11 not found"; exit 1; \
|
||||
fi
|
||||
|
||||
# Download and unzip the model
|
||||
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip && \
|
||||
unzip mpnet-base-v2.zip -d model && \
|
||||
rm mpnet-base-v2.zip
|
||||
|
||||
# Install Rust
|
||||
RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y
|
||||
|
||||
# Clean up to reduce container size
|
||||
RUN apt-get remove --purge -y wget unzip && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements.txt
|
||||
COPY requirements.txt .
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
# Setup Python virtual environment
|
||||
RUN python3.11 -m venv /venv
|
||||
|
||||
# Activate virtual environment and install Python packages
|
||||
ENV PATH="/venv/bin:$PATH"
|
||||
|
||||
FROM python:3.11-slim-bullseye
|
||||
# Install Python packages
|
||||
RUN pip install --no-cache-dir --upgrade pip && \
|
||||
pip install --no-cache-dir tiktoken && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy pre-built packages and binaries from builder stage
|
||||
COPY --from=builder /usr/local/ /usr/local/
|
||||
# Final Stage
|
||||
FROM ubuntu:24.04 as final
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y software-properties-common && \
|
||||
add-apt-repository ppa:deadsnakes/ppa && \
|
||||
# Install Python
|
||||
apt-get update && apt-get install -y --no-install-recommends python3.11 && \
|
||||
ln -s /usr/bin/python3.11 /usr/bin/python && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Create a non-root user: `appuser` (Feel free to choose a name)
|
||||
RUN groupadd -r appuser && \
|
||||
useradd -r -g appuser -d /app -s /sbin/nologin -c "Docker image user" appuser
|
||||
|
||||
# Copy the virtual environment and model from the builder stage
|
||||
COPY --from=builder /venv /venv
|
||||
COPY --from=builder /model /app/model
|
||||
|
||||
# Copy your application code
|
||||
COPY . /app/application
|
||||
ENV FLASK_APP=app.py
|
||||
ENV FLASK_DEBUG=true
|
||||
|
||||
# Change the ownership of the /app directory to the appuser
|
||||
|
||||
RUN mkdir -p /app/application/inputs/local
|
||||
RUN chown -R appuser:appuser /app
|
||||
|
||||
# Set environment variables
|
||||
ENV FLASK_APP=app.py \
|
||||
FLASK_DEBUG=true \
|
||||
PATH="/venv/bin:$PATH"
|
||||
|
||||
# Expose the port the app runs on
|
||||
EXPOSE 7091
|
||||
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
|
||||
# Switch to non-root user
|
||||
USER appuser
|
||||
|
||||
# Start Gunicorn
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
|
||||
@@ -1,42 +1,53 @@
|
||||
import asyncio
|
||||
import os
|
||||
from flask import Blueprint, request, Response
|
||||
import json
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import traceback
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson.dbref import DBRef
|
||||
from bson.objectid import ObjectId
|
||||
from transformers import GPT2TokenizerFast
|
||||
|
||||
from flask import Blueprint, current_app, make_response, request, Response
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
from pymongo import MongoClient
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.error import bad_request
|
||||
|
||||
|
||||
from application.extensions import api
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.retriever.retriever_creator import RetrieverCreator
|
||||
from application.utils import check_required_fields
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
prompts_collection = db["prompts"]
|
||||
answer = Blueprint('answer', __name__)
|
||||
api_key_collection = db["api_keys"]
|
||||
user_logs_collection = db["user_logs"]
|
||||
|
||||
if settings.LLM_NAME == "gpt4":
|
||||
gpt_model = 'gpt-4'
|
||||
answer = Blueprint("answer", __name__)
|
||||
answer_ns = Namespace("answer", description="Answer related operations", path="/")
|
||||
api.add_namespace(answer_ns)
|
||||
|
||||
gpt_model = ""
|
||||
# to have some kind of default behaviour
|
||||
if settings.LLM_NAME == "openai":
|
||||
gpt_model = "gpt-3.5-turbo"
|
||||
elif settings.LLM_NAME == "anthropic":
|
||||
gpt_model = 'claude-2'
|
||||
else:
|
||||
gpt_model = 'gpt-3.5-turbo'
|
||||
gpt_model = "claude-2"
|
||||
|
||||
if settings.MODEL_NAME: # in case there is particular model name configured
|
||||
gpt_model = settings.MODEL_NAME
|
||||
|
||||
# load the prompts
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
@@ -47,7 +58,7 @@ with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r"
|
||||
chat_combine_creative = f.read()
|
||||
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
|
||||
chat_combine_strict = f.read()
|
||||
chat_combine_strict = f.read()
|
||||
|
||||
api_key_set = settings.API_KEY is not None
|
||||
embeddings_key_set = settings.EMBEDDINGS_KEY is not None
|
||||
@@ -58,11 +69,6 @@ async def async_generate(chain, question, chat_history):
|
||||
return result
|
||||
|
||||
|
||||
def count_tokens(string):
|
||||
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
|
||||
return len(tokenizer(string)['input_ids'])
|
||||
|
||||
|
||||
def run_async_chain(chain, question, chat_history):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
@@ -75,300 +81,538 @@ def run_async_chain(chain, question, chat_history):
|
||||
return result
|
||||
|
||||
|
||||
def get_vectorstore(data):
|
||||
if "active_docs" in data:
|
||||
if data["active_docs"].split("/")[0] == "default":
|
||||
vectorstore = ""
|
||||
elif data["active_docs"].split("/")[0] == "local":
|
||||
vectorstore = "indexes/" + data["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + data["active_docs"]
|
||||
if data["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
def get_data_from_api_key(api_key):
|
||||
data = api_key_collection.find_one({"key": api_key})
|
||||
# # Raise custom exception if the API key is not found
|
||||
if data is None:
|
||||
raise Exception("Invalid API Key, please generate new key", 401)
|
||||
|
||||
if "retriever" not in data:
|
||||
data["retriever"] = None
|
||||
|
||||
if "source" in data and isinstance(data["source"], DBRef):
|
||||
source_doc = db.dereference(data["source"])
|
||||
data["source"] = str(source_doc["_id"])
|
||||
if "retriever" in source_doc:
|
||||
data["retriever"] = source_doc["retriever"]
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
data["source"] = {}
|
||||
return data
|
||||
|
||||
|
||||
def get_retriever(source_id: str):
|
||||
doc = sources_collection.find_one({"_id": ObjectId(source_id)})
|
||||
if doc is None:
|
||||
raise Exception("Source document does not exist", 404)
|
||||
retriever_name = None if "retriever" not in doc else doc["retriever"]
|
||||
return retriever_name
|
||||
|
||||
|
||||
def is_azure_configured():
|
||||
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
|
||||
return (
|
||||
settings.OPENAI_API_BASE
|
||||
and settings.OPENAI_API_VERSION
|
||||
and settings.AZURE_DEPLOYMENT_NAME
|
||||
)
|
||||
|
||||
|
||||
def complete_stream(question, docsearch, chat_history, api_key, prompt_id, conversation_id):
|
||||
llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
|
||||
|
||||
if prompt_id == 'default':
|
||||
prompt = chat_combine_template
|
||||
elif prompt_id == 'creative':
|
||||
prompt = chat_combine_creative
|
||||
elif prompt_id == 'strict':
|
||||
prompt = chat_combine_strict
|
||||
else:
|
||||
prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
|
||||
|
||||
docs = docsearch.search(question, k=2)
|
||||
if settings.LLM_NAME == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
|
||||
if len(chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
chat_history.reverse()
|
||||
for i in chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
|
||||
response_full = ""
|
||||
completion = llm.gen_stream(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_combine)
|
||||
for line in completion:
|
||||
data = json.dumps({"answer": str(line)})
|
||||
response_full += str(line)
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
# save conversation to database
|
||||
if conversation_id is not None:
|
||||
def save_conversation(conversation_id, question, response, source_log_docs, llm):
|
||||
if conversation_id is not None and conversation_id != "None":
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question, "response": response_full, "sources": source_log_docs}}},
|
||||
{
|
||||
"$push": {
|
||||
"queries": {
|
||||
"prompt": question,
|
||||
"response": response,
|
||||
"sources": source_log_docs,
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: " + question + "\n\n" +
|
||||
"AI: " +
|
||||
response_full},
|
||||
{"role": "user", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the "
|
||||
"system"}]
|
||||
messages_summary = [
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: "
|
||||
+ question
|
||||
+ "\n\n"
|
||||
+ "AI: "
|
||||
+ response,
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the "
|
||||
"system",
|
||||
},
|
||||
]
|
||||
|
||||
completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_summary, max_tokens=30)
|
||||
completion = llm.gen(model=gpt_model, messages=messages_summary, max_tokens=30)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion,
|
||||
"queries": [{"prompt": question, "response": response_full, "sources": source_log_docs}]}
|
||||
{
|
||||
"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion,
|
||||
"queries": [
|
||||
{
|
||||
"prompt": question,
|
||||
"response": response,
|
||||
"sources": source_log_docs,
|
||||
}
|
||||
],
|
||||
}
|
||||
).inserted_id
|
||||
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
return conversation_id
|
||||
|
||||
|
||||
@answer.route("/stream", methods=["POST"])
|
||||
def stream():
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
# history to json object from string
|
||||
history = json.loads(history)
|
||||
conversation_id = data["conversation_id"]
|
||||
if 'prompt_id' in data:
|
||||
prompt_id = data["prompt_id"]
|
||||
else:
|
||||
prompt_id = 'default'
|
||||
|
||||
# check if active_docs is set
|
||||
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if "active_docs" in data:
|
||||
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
|
||||
else:
|
||||
vectorstore = ""
|
||||
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
|
||||
|
||||
return Response(
|
||||
complete_stream(question, docsearch,
|
||||
chat_history=history, api_key=api_key,
|
||||
prompt_id=prompt_id,
|
||||
conversation_id=conversation_id), mimetype="text/event-stream"
|
||||
)
|
||||
|
||||
|
||||
@answer.route("/api/answer", methods=["POST"])
|
||||
def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
if "conversation_id" not in data:
|
||||
conversation_id = None
|
||||
else:
|
||||
conversation_id = data["conversation_id"]
|
||||
print("-" * 5)
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = settings.API_KEY
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if 'prompt_id' in data:
|
||||
prompt_id = data["prompt_id"]
|
||||
else:
|
||||
prompt_id = 'default'
|
||||
|
||||
if prompt_id == 'default':
|
||||
def get_prompt(prompt_id):
|
||||
if prompt_id == "default":
|
||||
prompt = chat_combine_template
|
||||
elif prompt_id == 'creative':
|
||||
elif prompt_id == "creative":
|
||||
prompt = chat_combine_creative
|
||||
elif prompt_id == 'strict':
|
||||
elif prompt_id == "strict":
|
||||
prompt = chat_combine_strict
|
||||
else:
|
||||
prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]
|
||||
return prompt
|
||||
|
||||
|
||||
def complete_stream(
|
||||
question, retriever, conversation_id, user_api_key, isNoneDoc=False
|
||||
):
|
||||
|
||||
# use try and except to check for exception
|
||||
try:
|
||||
# check if the vectorstore is set
|
||||
vectorstore = get_vectorstore(data)
|
||||
# loading the index and the store and the prompt template
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
|
||||
|
||||
|
||||
llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)
|
||||
|
||||
|
||||
|
||||
docs = docsearch.search(question, k=2)
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc.page_content for doc in docs])
|
||||
p_chat_combine = prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
response_full = ""
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
# join all page_content together with a newline
|
||||
answer = retriever.gen()
|
||||
sources = retriever.search()
|
||||
for source in sources:
|
||||
if "text" in source:
|
||||
source["text"] = source["text"][:100].strip() + "..."
|
||||
if len(sources) > 0:
|
||||
data = json.dumps({"type": "source", "source": sources})
|
||||
yield f"data: {data}\n\n"
|
||||
for line in answer:
|
||||
if "answer" in line:
|
||||
response_full += str(line["answer"])
|
||||
data = json.dumps(line)
|
||||
yield f"data: {data}\n\n"
|
||||
elif "source" in line:
|
||||
source_log_docs.append(line["source"])
|
||||
|
||||
if isNoneDoc:
|
||||
for doc in source_log_docs:
|
||||
doc["source"] = "None"
|
||||
|
||||
if len(history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
history.reverse()
|
||||
for i in history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
|
||||
if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append({"role": "user", "content": i["prompt"]})
|
||||
messages_combine.append({"role": "system", "content": i["response"]})
|
||||
messages_combine.append({"role": "user", "content": question})
|
||||
|
||||
|
||||
completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_combine)
|
||||
|
||||
|
||||
result = {"answer": completion, "sources": source_log_docs}
|
||||
logger.debug(result)
|
||||
|
||||
# generate conversationId
|
||||
if conversation_id is not None:
|
||||
conversations_collection.update_one(
|
||||
{"_id": ObjectId(conversation_id)},
|
||||
{"$push": {"queries": {"prompt": question,
|
||||
"response": result["answer"], "sources": result['sources']}}},
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=user_api_key
|
||||
)
|
||||
if user_api_key is None:
|
||||
conversation_id = save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
)
|
||||
# send data.type = "end" to indicate that the stream has ended as json
|
||||
data = json.dumps({"type": "id", "id": str(conversation_id)})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
else:
|
||||
# create new conversation
|
||||
# generate summary
|
||||
messages_summary = [
|
||||
{"role": "assistant", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the system \n\n"
|
||||
"User: " + question + "\n\n" + "AI: " + result["answer"]},
|
||||
{"role": "user", "content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the system"}
|
||||
]
|
||||
|
||||
completion = llm.gen(
|
||||
model=gpt_model,
|
||||
engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
messages=messages_summary,
|
||||
max_tokens=30
|
||||
)
|
||||
conversation_id = conversations_collection.insert_one(
|
||||
{"user": "local",
|
||||
"date": datetime.datetime.utcnow(),
|
||||
"name": completion,
|
||||
"queries": [{"prompt": question, "response": result["answer"], "sources": source_log_docs}]}
|
||||
).inserted_id
|
||||
|
||||
result["conversation_id"] = str(conversation_id)
|
||||
|
||||
# mock result
|
||||
# result = {
|
||||
# "answer": "The answer is 42",
|
||||
# "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
|
||||
# }
|
||||
return result
|
||||
retriever_params = retriever.get_params()
|
||||
user_logs_collection.insert_one(
|
||||
{
|
||||
"action": "stream_answer",
|
||||
"level": "info",
|
||||
"user": "local",
|
||||
"api_key": user_api_key,
|
||||
"question": question,
|
||||
"response": response_full,
|
||||
"sources": source_log_docs,
|
||||
"retriever_params": retriever_params,
|
||||
"timestamp": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
)
|
||||
data = json.dumps({"type": "end"})
|
||||
yield f"data: {data}\n\n"
|
||||
except Exception as e:
|
||||
# print whole traceback
|
||||
traceback.print_exc()
|
||||
print(str(e))
|
||||
return bad_request(500, str(e))
|
||||
print("\033[91merr", str(e), file=sys.stderr)
|
||||
data = json.dumps(
|
||||
{
|
||||
"type": "error",
|
||||
"error": "Please try again later. We apologize for any inconvenience.",
|
||||
"error_exception": str(e),
|
||||
}
|
||||
)
|
||||
yield f"data: {data}\n\n"
|
||||
return
|
||||
|
||||
|
||||
@answer.route("/api/search", methods=["POST"])
|
||||
def api_search():
|
||||
data = request.get_json()
|
||||
# get parameter from url question
|
||||
question = data["question"]
|
||||
@answer_ns.route("/stream")
|
||||
class Stream(Resource):
|
||||
stream_model = api.model(
|
||||
"StreamModel",
|
||||
{
|
||||
"question": fields.String(
|
||||
required=True, description="Question to be asked"
|
||||
),
|
||||
"history": fields.List(
|
||||
fields.String, required=False, description="Chat history"
|
||||
),
|
||||
"conversation_id": fields.String(
|
||||
required=False, description="Conversation ID"
|
||||
),
|
||||
"prompt_id": fields.String(
|
||||
required=False, default="default", description="Prompt ID"
|
||||
),
|
||||
"selectedDocs": fields.String(
|
||||
required=False, description="Selected documents"
|
||||
),
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"token_limit": fields.Integer(required=False, description="Token limit"),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"api_key": fields.String(required=False, description="API key"),
|
||||
"active_docs": fields.String(
|
||||
required=False, description="Active documents"
|
||||
),
|
||||
"isNoneDoc": fields.Boolean(
|
||||
required=False, description="Flag indicating if no document is used"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
if not embeddings_key_set:
|
||||
if "embeddings_key" in data:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
else:
|
||||
embeddings_key = settings.EMBEDDINGS_KEY
|
||||
if "active_docs" in data:
|
||||
vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
|
||||
else:
|
||||
vectorstore = ""
|
||||
docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)
|
||||
@api.expect(stream_model)
|
||||
@api.doc(description="Stream a response based on the question and retriever")
|
||||
def post(self):
|
||||
data = request.get_json()
|
||||
required_fields = ["question"]
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
|
||||
docs = docsearch.search(question, k=2)
|
||||
try:
|
||||
question = data["question"]
|
||||
history = data.get("history", [])
|
||||
history = json.loads(history)
|
||||
conversation_id = data.get("conversation_id")
|
||||
prompt_id = data.get("prompt_id", "default")
|
||||
if "selectedDocs" in data and data["selectedDocs"] is None:
|
||||
chunks = 0
|
||||
else:
|
||||
chunks = int(data.get("chunks", 2))
|
||||
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
|
||||
retriever_name = data.get("retriever", "classic")
|
||||
|
||||
source_log_docs = []
|
||||
for doc in docs:
|
||||
if doc.metadata:
|
||||
source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
|
||||
else:
|
||||
source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
|
||||
#yield f"data:{data}\n\n"
|
||||
return source_log_docs
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key.get("chunks", 2))
|
||||
prompt_id = data_key.get("prompt_id", "default")
|
||||
source = {"active_docs": data_key.get("source")}
|
||||
retriever_name = data_key.get("retriever", retriever_name)
|
||||
user_api_key = data["api_key"]
|
||||
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
retriever_name = get_retriever(data["active_docs"]) or retriever_name
|
||||
user_api_key = None
|
||||
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
current_app.logger.info(
|
||||
f"/stream - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})},
|
||||
)
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=history,
|
||||
prompt=prompt,
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
)
|
||||
|
||||
return Response(
|
||||
complete_stream(
|
||||
question=question,
|
||||
retriever=retriever,
|
||||
conversation_id=conversation_id,
|
||||
user_api_key=user_api_key,
|
||||
isNoneDoc=data.get("isNoneDoc"),
|
||||
),
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
|
||||
except ValueError:
|
||||
message = "Malformed request body"
|
||||
print("\033[91merr", str(message), file=sys.stderr)
|
||||
return Response(
|
||||
error_stream_generate(message),
|
||||
status=400,
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"/stream - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||
extra={"error": str(e), "traceback": traceback.format_exc()},
|
||||
)
|
||||
message = e.args[0]
|
||||
status_code = 400
|
||||
# Custom exceptions with two arguments, index 1 as status code
|
||||
if len(e.args) >= 2:
|
||||
status_code = e.args[1]
|
||||
return Response(
|
||||
error_stream_generate(message),
|
||||
status=status_code,
|
||||
mimetype="text/event-stream",
|
||||
)
|
||||
|
||||
|
||||
def error_stream_generate(err_response):
|
||||
data = json.dumps({"type": "error", "error": err_response})
|
||||
yield f"data: {data}\n\n"
|
||||
|
||||
|
||||
@answer_ns.route("/api/answer")
|
||||
class Answer(Resource):
|
||||
answer_model = api.model(
|
||||
"AnswerModel",
|
||||
{
|
||||
"question": fields.String(
|
||||
required=True, description="The question to answer"
|
||||
),
|
||||
"history": fields.List(
|
||||
fields.String, required=False, description="Conversation history"
|
||||
),
|
||||
"conversation_id": fields.String(
|
||||
required=False, description="Conversation ID"
|
||||
),
|
||||
"prompt_id": fields.String(
|
||||
required=False, default="default", description="Prompt ID"
|
||||
),
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"token_limit": fields.Integer(required=False, description="Token limit"),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"api_key": fields.String(required=False, description="API key"),
|
||||
"active_docs": fields.String(
|
||||
required=False, description="Active documents"
|
||||
),
|
||||
"isNoneDoc": fields.Boolean(
|
||||
required=False, description="Flag indicating if no document is used"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@api.expect(answer_model)
|
||||
@api.doc(description="Provide an answer based on the question and retriever")
|
||||
def post(self):
|
||||
data = request.get_json()
|
||||
required_fields = ["question"]
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
|
||||
try:
|
||||
question = data["question"]
|
||||
history = data.get("history", [])
|
||||
conversation_id = data.get("conversation_id")
|
||||
prompt_id = data.get("prompt_id", "default")
|
||||
chunks = int(data.get("chunks", 2))
|
||||
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
|
||||
retriever_name = data.get("retriever", "classic")
|
||||
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key.get("chunks", 2))
|
||||
prompt_id = data_key.get("prompt_id", "default")
|
||||
source = {"active_docs": data_key.get("source")}
|
||||
retriever_name = data_key.get("retriever", retriever_name)
|
||||
user_api_key = data["api_key"]
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
retriever_name = get_retriever(data["active_docs"]) or retriever_name
|
||||
user_api_key = None
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
current_app.logger.info(
|
||||
f"/api/answer - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})},
|
||||
)
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=history,
|
||||
prompt=prompt,
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
)
|
||||
|
||||
source_log_docs = []
|
||||
response_full = ""
|
||||
for line in retriever.gen():
|
||||
if "source" in line:
|
||||
source_log_docs.append(line["source"])
|
||||
elif "answer" in line:
|
||||
response_full += line["answer"]
|
||||
|
||||
if data.get("isNoneDoc"):
|
||||
for doc in source_log_docs:
|
||||
doc["source"] = "None"
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=user_api_key
|
||||
)
|
||||
|
||||
result = {"answer": response_full, "sources": source_log_docs}
|
||||
result["conversation_id"] = str(
|
||||
save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
)
|
||||
)
|
||||
retriever_params = retriever.get_params()
|
||||
user_logs_collection.insert_one(
|
||||
{
|
||||
"action": "api_answer",
|
||||
"level": "info",
|
||||
"user": "local",
|
||||
"api_key": user_api_key,
|
||||
"question": question,
|
||||
"response": response_full,
|
||||
"sources": source_log_docs,
|
||||
"retriever_params": retriever_params,
|
||||
"timestamp": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"/api/answer - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||
extra={"error": str(e), "traceback": traceback.format_exc()},
|
||||
)
|
||||
return bad_request(500, str(e))
|
||||
|
||||
return make_response(result, 200)
|
||||
|
||||
|
||||
@answer_ns.route("/api/search")
|
||||
class Search(Resource):
|
||||
search_model = api.model(
|
||||
"SearchModel",
|
||||
{
|
||||
"question": fields.String(
|
||||
required=True, description="The question to search"
|
||||
),
|
||||
"chunks": fields.Integer(
|
||||
required=False, default=2, description="Number of chunks"
|
||||
),
|
||||
"api_key": fields.String(
|
||||
required=False, description="API key for authentication"
|
||||
),
|
||||
"active_docs": fields.String(
|
||||
required=False, description="Active documents for retrieval"
|
||||
),
|
||||
"retriever": fields.String(required=False, description="Retriever type"),
|
||||
"token_limit": fields.Integer(
|
||||
required=False, description="Limit for tokens"
|
||||
),
|
||||
"isNoneDoc": fields.Boolean(
|
||||
required=False, description="Flag indicating if no document is used"
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
@api.expect(search_model)
|
||||
@api.doc(
|
||||
description="Search for relevant documents based on the question and retriever"
|
||||
)
|
||||
def post(self):
|
||||
data = request.get_json()
|
||||
required_fields = ["question"]
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
|
||||
try:
|
||||
question = data["question"]
|
||||
chunks = int(data.get("chunks", 2))
|
||||
token_limit = data.get("token_limit", settings.DEFAULT_MAX_HISTORY)
|
||||
retriever_name = data.get("retriever", "classic")
|
||||
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key.get("chunks", 2))
|
||||
source = {"active_docs": data_key.get("source")}
|
||||
user_api_key = data["api_key"]
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
user_api_key = None
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
current_app.logger.info(
|
||||
f"/api/answer - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})},
|
||||
)
|
||||
|
||||
retriever = RetrieverCreator.create_retriever(
|
||||
retriever_name,
|
||||
question=question,
|
||||
source=source,
|
||||
chat_history=[],
|
||||
prompt="default",
|
||||
chunks=chunks,
|
||||
token_limit=token_limit,
|
||||
gpt_model=gpt_model,
|
||||
user_api_key=user_api_key,
|
||||
)
|
||||
|
||||
docs = retriever.search()
|
||||
retriever_params = retriever.get_params()
|
||||
|
||||
user_logs_collection.insert_one(
|
||||
{
|
||||
"action": "api_search",
|
||||
"level": "info",
|
||||
"user": "local",
|
||||
"api_key": user_api_key,
|
||||
"question": question,
|
||||
"sources": docs,
|
||||
"retriever_params": retriever_params,
|
||||
"timestamp": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
)
|
||||
|
||||
if data.get("isNoneDoc"):
|
||||
for doc in docs:
|
||||
doc["source"] = "None"
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"/api/search - error: {str(e)} - traceback: {traceback.format_exc()}",
|
||||
extra={"error": str(e), "traceback": traceback.format_exc()},
|
||||
)
|
||||
return bad_request(500, str(e))
|
||||
|
||||
return make_response(docs, 200)
|
||||
|
||||
75
application/api/internal/routes.py
Normal file → Executable file
@@ -3,18 +3,23 @@ import datetime
|
||||
from flask import Blueprint, request, send_from_directory
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
from application.core.settings import settings
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
|
||||
|
||||
internal = Blueprint("internal", __name__)
|
||||
|
||||
|
||||
internal = Blueprint('internal', __name__)
|
||||
@internal.route("/api/download", methods=["get"])
|
||||
def download_file():
|
||||
user = secure_filename(request.args.get("user"))
|
||||
@@ -24,7 +29,6 @@ def download_file():
|
||||
return send_from_directory(save_dir, filename, as_attachment=True)
|
||||
|
||||
|
||||
|
||||
@internal.route("/api/upload_index", methods=["POST"])
|
||||
def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
@@ -34,7 +38,14 @@ def upload_index_files():
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
save_dir = os.path.join(current_dir, "indexes", user, job_name)
|
||||
tokens = secure_filename(request.form["tokens"])
|
||||
retriever = secure_filename(request.form["retriever"])
|
||||
id = secure_filename(request.form["id"])
|
||||
type = secure_filename(request.form["type"])
|
||||
remote_data = request.form["remote_data"] if "remote_data" in request.form else None
|
||||
sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None
|
||||
|
||||
save_dir = os.path.join(current_dir, "indexes", str(id))
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
if "file_faiss" not in request.files:
|
||||
print("No file part")
|
||||
@@ -49,21 +60,45 @@ def upload_index_files():
|
||||
if file_pkl.filename == "":
|
||||
return {"status": "no file name"}
|
||||
# saves index files
|
||||
|
||||
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, "index.faiss"))
|
||||
file_pkl.save(os.path.join(save_dir, "index.pkl"))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one(
|
||||
{
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local",
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
existing_entry = sources_collection.find_one({"_id": ObjectId(id)})
|
||||
if existing_entry:
|
||||
sources_collection.update_one(
|
||||
{"_id": ObjectId(id)},
|
||||
{
|
||||
"$set": {
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": type,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"remote_data": remote_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
}
|
||||
},
|
||||
)
|
||||
else:
|
||||
sources_collection.insert_one(
|
||||
{
|
||||
"_id": ObjectId(id),
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": type,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"remote_data": remote_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
|
||||
@@ -1,12 +1,38 @@
|
||||
from application.worker import ingest_worker, remote_worker
|
||||
from application.celery import celery
|
||||
from datetime import timedelta
|
||||
|
||||
from application.celery_init import celery
|
||||
from application.worker import ingest_worker, remote_worker, sync_worker
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest(self, directory, formats, name_job, filename, user):
|
||||
resp = ingest_worker(self, directory, formats, name_job, filename, user)
|
||||
return resp
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest_remote(self, source_data, job_name, user, loader):
|
||||
resp = remote_worker(self, source_data, job_name, user, loader)
|
||||
return resp
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def schedule_syncs(self, frequency):
|
||||
resp = sync_worker(self, frequency)
|
||||
return resp
|
||||
|
||||
|
||||
@celery.on_after_configure.connect
|
||||
def setup_periodic_tasks(sender, **kwargs):
|
||||
sender.add_periodic_task(
|
||||
timedelta(days=1),
|
||||
schedule_syncs.s("daily"),
|
||||
)
|
||||
sender.add_periodic_task(
|
||||
timedelta(weeks=1),
|
||||
schedule_syncs.s("weekly"),
|
||||
)
|
||||
sender.add_periodic_task(
|
||||
timedelta(days=30),
|
||||
schedule_syncs.s("monthly"),
|
||||
)
|
||||
|
||||
@@ -1,17 +1,23 @@
|
||||
import platform
|
||||
|
||||
import dotenv
|
||||
from application.celery import celery
|
||||
from flask import Flask, request, redirect
|
||||
from application.core.settings import settings
|
||||
from application.api.user.routes import user
|
||||
from flask import Flask, redirect, request
|
||||
|
||||
from application.api.answer.routes import answer
|
||||
from application.api.internal.routes import internal
|
||||
from application.api.user.routes import user
|
||||
from application.celery_init import celery
|
||||
from application.core.logging_config import setup_logging
|
||||
from application.core.settings import settings
|
||||
from application.extensions import api
|
||||
|
||||
if platform.system() == "Windows":
|
||||
import pathlib
|
||||
|
||||
pathlib.PosixPath = pathlib.WindowsPath
|
||||
|
||||
dotenv.load_dotenv()
|
||||
setup_logging()
|
||||
|
||||
app = Flask(__name__)
|
||||
app.register_blueprint(user)
|
||||
@@ -21,16 +27,19 @@ app.config.update(
|
||||
UPLOAD_FOLDER="inputs",
|
||||
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
|
||||
CELERY_RESULT_BACKEND=settings.CELERY_RESULT_BACKEND,
|
||||
MONGO_URI=settings.MONGO_URI
|
||||
MONGO_URI=settings.MONGO_URI,
|
||||
)
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
api.init_app(app)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def home():
|
||||
if request.remote_addr in ('0.0.0.0', '127.0.0.1', 'localhost', '172.18.0.1'):
|
||||
return redirect('http://localhost:5173')
|
||||
if request.remote_addr in ("0.0.0.0", "127.0.0.1", "localhost", "172.18.0.1"):
|
||||
return redirect("http://localhost:5173")
|
||||
else:
|
||||
return 'Welcome to DocsGPT Backend!'
|
||||
return "Welcome to DocsGPT Backend!"
|
||||
|
||||
|
||||
@app.after_request
|
||||
def after_request(response):
|
||||
@@ -39,6 +48,6 @@ def after_request(response):
|
||||
response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
|
||||
return response
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=settings.FLASK_DEBUG_MODE, port=7091)
|
||||
|
||||
@@ -1,9 +1,15 @@
|
||||
from celery import Celery
|
||||
from application.core.settings import settings
|
||||
from celery.signals import setup_logging
|
||||
|
||||
def make_celery(app_name=__name__):
|
||||
celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
|
||||
celery.conf.update(settings)
|
||||
return celery
|
||||
|
||||
@setup_logging.connect
|
||||
def config_loggers(*args, **kwargs):
|
||||
from application.core.logging_config import setup_logging
|
||||
setup_logging()
|
||||
|
||||
celery = make_celery()
|
||||
22
application/core/logging_config.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from logging.config import dictConfig
|
||||
|
||||
def setup_logging():
|
||||
dictConfig({
|
||||
'version': 1,
|
||||
'formatters': {
|
||||
'default': {
|
||||
'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
|
||||
}
|
||||
},
|
||||
"handlers": {
|
||||
"console": {
|
||||
"class": "logging.StreamHandler",
|
||||
"stream": "ext://sys.stdout",
|
||||
"formatter": "default",
|
||||
}
|
||||
},
|
||||
'root': {
|
||||
'level': 'INFO',
|
||||
'handlers': ['console'],
|
||||
},
|
||||
})
|
||||
@@ -9,14 +9,17 @@ current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__
|
||||
|
||||
class Settings(BaseSettings):
|
||||
LLM_NAME: str = "docsgpt"
|
||||
MODEL_NAME: Optional[str] = None # if LLM_NAME is openai, MODEL_NAME can be gpt-4 or gpt-3.5-turbo
|
||||
EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
|
||||
CELERY_BROKER_URL: str = "redis://localhost:6379/0"
|
||||
CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
|
||||
MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
|
||||
MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
|
||||
TOKENS_MAX_HISTORY: int = 150
|
||||
DEFAULT_MAX_HISTORY: int = 150
|
||||
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus"
|
||||
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
|
||||
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
|
||||
@@ -26,6 +29,7 @@ class Settings(BaseSettings):
|
||||
OPENAI_API_VERSION: Optional[str] = None # azure openai api version
|
||||
AZURE_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for answering
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for embeddings
|
||||
OPENAI_BASE_URL: Optional[str] = None # openai base url for open ai compatable models
|
||||
|
||||
# elasticsearch
|
||||
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
|
||||
@@ -58,6 +62,15 @@ class Settings(BaseSettings):
|
||||
QDRANT_PATH: Optional[str] = None
|
||||
QDRANT_DISTANCE_FUNC: str = "Cosine"
|
||||
|
||||
# Milvus vectorstore config
|
||||
MILVUS_COLLECTION_NAME: Optional[str] = "docsgpt"
|
||||
MILVUS_URI: Optional[str] = "./milvus_local.db" # milvus lite version as default
|
||||
MILVUS_TOKEN: Optional[str] = ""
|
||||
|
||||
BRAVE_SEARCH_API_KEY: Optional[str] = None
|
||||
|
||||
FLASK_DEBUG_MODE: bool = False
|
||||
|
||||
|
||||
path = Path(__file__).parent.parent.absolute()
|
||||
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
|
||||
|
||||
7
application/extensions.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from flask_restx import Api
|
||||
|
||||
api = Api(
|
||||
version="1.0",
|
||||
title="DocsGPT API",
|
||||
description="API for DocsGPT",
|
||||
)
|
||||
@@ -1,21 +1,29 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
|
||||
class AnthropicLLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key=None):
|
||||
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
|
||||
from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
|
||||
self.api_key = api_key or settings.ANTHROPIC_API_KEY # If not provided, use a default from settings
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
self.api_key = (
|
||||
api_key or settings.ANTHROPIC_API_KEY
|
||||
) # If not provided, use a default from settings
|
||||
self.user_api_key = user_api_key
|
||||
self.anthropic = Anthropic(api_key=self.api_key)
|
||||
self.HUMAN_PROMPT = HUMAN_PROMPT
|
||||
self.AI_PROMPT = AI_PROMPT
|
||||
|
||||
def gen(self, model, messages, engine=None, max_tokens=300, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen(
|
||||
self, baseself, model, messages, stream=False, max_tokens=300, **kwargs
|
||||
):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Context \n {context} \n ### Question \n {user_question}"
|
||||
if stream:
|
||||
return self.gen_stream(model, prompt, max_tokens, **kwargs)
|
||||
return self.gen_stream(model, prompt, stream, max_tokens, **kwargs)
|
||||
|
||||
completion = self.anthropic.completions.create(
|
||||
model=model,
|
||||
@@ -25,9 +33,11 @@ class AnthropicLLM(BaseLLM):
|
||||
)
|
||||
return completion.completion
|
||||
|
||||
def gen_stream(self, model, messages, engine=None, max_tokens=300, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen_stream(
|
||||
self, baseself, model, messages, stream=True, max_tokens=300, **kwargs
|
||||
):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Context \n {context} \n ### Question \n {user_question}"
|
||||
stream_response = self.anthropic.completions.create(
|
||||
model=model,
|
||||
@@ -37,4 +47,4 @@ class AnthropicLLM(BaseLLM):
|
||||
)
|
||||
|
||||
for completion in stream_response:
|
||||
yield completion.completion
|
||||
yield completion.completion
|
||||
|
||||
@@ -1,14 +1,28 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from application.usage import gen_token_usage, stream_token_usage
|
||||
|
||||
|
||||
class BaseLLM(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
self.token_usage = {"prompt_tokens": 0, "generated_tokens": 0}
|
||||
|
||||
def _apply_decorator(self, method, decorator, *args, **kwargs):
|
||||
return decorator(method, *args, **kwargs)
|
||||
|
||||
@abstractmethod
|
||||
def gen(self, *args, **kwargs):
|
||||
def _raw_gen(self, model, messages, stream, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def gen(self, model, messages, stream=False, *args, **kwargs):
|
||||
return self._apply_decorator(self._raw_gen, gen_token_usage)(
|
||||
self, model=model, messages=messages, stream=stream, *args, **kwargs
|
||||
)
|
||||
|
||||
@abstractmethod
|
||||
def gen_stream(self, *args, **kwargs):
|
||||
def _raw_gen_stream(self, model, messages, stream, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def gen_stream(self, model, messages, stream=True, *args, **kwargs):
|
||||
return self._apply_decorator(self._raw_gen_stream, stream_token_usage)(
|
||||
self, model=model, messages=messages, stream=stream, *args, **kwargs
|
||||
)
|
||||
|
||||
@@ -2,48 +2,43 @@ from application.llm.base import BaseLLM
|
||||
import json
|
||||
import requests
|
||||
|
||||
|
||||
class DocsGPTAPILLM(BaseLLM):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
self.endpoint = "https://llm.docsgpt.co.uk"
|
||||
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
self.endpoint = "https://llm.docsgpt.co.uk"
|
||||
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen(self, baseself, model, messages, stream=False, *args, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
response = requests.post(
|
||||
f"{self.endpoint}/answer",
|
||||
json={
|
||||
"prompt": prompt,
|
||||
"max_new_tokens": 30
|
||||
}
|
||||
f"{self.endpoint}/answer", json={"prompt": prompt, "max_new_tokens": 30}
|
||||
)
|
||||
response_clean = response.json()['a'].replace("###", "")
|
||||
response_clean = response.json()["a"].replace("###", "")
|
||||
|
||||
return response_clean
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen_stream(self, baseself, model, messages, stream=True, *args, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
# send prompt to endpoint /stream
|
||||
response = requests.post(
|
||||
f"{self.endpoint}/stream",
|
||||
json={
|
||||
"prompt": prompt,
|
||||
"max_new_tokens": 256
|
||||
},
|
||||
stream=True
|
||||
json={"prompt": prompt, "max_new_tokens": 256},
|
||||
stream=True,
|
||||
)
|
||||
|
||||
|
||||
for line in response.iter_lines():
|
||||
if line:
|
||||
#data = json.loads(line)
|
||||
data_str = line.decode('utf-8')
|
||||
# data = json.loads(line)
|
||||
data_str = line.decode("utf-8")
|
||||
if data_str.startswith("data: "):
|
||||
data = json.loads(data_str[6:])
|
||||
yield data['a']
|
||||
|
||||
yield data["a"]
|
||||
|
||||
@@ -1,44 +1,68 @@
|
||||
from application.llm.base import BaseLLM
|
||||
|
||||
|
||||
class HuggingFaceLLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B',q=False):
|
||||
def __init__(
|
||||
self,
|
||||
api_key=None,
|
||||
user_api_key=None,
|
||||
llm_name="Arc53/DocsGPT-7B",
|
||||
q=False,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
global hf
|
||||
|
||||
|
||||
from langchain.llms import HuggingFacePipeline
|
||||
|
||||
if q:
|
||||
import torch
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
|
||||
from transformers import (
|
||||
AutoModelForCausalLM,
|
||||
AutoTokenizer,
|
||||
pipeline,
|
||||
BitsAndBytesConfig,
|
||||
)
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(llm_name)
|
||||
bnb_config = BitsAndBytesConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_use_double_quant=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16
|
||||
)
|
||||
model = AutoModelForCausalLM.from_pretrained(llm_name,quantization_config=bnb_config)
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_use_double_quant=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
llm_name, quantization_config=bnb_config
|
||||
)
|
||||
else:
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(llm_name)
|
||||
model = AutoModelForCausalLM.from_pretrained(llm_name)
|
||||
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
pipe = pipeline(
|
||||
"text-generation", model=model,
|
||||
tokenizer=tokenizer, max_new_tokens=2000,
|
||||
device_map="auto", eos_token_id=tokenizer.eos_token_id
|
||||
"text-generation",
|
||||
model=model,
|
||||
tokenizer=tokenizer,
|
||||
max_new_tokens=2000,
|
||||
device_map="auto",
|
||||
eos_token_id=tokenizer.eos_token_id,
|
||||
)
|
||||
hf = HuggingFacePipeline(pipeline=pipe)
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
result = hf(prompt)
|
||||
|
||||
return result.content
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
|
||||
|
||||
raise NotImplementedError("HuggingFaceLLM Streaming is not implemented yet.")
|
||||
|
||||
|
||||
@@ -1,39 +1,55 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
import threading
|
||||
|
||||
class LlamaSingleton:
|
||||
_instances = {}
|
||||
_lock = threading.Lock() # Add a lock for thread synchronization
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls, llm_name):
|
||||
if llm_name not in cls._instances:
|
||||
try:
|
||||
from llama_cpp import Llama
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Please install llama_cpp using pip install llama-cpp-python"
|
||||
)
|
||||
cls._instances[llm_name] = Llama(model_path=llm_name, n_ctx=2048)
|
||||
return cls._instances[llm_name]
|
||||
|
||||
@classmethod
|
||||
def query_model(cls, llm, prompt, **kwargs):
|
||||
with cls._lock:
|
||||
return llm(prompt, **kwargs)
|
||||
|
||||
|
||||
class LlamaCpp(BaseLLM):
|
||||
def __init__(
|
||||
self,
|
||||
api_key=None,
|
||||
user_api_key=None,
|
||||
llm_name=settings.MODEL_PATH,
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
self.llama = LlamaSingleton.get_instance(llm_name)
|
||||
|
||||
def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
|
||||
global llama
|
||||
try:
|
||||
from llama_cpp import Llama
|
||||
except ImportError:
|
||||
raise ImportError("Please install llama_cpp using pip install llama-cpp-python")
|
||||
|
||||
llama = Llama(model_path=llm_name, n_ctx=2048)
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
result = LlamaSingleton.query_model(self.llama, prompt, max_tokens=150, echo=False)
|
||||
return result["choices"][0]["text"].split("### Answer \n")[-1]
|
||||
|
||||
result = llama(prompt, max_tokens=150, echo=False)
|
||||
|
||||
# import sys
|
||||
# print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)
|
||||
|
||||
return result['choices'][0]['text'].split('### Answer \n')[-1]
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
result = llama(prompt, max_tokens=150, echo=False, stream=stream)
|
||||
|
||||
# import sys
|
||||
# print(list(result), file=sys.stderr)
|
||||
|
||||
result = LlamaSingleton.query_model(self.llama, prompt, max_tokens=150, echo=False, stream=stream)
|
||||
for item in result:
|
||||
for choice in item['choices']:
|
||||
yield choice['text']
|
||||
for choice in item["choices"]:
|
||||
yield choice["text"]
|
||||
@@ -7,22 +7,21 @@ from application.llm.docsgpt_provider import DocsGPTAPILLM
|
||||
from application.llm.premai import PremAILLM
|
||||
|
||||
|
||||
|
||||
class LLMCreator:
|
||||
llms = {
|
||||
'openai': OpenAILLM,
|
||||
'azure_openai': AzureOpenAILLM,
|
||||
'sagemaker': SagemakerAPILLM,
|
||||
'huggingface': HuggingFaceLLM,
|
||||
'llama.cpp': LlamaCpp,
|
||||
'anthropic': AnthropicLLM,
|
||||
'docsgpt': DocsGPTAPILLM,
|
||||
'premai': PremAILLM,
|
||||
"openai": OpenAILLM,
|
||||
"azure_openai": AzureOpenAILLM,
|
||||
"sagemaker": SagemakerAPILLM,
|
||||
"huggingface": HuggingFaceLLM,
|
||||
"llama.cpp": LlamaCpp,
|
||||
"anthropic": AnthropicLLM,
|
||||
"docsgpt": DocsGPTAPILLM,
|
||||
"premai": PremAILLM,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_llm(cls, type, *args, **kwargs):
|
||||
def create_llm(cls, type, api_key, user_api_key, *args, **kwargs):
|
||||
llm_class = cls.llms.get(type.lower())
|
||||
if not llm_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
return llm_class(*args, **kwargs)
|
||||
return llm_class(api_key, user_api_key, *args, **kwargs)
|
||||
|
||||
@@ -1,36 +1,51 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
|
||||
|
||||
class OpenAILLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key):
|
||||
global openai
|
||||
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
|
||||
from openai import OpenAI
|
||||
|
||||
self.client = OpenAI(
|
||||
api_key=api_key,
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
if settings.OPENAI_BASE_URL:
|
||||
self.client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=settings.OPENAI_BASE_URL
|
||||
)
|
||||
else:
|
||||
self.client = OpenAI(api_key=api_key)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _get_openai(self):
|
||||
# Import openai when needed
|
||||
import openai
|
||||
|
||||
return openai
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
response = self.client.chat.completions.create(model=model,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
**kwargs)
|
||||
def _raw_gen(
|
||||
self,
|
||||
baseself,
|
||||
model,
|
||||
messages,
|
||||
stream=False,
|
||||
engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
**kwargs
|
||||
):
|
||||
response = self.client.chat.completions.create(
|
||||
model=model, messages=messages, stream=stream, **kwargs
|
||||
)
|
||||
|
||||
return response.choices[0].message.content
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
response = self.client.chat.completions.create(model=model,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
**kwargs)
|
||||
def _raw_gen_stream(
|
||||
self,
|
||||
baseself,
|
||||
model,
|
||||
messages,
|
||||
stream=True,
|
||||
engine=settings.AZURE_DEPLOYMENT_NAME,
|
||||
**kwargs
|
||||
):
|
||||
response = self.client.chat.completions.create(
|
||||
model=model, messages=messages, stream=stream, **kwargs
|
||||
)
|
||||
|
||||
for line in response:
|
||||
# import sys
|
||||
@@ -41,20 +56,18 @@ class OpenAILLM(BaseLLM):
|
||||
|
||||
class AzureOpenAILLM(OpenAILLM):
|
||||
|
||||
def __init__(self, openai_api_key, openai_api_base, openai_api_version, deployment_name):
|
||||
def __init__(
|
||||
self, openai_api_key, openai_api_base, openai_api_version, deployment_name
|
||||
):
|
||||
super().__init__(openai_api_key)
|
||||
self.api_base = settings.OPENAI_API_BASE,
|
||||
self.api_version = settings.OPENAI_API_VERSION,
|
||||
self.deployment_name = settings.AZURE_DEPLOYMENT_NAME,
|
||||
self.api_base = (settings.OPENAI_API_BASE,)
|
||||
self.api_version = (settings.OPENAI_API_VERSION,)
|
||||
self.deployment_name = (settings.AZURE_DEPLOYMENT_NAME,)
|
||||
from openai import AzureOpenAI
|
||||
|
||||
self.client = AzureOpenAI(
|
||||
api_key=openai_api_key,
|
||||
api_key=openai_api_key,
|
||||
api_version=settings.OPENAI_API_VERSION,
|
||||
api_base=settings.OPENAI_API_BASE,
|
||||
deployment_name=settings.AZURE_DEPLOYMENT_NAME,
|
||||
)
|
||||
|
||||
def _get_openai(self):
|
||||
openai = super()._get_openai()
|
||||
|
||||
return openai
|
||||
|
||||
@@ -1,32 +1,37 @@
|
||||
from application.llm.base import BaseLLM
|
||||
from application.core.settings import settings
|
||||
|
||||
|
||||
class PremAILLM(BaseLLM):
|
||||
|
||||
def __init__(self, api_key):
|
||||
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
|
||||
from premai import Prem
|
||||
|
||||
self.client = Prem(
|
||||
api_key=api_key
|
||||
)
|
||||
|
||||
super().__init__(*args, **kwargs)
|
||||
self.client = Prem(api_key=api_key)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
self.project_id = settings.PREMAI_PROJECT_ID
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
response = self.client.chat.completions.create(model=model,
|
||||
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
|
||||
response = self.client.chat.completions.create(
|
||||
model=model,
|
||||
project_id=self.project_id,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
**kwargs)
|
||||
**kwargs
|
||||
)
|
||||
|
||||
return response.choices[0].message["content"]
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
response = self.client.chat.completions.create(model=model,
|
||||
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
|
||||
response = self.client.chat.completions.create(
|
||||
model=model,
|
||||
project_id=self.project_id,
|
||||
messages=messages,
|
||||
stream=stream,
|
||||
**kwargs)
|
||||
**kwargs
|
||||
)
|
||||
|
||||
for line in response:
|
||||
if line.choices[0].delta["content"] is not None:
|
||||
|
||||
@@ -4,11 +4,10 @@ import json
|
||||
import io
|
||||
|
||||
|
||||
|
||||
class LineIterator:
|
||||
"""
|
||||
A helper class for parsing the byte stream input.
|
||||
|
||||
A helper class for parsing the byte stream input.
|
||||
|
||||
The output of the model will be in the following format:
|
||||
```
|
||||
b'{"outputs": [" a"]}\n'
|
||||
@@ -16,21 +15,21 @@ class LineIterator:
|
||||
b'{"outputs": [" problem"]}\n'
|
||||
...
|
||||
```
|
||||
|
||||
While usually each PayloadPart event from the event stream will contain a byte array
|
||||
|
||||
While usually each PayloadPart event from the event stream will contain a byte array
|
||||
with a full json, this is not guaranteed and some of the json objects may be split across
|
||||
PayloadPart events. For example:
|
||||
```
|
||||
{'PayloadPart': {'Bytes': b'{"outputs": '}}
|
||||
{'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
|
||||
```
|
||||
|
||||
|
||||
This class accounts for this by concatenating bytes written via the 'write' function
|
||||
and then exposing a method which will return lines (ending with a '\n' character) within
|
||||
the buffer via the 'scan_lines' function. It maintains the position of the last read
|
||||
position to ensure that previous bytes are not exposed again.
|
||||
the buffer via the 'scan_lines' function. It maintains the position of the last read
|
||||
position to ensure that previous bytes are not exposed again.
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self, stream):
|
||||
self.byte_iterator = iter(stream)
|
||||
self.buffer = io.BytesIO()
|
||||
@@ -43,7 +42,7 @@ class LineIterator:
|
||||
while True:
|
||||
self.buffer.seek(self.read_pos)
|
||||
line = self.buffer.readline()
|
||||
if line and line[-1] == ord('\n'):
|
||||
if line and line[-1] == ord("\n"):
|
||||
self.read_pos += len(line)
|
||||
return line[:-1]
|
||||
try:
|
||||
@@ -52,33 +51,35 @@ class LineIterator:
|
||||
if self.read_pos < self.buffer.getbuffer().nbytes:
|
||||
continue
|
||||
raise
|
||||
if 'PayloadPart' not in chunk:
|
||||
print('Unknown event type:' + chunk)
|
||||
if "PayloadPart" not in chunk:
|
||||
print("Unknown event type:" + chunk)
|
||||
continue
|
||||
self.buffer.seek(0, io.SEEK_END)
|
||||
self.buffer.write(chunk['PayloadPart']['Bytes'])
|
||||
self.buffer.write(chunk["PayloadPart"]["Bytes"])
|
||||
|
||||
|
||||
class SagemakerAPILLM(BaseLLM):
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(self, api_key=None, user_api_key=None, *args, **kwargs):
|
||||
import boto3
|
||||
|
||||
runtime = boto3.client(
|
||||
'runtime.sagemaker',
|
||||
aws_access_key_id='xxx',
|
||||
aws_secret_access_key='xxx',
|
||||
region_name='us-west-2'
|
||||
"runtime.sagemaker",
|
||||
aws_access_key_id="xxx",
|
||||
aws_secret_access_key="xxx",
|
||||
region_name="us-west-2",
|
||||
)
|
||||
|
||||
|
||||
self.endpoint = settings.SAGEMAKER_ENDPOINT
|
||||
super().__init__(*args, **kwargs)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
self.endpoint = settings.SAGEMAKER_ENDPOINT
|
||||
self.runtime = runtime
|
||||
|
||||
|
||||
def gen(self, model, engine, messages, stream=False, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
def _raw_gen(self, baseself, model, messages, stream=False, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
|
||||
# Construct payload for endpoint
|
||||
payload = {
|
||||
@@ -89,25 +90,25 @@ class SagemakerAPILLM(BaseLLM):
|
||||
"temperature": 0.1,
|
||||
"max_new_tokens": 30,
|
||||
"repetition_penalty": 1.03,
|
||||
"stop": ["</s>", "###"]
|
||||
}
|
||||
"stop": ["</s>", "###"],
|
||||
},
|
||||
}
|
||||
body_bytes = json.dumps(payload).encode('utf-8')
|
||||
body_bytes = json.dumps(payload).encode("utf-8")
|
||||
|
||||
# Invoke the endpoint
|
||||
response = self.runtime.invoke_endpoint(EndpointName=self.endpoint,
|
||||
ContentType='application/json',
|
||||
Body=body_bytes)
|
||||
result = json.loads(response['Body'].read().decode())
|
||||
response = self.runtime.invoke_endpoint(
|
||||
EndpointName=self.endpoint, ContentType="application/json", Body=body_bytes
|
||||
)
|
||||
result = json.loads(response["Body"].read().decode())
|
||||
import sys
|
||||
print(result[0]['generated_text'], file=sys.stderr)
|
||||
return result[0]['generated_text'][len(prompt):]
|
||||
|
||||
def gen_stream(self, model, engine, messages, stream=True, **kwargs):
|
||||
context = messages[0]['content']
|
||||
user_question = messages[-1]['content']
|
||||
print(result[0]["generated_text"], file=sys.stderr)
|
||||
return result[0]["generated_text"][len(prompt) :]
|
||||
|
||||
def _raw_gen_stream(self, baseself, model, messages, stream=True, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
|
||||
# Construct payload for endpoint
|
||||
payload = {
|
||||
@@ -118,22 +119,22 @@ class SagemakerAPILLM(BaseLLM):
|
||||
"temperature": 0.1,
|
||||
"max_new_tokens": 512,
|
||||
"repetition_penalty": 1.03,
|
||||
"stop": ["</s>", "###"]
|
||||
}
|
||||
"stop": ["</s>", "###"],
|
||||
},
|
||||
}
|
||||
body_bytes = json.dumps(payload).encode('utf-8')
|
||||
body_bytes = json.dumps(payload).encode("utf-8")
|
||||
|
||||
# Invoke the endpoint
|
||||
response = self.runtime.invoke_endpoint_with_response_stream(EndpointName=self.endpoint,
|
||||
ContentType='application/json',
|
||||
Body=body_bytes)
|
||||
#result = json.loads(response['Body'].read().decode())
|
||||
event_stream = response['Body']
|
||||
start_json = b'{'
|
||||
response = self.runtime.invoke_endpoint_with_response_stream(
|
||||
EndpointName=self.endpoint, ContentType="application/json", Body=body_bytes
|
||||
)
|
||||
# result = json.loads(response['Body'].read().decode())
|
||||
event_stream = response["Body"]
|
||||
start_json = b"{"
|
||||
for line in LineIterator(event_stream):
|
||||
if line != b'' and start_json in line:
|
||||
#print(line)
|
||||
data = json.loads(line[line.find(start_json):].decode('utf-8'))
|
||||
if data['token']['text'] not in ["</s>", "###"]:
|
||||
print(data['token']['text'],end='')
|
||||
yield data['token']['text']
|
||||
if line != b"" and start_json in line:
|
||||
# print(line)
|
||||
data = json.loads(line[line.find(start_json) :].decode("utf-8"))
|
||||
if data["token"]["text"] not in ["</s>", "###"]:
|
||||
print(data["token"]["text"], end="")
|
||||
yield data["token"]["text"]
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
Contains parser for html files.
|
||||
|
||||
"""
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
@@ -18,66 +17,8 @@ class HTMLParser(BaseParser):
|
||||
return {}
|
||||
|
||||
def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
|
||||
"""Parse file.
|
||||
from langchain_community.document_loaders import BSHTMLLoader
|
||||
|
||||
Returns:
|
||||
Union[str, List[str]]: a string or a List of strings.
|
||||
"""
|
||||
try:
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
except ImportError:
|
||||
raise ValueError("unstructured package is required to parse HTML files.")
|
||||
|
||||
# Using the unstructured library to convert the html to isd format
|
||||
# isd sample : isd = [
|
||||
# {"text": "My Title", "type": "Title"},
|
||||
# {"text": "My Narrative", "type": "NarrativeText"}
|
||||
# ]
|
||||
with open(file, "r", encoding="utf-8") as fp:
|
||||
elements = partition_html(file=fp)
|
||||
isd = convert_to_isd(elements)
|
||||
|
||||
# Removing non ascii charactwers from isd_el['text']
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()
|
||||
|
||||
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
|
||||
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
|
||||
for isd_el in isd:
|
||||
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)
|
||||
|
||||
# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
|
||||
for isd_el in isd:
|
||||
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)
|
||||
|
||||
# Creating a list of all the indexes of isd_el['type'] = 'Title'
|
||||
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']
|
||||
|
||||
# Creating 'Chunks' - List of lists of strings
|
||||
# each list starting with isd_el['type'] = 'Title' and all the data till the next 'Title'
|
||||
# Each Chunk can be thought of as an individual set of data, which can be sent to the model
|
||||
# Where Each Title is grouped together with the data under it
|
||||
|
||||
Chunks = [[]]
|
||||
final_chunks = list(list())
|
||||
|
||||
for i, isd_el in enumerate(isd):
|
||||
if i in title_indexes:
|
||||
Chunks.append([])
|
||||
Chunks[-1].append(isd_el['text'])
|
||||
|
||||
# Removing all the chunks with sum of length of all the strings in the chunk < 25
|
||||
# TODO: This value can be an user defined variable
|
||||
for chunk in Chunks:
|
||||
# sum of length of all the strings in the chunk
|
||||
sum = 0
|
||||
sum += len(str(chunk))
|
||||
if sum < 25:
|
||||
Chunks.remove(chunk)
|
||||
else:
|
||||
# appending all the approved chunks to final_chunks as a single string
|
||||
final_chunks.append(" ".join([str(item) for item in chunk]))
|
||||
return final_chunks
|
||||
loader = BSHTMLLoader(file)
|
||||
data = loader.load()
|
||||
return data
|
||||
|
||||
73
application/parser/open_ai_func.py
Normal file → Executable file
@@ -1,38 +1,33 @@
|
||||
import os
|
||||
|
||||
import tiktoken
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.core.settings import settings
|
||||
from retry import retry
|
||||
|
||||
from application.core.settings import settings
|
||||
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
|
||||
# from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
# from langchain_community.embeddings import HuggingFaceInstructEmbeddings
|
||||
# from langchain_community.embeddings import CohereEmbeddings
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
num_tokens = len(encoding.encode(string))
|
||||
total_price = ((num_tokens / 1000) * 0.0004)
|
||||
return num_tokens, total_price
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def store_add_texts_with_retry(store, i):
|
||||
def store_add_texts_with_retry(store, i, id):
|
||||
# add source_id to the metadata
|
||||
i.metadata["source_id"] = str(id)
|
||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
|
||||
|
||||
def call_openai_api(docs, folder_name, task_status):
|
||||
# Function to create a vector store from the documents and save it to disk.
|
||||
def call_openai_api(docs, folder_name, id, task_status):
|
||||
# Function to create a vector store from the documents and save it to disk
|
||||
|
||||
# create output folder if it doesn't exist
|
||||
if not os.path.exists(f"{folder_name}"):
|
||||
os.makedirs(f"{folder_name}")
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
c1 = 0
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
docs_init = [docs[0]]
|
||||
@@ -40,26 +35,34 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
docs_init = docs_init,
|
||||
path=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY")
|
||||
docs_init=docs_init,
|
||||
source_id=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
else:
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
path=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY")
|
||||
source_id=str(id),
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
store.delete_index()
|
||||
# Uncomment for MPNet embeddings
|
||||
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
# store = FAISS.from_documents(docs_test, hf)
|
||||
s1 = len(docs)
|
||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
|
||||
bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||
for i in tqdm(
|
||||
docs,
|
||||
desc="Embedding 🦖",
|
||||
unit="docs",
|
||||
total=len(docs),
|
||||
bar_format="{l_bar}{bar}| Time Left: {remaining}",
|
||||
):
|
||||
try:
|
||||
task_status.update_state(state='PROGRESS', meta={'current': int((c1 / s1) * 100)})
|
||||
store_add_texts_with_retry(store, i)
|
||||
task_status.update_state(
|
||||
state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
|
||||
)
|
||||
store_add_texts_with_retry(store, i, id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Error on ", i)
|
||||
@@ -70,25 +73,3 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
c1 += 1
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
store.save_local(f"{folder_name}")
|
||||
|
||||
|
||||
def get_user_permission(docs, folder_name):
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
|
||||
# docs_content = (" ".join(docs))
|
||||
docs_content = ""
|
||||
for doc in docs:
|
||||
docs_content += doc.page_content
|
||||
|
||||
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
|
||||
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
|
||||
print(f"Number of Tokens = {format(tokens, ',d')}")
|
||||
print(f"Approx Cost = ${format(total_price, ',.2f')}")
|
||||
# Here we check for user permission before calling the API.
|
||||
user_input = input("Price Okay? (Y/N) \n").lower()
|
||||
if user_input == "y":
|
||||
call_openai_api(docs, folder_name)
|
||||
elif user_input == "":
|
||||
call_openai_api(docs, folder_name)
|
||||
else:
|
||||
print("The API was not called. No money was spent.")
|
||||
|
||||
@@ -5,7 +5,7 @@ from application.parser.remote.base import BaseRemote
|
||||
|
||||
class CrawlerLoader(BaseRemote):
|
||||
def __init__(self, limit=10):
|
||||
from langchain.document_loaders import WebBaseLoader
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
self.loader = WebBaseLoader # Initialize the document loader
|
||||
self.limit = limit # Set the limit for the number of pages to scrape
|
||||
|
||||
|
||||
26
application/parser/remote/reddit_loader.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from application.parser.remote.base import BaseRemote
|
||||
from langchain_community.document_loaders import RedditPostsLoader
|
||||
|
||||
|
||||
class RedditPostsLoaderRemote(BaseRemote):
|
||||
def load_data(self, inputs):
|
||||
data = eval(inputs)
|
||||
client_id = data.get("client_id")
|
||||
client_secret = data.get("client_secret")
|
||||
user_agent = data.get("user_agent")
|
||||
categories = data.get("categories", ["new", "hot"])
|
||||
mode = data.get("mode", "subreddit")
|
||||
search_queries = data.get("search_queries")
|
||||
number_posts = data.get("number_posts", 10)
|
||||
self.loader = RedditPostsLoader(
|
||||
client_id=client_id,
|
||||
client_secret=client_secret,
|
||||
user_agent=user_agent,
|
||||
categories=categories,
|
||||
mode=mode,
|
||||
search_queries=search_queries,
|
||||
number_posts=number_posts,
|
||||
)
|
||||
documents = self.loader.load()
|
||||
print(f"Loaded {len(documents)} documents from Reddit")
|
||||
return documents
|
||||
@@ -1,13 +1,15 @@
|
||||
from application.parser.remote.sitemap_loader import SitemapLoader
|
||||
from application.parser.remote.crawler_loader import CrawlerLoader
|
||||
from application.parser.remote.web_loader import WebLoader
|
||||
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
loaders = {
|
||||
'url': WebLoader,
|
||||
'sitemap': SitemapLoader,
|
||||
'crawler': CrawlerLoader
|
||||
"url": WebLoader,
|
||||
"sitemap": SitemapLoader,
|
||||
"crawler": CrawlerLoader,
|
||||
"reddit": RedditPostsLoaderRemote,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -15,4 +17,4 @@ class RemoteCreator:
|
||||
loader_class = cls.loaders.get(type.lower())
|
||||
if not loader_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
return loader_class(*args, **kwargs)
|
||||
return loader_class(*args, **kwargs)
|
||||
|
||||
@@ -5,7 +5,7 @@ from application.parser.remote.base import BaseRemote
|
||||
|
||||
class SitemapLoader(BaseRemote):
|
||||
def __init__(self, limit=20):
|
||||
from langchain.document_loaders import WebBaseLoader
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
self.loader = WebBaseLoader
|
||||
self.limit = limit # Adding limit to control the number of URLs to process
|
||||
|
||||
|
||||
@@ -1,22 +1,32 @@
|
||||
from application.parser.remote.base import BaseRemote
|
||||
from langchain_community.document_loaders import WebBaseLoader
|
||||
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*"
|
||||
";q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Referer": "https://www.google.com/",
|
||||
"DNT": "1",
|
||||
"Connection": "keep-alive",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
}
|
||||
|
||||
|
||||
class WebLoader(BaseRemote):
|
||||
def __init__(self):
|
||||
from langchain.document_loaders import WebBaseLoader
|
||||
self.loader = WebBaseLoader
|
||||
|
||||
def load_data(self, inputs):
|
||||
urls = inputs
|
||||
|
||||
if isinstance(urls, str):
|
||||
urls = [urls] # Convert string to list if a single URL is passed
|
||||
|
||||
urls = [urls]
|
||||
documents = []
|
||||
for url in urls:
|
||||
try:
|
||||
loader = self.loader([url]) # Process URLs one by one
|
||||
loader = self.loader([url], header_template=headers)
|
||||
documents.extend(loader.load())
|
||||
except Exception as e:
|
||||
print(f"Error processing URL {url}: {e}")
|
||||
continue # Continue with the next URL if an error occurs
|
||||
return documents
|
||||
continue
|
||||
return documents
|
||||
|
||||
@@ -22,7 +22,10 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
|
||||
doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
|
||||
|
||||
# Check if current group is empty or if the document can be added based on token count and matching metadata
|
||||
if current_group is None or (len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len < min_tokens and current_group.extra_info == doc.extra_info):
|
||||
if (current_group is None or
|
||||
(len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and
|
||||
doc_len < min_tokens and
|
||||
current_group.extra_info == doc.extra_info)):
|
||||
if current_group is None:
|
||||
current_group = doc # Use the document directly to retain its metadata
|
||||
else:
|
||||
|
||||
@@ -1,34 +1,86 @@
|
||||
anthropic==0.12.0
|
||||
boto3==1.34.6
|
||||
anthropic==0.34.2
|
||||
boto3==1.34.153
|
||||
beautifulsoup4==4.12.3
|
||||
celery==5.3.6
|
||||
dataclasses_json==0.6.3
|
||||
dataclasses-json==0.6.7
|
||||
docx2txt==0.8
|
||||
EbookLib==0.18
|
||||
elasticsearch==8.12.0
|
||||
duckduckgo-search==6.2.6
|
||||
ebooklib==0.18
|
||||
elastic-transport==8.15.0
|
||||
elasticsearch==8.15.1
|
||||
escodegen==1.0.11
|
||||
esprima==4.0.1
|
||||
faiss-cpu==1.7.4
|
||||
Flask==3.0.1
|
||||
gunicorn==21.2.0
|
||||
html2text==2020.1.16
|
||||
esutils==1.0.1
|
||||
Flask==3.0.3
|
||||
faiss-cpu==1.8.0.post1
|
||||
flask-restx==1.3.0
|
||||
gunicorn==23.0.0
|
||||
html2text==2024.2.26
|
||||
javalang==0.13.0
|
||||
langchain==0.1.4
|
||||
langchain-openai==0.0.5
|
||||
nltk==3.8.1
|
||||
openapi3_parser==1.1.16
|
||||
pandas==2.2.0
|
||||
pydantic_settings==2.1.0
|
||||
pymongo==4.6.1
|
||||
PyPDF2==3.0.1
|
||||
jinja2==3.1.4
|
||||
jiter==0.5.0
|
||||
jmespath==1.0.1
|
||||
joblib==1.4.2
|
||||
jsonpatch==1.33
|
||||
jsonpointer==3.0.0
|
||||
jsonschema==4.23.0
|
||||
jsonschema-spec==0.2.4
|
||||
jsonschema-specifications==2023.7.1
|
||||
kombu==5.4.2
|
||||
langchain==0.3.0
|
||||
langchain-community==0.3.0
|
||||
langchain-core==0.3.2
|
||||
langchain-openai==0.2.0
|
||||
langchain-text-splitters==0.3.0
|
||||
langsmith==0.1.125
|
||||
lazy-object-proxy==1.10.0
|
||||
lxml==5.3.0
|
||||
markupsafe==2.1.5
|
||||
marshmallow==3.22.0
|
||||
mpmath==1.3.0
|
||||
multidict==6.1.0
|
||||
mypy-extensions==1.0.0
|
||||
networkx==3.3
|
||||
numpy==1.26.4
|
||||
openai==1.46.1
|
||||
openapi-schema-validator==0.6.2
|
||||
openapi-spec-validator==0.6.0
|
||||
openapi3-parser==1.1.18
|
||||
orjson==3.10.7
|
||||
packaging==24.1
|
||||
pandas==2.2.3
|
||||
pathable==0.4.3
|
||||
pillow==10.4.0
|
||||
portalocker==2.10.1
|
||||
prance==23.6.21.0
|
||||
primp==0.6.2
|
||||
prompt-toolkit==3.0.47
|
||||
protobuf==5.28.2
|
||||
py==1.11.0
|
||||
pydantic==2.9.2
|
||||
pydantic-core==2.23.4
|
||||
pydantic-settings==2.4.0
|
||||
pymongo==4.8.0
|
||||
pypdf2==3.0.1
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.0.1
|
||||
qdrant-client==1.7.3
|
||||
qdrant-client==1.11.0
|
||||
redis==5.0.1
|
||||
Requests==2.31.0
|
||||
referencing==0.30.2
|
||||
regex==2024.9.11
|
||||
requests==2.32.3
|
||||
retry==0.9.2
|
||||
sentence-transformers
|
||||
tiktoken==0.5.2
|
||||
torch==2.1.2
|
||||
tqdm==4.66.1
|
||||
transformers==4.36.2
|
||||
unstructured==0.12.2
|
||||
Werkzeug==3.0.1
|
||||
sentence-transformers==3.0.1
|
||||
tiktoken==0.7.0
|
||||
tokenizers==0.19.1
|
||||
torch==2.4.1
|
||||
tqdm==4.66.5
|
||||
transformers==4.44.2
|
||||
typing-extensions==4.12.2
|
||||
typing-inspect==0.9.0
|
||||
tzdata==2024.2
|
||||
urllib3==2.2.3
|
||||
vine==5.1.0
|
||||
wcwidth==0.2.13
|
||||
werkzeug==3.0.4
|
||||
yarl==1.11.1
|
||||
|
||||
0
application/retriever/__init__.py
Normal file
18
application/retriever/base.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class BaseRetriever(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def gen(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def search(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_params(self):
|
||||
pass
|
||||
115
application/retriever/brave_search.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import json
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.utils import num_tokens_from_string
|
||||
from langchain_community.tools import BraveSearch
|
||||
|
||||
|
||||
class BraveRetSearch(BaseRetriever):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
question,
|
||||
source,
|
||||
chat_history,
|
||||
prompt,
|
||||
chunks=2,
|
||||
token_limit=150,
|
||||
gpt_model="docsgpt",
|
||||
user_api_key=None,
|
||||
):
|
||||
self.question = question
|
||||
self.source = source
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
self.gpt_model = gpt_model
|
||||
self.token_limit = (
|
||||
token_limit
|
||||
if token_limit
|
||||
< settings.MODEL_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
else settings.MODEL_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
else:
|
||||
search = BraveSearch.from_api_key(
|
||||
api_key=settings.BRAVE_SEARCH_API_KEY,
|
||||
search_kwargs={"count": int(self.chunks)},
|
||||
)
|
||||
results = search.run(self.question)
|
||||
results = json.loads(results)
|
||||
|
||||
docs = []
|
||||
for i in results:
|
||||
try:
|
||||
title = i["title"]
|
||||
link = i["link"]
|
||||
snippet = i["snippet"]
|
||||
docs.append({"text": snippet, "title": title, "link": link})
|
||||
except IndexError:
|
||||
pass
|
||||
if settings.LLM_NAME == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
|
||||
return docs
|
||||
|
||||
def gen(self):
|
||||
docs = self._get_data()
|
||||
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc["text"] for doc in docs])
|
||||
p_chat_combine = self.prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
for doc in docs:
|
||||
yield {"source": doc}
|
||||
|
||||
if len(self.chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
self.chat_history.reverse()
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
|
||||
i["response"]
|
||||
)
|
||||
if tokens_current_history + tokens_batch < self.token_limit:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append(
|
||||
{"role": "user", "content": i["prompt"]}
|
||||
)
|
||||
messages_combine.append(
|
||||
{"role": "system", "content": i["response"]}
|
||||
)
|
||||
messages_combine.append({"role": "user", "content": self.question})
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
|
||||
)
|
||||
|
||||
completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
|
||||
for line in completion:
|
||||
yield {"answer": str(line)}
|
||||
|
||||
def search(self):
|
||||
return self._get_data()
|
||||
|
||||
def get_params(self):
|
||||
return {
|
||||
"question": self.question,
|
||||
"source": self.source,
|
||||
"chat_history": self.chat_history,
|
||||
"prompt": self.prompt,
|
||||
"chunks": self.chunks,
|
||||
"token_limit": self.token_limit,
|
||||
"gpt_model": self.gpt_model,
|
||||
"user_api_key": self.user_api_key
|
||||
}
|
||||
118
application/retriever/classic_rag.py
Normal file
@@ -0,0 +1,118 @@
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
|
||||
from application.utils import num_tokens_from_string
|
||||
|
||||
|
||||
class ClassicRAG(BaseRetriever):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
question,
|
||||
source,
|
||||
chat_history,
|
||||
prompt,
|
||||
chunks=2,
|
||||
token_limit=150,
|
||||
gpt_model="docsgpt",
|
||||
user_api_key=None,
|
||||
):
|
||||
self.question = question
|
||||
self.vectorstore = source['active_docs'] if 'active_docs' in source else None
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
self.gpt_model = gpt_model
|
||||
self.token_limit = (
|
||||
token_limit
|
||||
if token_limit
|
||||
< settings.MODEL_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
else settings.MODEL_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
else:
|
||||
docsearch = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY
|
||||
)
|
||||
docs_temp = docsearch.search(self.question, k=self.chunks)
|
||||
print(docs_temp)
|
||||
docs = [
|
||||
{
|
||||
"title": i.metadata.get(
|
||||
"title", i.metadata.get("post_title", i.page_content)
|
||||
).split("/")[-1],
|
||||
"text": i.page_content,
|
||||
"source": (
|
||||
i.metadata.get("source")
|
||||
if i.metadata.get("source")
|
||||
else "local"
|
||||
),
|
||||
}
|
||||
for i in docs_temp
|
||||
]
|
||||
if settings.LLM_NAME == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
|
||||
return docs
|
||||
|
||||
def gen(self):
|
||||
docs = self._get_data()
|
||||
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc["text"] for doc in docs])
|
||||
p_chat_combine = self.prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
for doc in docs:
|
||||
yield {"source": doc}
|
||||
|
||||
if len(self.chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
self.chat_history.reverse()
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
|
||||
i["response"]
|
||||
)
|
||||
if tokens_current_history + tokens_batch < self.token_limit:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append(
|
||||
{"role": "user", "content": i["prompt"]}
|
||||
)
|
||||
messages_combine.append(
|
||||
{"role": "system", "content": i["response"]}
|
||||
)
|
||||
messages_combine.append({"role": "user", "content": self.question})
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
|
||||
)
|
||||
|
||||
completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
|
||||
for line in completion:
|
||||
yield {"answer": str(line)}
|
||||
|
||||
def search(self):
|
||||
return self._get_data()
|
||||
|
||||
def get_params(self):
|
||||
return {
|
||||
"question": self.question,
|
||||
"source": self.vectorstore,
|
||||
"chat_history": self.chat_history,
|
||||
"prompt": self.prompt,
|
||||
"chunks": self.chunks,
|
||||
"token_limit": self.token_limit,
|
||||
"gpt_model": self.gpt_model,
|
||||
"user_api_key": self.user_api_key
|
||||
}
|
||||
132
application/retriever/duckduck_search.py
Normal file
@@ -0,0 +1,132 @@
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.utils import num_tokens_from_string
|
||||
from langchain_community.tools import DuckDuckGoSearchResults
|
||||
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
||||
|
||||
|
||||
class DuckDuckSearch(BaseRetriever):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
question,
|
||||
source,
|
||||
chat_history,
|
||||
prompt,
|
||||
chunks=2,
|
||||
token_limit=150,
|
||||
gpt_model="docsgpt",
|
||||
user_api_key=None,
|
||||
):
|
||||
self.question = question
|
||||
self.source = source
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
self.gpt_model = gpt_model
|
||||
self.token_limit = (
|
||||
token_limit
|
||||
if token_limit
|
||||
< settings.MODEL_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
else settings.MODEL_TOKEN_LIMITS.get(
|
||||
self.gpt_model, settings.DEFAULT_MAX_HISTORY
|
||||
)
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _parse_lang_string(self, input_string):
|
||||
result = []
|
||||
current_item = ""
|
||||
inside_brackets = False
|
||||
for char in input_string:
|
||||
if char == "[":
|
||||
inside_brackets = True
|
||||
elif char == "]":
|
||||
inside_brackets = False
|
||||
result.append(current_item)
|
||||
current_item = ""
|
||||
elif inside_brackets:
|
||||
current_item += char
|
||||
|
||||
if inside_brackets:
|
||||
result.append(current_item)
|
||||
|
||||
return result
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
else:
|
||||
wrapper = DuckDuckGoSearchAPIWrapper(max_results=self.chunks)
|
||||
search = DuckDuckGoSearchResults(api_wrapper=wrapper)
|
||||
results = search.run(self.question)
|
||||
results = self._parse_lang_string(results)
|
||||
|
||||
docs = []
|
||||
for i in results:
|
||||
try:
|
||||
text = i.split("title:")[0]
|
||||
title = i.split("title:")[1].split("link:")[0]
|
||||
link = i.split("link:")[1]
|
||||
docs.append({"text": text, "title": title, "link": link})
|
||||
except IndexError:
|
||||
pass
|
||||
if settings.LLM_NAME == "llama.cpp":
|
||||
docs = [docs[0]]
|
||||
|
||||
return docs
|
||||
|
||||
def gen(self):
|
||||
docs = self._get_data()
|
||||
|
||||
# join all page_content together with a newline
|
||||
docs_together = "\n".join([doc["text"] for doc in docs])
|
||||
p_chat_combine = self.prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
for doc in docs:
|
||||
yield {"source": doc}
|
||||
|
||||
if len(self.chat_history) > 1:
|
||||
tokens_current_history = 0
|
||||
# count tokens in history
|
||||
self.chat_history.reverse()
|
||||
for i in self.chat_history:
|
||||
if "prompt" in i and "response" in i:
|
||||
tokens_batch = num_tokens_from_string(i["prompt"]) + num_tokens_from_string(
|
||||
i["response"]
|
||||
)
|
||||
if tokens_current_history + tokens_batch < self.token_limit:
|
||||
tokens_current_history += tokens_batch
|
||||
messages_combine.append(
|
||||
{"role": "user", "content": i["prompt"]}
|
||||
)
|
||||
messages_combine.append(
|
||||
{"role": "system", "content": i["response"]}
|
||||
)
|
||||
messages_combine.append({"role": "user", "content": self.question})
|
||||
|
||||
llm = LLMCreator.create_llm(
|
||||
settings.LLM_NAME, api_key=settings.API_KEY, user_api_key=self.user_api_key
|
||||
)
|
||||
|
||||
completion = llm.gen_stream(model=self.gpt_model, messages=messages_combine)
|
||||
for line in completion:
|
||||
yield {"answer": str(line)}
|
||||
|
||||
def search(self):
|
||||
return self._get_data()
|
||||
|
||||
def get_params(self):
|
||||
return {
|
||||
"question": self.question,
|
||||
"source": self.source,
|
||||
"chat_history": self.chat_history,
|
||||
"prompt": self.prompt,
|
||||
"chunks": self.chunks,
|
||||
"token_limit": self.token_limit,
|
||||
"gpt_model": self.gpt_model,
|
||||
"user_api_key": self.user_api_key
|
||||
}
|
||||
20
application/retriever/retriever_creator.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from application.retriever.classic_rag import ClassicRAG
|
||||
from application.retriever.duckduck_search import DuckDuckSearch
|
||||
from application.retriever.brave_search import BraveRetSearch
|
||||
|
||||
|
||||
|
||||
class RetrieverCreator:
|
||||
retrievers = {
|
||||
'classic': ClassicRAG,
|
||||
'duckduck_search': DuckDuckSearch,
|
||||
'brave_search': BraveRetSearch,
|
||||
'default': ClassicRAG
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_retriever(cls, type, *args, **kwargs):
|
||||
retiever_class = cls.retrievers.get(type.lower())
|
||||
if not retiever_class:
|
||||
raise ValueError(f"No retievers class found for type {type}")
|
||||
return retiever_class(*args, **kwargs)
|
||||
49
application/usage.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import sys
|
||||
from pymongo import MongoClient
|
||||
from datetime import datetime
|
||||
from application.core.settings import settings
|
||||
from application.utils import num_tokens_from_string
|
||||
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
usage_collection = db["token_usage"]
|
||||
|
||||
|
||||
def update_token_usage(user_api_key, token_usage):
|
||||
if "pytest" in sys.modules:
|
||||
return
|
||||
usage_data = {
|
||||
"api_key": user_api_key,
|
||||
"prompt_tokens": token_usage["prompt_tokens"],
|
||||
"generated_tokens": token_usage["generated_tokens"],
|
||||
"timestamp": datetime.now(),
|
||||
}
|
||||
usage_collection.insert_one(usage_data)
|
||||
|
||||
|
||||
def gen_token_usage(func):
|
||||
def wrapper(self, model, messages, stream, **kwargs):
|
||||
for message in messages:
|
||||
self.token_usage["prompt_tokens"] += num_tokens_from_string(message["content"])
|
||||
result = func(self, model, messages, stream, **kwargs)
|
||||
self.token_usage["generated_tokens"] += num_tokens_from_string(result)
|
||||
update_token_usage(self.user_api_key, self.token_usage)
|
||||
return result
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def stream_token_usage(func):
|
||||
def wrapper(self, model, messages, stream, **kwargs):
|
||||
for message in messages:
|
||||
self.token_usage["prompt_tokens"] += num_tokens_from_string(message["content"])
|
||||
batch = []
|
||||
result = func(self, model, messages, stream, **kwargs)
|
||||
for r in result:
|
||||
batch.append(r)
|
||||
yield r
|
||||
for line in batch:
|
||||
self.token_usage["generated_tokens"] += num_tokens_from_string(line)
|
||||
update_token_usage(self.user_api_key, self.token_usage)
|
||||
|
||||
return wrapper
|
||||
41
application/utils.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import tiktoken
|
||||
from flask import jsonify, make_response
|
||||
|
||||
_encoding = None
|
||||
|
||||
|
||||
def get_encoding():
|
||||
global _encoding
|
||||
if _encoding is None:
|
||||
_encoding = tiktoken.get_encoding("cl100k_base")
|
||||
return _encoding
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str) -> int:
|
||||
encoding = get_encoding()
|
||||
num_tokens = len(encoding.encode(string))
|
||||
return num_tokens
|
||||
|
||||
|
||||
def count_tokens_docs(docs):
|
||||
docs_content = ""
|
||||
for doc in docs:
|
||||
docs_content += doc.page_content
|
||||
|
||||
tokens = num_tokens_from_string(docs_content)
|
||||
return tokens
|
||||
|
||||
|
||||
def check_required_fields(data, required_fields):
|
||||
missing_fields = [field for field in required_fields if field not in data]
|
||||
if missing_fields:
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": f"Missing fields: {', '.join(missing_fields)}",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
return None
|
||||
@@ -1,13 +1,55 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
from langchain_community.embeddings import (
|
||||
HuggingFaceEmbeddings,
|
||||
CohereEmbeddings,
|
||||
HuggingFaceInstructEmbeddings,
|
||||
)
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from application.core.settings import settings
|
||||
|
||||
class EmbeddingsWrapper:
|
||||
def __init__(self, model_name, *args, **kwargs):
|
||||
self.model = SentenceTransformer(model_name, config_kwargs={'allow_dangerous_deserialization': True}, *args, **kwargs)
|
||||
self.dimension = self.model.get_sentence_embedding_dimension()
|
||||
|
||||
def embed_query(self, query: str):
|
||||
return self.model.encode(query).tolist()
|
||||
|
||||
def embed_documents(self, documents: list):
|
||||
return self.model.encode(documents).tolist()
|
||||
|
||||
def __call__(self, text):
|
||||
if isinstance(text, str):
|
||||
return self.embed_query(text)
|
||||
elif isinstance(text, list):
|
||||
return self.embed_documents(text)
|
||||
else:
|
||||
raise ValueError("Input must be a string or a list of strings")
|
||||
|
||||
|
||||
|
||||
class EmbeddingsSingleton:
|
||||
_instances = {}
|
||||
|
||||
@staticmethod
|
||||
def get_instance(embeddings_name, *args, **kwargs):
|
||||
if embeddings_name not in EmbeddingsSingleton._instances:
|
||||
EmbeddingsSingleton._instances[embeddings_name] = EmbeddingsSingleton._create_instance(
|
||||
embeddings_name, *args, **kwargs
|
||||
)
|
||||
return EmbeddingsSingleton._instances[embeddings_name]
|
||||
|
||||
@staticmethod
|
||||
def _create_instance(embeddings_name, *args, **kwargs):
|
||||
embeddings_factory = {
|
||||
"openai_text-embedding-ada-002": OpenAIEmbeddings,
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
|
||||
}
|
||||
|
||||
if embeddings_name in embeddings_factory:
|
||||
return embeddings_factory[embeddings_name](*args, **kwargs)
|
||||
else:
|
||||
return EmbeddingsWrapper(embeddings_name, *args, **kwargs)
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
@@ -20,37 +62,28 @@ class BaseVectorStore(ABC):
|
||||
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
|
||||
|
||||
def _get_embeddings(self, embeddings_name, embeddings_key=None):
|
||||
embeddings_factory = {
|
||||
"openai_text-embedding-ada-002": OpenAIEmbeddings,
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
|
||||
"huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
|
||||
"cohere_medium": CohereEmbeddings
|
||||
}
|
||||
|
||||
if embeddings_name not in embeddings_factory:
|
||||
raise ValueError(f"Invalid embeddings_name: {embeddings_name}")
|
||||
|
||||
if embeddings_name == "openai_text-embedding-ada-002":
|
||||
if self.is_azure_configured():
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
)
|
||||
else:
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
openai_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "cohere_medium":
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
cohere_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
embedding_instance = embeddings_factory[embeddings_name](
|
||||
#model_name="./model/all-mpnet-base-v2",
|
||||
model_kwargs={"device": "cpu"},
|
||||
)
|
||||
if os.path.exists("./model/all-mpnet-base-v2"):
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name="./model/all-mpnet-base-v2",
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
)
|
||||
else:
|
||||
embedding_instance = embeddings_factory[embeddings_name]()
|
||||
|
||||
return embedding_instance
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
|
||||
|
||||
return embedding_instance
|
||||
@@ -9,9 +9,9 @@ import elasticsearch
|
||||
class ElasticsearchStore(BaseVectorStore):
|
||||
_es_connection = None # Class attribute to hold the Elasticsearch connection
|
||||
|
||||
def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
super().__init__()
|
||||
self.path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
self.embeddings_key = embeddings_key
|
||||
self.index_name = index_name
|
||||
|
||||
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
|
||||
vector = embeddings.embed_query(question)
|
||||
knn = {
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
|
||||
"field": "vector",
|
||||
"k": k,
|
||||
"num_candidates": 100,
|
||||
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
|
||||
}
|
||||
},
|
||||
"rank": {"rrf": {}},
|
||||
@@ -209,5 +209,4 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
|
||||
def delete_index(self):
|
||||
self._es_connection.delete_by_query(index=self.index_name, query={"match": {
|
||||
"metadata.store.keyword": self.path}},)
|
||||
|
||||
"metadata.source_id.keyword": self.source_id}},)
|
||||
|
||||
@@ -1,12 +1,22 @@
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
import os
|
||||
|
||||
def get_vectorstore(path):
|
||||
if path:
|
||||
vectorstore = "indexes/"+path
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
else:
|
||||
vectorstore = os.path.join("application")
|
||||
|
||||
return vectorstore
|
||||
|
||||
class FaissStore(BaseVectorStore):
|
||||
|
||||
def __init__(self, path, embeddings_key, docs_init=None):
|
||||
def __init__(self, source_id, embeddings_key, docs_init=None):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
self.path = get_vectorstore(source_id)
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
if docs_init:
|
||||
self.docsearch = FAISS.from_documents(
|
||||
@@ -14,7 +24,8 @@ class FaissStore(BaseVectorStore):
|
||||
)
|
||||
else:
|
||||
self.docsearch = FAISS.load_local(
|
||||
self.path, embeddings
|
||||
self.path, embeddings,
|
||||
allow_dangerous_deserialization=True
|
||||
)
|
||||
self.assert_embedding_dimensions(embeddings)
|
||||
|
||||
@@ -37,10 +48,10 @@ class FaissStore(BaseVectorStore):
|
||||
"""
|
||||
if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
try:
|
||||
word_embedding_dimension = embeddings.client[1].word_embedding_dimension
|
||||
word_embedding_dimension = embeddings.dimension
|
||||
except AttributeError as e:
|
||||
raise AttributeError("word_embedding_dimension not found in embeddings.client[1]") from e
|
||||
raise AttributeError("'dimension' attribute not found in embeddings instance. Make sure the embeddings object is properly initialized.") from e
|
||||
docsearch_index_dimension = self.docsearch.index.d
|
||||
if word_embedding_dimension != docsearch_index_dimension:
|
||||
raise ValueError(f"word_embedding_dimension ({word_embedding_dimension}) " +
|
||||
f"!= docsearch_index_word_embedding_dimension ({docsearch_index_dimension})")
|
||||
raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) " +
|
||||
f"!= docsearch index dimension ({docsearch_index_dimension})")
|
||||
37
application/vectorstore/milvus.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from typing import List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
|
||||
|
||||
class MilvusStore(BaseVectorStore):
|
||||
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
|
||||
super().__init__()
|
||||
from langchain_milvus import Milvus
|
||||
|
||||
connection_args = {
|
||||
"uri": settings.MILVUS_URI,
|
||||
"token": settings.MILVUS_TOKEN,
|
||||
}
|
||||
self._docsearch = Milvus(
|
||||
embedding_function=self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key),
|
||||
collection_name=settings.MILVUS_COLLECTION_NAME,
|
||||
connection_args=connection_args,
|
||||
)
|
||||
self._path = path
|
||||
|
||||
def search(self, question, k=2, *args, **kwargs):
|
||||
return self._docsearch.similarity_search(query=question, k=k, filter={"path": self._path} *args, **kwargs)
|
||||
|
||||
def add_texts(self, texts: List[str], metadatas: Optional[List[dict]], *args, **kwargs):
|
||||
ids = [str(uuid4()) for _ in range(len(texts))]
|
||||
|
||||
return self._docsearch.add_texts(texts=texts, metadatas=metadatas, ids=ids, *args, **kwargs)
|
||||
|
||||
def save_local(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
pass
|
||||
@@ -1,11 +1,12 @@
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.vectorstore.document_class import Document
|
||||
|
||||
|
||||
class MongoDBVectorStore(BaseVectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
path: str = "",
|
||||
source_id: str = "",
|
||||
embeddings_key: str = "embeddings",
|
||||
collection: str = "documents",
|
||||
index_name: str = "vector_search_index",
|
||||
@@ -18,7 +19,7 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
self._embedding_key = embedding_key
|
||||
self._embeddings_key = embeddings_key
|
||||
self._mongo_uri = settings.MONGO_URI
|
||||
self._path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
|
||||
try:
|
||||
@@ -33,27 +34,24 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
self._database = self._client[database]
|
||||
self._collection = self._database[collection]
|
||||
|
||||
|
||||
def search(self, question, k=2, *args, **kwargs):
|
||||
query_vector = self._embedding.embed_query(question)
|
||||
|
||||
pipeline = [
|
||||
{
|
||||
"$vectorSearch": {
|
||||
"queryVector": query_vector,
|
||||
"queryVector": query_vector,
|
||||
"path": self._embedding_key,
|
||||
"limit": k,
|
||||
"numCandidates": k * 10,
|
||||
"limit": k,
|
||||
"numCandidates": k * 10,
|
||||
"index": self._index_name,
|
||||
"filter": {
|
||||
"store": {"$eq": self._path}
|
||||
}
|
||||
"filter": {"source_id": {"$eq": self._source_id}},
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
cursor = self._collection.aggregate(pipeline)
|
||||
|
||||
|
||||
results = []
|
||||
for doc in cursor:
|
||||
text = doc[self._text_key]
|
||||
@@ -63,30 +61,32 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
metadata = doc
|
||||
results.append(Document(text, metadata))
|
||||
return results
|
||||
|
||||
|
||||
def _insert_texts(self, texts, metadatas):
|
||||
if not texts:
|
||||
return []
|
||||
embeddings = self._embedding.embed_documents(texts)
|
||||
|
||||
to_insert = [
|
||||
{self._text_key: t, self._embedding_key: embedding, **m}
|
||||
for t, m, embedding in zip(texts, metadatas, embeddings)
|
||||
]
|
||||
# insert the documents in MongoDB Atlas
|
||||
|
||||
insert_result = self._collection.insert_many(to_insert)
|
||||
return insert_result.inserted_ids
|
||||
|
||||
def add_texts(self,
|
||||
|
||||
def add_texts(
|
||||
self,
|
||||
texts,
|
||||
metadatas = None,
|
||||
ids = None,
|
||||
refresh_indices = True,
|
||||
create_index_if_not_exists = True,
|
||||
bulk_kwargs = None,
|
||||
**kwargs,):
|
||||
metadatas=None,
|
||||
ids=None,
|
||||
refresh_indices=True,
|
||||
create_index_if_not_exists=True,
|
||||
bulk_kwargs=None,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
|
||||
#dims = self._embedding.client[1].word_embedding_dimension
|
||||
# dims = self._embedding.client[1].word_embedding_dimension
|
||||
# # check if index exists
|
||||
# if create_index_if_not_exists:
|
||||
# # check if index exists
|
||||
@@ -121,6 +121,6 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
if texts_batch:
|
||||
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
|
||||
return result_ids
|
||||
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
self._collection.delete_many({"store": self._path})
|
||||
self._collection.delete_many({"source_id": self._source_id})
|
||||
|
||||
@@ -5,12 +5,12 @@ from qdrant_client import models
|
||||
|
||||
|
||||
class QdrantStore(BaseVectorStore):
|
||||
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
|
||||
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
|
||||
self._filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
key="metadata.store",
|
||||
match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
|
||||
key="metadata.source_id",
|
||||
match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from application.vectorstore.faiss import FaissStore
|
||||
from application.vectorstore.elasticsearch import ElasticsearchStore
|
||||
from application.vectorstore.milvus import MilvusStore
|
||||
from application.vectorstore.mongodb import MongoDBVectorStore
|
||||
from application.vectorstore.qdrant import QdrantStore
|
||||
|
||||
@@ -10,6 +11,7 @@ class VectorCreator:
|
||||
"elasticsearch": ElasticsearchStore,
|
||||
"mongodb": MongoDBVectorStore,
|
||||
"qdrant": QdrantStore,
|
||||
"milvus": MilvusStore,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
284
application/worker.py
Normal file → Executable file
@@ -1,40 +1,74 @@
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import string
|
||||
import zipfile
|
||||
from collections import Counter
|
||||
from urllib.parse import urljoin
|
||||
|
||||
import nltk
|
||||
import requests
|
||||
from bson.objectid import ObjectId
|
||||
from pymongo import MongoClient
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
from application.utils import count_tokens_docs
|
||||
|
||||
try:
|
||||
nltk.download('punkt', quiet=True)
|
||||
nltk.download('averaged_perceptron_tagger', quiet=True)
|
||||
except FileExistsError:
|
||||
pass
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
sources_collection = db["sources"]
|
||||
|
||||
|
||||
# Define a function to extract metadata from a given filename.
|
||||
def metadata_from_filename(title):
|
||||
store = '/'.join(title.split('/')[1:3])
|
||||
return {'title': title, 'store': store}
|
||||
return {"title": title}
|
||||
|
||||
|
||||
# Define a function to generate a random string of a given length.
|
||||
def generate_random_string(length):
|
||||
return ''.join([string.ascii_letters[i % 52] for i in range(length)])
|
||||
return "".join([string.ascii_letters[i % 52] for i in range(length)])
|
||||
|
||||
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
|
||||
|
||||
def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
||||
"""
|
||||
Recursively extract zip files with a limit on recursion depth.
|
||||
|
||||
Args:
|
||||
zip_path (str): Path to the zip file to be extracted.
|
||||
extract_to (str): Destination path for extracted files.
|
||||
current_depth (int): Current depth of recursion.
|
||||
max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
|
||||
"""
|
||||
if current_depth > max_depth:
|
||||
logging.warning(f"Reached maximum recursion depth of {max_depth}")
|
||||
return
|
||||
|
||||
with zipfile.ZipFile(zip_path, "r") as zip_ref:
|
||||
zip_ref.extractall(extract_to)
|
||||
os.remove(zip_path) # Remove the zip file after extracting
|
||||
|
||||
# Check for nested zip files and extract them
|
||||
for root, dirs, files in os.walk(extract_to):
|
||||
for file in files:
|
||||
if file.endswith(".zip"):
|
||||
# If a nested zip file is found, extract it recursively
|
||||
file_path = os.path.join(root, file)
|
||||
extract_zip_recursive(file_path, root, current_depth + 1, max_depth)
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Define the main function for ingesting and processing documents.
|
||||
def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
def ingest_worker(
|
||||
self, directory, formats, name_job, filename, user, retriever="classic"
|
||||
):
|
||||
"""
|
||||
Ingest and process documents.
|
||||
|
||||
@@ -45,6 +79,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
name_job (str): Name of the job for this ingestion task.
|
||||
filename (str): Name of the file to be ingested.
|
||||
user (str): Identifier for the user initiating the ingestion.
|
||||
retriever (str): Type of retriever to use for processing the documents.
|
||||
|
||||
Returns:
|
||||
dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
|
||||
@@ -62,109 +97,216 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
token_check = True
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
full_path = directory + '/' + user + '/' + name_job
|
||||
import sys
|
||||
print(full_path, file=sys.stderr)
|
||||
recursion_depth = 2
|
||||
full_path = os.path.join(directory, user, name_job)
|
||||
|
||||
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
|
||||
# check if API_URL env variable is set
|
||||
file_data = {'name': name_job, 'file': filename, 'user': user}
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
|
||||
# check if file is in the response
|
||||
print(response, file=sys.stderr)
|
||||
file_data = {"name": name_job, "file": filename, "user": user}
|
||||
response = requests.get(
|
||||
urljoin(settings.API_URL, "/api/download"), params=file_data
|
||||
)
|
||||
file = response.content
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
os.makedirs(full_path)
|
||||
with open(full_path + '/' + filename, 'wb') as f:
|
||||
with open(os.path.join(full_path, filename), "wb") as f:
|
||||
f.write(file)
|
||||
|
||||
# check if file is .zip and extract it
|
||||
if filename.endswith('.zip'):
|
||||
with zipfile.ZipFile(full_path + '/' + filename, 'r') as zip_ref:
|
||||
zip_ref.extractall(full_path)
|
||||
os.remove(full_path + '/' + filename)
|
||||
if filename.endswith(".zip"):
|
||||
extract_zip_recursive(
|
||||
os.path.join(full_path, filename), full_path, 0, recursion_depth
|
||||
)
|
||||
|
||||
self.update_state(state='PROGRESS', meta={'current': 1})
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
|
||||
raw_docs = SimpleDirectoryReader(input_dir=full_path, input_files=input_files, recursive=recursive,
|
||||
required_exts=formats, num_files_limit=limit,
|
||||
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
|
||||
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
||||
raw_docs = SimpleDirectoryReader(
|
||||
input_dir=full_path,
|
||||
input_files=input_files,
|
||||
recursive=recursive,
|
||||
required_exts=formats,
|
||||
num_files_limit=limit,
|
||||
exclude_hidden=exclude,
|
||||
file_metadata=metadata_from_filename,
|
||||
).load_data()
|
||||
raw_docs = group_split(
|
||||
documents=raw_docs,
|
||||
min_tokens=min_tokens,
|
||||
max_tokens=max_tokens,
|
||||
token_check=token_check,
|
||||
)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
id = ObjectId()
|
||||
|
||||
call_openai_api(docs, full_path, self)
|
||||
self.update_state(state='PROGRESS', meta={'current': 100})
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
tokens = count_tokens_docs(docs)
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
if sample:
|
||||
for i in range(min(5, len(raw_docs))):
|
||||
print(raw_docs[i].text)
|
||||
logging.info(f"Sample document {i}: {raw_docs[i]}")
|
||||
|
||||
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
||||
# and send them to the server (provide user and name in form)
|
||||
file_data = {'name': name_job, 'user': user}
|
||||
file_data = {
|
||||
"name": name_job,
|
||||
"user": user,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": "local",
|
||||
}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
files = {
|
||||
"file_faiss": open(full_path + "/index.faiss", "rb"),
|
||||
"file_pkl": open(full_path + "/index.pkl", "rb"),
|
||||
}
|
||||
response = requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
|
||||
)
|
||||
else:
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
response = requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), data=file_data
|
||||
)
|
||||
|
||||
|
||||
# delete local
|
||||
shutil.rmtree(full_path)
|
||||
|
||||
return {
|
||||
'directory': directory,
|
||||
'formats': formats,
|
||||
'name_job': name_job,
|
||||
'filename': filename,
|
||||
'user': user,
|
||||
'limited': False
|
||||
"directory": directory,
|
||||
"formats": formats,
|
||||
"name_job": name_job,
|
||||
"filename": filename,
|
||||
"user": user,
|
||||
"limited": False,
|
||||
}
|
||||
|
||||
def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
|
||||
# sample = False
|
||||
|
||||
def remote_worker(
|
||||
self,
|
||||
source_data,
|
||||
name_job,
|
||||
user,
|
||||
loader,
|
||||
directory="temp",
|
||||
retriever="classic",
|
||||
sync_frequency="never",
|
||||
operation_mode="upload",
|
||||
doc_id=None,
|
||||
):
|
||||
token_check = True
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
full_path = directory + '/' + user + '/' + name_job
|
||||
full_path = directory + "/" + user + "/" + name_job
|
||||
|
||||
if not os.path.exists(full_path):
|
||||
os.makedirs(full_path)
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
logging.info(
|
||||
f"Remote job: {full_path}",
|
||||
extra={"user": user, "job": name_job, source_data: source_data},
|
||||
)
|
||||
|
||||
self.update_state(state='PROGRESS', meta={'current': 1})
|
||||
|
||||
# source_data {"data": [url]} for url type task just urls
|
||||
|
||||
# Use RemoteCreator to load data from URL
|
||||
remote_loader = RemoteCreator.create_loader(loader)
|
||||
raw_docs = remote_loader.load_data(source_data)
|
||||
|
||||
docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
||||
|
||||
#docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
|
||||
call_openai_api(docs, full_path, self)
|
||||
self.update_state(state='PROGRESS', meta={'current': 100})
|
||||
|
||||
|
||||
docs = group_split(
|
||||
documents=raw_docs,
|
||||
min_tokens=min_tokens,
|
||||
max_tokens=max_tokens,
|
||||
token_check=token_check,
|
||||
)
|
||||
# docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
tokens = count_tokens_docs(docs)
|
||||
if operation_mode == "upload":
|
||||
id = ObjectId()
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
elif operation_mode == "sync":
|
||||
if not doc_id or not ObjectId.is_valid(doc_id):
|
||||
raise ValueError("doc_id must be provided for sync operation.")
|
||||
id = ObjectId(doc_id)
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
# Proceed with uploading and cleaning as in the original function
|
||||
file_data = {'name': name_job, 'user': user}
|
||||
file_data = {
|
||||
"name": name_job,
|
||||
"user": user,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": loader,
|
||||
"remote_data": source_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
|
||||
'file_pkl': open(full_path + '/index.pkl', 'rb')}
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
files = {
|
||||
"file_faiss": open(full_path + "/index.faiss", "rb"),
|
||||
"file_pkl": open(full_path + "/index.pkl", "rb"),
|
||||
}
|
||||
|
||||
requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
|
||||
)
|
||||
else:
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
|
||||
shutil.rmtree(full_path)
|
||||
|
||||
return {"urls": source_data, "name_job": name_job, "user": user, "limited": False}
|
||||
|
||||
|
||||
def sync(
|
||||
self,
|
||||
source_data,
|
||||
name_job,
|
||||
user,
|
||||
loader,
|
||||
sync_frequency,
|
||||
retriever,
|
||||
doc_id=None,
|
||||
directory="temp",
|
||||
):
|
||||
try:
|
||||
remote_worker(
|
||||
self,
|
||||
source_data,
|
||||
name_job,
|
||||
user,
|
||||
loader,
|
||||
directory,
|
||||
retriever,
|
||||
sync_frequency,
|
||||
"sync",
|
||||
doc_id,
|
||||
)
|
||||
except Exception as e:
|
||||
return {"status": "error", "error": str(e)}
|
||||
return {"status": "success"}
|
||||
|
||||
|
||||
def sync_worker(self, frequency):
|
||||
sync_counts = Counter()
|
||||
sources = sources_collection.find()
|
||||
for doc in sources:
|
||||
if doc.get("sync_frequency") == frequency:
|
||||
name = doc.get("name")
|
||||
user = doc.get("user")
|
||||
source_type = doc.get("type")
|
||||
source_data = doc.get("remote_data")
|
||||
retriever = doc.get("retriever")
|
||||
doc_id = str(doc.get("_id"))
|
||||
resp = sync(
|
||||
self, source_data, name, user, source_type, frequency, retriever, doc_id
|
||||
)
|
||||
sync_counts["total_sync_count"] += 1
|
||||
sync_counts[
|
||||
"sync_success" if resp["status"] == "success" else "sync_failure"
|
||||
] += 1
|
||||
|
||||
return {
|
||||
'urls': source_data,
|
||||
'name_job': name_job,
|
||||
'user': user,
|
||||
'limited': False
|
||||
}
|
||||
key: sync_counts[key]
|
||||
for key in ["total_sync_count", "sync_success", "sync_failure"]
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from application.app import app
|
||||
from application.core.settings import settings
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=7091)
|
||||
app.run(debug=settings.FLASK_DEBUG_MODE, port=7091)
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
|
||||
redis:
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
volumes:
|
||||
- ./frontend/src:/app/src
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
volumes:
|
||||
- ./frontend/src:/app/src
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
@@ -32,7 +32,7 @@ services:
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO -B
|
||||
environment:
|
||||
- API_KEY=$API_KEY
|
||||
- EMBEDDINGS_KEY=$API_KEY
|
||||
|
||||
2246
docs/package-lock.json
generated
@@ -7,8 +7,8 @@
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@vercel/analytics": "^1.1.1",
|
||||
"docsgpt": "^0.3.7",
|
||||
"next": "^14.0.4",
|
||||
"docsgpt": "^0.4.1",
|
||||
"next": "^14.2.12",
|
||||
"nextra": "^2.13.2",
|
||||
"nextra-theme-docs": "^2.13.2",
|
||||
"react": "^18.2.0",
|
||||
|
||||
@@ -227,3 +227,124 @@ JSON response indicating the status of the operation:
|
||||
```json
|
||||
{ "status": "ok" }
|
||||
```
|
||||
|
||||
### 7. /api/get_api_keys
|
||||
**Description:**
|
||||
|
||||
The endpoint retrieves a list of API keys for the user.
|
||||
|
||||
**Request:**
|
||||
|
||||
**Method**: `GET`
|
||||
|
||||
**Sample JavaScript Fetch Request:**
|
||||
```js
|
||||
// get_api_keys (GET http://127.0.0.1:5000/api/get_api_keys)
|
||||
fetch("http://localhost:5001/api/get_api_keys", {
|
||||
"method": "GET",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
})
|
||||
.then((res) => res.text())
|
||||
.then(console.log.bind(console))
|
||||
|
||||
```
|
||||
**Response:**
|
||||
|
||||
JSON response with a list of created API keys:
|
||||
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "string",
|
||||
"name": "string",
|
||||
"key": "string",
|
||||
"source": "string"
|
||||
},
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
### 8. /api/create_api_key
|
||||
|
||||
**Description:**
|
||||
|
||||
Create a new API key for the user.
|
||||
|
||||
**Request:**
|
||||
|
||||
**Method**: `POST`
|
||||
|
||||
**Headers**: Content-Type should be set to `application/json; charset=utf-8`
|
||||
|
||||
**Request Body**: JSON object with the following fields:
|
||||
* `name` — A name for the API key.
|
||||
* `source` — The source documents that will be used.
|
||||
* `prompt_id` — The prompt ID.
|
||||
* `chunks` — The number of chunks used to process an answer.
|
||||
|
||||
Here is a JavaScript Fetch Request example:
|
||||
```js
|
||||
// create_api_key (POST http://127.0.0.1:5000/api/create_api_key)
|
||||
fetch("http://127.0.0.1:5000/api/create_api_key", {
|
||||
"method": "POST",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
"body": JSON.stringify({"name":"Example Key Name",
|
||||
"source":"Example Source",
|
||||
"prompt_id":"creative",
|
||||
"chunks":"2"})
|
||||
})
|
||||
.then((res) => res.json())
|
||||
.then(console.log.bind(console))
|
||||
```
|
||||
|
||||
**Response**
|
||||
|
||||
In response, you will get a JSON document containing the `id` and `key`:
|
||||
```json
|
||||
{
|
||||
"id": "string",
|
||||
"key": "string"
|
||||
}
|
||||
```
|
||||
|
||||
### 9. /api/delete_api_key
|
||||
|
||||
**Description:**
|
||||
|
||||
Delete an API key for the user.
|
||||
|
||||
**Request:**
|
||||
|
||||
**Method**: `POST`
|
||||
|
||||
**Headers**: Content-Type should be set to `application/json; charset=utf-8`
|
||||
|
||||
**Request Body**: JSON object with the field:
|
||||
* `id` — The unique identifier of the API key to be deleted.
|
||||
|
||||
Here is a JavaScript Fetch Request example:
|
||||
```js
|
||||
// delete_api_key (POST http://127.0.0.1:5000/api/delete_api_key)
|
||||
fetch("http://127.0.0.1:5000/api/delete_api_key", {
|
||||
"method": "POST",
|
||||
"headers": {
|
||||
"Content-Type": "application/json; charset=utf-8"
|
||||
},
|
||||
"body": JSON.stringify({"id":"API_KEY_ID"})
|
||||
})
|
||||
.then((res) => res.json())
|
||||
.then(console.log.bind(console))
|
||||
```
|
||||
|
||||
**Response:**
|
||||
|
||||
In response, you will get a JSON document indicating the status of the operation:
|
||||
```json
|
||||
{
|
||||
"status": "ok"
|
||||
}
|
||||
```
|
||||
10
docs/pages/API/_meta.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"API-docs": {
|
||||
"title": "🗂️️ API-docs",
|
||||
"href": "/API/API-docs"
|
||||
},
|
||||
"api-key-guide": {
|
||||
"title": "🔐 API Keys guide",
|
||||
"href": "/API/api-key-guide"
|
||||
}
|
||||
}
|
||||
30
docs/pages/API/api-key-guide.md
Normal file
@@ -0,0 +1,30 @@
|
||||
## Guide to DocsGPT API Keys
|
||||
|
||||
DocsGPT API keys are essential for developers and users who wish to integrate the DocsGPT models into external applications, such as the our widget. This guide will walk you through the steps of obtaining an API key, starting from uploading your document to understanding the key variables associated with API keys.
|
||||
|
||||
### Uploading Your Document
|
||||
|
||||
Before creating your first API key, you must upload the document that will be linked to this key. You can upload your document through two methods:
|
||||
|
||||
- **GUI Web App Upload:** A user-friendly graphical interface that allows for easy upload and management of documents.
|
||||
- **Using `/api/upload` Method:** For users comfortable with API calls, this method provides a direct way to upload documents.
|
||||
|
||||
### Obtaining Your API Key
|
||||
|
||||
After uploading your document, you can obtain an API key either through the graphical user interface or via an API call:
|
||||
|
||||
- **Graphical User Interface:** Navigate to the Settings section of the DocsGPT web app, find the API Keys option, and press 'Create New' to generate your key.
|
||||
- **API Call:** Alternatively, you can use the `/api/create_api_key` endpoint to create a new API key. For detailed instructions, visit [DocsGPT API Documentation](https://docs.docsgpt.cloud/API/API-docs#8-apicreate_api_key).
|
||||
|
||||
### Understanding Key Variables
|
||||
|
||||
Upon creating your API key, you will encounter several key variables. Each serves a specific purpose:
|
||||
|
||||
- **Name:** Assign a name to your API key for easy identification.
|
||||
- **Source:** Indicates the source document(s) linked to your API key, which DocsGPT will use to generate responses.
|
||||
- **ID:** A unique identifier for your API key. You can view this by making a call to `/api/get_api_keys`.
|
||||
- **Key:** The API key itself, which will be used in your application to authenticate API requests.
|
||||
|
||||
With your API key ready, you can now integrate DocsGPT into your application, such as the DocsGPT Widget or any other software, via `/api/answer` or `/stream` endpoints. The source document is preset with the API key, allowing you to bypass fields like `selectDocs` and `active_docs` during implementation.
|
||||
|
||||
Congratulations on taking the first step towards enhancing your applications with DocsGPT! With this guide, you're now equipped to navigate the process of obtaining and understanding DocsGPT API keys.
|
||||
100
docs/pages/Deploying/Kubernetes-Deploying.md
Normal file
@@ -0,0 +1,100 @@
|
||||
# Self-hosting DocsGPT on Kubernetes
|
||||
|
||||
This guide will walk you through deploying DocsGPT on Kubernetes.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Ensure you have the following installed before proceeding:
|
||||
|
||||
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/)
|
||||
- Access to a Kubernetes cluster
|
||||
|
||||
## Folder Structure
|
||||
|
||||
The `k8s` folder contains the necessary deployment and service configuration files:
|
||||
|
||||
- `deployments/`
|
||||
- `services/`
|
||||
- `docsgpt-secrets.yaml`
|
||||
|
||||
## Deployment Instructions
|
||||
|
||||
1. **Clone the Repository**
|
||||
|
||||
```sh
|
||||
git clone https://github.com/arc53/DocsGPT.git
|
||||
cd docsgpt/k8s
|
||||
```
|
||||
|
||||
2. **Configure Secrets (optional)**
|
||||
|
||||
Ensure that you have all the necessary secrets in `docsgpt-secrets.yaml`. Update it with your secrets before applying if you want. By default we will use qdrant as a vectorstore and public docsgpt llm as llm for inference.
|
||||
|
||||
3. **Apply Kubernetes Deployments**
|
||||
|
||||
Deploy your DocsGPT resources using the following commands:
|
||||
|
||||
```sh
|
||||
kubectl apply -f deployments/
|
||||
```
|
||||
|
||||
4. **Apply Kubernetes Services**
|
||||
|
||||
Set up your services using the following commands:
|
||||
|
||||
```sh
|
||||
kubectl apply -f services/
|
||||
```
|
||||
|
||||
5. **Apply Secrets**
|
||||
|
||||
Apply the secret configurations:
|
||||
|
||||
```sh
|
||||
kubectl apply -f docsgpt-secrets.yaml
|
||||
```
|
||||
|
||||
6. **Substitute API URL**
|
||||
|
||||
After deploying the services, you need to update the environment variable `VITE_API_HOST` in your deployment file `deployments/docsgpt-deploy.yaml` with the actual endpoint URL created by your `docsgpt-api-service`.
|
||||
|
||||
```sh
|
||||
kubectl get services/docsgpt-api-service -o jsonpath='{.status.loadBalancer.ingress[0].ip}' | xargs -I {} sed -i "s|<your-api-endpoint>|{}|g" deployments/docsgpt-deploy.yaml
|
||||
```
|
||||
|
||||
7. **Rerun Deployment**
|
||||
|
||||
After making the changes, reapply the deployment configuration to update the environment variables:
|
||||
|
||||
```sh
|
||||
kubectl apply -f deployments/
|
||||
```
|
||||
|
||||
## Verifying the Deployment
|
||||
|
||||
To verify if everything is set up correctly, you can run the following:
|
||||
|
||||
```sh
|
||||
kubectl get pods
|
||||
kubectl get services
|
||||
```
|
||||
|
||||
Ensure that the pods are running and the services are available.
|
||||
|
||||
## Accessing DocsGPT
|
||||
|
||||
To access DocsGPT, you need to find the external IP address of the frontend service. You can do this by running:
|
||||
|
||||
```sh
|
||||
kubectl get services/docsgpt-frontend-service | awk 'NR>1 {print "http://" $4}'
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
If you encounter any issues, you can check the logs of the pods for more details:
|
||||
|
||||
```sh
|
||||
kubectl logs <pod-name>
|
||||
```
|
||||
|
||||
Replace `<pod-name>` with the actual name of your DocsGPT pod.
|
||||
@@ -67,62 +67,3 @@ To run the setup on Windows, you have two options: using the Windows Subsystem f
|
||||
|
||||
These steps should help you set up and run the project on Windows using either WSL or Git Bash/Command Prompt.
|
||||
**Important:** Ensure that Docker is installed and properly configured on your Windows system for these steps to work.
|
||||
|
||||
|
||||
For WINDOWS:
|
||||
|
||||
To run the given setup on Windows, you can use the Windows Subsystem for Linux (WSL) or a Git Bash terminal to execute similar commands. Here are the steps adapted for Windows:
|
||||
|
||||
Option 1: Using Windows Subsystem for Linux (WSL):
|
||||
|
||||
1. Install WSL if you haven't already. You can follow the official Microsoft documentation for installation: (https://learn.microsoft.com/en-us/windows/wsl/install).
|
||||
2. After setting up WSL, open the WSL terminal.
|
||||
3. Clone the repository and create the `.env` file:
|
||||
```bash
|
||||
git clone https://github.com/arc53/DocsGPT.git
|
||||
cd DocsGPT
|
||||
echo "API_KEY=Yourkey" > .env
|
||||
echo "VITE_API_STREAMING=true" >> .env
|
||||
```
|
||||
4. Run the following command to start the setup with Docker Compose:
|
||||
```bash
|
||||
./run-with-docker-compose.sh
|
||||
```
|
||||
5. Open your web browser and navigate to http://localhost:5173/.
|
||||
6. To stop the setup, just press **Ctrl + C** in the WSL terminal.
|
||||
|
||||
Option 2: Using Git Bash or Command Prompt (CMD):
|
||||
|
||||
1. Install Git for Windows if you haven't already. You can download it from the official website: (https://gitforwindows.org/).
|
||||
2. Open Git Bash or Command Prompt.
|
||||
3. Clone the repository and create the `.env` file:
|
||||
```bash
|
||||
git clone https://github.com/arc53/DocsGPT.git
|
||||
cd DocsGPT
|
||||
echo "API_KEY=Yourkey" > .env
|
||||
echo "VITE_API_STREAMING=true" >> .env
|
||||
```
|
||||
4. Run the following command to start the setup with Docker Compose:
|
||||
```bash
|
||||
./run-with-docker-compose.sh
|
||||
```
|
||||
5. Open your web browser and navigate to http://localhost:5173/.
|
||||
6. To stop the setup, just press **Ctrl + C** in the Git Bash or Command Prompt terminal.
|
||||
|
||||
These steps should help you set up and run the project on Windows using either WSL or Git Bash/Command Prompt. Make sure you have Docker installed and properly configured on your Windows system for this to work.
|
||||
|
||||
|
||||
### Chrome Extension
|
||||
|
||||
#### Installing the Chrome extension:
|
||||
To enhance your DocsGPT experience, you can install the DocsGPT Chrome extension. Here's how:
|
||||
|
||||
1. In the DocsGPT GitHub repository, click on the **Code** button and select **Download ZIP**.
|
||||
2. Unzip the downloaded file to a location you can easily access.
|
||||
3. Open the Google Chrome browser and click on the three dots menu (upper right corner).
|
||||
4. Select **More Tools** and then **Extensions**.
|
||||
5. Turn on the **Developer mode** switch in the top right corner of the **Extensions page**.
|
||||
6. Click on the **Load unpacked** button.
|
||||
7. Select the **Chrome** folder where the DocsGPT files have been unzipped (docsgpt-main > extensions > chrome).
|
||||
8. The extension should now be added to Google Chrome and can be managed on the Extensions page.
|
||||
9. To disable or remove the extension, simply turn off the toggle switch on the extension card or click the **Remove** button.
|
||||
|
||||
@@ -10,5 +10,9 @@
|
||||
"Railway-Deploying": {
|
||||
"title": "🚂Deploying on Railway",
|
||||
"href": "/Deploying/Railway-Deploying"
|
||||
},
|
||||
"Kubernetes-Deploying": {
|
||||
"title": "☸️Deploying on Kubernetes",
|
||||
"href": "/Deploying/Kubernetes-Deploying"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
{
|
||||
"API-docs": {
|
||||
"title": "🗂️️ API-docs",
|
||||
"href": "/Developing/API-docs"
|
||||
}
|
||||
}
|
||||
34
docs/pages/Extensions/Chrome-extension.mdx
Normal file
@@ -0,0 +1,34 @@
|
||||
|
||||
import {Steps} from 'nextra/components'
|
||||
import { Callout } from 'nextra/components'
|
||||
|
||||
|
||||
## Chrome Extension Setup Guide
|
||||
|
||||
To enhance your DocsGPT experience, you can install the DocsGPT Chrome extension. Here's how:
|
||||
<Steps >
|
||||
### Step 1
|
||||
|
||||
|
||||
|
||||
In the DocsGPT GitHub repository, click on the **Code** button and select **Download ZIP**.
|
||||
### Step 2
|
||||
Unzip the downloaded file to a location you can easily access.
|
||||
### Step 3
|
||||
Open the Google Chrome browser and click on the three dots menu (upper right corner).
|
||||
### Step 4
|
||||
Select **More Tools** and then **Extensions**.
|
||||
### Step 5
|
||||
Turn on the **Developer mode** switch in the top right corner of the **Extensions page**.
|
||||
### Step 6
|
||||
Click on the **Load unpacked** button.
|
||||
### Step 7
|
||||
7. Select the **Chrome** folder where the DocsGPT files have been unzipped (docsgpt-main > extensions > chrome).
|
||||
### Step 8
|
||||
The extension should now be added to Google Chrome and can be managed on the Extensions page.
|
||||
### Step 9
|
||||
To disable or remove the extension, simply turn off the toggle switch on the extension card or click the **Remove** button.
|
||||
</Steps>
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,11 @@
|
||||
"href": "/Extensions/Chatwoot-extension"
|
||||
},
|
||||
"react-widget": {
|
||||
"title": "🏗️ Widget setup",
|
||||
"href": "/Extensions/react-widget"
|
||||
}
|
||||
"title": "🏗️ Widget setup",
|
||||
"href": "/Extensions/react-widget"
|
||||
},
|
||||
"Chrome-extension": {
|
||||
"title": "🌐 Chrome Extension",
|
||||
"href": "/Extensions/Chrome-extension"
|
||||
}
|
||||
}
|
||||
@@ -17,25 +17,31 @@ Now, you can use the widget in your component like this :
|
||||
```jsx
|
||||
<DocsGPTWidget
|
||||
apiHost="https://your-docsgpt-api.com"
|
||||
selectDocs="local/docs.zip"
|
||||
apiKey=""
|
||||
avatar = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png",
|
||||
title = "Get AI assistance",
|
||||
description = "DocsGPT's AI Chatbot is here to help",
|
||||
heroTitle = "Welcome to DocsGPT !",
|
||||
avatar = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png"
|
||||
title = "Get AI assistance"
|
||||
description = "DocsGPT's AI Chatbot is here to help"
|
||||
heroTitle = "Welcome to DocsGPT !"
|
||||
heroDescription="This chatbot is built with DocsGPT and utilises GenAI,
|
||||
please review important information using sources."
|
||||
theme = "dark"
|
||||
buttonIcon = "https://your-icon"
|
||||
buttonBg = "#222327"
|
||||
/>
|
||||
```
|
||||
DocsGPTWidget takes 8 **props** with default fallback values:
|
||||
To tailor the widget to your needs, you can configure the following props in your component:
|
||||
1. `apiHost` — The URL of your DocsGPT API.
|
||||
2. `selectDocs` — The documentation source that you want to use for your widget (e.g. `default` or `local/docs1.zip`).
|
||||
2. `theme` — Allows to select your specific theme (dark or light).
|
||||
3. `apiKey` — Usually, it's empty.
|
||||
4. `avatar`: Specifies the URL of the avatar or image representing the chatbot.
|
||||
5. `title`: Sets the title text displayed in the chatbot interface.
|
||||
6. `description`: Provides a brief description of the chatbot's purpose or functionality.
|
||||
7. `heroTitle`: Displays a welcome title when users interact with the chatbot.
|
||||
8. `heroDescription`: Provide additional introductory text or information about the chatbot's capabilities.
|
||||
9. `buttonIcon`: Specifies the url of the icon image for the widget.
|
||||
10. `buttonBg`: Allows to specify the Background color of the widget.
|
||||
11. `size`: Sets the size of the widget ( small, medium).
|
||||
|
||||
|
||||
### How to use DocsGPTWidget with [Nextra](https://nextra.site/) (Next.js + MDX)
|
||||
Install your widget as described above and then go to your `pages/` folder and create a new file `_app.js` with the following content:
|
||||
@@ -51,6 +57,69 @@ export default function MyApp({ Component, pageProps }) {
|
||||
)
|
||||
}
|
||||
```
|
||||
### How to use DocsGPTWidget with HTML
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge" />
|
||||
<title>HTML + CSS</title>
|
||||
<link rel="stylesheet" href="styles.css" />
|
||||
</head>
|
||||
<body>
|
||||
<h1>This is a simple HTML + CSS template!</h1>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script
|
||||
src="https://unpkg.com/docsgpt/dist/modern/main.js"
|
||||
type="module"
|
||||
></script>
|
||||
<script type="module">
|
||||
window.onload = function () {
|
||||
renderDocsGPTWidget("app", {
|
||||
apiKey: "",
|
||||
size: "medium",
|
||||
});
|
||||
};
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
To link the widget to your api and your documents you can pass parameters to the renderDocsGPTWidget('div id', { parameters }).
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DocsGPT Widget</title>
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app', {
|
||||
apiHost: 'http://localhost:7001',
|
||||
apiKey:"",
|
||||
avatar: 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title: 'Get AI assistance',
|
||||
description: "DocsGPT's AI Chatbot is here to help",
|
||||
heroTitle: 'Welcome to DocsGPT!',
|
||||
heroDescription: 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.',
|
||||
theme:"dark",
|
||||
buttonIcon:"https://your-icon",
|
||||
buttonBg:"#222327"
|
||||
});
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
For more information about React, refer to this [link here](https://react.dev/learn)
|
||||
|
||||
|
||||
@@ -1,10 +1,25 @@
|
||||
import Image from 'next/image'
|
||||
|
||||
# Customizing the Main Prompt
|
||||
|
||||
Customizing the main prompt for DocsGPT gives you the ability to tailor the AI's responses to your specific requirements. By modifying the prompt text, you can achieve more accurate and relevant answers. Here's how you can do it:
|
||||
|
||||
1. Navigate to `/application/prompts/combine_prompt.txt`.
|
||||
1. Navigate to `SideBar -> Settings`.
|
||||
|
||||
|
||||
|
||||
|
||||
2.In Settings select the `Active Prompt` now you will be able to see various prompts style.x
|
||||
|
||||
|
||||
|
||||
|
||||
3.Click on the `edit icon` on the prompt of your choice and you will be able to see the current prompt for it,you can now customise the prompt as per your choice.
|
||||
|
||||
### Video Demo
|
||||
<Image src="/prompts.gif" alt="prompts" width={800} height={500} />
|
||||
|
||||
|
||||
2. Open the `combine_prompt.txt` file and modify the prompt text to suit your needs. You can experiment with different phrasings and structures to observe how the model responds. The main prompt serves as guidance to the AI model on how to generate responses.
|
||||
|
||||
## Example Prompt Modification
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
## How to train on other documentation
|
||||
|
||||
This AI can utilize any documentation, but it requires preparation for similarity search. Follow these steps to get your documentation ready:
|
||||
|
||||
**Step 1: Prepare Your Documentation**
|
||||

|
||||
|
||||
Start by going to `/scripts/` folder.
|
||||
|
||||
If you open this file, you will see that it uses RST files from the folder to create a `index.faiss` and `index.pkl`.
|
||||
|
||||
It currently uses OPENAI to create the vector store, so make sure your documentation is not too large. Using Pandas cost me around $3-$4.
|
||||
|
||||
You can typically find documentation on GitHub in the `docs/` folder for most open-source projects.
|
||||
|
||||
### 1. Find documentation in .rst/.md format and create a folder with it in your scripts directory.
|
||||
- Name it `inputs/`.
|
||||
- Put all your .rst/.md files in there.
|
||||
- The search is recursive, so you don't need to flatten them.
|
||||
|
||||
If there are no .rst/.md files, convert whatever you find to a .txt file and feed it. (Don't forget to change the extension in the script).
|
||||
|
||||
### Step 2: Configure Your OpenAI API Key
|
||||
1. Create a .env file in the scripts/ folder.
|
||||
- Add your OpenAI API key inside: OPENAI_API_KEY=<your-api-key>.
|
||||
|
||||
### Step 3: Run the Ingestion Script
|
||||
|
||||
`python ingest.py ingest`
|
||||
|
||||
It will provide you with the estimated cost.
|
||||
|
||||
### Step 4: Move `index.faiss` and `index.pkl` generated in `scripts/output` to `application/` folder.
|
||||
|
||||
|
||||
### Step 5: Run the Web App
|
||||
Once you run it, it will use new context relevant to your documentation.Make sure you select default in the dropdown in the UI.
|
||||
|
||||
## Customization
|
||||
You can learn more about options while running ingest.py by running:
|
||||
- Make sure you select 'default' from the dropdown in the UI.
|
||||
|
||||
## Customization
|
||||
You can learn more about options while running ingest.py by executing:
|
||||
`python ingest.py --help`
|
||||
| Options | |
|
||||
|:--------------------------------:|:------------------------------------------------------------------------------------------------------------------------------:|
|
||||
| **ingest** | Runs 'ingest' function, converting documentation to Faiss plus Index format |
|
||||
| --dir TEXT | List of paths to directory for index creation. E.g. --dir inputs --dir inputs2 [default: inputs] |
|
||||
| --file TEXT | File paths to use (Optional; overrides directory) E.g. --files inputs/1.md --files inputs/2.md |
|
||||
| --recursive / --no-recursive | Whether to recursively search in subdirectories [default: recursive] |
|
||||
| --limit INTEGER | Maximum number of files to read |
|
||||
| --formats TEXT | List of required extensions (list with .) Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html [default: .rst, .md] |
|
||||
| --exclude / --no-exclude | Whether to exclude hidden files (dotfiles) [default: exclude] |
|
||||
| -y, --yes | Whether to skip price confirmation |
|
||||
| --sample / --no-sample | Whether to output sample of the first 5 split documents. [default: no-sample] |
|
||||
| --token-check / --no-token-check | Whether to group small documents and split large. Improves semantics. [default: token-check] |
|
||||
| --min_tokens INTEGER | Minimum number of tokens to not group. [default: 150] |
|
||||
| --max_tokens INTEGER | Maximum number of tokens to not split. [default: 2000] |
|
||||
| | |
|
||||
| **convert** | Creates documentation in .md format from source code |
|
||||
| --dir TEXT | Path to a directory with source code. E.g. --dir inputs [default: inputs] |
|
||||
| --formats TEXT | Source code language from which to create documentation. Supports py, js and java. E.g. --formats py [default: py] |
|
||||
44
docs/pages/Guides/How-to-train-on-other-documentation.mdx
Normal file
@@ -0,0 +1,44 @@
|
||||
|
||||
import { Callout } from 'nextra/components'
|
||||
import Image from 'next/image'
|
||||
import { Steps } from 'nextra/components'
|
||||
|
||||
## How to train on other documentation
|
||||
|
||||
Training on other documentation sources can greatly enhance the versatility and depth of DocsGPT's knowledge. By incorporating diverse materials, you can broaden the AI's understanding and improve its ability to generate insightful responses across a range of topics. Here's a step-by-step guide on how to effectively train DocsGPT on additional documentation sources:
|
||||
|
||||
**Get your document ready**:
|
||||
|
||||
Make sure you have the document on which you want to train on ready with you on the device which you are using .You can also use links to the documentation to train on.
|
||||
|
||||
<Callout type="warning" emoji="⚠️">
|
||||
Note: The document should be either of the given file formats .pdf, .txt, .rst, .docx, .md, .zip and limited to 25mb.You can also train using the link of the documentation.
|
||||
|
||||
</Callout>
|
||||
|
||||
### Video Demo
|
||||
|
||||
<Image src="/docs.gif" alt="prompts" width={800} height={500} />
|
||||
|
||||
|
||||
|
||||
<Steps>
|
||||
### Step1
|
||||
Navigate to the sidebar where you will find `Source Docs` option,here you will find 3 options built in which are default,Web Search and None.
|
||||
|
||||
|
||||
### Step 2
|
||||
Click on the `Upload icon` just beside the source docs options,now borwse and upload the document which you want to train on or select the `remote` option if you have to insert the link of the documentation.
|
||||
|
||||
|
||||
### Step 3
|
||||
Now you will be able to see the name of the file uploaded under the Uploaded Files ,now click on `Train`,once you click on train it might take some time to train on the document. You will be able to see the `Training progress` and once the training is completed you can click the `finish` button and there you go your docuemnt is uploaded.
|
||||
|
||||
|
||||
### Step 4
|
||||
Go to `New chat` and from the side bar select the document you uploaded under the `Source Docs` and go ahead with your chat, now you can ask qestions regarding the document you uploaded and you will get the effective answer based on it.
|
||||
|
||||
</Steps>
|
||||
|
||||
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
# Setting Up Local Language Models for Your App
|
||||
|
||||
Your app relies on two essential models: Embeddings and Text Generation. While OpenAI's default models work seamlessly, you have the flexibility to switch providers or even run the models locally.
|
||||
|
||||
## Step 1: Configure Environment Variables
|
||||
|
||||
Navigate to the `.env` file or set the following environment variables:
|
||||
|
||||
```env
|
||||
LLM_NAME=<your Text Generation model>
|
||||
API_KEY=<API key for Text Generation>
|
||||
EMBEDDINGS_NAME=<LLM for Embeddings>
|
||||
EMBEDDINGS_KEY=<API key for Embeddings>
|
||||
VITE_API_STREAMING=<true or false>
|
||||
```
|
||||
|
||||
You can omit the keys if users provide their own. Ensure you set `LLM_NAME` and `EMBEDDINGS_NAME`.
|
||||
|
||||
## Step 2: Choose Your Models
|
||||
|
||||
**Options for `LLM_NAME`:**
|
||||
- openai ([More details](https://platform.openai.com/docs/models))
|
||||
- anthropic ([More details](https://docs.anthropic.com/claude/reference/selecting-a-model))
|
||||
- manifest ([More details](https://python.langchain.com/docs/integrations/llms/manifest))
|
||||
- cohere ([More details](https://docs.cohere.com/docs/llmu))
|
||||
- llama.cpp ([More details](https://python.langchain.com/docs/integrations/llms/llamacpp))
|
||||
- huggingface (Arc53/DocsGPT-7B by default)
|
||||
- sagemaker ([Mode details](https://aws.amazon.com/sagemaker/))
|
||||
|
||||
|
||||
Note: for huggingface you can choose any model inside application/llm/huggingface.py or pass llm_name on init, loads
|
||||
|
||||
**Options for `EMBEDDINGS_NAME`:**
|
||||
- openai_text-embedding-ada-002
|
||||
- huggingface_sentence-transformers/all-mpnet-base-v2
|
||||
- huggingface_hkunlp/instructor-large
|
||||
- cohere_medium
|
||||
|
||||
If you want to be completely local, set `EMBEDDINGS_NAME` to `huggingface_sentence-transformers/all-mpnet-base-v2`.
|
||||
|
||||
For llama.cpp Download the required model and place it in the `models/` folder.
|
||||
|
||||
Alternatively, for local Llama setup, run `setup.sh` and choose option 1. The script handles the DocsGPT model addition.
|
||||
|
||||
## Step 3: Local Hosting for Privacy
|
||||
|
||||
If working with sensitive data, host everything locally by setting `LLM_NAME`, llama.cpp or huggingface, use any model available on Hugging Face, for llama.cpp you need to convert it into gguf format.
|
||||
That's it! Your app is now configured for local and private hosting, ensuring optimal security for critical data.
|
||||
49
docs/pages/Guides/How-to-use-different-LLM.mdx
Normal file
@@ -0,0 +1,49 @@
|
||||
|
||||
import { Callout } from 'nextra/components'
|
||||
import Image from 'next/image'
|
||||
import { Steps } from 'nextra/components'
|
||||
|
||||
# Setting Up Local Language Models for Your App
|
||||
|
||||
Setting up local language models for your app can significantly enhance its capabilities, enabling it to understand and generate text in multiple languages without relying on external APIs. By integrating local language models, you can improve privacy, reduce latency, and ensure continuous functionality even in offline environments. Here's a comprehensive guide on how to set up local language models for your application:
|
||||
|
||||
## Steps:
|
||||
### For cloud version LLM change:
|
||||
<Steps >
|
||||
### Step 1
|
||||
Visit the chat screen and you will be to see the default LLM selected.
|
||||
### Step 2
|
||||
Click on it and you will get a drop down of various LLM's available to choose.
|
||||
### Step 3
|
||||
Choose the LLM of your choice.
|
||||
|
||||
</Steps>
|
||||
|
||||
|
||||
|
||||
|
||||
### Video Demo
|
||||
<Image src="/llms.gif" alt="prompts" width={800} height={500} />
|
||||
|
||||
### For Open source llm change:
|
||||
<Steps >
|
||||
### Step 1
|
||||
For open source you have to edit .env file with LLM_NAME with their desired LLM name.
|
||||
### Step 2
|
||||
All the supported LLM providers are here application/llm and you can check what env variable are needed for each
|
||||
List of latest supported LLMs are https://github.com/arc53/DocsGPT/blob/main/application/llm/llm_creator.py
|
||||
### Step 3
|
||||
Visit application/llm and select the file of your selected llm and there you will find the speicifc requirements needed to be filled in order to use it,i.e API key of that llm.
|
||||
</Steps>
|
||||
|
||||
### For OpenAI-Compatible Endpoints:
|
||||
DocsGPT supports the use of OpenAI-compatible endpoints through base URL substitution. This feature allows you to use alternative AI models or services that implement the OpenAI API interface.
|
||||
|
||||
|
||||
Set the OPENAI_BASE_URL in your environment. You can change .env file with OPENAI_BASE_URL with the desired base URL or docker-compose.yml file and add the environment variable to the backend container.
|
||||
|
||||
> Make sure you have the right API_KEY and correct LLM_NAME.
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"Customising-prompts": {
|
||||
"title": "🏗️️ Customising Prompts",
|
||||
"title": "️💻 Customising Prompts",
|
||||
"href": "/Guides/Customising-prompts"
|
||||
},
|
||||
"How-to-train-on-other-documentation": {
|
||||
@@ -8,7 +8,7 @@
|
||||
"href": "/Guides/How-to-train-on-other-documentation"
|
||||
},
|
||||
"How-to-use-different-LLM": {
|
||||
"title": "⚙️️ How to use different LLM's",
|
||||
"title": "️🤖 How to use different LLM's",
|
||||
"href": "/Guides/How-to-use-different-LLM"
|
||||
},
|
||||
"My-AI-answers-questions-using-external-knowledge": {
|
||||
|
||||
@@ -4,7 +4,7 @@ export default function MyApp({ Component, pageProps }) {
|
||||
return (
|
||||
<>
|
||||
<Component {...pageProps} />
|
||||
<DocsGPTWidget selectDocs="local/docsgpt-sep.zip/"/>
|
||||
<DocsGPTWidget apiKey="d61a020c-ac8f-4f23-bb98-458e4da3c240" theme="dark" />
|
||||
</>
|
||||
)
|
||||
}
|
||||
@@ -2,14 +2,16 @@
|
||||
title: 'Home'
|
||||
---
|
||||
import { Cards, Card } from 'nextra/components'
|
||||
import Image from 'next/image'
|
||||
import deployingGuides from './Deploying/_meta.json';
|
||||
import developingGuides from './Developing/_meta.json';
|
||||
import developingGuides from './API/_meta.json';
|
||||
import extensionGuides from './Extensions/_meta.json';
|
||||
import mainGuides from './Guides/_meta.json';
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
export const allGuides = {
|
||||
...deployingGuides,
|
||||
...developingGuides,
|
||||
@@ -21,9 +23,12 @@ export const allGuides = {
|
||||
|
||||
DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖. Eliminate lengthy manual searches 🔍 and enhance your documentation experience with DocsGPT, and consider contributing to its AI-powered future 🚀.
|
||||
|
||||

|
||||
|
||||
Try it yourself: [https://docsgpt.arc53.com/](https://docsgpt.arc53.com/)
|
||||
|
||||
<Image src="/homevideo.gif" alt="homedemo" width={800} height={500}/>
|
||||
|
||||
|
||||
Try it yourself: [https://www.docsgpt.cloud/](https://www.docsgpt.cloud/)
|
||||
|
||||
<Cards
|
||||
num={3}
|
||||
|
||||
BIN
docs/public/docs.gif
Normal file
|
After Width: | Height: | Size: 839 KiB |
BIN
docs/public/homevideo.gif
Normal file
|
After Width: | Height: | Size: 23 MiB |
BIN
docs/public/llms.gif
Normal file
|
After Width: | Height: | Size: 500 KiB |
BIN
docs/public/prompts.gif
Normal file
|
After Width: | Height: | Size: 974 KiB |
28
extensions/chrome/package-lock.json
generated
@@ -107,12 +107,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
@@ -260,9 +260,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
@@ -884,12 +884,12 @@
|
||||
"dev": true
|
||||
},
|
||||
"braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
}
|
||||
},
|
||||
"camelcase-css": {
|
||||
@@ -1000,9 +1000,9 @@
|
||||
}
|
||||
},
|
||||
"fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
# DocsGPT react widget
|
||||
|
||||
|
||||
This widget will allow you to embed a DocsGPT assistant in your React app.
|
||||
|
||||
## Installation
|
||||
@@ -11,6 +10,8 @@ npm install docsgpt
|
||||
|
||||
## Usage
|
||||
|
||||
### React
|
||||
|
||||
```javascript
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
|
||||
@@ -25,23 +26,83 @@ To link the widget to your api and your documents you can pass parameters to the
|
||||
import { DocsGPTWidget } from "docsgpt";
|
||||
|
||||
const App = () => {
|
||||
return <DocsGPTWidget
|
||||
apiHost = 'http://localhost:7001',
|
||||
selectDocs = 'default',
|
||||
apiKey = '',
|
||||
avatar = 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title = 'Get AI assistance',
|
||||
description = 'DocsGPT\'s AI Chatbot is here to help',
|
||||
heroTitle = 'Welcome to DocsGPT !',
|
||||
heroDescription='This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.'
|
||||
/>;
|
||||
return <DocsGPTWidget
|
||||
apiHost="https://your-docsgpt-api.com"
|
||||
apiKey=""
|
||||
avatar = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png"
|
||||
title = "Get AI assistance"
|
||||
description = "DocsGPT's AI Chatbot is here to help"
|
||||
heroTitle = "Welcome to DocsGPT !"
|
||||
heroDescription="This chatbot is built with DocsGPT and utilises GenAI,
|
||||
please review important information using sources."
|
||||
theme = "dark"
|
||||
buttonIcon = "https://your-icon"
|
||||
buttonBg = "#222327"
|
||||
/>;
|
||||
};
|
||||
```
|
||||
|
||||
### Html
|
||||
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DocsGPT Widget</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app');
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
To link the widget to your api and your documents you can pass parameters to the **renderDocsGPTWidget('div id', { parameters })**.
|
||||
|
||||
```html
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DocsGPT Widget</title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<!-- Include the widget script from dist/modern or dist/legacy -->
|
||||
<script src="https://unpkg.com/docsgpt/dist/modern/main.js" type="module"></script>
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app', {
|
||||
apiHost: 'http://localhost:7001',
|
||||
apiKey:"",
|
||||
avatar: 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title: 'Get AI assistance',
|
||||
description: "DocsGPT's AI Chatbot is here to help",
|
||||
heroTitle: 'Welcome to DocsGPT!',
|
||||
heroDescription: 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.',
|
||||
theme:"dark",
|
||||
buttonIcon:"https://your-icon.svg",
|
||||
buttonBg:"#222327"
|
||||
});
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
```
|
||||
|
||||
## Our github
|
||||
|
||||
[DocsGPT](https://github.com/arc53/DocsGPT)
|
||||
|
||||
You can find the source code in the extensions/react-widget folder.
|
||||
|
||||
|
||||
6324
extensions/react-widget/package-lock.json
generated
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docsgpt",
|
||||
"version": "0.3.7",
|
||||
"version": "0.4.2",
|
||||
"private": false,
|
||||
"description": "DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖.",
|
||||
"source": "./src/index.html",
|
||||
@@ -11,6 +11,18 @@
|
||||
"dist",
|
||||
"package.json"
|
||||
],
|
||||
"targets": {
|
||||
"modern": {
|
||||
"engines": {
|
||||
"browsers": "Chrome 80"
|
||||
}
|
||||
},
|
||||
"legacy": {
|
||||
"engines": {
|
||||
"browsers": "> 0.5%, last 2 versions, not dead"
|
||||
}
|
||||
}
|
||||
},
|
||||
"@parcel/resolver-default": {
|
||||
"packageExports": true
|
||||
},
|
||||
@@ -18,8 +30,9 @@
|
||||
"styled-components": "^5"
|
||||
},
|
||||
"scripts": {
|
||||
"build": "parcel build src/index.ts",
|
||||
"dev": "parcel",
|
||||
"build": "parcel build src/main.tsx --public-url ./",
|
||||
"build:react": "parcel build src/index.ts",
|
||||
"dev": "parcel src/index.html -p 3000",
|
||||
"test": "jest",
|
||||
"lint": "eslint",
|
||||
"check": "tsc --noEmit",
|
||||
@@ -27,22 +40,19 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@babel/plugin-transform-flow-strip-types": "^7.23.3",
|
||||
"@bpmn-io/snarkdown": "^2.2.0",
|
||||
"@parcel/resolver-glob": "^2.12.0",
|
||||
"@parcel/transformer-svg-react": "^2.12.0",
|
||||
"@parcel/transformer-typescript-tsc": "^2.12.0",
|
||||
"@parcel/validator-typescript": "^2.12.0",
|
||||
"@radix-ui/react-icons": "^1.3.0",
|
||||
"@types/react": "^18.2.61",
|
||||
"@types/react-dom": "^18.2.19",
|
||||
"class-variance-authority": "^0.7.0",
|
||||
"clsx": "^2.1.0",
|
||||
"dompurify": "^3.0.9",
|
||||
"dompurify": "^3.1.5",
|
||||
"flow-bin": "^0.229.2",
|
||||
"i": "^0.3.7",
|
||||
"install": "^0.13.0",
|
||||
"markdown-it": "^14.1.0",
|
||||
"npm": "^10.5.0",
|
||||
"parcel": "^2.12.0",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"styled-components": "^6.1.8"
|
||||
@@ -54,7 +64,11 @@
|
||||
"@parcel/packager-ts": "^2.12.0",
|
||||
"@parcel/transformer-typescript-types": "^2.12.0",
|
||||
"@types/dompurify": "^3.0.5",
|
||||
"@types/markdown-it": "^14.1.2",
|
||||
"@types/react": "^18.3.3",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"babel-loader": "^8.0.4",
|
||||
"parcel": "^2.12.0",
|
||||
"process": "^0.11.10",
|
||||
"typescript": "^5.3.3"
|
||||
},
|
||||
|
||||
43
extensions/react-widget/publish.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
|
||||
## chmod +x publish.sh - to upgrade ownership
|
||||
set -e
|
||||
cat package.json >> package_copy.json
|
||||
cat package-lock.json >> package-lock_copy.json
|
||||
publish_package() {
|
||||
PACKAGE_NAME=$1
|
||||
BUILD_COMMAND=$2
|
||||
# Update package name in package.json
|
||||
jq --arg name "$PACKAGE_NAME" '.name=$name' package.json > temp.json && mv temp.json package.json
|
||||
|
||||
# Remove 'target' key if the package name is 'docsgpt-react'
|
||||
if [ "$PACKAGE_NAME" = "docsgpt-react" ]; then
|
||||
jq 'del(.targets)' package.json > temp.json && mv temp.json package.json
|
||||
fi
|
||||
|
||||
if [ -d "dist" ]; then
|
||||
echo "Deleting existing dist directory..."
|
||||
rm -rf dist
|
||||
fi
|
||||
|
||||
npm version patch
|
||||
|
||||
npm run "$BUILD_COMMAND"
|
||||
|
||||
# Publish to npm
|
||||
npm publish
|
||||
# Clean up
|
||||
mv package_copy.json package.json
|
||||
mv package-lock_copy.json package-lock.json
|
||||
echo "Published ${PACKAGE_NAME}"
|
||||
}
|
||||
|
||||
# Publish docsgpt package
|
||||
publish_package "docsgpt" "build"
|
||||
|
||||
# Publish docsgpt-react package
|
||||
publish_package "docsgpt-react" "build:react"
|
||||
|
||||
|
||||
rm -rf package_copy.json
|
||||
rm -rf package-lock_copy.json
|
||||
echo "---Process completed---"
|
||||
4
extensions/react-widget/src/assets/dislike.svg
Normal file
@@ -0,0 +1,4 @@
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M6.37776 10.1001V12.9C6.37776 13.457 6.599 13.9911 6.99282 14.3849C7.38664 14.7788 7.92077 15 8.47772 15L11.2777 8.70011V1.00025H3.38181C3.04419 0.996436 2.71656 1.11477 2.45929 1.33344C2.20203 1.55212 2.03246 1.8564 1.98184 2.19023L1.01585 8.49012C0.985398 8.69076 0.998931 8.89563 1.05551 9.09053C1.1121 9.28543 1.21038 9.46569 1.34355 9.61884C1.47671 9.77198 1.64159 9.89434 1.82674 9.97744C2.01189 10.0605 2.2129 10.1024 2.41583 10.1001H6.37776ZM11.2777 1.00025H13.1466C13.5428 0.993247 13.9277 1.13195 14.2284 1.39002C14.5291 1.64809 14.7245 2.00758 14.7776 2.40023V7.30014C14.7245 7.69279 14.5291 8.05227 14.2284 8.31035C13.9277 8.56842 13.5428 8.70712 13.1466 8.70011H11.2777" fill="none"/>
|
||||
<path d="M11.2777 8.70011L8.47772 15C7.92077 15 7.38664 14.7788 6.99282 14.3849C6.599 13.9911 6.37776 13.457 6.37776 12.9V10.1001H2.41583C2.2129 10.1024 2.01189 10.0605 1.82674 9.97744C1.64159 9.89434 1.47671 9.77198 1.34355 9.61884C1.21038 9.46569 1.1121 9.28543 1.05551 9.09053C0.998931 8.89563 0.985398 8.69076 1.01585 8.49012L1.98184 2.19023C2.03246 1.8564 2.20203 1.55212 2.45929 1.33344C2.71656 1.11477 3.04419 0.996436 3.38181 1.00025H11.2777M11.2777 8.70011V1.00025M11.2777 8.70011H13.1466C13.5428 8.70712 13.9277 8.56842 14.2284 8.31035C14.5291 8.05227 14.7245 7.69279 14.7776 7.30014V2.40023C14.7245 2.00758 14.5291 1.64809 14.2284 1.39002C13.9277 1.13195 13.5428 0.993247 13.1466 1.00025H11.2777" stroke="current" stroke-width="1.4" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.6 KiB |
4
extensions/react-widget/src/assets/like.svg
Normal file
@@ -0,0 +1,4 @@
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M9.39995 5.89997V3.09999C9.39995 2.54304 9.1787 2.0089 8.78487 1.61507C8.39105 1.22125 7.85691 1 7.29996 1L4.49998 7.29996V14.9999H12.3959C12.7336 15.0037 13.0612 14.8854 13.3185 14.6667C13.5757 14.448 13.7453 14.1437 13.7959 13.8099L14.7619 7.50996C14.7924 7.30931 14.7788 7.10444 14.7222 6.90954C14.6657 6.71464 14.5674 6.53437 14.4342 6.38123C14.301 6.22808 14.1362 6.10572 13.951 6.02262C13.7659 5.93952 13.5649 5.89767 13.3619 5.89997H9.39995ZM4.49998 14.9999H2.39999C2.02869 14.9999 1.6726 14.8524 1.41005 14.5899C1.1475 14.3273 1 13.9712 1 13.5999V8.69995C1 8.32865 1.1475 7.97256 1.41005 7.71001C1.6726 7.44746 2.02869 7.29996 2.39999 7.29996H4.49998" fill="none"/>
|
||||
<path d="M4.49998 7.29996L7.29996 1C7.85691 1 8.39105 1.22125 8.78487 1.61507C9.1787 2.0089 9.39995 2.54304 9.39995 3.09999V5.89997H13.3619C13.5649 5.89767 13.7659 5.93952 13.951 6.02262C14.1362 6.10572 14.301 6.22808 14.4342 6.38123C14.5674 6.53437 14.6657 6.71464 14.7223 6.90954C14.7788 7.10444 14.7924 7.30931 14.7619 7.50996L13.7959 13.8099C13.7453 14.1437 13.5757 14.448 13.3185 14.6667C13.0612 14.8854 12.7336 15.0037 12.3959 14.9999H4.49998M4.49998 7.29996V14.9999M4.49998 7.29996H2.39999C2.02869 7.29996 1.6726 7.44746 1.41005 7.71001C1.1475 7.97256 1 8.32865 1 8.69995V13.5999C1 13.9712 1.1475 14.3273 1.41005 14.5899C1.6726 14.8524 2.02869 14.9999 2.39999 14.9999H4.49998" stroke="current" stroke-width="1.39999" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
@@ -1,7 +0,0 @@
|
||||
<svg width="36" height="36" viewBox="0 0 18 18" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M4.37891 9.44824H7.75821" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M11.1377 9.44824H12.8273" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M4.37891 6.06934H6.06856" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M9.44824 6.06934H12.8276" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
<path d="M16.2069 11.1379C16.2069 11.5861 16.0289 12.0158 15.712 12.3327C15.3951 12.6496 14.9654 12.8276 14.5172 12.8276H4.37931L1 16.2069V2.68965C1 2.24153 1.17802 1.81176 1.49489 1.49489C1.81176 1.17802 2.24153 1 2.68965 1H14.5172C14.9654 1 15.3951 1.17802 15.712 1.49489C16.0289 1.81176 16.2069 2.24153 16.2069 2.68965V11.1379Z" stroke="white" stroke-width="1.68965" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1009 B |
@@ -1,13 +1,40 @@
|
||||
"use client";
|
||||
import { Fragment, useEffect, useRef, useState } from 'react'
|
||||
import { PaperPlaneIcon, RocketIcon, ExclamationTriangleIcon, Cross2Icon } from '@radix-ui/react-icons';
|
||||
import { MESSAGE_TYPE } from '../models/types';
|
||||
import { Query, Status } from '../models/types';
|
||||
import MessageIcon from '../assets/message.svg'
|
||||
import { fetchAnswerStreaming } from '../requests/streamingApi';
|
||||
import React, { useRef } from 'react'
|
||||
import DOMPurify from 'dompurify';
|
||||
import styled, { keyframes, createGlobalStyle } from 'styled-components';
|
||||
import snarkdown from '@bpmn-io/snarkdown';
|
||||
import { sanitize } from 'dompurify';
|
||||
import { PaperPlaneIcon, RocketIcon, ExclamationTriangleIcon, Cross2Icon } from '@radix-ui/react-icons';
|
||||
import { FEEDBACK, MESSAGE_TYPE, Query, Status, WidgetProps } from '../types/index';
|
||||
import { fetchAnswerStreaming, sendFeedback } from '../requests/streamingApi';
|
||||
import { ThemeProvider } from 'styled-components';
|
||||
import Like from "../assets/like.svg"
|
||||
import Dislike from "../assets/dislike.svg"
|
||||
import MarkdownIt from 'markdown-it';
|
||||
const themes = {
|
||||
dark: {
|
||||
bg: '#222327',
|
||||
text: '#fff',
|
||||
primary: {
|
||||
text: "#FAFAFA",
|
||||
bg: '#222327'
|
||||
},
|
||||
secondary: {
|
||||
text: "#A1A1AA",
|
||||
bg: "#38383b"
|
||||
}
|
||||
},
|
||||
light: {
|
||||
bg: '#fff',
|
||||
text: '#000',
|
||||
primary: {
|
||||
text: "#222327",
|
||||
bg: "#fff"
|
||||
},
|
||||
secondary: {
|
||||
text: "#A1A1AA",
|
||||
bg: "#F6F6F6"
|
||||
}
|
||||
}
|
||||
}
|
||||
const GlobalStyles = createGlobalStyle`
|
||||
.response pre {
|
||||
padding: 8px;
|
||||
@@ -16,6 +43,7 @@ const GlobalStyles = createGlobalStyle`
|
||||
border-radius: 6px;
|
||||
overflow-x: auto;
|
||||
background-color: #1B1C1F;
|
||||
color: #fff !important;
|
||||
}
|
||||
.response h1{
|
||||
font-size: 20px;
|
||||
@@ -26,40 +54,64 @@ const GlobalStyles = createGlobalStyle`
|
||||
.response h3{
|
||||
font-size: 16px;
|
||||
}
|
||||
.response p{
|
||||
margin:0px;
|
||||
}
|
||||
.response code:not(pre code){
|
||||
border-radius: 6px;
|
||||
padding: 1px 3px 1px 3px;
|
||||
font-size: 12px;
|
||||
display: inline-block;
|
||||
background-color: #646464;
|
||||
color: #fff !important;
|
||||
}
|
||||
.response code {
|
||||
white-space: pre-wrap !important;
|
||||
line-break: loose !important;
|
||||
}
|
||||
`;
|
||||
const WidgetContainer = styled.div`
|
||||
const Overlay = styled.div`
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background-color: rgba(0, 0, 0, 0.5);
|
||||
z-index: 999;
|
||||
transition: opacity 0.5s;
|
||||
`
|
||||
const WidgetContainer = styled.div<{ modal: boolean }>`
|
||||
display: block;
|
||||
position: fixed;
|
||||
right: 10px;
|
||||
bottom: 10px;
|
||||
right: ${props => props.modal ? '50%' : '10px'};
|
||||
bottom: ${props => props.modal ? '50%' : '10px'};
|
||||
z-index: 1000;
|
||||
display: flex;
|
||||
${props => props.modal &&
|
||||
"transform : translate(50%,50%);"
|
||||
}
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
text-align: left;
|
||||
@media only screen and (max-width: 768px) {
|
||||
max-height: 100vh !important;
|
||||
overflow: auto;
|
||||
}
|
||||
`;
|
||||
const StyledContainer = styled.div`
|
||||
display: block;
|
||||
display: flex;
|
||||
position: relative;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
bottom: 0;
|
||||
left: 0;
|
||||
width: 352px;
|
||||
height: 407px;
|
||||
max-height: 407px;
|
||||
border-radius: 0.75rem;
|
||||
background-color: #222327;
|
||||
background-color: ${props => props.theme.primary.bg};
|
||||
font-family: sans-serif;
|
||||
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05), 0 2px 4px rgba(0, 0, 0, 0.1);
|
||||
transition: visibility 0.3s, opacity 0.3s;
|
||||
`;
|
||||
const FloatingButton = styled.div`
|
||||
const FloatingButton = styled.div<{ bgcolor: string }>`
|
||||
position: fixed;
|
||||
display: flex;
|
||||
z-index: 500;
|
||||
@@ -70,7 +122,7 @@ const FloatingButton = styled.div`
|
||||
width: 5rem;
|
||||
height: 5rem;
|
||||
border-radius: 9999px;
|
||||
background-image: linear-gradient(to bottom right, #5AF0EC, #E80D9D);
|
||||
background: ${props => props.bgcolor};
|
||||
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
|
||||
cursor: pointer;
|
||||
&:hover {
|
||||
@@ -120,39 +172,62 @@ const ContentWrapper = styled.div`
|
||||
const Title = styled.h3`
|
||||
font-size: 1rem;
|
||||
font-weight: normal;
|
||||
color: #FAFAFA;
|
||||
color: ${props => props.theme.primary.text};
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.25rem;
|
||||
`;
|
||||
|
||||
const Description = styled.p`
|
||||
font-size: 0.85rem;
|
||||
color: #A1A1AA;
|
||||
color: ${props => props.theme.secondary.text};
|
||||
margin-top: 0;
|
||||
`;
|
||||
const Conversation = styled.div`
|
||||
height: 16rem;
|
||||
|
||||
const Conversation = styled.div<{ size: string }>`
|
||||
min-height: 250px;
|
||||
max-width: 968px;
|
||||
height: ${props => props.size === 'large' ? '75vh' : props.size === 'medium' ? '70vh' : '320px'};
|
||||
width: ${props => props.size === 'large' ? '60vw' : props.size === 'medium' ? '28vw' : '400px'};
|
||||
padding-inline: 0.5rem;
|
||||
border-radius: 0.375rem;
|
||||
text-align: left;
|
||||
overflow-y: auto;
|
||||
scrollbar-width: thin;
|
||||
scrollbar-color: #4a4a4a transparent; /* thumb color track color */
|
||||
@media only screen and (max-width: 768px) {
|
||||
width: 90vw !important;
|
||||
}
|
||||
@media only screen and (min-width:768px ) and (max-width: 1280px) {
|
||||
width:${props => props.size === 'large' ? '90vw' : props.size === 'medium' ? '60vw' : '400px'} !important;
|
||||
}
|
||||
`;
|
||||
const Feedback = styled.div`
|
||||
background-color: transparent;
|
||||
font-weight: normal;
|
||||
gap: 12px;
|
||||
display: flex;
|
||||
padding: 6px;
|
||||
clear: both;
|
||||
`;
|
||||
|
||||
const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
display: flex;
|
||||
display: block;
|
||||
font-size: 16px;
|
||||
justify-content: ${props => props.type === 'QUESTION' ? 'flex-end' : 'flex-start'};
|
||||
margin: 0.5rem;
|
||||
position: relative;
|
||||
width: 100%;;
|
||||
float: right;
|
||||
margin: 0rem;
|
||||
&:hover ${Feedback} * {
|
||||
visibility: visible !important;
|
||||
}
|
||||
`;
|
||||
const Message = styled.p<{ type: MESSAGE_TYPE }>`
|
||||
const Message = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
background: ${props => props.type === 'QUESTION' ?
|
||||
'linear-gradient(to bottom right, #8860DB, #6D42C5)' :
|
||||
'#38383b'};
|
||||
color: #ffff;
|
||||
props.theme.secondary.bg};
|
||||
color: ${props => props.type === 'ANSWER' ? props.theme.primary.text : '#fff'};
|
||||
border: none;
|
||||
max-width: 80%;
|
||||
float: ${props => props.type === 'QUESTION' ? 'right' : 'left'};
|
||||
max-width: ${props => props.type === 'ANSWER' ? '100%' : '80'};
|
||||
overflow: auto;
|
||||
margin: 4px;
|
||||
display: block;
|
||||
@@ -190,35 +265,31 @@ const DotAnimation = styled.div`
|
||||
const Delay = styled(DotAnimation) <{ delay: number }>`
|
||||
animation-delay: ${props => props.delay + 'ms'};
|
||||
`;
|
||||
const PromptContainer = styled.form`
|
||||
const PromptContainer = styled.form<{ size: string }>`
|
||||
background-color: transparent;
|
||||
height: 36px;
|
||||
position: absolute;
|
||||
bottom: 25px;
|
||||
left: 24px;
|
||||
right: 24px;
|
||||
height: ${props => props.size == 'large' ? '60px' : '40px'};
|
||||
margin: 16px;
|
||||
display: flex;
|
||||
justify-content: space-evenly;
|
||||
`;
|
||||
const StyledInput = styled.input`
|
||||
width: 260px;
|
||||
height: 36px;
|
||||
width: 100%;
|
||||
border: 1px solid #686877;
|
||||
padding-left: 12px;
|
||||
background-color: transparent;
|
||||
font-size: 16px;
|
||||
border-radius: 6px;
|
||||
color: #ffff;
|
||||
color: ${props => props.theme.text};
|
||||
outline: none;
|
||||
`;
|
||||
const StyledButton = styled.button`
|
||||
const StyledButton = styled.button<{ size: string }>`
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
background-image: linear-gradient(to bottom right, #5AF0EC, #E80D9D);
|
||||
border-radius: 6px;
|
||||
width: 36px;
|
||||
height: 36px;
|
||||
min-width: ${props => props.size === 'large' ? '60px' : '36px'};
|
||||
height: ${props => props.size === 'large' ? '60px' : '36px'};
|
||||
margin-left:8px;
|
||||
padding: 0px;
|
||||
border: none;
|
||||
@@ -239,13 +310,14 @@ const HeroContainer = styled.div`
|
||||
align-items: middle;
|
||||
transform: translate(-50%, -50%);
|
||||
width: 80%;
|
||||
max-width: 500px;
|
||||
background-image: linear-gradient(to bottom right, #5AF0EC, #ff1bf4);
|
||||
border-radius: 10px;
|
||||
margin: 0 auto;
|
||||
padding: 2px;
|
||||
`;
|
||||
const HeroWrapper = styled.div`
|
||||
background-color: #222327;
|
||||
background-color: ${props => props.theme.primary.bg};
|
||||
border-radius: 10px;
|
||||
font-weight: normal;
|
||||
padding: 6px;
|
||||
@@ -253,23 +325,23 @@ const HeroWrapper = styled.div`
|
||||
justify-content: space-between;
|
||||
`
|
||||
const HeroTitle = styled.h3`
|
||||
color: #fff;
|
||||
font-size: 17px;
|
||||
color: ${props => props.theme.text};
|
||||
margin-bottom: 5px;
|
||||
padding: 2px;
|
||||
`;
|
||||
const HeroDescription = styled.p`
|
||||
color: #fff;
|
||||
color: ${props => props.theme.text};
|
||||
font-size: 14px;
|
||||
line-height: 1.5;
|
||||
`;
|
||||
const Hero = ({ title, description }: { title: string, description: string }) => {
|
||||
|
||||
const Hero = ({ title, description, theme }: { title: string, description: string, theme: string }) => {
|
||||
return (
|
||||
<>
|
||||
<HeroContainer>
|
||||
<HeroWrapper>
|
||||
<IconWrapper style={{ marginTop: '8px' }}>
|
||||
<RocketIcon color='white' width={20} height={20} />
|
||||
<IconWrapper style={{ marginTop: '12px' }}>
|
||||
<RocketIcon color={theme === 'light' ? 'black' : 'white'} width={20} height={20} />
|
||||
</IconWrapper>
|
||||
<div>
|
||||
<HeroTitle>{title}</HeroTitle>
|
||||
@@ -284,22 +356,28 @@ const Hero = ({ title, description }: { title: string, description: string }) =>
|
||||
};
|
||||
export const DocsGPTWidget = ({
|
||||
apiHost = 'https://gptcloud.arc53.com',
|
||||
selectDocs = 'default',
|
||||
apiKey = 'docsgpt-public',
|
||||
apiKey = '82962c9a-aa77-4152-94e5-a4f84fd44c6a',
|
||||
avatar = 'https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png',
|
||||
title = 'Get AI assistance',
|
||||
description = 'DocsGPT\'s AI Chatbot is here to help',
|
||||
heroTitle = 'Welcome to DocsGPT !',
|
||||
heroDescription = 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.'
|
||||
}) => {
|
||||
heroDescription = 'This chatbot is built with DocsGPT and utilises GenAI, please review important information using sources.',
|
||||
size = 'small',
|
||||
theme = 'dark',
|
||||
buttonIcon = 'https://d3dg1063dc54p9.cloudfront.net/widget/message.svg',
|
||||
buttonBg = 'linear-gradient(to bottom right, #5AF0EC, #E80D9D)',
|
||||
collectFeedback = true
|
||||
}: WidgetProps) => {
|
||||
const [prompt, setPrompt] = React.useState('');
|
||||
const [status, setStatus] = React.useState<Status>('idle');
|
||||
const [queries, setQueries] = React.useState<Query[]>([])
|
||||
const [conversationId, setConversationId] = React.useState<string | null>(null)
|
||||
const [open, setOpen] = React.useState<boolean>(false)
|
||||
const [eventInterrupt, setEventInterrupt] = React.useState<boolean>(false); //click or scroll by user while autoScrolling
|
||||
const isBubbleHovered = useRef<boolean>(false)
|
||||
const endMessageRef = React.useRef<HTMLDivElement | null>(null);
|
||||
const md = new MarkdownIt();
|
||||
|
||||
const [prompt, setPrompt] = useState('');
|
||||
const [status, setStatus] = useState<Status>('idle');
|
||||
const [queries, setQueries] = useState<Query[]>([])
|
||||
const [conversationId, setConversationId] = useState<string | null>(null)
|
||||
const [open, setOpen] = useState<boolean>(false)
|
||||
const [eventInterrupt, setEventInterrupt] = useState<boolean>(false); //click or scroll by user while autoScrolling
|
||||
const endMessageRef = useRef<HTMLDivElement | null>(null);
|
||||
const handleUserInterrupt = () => {
|
||||
(status === 'loading') && setEventInterrupt(true);
|
||||
}
|
||||
@@ -316,11 +394,40 @@ export const DocsGPTWidget = ({
|
||||
const lastChild = element?.children?.[element.children.length - 1]
|
||||
lastChild && scrollToBottom(lastChild)
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
React.useEffect(() => {
|
||||
!eventInterrupt && scrollToBottom(endMessageRef.current);
|
||||
}, [queries.length, queries[queries.length - 1]?.response]);
|
||||
|
||||
async function handleFeedback(feedback: FEEDBACK, index: number) {
|
||||
let query = queries[index]
|
||||
if (!query.response)
|
||||
return;
|
||||
if (query.feedback != feedback) {
|
||||
sendFeedback({
|
||||
question: query.prompt,
|
||||
answer: query.response,
|
||||
feedback: feedback,
|
||||
apikey: apiKey
|
||||
}, apiHost)
|
||||
.then(res => {
|
||||
if (res.status == 200) {
|
||||
query.feedback = feedback;
|
||||
setQueries((prev: Query[]) => {
|
||||
return prev.map((q, i) => (i === index ? query : q));
|
||||
});
|
||||
}
|
||||
})
|
||||
.catch(err => console.log("Connection failed",err))
|
||||
}
|
||||
else {
|
||||
delete query.feedback;
|
||||
setQueries((prev: Query[]) => {
|
||||
return prev.map((q, i) => (i === index ? query : q));
|
||||
});
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
async function stream(question: string) {
|
||||
setStatus('loading')
|
||||
try {
|
||||
@@ -329,19 +436,24 @@ export const DocsGPTWidget = ({
|
||||
question: question,
|
||||
apiKey: apiKey,
|
||||
apiHost: apiHost,
|
||||
selectedDocs: selectDocs,
|
||||
history: queries,
|
||||
conversationId: conversationId,
|
||||
onEvent: (event: MessageEvent) => {
|
||||
const data = JSON.parse(event.data);
|
||||
// check if the 'end' event has been received
|
||||
if (data.type === 'end') {
|
||||
// set status to 'idle'
|
||||
setStatus('idle');
|
||||
|
||||
} else if (data.type === 'id') {
|
||||
}
|
||||
else if (data.type === 'id') {
|
||||
setConversationId(data.id)
|
||||
} else {
|
||||
}
|
||||
else if (data.type === 'error') {
|
||||
const updatedQueries = [...queries];
|
||||
updatedQueries[updatedQueries.length - 1].error = data.error;
|
||||
setQueries(updatedQueries);
|
||||
setStatus('idle')
|
||||
}
|
||||
else {
|
||||
const result = data.answer;
|
||||
const streamingResponse = queries[queries.length - 1].response ? queries[queries.length - 1].response : '';
|
||||
const updatedQueries = [...queries];
|
||||
@@ -353,7 +465,7 @@ export const DocsGPTWidget = ({
|
||||
);
|
||||
} catch (error) {
|
||||
const updatedQueries = [...queries];
|
||||
updatedQueries[updatedQueries.length - 1].error = 'error'
|
||||
updatedQueries[updatedQueries.length - 1].error = 'Something went wrong !'
|
||||
setQueries(updatedQueries);
|
||||
setStatus('idle')
|
||||
//setEventInterrupt(false)
|
||||
@@ -372,16 +484,21 @@ export const DocsGPTWidget = ({
|
||||
event.currentTarget.src = "https://d3dg1063dc54p9.cloudfront.net/cute-docsgpt.png";
|
||||
};
|
||||
return (
|
||||
<>
|
||||
<WidgetContainer>
|
||||
<ThemeProvider theme={themes[theme]}>
|
||||
{open && size === 'large' &&
|
||||
<Overlay onClick={() => {
|
||||
setOpen(false)
|
||||
}} />
|
||||
}
|
||||
<FloatingButton bgcolor={buttonBg} onClick={() => setOpen(!open)} hidden={open}>
|
||||
<img style={{ maxHeight: '4rem', maxWidth: '4rem' }} src={buttonIcon} />
|
||||
</FloatingButton>
|
||||
<WidgetContainer modal={size == 'large'}>
|
||||
<GlobalStyles />
|
||||
{!open && <FloatingButton onClick={() => setOpen(true)} hidden={open}>
|
||||
<MessageIcon style={{ marginTop: '8px' }} />
|
||||
</FloatingButton>}
|
||||
{open && <StyledContainer>
|
||||
<div>
|
||||
<CancelButton onClick={() => setOpen(false)}>
|
||||
<Cross2Icon width={24} height={24} color='white' />
|
||||
<Cross2Icon width={24} height={24} color={theme === 'light' ? 'black' : 'white'} />
|
||||
</CancelButton>
|
||||
<Header>
|
||||
<IconWrapper>
|
||||
@@ -393,11 +510,11 @@ export const DocsGPTWidget = ({
|
||||
</ContentWrapper>
|
||||
</Header>
|
||||
</div>
|
||||
<Conversation onWheel={handleUserInterrupt} onTouchMove={handleUserInterrupt}>
|
||||
<Conversation size={size} onWheel={handleUserInterrupt} onTouchMove={handleUserInterrupt}>
|
||||
{
|
||||
queries.length > 0 ? queries?.map((query, index) => {
|
||||
return (
|
||||
<Fragment key={index}>
|
||||
<React.Fragment key={index}>
|
||||
{
|
||||
query.prompt && <MessageBubble type='QUESTION'>
|
||||
<Message
|
||||
@@ -408,13 +525,34 @@ export const DocsGPTWidget = ({
|
||||
</MessageBubble>
|
||||
}
|
||||
{
|
||||
query.response ? <MessageBubble type='ANSWER'>
|
||||
query.response ? <MessageBubble onMouseOver={() => { isBubbleHovered.current = true }} type='ANSWER'>
|
||||
<Message
|
||||
type='ANSWER'
|
||||
ref={(index === queries.length - 1) ? endMessageRef : null}
|
||||
>
|
||||
<div className="response" dangerouslySetInnerHTML={{ __html: sanitize(snarkdown(query.response)) }} />
|
||||
<div
|
||||
className="response"
|
||||
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(md.render(query.response)) }}
|
||||
/>
|
||||
</Message>
|
||||
|
||||
{collectFeedback &&
|
||||
<Feedback>
|
||||
<Like
|
||||
style={{
|
||||
stroke: query.feedback == 'LIKE' ? '#8860DB' : '#c0c0c0',
|
||||
visibility: query.feedback == 'LIKE' ? 'visible' : 'hidden'
|
||||
}}
|
||||
fill='none'
|
||||
onClick={() => handleFeedback("LIKE", index)} />
|
||||
<Dislike
|
||||
style={{
|
||||
stroke: query.feedback == 'DISLIKE' ? '#ed8085' : '#c0c0c0',
|
||||
visibility: query.feedback == 'DISLIKE' ? 'visible' : 'hidden'
|
||||
}}
|
||||
fill='none'
|
||||
onClick={() => handleFeedback("DISLIKE", index)} />
|
||||
</Feedback>}
|
||||
</MessageBubble>
|
||||
: <div>
|
||||
{
|
||||
@@ -424,7 +562,7 @@ export const DocsGPTWidget = ({
|
||||
</IconWrapper>
|
||||
<div>
|
||||
<h5 style={{ margin: 2 }}>Network Error</h5>
|
||||
<span style={{ margin: 2, fontSize: '13px' }}>Something went wrong !</span>
|
||||
<span style={{ margin: 2, fontSize: '13px' }}>{query.error}</span>
|
||||
</div>
|
||||
</ErrorAlert>
|
||||
: <MessageBubble type='ANSWER'>
|
||||
@@ -437,24 +575,25 @@ export const DocsGPTWidget = ({
|
||||
}
|
||||
</div>
|
||||
}
|
||||
</Fragment>)
|
||||
</React.Fragment>)
|
||||
})
|
||||
: <Hero title={heroTitle} description={heroDescription} />
|
||||
: <Hero title={heroTitle} description={heroDescription} theme={theme} />
|
||||
}
|
||||
</Conversation>
|
||||
|
||||
<PromptContainer
|
||||
size={size}
|
||||
onSubmit={handleSubmit}>
|
||||
<StyledInput
|
||||
value={prompt} onChange={(event) => setPrompt(event.target.value)}
|
||||
type='text' placeholder="What do you want to do?" />
|
||||
<StyledButton
|
||||
disabled={prompt.length == 0 || status !== 'idle'}>
|
||||
size={size}
|
||||
disabled={prompt.trim().length == 0 || status !== 'idle'}>
|
||||
<PaperPlaneIcon width={15} height={15} color='white' />
|
||||
</StyledButton>
|
||||
</PromptContainer>
|
||||
</StyledContainer>}
|
||||
</WidgetContainer>
|
||||
</>
|
||||
</ThemeProvider>
|
||||
)
|
||||
}
|
||||
@@ -9,5 +9,11 @@
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
<script type="module" src="main.tsx"></script>
|
||||
<script type="module" src="../dist/main.js"></script>
|
||||
<script type="module">
|
||||
window.onload = function() {
|
||||
renderDocsGPTWidget('app');
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -1,6 +1,12 @@
|
||||
import { createRoot } from 'react-dom/client';
|
||||
import App from './App.tsx';
|
||||
import React from 'react';
|
||||
const root = createRoot(document.getElementById('app') as HTMLElement);
|
||||
import { createRoot } from 'react-dom/client';
|
||||
import { DocsGPTWidget } from './components/DocsGPTWidget';
|
||||
|
||||
root.render(<App />);
|
||||
if (typeof window !== 'undefined') {
|
||||
const renderWidget = (elementId: string, props = {}) => {
|
||||
const root = createRoot(document.getElementById(elementId) as HTMLElement);
|
||||
root.render(<DocsGPTWidget {...props} />);
|
||||
};
|
||||
(window as any).renderDocsGPTWidget = renderWidget;
|
||||
}
|
||||
export { DocsGPTWidget };
|
||||
@@ -1,92 +1,106 @@
|
||||
import { FEEDBACK } from "@/types";
|
||||
interface HistoryItem {
|
||||
prompt: string;
|
||||
response?: string;
|
||||
}
|
||||
prompt: string;
|
||||
response?: string;
|
||||
}
|
||||
interface FetchAnswerStreamingProps {
|
||||
question?: string;
|
||||
apiKey?: string;
|
||||
selectedDocs?: string;
|
||||
history?: HistoryItem[];
|
||||
conversationId?: string | null;
|
||||
apiHost?: string;
|
||||
onEvent?: (event: MessageEvent) => void;
|
||||
}
|
||||
question?: string;
|
||||
apiKey?: string;
|
||||
selectedDocs?: string;
|
||||
history?: HistoryItem[];
|
||||
conversationId?: string | null;
|
||||
apiHost?: string;
|
||||
onEvent?: (event: MessageEvent) => void;
|
||||
}
|
||||
interface FeedbackPayload {
|
||||
question: string;
|
||||
answer: string;
|
||||
apikey: string;
|
||||
feedback: FEEDBACK;
|
||||
}
|
||||
export function fetchAnswerStreaming({
|
||||
question = '',
|
||||
apiKey = '',
|
||||
selectedDocs = '',
|
||||
history = [],
|
||||
conversationId = null,
|
||||
apiHost = '',
|
||||
onEvent = () => {console.log("Event triggered, but no handler provided.");}
|
||||
}: FetchAnswerStreamingProps): Promise<void> {
|
||||
let docPath = 'default';
|
||||
if (selectedDocs) {
|
||||
docPath = selectedDocs;
|
||||
}
|
||||
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const body = {
|
||||
question: question,
|
||||
api_key: apiKey,
|
||||
embeddings_key: apiKey,
|
||||
active_docs: docPath,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
model: 'default'
|
||||
};
|
||||
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.body) throw Error('No response body');
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
let counterrr = 0;
|
||||
const processStream = ({
|
||||
done,
|
||||
value,
|
||||
}: ReadableStreamReadResult<Uint8Array>) => {
|
||||
if (done) {
|
||||
resolve();
|
||||
return;
|
||||
question = '',
|
||||
apiKey = '',
|
||||
history = [],
|
||||
conversationId = null,
|
||||
apiHost = '',
|
||||
onEvent = () => { console.log("Event triggered, but no handler provided."); }
|
||||
}: FetchAnswerStreamingProps): Promise<void> {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const body = {
|
||||
question: question,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
model: 'default',
|
||||
api_key: apiKey
|
||||
};
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(body),
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.body) throw Error('No response body');
|
||||
|
||||
const reader = response.body.getReader();
|
||||
const decoder = new TextDecoder('utf-8');
|
||||
let counterrr = 0;
|
||||
const processStream = ({
|
||||
done,
|
||||
value,
|
||||
}: ReadableStreamReadResult<Uint8Array>) => {
|
||||
if (done) {
|
||||
resolve();
|
||||
return;
|
||||
}
|
||||
|
||||
counterrr += 1;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (let line of lines) {
|
||||
if (line.trim() == '') {
|
||||
continue;
|
||||
}
|
||||
|
||||
counterrr += 1;
|
||||
|
||||
const chunk = decoder.decode(value);
|
||||
|
||||
const lines = chunk.split('\n');
|
||||
|
||||
for (let line of lines) {
|
||||
if (line.trim() == '') {
|
||||
continue;
|
||||
}
|
||||
if (line.startsWith('data:')) {
|
||||
line = line.substring(5);
|
||||
}
|
||||
|
||||
const messageEvent = new MessageEvent('message', {
|
||||
data: line,
|
||||
});
|
||||
|
||||
onEvent(messageEvent); // handle each message
|
||||
if (line.startsWith('data:')) {
|
||||
line = line.substring(5);
|
||||
}
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
};
|
||||
|
||||
|
||||
const messageEvent = new MessageEvent('message', {
|
||||
data: line,
|
||||
});
|
||||
|
||||
onEvent(messageEvent); // handle each message
|
||||
}
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Connection failed:', error);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
reader.read().then(processStream).catch(reject);
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Connection failed:', error);
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
export const sendFeedback = (payload: FeedbackPayload,apiHost:string): Promise<Response> => {
|
||||
return fetch(`${apiHost}/api/feedback`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify({
|
||||
question: payload.question,
|
||||
answer: payload.answer,
|
||||
feedback: payload.feedback,
|
||||
api_key:payload.apikey
|
||||
}),
|
||||
});
|
||||
};
|
||||
27
extensions/react-widget/src/types/index.ts
Normal file
@@ -0,0 +1,27 @@
|
||||
export type MESSAGE_TYPE = 'QUESTION' | 'ANSWER' | 'ERROR';
|
||||
export type Status = 'idle' | 'loading' | 'failed';
|
||||
export type FEEDBACK = 'LIKE' | 'DISLIKE';
|
||||
export type THEME = 'light' | 'dark';
|
||||
export interface Query {
|
||||
prompt: string;
|
||||
response?: string;
|
||||
feedback?: FEEDBACK;
|
||||
error?: string;
|
||||
sources?: { title: string; text: string }[];
|
||||
conversationId?: string | null;
|
||||
title?: string | null;
|
||||
}
|
||||
export interface WidgetProps {
|
||||
apiHost?: string;
|
||||
apiKey?: string;
|
||||
avatar?: string;
|
||||
title?: string;
|
||||
description?: string;
|
||||
heroTitle?: string;
|
||||
heroDescription?: string;
|
||||
size?: 'small' | 'medium' | 'large';
|
||||
theme?:THEME,
|
||||
buttonIcon?:string;
|
||||
buttonBg?:string;
|
||||
collectFeedback?:boolean
|
||||
}
|
||||
14
extensions/web-widget/package-lock.json
generated
@@ -152,12 +152,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
"version": "3.0.2",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz",
|
||||
"integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==",
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz",
|
||||
"integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"fill-range": "^7.0.1"
|
||||
"fill-range": "^7.1.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
@@ -294,9 +294,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
"integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==",
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
"integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"to-regex-range": "^5.0.1"
|
||||
|
||||
@@ -1,13 +1,17 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>DocsGPT 🦖</title>
|
||||
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="root" class="h-screen"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover" />
|
||||
<meta name="apple-mobile-web-app-capable" content="yes">
|
||||
<title>DocsGPT 🦖</title>
|
||||
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<div id="root" class="h-screen"></div>
|
||||
<script type="module" src="/src/main.tsx"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||