Mirror of https://github.com/arc53/DocsGPT.git, synced 2025-11-29 08:33:20 +00:00
Compare commits
202 Commits
Commit SHA1s:

577d58c92b, 899777632b, bbf55ca46e, 3f88b04c4a, f8910ba136, 6c95d8b13e, e6bccaaf4e, 3b8039a580,
fae3f55010, 20c877f75b, 8380858a82, d2358c399d, c3af8a77af, bc5a0b030b, 0b94f1717f, aaa1249a41,
ffaa22c49b, 0b78480977, 6b6737613a, da5d62cc1c, 6a68b63192, ff2e79fe7b, 1800e51b19, ba9c505249,
bc9f1c17ed, 74845aed64, e49dd0cc6a, 27c45ae24a, 364a14adaf, 5c560b1dd5, 28b8b88332, e39ef0cc9e,
8098d3fec8, 059ffe09ea, 36a845c29e, ce6f0dab56, f200ab10a4, 3001688e0e, a73774099e, b28676d52c,
eef012b4d1, 1417a1c020, 962becb9a5, 168648e789, 7f56f57778, 6cadddc2fc, 15fd54eac4, 31350e6302,
8742cdae0a, 4efcb388ff, 2d92e95c8a, 47e5d5684a, b723e14d98, c9d24b8f42, 43622e7ab1, 5cfc185ba5,
4be2635fbe, 0beafb8391, 1d2654b9fa, a4bc3673e7, fa080537e8, bdf67a7db7, db4cdc901c, 16a540b89b,
e00ec9ac3f, fc760afdfc, cb47bcdb0e, 8d62559ca8, dbe9c4dc18, 1609b4562d, b6cadb1d65, 7aafac5b5e,
36f0aacb19, 0c1a6a918d, d1f5ff4dba, 77e6df2a1c, 119c037f24, 97fe1abfd8, 3a0163f0fb, d3fab69155,
9395d2c091, b9efb98280, 60bb264663, 316dd2f165, 8a0f700563, 3d0c6eafec, 46e055833b, 80dfdd1cb9,
db21678b74, 09c7fe0565, b6dfb2c856, ab46ba521f, 4a7670f2aa, 9ba86bc174, 2ebe5e051c, 24e98abd15,
b7f1a94ba4, 70bc7465c9, 65c2568427, 186e7bf402, e6f1c7d0c3, 87ad9a3190, 0ed45f8754, 116e4401c4,
c3c0e643d2, d5522e7c08, 658b14ba26, 38f8469d0b, ce6750be15, f2e600cff9, 3dab4b0b1e, 5d47aaff29,
625448fcd1, e0258cfc51, 92993ee105, ce579293fb, d56db14fc7, 3e98f9e6bd, e7bd9b6323, 13699f5c02,
f1f8341d25, 796b4899aa, 7dcbed644a, e6fe01876b, 9b75524d43, d98c876f82, be7b2fa0a4, 28719e534c,
620a7c6db2, 26450aca3a, b1a3ff6cb1, ae2efc7f7b, 2523d039fb, 6dd13c6845, dbaa116fe0, 476071fc1b,
66332ccf76, b3e9bb9ddb, dff87b5fa3, 7a00df65d0, 60cc1d8ee5, b67ade3610, 97f47f5415, ddef31ecdf,
fa31f1ee26, 8e477c9d16, ce8f0ef9e1, 4f64738f9e, c4464455a1, 254a6c2916, c9e1c326f5, 4532b6cd8c,
53424a5c19, bfb47da398, cb96d90563, b6c02c850a, c297e076e6, 20a0800aa7, bac25112b7, 1d2162705d,
1a1f66d2a0, a44cde33ed, a9afd84787, ac0224b687, 8be2992c9a, e3ed23a0d4, 377070e3a9, 6d959051e2,
0799728000, 1f02f3b376, f7d7244588, 352703827c, 5f4f55269e, 19e27e8403, b41b960ef0, d1cc91dd6f,
b0b12856d5, fdb19f8c49, b8a935ce3d, ec61b80fd3, 133863e601, 3767b85958, a9672bc4a2, 4f1e86d269,
ae36ff9394, a888f38afb, 07e51dc8c6, 986648479a, 533053f66f, 7a74c60fd1, d4d663de38, 37ae24b879,
787a06d06e, 43ca879f83, 19f807b6c4, 780f5893de, 54eea0ff04, 480d91e818, 820871329d, f7d31fe615,
8d9648391b, d14438bf54
.env-template (new file, +2)

```
OPENAI_API_KEY=<LLM api key (for example, open ai key)>
EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
```
.github/workflows/ci.yml (10 changed lines)

```diff
@@ -9,6 +9,10 @@ on:
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write

     steps:
       - uses: actions/checkout@v3
@@ -23,17 +27,17 @@ jobs:
       with:
         username: ${{ secrets.DOCKER_USERNAME }}
         password: ${{ secrets.DOCKER_PASSWORD }}

       - name: Login to ghcr.io
         uses: docker/login-action@v2
         with:
           registry: ghcr.io
           username: ${{ github.repository_owner }}
-          password: ${{ secrets.GHCR_TOKEN }}
+          password: ${{ secrets.GITHUB_TOKEN }}

       # Runs a single command using the runners shell
       - name: Build and push Docker images to docker.io and ghcr.io
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v4
         with:
           file: './application/Dockerfile'
           platforms: linux/amd64
```
.github/workflows/cife.yml (new file, +48)

```yaml
name: Build and push DocsGPT-FE Docker image

on:
  workflow_dispatch:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write

    steps:
      - uses: actions/checkout@v3

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to DockerHub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Runs a single command using the runners shell
      - name: Build and push Docker images to docker.io and ghcr.io
        uses: docker/build-push-action@v4
        with:
          file: './frontend/Dockerfile'
          platforms: linux/amd64
          context: ./frontend
          push: true
          tags: |
            ${{ secrets.DOCKER_USERNAME }}/docsgpt-fe:latest
            ghcr.io/${{ github.repository_owner }}/docsgpt-fe:latest
```
.github/workflows/lint.yml (new file, +17)

```yaml
name: Python linting

on:
  push:
    branches:
      - '*'
  pull_request:
    types: [ opened, synchronize ]

jobs:
  ruff:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      - name: Lint with Ruff
        uses: chartboost/ruff-action@v1
```
.github/workflows/sync_fork.yaml (new file, +41)

```yaml
name: Upstream Sync

permissions:
  contents: write

on:
  schedule:
    - cron: "0 * * * *" # every hour
  workflow_dispatch:

jobs:
  sync_latest_from_upstream:
    name: Sync latest commits from upstream repo
    runs-on: ubuntu-latest
    if: ${{ github.event.repository.fork }}

    steps:
      # Step 1: run a standard checkout action
      - name: Checkout target repo
        uses: actions/checkout@v3

      # Step 2: run the sync action
      - name: Sync upstream changes
        id: sync
        uses: aormsby/Fork-Sync-With-Upstream-action@v3.4
        with:
          # set your upstream repo and branch
          upstream_sync_repo: arc53/DocsGPT
          upstream_sync_branch: main
          target_sync_branch: main
          target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set

          # Set test_mode true to run tests instead of the true action!!
          test_mode: false

      - name: Sync check
        if: failure()
        run: |
          echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]."
          exit 1
```
.gitignore (+8)

```
@@ -162,3 +162,11 @@ frontend/*.sw?
application/vectors/

**/inputs

**/indexes

**/temp

**/yarn.lock
node_modules/
```
.ruff.toml (new file, +2)

```toml
# Allow lines to be as long as 120 characters.
line-length = 120
```
README.md (47 changed lines)

```diff
@@ -21,6 +21,11 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
 </div>
 
 ## Features
 
@@ -29,8 +34,7 @@ Say goodbye to time-consuming manual searches, and let <strong>DocsGPT</strong>
 You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate to contribute or create issues, it helps us make DocsGPT better!
 
 ## Preview
 
 ## [Live preview](https://docsgpt.arc53.com/)
 
@@ -44,16 +48,45 @@ You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, pleas
 - Scripts - script that creates similarity search index and store for other libraries.
 
 - frontend - frontend in vite and
 
 ## QuickStart
 Please note: the current vector database uses the pandas Python documentation, so responses will be related to it; if you want to use other docs, please follow the guide below.
 
 Note: Make sure you have docker installed
 
 1. Download this repository with `git clone https://github.com/arc53/DocsGPT.git`
 2. Create a .env file in your root directory and set your OPENAI_API_KEY with your openai api key and VITE_API_STREAMING to true or false if you don't want streaming answers
 3. Run `docker-compose build && docker-compose up`
 4. Navigate to http://localhost:5173/
 
 To stop, just run Ctrl + C
 
 ## Development environments
 
 Spin up only 2 containers from docker-compose.yaml (by deleting all services except for redis and mongo)
 
 Make sure you have python 3.10 or 3.11 installed
 
 1. Navigate to `/application` folder
-2. Install dependencies
+2. Run `docker-compose -f docker-compose-dev.yaml build && docker-compose -f docker-compose-dev.yaml up -d`
+3. Export required variables
+   `export CELERY_BROKER_URL=redis://localhost:6379/0`
+   `export CELERY_RESULT_BACKEND=redis://localhost:6379/1`
+   `export MONGO_URI=mongodb://localhost:27017/docsgpt`
+4. Install dependencies
+   `pip install -r requirements.txt`
-3. Prepare .env file
+5. Prepare .env file
    Copy .env_sample and create .env with your openai api token
-4. Run the app
-   `python app.py`
+6. Run the app
+   `python wsgi.py`
+7. Start worker with `celery -A app.celery worker -l INFO`
 
 To start frontend
 1. Navigate to `/frontend` folder
 2. Install dependencies
    `npm install`
 3. Run the app
 4. `npm run dev`
 
 [How to install the Chrome extension](https://github.com/arc53/docsgpt/wiki#launch-chrome-extension)
```
```diff
@@ -1 +1,6 @@
-OPENAI_API_KEY=your_api_key
+API_KEY=your_api_key
+EMBEDDINGS_KEY=your_api_key
+CELERY_BROKER_URL=redis://localhost:6379/0
+CELERY_RESULT_BACKEND=redis://localhost:6379/1
+MONGO_URI=mongodb://localhost:27017/docsgpt
+API_URL=http://localhost:5001
```
```diff
@@ -4,17 +4,22 @@ FROM python:3.10-slim-bullseye as builder
 RUN apt-get update && apt-get install -y gcc curl
 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN pip install --upgrade pip && pip install tiktoken==0.1.2
+RUN pip install --upgrade pip && pip install tiktoken==0.3.3
 COPY requirements.txt .
 RUN pip install -r requirements.txt
 
 
 FROM python:3.10-slim-bullseye
 # Copy pre-built packages from builder stage
 COPY --from=builder /usr/local/lib/python3.10/site-packages/ /usr/local/lib/python3.10/site-packages/
 RUN pip install gunicorn==20.1.0
 RUN pip install celery==5.2.7
 WORKDIR /app
 COPY . /app
 ENV FLASK_APP=app.py
 ENV FLASK_DEBUG=true
 RUN pip install -r requirements.txt
 
 EXPOSE 5001
 
-CMD ["gunicorn", "-w", "6", "--bind", "0.0.0.0:5001", "wsgi:app"]
+CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]
```
```diff
@@ -1,32 +1,42 @@
-import os
+import asyncio
+import datetime
+import http.client
+import json
+import os
+import traceback
 
 import openai
 import dotenv
 import requests
-from flask import Flask, request, render_template
+from celery import Celery
+from celery.result import AsyncResult
+from flask import Flask, request, render_template, send_from_directory, jsonify, Response
 from langchain import FAISS
-from langchain.llms import OpenAIChat
 from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
+from langchain.chains import LLMChain, ConversationalRetrievalChain
+from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
 from langchain.chains.question_answering import load_qa_chain
+from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
     HuggingFaceInstructEmbeddings
 from langchain.prompts import PromptTemplate
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+    AIMessagePromptTemplate,
+)
+from pymongo import MongoClient
+from werkzeug.utils import secure_filename
+from langchain.llms import GPT4All
 
+from core.settings import settings
 from error import bad_request
+from worker import ingest_worker
 
-os.environ["LANGCHAIN_HANDLER"] = "langchain"
+# os.environ["LANGCHAIN_HANDLER"] = "langchain"
 
-if os.getenv("LLM_NAME") is not None:
-    llm_choice = os.getenv("LLM_NAME")
-else:
-    llm_choice = "openai"
-
-if os.getenv("EMBEDDINGS_NAME") is not None:
-    embeddings_choice = os.getenv("EMBEDDINGS_NAME")
-else:
-    embeddings_choice = "openai_text-embedding-ada-002"
-
-if llm_choice == "manifest":
+if settings.LLM_NAME == "manifest":
     from manifest import Manifest
     from langchain.llms.manifest import ManifestWrapper
```
```diff
@@ -47,31 +57,157 @@ if platform.system() == "Windows":
 # loading the .env file
 dotenv.load_dotenv()
 
-with open("combine_prompt.txt", "r") as f:
+# load the prompts
+with open("prompts/combine_prompt.txt", "r") as f:
     template = f.read()
 
-with open("combine_prompt_hist.txt", "r") as f:
+with open("prompts/combine_prompt_hist.txt", "r") as f:
     template_hist = f.read()
 
-with open("question_prompt.txt", "r") as f:
+with open("prompts/question_prompt.txt", "r") as f:
     template_quest = f.read()
 
-if os.getenv("API_KEY") is not None:
+with open("prompts/chat_combine_prompt.txt", "r") as f:
+    chat_combine_template = f.read()
+
+with open("prompts/chat_reduce_prompt.txt", "r") as f:
+    chat_reduce_template = f.read()
+
+if settings.API_KEY is not None:
     api_key_set = True
 else:
     api_key_set = False
-if os.getenv("EMBEDDINGS_KEY") is not None:
+if settings.EMBEDDINGS_KEY is not None:
     embeddings_key_set = True
 else:
     embeddings_key_set = False
 
 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
 app.config['CELERY_BROKER_URL'] = settings.CELERY_BROKER_URL
 app.config['CELERY_RESULT_BACKEND'] = settings.CELERY_RESULT_BACKEND
 app.config['MONGO_URI'] = settings.MONGO_URI
 celery = Celery()
 celery.config_from_object('celeryconfig')
 mongo = MongoClient(app.config['MONGO_URI'])
 db = mongo["docsgpt"]
 vectors_collection = db["vectors"]
 
 
 async def async_generate(chain, question, chat_history):
     result = await chain.arun({"question": question, "chat_history": chat_history})
     return result
 
 
 def run_async_chain(chain, question, chat_history):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     result = {}
     try:
         answer = loop.run_until_complete(async_generate(chain, question, chat_history))
     finally:
         loop.close()
     result["answer"] = answer
     return result
```
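run_async_chain spins up a fresh event loop per call so the async chain can be driven from Flask's synchronous request handlers, which have no running loop of their own. A minimal sketch of the pattern, with a stub chain standing in for the LangChain object (the stub is hypothetical, not part of the repo):

```python
import asyncio


class StubChain:
    """Hypothetical stand-in for a LangChain chain exposing an async `arun`."""

    async def arun(self, inputs):
        await asyncio.sleep(0)  # pretend to do async work
        return f"echo: {inputs['question']}"


def run_async_chain(chain, question, chat_history):
    # Each call gets its own event loop, then closes it before returning.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        answer = loop.run_until_complete(
            chain.arun({"question": question, "chat_history": chat_history})
        )
    finally:
        loop.close()
    return {"answer": answer}


print(run_async_chain(StubChain(), "Hi", [])["answer"])  # -> "echo: Hi"
```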
```diff
 def get_vectorstore(data):
     if "active_docs" in data:
         if data["active_docs"].split("/")[0] == "local":
             if data["active_docs"].split("/")[1] == "default":
                 vectorstore = ""
             else:
                 vectorstore = "indexes/" + data["active_docs"]
         else:
             vectorstore = "vectors/" + data["active_docs"]
         if data['active_docs'] == "default":
             vectorstore = ""
     else:
         vectorstore = ""
     return vectorstore
 
 
 def get_docsearch(vectorstore, embeddings_key):
     if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
         docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
     elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
         docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
     elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
         docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
     elif settings.EMBEDDINGS_NAME == "cohere_medium":
         docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
     return docsearch
```
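As a quick illustration of the path mapping get_vectorstore performs, here is a standalone re-trace of its branching under the nesting reconstructed above (the helper name and sample doc ids are hypothetical):

```python
def route_docs(active_docs: str) -> str:
    # Hypothetical re-trace of get_vectorstore's branching.
    scope, _, rest = active_docs.partition("/")
    if scope == "local":
        # "local/default" means the bundled default index (empty path).
        return "" if rest == "default" else "indexes/" + active_docs
    return "vectors/" + active_docs


print(route_docs("local/default"))        # -> ""
print(route_docs("local/user1/my-docs"))  # -> "indexes/local/user1/my-docs"
print(route_docs("pandas/en/1.5"))        # -> "vectors/pandas/en/1.5"
```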
```diff
 @celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
     resp = ingest_worker(self, directory, formats, name_job, filename, user)
     return resp
 
 
 @app.route("/")
 def home():
-    return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
-                           embeddings_choice=embeddings_choice)
+    return render_template("index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME,
+                           embeddings_choice=settings.EMBEDDINGS_NAME)
 
 
 def complete_stream(question, docsearch, chat_history, api_key):
     openai.api_key = api_key
     llm = ChatOpenAI(openai_api_key=api_key)
     docs = docsearch.similarity_search(question, k=2)
     # join all page_content together with a newline
     docs_together = "\n".join([doc.page_content for doc in docs])
     p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
     messages_combine = [{"role": "system", "content": p_chat_combine}]
     if len(chat_history) > 1:
         tokens_current_history = 0
         # count tokens in history
         chat_history.reverse()
         for i in chat_history:
             if "prompt" in i and "response" in i:
                 tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
                 if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
                     tokens_current_history += tokens_batch
                     messages_combine.append({"role": "user", "content": i["prompt"]})
                     messages_combine.append({"role": "system", "content": i["response"]})
     messages_combine.append({"role": "user", "content": question})
     completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
                                               messages=messages_combine, stream=True, max_tokens=500, temperature=0)
 
     for line in completion:
         if 'content' in line['choices'][0]['delta']:
             # check if the delta contains content
             data = json.dumps({"answer": str(line['choices'][0]['delta']['content'])})
             yield f"data: {data}\n\n"
     # send data.type = "end" to indicate that the stream has ended as json
     data = json.dumps({"type": "end"})
     yield f"data: {data}\n\n"
```
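The /stream endpoint below serves these `data:` frames as a text/event-stream response. A minimal sketch of a client consuming it, assuming the backend is running on localhost:5001 as configured elsewhere in this diff (plain `requests` line iteration is used here; no SSE library from this repo is assumed):

```python
import json

import requests

# Hypothetical local call against the /stream endpoint defined below.
resp = requests.get(
    "http://localhost:5001/stream",
    params={"question": "What is DocsGPT?", "history": "[]", "active_docs": "default"},
    stream=True,
)
for raw_line in resp.iter_lines():
    if not raw_line:
        continue  # skip blank separator lines between SSE frames
    payload = json.loads(raw_line.decode("utf-8").removeprefix("data: "))
    if payload.get("type") == "end":
        break  # server signals the end of the stream
    print(payload["answer"], end="", flush=True)
```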
```diff
 @app.route("/stream", methods=['POST', 'GET'])
 def stream():
     # get parameter from url question
     question = request.args.get('question')
     history = request.args.get('history')
     # history to json object from string
     history = json.loads(history)
 
     # check if active_docs is set
 
     if not api_key_set:
         api_key = request.args.get("api_key")
     else:
         api_key = settings.API_KEY
     if not embeddings_key_set:
         embeddings_key = request.args.get("embeddings_key")
     else:
         embeddings_key = settings.EMBEDDINGS_KEY
     if "active_docs" in request.args:
         vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
     else:
         vectorstore = ""
     docsearch = get_docsearch(vectorstore, embeddings_key)
 
     # question = "Hi"
     return Response(complete_stream(question, docsearch,
                                     chat_history=history, api_key=api_key), mimetype='text/event-stream')
```
```diff
 @app.route("/api/answer", methods=["POST"])
@@ -83,70 +219,93 @@ def api_answer():
     if not api_key_set:
         api_key = data["api_key"]
     else:
-        api_key = os.getenv("API_KEY")
+        api_key = settings.API_KEY
     if not embeddings_key_set:
         embeddings_key = data["embeddings_key"]
     else:
-        embeddings_key = os.getenv("EMBEDDINGS_KEY")
+        embeddings_key = settings.EMBEDDINGS_KEY
 
     # use try and except to check for exception
     try:
         # check if the vectorstore is set
-        if "active_docs" in data:
-            vectorstore = "vectors/" + data["active_docs"]
-            if data['active_docs'] == "default":
-                vectorstore = ""
-        else:
-            vectorstore = ""
-        # vectorstore = "outputs/inputs/"
+        vectorstore = get_vectorstore(data)
         # loading the index and the store and the prompt template
         # Note if you have used other embeddings than OpenAI, you need to change the embeddings
-        if embeddings_choice == "openai_text-embedding-ada-002":
-            docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
-        elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
-            docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
-        elif embeddings_choice == "huggingface_hkunlp/instructor-large":
-            docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
-        elif embeddings_choice == "cohere_medium":
-            docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
-
-        # create a prompt template
-        if history:
-            history = json.loads(history)
-            template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}",
-                                                                                           history[1])
-            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp,
-                                      template_format="jinja2")
-        else:
-            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
-                                      template_format="jinja2")
+        docsearch = get_docsearch(vectorstore, embeddings_key)
 
         q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
                                   template_format="jinja2")
-        if llm_choice == "openai":
-            llm = OpenAIChat(openai_api_key=api_key, temperature=0)
-            # llm = OpenAI(openai_api_key=api_key, temperature=0)
-        elif llm_choice == "manifest":
+        if settings.LLM_NAME == "openai_chat":
+            llm = ChatOpenAI(openai_api_key=api_key)  # optional parameter: model_name="gpt-4"
+            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
+            if history:
+                tokens_current_history = 0
+                # count tokens in history
+                history.reverse()
+                for i in history:
+                    if "prompt" in i and "response" in i:
+                        tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                        if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                            tokens_current_history += tokens_batch
+                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
+                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
+            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
+            import sys
+            print(messages_combine, file=sys.stderr)
+            p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
+        elif settings.LLM_NAME == "openai":
+            llm = OpenAI(openai_api_key=api_key, temperature=0)
+        elif settings.LLM_NAME == "manifest":
             llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
-        elif llm_choice == "huggingface":
+        elif settings.LLM_NAME == "huggingface":
             llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
-        elif llm_choice == "cohere":
+        elif settings.LLM_NAME == "cohere":
             llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+        elif settings.LLM_NAME == "gpt4all":
+            llm = GPT4All(model=settings.MODEL_PATH)
+        else:
+            raise ValueError("unknown LLM model")
 
-        qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
-                                 combine_prompt=c_prompt, question_prompt=q_prompt)
+        if settings.LLM_NAME == "openai_chat":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
+        elif settings.LLM_NAME == "gpt4all":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
-        chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=10)
+        else:
+            qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
+                                     combine_prompt=chat_combine_template, question_prompt=q_prompt)
+            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
+            result = chain({"query": question})
 
-        # fetch the answer
-        result = chain({"query": question})
         print(result)
 
         # some formatting for the frontend
-        result['answer'] = result['result']
+        if "result" in result:
+            result['answer'] = result['result']
         result['answer'] = result['answer'].replace("\\n", "\n")
         try:
             result['answer'] = result['answer'].split("SOURCES:")[0]
-        except:
+        except Exception:
             pass
 
         # mock result
```
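A minimal sketch of calling /api/answer from a client; the payload keys mirror the handler above, and all values are illustrative:

```python
import requests

payload = {
    "question": "How do I install DocsGPT?",
    "history": "",                # or prior conversation turns
    "api_key": "sk-...",          # only needed when the server has no API_KEY set
    "embeddings_key": "sk-...",   # likewise, only when EMBEDDINGS_KEY is unset
    "active_docs": "default",
}
resp = requests.post("http://localhost:5001/api/answer", json=payload)
print(resp.json()["answer"])
```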
```diff
@@ -166,6 +325,9 @@ def api_answer():
 def check_docs():
     # check if docs exist in a vectorstore folder
     data = request.get_json()
+    # split docs on / and take first part
+    if data["docs"].split("/")[0] == "local":
+        return {"status": 'exists'}
     vectorstore = "vectors/" + data["docs"]
     base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
     if os.path.exists(vectorstore) or data["docs"] == "default":
@@ -189,12 +351,194 @@
     return {"status": 'loaded'}
```
```diff
 @app.route("/api/feedback", methods=["POST"])
 def api_feedback():
     data = request.get_json()
     question = data["question"]
     answer = data["answer"]
     feedback = data["feedback"]
 
     print('-' * 5)
     print("Question: " + question)
     print("Answer: " + answer)
     print("Feedback: " + feedback)
     print('-' * 5)
     response = requests.post(
         url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-feedback",
         headers={
             "Content-Type": "application/json; charset=utf-8",
         },
         data=json.dumps({
             "answer": answer,
             "question": question,
             "feedback": feedback
         })
     )
     return {"status": http.client.responses.get(response.status_code, 'ok')}
```
```diff
 @app.route('/api/combine', methods=['GET'])
 def combined_json():
     user = 'local'
     """Provide json file with combined available indexes."""
     # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json
 
     data = [{
         "name": 'default',
         "language": 'default',
         "version": '',
         "description": 'default',
         "fullName": 'default',
         "date": 'default',
         "docLink": 'default',
         "model": settings.EMBEDDINGS_NAME,
         "location": "local"
     }]
     # structure: name, language, version, description, fullName, date, docLink
     # append data from vectors_collection
     for index in vectors_collection.find({'user': user}):
         data.append({
             "name": index['name'],
             "language": index['language'],
             "version": '',
             "description": index['name'],
             "fullName": index['name'],
             "date": index['date'],
             "docLink": index['location'],
             "model": settings.EMBEDDINGS_NAME,
             "location": "local"
         })
 
     data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
     for index in data_remote:
         index['location'] = "remote"
         data.append(index)
 
     return jsonify(data)
```
```diff
 @app.route('/api/upload', methods=['POST'])
 def upload_file():
     """Upload a file to get vectorized and indexed."""
     if 'user' not in request.form:
         return {"status": 'no user'}
     user = secure_filename(request.form['user'])
     if 'name' not in request.form:
         return {"status": 'no name'}
     job_name = secure_filename(request.form['name'])
     # check if the post request has the file part
     if 'file' not in request.files:
         print('No file part')
         return {"status": 'no file'}
     file = request.files['file']
     if file.filename == '':
         return {"status": 'no file name'}
 
     if file:
         filename = secure_filename(file.filename)
         # save dir
         save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
         # create dir if not exists
         if not os.path.exists(save_dir):
             os.makedirs(save_dir)
 
         file.save(os.path.join(save_dir, filename))
         task = ingest.delay('temp', [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
         # task id
         task_id = task.id
         return {"status": 'ok', "task_id": task_id}
     else:
         return {"status": 'error'}
```
```diff
 @app.route('/api/task_status', methods=['GET'])
 def task_status():
     """Get celery job status."""
     task_id = request.args.get('task_id')
     task = AsyncResult(task_id)
     task_meta = task.info
     return {"status": task.status, "result": task_meta}
```
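A minimal sketch of driving these two endpoints together from a client, assuming the backend runs on localhost:5001 as configured above (the file name, user, and job name are illustrative):

```python
import time

import requests

BASE = "http://localhost:5001"  # API_URL from the settings module below

# Upload a document for ingestion; 'user' and 'name' are illustrative values.
with open("manual.pdf", "rb") as fh:
    resp = requests.post(
        f"{BASE}/api/upload",
        data={"user": "local", "name": "my-docs"},
        files={"file": fh},
    )
task_id = resp.json()["task_id"]

# Poll the celery task until it reaches a terminal state.
while True:
    status = requests.get(f"{BASE}/api/task_status", params={"task_id": task_id}).json()
    if status["status"] in ("SUCCESS", "FAILURE"):
        print(status)
        break
    time.sleep(2)
```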
```diff
 ### Background task api
 @app.route('/api/upload_index', methods=['POST'])
 def upload_index_files():
     """Upload two files (index.faiss, index.pkl) to the user's folder."""
     if 'user' not in request.form:
         return {"status": 'no user'}
     user = secure_filename(request.form['user'])
     if 'name' not in request.form:
         return {"status": 'no name'}
     job_name = secure_filename(request.form['name'])
     if 'file_faiss' not in request.files:
         print('No file part')
         return {"status": 'no file'}
     file_faiss = request.files['file_faiss']
     if file_faiss.filename == '':
         return {"status": 'no file name'}
     if 'file_pkl' not in request.files:
         print('No file part')
         return {"status": 'no file'}
     file_pkl = request.files['file_pkl']
     if file_pkl.filename == '':
         return {"status": 'no file name'}
 
     # saves index files
     save_dir = os.path.join('indexes', user, job_name)
     if not os.path.exists(save_dir):
         os.makedirs(save_dir)
     file_faiss.save(os.path.join(save_dir, 'index.faiss'))
     file_pkl.save(os.path.join(save_dir, 'index.pkl'))
     # create entry in vectors_collection
     vectors_collection.insert_one({
         "user": user,
         "name": job_name,
         "language": job_name,
         "location": save_dir,
         "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
         "model": settings.EMBEDDINGS_NAME,
         "type": "local"
     })
     return {"status": 'ok'}
```
```diff
 @app.route('/api/download', methods=['get'])
 def download_file():
     user = secure_filename(request.args.get('user'))
     job_name = secure_filename(request.args.get('name'))
     filename = secure_filename(request.args.get('file'))
     save_dir = os.path.join(app.config['UPLOAD_FOLDER'], user, job_name)
     return send_from_directory(save_dir, filename, as_attachment=True)
```
```diff
 @app.route('/api/delete_old', methods=['get'])
 def delete_old():
     """Delete old indexes."""
     import shutil
     path = request.args.get('path')
     dirs = path.split('/')
     dirs_clean = []
     for i in range(1, len(dirs)):
         dirs_clean.append(secure_filename(dirs[i]))
     # check that path starts with indexes or vectors
     if dirs[0] not in ['indexes', 'vectors']:
         return {"status": 'error'}
     path_clean = '/'.join(dirs)
     vectors_collection.delete_one({'location': path})
     try:
         shutil.rmtree(path_clean)
     except FileNotFoundError:
         pass
     return {"status": 'ok'}
 
 
 # handling CORS
 @app.after_request
 def after_request(response):
     response.headers.add('Access-Control-Allow-Origin', '*')
     response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
     response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
     response.headers.add('Access-Control-Allow-Credentials', 'true')
     return response
```
application/celeryconfig.py (new file, +8)

```python
import os

broker_url = os.getenv("CELERY_BROKER_URL")
result_backend = os.getenv("CELERY_RESULT_BACKEND")

task_serializer = 'json'
result_serializer = 'json'
accept_content = ['json']
```
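This module is what app.py's `celery.config_from_object('celeryconfig')` call picks up. A minimal sketch of the wiring, assuming the two redis URLs from the README's dev-environment exports (the `setdefault` calls are illustrative):

```python
import os

from celery import Celery

# Assumed environment, matching the README's dev-environment exports.
os.environ.setdefault("CELERY_BROKER_URL", "redis://localhost:6379/0")
os.environ.setdefault("CELERY_RESULT_BACKEND", "redis://localhost:6379/1")

celery = Celery()
celery.config_from_object('celeryconfig')  # imports the module above off sys.path

print(celery.conf.broker_url)  # -> redis://localhost:6379/0
```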
application/core/__init__.py (new empty file)
application/core/settings.py (new file, +22)

```python
from pathlib import Path

from pydantic import BaseSettings


class Settings(BaseSettings):
    LLM_NAME: str = "openai_chat"
    EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
    MODEL_PATH: str = "./models/gpt4all-model.bin"
    TOKENS_MAX_HISTORY: int = 150

    API_URL: str = "http://localhost:5001"  # backend url for celery worker

    API_KEY: str = None  # LLM api key
    EMBEDDINGS_KEY: str = None  # api key for embeddings (if using openai, just copy API_KEY)


path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
```
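Because Settings extends pydantic's BaseSettings, every field can be overridden by an environment variable or by the application/.env file named above, with string values coerced to the declared types. A minimal sketch (the override values are illustrative):

```python
import os

from core.settings import Settings

# Environment variables win over the class defaults; values are illustrative.
os.environ["LLM_NAME"] = "cohere"
os.environ["TOKENS_MAX_HISTORY"] = "300"

settings = Settings()
print(settings.LLM_NAME)            # -> "cohere"
print(settings.TOKENS_MAX_HISTORY)  # -> 300 (coerced to int by pydantic)
```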
```diff
@@ -1,13 +1,15 @@
 from flask import jsonify
 from werkzeug.http import HTTP_STATUS_CODES
 
-def response_error(code_status,message=None):
-    payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
+
+def response_error(code_status, message=None):
+    payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
     if message:
         payload['message'] = message
     response = jsonify(payload)
     response.status_code = code_status
     return response
 
-def bad_request(status_code=400,message=''):
-    return response_error(code_status=status_code,message=message)
+
+def bad_request(status_code=400, message=''):
+    return response_error(code_status=status_code, message=message)
```
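app.py imports `bad_request` from this module. A minimal sketch of how a handler might use it (the route and message are illustrative, not taken from the repo):

```python
from flask import Flask, request

from error import bad_request

app = Flask(__name__)


@app.route("/api/example", methods=["POST"])
def example():
    data = request.get_json()
    if not data or "question" not in data:
        # 400 response whose JSON body carries the standard reason phrase
        # plus the custom message.
        return bad_request(400, message="missing 'question' field")
    return {"status": "ok"}
```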
application/parser/__init__.py (new file, 1 empty line)
application/parser/file/base.py (new file, +19)

```python
"""Base reader class."""
from abc import abstractmethod
from typing import Any, List

from langchain.docstore.document import Document as LCDocument
from parser.schema.base import Document


class BaseReader:
    """Utilities for loading data from a directory."""

    @abstractmethod
    def load_data(self, *args: Any, **load_kwargs: Any) -> List[Document]:
        """Load data from the input directory."""

    def load_langchain_documents(self, **load_kwargs: Any) -> List[LCDocument]:
        """Load data in LangChain document format."""
        docs = self.load_data(**load_kwargs)
        return [d.to_langchain_format() for d in docs]
```
application/parser/file/base_parser.py (new file, +38)

```python
"""Base parser and config class."""

from abc import abstractmethod
from pathlib import Path
from typing import Dict, List, Optional, Union


class BaseParser:
    """Base class for all parsers."""

    def __init__(self, parser_config: Optional[Dict] = None):
        """Init params."""
        self._parser_config = parser_config

    def init_parser(self) -> None:
        """Init parser and store it."""
        parser_config = self._init_parser()
        self._parser_config = parser_config

    @property
    def parser_config_set(self) -> bool:
        """Check if parser config is set."""
        return self._parser_config is not None

    @property
    def parser_config(self) -> Dict:
        """Return the parser config, raising if it has not been set."""
        if self._parser_config is None:
            raise ValueError("Parser config not set.")
        return self._parser_config

    @abstractmethod
    def _init_parser(self) -> Dict:
        """Initialize the parser with the config."""

    @abstractmethod
    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        """Parse file."""
```
application/parser/file/bulk.py (new file, +163)

```python
"""Simple reader that reads files of different formats from a directory."""
import logging
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

from parser.file.base import BaseReader
from parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser
from parser.file.epub_parser import EpubParser
from parser.file.html_parser import HTMLParser
from parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document

DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
    ".pdf": PDFParser(),
    ".docx": DocxParser(),
    ".csv": PandasCSVParser(),
    ".epub": EpubParser(),
    ".md": MarkdownParser(),
    ".rst": RstParser(),
    ".html": HTMLParser(),
    ".mdx": MarkdownParser(),
}


class SimpleDirectoryReader(BaseReader):
    """Simple directory reader.

    Can read files into separate documents, or concatenates
    files into one document text.

    Args:
        input_dir (str): Path to the directory.
        input_files (List): List of file paths to read (Optional; overrides input_dir)
        exclude_hidden (bool): Whether to exclude hidden files (dotfiles).
        errors (str): how encoding and decoding errors are to be handled,
            see https://docs.python.org/3/library/functions.html#open
        recursive (bool): Whether to recursively search in subdirectories.
            False by default.
        required_exts (Optional[List[str]]): List of required extensions.
            Default is None.
        file_extractor (Optional[Dict[str, BaseParser]]): A mapping of file
            extension to a BaseParser class that specifies how to convert that file
            to text. See DEFAULT_FILE_EXTRACTOR.
        num_files_limit (Optional[int]): Maximum number of files to read.
            Default is None.
        file_metadata (Optional[Callable[str, Dict]]): A function that takes
            in a filename and returns a Dict of metadata for the Document.
            Default is None.
    """

    def __init__(
            self,
            input_dir: Optional[str] = None,
            input_files: Optional[List] = None,
            exclude_hidden: bool = True,
            errors: str = "ignore",
            recursive: bool = True,
            required_exts: Optional[List[str]] = None,
            file_extractor: Optional[Dict[str, BaseParser]] = None,
            num_files_limit: Optional[int] = None,
            file_metadata: Optional[Callable[[str], Dict]] = None,
            chunk_size_max: int = 2048,
    ) -> None:
        """Initialize with parameters."""
        super().__init__()

        if not input_dir and not input_files:
            raise ValueError("Must provide either `input_dir` or `input_files`.")

        self.errors = errors

        self.recursive = recursive
        self.exclude_hidden = exclude_hidden
        self.required_exts = required_exts
        self.num_files_limit = num_files_limit

        if input_files:
            self.input_files = []
            for path in input_files:
                print(path)
                input_file = Path(path)
                self.input_files.append(input_file)
        elif input_dir:
            self.input_dir = Path(input_dir)
            self.input_files = self._add_files(self.input_dir)

        self.file_extractor = file_extractor or DEFAULT_FILE_EXTRACTOR
        self.file_metadata = file_metadata

    def _add_files(self, input_dir: Path) -> List[Path]:
        """Add files."""
        input_files = sorted(input_dir.iterdir())
        new_input_files = []
        dirs_to_explore = []
        for input_file in input_files:
            if input_file.is_dir():
                if self.recursive:
                    dirs_to_explore.append(input_file)
            elif self.exclude_hidden and input_file.name.startswith("."):
                continue
            elif (
                    self.required_exts is not None
                    and input_file.suffix not in self.required_exts
            ):
                continue
            else:
                new_input_files.append(input_file)

        for dir_to_explore in dirs_to_explore:
            sub_input_files = self._add_files(dir_to_explore)
            new_input_files.extend(sub_input_files)

        if self.num_files_limit is not None and self.num_files_limit > 0:
            new_input_files = new_input_files[0: self.num_files_limit]

        # print total number of files added
        logging.debug(
            f"> [SimpleDirectoryReader] Total files added: {len(new_input_files)}"
        )

        return new_input_files

    def load_data(self, concatenate: bool = False) -> List[Document]:
        """Load data from the input directory.

        Args:
            concatenate (bool): whether to concatenate all files into one document.
                If set to True, file metadata is ignored.
                False by default.

        Returns:
            List[Document]: A list of documents.

        """
        data: Union[str, List[str]] = ""
        data_list: List[str] = []
        metadata_list = []
        for input_file in self.input_files:
            if input_file.suffix in self.file_extractor:
                parser = self.file_extractor[input_file.suffix]
                if not parser.parser_config_set:
                    parser.init_parser()
                data = parser.parse_file(input_file, errors=self.errors)
            else:
                # do standard read
                with open(input_file, "r", errors=self.errors) as f:
                    data = f.read()
            if isinstance(data, List):
                data_list.extend(data)
            else:
                data_list.append(str(data))
            if self.file_metadata is not None:
                metadata_list.append(self.file_metadata(str(input_file)))

        if concatenate:
            return [Document("\n".join(data_list))]
        elif self.file_metadata is not None:
            return [Document(d, extra_info=m) for d, m in zip(data_list, metadata_list)]
        else:
            return [Document(d) for d in data_list]
```
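A minimal sketch of using the reader to turn a folder of docs into LangChain documents (the directory path is illustrative):

```python
from parser.file.bulk import SimpleDirectoryReader

# "inputs/local/my-docs" is illustrative; any folder of .md/.rst/.pdf files works.
reader = SimpleDirectoryReader(
    input_dir="inputs/local/my-docs",
    required_exts=[".rst", ".md", ".pdf", ".txt"],
    recursive=True,
)
raw_docs = reader.load_data()                # parser-native Document objects
lc_docs = reader.load_langchain_documents()  # converted to LangChain format
print(len(raw_docs), len(lc_docs))
```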
application/parser/file/docs_parser.py (new file, +59)

```python
"""Docs parser.

Contains parsers for docx, pdf files.

"""
from pathlib import Path
from typing import Dict

from parser.file.base_parser import BaseParser


class PDFParser(BaseParser):
    """PDF parser."""

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> str:
        """Parse file."""
        try:
            import PyPDF2
        except ImportError:
            raise ValueError("PyPDF2 is required to read PDF files.")
        text_list = []
        with open(file, "rb") as fp:
            # Create a PDF object
            pdf = PyPDF2.PdfReader(fp)

            # Get the number of pages in the PDF document
            num_pages = len(pdf.pages)

            # Iterate over every page
            for page in range(num_pages):
                # Extract the text from the page
                page_text = pdf.pages[page].extract_text()
                text_list.append(page_text)
        text = "\n".join(text_list)

        return text


class DocxParser(BaseParser):
    """Docx parser."""

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> str:
        """Parse file."""
        try:
            import docx2txt
        except ImportError:
            raise ValueError("docx2txt is required to read Microsoft Word files.")

        text = docx2txt.process(file)

        return text
```
application/parser/file/epub_parser.py (new file, +43)

```python
"""Epub parser.

Contains parsers for epub files.
"""

from pathlib import Path
from typing import Dict

from parser.file.base_parser import BaseParser


class EpubParser(BaseParser):
    """Epub Parser."""

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> str:
        """Parse file."""
        try:
            import ebooklib
            from ebooklib import epub
        except ImportError:
            raise ValueError("`EbookLib` is required to read Epub files.")
        try:
            import html2text
        except ImportError:
            raise ValueError("`html2text` is required to parse Epub files.")

        text_list = []
        book = epub.read_epub(file, options={"ignore_ncx": True})

        # Iterate through all chapters.
        for item in book.get_items():
            # Chapters are typically located in epub documents items.
            if item.get_type() == ebooklib.ITEM_DOCUMENT:
                text_list.append(
                    html2text.html2text(item.get_content().decode("utf-8"))
                )

        text = "\n".join(text_list)
        return text
```
application/parser/file/html_parser.py (new file, +83)

```python
"""HTML parser.

Contains parser for html files.

"""
import re
from pathlib import Path
from typing import Dict, Union

from parser.file.base_parser import BaseParser


class HTMLParser(BaseParser):
    """HTML parser."""

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
        """Parse file.

        Returns:
            Union[str, List[str]]: a string or a List of strings.
        """
        try:
            from unstructured.partition.html import partition_html
            from unstructured.staging.base import convert_to_isd
            from unstructured.cleaners.core import clean
        except ImportError:
            raise ValueError("unstructured package is required to parse HTML files.")

        # Using the unstructured library to convert the html to isd format
        # isd sample : isd = [
        #   {"text": "My Title", "type": "Title"},
        #   {"text": "My Narrative", "type": "NarrativeText"}
        # ]
        with open(file, "r", encoding="utf-8") as fp:
            elements = partition_html(file=fp)
            isd = convert_to_isd(elements)

        # Removing non-ascii characters from isd_el['text']
        for isd_el in isd:
            isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()

        # Removing all the \n characters from isd_el['text'] using regex and replace with single space
        # Removing all the extra spaces from isd_el['text'] using regex and replace with single space
        for isd_el in isd:
            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
            isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)

        # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
        for isd_el in isd:
            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)

        # Creating a list of all the indexes of isd_el['type'] = 'Title'
        title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']

        # Creating 'Chunks' - List of lists of strings
        # each list starting with isd_el['type'] = 'Title' and all the data till the next 'Title'
        # Each Chunk can be thought of as an individual set of data, which can be sent to the model
        # Where each Title is grouped together with the data under it

        Chunks = [[]]
        final_chunks = list(list())

        for i, isd_el in enumerate(isd):
            if i in title_indexes:
                Chunks.append([])
            Chunks[-1].append(isd_el['text'])

        # Removing all the chunks with sum of length of all the strings in the chunk < 25
        # TODO: This value can be a user-defined variable
        for chunk in Chunks:
            # sum of length of all the strings in the chunk
            sum = 0
            sum += len(str(chunk))
            if sum < 25:
                Chunks.remove(chunk)
            else:
                # appending all the approved chunks to final_chunks as a single string
                final_chunks.append(" ".join([str(item) for item in chunk]))
        return final_chunks
```
145
application/parser/file/markdown_parser.py
Normal file
145
application/parser/file/markdown_parser.py
Normal file
@@ -0,0 +1,145 @@
"""Markdown parser.

Contains parser for md files.

"""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast

import tiktoken
from parser.file.base_parser import BaseParser


class MarkdownParser(BaseParser):
    """Markdown parser.

    Extract text from markdown files.
    Returns dictionary with keys as headers and values as the text between headers.

    """

    def __init__(
        self,
        *args: Any,
        remove_hyperlinks: bool = True,
        remove_images: bool = True,
        max_tokens: int = 2048,
        # remove_tables: bool = True,
        **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._remove_hyperlinks = remove_hyperlinks
        self._remove_images = remove_images
        self._max_tokens = max_tokens
        # self._remove_tables = remove_tables

    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
                          current_text: str):
        """Append to tups chunk."""
        num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
        if num_tokens > self._max_tokens:
            # note: slices by characters, reusing the token budget as a character count
            chunks = [current_text[i:i + self._max_tokens] for i in range(0, len(current_text), self._max_tokens)]
            for chunk in chunks:
                tups.append((current_header, chunk))
        else:
            tups.append((current_header, current_text))
        return tups

    def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
        """Convert a markdown file to a dictionary.

        The keys are the headers and the values are the text under each header.

        """
        markdown_tups: List[Tuple[Optional[str], str]] = []
        lines = markdown_text.split("\n")

        current_header = None
        current_text = ""

        for line in lines:
            header_match = re.match(r"^#+\s", line)
            if header_match:
                if current_header is not None:
                    # fixed: the original `current_text == "" or None` never evaluated the `or None`
                    if current_text == "":
                        continue
                    markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)

                current_header = line
                current_text = ""
            else:
                current_text += line + "\n"
        markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)

        if current_header is not None:
            # pass linting, assert keys are defined
            markdown_tups = [
                (re.sub(r"#", "", cast(str, key)).strip(), re.sub(r"<.*?>", "", value))
                for key, value in markdown_tups
            ]
        else:
            markdown_tups = [
                (key, re.sub("\n", "", value)) for key, value in markdown_tups
            ]

        return markdown_tups

    def remove_images(self, content: str) -> str:
        """Remove image wikilinks (``![[...]]``) from markdown content."""
        pattern = r"!{1}\[\[(.*)\]\]"
        content = re.sub(pattern, "", content)
        return content

    # def remove_tables(self, content: str) -> List[List[str]]:
    #     """Convert markdown tables to nested lists."""
    #     table_rows_pattern = r"((\r?\n){2}|^)([^\r\n]*\|[^\r\n]*(\r?\n)?)+(?=(\r?\n){2}|$)"
    #     table_cells_pattern = r"([^\|\r\n]*)\|"
    #
    #     table_rows = re.findall(table_rows_pattern, content, re.MULTILINE)
    #     table_lists = []
    #     for row in table_rows:
    #         cells = re.findall(table_cells_pattern, row[2])
    #         cells = [cell.strip() for cell in cells if cell.strip()]
    #         table_lists.append(cells)
    #     return str(table_lists)

    def remove_hyperlinks(self, content: str) -> str:
        """Replace markdown hyperlinks with their link text."""
        pattern = r"\[(.*?)\]\((.*?)\)"
        content = re.sub(pattern, r"\1", content)
        return content

    def _init_parser(self) -> Dict:
        """Initialize the parser with the config."""
        return {}

    def parse_tups(
        self, filepath: Path, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Parse file into tuples."""
        with open(filepath, "r") as f:
            content = f.read()
        if self._remove_hyperlinks:
            content = self.remove_hyperlinks(content)
        if self._remove_images:
            content = self.remove_images(content)
        # if self._remove_tables:
        #     content = self.remove_tables(content)
        markdown_tups = self.markdown_to_tups(content)
        return markdown_tups

    def parse_file(
        self, filepath: Path, errors: str = "ignore"
    ) -> Union[str, List[str]]:
        """Parse file into string."""
        tups = self.parse_tups(filepath, errors=errors)
        results = []
        # TODO: don't include headers right now
        for header, value in tups:
            if header is None:
                results.append(value)
            else:
                results.append(f"\n\n{header}\n{value}")
        return results
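A minimal usage sketch of the markdown parser above (the input path is hypothetical; the import assumes the `application/parser` package layout this diff introduces):

```python
from pathlib import Path

from parser.file.markdown_parser import MarkdownParser

# hyperlink/image stripping and the token budget are all constructor options
parser = MarkdownParser(remove_hyperlinks=True, remove_images=True, max_tokens=2048)
chunks = parser.parse_file(Path("docs/guide.md"))  # hypothetical input file
for chunk in chunks:
    print(repr(chunk[:60]))  # one string per (header, text) pair
```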
application/parser/file/rst_parser.py (new file, 173 lines)
@@ -0,0 +1,173 @@
"""reStructuredText parser.

Contains parser for .rst files.

"""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

from parser.file.base_parser import BaseParser


class RstParser(BaseParser):
    """reStructuredText parser.

    Extract text from .rst files.
    Returns dictionary with keys as headers and values as the text between headers.

    """

    def __init__(
        self,
        *args: Any,
        remove_hyperlinks: bool = True,
        remove_images: bool = True,
        remove_table_excess: bool = True,
        remove_interpreters: bool = True,
        remove_directives: bool = True,
        remove_whitespaces_excess: bool = True,
        # Be careful with remove_characters_excess, it might cause data loss.
        remove_characters_excess: bool = True,
        **kwargs: Any,
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._remove_hyperlinks = remove_hyperlinks
        self._remove_images = remove_images
        self._remove_table_excess = remove_table_excess
        self._remove_interpreters = remove_interpreters
        self._remove_directives = remove_directives
        self._remove_whitespaces_excess = remove_whitespaces_excess
        self._remove_characters_excess = remove_characters_excess

    def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
        """Convert a reStructuredText file to a dictionary.

        The keys are the headers and the values are the text under each header.

        """
        rst_tups: List[Tuple[Optional[str], str]] = []
        lines = rst_text.split("\n")

        current_header = None
        current_text = ""

        for i, line in enumerate(lines):
            header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
            # Note: the original condition also or-ed in `lines[i - 2] == lines[i - 2]`,
            # which is always true, so the underline-length comparison was effectively
            # dead code; the behaviour reduces to "any underline after line 0".
            if header_match and i > 0:
                if current_header is not None:
                    # fixed: the original `current_text == "" or None` never evaluated the `or None`
                    if current_text == "":
                        continue
                    # removes the next heading from current Document
                    if current_text.endswith(lines[i - 1] + "\n"):
                        current_text = current_text[:len(current_text) - len(lines[i - 1] + "\n")]
                    rst_tups.append((current_header, current_text))

                current_header = lines[i - 1]
                current_text = ""
            else:
                current_text += line + "\n"

        rst_tups.append((current_header, current_text))

        # TODO: Format for rst
        #
        # if current_header is not None:
        #     # pass linting, assert keys are defined
        #     rst_tups = [
        #         (re.sub(r"#", "", cast(str, key)).strip(), re.sub(r"<.*?>", "", value))
        #         for key, value in rst_tups
        #     ]
        # else:
        #     rst_tups = [
        #         (key, re.sub("\n", "", value)) for key, value in rst_tups
        #     ]

        if current_header is None:
            rst_tups = [
                (key, re.sub("\n", "", value)) for key, value in rst_tups
            ]
        return rst_tups

    def remove_images(self, content: str) -> str:
        pattern = r"\.\. image:: (.*)"
        content = re.sub(pattern, "", content)
        return content

    def remove_hyperlinks(self, content: str) -> str:
        pattern = r"`(.*?) <(.*?)>`_"
        content = re.sub(pattern, r"\1", content)
        return content

    def remove_directives(self, content: str) -> str:
        """Removes reStructuredText Directives"""
        # fixed: the original pattern carried a stray leading backtick (r"`\.\.([^:]+)::"),
        # which would only ever match directives preceded by a backtick
        pattern = r"\.\.([^:]+)::"
        content = re.sub(pattern, "", content)
        return content

    def remove_interpreters(self, content: str) -> str:
        """Removes reStructuredText Interpreted Text Roles"""
        pattern = r":(\w+):"
        content = re.sub(pattern, "", content)
        return content

    def remove_table_excess(self, content: str) -> str:
        """Pattern to remove grid table separators"""
        pattern = r"^\+[-]+\+[-]+\+$"
        content = re.sub(pattern, "", content, flags=re.MULTILINE)
        return content

    def remove_whitespaces_excess(self, content: List[Tuple[str, Any]]) -> List[Tuple[str, Any]]:
        """Pattern to match 2 or more consecutive whitespaces"""
        pattern = r"\s{2,}"
        content = [(key, re.sub(pattern, " ", value)) for key, value in content]
        return content

    def remove_characters_excess(self, content: List[Tuple[str, Any]]) -> List[Tuple[str, Any]]:
        """Pattern to match 2 or more consecutive characters"""
        pattern = r"(\S)\1{2,}"
        content = [(key, re.sub(pattern, r"\1\1\1", value, flags=re.MULTILINE)) for key, value in content]
        return content

    def _init_parser(self) -> Dict:
        """Initialize the parser with the config."""
        return {}

    def parse_tups(
        self, filepath: Path, errors: str = "ignore"
    ) -> List[Tuple[Optional[str], str]]:
        """Parse file into tuples."""
        with open(filepath, "r") as f:
            content = f.read()
        if self._remove_hyperlinks:
            content = self.remove_hyperlinks(content)
        if self._remove_images:
            content = self.remove_images(content)
        if self._remove_table_excess:
            content = self.remove_table_excess(content)
        if self._remove_directives:
            content = self.remove_directives(content)
        if self._remove_interpreters:
            content = self.remove_interpreters(content)
        rst_tups = self.rst_to_tups(content)
        if self._remove_whitespaces_excess:
            rst_tups = self.remove_whitespaces_excess(rst_tups)
        if self._remove_characters_excess:
            rst_tups = self.remove_characters_excess(rst_tups)
        return rst_tups

    def parse_file(
        self, filepath: Path, errors: str = "ignore"
    ) -> Union[str, List[str]]:
        """Parse file into string."""
        tups = self.parse_tups(filepath, errors=errors)
        results = []
        # TODO: don't include headers right now
        for header, value in tups:
            if header is None:
                results.append(value)
            else:
                results.append(f"\n\n{header}\n{value}")
        return results
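The reStructuredText parser is driven the same way; a sketch (hypothetical path, same package-layout assumption) showing that each cleanup pass can be toggled individually:

```python
from pathlib import Path

from parser.file.rst_parser import RstParser

# remove_characters_excess is the lossy pass flagged above, so it is disabled here
parser = RstParser(remove_hyperlinks=True, remove_images=True, remove_characters_excess=False)
sections = parser.parse_file(Path("docs/install.rst"))  # hypothetical input file
print(len(sections), "sections extracted")
```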
application/parser/file/tabular_parser.py (new file, 115 lines)
@@ -0,0 +1,115 @@
"""Tabular parser.

Contains parsers for tabular data files.

"""
from pathlib import Path
from typing import Any, Dict, List, Union

from parser.file.base_parser import BaseParser


class CSVParser(BaseParser):
    """CSV parser.

    Args:
        concat_rows (bool): whether to concatenate all rows into one document.
            If set to False, a Document will be created for each row.
            True by default.

    """

    def __init__(self, *args: Any, concat_rows: bool = True, **kwargs: Any) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        """Parse file.

        Returns:
            Union[str, List[str]]: a string or a List of strings.

        """
        try:
            import csv
        except ImportError:
            raise ValueError("csv module is required to read CSV files.")
        text_list = []
        with open(file, "r") as fp:
            csv_reader = csv.reader(fp)
            for row in csv_reader:
                text_list.append(", ".join(row))
        if self._concat_rows:
            return "\n".join(text_list)
        else:
            return text_list


class PandasCSVParser(BaseParser):
    r"""Pandas-based CSV parser.

    Parses CSVs using the separator detection from Pandas' `read_csv` function.
    If special parameters are required, use the `pandas_config` dict.

    Args:
        concat_rows (bool): whether to concatenate all rows into one document.
            If set to False, a Document will be created for each row.
            True by default.

        col_joiner (str): Separator to use for joining cols per row.
            Set to ", " by default.

        row_joiner (str): Separator to use for joining each row.
            Only used when `concat_rows=True`.
            Set to "\n" by default.

        pandas_config (dict): Options for the `pandas.read_csv` function call.
            Refer to https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
            for more information.
            Set to empty dict by default, this means pandas will try to figure
            out the separators, table head, etc. on its own.

    """

    def __init__(
        self,
        *args: Any,
        concat_rows: bool = True,
        col_joiner: str = ", ",
        row_joiner: str = "\n",
        pandas_config: dict = {},  # note: a mutable default; harmless here because it is never mutated
        **kwargs: Any
    ) -> None:
        """Init params."""
        super().__init__(*args, **kwargs)
        self._concat_rows = concat_rows
        self._col_joiner = col_joiner
        self._row_joiner = row_joiner
        self._pandas_config = pandas_config

    def _init_parser(self) -> Dict:
        """Init parser."""
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, List[str]]:
        """Parse file."""
        try:
            import pandas as pd
        except ImportError:
            raise ValueError("pandas module is required to read CSV files.")

        df = pd.read_csv(file, **self._pandas_config)

        text_list = df.apply(
            lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1
        ).tolist()

        if self._concat_rows:
            return (self._row_joiner).join(text_list)
        else:
            return text_list
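A sketch contrasting the two CSV parsers above (hypothetical file path; `PandasCSVParser` additionally requires pandas to be installed):

```python
from pathlib import Path

from parser.file.tabular_parser import CSVParser, PandasCSVParser

csv_path = Path("data/table.csv")  # hypothetical input file

rows = CSVParser(concat_rows=False).parse_file(csv_path)       # list: one string per row
text = PandasCSVParser(col_joiner=" | ").parse_file(csv_path)  # single row-joined string
```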
application/parser/java2doc.py (new file, 66 lines)
@@ -0,0 +1,66 @@
import os

import javalang


def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.java'):
                files_list.append(os.path.join(root, file))
    return files_list


def extract_functions(file_path):
    with open(file_path, "r") as file:
        java_code = file.read()
    methods = {}
    tree = javalang.parse.parse(java_code)
    for _, node in tree.filter(javalang.tree.MethodDeclaration):
        method_name = node.name
        start_line = node.position.line - 1
        end_line = start_line
        brace_count = 0
        # Heuristic: walk forward counting braces until they balance; this assumes
        # the opening brace appears on the method's signature line.
        for line in java_code.splitlines()[start_line:]:
            end_line += 1
            brace_count += line.count("{") - line.count("}")
            if brace_count == 0:
                break
        method_source_code = "\n".join(java_code.splitlines()[start_line:end_line])
        methods[method_name] = method_source_code
    return methods


def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
    classes = {}
    tree = javalang.parse.parse(source_code)
    for class_decl in tree.types:
        class_name = class_decl.name
        declarations = []
        methods = []
        for field_decl in class_decl.fields:
            field_name = field_decl.declarators[0].name
            field_type = field_decl.type.name
            declarations.append(f"{field_type} {field_name}")
        for method_decl in class_decl.methods:
            methods.append(method_decl.name)
        class_string = "Declarations: " + ", ".join(declarations) + "\n Method name: " + ", ".join(methods)
        classes[class_name] = class_string
    return classes


def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
    classes_dict = {}
    for file in files:
        functions = extract_functions(file)
        if functions:
            functions_dict[file] = functions
        classes = extract_classes(file)
        if classes:
            classes_dict[file] = classes
    return functions_dict, classes_dict
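Usage is a single call per source tree; a sketch (the directory is hypothetical, and `javalang` must be installed):

```python
from parser.java2doc import extract_functions_and_classes

# both dicts are keyed by file path
functions, classes = extract_functions_and_classes("src/main/java")  # hypothetical tree
for path, methods in functions.items():
    print(path, "->", list(methods))
```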
application/parser/js2doc.py (new file, 70 lines)
@@ -0,0 +1,70 @@
import os

import escodegen
import esprima


def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.js'):
                files_list.append(os.path.join(root, file))
    return files_list


def extract_functions(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
    functions = {}
    tree = esprima.parseScript(source_code)
    for node in tree.body:
        if node.type == 'FunctionDeclaration':
            func_name = node.id.name if node.id else '<anonymous>'
            functions[func_name] = escodegen.generate(node)
        elif node.type == 'VariableDeclaration':
            for declaration in node.declarations:
                if declaration.init and declaration.init.type == 'FunctionExpression':
                    func_name = declaration.id.name if declaration.id else '<anonymous>'
                    functions[func_name] = escodegen.generate(declaration.init)
        elif node.type == 'ClassDeclaration':
            for subnode in node.body.body:
                if subnode.type == 'MethodDefinition':
                    func_name = subnode.key.name
                    functions[func_name] = escodegen.generate(subnode.value)
                elif subnode.type == 'VariableDeclaration':
                    for declaration in subnode.declarations:
                        if declaration.init and declaration.init.type == 'FunctionExpression':
                            func_name = declaration.id.name if declaration.id else '<anonymous>'
                            functions[func_name] = escodegen.generate(declaration.init)
    return functions


def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
    classes = {}
    tree = esprima.parseScript(source_code)
    for node in tree.body:
        if node.type == 'ClassDeclaration':
            class_name = node.id.name
            function_names = []
            for subnode in node.body.body:
                if subnode.type == 'MethodDefinition':
                    function_names.append(subnode.key.name)
            classes[class_name] = ", ".join(function_names)
    return classes


def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
    classes_dict = {}
    for file in files:
        functions = extract_functions(file)
        if functions:
            functions_dict[file] = functions
        classes = extract_classes(file)
        if classes:
            classes_dict[file] = classes
    return functions_dict, classes_dict
application/parser/open_ai_func.py (new file, 82 lines)
@@ -0,0 +1,82 @@
import os
from typing import Tuple

import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from retry import retry


# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.embeddings import HuggingFaceInstructEmbeddings
# from langchain.embeddings import CohereEmbeddings


def num_tokens_from_string(string: str, encoding_name: str) -> Tuple[int, float]:
    # Function to convert string to tokens and estimate user cost.
    # (fixed: the original annotation said `-> int` but a tuple is returned)
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    total_price = ((num_tokens / 1000) * 0.0004)
    return num_tokens, total_price


@retry(tries=10, delay=60)
def store_add_texts_with_retry(store, i):
    store.add_texts([i.page_content], metadatas=[i.metadata])
    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])


def call_openai_api(docs, folder_name, task_status):
    # Function to create a vector store from the documents and save it to disk.

    # create output folder if it doesn't exist
    if not os.path.exists(f"{folder_name}"):
        os.makedirs(f"{folder_name}")

    from tqdm import tqdm
    docs_test = [docs[0]]
    docs.pop(0)
    c1 = 0

    store = FAISS.from_documents(docs_test, OpenAIEmbeddings(openai_api_key=os.getenv("EMBEDDINGS_KEY")))

    # Uncomment for MPNet embeddings
    # model_name = "sentence-transformers/all-mpnet-base-v2"
    # hf = HuggingFaceEmbeddings(model_name=model_name)
    # store = FAISS.from_documents(docs_test, hf)
    s1 = len(docs)
    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
                  bar_format='{l_bar}{bar}| Time Left: {remaining}'):
        try:
            task_status.update_state(state='PROGRESS', meta={'current': int((c1 / s1) * 100)})
            store_add_texts_with_retry(store, i)
        except Exception as e:
            print(e)
            print("Error on ", i)
            print("Saving progress")
            print(f"stopped at {c1} out of {len(docs)}")
            store.save_local(f"{folder_name}")
            break
        c1 += 1
    store.save_local(f"{folder_name}")


def get_user_permission(docs, folder_name):
    # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
    # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
    # docs_content = (" ".join(docs))
    docs_content = ""
    for doc in docs:
        docs_content += doc.page_content

    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
    # Here we check for user permission before calling the API.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input == "y":
        call_openai_api(docs, folder_name)  # note: call_openai_api also expects a task_status argument
    elif user_input == "":
        call_openai_api(docs, folder_name)  # note: call_openai_api also expects a task_status argument
    else:
        print("The API was not called. No money was spent.")
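The cost estimate is plain arithmetic on the tiktoken count at the $0.0004-per-1K-token rate hard-coded above; a small sketch:

```python
from parser.open_ai_func import num_tokens_from_string

tokens, price = num_tokens_from_string("hello world " * 1000, "cl100k_base")
# price == tokens / 1000 * 0.0004, e.g. roughly 2000 tokens -> roughly $0.0008
print(tokens, f"${price:.4f}")
```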
application/parser/py2doc.py (new file, 121 lines)
@@ -0,0 +1,121 @@
import ast
import os
from pathlib import Path

import tiktoken
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate


def find_files(directory):
    files_list = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.py'):
                files_list.append(os.path.join(root, file))
    return files_list


def extract_functions(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
    functions = {}
    tree = ast.parse(source_code)
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            func_name = node.name
            func_def = ast.get_source_segment(source_code, node)
            functions[func_name] = func_def
    return functions


def extract_classes(file_path):
    with open(file_path, 'r') as file:
        source_code = file.read()
    classes = {}
    tree = ast.parse(source_code)
    for node in ast.walk(tree):
        if isinstance(node, ast.ClassDef):
            class_name = node.name
            function_names = []
            for subnode in ast.walk(node):
                if isinstance(subnode, ast.FunctionDef):
                    function_names.append(subnode.name)
            classes[class_name] = ", ".join(function_names)
    return classes


def extract_functions_and_classes(directory):
    files = find_files(directory)
    functions_dict = {}
    classes_dict = {}
    for file in files:
        functions = extract_functions(file)
        if functions:
            functions_dict[file] = functions
        classes = extract_classes(file)
        if classes:
            classes_dict[file] = classes
    return functions_dict, classes_dict


def parse_functions(functions_dict, formats, dir):
    c1 = len(functions_dict)
    for i, (source, functions) in enumerate(functions_dict.items(), start=1):
        print(f"Processing file {i}/{c1}")
        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
        subfolders = "/".join(source_w.split("/")[:-1])
        Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
        for j, (name, function) in enumerate(functions.items(), start=1):
            print(f"Processing function {j}/{len(functions)}")
            prompt = PromptTemplate(
                input_variables=["code"],
                template="Code: \n{code}, \nDocumentation: ",
            )
            llm = OpenAI(temperature=0)
            response = llm(prompt.format(code=function))
            mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
            with open(f"outputs/{source_w}", mode) as f:
                f.write(
                    f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")


def parse_classes(classes_dict, formats, dir):
    c1 = len(classes_dict)
    for i, (source, classes) in enumerate(classes_dict.items()):
        print(f"Processing file {i + 1}/{c1}")
        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
        subfolders = "/".join(source_w.split("/")[:-1])
        Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
        for name, function_names in classes.items():
            # note: reuses the file counter, so this prints the file index, not a class index
            print(f"Processing Class {i + 1}/{c1}")
            prompt = PromptTemplate(
                input_variables=["class_name", "functions_names"],
                template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
            )
            llm = OpenAI(temperature=0)
            response = llm(prompt.format(class_name=name, functions_names=function_names))

            with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
                f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")


def transform_to_docs(functions_dict, classes_dict, formats, dir):
    docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
    docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])

    num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(docs_content))
    total_price = ((num_tokens / 1000) * 0.02)

    print(f"Number of Tokens = {num_tokens:,d}")
    print(f"Approx Cost = ${total_price:,.2f}")

    user_input = input("Price Okay? (Y/N)\n").lower()
    if user_input == "y" or user_input == "":
        if not Path("outputs").exists():
            Path("outputs").mkdir()
        parse_functions(functions_dict, formats, dir)
        parse_classes(classes_dict, formats, dir)
        print("All done!")
    else:
        print("The API was not called. No money was spent.")
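End to end, the module is meant to be driven through `transform_to_docs`, which prints the estimate and asks for confirmation before spending anything; a sketch (hypothetical directory, and OpenAI credentials must be configured for the LLM calls):

```python
from parser.py2doc import extract_functions_and_classes, transform_to_docs

functions_dict, classes_dict = extract_functions_and_classes("application/parser")
transform_to_docs(functions_dict, classes_dict, "py", "application/parser")
```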
application/parser/schema/base.py (new file, 34 lines)
@@ -0,0 +1,34 @@
"""Base schema for readers."""
from dataclasses import dataclass

from langchain.docstore.document import Document as LCDocument
from parser.schema.schema import BaseDocument


@dataclass
class Document(BaseDocument):
    """Generic interface for a data document.

    This document connects to data sources.

    """

    def __post_init__(self) -> None:
        """Post init."""
        if self.text is None:
            raise ValueError("text field not set.")

    @classmethod
    def get_type(cls) -> str:
        """Get Document type."""
        return "Document"

    def to_langchain_format(self) -> LCDocument:
        """Convert struct to LangChain document format."""
        metadata = self.extra_info or {}
        return LCDocument(page_content=self.text, metadata=metadata)

    @classmethod
    def from_langchain_format(cls, doc: LCDocument) -> "Document":
        """Convert struct from LangChain document format."""
        return cls(text=doc.page_content, extra_info=doc.metadata)
application/parser/schema/schema.py (new file, 64 lines)
@@ -0,0 +1,64 @@
"""Base schema for data structures."""
from abc import abstractmethod
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from dataclasses_json import DataClassJsonMixin


@dataclass
class BaseDocument(DataClassJsonMixin):
    """Base document.

    Generic abstract interface that captures both index structs
    as well as documents.

    """

    # TODO: consolidate fields from Document/IndexStruct into base class
    text: Optional[str] = None
    doc_id: Optional[str] = None
    embedding: Optional[List[float]] = None

    # extra fields
    extra_info: Optional[Dict[str, Any]] = None

    @classmethod
    @abstractmethod
    def get_type(cls) -> str:
        """Get Document type."""

    def get_text(self) -> str:
        """Get text."""
        if self.text is None:
            raise ValueError("text field not set.")
        return self.text

    def get_doc_id(self) -> str:
        """Get doc_id."""
        if self.doc_id is None:
            raise ValueError("doc_id not set.")
        return self.doc_id

    @property
    def is_doc_id_none(self) -> bool:
        """Check if doc_id is None."""
        return self.doc_id is None

    def get_embedding(self) -> List[float]:
        """Get embedding.

        Errors if embedding is None.

        """
        if self.embedding is None:
            raise ValueError("embedding not set.")
        return self.embedding

    @property
    def extra_info_str(self) -> Optional[str]:
        """Extra info string."""
        if self.extra_info is None:
            return None

        return "\n".join([f"{k}: {str(v)}" for k, v in self.extra_info.items()])
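A quick round-trip through the two schema classes above, showing how `extra_info` maps onto LangChain metadata:

```python
from parser.schema.base import Document

doc = Document(text="hello", extra_info={"title": "greeting"})
lc_doc = doc.to_langchain_format()
assert lc_doc.page_content == "hello" and lc_doc.metadata["title"] == "greeting"

roundtrip = Document.from_langchain_format(lc_doc)
assert roundtrip.text == "hello"
```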
application/parser/token_func.py (new file, 74 lines)
@@ -0,0 +1,74 @@
import re
from math import ceil
from typing import List

import tiktoken
from parser.schema.base import Document


def separate_header_and_body(text):
    header_pattern = r"^(.*?\n){3}"
    match = re.match(header_pattern, text)
    # assumes the text has at least three lines; `match` is None otherwise
    header = match.group(0)
    body = text[len(header):]
    return header, body


def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
    docs = []
    current_group = None

    for doc in documents:
        doc_len = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))

        if current_group is None:
            current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
                                     extra_info=doc.extra_info)
        elif len(tiktoken.get_encoding("cl100k_base").encode(
                current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
            current_group.text += " " + doc.text
        else:
            docs.append(current_group)
            current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
                                     extra_info=doc.extra_info)

    if current_group is not None:
        docs.append(current_group)

    return docs


def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
    docs = []
    for doc in documents:
        token_length = len(tiktoken.get_encoding("cl100k_base").encode(doc.text))
        if token_length <= max_tokens:
            docs.append(doc)
        else:
            header, body = separate_header_and_body(doc.text)
            num_body_parts = ceil(token_length / max_tokens)
            part_length = ceil(len(body) / num_body_parts)
            body_parts = [body[i:i + part_length] for i in range(0, len(body), part_length)]
            for i, body_part in enumerate(body_parts):
                new_doc = Document(text=header + body_part.strip(),
                                   doc_id=f"{doc.doc_id}-{i}",
                                   embedding=doc.embedding,
                                   extra_info=doc.extra_info)
                docs.append(new_doc)
    return docs


def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
    if not token_check:
        return documents
    print("Grouping small documents")
    try:
        documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
    except Exception:
        print("Grouping failed, try running without token_check")
    print("Separating large documents")
    try:
        documents = split_documents(documents=documents, max_tokens=max_tokens)
    except Exception:
        # fixed: the original repeated the "Grouping failed" message here
        print("Splitting failed, try running without token_check")
    return documents
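`group_split` is the entry point the rest of the pipeline uses; a sketch with synthetic documents (note each text starts with the three header lines that `separate_header_and_body` expects):

```python
from parser.schema.base import Document
from parser.token_func import group_split

docs = [Document(text=f"Title {i}\nAuthor\nDate\n" + "body " * 40, doc_id=str(i))
        for i in range(4)]
# grouping then splitting keeps every chunk within the token budget
docs = group_split(documents=docs, max_tokens=2000, min_tokens=150, token_check=True)
print([d.doc_id for d in docs])
```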
application/prompts/chat_combine_prompt.txt (new file, 9 lines)
@@ -0,0 +1,9 @@
You are DocsGPT, a friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
Use the following pieces of context to help answer the user's question. If it's not relevant to the question, provide friendly responses.
You have access to chat history, and can use it to help answer the question.
When using code examples, use the following format:
```(language)
(code)
```
----------------
{summaries}
application/prompts/chat_reduce_prompt.txt (new file, 3 lines)
@@ -0,0 +1,3 @@
Use the following pieces of context to help answer the user's question. If it's not relevant to the question, respond with "-"
----------------
{context}
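Both prompt files end in a single placeholder ({summaries} and {context} respectively) that the chat chain fills in at query time; presumably chat_reduce_prompt.txt is applied per retrieved chunk first, and the surviving pieces are then merged under chat_combine_prompt.txt, following LangChain's map-reduce convention.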
@@ -1,126 +1,106 @@
aiodns==3.0.0
aiohttp==3.8.3
aiohttp==3.8.4
aiohttp-retry==2.8.3
aiosignal==1.3.1
alabaster==0.7.13
aleph-alpha-client==2.16.0
anyio==3.6.2
argilla==1.3.0
aleph-alpha-client==2.16.1
amqp==5.1.1
async-timeout==4.0.2
attrs==22.2.0
Babel==2.11.0
backoff==2.2.1
billiard==3.6.4.0
blobfile==2.0.1
boto3==1.26.82
botocore==1.29.82
boto3==1.26.102
botocore==1.29.102
cffi==1.15.1
charset-normalizer==2.1.1
charset-normalizer==3.1.0
click==8.1.3
cohere==3.4.0
click-didyoumean==0.3.0
click-plugins==1.1.1
click-repl==0.2.0
cryptography==39.0.2
dataclasses-json==0.5.7
decorator==5.1.1
deeplake==3.2.12
Deprecated==1.2.13
deeplake==3.2.13
dill==0.3.6
docutils==0.19
docx2txt==0.8
dnspython==2.3.0
ecdsa==0.18.0
entrypoints==0.4
escodegen==1.0.10
esprima==4.0.1
esutils==1.0.1
et-xmlfile==1.1.0
faiss-cpu==1.7.3
filelock==3.9.0
Flask==2.2.2
Flask==2.2.3
Flask-Cors==3.0.10
frozenlist==1.3.3
geojson==2.5.0
greenlet==2.0.2
gunicorn==20.1.0
h11==0.14.0
httpcore==0.16.3
httpx==0.23.3
gpt4all==0.1.7
hub==3.0.1
huggingface-hub==0.12.0
huggingface-hub==0.12.1
humbug==0.2.8
idna==3.4
imagesize==1.4.1
itsdangerous==2.1.2
javalang==0.13.0
Jinja2==3.1.2
jmespath==1.0.1
joblib==1.2.0
langchain==0.0.98
kombu==5.2.4
langchain==0.0.179
loguru==0.6.0
lxml==4.9.2
manifest-ml==0.1.1
MarkupSafe==2.1.2
marshmallow==3.19.0
marshmallow-enum==1.5.1
monotonic==1.6
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.14
mypy-extensions==0.4.3
mypy-extensions==1.0.0
networkx==3.0
nltk==3.8.1
numcodecs==0.11.0
numpy==1.23.5
numpy==1.24.2
openai==0.27.0
openpyxl==3.1.1
packaging==23.0
pandas==1.5.3
pathos==0.3.0
Pillow==9.4.0
pox==0.3.2
ppft==1.7.6.6
prompt-toolkit==3.0.38
py==1.11.0
pyasn1==0.4.8
pycares==4.3.0
pycparser==2.21
pycryptodomex==3.17
pydantic==1.10.4
Pygments==2.14.0
pydantic==1.10.5
PyJWT==2.6.0
pymongo==4.3.3
pyowm==3.3.0
PyPDF2==3.0.1
PySocks==1.7.1
python-dateutil==2.8.2
python-docx==0.8.11
python-dotenv==0.21.1
python-magic==0.4.27
python-pptx==0.6.21
python-dotenv==1.0.0
python-jose==3.3.0
pytz==2022.7.1
PyYAML==6.0
redis==4.5.1
redis==4.5.4
regex==2022.10.31
requests==2.28.2
retry==0.9.2
rfc3986==1.5.0
rsa==4.9
s3transfer==0.6.0
scikit-learn==1.2.1
scipy==1.10.0
scikit-learn==1.2.2
scipy==1.10.1
sentence-transformers==2.2.2
sentencepiece==0.1.97
six==1.16.0
sniffio==1.3.0
snowballstemmer==2.2.0
Sphinx==6.1.3
sphinxcontrib-applehelp==1.0.4
sphinxcontrib-devhelp==1.0.2
sphinxcontrib-htmlhelp==2.0.1
sphinxcontrib-jsmath==1.0.1
sphinxcontrib-qthelp==1.0.3
sphinxcontrib-serializinghtml==1.1.5
SQLAlchemy==1.4.46
sqlitedict==2.1.0
tenacity==8.2.1
sympy==1.11.1
tenacity==8.2.2
threadpoolctl==3.1.0
tiktoken==0.1.2
tokenizers==0.13.2
torch==1.13.1
torchvision==0.14.1
tqdm==4.64.1
transformers==4.26.0
torch==2.0.0
torchvision==0.15.1
tqdm==4.65.0
transformers==4.27.2
typer==0.7.0
typing-inspect==0.8.0
typing_extensions==4.4.0
unstructured==0.4.11
typing_extensions==4.5.0
urllib3==1.26.14
Werkzeug==2.2.3
wrapt==1.14.1
XlsxWriter==3.0.8
xxhash==3.2.0
vine==5.0.0
wcwidth==0.2.6
yarl==1.8.2
application/static/dist/css/output.css (vendored, 50 changed lines)
@@ -525,6 +525,10 @@ video {
  position: absolute;
}

.relative {
  position: relative;
}

.inset-0 {
  top: 0px;
  right: 0px;
@@ -604,6 +608,10 @@ video {
  min-height: 100vh;
}

.w-auto {
  width: auto;
}

.w-full {
  width: 100%;
}
@@ -648,12 +656,16 @@ video {
  overflow-y: auto;
}

.rounded {
  border-radius: 0.25rem;
}

.rounded-lg {
  border-radius: 0.5rem;
}

.rounded {
  border-radius: 0.25rem;
.rounded-md {
  border-radius: 0.375rem;
}

.border {
@@ -723,6 +735,11 @@ video {
  padding-bottom: 0.5rem;
}

.py-4 {
  padding-top: 1rem;
  padding-bottom: 1rem;
}

.pt-4 {
  padding-top: 1rem;
}
@@ -761,6 +778,11 @@ video {
  line-height: 1.25rem;
}

.text-xl {
  font-size: 1.25rem;
  line-height: 1.75rem;
}

.font-medium {
  font-weight: 500;
}
@@ -842,6 +864,11 @@ video {
  }
}

.hover\:bg-blue-600:hover {
  --tw-bg-opacity: 1;
  background-color: rgb(37 99 235 / var(--tw-bg-opacity));
}

.hover\:bg-blue-700:hover {
  --tw-bg-opacity: 1;
  background-color: rgb(29 78 216 / var(--tw-bg-opacity));
@@ -862,11 +889,26 @@ video {
  border-color: rgb(59 130 246 / var(--tw-border-opacity));
}

.focus\:outline-none:focus {
  outline: 2px solid transparent;
  outline-offset: 2px;
}

.focus\:ring-2:focus {
  --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
  --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(2px + var(--tw-ring-offset-width)) var(--tw-ring-color);
  box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
}

.focus\:ring-blue-500:focus {
  --tw-ring-opacity: 1;
  --tw-ring-color: rgb(59 130 246 / var(--tw-ring-opacity));
}

.focus\:ring-offset-2:focus {
  --tw-ring-offset-width: 2px;
}

@media (min-width: 640px) {
  .sm\:my-8 {
    margin-top: 2rem;
@@ -881,6 +923,10 @@ video {
  display: inline-block;
}

.sm\:inline {
  display: inline;
}

.sm\:h-screen {
  height: 100vh;
}
@@ -71,4 +71,6 @@ function submitForm(event){
    });
}

window.addEventListener('submit',submitForm)
//window.addEventListener('submit',submitForm)
// rewrite using id = button-submit
document.getElementById("button-submit").addEventListener('click',submitForm)
@@ -86,6 +86,19 @@ This will return a new DataFrame with all the columns from both tables, and only
<option selected value="default">Choose documentation</option>
<option value="default">Default</option>
</select>
<form action="/api/upload" method="post" enctype="multipart/form-data" class="mt-2">
    <input type="file" name="file" class="py-4" id="file-upload">
    <input type="text" name="user" value="local" hidden>
    <input type="text" name="name" placeholder="Name:">

    <button type="submit" class="py-2 px-4 text-white bg-blue-500 rounded-md hover:bg-blue-600 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500">
        Upload
    </button>
</form>

</div>
</div>
@@ -130,7 +143,7 @@ This will return a new DataFrame with all the columns from both tables, and only
function docsIndex() {
    // loads latest index from https://raw.githubusercontent.com/arc53/DocsHUB/main/combined.json
    // and stores it in localStorage
    fetch('https://d3dg1063dc54p9.cloudfront.net/combined.json')
    fetch('/api/combine')
        .then(response => response.json())
        .then(data => {
            localStorage.setItem("docsIndex", JSON.stringify(data));
@@ -150,19 +163,26 @@ This will return a new DataFrame with all the columns from both tables, and only
// create option for each key in docsIndex
for (var key in docsIndex) {
    var option = document.createElement("option");
    if (docsIndex[key].name == docsIndex[key].language) {
        option.text = docsIndex[key].name + " " + docsIndex[key].version;
        option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
        if (docsIndex[key].model == "{{ embeddings_choice }}") {
            select.add(option);
    if (docsIndex[key].location == 'docshub'){
        if (docsIndex[key].name == docsIndex[key].language) {
            option.text = docsIndex[key].name + " " + docsIndex[key].version;
            option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
            if (docsIndex[key].model == "{{ embeddings_choice }}") {
                select.add(option);
            }
        }
        else {
            option.text = docsIndex[key].name + " " + docsIndex[key].version;
            option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
            if (docsIndex[key].model == "{{ embeddings_choice }}") {
                select.add(option);
            }
        }
    }
    else {
        option.text = docsIndex[key].name + " " + docsIndex[key].version;
        option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
        if (docsIndex[key].model == "{{ embeddings_choice }}") {
            select.add(option);
        }
        option.text = docsIndex[key].name;
        option.value = docsIndex[key].location + "/" + docsIndex[key].name;
        select.add(option);
    }
}
application/worker.py (new file, 94 lines)
@@ -0,0 +1,94 @@
import os
import shutil
import string
import zipfile
from urllib.parse import urljoin

import nltk
import requests

from core.settings import settings
from parser.file.bulk import SimpleDirectoryReader
from parser.open_ai_func import call_openai_api
from parser.schema.base import Document
from parser.token_func import group_split

try:
    nltk.download('punkt', quiet=True)
    nltk.download('averaged_perceptron_tagger', quiet=True)
except FileExistsError:
    pass


def metadata_from_filename(title):
    return {'title': title}


def generate_random_string(length):
    # note: despite the name, this is deterministic; it simply cycles the alphabet
    return ''.join([string.ascii_letters[i % 52] for i in range(length)])


def ingest_worker(self, directory, formats, name_job, filename, user):
    # directory = 'inputs' or 'temp'
    # formats = [".rst", ".md"]
    input_files = None
    recursive = True
    limit = None
    exclude = True
    # name_job = 'job1'
    # filename = 'install.rst'
    # user = 'local'
    sample = False
    token_check = True
    min_tokens = 150
    max_tokens = 1250
    full_path = directory + '/' + user + '/' + name_job
    # check if API_URL env variable is set
    file_data = {'name': name_job, 'file': filename, 'user': user}
    response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
    file = response.content

    if not os.path.exists(full_path):
        os.makedirs(full_path)
    with open(full_path + '/' + filename, 'wb') as f:
        f.write(file)

    # check if file is .zip and extract it
    if filename.endswith('.zip'):
        with zipfile.ZipFile(full_path + '/' + filename, 'r') as zip_ref:
            zip_ref.extractall(full_path)
        os.remove(full_path + '/' + filename)

    self.update_state(state='PROGRESS', meta={'current': 1})

    raw_docs = SimpleDirectoryReader(input_dir=full_path, input_files=input_files, recursive=recursive,
                                     required_exts=formats, num_files_limit=limit,
                                     exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
    raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)

    docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

    call_openai_api(docs, full_path, self)
    self.update_state(state='PROGRESS', meta={'current': 100})

    if sample:
        for i in range(min(5, len(raw_docs))):
            print(raw_docs[i].text)

    # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
    # and send them to the server (provide user and name in form)
    file_data = {'name': name_job, 'user': user}
    files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
             'file_pkl': open(full_path + '/index.pkl', 'rb')}
    response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)

    response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path="))
    # delete local
    shutil.rmtree(full_path)

    return {
        'directory': directory,
        'formats': formats,
        'name_job': name_job,
        'filename': filename,
        'user': user,
        'limited': False
    }
@@ -1,4 +1,4 @@
from app import app

if __name__ == "__main__":
    app.run()
    app.run(debug=True, port=5001)
docker-compose-dev.yaml (new file, 20 lines)
@@ -0,0 +1,20 @@
version: "3.9"

services:

  redis:
    image: redis:6-alpine
    ports:
      - 6379:6379

  mongo:
    image: mongo:6
    ports:
      - 27017:27017
    volumes:
      - mongodb_data_container:/data/db


volumes:
  mongodb_data_container:
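Note that the dev compose file provisions only the stateful services (Redis and Mongo) on their default ports; the Flask backend and Celery worker are presumably expected to run directly on the host during development, pointing at localhost:6379 and localhost:27017.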
@@ -4,12 +4,58 @@ services:
  frontend:
    build: ./frontend
    environment:
      - API_HOST=http://backend:5001
      - VITE_API_HOST=http://localhost:5001
      - VITE_API_STREAMING=$VITE_API_STREAMING
    ports:
      - "5173:5173"
    depends_on:
      - backend

  backend:
    build: ./application
    environment:
      - API_KEY=$OPENAI_API_KEY
      - EMBEDDINGS_KEY=$OPENAI_API_KEY
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - MONGO_URI=mongodb://mongo:27017/docsgpt
    ports:
      - "5001:5001"
    volumes:
      - ./application/indexes:/app/indexes
      - ./application/inputs:/app/inputs
      - ./application/vectors:/app/vectors
    depends_on:
      - redis
      - mongo

  worker:
    build: ./application
    command: celery -A app.celery worker -l INFO
    environment:
      - API_KEY=$OPENAI_API_KEY
      - EMBEDDINGS_KEY=$OPENAI_API_KEY
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/1
      - MONGO_URI=mongodb://mongo:27017/docsgpt
      - API_URL=http://backend:5001
    depends_on:
      - redis
      - mongo

  redis:
    image: redis:6-alpine
    ports:
      - 6379:6379

  mongo:
    image: mongo:6
    ports:
      - 27017:27017
    volumes:
      - mongodb_data_container:/data/db


volumes:
  mongodb_data_container:
extensions/chatwoot/.env_sample (new file, 6 lines)
@@ -0,0 +1,6 @@
docsgpt_url=<docsgpt_api_url>
chatwoot_url=<chatwoot_url>
docsgpt_key=<openai_api_key or other llm>
chatwoot_token=xxxxx
account_id=(optional) 1
assignee_id=(optional) 1
@@ -1,13 +1,18 @@
import requests
import dotenv
import os
import json
import pprint

import dotenv
import requests
from flask import Flask, request

dotenv.load_dotenv()
docsgpt_url = os.getenv("docsgpt_url")
chatwoot_url = os.getenv("chatwoot_url")
docsgpt_key = os.getenv("docsgpt_key")
chatwoot_token = os.getenv("chatwoot_token")
# account_id = os.getenv("account_id")
# assignee_id = os.getenv("assignee_id")
label_stop = "human-requested"


def send_to_bot(sender, message):
@@ -40,38 +45,45 @@ def send_to_chatwoot(account, conversation, message):
    return r.json()


from flask import Flask, request
app = Flask(__name__)


@app.route('/docsgpt', methods=['POST'])
def docsgpt():
    data = request.get_json()
    message_type = data['message_type']
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(data)
    try:
        message_type = data['message_type']
    except KeyError:
        return "Not a message"
    message = data['content']
    conversation = data['conversation']['id']
    contact = data['sender']['id']
    account = data['account']['id']
    assignee = data['conversation']['meta']['assignee']['id']
    print(account)
    print(label_stop)
    print(data['conversation']['labels'])
    print(assignee)

    if(message_type == "incoming"):
    if label_stop in data['conversation']['labels']:
        return "Label stop"
    # elif str(account) != str(account_id):
    #     return "Not the right account"

    # elif str(assignee) != str(assignee_id):
    #     return "Not the right assignee"

    if (message_type == "incoming"):
        bot_response = send_to_bot(contact, message)
        create_message = send_to_chatwoot(
            account, conversation, bot_response)
        response = requests.post(
            url="https://86x89umx77.execute-api.eu-west-2.amazonaws.com/docsgpt-logs",
            headers={
                "Content-Type": "application/json; charset=utf-8",
            },
            data=json.dumps({
                "answer": str(bot_response),
                "question": str(message),
                "source": "chatwoot"
            })
        )
    else:
        return "Not an incoming message"

    return create_message


if __name__ == '__main__':
    app.run(debug=True, host='0.0.0.0')
    app.run(host='0.0.0.0', port=80)
extensions/chrome/dist/output.css (vendored new file, 678 lines)
@@ -0,0 +1,678 @@
/*
! tailwindcss v3.2.7 | MIT License | https://tailwindcss.com
*/

/*
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
*/

*,
::before,
::after {
  box-sizing: border-box;
  /* 1 */
  border-width: 0;
  /* 2 */
  border-style: solid;
  /* 2 */
  border-color: #e5e7eb;
  /* 2 */
}

::before,
::after {
  --tw-content: '';
}

/*
1. Use a consistent sensible line-height in all browsers.
2. Prevent adjustments of font size after orientation changes in iOS.
3. Use a more readable tab size.
4. Use the user's configured `sans` font-family by default.
5. Use the user's configured `sans` font-feature-settings by default.
*/

html {
  line-height: 1.5;
  /* 1 */
  -webkit-text-size-adjust: 100%;
  /* 2 */
  -moz-tab-size: 4;
  /* 3 */
  -o-tab-size: 4;
  tab-size: 4;
  /* 3 */
  font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
  /* 4 */
  font-feature-settings: normal;
  /* 5 */
}

/*
1. Remove the margin in all browsers.
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
*/

body {
  margin: 0;
  /* 1 */
  line-height: inherit;
  /* 2 */
}

/*
1. Add the correct height in Firefox.
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
3. Ensure horizontal rules are visible by default.
*/

hr {
  height: 0;
  /* 1 */
  color: inherit;
  /* 2 */
  border-top-width: 1px;
  /* 3 */
}

/*
Add the correct text decoration in Chrome, Edge, and Safari.
*/

abbr:where([title]) {
  -webkit-text-decoration: underline dotted;
  text-decoration: underline dotted;
}

/*
Remove the default font size and weight for headings.
*/

h1,
h2,
h3,
h4,
h5,
h6 {
  font-size: inherit;
  font-weight: inherit;
}

/*
Reset links to optimize for opt-in styling instead of opt-out.
*/

a {
  color: inherit;
  text-decoration: inherit;
}

/*
Add the correct font weight in Edge and Safari.
*/

b,
strong {
  font-weight: bolder;
}

/*
1. Use the user's configured `mono` font family by default.
2. Correct the odd `em` font sizing in all browsers.
*/

code,
kbd,
samp,
pre {
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
  /* 1 */
  font-size: 1em;
  /* 2 */
}

/*
Add the correct font size in all browsers.
*/

small {
  font-size: 80%;
}

/*
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
*/

sub,
sup {
  font-size: 75%;
  line-height: 0;
  position: relative;
  vertical-align: baseline;
}

sub {
  bottom: -0.25em;
}

sup {
  top: -0.5em;
}

/*
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
3. Remove gaps between table borders by default.
*/

table {
  text-indent: 0;
  /* 1 */
  border-color: inherit;
  /* 2 */
  border-collapse: collapse;
  /* 3 */
}

/*
1. Change the font styles in all browsers.
2. Remove the margin in Firefox and Safari.
3. Remove default padding in all browsers.
*/

button,
input,
optgroup,
select,
textarea {
  font-family: inherit;
  /* 1 */
  font-size: 100%;
  /* 1 */
  font-weight: inherit;
  /* 1 */
  line-height: inherit;
  /* 1 */
  color: inherit;
  /* 1 */
  margin: 0;
  /* 2 */
  padding: 0;
  /* 3 */
}

/*
Remove the inheritance of text transform in Edge and Firefox.
*/

button,
select {
  text-transform: none;
}

/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Remove default button styles.
*/

button,
[type='button'],
[type='reset'],
[type='submit'] {
  -webkit-appearance: button;
  /* 1 */
  background-color: transparent;
  /* 2 */
  background-image: none;
  /* 2 */
}

/*
Use the modern Firefox focus style for all focusable elements.
*/

:-moz-focusring {
  outline: auto;
}

/*
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
*/

:-moz-ui-invalid {
  box-shadow: none;
}

/*
Add the correct vertical alignment in Chrome and Firefox.
*/

progress {
  vertical-align: baseline;
}

/*
Correct the cursor style of increment and decrement buttons in Safari.
*/

::-webkit-inner-spin-button,
::-webkit-outer-spin-button {
  height: auto;
}

/*
1. Correct the odd appearance in Chrome and Safari.
2. Correct the outline style in Safari.
*/

[type='search'] {
  -webkit-appearance: textfield;
  /* 1 */
  outline-offset: -2px;
  /* 2 */
}

/*
Remove the inner padding in Chrome and Safari on macOS.
*/

::-webkit-search-decoration {
  -webkit-appearance: none;
}

/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Change font properties to `inherit` in Safari.
*/

::-webkit-file-upload-button {
  -webkit-appearance: button;
  /* 1 */
  font: inherit;
  /* 2 */
}

/*
Add the correct display in Chrome and Safari.
*/

summary {
  display: list-item;
}

/*
Removes the default spacing and border for appropriate elements.
*/

blockquote,
dl,
dd,
h1,
h2,
h3,
h4,
h5,
h6,
hr,
figure,
p,
pre {
  margin: 0;
}

fieldset {
  margin: 0;
  padding: 0;
}

legend {
  padding: 0;
}

ol,
ul,
menu {
  list-style: none;
  margin: 0;
  padding: 0;
}

/*
Prevent resizing textareas horizontally by default.
*/

textarea {
  resize: vertical;
}

/*
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
2. Set the default placeholder color to the user's configured gray 400 color.
*/

input::-moz-placeholder, textarea::-moz-placeholder {
  opacity: 1;
  /* 1 */
  color: #9ca3af;
  /* 2 */
}

input::placeholder,
textarea::placeholder {
  opacity: 1;
  /* 1 */
  color: #9ca3af;
  /* 2 */
}

/*
Set the default cursor for buttons.
*/

button,
[role="button"] {
  cursor: pointer;
}

/*
Make sure disabled buttons don't get the pointer cursor.
*/

:disabled {
  cursor: default;
}

/*
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block;
|
||||
/* 1 */
|
||||
vertical-align: middle;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Make elements with the HTML hidden attribute stay hidden by default */
|
||||
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
*, ::before, ::after {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
::backdrop {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
.mb-2 {
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.ml-2 {
|
||||
margin-left: 0.5rem;
|
||||
}
|
||||
|
||||
.mr-2 {
|
||||
margin-right: 0.5rem;
|
||||
}
|
||||
|
||||
.mt-4 {
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.w-\[26rem\] {
|
||||
width: 26rem;
|
||||
}
|
||||
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.flex-col {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.items-center {
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.justify-between {
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
.self-start {
|
||||
align-self: flex-start;
|
||||
}
|
||||
|
||||
.self-end {
|
||||
align-self: flex-end;
|
||||
}
|
||||
|
||||
.rounded-lg {
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
.bg-blue-500 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(59 130 246 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-gray-200 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(229 231 235 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-gray-900 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(17 24 39 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-indigo-500 {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(99 102 241 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.bg-white {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(255 255 255 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.p-2 {
|
||||
padding: 0.5rem;
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1rem;
|
||||
}
|
||||
|
||||
.text-lg {
|
||||
font-size: 1.125rem;
|
||||
line-height: 1.75rem;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.25rem;
|
||||
}
|
||||
|
||||
.font-medium {
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.text-blue-500 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(59 130 246 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(55 65 81 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-white {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(255 255 255 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.shadow {
|
||||
--tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
|
||||
--tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
|
||||
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
|
||||
}
|
||||
|
||||
#chat-container {
|
||||
width: 500px;
|
||||
height: 450px;
|
||||
background-color: white;
|
||||
padding: 10px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.bg-gray-200 {
|
||||
background-color: #edf2f7;
|
||||
}
|
||||
|
||||
.bg-gray-900 {
|
||||
background-color: #1a202c;
|
||||
}
|
||||
|
||||
.rounded-lg {
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
.shadow {
|
||||
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
color: #4a5568;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1.5rem;
|
||||
}
|
||||
|
||||
.hover\:text-blue-800:hover {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(30 64 175 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
 {
   "name": "DocsGPT - Documentation AI butler",
   "version": "0.0.1",
-  "manifest_version": 2,
+  "manifest_version": 3,
   "description": "AI assistant for developers, that helps you answer your questions about the documentation you are reading.",
   "icons": {
     "16": "icons/icon16.png",
@@ -10,12 +10,19 @@
   },
   "default_locale": "en",
   "background": {
-    "page": "src/bg/background.html",
-    "persistent": true
+    "service_worker": "src/bg/service-worker.js"
   },
+  "action": {
+    "default_title": "DocsGPT - Documentation AI butler",
+    "default_popup": "popup.html"
+  },
   "permissions": ["activeTab", "storage"],
-  "browser_action": {
-    "default_popup": "popup.html"
-  }
+  "host_permissions": [
+    "*://*/*"
+  ],
+  "content_scripts": [{
+    "js": ["popup.js"],
+    "matches": ["https://github.com/*"]
+  }]
 
 }
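
The hunk above migrates the extension from manifest v2 to v3: the persistent background page becomes a service worker, `browser_action` becomes `action`, and `host_permissions` plus a content script on github.com are added. As a rough sketch that is not part of the diff, the retained `activeTab` permission can be exercised from the popup like this:

```javascript
// Hypothetical usage, not from the repository: once "activeTab" has been
// granted by invoking the extension, the popup can inspect the current tab.
chrome.tabs.query({ active: true, currentWindow: true }, ([tab]) => {
  console.log("Answering questions about:", tab.url);
});
```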
@@ -1,4 +1,19 @@
{
  "name": "docsgpt-chrome-extension",
  "version": "0.0.1",
  "description": "DocsGPT - Documentation AI butler",
  "main": "popup.js",
  "author": "",
  "license": "MIT",
  "scripts": {
    "dev": "npx tailwindcss -i ./styles.css -o ./dist/output.css --watch"
  },
  "keywords": [
    "DocsGPT",
    "Documentation",
    "Chrome",
    "extension"
  ],
  "devDependencies": {
    "tailwindcss": "^3.2.4"
  }
}
@@ -21,12 +21,12 @@ document.getElementById("message-form").addEventListener("submit", function(event
   }
 
   // send post request to server http://127.0.0.1:5000/ with message in json body
-  fetch('http://127.0.0.1:5000/api/answer', {
+  fetch('http://127.0.0.1:5001/api/answer', {
     method: 'POST',
     headers: {
       'Content-Type': 'application/json',
     },
-    body: JSON.stringify({question: message}),
+    body: JSON.stringify({question: message, history: null}),
   })
   .then(response => response.json())
   .then(data => {
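
This change points the popup at port 5001 and adds a `history` field to the request body. A standalone sketch of the updated call, assuming, as the widget and Discord bot elsewhere in this diff do, that the API responds with a JSON object carrying an `answer` field:

```javascript
// Minimal sketch of the request popup.js now sends; the question text
// is a placeholder.
fetch('http://127.0.0.1:5001/api/answer', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ question: 'How do I run DocsGPT locally?', history: null }),
})
  .then((response) => response.json())
  .then((data) => console.log(data.answer));
```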
@@ -1,6 +0,0 @@
<!DOCTYPE html>
<html>
<head>
  <script type="text/javascript" src="background.js"></script>
</head>
</html>
@@ -1,22 +0,0 @@
// if you checked "fancy-settings" in extensionizr.com, uncomment these lines

// var settings = new Store("settings", {
//   "sample_setting": "This is how you use Store.js to remember values"
// });


// example of using a message handler from the inject scripts
chrome.extension.onMessage.addListener(
  function(request, sender, sendResponse) {
    chrome.pageAction.show(sender.tab.id);
    sendResponse();
  });


chrome.runtime.onMessage.addListener(
  function(request, sender, sendResponse) {
    if (request.msg === "sendMessage") {
      sendResponse({response: "Message received"});
    }
  });
extensions/chrome/src/bg/service-worker.js (new file, 12 lines)
@@ -0,0 +1,12 @@
// This is the service worker script, which executes in its own context
// when the extension is installed or refreshed (or when you access its console).
// It corresponds to the background script in Chrome extensions manifest v2.

console.log("This prints to the console of the service worker (background script)");
chrome.runtime.onMessage.addListener(
  function(request, sender, sendResponse) {
    if (request.msg === "sendMessage") {
      sendResponse({response: "Message received"});
    }
  }
);
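
A hedged example of the calling side, which is not part of this diff: any extension context, the popup for instance, can exercise the listener above via `chrome.runtime.sendMessage`:

```javascript
// Hypothetical caller: send the message the service worker listens for
// and log the acknowledgement it returns.
chrome.runtime.sendMessage({ msg: "sendMessage" }, (reply) => {
  console.log(reply.response); // "Message received"
});
```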
extensions/discord/bot.py (new file, 68 lines)
@@ -0,0 +1,68 @@
import os
import re

import discord
import requests
from discord.ext import commands
import dotenv

dotenv.load_dotenv()

# The bot token is read from the DISCORD_TOKEN environment variable (e.g. via a .env file)
TOKEN = os.getenv("DISCORD_TOKEN")
PREFIX = '@DocsGPT'
BASE_API_URL = 'http://localhost:5001'

intents = discord.Intents.default()
intents.message_content = True

bot = commands.Bot(command_prefix=PREFIX, intents=intents)


def split_string(input_str):
    # Strip a leading mention of the bot (<@id> or <@!id>) and return (bot id, remaining text)
    pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
    match = re.match(pattern, input_str)
    if match:
        content = input_str[match.end():].strip()
        return str(bot.user.id), content
    return None, input_str


@bot.event
async def on_ready():
    print(f'{bot.user.name} has connected to Discord!')


async def fetch_answer(question):
    data = {
        'sender': 'discord',
        'question': question,
        'history': ''
    }
    headers = {"Content-Type": "application/json",
               "Accept": "application/json"}
    response = requests.post(BASE_API_URL + '/api/answer', json=data, headers=headers)
    if response.status_code == 200:
        return response.json()['answer']
    return 'Sorry, I could not fetch the answer.'


@bot.event
async def on_message(message):
    if message.author == bot.user:
        return

    content = message.content.strip()
    prefix, content = split_string(content)
    if prefix is None:
        return

    part_prefix = str(bot.user.id)
    if part_prefix == prefix:
        answer = await fetch_answer(content)
        await message.channel.send(answer)

    await bot.process_commands(message)


bot.run(TOKEN)
extensions/web-widget/README.md (new file, 25 lines)
@@ -0,0 +1,25 @@
# Chat Widget

A simple chat widget that can be easily integrated into any website.

## Installation

1. Host the `widget.html`, `styles.css`, and `script.js` files from the `src` folder on your own server or a Content Delivery Network (CDN). Make a note of the URLs for these files.

2. Update the URLs in the `dist/chat-widget.js` file to match the locations of your hosted files:

```javascript
fetch("https://your-server-or-cdn.com/path/to/widget.html"),
fetch("https://your-server-or-cdn.com/path/to/styles.css"),
fetch("https://your-server-or-cdn.com/path/to/script.js"),
```

3. Host the `dist/chat-widget.js` file on your own server or CDN, and make a note of its URL.

## Integration

To integrate the chat widget into a website, add the following script tag to the HTML file, replacing URL_TO_CHAT_WIDGET_JS with the actual URL of your hosted chat-widget.js file:

```html
<script src="URL_TO_CHAT_WIDGET_JS"></script>
```
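
Equivalently, the tag can be injected at runtime; the CDN URL below is a placeholder, not a real endpoint:

```javascript
// Hypothetical host URL; equivalent to the static <script> tag above.
const widgetScript = document.createElement("script");
widgetScript.src = "https://cdn.example.com/docsgpt/chat-widget.js";
document.body.appendChild(widgetScript);
```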
extensions/web-widget/dist/chat-widget.js (vendored, new file, 41 lines)
@@ -0,0 +1,41 @@
(async function () {
  // Fetch the HTML and JavaScript from your server or CDN
  // (the CSS fetch is currently commented out)
  const [htmlRes, jsRes] = await Promise.all([
    fetch("https://s3-eu-west-2.amazonaws.com/arc53data/widget.html"),
    // fetch("https://s3-eu-west-2.amazonaws.com/arc53data/tailwind.css"),
    fetch("https://s3-eu-west-2.amazonaws.com/arc53data/script.js"),
  ]);

  const html = await htmlRes.text();
  // const css = await cssRes.text();
  const js = await jsRes.text();

  // Create a new link element
  const link = document.createElement("link");

  // Set the rel, href, type, and integrity attributes
  link.rel = "stylesheet";
  link.href = "https://cdn.tailwindcss.com/";
  link.type = "text/css";
  link.integrity = "sha384-PDOmVviaTm8N1W35y1NSmo80w6GPaGhbDuOBAF/5hRffaeGc6yOwIo1qAt4gqLGA%";

  // Get the document head and append the link element to it
  // document.head.appendChild(link);

  // Create a style element for the CSS
  // const style = document.createElement("style");
  // style.innerHTML = css;
  // document.head.appendChild(style);

  // Create a container for the chat widget and inject the HTML
  const chatWidgetContainer = document.createElement("div");
  chatWidgetContainer.innerHTML = html;
  document.body.appendChild(chatWidgetContainer);

  // Execute the fetched JavaScript code
  const script = document.createElement("script");
  script.innerHTML = js;
  document.body.appendChild(script);
})();
extensions/web-widget/dist/output.css (vendored, new file, 807 lines)
@@ -0,0 +1,807 @@
/*
! tailwindcss v3.3.1 | MIT License | https://tailwindcss.com
*/

/*
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
*/

*, ::before, ::after {
  box-sizing: border-box; /* 1 */
  border-width: 0; /* 2 */
  border-style: solid; /* 2 */
  border-color: #e5e7eb; /* 2 */
}

::before, ::after {
  --tw-content: '';
}

/*
1. Use a consistent sensible line-height in all browsers.
2. Prevent adjustments of font size after orientation changes in iOS.
3. Use a more readable tab size.
4. Use the user's configured `sans` font-family by default.
5. Use the user's configured `sans` font-feature-settings by default.
6. Use the user's configured `sans` font-variation-settings by default.
*/

html {
  line-height: 1.5; /* 1 */
  -webkit-text-size-adjust: 100%; /* 2 */
  -moz-tab-size: 4; /* 3 */
  -o-tab-size: 4; tab-size: 4; /* 3 */
  font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; /* 4 */
  font-feature-settings: normal; /* 5 */
  font-variation-settings: normal; /* 6 */
}

/*
1. Remove the margin in all browsers.
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
*/

body {
  margin: 0; /* 1 */
  line-height: inherit; /* 2 */
}

/*
1. Add the correct height in Firefox.
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
3. Ensure horizontal rules are visible by default.
*/

hr {
  height: 0; /* 1 */
  color: inherit; /* 2 */
  border-top-width: 1px; /* 3 */
}

/* Add the correct text decoration in Chrome, Edge, and Safari. */

abbr:where([title]) {
  -webkit-text-decoration: underline dotted;
  text-decoration: underline dotted;
}

/* Remove the default font size and weight for headings. */

h1, h2, h3, h4, h5, h6 {
  font-size: inherit;
  font-weight: inherit;
}

/* Reset links to optimize for opt-in styling instead of opt-out. */

a {
  color: inherit;
  text-decoration: inherit;
}

/* Add the correct font weight in Edge and Safari. */

b, strong {
  font-weight: bolder;
}

/*
1. Use the user's configured `mono` font family by default.
2. Correct the odd `em` font sizing in all browsers.
*/

code, kbd, samp, pre {
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; /* 1 */
  font-size: 1em; /* 2 */
}

/* Add the correct font size in all browsers. */

small {
  font-size: 80%;
}

/* Prevent `sub` and `sup` elements from affecting the line height in all browsers. */

sub, sup {
  font-size: 75%;
  line-height: 0;
  position: relative;
  vertical-align: baseline;
}

sub {
  bottom: -0.25em;
}

sup {
  top: -0.5em;
}

/*
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
2. Correct table border color inheritance in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
3. Remove gaps between table borders by default.
*/

table {
  text-indent: 0; /* 1 */
  border-color: inherit; /* 2 */
  border-collapse: collapse; /* 3 */
}

/*
1. Change the font styles in all browsers.
2. Remove the margin in Firefox and Safari.
3. Remove default padding in all browsers.
*/

button, input, optgroup, select, textarea {
  font-family: inherit; /* 1 */
  font-size: 100%; /* 1 */
  font-weight: inherit; /* 1 */
  line-height: inherit; /* 1 */
  color: inherit; /* 1 */
  margin: 0; /* 2 */
  padding: 0; /* 3 */
}

/* Remove the inheritance of text transform in Edge and Firefox. */

button, select {
  text-transform: none;
}

/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Remove default button styles.
*/

button, [type='button'], [type='reset'], [type='submit'] {
  -webkit-appearance: button; /* 1 */
  background-color: transparent; /* 2 */
  background-image: none; /* 2 */
}

/* Use the modern Firefox focus style for all focusable elements. */

:-moz-focusring {
  outline: auto;
}

/* Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737) */

:-moz-ui-invalid {
  box-shadow: none;
}

/* Add the correct vertical alignment in Chrome and Firefox. */

progress {
  vertical-align: baseline;
}

/* Correct the cursor style of increment and decrement buttons in Safari. */

::-webkit-inner-spin-button, ::-webkit-outer-spin-button {
  height: auto;
}

/*
1. Correct the odd appearance in Chrome and Safari.
2. Correct the outline style in Safari.
*/

[type='search'] {
  -webkit-appearance: textfield; /* 1 */
  outline-offset: -2px; /* 2 */
}

/* Remove the inner padding in Chrome and Safari on macOS. */

::-webkit-search-decoration {
  -webkit-appearance: none;
}

/*
1. Correct the inability to style clickable types in iOS and Safari.
2. Change font properties to `inherit` in Safari.
*/

::-webkit-file-upload-button {
  -webkit-appearance: button; /* 1 */
  font: inherit; /* 2 */
}

/* Add the correct display in Chrome and Safari. */

summary {
  display: list-item;
}

/* Remove the default spacing and border for appropriate elements. */

blockquote, dl, dd, h1, h2, h3, h4, h5, h6, hr, figure, p, pre {
  margin: 0;
}

fieldset {
  margin: 0;
  padding: 0;
}

legend {
  padding: 0;
}

ol, ul, menu {
  list-style: none;
  margin: 0;
  padding: 0;
}

/* Prevent resizing textareas horizontally by default. */

textarea {
  resize: vertical;
}

/*
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
2. Set the default placeholder color to the user's configured gray 400 color.
*/

input::-moz-placeholder, textarea::-moz-placeholder {
  opacity: 1; /* 1 */
  color: #9ca3af; /* 2 */
}

input::placeholder, textarea::placeholder {
  opacity: 1; /* 1 */
  color: #9ca3af; /* 2 */
}

/* Set the default cursor for buttons. */

button, [role="button"] {
  cursor: pointer;
}

/* Make sure disabled buttons don't get the pointer cursor. */

:disabled {
  cursor: default;
}

/*
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
   This can trigger a poorly considered lint error in some tools but is included by design.
*/

img, svg, video, canvas, audio, iframe, embed, object {
  display: block; /* 1 */
  vertical-align: middle; /* 2 */
}

/* Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14) */

img, video {
  max-width: 100%;
  height: auto;
}

/* Make elements with the HTML hidden attribute stay hidden by default. */

[hidden] {
  display: none;
}

*, ::before, ::after {
  --tw-border-spacing-x: 0; --tw-border-spacing-y: 0;
  --tw-translate-x: 0; --tw-translate-y: 0; --tw-rotate: 0;
  --tw-skew-x: 0; --tw-skew-y: 0; --tw-scale-x: 1; --tw-scale-y: 1;
  --tw-pan-x: ; --tw-pan-y: ; --tw-pinch-zoom: ;
  --tw-scroll-snap-strictness: proximity;
  --tw-ordinal: ; --tw-slashed-zero: ;
  --tw-numeric-figure: ; --tw-numeric-spacing: ; --tw-numeric-fraction: ;
  --tw-ring-inset: ; --tw-ring-offset-width: 0px; --tw-ring-offset-color: #fff;
  --tw-ring-color: rgb(59 130 246 / 0.5);
  --tw-ring-offset-shadow: 0 0 #0000; --tw-ring-shadow: 0 0 #0000;
  --tw-shadow: 0 0 #0000; --tw-shadow-colored: 0 0 #0000;
  --tw-blur: ; --tw-brightness: ; --tw-contrast: ; --tw-grayscale: ;
  --tw-hue-rotate: ; --tw-invert: ; --tw-saturate: ; --tw-sepia: ; --tw-drop-shadow: ;
  --tw-backdrop-blur: ; --tw-backdrop-brightness: ; --tw-backdrop-contrast: ;
  --tw-backdrop-grayscale: ; --tw-backdrop-hue-rotate: ; --tw-backdrop-invert: ;
  --tw-backdrop-opacity: ; --tw-backdrop-saturate: ; --tw-backdrop-sepia: ;
}

::backdrop {
  --tw-border-spacing-x: 0; --tw-border-spacing-y: 0;
  --tw-translate-x: 0; --tw-translate-y: 0; --tw-rotate: 0;
  --tw-skew-x: 0; --tw-skew-y: 0; --tw-scale-x: 1; --tw-scale-y: 1;
  --tw-pan-x: ; --tw-pan-y: ; --tw-pinch-zoom: ;
  --tw-scroll-snap-strictness: proximity;
  --tw-ordinal: ; --tw-slashed-zero: ;
  --tw-numeric-figure: ; --tw-numeric-spacing: ; --tw-numeric-fraction: ;
  --tw-ring-inset: ; --tw-ring-offset-width: 0px; --tw-ring-offset-color: #fff;
  --tw-ring-color: rgb(59 130 246 / 0.5);
  --tw-ring-offset-shadow: 0 0 #0000; --tw-ring-shadow: 0 0 #0000;
  --tw-shadow: 0 0 #0000; --tw-shadow-colored: 0 0 #0000;
  --tw-blur: ; --tw-brightness: ; --tw-contrast: ; --tw-grayscale: ;
  --tw-hue-rotate: ; --tw-invert: ; --tw-saturate: ; --tw-sepia: ; --tw-drop-shadow: ;
  --tw-backdrop-blur: ; --tw-backdrop-brightness: ; --tw-backdrop-contrast: ;
  --tw-backdrop-grayscale: ; --tw-backdrop-hue-rotate: ; --tw-backdrop-invert: ;
  --tw-backdrop-opacity: ; --tw-backdrop-saturate: ; --tw-backdrop-sepia: ;
}

.fixed { position: fixed; }
.absolute { position: absolute; }
.relative { position: relative; }
.inset-y-0 { top: 0px; bottom: 0px; }
.bottom-5 { bottom: 1.25rem; }
.left-5 { left: 1.25rem; }
.right-2 { right: 0.5rem; }
.z-50 { z-index: 50; }
.m-0 { margin: 0px; }
.-mx-2 { margin-left: -0.5rem; margin-right: -0.5rem; }
.mt-1 { margin-top: 0.25rem; }
.flex { display: flex; }
.hidden { display: none; }
.w-full { width: 100%; }
.flex-1 { flex: 1 1 0%; }
.transform { transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y)); }
.items-center { align-items: center; }
.justify-center { justify-content: center; }
.gap-2 { gap: 0.5rem; }

.divide-y > :not([hidden]) ~ :not([hidden]) {
  --tw-divide-y-reverse: 0;
  border-top-width: calc(1px * calc(1 - var(--tw-divide-y-reverse)));
  border-bottom-width: calc(1px * var(--tw-divide-y-reverse));
}

.rounded-md { border-radius: 0.375rem; }
.rounded-b { border-bottom-right-radius: 0.25rem; border-bottom-left-radius: 0.25rem; }
.border { border-width: 1px; }
.bg-transparent { background-color: transparent; }
.bg-gradient-to-br { background-image: linear-gradient(to bottom right, var(--tw-gradient-stops)); }

.from-gray-100\/80 {
  --tw-gradient-from: rgb(243 244 246 / 0.8) var(--tw-gradient-from-position);
  --tw-gradient-from-position: ;
  --tw-gradient-to: rgb(243 244 246 / 0) var(--tw-gradient-from-position);
  --tw-gradient-to-position: ;
  --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
}

.via-white {
  --tw-gradient-via-position: ;
  --tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);
  --tw-gradient-to-position: ;
  --tw-gradient-stops: var(--tw-gradient-from), #fff var(--tw-gradient-via-position), var(--tw-gradient-to);
}

.to-white {
  --tw-gradient-to: #fff var(--tw-gradient-to-position);
  --tw-gradient-to-position: ;
}

.p-3 { padding: 0.75rem; }
.px-2 { padding-left: 0.5rem; padding-right: 0.5rem; }
.px-5 { padding-left: 1.25rem; padding-right: 1.25rem; }
.py-3 { padding-top: 0.75rem; padding-bottom: 0.75rem; }
.pl-5 { padding-left: 1.25rem; }
.pr-8 { padding-right: 2rem; }
.font-sans { font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; }
.text-sm { font-size: 0.875rem; line-height: 1.25rem; }
.text-xs { font-size: 0.75rem; line-height: 1rem; }
.font-bold { font-weight: 700; }
.text-gray-400 { --tw-text-opacity: 1; color: rgb(156 163 175 / var(--tw-text-opacity)); }
.text-gray-600 { --tw-text-opacity: 1; color: rgb(75 85 99 / var(--tw-text-opacity)); }
.text-gray-700 { --tw-text-opacity: 1; color: rgb(55 65 81 / var(--tw-text-opacity)); }
.text-gray-800 { --tw-text-opacity: 1; color: rgb(31 41 55 / var(--tw-text-opacity)); }

.shadow {
  --tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
  --tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
  box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
}

.backdrop-blur-sm {
  --tw-backdrop-blur: blur(4px);
  -webkit-backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
  backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
}

.transition {
  transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, -webkit-backdrop-filter;
  transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter;
  transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter, -webkit-backdrop-filter;
  transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
  transition-duration: 150ms;
}

.delay-200 { transition-delay: 200ms; }
.duration-300 { transition-duration: 300ms; }
.hover\:bg-gray-100:hover { --tw-bg-opacity: 1; background-color: rgb(243 244 246 / var(--tw-bg-opacity)); }
.focus\:outline-none:focus { outline: 2px solid transparent; outline-offset: 2px; }

@media (prefers-color-scheme: dark) {
  .dark\:divide-gray-700 > :not([hidden]) ~ :not([hidden]) {
    --tw-divide-opacity: 1;
    border-color: rgb(55 65 81 / var(--tw-divide-opacity));
  }

  .dark\:border-gray-700 {
    --tw-border-opacity: 1;
    border-color: rgb(55 65 81 / var(--tw-border-opacity));
  }

  .dark\:from-gray-900\/80 {
    --tw-gradient-from: rgb(17 24 39 / 0.8) var(--tw-gradient-from-position);
    --tw-gradient-from-position: ;
    --tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-from-position);
    --tw-gradient-to-position: ;
    --tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
  }

  .dark\:via-gray-900 {
    --tw-gradient-via-position: ;
    --tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-to-position);
    --tw-gradient-to-position: ;
    --tw-gradient-stops: var(--tw-gradient-from), #111827 var(--tw-gradient-via-position), var(--tw-gradient-to);
  }

  .dark\:to-gray-900 {
    --tw-gradient-to: #111827 var(--tw-gradient-to-position);
    --tw-gradient-to-position: ;
  }

  .dark\:text-gray-200 { --tw-text-opacity: 1; color: rgb(229 231 235 / var(--tw-text-opacity)); }
  .dark\:text-gray-300 { --tw-text-opacity: 1; color: rgb(209 213 219 / var(--tw-text-opacity)); }
  .dark\:text-gray-500 { --tw-text-opacity: 1; color: rgb(107 114 128 / var(--tw-text-opacity)); }
  .dark\:text-white { --tw-text-opacity: 1; color: rgb(255 255 255 / var(--tw-text-opacity)); }

  .dark\:hover\:bg-gray-800\/70:hover {
    background-color: rgb(31 41 55 / 0.7);
  }
}

@media (min-width: 768px) {
  .md\:pl-0 { padding-left: 0px; }
}
extensions/web-widget/index.html (new file, 12 lines)
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Chat Widget Test</title>
  <link href="dist/output.css" rel="stylesheet">
</head>
<body>
  <script src="dist/chat-widget.js"></script>
</body>
</html>
extensions/web-widget/package-lock.json (generated, new file, 1,002 lines; diff suppressed because it is too large)
extensions/web-widget/package.json (new file, 15 lines)
@@ -0,0 +1,15 @@
{
  "name": "web-widget",
  "version": "1.0.0",
  "description": "",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "devDependencies": {
    "tailwindcss": "^3.3.1"
  }
}
extensions/web-widget/src/html/widget.html (new file, 58 lines)
@@ -0,0 +1,58 @@
<div id="docsgpt-widget" class="dark fixed bottom-5 left-5 pl-5 md:pl-0 z-50">
  <style>
    @keyframes dotBounce {
      0%, 80%, 100% {
        transform: translateY(0);
      }
      40% {
        transform: translateY(-5px);
      }
    }

    .dot-animation {
      display: inline-block;
      animation: dotBounce 1s infinite ease-in-out;
    }

    .delay-200 {
      animation-delay: 200ms;
    }

    .delay-400 {
      animation-delay: 400ms;
    }
  </style>

  <div class="divide-y dark:divide-gray-700 rounded-md border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm" style="width: 18rem; transform: translateY(0%) translateZ(0px);">
    <div>
      <div class="flex items-center gap-2 p-3">
        <div id="docsgpt-init-message" class="flex-1">
          <h3 class="text-sm font-bold text-gray-700 dark:text-gray-200">Looking for help with documentation?</h3>
          <p class="mt-1 text-xs text-gray-400 dark:text-gray-500">DocsGPT AI assistant will help you with docs</p>
        </div>
        <div id="docsgpt-answer" class="hidden">
          <p class="mt-1 text-xs text-gray-600 dark:text-gray-300">Some cool answer</p>
        </div>
      </div>
    </div>
    <div class="w-full">
      <button id="ask-docsgpt" class="flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 hover:bg-gray-100 rounded-b dark:hover:bg-gray-800/70">
        Ask DocsGPT
      </button>

      <form id="docsgpt-chat-form" class="relative w-full m-0 hidden" style="opacity: 1;" data-projection-id="1">
        <input id="docsgpt-chat-input" type="text" class="w-full bg-transparent px-5 py-3 pr-8 text-sm text-gray-700 dark:text-white focus:outline-none" placeholder="What do you want to do?" value="">
        <button class="absolute inset-y-0 right-2 -mx-2 px-2" type="submit" style="opacity: 0;" data-projection-id="2">
        </button>
      </form>
      <p id="docsgpt-chat-processing" class="hidden flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 rounded-b animate-fadeIn animate-2s">
        Processing<span class="dot-animation">.</span><span class="dot-animation delay-200">.</span><span class="dot-animation delay-400">.</span>
      </p>
    </div>
  </div>
</div>
extensions/web-widget/src/input.css (new file, 3 lines)
@@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
extensions/web-widget/src/js/script.js (new file, 56 lines)
@@ -0,0 +1,56 @@
const API_ENDPOINT = "http://localhost:5001/api/answer"; // Replace with your API endpoint

const widgetInitMessage = document.getElementById("docsgpt-init-message");
const widgetAnswerMessage = document.getElementById("docsgpt-answer");
const widgetAnswerMessageP = widgetAnswerMessage.querySelector("p");
const askDocsGPTButton = document.getElementById("ask-docsgpt");
const chatInput = document.getElementById("docsgpt-chat-input");
const chatForm = document.getElementById("docsgpt-chat-form");
const chatProcessing = document.getElementById("docsgpt-chat-processing");

async function sendMessage(message) {
  const requestData = {
    "question": message,
    "active_docs": "default",
    "api_key": "token",
    "embeddings_key": "token",
    "model": "default",
    "history": null,
  }
  const response = await fetch(API_ENDPOINT, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestData),
  });
  const data = await response.json();
  return data.answer;
}

askDocsGPTButton.addEventListener("click", () => {
  askDocsGPTButton.classList.add("hidden");
  chatForm.classList.remove("hidden");
  chatForm.focus();
  widgetInitMessage.classList.remove("hidden");
  widgetAnswerMessage.classList.add("hidden");
});

chatForm.addEventListener("submit", async (e) => {
  e.preventDefault();
  const message = chatInput.value.trim();
  if (!message) return;

  chatInput.value = "";
  chatForm.classList.add("hidden");
  chatProcessing.classList.remove("hidden");

  const reply = await sendMessage(message);
  chatProcessing.classList.add("hidden");

  // show the answer inside the <p> tag
  widgetAnswerMessageP.innerHTML = reply;
  widgetAnswerMessage.classList.remove("hidden");
  widgetInitMessage.classList.add("hidden");
  askDocsGPTButton.classList.remove("hidden");
});
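
Since `sendMessage` is only a thin wrapper around a `fetch` to `API_ENDPOINT`, it can be smoke-tested on its own, assuming a DocsGPT API is listening on localhost:5001:

```javascript
// Hypothetical quick check, e.g. from the browser console on the test page.
sendMessage("What is DocsGPT?")
  .then((answer) => console.log(answer))
  .catch((err) => console.error("API unreachable:", err));
```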
extensions/web-widget/tailwind.config.js (new file, 10 lines)
@@ -0,0 +1,10 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
  content: ["./src/**/*.{html,js}"],
  theme: {
    extend: {},
  },
  plugins: [],
}
frontend/.env.development (new file, 2 lines)
@@ -0,0 +1,2 @@
# Please put the appropriate value for your environment
VITE_API_HOST=http://localhost:5001

frontend/.env.production (new file, 1 line)
@@ -0,0 +1 @@
VITE_API_HOST = https://gptcloud.arc53.com
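
Vite exposes these variables on `import.meta.env` at build time; the fallback pattern below mirrors what Navigation.tsx in this diff does:

```javascript
// Sketch: resolve the API host with a default, as the frontend code does.
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
console.log(`Requests will go to ${apiHost}/api/...`);
```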
@@ -4,6 +4,7 @@
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
     <title>DocsGPT 🦖</title>
+    <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
   </head>
   <body>
     <div id="root" class="h-screen"></div>
frontend/package-lock.json (generated, 6,127 lines; diff suppressed because it is too large)
@@ -23,12 +23,16 @@
     "@vercel/analytics": "^0.1.10",
     "react": "^18.2.0",
     "react-dom": "^18.2.0",
+    "react-dropzone": "^14.2.3",
+    "react-markdown": "^8.0.7",
     "react-redux": "^8.0.5",
-    "react-router-dom": "^6.8.1"
+    "react-router-dom": "^6.8.1",
+    "react-syntax-highlighter": "^15.5.0"
   },
   "devDependencies": {
     "@types/react": "^18.0.27",
     "@types/react-dom": "^18.0.10",
+    "@types/react-syntax-highlighter": "^15.5.6",
     "@typescript-eslint/eslint-plugin": "^5.51.0",
     "@typescript-eslint/parser": "^5.51.0",
     "@vitejs/plugin-react": "^3.1.0",
@@ -49,6 +53,7 @@
     "prettier-plugin-tailwindcss": "^0.2.2",
     "tailwindcss": "^3.2.4",
     "typescript": "^4.9.5",
-    "vite": "^4.1.0"
+    "vite": "^4.1.0",
+    "vite-plugin-svgr": "^2.4.0"
   }
 }
frontend/public/favicon.ico (binary, new file, 15 KiB; not shown)
@@ -1,12 +1,14 @@
-import { useRef, useState } from 'react';
+import { useEffect, useRef, useState } from 'react';
 import { NavLink } from 'react-router-dom';
 import Arrow1 from './assets/arrow.svg';
 import Arrow2 from './assets/dropdown-arrow.svg';
+import Exit from './assets/exit.svg';
 import Message from './assets/message.svg';
 import Hamburger from './assets/hamburger.svg';
 import Key from './assets/key.svg';
 import Info from './assets/info.svg';
 import Link from './assets/link.svg';
+import UploadIcon from './assets/upload.svg';
 import { ActiveState } from './models/misc';
 import APIKeyModal from './preferences/APIKeyModal';
 import SelectDocsModal from './preferences/SelectDocsModal';
@@ -19,6 +21,8 @@ import {
   setSelectedDocs,
 } from './preferences/preferenceSlice';
 import { useOutsideAlerter } from './hooks';
+import Upload from './upload/Upload';
+import { Doc } from './preferences/preferenceApi';
 
 export default function Navigation({
   navState,
@@ -34,15 +38,35 @@ export default function Navigation({
   const [isDocsListOpen, setIsDocsListOpen] = useState(false);
 
   const isApiKeySet = useSelector(selectApiKeyStatus);
-  const [apiKeyModalState, setApiKeyModalState] = useState<ActiveState>(
-    isApiKeySet ? 'INACTIVE' : 'ACTIVE',
-  );
+  const [apiKeyModalState, setApiKeyModalState] =
+    useState<ActiveState>('INACTIVE');
 
   const isSelectedDocsSet = useSelector(selectSelectedDocsStatus);
   const [selectedDocsModalState, setSelectedDocsModalState] =
     useState<ActiveState>(isSelectedDocsSet ? 'INACTIVE' : 'ACTIVE');
 
+  const [uploadModalState, setUploadModalState] =
+    useState<ActiveState>('INACTIVE');
+
   const navRef = useRef(null);
+  const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
+
+  const handleDeleteClick = (index: number, doc: Doc) => {
+    const docPath = 'indexes/' + 'local' + '/' + doc.name;
+
+    fetch(`${apiHost}/api/delete_old?path=${docPath}`, {
+      method: 'GET',
+    })
+      .then(() => {
+        // remove the image element from the DOM
+        const imageElement = document.querySelector(
+          `#img-${index}`,
+        ) as HTMLElement;
+        const parentElement = imageElement.parentNode as HTMLElement;
+        parentElement.parentNode?.removeChild(parentElement);
+      })
+      .catch((error) => console.error(error));
+  };
   useOutsideAlerter(
     navRef,
     () => {
@@ -58,6 +82,19 @@ export default function Navigation({
     [navState, isDocsListOpen, apiKeyModalState],
   );
 
+  /*
+    Needed to fix bug where if mobile nav was closed and then window was resized to desktop, nav would still be closed but the button to open would be gone, as per #1 on issue #146
+  */
+  useEffect(() => {
+    window.addEventListener('resize', () => {
+      if (window.matchMedia('(min-width: 768px)').matches) {
+        setNavState('ACTIVE');
+      } else {
+        setNavState('INACTIVE');
+      }
+    });
+  }, []);
+
   return (
     <>
       <div
@@ -66,7 +103,7 @@ export default function Navigation({
           navState === 'INACTIVE' && '-ml-96 md:-ml-[14rem]'
         } duration-20 fixed z-20 flex h-full w-72 flex-col border-r-2 bg-gray-50 transition-all`}
       >
-        <div className={'h-16 w-full border-b-2'}>
+        <div className={'visible h-16 w-full border-b-2 md:hidden'}>
           <button
             className="float-right mr-5 mt-5 h-5 w-5"
             onClick={() =>
@@ -95,8 +132,8 @@ export default function Navigation({
         </NavLink>
 
         <div className="flex-grow border-b-2 border-gray-100"></div>
-        <div className="flex flex-grow flex-col-reverse border-b-2">
-          <div className="relative my-4 px-6">
+        <div className="flex flex-col-reverse border-b-2">
+          <div className="relative my-4 flex gap-2 px-2">
             <div
               className="flex h-12 w-full cursor-pointer justify-between rounded-md border-2 bg-white"
               onClick={() => setIsDocsListOpen(!isDocsListOpen)}
@@ -110,15 +147,20 @@ export default function Navigation({
                 src={Arrow2}
                 alt="arrow"
                 className={`${
-                  isDocsListOpen ? 'rotate-0' : '-rotate-90'
+                  isDocsListOpen ? 'rotate-0' : 'rotate-180'
                 } mr-3 w-3 transition-all`}
               />
             </div>
+            <img
+              className="mt-2 h-9 w-9 hover:cursor-pointer"
+              src={UploadIcon}
+              onClick={() => setUploadModalState('ACTIVE')}
+            ></img>
             {isDocsListOpen && (
-              <div className="absolute top-12 left-0 right-0 mx-6 max-h-52 overflow-y-scroll bg-white shadow-lg">
+              <div className="absolute top-12 left-0 right-6 ml-2 mr-4 max-h-52 overflow-y-scroll bg-white shadow-lg">
                 {docs ? (
                   docs.map((doc, index) => {
+                    if (doc.model) {
                     if (doc.model === 'openai_text-embedding-ada-002') {
                       return (
                         <div
                           key={index}
@@ -126,11 +168,23 @@ export default function Navigation({
                             dispatch(setSelectedDocs(doc));
                             setIsDocsListOpen(false);
                           }}
-                          className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
+                          className="flex h-10 w-full cursor-pointer items-center justify-between border-x-2 border-b-2 hover:bg-gray-100"
                         >
-                          <p className="ml-5 py-3">
+                          <p className="ml-5 flex-1 overflow-hidden overflow-ellipsis whitespace-nowrap py-3">
                             {doc.name} {doc.version}
                           </p>
+                          {doc.location === 'local' ? (
+                            <img
+                              src={Exit}
+                              alt="Exit"
+                              className="mr-4 h-3 w-3 cursor-pointer hover:opacity-50"
+                              id={`img-${index}`}
+                              onClick={(event) => {
+                                event.stopPropagation();
+                                handleDeleteClick(index, doc);
+                              }}
+                            />
+                          ) : null}
                         </div>
                       );
                     }
@@ -143,7 +197,7 @@ export default function Navigation({
               </div>
             )}
           </div>
-          <p className="ml-6 font-bold text-jet">Source Docs</p>
+          <p className="ml-6 mt-3 font-bold text-jet">Source Docs</p>
         </div>
         <div className="flex flex-col gap-2 border-b-2 py-2">
           <div
@@ -209,6 +263,10 @@ export default function Navigation({
         setModalState={setApiKeyModalState}
         isCancellable={isApiKeySet}
       />
+      <Upload
+        modalState={uploadModalState}
+        setModalState={setUploadModalState}
+      ></Upload>
     </>
   );
 }
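
One caveat with the resize handler introduced for issue #146 above: the effect registers a window listener but never removes it, so it leaks if the component unmounts. A hedged sketch of a variant with cleanup, which is not what the diff ships:

```javascript
useEffect(() => {
  const onResize = () => {
    // Same breakpoint logic as the diff, factored into a named handler
    // so it can be detached when the component unmounts.
    setNavState(
      window.matchMedia('(min-width: 768px)').matches ? 'ACTIVE' : 'INACTIVE',
    );
  };
  window.addEventListener('resize', onResize);
  return () => window.removeEventListener('resize', onResize);
}, []);
```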
4
frontend/src/assets/dislike.svg
Normal file
@@ -0,0 +1,4 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
<path d="M6.37776 10.1001V12.9C6.37776 13.457 6.599 13.9911 6.99282 14.3849C7.38664 14.7788 7.92077 15 8.47772 15L11.2777 8.70011V1.00025H3.38181C3.04419 0.996436 2.71656 1.11477 2.45929 1.33344C2.20203 1.55212 2.03246 1.8564 1.98184 2.19023L1.01585 8.49012C0.985398 8.69076 0.998931 8.89563 1.05551 9.09053C1.1121 9.28543 1.21038 9.46569 1.34355 9.61884C1.47671 9.77198 1.64159 9.89434 1.82674 9.97744C2.01189 10.0605 2.2129 10.1024 2.41583 10.1001H6.37776ZM11.2777 1.00025H13.1466C13.5428 0.993247 13.9277 1.13195 14.2284 1.39002C14.5291 1.64809 14.7245 2.00758 14.7776 2.40023V7.30014C14.7245 7.69279 14.5291 8.05227 14.2284 8.31035C13.9277 8.56842 13.5428 8.70712 13.1466 8.70011H11.2777" fill="white"/>
<path d="M11.2777 8.70011L8.47772 15C7.92077 15 7.38664 14.7788 6.99282 14.3849C6.599 13.9911 6.37776 13.457 6.37776 12.9V10.1001H2.41583C2.2129 10.1024 2.01189 10.0605 1.82674 9.97744C1.64159 9.89434 1.47671 9.77198 1.34355 9.61884C1.21038 9.46569 1.1121 9.28543 1.05551 9.09053C0.998931 8.89563 0.985398 8.69076 1.01585 8.49012L1.98184 2.19023C2.03246 1.8564 2.20203 1.55212 2.45929 1.33344C2.71656 1.11477 3.04419 0.996436 3.38181 1.00025H11.2777M11.2777 8.70011V1.00025M11.2777 8.70011H13.1466C13.5428 8.70712 13.9277 8.56842 14.2284 8.31035C14.5291 8.05227 14.7245 7.69279 14.7776 7.30014V2.40023C14.7245 2.00758 14.5291 1.64809 14.2284 1.39002C13.9277 1.13195 13.5428 0.993247 13.1466 1.00025H11.2777" stroke="current" stroke-width="1.4" stroke-linecap="round" stroke-linejoin="round"/>
</svg>
After Width: | Height: | Size: 1.6 KiB
4
frontend/src/assets/like.svg
Normal file
@@ -0,0 +1,4 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
<path d="M9.39995 5.89997V3.09999C9.39995 2.54304 9.1787 2.0089 8.78487 1.61507C8.39105 1.22125 7.85691 1 7.29996 1L4.49998 7.29996V14.9999H12.3959C12.7336 15.0037 13.0612 14.8854 13.3185 14.6667C13.5757 14.448 13.7453 14.1437 13.7959 13.8099L14.7619 7.50996C14.7924 7.30931 14.7788 7.10444 14.7222 6.90954C14.6657 6.71464 14.5674 6.53437 14.4342 6.38123C14.301 6.22808 14.1362 6.10572 13.951 6.02262C13.7659 5.93952 13.5649 5.89767 13.3619 5.89997H9.39995ZM4.49998 14.9999H2.39999C2.02869 14.9999 1.6726 14.8524 1.41005 14.5899C1.1475 14.3273 1 13.9712 1 13.5999V8.69995C1 8.32865 1.1475 7.97256 1.41005 7.71001C1.6726 7.44746 2.02869 7.29996 2.39999 7.29996H4.49998" fill="white"/>
<path d="M4.49998 7.29996L7.29996 1C7.85691 1 8.39105 1.22125 8.78487 1.61507C9.1787 2.0089 9.39995 2.54304 9.39995 3.09999V5.89997H13.3619C13.5649 5.89767 13.7659 5.93952 13.951 6.02262C14.1362 6.10572 14.301 6.22808 14.4342 6.38123C14.5674 6.53437 14.6657 6.71464 14.7223 6.90954C14.7788 7.10444 14.7924 7.30931 14.7619 7.50996L13.7959 13.8099C13.7453 14.1437 13.5757 14.448 13.3185 14.6667C13.0612 14.8854 12.7336 15.0037 12.3959 14.9999H4.49998M4.49998 7.29996V14.9999M4.49998 7.29996H2.39999C2.02869 7.29996 1.6726 7.44746 1.41005 7.71001C1.1475 7.97256 1 8.32865 1 8.69995V13.5999C1 13.9712 1.1475 14.3273 1.41005 14.5899C1.6726 14.8524 2.02869 14.9999 2.39999 14.9999H4.49998" stroke="current" stroke-width="1.39999" stroke-linecap="round" stroke-linejoin="round"/>
</svg>
After Width: | Height: | Size: 1.5 KiB
3
frontend/src/assets/upload.svg
Normal file
@@ -0,0 +1,3 @@
<svg width="24" height="16" viewBox="0 0 24 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M19.35 6.04C18.67 2.59 15.64 0 12 0C9.11 0 6.6 1.64 5.35 4.04C2.34 4.36 0 6.91 0 10C0 13.31 2.69 16 6 16H19C21.76 16 24 13.76 24 11C24 8.36 21.95 6.22 19.35 6.04ZM14 9V13H10V9H7L12 4L17 9H14Z" fill="black" fill-opacity="0.54"/>
</svg>
After Width: | Height: | Size: 340 B
@@ -1,19 +1,22 @@
import { useEffect, useRef } from 'react';
import { Fragment, useEffect, useRef } from 'react';
import { useDispatch, useSelector } from 'react-redux';
import Hero from '../Hero';
import { AppDispatch } from '../store';
import ConversationBubble from './ConversationBubble';
import {
addMessage,
addQuery,
fetchAnswer,
selectConversation,
selectQueries,
selectStatus,
updateQuery,
} from './conversationSlice';
import Send from './../assets/send.svg';
import Spinner from './../assets/spinner.svg';
import { FEEDBACK, Query } from './conversationModels';
import { sendFeedback } from './conversationApi';

export default function Conversation() {
const messages = useSelector(selectConversation);
const queries = useSelector(selectQueries);
const status = useSelector(selectStatus);
const dispatch = useDispatch<AppDispatch>();
const endMessageRef = useRef<HTMLDivElement>(null);
@@ -21,35 +24,74 @@ export default function Conversation() {

useEffect(
() => endMessageRef?.current?.scrollIntoView({ behavior: 'smooth' }),
[messages],
[queries.length, queries[queries.length - 1]],
);

const handleQuestion = (question: string) => {
dispatch(addMessage({ text: question, type: 'QUESTION' }));
dispatch(addQuery({ prompt: question }));
dispatch(fetchAnswer({ question }));
};

const handleFeedback = (query: Query, feedback: FEEDBACK, index: number) => {
const prevFeedback = query.feedback;
dispatch(updateQuery({ index, query: { feedback } }));
sendFeedback(query.prompt, query.response!, feedback).catch(() =>
dispatch(updateQuery({ index, query: { feedback: prevFeedback } })),
);
};

const prepResponseView = (query: Query, index: number) => {
let responseView;
if (query.error) {
responseView = (
<ConversationBubble
ref={endMessageRef}
className={`${index === queries.length - 1 ? 'mb-24' : 'mb-7'}`}
key={`${index}ERROR`}
message={query.error}
type="ERROR"
></ConversationBubble>
);
} else if (query.response) {
responseView = (
<ConversationBubble
ref={endMessageRef}
className={`${index === queries.length - 1 ? 'mb-24' : 'mb-7'}`}
key={`${index}ANSWER`}
message={query.response}
type={'ANSWER'}
feedback={query.feedback}
handleFeedback={(feedback: FEEDBACK) =>
handleFeedback(query, feedback, index)
}
></ConversationBubble>
);
}
return responseView;
};

return (
<div className="flex justify-center p-6">
{messages.length > 0 && (
<div className="mt-20 flex w-10/12 flex-col transition-all md:w-1/2">
{messages.map((message, index) => {
<div className="flex justify-center p-4">
{queries.length > 0 && (
<div className="mt-20 flex flex-col transition-all md:w-3/4">
{queries.map((query, index) => {
return (
<ConversationBubble
ref={index === messages.length - 1 ? endMessageRef : null}
className={`${
index === messages.length - 1 ? 'mb-24' : 'mb-7'
}`}
key={index}
message={message.text}
type={message.type}
></ConversationBubble>
<Fragment key={index}>
<ConversationBubble
ref={endMessageRef}
className={'mb-7'}
key={`${index}QUESTION`}
message={query.prompt}
type="QUESTION"
></ConversationBubble>
{prepResponseView(query, index)}
</Fragment>
);
})}
</div>
)}
{messages.length === 0 && <Hero className="mt-24 md:mt-52"></Hero>}
<div className="fixed bottom-6 flex w-10/12 flex-col items-end self-center md:w-[50%]">
{queries.length === 0 && <Hero className="mt-24 md:mt-52"></Hero>}
<div className="fixed bottom-0 flex w-10/12 flex-col items-end self-center md:w-[50%]">
<div className="flex w-full">
<div
ref={inputRef}
@@ -85,7 +127,7 @@ export default function Conversation() {
</div>
)}
</div>
<p className="mt-3 w-10/12 self-center text-center text-xs text-gray-2000">
<p className="w-[100vw] self-center bg-white p-5 text-center text-xs text-gray-2000">
This is a chatbot that uses the GPT-3, Faiss and LangChain to answer
questions.
</p>
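The handleFeedback change above applies an optimistic update: the feedback is written into the store immediately and rolled back only if the request fails. A minimal standalone sketch of the same pattern in TypeScript, with a generic store and request stub standing in for the Redux pieces (all names here are illustrative, not from this PR):

// Optimistic-update sketch: apply state first, revert on failure.
type Feedback = 'LIKE' | 'DISLIKE';

interface FeedbackStore {
  get(index: number): Feedback | undefined;
  set(index: number, feedback: Feedback | undefined): void;
}

async function rateAnswer(
  store: FeedbackStore,
  index: number,
  feedback: Feedback,
  send: (feedback: Feedback) => Promise<void>, // e.g. wraps POST /api/feedback
): Promise<void> {
  const previous = store.get(index); // remember old value for rollback
  store.set(index, feedback); // optimistic: UI updates immediately
  try {
    await send(feedback);
  } catch {
    store.set(index, previous); // request failed: restore old value
  }
}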
@@ -1,51 +1,137 @@
import { forwardRef } from 'react';
import { forwardRef, useState } from 'react';
import Avatar from '../Avatar';
import { MESSAGE_TYPE } from './conversationModels';
import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
import Alert from './../assets/alert.svg';
import { ReactComponent as Like } from './../assets/like.svg';
import { ReactComponent as Dislike } from './../assets/dislike.svg';
import ReactMarkdown from 'react-markdown';
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';

const ConversationBubble = forwardRef<
HTMLDivElement,
{
message: string;
type: MESSAGE_TYPE;
className: string;
className?: string;
feedback?: FEEDBACK;
handleFeedback?: (feedback: FEEDBACK) => void;
}
>(function ConversationBubble({ message, type, className }, ref) {
return (
<div
ref={ref}
className={`flex ${
type === 'ANSWER'
? 'self-start '
: type === 'ERROR'
? 'self-start'
: 'flex-row-reverse self-end '
} ${className}`}
>
<Avatar
className="mt-4 text-2xl"
avatar={type === 'QUESTION' ? '🧑💻' : '🦖'}
></Avatar>
<div
className={`${
type === 'QUESTION'
? ' mr-2 ml-10 bg-blue-1000 py-5 px-5 text-white'
: ' ml-2 mr-10 bg-gray-1000 py-5 px-5'
} flex items-center rounded-3xl ${
type === 'ERROR'
? 'rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
: ''
}`}
>
{type === 'ERROR' && (
<img src={Alert} alt="alert" className="mr-2 inline" />
)}
<p className="whitespace-pre-wrap break-words">{message}</p>
>(function ConversationBubble(
{ message, type, className, feedback, handleFeedback },
ref,
) {
const [showFeedback, setShowFeedback] = useState(false);
const List = ({
ordered,
children,
}: {
ordered?: boolean;
children: React.ReactNode;
}) => {
const Tag = ordered ? 'ol' : 'ul';
return <Tag className="list-inside list-disc">{children}</Tag>;
};
let bubble;

if (type === 'QUESTION') {
bubble = (
<div ref={ref} className={`flex flex-row-reverse self-end ${className}`}>
<Avatar className="mt-4 text-2xl" avatar="🧑💻"></Avatar>
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 p-3.5 text-white">
<ReactMarkdown className="whitespace-pre-wrap break-words">
{message}
</ReactMarkdown>
</div>
</div>
</div>
);
);
} else {
bubble = (
<div
ref={ref}
className={`flex self-start ${className}`}
onMouseEnter={() => setShowFeedback(true)}
onMouseLeave={() => setShowFeedback(false)}
>
<Avatar className="mt-4 text-2xl" avatar="🦖"></Avatar>
<div
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
type === 'ERROR'
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
: ''
}`}
>
{type === 'ERROR' && (
<img src={Alert} alt="alert" className="mr-2 inline" />
)}
<ReactMarkdown
className="whitespace-pre-wrap break-words"
components={{
code({ node, inline, className, children, ...props }) {
const match = /language-(\w+)/.exec(className || '');

return !inline && match ? (
<SyntaxHighlighter
PreTag="div"
language={match[1]}
{...props}
style={vscDarkPlus}
>
{String(children).replace(/\n$/, '')}
</SyntaxHighlighter>
) : (
<code className={className ? className : ''} {...props}>
{children}
</code>
);
},
ul({ node, children }) {
return <List>{children}</List>;
},
ol({ node, children }) {
return <List ordered>{children}</List>;
},
}}
>
{message}
</ReactMarkdown>
</div>
<div
className={`mr-2 flex items-center justify-center ${
feedback === 'LIKE' || (type !== 'ERROR' && showFeedback)
? ''
: 'md:invisible'
}`}
>
<Like
className={`cursor-pointer ${
feedback === 'LIKE'
? 'fill-blue-1000 stroke-blue-1000'
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
}`}
onClick={() => handleFeedback?.('LIKE')}
></Like>
</div>
<div
className={`mr-10 flex items-center justify-center ${
feedback === 'DISLIKE' || (type !== 'ERROR' && showFeedback)
? ''
: 'md:invisible'
}`}
>
<Dislike
className={`cursor-pointer ${
feedback === 'DISLIKE'
? 'fill-red-2000 stroke-red-2000'
: 'fill-none stroke-gray-4000 hover:fill-gray-4000'
}`}
onClick={() => handleFeedback?.('DISLIKE')}
></Dislike>
</div>
</div>
);
}
return bubble;
});

export default ConversationBubble;

// TODO : split question and answer into two diff JSX
@@ -1,29 +1,33 @@
import { Answer } from './conversationModels';
import { Answer, FEEDBACK } from './conversationModels';
import { Doc } from '../preferences/preferenceApi';

const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';

export function fetchAnswerApi(
question: string,
apiKey: string,
selectedDocs: Doc,
history: Array<any> = [],
): Promise<Answer> {
let namePath = selectedDocs.name;
if (selectedDocs.language === namePath) {
namePath = '.project';
}

const docPath =
selectedDocs.name === 'default'
? 'default'
: selectedDocs.language +
'/' +
namePath +
'/' +
selectedDocs.version +
'/' +
selectedDocs.model +
'/';

const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
let docPath = 'default';
if (selectedDocs.location === 'local') {
docPath = 'local' + '/' + selectedDocs.name + '/';
} else if (selectedDocs.location === 'remote') {
docPath =
selectedDocs.language +
'/' +
namePath +
'/' +
selectedDocs.version +
'/' +
selectedDocs.model +
'/';
}

return fetch(apiHost + '/api/answer', {
method: 'POST',
@@ -34,7 +38,7 @@ export function fetchAnswerApi(
question: question,
api_key: apiKey,
embeddings_key: apiKey,
history: localStorage.getItem('chatHistory'),
history: history,
active_docs: docPath,
}),
})
@@ -42,7 +46,7 @@ export function fetchAnswerApi(
if (response.ok) {
return response.json();
} else {
Promise.reject(response);
return Promise.reject(new Error(response.statusText));
}
})
.then((data) => {
@@ -51,8 +55,72 @@ export function fetchAnswerApi(
});
}

function getRandomInt(min: number, max: number) {
min = Math.ceil(min);
max = Math.floor(max);
return Math.floor(Math.random() * (max - min) + min); // The maximum is exclusive and the minimum is inclusive
export function fetchAnswerSteaming(
question: string,
apiKey: string,
selectedDocs: Doc,
history: Array<any> = [],
onEvent: (event: MessageEvent) => void,
): Promise<Answer> {
let namePath = selectedDocs.name;
if (selectedDocs.language === namePath) {
namePath = '.project';
}

let docPath = 'default';
if (selectedDocs.location === 'local') {
docPath = 'local' + '/' + selectedDocs.name + '/';
} else if (selectedDocs.location === 'remote') {
docPath =
selectedDocs.language +
'/' +
namePath +
'/' +
selectedDocs.version +
'/' +
selectedDocs.model +
'/';
}

return new Promise<Answer>((resolve, reject) => {
const url = new URL(apiHost + '/stream');
url.searchParams.append('question', question);
url.searchParams.append('api_key', apiKey);
url.searchParams.append('embeddings_key', apiKey);
url.searchParams.append('active_docs', docPath);
url.searchParams.append('history', JSON.stringify(history));

const eventSource = new EventSource(url.href);

eventSource.onmessage = onEvent;

eventSource.onerror = (error) => {
console.log('Connection failed.');
eventSource.close();
};
});
}

export function sendFeedback(
prompt: string,
response: string,
feedback: FEEDBACK,
) {
return fetch(`${apiHost}/api/feedback`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
question: prompt,
answer: response,
feedback: feedback,
}),
}).then((response) => {
if (response.ok) {
return Promise.resolve();
} else {
return Promise.reject();
}
});
}
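fetchAnswerSteaming above opens an EventSource against /stream and forwards every message to the onEvent callback; the 'end' message is interpreted by the caller in the slice further down. A hedged sketch of a self-contained consumer that also closes the connection and settles its promise (the { type: 'end' } / { answer } message shape is taken from the slice; the function name and error text are illustrative):

// SSE consumer sketch: collect streamed chunks until an 'end' message.
function streamAnswer(url: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const source = new EventSource(url);
    let answer = '';
    source.onmessage = (event: MessageEvent) => {
      const data = JSON.parse(event.data);
      if (data.type === 'end') {
        source.close(); // stop listening once the server is done
        resolve(answer); // settle with the accumulated text
      } else {
        answer += data.answer; // append the next streamed chunk
      }
    };
    source.onerror = () => {
      source.close();
      reject(new Error('stream connection failed'));
    };
  });
}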
@@ -1,5 +1,6 @@
export type MESSAGE_TYPE = 'QUESTION' | 'ANSWER' | 'ERROR';
export type Status = 'idle' | 'loading' | 'failed';
export type FEEDBACK = 'LIKE' | 'DISLIKE';

export interface Message {
text: string;
@@ -7,7 +8,7 @@ export interface Message {
}

export interface ConversationState {
conversation: Message[];
queries: Query[];
status: Status;
}

@@ -16,3 +17,10 @@ export interface Answer {
query: string;
result: string;
}

export interface Query {
prompt: string;
response?: string;
feedback?: FEEDBACK;
error?: string;
}
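A Query accumulates its optional fields over its lifetime; a small illustrative sequence using the interface above (plain objects, no Redux):

// Lifecycle sketch for the Query shape above.
const query: Query = { prompt: 'What is DocsGPT?' }; // just asked
query.response = 'DocsGPT answers questions from your docs.'; // answer arrived
query.feedback = 'LIKE'; // user rated it
// On failure, `error` would be set instead of `response`.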
@@ -1,34 +1,100 @@
import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
import store from '../store';
import { fetchAnswerApi } from './conversationApi';
import { Answer, ConversationState, Message } from './conversationModels';
import { fetchAnswerApi, fetchAnswerSteaming } from './conversationApi';
import { Answer, ConversationState, Query, Status } from './conversationModels';

const initialState: ConversationState = {
conversation: [],
queries: [],
status: 'idle',
};

export const fetchAnswer = createAsyncThunk<
Answer,
{ question: string },
{ state: RootState }
>('fetchAnswer', async ({ question }, { getState }) => {
const state = getState();
const API_STREAMING = import.meta.env.VITE_API_STREAMING === 'true';

const answer = await fetchAnswerApi(
question,
state.preference.apiKey,
state.preference.selectedDocs!,
);
return answer;
});
export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
'fetchAnswer',
async ({ question }, { dispatch, getState }) => {
const state = getState() as RootState;
if (state.preference) {
if (API_STREAMING) {
await fetchAnswerSteaming(
question,
state.preference.apiKey,
state.preference.selectedDocs!,
state.conversation.queries,
(event) => {
const data = JSON.parse(event.data);

// check if the 'end' event has been received
if (data.type === 'end') {
// set status to 'idle'
dispatch(conversationSlice.actions.setStatus('idle'));
} else {
const result = data.answer;
dispatch(
updateStreamingQuery({
index: state.conversation.queries.length - 1,
query: { response: result },
}),
);
}
},
);
} else {
const answer = await fetchAnswerApi(
question,
state.preference.apiKey,
state.preference.selectedDocs!,
state.conversation.queries,
);
if (answer) {
dispatch(
updateQuery({
index: state.conversation.queries.length - 1,
query: { response: answer.answer },
}),
);
dispatch(conversationSlice.actions.setStatus('idle'));
}
}
}
return { answer: '', query: question, result: '' };
},
);

export const conversationSlice = createSlice({
name: 'conversation',
initialState,
reducers: {
addMessage(state, action: PayloadAction<Message>) {
state.conversation.push(action.payload);
addQuery(state, action: PayloadAction<Query>) {
state.queries.push(action.payload);
},
updateStreamingQuery(
state,
action: PayloadAction<{ index: number; query: Partial<Query> }>,
) {
const index = action.payload.index;
if (action.payload.query.response) {
state.queries[index].response =
(state.queries[index].response || '') + action.payload.query.response;
} else {
state.queries[index] = {
...state.queries[index],
...action.payload.query,
};
}
},
updateQuery(
state,
action: PayloadAction<{ index: number; query: Partial<Query> }>,
) {
const index = action.payload.index;
state.queries[index] = {
...state.queries[index],
...action.payload.query,
};
},
setStatus(state, action: PayloadAction<Status>) {
state.status = action.payload;
},
},
extraReducers(builder) {
@@ -36,29 +102,20 @@ export const conversationSlice = createSlice({
.addCase(fetchAnswer.pending, (state) => {
state.status = 'loading';
})
.addCase(fetchAnswer.fulfilled, (state, action) => {
state.status = 'idle';
state.conversation.push({
text: action.payload.answer,
type: 'ANSWER',
});
})
.addCase(fetchAnswer.rejected, (state, action) => {
state.status = 'failed';
state.conversation.push({
text: 'Something went wrong. Please try again later.',
type: 'ERROR',
});
state.queries[state.queries.length - 1].error =
'Something went wrong. Please try again later.';
});
},
});

type RootState = ReturnType<typeof store.getState>;

export const selectConversation = (state: RootState) =>
state.conversation.conversation;
export const selectQueries = (state: RootState) => state.conversation.queries;

export const selectStatus = (state: RootState) => state.conversation.status;

export const { addMessage } = conversationSlice.actions;
export const { addQuery, updateQuery, updateStreamingQuery } =
conversationSlice.actions;
export default conversationSlice.reducer;
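The two reducers above differ in how they treat response: updateStreamingQuery concatenates incoming chunks onto the existing text, while updateQuery overwrites whole fields. A sketch of that distinction on plain state, outside Redux (types and names are illustrative):

// Append vs. overwrite sketch for the two reducers above.
interface QueryState {
  response?: string;
  feedback?: 'LIKE' | 'DISLIKE';
}

function applyStreaming(q: QueryState, chunk: string): void {
  q.response = (q.response || '') + chunk; // streaming chunks accumulate
}

function applyUpdate(q: QueryState, patch: Partial<QueryState>): void {
  Object.assign(q, patch); // non-streaming updates replace fields
}

const q: QueryState = {};
applyStreaming(q, 'Hello, ');
applyStreaming(q, 'world.'); // q.response === 'Hello, world.'
applyUpdate(q, { feedback: 'LIKE' }); // response untouched, feedback set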
@@ -1,5 +1,6 @@
// not all properties in Doc are going to be present. Make some optional
export type Doc = {
location: string;
name: string;
language: string;
version: string;
@@ -13,9 +14,10 @@ export type Doc = {
//Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
export async function getDocs(): Promise<Doc[] | null> {
try {
const response = await fetch(
'https://d3dg1063dc54p9.cloudfront.net/combined.json',
);
const apiHost =
import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';

const response = await fetch(apiHost + '/api/combine');
const data = await response.json();

const docs: Doc[] = [];
@@ -52,17 +54,13 @@ export function setLocalRecentDocs(doc: Doc): void {
namePath = '.project';
}

const docPath =
doc.name === 'default'
? 'default'
: doc.language +
'/' +
namePath +
'/' +
doc.version +
'/' +
doc.model +
'/';
let docPath = 'default';
if (doc.location === 'local') {
docPath = 'local' + '/' + doc.name + '/';
} else if (doc.location === 'remote') {
docPath =
doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
}
const apiHost = import.meta.env.VITE_API_HOST || 'https://docsapi.arc53.com';
fetch(apiHost + '/api/docs_check', {
method: 'POST',
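The same local/remote docPath branching now appears both here and in conversationApi.ts. A hedged sketch of a single helper both call sites could share (the helper name is illustrative and not part of this PR; the logic mirrors the branches above):

// Shared docPath sketch, mirroring the branching used in both files.
function buildDocPath(doc: {
  location: string;
  name: string;
  language: string;
  version: string;
  model: string;
}): string {
  const namePath = doc.language === doc.name ? '.project' : doc.name;
  if (doc.location === 'local') {
    return 'local/' + doc.name + '/';
  }
  if (doc.location === 'remote') {
    return doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
  }
  return 'default'; // anything else falls back to the default index
}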
@@ -13,8 +13,18 @@ interface Preference {
}

const initialState: Preference = {
apiKey: '',
selectedDocs: null,
apiKey: 'xxx',
selectedDocs: {
name: 'default',
language: 'default',
location: 'default',
version: 'default',
description: 'default',
fullName: 'default',
dat: 'default',
docLink: 'default',
model: 'openai_text-embedding-ada-002',
} as Doc,
sourceDocs: null,
};

@@ -29,7 +39,7 @@ export const prefSlice = createSlice({
state.selectedDocs = action.payload;
},
setSourceDocs: (state, action) => {
state.sourceDocs?.push(...action.payload);
state.sourceDocs = action.payload;
},
},
});
@@ -15,6 +15,7 @@ const store = configureStore({
selectedDocs: doc !== null ? JSON.parse(doc) : null,
sourceDocs: [
{
location: '',
language: '',
name: 'default',
version: '',
238
frontend/src/upload/Upload.tsx
Normal file
@@ -0,0 +1,238 @@
import React from 'react';
import { useCallback, useEffect, useState } from 'react';
import { useDropzone } from 'react-dropzone';
import { useDispatch } from 'react-redux';
import { ActiveState } from '../models/misc';
import { getDocs } from '../preferences/preferenceApi';
import { setSourceDocs } from '../preferences/preferenceSlice';

export default function Upload({
modalState,
setModalState,
}: {
modalState: ActiveState;
setModalState: (state: ActiveState) => void;
}) {
const [docName, setDocName] = useState('');
const [files, setfiles] = useState<File[]>([]);
const [progress, setProgress] = useState<{
type: 'UPLOAD' | 'TRAINIING';
percentage: number;
taskId?: string;
failed?: boolean;
}>();

function Progress({
title,
isCancellable = false,
isFailed = false,
}: {
title: string;
isCancellable?: boolean;
isFailed?: boolean;
}) {
return (
<div className="mt-5 flex flex-col items-center gap-2">
<p className="text-xl tracking-[0.15px]">{title}...</p>
<p className="text-sm text-gray-2000">This may take several minutes</p>
<p className={`ml-5 text-xl text-red-400 ${isFailed ? '' : 'hidden'}`}>
Over the token limit, please consider uploading smaller document
</p>
<p className="mt-10 text-2xl">{progress?.percentage || 0}%</p>

<div className="mb-10 w-[50%]">
<div className="h-1 w-[100%] bg-blue-4000"></div>
<div
className={`relative bottom-1 h-1 bg-blue-5000 transition-all`}
style={{ width: `${progress?.percentage || 0}%` }}
></div>
</div>

<button
onClick={() => {
setDocName('');
setfiles([]);
setProgress(undefined);
setModalState('INACTIVE');
}}
className={`rounded-md bg-blue-3000 px-4 py-2 text-sm font-medium text-white ${
isCancellable ? '' : 'hidden'
}`}
>
Finish
</button>
</div>
);
}

function UploadProgress() {
return <Progress title="Upload is in progress"></Progress>;
}

function TrainingProgress() {
const dispatch = useDispatch();
useEffect(() => {
(progress?.percentage ?? 0) < 100 &&
setTimeout(() => {
const apiHost = import.meta.env.VITE_API_HOST;
fetch(`${apiHost}/api/task_status?task_id=${progress?.taskId}`)
.then((data) => data.json())
.then((data) => {
if (data.status == 'SUCCESS') {
if (data.result.limited === true) {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) =>
progress && {
...progress,
percentage: 100,
failed: true,
},
);
} else {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) =>
progress && {
...progress,
percentage: 100,
failed: false,
},
);
}
} else if (data.status == 'PROGRESS') {
setProgress(
(progress) =>
progress && {
...progress,
percentage: data.result.current,
},
);
}
});
}, 5000);
}, [progress, dispatch]);
return (
<Progress
title="Training is in progress"
isCancellable={progress?.percentage === 100}
isFailed={progress?.failed === true}
></Progress>
);
}

const onDrop = useCallback((acceptedFiles: File[]) => {
setfiles(acceptedFiles);
setDocName(acceptedFiles[0]?.name);
}, []);

const doNothing = () => undefined;

const uploadFile = () => {
const formData = new FormData();
files.forEach((file) => {
formData.append('file', file);
});
formData.append('name', docName);
formData.append('user', 'local');
const apiHost = import.meta.env.VITE_API_HOST;
const xhr = new XMLHttpRequest();
xhr.upload.addEventListener('progress', (event) => {
const progress = +((event.loaded / event.total) * 100).toFixed(2);
setProgress({ type: 'UPLOAD', percentage: progress });
});
xhr.onload = () => {
const { task_id } = JSON.parse(xhr.responseText);
setProgress({ type: 'TRAINIING', percentage: 0, taskId: task_id });
};
xhr.open('POST', `${apiHost + '/api/upload'}`);
xhr.send(formData);
};

const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
multiple: false,
onDragEnter: doNothing,
onDragOver: doNothing,
onDragLeave: doNothing,
maxSize: 25000000,
accept: {
'application/pdf': ['.pdf'],
'text/plain': ['.txt'],
'text/x-rst': ['.rst'],
'text/x-markdown': ['.md'],
'application/zip': ['.zip'],
},
});

let view;
if (progress?.type === 'UPLOAD') {
view = <UploadProgress></UploadProgress>;
} else if (progress?.type === 'TRAINIING') {
view = <TrainingProgress></TrainingProgress>;
} else {
view = (
<>
<p className="text-xl text-jet">Upload New Documentation</p>
<p className="mb-3 text-xs text-gray-4000">
Please upload .pdf, .txt, .rst, .md, .zip limited to 25mb
</p>
<input
type="text"
className="h-10 w-[60%] rounded-md border-2 border-gray-5000 px-3 outline-none"
value={docName}
onChange={(e) => setDocName(e.target.value)}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-gray-4000">Name</span>
</div>
<div {...getRootProps()}>
<span className="rounded-md border border-blue-2000 px-4 py-2 font-medium text-blue-2000 hover:cursor-pointer">
<input type="button" {...getInputProps()} />
Choose Files
</span>
</div>
<div className="mt-9">
<p className="mb-5 font-medium text-eerie-black">Uploaded Files</p>
{files.map((file) => (
<p key={file.name} className="text-gray-6000">
{file.name}
</p>
))}
{files.length === 0 && <p className="text-gray-6000">None</p>}
</div>
<div className="flex flex-row-reverse">
<button
onClick={uploadFile}
className="ml-6 rounded-md bg-blue-3000 py-2 px-6 text-white"
>
Train
</button>
<button
onClick={() => {
setDocName('');
setfiles([]);
setModalState('INACTIVE');
}}
className="font-medium"
>
Cancel
</button>
</div>
</>
);
}

return (
<article
className={`${
modalState === 'ACTIVE' ? 'visible' : 'hidden'
} absolute z-30 h-screen w-screen bg-gray-alpha`}
>
<article className="mx-auto mt-24 flex w-[90vw] max-w-lg flex-col gap-4 rounded-lg bg-white p-6 shadow-lg">
{view}
</article>
</article>
);
}
// TODO: sanitize all inputs
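Upload.tsx uses XMLHttpRequest rather than fetch so it can observe upload progress events. A minimal sketch of that pattern in isolation (function name, callback, and error text are illustrative):

// Upload-progress sketch: XHR exposes upload progress events, fetch does not.
function uploadWithProgress(
  url: string,
  formData: FormData,
  onPercent: (pct: number) => void,
): Promise<string> {
  return new Promise((resolve, reject) => {
    const xhr = new XMLHttpRequest();
    xhr.upload.addEventListener('progress', (event) => {
      if (event.lengthComputable) {
        onPercent(+((event.loaded / event.total) * 100).toFixed(2));
      }
    });
    xhr.onload = () => resolve(xhr.responseText); // e.g. JSON with a task id
    xhr.onerror = () => reject(new Error('upload failed'));
    xhr.open('POST', url);
    xhr.send(formData);
  });
}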
7
frontend/svg.d.ts
vendored
Normal file
@@ -0,0 +1,7 @@
declare module '*.svg' {
import * as React from 'react';

export const ReactComponent: React.FunctionComponent<
React.SVGProps<SVGSVGElement> & { title?: string }
>;
}
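With this declaration included (see the tsconfig change below) and the svgr plugin enabled in vite.config, named ReactComponent imports from .svg files type-check. A short usage sketch in a .tsx file (component and prop names are illustrative):

// Typed usage of the ReactComponent export declared above.
import { ReactComponent as LikeIcon } from './assets/like.svg';

export function LikeButton(props: { onClick: () => void }) {
  // SVGProps<SVGSVGElement> allows className, onClick, and title here.
  return <LikeIcon className="cursor-pointer" onClick={props.onClick} />;
}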
@@ -15,10 +15,17 @@ module.exports = {
'gray-1000': '#F6F6F6',
'gray-2000': 'rgba(0, 0, 0, 0.5)',
'gray-3000': 'rgba(243, 243, 243, 1)',
'gray-4000': '#949494',
'gray-5000': '#BBBBBB',
'gray-6000': '#757575',
'red-1000': 'rgb(254, 202, 202)',
'red-2000': '#F44336',
'red-3000': '#621B16',
'blue-1000': '#7D54D1',
'blue-2000': '#002B49',
'blue-3000': '#4B02E2',
'blue-4000': 'rgba(0, 125, 255, 0.36)',
'blue-5000': 'rgba(0, 125, 255)',
},
},
},
@@ -16,6 +16,6 @@
"noEmit": true,
"jsx": "react-jsx"
},
"include": ["src"],
"include": ["src", "svg.d.ts"],
"references": [{ "path": "./tsconfig.node.json" }]
}
@@ -1,7 +1,8 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';
import svgr from 'vite-plugin-svgr';

// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
plugins: [react(), svgr()],
});
@@ -1,20 +1,13 @@
import ast
import json
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

import dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
import ast

dotenv.load_dotenv()

ps = list(Path("inputs").glob("**/*.py"))
data = []
sources = []
@@ -24,13 +17,6 @@ for p in ps:
sources.append(p)

# with open('inputs/client.py', 'r') as f:
# tree = ast.parse(f.read())

# print(tree)

def get_functions_in_class(node):
functions = []
functions_code = []
@@ -64,21 +50,9 @@ for code in data:
c1 += 1

# save the structure dict as json
import json
with open('structure_dict.json', 'w') as f:
json.dump(structure_dict, f)

# llm = OpenAI(temperature=0)
# prompt = PromptTemplate(
# input_variables=["code"],
# template="Code: {code}, Documentation: ",
# )
#
# print(prompt.format(code="print('hello world')"))
# print(llm(prompt.format(code="print('hello world')")))

if not Path("outputs").exists():
Path("outputs").mkdir()

@@ -119,8 +93,3 @@ for source, classes in structure_dict.items():
else:
with open(f"outputs/{source_w}", "a") as f:
f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")
@@ -1,24 +1,20 @@
import os
import sys
import nltk
import dotenv
import typer
import ast

from collections import defaultdict
from pathlib import Path
from typing import List, Optional

from langchain.text_splitter import RecursiveCharacterTextSplitter
import dotenv
import nltk
import typer

from parser.file.bulk import SimpleDirectoryReader
from parser.schema.base import Document
from parser.open_ai_func import call_openai_api, get_user_permission
from parser.py2doc import transform_to_docs
from parser.py2doc import extract_functions_and_classes as extract_py
from parser.js2doc import extract_functions_and_classes as extract_js
from parser.java2doc import extract_functions_and_classes as extract_java

from parser.js2doc import extract_functions_and_classes as extract_js
from parser.open_ai_func import call_openai_api, get_user_permission
from parser.py2doc import extract_functions_and_classes as extract_py
from parser.py2doc import transform_to_docs
from parser.schema.base import Document
from parser.token_func import group_split

dotenv.load_dotenv()

@@ -28,25 +24,32 @@ nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

#Splits all files in specified folder to documents
def metadata_from_filename(title):
return {'title': title}

# Splits all files in specified folder to documents
@app.command()
def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
help="Whether to skip price confirmation"),
help="Whether to skip price confirmation"),
dir: Optional[List[str]] = typer.Option(["inputs"],
help="""List of paths to directory for index creation.
E.g. --dir inputs --dir inputs2"""),
file: Optional[List[str]] = typer.Option(None,
help="""File paths to use (Optional; overrides dir).
help="""File paths to use (Optional; overrides dir).
E.g. --file inputs/1.md --file inputs/2.md"""),
recursive: Optional[bool] = typer.Option(True,
help="Whether to recursively search in subdirectories."),
limit: Optional[int] = typer.Option(None,
help="Maximum number of files to read."),
recursive: Optional[bool] = typer.Option(True, help="Whether to recursively search in subdirectories."),
limit: Optional[int] = typer.Option(None, help="Maximum number of files to read."),
formats: Optional[List[str]] = typer.Option([".rst", ".md"],
help="""List of required extensions (list with .)
Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html"""),
exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles).")):

help="""List of required extensions (list with .)
Currently supported:
.rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles)."),
sample: Optional[bool] = typer.Option(False,
help="Whether to output sample of the first 5 split documents."),
token_check: Optional[bool] = typer.Option(True, help="Whether to group small documents and split large."),
min_tokens: Optional[int] = typer.Option(150, help="Minimum number of tokens to not group."),
max_tokens: Optional[int] = typer.Option(2000, help="Maximum number of tokens to not split."),
):
"""
Creates index from specified location or files.
By default /inputs folder is used, .rst and .md are parsed.
@@ -55,12 +58,22 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
def process_one_docs(directory, folder_name):
raw_docs = SimpleDirectoryReader(input_dir=directory, input_files=file, recursive=recursive,
required_exts=formats, num_files_limit=limit,
exclude_hidden=exclude).load_data()
raw_docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()

# Here we split the documents, as needed, into smaller chunks.
# We do this due to the context limits of the LLMs.
text_splitter = RecursiveCharacterTextSplitter()
docs = text_splitter.split_documents(raw_docs)
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens,
token_check=token_check)
# Old method
# text_splitter = RecursiveCharacterTextSplitter()
# docs = text_splitter.split_documents(raw_docs)

# Sample feature
if sample:
for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text)

docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

# Here we check for command line arguments for bot calls.
# If no argument exists or the yes is not True, then the
@@ -88,12 +101,11 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,

@app.command()
def convert(dir: Optional[str] = typer.Option("inputs",
help="""Path to directory to make documentation for.
help="""Path to directory to make documentation for.
E.g. --dir inputs """),
formats: Optional[str] = typer.Option("py",
help="""Required language.
help="""Required language.
py, js, java supported for now""")):

"""
Creates documentation linked to original functions from specified location.
By default /inputs folder is used, .py is parsed.
@@ -107,5 +119,7 @@ def convert(dir: Optional[str] = typer.Option("inputs",
else:
raise Exception("Sorry, language not supported yet")
transform_to_docs(functions_dict, classes_dict, formats, dir)

if __name__ == "__main__":
app()
app()
@@ -1,38 +1,42 @@
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
from pathlib import Path

import dotenv
import faiss
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price

def call_openai_api():
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
pickle.dump(store, f)

def get_user_permission():
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
docs_content = (" ".join(docs))
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api()
@@ -41,7 +45,8 @@ def get_user_permission():
else:
print("The API was not called. No money was spent.")

#Load .env file

# Load .env file
dotenv.load_dotenv()

ap = ArgumentParser("Script for training DocsGPT on .rst documentation files.")
@@ -1,71 +1,75 @@
import os
import pickle
import dotenv
import tiktoken
import sys
import faiss
import shutil
import sys
from argparse import ArgumentParser
from pathlib import Path
from langchain.vectorstores import FAISS

import dotenv
import faiss
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from sphinx.cmd.build import main as sphinx_main
from argparse import ArgumentParser

def convert_rst_to_txt(src_dir, dst_dir):
# Check if the source directory exists
if not os.path.exists(src_dir):
raise Exception("Source directory does not exist")
# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
for file in files:
# Check if the file has .rst extension
if file.endswith(".rst"):
# Construct the full path of the file
src_file = os.path.join(root, file.replace(".rst", ""))
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={src_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
elif file.endswith(".md"):
# Rename the .md file to .rst file
src_file = os.path.join(root, file)
dst_file = os.path.join(root, file.replace(".md", ".rst"))
os.rename(src_file, dst_file)
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={dst_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
# Check if the source directory exists
if not os.path.exists(src_dir):
raise Exception("Source directory does not exist")
# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
for file in files:
# Check if the file has .rst extension
if file.endswith(".rst"):
# Construct the full path of the file
src_file = os.path.join(root, file.replace(".rst", ""))
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={src_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
elif file.endswith(".md"):
# Rename the .md file to .rst file
src_file = os.path.join(root, file)
dst_file = os.path.join(root, file.replace(".md", ".rst"))
os.rename(src_file, dst_file)
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={dst_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())

def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price

def call_openai_api():
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
pickle.dump(store, f)

def get_user_permission():
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
docs_content = (" ".join(docs))
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api()
@@ -74,6 +78,7 @@ def get_user_permission():
else:
print("The API was not called. No money was spent.")

ap = ArgumentParser("Script for training DocsGPT on Sphinx documentation")
ap.add_argument("-i", "--inputs",
type=str,
@@ -81,17 +86,17 @@ ap.add_argument("-i", "--inputs",
help="Directory containing documentation files")
args = ap.parse_args()

#Load .env file
# Load .env file
dotenv.load_dotenv()

#Directory to vector
# Directory to vector
src_dir = args.inputs
dst_dir = "tmp"

convert_rst_to_txt(src_dir, dst_dir)

# Here we load in the data in the format that Notion exports it in.
ps = list(Path("tmp/"+ src_dir).glob("**/*.txt"))
ps = list(Path("tmp/" + src_dir).glob("**/*.txt"))

# parse all child directories
data = []
@@ -3,7 +3,6 @@ from abc import abstractmethod
from typing import Any, List

from langchain.docstore.document import Document as LCDocument

from parser.schema.base import Document
@@ -1,8 +1,5 @@
"""Simple reader that reads files of different formats from a directory."""
import logging
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

from parser.file.base import BaseReader
from parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
@@ -21,6 +20,7 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".md": MarkdownParser(),
".rst": RstParser(),
".html": HTMLParser(),
".mdx": MarkdownParser(),
}

@@ -51,16 +51,17 @@ class SimpleDirectoryReader(BaseReader):
"""

def __init__(
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
chunk_size_max: int = 2048,
) -> None:
"""Initialize with parameters."""
super().__init__()
@@ -74,6 +75,8 @@ class SimpleDirectoryReader(BaseReader):
self.exclude_hidden = exclude_hidden
self.required_exts = required_exts
self.num_files_limit = num_files_limit
print("input_files")
print(input_files)

if input_files:
self.input_files = []
@@ -99,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
elif self.exclude_hidden and input_file.name.startswith("."):
continue
elif (
self.required_exts is not None
and input_file.suffix not in self.required_exts
self.required_exts is not None
and input_file.suffix not in self.required_exts
):
continue
else:
@@ -111,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
new_input_files.extend(sub_input_files)

if self.num_files_limit is not None and self.num_files_limit > 0:
new_input_files = new_input_files[0 : self.num_files_limit]
new_input_files = new_input_files[0: self.num_files_limit]

# print total number of files added
logging.debug(
@@ -147,10 +150,15 @@ class SimpleDirectoryReader(BaseReader):
data = f.read()
if isinstance(data, List):
data_list.extend(data)
if self.file_metadata is not None:
for _ in range(len(data)):
metadata_list.append(self.file_metadata(str(input_file)))
else:
data_list.append(str(data))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))

if concatenate:
return [Document("\n".join(data_list))]
@@ -9,6 +9,7 @@ from typing import Dict, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
|
||||
"""HTML parser."""
|
||||
|
||||
@@ -23,21 +24,20 @@ class HTMLParser(BaseParser):
|
||||
Union[str, List[str]]: a string or a List of strings.
|
||||
"""
|
||||
try:
|
||||
import unstructured
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
except ImportError:
|
||||
raise ValueError("unstructured package is required to parse HTML files.")
|
||||
from unstructured.partition.html import partition_html
|
||||
from unstructured.staging.base import convert_to_isd
|
||||
from unstructured.cleaners.core import clean
|
||||
|
||||
         # Using the unstructured library to convert the html to isd format
         # isd sample : isd = [
-        #     {"text": "My Title", "type": "Title"},
-        #     {"text": "My Narrative", "type": "NarrativeText"}
-        # ]
+        # {"text": "My Title", "type": "Title"},
+        # {"text": "My Narrative", "type": "NarrativeText"}
+        # ]
         with open(file, "r", encoding="utf-8") as fp:
             elements = partition_html(file=fp)
-            isd = convert_to_isd(elements)
+        isd = convert_to_isd(elements)

         # Removing non ascii charactwers from isd_el['text']
         for isd_el in isd:
@@ -46,15 +46,15 @@ class HTMLParser(BaseParser):
         # Removing all the \n characters from isd_el['text'] using regex and replace with single space
         # Removing all the extra spaces from isd_el['text'] using regex and replace with single space
         for isd_el in isd:
-            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
-            isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
+            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
+            isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)

         # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
         for isd_el in isd:
-            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
+            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)

         # Creating a list of all the indexes of isd_el['type'] = 'Title'
-        title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
+        title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']

         # Creating 'Chunks' - List of lists of strings
         # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
         Chunks = [[]]
         final_chunks = list(list())

-        for i,isd_el in enumerate(isd):
+        for i, isd_el in enumerate(isd):
             if i in title_indexes:
                 Chunks.append([])
             Chunks[-1].append(isd_el['text'])

-        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
+        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25
+        # TODO: This value can be a user defined variable
         for chunk in Chunks:
             # sum of lenth of all the strings in the chunk
             sum = 0
             sum += len(str(chunk))
             if sum < 25:
                 Chunks.remove(chunk)
-            else :
+            else:
                 # appending all the approved chunks to final_chunks as a single string
                 final_chunks.append(" ".join([str(item) for item in chunk]))
         return final_chunks
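Two things worth flagging in the chunking hunk above, neither introduced by this change: `Chunks.remove(chunk)` mutates the list while `for chunk in Chunks:` iterates it, which can silently skip the element that follows each removal, and `sum` shadows the builtin. A sketch of the same group-by-Title logic written to avoid both, using hand-made isd values:

    isd = [  # illustrative sample in the isd shape
        {"text": "My Title", "type": "Title"},
        {"text": "My Narrative", "type": "NarrativeText"},
        {"text": "Tiny", "type": "Title"},  # too short, dropped by the length cutoff
    ]
    title_indexes = [i for i, el in enumerate(isd) if el["type"] == "Title"]

    chunks = [[]]
    for i, el in enumerate(isd):
        if i in title_indexes:
            chunks.append([])  # start a new chunk at every Title element
        chunks[-1].append(el["text"])

    # Build the result in a fresh list instead of removing during iteration.
    final_chunks = [
        " ".join(str(item) for item in chunk)
        for chunk in chunks
        if len(str(chunk)) >= 25  # same < 25 cutoff as the code above
    ]
    print(final_chunks)  # ['My Title My Narrative']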
@@ -7,6 +7,7 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union, cast

+import tiktoken
 from parser.file.base_parser import BaseParser
@@ -19,19 +20,33 @@ class MarkdownParser(BaseParser):
     """

     def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        # remove_tables: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            max_tokens: int = 2048,
+            # remove_tables: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
         self._remove_hyperlinks = remove_hyperlinks
         self._remove_images = remove_images
+        self._max_tokens = max_tokens
         # self._remove_tables = remove_tables

+    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
+                          current_text: str):
+        """Append to tups chunk."""
+        num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
+        if num_tokens > self._max_tokens:
+            chunks = [current_text[i:i + self._max_tokens] for i in range(0, len(current_text), self._max_tokens)]
+            for chunk in chunks:
+                tups.append((current_header, chunk))
+        else:
+            tups.append((current_header, current_text))
+        return tups
+
     def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
         """Convert a markdown file to a dictionary.
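One subtlety in the new `tups_chunk_append`: the threshold is measured in cl100k_base tokens, but the oversize text is then sliced by characters, so each piece is `max_tokens` characters long and therefore well under `max_tokens` tokens — conservative rather than wrong. The branch can be exercised standalone; tiktoken's `get_encoding`/`encode` are the same calls the method uses, while the text and the tiny limit are illustrative:

    import tiktoken

    enc = tiktoken.get_encoding("cl100k_base")
    max_tokens = 8  # deliberately tiny so the split branch triggers
    text = "one two three four five six seven eight nine ten"

    num_tokens = len(enc.encode(text))  # counts tokens, not characters
    if num_tokens > max_tokens:
        # character-based slicing, exactly as tups_chunk_append does
        pieces = [text[i:i + max_tokens] for i in range(0, len(text), max_tokens)]
    else:
        pieces = [text]
    print(num_tokens, pieces)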
@@ -50,13 +65,13 @@ class MarkdownParser(BaseParser):
                 if current_header is not None:
                     if current_text == "" or None:
                         continue
-                    markdown_tups.append((current_header, current_text))
+                    markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)

                 current_header = line
                 current_text = ""
             else:
                 current_text += line + "\n"
-        markdown_tups.append((current_header, current_text))
+        markdown_tups = self.tups_chunk_append(markdown_tups, current_header, current_text)

         if current_header is not None:
             # pass linting, assert keys are defined
@@ -101,7 +116,7 @@ class MarkdownParser(BaseParser):
         return {}

     def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
         with open(filepath, "r") as f:
@@ -116,7 +131,7 @@ class MarkdownParser(BaseParser):
         return markdown_tups

     def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> Union[str, List[str]]:
         """Parse file into string."""
         tups = self.parse_tups(filepath, errors=errors)
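Incidentally, the untouched context line `if current_text == "" or None:` parses as `(current_text == "") or None`, so the `or None` is a no-op. For orientation: the parser accumulates (header, text) tuples, starting a new pair at each heading line, and `parse_file` then flattens them to strings. A rough standalone sketch of that shape, using a plain `startswith("#")` check where the real parser matches headers more carefully:

    markdown_text = "# Setup\npip install docsgpt\n# Usage\nrun the app\n"  # sample input

    tups, current_header, current_text = [], None, ""
    for line in markdown_text.splitlines():
        if line.startswith("#"):  # simplified header check, illustration only
            if current_header is not None and current_text:
                tups.append((current_header, current_text))
            current_header, current_text = line, ""
        else:
            current_text += line + "\n"
    tups.append((current_header, current_text))

    print(tups)  # [('# Setup', 'pip install docsgpt\n'), ('# Usage', 'run the app\n')]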
@@ -5,7 +5,7 @@ Contains parser for md files.
 """
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union

 from parser.file.base_parser import BaseParser
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
     """

     def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        remove_table_excess: bool = True,
-        remove_interpreters: bool = True,
-        remove_directives: bool = True,
-        remove_whitespaces_excess: bool = True,
-        #Be carefull with remove_characters_excess, might cause data loss
-        remove_characters_excess: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            remove_table_excess: bool = True,
+            remove_interpreters: bool = True,
+            remove_directives: bool = True,
+            remove_whitespaces_excess: bool = True,
+            # Be carefull with remove_characters_excess, might cause data loss
+            remove_characters_excess: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
@@ -55,7 +55,8 @@ class RstParser(BaseParser):

         for i, line in enumerate(lines):
             header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
-            if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
+            if header_match and i > 0 and (
+                    len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
                 if current_header is not None:
                     if current_text == "" or None:
                         continue
@@ -68,9 +69,10 @@ class RstParser(BaseParser):
                 current_text = ""
             else:
                 current_text += line + "\n"
+
         rst_tups.append((current_header, current_text))

-        #TODO: Format for rst
+        # TODO: Format for rst
         #
         # if current_header is not None:
         #     # pass linting, assert keys are defined
@@ -134,7 +136,7 @@ class RstParser(BaseParser):
         return {}

     def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
         with open(filepath, "r") as f:
@@ -157,7 +159,7 @@ class RstParser(BaseParser):
         return rst_tups

     def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> Union[str, List[str]]:
         """Parse file into string."""
         tups = self.parse_tups(filepath, errors=errors)
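Note that even after the reflow, the RST header condition still ends with `lines[i - 2] == lines[i - 2]`, which compares a value with itself and is always true; since it sits on the right of an `or`, the whole parenthesised check is always true and the guard effectively reduces to `header_match and i > 0`. Presumably a different second operand was intended. The underline-detection regex itself is easy to exercise in isolation, on sample lines:

    import re

    lines = ["Section title", "-------------", "body text"]  # sample RST fragment
    for i, line in enumerate(lines):
        header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)  # a run of '-' or '=' only
        if header_match and i > 0 and len(lines[i - 1].strip()) == len(header_match.group().strip()):
            print("header:", lines[i - 1])  # prints: header: Section title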
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
     """

     def __init__(
-        self,
-        *args: Any,
-        concat_rows: bool = True,
-        col_joiner: str = ", ",
-        row_joiner: str = "\n",
-        pandas_config: dict = {},
-        **kwargs: Any
+            self,
+            *args: Any,
+            concat_rows: bool = True,
+            col_joiner: str = ", ",
+            row_joiner: str = "\n",
+            pandas_config: dict = {},
+            **kwargs: Any
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
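The PandasCSVParser parameters above imply the parse path even though the method body falls outside this diff: `pandas_config` presumably gets splatted into `pd.read_csv`, each row's cells joined with `col_joiner`, and the rows joined with `row_joiner` when `concat_rows` is set. The following is a guess at that behaviour under those assumptions, not the actual implementation; note also the mutable `{}` default in the real signature, which is harmless only as long as the dict is never mutated:

    import pandas as pd

    def parse_csv(filepath, concat_rows=True, col_joiner=", ", row_joiner="\n", pandas_config=None):
        # Assumed behaviour: the method body is not shown in the diff above.
        df = pd.read_csv(filepath, **(pandas_config or {}))  # forward reader options to pandas
        rows = [col_joiner.join(str(v) for v in row) for row in df.itertuples(index=False)]
        return row_joiner.join(rows) if concat_rows else rows  # one big string, or one per row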
Some files were not shown because too many files have changed in this diff.