Mirror of https://github.com/arc53/DocsGPT.git (synced 2025-11-29 16:43:16 +00:00)
Compare commits: 108 commits (SHA1s)

577d58c92b, 899777632b, bbf55ca46e, 3f88b04c4a, f8910ba136, 6c95d8b13e, e6bccaaf4e,
3b8039a580, fae3f55010, 20c877f75b, 8380858a82, d2358c399d, c3af8a77af, bc5a0b030b,
0b94f1717f, aaa1249a41, ffaa22c49b, 0b78480977, 6b6737613a, da5d62cc1c, 6a68b63192,
ff2e79fe7b, 1800e51b19, ba9c505249, bc9f1c17ed, 74845aed64, e49dd0cc6a, 27c45ae24a,
364a14adaf, 5c560b1dd5, 28b8b88332, e39ef0cc9e, 8098d3fec8, 059ffe09ea, 36a845c29e,
ce6f0dab56, f200ab10a4, 3001688e0e, a73774099e, b28676d52c, eef012b4d1, 1417a1c020,
962becb9a5, 168648e789, 7f56f57778, 6cadddc2fc, 15fd54eac4, 31350e6302, 8742cdae0a,
4efcb388ff, 2d92e95c8a, 47e5d5684a, b723e14d98, c9d24b8f42, 43622e7ab1, 5cfc185ba5,
4be2635fbe, 0beafb8391, 1d2654b9fa, a4bc3673e7, fa080537e8, bdf67a7db7, db4cdc901c,
16a540b89b, e00ec9ac3f, fc760afdfc, cb47bcdb0e, 8d62559ca8, dbe9c4dc18, 1609b4562d,
b6cadb1d65, 7aafac5b5e, 36f0aacb19, 0c1a6a918d, d1f5ff4dba, 77e6df2a1c, 119c037f24,
97fe1abfd8, 3a0163f0fb, d3fab69155, 9395d2c091, b9efb98280, 60bb264663, 316dd2f165,
8a0f700563, 3d0c6eafec, 46e055833b, 80dfdd1cb9, db21678b74, 09c7fe0565, b6dfb2c856,
ab46ba521f, 4a7670f2aa, 9ba86bc174, 2ebe5e051c, 24e98abd15, b7f1a94ba4, 70bc7465c9,
65c2568427, 186e7bf402, e6f1c7d0c3, 87ad9a3190, 0ed45f8754, 116e4401c4, c3c0e643d2,
d5522e7c08, 658b14ba26, 38f8469d0b
.env-template (new file, +2)

@@ -0,0 +1,2 @@
+OPENAI_API_KEY=<LLM api key (for example, open ai key)>
+EMBEDDINGS_KEY=<LLM embeddings api key (for example, open ai key)>
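A minimal sketch of how an application can read these two values at startup, assuming the `python-dotenv` package (which is pinned in requirements.txt below); the variable names come from the template above:

```python
# Minimal sketch: load .env values at startup (assumes python-dotenv is installed).
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

openai_api_key = os.getenv("OPENAI_API_KEY")
embeddings_key = os.getenv("EMBEDDINGS_KEY")
if not openai_api_key:
    raise RuntimeError("OPENAI_API_KEY is not set; copy .env-template to .env and fill it in")
```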
.github/workflows/ci.yml (10 changed lines)

@@ -9,6 +9,10 @@ on:
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
     steps:
       - uses: actions/checkout@v3

@@ -23,17 +27,17 @@ jobs:
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
-         password: ${{ secrets.GHCR_TOKEN }}
+         password: ${{ secrets.GITHUB_TOKEN }}

      # Runs a single command using the runners shell
      - name: Build and push Docker images to docker.io and ghcr.io
-       uses: docker/build-push-action@v2
+       uses: docker/build-push-action@v4
        with:
          file: './application/Dockerfile'
          platforms: linux/amd64
.github/workflows/cife.yml (10 changed lines)

@@ -9,6 +9,10 @@ on:
 jobs:
   deploy:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+
     steps:
       - uses: actions/checkout@v3

@@ -23,17 +27,17 @@ jobs:
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Login to ghcr.io
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
-         password: ${{ secrets.GHCR_TOKEN }}
+         password: ${{ secrets.GITHUB_TOKEN }}

      # Runs a single command using the runners shell
      - name: Build and push Docker images to docker.io and ghcr.io
-       uses: docker/build-push-action@v2
+       uses: docker/build-push-action@v4
        with:
          file: './frontend/Dockerfile'
          platforms: linux/amd64
.github/workflows/lint.yml (new file, +17)

@@ -0,0 +1,17 @@
+name: Python linting
+
+on:
+  push:
+    branches:
+      - '*'
+  pull_request:
+    types: [ opened, synchronize ]
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Lint with Ruff
+        uses: chartboost/ruff-action@v1
.github/workflows/sync_fork.yaml (new file, +41)

@@ -0,0 +1,41 @@
+name: Upstream Sync
+
+permissions:
+  contents: write
+
+on:
+  schedule:
+    - cron: "0 * * * *" # every hour
+  workflow_dispatch:
+
+jobs:
+  sync_latest_from_upstream:
+    name: Sync latest commits from upstream repo
+    runs-on: ubuntu-latest
+    if: ${{ github.event.repository.fork }}
+
+    steps:
+      # Step 1: run a standard checkout action
+      - name: Checkout target repo
+        uses: actions/checkout@v3
+
+      # Step 2: run the sync action
+      - name: Sync upstream changes
+        id: sync
+        uses: aormsby/Fork-Sync-With-Upstream-action@v3.4
+        with:
+          # set your upstream repo and branch
+          upstream_sync_repo: arc53/DocsGPT
+          upstream_sync_branch: main
+          target_sync_branch: main
+          target_repo_token: ${{ secrets.GITHUB_TOKEN }} # automatically generated, no need to set
+
+          # Set test_mode true to run tests instead of the true action!!
+          test_mode: false
+
+      - name: Sync check
+        if: failure()
+        run: |
+          echo "::error::由于权限不足,导致同步失败(这是预期的行为),请前往仓库首页手动执行[Sync fork]。"
+          echo "::error::Due to insufficient permissions, synchronization failed (as expected). Please go to the repository homepage and manually perform [Sync fork]."
+          exit 1
.ruff.toml (new file, +2)

@@ -0,0 +1,2 @@
+# Allow lines to be as long as 120 characters.
+line-length = 120
README.md (19 changed lines)

@@ -55,8 +55,9 @@ You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, pleas
 Note: Make sure you have docker installed

 1. Download this repository with `git clone https://github.com/arc53/DocsGPT.git`
-2. Open docker-compose.yaml and replace <your_api_key> with your OpenAI key (there are 4 places)
+2. Create a .env file in your root directory, set OPENAI_API_KEY to your OpenAI API key, and set VITE_API_STREAMING to true or false depending on whether you want streaming answers
 3. Run `docker-compose build && docker-compose up`
 4. Navigate to http://localhost:5173/

 To stop just run Ctrl + C

@@ -67,19 +68,23 @@ Spin up only 2 containers from docker-compose.yaml (by deleting all services exc
 Make sure you have python 3.10 or 3.11 installed

 1. Navigate to the `/application` folder
-2. Install dependencies
+2. Run `docker-compose -f docker-compose-dev.yaml build && docker-compose -f docker-compose-dev.yaml up -d`
+3. Export required variables
+   `export CELERY_BROKER_URL=redis://localhost:6379/0`
+   `export CELERY_RESULT_BACKEND=redis://localhost:6379/1`
+   `export MONGO_URI=mongodb://localhost:27017/docsgpt`
+4. Install dependencies
    `pip install -r requirements.txt`
-3. Prepare .env file
+5. Prepare .env file
    Copy .env_sample and create .env with your OpenAI API token
-4. Run the app
-   `python app.py`
-5. Start worker with `celery -A app.celery worker -l INFO`
+6. Run the app
+   `python wsgi.py`
+7. Start worker with `celery -A app.celery worker -l INFO`

 To start frontend
 1. Navigate to the `/frontend` folder
 2. Install dependencies
    `npm install`
+3. In the file `.env.development`, instead of `VITE_API_HOST = https://docsapi.arc53.com` use `VITE_API_HOST=http://localhost:5001`
-3. Run the app
+4. Run the app
    `npm run dev`
application/Dockerfile:

@@ -4,7 +4,7 @@ FROM python:3.10-slim-bullseye as builder
 RUN apt-get update && apt-get install -y gcc curl
 RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
 ENV PATH="/root/.cargo/bin:${PATH}"
-RUN pip install --upgrade pip && pip install tiktoken==0.1.2
+RUN pip install --upgrade pip && pip install tiktoken==0.3.3
 COPY requirements.txt .
 RUN pip install -r requirements.txt
application/app.py:

@@ -1,16 +1,20 @@
 import asyncio
 import datetime
 import http.client
 import json
 import os
 import traceback

+import openai
 import dotenv
 import requests
 from celery import Celery
 from celery.result import AsyncResult
-from flask import Flask, request, render_template, send_from_directory, jsonify
+from flask import Flask, request, render_template, send_from_directory, jsonify, Response
 from langchain import FAISS
 from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
-from langchain.chains import ChatVectorDBChain
+from langchain.chains import LLMChain, ConversationalRetrievalChain
+from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
 from langchain.chains.question_answering import load_qa_chain
 from langchain.chat_models import ChatOpenAI
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \

@@ -20,26 +24,19 @@ from langchain.prompts.chat import (
     ChatPromptTemplate,
     SystemMessagePromptTemplate,
     HumanMessagePromptTemplate,
+    AIMessagePromptTemplate,
 )
 from pymongo import MongoClient
 from werkzeug.utils import secure_filename
+from langchain.llms import GPT4All

+from core.settings import settings
 from error import bad_request
 from worker import ingest_worker

 # os.environ["LANGCHAIN_HANDLER"] = "langchain"

-if os.getenv("LLM_NAME") is not None:
-    llm_choice = os.getenv("LLM_NAME")
-else:
-    llm_choice = "openai_chat"
-
-if os.getenv("EMBEDDINGS_NAME") is not None:
-    embeddings_choice = os.getenv("EMBEDDINGS_NAME")
-else:
-    embeddings_choice = "openai_text-embedding-ada-002"
-
-if llm_choice == "manifest":
+if settings.LLM_NAME == "manifest":
     from manifest import Manifest
     from langchain.llms.manifest import ManifestWrapper

@@ -76,27 +73,71 @@ with open("prompts/chat_combine_prompt.txt", "r") as f:
 with open("prompts/chat_reduce_prompt.txt", "r") as f:
     chat_reduce_template = f.read()

-if os.getenv("API_KEY") is not None:
+if settings.API_KEY is not None:
     api_key_set = True
 else:
     api_key_set = False
-if os.getenv("EMBEDDINGS_KEY") is not None:
+if settings.EMBEDDINGS_KEY is not None:
     embeddings_key_set = True
 else:
     embeddings_key_set = False

 app = Flask(__name__)
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER = "inputs"
-app.config['CELERY_BROKER_URL'] = os.getenv("CELERY_BROKER_URL")
-app.config['CELERY_RESULT_BACKEND'] = os.getenv("CELERY_RESULT_BACKEND")
-app.config['MONGO_URI'] = os.getenv("MONGO_URI")
-celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL'], backend=app.config['CELERY_RESULT_BACKEND'])
-celery.conf.update(app.config)
+app.config['CELERY_BROKER_URL'] = settings.CELERY_BROKER_URL
+app.config['CELERY_RESULT_BACKEND'] = settings.CELERY_RESULT_BACKEND
+app.config['MONGO_URI'] = settings.MONGO_URI
+celery = Celery()
+celery.config_from_object('celeryconfig')
 mongo = MongoClient(app.config['MONGO_URI'])
 db = mongo["docsgpt"]
 vectors_collection = db["vectors"]


+async def async_generate(chain, question, chat_history):
+    result = await chain.arun({"question": question, "chat_history": chat_history})
+    return result
+
+
+def run_async_chain(chain, question, chat_history):
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    result = {}
+    try:
+        answer = loop.run_until_complete(async_generate(chain, question, chat_history))
+    finally:
+        loop.close()
+    result["answer"] = answer
+    return result
+
+
+def get_vectorstore(data):
+    if "active_docs" in data:
+        if data["active_docs"].split("/")[0] == "local":
+            if data["active_docs"].split("/")[1] == "default":
+                vectorstore = ""
+            else:
+                vectorstore = "indexes/" + data["active_docs"]
+        else:
+            vectorstore = "vectors/" + data["active_docs"]
+        if data['active_docs'] == "default":
+            vectorstore = ""
+    else:
+        vectorstore = ""
+    return vectorstore
+
+
+def get_docsearch(vectorstore, embeddings_key):
+    if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002":
+        docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
+    elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
+        docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
+    elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large":
+        docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
+    elif settings.EMBEDDINGS_NAME == "cohere_medium":
+        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
+    return docsearch
+
+
 @celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
     resp = ingest_worker(self, directory, formats, name_job, filename, user)

@@ -105,8 +146,68 @@ def ingest(self, directory, formats, name_job, filename, user):

 @app.route("/")
 def home():
-    return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
-                           embeddings_choice=embeddings_choice)
+    return render_template("index.html", api_key_set=api_key_set, llm_choice=settings.LLM_NAME,
+                           embeddings_choice=settings.EMBEDDINGS_NAME)
+
+
+def complete_stream(question, docsearch, chat_history, api_key):
+    openai.api_key = api_key
+    llm = ChatOpenAI(openai_api_key=api_key)
+    docs = docsearch.similarity_search(question, k=2)
+    # join all page_content together with a newline
+    docs_together = "\n".join([doc.page_content for doc in docs])
+    p_chat_combine = chat_combine_template.replace("{summaries}", docs_together)
+    messages_combine = [{"role": "system", "content": p_chat_combine}]
+    if len(chat_history) > 1:
+        tokens_current_history = 0
+        # count tokens in history
+        chat_history.reverse()
+        for i in chat_history:
+            if "prompt" in i and "response" in i:
+                tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                    tokens_current_history += tokens_batch
+                    messages_combine.append({"role": "user", "content": i["prompt"]})
+                    messages_combine.append({"role": "system", "content": i["response"]})
+    messages_combine.append({"role": "user", "content": question})
+    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo",
+                                              messages=messages_combine, stream=True, max_tokens=500, temperature=0)
+
+    for line in completion:
+        if 'content' in line['choices'][0]['delta']:
+            # check if the delta contains content
+            data = json.dumps({"answer": str(line['choices'][0]['delta']['content'])})
+            yield f"data: {data}\n\n"
+    # send data.type = "end" to indicate that the stream has ended as json
+    data = json.dumps({"type": "end"})
+    yield f"data: {data}\n\n"
+
+
+@app.route("/stream", methods=['POST', 'GET'])
+def stream():
+    # get parameter from url question
+    question = request.args.get('question')
+    history = request.args.get('history')
+    # history to json object from string
+    history = json.loads(history)
+
+    # check if active_docs is set
+    if not api_key_set:
+        api_key = request.args.get("api_key")
+    else:
+        api_key = settings.API_KEY
+    if not embeddings_key_set:
+        embeddings_key = request.args.get("embeddings_key")
+    else:
+        embeddings_key = settings.EMBEDDINGS_KEY
+    if "active_docs" in request.args:
+        vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
+    else:
+        vectorstore = ""
+    docsearch = get_docsearch(vectorstore, embeddings_key)
+
+    # question = "Hi"
+    return Response(complete_stream(question, docsearch,
+                                    chat_history=history, api_key=api_key), mimetype='text/event-stream')


 @app.route("/api/answer", methods=["POST"])

@@ -118,85 +219,82 @@ def api_answer():
     if not api_key_set:
         api_key = data["api_key"]
     else:
-        api_key = os.getenv("API_KEY")
+        api_key = settings.API_KEY
     if not embeddings_key_set:
         embeddings_key = data["embeddings_key"]
     else:
-        embeddings_key = os.getenv("EMBEDDINGS_KEY")
+        embeddings_key = settings.EMBEDDINGS_KEY

     # use try and except to check for exception
     try:
         # check if the vectorstore is set
-        if "active_docs" in data:
-            if data["active_docs"].split("/")[0] == "local":
-                vectorstore = "indexes/" + data["active_docs"]
-            else:
-                vectorstore = "vectors/" + data["active_docs"]
-            if data['active_docs'] == "default":
-                vectorstore = ""
-        else:
-            vectorstore = ""
-        print(vectorstore)
-        # vectorstore = "outputs/inputs/"
+        vectorstore = get_vectorstore(data)
         # loading the index and the store and the prompt template
         # Note if you have used other embeddings than OpenAI, you need to change the embeddings
-        if embeddings_choice == "openai_text-embedding-ada-002":
-            docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
-        elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
-            docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
-        elif embeddings_choice == "huggingface_hkunlp/instructor-large":
-            docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
-        elif embeddings_choice == "cohere_medium":
-            docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
-
-        # create a prompt template
-        if history:
-            history = json.loads(history)
-            template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}",
-                                                                                           history[1])
-            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp,
-                                      template_format="jinja2")
-        else:
-            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
-                                      template_format="jinja2")
+        docsearch = get_docsearch(vectorstore, embeddings_key)

         q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
                                   template_format="jinja2")
-        if llm_choice == "openai_chat":
-            # llm = ChatOpenAI(openai_api_key=api_key, model_name="gpt-4")
-            llm = ChatOpenAI(openai_api_key=api_key)
-            messages_combine = [
-                SystemMessagePromptTemplate.from_template(chat_combine_template),
-                HumanMessagePromptTemplate.from_template("{question}")
-            ]
+        if settings.LLM_NAME == "openai_chat":
+            llm = ChatOpenAI(openai_api_key=api_key)  # optional parameter: model_name="gpt-4"
+            messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
+            if history:
+                tokens_current_history = 0
+                # count tokens in history
+                history.reverse()
+                for i in history:
+                    if "prompt" in i and "response" in i:
+                        tokens_batch = llm.get_num_tokens(i["prompt"]) + llm.get_num_tokens(i["response"])
+                        if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
+                            tokens_current_history += tokens_batch
+                            messages_combine.append(HumanMessagePromptTemplate.from_template(i["prompt"]))
+                            messages_combine.append(AIMessagePromptTemplate.from_template(i["response"]))
+            messages_combine.append(HumanMessagePromptTemplate.from_template("{question}"))
+            import sys
+            print(messages_combine, file=sys.stderr)
             p_chat_combine = ChatPromptTemplate.from_messages(messages_combine)
-            messages_reduce = [
-                SystemMessagePromptTemplate.from_template(chat_reduce_template),
-                HumanMessagePromptTemplate.from_template("{question}")
-            ]
-            p_chat_reduce = ChatPromptTemplate.from_messages(messages_reduce)
-        elif llm_choice == "openai":
+        elif settings.LLM_NAME == "openai":
             llm = OpenAI(openai_api_key=api_key, temperature=0)
-        elif llm_choice == "manifest":
+        elif settings.LLM_NAME == "manifest":
             llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
-        elif llm_choice == "huggingface":
+        elif settings.LLM_NAME == "huggingface":
             llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
-        elif llm_choice == "cohere":
+        elif settings.LLM_NAME == "cohere":
             llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+        elif settings.LLM_NAME == "gpt4all":
+            llm = GPT4All(model=settings.MODEL_PATH)
+        else:
+            raise ValueError("unknown LLM model")

-        if llm_choice == "openai_chat":
-            chain = ChatVectorDBChain.from_llm(
-                llm=llm,
-                vectorstore=docsearch,
-                prompt=p_chat_combine,
-                qa_prompt=p_chat_reduce,
-                top_k_docs_for_context=3,
-                return_source_documents=False)
-            result = chain({"question": question, "chat_history": []})
+        if settings.LLM_NAME == "openai_chat":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
+        elif settings.LLM_NAME == "gpt4all":
+            question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
+            doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine)
+            chain = ConversationalRetrievalChain(
+                retriever=docsearch.as_retriever(k=2),
+                question_generator=question_generator,
+                combine_docs_chain=doc_chain,
+            )
+            chat_history = []
+            # result = chain({"question": question, "chat_history": chat_history})
+            # generate async with async generate method
+            result = run_async_chain(chain, question, chat_history)
         else:
             qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
-                                     combine_prompt=c_prompt, question_prompt=q_prompt)
-            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
+                                     combine_prompt=chat_combine_template, question_prompt=q_prompt)
+            chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=3)
             result = chain({"query": question})

         print(result)

@@ -207,7 +305,7 @@ def api_answer():
     result['answer'] = result['answer'].replace("\\n", "\n")
     try:
         result['answer'] = result['answer'].split("SOURCES:")[0]
-    except:
+    except Exception:
         pass

     # mock result

@@ -276,7 +374,7 @@ def api_feedback():
             "feedback": feedback
         })
     )
-    return {"status": 'ok'}
+    return {"status": http.client.responses.get(response.status_code, 'ok')}


 @app.route('/api/combine', methods=['GET'])

@@ -285,7 +383,17 @@ def combined_json():
     """Provide json file with combined available indexes."""
     # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json

-    data = []
+    data = [{
+        "name": 'default',
+        "language": 'default',
+        "version": '',
+        "description": 'default',
+        "fullName": 'default',
+        "date": 'default',
+        "docLink": 'default',
+        "model": settings.EMBEDDINGS_NAME,
+        "location": "local"
+    }]
     # structure: name, language, version, description, fullName, date, docLink
     # append data from vectors_collection
     for index in vectors_collection.find({'user': user}):

@@ -297,7 +405,7 @@ def combined_json():
             "fullName": index['name'],
             "date": index['date'],
             "docLink": index['location'],
-            "model": embeddings_choice,
+            "model": settings.EMBEDDINGS_NAME,
             "location": "local"
         })

@@ -335,7 +443,7 @@ def upload_file():
         os.makedirs(save_dir)

     file.save(os.path.join(save_dir, filename))
-    task = ingest.delay('temp', [".rst", ".md", ".pdf"], job_name, filename, user)
+    task = ingest.delay('temp', [".rst", ".md", ".pdf", ".txt"], job_name, filename, user)
     # task id
     task_id = task.id
     return {"status": 'ok', "task_id": task_id}

@@ -388,7 +496,7 @@ def upload_index_files():
         "language": job_name,
         "location": save_dir,
         "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
-        "model": embeddings_choice,
+        "model": settings.EMBEDDINGS_NAME,
         "type": "local"
     })
     return {"status": 'ok'}
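To illustrate the `/stream` endpoint added above: it emits server-sent events framed as `data: {...}` lines and finishes with `{"type": "end"}`, as in `complete_stream`. Below is a hedged client sketch using `requests`; the host and port (the dev default mentioned in the README) and the exact parameters a given deployment requires are assumptions:

```python
# Sketch of a client for the /stream SSE endpoint (host/port assumed to be the dev default).
import json

import requests

params = {
    "question": "What is DocsGPT?",
    "history": json.dumps([]),   # the endpoint json.loads() this parameter
    "active_docs": "default",
    # "api_key": "...",          # only needed when the server has no API_KEY configured
}
with requests.get("http://localhost:5001/stream", params=params, stream=True) as resp:
    for raw_line in resp.iter_lines():
        if not raw_line or not raw_line.startswith(b"data: "):
            continue
        event = json.loads(raw_line[len(b"data: "):])
        if event.get("type") == "end":  # the server signals completion with {"type": "end"}
            break
        print(event["answer"], end="", flush=True)
```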
application/celeryconfig.py (new file, +8)

@@ -0,0 +1,8 @@
+import os
+
+broker_url = os.getenv("CELERY_BROKER_URL")
+result_backend = os.getenv("CELERY_RESULT_BACKEND")
+
+task_serializer = 'json'
+result_serializer = 'json'
+accept_content = ['json']
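A short sketch of how this module is consumed: `app.py` above builds its Celery app with `config_from_object('celeryconfig')`, so the settings are picked up by module name. The task below is illustrative only; the real work in this codebase happens in `ingest()`/`ingest_worker()`:

```python
# Sketch: a Celery app picking up application/celeryconfig.py by module name.
# Run from the application/ directory so 'celeryconfig' is importable.
from celery import Celery

celery = Celery()
celery.config_from_object("celeryconfig")  # reads broker_url, result_backend, serializers


@celery.task(bind=True)
def ping(self):
    # illustrative task, not part of the diff
    return "pong"
```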
application/core/__init__.py (new empty file)
application/core/settings.py (new file, +22)

@@ -0,0 +1,22 @@
+from pathlib import Path
+
+from pydantic import BaseSettings
+
+
+class Settings(BaseSettings):
+    LLM_NAME: str = "openai_chat"
+    EMBEDDINGS_NAME: str = "openai_text-embedding-ada-002"
+    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
+    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
+    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
+    MODEL_PATH: str = "./models/gpt4all-model.bin"
+    TOKENS_MAX_HISTORY: int = 150
+
+    API_URL: str = "http://localhost:5001"  # backend url for celery worker
+
+    API_KEY: str = None  # LLM api key
+    EMBEDDINGS_KEY: str = None  # api key for embeddings (if using openai, just copy API_KEY)
+
+
+path = Path(__file__).parent.parent.absolute()
+settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
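Since `Settings` subclasses pydantic's `BaseSettings`, each field can be overridden by an environment variable of the same name or by the `.env` file sitting next to the application. A small usage sketch (the override shown is hypothetical):

```python
# Sketch: consuming the shared settings object (mirrors the imports in app.py/worker.py).
import os

os.environ["LLM_NAME"] = "openai_chat"  # env vars override the class defaults

from core.settings import settings  # instantiation happens at import time

print(settings.LLM_NAME)           # "openai_chat"
print(settings.CELERY_BROKER_URL)  # "redis://localhost:6379/0" unless overridden
```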
application/error.py:

@@ -1,13 +1,15 @@
 from flask import jsonify
 from werkzeug.http import HTTP_STATUS_CODES

-def response_error(code_status,message=None):
-    payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
+
+def response_error(code_status, message=None):
+    payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
     if message:
         payload['message'] = message
     response = jsonify(payload)
     response.status_code = code_status
     return response

-def bad_request(status_code=400,message=''):
-    return response_error(code_status=status_code,message=message)
+
+def bad_request(status_code=400, message=''):
+    return response_error(code_status=status_code, message=message)
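For context, a hedged sketch of how `bad_request` is typically used from a Flask view; the route and the validation rule are illustrative, not taken from the diff:

```python
# Illustrative only: returning a structured 400 from a Flask view via bad_request().
from flask import Flask, request

from error import bad_request  # the helper defined above

app = Flask(__name__)


@app.route("/api/example", methods=["POST"])
def example():
    data = request.get_json()
    if not data or "question" not in data:
        # responds with {"error": "Bad Request", "message": "..."} and HTTP status 400
        return bad_request(400, message="'question' field is required")
    return {"status": "ok"}
```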
application/parser/file/base_parser.py:

@@ -3,7 +3,6 @@ from abc import abstractmethod
 from typing import Any, List

-from langchain.docstore.document import Document as LCDocument
 from parser.schema.base import Document
application/parser/file/bulk.py:

@@ -52,17 +52,17 @@ class SimpleDirectoryReader(BaseReader):
     """

     def __init__(
-        self,
-        input_dir: Optional[str] = None,
-        input_files: Optional[List] = None,
-        exclude_hidden: bool = True,
-        errors: str = "ignore",
-        recursive: bool = True,
-        required_exts: Optional[List[str]] = None,
-        file_extractor: Optional[Dict[str, BaseParser]] = None,
-        num_files_limit: Optional[int] = None,
-        file_metadata: Optional[Callable[[str], Dict]] = None,
-        chunk_size_max: int = 2048,
+            self,
+            input_dir: Optional[str] = None,
+            input_files: Optional[List] = None,
+            exclude_hidden: bool = True,
+            errors: str = "ignore",
+            recursive: bool = True,
+            required_exts: Optional[List[str]] = None,
+            file_extractor: Optional[Dict[str, BaseParser]] = None,
+            num_files_limit: Optional[int] = None,
+            file_metadata: Optional[Callable[[str], Dict]] = None,
+            chunk_size_max: int = 2048,
     ) -> None:
         """Initialize with parameters."""
         super().__init__()

@@ -102,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
             elif self.exclude_hidden and input_file.name.startswith("."):
                 continue
             elif (
-                self.required_exts is not None
-                and input_file.suffix not in self.required_exts
+                    self.required_exts is not None
+                    and input_file.suffix not in self.required_exts
             ):
                 continue
             else:

@@ -114,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
                 new_input_files.extend(sub_input_files)

         if self.num_files_limit is not None and self.num_files_limit > 0:
-            new_input_files = new_input_files[0 : self.num_files_limit]
+            new_input_files = new_input_files[0: self.num_files_limit]

         # print total number of files added
         logging.debug(
@@ -9,6 +9,7 @@ from typing import Dict, Union
 from parser.file.base_parser import BaseParser

+
 class HTMLParser(BaseParser):
     """HTML parser."""

@@ -23,38 +24,37 @@ class HTMLParser(BaseParser):
             Union[str, List[str]]: a string or a List of strings.
         """
-        try:
-            import unstructured
-            from unstructured.partition.html import partition_html
-            from unstructured.staging.base import convert_to_isd
-            from unstructured.cleaners.core import clean
-        except ImportError:
-            raise ValueError("unstructured package is required to parse HTML files.")
+        from unstructured.partition.html import partition_html
+        from unstructured.staging.base import convert_to_isd
+        from unstructured.cleaners.core import clean

         # Using the unstructured library to convert the html to isd format
         # isd sample : isd = [
-        #             {"text": "My Title", "type": "Title"},
-        #             {"text": "My Narrative", "type": "NarrativeText"}
-        #             ]
+        #   {"text": "My Title", "type": "Title"},
+        #   {"text": "My Narrative", "type": "NarrativeText"}
+        # ]
         with open(file, "r", encoding="utf-8") as fp:
             elements = partition_html(file=fp)
             isd = convert_to_isd(elements)

         # Removing non ascii charactwers from isd_el['text']
         for isd_el in isd:
             isd_el['text'] = isd_el['text'].encode("ascii", "ignore").decode()

         # Removing all the \n characters from isd_el['text'] using regex and replace with single space
         # Removing all the extra spaces from isd_el['text'] using regex and replace with single space
         for isd_el in isd:
-            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
-            isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
+            isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
+            isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)

         # more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
         for isd_el in isd:
-            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
+            clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)

         # Creating a list of all the indexes of isd_el['type'] = 'Title'
-        title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
+        title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']

         # Creating 'Chunks' - List of lists of strings
         # each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'

@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
         Chunks = [[]]
         final_chunks = list(list())

-        for i,isd_el in enumerate(isd):
+        for i, isd_el in enumerate(isd):
             if i in title_indexes:
                 Chunks.append([])
             Chunks[-1].append(isd_el['text'])

-        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
+        # Removing all the chunks with sum of lenth of all the strings in the chunk < 25
+        # TODO: This value can be an user defined variable
         for chunk in Chunks:
             # sum of lenth of all the strings in the chunk
             sum = 0
             sum += len(str(chunk))
             if sum < 25:
                 Chunks.remove(chunk)
-            else :
+            else:
                 # appending all the approved chunks to final_chunks as a single string
                 final_chunks.append(" ".join([str(item) for item in chunk]))
         return final_chunks
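The core of `HTMLParser.parse_file` above is title-delimited chunking: every `Title` element starts a new chunk, and chunks whose combined text is under 25 characters are dropped. A standalone sketch of that idea with no `unstructured` dependency (the sample ISD list is invented):

```python
# Standalone sketch of the title-delimited chunking used by HTMLParser (sample data invented).
isd = [
    {"text": "Install", "type": "Title"},
    {"text": "Run pip install to fetch the application dependencies.", "type": "NarrativeText"},
    {"text": "Usage", "type": "Title"},
    {"text": "Start the server and open the web UI in your browser.", "type": "NarrativeText"},
]

title_indexes = [i for i, el in enumerate(isd) if el["type"] == "Title"]

chunks = [[]]
for i, el in enumerate(isd):
    if i in title_indexes:
        chunks.append([])      # a Title starts a new chunk
    chunks[-1].append(el["text"])

# keep only chunks whose combined text is reasonably long (25 chars in the parser)
final_chunks = [" ".join(c) for c in chunks if len(str(c)) >= 25]
print(final_chunks)
```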
@@ -7,8 +7,8 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union, cast

-from parser.file.base_parser import BaseParser
 import tiktoken
+from parser.file.base_parser import BaseParser


 class MarkdownParser(BaseParser):

@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
     """

     def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        max_tokens: int = 2048,
-        # remove_tables: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            max_tokens: int = 2048,
+            # remove_tables: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)

@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
         self._max_tokens = max_tokens
         # self._remove_tables = remove_tables

-    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
+    def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
+                          current_text: str):
         """Append to tups chunk."""
         num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
         if num_tokens > self._max_tokens:

@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
         else:
             tups.append((current_header, current_text))
         return tups
+
     def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
         """Convert a markdown file to a dictionary.

@@ -115,7 +116,7 @@ class MarkdownParser(BaseParser):
         return {}

     def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
         with open(filepath, "r") as f:

@@ -130,7 +131,7 @@ class MarkdownParser(BaseParser):
         return markdown_tups

     def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> Union[str, List[str]]:
         """Parse file into string."""
         tups = self.parse_tups(filepath, errors=errors)
@@ -5,10 +5,10 @@ Contains parser for md files.
 """
 import re
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union, cast
+from typing import Any, Dict, List, Optional, Tuple, Union

 from parser.file.base_parser import BaseParser
 import tiktoken


 class RstParser(BaseParser):
     """reStructuredText parser.

@@ -19,17 +19,17 @@ class RstParser(BaseParser):
     """

     def __init__(
-        self,
-        *args: Any,
-        remove_hyperlinks: bool = True,
-        remove_images: bool = True,
-        remove_table_excess: bool = True,
-        remove_interpreters: bool = True,
-        remove_directives: bool = True,
-        remove_whitespaces_excess: bool = True,
-        #Be carefull with remove_characters_excess, might cause data loss
-        remove_characters_excess: bool = True,
-        **kwargs: Any,
+            self,
+            *args: Any,
+            remove_hyperlinks: bool = True,
+            remove_images: bool = True,
+            remove_table_excess: bool = True,
+            remove_interpreters: bool = True,
+            remove_directives: bool = True,
+            remove_whitespaces_excess: bool = True,
+            # Be carefull with remove_characters_excess, might cause data loss
+            remove_characters_excess: bool = True,
+            **kwargs: Any,
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)

@@ -41,7 +41,6 @@ class RstParser(BaseParser):
         self._remove_whitespaces_excess = remove_whitespaces_excess
         self._remove_characters_excess = remove_characters_excess

-
     def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
         """Convert a reStructuredText file to a dictionary.

@@ -56,7 +55,8 @@ class RstParser(BaseParser):
         for i, line in enumerate(lines):
             header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
-            if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
+            if header_match and i > 0 and (
+                    len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
                 if current_header is not None:
                     if current_text == "" or None:
                         continue

@@ -72,7 +72,7 @@ class RstParser(BaseParser):
         rst_tups.append((current_header, current_text))

-        #TODO: Format for rst
+        # TODO: Format for rst
         #
         # if current_header is not None:
         #     # pass linting, assert keys are defined

@@ -136,7 +136,7 @@ class RstParser(BaseParser):
         return {}

     def parse_tups(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
         with open(filepath, "r") as f:

@@ -159,7 +159,7 @@ class RstParser(BaseParser):
         return rst_tups

     def parse_file(
-        self, filepath: Path, errors: str = "ignore"
+            self, filepath: Path, errors: str = "ignore"
     ) -> Union[str, List[str]]:
         """Parse file into string."""
         tups = self.parse_tups(filepath, errors=errors)
@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
     """

     def __init__(
-        self,
-        *args: Any,
-        concat_rows: bool = True,
-        col_joiner: str = ", ",
-        row_joiner: str = "\n",
-        pandas_config: dict = {},
-        **kwargs: Any
+            self,
+            *args: Any,
+            concat_rows: bool = True,
+            col_joiner: str = ", ",
+            row_joiner: str = "\n",
+            pandas_config: dict = {},
+            **kwargs: Any
     ) -> None:
         """Init params."""
         super().__init__(*args, **kwargs)
@@ -1,6 +1,8 @@
 import os
+
 import javalang

+
 def find_files(directory):
     files_list = []
     for root, dirs, files in os.walk(directory):

@@ -9,6 +11,7 @@ def find_files(directory):
             files_list.append(os.path.join(root, file))
     return files_list

+
 def extract_functions(file_path):
     with open(file_path, "r") as file:
         java_code = file.read()

@@ -28,6 +31,7 @@ def extract_functions(file_path):
             methods[method_name] = method_source_code
     return methods

+
 def extract_classes(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()

@@ -47,6 +51,7 @@ def extract_classes(file_path):
         classes[class_name] = class_string
     return classes

+
 def extract_functions_and_classes(directory):
     files = find_files(directory)
     functions_dict = {}

@@ -58,4 +63,4 @@ def extract_functions_and_classes(directory):
         classes = extract_classes(file)
         if classes:
             classes_dict[file] = classes
-    return functions_dict, classes_dict
\ No newline at end of file
+    return functions_dict, classes_dict
@@ -1,6 +1,7 @@
 import os
-import esprima

 import escodegen
+import esprima


 def find_files(directory):

@@ -11,6 +12,7 @@ def find_files(directory):
             files_list.append(os.path.join(root, file))
     return files_list

+
 def extract_functions(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()

@@ -26,7 +28,6 @@ def extract_functions(file_path):
                 func_name = declaration.id.name if declaration.id else '<anonymous>'
                 functions[func_name] = escodegen.generate(declaration.init)
         elif node.type == 'ClassDeclaration':
-            class_name = node.id.name
             for subnode in node.body.body:
                 if subnode.type == 'MethodDefinition':
                     func_name = subnode.key.name

@@ -38,6 +39,7 @@ def extract_functions(file_path):
                     functions[func_name] = escodegen.generate(declaration.init)
     return functions

+
 def extract_classes(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()

@@ -53,6 +55,7 @@ def extract_classes(file_path):
         classes[class_name] = ", ".join(function_names)
     return classes

+
 def extract_functions_and_classes(directory):
     files = find_files(directory)
     functions_dict = {}
application/parser/open_ai_func.py:

@@ -1,32 +1,32 @@
 import os
-import faiss
 import pickle

 import tiktoken
-from langchain.vectorstores import FAISS
 from langchain.embeddings import OpenAIEmbeddings
-
-#from langchain.embeddings import HuggingFaceEmbeddings
-#from langchain.embeddings import HuggingFaceInstructEmbeddings
-#from langchain.embeddings import CohereEmbeddings
-
+from langchain.vectorstores import FAISS
 from retry import retry


+# from langchain.embeddings import HuggingFaceEmbeddings
+# from langchain.embeddings import HuggingFaceInstructEmbeddings
+# from langchain.embeddings import CohereEmbeddings


 def num_tokens_from_string(string: str, encoding_name: str) -> int:
     # Function to convert string to tokens and estimate user cost.
     encoding = tiktoken.get_encoding(encoding_name)
     num_tokens = len(encoding.encode(string))
-    total_price = ((num_tokens/1000) * 0.0004)
+    total_price = ((num_tokens / 1000) * 0.0004)
     return num_tokens, total_price


 @retry(tries=10, delay=60)
 def store_add_texts_with_retry(store, i):
     store.add_texts([i.page_content], metadatas=[i.metadata])
-    #store_pine.add_texts([i.page_content], metadatas=[i.metadata])
+    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])


 def call_openai_api(docs, folder_name, task_status):
     # Function to create a vector store from the documents and save it to disk.

     # create output folder if it doesn't exist
     if not os.path.exists(f"{folder_name}"):

@@ -44,7 +44,8 @@ def call_openai_api(docs, folder_name, task_status):
     # hf = HuggingFaceEmbeddings(model_name=model_name)
     # store = FAISS.from_documents(docs_test, hf)
     s1 = len(docs)
-    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
+    for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
+                  bar_format='{l_bar}{bar}| Time Left: {remaining}'):
         try:
             task_status.update_state(state='PROGRESS', meta={'current': int((c1 / s1) * 100)})
             store_add_texts_with_retry(store, i)

@@ -58,20 +59,20 @@ def call_openai_api(docs, folder_name, task_status):
         c1 += 1
     store.save_local(f"{folder_name}")


 def get_user_permission(docs, folder_name):
     # Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
     # Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
-    #docs_content = (" ".join(docs))
+    # docs_content = (" ".join(docs))
     docs_content = ""
     for doc in docs:
         docs_content += doc.page_content

     tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
     # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
     print(f"Number of Tokens = {format(tokens, ',d')}")
     print(f"Approx Cost = ${format(total_price, ',.2f')}")
-    #Here we check for user permission before calling the API.
+    # Here we check for user permission before calling the API.
     user_input = input("Price Okay? (Y/N) \n").lower()
     if user_input == "y":
         call_openai_api(docs, folder_name)
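`num_tokens_from_string` above is tiktoken's `cl100k_base` encoding plus a flat price per 1k tokens; here is a sketch of the same calculation in isolation (the $0.0004 figure is the one hard-coded in the diff, the sample text is invented):

```python
# Sketch: token counting and cost estimate as done in parser/open_ai_func.py.
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")
text = "DocsGPT ingests documentation and answers questions about it."
num_tokens = len(encoding.encode(text))
total_price = (num_tokens / 1000) * 0.0004  # per-1k-token embedding price used in the diff

print(f"Number of Tokens = {num_tokens:,d}")
print(f"Approx Cost = ${total_price:,.2f}")
```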
@@ -1,10 +1,12 @@
-import os
 import ast
-import tiktoken
+import os
 from pathlib import Path

+import tiktoken
 from langchain.llms import OpenAI
 from langchain.prompts import PromptTemplate


 def find_files(directory):
     files_list = []
     for root, dirs, files in os.walk(directory):

@@ -13,6 +15,7 @@ def find_files(directory):
             files_list.append(os.path.join(root, file))
     return files_list

+
 def extract_functions(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()

@@ -25,6 +28,7 @@ def extract_functions(file_path):
             functions[func_name] = func_def
     return functions

+
 def extract_classes(file_path):
     with open(file_path, 'r') as file:
         source_code = file.read()

@@ -40,6 +44,7 @@ def extract_classes(file_path):
         classes[class_name] = ", ".join(function_names)
     return classes

+
 def extract_functions_and_classes(directory):
     files = find_files(directory)
     functions_dict = {}

@@ -53,11 +58,12 @@ def extract_functions_and_classes(directory):
             classes_dict[file] = classes
     return functions_dict, classes_dict

+
 def parse_functions(functions_dict, formats, dir):
     c1 = len(functions_dict)
     for i, (source, functions) in enumerate(functions_dict.items(), start=1):
         print(f"Processing file {i}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
         subfolders = "/".join(source_w.split("/")[:-1])
         Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
         for j, (name, function) in enumerate(functions.items(), start=1):

@@ -70,18 +76,19 @@ def parse_functions(functions_dict, formats, dir):
             response = llm(prompt.format(code=function))
             mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
             with open(f"outputs/{source_w}", mode) as f:
-                f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
+                f.write(
+                    f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")


 def parse_classes(classes_dict, formats, dir):
     c1 = len(classes_dict)
     for i, (source, classes) in enumerate(classes_dict.items()):
-        print(f"Processing file {i+1}/{c1}")
-        source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
+        print(f"Processing file {i + 1}/{c1}")
+        source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
         subfolders = "/".join(source_w.split("/")[:-1])
         Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
         for name, function_names in classes.items():
-            print(f"Processing Class {i+1}/{c1}")
+            print(f"Processing Class {i + 1}/{c1}")
             prompt = PromptTemplate(
                 input_variables=["class_name", "functions_names"],
                 template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",

@@ -92,6 +99,7 @@ def parse_classes(classes_dict, formats, dir):
         with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
             f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")

+
 def transform_to_docs(functions_dict, classes_dict, formats, dir):
     docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
     docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])

@@ -110,4 +118,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
         parse_classes(classes_dict, formats, dir)
         print("All done!")
     else:
-        print("The API was not called. No money was spent.")
\ No newline at end of file
+        print("The API was not called. No money was spent.")
application/parser/schema/base.py:

@@ -2,7 +2,6 @@
 from dataclasses import dataclass

 from langchain.docstore.document import Document as LCDocument
 from parser.schema.schema import BaseDocument
application/parser/token_func.py:

@@ -1,9 +1,9 @@
 import re
-import tiktoken
-
-from typing import List
-from parser.schema.base import Document
 from math import ceil
+from typing import List
+
+import tiktoken
+from parser.schema.base import Document


 def separate_header_and_body(text):

@@ -13,6 +13,7 @@ def separate_header_and_body(text):
     body = text[len(header):]
     return header, body

+
 def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
     docs = []
     current_group = None

@@ -23,7 +24,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
         if current_group is None:
             current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
                                      extra_info=doc.extra_info)
-        elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
+        elif len(tiktoken.get_encoding("cl100k_base").encode(
+                current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
             current_group.text += " " + doc.text
         else:
             docs.append(current_group)

@@ -35,6 +37,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)

     return docs

+
 def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
     docs = []
     for doc in documents:

@@ -54,17 +57,18 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
             docs.append(new_doc)
     return docs

+
 def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
-    if token_check == False:
+    if not token_check:
         return documents
     print("Grouping small documents")
     try:
         documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
-    except:
+    except Exception:
         print("Grouping failed, try running without token_check")
     print("Separating large documents")
     try:
         documents = split_documents(documents=documents, max_tokens=max_tokens)
-    except:
+    except Exception:
         print("Grouping failed, try running without token_check")
     return documents
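A hedged usage sketch for `group_split`: it first merges small documents toward `max_tokens`, then splits oversized ones. The `Document` keyword arguments mirror the call inside `group_documents` above; the sample texts are invented and the exact `Document` dataclass fields are an assumption:

```python
# Sketch: grouping/splitting Documents by token count (sample documents invented).
from parser.schema.base import Document
from parser.token_func import group_split

docs = [
    Document(text="Short install note.", doc_id="d1", embedding=None, extra_info=None),
    Document(text="Another short paragraph about configuration.", doc_id="d2",
             embedding=None, extra_info=None),
]

processed = group_split(docs, max_tokens=2000, min_tokens=150, token_check=True)
print(len(processed), "documents after grouping/splitting")
```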
prompts/chat_combine_prompt.txt:

@@ -1,4 +1,9 @@
 You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
-Use the following pieces of context to help answer the users question.
+Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses.
+You have access to chat history, and can use it to help answer the question.
+When using code examples, use the following format:
+```(language)
+(code)
+```
 ----------------
 {summaries}

prompts/chat_reduce_prompt.txt:

@@ -1,3 +1,3 @@
-Use the following portion of a long document to see if any of the text is relevant to answer the question.
-{context}
-Provide all relevant text to the question verbatim. Summarize if needed. If nothing relevant return "-".
+Use the following pieces of context to help answer the users question. If its not relevant to the question, respond with "-"
+----------------
+{context}
application/requirements.txt:

@@ -8,8 +8,8 @@ async-timeout==4.0.2
 attrs==22.2.0
 billiard==3.6.4.0
 blobfile==2.0.1
-boto3==1.26.84
-botocore==1.29.84
+boto3==1.26.102
+botocore==1.29.102
 cffi==1.15.1
 charset-normalizer==3.1.0
 click==8.1.3

@@ -27,8 +27,11 @@ entrypoints==0.4
 faiss-cpu==1.7.3
 filelock==3.9.0
 Flask==2.2.3
+Flask-Cors==3.0.10
 frozenlist==1.3.3
 geojson==2.5.0
 greenlet==2.0.2
+gpt4all==0.1.7
 hub==3.0.1
 huggingface-hub==0.12.1
 humbug==0.2.8

@@ -38,14 +41,17 @@ Jinja2==3.1.2
 jmespath==1.0.1
 joblib==1.2.0
 kombu==5.2.4
-langchain==0.0.118
+langchain==0.0.179
 loguru==0.6.0
 lxml==4.9.2
 MarkupSafe==2.1.2
 marshmallow==3.19.0
 marshmallow-enum==1.5.1
 mpmath==1.3.0
 multidict==6.0.4
 multiprocess==0.70.14
 mypy-extensions==1.0.0
 networkx==3.0
 nltk==3.8.1
 numcodecs==0.11.0
 numpy==1.24.2

@@ -64,29 +70,37 @@ pycryptodomex==3.17
 pydantic==1.10.5
 PyJWT==2.6.0
 pymongo==4.3.3
 pyowm==3.3.0
 PyPDF2==3.0.1
 PySocks==1.7.1
 python-dateutil==2.8.2
 python-dotenv==1.0.0
 python-jose==3.3.0
 pytz==2022.7.1
 PyYAML==6.0
-redis==4.5.2
+redis==4.5.4
 regex==2022.10.31
 requests==2.28.2
 retry==0.9.2
 rsa==4.9
 s3transfer==0.6.0
 scikit-learn==1.2.2
 scipy==1.10.1
 sentence-transformers==2.2.2
 sentencepiece==0.1.97
 six==1.16.0
 SQLAlchemy==1.4.46
 sympy==1.11.1
 tenacity==8.2.2
 tiktoken==0.3.0
 tokenizers==0.13.2
 threadpoolctl==3.1.0
 torch==2.0.0
 torchvision==0.15.1
 tqdm==4.65.0
-transformers==4.26.1
+transformers==4.27.2
 typer==0.7.0
 typing-inspect==0.8.0
 typing_extensions==4.5.0
 urllib3==1.26.14
 vine==5.0.0
 wcwidth==0.2.6
 Werkzeug==2.2.3
 yarl==1.8.2
@@ -1,28 +1,31 @@
import requests
import nltk
import os

from parser.file.bulk import SimpleDirectoryReader
from parser.schema.base import Document
from parser.open_ai_func import call_openai_api
from parser.token_func import group_split
from celery import current_task

import shutil
import string
import zipfile
import shutil
from urllib.parse import urljoin

import nltk
import requests

from core.settings import settings
from parser.file.bulk import SimpleDirectoryReader
from parser.open_ai_func import call_openai_api
from parser.schema.base import Document
from parser.token_func import group_split

try:
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
except FileExistsError:
pass

def metadata_from_filename(title):
return {'title': title}

def generate_random_string(length):
return ''.join([string.ascii_letters[i % 52] for i in range(length)])

def ingest_worker(self, directory, formats, name_job, filename, user):
# directory = 'inputs' or 'temp'
# formats = [".rst", ".md"]
@@ -39,12 +42,8 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
max_tokens = 1250
full_path = directory + '/' + user + '/' + name_job
# check if API_URL env variable is set
if not os.environ.get('API_URL'):
url = 'http://localhost:5001/api/download'
else:
url = os.environ.get('API_URL') + '/api/download'
file_data = {'name': name_job, 'file': filename, 'user': user}
response = requests.get(url, params=file_data)
response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
file = response.content

if not os.path.exists(full_path):
@@ -52,19 +51,17 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
with open(full_path + '/' + filename, 'wb') as f:
f.write(file)

#check if file is .zip and extract it
# check if file is .zip and extract it
if filename.endswith('.zip'):
with zipfile.ZipFile(full_path + '/' + filename, 'r') as zip_ref:
zip_ref.extractall(full_path)
os.remove(full_path + '/' + filename)

import time
self.update_state(state='PROGRESS', meta={'current': 1})

raw_docs = SimpleDirectoryReader(input_dir=full_path, input_files=input_files, recursive=recursive,
required_exts=formats, num_files_limit=limit,
exclude_hidden=exclude).load_data()
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)

docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
@@ -72,28 +69,26 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
call_openai_api(docs, full_path, self)
self.update_state(state='PROGRESS', meta={'current': 100})

if sample == True:
if sample:
for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text)

# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
# and send them to the server (provide user and name in form)
if not os.environ.get('API_URL'):
url = 'http://localhost:5001/api/upload_index'
else:
url = os.environ.get('API_URL') + '/api/upload_index'
file_data = {'name': name_job, 'user': user}
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
'file_pkl': open(full_path + '/index.pkl', 'rb')}
response = requests.post(url, files=files, data=file_data)
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)

#deletes remote
if not os.environ.get('API_URL'):
url = 'http://localhost:5001/api/delete_old?path=' + 'inputs/' + user + '/' + name_job
else:
url = os.environ.get('API_URL') + '/api/delete_old?path=' + 'inputs/' + user + '/' + name_job
response = requests.get(url)
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path="))
# delete local
shutil.rmtree(full_path)

return {'directory': directory, 'formats': formats, 'name_job': name_job, 'filename': filename, 'user': user}
return {
'directory': directory,
'formats': formats,
'name_job': name_job,
'filename': filename,
'user': user,
'limited': False
}
@@ -1,4 +1,4 @@
from app import app

if __name__ == "__main__":
app.run()
app.run(debug=True, port=5001)
20
docker-compose-dev.yaml
Normal file
@@ -0,0 +1,20 @@
version: "3.9"

services:

redis:
image: redis:6-alpine
ports:
- 6379:6379

mongo:
image: mongo:6
ports:
- 27017:27017
volumes:
- mongodb_data_container:/data/db

volumes:
mongodb_data_container:
@@ -5,23 +5,26 @@ services:
build: ./frontend
environment:
- VITE_API_HOST=http://localhost:5001
- VITE_API_STREAMING=$VITE_API_STREAMING
ports:
- "5173:5173"
depends_on:
- backend
- backend

backend:
build: ./application
environment:
- API_KEY=<your_api_key>
- EMBEDDINGS_KEY=<your_api_key>
- API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
ports:
- "5001:5001"
volumes:
- app_data_container:/app
- ./application/indexes:/app/indexes
- ./application/inputs:/app/inputs
- ./application/vectors:/app/vectors
depends_on:
- redis
- mongo
@@ -30,8 +33,8 @@ services:
build: ./application
command: celery -A app.celery worker -l INFO
environment:
- API_KEY=<your_api_key>
- EMBEDDINGS_KEY=<your_api_key>
- API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
@@ -55,5 +58,4 @@ services:

volumes:
mongodb_data_container:
app_data_container:
mongodb_data_container:
@@ -1,18 +1,20 @@
import requests
import dotenv
import os
import json
import pprint

import dotenv
import requests
from flask import Flask, request

dotenv.load_dotenv()
docsgpt_url = os.getenv("docsgpt_url")
chatwoot_url = os.getenv("chatwoot_url")
docsgpt_key = os.getenv("docsgpt_key")
chatwoot_token = os.getenv("chatwoot_token")
#account_id = os.getenv("account_id")
#assignee_id = os.getenv("assignee_id")
# account_id = os.getenv("account_id")
# assignee_id = os.getenv("assignee_id")
label_stop = "human-requested"

def send_to_bot(sender, message):
data = {
'sender': sender,
@@ -43,7 +45,6 @@ def send_to_chatwoot(account, conversation, message):
return r.json()

from flask import Flask, request
app = Flask(__name__)

@@ -74,7 +75,7 @@ def docsgpt():
# elif str(assignee) != str(assignee_id):
# return "Not the right assignee"

if(message_type == "incoming"):
if (message_type == "incoming"):
bot_response = send_to_bot(contact, message)
create_message = send_to_chatwoot(
account, conversation, bot_response)
@@ -83,5 +84,6 @@ def docsgpt():

return create_message

if __name__ == '__main__':
app.run(host='0.0.0.0', port=80)
app.run(host='0.0.0.0', port=80)
@@ -10,7 +10,7 @@ dotenv.load_dotenv()

# Replace 'YOUR_BOT_TOKEN' with your bot's token
TOKEN = os.getenv("DISCORD_TOKEN")
PREFIX = '@docsgpt '
PREFIX = '@DocsGPT'
BASE_API_URL = 'http://localhost:5001'

intents = discord.Intents.default()
@@ -20,13 +20,11 @@ bot = commands.Bot(command_prefix=PREFIX, intents=intents)

def split_string(input_str):
pattern = r'<(.*?)>'
match = re.search(pattern, input_str)

pattern = r'^<@!?{0}>\s*'.format(bot.user.id)
match = re.match(pattern, input_str)
if match:
content = match.group(1)
rest = input_str[:match.start()] + input_str[match.end():]
return content, rest.strip()
content = input_str[match.end():].strip()
return str(bot.user.id), content
return None, input_str

@@ -59,8 +57,8 @@ async def on_message(message):
if prefix is None:
return

part_prefix = "@"
if part_prefix in prefix:
part_prefix = str(bot.user.id)
if part_prefix == prefix:
answer = await fetch_answer(content)
await message.channel.send(answer)
25
extensions/web-widget/README.md
Normal file
@@ -0,0 +1,25 @@
# Chat Widget

A simple chat widget that can be easily integrated into any website.

## Installation

1. Host the `widget.html`, `styles.css`, and `script.js` files from the `src` folder on your own server or a Content Delivery Network (CDN). Make sure to note the URLs for these files.

2. Update the URLs in the `dist/chat-widget.js` file to match the locations of your hosted files:

```javascript
fetch("https://your-server-or-cdn.com/path/to/widget.html"),
fetch("https://your-server-or-cdn.com/path/to/styles.css"),
fetch("https://your-server-or-cdn.com/path/to/script.js"),
```

3. Host the `dist/chat-widget.js` file on your own server or a Content Delivery Network (CDN). Make sure to note the URL for this file.

## Integration

To integrate the chat widget into a website, add the following script tag to the HTML file, replacing URL_TO_CHAT_WIDGET_JS with the actual URL of your hosted chat-widget.js file:
```javascript
<script src="URL_TO_CHAT_WIDGET_JS"></script>
```
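A minimal sketch of a host page, assuming step 3 above is already done (the CDN URL below is a placeholder, not a real endpoint; `extensions/web-widget/index.html` in this diff plays the same role for local testing):

```html
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8">
    <title>My documentation site</title>
  </head>
  <body>
    <!-- Placeholder URL: replace with wherever you hosted dist/chat-widget.js -->
    <script src="https://cdn.example.com/docsgpt/chat-widget.js"></script>
  </body>
</html>
```

Note that the backend the widget queries is configured separately, via the `API_ENDPOINT` constant at the top of `src/js/script.js`, which defaults to `http://localhost:5001/api/answer`.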
41
extensions/web-widget/dist/chat-widget.js
vendored
Normal file
@@ -0,0 +1,41 @@
(async function () {
// Fetch the HTML, CSS, and JavaScript from your server or CDN
const [htmlRes, jsRes] = await Promise.all([
fetch("https://s3-eu-west-2.amazonaws.com/arc53data/widget.html"),
// fetch("https://s3-eu-west-2.amazonaws.com/arc53data/tailwind.css"),
fetch("https://s3-eu-west-2.amazonaws.com/arc53data/script.js"),
]);

const html = await htmlRes.text();
//const css = await cssRes.text();
const js = await jsRes.text();

// create a new link element
const link = document.createElement("link");

//set the rel, href, type, and integrity attributes
link.rel = "stylesheet";
link.href = "https://cdn.tailwindcss.com/";
link.type = "text/css";
link.integrity = "sha384-PDOmVviaTm8N1W35y1NSmo80w6GPaGhbDuOBAF/5hRffaeGc6yOwIo1qAt4gqLGA%";

// get the document head and append the link element to it
// document.head.appendChild(link);

// Create a style element for the CSS
// const style = document.createElement("style");
// style.innerHTML = css;
// document.head.appendChild(style);

// Create a container for the chat widget and inject the HTML
const chatWidgetContainer = document.createElement("div");
chatWidgetContainer.innerHTML = html;
document.body.appendChild(chatWidgetContainer);

// Execute the JavaScript code
const script = document.createElement("script");
script.innerHTML = js;
document.body.appendChild(script);
})();
807
extensions/web-widget/dist/output.css
vendored
Normal file
@@ -0,0 +1,807 @@
|
||||
/*
|
||||
! tailwindcss v3.3.1 | MIT License | https://tailwindcss.com
|
||||
*/
|
||||
|
||||
/*
|
||||
1. Prevent padding and border from affecting element width. (https://github.com/mozdevs/cssremedy/issues/4)
|
||||
2. Allow adding a border to an element by just adding a border-width. (https://github.com/tailwindcss/tailwindcss/pull/116)
|
||||
*/
|
||||
|
||||
*,
|
||||
::before,
|
||||
::after {
|
||||
box-sizing: border-box;
|
||||
/* 1 */
|
||||
border-width: 0;
|
||||
/* 2 */
|
||||
border-style: solid;
|
||||
/* 2 */
|
||||
border-color: #e5e7eb;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
::before,
|
||||
::after {
|
||||
--tw-content: '';
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use a consistent sensible line-height in all browsers.
|
||||
2. Prevent adjustments of font size after orientation changes in iOS.
|
||||
3. Use a more readable tab size.
|
||||
4. Use the user's configured `sans` font-family by default.
|
||||
5. Use the user's configured `sans` font-feature-settings by default.
|
||||
6. Use the user's configured `sans` font-variation-settings by default.
|
||||
*/
|
||||
|
||||
html {
|
||||
line-height: 1.5;
|
||||
/* 1 */
|
||||
-webkit-text-size-adjust: 100%;
|
||||
/* 2 */
|
||||
-moz-tab-size: 4;
|
||||
/* 3 */
|
||||
-o-tab-size: 4;
|
||||
tab-size: 4;
|
||||
/* 3 */
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
||||
/* 4 */
|
||||
font-feature-settings: normal;
|
||||
/* 5 */
|
||||
font-variation-settings: normal;
|
||||
/* 6 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove the margin in all browsers.
|
||||
2. Inherit line-height from `html` so users can set them as a class directly on the `html` element.
|
||||
*/
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Add the correct height in Firefox.
|
||||
2. Correct the inheritance of border color in Firefox. (https://bugzilla.mozilla.org/show_bug.cgi?id=190655)
|
||||
3. Ensure horizontal rules are visible by default.
|
||||
*/
|
||||
|
||||
hr {
|
||||
height: 0;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 2 */
|
||||
border-top-width: 1px;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct text decoration in Chrome, Edge, and Safari.
|
||||
*/
|
||||
|
||||
abbr:where([title]) {
|
||||
-webkit-text-decoration: underline dotted;
|
||||
text-decoration: underline dotted;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the default font size and weight for headings.
|
||||
*/
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6 {
|
||||
font-size: inherit;
|
||||
font-weight: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Reset links to optimize for opt-in styling instead of opt-out.
|
||||
*/
|
||||
|
||||
a {
|
||||
color: inherit;
|
||||
text-decoration: inherit;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font weight in Edge and Safari.
|
||||
*/
|
||||
|
||||
b,
|
||||
strong {
|
||||
font-weight: bolder;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Use the user's configured `mono` font family by default.
|
||||
2. Correct the odd `em` font sizing in all browsers.
|
||||
*/
|
||||
|
||||
code,
|
||||
kbd,
|
||||
samp,
|
||||
pre {
|
||||
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
||||
/* 1 */
|
||||
font-size: 1em;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct font size in all browsers.
|
||||
*/
|
||||
|
||||
small {
|
||||
font-size: 80%;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent `sub` and `sup` elements from affecting the line height in all browsers.
|
||||
*/
|
||||
|
||||
sub,
|
||||
sup {
|
||||
font-size: 75%;
|
||||
line-height: 0;
|
||||
position: relative;
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
sub {
|
||||
bottom: -0.25em;
|
||||
}
|
||||
|
||||
sup {
|
||||
top: -0.5em;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Remove text indentation from table contents in Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=999088, https://bugs.webkit.org/show_bug.cgi?id=201297)
|
||||
2. Correct table border color inheritance in all Chrome and Safari. (https://bugs.chromium.org/p/chromium/issues/detail?id=935729, https://bugs.webkit.org/show_bug.cgi?id=195016)
|
||||
3. Remove gaps between table borders by default.
|
||||
*/
|
||||
|
||||
table {
|
||||
text-indent: 0;
|
||||
/* 1 */
|
||||
border-color: inherit;
|
||||
/* 2 */
|
||||
border-collapse: collapse;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
1. Change the font styles in all browsers.
|
||||
2. Remove the margin in Firefox and Safari.
|
||||
3. Remove default padding in all browsers.
|
||||
*/
|
||||
|
||||
button,
|
||||
input,
|
||||
optgroup,
|
||||
select,
|
||||
textarea {
|
||||
font-family: inherit;
|
||||
/* 1 */
|
||||
font-size: 100%;
|
||||
/* 1 */
|
||||
font-weight: inherit;
|
||||
/* 1 */
|
||||
line-height: inherit;
|
||||
/* 1 */
|
||||
color: inherit;
|
||||
/* 1 */
|
||||
margin: 0;
|
||||
/* 2 */
|
||||
padding: 0;
|
||||
/* 3 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inheritance of text transform in Edge and Firefox.
|
||||
*/
|
||||
|
||||
button,
|
||||
select {
|
||||
text-transform: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Remove default button styles.
|
||||
*/
|
||||
|
||||
button,
|
||||
[type='button'],
|
||||
[type='reset'],
|
||||
[type='submit'] {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
background-color: transparent;
|
||||
/* 2 */
|
||||
background-image: none;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Use the modern Firefox focus style for all focusable elements.
|
||||
*/
|
||||
|
||||
:-moz-focusring {
|
||||
outline: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the additional `:invalid` styles in Firefox. (https://github.com/mozilla/gecko-dev/blob/2f9eacd9d3d995c937b4251a5557d95d494c9be1/layout/style/res/forms.css#L728-L737)
|
||||
*/
|
||||
|
||||
:-moz-ui-invalid {
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct vertical alignment in Chrome and Firefox.
|
||||
*/
|
||||
|
||||
progress {
|
||||
vertical-align: baseline;
|
||||
}
|
||||
|
||||
/*
|
||||
Correct the cursor style of increment and decrement buttons in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-inner-spin-button,
|
||||
::-webkit-outer-spin-button {
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the odd appearance in Chrome and Safari.
|
||||
2. Correct the outline style in Safari.
|
||||
*/
|
||||
|
||||
[type='search'] {
|
||||
-webkit-appearance: textfield;
|
||||
/* 1 */
|
||||
outline-offset: -2px;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Remove the inner padding in Chrome and Safari on macOS.
|
||||
*/
|
||||
|
||||
::-webkit-search-decoration {
|
||||
-webkit-appearance: none;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Correct the inability to style clickable types in iOS and Safari.
|
||||
2. Change font properties to `inherit` in Safari.
|
||||
*/
|
||||
|
||||
::-webkit-file-upload-button {
|
||||
-webkit-appearance: button;
|
||||
/* 1 */
|
||||
font: inherit;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Add the correct display in Chrome and Safari.
|
||||
*/
|
||||
|
||||
summary {
|
||||
display: list-item;
|
||||
}
|
||||
|
||||
/*
|
||||
Removes the default spacing and border for appropriate elements.
|
||||
*/
|
||||
|
||||
blockquote,
|
||||
dl,
|
||||
dd,
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
h4,
|
||||
h5,
|
||||
h6,
|
||||
hr,
|
||||
figure,
|
||||
p,
|
||||
pre {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
fieldset {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
legend {
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
ol,
|
||||
ul,
|
||||
menu {
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/*
|
||||
Prevent resizing textareas horizontally by default.
|
||||
*/
|
||||
|
||||
textarea {
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Reset the default placeholder opacity in Firefox. (https://github.com/tailwindlabs/tailwindcss/issues/3300)
|
||||
2. Set the default placeholder color to the user's configured gray 400 color.
|
||||
*/
|
||||
|
||||
input::-moz-placeholder, textarea::-moz-placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
input::placeholder,
|
||||
textarea::placeholder {
|
||||
opacity: 1;
|
||||
/* 1 */
|
||||
color: #9ca3af;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Set the default cursor for buttons.
|
||||
*/
|
||||
|
||||
button,
|
||||
[role="button"] {
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
/*
|
||||
Make sure disabled buttons don't get the pointer cursor.
|
||||
*/
|
||||
|
||||
:disabled {
|
||||
cursor: default;
|
||||
}
|
||||
|
||||
/*
|
||||
1. Make replaced elements `display: block` by default. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
2. Add `vertical-align: middle` to align replaced elements more sensibly by default. (https://github.com/jensimmons/cssremedy/issues/14#issuecomment-634934210)
|
||||
This can trigger a poorly considered lint error in some tools but is included by design.
|
||||
*/
|
||||
|
||||
img,
|
||||
svg,
|
||||
video,
|
||||
canvas,
|
||||
audio,
|
||||
iframe,
|
||||
embed,
|
||||
object {
|
||||
display: block;
|
||||
/* 1 */
|
||||
vertical-align: middle;
|
||||
/* 2 */
|
||||
}
|
||||
|
||||
/*
|
||||
Constrain images and videos to the parent width and preserve their intrinsic aspect ratio. (https://github.com/mozdevs/cssremedy/issues/14)
|
||||
*/
|
||||
|
||||
img,
|
||||
video {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
/* Make elements with the HTML hidden attribute stay hidden by default */
|
||||
|
||||
[hidden] {
|
||||
display: none;
|
||||
}
|
||||
|
||||
*, ::before, ::after {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
::backdrop {
|
||||
--tw-border-spacing-x: 0;
|
||||
--tw-border-spacing-y: 0;
|
||||
--tw-translate-x: 0;
|
||||
--tw-translate-y: 0;
|
||||
--tw-rotate: 0;
|
||||
--tw-skew-x: 0;
|
||||
--tw-skew-y: 0;
|
||||
--tw-scale-x: 1;
|
||||
--tw-scale-y: 1;
|
||||
--tw-pan-x: ;
|
||||
--tw-pan-y: ;
|
||||
--tw-pinch-zoom: ;
|
||||
--tw-scroll-snap-strictness: proximity;
|
||||
--tw-ordinal: ;
|
||||
--tw-slashed-zero: ;
|
||||
--tw-numeric-figure: ;
|
||||
--tw-numeric-spacing: ;
|
||||
--tw-numeric-fraction: ;
|
||||
--tw-ring-inset: ;
|
||||
--tw-ring-offset-width: 0px;
|
||||
--tw-ring-offset-color: #fff;
|
||||
--tw-ring-color: rgb(59 130 246 / 0.5);
|
||||
--tw-ring-offset-shadow: 0 0 #0000;
|
||||
--tw-ring-shadow: 0 0 #0000;
|
||||
--tw-shadow: 0 0 #0000;
|
||||
--tw-shadow-colored: 0 0 #0000;
|
||||
--tw-blur: ;
|
||||
--tw-brightness: ;
|
||||
--tw-contrast: ;
|
||||
--tw-grayscale: ;
|
||||
--tw-hue-rotate: ;
|
||||
--tw-invert: ;
|
||||
--tw-saturate: ;
|
||||
--tw-sepia: ;
|
||||
--tw-drop-shadow: ;
|
||||
--tw-backdrop-blur: ;
|
||||
--tw-backdrop-brightness: ;
|
||||
--tw-backdrop-contrast: ;
|
||||
--tw-backdrop-grayscale: ;
|
||||
--tw-backdrop-hue-rotate: ;
|
||||
--tw-backdrop-invert: ;
|
||||
--tw-backdrop-opacity: ;
|
||||
--tw-backdrop-saturate: ;
|
||||
--tw-backdrop-sepia: ;
|
||||
}
|
||||
|
||||
.fixed {
|
||||
position: fixed;
|
||||
}
|
||||
|
||||
.absolute {
|
||||
position: absolute;
|
||||
}
|
||||
|
||||
.relative {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.inset-y-0 {
|
||||
top: 0px;
|
||||
bottom: 0px;
|
||||
}
|
||||
|
||||
.bottom-5 {
|
||||
bottom: 1.25rem;
|
||||
}
|
||||
|
||||
.left-5 {
|
||||
left: 1.25rem;
|
||||
}
|
||||
|
||||
.right-2 {
|
||||
right: 0.5rem;
|
||||
}
|
||||
|
||||
.z-50 {
|
||||
z-index: 50;
|
||||
}
|
||||
|
||||
.m-0 {
|
||||
margin: 0px;
|
||||
}
|
||||
|
||||
.-mx-2 {
|
||||
margin-left: -0.5rem;
|
||||
margin-right: -0.5rem;
|
||||
}
|
||||
|
||||
.mt-1 {
|
||||
margin-top: 0.25rem;
|
||||
}
|
||||
|
||||
.flex {
|
||||
display: flex;
|
||||
}
|
||||
|
||||
.hidden {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.w-full {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.flex-1 {
|
||||
flex: 1 1 0%;
|
||||
}
|
||||
|
||||
.transform {
|
||||
transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
|
||||
}
|
||||
|
||||
.items-center {
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.justify-center {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.gap-2 {
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.divide-y > :not([hidden]) ~ :not([hidden]) {
|
||||
--tw-divide-y-reverse: 0;
|
||||
border-top-width: calc(1px * calc(1 - var(--tw-divide-y-reverse)));
|
||||
border-bottom-width: calc(1px * var(--tw-divide-y-reverse));
|
||||
}
|
||||
|
||||
.rounded-md {
|
||||
border-radius: 0.375rem;
|
||||
}
|
||||
|
||||
.rounded-b {
|
||||
border-bottom-right-radius: 0.25rem;
|
||||
border-bottom-left-radius: 0.25rem;
|
||||
}
|
||||
|
||||
.border {
|
||||
border-width: 1px;
|
||||
}
|
||||
|
||||
.bg-transparent {
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.bg-gradient-to-br {
|
||||
background-image: linear-gradient(to bottom right, var(--tw-gradient-stops));
|
||||
}
|
||||
|
||||
.from-gray-100\/80 {
|
||||
--tw-gradient-from: rgb(243 244 246 / 0.8) var(--tw-gradient-from-position);
|
||||
--tw-gradient-from-position: ;
|
||||
--tw-gradient-to: rgb(243 244 246 / 0) var(--tw-gradient-from-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.via-white {
|
||||
--tw-gradient-via-position: ;
|
||||
--tw-gradient-to: rgb(255 255 255 / 0) var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), #fff var(--tw-gradient-via-position), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.to-white {
|
||||
--tw-gradient-to: #fff var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
}
|
||||
|
||||
.p-3 {
|
||||
padding: 0.75rem;
|
||||
}
|
||||
|
||||
.px-2 {
|
||||
padding-left: 0.5rem;
|
||||
padding-right: 0.5rem;
|
||||
}
|
||||
|
||||
.px-5 {
|
||||
padding-left: 1.25rem;
|
||||
padding-right: 1.25rem;
|
||||
}
|
||||
|
||||
.py-3 {
|
||||
padding-top: 0.75rem;
|
||||
padding-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
.pl-5 {
|
||||
padding-left: 1.25rem;
|
||||
}
|
||||
|
||||
.pr-8 {
|
||||
padding-right: 2rem;
|
||||
}
|
||||
|
||||
.font-sans {
|
||||
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.25rem;
|
||||
}
|
||||
|
||||
.text-xs {
|
||||
font-size: 0.75rem;
|
||||
line-height: 1rem;
|
||||
}
|
||||
|
||||
.font-bold {
|
||||
font-weight: 700;
|
||||
}
|
||||
|
||||
.text-gray-400 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(156 163 175 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-600 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(75 85 99 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(55 65 81 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.text-gray-800 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(31 41 55 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.shadow {
|
||||
--tw-shadow: 0 1px 3px 0 rgb(0 0 0 / 0.1), 0 1px 2px -1px rgb(0 0 0 / 0.1);
|
||||
--tw-shadow-colored: 0 1px 3px 0 var(--tw-shadow-color), 0 1px 2px -1px var(--tw-shadow-color);
|
||||
box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
|
||||
}
|
||||
|
||||
.backdrop-blur-sm {
|
||||
--tw-backdrop-blur: blur(4px);
|
||||
-webkit-backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
|
||||
backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
|
||||
}
|
||||
|
||||
.transition {
|
||||
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, -webkit-backdrop-filter;
|
||||
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter;
|
||||
transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter, -webkit-backdrop-filter;
|
||||
transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
|
||||
transition-duration: 150ms;
|
||||
}
|
||||
|
||||
.delay-200 {
|
||||
transition-delay: 200ms;
|
||||
}
|
||||
|
||||
.duration-300 {
|
||||
transition-duration: 300ms;
|
||||
}
|
||||
|
||||
.hover\:bg-gray-100:hover {
|
||||
--tw-bg-opacity: 1;
|
||||
background-color: rgb(243 244 246 / var(--tw-bg-opacity));
|
||||
}
|
||||
|
||||
.focus\:outline-none:focus {
|
||||
outline: 2px solid transparent;
|
||||
outline-offset: 2px;
|
||||
}
|
||||
|
||||
@media (prefers-color-scheme: dark) {
|
||||
.dark\:divide-gray-700 > :not([hidden]) ~ :not([hidden]) {
|
||||
--tw-divide-opacity: 1;
|
||||
border-color: rgb(55 65 81 / var(--tw-divide-opacity));
|
||||
}
|
||||
|
||||
.dark\:border-gray-700 {
|
||||
--tw-border-opacity: 1;
|
||||
border-color: rgb(55 65 81 / var(--tw-border-opacity));
|
||||
}
|
||||
|
||||
.dark\:from-gray-900\/80 {
|
||||
--tw-gradient-from: rgb(17 24 39 / 0.8) var(--tw-gradient-from-position);
|
||||
--tw-gradient-from-position: ;
|
||||
--tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-from-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.dark\:via-gray-900 {
|
||||
--tw-gradient-via-position: ;
|
||||
--tw-gradient-to: rgb(17 24 39 / 0) var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
--tw-gradient-stops: var(--tw-gradient-from), #111827 var(--tw-gradient-via-position), var(--tw-gradient-to);
|
||||
}
|
||||
|
||||
.dark\:to-gray-900 {
|
||||
--tw-gradient-to: #111827 var(--tw-gradient-to-position);
|
||||
--tw-gradient-to-position: ;
|
||||
}
|
||||
|
||||
.dark\:text-gray-200 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(229 231 235 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:text-gray-300 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(209 213 219 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:text-gray-500 {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(107 114 128 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:text-white {
|
||||
--tw-text-opacity: 1;
|
||||
color: rgb(255 255 255 / var(--tw-text-opacity));
|
||||
}
|
||||
|
||||
.dark\:hover\:bg-gray-800\/70:hover {
|
||||
background-color: rgb(31 41 55 / 0.7);
|
||||
}
|
||||
}
|
||||
|
||||
@media (min-width: 768px) {
|
||||
.md\:pl-0 {
|
||||
padding-left: 0px;
|
||||
}
|
||||
}
|
||||
12
extensions/web-widget/index.html
Normal file
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Chat Widget Test</title>
<link href="dist/output.css" rel="stylesheet">
</head>
<body>
<script src="dist/chat-widget.js"></script>
</body>
</html>
1002
extensions/web-widget/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
15
extensions/web-widget/package.json
Normal file
@@ -0,0 +1,15 @@
{
"name": "web-widget",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"tailwindcss": "^3.3.1"
}
}
58
extensions/web-widget/src/html/widget.html
Normal file
@@ -0,0 +1,58 @@
<div id="docsgpt-widget" class="dark fixed bottom-5 left-5 pl-5 md:pl-0 z-50">
|
||||
<style>
|
||||
@keyframes dotBounce {
|
||||
0%, 80%, 100% {
|
||||
transform: translateY(0);
|
||||
}
|
||||
40% {
|
||||
transform: translateY(-5px);
|
||||
}
|
||||
}
|
||||
|
||||
.dot-animation {
|
||||
display: inline-block;
|
||||
animation: dotBounce 1s infinite ease-in-out;
|
||||
}
|
||||
|
||||
.delay-200 {
|
||||
animation-delay: 200ms;
|
||||
}
|
||||
|
||||
.delay-400 {
|
||||
animation-delay: 400ms;
|
||||
}
|
||||
</style>
|
||||
|
||||
|
||||
<div class="divide-y dark:divide-gray-700 rounded-md border dark:border-gray-700 bg-gradient-to-br from-gray-100/80 via-white to-white dark:from-gray-900/80 dark:via-gray-900 dark:to-gray-900 font-sans shadow backdrop-blur-sm" style="width: 18rem; transform: translateY(0%) translateZ(0px);"><div>
|
||||
<div class="flex items-center gap-2 p-3">
|
||||
<div id="docsgpt-init-message" class="flex-1">
|
||||
<h3 class="text-sm font-bold text-gray-700 dark:text-gray-200">Looking for help with documentation?</h3>
|
||||
<p class="mt-1 text-xs text-gray-400 dark:text-gray-500">DocsGPT AI assistant will help you with docs</p>
|
||||
</div>
|
||||
<div id="docsgpt-answer" class="hidden">
|
||||
<p class="mt-1 text-xs text-gray-600 dark:text-gray-300">Come cool answer</p>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="w-full">
|
||||
<button id="ask-docsgpt" class="flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 hover:bg-gray-100 rounded-b dark:hover:bg-gray-800/70">
|
||||
Ask DocsGPT
|
||||
</button>
|
||||
|
||||
<form id="docsgpt-chat-form" class="relative w-full m-0 hidden" style="opacity: 1;" data-projection-id="1">
|
||||
<input id="docsgpt-chat-input" type="text" class="w-full bg-transparent px-5 py-3 pr-8 text-sm text-gray-700 dark:text-white focus:outline-none" placeholder="What do you want to do?" value="">
|
||||
<button class="absolute inset-y-0 right-2 -mx-2 px-2" type="submit" style="opacity: 0;" data-projection-id="2">
|
||||
|
||||
</button>
|
||||
</form>
|
||||
<p id="docsgpt-chat-processing" class="hidden flex w-full justify-center px-5 py-3 text-sm text-gray-800 font-bold dark:text-white transition duration-300 rounded-b animate-fadeIn animate-2s">
|
||||
Processing<span class="dot-animation">.</span><span class="dot-animation delay-200">.</span><span class="dot-animation delay-400">.</span>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
3
extensions/web-widget/src/input.css
Normal file
@@ -0,0 +1,3 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
56
extensions/web-widget/src/js/script.js
Normal file
@@ -0,0 +1,56 @@
const API_ENDPOINT = "http://localhost:5001/api/answer"; // Replace with your API endpoint

const widgetInitMessage = document.getElementById("docsgpt-init-message");
const widgetAnswerMessage = document.getElementById("docsgpt-answer");
const widgetAnswerMessageP = widgetAnswerMessage.querySelector("p");
const askDocsGPTButton = document.getElementById("ask-docsgpt");
const chatInput = document.getElementById("docsgpt-chat-input");
const chatForm = document.getElementById("docsgpt-chat-form");
const chatProcessing = document.getElementById("docsgpt-chat-processing");

async function sendMessage(message) {
const requestData = {
"question": message,
"active_docs": "default",
"api_key": "token",
"embeddings_key": "token",
"model": "default",
"history": null,
}
const response = await fetch(API_ENDPOINT, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(requestData),
});
const data = await response.json();
return data.answer;
}

askDocsGPTButton.addEventListener("click", () => {
askDocsGPTButton.classList.add("hidden");
chatForm.classList.remove("hidden");
chatForm.focus();
widgetInitMessage.classList.remove("hidden");
widgetAnswerMessage.classList.add("hidden");
});

chatForm.addEventListener("submit", async (e) => {
e.preventDefault();
const message = chatInput.value.trim();
if (!message) return;

chatInput.value = "";
chatForm.classList.add("hidden");
chatProcessing.classList.remove("hidden");

const reply = await sendMessage(message);
chatProcessing.classList.add("hidden");

// inside <p> tag
widgetAnswerMessageP.innerHTML = reply;
widgetAnswerMessage.classList.remove("hidden");
widgetInitMessage.classList.add("hidden");
askDocsGPTButton.classList.remove("hidden");
});
10
extensions/web-widget/tailwind.config.js
Normal file
@@ -0,0 +1,10 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
content: ["./src/**/*.{html,js}"],
theme: {
extend: {},
},
plugins: [],
}
@@ -1,2 +1,2 @@
# Please put appropriate value
VITE_API_HOST = http://localhost:5001
VITE_API_HOST=http://localhost:5001
@@ -1 +1 @@
VITE_API_HOST = https://docsapi.arc53.com
VITE_API_HOST = https://gptcloud.arc53.com
6011
frontend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -24,12 +24,15 @@
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-dropzone": "^14.2.3",
"react-markdown": "^8.0.7",
"react-redux": "^8.0.5",
"react-router-dom": "^6.8.1"
"react-router-dom": "^6.8.1",
"react-syntax-highlighter": "^15.5.0"
},
"devDependencies": {
"@types/react": "^18.0.27",
"@types/react-dom": "^18.0.10",
"@types/react-syntax-highlighter": "^15.5.6",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.51.0",
"@vitejs/plugin-react": "^3.1.0",
@@ -38,9 +38,8 @@ export default function Navigation({
const [isDocsListOpen, setIsDocsListOpen] = useState(false);

const isApiKeySet = useSelector(selectApiKeyStatus);
const [apiKeyModalState, setApiKeyModalState] = useState<ActiveState>(
isApiKeySet ? 'INACTIVE' : 'ACTIVE',
);
const [apiKeyModalState, setApiKeyModalState] =
useState<ActiveState>('INACTIVE');

const isSelectedDocsSet = useSelector(selectSelectedDocsStatus);
const [selectedDocsModalState, setSelectedDocsModalState] =
@@ -148,7 +147,7 @@ export default function Navigation({
src={Arrow2}
alt="arrow"
className={`${
isDocsListOpen ? 'rotate-0' : '-rotate-90'
isDocsListOpen ? 'rotate-0' : 'rotate-180'
} mr-3 w-3 transition-all`}
/>
</div>
@@ -71,19 +71,15 @@ export default function Conversation() {
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex justify-center p-6">
|
||||
<div className="flex justify-center p-4">
|
||||
{queries.length > 0 && (
|
||||
<div className="mt-20 flex w-10/12 flex-col transition-all md:w-3/4">
|
||||
<div className="mt-20 flex flex-col transition-all md:w-3/4">
|
||||
{queries.map((query, index) => {
|
||||
return (
|
||||
<Fragment key={index}>
|
||||
<ConversationBubble
|
||||
ref={endMessageRef}
|
||||
className={`${
|
||||
index === queries.length - 1 && status === 'loading'
|
||||
? 'mb-24'
|
||||
: 'mb-7'
|
||||
}`}
|
||||
className={'mb-7'}
|
||||
key={`${index}QUESTION`}
|
||||
message={query.prompt}
|
||||
type="QUESTION"
|
||||
|
||||
@@ -4,6 +4,9 @@ import { FEEDBACK, MESSAGE_TYPE } from './conversationModels';
|
||||
import Alert from './../assets/alert.svg';
|
||||
import { ReactComponent as Like } from './../assets/like.svg';
|
||||
import { ReactComponent as Dislike } from './../assets/dislike.svg';
|
||||
import ReactMarkdown from 'react-markdown';
|
||||
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter';
|
||||
import { vscDarkPlus } from 'react-syntax-highlighter/dist/cjs/styles/prism';
|
||||
|
||||
const ConversationBubble = forwardRef<
|
||||
HTMLDivElement,
|
||||
@@ -19,14 +22,26 @@ const ConversationBubble = forwardRef<
|
||||
ref,
|
||||
) {
|
||||
const [showFeedback, setShowFeedback] = useState(false);
|
||||
const List = ({
|
||||
ordered,
|
||||
children,
|
||||
}: {
|
||||
ordered?: boolean;
|
||||
children: React.ReactNode;
|
||||
}) => {
|
||||
const Tag = ordered ? 'ol' : 'ul';
|
||||
return <Tag className="list-inside list-disc">{children}</Tag>;
|
||||
};
|
||||
let bubble;
|
||||
|
||||
if (type === 'QUESTION') {
|
||||
bubble = (
|
||||
<div ref={ref} className={`flex flex-row-reverse self-end ${className}`}>
|
||||
<Avatar className="mt-4 text-2xl" avatar="🧑💻"></Avatar>
|
||||
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 py-5 px-5 text-white">
|
||||
<p className="whitespace-pre-wrap break-words">{message}</p>
|
||||
<div className="mr-2 ml-10 flex items-center rounded-3xl bg-blue-1000 p-3.5 text-white">
|
||||
<ReactMarkdown className="whitespace-pre-wrap break-words">
|
||||
{message}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
@@ -40,7 +55,7 @@ const ConversationBubble = forwardRef<
|
||||
>
|
||||
<Avatar className="mt-4 text-2xl" avatar="🦖"></Avatar>
|
||||
<div
|
||||
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 py-5 px-5 ${
|
||||
className={`ml-2 mr-5 flex items-center rounded-3xl bg-gray-1000 p-3.5 ${
|
||||
type === 'ERROR'
|
||||
? ' rounded-lg border border-red-2000 bg-red-1000 p-2 text-red-3000'
|
||||
: ''
|
||||
@@ -49,7 +64,37 @@ const ConversationBubble = forwardRef<
|
||||
{type === 'ERROR' && (
|
||||
<img src={Alert} alt="alert" className="mr-2 inline" />
|
||||
)}
|
||||
<p className="whitespace-pre-wrap break-words">{message}</p>
|
||||
<ReactMarkdown
|
||||
className="whitespace-pre-wrap break-words"
|
||||
components={{
|
||||
code({ node, inline, className, children, ...props }) {
|
||||
const match = /language-(\w+)/.exec(className || '');
|
||||
|
||||
return !inline && match ? (
|
||||
<SyntaxHighlighter
|
||||
PreTag="div"
|
||||
language={match[1]}
|
||||
{...props}
|
||||
style={vscDarkPlus}
|
||||
>
|
||||
{String(children).replace(/\n$/, '')}
|
||||
</SyntaxHighlighter>
|
||||
) : (
|
||||
<code className={className ? className : ''} {...props}>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
},
|
||||
ul({ node, children }) {
|
||||
return <List>{children}</List>;
|
||||
},
|
||||
ol({ node, children }) {
|
||||
return <List ordered>{children}</List>;
|
||||
},
|
||||
}}
|
||||
>
|
||||
{message}
|
||||
</ReactMarkdown>
|
||||
</div>
|
||||
<div
|
||||
className={`mr-2 flex items-center justify-center ${
|
||||
|
||||
@@ -7,6 +7,7 @@ export function fetchAnswerApi(
|
||||
question: string,
|
||||
apiKey: string,
|
||||
selectedDocs: Doc,
|
||||
history: Array<any> = [],
|
||||
): Promise<Answer> {
|
||||
let namePath = selectedDocs.name;
|
||||
if (selectedDocs.language === namePath) {
|
||||
@@ -37,7 +38,7 @@ export function fetchAnswerApi(
|
||||
question: question,
|
||||
api_key: apiKey,
|
||||
embeddings_key: apiKey,
|
||||
history: localStorage.getItem('chatHistory'),
|
||||
history: history,
|
||||
active_docs: docPath,
|
||||
}),
|
||||
})
|
||||
@@ -45,7 +46,7 @@ export function fetchAnswerApi(
|
||||
if (response.ok) {
|
||||
return response.json();
|
||||
} else {
|
||||
Promise.reject(response);
|
||||
return Promise.reject(new Error(response.statusText));
|
||||
}
|
||||
})
|
||||
.then((data) => {
|
||||
@@ -54,6 +55,52 @@ export function fetchAnswerApi(
|
||||
});
|
||||
}
|
||||
|
||||
export function fetchAnswerSteaming(
|
||||
question: string,
|
||||
apiKey: string,
|
||||
selectedDocs: Doc,
|
||||
history: Array<any> = [],
|
||||
onEvent: (event: MessageEvent) => void,
|
||||
): Promise<Answer> {
|
||||
let namePath = selectedDocs.name;
|
||||
if (selectedDocs.language === namePath) {
|
||||
namePath = '.project';
|
||||
}
|
||||
|
||||
let docPath = 'default';
|
||||
if (selectedDocs.location === 'local') {
|
||||
docPath = 'local' + '/' + selectedDocs.name + '/';
|
||||
} else if (selectedDocs.location === 'remote') {
|
||||
docPath =
|
||||
selectedDocs.language +
|
||||
'/' +
|
||||
namePath +
|
||||
'/' +
|
||||
selectedDocs.version +
|
||||
'/' +
|
||||
selectedDocs.model +
|
||||
'/';
|
||||
}
|
||||
|
||||
return new Promise<Answer>((resolve, reject) => {
|
||||
const url = new URL(apiHost + '/stream');
|
||||
url.searchParams.append('question', question);
|
||||
url.searchParams.append('api_key', apiKey);
|
||||
url.searchParams.append('embeddings_key', apiKey);
|
||||
url.searchParams.append('active_docs', docPath);
|
||||
url.searchParams.append('history', JSON.stringify(history));
|
||||
|
||||
const eventSource = new EventSource(url.href);
|
||||
|
||||
eventSource.onmessage = onEvent;
|
||||
|
||||
eventSource.onerror = (error) => {
|
||||
console.log('Connection failed.');
|
||||
eventSource.close();
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
export function sendFeedback(
|
||||
prompt: string,
|
||||
response: string,
|
||||
|
||||
@@ -1,27 +1,65 @@
|
||||
import { createAsyncThunk, createSlice, PayloadAction } from '@reduxjs/toolkit';
|
||||
import store from '../store';
|
||||
import { fetchAnswerApi } from './conversationApi';
|
||||
import { Answer, ConversationState, Query } from './conversationModels';
|
||||
import { fetchAnswerApi, fetchAnswerSteaming } from './conversationApi';
|
||||
import { Answer, ConversationState, Query, Status } from './conversationModels';
|
||||
|
||||
const initialState: ConversationState = {
|
||||
queries: [],
|
||||
status: 'idle',
|
||||
};
|
||||
|
||||
export const fetchAnswer = createAsyncThunk<
|
||||
Answer,
|
||||
{ question: string },
|
||||
{ state: RootState }
|
||||
>('fetchAnswer', async ({ question }, { getState }) => {
|
||||
const state = getState();
|
||||
const API_STREAMING = import.meta.env.VITE_API_STREAMING === 'true';
|
||||
|
||||
const answer = await fetchAnswerApi(
|
||||
question,
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
);
|
||||
return answer;
|
||||
});
|
||||
export const fetchAnswer = createAsyncThunk<Answer, { question: string }>(
|
||||
'fetchAnswer',
|
||||
async ({ question }, { dispatch, getState }) => {
|
||||
const state = getState() as RootState;
|
||||
if (state.preference) {
|
||||
if (API_STREAMING) {
|
||||
await fetchAnswerSteaming(
|
||||
question,
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
state.conversation.queries,
|
||||
(event) => {
|
||||
const data = JSON.parse(event.data);
|
||||
|
||||
// check if the 'end' event has been received
|
||||
if (data.type === 'end') {
|
||||
// set status to 'idle'
|
||||
dispatch(conversationSlice.actions.setStatus('idle'));
|
||||
} else {
|
||||
const result = data.answer;
|
||||
dispatch(
|
||||
updateStreamingQuery({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { response: result },
|
||||
}),
|
||||
);
|
||||
}
|
||||
},
|
||||
);
|
||||
} else {
|
||||
const answer = await fetchAnswerApi(
|
||||
question,
|
||||
state.preference.apiKey,
|
||||
state.preference.selectedDocs!,
|
||||
state.conversation.queries,
|
||||
);
|
||||
if (answer) {
|
||||
dispatch(
|
||||
updateQuery({
|
||||
index: state.conversation.queries.length - 1,
|
||||
query: { response: answer.answer },
|
||||
}),
|
||||
);
|
||||
dispatch(conversationSlice.actions.setStatus('idle'));
|
||||
}
|
||||
}
|
||||
}
|
||||
return { answer: '', query: question, result: '' };
|
||||
},
|
||||
);
|
||||
|
||||
export const conversationSlice = createSlice({
|
||||
name: 'conversation',
|
||||
@@ -30,6 +68,21 @@ export const conversationSlice = createSlice({
|
||||
addQuery(state, action: PayloadAction<Query>) {
|
||||
state.queries.push(action.payload);
|
||||
},
|
||||
updateStreamingQuery(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
) {
|
||||
const index = action.payload.index;
|
||||
if (action.payload.query.response) {
|
||||
state.queries[index].response =
|
||||
(state.queries[index].response || '') + action.payload.query.response;
|
||||
} else {
|
||||
state.queries[index] = {
|
||||
...state.queries[index],
|
||||
...action.payload.query,
|
||||
};
|
||||
}
|
||||
},
|
||||
updateQuery(
|
||||
state,
|
||||
action: PayloadAction<{ index: number; query: Partial<Query> }>,
|
||||
@@ -40,17 +93,15 @@ export const conversationSlice = createSlice({
|
||||
...action.payload.query,
|
||||
};
|
||||
},
|
||||
setStatus(state, action: PayloadAction<Status>) {
|
||||
state.status = action.payload;
|
||||
},
|
||||
},
|
||||
extraReducers(builder) {
|
||||
builder
|
||||
.addCase(fetchAnswer.pending, (state) => {
|
||||
state.status = 'loading';
|
||||
})
|
||||
.addCase(fetchAnswer.fulfilled, (state, action) => {
|
||||
state.status = 'idle';
|
||||
state.queries[state.queries.length - 1].response =
|
||||
action.payload.answer;
|
||||
})
|
||||
.addCase(fetchAnswer.rejected, (state, action) => {
|
||||
state.status = 'failed';
|
||||
state.queries[state.queries.length - 1].error =
|
||||
@@ -65,5 +116,6 @@ export const selectQueries = (state: RootState) => state.conversation.queries;
|
||||
|
||||
export const selectStatus = (state: RootState) => state.conversation.status;
|
||||
|
||||
export const { addQuery, updateQuery } = conversationSlice.actions;
|
||||
export const { addQuery, updateQuery, updateStreamingQuery } =
|
||||
conversationSlice.actions;
|
||||
export default conversationSlice.reducer;
|
||||
|
||||
@@ -13,8 +13,18 @@ interface Preference {
|
||||
}
|
||||
|
||||
const initialState: Preference = {
|
||||
apiKey: '',
|
||||
selectedDocs: null,
|
||||
apiKey: 'xxx',
|
||||
selectedDocs: {
|
||||
name: 'default',
|
||||
language: 'default',
|
||||
location: 'default',
|
||||
version: 'default',
|
||||
description: 'default',
|
||||
fullName: 'default',
|
||||
dat: 'default',
|
||||
docLink: 'default',
|
||||
model: 'openai_text-embedding-ada-002',
|
||||
} as Doc,
|
||||
sourceDocs: null,
|
||||
};
|
||||
|
||||
@@ -29,7 +39,7 @@ export const prefSlice = createSlice({
|
||||
state.selectedDocs = action.payload;
|
||||
},
|
||||
setSourceDocs: (state, action) => {
|
||||
state.sourceDocs?.push(...action.payload);
|
||||
state.sourceDocs = action.payload;
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
@@ -19,20 +19,27 @@ export default function Upload({
type: 'UPLOAD' | 'TRAINIING';
percentage: number;
taskId?: string;
failed?: boolean;
}>();

function Progress({
title,
isCancellable = false,
isFailed = false,
}: {
title: string;
isCancellable?: boolean;
isFailed?: boolean;
}) {
return (
<div className="mt-5 flex flex-col items-center gap-2">
<p className="text-xl tracking-[0.15px]">{title}...</p>
<p className="text-sm text-gray-2000">This may take several minutes</p>
<p className={`ml-5 text-xl text-red-400 ${isFailed ? '' : 'hidden'}`}>
Over the token limit, please consider uploading smaller document
</p>
<p className="mt-10 text-2xl">{progress?.percentage || 0}%</p>

<div className="mb-10 w-[50%]">
<div className="h-1 w-[100%] bg-blue-4000"></div>
<div
@@ -40,6 +47,7 @@ export default function Upload({
style={{ width: `${progress?.percentage || 0}%` }}
></div>
</div>

<button
onClick={() => {
setDocName('');
@@ -71,11 +79,28 @@ export default function Upload({
.then((data) => data.json())
.then((data) => {
if (data.status == 'SUCCESS') {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) => progress && { ...progress, percentage: 100 },
);
} else {
if (data.result.limited === true) {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) =>
progress && {
...progress,
percentage: 100,
failed: true,
},
);
} else {
getDocs().then((data) => dispatch(setSourceDocs(data)));
setProgress(
(progress) =>
progress && {
...progress,
percentage: 100,
failed: false,
},
);
}
} else if (data.status == 'PROGRESS') {
setProgress(
(progress) =>
progress && {
@@ -91,6 +116,7 @@ export default function Upload({
<Progress
title="Training is in progress"
isCancellable={progress?.percentage === 100}
isFailed={progress?.failed === true}
></Progress>
);
}
@@ -125,10 +151,18 @@ export default function Upload({

const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
multiple: true,
multiple: false,
onDragEnter: doNothing,
onDragOver: doNothing,
onDragLeave: doNothing,
maxSize: 25000000,
accept: {
'application/pdf': ['.pdf'],
'text/plain': ['.txt'],
'text/x-rst': ['.rst'],
'text/x-markdown': ['.md'],
'application/zip': ['.zip'],
},
});

let view;
@@ -139,7 +173,10 @@ export default function Upload({
} else {
view = (
<>
<p className="mb-7 text-xl text-jet">Upload New Documentation</p>
<p className="text-xl text-jet">Upload New Documentation</p>
<p className="mb-3 text-xs text-gray-4000">
Please upload .pdf, .txt, .rst, .md, .zip limited to 25mb
</p>
<input
type="text"
className="h-10 w-[60%] rounded-md border-2 border-gray-5000 px-3 outline-none"

@@ -1,20 +1,13 @@
import ast
import json
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

import dotenv
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
import ast

dotenv.load_dotenv()

ps = list(Path("inputs").glob("**/*.py"))
data = []
sources = []
@@ -24,13 +17,6 @@ for p in ps:
sources.append(p)

# with open('inputs/client.py', 'r') as f:
# tree = ast.parse(f.read())

# print(tree)

def get_functions_in_class(node):
functions = []
functions_code = []
@@ -64,21 +50,9 @@ for code in data:
c1 += 1

# save the structure dict as json
import json
with open('structure_dict.json', 'w') as f:
json.dump(structure_dict, f)

# llm = OpenAI(temperature=0)
# prompt = PromptTemplate(
# input_variables=["code"],
# template="Code: {code}, Documentation: ",
# )
#
# print(prompt.format(code="print('hello world')"))
# print(llm(prompt.format(code="print('hello world')")))

if not Path("outputs").exists():
Path("outputs").mkdir()

@@ -119,8 +93,3 @@ for source, classes in structure_dict.items():
else:
with open(f"outputs/{source_w}", "a") as f:
f.write(f"\n\nFunction: {functions[function]}, \nDocumentation: {response}")

@@ -1,21 +1,20 @@
import os
import sys
import nltk
import dotenv
import typer

from collections import defaultdict
from typing import List, Optional

from parser.file.bulk import SimpleDirectoryReader
from parser.schema.base import Document
from parser.open_ai_func import call_openai_api, get_user_permission
from parser.py2doc import transform_to_docs
from parser.py2doc import extract_functions_and_classes as extract_py
from parser.js2doc import extract_functions_and_classes as extract_js
from parser.java2doc import extract_functions_and_classes as extract_java
from parser.token_func import group_split
import dotenv
import nltk
import typer

from parser.file.bulk import SimpleDirectoryReader
from parser.java2doc import extract_functions_and_classes as extract_java
from parser.js2doc import extract_functions_and_classes as extract_js
from parser.open_ai_func import call_openai_api, get_user_permission
from parser.py2doc import extract_functions_and_classes as extract_py
from parser.py2doc import transform_to_docs
from parser.schema.base import Document
from parser.token_func import group_split

dotenv.load_dotenv()

@@ -25,28 +24,32 @@ nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

#Splits all files in specified folder to documents
def metadata_from_filename(title):
return {'title': title}

# Splits all files in specified folder to documents
@app.command()
def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
help="Whether to skip price confirmation"),
help="Whether to skip price confirmation"),
dir: Optional[List[str]] = typer.Option(["inputs"],
help="""List of paths to directory for index creation.
E.g. --dir inputs --dir inputs2"""),
file: Optional[List[str]] = typer.Option(None,
help="""File paths to use (Optional; overrides dir).
help="""File paths to use (Optional; overrides dir).
E.g. --file inputs/1.md --file inputs/2.md"""),
recursive: Optional[bool] = typer.Option(True, help="Whether to recursively search in subdirectories."),
limit: Optional[int] = typer.Option(None, help="Maximum number of files to read."),
formats: Optional[List[str]] = typer.Option([".rst", ".md"],
help="""List of required extensions (list with .)
Currently supported: .rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
help="""List of required extensions (list with .)
Currently supported:
.rst, .md, .pdf, .docx, .csv, .epub, .html, .mdx"""),
exclude: Optional[bool] = typer.Option(True, help="Whether to exclude hidden files (dotfiles)."),
sample: Optional[bool] = typer.Option(False, help="Whether to output sample of the first 5 split documents."),
sample: Optional[bool] = typer.Option(False,
help="Whether to output sample of the first 5 split documents."),
token_check: Optional[bool] = typer.Option(True, help="Whether to group small documents and split large."),
min_tokens: Optional[int] = typer.Option(150, help="Minimum number of tokens to not group."),
max_tokens: Optional[int] = typer.Option(2000, help="Maximum number of tokens to not split."),
):

"""
Creates index from specified location or files.
By default /inputs folder is used, .rst and .md are parsed.
@@ -55,23 +58,23 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,
def process_one_docs(directory, folder_name):
raw_docs = SimpleDirectoryReader(input_dir=directory, input_files=file, recursive=recursive,
required_exts=formats, num_files_limit=limit,
exclude_hidden=exclude).load_data()
exclude_hidden=exclude, file_metadata=metadata_from_filename).load_data()

# Here we split the documents, as needed, into smaller chunks.
# We do this due to the context limits of the LLMs.
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
#Old method
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens,
token_check=token_check)
# Old method
# text_splitter = RecursiveCharacterTextSplitter()
# docs = text_splitter.split_documents(raw_docs)

#Sample feature
if sample == True:
# Sample feature
if sample:
for i in range(min(5, len(raw_docs))):
print(raw_docs[i].text)

docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

# Here we check for command line arguments for bot calls.
# If no argument exists or the yes is not True, then the
# user permission is requested to call the API.
@@ -98,12 +101,11 @@ def ingest(yes: bool = typer.Option(False, "-y", "--yes", prompt=False,

@app.command()
def convert(dir: Optional[str] = typer.Option("inputs",
help="""Path to directory to make documentation for.
help="""Path to directory to make documentation for.
E.g. --dir inputs """),
formats: Optional[str] = typer.Option("py",
help="""Required language.
help="""Required language.
py, js, java supported for now""")):

"""
Creates documentation linked to original functions from specified location.
By default /inputs folder is used, .py is parsed.
@@ -117,7 +119,7 @@ def convert(dir: Optional[str] = typer.Option("inputs",
else:
raise Exception("Sorry, language not supported yet")
transform_to_docs(functions_dict, classes_dict, formats, dir)

if __name__ == "__main__":
app()

app()

@@ -1,38 +1,42 @@
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import dotenv
import tiktoken
import sys
from argparse import ArgumentParser
from pathlib import Path

import dotenv
import faiss
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price

def call_openai_api():
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
pickle.dump(store, f)

def get_user_permission():
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
docs_content = (" ".join(docs))
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api()
@@ -41,7 +45,8 @@ def get_user_permission():
else:
print("The API was not called. No money was spent.")

#Load .env file

# Load .env file
dotenv.load_dotenv()

ap = ArgumentParser("Script for training DocsGPT on .rst documentation files.")

@@ -1,71 +1,75 @@
import os
import pickle
import dotenv
import tiktoken
import sys
import faiss
import shutil
import sys
from argparse import ArgumentParser
from pathlib import Path
from langchain.vectorstores import FAISS

import dotenv
import faiss
import tiktoken
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from sphinx.cmd.build import main as sphinx_main
from argparse import ArgumentParser

def convert_rst_to_txt(src_dir, dst_dir):
# Check if the source directory exists
if not os.path.exists(src_dir):
raise Exception("Source directory does not exist")
# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
for file in files:
# Check if the file has .rst extension
if file.endswith(".rst"):
# Construct the full path of the file
src_file = os.path.join(root, file.replace(".rst", ""))
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={src_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
elif file.endswith(".md"):
# Rename the .md file to .rst file
src_file = os.path.join(root, file)
dst_file = os.path.join(root, file.replace(".md", ".rst"))
os.rename(src_file, dst_file)
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={dst_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
# Check if the source directory exists
if not os.path.exists(src_dir):
raise Exception("Source directory does not exist")
# Walk through the source directory
for root, dirs, files in os.walk(src_dir):
for file in files:
# Check if the file has .rst extension
if file.endswith(".rst"):
# Construct the full path of the file
src_file = os.path.join(root, file.replace(".rst", ""))
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={src_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())
elif file.endswith(".md"):
# Rename the .md file to .rst file
src_file = os.path.join(root, file)
dst_file = os.path.join(root, file.replace(".md", ".rst"))
os.rename(src_file, dst_file)
# Convert the .rst file to .txt file using sphinx-build
args = f". -b text -D extensions=sphinx.ext.autodoc " \
f"-D master_doc={dst_file} " \
f"-D source_suffix=.rst " \
f"-C {dst_dir} "
sphinx_main(args.split())

def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price

def call_openai_api():
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
pickle.dump(store, f)

def get_user_permission():
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
docs_content = (" ".join(docs))
tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api()
@@ -74,6 +78,7 @@ def get_user_permission():
else:
print("The API was not called. No money was spent.")

ap = ArgumentParser("Script for training DocsGPT on Sphinx documentation")
ap.add_argument("-i", "--inputs",
type=str,
@@ -81,17 +86,17 @@ ap.add_argument("-i", "--inputs",
help="Directory containing documentation files")
args = ap.parse_args()

#Load .env file
# Load .env file
dotenv.load_dotenv()

#Directory to vector
# Directory to vector
src_dir = args.inputs
dst_dir = "tmp"

convert_rst_to_txt(src_dir, dst_dir)

# Here we load in the data in the format that Notion exports it in.
ps = list(Path("tmp/"+ src_dir).glob("**/*.txt"))
ps = list(Path("tmp/" + src_dir).glob("**/*.txt"))

# parse all child directories
data = []

@@ -3,7 +3,6 @@ from abc import abstractmethod
from typing import Any, List

from langchain.docstore.document import Document as LCDocument

from parser.schema.base import Document

@@ -1,8 +1,5 @@
"""Simple reader that reads files of different formats from a directory."""
import logging
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

from parser.file.base import BaseReader
from parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser
@@ -12,6 +9,8 @@ from parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document
from pathlib import Path
from typing import Callable, Dict, List, Optional, Union

DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(),
@@ -52,17 +51,17 @@ class SimpleDirectoryReader(BaseReader):
"""

def __init__(
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
chunk_size_max: int = 2048,
self,
input_dir: Optional[str] = None,
input_files: Optional[List] = None,
exclude_hidden: bool = True,
errors: str = "ignore",
recursive: bool = True,
required_exts: Optional[List[str]] = None,
file_extractor: Optional[Dict[str, BaseParser]] = None,
num_files_limit: Optional[int] = None,
file_metadata: Optional[Callable[[str], Dict]] = None,
chunk_size_max: int = 2048,
) -> None:
"""Initialize with parameters."""
super().__init__()
@@ -103,8 +102,8 @@ class SimpleDirectoryReader(BaseReader):
elif self.exclude_hidden and input_file.name.startswith("."):
continue
elif (
self.required_exts is not None
and input_file.suffix not in self.required_exts
self.required_exts is not None
and input_file.suffix not in self.required_exts
):
continue
else:
@@ -115,7 +114,7 @@ class SimpleDirectoryReader(BaseReader):
new_input_files.extend(sub_input_files)

if self.num_files_limit is not None and self.num_files_limit > 0:
new_input_files = new_input_files[0 : self.num_files_limit]
new_input_files = new_input_files[0: self.num_files_limit]

# print total number of files added
logging.debug(
@@ -151,10 +150,15 @@ class SimpleDirectoryReader(BaseReader):
data = f.read()
if isinstance(data, List):
data_list.extend(data)
if self.file_metadata is not None:
for _ in range(len(data)):
metadata_list.append(self.file_metadata(str(input_file)))
else:
data_list.append(str(data))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))
if self.file_metadata is not None:
metadata_list.append(self.file_metadata(str(input_file)))

if concatenate:
return [Document("\n".join(data_list))]

@@ -9,6 +9,7 @@ from typing import Dict, Union

from parser.file.base_parser import BaseParser

class HTMLParser(BaseParser):
"""HTML parser."""

@@ -23,21 +24,20 @@ class HTMLParser(BaseParser):
Union[str, List[str]]: a string or a List of strings.
"""
try:
import unstructured
from unstructured.partition.html import partition_html
from unstructured.staging.base import convert_to_isd
from unstructured.cleaners.core import clean
except ImportError:
raise ValueError("unstructured package is required to parse HTML files.")
from unstructured.partition.html import partition_html
from unstructured.staging.base import convert_to_isd
from unstructured.cleaners.core import clean

# Using the unstructured library to convert the html to isd format
# isd sample : isd = [
# {"text": "My Title", "type": "Title"},
# {"text": "My Narrative", "type": "NarrativeText"}
# ]
# {"text": "My Title", "type": "Title"},
# {"text": "My Narrative", "type": "NarrativeText"}
# ]
with open(file, "r", encoding="utf-8") as fp:
elements = partition_html(file=fp)
isd = convert_to_isd(elements)
isd = convert_to_isd(elements)

# Removing non ascii charactwers from isd_el['text']
for isd_el in isd:
@@ -46,15 +46,15 @@ class HTMLParser(BaseParser):
# Removing all the \n characters from isd_el['text'] using regex and replace with single space
# Removing all the extra spaces from isd_el['text'] using regex and replace with single space
for isd_el in isd:
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE|re.DOTALL)
isd_el['text'] = re.sub(r"\s{2,}"," ", isd_el['text'], flags=re.MULTILINE|re.DOTALL)
isd_el['text'] = re.sub(r'\n', ' ', isd_el['text'], flags=re.MULTILINE | re.DOTALL)
isd_el['text'] = re.sub(r"\s{2,}", " ", isd_el['text'], flags=re.MULTILINE | re.DOTALL)

# more cleaning: extra_whitespaces, dashes, bullets, trailing_punctuation
for isd_el in isd:
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True )
clean(isd_el['text'], extra_whitespace=True, dashes=True, bullets=True, trailing_punctuation=True)

# Creating a list of all the indexes of isd_el['type'] = 'Title'
title_indexes = [i for i,isd_el in enumerate(isd) if isd_el['type'] == 'Title']
title_indexes = [i for i, isd_el in enumerate(isd) if isd_el['type'] == 'Title']

# Creating 'Chunks' - List of lists of strings
# each list starting with with isd_el['type'] = 'Title' and all the data till the next 'Title'
@@ -64,19 +64,20 @@ class HTMLParser(BaseParser):
Chunks = [[]]
final_chunks = list(list())

for i,isd_el in enumerate(isd):
for i, isd_el in enumerate(isd):
if i in title_indexes:
Chunks.append([])
Chunks[-1].append(isd_el['text'])

# Removing all the chunks with sum of lenth of all the strings in the chunk < 25 #TODO: This value can be an user defined variable
# Removing all the chunks with sum of lenth of all the strings in the chunk < 25
# TODO: This value can be a user defined variable
for chunk in Chunks:
# sum of lenth of all the strings in the chunk
sum = 0
sum += len(str(chunk))
if sum < 25:
Chunks.remove(chunk)
else :
else:
# appending all the approved chunks to final_chunks as a single string
final_chunks.append(" ".join([str(item) for item in chunk]))
return final_chunks

@@ -7,8 +7,8 @@ import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast

from parser.file.base_parser import BaseParser
import tiktoken
from parser.file.base_parser import BaseParser

class MarkdownParser(BaseParser):
@@ -20,13 +20,13 @@ class MarkdownParser(BaseParser):
"""

def __init__(
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
max_tokens: int = 2048,
# remove_tables: bool = True,
**kwargs: Any,
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
max_tokens: int = 2048,
# remove_tables: bool = True,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
@@ -35,8 +35,8 @@ class MarkdownParser(BaseParser):
self._max_tokens = max_tokens
# self._remove_tables = remove_tables

def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str], current_text: str):
def tups_chunk_append(self, tups: List[Tuple[Optional[str], str]], current_header: Optional[str],
current_text: str):
"""Append to tups chunk."""
num_tokens = len(tiktoken.get_encoding("cl100k_base").encode(current_text))
if num_tokens > self._max_tokens:
@@ -46,6 +46,7 @@ class MarkdownParser(BaseParser):
else:
tups.append((current_header, current_text))
return tups

def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
"""Convert a markdown file to a dictionary.

@@ -115,7 +116,7 @@ class MarkdownParser(BaseParser):
return {}

def parse_tups(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> List[Tuple[Optional[str], str]]:
"""Parse file into tuples."""
with open(filepath, "r") as f:
@@ -130,7 +131,7 @@ class MarkdownParser(BaseParser):
return markdown_tups

def parse_file(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> Union[str, List[str]]:
"""Parse file into string."""
tups = self.parse_tups(filepath, errors=errors)

@@ -5,10 +5,10 @@ Contains parser for md files.
"""
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from typing import Any, Dict, List, Optional, Tuple, Union

from parser.file.base_parser import BaseParser
import tiktoken

class RstParser(BaseParser):
"""reStructuredText parser.
@@ -19,17 +19,17 @@ class RstParser(BaseParser):
"""

def __init__(
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
remove_table_excess: bool = True,
remove_interpreters: bool = True,
remove_directives: bool = True,
remove_whitespaces_excess: bool = True,
#Be carefull with remove_characters_excess, might cause data loss
remove_characters_excess: bool = True,
**kwargs: Any,
self,
*args: Any,
remove_hyperlinks: bool = True,
remove_images: bool = True,
remove_table_excess: bool = True,
remove_interpreters: bool = True,
remove_directives: bool = True,
remove_whitespaces_excess: bool = True,
# Be carefull with remove_characters_excess, might cause data loss
remove_characters_excess: bool = True,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)
@@ -41,7 +41,6 @@ class RstParser(BaseParser):
self._remove_whitespaces_excess = remove_whitespaces_excess
self._remove_characters_excess = remove_characters_excess

def rst_to_tups(self, rst_text: str) -> List[Tuple[Optional[str], str]]:
"""Convert a reStructuredText file to a dictionary.

@@ -56,7 +55,8 @@ class RstParser(BaseParser):

for i, line in enumerate(lines):
header_match = re.match(r"^[^\S\n]*[-=]+[^\S\n]*$", line)
if header_match and i > 0 and (len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
if header_match and i > 0 and (
len(lines[i - 1].strip()) == len(header_match.group().strip()) or lines[i - 2] == lines[i - 2]):
if current_header is not None:
if current_text == "" or None:
continue
@@ -72,7 +72,7 @@ class RstParser(BaseParser):

rst_tups.append((current_header, current_text))

#TODO: Format for rst
# TODO: Format for rst
#
# if current_header is not None:
# # pass linting, assert keys are defined
@@ -136,7 +136,7 @@ class RstParser(BaseParser):
return {}

def parse_tups(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> List[Tuple[Optional[str], str]]:
"""Parse file into tuples."""
with open(filepath, "r") as f:
@@ -159,7 +159,7 @@ class RstParser(BaseParser):
return rst_tups

def parse_file(
self, filepath: Path, errors: str = "ignore"
self, filepath: Path, errors: str = "ignore"
) -> Union[str, List[str]]:
"""Parse file into string."""
tups = self.parse_tups(filepath, errors=errors)

@@ -77,13 +77,13 @@ class PandasCSVParser(BaseParser):
"""

def __init__(
self,
*args: Any,
concat_rows: bool = True,
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
**kwargs: Any
self,
*args: Any,
concat_rows: bool = True,
col_joiner: str = ", ",
row_joiner: str = "\n",
pandas_config: dict = {},
**kwargs: Any
) -> None:
"""Init params."""
super().__init__(*args, **kwargs)

@@ -1,6 +1,8 @@
import os

import javalang

def find_files(directory):
files_list = []
for root, dirs, files in os.walk(directory):
@@ -9,6 +11,7 @@ def find_files(directory):
files_list.append(os.path.join(root, file))
return files_list

def extract_functions(file_path):
with open(file_path, "r") as file:
java_code = file.read()
@@ -28,6 +31,7 @@ def extract_functions(file_path):
methods[method_name] = method_source_code
return methods

def extract_classes(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
@@ -47,6 +51,7 @@ def extract_classes(file_path):
classes[class_name] = class_string
return classes

def extract_functions_and_classes(directory):
files = find_files(directory)
functions_dict = {}
@@ -58,4 +63,4 @@ def extract_functions_and_classes(directory):
classes = extract_classes(file)
if classes:
classes_dict[file] = classes
return functions_dict, classes_dict
return functions_dict, classes_dict

@@ -1,6 +1,7 @@
import os
import esprima

import escodegen
import esprima

def find_files(directory):
@@ -11,6 +12,7 @@ def find_files(directory):
files_list.append(os.path.join(root, file))
return files_list

def extract_functions(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
@@ -26,7 +28,6 @@ def extract_functions(file_path):
func_name = declaration.id.name if declaration.id else '<anonymous>'
functions[func_name] = escodegen.generate(declaration.init)
elif node.type == 'ClassDeclaration':
class_name = node.id.name
for subnode in node.body.body:
if subnode.type == 'MethodDefinition':
func_name = subnode.key.name
@@ -38,6 +39,7 @@ def extract_functions(file_path):
functions[func_name] = escodegen.generate(declaration.init)
return functions

def extract_classes(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
@@ -53,6 +55,7 @@ def extract_classes(file_path):
classes[class_name] = ", ".join(function_names)
return classes

def extract_functions_and_classes(directory):
files = find_files(directory)
functions_dict = {}

@@ -1,32 +1,32 @@
import os
import faiss
import pickle

import tiktoken
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings

#from langchain.embeddings import HuggingFaceEmbeddings
#from langchain.embeddings import HuggingFaceInstructEmbeddings
#from langchain.embeddings import CohereEmbeddings

from langchain.vectorstores import FAISS
from retry import retry

# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.embeddings import HuggingFaceInstructEmbeddings
# from langchain.embeddings import CohereEmbeddings

def num_tokens_from_string(string: str, encoding_name: str) -> int:
# Function to convert string to tokens and estimate user cost.
# Function to convert string to tokens and estimate user cost.
encoding = tiktoken.get_encoding(encoding_name)
num_tokens = len(encoding.encode(string))
total_price = ((num_tokens/1000) * 0.0004)
total_price = ((num_tokens / 1000) * 0.0004)
return num_tokens, total_price

@retry(tries=10, delay=60)
def store_add_texts_with_retry(store, i):
store.add_texts([i.page_content], metadatas=[i.metadata])
#store_pine.add_texts([i.page_content], metadatas=[i.metadata])
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])

def call_openai_api(docs, folder_name):
# Function to create a vector store from the documents and save it to disk.
# Function to create a vector store from the documents and save it to disk.

# create output folder if it doesn't exist
if not os.path.exists(f"outputs/{folder_name}"):
@@ -37,21 +37,22 @@ def call_openai_api(docs, folder_name):
# remove the first element from docs
docs.pop(0)
# cut first n docs if you want to restart
#docs = docs[:n]
# docs = docs[:n]
c1 = 0
# pinecone.init(
# api_key="", # find at app.pinecone.io
# environment="us-east1-gcp" # next to api key in console
# )
#index_name = "pandas"
# index_name = "pandas"
store = FAISS.from_documents(docs_test, OpenAIEmbeddings())
#store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)
# store_pine = Pinecone.from_documents(docs_test, OpenAIEmbeddings(), index_name=index_name)

# Uncomment for MPNet embeddings
# model_name = "sentence-transformers/all-mpnet-base-v2"
# hf = HuggingFaceEmbeddings(model_name=model_name)
# store = FAISS.from_documents(docs_test, hf)
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs),
bar_format='{l_bar}{bar}| Time Left: {remaining}'):
try:
store_add_texts_with_retry(store, i)
except Exception as e:
@@ -64,20 +65,20 @@ def call_openai_api(docs, folder_name):
c1 += 1
store.save_local(f"outputs/{folder_name}")

def get_user_permission(docs, folder_name):
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
# Here we convert the docs list to a string and calculate the number of OpenAI tokens the string represents.
#docs_content = (" ".join(docs))
# docs_content = (" ".join(docs))
docs_content = ""
for doc in docs:
docs_content += doc.page_content

tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
# Here we print the number of tokens and the approx user cost with some visually appealing formatting.
print(f"Number of Tokens = {format(tokens, ',d')}")
print(f"Approx Cost = ${format(total_price, ',.2f')}")
#Here we check for user permission before calling the API.
# Here we check for user permission before calling the API.
user_input = input("Price Okay? (Y/N) \n").lower()
if user_input == "y":
call_openai_api(docs, folder_name)

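For orientation (an illustrative calculation, not part of the diff): at the $0.0004 per 1,000 tokens rate hard-coded in num_tokens_from_string, a corpus of 1,000,000 tokens is estimated at (1,000,000 / 1,000) * 0.0004 = $0.40, which is the figure the "Price Okay? (Y/N)" prompt then asks the user to approve.
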
@@ -1,10 +1,12 @@
import os
import ast
import tiktoken
import os
from pathlib import Path

import tiktoken
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

def find_files(directory):
files_list = []
for root, dirs, files in os.walk(directory):
@@ -13,6 +15,7 @@ def find_files(directory):
files_list.append(os.path.join(root, file))
return files_list

def extract_functions(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
@@ -25,6 +28,7 @@ def extract_functions(file_path):
functions[func_name] = func_def
return functions

def extract_classes(file_path):
with open(file_path, 'r') as file:
source_code = file.read()
@@ -40,6 +44,7 @@ def extract_classes(file_path):
classes[class_name] = ", ".join(function_names)
return classes

def extract_functions_and_classes(directory):
files = find_files(directory)
functions_dict = {}
@@ -53,11 +58,12 @@ def extract_functions_and_classes(directory):
classes_dict[file] = classes
return functions_dict, classes_dict

def parse_functions(functions_dict, formats, dir):
c1 = len(functions_dict)
for i, (source, functions) in enumerate(functions_dict.items(), start=1):
print(f"Processing file {i}/{c1}")
source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
subfolders = "/".join(source_w.split("/")[:-1])
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
for j, (name, function) in enumerate(functions.items(), start=1):
@@ -70,18 +76,19 @@ def parse_functions(functions_dict, formats, dir):
response = llm(prompt.format(code=function))
mode = "a" if Path(f"outputs/{source_w}").exists() else "w"
with open(f"outputs/{source_w}", mode) as f:
f.write(f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")
f.write(
f"\n\n# Function name: {name} \n\nFunction: \n```\n{function}\n```, \nDocumentation: \n{response}")

def parse_classes(classes_dict, formats, dir):
c1 = len(classes_dict)
for i, (source, classes) in enumerate(classes_dict.items()):
print(f"Processing file {i+1}/{c1}")
source_w = source.replace(dir+"/", "").replace("."+formats, ".md")
print(f"Processing file {i + 1}/{c1}")
source_w = source.replace(dir + "/", "").replace("." + formats, ".md")
subfolders = "/".join(source_w.split("/")[:-1])
Path(f"outputs/{subfolders}").mkdir(parents=True, exist_ok=True)
for name, function_names in classes.items():
print(f"Processing Class {i+1}/{c1}")
print(f"Processing Class {i + 1}/{c1}")
prompt = PromptTemplate(
input_variables=["class_name", "functions_names"],
template="Class name: {class_name} \nFunctions: {functions_names}, \nDocumentation: ",
@@ -92,6 +99,7 @@ def parse_classes(classes_dict, formats, dir):
with open(f"outputs/{source_w}", "a" if Path(f"outputs/{source_w}").exists() else "w") as f:
f.write(f"\n\n# Class name: {name} \n\nFunctions: \n{function_names}, \nDocumentation: \n{response}")

def transform_to_docs(functions_dict, classes_dict, formats, dir):
docs_content = ''.join([str(key) + str(value) for key, value in functions_dict.items()])
docs_content += ''.join([str(key) + str(value) for key, value in classes_dict.items()])
@@ -110,4 +118,4 @@ def transform_to_docs(functions_dict, classes_dict, formats, dir):
parse_classes(classes_dict, formats, dir)
print("All done!")
else:
print("The API was not called. No money was spent.")
print("The API was not called. No money was spent.")

@@ -2,7 +2,6 @@
from dataclasses import dataclass

from langchain.docstore.document import Document as LCDocument

from parser.schema.schema import BaseDocument

@@ -1,9 +1,9 @@
import re
import tiktoken

from typing import List
from parser.schema.base import Document
from math import ceil
from typing import List

import tiktoken
from parser.schema.base import Document

def separate_header_and_body(text):
@@ -13,6 +13,7 @@ def separate_header_and_body(text):
body = text[len(header):]
return header, body

def group_documents(documents: List[Document], min_tokens: int, max_tokens: int) -> List[Document]:
docs = []
current_group = None
@@ -23,7 +24,8 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)
if current_group is None:
current_group = Document(text=doc.text, doc_id=doc.doc_id, embedding=doc.embedding,
extra_info=doc.extra_info)
elif len(tiktoken.get_encoding("cl100k_base").encode(current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
elif len(tiktoken.get_encoding("cl100k_base").encode(
current_group.text)) + doc_len < max_tokens and doc_len >= min_tokens:
current_group.text += " " + doc.text
else:
docs.append(current_group)
@@ -35,6 +37,7 @@ def group_documents(documents: List[Document], min_tokens: int, max_tokens: int)

return docs

def split_documents(documents: List[Document], max_tokens: int) -> List[Document]:
docs = []
for doc in documents:
@@ -54,17 +57,18 @@ def split_documents(documents: List[Document], max_tokens: int) -> List[Document
docs.append(new_doc)
return docs

def group_split(documents: List[Document], max_tokens: int = 2000, min_tokens: int = 150, token_check: bool = True):
if token_check == False:
if not token_check:
return documents
print("Grouping small documents")
try:
documents = group_documents(documents=documents, min_tokens=min_tokens, max_tokens=max_tokens)
except:
except Exception:
print("Grouping failed, try running without token_check")
print("Separating large documents")
try:
documents = split_documents(documents=documents, max_tokens=max_tokens)
except:
except Exception:
print("Grouping failed, try running without token_check")
return documents

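To make the thresholds above concrete, a minimal usage sketch (not part of the diff, and assuming the repository's parser package is importable and that Document accepts a bare text argument, as it does elsewhere in this diff): documents under min_tokens are merged into their neighbour, documents over max_tokens are split, and the token counts come from the same cl100k_base encoding used in group_documents.

import tiktoken
from parser.schema.base import Document
from parser.token_func import group_split

enc = tiktoken.get_encoding("cl100k_base")
docs = [
    Document(text="a short note"),   # well under the min_tokens default of 150
    Document(text="word " * 3000),   # well over the max_tokens default of 2000
]
for d in docs:
    # token counts that drive the grouping and splitting decisions
    print(len(enc.encode(d.text)))
result = group_split(documents=docs, min_tokens=150, max_tokens=2000, token_check=True)
print(len(result))
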
@@ -33,7 +33,7 @@ esutils==1.0.1
et-xmlfile==1.1.0
faiss-cpu==1.7.3
filelock==3.9.0
Flask==2.2.2
Flask==2.2.5
frozenlist==1.3.3
greenlet==2.0.2
gunicorn==20.1.0
@@ -88,7 +88,7 @@ python-magic==0.4.27
python-pptx==0.6.21
pytz==2022.7.1
PyYAML==6.0
redis==4.5.1
redis==4.5.4
regex==2022.10.31
requests==2.28.2
retry==0.9.2
@@ -124,8 +124,7 @@ typing-inspect==0.8.0
typing_extensions==4.4.0
unstructured==0.4.11
urllib3==1.26.14
Werkzeug==2.2.3
wrapt==1.14.1
XlsxWriter==3.0.8
xxhash==3.2.0
yarl==1.8.2
yarl==1.8.2

45 setup.sh Executable file
@@ -0,0 +1,45 @@
#!/bin/bash
cd "$(dirname "$0")" || exit

# Create the required directories on the host machine if they don't exist
[ ! -d "./application/indexes" ] && mkdir -p ./application/indexes
[ ! -d "./application/inputs" ] && mkdir -p ./application/inputs
[ ! -d "./application/vectors" ] && mkdir -p ./application/vectors

# Build frontend and backend images
docker build -t frontend_image ./frontend
docker build -t backend_image ./application

# Run redis and mongo services
docker run -d --name redis -p 6379:6379 redis:6-alpine
docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6

# Run backend and worker services
docker run -d --name backend -p 5001:5001 \
--link redis:redis --link mongo:mongo \
-v $(pwd)/application/indexes:/app/indexes \
-v $(pwd)/application/inputs:/app/inputs \
-v $(pwd)/application/vectors:/app/vectors \
-e API_KEY=$OPENAI_API_KEY \
-e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-e CELERY_BROKER_URL=redis://redis:6379/0 \
-e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-e MONGO_URI=mongodb://mongo:27017/docsgpt \
backend_image

docker run -d --name worker \
--link redis:redis --link mongo:mongo \
-e API_KEY=$OPENAI_API_KEY \
-e EMBEDDINGS_KEY=$OPENAI_API_KEY \
-e CELERY_BROKER_URL=redis://redis:6379/0 \
-e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
-e MONGO_URI=mongodb://mongo:27017/docsgpt \
-e API_URL=http://backend:5001 \
backend_image \
celery -A app.celery worker -l INFO

# Run frontend service
docker run -d --name frontend -p 5173:5173 \
-e VITE_API_HOST=http://localhost:5001 \
frontend_image

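A minimal way to exercise the new script (a usage sketch, not part of the diff), assuming Docker is running locally and an OpenAI key is available, since setup.sh forwards $OPENAI_API_KEY into the backend and worker containers:

export OPENAI_API_KEY=your-key-here   # placeholder value, substitute a real key
./setup.sh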