This commit is contained in:
Alex
2023-02-03 12:45:29 +00:00
parent 2135b8420f
commit b71a9bf5ee
14 changed files with 1683 additions and 0 deletions

1
application/.env_sample Normal file
View File

@@ -0,0 +1 @@
OPENAI_API_KEY=your_api_key

11
application/Dockerfile Normal file
View File

@@ -0,0 +1,11 @@
# Container image for the DocsGPT Flask application.
FROM python:3.9
WORKDIR /app
# Copy only the dependency list first so the (slow) pip layer is reused from
# the build cache until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Now copy the application sources; edits here no longer trigger a reinstall.
COPY . /app
ENV FLASK_APP=app.py
# NOTE(review): development mode turns on Flask's debugger and reloader, and
# the server below listens on all interfaces. Fine for a local preview, but
# do not expose this container publicly as-is.
ENV FLASK_ENV=development
EXPOSE 5000
# Bind to 0.0.0.0 so the server is reachable from outside the container.
CMD ["flask", "run", "--host=0.0.0.0"]

61
application/app.py Normal file
View File

@@ -0,0 +1,61 @@
import os
import pickle
import dotenv
import datetime
from flask import Flask, request, render_template
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
import faiss
from langchain import OpenAI
from langchain.chains import VectorDBQAWithSourcesChain
from langchain.prompts import PromptTemplate
# Load environment variables (e.g. OPENAI_API_KEY) from a local .env file.
dotenv.load_dotenv()

# Load the FAISS index, the answer prompt template and the pickled vector
# store once at import time. All paths are relative to the current working
# directory.
index = faiss.read_index("docs.index")

# Read the template with an explicit encoding so decoding does not depend on
# the platform's locale default.
with open("combine_prompt.txt", "r", encoding="utf-8") as f:
    template = f.read()

# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file. Only load a faiss_store.pkl that was produced by a trusted ingest run.
with open("faiss_store.pkl", "rb") as f:
    store = pickle.load(f)

app = Flask(__name__)
@app.route("/")
def home():
    """Serve the chat UI rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/api/answer", methods=["POST"])
def api_answer():
    """Answer a question using the vector store and the OpenAI LLM.

    Expects a JSON request body of the form ``{"question": "<text>"}``.

    Returns:
        The chain's result dict (serialized to JSON by Flask) with its
        ``answer`` field formatted for the frontend, or a
        ``({"error": ...}, 400)`` response when the body does not contain a
        non-empty ``question`` string.
    """
    data = request.get_json()
    # Reject malformed payloads explicitly instead of failing with a
    # KeyError/TypeError (HTTP 500) further down.
    question = data.get("question") if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        error = {"error": "Request body must be JSON with a non-empty 'question' string."}
        return error, 400

    # The FAISS index is loaded separately from the pickled store, so attach
    # it before the store is queried.
    store.index = index
    # create a prompt template
    c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
    # create a chain with the prompt template and the store
    chain = VectorDBQAWithSourcesChain.from_llm(
        llm=OpenAI(temperature=0),
        vectorstore=store,
        combine_prompt=c_prompt,
    )
    # fetch the answer
    result = chain({"question": question})

    # some formatting for the frontend: "\\n" is the literal two-character
    # sequence backslash + n (as used in the prompt examples), not a real
    # newline character.
    answer = result['answer'].replace("\\n", "<br>")
    result['answer'] = answer.replace("SOURCES:", "")
    return result
# handling CORS
@app.after_request
def after_request(response):
    """Add permissive CORS headers to every outgoing response.

    Args:
        response: The response object Flask is about to send.

    Returns:
        The same response object with the CORS headers appended.
    """
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Headers', 'Content-Type,Authorization'),
        ('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response
if __name__ == "__main__":
    # Executed directly as a script (not via ``flask run``): start the
    # built-in server with debug mode enabled. Intended for local use only.
    app.run(debug=True)

View File

@@ -0,0 +1,27 @@
You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
ALWAYS return a "SOURCES" part in your answer.
QUESTION: How to merge tables in pandas?
=========
Content: pandas provides various facilities for easily combining together Series or DataFrame with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.
Source: 28-pl
Content: pandas provides a single function, merge(), as the entry point for all standard database join operations between DataFrame or named Series objects: \n\npandas.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)
Source: 30-pl
=========
FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
SOURCES: 28-pl 30-pl
QUESTION: How to eat vegetables using pandas?
=========
Content: ExtensionArray.repeat(repeats, axis=None) Returns a new ExtensionArray where each element of the current ExtensionArray is repeated consecutively a given number of times. \n\nParameters: repeats int or array of ints. The number of repetitions for each element. This should be a positive integer. Repeating 0 times will return an empty array. axis (0 or index, 1 or columns), default 0 The axis along which to repeat values. Currently only axis=0 is supported.
Source: 0-pl
=========
FINAL ANSWER: You can't eat vegetables using pandas. You can only eat them using your mouth.
SOURCES:
QUESTION: {question}
=========
{summaries}
=========
FINAL ANSWER:

BIN
application/docs.index Normal file

Binary file not shown.

BIN
application/faiss_store.pkl Normal file

Binary file not shown.

1331
application/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

5
application/package.json Normal file
View File

@@ -0,0 +1,5 @@
{
"devDependencies": {
"tailwindcss": "^3.2.4"
}
}

View File

@@ -0,0 +1,34 @@
aiohttp==3.8.3
aiosignal==1.3.1
async-timeout==4.0.2
attrs==22.2.0
charset-normalizer==2.1.1
click==8.1.3
dataclasses-json==0.5.7
faiss-cpu==1.7.3
Flask==2.2.2
frozenlist==1.3.3
greenlet==2.0.2
idna==3.4
itsdangerous==2.1.2
Jinja2==3.1.2
langchain==0.0.76
MarkupSafe==2.1.2
marshmallow==3.19.0
marshmallow-enum==1.5.1
multidict==6.0.4
mypy-extensions==0.4.3
numpy==1.24.1
openai==0.26.4
packaging==23.0
pydantic==1.10.4
python-dotenv==0.21.1
PyYAML==6.0
requests==2.28.2
SQLAlchemy==1.4.46
tqdm==4.64.1
typing-inspect==0.8.0
typing_extensions==4.4.0
urllib3==1.26.14
Werkzeug==2.2.2
yarl==1.8.2

View File

@@ -0,0 +1,44 @@
// Chat form controller: sends the typed question to /api/answer and renders
// both the question and the returned answer as bubbles inside #messages.

// Append one chat bubble to #messages and keep the chat window scrolled to
// the bottom. `text` is always inserted as text nodes, never parsed as HTML,
// so neither user input nor model output can inject markup or scripts.
// When `allowBreaks` is true, literal "<br>" markers (which the backend puts
// into answers) are rendered as real line breaks.
function appendBubble(bubbleClasses, contentTag, text, allowBreaks) {
    const content = document.createElement(contentTag);
    content.className = 'text-sm';
    const lines = allowBreaks ? String(text).split('<br>') : [String(text)];
    lines.forEach((line, i) => {
        if (i > 0) {
            content.appendChild(document.createElement('br'));
        }
        content.appendChild(document.createTextNode(line));
    });

    const bubble = document.createElement('div');
    bubble.className = bubbleClasses;
    bubble.appendChild(content);
    // appendChild (instead of innerHTML +=) avoids re-parsing the whole
    // message list on every new bubble.
    document.getElementById('messages').appendChild(bubble);

    const chatWindow = document.getElementById('chat-container');
    chatWindow.scrollTop = chatWindow.scrollHeight;
}

// Toggle the submit button between its idle ("Send") and busy ("Thinking...")
// states. The busy label is static markup, so innerHTML is safe here.
function setSubmitBusy(busy) {
    const button = document.getElementById('button-submit');
    button.innerHTML = busy
        ? '<i class="fa fa-circle-o-notch fa-spin"></i> Thinking...'
        : 'Send';
    button.disabled = busy;
}

var el = document.getElementById('message-form');
if (el) {
    el.addEventListener("submit", function (event) {
        console.log("submitting")
        // Handle the submit via fetch instead of a full page reload.
        event.preventDefault()

        const input = document.getElementById("message-input");
        const message = input.value;
        appendBubble('bg-blue-500 text-white p-2 rounded-lg mb-2 self-end', 'p', message, false);
        input.value = "";
        setSubmitBusy(true);

        fetch('/api/answer', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({question: message}),
        })
            .then(response => response.json())
            .then(data => {
                console.log('Success:', data);
                appendBubble('bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start', 'code', data.answer, true);
                setSubmitBusy(false);
            })
            .catch((error) => {
                console.error('Error:', error);
                // Re-enable the form so the user can retry after a failure.
                setSubmitBusy(false);
            });
    });
}

View File

@@ -0,0 +1,56 @@
/* Tailwind entry points: pull in the base, component and utility layers. */
@tailwind base;
@tailwind components;
@tailwind utilities;
/* Chat window: fixed-height white panel whose content scrolls on overflow. */
#chat-container {
height: 44rem;
background-color: white;
padding: 10px;
overflow: auto;
}
/* Hand-written rules that reuse Tailwind utility class names. */
.bg-gray-200 {
background-color: #edf2f7;
}
.bg-gray-900 {
background-color: #1a202c;
}
.rounded-lg {
border-radius: 0.5rem;
}
.shadow {
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
}
.text-gray-700 {
color: #4a5568;
}
.text-sm {
font-size: 0.875rem;
}
/* NOTE(review): sets 1.5rem; Tailwind's stock p-4 is presumably 1rem - confirm this override is intended. */
.p-4 {
padding: 1.5rem;
}
/* Circular spinner: a grey ring with a blue top edge, rotated by the spin animation below. */
.loader {
border: 16px solid #f3f3f3; /* Light grey */
border-top: 16px solid #3498db; /* Blue */
border-radius: 50%;
width: 120px;
height: 120px;
animation: spin 2s linear infinite;
}
/* One full clockwise rotation per animation cycle. */
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}

View File

@@ -0,0 +1,8 @@
// Tailwind CSS configuration for the application.
/** @type {import('tailwindcss').Config} */
module.exports = {
// Files Tailwind scans for class names when generating the stylesheet.
content: ["./templates/**/*.html", "./static/src/**/*.js"],
theme: {
// No additions to the default theme.
extend: {},
},
// No Tailwind plugins are enabled.
plugins: [],
}

View File

@@ -0,0 +1,65 @@
<!DOCTYPE html>
<html>
<head>
<title>DocsGPT 🦖 Preview</title>
<link href="{{url_for('static',filename='dist/css/output.css')}}" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
</head>
<body>
<header class="bg-white p-2 flex justify-between items-center">
<h1 class="text-lg font-medium">DocsGPT</h1>
<a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">About</a>
</header>
<div class="flex">
<div class="w-3/4">
<div class="w-full flex flex-col">
<div id="chat-container">
<div id="messages" class="w-full flex flex-col" >
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
<p class="text-sm">Hello, ask me anything about this library. I'm here to help.</p>
</div>
<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end">
<p class="text-sm">How to create API key for Api gateway?</p>
</div>
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
<p class="text-sm">Import the boto3 library and create a client for the API Gateway service:</p>
</div>
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
<code class="text-sm">client = boto3.client('apigateway')</code>
</div>
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
<p class="text-sm">Create an API key:</p>
</div>
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
<code class="text-sm">response = client.create_api_key(<br>name='API_KEY_NAME',<br>description='API key description',<br>enabled=True)<br>api_key = response['value']</code>
</div>
</div>
</div>
<div class=" flex mt-4 mb-2">
<form id="message-form">
<input id="message-input" class="bg-white p-2 rounded-lg ml-2 w-[46rem]" type="text" placeholder="Type your message here...">
<button id="button-submit" class="bg-blue-500 text-white p-2 rounded-lg ml-2 mr-2 ml-2" type="submit">Send</button>
</form>
</div>
</div>
</div>
<div class="w-1/4 p-2">
<p class="text-sm">This is a chatbot that uses GPT-3, Faiss and <a href="https://github.com/hwchase17/langchain" class="text-blue-500 hover:text-blue-800">LangChain</a> to answer questions</p>
<p class="text-sm">The source code is available on <a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">Github</a></p>
</div>
</div>
<script src="{{url_for('static',filename='src/chat.js')}}"></script>
</body>
</html>

40
scripts/ingest_rst.py Normal file
View File

@@ -0,0 +1,40 @@
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import dotenv
import os
# Load environment variables from a local .env file before any OpenAI call.
dotenv.load_dotenv()

# Collect every reStructuredText file under pandasdocs/, including all child
# directories.
ps = list(Path("pandasdocs/").glob("**/*.rst"))

data = []
sources = []
for p in ps:
    # Read explicitly as UTF-8 so decoding does not depend on the platform's
    # locale default.
    with open(p, encoding="utf-8") as f:
        data.append(f.read())
    # Keep the source as a plain string: pickled Path objects are tied to the
    # OS flavour they were created on.
    sources.append(str(p))

# Here we split the documents, as needed, into smaller chunks.
# We do this due to the context limits of the LLMs.
text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
docs = []
metadatas = []
for i, d in enumerate(data):
    splits = text_splitter.split_text(d)
    docs.extend(splits)
    # One independent metadata dict per chunk; multiplying a one-element list
    # would make every chunk share (and alias) the same dict object.
    metadatas.extend({"source": sources[i]} for _ in splits)

# Here we create a vector store from the documents and save it to disk.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
# The FAISS index is written to its own file and detached from the store
# before pickling; the application reattaches it after loading both files.
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as f:
    pickle.dump(store, f)