mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
init2
This commit is contained in:
1
application/.env_sample
Normal file
1
application/.env_sample
Normal file
@@ -0,0 +1 @@
|
||||
OPENAI_API_KEY=your_api_key
|
||||
11
application/Dockerfile
Normal file
11
application/Dockerfile
Normal file
@@ -0,0 +1,11 @@
|
||||
FROM python:3.9

WORKDIR /app

# Copy the application source and install its Python dependencies.
COPY . /app
RUN pip install --no-cache-dir -r requirements.txt

# Flask configuration: entry module and (development) run mode.
ENV FLASK_APP=app.py
ENV FLASK_ENV=development

EXPOSE 5000

# Bind to all interfaces so the container port mapping works.
CMD ["flask", "run", "--host=0.0.0.0"]
|
||||
61
application/app.py
Normal file
61
application/app.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import os
import pickle
import datetime

import dotenv
from flask import Flask, request, render_template

# os.environ["LANGCHAIN_HANDLER"] = "langchain"
import faiss
from langchain import OpenAI
from langchain.chains import VectorDBQAWithSourcesChain
from langchain.prompts import PromptTemplate

# Load environment variables (e.g. OPENAI_API_KEY) from the .env file.
dotenv.load_dotenv()

# Load the artifacts produced by scripts/ingest_rst.py: the FAISS index,
# the combine-prompt template, and the pickled vector store.
index = faiss.read_index("docs.index")

with open("combine_prompt.txt", "r") as f:
    template = f.read()

# NOTE(review): pickle.load is only safe because faiss_store.pkl is a
# locally generated artifact — never load one from an untrusted source.
with open("faiss_store.pkl", "rb") as f:
    store = pickle.load(f)

app = Flask(__name__)
||||
@app.route("/")
def home():
    """Serve the chat UI page."""
    return render_template("index.html")
|
||||
|
||||
|
||||
@app.route("/api/answer", methods=["POST"])
def api_answer():
    """Answer a question against the loaded FAISS index via an LLM chain.

    Expects a JSON body ``{"question": "..."}`` and returns the chain's
    result dict with the answer formatted for HTML display.
    """
    data = request.get_json()
    # Guard against a missing/malformed body instead of crashing with a
    # KeyError (HTTP 500) — return a clear 400 to the client.
    if not data or "question" not in data:
        return {"error": "missing 'question' in request body"}, 400
    question = data["question"]

    # Attach the FAISS index that was loaded separately at startup.
    store.index = index

    # Build the combine prompt from the on-disk template.
    c_prompt = PromptTemplate(
        input_variables=["summaries", "question"], template=template
    )
    # The chain is rebuilt on every request; cheap, but could be cached
    # at startup if this becomes a hot path.
    chain = VectorDBQAWithSourcesChain.from_llm(
        llm=OpenAI(temperature=0), vectorstore=store, combine_prompt=c_prompt
    )
    result = chain({"question": question})

    # Format for the frontend: escaped newlines -> <br>, drop the
    # "SOURCES:" label the prompt instructs the model to emit.
    result['answer'] = result['answer'].replace("\\n", "<br>")
    result['answer'] = result['answer'].replace("SOURCES:", "")

    return result
|
||||
|
||||
|
||||
# handling CORS
@app.after_request
def after_request(response):
    """Attach permissive CORS headers to every outgoing response."""
    cors_headers = (
        ('Access-Control-Allow-Origin', '*'),
        ('Access-Control-Allow-Headers', 'Content-Type,Authorization'),
        ('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'),
    )
    for header_name, header_value in cors_headers:
        response.headers.add(header_name, header_value)
    return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Dev-server entry point; debug=True enables the reloader/tracebacks.
    app.run(debug=True)
|
||||
27
application/combine_prompt.txt
Normal file
27
application/combine_prompt.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
|
||||
Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
|
||||
ALWAYS return a "SOURCES" part in your answer.
|
||||
|
||||
QUESTION: How to merge tables in pandas?
|
||||
=========
|
||||
Content: pandas provides various facilities for easily combining together Series or DataFrame with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.
|
||||
Source: 28-pl
|
||||
Content: pandas provides a single function, merge(), as the entry point for all standard database join operations between DataFrame or named Series objects: \n\npandas.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)
|
||||
Source: 30-pl
|
||||
=========
|
||||
FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
|
||||
SOURCES: 28-pl 30-pl
|
||||
|
||||
QUESTION: How to eat vegetables using pandas?
|
||||
=========
|
||||
Content: ExtensionArray.repeat(repeats, axis=None) Returns a new ExtensionArray where each element of the current ExtensionArray is repeated consecutively a given number of times. \n\nParameters: repeats int or array of ints. The number of repetitions for each element. This should be a positive integer. Repeating 0 times will return an empty array. axis (0 or ‘index’, 1 or ‘columns’), default 0 The axis along which to repeat values. Currently only axis=0 is supported.
|
||||
Source: 0-pl
|
||||
=========
|
||||
FINAL ANSWER: You can't eat vegetables using pandas. You can only eat them using your mouth.
|
||||
SOURCES:
|
||||
|
||||
QUESTION: {question}
|
||||
=========
|
||||
{summaries}
|
||||
=========
|
||||
FINAL ANSWER:
|
||||
BIN
application/docs.index
Normal file
BIN
application/docs.index
Normal file
Binary file not shown.
BIN
application/faiss_store.pkl
Normal file
BIN
application/faiss_store.pkl
Normal file
Binary file not shown.
1331
application/package-lock.json
generated
Normal file
1331
application/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
5
application/package.json
Normal file
5
application/package.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"devDependencies": {
|
||||
"tailwindcss": "^3.2.4"
|
||||
}
|
||||
}
|
||||
34
application/requirements.txt
Normal file
34
application/requirements.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
aiohttp==3.8.3
|
||||
aiosignal==1.3.1
|
||||
async-timeout==4.0.2
|
||||
attrs==22.2.0
|
||||
charset-normalizer==2.1.1
|
||||
click==8.1.3
|
||||
dataclasses-json==0.5.7
|
||||
faiss-cpu==1.7.3
|
||||
Flask==2.2.2
|
||||
frozenlist==1.3.3
|
||||
greenlet==2.0.2
|
||||
idna==3.4
|
||||
itsdangerous==2.1.2
|
||||
Jinja2==3.1.2
|
||||
langchain==0.0.76
|
||||
MarkupSafe==2.1.2
|
||||
marshmallow==3.19.0
|
||||
marshmallow-enum==1.5.1
|
||||
multidict==6.0.4
|
||||
mypy-extensions==0.4.3
|
||||
numpy==1.24.1
|
||||
openai==0.26.4
|
||||
packaging==23.0
|
||||
pydantic==1.10.4
|
||||
python-dotenv==0.21.1
|
||||
PyYAML==6.0
|
||||
requests==2.28.2
|
||||
SQLAlchemy==1.4.46
|
||||
tqdm==4.64.1
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.4.0
|
||||
urllib3==1.26.14
|
||||
Werkzeug==2.2.2
|
||||
yarl==1.8.2
|
||||
44
application/static/src/chat.js
Normal file
44
application/static/src/chat.js
Normal file
@@ -0,0 +1,44 @@
|
||||
// Wire up the chat form: post the user's question to /api/answer and
// append both the question and the bot's answer to the message list.
var el = document.getElementById('message-form');
if (el) {
    el.addEventListener("submit", function (event) {
        console.log("submitting")
        event.preventDefault()
        var message = document.getElementById("message-input").value;
        // Fix: msg_html was an undeclared implicit global; declare it.
        var msg_html = '<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end"><p class="text-sm">';
        msg_html += message;
        msg_html += '</p></div>';
        document.getElementById("messages").innerHTML += msg_html;
        var chatWindow = document.getElementById("chat-container");
        chatWindow.scrollTop = chatWindow.scrollHeight;
        // Reset the input and show a busy state on the submit button.
        document.getElementById("message-input").value = "";
        document.getElementById("button-submit").innerHTML = '<i class="fa fa-circle-o-notch fa-spin"></i> Thinking...';
        document.getElementById("button-submit").disabled = true;

        fetch('/api/answer', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({question: message}),
        })
        .then(response => response.json())
        .then(data => {
            console.log('Success:', data);
            // The server converts newlines to <br>, so inject as HTML.
            var answerHtml = '<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start"><code class="text-sm">';
            answerHtml += data.answer;
            answerHtml += '</code></div>';
            document.getElementById("messages").innerHTML += answerHtml;
            var answerWindow = document.getElementById("chat-container");
            answerWindow.scrollTop = answerWindow.scrollHeight;
            // Restore the submit button.
            document.getElementById("button-submit").innerHTML = 'Send';
            document.getElementById("button-submit").disabled = false;
        })
        .catch((error) => {
            console.error('Error:', error);
            // Restore the submit button even on failure.
            document.getElementById("button-submit").innerHTML = 'Send';
            document.getElementById("button-submit").disabled = false;
        });
    });
}
|
||||
56
application/static/src/input.css
Normal file
56
application/static/src/input.css
Normal file
@@ -0,0 +1,56 @@
|
||||
@tailwind base;
|
||||
@tailwind components;
|
||||
@tailwind utilities;
|
||||
|
||||
#chat-container {
|
||||
height: 44rem;
|
||||
background-color: white;
|
||||
padding: 10px;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
|
||||
|
||||
.bg-gray-200 {
|
||||
background-color: #edf2f7;
|
||||
}
|
||||
|
||||
.bg-gray-900 {
|
||||
background-color: #1a202c;
|
||||
}
|
||||
|
||||
.rounded-lg {
|
||||
border-radius: 0.5rem;
|
||||
}
|
||||
|
||||
.shadow {
|
||||
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
|
||||
}
|
||||
|
||||
.text-gray-700 {
|
||||
color: #4a5568;
|
||||
}
|
||||
|
||||
.text-sm {
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.p-4 {
|
||||
padding: 1.5rem;
|
||||
}
|
||||
|
||||
.loader {
|
||||
border: 16px solid #f3f3f3; /* Light grey */
|
||||
border-top: 16px solid #3498db; /* Blue */
|
||||
border-radius: 50%;
|
||||
width: 120px;
|
||||
height: 120px;
|
||||
animation: spin 2s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
|
||||
8
application/tailwind.config.js
Normal file
8
application/tailwind.config.js
Normal file
@@ -0,0 +1,8 @@
|
||||
/** @type {import('tailwindcss').Config} */
|
||||
module.exports = {
|
||||
content: ["./templates/**/*.html", "./static/src/**/*.js"],
|
||||
theme: {
|
||||
extend: {},
|
||||
},
|
||||
plugins: [],
|
||||
}
|
||||
65
application/templates/index.html
Normal file
65
application/templates/index.html
Normal file
@@ -0,0 +1,65 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>DocsGPT 🦖 Preview</title>
|
||||
<link href="{{url_for('static',filename='dist/css/output.css')}}" rel="stylesheet">
|
||||
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
|
||||
|
||||
|
||||
</head>
|
||||
<body>
|
||||
<header class="bg-white p-2 flex justify-between items-center">
|
||||
<h1 class="text-lg font-medium">DocsGPT</h1>
|
||||
<a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">About</a>
|
||||
</header>
|
||||
<div class="flex">
|
||||
<div class="w-3/4">
|
||||
<div class="w-full flex flex-col">
|
||||
<div id="chat-container">
|
||||
|
||||
<div id="messages" class="w-full flex flex-col" >
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Hello, ask me anything about this library. I'm here to help</p>
|
||||
</div>
|
||||
<div class="bg-blue-500 text-white p-2 rounded-lg mb-2 self-end">
|
||||
<p class="text-sm">How to create API key for Api gateway?</p>
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Import the boto3 library and create a client for the API Gateway service:</p>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<code class="text-sm">client = boto3.client('apigateway')</code>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<p class="text-sm">Create an API key:</p>
|
||||
|
||||
</div>
|
||||
<div class="bg-indigo-500 text-white p-2 rounded-lg mb-2 self-start">
|
||||
<code class="text-sm">response = client.create_api_key(<br>name='API_KEY_NAME',<br>description='API key description',<br>enabled=True)<br>api_key = response['value']</code>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" flex mt-4 mb-2">
|
||||
<form id="message-form">
|
||||
<input id="message-input" class="bg-white p-2 rounded-lg ml-2 w-[46rem]" type="text" placeholder="Type your message here...">
|
||||
<button id="button-submit" class="bg-blue-500 text-white p-2 rounded-lg ml-2 mr-2 ml-2" type="submit">Send</button>
|
||||
</form>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<div class="w-1/4 p-2">
|
||||
<p class="text-sm">This is a chatbot that uses the GPT-3, Faiss and <a href="https://github.com/hwchase17/langchain" class="text-blue-500 hover:text-blue-800">Langchain</a> to answer questions</p>
|
||||
<p class="text-sm">The source code is available on <a href="https://github.com/arc53/docsgpt" class="text-blue-500 hover:text-blue-800">Github</a></p>
|
||||
</div>
|
||||
</div>
|
||||
<script src="{{url_for('static',filename='src/chat.js')}}"></script>
|
||||
</body>
|
||||
</html>
|
||||
40
scripts/ingest_rst.py
Normal file
40
scripts/ingest_rst.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
import faiss
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import dotenv
import os

dotenv.load_dotenv()


# Collect every reStructuredText file under pandasdocs/, recursing into
# all child directories.
rst_paths = list(Path("pandasdocs/").glob("**/*.rst"))

data = []
sources = []
for rst_path in rst_paths:
    with open(rst_path) as fh:
        data.append(fh.read())
    sources.append(rst_path)

# Split the documents into smaller chunks to fit the LLM context limits.
text_splitter = CharacterTextSplitter(chunk_size=1500, separator="\n")
docs = []
metadatas = []
for i, doc_text in enumerate(data):
    chunks = text_splitter.split_text(doc_text)
    docs.extend(chunks)
    metadatas.extend([{"source": sources[i]}] * len(chunks))


# Build the vector store, then persist it: the raw FAISS index goes to
# docs.index (faiss's own serializer), and the store — with the index
# detached, since it can't be pickled directly — is pickled alongside.
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
faiss.write_index(store.index, "docs.index")
store.index = None
with open("faiss_store.pkl", "wb") as fh:
    pickle.dump(store, fh)
|
||||
Reference in New Issue
Block a user