mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-05-01 14:36:40 +00:00
Merge branch 'main' of https://github.com/arc53/DocsGPT into main
This commit is contained in:
@@ -1,54 +1,88 @@
|
||||
import os
|
||||
import pickle
|
||||
import json
|
||||
|
||||
import dotenv
|
||||
import datetime
|
||||
from flask import Flask, request, render_template
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
import faiss
|
||||
from langchain import OpenAI
|
||||
from langchain.chains import VectorDBQAWithSourcesChain
|
||||
from langchain.prompts import PromptTemplate
|
||||
import requests
|
||||
from flask import Flask, request, render_template
|
||||
from langchain import FAISS
|
||||
from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
|
||||
if os.getenv("LLM_NAME") is not None:
|
||||
llm_choice = os.getenv("LLM_NAME")
|
||||
else:
|
||||
llm_choice = "openai"
|
||||
|
||||
if os.getenv("EMBEDDINGS_NAME") is not None:
|
||||
embeddings_choice = os.getenv("EMBEDDINGS_NAME")
|
||||
else:
|
||||
embeddings_choice = "openai_text-embedding-ada-002"
|
||||
|
||||
|
||||
|
||||
if llm_choice == "manifest":
|
||||
from manifest import Manifest
|
||||
from langchain.llms.manifest import ManifestWrapper
|
||||
|
||||
manifest = Manifest(
|
||||
client_name="huggingface",
|
||||
client_connection="http://127.0.0.1:5000"
|
||||
)
|
||||
|
||||
# Redirect PosixPath to WindowsPath on Windows
|
||||
import platform
|
||||
|
||||
if platform.system() == "Windows":
|
||||
import pathlib
|
||||
|
||||
temp = pathlib.PosixPath
|
||||
pathlib.PosixPath = pathlib.WindowsPath
|
||||
|
||||
# loading the .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
|
||||
with open("combine_prompt.txt", "r") as f:
|
||||
template = f.read()
|
||||
|
||||
# check if OPENAI_API_KEY is set
|
||||
if os.getenv("OPENAI_API_KEY") is not None:
|
||||
api_key_set = True
|
||||
with open("combine_prompt_hist.txt", "r") as f:
|
||||
template_hist = f.read()
|
||||
|
||||
if os.getenv("API_KEY") is not None:
|
||||
api_key_set = True
|
||||
else:
|
||||
api_key_set = False
|
||||
|
||||
|
||||
if os.getenv("EMBEDDINGS_KEY") is not None:
|
||||
embeddings_key_set = True
|
||||
else:
|
||||
embeddings_key_set = False
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
@app.route("/")
|
||||
def home():
|
||||
return render_template("index.html", api_key_set=api_key_set)
|
||||
return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
|
||||
embeddings_choice=embeddings_choice)
|
||||
|
||||
|
||||
@app.route("/api/answer", methods=["POST"])
|
||||
def api_answer():
|
||||
data = request.get_json()
|
||||
question = data["question"]
|
||||
history = data["history"]
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
api_key = os.getenv("API_KEY")
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = os.getenv("EMBEDDINGS_KEY")
|
||||
|
||||
|
||||
# check if the vectorstore is set
|
||||
if "active_docs" in data:
|
||||
@@ -59,21 +93,44 @@ def api_answer():
|
||||
vectorstore = ""
|
||||
|
||||
# loading the index and the store and the prompt template
|
||||
index = faiss.read_index(f"{vectorstore}docs.index")
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
if embeddings_choice == "openai_text-embedding-ada-002":
|
||||
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
|
||||
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||
elif embeddings_choice == "cohere_medium":
|
||||
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||
|
||||
with open(f"{vectorstore}faiss_store.pkl", "rb") as f:
|
||||
store = pickle.load(f)
|
||||
|
||||
store.index = index
|
||||
# create a prompt template
|
||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
|
||||
# create a chain with the prompt template and the store
|
||||
if history:
|
||||
history = json.loads(history)
|
||||
template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
|
||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
|
||||
else:
|
||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")
|
||||
|
||||
if llm_choice == "openai":
|
||||
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
||||
elif llm_choice == "manifest":
|
||||
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
||||
elif llm_choice == "huggingface":
|
||||
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
||||
elif llm_choice == "cohere":
|
||||
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
||||
|
||||
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
|
||||
combine_prompt=c_prompt)
|
||||
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
|
||||
|
||||
chain = VectorDBQAWithSourcesChain.from_llm(llm=OpenAI(openai_api_key=api_key, temperature=0), vectorstore=store, combine_prompt=c_prompt)
|
||||
# fetch the answer
|
||||
result = chain({"question": question})
|
||||
result = chain({"query": question})
|
||||
print(result)
|
||||
|
||||
# some formatting for the frontend
|
||||
result['answer'] = result['result']
|
||||
result['answer'] = result['answer'].replace("\\n", "<br>")
|
||||
result['answer'] = result['answer'].replace("SOURCES:", "")
|
||||
# mock result
|
||||
@@ -83,30 +140,33 @@ def api_answer():
|
||||
# }
|
||||
return result
|
||||
|
||||
|
||||
@app.route("/api/docs_check", methods=["POST"])
|
||||
def check_docs():
|
||||
# check if docs exist in a vectorstore folder
|
||||
data = request.get_json()
|
||||
vectorstore = "vectors/" + data["docs"]
|
||||
base_path = 'https://raw.githubusercontent.com/arc53/DocsHUB/main/'
|
||||
#
|
||||
if os.path.exists(vectorstore):
|
||||
if os.path.exists(vectorstore) or data["docs"] == "default":
|
||||
return {"status": 'exists'}
|
||||
else:
|
||||
r = requests.get(base_path + vectorstore + "docs.index")
|
||||
# save to vectors directory
|
||||
# check if the directory exists
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
r = requests.get(base_path + vectorstore + "index.faiss")
|
||||
|
||||
with open(vectorstore + "docs.index", "wb") as f:
|
||||
f.write(r.content)
|
||||
# download the store
|
||||
r = requests.get(base_path + vectorstore + "faiss_store.pkl")
|
||||
with open(vectorstore + "faiss_store.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
if r.status_code != 200:
|
||||
return {"status": 'null'}
|
||||
else:
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
with open(vectorstore + "index.faiss", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
# download the store
|
||||
r = requests.get(base_path + vectorstore + "index.pkl")
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
return {"status": 'loaded'}
|
||||
|
||||
return {"status": 'loaded'}
|
||||
|
||||
# handling CORS
|
||||
@app.after_request
|
||||
|
||||
@@ -20,8 +20,8 @@ Source: 0-pl
|
||||
FINAL ANSWER: You can't eat vegetables using pandas. You can only eat them using your mouth.
|
||||
SOURCES:
|
||||
|
||||
QUESTION: {question}
|
||||
QUESTION: {{ question }}
|
||||
=========
|
||||
{summaries}
|
||||
{{ summaries }}
|
||||
=========
|
||||
FINAL ANSWER:
|
||||
27
application/combine_prompt_hist.txt
Normal file
27
application/combine_prompt_hist.txt
Normal file
@@ -0,0 +1,27 @@
|
||||
You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
|
||||
Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
|
||||
ALWAYS return a "SOURCES" part in your answer. You can also remeber things from previous questions and use them in your answer.
|
||||
|
||||
QUESTION: How to merge tables in pandas?
|
||||
=========
|
||||
Content: pandas provides various facilities for easily combining together Series or DataFrame with various kinds of set logic for the indexes and relational algebra functionality in the case of join / merge-type operations.
|
||||
Source: 28-pl
|
||||
Content: pandas provides a single function, merge(), as the entry point for all standard database join operations between DataFrame or named Series objects: \n\npandas.merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None)
|
||||
Source: 30-pl
|
||||
=========
|
||||
FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
|
||||
SOURCES: 28-pl 30-pl
|
||||
|
||||
QUESTION: {{ historyquestion }}
|
||||
=========
|
||||
CONTENT:
|
||||
SOURCE:
|
||||
=========
|
||||
FINAL ANSWER: {{ historyanswer }}
|
||||
SOURCES:
|
||||
|
||||
QUESTION: {{ question }}
|
||||
=========
|
||||
{{ summaries }}
|
||||
=========
|
||||
FINAL ANSWER:
|
||||
Binary file not shown.
Binary file not shown.
BIN
application/index.faiss
Normal file
BIN
application/index.faiss
Normal file
Binary file not shown.
BIN
application/index.pkl
Normal file
BIN
application/index.pkl
Normal file
Binary file not shown.
@@ -45,6 +45,7 @@ pytz==2022.7.1
|
||||
PyYAML==6.0
|
||||
regex==2022.10.31
|
||||
requests==2.28.2
|
||||
retry==0.9.2
|
||||
six==1.16.0
|
||||
snowballstemmer==2.2.0
|
||||
Sphinx==6.1.3
|
||||
@@ -60,10 +61,11 @@ tiktoken==0.1.2
|
||||
tokenizers==0.13.2
|
||||
tqdm==4.64.1
|
||||
transformers==4.26.0
|
||||
typer==0.7.0
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.4.0
|
||||
unstructured==0.4.8
|
||||
urllib3==1.26.14
|
||||
Werkzeug==2.2.2
|
||||
Werkzeug==2.2.3
|
||||
XlsxWriter==3.0.8
|
||||
yarl==1.8.2
|
||||
|
||||
@@ -25,6 +25,8 @@ if (el) {
|
||||
|
||||
body: JSON.stringify({question: message,
|
||||
api_key: localStorage.getItem('apiKey'),
|
||||
embeddings_key: localStorage.getItem('apiKey'),
|
||||
history: localStorage.getItem('chatHistory'),
|
||||
active_docs: localStorage.getItem('activeDocs')}),
|
||||
})
|
||||
.then(response => response.json())
|
||||
@@ -38,9 +40,12 @@ if (el) {
|
||||
chatWindow.scrollTop = chatWindow.scrollHeight;
|
||||
document.getElementById("button-submit").innerHTML = 'Send';
|
||||
document.getElementById("button-submit").disabled = false;
|
||||
let chatHistory = [message, data.answer];
|
||||
localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
|
||||
})
|
||||
.catch((error) => {
|
||||
console.error('Error:', error);
|
||||
console.log(error);
|
||||
document.getElementById("button-submit").innerHTML = 'Send';
|
||||
document.getElementById("button-submit").disabled = false;
|
||||
});
|
||||
|
||||
@@ -131,13 +131,17 @@ This will return a new DataFrame with all the columns from both tables, and only
|
||||
var option = document.createElement("option");
|
||||
if (docsIndex[key].name == docsIndex[key].language) {
|
||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/";
|
||||
select.add(option);
|
||||
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
else {
|
||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/";
|
||||
select.add(option);
|
||||
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user