switching between llms

This commit is contained in:
Alex
2023-02-15 18:40:23 +00:00
parent f9fe3f2f48
commit 4d1ff8238d
6 changed files with 76 additions and 52 deletions

View File

@@ -1,56 +1,68 @@
import os
import pickle
import dotenv
import datetime
from flask import Flask, request, render_template
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
import faiss
import dotenv
import requests
from flask import Flask, request, render_template
from langchain import FAISS
from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
import requests
from langchain.embeddings import OpenAIEmbeddings
# from manifest import Manifest
# from langchain.llms.manifest import ManifestWrapper
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
# manifest = Manifest(
# client_name = "huggingface",
# client_connection = "http://127.0.0.1:5000"
# )
# Select the LLM backend and the embeddings model from the environment.
# An unset *or empty* variable (e.g. a blank `LLM_NAME=` entry) falls back to
# the OpenAI default; an empty name would otherwise match none of the
# provider branches used when answering a request.
# NOTE(review): these reads happen before dotenv.load_dotenv() is called
# further down, so values defined only in a .env file are not visible here -
# confirm whether the load should be moved above this point.
llm_choice = os.getenv("LLM_NAME") or "openai"
embeddings_choice = os.getenv("EMBEDDINGS_NAME") or "openai_text-embedding-ada-002"
if llm_choice == "manifest":
from manifest import Manifest
from langchain.llms.manifest import ManifestWrapper
manifest = Manifest(
client_name="huggingface",
client_connection="http://127.0.0.1:5000"
)
# Redirect PosixPath to WindowsPath on Windows
import platform
if platform.system() == "Windows":
import pathlib
temp = pathlib.PosixPath
pathlib.PosixPath = pathlib.WindowsPath
# loading the .env file
dotenv.load_dotenv()
with open("combine_prompt.txt", "r") as f:
template = f.read()
# check whether the API_KEY and EMBEDDINGS_KEY environment variables are set
if os.getenv("OPENAI_API_KEY") is not None:
if os.getenv("API_KEY") is not None:
api_key_set = True
else:
api_key_set = False
if os.getenv("EMBEDDINGS_KEY") is not None:
embeddings_key_set = True
else:
embeddings_key_set = False
app = Flask(__name__)
@app.route("/")
def home():
return render_template("index.html", api_key_set=api_key_set)
return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
embeddings_choice=embeddings_choice)
@app.route("/api/answer", methods=["POST"])
@@ -60,7 +72,14 @@ def api_answer():
if not api_key_set:
api_key = data["api_key"]
else:
api_key = os.getenv("OPENAI_API_KEY")
api_key = os.getenv("API_KEY")
if not embeddings_key_set:
embeddings_key = data["embeddings_key"]
else:
embeddings_key = os.getenv("EMBEDDINGS_KEY")
print(embeddings_key)
print(api_key)
# check if the vectorstore is set
if "active_docs" in data:
@@ -70,24 +89,32 @@ def api_answer():
else:
vectorstore = ""
# loading the index and the store and the prompt template
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=api_key))
# Note: the embeddings loaded here must be the same ones the index was built with; they are selected by the EMBEDDINGS_NAME environment variable
if embeddings_choice == "openai_text-embedding-ada-002":
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
elif embeddings_choice == "cohere_medium":
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
# create a prompt template
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
# create a chain with the prompt template and the store
#llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
if llm_choice == "openai":
llm = OpenAI(openai_api_key=api_key, temperature=0)
#llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
# llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
elif llm_choice == "manifest":
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
elif llm_choice == "huggingface":
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
elif llm_choice == "cohere":
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
qa_chain = load_qa_chain(llm = llm, chain_type="map_reduce",
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
combine_prompt=c_prompt)
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=2)
# fetch the answer
@@ -105,6 +132,7 @@ def api_answer():
# }
return result
@app.route("/api/docs_check", methods=["POST"])
def check_docs():
# check if docs exist in a vectorstore folder
@@ -130,6 +158,7 @@ def check_docs():
return {"status": 'loaded'}
# handling CORS
@app.after_request
def after_request(response):

BIN
application/index.faiss Normal file

Binary file not shown.

BIN
application/index.pkl Normal file

Binary file not shown.

View File

@@ -25,6 +25,7 @@ if (el) {
body: JSON.stringify({question: message,
api_key: localStorage.getItem('apiKey'),
embeddings_key: localStorage.getItem('apiKey'),
active_docs: localStorage.getItem('activeDocs')}),
})
.then(response => response.json())

View File

@@ -131,15 +131,19 @@ This will return a new DataFrame with all the columns from both tables, and only
var option = document.createElement("option");
if (docsIndex[key].name == docsIndex[key].language) {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/";
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
}
else {
option.text = docsIndex[key].name + " " + docsIndex[key].version;
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/";
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
if (docsIndex[key].model == "{{ embeddings_choice }}") {
select.add(option);
}
}
}
}
{% if not api_key_set %}

View File

@@ -6,6 +6,8 @@ from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
#from langchain.embeddings import HuggingFaceEmbeddings
#from langchain.embeddings import HuggingFaceInstructEmbeddings
#from langchain.embeddings import CohereEmbeddings
from retry import retry
@@ -44,28 +46,16 @@ def call_openai_api(docs, folder_name):
# store = FAISS.from_documents(docs_test, hf)
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
try:
import time
store_add_texts_with_retry(store, i)
except Exception as e:
print(e)
print("Error on ", i)
print("Saving progress")
print(f"stopped at {c1} out of {len(docs)}")
faiss.write_index(store.index, f"outputs/{folder_name}/docs.index")
store_index_bak = store.index
store.index = None
with open(f"outputs/{folder_name}/faiss_store.pkl", "wb") as f:
pickle.dump(store, f)
print("Sleeping for 60 seconds and trying again")
time.sleep(60)
store.index = store_index_bak
store.add_texts([i.page_content], metadatas=[i.metadata])
store.save_local(f"outputs/{folder_name}")
break
c1 += 1
faiss.write_index(store.index, f"outputs/{folder_name}/docs.index")
store.index = None
with open(f"outputs/{folder_name}/faiss_store.pkl", "wb") as f:
pickle.dump(store, f)
store.save_local(f"outputs/{folder_name}")
def get_user_permission(docs, folder_name):
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.