mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
switching between llms
This commit is contained in:
@@ -1,56 +1,68 @@
|
|||||||
import os
|
import os
|
||||||
import pickle
|
|
||||||
import dotenv
|
|
||||||
import datetime
|
|
||||||
from flask import Flask, request, render_template
|
|
||||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
|
||||||
import faiss
|
|
||||||
|
|
||||||
|
import dotenv
|
||||||
|
import requests
|
||||||
|
from flask import Flask, request, render_template
|
||||||
from langchain import FAISS
|
from langchain import FAISS
|
||||||
from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
|
from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
|
||||||
from langchain.chains.question_answering import load_qa_chain
|
from langchain.chains.question_answering import load_qa_chain
|
||||||
|
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
|
||||||
from langchain.prompts import PromptTemplate
|
from langchain.prompts import PromptTemplate
|
||||||
import requests
|
|
||||||
from langchain.embeddings import OpenAIEmbeddings
|
|
||||||
|
|
||||||
# from manifest import Manifest
|
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||||
# from langchain.llms.manifest import ManifestWrapper
|
|
||||||
|
|
||||||
# manifest = Manifest(
|
if os.getenv("LLM_NAME") is not None:
|
||||||
# client_name = "huggingface",
|
llm_choice = os.getenv("LLM_NAME")
|
||||||
# client_connection = "http://127.0.0.1:5000"
|
else:
|
||||||
# )
|
llm_choice = "openai"
|
||||||
|
|
||||||
|
if os.getenv("EMBEDDINGS_NAME") is not None:
|
||||||
|
embeddings_choice = os.getenv("EMBEDDINGS_NAME")
|
||||||
|
else:
|
||||||
|
embeddings_choice = "openai_text-embedding-ada-002"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if llm_choice == "manifest":
|
||||||
|
from manifest import Manifest
|
||||||
|
from langchain.llms.manifest import ManifestWrapper
|
||||||
|
|
||||||
|
manifest = Manifest(
|
||||||
|
client_name="huggingface",
|
||||||
|
client_connection="http://127.0.0.1:5000"
|
||||||
|
)
|
||||||
|
|
||||||
# Redirect PosixPath to WindowsPath on Windows
|
# Redirect PosixPath to WindowsPath on Windows
|
||||||
import platform
|
import platform
|
||||||
|
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
temp = pathlib.PosixPath
|
temp = pathlib.PosixPath
|
||||||
pathlib.PosixPath = pathlib.WindowsPath
|
pathlib.PosixPath = pathlib.WindowsPath
|
||||||
|
|
||||||
# loading the .env file
|
# loading the .env file
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
with open("combine_prompt.txt", "r") as f:
|
with open("combine_prompt.txt", "r") as f:
|
||||||
template = f.read()
|
template = f.read()
|
||||||
|
|
||||||
# check if OPENAI_API_KEY is set
|
if os.getenv("API_KEY") is not None:
|
||||||
if os.getenv("OPENAI_API_KEY") is not None:
|
|
||||||
api_key_set = True
|
api_key_set = True
|
||||||
|
|
||||||
else:
|
else:
|
||||||
api_key_set = False
|
api_key_set = False
|
||||||
|
if os.getenv("EMBEDDINGS_KEY") is not None:
|
||||||
|
embeddings_key_set = True
|
||||||
|
else:
|
||||||
|
embeddings_key_set = False
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def home():
|
def home():
|
||||||
return render_template("index.html", api_key_set=api_key_set)
|
return render_template("index.html", api_key_set=api_key_set, llm_choice=llm_choice,
|
||||||
|
embeddings_choice=embeddings_choice)
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/answer", methods=["POST"])
|
@app.route("/api/answer", methods=["POST"])
|
||||||
@@ -60,7 +72,14 @@ def api_answer():
|
|||||||
if not api_key_set:
|
if not api_key_set:
|
||||||
api_key = data["api_key"]
|
api_key = data["api_key"]
|
||||||
else:
|
else:
|
||||||
api_key = os.getenv("OPENAI_API_KEY")
|
api_key = os.getenv("API_KEY")
|
||||||
|
if not embeddings_key_set:
|
||||||
|
embeddings_key = data["embeddings_key"]
|
||||||
|
else:
|
||||||
|
embeddings_key = os.getenv("EMBEDDINGS_KEY")
|
||||||
|
|
||||||
|
print(embeddings_key)
|
||||||
|
print(api_key)
|
||||||
|
|
||||||
# check if the vectorstore is set
|
# check if the vectorstore is set
|
||||||
if "active_docs" in data:
|
if "active_docs" in data:
|
||||||
@@ -70,24 +89,32 @@ def api_answer():
|
|||||||
else:
|
else:
|
||||||
vectorstore = ""
|
vectorstore = ""
|
||||||
|
|
||||||
|
|
||||||
# loading the index and the store and the prompt template
|
# loading the index and the store and the prompt template
|
||||||
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=api_key))
|
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||||
|
if embeddings_choice == "openai_text-embedding-ada-002":
|
||||||
|
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
|
||||||
|
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||||
|
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||||
|
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
|
||||||
|
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||||
|
elif embeddings_choice == "cohere_medium":
|
||||||
|
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||||
|
|
||||||
# create a prompt template
|
# create a prompt template
|
||||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
|
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
|
||||||
# create a chain with the prompt template and the store
|
|
||||||
|
|
||||||
|
if llm_choice == "openai":
|
||||||
#llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
|
||||||
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
||||||
#llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
elif llm_choice == "manifest":
|
||||||
# llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
||||||
|
elif llm_choice == "huggingface":
|
||||||
|
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
||||||
|
elif llm_choice == "cohere":
|
||||||
|
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
||||||
|
|
||||||
qa_chain = load_qa_chain(llm = llm, chain_type="map_reduce",
|
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
|
||||||
combine_prompt=c_prompt)
|
combine_prompt=c_prompt)
|
||||||
|
|
||||||
|
|
||||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=2)
|
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=2)
|
||||||
|
|
||||||
# fetch the answer
|
# fetch the answer
|
||||||
@@ -105,6 +132,7 @@ def api_answer():
|
|||||||
# }
|
# }
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api/docs_check", methods=["POST"])
|
@app.route("/api/docs_check", methods=["POST"])
|
||||||
def check_docs():
|
def check_docs():
|
||||||
# check if docs exist in a vectorstore folder
|
# check if docs exist in a vectorstore folder
|
||||||
@@ -130,6 +158,7 @@ def check_docs():
|
|||||||
|
|
||||||
return {"status": 'loaded'}
|
return {"status": 'loaded'}
|
||||||
|
|
||||||
|
|
||||||
# handling CORS
|
# handling CORS
|
||||||
@app.after_request
|
@app.after_request
|
||||||
def after_request(response):
|
def after_request(response):
|
||||||
|
|||||||
BIN
application/index.faiss
Normal file
BIN
application/index.faiss
Normal file
Binary file not shown.
BIN
application/index.pkl
Normal file
BIN
application/index.pkl
Normal file
Binary file not shown.
@@ -25,6 +25,7 @@ if (el) {
|
|||||||
|
|
||||||
body: JSON.stringify({question: message,
|
body: JSON.stringify({question: message,
|
||||||
api_key: localStorage.getItem('apiKey'),
|
api_key: localStorage.getItem('apiKey'),
|
||||||
|
embeddings_key: localStorage.getItem('apiKey'),
|
||||||
active_docs: localStorage.getItem('activeDocs')}),
|
active_docs: localStorage.getItem('activeDocs')}),
|
||||||
})
|
})
|
||||||
.then(response => response.json())
|
.then(response => response.json())
|
||||||
|
|||||||
@@ -131,15 +131,19 @@ This will return a new DataFrame with all the columns from both tables, and only
|
|||||||
var option = document.createElement("option");
|
var option = document.createElement("option");
|
||||||
if (docsIndex[key].name == docsIndex[key].language) {
|
if (docsIndex[key].name == docsIndex[key].language) {
|
||||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||||
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/";
|
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||||
|
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||||
select.add(option);
|
select.add(option);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||||
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/";
|
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||||
|
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||||
select.add(option);
|
select.add(option);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
{% if not api_key_set %}
|
{% if not api_key_set %}
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ from langchain.vectorstores import FAISS
|
|||||||
from langchain.embeddings import OpenAIEmbeddings
|
from langchain.embeddings import OpenAIEmbeddings
|
||||||
|
|
||||||
#from langchain.embeddings import HuggingFaceEmbeddings
|
#from langchain.embeddings import HuggingFaceEmbeddings
|
||||||
|
#from langchain.embeddings import HuggingFaceInstructEmbeddings
|
||||||
|
#from langchain.embeddings import CohereEmbeddings
|
||||||
|
|
||||||
from retry import retry
|
from retry import retry
|
||||||
|
|
||||||
@@ -44,28 +46,16 @@ def call_openai_api(docs, folder_name):
|
|||||||
# store = FAISS.from_documents(docs_test, hf)
|
# store = FAISS.from_documents(docs_test, hf)
|
||||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||||
try:
|
try:
|
||||||
import time
|
|
||||||
store_add_texts_with_retry(store, i)
|
store_add_texts_with_retry(store, i)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
print("Error on ", i)
|
print("Error on ", i)
|
||||||
print("Saving progress")
|
print("Saving progress")
|
||||||
print(f"stopped at {c1} out of {len(docs)}")
|
print(f"stopped at {c1} out of {len(docs)}")
|
||||||
faiss.write_index(store.index, f"outputs/{folder_name}/docs.index")
|
store.save_local(f"outputs/{folder_name}")
|
||||||
store_index_bak = store.index
|
break
|
||||||
store.index = None
|
|
||||||
with open(f"outputs/{folder_name}/faiss_store.pkl", "wb") as f:
|
|
||||||
pickle.dump(store, f)
|
|
||||||
print("Sleeping for 60 seconds and trying again")
|
|
||||||
time.sleep(60)
|
|
||||||
store.index = store_index_bak
|
|
||||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
|
||||||
c1 += 1
|
c1 += 1
|
||||||
|
store.save_local(f"outputs/{folder_name}")
|
||||||
faiss.write_index(store.index, f"outputs/{folder_name}/docs.index")
|
|
||||||
store.index = None
|
|
||||||
with open(f"outputs/{folder_name}/faiss_store.pkl", "wb") as f:
|
|
||||||
pickle.dump(store, f)
|
|
||||||
|
|
||||||
def get_user_permission(docs, folder_name):
|
def get_user_permission(docs, folder_name):
|
||||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||||
|
|||||||
Reference in New Issue
Block a user