mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Merge branch 'main' into feat/analytics-and-logs
This commit is contained in:
@@ -9,6 +9,7 @@ import traceback
|
||||
|
||||
from pymongo import MongoClient
|
||||
from bson.objectid import ObjectId
|
||||
from bson.dbref import DBRef
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
@@ -20,7 +21,7 @@ logger = logging.getLogger(__name__)
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
prompts_collection = db["prompts"]
|
||||
api_key_collection = db["api_keys"]
|
||||
user_logs_collection = db["user_logs"]
|
||||
@@ -37,9 +38,7 @@ if settings.MODEL_NAME: # in case there is particular model name configured
|
||||
gpt_model = settings.MODEL_NAME
|
||||
|
||||
# load the prompts
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
@@ -75,35 +74,34 @@ def run_async_chain(chain, question, chat_history):
|
||||
|
||||
def get_data_from_api_key(api_key):
    """Load the stored configuration for an API key and normalise its source.

    Args:
        api_key: Key string matched against the ``"key"`` field of
            ``api_key_collection``.

    Returns:
        The matching document with two fields normalised: ``"retriever"`` is
        always present (``None`` when it was not stored), and ``"source"`` is
        the referenced source's ``_id`` as a string when the stored value is
        a ``DBRef`` that resolves, otherwise ``{}``. A ``"retriever"`` set on
        the referenced source document overrides the one stored on the key.

    Raises:
        Exception: ``("Invalid API Key, please generate new key", 401)`` when
            no document matches the key.
    """
    data = api_key_collection.find_one({"key": api_key})
    # Raise an exception carrying a 401 code if the API key is not found
    if data is None:
        raise Exception("Invalid API Key, please generate new key", 401)

    data.setdefault("retriever", None)

    source_ref = data.get("source")
    # dereference() yields None when the referenced document no longer
    # exists; treat that the same as a key that has no source at all.
    source_doc = db.dereference(source_ref) if isinstance(source_ref, DBRef) else None
    if source_doc is None:
        data["source"] = {}
        return data

    data["source"] = str(source_doc["_id"])
    if "retriever" in source_doc:
        data["retriever"] = source_doc["retriever"]
    return data
|
||||
|
||||
|
||||
def get_vectorstore(data):
    """Map a request's ``active_docs`` value to a vector store path.

    Args:
        data: Mapping that may contain an ``"active_docs"`` string.

    Returns:
        ``os.path.join("application", <relative>)`` where ``<relative>`` is:
        ``""`` when ``"active_docs"`` is absent or its first ``/``-separated
        segment is ``"default"``; ``"indexes/" + active_docs`` when the first
        segment is ``"local"``; ``"vectors/" + active_docs`` otherwise.
    """
    relative = ""
    if "active_docs" in data:
        active_docs = data["active_docs"]
        # Only the first path segment decides which store family is used.
        prefix = active_docs.split("/")[0]
        if prefix == "local":
            relative = "indexes/" + active_docs
        elif prefix != "default":
            relative = "vectors/" + active_docs
    return os.path.join("application", relative)
|
||||
def get_retriever(source_id: str):
    """Return the retriever name stored on a source document.

    Args:
        source_id: String form of the source document's ``_id``.

    Returns:
        The document's ``"retriever"`` value, or ``None`` when the document
        has no such field.

    Raises:
        Exception: ``("Source document does not exist", 404)`` when no source
            matches ``source_id``, including when ``source_id`` is not a
            valid ObjectId string and therefore cannot match any document.
    """
    # Validate first so a malformed id is reported as a missing source
    # rather than surfacing bson's InvalidId from the ObjectId constructor.
    doc = (
        sources_collection.find_one({"_id": ObjectId(source_id)})
        if ObjectId.is_valid(source_id)
        else None
    )
    if doc is None:
        raise Exception("Source document does not exist", 404)
    return doc.get("retriever")
|
||||
|
||||
|
||||
|
||||
def is_azure_configured():
    """Report whether all three Azure OpenAI settings are populated.

    Returns:
        The result of chaining ``settings.OPENAI_API_BASE``,
        ``settings.OPENAI_API_VERSION`` and ``settings.AZURE_DEPLOYMENT_NAME``
        with ``and``: the first falsy value if any is unset, otherwise the
        last value. The result is intended to be used for its truthiness and
        is not coerced to ``bool``.
    """
    return (
        settings.OPENAI_API_BASE
        and settings.OPENAI_API_VERSION
        and settings.AZURE_DEPLOYMENT_NAME
    )
|
||||
|
||||
|
||||
def save_conversation(conversation_id, question, response, source_log_docs, llm):
|
||||
@@ -263,29 +261,29 @@ def stream():
|
||||
else:
|
||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||
|
||||
# check if active_docs or api_key is set
|
||||
## retriever can be "brave_search", "duckduck_search" or "classic"
|
||||
retriever_name = data["retriever"] if "retriever" in data else "classic"
|
||||
|
||||
# check if active_docs or api_key is set
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key["chunks"])
|
||||
prompt_id = data_key["prompt_id"]
|
||||
source = {"active_docs": data_key["source"]}
|
||||
retriever_name = data_key["retriever"] or retriever_name
|
||||
user_api_key = data["api_key"]
|
||||
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
source = {"active_docs" : data["active_docs"]}
|
||||
retriever_name = get_retriever(data["active_docs"]) or retriever_name
|
||||
user_api_key = None
|
||||
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
if source["active_docs"].split("/")[0] in ["default", "local"]:
|
||||
retriever_name = "classic"
|
||||
else:
|
||||
retriever_name = source["active_docs"]
|
||||
|
||||
current_app.logger.info(
|
||||
f"/stream - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})},
|
||||
current_app.logger.info(f"/stream - request_data: {data}, source: {source}",
|
||||
extra={"data": json.dumps({"request_data": data, "source": source})}
|
||||
)
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
@@ -369,6 +367,10 @@ def api_answer():
|
||||
else:
|
||||
token_limit = settings.DEFAULT_MAX_HISTORY
|
||||
|
||||
## retriever can be brave_search, duckduck_search or classic
|
||||
retriever_name = data["retriever"] if "retriever" in data else "classic"
|
||||
|
||||
# use try and except to check for exception
|
||||
try:
|
||||
# check if the vectorstore is set
|
||||
if "api_key" in data:
|
||||
@@ -376,15 +378,15 @@ def api_answer():
|
||||
chunks = int(data_key["chunks"])
|
||||
prompt_id = data_key["prompt_id"]
|
||||
source = {"active_docs": data_key["source"]}
|
||||
retriever_name = data_key["retriever"] or retriever_name
|
||||
user_api_key = data["api_key"]
|
||||
else:
|
||||
source = data
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs":data["active_docs"]}
|
||||
retriever_name = get_retriever(data["active_docs"]) or retriever_name
|
||||
user_api_key = None
|
||||
|
||||
if source["active_docs"].split("/")[0] in ["default", "local"]:
|
||||
retriever_name = "classic"
|
||||
else:
|
||||
retriever_name = source["active_docs"]
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
prompt = get_prompt(prompt_id)
|
||||
|
||||
@@ -421,8 +423,8 @@ def api_answer():
|
||||
)
|
||||
|
||||
result = {"answer": response_full, "sources": source_log_docs}
|
||||
result["conversation_id"] = save_conversation(
|
||||
conversation_id, question, response_full, source_log_docs, llm
|
||||
result["conversation_id"] = str(
|
||||
save_conversation(conversation_id, question, response_full, source_log_docs, llm)
|
||||
)
|
||||
retriever_params = retriever.get_params()
|
||||
user_logs_collection.insert_one(
|
||||
@@ -459,19 +461,19 @@ def api_search():
|
||||
if "api_key" in data:
|
||||
data_key = get_data_from_api_key(data["api_key"])
|
||||
chunks = int(data_key["chunks"])
|
||||
source = {"active_docs": data_key["source"]}
|
||||
user_api_key = data["api_key"]
|
||||
source = {"active_docs":data_key["source"]}
|
||||
user_api_key = data_key["api_key"]
|
||||
elif "active_docs" in data:
|
||||
source = {"active_docs": data["active_docs"]}
|
||||
source = {"active_docs":data["active_docs"]}
|
||||
user_api_key = None
|
||||
else:
|
||||
source = {}
|
||||
user_api_key = None
|
||||
|
||||
if source["active_docs"].split("/")[0] in ["default", "local"]:
|
||||
retriever_name = "classic"
|
||||
if "retriever" in data:
|
||||
retriever_name = data["retriever"]
|
||||
else:
|
||||
retriever_name = source["active_docs"]
|
||||
retriever_name = "classic"
|
||||
if "token_limit" in data:
|
||||
token_limit = data["token_limit"]
|
||||
else:
|
||||
|
||||
@@ -3,13 +3,13 @@ import datetime
|
||||
from flask import Blueprint, request, send_from_directory
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
from application.core.settings import settings
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
@@ -35,7 +35,12 @@ def upload_index_files():
|
||||
return {"status": "no name"}
|
||||
job_name = secure_filename(request.form["name"])
|
||||
tokens = secure_filename(request.form["tokens"])
|
||||
save_dir = os.path.join(current_dir, "indexes", user, job_name)
|
||||
retriever = secure_filename(request.form["retriever"])
|
||||
id = secure_filename(request.form["id"])
|
||||
type = secure_filename(request.form["type"])
|
||||
remote_data = secure_filename(request.form["remote_data"]) if "remote_data" in request.form else None
|
||||
|
||||
save_dir = os.path.join(current_dir, "indexes", str(id))
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
if "file_faiss" not in request.files:
|
||||
print("No file part")
|
||||
@@ -55,17 +60,19 @@ def upload_index_files():
|
||||
os.makedirs(save_dir)
|
||||
file_faiss.save(os.path.join(save_dir, "index.faiss"))
|
||||
file_pkl.save(os.path.join(save_dir, "index.pkl"))
|
||||
# create entry in vectors_collection
|
||||
vectors_collection.insert_one(
|
||||
# create entry in sources_collection
|
||||
sources_collection.insert_one(
|
||||
{
|
||||
"_id": ObjectId(id),
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"language": job_name,
|
||||
"location": save_dir,
|
||||
"date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"type": "local",
|
||||
"tokens": tokens
|
||||
"type": type,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"remote_data": remote_data
|
||||
}
|
||||
)
|
||||
return {"status": "ok"}
|
||||
@@ -20,7 +20,7 @@ from application.vectorstore.vector_creator import VectorCreator
|
||||
mongo = MongoClient(settings.MONGO_URI)
|
||||
db = mongo["docsgpt"]
|
||||
conversations_collection = db["conversations"]
|
||||
vectors_collection = db["vectors"]
|
||||
sources_collection = db["sources"]
|
||||
prompts_collection = db["prompts"]
|
||||
feedback_collection = db["feedback"]
|
||||
api_key_collection = db["api_keys"]
|
||||
@@ -30,9 +30,7 @@ user_logs_collection = db["user_logs"]
|
||||
|
||||
user = Blueprint("user", __name__)
|
||||
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
def generate_minute_range(start_date, end_date):
|
||||
@@ -83,9 +81,7 @@ def get_conversations():
|
||||
conversations = conversations_collection.find().sort("date", -1).limit(30)
|
||||
list_conversations = []
|
||||
for conversation in conversations:
|
||||
list_conversations.append(
|
||||
{"id": str(conversation["_id"]), "name": conversation["name"]}
|
||||
)
|
||||
list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
|
||||
|
||||
# list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
|
||||
|
||||
@@ -116,15 +112,10 @@ def api_feedback():
|
||||
question = data["question"]
|
||||
answer = data["answer"]
|
||||
feedback = data["feedback"]
|
||||
|
||||
feedback_collection.insert_one(
|
||||
{
|
||||
"question": question,
|
||||
"answer": answer,
|
||||
"feedback": feedback,
|
||||
"timestamp": datetime.datetime.now(datetime.timezone.utc),
|
||||
}
|
||||
)
|
||||
new_doc = {"question": question, "answer": answer, "feedback": feedback, "timestamp": datetime.datetime.now(datetime.timezone.utc)}
|
||||
if "api_key" in data:
|
||||
new_doc["api_key"] = data["api_key"]
|
||||
feedback_collection.insert_one(new_doc)
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@@ -137,7 +128,7 @@ def delete_by_ids():
|
||||
return {"status": "error"}
|
||||
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
result = vectors_collection.delete_index(ids=ids)
|
||||
result = sources_collection.delete_index(ids=ids)
|
||||
if result:
|
||||
return {"status": "ok"}
|
||||
return {"status": "error"}
|
||||
@@ -147,28 +138,24 @@ def delete_by_ids():
|
||||
def delete_old():
|
||||
"""Delete old indexes."""
|
||||
import shutil
|
||||
|
||||
path = request.args.get("path")
|
||||
dirs = path.split("/")
|
||||
dirs_clean = []
|
||||
for i in range(0, len(dirs)):
|
||||
dirs_clean.append(secure_filename(dirs[i]))
|
||||
# check that path starts with indexes or vectors
|
||||
|
||||
if dirs_clean[0] not in ["indexes", "vectors"]:
|
||||
return {"status": "error"}
|
||||
path_clean = "/".join(dirs_clean)
|
||||
vectors_collection.delete_one({"name": dirs_clean[-1], "user": dirs_clean[-2]})
|
||||
source_id = request.args.get("source_id")
|
||||
doc = sources_collection.find_one({
|
||||
"_id": ObjectId(source_id),
|
||||
"user": "local",
|
||||
})
|
||||
if(doc is None):
|
||||
return {"status":"not found"},404
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
try:
|
||||
shutil.rmtree(os.path.join(current_dir, path_clean))
|
||||
shutil.rmtree(os.path.join(current_dir, str(doc["_id"])))
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
else:
|
||||
vetorstore = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
|
||||
)
|
||||
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, source_id=str(doc["_id"]))
|
||||
vetorstore.delete_index()
|
||||
sources_collection.delete_one({
|
||||
"_id": ObjectId(source_id),
|
||||
})
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
@@ -202,9 +189,7 @@ def upload_file():
|
||||
file.save(os.path.join(temp_dir, filename))
|
||||
|
||||
# Use shutil.make_archive to zip the temp directory
|
||||
zip_path = shutil.make_archive(
|
||||
base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir
|
||||
)
|
||||
zip_path = shutil.make_archive(base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir)
|
||||
final_filename = os.path.basename(zip_path)
|
||||
|
||||
# Clean up the temporary directory after zipping
|
||||
@@ -246,9 +231,7 @@ def upload_remote():
|
||||
source_data = request.form["data"]
|
||||
|
||||
if source_data:
|
||||
task = ingest_remote.delay(
|
||||
source_data=source_data, job_name=job_name, user=user, loader=source
|
||||
)
|
||||
task = ingest_remote.delay(source_data=source_data, job_name=job_name, user=user, loader=source)
|
||||
task_id = task.id
|
||||
return {"status": "ok", "task_id": task_id}
|
||||
else:
|
||||
@@ -275,54 +258,36 @@ def combined_json():
|
||||
data = [
|
||||
{
|
||||
"name": "default",
|
||||
"language": "default",
|
||||
"version": "",
|
||||
"description": "default",
|
||||
"fullName": "default",
|
||||
"date": "default",
|
||||
"docLink": "default",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "remote",
|
||||
"tokens": "",
|
||||
"retriever": "classic",
|
||||
}
|
||||
]
|
||||
# structure: name, language, version, description, fullName, date, docLink
|
||||
# append data from vectors_collection in sorted order in descending order of date
|
||||
for index in vectors_collection.find({"user": user}).sort("date", -1):
|
||||
# append data from sources_collection in sorted order in descending order of date
|
||||
for index in sources_collection.find({"user": user}).sort("date", -1):
|
||||
data.append(
|
||||
{
|
||||
"id": str(index["_id"]),
|
||||
"name": index["name"],
|
||||
"language": index["language"],
|
||||
"version": "",
|
||||
"description": index["name"],
|
||||
"fullName": index["name"],
|
||||
"date": index["date"],
|
||||
"docLink": index["location"],
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "local",
|
||||
"tokens": index["tokens"] if ("tokens" in index.keys()) else "",
|
||||
"retriever": index["retriever"] if ("retriever" in index.keys()) else "classic",
|
||||
}
|
||||
)
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
data_remote = requests.get(
|
||||
"https://d3dg1063dc54p9.cloudfront.net/combined.json"
|
||||
).json()
|
||||
for index in data_remote:
|
||||
index["location"] = "remote"
|
||||
data.append(index)
|
||||
if "duckduck_search" in settings.RETRIEVERS_ENABLED:
|
||||
data.append(
|
||||
{
|
||||
"name": "DuckDuckGo Search",
|
||||
"language": "en",
|
||||
"version": "",
|
||||
"description": "duckduck_search",
|
||||
"fullName": "DuckDuckGo Search",
|
||||
"date": "duckduck_search",
|
||||
"docLink": "duckduck_search",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "custom",
|
||||
"tokens": "",
|
||||
"retriever": "duckduck_search",
|
||||
}
|
||||
)
|
||||
if "brave_search" in settings.RETRIEVERS_ENABLED:
|
||||
@@ -330,14 +295,11 @@ def combined_json():
|
||||
{
|
||||
"name": "Brave Search",
|
||||
"language": "en",
|
||||
"version": "",
|
||||
"description": "brave_search",
|
||||
"fullName": "Brave Search",
|
||||
"date": "brave_search",
|
||||
"docLink": "brave_search",
|
||||
"model": settings.EMBEDDINGS_NAME,
|
||||
"location": "custom",
|
||||
"tokens": "",
|
||||
"retriever": "brave_search",
|
||||
}
|
||||
)
|
||||
|
||||
@@ -346,39 +308,13 @@ def combined_json():
|
||||
|
||||
@user.route("/api/docs_check", methods=["POST"])
|
||||
def check_docs():
|
||||
# check if docs exist in a vectorstore folder
|
||||
data = request.get_json()
|
||||
# split docs on / and take first part
|
||||
if data["docs"].split("/")[0] == "local":
|
||||
return {"status": "exists"}
|
||||
|
||||
vectorstore = "vectors/" + secure_filename(data["docs"])
|
||||
base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
|
||||
if os.path.exists(vectorstore) or data["docs"] == "default":
|
||||
return {"status": "exists"}
|
||||
else:
|
||||
file_url = urlparse(base_path + vectorstore + "index.faiss")
|
||||
|
||||
if (
|
||||
file_url.scheme in ["https"]
|
||||
and file_url.netloc == "raw.githubusercontent.com"
|
||||
and file_url.path.startswith("/arc53/DocsHUB/main/")
|
||||
):
|
||||
r = requests.get(file_url.geturl())
|
||||
if r.status_code != 200:
|
||||
return {"status": "null"}
|
||||
else:
|
||||
if not os.path.exists(vectorstore):
|
||||
os.makedirs(vectorstore)
|
||||
with open(vectorstore + "index.faiss", "wb") as f:
|
||||
f.write(r.content)
|
||||
|
||||
r = requests.get(base_path + vectorstore + "index.pkl")
|
||||
with open(vectorstore + "index.pkl", "wb") as f:
|
||||
f.write(r.content)
|
||||
else:
|
||||
return {"status": "null"}
|
||||
|
||||
return {"status": "loaded"}
|
||||
return {"status": "not found"}
|
||||
|
||||
|
||||
@user.route("/api/create_prompt", methods=["POST"])
|
||||
@@ -409,9 +345,7 @@ def get_prompts():
|
||||
list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
|
||||
list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
|
||||
for prompt in prompts:
|
||||
list_prompts.append(
|
||||
{"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"}
|
||||
)
|
||||
list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})
|
||||
|
||||
return jsonify(list_prompts)
|
||||
|
||||
@@ -420,21 +354,15 @@ def get_prompts():
|
||||
def get_single_prompt():
|
||||
prompt_id = request.args.get("id")
|
||||
if prompt_id == "default":
|
||||
with open(
|
||||
os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r"
|
||||
) as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
return jsonify({"content": chat_combine_template})
|
||||
elif prompt_id == "creative":
|
||||
with open(
|
||||
os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r"
|
||||
) as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
|
||||
chat_reduce_creative = f.read()
|
||||
return jsonify({"content": chat_reduce_creative})
|
||||
elif prompt_id == "strict":
|
||||
with open(
|
||||
os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r"
|
||||
) as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
|
||||
chat_reduce_strict = f.read()
|
||||
return jsonify({"content": chat_reduce_strict})
|
||||
|
||||
@@ -463,9 +391,7 @@ def update_prompt_name():
|
||||
# check if name is null
|
||||
if name == "":
|
||||
return {"status": "error"}
|
||||
prompts_collection.update_one(
|
||||
{"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}}
|
||||
)
|
||||
prompts_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}})
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@@ -475,12 +401,23 @@ def get_api_keys():
|
||||
keys = api_key_collection.find({"user": user})
|
||||
list_keys = []
|
||||
for key in keys:
|
||||
if "source" in key and isinstance(key["source"],DBRef):
|
||||
source = db.dereference(key["source"])
|
||||
if source is None:
|
||||
continue
|
||||
else:
|
||||
source_name = source["name"]
|
||||
elif "retriever" in key:
|
||||
source_name = key["retriever"]
|
||||
else:
|
||||
continue
|
||||
|
||||
list_keys.append(
|
||||
{
|
||||
"id": str(key["_id"]),
|
||||
"name": key["name"],
|
||||
"key": key["key"][:4] + "..." + key["key"][-4:],
|
||||
"source": key["source"],
|
||||
"source": source_name,
|
||||
"prompt_id": key["prompt_id"],
|
||||
"chunks": key["chunks"],
|
||||
}
|
||||
@@ -492,21 +429,22 @@ def get_api_keys():
|
||||
def create_api_key():
|
||||
data = request.get_json()
|
||||
name = data["name"]
|
||||
source = data["source"]
|
||||
prompt_id = data["prompt_id"]
|
||||
chunks = data["chunks"]
|
||||
key = str(uuid.uuid4())
|
||||
user = "local"
|
||||
resp = api_key_collection.insert_one(
|
||||
{
|
||||
new_api_key = {
|
||||
"name": name,
|
||||
"key": key,
|
||||
"source": source,
|
||||
"user": user,
|
||||
"prompt_id": prompt_id,
|
||||
"chunks": chunks,
|
||||
}
|
||||
)
|
||||
if "source" in data and ObjectId.is_valid(data["source"]):
|
||||
new_api_key["source"] = DBRef("sources", ObjectId(data["source"]))
|
||||
if "retriever" in data:
|
||||
new_api_key["retriever"] = data["retriever"]
|
||||
resp = api_key_collection.insert_one(new_api_key)
|
||||
new_id = str(resp.inserted_id)
|
||||
return {"id": new_id, "key": key}
|
||||
|
||||
@@ -533,36 +471,37 @@ def share_conversation():
|
||||
conversation_id = data["conversation_id"]
|
||||
isPromptable = request.args.get("isPromptable").lower() == "true"
|
||||
|
||||
conversation = conversations_collection.find_one(
|
||||
{"_id": ObjectId(conversation_id)}
|
||||
)
|
||||
conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
|
||||
if(conversation is None):
|
||||
raise Exception("Conversation does not exist")
|
||||
current_n_queries = len(conversation["queries"])
|
||||
|
||||
##generate binary representation of uuid
|
||||
explicit_binary = Binary.from_uuid(uuid.uuid4(), UuidRepresentation.STANDARD)
|
||||
|
||||
if isPromptable:
|
||||
source = "default" if "source" not in data else data["source"]
|
||||
prompt_id = "default" if "prompt_id" not in data else data["prompt_id"]
|
||||
chunks = "2" if "chunks" not in data else data["chunks"]
|
||||
|
||||
name = conversation["name"] + "(shared)"
|
||||
pre_existing_api_document = api_key_collection.find_one(
|
||||
{
|
||||
new_api_key_data = {
|
||||
"prompt_id": prompt_id,
|
||||
"chunks": chunks,
|
||||
"source": source,
|
||||
"user": user,
|
||||
}
|
||||
if "source" in data and ObjectId.is_valid(data["source"]):
|
||||
new_api_key_data["source"] = DBRef("sources",ObjectId(data["source"]))
|
||||
elif "retriever" in data:
|
||||
new_api_key_data["retriever"] = data["retriever"]
|
||||
|
||||
pre_existing_api_document = api_key_collection.find_one(
|
||||
new_api_key_data
|
||||
)
|
||||
api_uuid = str(uuid.uuid4())
|
||||
if pre_existing_api_document:
|
||||
api_uuid = pre_existing_api_document["key"]
|
||||
pre_existing = shared_conversations_collections.find_one(
|
||||
{
|
||||
"conversation_id": DBRef(
|
||||
"conversations", ObjectId(conversation_id)
|
||||
),
|
||||
"conversation_id": DBRef("conversations", ObjectId(conversation_id)),
|
||||
"isPromptable": isPromptable,
|
||||
"first_n_queries": current_n_queries,
|
||||
"user": user,
|
||||
@@ -593,20 +532,17 @@ def share_conversation():
|
||||
"api_key": api_uuid,
|
||||
}
|
||||
)
|
||||
return jsonify(
|
||||
{"success": True, "identifier": str(explicit_binary.as_uuid())}
|
||||
)
|
||||
return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())})
|
||||
else:
|
||||
api_key_collection.insert_one(
|
||||
{
|
||||
"name": name,
|
||||
"key": api_uuid,
|
||||
"source": source,
|
||||
"user": user,
|
||||
"prompt_id": prompt_id,
|
||||
"chunks": chunks,
|
||||
}
|
||||
)
|
||||
|
||||
api_uuid = str(uuid.uuid4())
|
||||
new_api_key_data["key"] = api_uuid
|
||||
new_api_key_data["name"] = name
|
||||
if "source" in data and ObjectId.is_valid(data["source"]):
|
||||
new_api_key_data["source"] = DBRef("sources", ObjectId(data["source"]))
|
||||
if "retriever" in data:
|
||||
new_api_key_data["retriever"] = data["retriever"]
|
||||
api_key_collection.insert_one(new_api_key_data)
|
||||
shared_conversations_collections.insert_one(
|
||||
{
|
||||
"uuid": explicit_binary,
|
||||
@@ -622,9 +558,7 @@ def share_conversation():
|
||||
)
|
||||
## Identifier as route parameter in frontend
|
||||
return (
|
||||
jsonify(
|
||||
{"success": True, "identifier": str(explicit_binary.as_uuid())}
|
||||
),
|
||||
jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
|
||||
201,
|
||||
)
|
||||
|
||||
@@ -639,9 +573,7 @@ def share_conversation():
|
||||
)
|
||||
if pre_existing is not None:
|
||||
return (
|
||||
jsonify(
|
||||
{"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}
|
||||
),
|
||||
jsonify({"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}),
|
||||
200,
|
||||
)
|
||||
else:
|
||||
@@ -659,9 +591,7 @@ def share_conversation():
|
||||
)
|
||||
## Identifier as route parameter in frontend
|
||||
return (
|
||||
jsonify(
|
||||
{"success": True, "identifier": str(explicit_binary.as_uuid())}
|
||||
),
|
||||
jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
|
||||
201,
|
||||
)
|
||||
except Exception as err:
|
||||
@@ -673,16 +603,10 @@ def share_conversation():
|
||||
@user.route("/api/shared_conversation/<string:identifier>", methods=["GET"])
|
||||
def get_publicly_shared_conversations(identifier: str):
|
||||
try:
|
||||
query_uuid = Binary.from_uuid(
|
||||
uuid.UUID(identifier), UuidRepresentation.STANDARD
|
||||
)
|
||||
query_uuid = Binary.from_uuid(uuid.UUID(identifier), UuidRepresentation.STANDARD)
|
||||
shared = shared_conversations_collections.find_one({"uuid": query_uuid})
|
||||
conversation_queries = []
|
||||
if (
|
||||
shared
|
||||
and "conversation_id" in shared
|
||||
and isinstance(shared["conversation_id"], DBRef)
|
||||
):
|
||||
if shared and "conversation_id" in shared and isinstance(shared["conversation_id"], DBRef):
|
||||
# Resolve the DBRef
|
||||
conversation_ref = shared["conversation_id"]
|
||||
conversation = db.dereference(conversation_ref)
|
||||
@@ -696,9 +620,7 @@ def get_publicly_shared_conversations(identifier: str):
|
||||
),
|
||||
404,
|
||||
)
|
||||
conversation_queries = conversation["queries"][
|
||||
: (shared["first_n_queries"])
|
||||
]
|
||||
conversation_queries = conversation["queries"][: (shared["first_n_queries"])]
|
||||
for query in conversation_queries:
|
||||
query.pop("sources") ## avoid exposing sources
|
||||
else:
|
||||
|
||||
@@ -18,7 +18,7 @@ class Settings(BaseSettings):
|
||||
DEFAULT_MAX_HISTORY: int = 150
|
||||
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus"
|
||||
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
|
||||
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
@@ -62,6 +62,11 @@ class Settings(BaseSettings):
|
||||
QDRANT_PATH: Optional[str] = None
|
||||
QDRANT_DISTANCE_FUNC: str = "Cosine"
|
||||
|
||||
# Milvus vectorstore config
|
||||
MILVUS_COLLECTION_NAME: Optional[str] = "docsgpt"
|
||||
MILVUS_URI: Optional[str] = "./milvus_local.db" # milvus lite version as default
|
||||
MILVUS_TOKEN: Optional[str] = ""
|
||||
|
||||
BRAVE_SEARCH_API_KEY: Optional[str] = None
|
||||
|
||||
FLASK_DEBUG_MODE: bool = False
|
||||
|
||||
@@ -11,12 +11,14 @@ from retry import retry
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def store_add_texts_with_retry(store, i, id):
    """Add one document to ``store``, tagging it with its source id.

    Args:
        store: Object exposing ``add_texts(texts, metadatas=...)``.
        i: Document object with a ``page_content`` attribute and a mutable
            ``metadata`` mapping.
        id: Source identifier; written as ``str(id)`` under the
            ``"source_id"`` metadata key.

    Returns:
        None. The document's ``metadata`` is modified in place before the
        text is handed to the store.
    """
    # add source_id to the metadata
    i.metadata["source_id"] = str(id)
    store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
|
||||
|
||||
def call_openai_api(docs, folder_name, task_status):
|
||||
def call_openai_api(docs, folder_name, id, task_status):
|
||||
# Function to create a vector store from the documents and save it to disk
|
||||
|
||||
if not os.path.exists(f"{folder_name}"):
|
||||
@@ -32,13 +34,13 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
docs_init=docs_init,
|
||||
path=f"{folder_name}",
|
||||
source_id=f"{folder_name}",
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
else:
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
path=f"{folder_name}",
|
||||
source_id=str(id),
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
# Uncomment for MPNet embeddings
|
||||
@@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, task_status):
|
||||
task_status.update_state(
|
||||
state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
|
||||
)
|
||||
store_add_texts_with_retry(store, i)
|
||||
store_add_texts_with_retry(store, i, id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Error on ", i)
|
||||
|
||||
@@ -9,13 +9,15 @@ EbookLib==0.18
|
||||
elasticsearch==8.14.0
|
||||
escodegen==1.0.11
|
||||
esprima==4.0.1
|
||||
Flask==3.0.1
|
||||
faiss-cpu==1.8.0
|
||||
Flask==3.0.3
|
||||
faiss-cpu==1.8.0.post1
|
||||
gunicorn==23.0.0
|
||||
html2text==2020.1.16
|
||||
javalang==0.13.0
|
||||
langchain==0.1.4
|
||||
langchain-openai==0.0.5
|
||||
langchain==0.2.16
|
||||
langchain-community==0.2.16
|
||||
langchain-core==0.2.38
|
||||
langchain-openai==0.1.23
|
||||
openapi3_parser==1.1.16
|
||||
pandas==2.2.2
|
||||
pydantic_settings==2.4.0
|
||||
@@ -26,9 +28,9 @@ qdrant-client==1.11.0
|
||||
redis==5.0.1
|
||||
Requests==2.32.0
|
||||
retry==0.9.2
|
||||
sentence-transformers
|
||||
tiktoken
|
||||
sentence-transformers==3.0.1
|
||||
tiktoken==0.7.0
|
||||
torch
|
||||
tqdm==4.66.3
|
||||
transformers==4.44.0
|
||||
Werkzeug==3.0.3
|
||||
tqdm==4.66.5
|
||||
transformers==4.44.2
|
||||
Werkzeug==3.0.4
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
import os
|
||||
from application.retriever.base import BaseRetriever
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
@@ -21,7 +20,7 @@ class ClassicRAG(BaseRetriever):
|
||||
user_api_key=None,
|
||||
):
|
||||
self.question = question
|
||||
self.vectorstore = self._get_vectorstore(source=source)
|
||||
self.vectorstore = source['active_docs'] if 'active_docs' in source else None
|
||||
self.chat_history = chat_history
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
@@ -38,21 +37,6 @@ class ClassicRAG(BaseRetriever):
|
||||
)
|
||||
self.user_api_key = user_api_key
|
||||
|
||||
def _get_vectorstore(self, source):
|
||||
if "active_docs" in source:
|
||||
if source["active_docs"].split("/")[0] == "default":
|
||||
vectorstore = ""
|
||||
elif source["active_docs"].split("/")[0] == "local":
|
||||
vectorstore = "indexes/" + source["active_docs"]
|
||||
else:
|
||||
vectorstore = "vectors/" + source["active_docs"]
|
||||
if source["active_docs"] == "default":
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0:
|
||||
docs = []
|
||||
|
||||
@@ -5,15 +5,16 @@ from application.retriever.brave_search import BraveRetSearch
|
||||
|
||||
|
||||
class RetrieverCreator:
|
||||
retievers = {
|
||||
retrievers = {
|
||||
'classic': ClassicRAG,
|
||||
'duckduck_search': DuckDuckSearch,
|
||||
'brave_search': BraveRetSearch
|
||||
'brave_search': BraveRetSearch,
|
||||
'default': ClassicRAG
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_retriever(cls, type, *args, **kwargs):
|
||||
retiever_class = cls.retievers.get(type.lower())
|
||||
retiever_class = cls.retrievers.get(type.lower())
|
||||
if not retiever_class:
|
||||
raise ValueError(f"No retievers class found for type {type}")
|
||||
return retiever_class(*args, **kwargs)
|
||||
@@ -1,13 +1,30 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
from langchain_community.embeddings import (
|
||||
HuggingFaceEmbeddings,
|
||||
CohereEmbeddings,
|
||||
HuggingFaceInstructEmbeddings,
|
||||
)
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from application.core.settings import settings
|
||||
|
||||
class EmbeddingsWrapper:
    """Adapter exposing a SentenceTransformer through embed_query/embed_documents.

    The instance is also callable: a ``str`` is routed to ``embed_query`` and a
    ``list`` to ``embed_documents``.
    """

    def __init__(self, model_name, *args, **kwargs):
        """Load the model named ``model_name`` and record its embedding dimension.

        Extra positional and keyword arguments are forwarded unchanged to
        ``SentenceTransformer``.
        """
        self.model = SentenceTransformer(
            model_name,
            *args,
            config_kwargs={'allow_dangerous_deserialization': True},
            **kwargs,
        )
        # Stored as an attribute so callers can read the vector size directly.
        self.dimension = self.model.get_sentence_embedding_dimension()

    def embed_query(self, query: str):
        """Encode one query string and return the result of ``.tolist()``."""
        encoded = self.model.encode(query)
        return encoded.tolist()

    def embed_documents(self, documents: list):
        """Encode a list of documents and return the result of ``.tolist()``."""
        encoded = self.model.encode(documents)
        return encoded.tolist()

    def __call__(self, text):
        """Dispatch on the input type.

        Raises:
            ValueError: if ``text`` is neither a ``str`` nor a ``list``.
        """
        if isinstance(text, str):
            return self.embed_query(text)
        if isinstance(text, list):
            return self.embed_documents(text)
        raise ValueError("Input must be a string or a list of strings")
|
||||
|
||||
|
||||
|
||||
class EmbeddingsSingleton:
|
||||
_instances = {}
|
||||
|
||||
@@ -23,16 +40,15 @@ class EmbeddingsSingleton:
|
||||
def _create_instance(embeddings_name, *args, **kwargs):
|
||||
embeddings_factory = {
|
||||
"openai_text-embedding-ada-002": OpenAIEmbeddings,
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": HuggingFaceEmbeddings,
|
||||
"huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
|
||||
"cohere_medium": CohereEmbeddings
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
|
||||
}
|
||||
|
||||
if embeddings_name not in embeddings_factory:
|
||||
raise ValueError(f"Invalid embeddings_name: {embeddings_name}")
|
||||
|
||||
if embeddings_name in embeddings_factory:
|
||||
return embeddings_factory[embeddings_name](*args, **kwargs)
|
||||
else:
|
||||
return EmbeddingsWrapper(embeddings_name, *args, **kwargs)
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
def __init__(self):
|
||||
@@ -58,22 +74,14 @@ class BaseVectorStore(ABC):
|
||||
embeddings_name,
|
||||
openai_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "cohere_medium":
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
cohere_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
if os.path.exists("./model/all-mpnet-base-v2"):
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
model_name="./model/all-mpnet-base-v2",
|
||||
model_kwargs={"device": "cpu"}
|
||||
embeddings_name="./model/all-mpnet-base-v2",
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
model_kwargs={"device": "cpu"}
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
|
||||
|
||||
@@ -9,9 +9,9 @@ import elasticsearch
|
||||
class ElasticsearchStore(BaseVectorStore):
|
||||
_es_connection = None # Class attribute to hold the Elasticsearch connection
|
||||
|
||||
def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
|
||||
super().__init__()
|
||||
self.path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
self.embeddings_key = embeddings_key
|
||||
self.index_name = index_name
|
||||
|
||||
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
|
||||
vector = embeddings.embed_query(question)
|
||||
knn = {
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
|
||||
"field": "vector",
|
||||
"k": k,
|
||||
"num_candidates": 100,
|
||||
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
}
|
||||
}
|
||||
],
|
||||
"filter": [{"match": {"metadata.store.keyword": self.path}}],
|
||||
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
|
||||
}
|
||||
},
|
||||
"rank": {"rrf": {}},
|
||||
@@ -209,5 +209,4 @@ class ElasticsearchStore(BaseVectorStore):
|
||||
|
||||
def delete_index(self):
|
||||
self._es_connection.delete_by_query(index=self.index_name, query={"match": {
|
||||
"metadata.store.keyword": self.path}},)
|
||||
|
||||
"metadata.source_id.keyword": self.source_id}},)
|
||||
|
||||
@@ -1,12 +1,22 @@
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
import os
|
||||
|
||||
def get_vectorstore(path):
    """Return the on-disk location of the index for ``path``.

    A falsy ``path`` (empty string or ``None``) yields the bare ``application``
    directory; otherwise the result is ``application`` joined with
    ``indexes/<path>``.
    """
    if not path:
        return os.path.join("application")
    return os.path.join("application", "indexes/" + path)
|
||||
|
||||
class FaissStore(BaseVectorStore):
|
||||
|
||||
def __init__(self, path, embeddings_key, docs_init=None):
|
||||
def __init__(self, source_id, embeddings_key, docs_init=None):
|
||||
super().__init__()
|
||||
self.path = path
|
||||
self.path = get_vectorstore(source_id)
|
||||
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
if docs_init:
|
||||
self.docsearch = FAISS.from_documents(
|
||||
@@ -14,7 +24,8 @@ class FaissStore(BaseVectorStore):
|
||||
)
|
||||
else:
|
||||
self.docsearch = FAISS.load_local(
|
||||
self.path, embeddings
|
||||
self.path, embeddings,
|
||||
allow_dangerous_deserialization=True
|
||||
)
|
||||
self.assert_embedding_dimensions(embeddings)
|
||||
|
||||
@@ -37,10 +48,10 @@ class FaissStore(BaseVectorStore):
|
||||
"""
|
||||
if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
try:
|
||||
word_embedding_dimension = embeddings.client[1].word_embedding_dimension
|
||||
word_embedding_dimension = embeddings.dimension
|
||||
except AttributeError as e:
|
||||
raise AttributeError("word_embedding_dimension not found in embeddings.client[1]") from e
|
||||
raise AttributeError("'dimension' attribute not found in embeddings instance. Make sure the embeddings object is properly initialized.") from e
|
||||
docsearch_index_dimension = self.docsearch.index.d
|
||||
if word_embedding_dimension != docsearch_index_dimension:
|
||||
raise ValueError(f"word_embedding_dimension ({word_embedding_dimension}) " +
|
||||
f"!= docsearch_index_word_embedding_dimension ({docsearch_index_dimension})")
|
||||
raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) " +
|
||||
f"!= docsearch index dimension ({docsearch_index_dimension})")
|
||||
37
application/vectorstore/milvus.py
Normal file
37
application/vectorstore/milvus.py
Normal file
@@ -0,0 +1,37 @@
|
||||
from typing import List, Optional
|
||||
from uuid import uuid4
|
||||
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
|
||||
|
||||
class MilvusStore(BaseVectorStore):
    """Vector store that delegates to a ``langchain_milvus.Milvus`` collection."""

    def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
        """Connect to Milvus using the URI, token and collection name from settings.

        Args:
            path: Value used as the ``path`` filter when searching.
            embeddings_key: Key passed to ``_get_embeddings`` along with
                ``settings.EMBEDDINGS_NAME``.
        """
        super().__init__()
        # Imported inside the constructor, so langchain_milvus is only loaded
        # when a MilvusStore is actually created.
        from langchain_milvus import Milvus

        connection_args = {
            "uri": settings.MILVUS_URI,
            "token": settings.MILVUS_TOKEN,
        }
        self._docsearch = Milvus(
            embedding_function=self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key),
            collection_name=settings.MILVUS_COLLECTION_NAME,
            connection_args=connection_args,
        )
        self._path = path

    def search(self, question, k=2, *args, **kwargs):
        """Return the ``k`` documents most similar to ``question``.

        Extra positional and keyword arguments are forwarded to
        ``Milvus.similarity_search``.
        """
        # The comma after the filter dict is required: without it Python
        # evaluates `dict * args` and raises TypeError before the call is made.
        # NOTE(review): confirm that langchain_milvus honours a `filter` kwarg;
        # its documented filtering parameter appears to be `expr`.
        return self._docsearch.similarity_search(
            query=question, k=k, filter={"path": self._path}, *args, **kwargs
        )

    def add_texts(self, texts: List[str], metadatas: Optional[List[dict]], *args, **kwargs):
        """Insert ``texts`` with their ``metadatas``, assigning a fresh UUID4 id to each."""
        ids = [str(uuid4()) for _ in texts]

        return self._docsearch.add_texts(texts=texts, metadatas=metadatas, ids=ids, *args, **kwargs)

    def save_local(self, *args, **kwargs):
        """No-op: this store does not write anything locally."""
        pass

    def delete_index(self, *args, **kwargs):
        """No-op: index deletion is not implemented for this store."""
        pass
|
||||
@@ -5,7 +5,7 @@ from application.vectorstore.document_class import Document
|
||||
class MongoDBVectorStore(BaseVectorStore):
|
||||
def __init__(
|
||||
self,
|
||||
path: str = "",
|
||||
source_id: str = "",
|
||||
embeddings_key: str = "embeddings",
|
||||
collection: str = "documents",
|
||||
index_name: str = "vector_search_index",
|
||||
@@ -18,7 +18,7 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
self._embedding_key = embedding_key
|
||||
self._embeddings_key = embeddings_key
|
||||
self._mongo_uri = settings.MONGO_URI
|
||||
self._path = path.replace("application/indexes/", "").rstrip("/")
|
||||
self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
|
||||
try:
|
||||
@@ -46,7 +46,7 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
"numCandidates": k * 10,
|
||||
"index": self._index_name,
|
||||
"filter": {
|
||||
"store": {"$eq": self._path}
|
||||
"source_id": {"$eq": self._source_id}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -123,4 +123,4 @@ class MongoDBVectorStore(BaseVectorStore):
|
||||
return result_ids
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
self._collection.delete_many({"store": self._path})
|
||||
self._collection.delete_many({"source_id": self._source_id})
|
||||
@@ -5,12 +5,12 @@ from qdrant_client import models
|
||||
|
||||
|
||||
class QdrantStore(BaseVectorStore):
|
||||
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
|
||||
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
|
||||
self._filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
key="metadata.store",
|
||||
match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
|
||||
key="metadata.source_id",
|
||||
match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from application.vectorstore.faiss import FaissStore
|
||||
from application.vectorstore.elasticsearch import ElasticsearchStore
|
||||
from application.vectorstore.milvus import MilvusStore
|
||||
from application.vectorstore.mongodb import MongoDBVectorStore
|
||||
from application.vectorstore.qdrant import QdrantStore
|
||||
|
||||
@@ -10,6 +11,7 @@ class VectorCreator:
|
||||
"elasticsearch": ElasticsearchStore,
|
||||
"mongodb": MongoDBVectorStore,
|
||||
"qdrant": QdrantStore,
|
||||
"milvus": MilvusStore,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -6,6 +6,7 @@ from urllib.parse import urljoin
|
||||
import logging
|
||||
|
||||
import requests
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
@@ -16,10 +17,10 @@ from application.parser.token_func import group_split
|
||||
from application.utils import count_tokens_docs
|
||||
|
||||
|
||||
|
||||
# Define a function to extract metadata from a given filename.
|
||||
def metadata_from_filename(title):
|
||||
store = "/".join(title.split("/")[1:3])
|
||||
return {"title": title, "store": store}
|
||||
return {"title": title}
|
||||
|
||||
|
||||
# Define a function to generate a random string of a given length.
|
||||
@@ -27,9 +28,7 @@ def generate_random_string(length):
|
||||
return "".join([string.ascii_letters[i % 52] for i in range(length)])
|
||||
|
||||
|
||||
current_dir = os.path.dirname(
|
||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
)
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
||||
@@ -60,7 +59,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
|
||||
|
||||
|
||||
# Define the main function for ingesting and processing documents.
|
||||
def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
def ingest_worker(self, directory, formats, name_job, filename, user, retriever="classic"):
|
||||
"""
|
||||
Ingest and process documents.
|
||||
|
||||
@@ -71,6 +70,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
name_job (str): Name of the job for this ingestion task.
|
||||
filename (str): Name of the file to be ingested.
|
||||
user (str): Identifier for the user initiating the ingestion.
|
||||
retriever (str): Type of retriever to use for processing the documents.
|
||||
|
||||
Returns:
|
||||
dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
|
||||
@@ -106,9 +106,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
|
||||
# check if file is .zip and extract it
|
||||
if filename.endswith(".zip"):
|
||||
extract_zip_recursive(
|
||||
os.path.join(full_path, filename), full_path, 0, recursion_depth
|
||||
)
|
||||
extract_zip_recursive(os.path.join(full_path, filename), full_path, 0, recursion_depth)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
|
||||
@@ -129,8 +127,9 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
id = ObjectId()
|
||||
|
||||
call_openai_api(docs, full_path, self)
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
tokens = count_tokens_docs(docs)
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
@@ -140,22 +139,15 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
|
||||
# get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
|
||||
# and send them to the server (provide user and name in form)
|
||||
file_data = {"name": name_job, "user": user, "tokens":tokens}
|
||||
file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {
|
||||
"file_faiss": open(full_path + "/index.faiss", "rb"),
|
||||
"file_pkl": open(full_path + "/index.pkl", "rb"),
|
||||
}
|
||||
response = requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
|
||||
)
|
||||
response = requests.get(
|
||||
urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)
|
||||
)
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
else:
|
||||
response = requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), data=file_data
|
||||
)
|
||||
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
|
||||
# delete local
|
||||
shutil.rmtree(full_path)
|
||||
@@ -170,7 +162,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||
}
|
||||
|
||||
|
||||
def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
|
||||
def remote_worker(self, source_data, name_job, user, loader, directory="temp", retriever="classic"):
|
||||
token_check = True
|
||||
min_tokens = 150
|
||||
max_tokens = 1250
|
||||
@@ -191,22 +183,21 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
|
||||
token_check=token_check,
|
||||
)
|
||||
# docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
call_openai_api(docs, full_path, self)
|
||||
tokens = count_tokens_docs(docs)
|
||||
id = ObjectId()
|
||||
call_openai_api(docs, full_path, id, self)
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
# Proceed with uploading and cleaning as in the original function
|
||||
file_data = {"name": name_job, "user": user, "tokens":tokens}
|
||||
file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever,
|
||||
"id": str(id), 'type': loader, 'remote_data': source_data}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
files = {
|
||||
"file_faiss": open(full_path + "/index.faiss", "rb"),
|
||||
"file_pkl": open(full_path + "/index.pkl", "rb"),
|
||||
}
|
||||
|
||||
requests.post(
|
||||
urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
|
||||
)
|
||||
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
|
||||
else:
|
||||
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
|
||||
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
|
||||
redis:
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
version: "3.9"
|
||||
|
||||
services:
|
||||
frontend:
|
||||
build: ./frontend
|
||||
|
||||
4
extensions/react-widget/package-lock.json
generated
4
extensions/react-widget/package-lock.json
generated
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "docsgpt",
|
||||
"version": "0.4.1",
|
||||
"version": "0.4.2",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "docsgpt",
|
||||
"version": "0.4.1",
|
||||
"version": "0.4.2",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@babel/plugin-transform-flow-strip-types": "^7.23.3",
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docsgpt",
|
||||
"version": "0.4.1",
|
||||
"version": "0.4.2",
|
||||
"private": false,
|
||||
"description": "DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖.",
|
||||
"source": "./src/index.html",
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
## chmod +x publish.sh - to upgrade ownership
|
||||
set -e
|
||||
cat package.json >> package_copy.json
|
||||
cat package-lock.json >> package-lock_copy.json
|
||||
publish_package() {
|
||||
PACKAGE_NAME=$1
|
||||
BUILD_COMMAND=$2
|
||||
@@ -24,6 +25,9 @@ publish_package() {
|
||||
|
||||
# Publish to npm
|
||||
npm publish
|
||||
# Clean up
|
||||
mv package_copy.json package.json
|
||||
mv package-lock_copy.json package-lock.json
|
||||
echo "Published ${PACKAGE_NAME}"
|
||||
}
|
||||
|
||||
@@ -33,7 +37,7 @@ publish_package "docsgpt" "build"
|
||||
# Publish docsgpt-react package
|
||||
publish_package "docsgpt-react" "build:react"
|
||||
|
||||
# Clean up
|
||||
mv package_copy.json package.json
|
||||
|
||||
rm -rf package_copy.json
|
||||
rm -rf package-lock_copy.json
|
||||
echo "---Process completed---"
|
||||
4
extensions/react-widget/src/assets/dislike.svg
Normal file
4
extensions/react-widget/src/assets/dislike.svg
Normal file
@@ -0,0 +1,4 @@
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M6.37776 10.1001V12.9C6.37776 13.457 6.599 13.9911 6.99282 14.3849C7.38664 14.7788 7.92077 15 8.47772 15L11.2777 8.70011V1.00025H3.38181C3.04419 0.996436 2.71656 1.11477 2.45929 1.33344C2.20203 1.55212 2.03246 1.8564 1.98184 2.19023L1.01585 8.49012C0.985398 8.69076 0.998931 8.89563 1.05551 9.09053C1.1121 9.28543 1.21038 9.46569 1.34355 9.61884C1.47671 9.77198 1.64159 9.89434 1.82674 9.97744C2.01189 10.0605 2.2129 10.1024 2.41583 10.1001H6.37776ZM11.2777 1.00025H13.1466C13.5428 0.993247 13.9277 1.13195 14.2284 1.39002C14.5291 1.64809 14.7245 2.00758 14.7776 2.40023V7.30014C14.7245 7.69279 14.5291 8.05227 14.2284 8.31035C13.9277 8.56842 13.5428 8.70712 13.1466 8.70011H11.2777" fill="none"/>
|
||||
<path d="M11.2777 8.70011L8.47772 15C7.92077 15 7.38664 14.7788 6.99282 14.3849C6.599 13.9911 6.37776 13.457 6.37776 12.9V10.1001H2.41583C2.2129 10.1024 2.01189 10.0605 1.82674 9.97744C1.64159 9.89434 1.47671 9.77198 1.34355 9.61884C1.21038 9.46569 1.1121 9.28543 1.05551 9.09053C0.998931 8.89563 0.985398 8.69076 1.01585 8.49012L1.98184 2.19023C2.03246 1.8564 2.20203 1.55212 2.45929 1.33344C2.71656 1.11477 3.04419 0.996436 3.38181 1.00025H11.2777M11.2777 8.70011V1.00025M11.2777 8.70011H13.1466C13.5428 8.70712 13.9277 8.56842 14.2284 8.31035C14.5291 8.05227 14.7245 7.69279 14.7776 7.30014V2.40023C14.7245 2.00758 14.5291 1.64809 14.2284 1.39002C13.9277 1.13195 13.5428 0.993247 13.1466 1.00025H11.2777" stroke="current" stroke-width="1.4" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.6 KiB |
4
extensions/react-widget/src/assets/like.svg
Normal file
4
extensions/react-widget/src/assets/like.svg
Normal file
@@ -0,0 +1,4 @@
|
||||
<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M9.39995 5.89997V3.09999C9.39995 2.54304 9.1787 2.0089 8.78487 1.61507C8.39105 1.22125 7.85691 1 7.29996 1L4.49998 7.29996V14.9999H12.3959C12.7336 15.0037 13.0612 14.8854 13.3185 14.6667C13.5757 14.448 13.7453 14.1437 13.7959 13.8099L14.7619 7.50996C14.7924 7.30931 14.7788 7.10444 14.7222 6.90954C14.6657 6.71464 14.5674 6.53437 14.4342 6.38123C14.301 6.22808 14.1362 6.10572 13.951 6.02262C13.7659 5.93952 13.5649 5.89767 13.3619 5.89997H9.39995ZM4.49998 14.9999H2.39999C2.02869 14.9999 1.6726 14.8524 1.41005 14.5899C1.1475 14.3273 1 13.9712 1 13.5999V8.69995C1 8.32865 1.1475 7.97256 1.41005 7.71001C1.6726 7.44746 2.02869 7.29996 2.39999 7.29996H4.49998" fill="none"/>
|
||||
<path d="M4.49998 7.29996L7.29996 1C7.85691 1 8.39105 1.22125 8.78487 1.61507C9.1787 2.0089 9.39995 2.54304 9.39995 3.09999V5.89997H13.3619C13.5649 5.89767 13.7659 5.93952 13.951 6.02262C14.1362 6.10572 14.301 6.22808 14.4342 6.38123C14.5674 6.53437 14.6657 6.71464 14.7223 6.90954C14.7788 7.10444 14.7924 7.30931 14.7619 7.50996L13.7959 13.8099C13.7453 14.1437 13.5757 14.448 13.3185 14.6667C13.0612 14.8854 12.7336 15.0037 12.3959 14.9999H4.49998M4.49998 7.29996V14.9999M4.49998 7.29996H2.39999C2.02869 7.29996 1.6726 7.44746 1.41005 7.71001C1.1475 7.97256 1 8.32865 1 8.69995V13.5999C1 13.9712 1.1475 14.3273 1.41005 14.5899C1.6726 14.8524 2.02869 14.9999 2.39999 14.9999H4.49998" stroke="current" stroke-width="1.39999" stroke-linecap="round" stroke-linejoin="round"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 1.5 KiB |
@@ -1,11 +1,13 @@
|
||||
"use client";
|
||||
import React from 'react'
|
||||
import React, { useRef } from 'react'
|
||||
import DOMPurify from 'dompurify';
|
||||
import styled, { keyframes, createGlobalStyle } from 'styled-components';
|
||||
import { PaperPlaneIcon, RocketIcon, ExclamationTriangleIcon, Cross2Icon } from '@radix-ui/react-icons';
|
||||
import { MESSAGE_TYPE, Query, Status, WidgetProps } from '../types/index';
|
||||
import { fetchAnswerStreaming } from '../requests/streamingApi';
|
||||
import { FEEDBACK, MESSAGE_TYPE, Query, Status, WidgetProps } from '../types/index';
|
||||
import { fetchAnswerStreaming, sendFeedback } from '../requests/streamingApi';
|
||||
import { ThemeProvider } from 'styled-components';
|
||||
import Like from "../assets/like.svg"
|
||||
import Dislike from "../assets/dislike.svg"
|
||||
import MarkdownIt from 'markdown-it';
|
||||
const themes = {
|
||||
dark: {
|
||||
@@ -63,6 +65,10 @@ const GlobalStyles = createGlobalStyle`
|
||||
background-color: #646464;
|
||||
color: #fff !important;
|
||||
}
|
||||
.response code {
|
||||
white-space: pre-wrap !important;
|
||||
line-break: loose !important;
|
||||
}
|
||||
`;
|
||||
const Overlay = styled.div`
|
||||
position: fixed;
|
||||
@@ -195,12 +201,24 @@ const Conversation = styled.div<{ size: string }>`
|
||||
width:${props => props.size === 'large' ? '90vw' : props.size === 'medium' ? '60vw' : '400px'} !important;
|
||||
}
|
||||
`;
|
||||
|
||||
const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
const Feedback = styled.div`
|
||||
background-color: transparent;
|
||||
font-weight: normal;
|
||||
gap: 12px;
|
||||
display: flex;
|
||||
padding: 6px;
|
||||
clear: both;
|
||||
`;
|
||||
const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
display: block;
|
||||
font-size: 16px;
|
||||
justify-content: ${props => props.type === 'QUESTION' ? 'flex-end' : 'flex-start'};
|
||||
margin: 0.5rem;
|
||||
position: relative;
|
||||
width: 100%;;
|
||||
float: right;
|
||||
margin: 0rem;
|
||||
&:hover ${Feedback} * {
|
||||
visibility: visible !important;
|
||||
}
|
||||
`;
|
||||
const Message = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
background: ${props => props.type === 'QUESTION' ?
|
||||
@@ -208,6 +226,7 @@ const Message = styled.div<{ type: MESSAGE_TYPE }>`
|
||||
props.theme.secondary.bg};
|
||||
color: ${props => props.type === 'ANSWER' ? props.theme.primary.text : '#fff'};
|
||||
border: none;
|
||||
float: ${props => props.type === 'QUESTION' ? 'right' : 'left'};
|
||||
max-width: ${props => props.type === 'ANSWER' ? '100%' : '80'};
|
||||
overflow: auto;
|
||||
margin: 4px;
|
||||
@@ -315,6 +334,7 @@ const HeroDescription = styled.p`
|
||||
font-size: 14px;
|
||||
line-height: 1.5;
|
||||
`;
|
||||
|
||||
const Hero = ({ title, description, theme }: { title: string, description: string, theme: string }) => {
|
||||
return (
|
||||
<>
|
||||
@@ -345,7 +365,8 @@ export const DocsGPTWidget = ({
|
||||
size = 'small',
|
||||
theme = 'dark',
|
||||
buttonIcon = 'https://d3dg1063dc54p9.cloudfront.net/widget/message.svg',
|
||||
buttonBg = 'linear-gradient(to bottom right, #5AF0EC, #E80D9D)'
|
||||
buttonBg = 'linear-gradient(to bottom right, #5AF0EC, #E80D9D)',
|
||||
collectFeedback = true
|
||||
}: WidgetProps) => {
|
||||
const [prompt, setPrompt] = React.useState('');
|
||||
const [status, setStatus] = React.useState<Status>('idle');
|
||||
@@ -353,6 +374,7 @@ export const DocsGPTWidget = ({
|
||||
const [conversationId, setConversationId] = React.useState<string | null>(null)
|
||||
const [open, setOpen] = React.useState<boolean>(false)
|
||||
const [eventInterrupt, setEventInterrupt] = React.useState<boolean>(false); //click or scroll by user while autoScrolling
|
||||
const isBubbleHovered = useRef<boolean>(false)
|
||||
const endMessageRef = React.useRef<HTMLDivElement | null>(null);
|
||||
const md = new MarkdownIt();
|
||||
|
||||
@@ -376,6 +398,36 @@ export const DocsGPTWidget = ({
|
||||
!eventInterrupt && scrollToBottom(endMessageRef.current);
|
||||
}, [queries.length, queries[queries.length - 1]?.response]);
|
||||
|
||||
/**
 * Records or clears the like/dislike feedback for the answer at `index`.
 *
 * - If the answer already carries the same feedback value, the feedback is
 *   cleared locally (no request is sent).
 * - Otherwise the feedback is posted via `sendFeedback`; local state is only
 *   updated when the server answers with status 200.
 *
 * State is updated immutably: the entry is copied inside the functional
 * `setQueries` updater rather than mutating the object held in React state,
 * so the latest version of the entry is preserved and re-renders are reliable.
 */
async function handleFeedback(feedback: FEEDBACK, index: number) {
  const query = queries[index];
  // Nothing to rate when the index is out of range or the answer is still empty.
  if (!query || !query.response) return;

  if (query.feedback === feedback) {
    // Same button clicked again: drop the feedback key from a copy of the entry.
    setQueries((prev: Query[]) =>
      prev.map((q, i) => {
        if (i !== index) return q;
        const { feedback: _cleared, ...withoutFeedback } = q;
        return withoutFeedback;
      }),
    );
    return;
  }

  try {
    const res = await sendFeedback(
      {
        question: query.prompt,
        answer: query.response,
        feedback: feedback,
        apikey: apiKey,
      },
      apiHost,
    );
    if (res.status === 200) {
      setQueries((prev: Query[]) =>
        prev.map((q, i) => (i === index ? { ...q, feedback } : q)),
      );
    }
  } catch (err) {
    console.log("Connection failed", err);
  }
}
|
||||
|
||||
async function stream(question: string) {
|
||||
setStatus('loading')
|
||||
try {
|
||||
@@ -473,7 +525,7 @@ export const DocsGPTWidget = ({
|
||||
</MessageBubble>
|
||||
}
|
||||
{
|
||||
query.response ? <MessageBubble type='ANSWER'>
|
||||
query.response ? <MessageBubble onMouseOver={() => { isBubbleHovered.current = true }} type='ANSWER'>
|
||||
<Message
|
||||
type='ANSWER'
|
||||
ref={(index === queries.length - 1) ? endMessageRef : null}
|
||||
@@ -483,6 +535,24 @@ export const DocsGPTWidget = ({
|
||||
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(md.render(query.response)) }}
|
||||
/>
|
||||
</Message>
|
||||
|
||||
{collectFeedback &&
|
||||
<Feedback>
|
||||
<Like
|
||||
style={{
|
||||
stroke: query.feedback == 'LIKE' ? '#8860DB' : '#c0c0c0',
|
||||
visibility: query.feedback == 'LIKE' ? 'visible' : 'hidden'
|
||||
}}
|
||||
fill='none'
|
||||
onClick={() => handleFeedback("LIKE", index)} />
|
||||
<Dislike
|
||||
style={{
|
||||
stroke: query.feedback == 'DISLIKE' ? '#ed8085' : '#c0c0c0',
|
||||
visibility: query.feedback == 'DISLIKE' ? 'visible' : 'hidden'
|
||||
}}
|
||||
fill='none'
|
||||
onClick={() => handleFeedback("DISLIKE", index)} />
|
||||
</Feedback>}
|
||||
</MessageBubble>
|
||||
: <div>
|
||||
{
|
||||
@@ -518,7 +588,7 @@ export const DocsGPTWidget = ({
|
||||
type='text' placeholder="What do you want to do?" />
|
||||
<StyledButton
|
||||
size={size}
|
||||
disabled={prompt.length == 0 || status !== 'idle'}>
|
||||
disabled={prompt.trim().length == 0 || status !== 'idle'}>
|
||||
<PaperPlaneIcon width={15} height={15} color='white' />
|
||||
</StyledButton>
|
||||
</PromptContainer>
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import { FEEDBACK } from "@/types";
|
||||
interface HistoryItem {
|
||||
prompt: string;
|
||||
response?: string;
|
||||
@@ -11,6 +12,12 @@ interface FetchAnswerStreamingProps {
|
||||
apiHost?: string;
|
||||
onEvent?: (event: MessageEvent) => void;
|
||||
}
|
||||
interface FeedbackPayload {
|
||||
question: string;
|
||||
answer: string;
|
||||
apikey: string;
|
||||
feedback: FEEDBACK;
|
||||
}
|
||||
export function fetchAnswerStreaming({
|
||||
question = '',
|
||||
apiKey = '',
|
||||
@@ -20,12 +27,12 @@ export function fetchAnswerStreaming({
|
||||
onEvent = () => { console.log("Event triggered, but no handler provided."); }
|
||||
}: FetchAnswerStreamingProps): Promise<void> {
|
||||
return new Promise<void>((resolve, reject) => {
|
||||
const body= {
|
||||
const body = {
|
||||
question: question,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
model: 'default',
|
||||
api_key:apiKey
|
||||
api_key: apiKey
|
||||
};
|
||||
fetch(apiHost + '/stream', {
|
||||
method: 'POST',
|
||||
@@ -81,3 +88,19 @@ export function fetchAnswerStreaming({
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
/**
 * Submit a feedback rating for a single question/answer pair.
 *
 * Issues a JSON POST to `${apiHost}/api/feedback`. The payload's `apikey`
 * field is sent under the `api_key` key of the request body.
 *
 * @param payload question, answer, feedback value and API key to report
 * @param apiHost base URL of the backend, without a trailing path
 * @returns the pending `fetch` response; the status is not inspected here
 */
export const sendFeedback = (payload: FeedbackPayload, apiHost: string): Promise<Response> => {
  const { question, answer, feedback, apikey } = payload;
  const requestBody = JSON.stringify({
    question,
    answer,
    feedback,
    api_key: apikey,
  });
  const endpoint = `${apiHost}/api/feedback`;
  return fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: requestBody,
  });
};
|
||||
@@ -23,4 +23,5 @@ export interface WidgetProps {
|
||||
theme?:THEME,
|
||||
buttonIcon?:string;
|
||||
buttonBg?:string;
|
||||
collectFeedback?:boolean
|
||||
}
|
||||
@@ -24,9 +24,9 @@ import ConversationTile from './conversation/ConversationTile';
|
||||
import { useDarkTheme, useMediaQuery, useOutsideAlerter } from './hooks';
|
||||
import useDefaultDocument from './hooks/useDefaultDocument';
|
||||
import DeleteConvModal from './modals/DeleteConvModal';
|
||||
import { ActiveState } from './models/misc';
|
||||
import { ActiveState, Doc } from './models/misc';
|
||||
import APIKeyModal from './preferences/APIKeyModal';
|
||||
import { Doc, getConversations, getDocs } from './preferences/preferenceApi';
|
||||
import { getConversations, getDocs } from './preferences/preferenceApi';
|
||||
import {
|
||||
selectApiKeyStatus,
|
||||
selectConversationId,
|
||||
@@ -124,10 +124,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
|
||||
};
|
||||
|
||||
const handleDeleteClick = (doc: Doc) => {
|
||||
const docPath = `indexes/local/${doc.name}`;
|
||||
|
||||
userService
|
||||
.deletePath(docPath)
|
||||
.deletePath(doc.id ?? '')
|
||||
.then(() => {
|
||||
return getDocs();
|
||||
})
|
||||
|
||||
@@ -10,7 +10,7 @@ const endpoints = {
|
||||
DELETE_PROMPT: '/api/delete_prompt',
|
||||
UPDATE_PROMPT: '/api/update_prompt',
|
||||
SINGLE_PROMPT: (id: string) => `/api/get_single_prompt?id=${id}`,
|
||||
DELETE_PATH: (docPath: string) => `/api/delete_old?path=${docPath}`,
|
||||
DELETE_PATH: (docPath: string) => `/api/delete_old?source_id=${docPath}`,
|
||||
TASK_STATUS: (task_id: string) => `/api/task_status?task_id=${task_id}`,
|
||||
MESSAGE_ANALYTICS: '/api/get_message_analytics',
|
||||
TOKEN_ANALYTICS: '/api/get_token_analytics',
|
||||
|
||||
@@ -27,6 +27,7 @@ function Dropdown({
|
||||
| string
|
||||
| { label: string; value: string }
|
||||
| { value: number; description: string }
|
||||
| { name: string; id: string; type: string }
|
||||
| null;
|
||||
onSelect:
|
||||
| ((value: string) => void)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import React from 'react';
|
||||
import Trash from '../assets/trash.svg';
|
||||
import Arrow2 from '../assets/dropdown-arrow.svg';
|
||||
import { Doc } from '../preferences/preferenceApi';
|
||||
import { Doc } from '../models/misc';
|
||||
import { useDispatch } from 'react-redux';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
type Props = {
|
||||
@@ -63,9 +63,6 @@ function SourceDropdown({
|
||||
<p className="max-w-3/4 truncate whitespace-nowrap">
|
||||
{selectedDocs?.name || 'None'}
|
||||
</p>
|
||||
<p className="flex flex-col items-center justify-center">
|
||||
{selectedDocs?.version}
|
||||
</p>
|
||||
</div>
|
||||
</span>
|
||||
<img
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { Fragment, useEffect, useRef, useState } from 'react';
|
||||
import { Fragment, useEffect, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
@@ -30,7 +30,7 @@ export default function Conversation() {
|
||||
const status = useSelector(selectStatus);
|
||||
const conversationId = useSelector(selectConversationId);
|
||||
const dispatch = useDispatch<AppDispatch>();
|
||||
const endMessageRef = useRef<HTMLDivElement>(null);
|
||||
const conversationRef = useRef<HTMLDivElement>(null);
|
||||
const inputRef = useRef<HTMLTextAreaElement>(null);
|
||||
const [isDarkTheme] = useDarkTheme();
|
||||
const [hasScrolledToLast, setHasScrolledToLast] = useState(true);
|
||||
@@ -58,26 +58,6 @@ export default function Conversation() {
|
||||
fetchStream.current && fetchStream.current.abort();
|
||||
}, [conversationId]);
|
||||
|
||||
useEffect(() => {
|
||||
const observerCallback: IntersectionObserverCallback = (entries) => {
|
||||
entries.forEach((entry) => {
|
||||
setHasScrolledToLast(entry.isIntersecting);
|
||||
});
|
||||
};
|
||||
|
||||
const observer = new IntersectionObserver(observerCallback, {
|
||||
root: null,
|
||||
threshold: [1, 0.8],
|
||||
});
|
||||
if (endMessageRef.current) {
|
||||
observer.observe(endMessageRef.current);
|
||||
}
|
||||
|
||||
return () => {
|
||||
observer.disconnect();
|
||||
};
|
||||
}, [endMessageRef.current]);
|
||||
|
||||
useEffect(() => {
|
||||
if (queries.length) {
|
||||
queries[queries.length - 1].error && setLastQueryReturnedErr(true);
|
||||
@@ -86,10 +66,16 @@ export default function Conversation() {
|
||||
}, [queries[queries.length - 1]]);
|
||||
|
||||
const scrollIntoView = () => {
|
||||
endMessageRef?.current?.scrollIntoView({
|
||||
if (!conversationRef?.current || eventInterrupt) return;
|
||||
|
||||
if (status === 'idle' || !queries[queries.length - 1].response) {
|
||||
conversationRef.current.scrollTo({
|
||||
behavior: 'smooth',
|
||||
block: 'start',
|
||||
top: conversationRef.current.scrollHeight,
|
||||
});
|
||||
} else {
|
||||
conversationRef.current.scrollTop = conversationRef.current.scrollHeight;
|
||||
}
|
||||
};
|
||||
|
||||
const handleQuestion = ({
|
||||
@@ -143,7 +129,6 @@ export default function Conversation() {
|
||||
if (query.response) {
|
||||
responseView = (
|
||||
<ConversationBubble
|
||||
ref={endMessageRef}
|
||||
className={`${index === queries.length - 1 ? 'mb-32' : 'mb-7'}`}
|
||||
key={`${index}ANSWER`}
|
||||
message={query.response}
|
||||
@@ -176,7 +161,6 @@ export default function Conversation() {
|
||||
);
|
||||
responseView = (
|
||||
<ConversationBubble
|
||||
ref={endMessageRef}
|
||||
className={`${index === queries.length - 1 ? 'mb-32' : 'mb-7'} `}
|
||||
key={`${index}ERROR`}
|
||||
message={query.error}
|
||||
@@ -234,6 +218,7 @@ export default function Conversation() {
|
||||
</>
|
||||
)}
|
||||
<div
|
||||
ref={conversationRef}
|
||||
onWheel={handleUserInterruption}
|
||||
onTouchMove={handleUserInterruption}
|
||||
className="flex h-[90%] w-full flex-1 justify-center overflow-y-auto p-4 md:h-[83vh]"
|
||||
|
||||
@@ -250,7 +250,10 @@ const ConversationBubble = forwardRef<
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<code className={className ? className : ''} {...props}>
|
||||
<code
|
||||
className={className ? className : 'whitespace-pre-line'}
|
||||
{...props}
|
||||
>
|
||||
{children}
|
||||
</code>
|
||||
);
|
||||
|
||||
@@ -1,32 +1,6 @@
|
||||
import conversationService from '../api/services/conversationService';
|
||||
import { Doc } from '../preferences/preferenceApi';
|
||||
import { Answer, FEEDBACK } from './conversationModels';
|
||||
|
||||
function getDocPath(selectedDocs: Doc | null): string {
|
||||
let docPath = 'default';
|
||||
if (selectedDocs) {
|
||||
let namePath = selectedDocs.name;
|
||||
if (selectedDocs.language === namePath) {
|
||||
namePath = '.project';
|
||||
}
|
||||
if (selectedDocs.location === 'local') {
|
||||
docPath = 'local' + '/' + selectedDocs.name + '/';
|
||||
} else if (selectedDocs.location === 'remote') {
|
||||
docPath =
|
||||
selectedDocs.language +
|
||||
'/' +
|
||||
namePath +
|
||||
'/' +
|
||||
selectedDocs.version +
|
||||
'/' +
|
||||
selectedDocs.model +
|
||||
'/';
|
||||
} else if (selectedDocs.location === 'custom') {
|
||||
docPath = selectedDocs.docLink;
|
||||
}
|
||||
}
|
||||
return docPath;
|
||||
}
|
||||
import { Doc } from '../models/misc';
|
||||
import { Answer, FEEDBACK, RetrievalPayload } from './conversationModels';
|
||||
|
||||
export function handleFetchAnswer(
|
||||
question: string,
|
||||
@@ -54,23 +28,22 @@ export function handleFetchAnswer(
|
||||
title: any;
|
||||
}
|
||||
> {
|
||||
const docPath = getDocPath(selectedDocs);
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
return conversationService
|
||||
.answer(
|
||||
{
|
||||
const payload: RetrievalPayload = {
|
||||
question: question,
|
||||
history: history,
|
||||
active_docs: docPath,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
prompt_id: promptId,
|
||||
chunks: chunks,
|
||||
token_limit: token_limit,
|
||||
},
|
||||
signal,
|
||||
)
|
||||
};
|
||||
if (selectedDocs && 'id' in selectedDocs)
|
||||
payload.active_docs = selectedDocs.id as string;
|
||||
payload.retriever = selectedDocs?.retriever as string;
|
||||
return conversationService
|
||||
.answer(payload, signal)
|
||||
.then((response) => {
|
||||
if (response.ok) {
|
||||
return response.json();
|
||||
@@ -101,16 +74,27 @@ export function handleFetchAnswerSteaming(
|
||||
token_limit: number,
|
||||
onEvent: (event: MessageEvent) => void,
|
||||
): Promise<Answer> {
|
||||
const docPath = getDocPath(selectedDocs);
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
const payload: RetrievalPayload = {
|
||||
question: question,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
prompt_id: promptId,
|
||||
chunks: chunks,
|
||||
token_limit: token_limit,
|
||||
};
|
||||
if (selectedDocs && 'id' in selectedDocs)
|
||||
payload.active_docs = selectedDocs.id as string;
|
||||
payload.retriever = selectedDocs?.retriever as string;
|
||||
|
||||
return new Promise<Answer>((resolve, reject) => {
|
||||
conversationService
|
||||
.answerStream(
|
||||
{
|
||||
question: question,
|
||||
active_docs: docPath,
|
||||
active_docs: selectedDocs?.id as string,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversationId,
|
||||
prompt_id: promptId,
|
||||
@@ -176,11 +160,23 @@ export function handleSearch(
|
||||
chunks: string,
|
||||
token_limit: number,
|
||||
) {
|
||||
const docPath = getDocPath(selectedDocs);
|
||||
history = history.map((item) => {
|
||||
return { prompt: item.prompt, response: item.response };
|
||||
});
|
||||
const payload: RetrievalPayload = {
|
||||
question: question,
|
||||
history: JSON.stringify(history),
|
||||
conversation_id: conversation_id,
|
||||
chunks: chunks,
|
||||
token_limit: token_limit,
|
||||
};
|
||||
if (selectedDocs && 'id' in selectedDocs)
|
||||
payload.active_docs = selectedDocs.id as string;
|
||||
payload.retriever = selectedDocs?.retriever as string;
|
||||
return conversationService
|
||||
.search({
|
||||
question: question,
|
||||
active_docs: docPath,
|
||||
active_docs: selectedDocs?.id as string,
|
||||
conversation_id,
|
||||
history,
|
||||
chunks: chunks,
|
||||
|
||||
@@ -31,3 +31,13 @@ export interface Query {
|
||||
conversationId?: string | null;
|
||||
title?: string | null;
|
||||
}
|
||||
export interface RetrievalPayload {
|
||||
question: string;
|
||||
active_docs?: string;
|
||||
retriever?: string;
|
||||
history: string;
|
||||
conversation_id: string | null;
|
||||
prompt_id?: string | null;
|
||||
chunks: string;
|
||||
token_limit: number;
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import React from 'react';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
import { Doc, getDocs } from '../preferences/preferenceApi';
|
||||
import { getDocs } from '../preferences/preferenceApi';
|
||||
import { Doc } from '../models/misc';
|
||||
import {
|
||||
selectSelectedDocs,
|
||||
setSelectedDocs,
|
||||
|
||||
@@ -22,8 +22,9 @@ export default function CreateAPIKeyModal({
|
||||
|
||||
const [APIKeyName, setAPIKeyName] = React.useState<string>('');
|
||||
const [sourcePath, setSourcePath] = React.useState<{
|
||||
label: string;
|
||||
value: string;
|
||||
name: string;
|
||||
id: string;
|
||||
type: string;
|
||||
} | null>(null);
|
||||
const [prompt, setPrompt] = React.useState<{
|
||||
name: string;
|
||||
@@ -41,27 +42,17 @@ export default function CreateAPIKeyModal({
|
||||
? docs
|
||||
.filter((doc) => doc.model === embeddingsName)
|
||||
.map((doc: Doc) => {
|
||||
let namePath = doc.name;
|
||||
if (doc.language === namePath) {
|
||||
namePath = '.project';
|
||||
}
|
||||
let docPath = 'default';
|
||||
if (doc.location === 'local') {
|
||||
docPath = 'local' + '/' + doc.name + '/';
|
||||
} else if (doc.location === 'remote') {
|
||||
docPath =
|
||||
doc.language +
|
||||
'/' +
|
||||
namePath +
|
||||
'/' +
|
||||
doc.version +
|
||||
'/' +
|
||||
doc.model +
|
||||
'/';
|
||||
if ('id' in doc) {
|
||||
return {
|
||||
name: doc.name,
|
||||
id: doc.id as string,
|
||||
type: 'local',
|
||||
};
|
||||
}
|
||||
return {
|
||||
label: doc.name,
|
||||
value: docPath,
|
||||
name: doc.name,
|
||||
id: doc.id ?? 'default',
|
||||
type: doc.type ?? 'default',
|
||||
};
|
||||
})
|
||||
: [];
|
||||
@@ -107,9 +98,14 @@ export default function CreateAPIKeyModal({
|
||||
<Dropdown
|
||||
placeholder={t('modals.createAPIKey.sourceDoc')}
|
||||
selectedValue={sourcePath}
|
||||
onSelect={(selection: { label: string; value: string }) =>
|
||||
setSourcePath(selection)
|
||||
}
|
||||
onSelect={(selection: {
|
||||
name: string;
|
||||
id: string;
|
||||
type: string;
|
||||
}) => {
|
||||
setSourcePath(selection);
|
||||
console.log(selection);
|
||||
}}
|
||||
options={extractDocPaths()}
|
||||
size="w-full"
|
||||
rounded="xl"
|
||||
@@ -142,16 +138,22 @@ export default function CreateAPIKeyModal({
|
||||
</div>
|
||||
<button
|
||||
disabled={!sourcePath || APIKeyName.length === 0 || !prompt}
|
||||
onClick={() =>
|
||||
sourcePath &&
|
||||
prompt &&
|
||||
createAPIKey({
|
||||
onClick={() => {
|
||||
if (sourcePath && prompt) {
|
||||
const payload: any = {
|
||||
name: APIKeyName,
|
||||
source: sourcePath.value,
|
||||
prompt_id: prompt.id,
|
||||
chunks: chunk,
|
||||
})
|
||||
};
|
||||
if (sourcePath.type === 'default') {
|
||||
payload.retriever = sourcePath.id;
|
||||
}
|
||||
if (sourcePath.type === 'local') {
|
||||
payload.source = sourcePath.id;
|
||||
}
|
||||
createAPIKey(payload);
|
||||
}
|
||||
}}
|
||||
className="float-right mt-4 rounded-full bg-purple-30 px-5 py-2 text-sm text-white hover:bg-[#6F3FD1] disabled:opacity-50"
|
||||
>
|
||||
{t('modals.createAPIKey.create')}
|
||||
|
||||
@@ -46,27 +46,9 @@ export const ShareConversationModal = ({
|
||||
? docs
|
||||
.filter((doc) => doc.model === embeddingsName)
|
||||
.map((doc: Doc) => {
|
||||
let namePath = doc.name;
|
||||
if (doc.language === namePath) {
|
||||
namePath = '.project';
|
||||
}
|
||||
let docPath = 'default';
|
||||
if (doc.location === 'local') {
|
||||
docPath = 'local' + '/' + doc.name + '/';
|
||||
} else if (doc.location === 'remote') {
|
||||
docPath =
|
||||
doc.language +
|
||||
'/' +
|
||||
namePath +
|
||||
'/' +
|
||||
doc.version +
|
||||
'/' +
|
||||
doc.model +
|
||||
'/';
|
||||
}
|
||||
return {
|
||||
label: doc.name,
|
||||
value: docPath,
|
||||
value: doc.id ?? 'default',
|
||||
};
|
||||
})
|
||||
: [];
|
||||
|
||||
@@ -4,16 +4,13 @@ export type User = {
|
||||
avatar: string;
|
||||
};
|
||||
export type Doc = {
|
||||
location: string;
|
||||
id?: string;
|
||||
name: string;
|
||||
language: string;
|
||||
version: string;
|
||||
description: string;
|
||||
fullName: string;
|
||||
date: string;
|
||||
docLink: string;
|
||||
model: string;
|
||||
tokens?: string;
|
||||
type?: string;
|
||||
retriever?: string;
|
||||
};
|
||||
|
||||
export type PromptProps = {
|
||||
|
||||
@@ -1,18 +1,6 @@
|
||||
import conversationService from '../api/services/conversationService';
|
||||
import userService from '../api/services/userService';
|
||||
|
||||
// not all properties in Doc are going to be present. Make some optional
|
||||
export type Doc = {
|
||||
location: string;
|
||||
name: string;
|
||||
language: string;
|
||||
version: string;
|
||||
description: string;
|
||||
fullName: string;
|
||||
date: string;
|
||||
docLink: string;
|
||||
model: string;
|
||||
};
|
||||
import { Doc } from '../models/misc';
|
||||
|
||||
//Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
|
||||
export async function getDocs(): Promise<Doc[] | null> {
|
||||
@@ -78,17 +66,10 @@ export function setLocalPrompt(prompt: string): void {
|
||||
|
||||
export function setLocalRecentDocs(doc: Doc | null): void {
|
||||
localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(doc));
|
||||
let namePath = doc?.name;
|
||||
if (doc?.language === namePath) {
|
||||
namePath = '.project';
|
||||
}
|
||||
|
||||
let docPath = 'default';
|
||||
if (doc?.location === 'local') {
|
||||
if (doc?.type === 'local') {
|
||||
docPath = 'local' + '/' + doc.name + '/';
|
||||
} else if (doc?.location === 'remote') {
|
||||
docPath =
|
||||
doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
|
||||
}
|
||||
userService
|
||||
.checkDocs({
|
||||
|
||||
@@ -4,9 +4,9 @@ import {
|
||||
createSlice,
|
||||
isAnyOf,
|
||||
} from '@reduxjs/toolkit';
|
||||
import { Doc, setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
|
||||
import { setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
|
||||
import { RootState } from '../store';
|
||||
import { ActiveState } from '../models/misc';
|
||||
import { ActiveState, Doc } from '../models/misc';
|
||||
|
||||
interface Preference {
|
||||
apiKey: string;
|
||||
@@ -25,15 +25,13 @@ const initialState: Preference = {
|
||||
chunks: '2',
|
||||
token_limit: 2000,
|
||||
selectedDocs: {
|
||||
id: 'default',
|
||||
name: 'default',
|
||||
language: 'default',
|
||||
location: 'default',
|
||||
version: 'default',
|
||||
description: 'default',
|
||||
fullName: 'default',
|
||||
type: 'remote',
|
||||
date: 'default',
|
||||
docLink: 'default',
|
||||
model: 'openai_text-embedding-ada-002',
|
||||
retriever: 'classic',
|
||||
} as Doc,
|
||||
sourceDocs: null,
|
||||
conversations: null,
|
||||
|
||||
@@ -47,7 +47,8 @@ export default function APIKeys() {
|
||||
|
||||
const handleCreateKey = (payload: {
|
||||
name: string;
|
||||
source: string;
|
||||
source?: string;
|
||||
retriever?: string;
|
||||
prompt_id: string;
|
||||
chunks: string;
|
||||
}) => {
|
||||
|
||||
@@ -61,12 +61,10 @@ const Documents: React.FC<DocumentsProps> = ({
|
||||
{document.tokens ? formatTokens(+document.tokens) : ''}
|
||||
</td>
|
||||
<td className="border-r border-t px-4 py-2">
|
||||
{document.location === 'remote'
|
||||
? 'Pre-loaded'
|
||||
: 'Private'}
|
||||
{document.type === 'remote' ? 'Pre-loaded' : 'Private'}
|
||||
</td>
|
||||
<td className="border-t px-4 py-2">
|
||||
{document.location !== 'remote' && (
|
||||
{document.type !== 'remote' && (
|
||||
<img
|
||||
src={Trash}
|
||||
alt="Delete"
|
||||
|
||||
@@ -6,7 +6,7 @@ import userService from '../api/services/userService';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ArrowRight from '../assets/arrow-right.svg';
|
||||
import i18n from '../locale/i18n';
|
||||
import { Doc } from '../preferences/preferenceApi';
|
||||
import { Doc } from '../models/misc';
|
||||
import {
|
||||
selectSourceDocs,
|
||||
setSourceDocs,
|
||||
@@ -39,9 +39,8 @@ export default function Settings() {
|
||||
};
|
||||
|
||||
const handleDeleteClick = (index: number, doc: Doc) => {
|
||||
const docPath = 'indexes/' + 'local' + '/' + doc.name;
|
||||
userService
|
||||
.deletePath(docPath)
|
||||
.deletePath(doc.id ?? '')
|
||||
.then((response) => {
|
||||
if (response.ok && documents) {
|
||||
const updatedDocuments = [
|
||||
|
||||
@@ -26,15 +26,12 @@ const store = configureStore({
|
||||
conversations: null,
|
||||
sourceDocs: [
|
||||
{
|
||||
location: '',
|
||||
language: '',
|
||||
name: 'default',
|
||||
version: '',
|
||||
date: '',
|
||||
description: '',
|
||||
docLink: '',
|
||||
fullName: '',
|
||||
model: '1.0',
|
||||
type: 'remote',
|
||||
id: 'default',
|
||||
retriever: 'clasic',
|
||||
},
|
||||
],
|
||||
modalState: 'INACTIVE',
|
||||
|
||||
@@ -120,7 +120,7 @@ function Upload({
|
||||
dispatch(setSourceDocs(data));
|
||||
dispatch(
|
||||
setSelectedDocs(
|
||||
data?.find((d) => d.location.toLowerCase() === 'local'),
|
||||
data?.find((d) => d.type?.toLowerCase() === 'local'),
|
||||
),
|
||||
);
|
||||
});
|
||||
@@ -137,7 +137,7 @@ function Upload({
|
||||
dispatch(setSourceDocs(data));
|
||||
dispatch(
|
||||
setSelectedDocs(
|
||||
data?.find((d) => d.location.toLowerCase() === 'local'),
|
||||
data?.find((d) => d.type?.toLowerCase() === 'local'),
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
1448
mock-backend/package-lock.json
generated
1448
mock-backend/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -13,7 +13,7 @@
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"cors": "^2.8.5",
|
||||
"json-server": "^0.17.4",
|
||||
"json-server": "^1.0.0-beta.2",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
55
scripts/migrate_to_v1_vectorstore.py
Normal file
55
scripts/migrate_to_v1_vectorstore.py
Normal file
@@ -0,0 +1,55 @@
|
||||
import pymongo
|
||||
import os
|
||||
|
||||
def migrate_to_v1_vectorstore_mongo():
    """Migrate the ``vectors`` collection to the v1 ``sources`` collection.

    For every document in ``docsgpt.vectors``:

    * the legacy ``location`` field is removed,
    * ``retriever`` defaults to ``"classic"`` (and ``remote_data`` to
      ``None``) when no retriever is stored yet.

    The normalised documents are then copied into ``docsgpt.sources`` and
    the ``vectors`` collection is dropped. The MongoDB client is closed even
    if a step fails.
    """
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    try:
        db = client["docsgpt"]
        vectors_collection = db["vectors"]
        sources_collection = db["sources"]

        for vector in vectors_collection.find():
            update = {}
            if "location" in vector:
                # Deleting the key from the local dict and re-sending it with
                # $set leaves the stored field untouched; only $unset removes
                # it from the document in MongoDB.
                update["$unset"] = {"location": ""}
            if "retriever" not in vector:
                # NOTE(review): remote_data is initialised only for documents
                # that lacked a retriever - confirm this grouping is intended.
                update["$set"] = {"retriever": "classic", "remote_data": None}
            if update:
                vectors_collection.update_one({"_id": vector["_id"]}, update)

        # move data from vectors_collection to sources_collection
        for vector in vectors_collection.find():
            sources_collection.insert_one(vector)

        vectors_collection.drop()
    finally:
        client.close()
|
||||
|
||||
def migrate_faiss_to_v1_vectorstore():
    """Rename on-disk index directories to be keyed by source ``_id``.

    Each document in ``docsgpt.vectors`` is expected to carry ``user`` and
    ``name``; its directory ``./application/indexes/<user>/<name>`` is renamed
    to ``./application/indexes/<_id>``. Paths are relative to the current
    working directory. A failed rename is reported on stdout and the
    remaining documents are still processed. The MongoDB client is closed
    even if iteration fails.
    """
    client = pymongo.MongoClient("mongodb://localhost:27017/")
    try:
        vectors_collection = client["docsgpt"]["vectors"]

        for vector in vectors_collection.find():
            old_path = f"./application/indexes/{vector['user']}/{vector['name']}"
            new_path = f"./application/indexes/{vector['_id']}"
            try:
                os.rename(old_path, new_path)
            except OSError as e:
                # Best effort: a missing or already-moved directory must not
                # abort the migration of the other indexes.
                print(f"Error moving {old_path} to {new_path}: {e}")
    finally:
        client.close()
|
||||
|
||||
def migrate_mongo_atlas_vector_to_v1_vectorstore():
    """Tag Atlas vector documents with the ``_id`` of their source.

    For every entry in ``docsgpt.vectors``, all documents in
    ``docsgpt.documents`` whose ``store`` equals ``"<user>/<name>"`` get a
    ``source_id`` field holding the entry's ``_id`` as a string.

    The connection string below is a placeholder and must be filled in
    before this function is run. The MongoDB client is closed even if an
    update fails.
    """
    client = pymongo.MongoClient("mongodb+srv://<username>:<password>@<cluster>/<dbname>?retryWrites=true&w=majority")
    try:
        db = client["docsgpt"]
        vectors_collection = db["vectors"]

        # mongodb atlas collection
        documents_collection = db["documents"]

        for vector in vectors_collection.find():
            store_key = f"{vector['user']}/{vector['name']}"
            documents_collection.update_many(
                {"store": store_key},
                {"$set": {"source_id": str(vector["_id"])}},
            )
    finally:
        client.close()
|
||||
|
||||
if __name__ == "__main__":
    # Order matters: the FAISS step reads the ``vectors`` collection, which
    # the Mongo step drops once its documents have been copied to ``sources``.
    migrate_faiss_to_v1_vectorstore()
    migrate_to_v1_vectorstore_mongo()
|
||||
Reference in New Issue
Block a user