purge unwanted linting

This commit is contained in:
ManishMadan2882
2024-09-10 16:26:13 +05:30
parent 37ab5f9d7a
commit e318228a08
2 changed files with 81 additions and 102 deletions

View File

@@ -2,8 +2,6 @@ import os
import uuid
import shutil
from flask import Blueprint, request, jsonify
from urllib.parse import urlparse
import requests
from pymongo import MongoClient
from bson.objectid import ObjectId
from bson.binary import Binary, UuidRepresentation
@@ -17,7 +15,7 @@ from application.vectorstore.vector_creator import VectorCreator
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
sources_collection = db["sources"]
prompts_collection = db["prompts"]
feedback_collection = db["feedback"]
api_key_collection = db["api_keys"]
@@ -102,7 +100,7 @@ def delete_by_ids():
return {"status": "error"}
if settings.VECTOR_STORE == "faiss":
result = vectors_collection.delete_index(ids=ids)
result = sources_collection.delete_index(ids=ids)
if result:
return {"status": "ok"}
return {"status": "error"}
@@ -112,26 +110,24 @@ def delete_by_ids():
def delete_old():
"""Delete old indexes."""
import shutil
path = request.args.get("path")
dirs = path.split("/")
dirs_clean = []
for i in range(0, len(dirs)):
dirs_clean.append(secure_filename(dirs[i]))
# check that path strats with indexes or vectors
if dirs_clean[0] not in ["indexes", "vectors"]:
return {"status": "error"}
path_clean = "/".join(dirs_clean)
vectors_collection.delete_one({"name": dirs_clean[-1], "user": dirs_clean[-2]})
source_id = request.args.get("source_id")
doc = sources_collection.find_one({
"_id": ObjectId(source_id),
"user": "local",
})
if(doc is None):
return {"status":"not found"},404
if settings.VECTOR_STORE == "faiss":
try:
shutil.rmtree(os.path.join(current_dir, path_clean))
shutil.rmtree(os.path.join(current_dir, str(doc["_id"])))
except FileNotFoundError:
pass
else:
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean))
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, source_id=str(doc["_id"]))
vetorstore.delete_index()
sources_collection.delete_one({
"_id": ObjectId(source_id),
})
return {"status": "ok"}
@@ -234,52 +230,36 @@ def combined_json():
data = [
{
"name": "default",
"language": "default",
"version": "",
"description": "default",
"fullName": "default",
"date": "default",
"docLink": "default",
"model": settings.EMBEDDINGS_NAME,
"location": "remote",
"tokens": "",
"retriever": "classic",
}
]
# structure: name, language, version, description, fullName, date, docLink
# append data from vectors_collection in sorted order in descending order of date
for index in vectors_collection.find({"user": user}).sort("date", -1):
# append data from sources_collection in sorted order in descending order of date
for index in sources_collection.find({"user": user}).sort("date", -1):
data.append(
{
"id": str(index["_id"]),
"name": index["name"],
"language": index["language"],
"version": "",
"description": index["name"],
"fullName": index["name"],
"date": index["date"],
"docLink": index["location"],
"model": settings.EMBEDDINGS_NAME,
"location": "local",
"tokens": index["tokens"] if ("tokens" in index.keys()) else "",
"retriever": index["retriever"] if ("retriever" in index.keys()) else "classic",
}
)
if settings.VECTOR_STORE == "faiss":
data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
for index in data_remote:
index["location"] = "remote"
data.append(index)
if "duckduck_search" in settings.RETRIEVERS_ENABLED:
data.append(
{
"name": "DuckDuckGo Search",
"language": "en",
"version": "",
"description": "duckduck_search",
"fullName": "DuckDuckGo Search",
"date": "duckduck_search",
"docLink": "duckduck_search",
"model": settings.EMBEDDINGS_NAME,
"location": "custom",
"tokens": "",
"retriever": "duckduck_search",
}
)
if "brave_search" in settings.RETRIEVERS_ENABLED:
@@ -287,14 +267,11 @@ def combined_json():
{
"name": "Brave Search",
"language": "en",
"version": "",
"description": "brave_search",
"fullName": "Brave Search",
"date": "brave_search",
"docLink": "brave_search",
"model": settings.EMBEDDINGS_NAME,
"location": "custom",
"tokens": "",
"retriever": "brave_search",
}
)
@@ -303,39 +280,13 @@ def combined_json():
@user.route("/api/docs_check", methods=["POST"])
def check_docs():
# check if docs exist in a vectorstore folder
data = request.get_json()
# split docs on / and take first part
if data["docs"].split("/")[0] == "local":
return {"status": "exists"}
vectorstore = "vectors/" + secure_filename(data["docs"])
base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
if os.path.exists(vectorstore) or data["docs"] == "default":
return {"status": "exists"}
else:
file_url = urlparse(base_path + vectorstore + "index.faiss")
if (
file_url.scheme in ["https"]
and file_url.netloc == "raw.githubusercontent.com"
and file_url.path.startswith("/arc53/DocsHUB/main/")
):
r = requests.get(file_url.geturl())
if r.status_code != 200:
return {"status": "null"}
else:
if not os.path.exists(vectorstore):
os.makedirs(vectorstore)
with open(vectorstore + "index.faiss", "wb") as f:
f.write(r.content)
r = requests.get(base_path + vectorstore + "index.pkl")
with open(vectorstore + "index.pkl", "wb") as f:
f.write(r.content)
else:
return {"status": "null"}
return {"status": "loaded"}
return {"status": "not found"}
@user.route("/api/create_prompt", methods=["POST"])
@@ -422,12 +373,23 @@ def get_api_keys():
keys = api_key_collection.find({"user": user})
list_keys = []
for key in keys:
if "source" in key and isinstance(key["source"],DBRef):
source = db.dereference(key["source"])
if source is None:
continue
else:
source_name = source["name"]
elif "retriever" in key:
source_name = key["retriever"]
else:
continue
list_keys.append(
{
"id": str(key["_id"]),
"name": key["name"],
"key": key["key"][:4] + "..." + key["key"][-4:],
"source": key["source"],
"source": source_name,
"prompt_id": key["prompt_id"],
"chunks": key["chunks"],
}
@@ -439,21 +401,22 @@ def get_api_keys():
def create_api_key():
data = request.get_json()
name = data["name"]
source = data["source"]
prompt_id = data["prompt_id"]
chunks = data["chunks"]
key = str(uuid.uuid4())
user = "local"
resp = api_key_collection.insert_one(
{
"name": name,
"key": key,
"source": source,
"user": user,
"prompt_id": prompt_id,
"chunks": chunks,
}
)
new_api_key = {
"name": name,
"key": key,
"user": user,
"prompt_id": prompt_id,
"chunks": chunks,
}
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key["source"] = DBRef("sources", ObjectId(data["source"]))
if "retriever" in data:
new_api_key["retriever"] = data["retriever"]
resp = api_key_collection.insert_one(new_api_key)
new_id = str(resp.inserted_id)
return {"id": new_id, "key": key}
@@ -481,26 +444,31 @@ def share_conversation():
isPromptable = request.args.get("isPromptable").lower() == "true"
conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
if(conversation is None):
raise Exception("Conversation does not exist")
current_n_queries = len(conversation["queries"])
##generate binary representation of uuid
explicit_binary = Binary.from_uuid(uuid.uuid4(), UuidRepresentation.STANDARD)
if isPromptable:
source = "default" if "source" not in data else data["source"]
prompt_id = "default" if "prompt_id" not in data else data["prompt_id"]
chunks = "2" if "chunks" not in data else data["chunks"]
name = conversation["name"] + "(shared)"
pre_existing_api_document = api_key_collection.find_one(
{
new_api_key_data = {
"prompt_id": prompt_id,
"chunks": chunks,
"source": source,
"user": user,
}
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key_data["source"] = DBRef("sources",ObjectId(data["source"]))
elif "retriever" in data:
new_api_key_data["retriever"] = data["retriever"]
pre_existing_api_document = api_key_collection.find_one(
new_api_key_data
)
api_uuid = str(uuid.uuid4())
if pre_existing_api_document:
api_uuid = pre_existing_api_document["key"]
pre_existing = shared_conversations_collections.find_one(
@@ -538,17 +506,16 @@ def share_conversation():
)
return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())})
else:
api_key_collection.insert_one(
{
"name": name,
"key": api_uuid,
"source": source,
"user": user,
"prompt_id": prompt_id,
"chunks": chunks,
}
)
shared_conversations_collections.insert_one(
api_uuid = str(uuid.uuid4())
new_api_key_data["key"] = api_uuid
new_api_key_data["name"] = name
if "source" in data and ObjectId.is_valid(data["source"]):
new_api_key_data["source"] = DBRef("sources", ObjectId(data["source"]))
if "retriever" in data:
new_api_key_data["retriever"] = data["retriever"]
api_key_collection.insert_one(new_api_key_data)
shared_conversations_collections.insert_one(
{
"uuid": explicit_binary,
"conversation_id": {
@@ -560,7 +527,7 @@ def share_conversation():
"user": user,
"api_key": api_uuid,
}
)
)
## Identifier as route parameter in frontend
return (
jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),