diff --git a/application/api/user/routes.py b/application/api/user/routes.py index c49b43fd..91e343fc 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -2,8 +2,6 @@ import os import uuid import shutil from flask import Blueprint, request, jsonify -from urllib.parse import urlparse -import requests from pymongo import MongoClient from bson.objectid import ObjectId from bson.binary import Binary, UuidRepresentation @@ -17,7 +15,7 @@ from application.vectorstore.vector_creator import VectorCreator mongo = MongoClient(settings.MONGO_URI) db = mongo["docsgpt"] conversations_collection = db["conversations"] -vectors_collection = db["vectors"] +sources_collection = db["sources"] prompts_collection = db["prompts"] feedback_collection = db["feedback"] api_key_collection = db["api_keys"] @@ -102,7 +100,7 @@ def delete_by_ids(): return {"status": "error"} if settings.VECTOR_STORE == "faiss": - result = vectors_collection.delete_index(ids=ids) + result = sources_collection.delete_index(ids=ids) if result: return {"status": "ok"} return {"status": "error"} @@ -112,26 +110,24 @@ def delete_by_ids(): def delete_old(): """Delete old indexes.""" import shutil - - path = request.args.get("path") - dirs = path.split("/") - dirs_clean = [] - for i in range(0, len(dirs)): - dirs_clean.append(secure_filename(dirs[i])) - # check that path strats with indexes or vectors - - if dirs_clean[0] not in ["indexes", "vectors"]: - return {"status": "error"} - path_clean = "/".join(dirs_clean) - vectors_collection.delete_one({"name": dirs_clean[-1], "user": dirs_clean[-2]}) + source_id = request.args.get("source_id") + doc = sources_collection.find_one({ + "_id": ObjectId(source_id), + "user": "local", + }) + if(doc is None): + return {"status":"not found"},404 if settings.VECTOR_STORE == "faiss": try: - shutil.rmtree(os.path.join(current_dir, path_clean)) + shutil.rmtree(os.path.join(current_dir, str(doc["_id"]))) except FileNotFoundError: pass else: - vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)) + vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, source_id=str(doc["_id"])) vetorstore.delete_index() + sources_collection.delete_one({ + "_id": ObjectId(source_id), + }) return {"status": "ok"} @@ -234,52 +230,36 @@ def combined_json(): data = [ { "name": "default", - "language": "default", - "version": "", - "description": "default", - "fullName": "default", "date": "default", - "docLink": "default", "model": settings.EMBEDDINGS_NAME, "location": "remote", "tokens": "", + "retriever": "classic", } ] # structure: name, language, version, description, fullName, date, docLink - # append data from vectors_collection in sorted order in descending order of date - for index in vectors_collection.find({"user": user}).sort("date", -1): + # append data from sources_collection in sorted order in descending order of date + for index in sources_collection.find({"user": user}).sort("date", -1): data.append( { + "id": str(index["_id"]), "name": index["name"], - "language": index["language"], - "version": "", - "description": index["name"], - "fullName": index["name"], "date": index["date"], - "docLink": index["location"], "model": settings.EMBEDDINGS_NAME, "location": "local", "tokens": index["tokens"] if ("tokens" in index.keys()) else "", + "retriever": index["retriever"] if ("retriever" in index.keys()) else "classic", } ) - if settings.VECTOR_STORE == "faiss": - data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json() - for index in data_remote: - index["location"] = "remote" - data.append(index) if "duckduck_search" in settings.RETRIEVERS_ENABLED: data.append( { "name": "DuckDuckGo Search", - "language": "en", - "version": "", - "description": "duckduck_search", - "fullName": "DuckDuckGo Search", "date": "duckduck_search", - "docLink": "duckduck_search", "model": settings.EMBEDDINGS_NAME, "location": "custom", "tokens": "", + "retriever": "duckduck_search", } ) if "brave_search" in settings.RETRIEVERS_ENABLED: @@ -287,14 +267,11 @@ def combined_json(): { "name": "Brave Search", "language": "en", - "version": "", - "description": "brave_search", - "fullName": "Brave Search", "date": "brave_search", - "docLink": "brave_search", "model": settings.EMBEDDINGS_NAME, "location": "custom", "tokens": "", + "retriever": "brave_search", } ) @@ -303,39 +280,13 @@ def combined_json(): @user.route("/api/docs_check", methods=["POST"]) def check_docs(): - # check if docs exist in a vectorstore folder data = request.get_json() - # split docs on / and take first part - if data["docs"].split("/")[0] == "local": - return {"status": "exists"} + vectorstore = "vectors/" + secure_filename(data["docs"]) - base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/" if os.path.exists(vectorstore) or data["docs"] == "default": return {"status": "exists"} else: - file_url = urlparse(base_path + vectorstore + "index.faiss") - - if ( - file_url.scheme in ["https"] - and file_url.netloc == "raw.githubusercontent.com" - and file_url.path.startswith("/arc53/DocsHUB/main/") - ): - r = requests.get(file_url.geturl()) - if r.status_code != 200: - return {"status": "null"} - else: - if not os.path.exists(vectorstore): - os.makedirs(vectorstore) - with open(vectorstore + "index.faiss", "wb") as f: - f.write(r.content) - - r = requests.get(base_path + vectorstore + "index.pkl") - with open(vectorstore + "index.pkl", "wb") as f: - f.write(r.content) - else: - return {"status": "null"} - - return {"status": "loaded"} + return {"status": "not found"} @user.route("/api/create_prompt", methods=["POST"]) @@ -422,12 +373,23 @@ def get_api_keys(): keys = api_key_collection.find({"user": user}) list_keys = [] for key in keys: + if "source" in key and isinstance(key["source"],DBRef): + source = db.dereference(key["source"]) + if source is None: + continue + else: + source_name = source["name"] + elif "retriever" in key: + source_name = key["retriever"] + else: + continue + list_keys.append( { "id": str(key["_id"]), "name": key["name"], "key": key["key"][:4] + "..." + key["key"][-4:], - "source": key["source"], + "source": source_name, "prompt_id": key["prompt_id"], "chunks": key["chunks"], } @@ -439,21 +401,22 @@ def get_api_keys(): def create_api_key(): data = request.get_json() name = data["name"] - source = data["source"] prompt_id = data["prompt_id"] chunks = data["chunks"] key = str(uuid.uuid4()) user = "local" - resp = api_key_collection.insert_one( - { - "name": name, - "key": key, - "source": source, - "user": user, - "prompt_id": prompt_id, - "chunks": chunks, - } - ) + new_api_key = { + "name": name, + "key": key, + "user": user, + "prompt_id": prompt_id, + "chunks": chunks, + } + if "source" in data and ObjectId.is_valid(data["source"]): + new_api_key["source"] = DBRef("sources", ObjectId(data["source"])) + if "retriever" in data: + new_api_key["retriever"] = data["retriever"] + resp = api_key_collection.insert_one(new_api_key) new_id = str(resp.inserted_id) return {"id": new_id, "key": key} @@ -481,26 +444,31 @@ def share_conversation(): isPromptable = request.args.get("isPromptable").lower() == "true" conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)}) + if(conversation is None): + raise Exception("Conversation does not exist") current_n_queries = len(conversation["queries"]) ##generate binary representation of uuid explicit_binary = Binary.from_uuid(uuid.uuid4(), UuidRepresentation.STANDARD) if isPromptable: - source = "default" if "source" not in data else data["source"] prompt_id = "default" if "prompt_id" not in data else data["prompt_id"] chunks = "2" if "chunks" not in data else data["chunks"] name = conversation["name"] + "(shared)" - pre_existing_api_document = api_key_collection.find_one( - { + new_api_key_data = { "prompt_id": prompt_id, "chunks": chunks, - "source": source, "user": user, } + if "source" in data and ObjectId.is_valid(data["source"]): + new_api_key_data["source"] = DBRef("sources",ObjectId(data["source"])) + elif "retriever" in data: + new_api_key_data["retriever"] = data["retriever"] + + pre_existing_api_document = api_key_collection.find_one( + new_api_key_data ) - api_uuid = str(uuid.uuid4()) if pre_existing_api_document: api_uuid = pre_existing_api_document["key"] pre_existing = shared_conversations_collections.find_one( @@ -538,17 +506,16 @@ def share_conversation(): ) return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}) else: - api_key_collection.insert_one( - { - "name": name, - "key": api_uuid, - "source": source, - "user": user, - "prompt_id": prompt_id, - "chunks": chunks, - } - ) - shared_conversations_collections.insert_one( + + api_uuid = str(uuid.uuid4()) + new_api_key_data["key"] = api_uuid + new_api_key_data["name"] = name + if "source" in data and ObjectId.is_valid(data["source"]): + new_api_key_data["source"] = DBRef("sources", ObjectId(data["source"])) + if "retriever" in data: + new_api_key_data["retriever"] = data["retriever"] + api_key_collection.insert_one(new_api_key_data) + shared_conversations_collections.insert_one( { "uuid": explicit_binary, "conversation_id": { @@ -560,7 +527,7 @@ def share_conversation(): "user": user, "api_key": api_uuid, } - ) + ) ## Identifier as route parameter in frontend return ( jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}), diff --git a/extensions/react-widget/package.json b/extensions/react-widget/package.json index 813478e2..d449d0a3 100644 --- a/extensions/react-widget/package.json +++ b/extensions/react-widget/package.json @@ -1,5 +1,5 @@ { - "name": "docsgpt-react", + "name": "docsgpt", "version": "0.4.2", "private": false, "description": "DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖.", @@ -11,6 +11,18 @@ "dist", "package.json" ], + "targets": { + "modern": { + "engines": { + "browsers": "Chrome 80" + } + }, + "legacy": { + "engines": { + "browsers": "> 0.5%, last 2 versions, not dead" + } + } + }, "@parcel/resolver-default": { "packageExports": true },