From 3c6fd365fbc66d94f31c77db784932ff4ec7eb57 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Fri, 9 Aug 2024 18:27:54 +0530
Subject: [PATCH 01/18] store only local docs as location

---
 application/api/answer/routes.py |  62 ++++++++--------
 application/api/user/routes.py   | 118 ++++++++++---------------------
 2 files changed, 72 insertions(+), 108 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index f076285d..85cc3afd 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -77,23 +77,23 @@ def get_data_from_api_key(api_key):
     if data is None:
         raise Exception("Invalid API Key, please generate new key", 401)
 
-    if isinstance(data["source"], DBRef):
-        source_id = db.dereference(data["source"])["_id"]
-        data["source"] = get_source(source_id)
+    if "retriever" not in data:
+        data["retriever"] = "classic"
 
+    if "source" in data and isinstance(data["source"], DBRef):
+        source_doc = db.dereference(data["source"])
+        data["source"] = str(source_doc._id)
+        if "retriever" in source_doc:
+            data["retriever"] = source_doc["retriever"]
     return data
 
 
-def get_source(active_doc):
-    if ObjectId.is_valid(active_doc):
-        doc = vectors_collection.find_one({"_id": ObjectId(active_doc)})
-        if doc is None:
-            raise Exception("Source document does not exist", 404)
-        print("res", doc)
-        source = {"active_docs": "/".join(doc["location"].split("/")[-2:])}
-    else:
-        source = {"active_docs": active_doc}
-    return source
+def get_retriever(source_id: str):
+    doc = vectors_collection.find_one({"_id": ObjectId(source_id)})
+    if doc is None:
+        raise Exception("Source document does not exist", 404)
+    retriever_name = "classic" if "retriever" not in doc else doc["retriever"]
+    return retriever_name
 
 
 def get_vectorstore(data):
@@ -244,25 +244,31 @@ def stream():
         else:
             token_limit = settings.DEFAULT_MAX_HISTORY
 
-        # check if active_docs or api_key is set
+        ## retriever can be "brave_search, duckduck_search or classic"
+        retriever_name = data["retriever"] if "retriever" in data else "classic"
 
+        # check if active_docs or api_key is set
         if "api_key" in data:
             data_key = get_data_from_api_key(data["api_key"])
             chunks = int(data_key["chunks"])
             prompt_id = data_key["prompt_id"]
-            source = data_key["source"]
+            source = {"active_docs": data_key["source"]}
+            retriever_name = data_key["retriever"]
             user_api_key = data["api_key"]
+
         elif "active_docs" in data:
-            source = get_source(data["active_docs"])
+            source = {"active_docs" : data["active_docs"]}
+            retriever_name = get_retriever(data["active_docs"])    
             user_api_key = None
+
         else:
             source = {}
             user_api_key = None
 
-        if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
+        """ if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
             retriever_name = "classic"
         else:
-            retriever_name = source["active_docs"]
+            retriever_name = source["active_docs"] """
 
         prompt = get_prompt(prompt_id)
 
@@ -341,6 +347,9 @@ def api_answer():
     else:
         token_limit = settings.DEFAULT_MAX_HISTORY
 
+    ## retriever can be brave_search, duckduck_search or classic
+    retriever_name = data["retriever"] if "retriever" in data else "classic"
+
     # use try and except  to check for exception
     try:
         # check if the vectorstore is set
@@ -350,15 +359,10 @@ def api_answer():
             prompt_id = data_key["prompt_id"]
             source = data_key["source"]
             user_api_key = data["api_key"]
-        else:
-            source = get_source(data["active_docs"])
+        elif "active_docs" in data:
+            source = data["active_docs"]
             user_api_key = None
 
-        if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
-            retriever_name = "classic"
-        else:
-            retriever_name = source["active_docs"]
-
         prompt = get_prompt(prompt_id)
 
         retriever = RetrieverCreator.create_retriever(
@@ -410,16 +414,16 @@ def api_search():
         source = data_key["source"]
         user_api_key = data_key["api_key"]
     elif "active_docs" in data:
-        source = get_source(data["active_docs"])
+        source = data["active_docs"]
         user_api_key = None
     else:
         source = {}
         user_api_key = None
 
-    if source["active_docs"].split("/")[0] == "default" or source["active_docs"].split("/")[0] == "local":
-        retriever_name = "classic"
+    if "retriever" in data:
+        retriever_name = data["retriever"]
     else:
-        retriever_name = source["active_docs"]
+        retriever_name = "classic"
     if "token_limit" in data:
         token_limit = data["token_limit"]
     else:
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 06bab591..aab30469 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -25,9 +25,7 @@ shared_conversations_collections = db["shared_conversations"]
 
 user = Blueprint("user", __name__)
 
-current_dir = os.path.dirname(
-    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-)
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 @user.route("/api/delete_conversation", methods=["POST"])
@@ -57,9 +55,7 @@ def get_conversations():
     conversations = conversations_collection.find().sort("date", -1).limit(30)
     list_conversations = []
     for conversation in conversations:
-        list_conversations.append(
-            {"id": str(conversation["_id"]), "name": conversation["name"]}
-        )
+        list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})
 
     # list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]
 
@@ -138,9 +134,7 @@ def delete_old():
         except FileNotFoundError:
             pass
     else:
-        vetorstore = VectorCreator.create_vectorstore(
-            settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
-        )
+        vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean))
         vetorstore.delete_index()
 
     return {"status": "ok"}
@@ -175,9 +169,7 @@ def upload_file():
             file.save(os.path.join(temp_dir, filename))
 
         # Use shutil.make_archive to zip the temp directory
-        zip_path = shutil.make_archive(
-            base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir
-        )
+        zip_path = shutil.make_archive(base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir)
         final_filename = os.path.basename(zip_path)
 
         # Clean up the temporary directory after zipping
@@ -219,9 +211,7 @@ def upload_remote():
     source_data = request.form["data"]
 
     if source_data:
-        task = ingest_remote.delay(
-            source_data=source_data, job_name=job_name, user=user, loader=source
-        )
+        task = ingest_remote.delay(source_data=source_data, job_name=job_name, user=user, loader=source)
         task_id = task.id
         return {"status": "ok", "task_id": task_id}
     else:
@@ -264,7 +254,7 @@ def combined_json():
     for index in vectors_collection.find({"user": user}).sort("date", -1):
         data.append(
             {
-                "id":str(index["_id"]),
+                "id": str(index["_id"]),
                 "name": index["name"],
                 "language": index["language"],
                 "version": "",
@@ -278,9 +268,7 @@ def combined_json():
             }
         )
     if settings.VECTOR_STORE == "faiss":
-        data_remote = requests.get(
-            "https://d3dg1063dc54p9.cloudfront.net/combined.json"
-        ).json()
+        data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
         for index in data_remote:
             index["location"] = "remote"
             data.append(index)
@@ -383,9 +371,7 @@ def get_prompts():
     list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
     list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
     for prompt in prompts:
-        list_prompts.append(
-            {"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"}
-        )
+        list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})
 
     return jsonify(list_prompts)
 
@@ -394,21 +380,15 @@ def get_prompts():
 def get_single_prompt():
     prompt_id = request.args.get("id")
     if prompt_id == "default":
-        with open(
-            os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r"
-        ) as f:
+        with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
             chat_combine_template = f.read()
         return jsonify({"content": chat_combine_template})
     elif prompt_id == "creative":
-        with open(
-            os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r"
-        ) as f:
+        with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
             chat_reduce_creative = f.read()
         return jsonify({"content": chat_reduce_creative})
     elif prompt_id == "strict":
-        with open(
-            os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r"
-        ) as f:
+        with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
             chat_reduce_strict = f.read()
         return jsonify({"content": chat_reduce_strict})
 
@@ -437,9 +417,7 @@ def update_prompt_name():
     # check if name is null
     if name == "":
         return {"status": "error"}
-    prompts_collection.update_one(
-        {"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}}
-    )
+    prompts_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}})
     return {"status": "ok"}
 
 
@@ -449,12 +427,15 @@ def get_api_keys():
     keys = api_key_collection.find({"user": user})
     list_keys = []
     for key in keys:
+        source_name = (
+            db.dereference(key["source"])["name"] if isinstance(key["source"], DBRef) else key["source"].split("/")[0]
+        )
         list_keys.append(
             {
                 "id": str(key["_id"]),
                 "name": key["name"],
                 "key": key["key"][:4] + "..." + key["key"][-4:],
-                "source": str(key["source"]),
+                "source": source_name,
                 "prompt_id": key["prompt_id"],
                 "chunks": key["chunks"],
             }
@@ -466,23 +447,22 @@ def get_api_keys():
 def create_api_key():
     data = request.get_json()
     name = data["name"]
-    source = data["source"]
     prompt_id = data["prompt_id"]
     chunks = data["chunks"]
     key = str(uuid.uuid4())
     user = "local"
-    if(ObjectId.is_valid(data["source"])):
-        source = DBRef("vectors",ObjectId(data["source"]))
-    resp = api_key_collection.insert_one(
-        {
-            "name": name,
-            "key": key,
-            "source": source,
-            "user": user,
-            "prompt_id": prompt_id,
-            "chunks": chunks,
-        }
-    )
+    new_api_key = {
+        "name": name,
+        "key": key,
+        "user": user,
+        "prompt_id": prompt_id,
+        "chunks": chunks,
+    }
+    if "source" in data and ObjectId.is_valid(data["source"]):
+        new_api_key["source"] = DBRef("vectors", ObjectId(data["source"]))
+    if "retriever" in data:
+        new_api_key["retriever"] = data["retriever"]
+    resp = api_key_collection.insert_one(new_api_key)
     new_id = str(resp.inserted_id)
     return {"id": new_id, "key": key}
 
@@ -509,9 +489,7 @@ def share_conversation():
         conversation_id = data["conversation_id"]
         isPromptable = request.args.get("isPromptable").lower() == "true"
 
-        conversation = conversations_collection.find_one(
-            {"_id": ObjectId(conversation_id)}
-        )
+        conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
         current_n_queries = len(conversation["queries"])
 
         ##generate binary representation of uuid
@@ -527,7 +505,7 @@ def share_conversation():
                 {
                     "prompt_id": prompt_id,
                     "chunks": chunks,
-                    "source": DBRef("vectors",ObjectId(source)) if ObjectId.is_valid(source) else source,
+                    "source": DBRef("vectors", ObjectId(source)) if ObjectId.is_valid(source) else source,
                     "user": user,
                 }
             )
@@ -536,9 +514,7 @@ def share_conversation():
                 api_uuid = pre_existing_api_document["key"]
                 pre_existing = shared_conversations_collections.find_one(
                     {
-                        "conversation_id": DBRef(
-                            "conversations", ObjectId(conversation_id)
-                        ),
+                        "conversation_id": DBRef("conversations", ObjectId(conversation_id)),
                         "isPromptable": isPromptable,
                         "first_n_queries": current_n_queries,
                         "user": user,
@@ -569,15 +545,13 @@ def share_conversation():
                             "api_key": api_uuid,
                         }
                     )
-                    return jsonify(
-                        {"success": True, "identifier": str(explicit_binary.as_uuid())}
-                    )
+                    return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())})
             else:
                 api_key_collection.insert_one(
                     {
                         "name": name,
                         "key": api_uuid,
-                        "source": DBRef("vectors",ObjectId(source)) if ObjectId.is_valid(source) else source,
+                        "source": DBRef("vectors", ObjectId(source)) if ObjectId.is_valid(source) else source,
                         "user": user,
                         "prompt_id": prompt_id,
                         "chunks": chunks,
@@ -598,9 +572,7 @@ def share_conversation():
             )
             ## Identifier as route parameter in frontend
             return (
-                jsonify(
-                    {"success": True, "identifier": str(explicit_binary.as_uuid())}
-                ),
+                jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
                 201,
             )
 
@@ -615,9 +587,7 @@ def share_conversation():
         )
         if pre_existing is not None:
             return (
-                jsonify(
-                    {"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}
-                ),
+                jsonify({"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}),
                 200,
             )
         else:
@@ -635,9 +605,7 @@ def share_conversation():
             )
             ## Identifier as route parameter in frontend
             return (
-                jsonify(
-                    {"success": True, "identifier": str(explicit_binary.as_uuid())}
-                ),
+                jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
                 201,
             )
     except Exception as err:
@@ -649,16 +617,10 @@ def share_conversation():
 @user.route("/api/shared_conversation/<string:identifier>", methods=["GET"])
 def get_publicly_shared_conversations(identifier: str):
     try:
-        query_uuid = Binary.from_uuid(
-            uuid.UUID(identifier), UuidRepresentation.STANDARD
-        )
+        query_uuid = Binary.from_uuid(uuid.UUID(identifier), UuidRepresentation.STANDARD)
         shared = shared_conversations_collections.find_one({"uuid": query_uuid})
         conversation_queries = []
-        if (
-            shared
-            and "conversation_id" in shared
-            and isinstance(shared["conversation_id"], DBRef)
-        ):
+        if shared and "conversation_id" in shared and isinstance(shared["conversation_id"], DBRef):
             # Resolve the DBRef
             conversation_ref = shared["conversation_id"]
             conversation = db.dereference(conversation_ref)
@@ -672,9 +634,7 @@ def get_publicly_shared_conversations(identifier: str):
                     ),
                     404,
                 )
-            conversation_queries = conversation["queries"][
-                : (shared["first_n_queries"])
-            ]
+            conversation_queries = conversation["queries"][: (shared["first_n_queries"])]
             for query in conversation_queries:
                 query.pop("sources")  ## avoid exposing sources
         else:

From 1eb168be55fdcd594c2548f9aa45c4617f778fd8 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Sun, 11 Aug 2024 19:33:31 +0530
Subject: [PATCH 02/18] vector indexes to be named after mongo _id

---
 application/api/internal/routes.py   | 10 +++++--
 application/api/user/routes.py       | 23 +++++++--------
 application/retriever/classic_rag.py |  9 +-----
 application/worker.py                | 43 +++++++++-------------------
 4 files changed, 34 insertions(+), 51 deletions(-)

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index 6039ecdf..f4203822 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -3,7 +3,7 @@ import datetime
 from flask import Blueprint, request, send_from_directory
 from pymongo import MongoClient
 from werkzeug.utils import secure_filename
-
+from bson.objectid import ObjectId
 
 from application.core.settings import settings
 mongo = MongoClient(settings.MONGO_URI)
@@ -35,7 +35,12 @@ def upload_index_files():
         return {"status": "no name"}
     job_name = secure_filename(request.form["name"])
     tokens = secure_filename(request.form["tokens"])
-    save_dir = os.path.join(current_dir, "indexes", user, job_name)
+    """"
+    ObjectId serves as a dir name in application/indexes, 
+    and for indexing the vector metadata in the collection
+    """
+    _id = ObjectId()
+    save_dir = os.path.join(current_dir, "indexes", str(_id))
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
             print("No file part")
@@ -58,6 +63,7 @@ def upload_index_files():
     # create entry in vectors_collection
     vectors_collection.insert_one(
         {
+            "_id":_id,
             "user": user,
             "name": job_name,
             "language": job_name,
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index aab30469..7ce0b2e2 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -116,18 +116,17 @@ def delete_by_ids():
 def delete_old():
     """Delete old indexes."""
     import shutil
-
-    path = request.args.get("path")
-    dirs = path.split("/")
-    dirs_clean = []
-    for i in range(0, len(dirs)):
-        dirs_clean.append(secure_filename(dirs[i]))
-    # check that path strats with indexes or vectors
-
-    if dirs_clean[0] not in ["indexes", "vectors"]:
-        return {"status": "error"}
-    path_clean = "/".join(dirs_clean)
-    vectors_collection.delete_one({"name": dirs_clean[-1], "user": dirs_clean[-2]})
+    name = request.args.get("name")
+    user = request.args.get("user")
+    doc = vectors_collection.find_one({
+        "user":user,
+        "name":name
+    })
+    print("user",user)
+    print("file",name)
+    if(doc is None):
+        return {"status":"not found"},404
+    path_clean = doc["location"]
     if settings.VECTOR_STORE == "faiss":
         try:
             shutil.rmtree(os.path.join(current_dir, path_clean))
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 2b77db34..4a1aa5bc 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -40,14 +40,7 @@ class ClassicRAG(BaseRetriever):
 
     def _get_vectorstore(self, source):
         if "active_docs" in source:
-            if source["active_docs"].split("/")[0] == "default":
-                vectorstore = ""
-            elif source["active_docs"].split("/")[0] == "local":
-                vectorstore = "indexes/" + source["active_docs"]
-            else:
-                vectorstore = "vectors/" + source["active_docs"]
-            if source["active_docs"] == "default":
-                vectorstore = ""
+            vectorstore = "indexes/"+source["active_docs"]
         else:
             vectorstore = ""
         vectorstore = os.path.join("application", vectorstore)
diff --git a/application/worker.py b/application/worker.py
index bd1bc15a..b3258983 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -14,6 +14,7 @@ from application.parser.open_ai_func import call_openai_api
 from application.parser.schema.base import Document
 from application.parser.token_func import group_split
 
+
 # Define a function to extract metadata from a given filename.
 def metadata_from_filename(title):
     store = "/".join(title.split("/")[1:3])
@@ -25,9 +26,7 @@ def generate_random_string(length):
     return "".join([string.ascii_letters[i % 52] for i in range(length)])
 
 
-current_dir = os.path.dirname(
-    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-)
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
 def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
@@ -93,9 +92,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
     print(full_path, file=sys.stderr)
     # check if API_URL env variable is set
     file_data = {"name": name_job, "file": filename, "user": user}
-    response = requests.get(
-        urljoin(settings.API_URL, "/api/download"), params=file_data
-    )
+    response = requests.get(urljoin(settings.API_URL, "/api/download"), params=file_data)
     # check if file is in the response
     print(response, file=sys.stderr)
     file = response.content
@@ -107,9 +104,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
 
     # check if file is .zip and extract it
     if filename.endswith(".zip"):
-        extract_zip_recursive(
-            os.path.join(full_path, filename), full_path, 0, recursion_depth
-        )
+        extract_zip_recursive(os.path.join(full_path, filename), full_path, 0, recursion_depth)
 
     self.update_state(state="PROGRESS", meta={"current": 1})
 
@@ -141,22 +136,16 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
 
     # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
     # and send them to the server (provide user and name in form)
-    file_data = {"name": name_job, "user": user, "tokens":tokens}
+    file_data = {"name": name_job, "user": user, "tokens": tokens}
     if settings.VECTOR_STORE == "faiss":
         files = {
             "file_faiss": open(full_path + "/index.faiss", "rb"),
             "file_pkl": open(full_path + "/index.pkl", "rb"),
         }
-        response = requests.post(
-            urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
-        )
-        response = requests.get(
-            urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)
-        )
+        response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
+        response = requests.get(urljoin(settings.API_URL, "/api/delete_old?name=" + name_job + "&?user=" + user))
     else:
-        response = requests.post(
-            urljoin(settings.API_URL, "/api/upload_index"), data=file_data
-        )
+        response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
 
     # delete local
     shutil.rmtree(full_path)
@@ -196,17 +185,15 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
     self.update_state(state="PROGRESS", meta={"current": 100})
 
     # Proceed with uploading and cleaning as in the original function
-    file_data = {"name": name_job, "user": user, "tokens":tokens}
+    file_data = {"name": name_job, "user": user, "tokens": tokens}
     if settings.VECTOR_STORE == "faiss":
         files = {
             "file_faiss": open(full_path + "/index.faiss", "rb"),
             "file_pkl": open(full_path + "/index.pkl", "rb"),
         }
-        
-        requests.post(
-            urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
-        )
-        requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
+
+        requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
+        requests.get(urljoin(settings.API_URL, "/api/delete_old?name=" + name_job + "&?user=" + user))
     else:
         requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
 
@@ -222,9 +209,7 @@ def count_tokens_docs(docs):
     for doc in docs:
         docs_content += doc.page_content
 
-    tokens, total_price = num_tokens_from_string(
-        string=docs_content, encoding_name="cl100k_base"
-    )
+    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
     # Here we print the number of tokens and the approx user cost with some visually appealing formatting.
     return tokens
 
@@ -234,4 +219,4 @@ def num_tokens_from_string(string: str, encoding_name: str) -> int:
     encoding = tiktoken.get_encoding(encoding_name)
     num_tokens = len(encoding.encode(string))
     total_price = (num_tokens / 1000) * 0.0004
-    return num_tokens, total_price
\ No newline at end of file
+    return num_tokens, total_price

From dc4078d744f7b94bac6edefd48dcbcc9098a82c9 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Sun, 11 Aug 2024 21:26:30 +0530
Subject: [PATCH 03/18] migration(fixes): retriever/sharing endpoints

---
 application/api/answer/routes.py | 16 +++++++++-----
 application/api/user/routes.py   | 38 ++++++++++++++++++--------------
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index 85cc3afd..e2d7b6e8 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -82,9 +82,11 @@ def get_data_from_api_key(api_key):
 
     if "source" in data and isinstance(data["source"], DBRef):
         source_doc = db.dereference(data["source"])
-        data["source"] = str(source_doc._id)
+        data["source"] = str(source_doc["_id"])
         if "retriever" in source_doc:
             data["retriever"] = source_doc["retriever"]
+    else:
+        data["source"] = {}
     return data
 
 
@@ -357,10 +359,14 @@ def api_answer():
             data_key = get_data_from_api_key(data["api_key"])
             chunks = int(data_key["chunks"])
             prompt_id = data_key["prompt_id"]
-            source = data_key["source"]
+            source = {"active_docs": data_key["source"]}
+            retriever_name = data_key["retriever"]
             user_api_key = data["api_key"]
         elif "active_docs" in data:
-            source = data["active_docs"]
+            source = {"active_docs":data["active_docs"]}
+            user_api_key = None
+        else:
+            source = {}
             user_api_key = None
 
         prompt = get_prompt(prompt_id)
@@ -411,10 +417,10 @@ def api_search():
     if "api_key" in data:
         data_key = get_data_from_api_key(data["api_key"])
         chunks = int(data_key["chunks"])
-        source = data_key["source"]
+        source = {"active_docs":data_key["source"]}
         user_api_key = data_key["api_key"]
     elif "active_docs" in data:
-        source = data["active_docs"]
+        source = {"active_docs":data["active_docs"]}
         user_api_key = None
     else:
         source = {}
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 7ce0b2e2..84831a65 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -489,26 +489,31 @@ def share_conversation():
         isPromptable = request.args.get("isPromptable").lower() == "true"
 
         conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
+        if(conversation is None):
+            raise Exception("Conversation does not exist")
         current_n_queries = len(conversation["queries"])
 
         ##generate binary representation of uuid
         explicit_binary = Binary.from_uuid(uuid.uuid4(), UuidRepresentation.STANDARD)
 
         if isPromptable:
-            source = "default" if "source" not in data else data["source"]
             prompt_id = "default" if "prompt_id" not in data else data["prompt_id"]
             chunks = "2" if "chunks" not in data else data["chunks"]
 
             name = conversation["name"] + "(shared)"
-            pre_existing_api_document = api_key_collection.find_one(
-                {
+            new_api_key_data =  {
                     "prompt_id": prompt_id,
                     "chunks": chunks,
-                    "source": DBRef("vectors", ObjectId(source)) if ObjectId.is_valid(source) else source,
                     "user": user,
                 }
+            if "source" in data and ObjectId.is_valid(data["source"]):
+                new_api_key_data["source"] = DBRef("vectors",ObjectId(data["source"]))
+            elif "retriever" in data:
+                new_api_key_data["retriever"] = data["retriever"]
+                 
+            pre_existing_api_document = api_key_collection.find_one(
+                new_api_key_data
             )
-            api_uuid = str(uuid.uuid4())
             if pre_existing_api_document:
                 api_uuid = pre_existing_api_document["key"]
                 pre_existing = shared_conversations_collections.find_one(
@@ -546,17 +551,16 @@ def share_conversation():
                     )
                     return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())})
             else:
-                api_key_collection.insert_one(
-                    {
-                        "name": name,
-                        "key": api_uuid,
-                        "source": DBRef("vectors", ObjectId(source)) if ObjectId.is_valid(source) else source,
-                        "user": user,
-                        "prompt_id": prompt_id,
-                        "chunks": chunks,
-                    }
-                )
-            shared_conversations_collections.insert_one(
+                
+                api_uuid = str(uuid.uuid4())
+                new_api_key_data["key"] = api_uuid
+                new_api_key_data["name"] = name
+                if "source" in data and ObjectId.is_valid(data["source"]):
+                    new_api_key_data["source"] = DBRef("vectors", ObjectId(data["source"]))
+                if "retriever" in data:
+                    new_api_key_data["retriever"] = data["retriever"]
+                api_key_collection.insert_one(new_api_key_data)
+                shared_conversations_collections.insert_one(
                 {
                     "uuid": explicit_binary,
                     "conversation_id": {
@@ -568,7 +572,7 @@ def share_conversation():
                     "user": user,
                     "api_key": api_uuid,
                 }
-            )
+              )
             ## Identifier as route parameter in frontend
             return (
                 jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),

From 7e8dd6bba8f3fb18c2cd682c6c5117e11303da09 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Mon, 12 Aug 2024 01:06:21 +0530
Subject: [PATCH 04/18] fix: get api keys endpoint

---
 application/api/user/routes.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 84831a65..7c6e979c 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -426,9 +426,17 @@ def get_api_keys():
     keys = api_key_collection.find({"user": user})
     list_keys = []
     for key in keys:
-        source_name = (
-            db.dereference(key["source"])["name"] if isinstance(key["source"], DBRef) else key["source"].split("/")[0]
-        )
+        if "source" in key and isinstance(key["source"],DBRef):
+            source = db.dereference(key["source"])
+            if source is None:
+                continue
+            else:
+                source_name = source["name"]
+        elif "retriever" in key:
+            source_name = key["retriever"]
+        else:
+            continue
+            
         list_keys.append(
             {
                 "id": str(key["_id"]),

From deeffbf77d1754b81d35b3353f44cef5f9d2f3ee Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Mon, 12 Aug 2024 15:50:16 +0530
Subject: [PATCH 05/18] fix(retriever):classic should not override

---
 application/api/answer/routes.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index e2d7b6e8..caca7c67 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -78,7 +78,7 @@ def get_data_from_api_key(api_key):
         raise Exception("Invalid API Key, please generate new key", 401)
 
     if "retriever" not in data:
-        data["retriever"] = "classic"
+        data["retriever"] = None
 
     if "source" in data and isinstance(data["source"], DBRef):
         source_doc = db.dereference(data["source"])
@@ -94,7 +94,7 @@ def get_retriever(source_id: str):
     doc = vectors_collection.find_one({"_id": ObjectId(source_id)})
     if doc is None:
         raise Exception("Source document does not exist", 404)
-    retriever_name = "classic" if "retriever" not in doc else doc["retriever"]
+    retriever_name = None if "retriever" not in doc else doc["retriever"]
     return retriever_name
 
 
@@ -255,12 +255,12 @@ def stream():
             chunks = int(data_key["chunks"])
             prompt_id = data_key["prompt_id"]
             source = {"active_docs": data_key["source"]}
-            retriever_name = data_key["retriever"]
+            retriever_name = data_key["retriever"] or retriever_name
             user_api_key = data["api_key"]
 
         elif "active_docs" in data:
             source = {"active_docs" : data["active_docs"]}
-            retriever_name = get_retriever(data["active_docs"])    
+            retriever_name = get_retriever(data["active_docs"]) or retriever_name
             user_api_key = None
 
         else:
@@ -273,7 +273,7 @@ def stream():
             retriever_name = source["active_docs"] """
 
         prompt = get_prompt(prompt_id)
-
+       
         retriever = RetrieverCreator.create_retriever(
             retriever_name,
             question=question,
@@ -360,10 +360,11 @@ def api_answer():
             chunks = int(data_key["chunks"])
             prompt_id = data_key["prompt_id"]
             source = {"active_docs": data_key["source"]}
-            retriever_name = data_key["retriever"]
+            retriever_name = data_key["retriever"] or retriever_name
             user_api_key = data["api_key"]
         elif "active_docs" in data:
             source = {"active_docs":data["active_docs"]}
+            retriever_name = get_retriever(data["active_docs"]) or retriever_name
             user_api_key = None
         else:
             source = {}

From 0891ef6d0ad54b1f1c0aa7406ece3fd85441ba12 Mon Sep 17 00:00:00 2001
From: ManishMadan2882 <manishmadan321@gmail.com>
Date: Wed, 14 Aug 2024 17:15:20 +0530
Subject: [PATCH 06/18] frontend: adapting to migration

---
 frontend/src/components/Dropdown.tsx          |   5 +
 .../src/conversation/conversationHandlers.ts  | 102 +++++++-----------
 .../src/conversation/conversationModels.ts    |  10 ++
 frontend/src/modals/CreateAPIKeyModal.tsx     |  70 ++++++------
 frontend/src/models/misc.ts                   |   1 +
 frontend/src/settings/APIKeys.tsx             |   3 +-
 6 files changed, 94 insertions(+), 97 deletions(-)

diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx
index 17516aaa..0353a191 100644
--- a/frontend/src/components/Dropdown.tsx
+++ b/frontend/src/components/Dropdown.tsx
@@ -26,6 +26,7 @@ function Dropdown({
     | string
     | { label: string; value: string }
     | { value: number; description: string }
+    | { name: string; id: string; type: string }
     | null;
   onSelect:
     | ((value: string) => void)
@@ -96,6 +97,10 @@ function Dropdown({
                     ? selectedValue.value + ` (${selectedValue.description})`
                     : selectedValue.description
                 }`
+              : selectedValue &&
+                'name' in selectedValue &&
+                'id' in selectedValue
+              ? `${selectedValue.name}`
               : placeholder
               ? placeholder
               : 'From URL'}
diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts
index 90bbc0a9..9e3d5d2c 100644
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -1,32 +1,6 @@
 import conversationService from '../api/services/conversationService';
 import { Doc } from '../preferences/preferenceApi';
-import { Answer, FEEDBACK } from './conversationModels';
-
-function getDocPath(selectedDocs: Doc | null): string {
-  let docPath = 'default';
-  if (selectedDocs) {
-    let namePath = selectedDocs.name;
-    if (selectedDocs.language === namePath) {
-      namePath = '.project';
-    }
-    if (selectedDocs.location === 'local') {
-      docPath = 'local' + '/' + selectedDocs.name + '/';
-    } else if (selectedDocs.location === 'remote') {
-      docPath =
-        selectedDocs.language +
-        '/' +
-        namePath +
-        '/' +
-        selectedDocs.version +
-        '/' +
-        selectedDocs.model +
-        '/';
-    } else if (selectedDocs.location === 'custom') {
-      docPath = selectedDocs.docLink;
-    }
-  }
-  return docPath;
-}
+import { Answer, FEEDBACK, RetrievalPayload } from './conversationModels';
 
 export function handleFetchAnswer(
   question: string,
@@ -54,23 +28,22 @@ export function handleFetchAnswer(
       title: any;
     }
 > {
-  const docPath = getDocPath(selectedDocs);
   history = history.map((item) => {
     return { prompt: item.prompt, response: item.response };
   });
+  const payload: RetrievalPayload = {
+    question: question,
+    history: JSON.stringify(history),
+    conversation_id: conversationId,
+    prompt_id: promptId,
+    chunks: chunks,
+    token_limit: token_limit,
+  };
+  if (selectedDocs && 'id' in selectedDocs)
+    payload.active_docs = selectedDocs.id as string;
+  else payload.retriever = selectedDocs?.docLink as string;
   return conversationService
-    .answer(
-      {
-        question: question,
-        history: history,
-        active_docs: docPath,
-        conversation_id: conversationId,
-        prompt_id: promptId,
-        chunks: chunks,
-        token_limit: token_limit,
-      },
-      signal,
-    )
+    .answer(payload, signal)
     .then((response) => {
       if (response.ok) {
         return response.json();
@@ -101,24 +74,24 @@ export function handleFetchAnswerSteaming(
   token_limit: number,
   onEvent: (event: MessageEvent) => void,
 ): Promise<Answer> {
-  const docPath = getDocPath(selectedDocs);
   history = history.map((item) => {
     return { prompt: item.prompt, response: item.response };
   });
+  const payload: RetrievalPayload = {
+    question: question,
+    history: JSON.stringify(history),
+    conversation_id: conversationId,
+    prompt_id: promptId,
+    chunks: chunks,
+    token_limit: token_limit,
+  };
+  if (selectedDocs && 'id' in selectedDocs)
+    payload.active_docs = selectedDocs.id as string;
+  else payload.retriever = selectedDocs?.docLink as string;
+
   return new Promise<Answer>((resolve, reject) => {
     conversationService
-      .answerStream(
-        {
-          question: question,
-          active_docs: docPath,
-          history: JSON.stringify(history),
-          conversation_id: conversationId,
-          prompt_id: promptId,
-          chunks: chunks,
-          token_limit: token_limit,
-        },
-        signal,
-      )
+      .answerStream(payload, signal)
       .then((response) => {
         if (!response.body) throw Error('No response body');
 
@@ -175,16 +148,21 @@ export function handleSearch(
   chunks: string,
   token_limit: number,
 ) {
-  const docPath = getDocPath(selectedDocs);
+  history = history.map((item) => {
+    return { prompt: item.prompt, response: item.response };
+  });
+  const payload: RetrievalPayload = {
+    question: question,
+    history: JSON.stringify(history),
+    conversation_id: conversation_id,
+    chunks: chunks,
+    token_limit: token_limit,
+  };
+  if (selectedDocs && 'id' in selectedDocs)
+    payload.active_docs = selectedDocs.id as string;
+  else payload.retriever = selectedDocs?.docLink as string;
   return conversationService
-    .search({
-      question: question,
-      active_docs: docPath,
-      conversation_id,
-      history,
-      chunks: chunks,
-      token_limit: token_limit,
-    })
+    .search(payload)
     .then((response) => response.json())
     .then((data) => {
       return data;
diff --git a/frontend/src/conversation/conversationModels.ts b/frontend/src/conversation/conversationModels.ts
index 347a2521..bf86678b 100644
--- a/frontend/src/conversation/conversationModels.ts
+++ b/frontend/src/conversation/conversationModels.ts
@@ -31,3 +31,13 @@ export interface Query {
   conversationId?: string | null;
   title?: string | null;
 }
+export interface RetrievalPayload {
+  question: string;
+  active_docs?: string; // id of the selected source doc, when it has one
+  retriever?: string; // set by the handlers from the selected doc
+  history: string; // JSON.stringify'd list of { prompt, response } turns
+  conversation_id: string | null;
+  prompt_id?: string | null; // optional: handleSearch sends no prompt_id
+  chunks: string;
+  token_limit: number;
+}
diff --git a/frontend/src/modals/CreateAPIKeyModal.tsx b/frontend/src/modals/CreateAPIKeyModal.tsx
index 2f67d83b..e59fd37e 100644
--- a/frontend/src/modals/CreateAPIKeyModal.tsx
+++ b/frontend/src/modals/CreateAPIKeyModal.tsx
@@ -22,8 +22,9 @@ export default function CreateAPIKeyModal({
 
   const [APIKeyName, setAPIKeyName] = React.useState<string>('');
   const [sourcePath, setSourcePath] = React.useState<{
-    label: string;
-    value: string;
+    name: string;
+    id: string;
+    type: string;
   } | null>(null);
   const [prompt, setPrompt] = React.useState<{
     name: string;
@@ -41,27 +42,17 @@ export default function CreateAPIKeyModal({
       ? docs
           .filter((doc) => doc.model === embeddingsName)
           .map((doc: Doc) => {
-            let namePath = doc.name;
-            if (doc.language === namePath) {
-              namePath = '.project';
-            }
-            let docPath = 'default';
-            if (doc.location === 'local') {
-              docPath = 'local' + '/' + doc.name + '/';
-            } else if (doc.location === 'remote') {
-              docPath =
-                doc.language +
-                '/' +
-                namePath +
-                '/' +
-                doc.version +
-                '/' +
-                doc.model +
-                '/';
+            if ('id' in doc) {
+              return {
+                name: doc.name,
+                id: doc.id as string,
+                type: 'local',
+              };
             }
             return {
-              label: doc.name,
-              value: docPath,
+              name: doc.name as string,
+              id: doc.docLink as string,
+              type: 'default',
             };
           })
       : [];
@@ -107,9 +98,14 @@ export default function CreateAPIKeyModal({
           <Dropdown
             placeholder={t('modals.createAPIKey.sourceDoc')}
             selectedValue={sourcePath}
-            onSelect={(selection: { label: string; value: string }) =>
-              setSourcePath(selection)
-            }
+            onSelect={(selection: {
+              name: string;
+              id: string;
+              type: string;
+            }) => {
+              setSourcePath(selection);
+              console.log(selection);
+            }}
             options={extractDocPaths()}
             size="w-full"
             rounded="xl"
@@ -142,16 +138,22 @@ export default function CreateAPIKeyModal({
         </div>
         <button
           disabled={!sourcePath || APIKeyName.length === 0 || !prompt}
-          onClick={() =>
-            sourcePath &&
-            prompt &&
-            createAPIKey({
-              name: APIKeyName,
-              source: sourcePath.value,
-              prompt_id: prompt.id,
-              chunks: chunk,
-            })
-          }
+          onClick={() => {
+            if (sourcePath && prompt) {
+              const payload: any = {
+                name: APIKeyName,
+                prompt_id: prompt.id,
+                chunks: chunk,
+              };
+              if (sourcePath.type === 'default') {
+                payload.retriever = sourcePath.id;
+              }
+              if (sourcePath.type === 'local') {
+                payload.source = sourcePath.id;
+              }
+              createAPIKey(payload);
+            }
+          }}
           className="float-right mt-4 rounded-full bg-purple-30 px-5 py-2 text-sm text-white hover:bg-[#6F3FD1] disabled:opacity-50"
         >
           {t('modals.createAPIKey.create')}
diff --git a/frontend/src/models/misc.ts b/frontend/src/models/misc.ts
index ab8d6b85..71ecc084 100644
--- a/frontend/src/models/misc.ts
+++ b/frontend/src/models/misc.ts
@@ -4,6 +4,7 @@ export type User = {
   avatar: string;
 };
 export type Doc = {
+  id?: string;
   location: string;
   name: string;
   language: string;
diff --git a/frontend/src/settings/APIKeys.tsx b/frontend/src/settings/APIKeys.tsx
index 18904d24..ba5a784e 100644
--- a/frontend/src/settings/APIKeys.tsx
+++ b/frontend/src/settings/APIKeys.tsx
@@ -48,7 +48,8 @@ export default function APIKeys() {
 
   const handleCreateKey = (payload: {
     name: string;
-    source: string;
+    source?: string;
+    retriever?: string;
     prompt_id: string;
     chunks: string;
   }) => {

From a353e696486463ac808b299926541e4de1b23939 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 8 Sep 2024 16:59:51 +0100
Subject: [PATCH 07/18] feat: new vectors structure

---
 application/api/internal/routes.py            | 21 +++++------
 application/api/user/routes.py                | 28 +++------------
 application/retriever/classic_rag.py          | 10 +-----
 application/vectorstore/elasticsearch.py      |  1 -
 application/vectorstore/faiss.py              | 12 ++++++-
 application/worker.py                         | 15 +++++---
 frontend/src/Navigation.tsx                   |  4 +--
 frontend/src/components/SourceDropdown.tsx    |  3 --
 .../src/conversation/conversationHandlers.ts  |  6 ++--
 .../src/modals/ShareConversationModal.tsx     | 20 +----------
 frontend/src/preferences/preferenceApi.ts     | 18 +++-------
 frontend/src/preferences/preferenceSlice.ts   |  8 ++---
 frontend/src/settings/index.tsx               |  3 +-
 scripts/migrate_to_v1_vectorstore.py          | 35 +++++++++++++++++++
 14 files changed, 85 insertions(+), 99 deletions(-)
 create mode 100644 scripts/migrate_to_v1_vectorstore.py

diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index f4203822..f6eef4c4 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -35,12 +35,12 @@ def upload_index_files():
         return {"status": "no name"}
     job_name = secure_filename(request.form["name"])
     tokens = secure_filename(request.form["tokens"])
-    """"
-    ObjectId serves as a dir name in application/indexes, 
-    and for indexing the vector metadata in the collection
-    """
-    _id = ObjectId()
-    save_dir = os.path.join(current_dir, "indexes", str(_id))
+    retriever = secure_filename(request.form["retriever"])
+    id = secure_filename(request.form["id"])
+    type = secure_filename(request.form["type"])
+    remote_data = secure_filename(request.form["remote_data"]) if "remote_data" in  request.form else None
+
+    save_dir = os.path.join(current_dir, "indexes", str(id))
     if settings.VECTOR_STORE == "faiss":
         if "file_faiss" not in request.files:
             print("No file part")
@@ -63,15 +63,16 @@ def upload_index_files():
     # create entry in vectors_collection
     vectors_collection.insert_one(
         {
-            "_id":_id,
+            "_id": ObjectId(id),
             "user": user,
             "name": job_name,
             "language": job_name,
-            "location": save_dir,
             "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
             "model": settings.EMBEDDINGS_NAME,
-            "type": "local",
-            "tokens": tokens
+            "type": type,
+            "tokens": tokens,
+            "retriever": retriever,
+            "remote_data": remote_data
         }
     )
     return {"status": "ok"}
\ No newline at end of file
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 7c6e979c..43e532e1 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -237,15 +237,11 @@ def combined_json():
     data = [
         {
             "name": "default",
-            "language": "default",
-            "version": "",
-            "description": "default",
-            "fullName": "default",
             "date": "default",
-            "docLink": "default",
             "model": settings.EMBEDDINGS_NAME,
             "location": "remote",
             "tokens": "",
+            "retriever": "classic",
         }
     ]
     # structure: name, language, version, description, fullName, date, docLink
@@ -255,35 +251,22 @@ def combined_json():
             {
                 "id": str(index["_id"]),
                 "name": index["name"],
-                "language": index["language"],
-                "version": "",
-                "description": index["name"],
-                "fullName": index["name"],
                 "date": index["date"],
-                "docLink": index["location"],
                 "model": settings.EMBEDDINGS_NAME,
                 "location": "local",
                 "tokens": index["tokens"] if ("tokens" in index.keys()) else "",
+                "retriever": index["retriever"] if ("retriever" in index.keys()) else "classic",
             }
         )
-    if settings.VECTOR_STORE == "faiss":
-        data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
-        for index in data_remote:
-            index["location"] = "remote"
-            data.append(index)
     if "duckduck_search" in settings.RETRIEVERS_ENABLED:
         data.append(
             {
                 "name": "DuckDuckGo Search",
-                "language": "en",
-                "version": "",
-                "description": "duckduck_search",
-                "fullName": "DuckDuckGo Search",
                 "date": "duckduck_search",
-                "docLink": "duckduck_search",
                 "model": settings.EMBEDDINGS_NAME,
                 "location": "custom",
                 "tokens": "",
+                "retriever": "duckduck_search",
             }
         )
     if "brave_search" in settings.RETRIEVERS_ENABLED:
@@ -291,14 +274,11 @@ def combined_json():
             {
                 "name": "Brave Search",
                 "language": "en",
-                "version": "",
-                "description": "brave_search",
-                "fullName": "Brave Search",
                 "date": "brave_search",
-                "docLink": "brave_search",
                 "model": settings.EMBEDDINGS_NAME,
                 "location": "custom",
                 "tokens": "",
+                "retriever": "brave_search",
             }
         )
 
diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 4a1aa5bc..810bb179 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -21,7 +21,7 @@ class ClassicRAG(BaseRetriever):
         user_api_key=None,
     ):
         self.question = question
-        self.vectorstore = self._get_vectorstore(source=source)
+        self.vectorstore = source['active_docs'] if 'active_docs' in source else None
         self.chat_history = chat_history
         self.prompt = prompt
         self.chunks = chunks
@@ -38,14 +38,6 @@ class ClassicRAG(BaseRetriever):
         )
         self.user_api_key = user_api_key
 
-    def _get_vectorstore(self, source):
-        if "active_docs" in source:
-            vectorstore = "indexes/"+source["active_docs"]
-        else:
-            vectorstore = ""
-        vectorstore = os.path.join("application", vectorstore)
-        return vectorstore
-
     def _get_data(self):
         if self.chunks == 0:
             docs = []
diff --git a/application/vectorstore/elasticsearch.py b/application/vectorstore/elasticsearch.py
index bb28d5ce..061292b0 100644
--- a/application/vectorstore/elasticsearch.py
+++ b/application/vectorstore/elasticsearch.py
@@ -210,4 +210,3 @@ class ElasticsearchStore(BaseVectorStore):
     def delete_index(self):
         self._es_connection.delete_by_query(index=self.index_name, query={"match": {
                                       "metadata.store.keyword": self.path}},)
-
diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py
index 8e8f3b8e..957e61ef 100644
--- a/application/vectorstore/faiss.py
+++ b/application/vectorstore/faiss.py
@@ -1,12 +1,22 @@
 from langchain_community.vectorstores import FAISS
 from application.vectorstore.base import BaseVectorStore
 from application.core.settings import settings
+import os
+
+def get_vectorstore(path):
+    """Return the folder that holds the index named by ``path``."""
+    if not path:
+        # Nothing selected: resolve to the bare "application" directory.
+        return os.path.join("application")
+    index_subdir = "indexes/"+path
+    # Indexes are looked up under application/indexes/<path>.
+    return os.path.join("application", index_subdir)
 
 class FaissStore(BaseVectorStore):
 
     def __init__(self, path, embeddings_key, docs_init=None):
         super().__init__()
-        self.path = path
+        self.path = get_vectorstore(path)
         embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
         if docs_init:
             self.docsearch = FAISS.from_documents(
diff --git a/application/worker.py b/application/worker.py
index b3258983..852d9785 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -6,6 +6,7 @@ import tiktoken
 from urllib.parse import urljoin
 
 import requests
+from bson.objectid import ObjectId
 
 from application.core.settings import settings
 from application.parser.file.bulk import SimpleDirectoryReader
@@ -57,7 +58,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
 
 
 # Define the main function for ingesting and processing documents.
-def ingest_worker(self, directory, formats, name_job, filename, user):
+def ingest_worker(self, directory, formats, name_job, filename, user, retriever="classic"):
     """
     Ingest and process documents.
 
@@ -68,6 +69,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
         name_job (str): Name of the job for this ingestion task.
         filename (str): Name of the file to be ingested.
         user (str): Identifier for the user initiating the ingestion.
+        retriever (str): Type of retriever to use for processing the documents.
 
     Returns:
         dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
@@ -136,7 +138,8 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
 
     # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
     # and send them to the server (provide user and name in form)
-    file_data = {"name": name_job, "user": user, "tokens": tokens}
+    id = ObjectId()
+    file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
     if settings.VECTOR_STORE == "faiss":
         files = {
             "file_faiss": open(full_path + "/index.faiss", "rb"),
@@ -160,7 +163,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
     }
 
 
-def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
+def remote_worker(self, source_data, name_job, user, loader, directory="temp", retriever="classic"):
     token_check = True
     min_tokens = 150
     max_tokens = 1250
@@ -180,12 +183,14 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
         token_check=token_check,
     )
     # docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
-    call_openai_api(docs, full_path, self)
     tokens = count_tokens_docs(docs)
+    call_openai_api(docs, full_path, self)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
     # Proceed with uploading and cleaning as in the original function
-    file_data = {"name": name_job, "user": user, "tokens": tokens}
+    id = ObjectId()
+    file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, 
+                 "id": str(id), 'type': loader, 'remote_data': source_data}
     if settings.VECTOR_STORE == "faiss":
         files = {
             "file_faiss": open(full_path + "/index.faiss", "rb"),
diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx
index cbfe5d95..b67d874b 100644
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -124,10 +124,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
   };
 
   const handleDeleteClick = (doc: Doc) => {
-    const docPath = `indexes/local/${doc.name}`;
-
     userService
-      .deletePath(docPath)
+      .deletePath(doc.id ?? '')
       .then(() => {
         return getDocs();
       })
diff --git a/frontend/src/components/SourceDropdown.tsx b/frontend/src/components/SourceDropdown.tsx
index ce130b4d..e76b9664 100644
--- a/frontend/src/components/SourceDropdown.tsx
+++ b/frontend/src/components/SourceDropdown.tsx
@@ -63,9 +63,6 @@ function SourceDropdown({
             <p className="max-w-3/4 truncate whitespace-nowrap">
               {selectedDocs?.name || 'None'}
             </p>
-            <p className="flex flex-col items-center justify-center">
-              {selectedDocs?.version}
-            </p>
           </div>
         </span>
         <img
diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts
index 9e3d5d2c..eaea1b4f 100644
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -41,7 +41,7 @@ export function handleFetchAnswer(
   };
   if (selectedDocs && 'id' in selectedDocs)
     payload.active_docs = selectedDocs.id as string;
-  else payload.retriever = selectedDocs?.docLink as string;
+  payload.retriever = selectedDocs?.retriever as string;
   return conversationService
     .answer(payload, signal)
     .then((response) => {
@@ -87,7 +87,7 @@ export function handleFetchAnswerSteaming(
   };
   if (selectedDocs && 'id' in selectedDocs)
     payload.active_docs = selectedDocs.id as string;
-  else payload.retriever = selectedDocs?.docLink as string;
+  payload.retriever = selectedDocs?.retriever as string;
 
   return new Promise<Answer>((resolve, reject) => {
     conversationService
@@ -160,7 +160,7 @@ export function handleSearch(
   };
   if (selectedDocs && 'id' in selectedDocs)
     payload.active_docs = selectedDocs.id as string;
-  else payload.retriever = selectedDocs?.docLink as string;
+  payload.retriever = selectedDocs?.retriever as string;
   return conversationService
     .search(payload)
     .then((response) => response.json())
diff --git a/frontend/src/modals/ShareConversationModal.tsx b/frontend/src/modals/ShareConversationModal.tsx
index c7ef0ad6..fbb49468 100644
--- a/frontend/src/modals/ShareConversationModal.tsx
+++ b/frontend/src/modals/ShareConversationModal.tsx
@@ -46,27 +46,9 @@ export const ShareConversationModal = ({
       ? docs
           .filter((doc) => doc.model === embeddingsName)
           .map((doc: Doc) => {
-            let namePath = doc.name;
-            if (doc.language === namePath) {
-              namePath = '.project';
-            }
-            let docPath = 'default';
-            if (doc.location === 'local') {
-              docPath = 'local' + '/' + doc.name + '/';
-            } else if (doc.location === 'remote') {
-              docPath =
-                doc.language +
-                '/' +
-                namePath +
-                '/' +
-                doc.version +
-                '/' +
-                doc.model +
-                '/';
-            }
             return {
               label: doc.name,
-              value: docPath,
+              value: doc.id ?? 'default',
             };
           })
       : [];
diff --git a/frontend/src/preferences/preferenceApi.ts b/frontend/src/preferences/preferenceApi.ts
index 29a41645..96f87e1d 100644
--- a/frontend/src/preferences/preferenceApi.ts
+++ b/frontend/src/preferences/preferenceApi.ts
@@ -3,15 +3,12 @@ import userService from '../api/services/userService';
 
 // not all properties in Doc are going to be present. Make some optional
 export type Doc = {
-  location: string;
+  id: string | null;
   name: string;
-  language: string;
-  version: string;
-  description: string;
-  fullName: string;
+  type: string;
   date: string;
-  docLink: string;
   model: string;
+  retriever: string;
 };
 
 //Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
@@ -78,17 +75,10 @@ export function setLocalPrompt(prompt: string): void {
 
 export function setLocalRecentDocs(doc: Doc): void {
   localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(doc));
-  let namePath = doc.name;
-  if (doc.language === namePath) {
-    namePath = '.project';
-  }
 
   let docPath = 'default';
-  if (doc.location === 'local') {
+  if (doc.type === 'local') {
     docPath = 'local' + '/' + doc.name + '/';
-  } else if (doc.location === 'remote') {
-    docPath =
-      doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
   }
   userService
     .checkDocs({
diff --git a/frontend/src/preferences/preferenceSlice.ts b/frontend/src/preferences/preferenceSlice.ts
index 370f260e..45e55d3f 100644
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -25,15 +25,13 @@ const initialState: Preference = {
   chunks: '2',
   token_limit: 2000,
   selectedDocs: {
+    id: 'default',
     name: 'default',
-    language: 'default',
-    location: 'default',
-    version: 'default',
-    description: 'default',
-    fullName: 'default',
+    type: 'remote',
     date: 'default',
     docLink: 'default',
     model: 'openai_text-embedding-ada-002',
+    retriever: 'classic',
   } as Doc,
   sourceDocs: null,
   conversations: null,
diff --git a/frontend/src/settings/index.tsx b/frontend/src/settings/index.tsx
index 226ebb3b..141bd227 100644
--- a/frontend/src/settings/index.tsx
+++ b/frontend/src/settings/index.tsx
@@ -35,9 +35,8 @@ export default function Settings() {
   };
 
   const handleDeleteClick = (index: number, doc: Doc) => {
-    const docPath = 'indexes/' + 'local' + '/' + doc.name;
     userService
-      .deletePath(docPath)
+      .deletePath(doc.id ?? '')
       .then((response) => {
         if (response.ok && documents) {
           const updatedDocuments = [
diff --git a/scripts/migrate_to_v1_vectorstore.py b/scripts/migrate_to_v1_vectorstore.py
new file mode 100644
index 00000000..5255d222
--- /dev/null
+++ b/scripts/migrate_to_v1_vectorstore.py
@@ -0,0 +1,35 @@
+import pymongo
+import os
+
+def migrate_to_v1_vectorstore_mongo():
+    client = pymongo.MongoClient("mongodb://localhost:27017/")
+    db = client["docsgpt"]
+    vectors_collection = db["vectors"]
+
+    for vector in vectors_collection.find():
+        if "location" in vector:
+            del vector["location"]
+        if "retriever" not in vector:
+            vector["retriever"] = "classic"
+            vector["remote_data"] = None
+        vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector})
+
+    client.close()
+
+def migrate_faiss_to_v1_vectorstore():
+    client = pymongo.MongoClient("mongodb://localhost:27017/")
+    db = client["docsgpt"]
+    vectors_collection = db["vectors"]
+
+    for vector in vectors_collection.find():
+        old_path = f"./application/indexes/{vector['user']}/{vector['name']}"
+        new_path = f"./application/indexes/{vector['_id']}"
+        try:
+            os.rename(old_path, new_path)
+        except OSError as e:
+            print(f"Error moving {old_path} to {new_path}: {e}")
+
+    client.close()
+
+migrate_faiss_to_v1_vectorstore()
+migrate_to_v1_vectorstore_mongo()
\ No newline at end of file

From 6c0da2ea94f43c9c5649ebffe9f281e6a3483445 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 8 Sep 2024 17:02:48 +0100
Subject: [PATCH 08/18] lint: ruff fix

---
 application/retriever/classic_rag.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/application/retriever/classic_rag.py b/application/retriever/classic_rag.py
index 810bb179..499a4b7e 100644
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,4 +1,3 @@
-import os
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator

From 2fce25b0c8700fed39957ecf9d67918c5c8e8648 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 8 Sep 2024 22:52:09 +0100
Subject: [PATCH 09/18] fix: Doc type

---
 frontend/src/modals/CreateAPIKeyModal.tsx    |  6 +++---
 frontend/src/models/misc.ts                  |  8 ++------
 frontend/src/preferences/SelectDocsModal.tsx |  7 +++----
 frontend/src/preferences/preferenceApi.ts    | 11 +----------
 4 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/frontend/src/modals/CreateAPIKeyModal.tsx b/frontend/src/modals/CreateAPIKeyModal.tsx
index e59fd37e..71d86330 100644
--- a/frontend/src/modals/CreateAPIKeyModal.tsx
+++ b/frontend/src/modals/CreateAPIKeyModal.tsx
@@ -50,9 +50,9 @@ export default function CreateAPIKeyModal({
               };
             }
             return {
-              name: doc.name as string,
-              id: doc.docLink as string,
-              type: 'default',
+              name: doc.name,
+              id: doc.id ?? 'default',
+              type: doc.type ?? 'default',
             };
           })
       : [];
diff --git a/frontend/src/models/misc.ts b/frontend/src/models/misc.ts
index 71ecc084..bf77fd0b 100644
--- a/frontend/src/models/misc.ts
+++ b/frontend/src/models/misc.ts
@@ -5,16 +5,12 @@ export type User = {
 };
 export type Doc = {
   id?: string;
-  location: string;
   name: string;
-  language: string;
-  version: string;
-  description: string;
-  fullName: string;
   date: string;
-  docLink: string;
   model: string;
   tokens?: string;
+  type?: string;
+  retriever?: string;
 };
 
 export type PromptProps = {
diff --git a/frontend/src/preferences/SelectDocsModal.tsx b/frontend/src/preferences/SelectDocsModal.tsx
index edb2714b..f2aa4754 100644
--- a/frontend/src/preferences/SelectDocsModal.tsx
+++ b/frontend/src/preferences/SelectDocsModal.tsx
@@ -8,7 +8,8 @@ import {
   selectSourceDocs,
   selectSelectedDocs,
 } from './preferenceSlice';
-import { getDocs, Doc } from './preferenceApi';
+import { Doc } from '../models/misc';
+import { getDocs } from './preferenceApi';
 
 export default function APIKeyModal({
   modalState,
@@ -75,9 +76,7 @@ export default function APIKeyModal({
                 {!localSelectedDocs ? (
                   <p className="py-3 text-gray-500">Select</p>
                 ) : (
-                  <p className="py-3">
-                    {localSelectedDocs.name} {localSelectedDocs.version}
-                  </p>
+                  <p className="py-3">{localSelectedDocs.name}</p>
                 )}
               </div>
               {isDocsListOpen && (
diff --git a/frontend/src/preferences/preferenceApi.ts b/frontend/src/preferences/preferenceApi.ts
index 96f87e1d..90dbff7a 100644
--- a/frontend/src/preferences/preferenceApi.ts
+++ b/frontend/src/preferences/preferenceApi.ts
@@ -1,15 +1,6 @@
 import conversationService from '../api/services/conversationService';
 import userService from '../api/services/userService';
-
-// not all properties in Doc are going to be present. Make some optional
-export type Doc = {
-  id: string | null;
-  name: string;
-  type: string;
-  date: string;
-  model: string;
-  retriever: string;
-};
+import { Doc } from '../models/misc';
 
 //Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
 export async function getDocs(): Promise<Doc[] | null> {

From fe78e9a3361e7fd8e2b7ed2a6f59b6f4b06a9972 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 8 Sep 2024 22:55:45 +0100
Subject: [PATCH 10/18] lint: more linting

---
 frontend/src/Navigation.tsx                 | 4 ++--
 frontend/src/preferences/preferenceSlice.ts | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx
index b67d874b..4a617970 100644
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -23,9 +23,9 @@ import {
 import ConversationTile from './conversation/ConversationTile';
 import { useDarkTheme, useMediaQuery, useOutsideAlerter } from './hooks';
 import DeleteConvModal from './modals/DeleteConvModal';
-import { ActiveState } from './models/misc';
+import { ActiveState, Doc } from './models/misc';
 import APIKeyModal from './preferences/APIKeyModal';
-import { Doc, getConversations, getDocs } from './preferences/preferenceApi';
+import { getConversations, getDocs } from './preferences/preferenceApi';
 import {
   selectApiKeyStatus,
   selectConversationId,
diff --git a/frontend/src/preferences/preferenceSlice.ts b/frontend/src/preferences/preferenceSlice.ts
index 45e55d3f..6b33f99c 100644
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -4,9 +4,9 @@ import {
   createSlice,
   isAnyOf,
 } from '@reduxjs/toolkit';
-import { Doc, setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
+import { setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
 import { RootState } from '../store';
-import { ActiveState } from '../models/misc';
+import { ActiveState, Doc } from '../models/misc';
 
 interface Preference {
   apiKey: string;

From f105fd1b2c03359884e698b47ef07d82953be3a9 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 8 Sep 2024 23:19:10 +0100
Subject: [PATCH 11/18] lint: final fix

---
 frontend/src/components/SourceDropdown.tsx        | 2 +-
 frontend/src/conversation/conversationHandlers.ts | 2 +-
 frontend/src/preferences/SelectDocsModal.tsx      | 4 +---
 frontend/src/settings/Documents.tsx               | 6 ++----
 frontend/src/settings/index.tsx                   | 2 +-
 frontend/src/store.ts                             | 9 +++------
 frontend/src/upload/Upload.tsx                    | 4 ++--
 7 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/frontend/src/components/SourceDropdown.tsx b/frontend/src/components/SourceDropdown.tsx
index e76b9664..d5146da5 100644
--- a/frontend/src/components/SourceDropdown.tsx
+++ b/frontend/src/components/SourceDropdown.tsx
@@ -1,7 +1,7 @@
 import React from 'react';
 import Trash from '../assets/trash.svg';
 import Arrow2 from '../assets/dropdown-arrow.svg';
-import { Doc } from '../preferences/preferenceApi';
+import { Doc } from '../models/misc';
 import { useDispatch } from 'react-redux';
 import { useTranslation } from 'react-i18next';
 type Props = {
diff --git a/frontend/src/conversation/conversationHandlers.ts b/frontend/src/conversation/conversationHandlers.ts
index eaea1b4f..335f07e9 100644
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -1,5 +1,5 @@
 import conversationService from '../api/services/conversationService';
-import { Doc } from '../preferences/preferenceApi';
+import { Doc } from '../models/misc';
 import { Answer, FEEDBACK, RetrievalPayload } from './conversationModels';
 
 export function handleFetchAnswer(
diff --git a/frontend/src/preferences/SelectDocsModal.tsx b/frontend/src/preferences/SelectDocsModal.tsx
index f2aa4754..97d7626d 100644
--- a/frontend/src/preferences/SelectDocsModal.tsx
+++ b/frontend/src/preferences/SelectDocsModal.tsx
@@ -93,9 +93,7 @@ export default function APIKeyModal({
                             }}
                             className="h-10 w-full cursor-pointer border-x-2 border-b-2 hover:bg-gray-100"
                           >
-                            <p className="ml-5 py-3">
-                              {doc.name} {doc.version}
-                            </p>
+                            <p className="ml-5 py-3">{doc.name}</p>
                           </div>
                         );
                       }
diff --git a/frontend/src/settings/Documents.tsx b/frontend/src/settings/Documents.tsx
index 19bde1fa..342e2f52 100644
--- a/frontend/src/settings/Documents.tsx
+++ b/frontend/src/settings/Documents.tsx
@@ -61,12 +61,10 @@ const Documents: React.FC<DocumentsProps> = ({
                       {document.tokens ? formatTokens(+document.tokens) : ''}
                     </td>
                     <td className="border-r border-t px-4 py-2">
-                      {document.location === 'remote'
-                        ? 'Pre-loaded'
-                        : 'Private'}
+                      {document.type === 'remote' ? 'Pre-loaded' : 'Private'}
                     </td>
                     <td className="border-t px-4 py-2">
-                      {document.location !== 'remote' && (
+                      {document.type !== 'remote' && (
                         <img
                           src={Trash}
                           alt="Delete"
diff --git a/frontend/src/settings/index.tsx b/frontend/src/settings/index.tsx
index 141bd227..5bef3b43 100644
--- a/frontend/src/settings/index.tsx
+++ b/frontend/src/settings/index.tsx
@@ -6,7 +6,7 @@ import userService from '../api/services/userService';
 import ArrowLeft from '../assets/arrow-left.svg';
 import ArrowRight from '../assets/arrow-right.svg';
 import i18n from '../locale/i18n';
-import { Doc } from '../preferences/preferenceApi';
+import { Doc } from '../models/misc';
 import {
   selectSourceDocs,
   setSourceDocs,
diff --git a/frontend/src/store.ts b/frontend/src/store.ts
index 3d1408b3..a085dad3 100644
--- a/frontend/src/store.ts
+++ b/frontend/src/store.ts
@@ -26,15 +26,12 @@ const store = configureStore({
       conversations: null,
       sourceDocs: [
         {
-          location: '',
-          language: '',
           name: 'default',
-          version: '',
           date: '',
-          description: '',
-          docLink: '',
-          fullName: '',
           model: '1.0',
+          type: 'remote',
+          id: 'default',
+          retriever: 'classic',
         },
       ],
       modalState: 'INACTIVE',
diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx
index 5d5d1ac5..7681ce34 100644
--- a/frontend/src/upload/Upload.tsx
+++ b/frontend/src/upload/Upload.tsx
@@ -120,7 +120,7 @@ function Upload({
                     dispatch(setSourceDocs(data));
                     dispatch(
                       setSelectedDocs(
-                        data?.find((d) => d.location.toLowerCase() === 'local'),
+                        data?.find((d) => d.type?.toLowerCase() === 'local'),
                       ),
                     );
                   });
@@ -137,7 +137,7 @@ function Upload({
                     dispatch(setSourceDocs(data));
                     dispatch(
                       setSelectedDocs(
-                        data?.find((d) => d.location.toLowerCase() === 'local'),
+                        data?.find((d) => d.type?.toLowerCase() === 'local'),
                       ),
                     );
                   });

From 8166642ff9504c91fad50ffad28d274e8fbef1c8 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 12:00:59 +0100
Subject: [PATCH 12/18] fix: write id instead of old path on remote db's

---
 application/parser/open_ai_func.py | 4 ++--
 application/worker.py              | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/application/parser/open_ai_func.py b/application/parser/open_ai_func.py
index c58e8059..d9871a17 100755
--- a/application/parser/open_ai_func.py
+++ b/application/parser/open_ai_func.py
@@ -16,7 +16,7 @@ def store_add_texts_with_retry(store, i):
     # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
 
 
-def call_openai_api(docs, folder_name, task_status):
+def call_openai_api(docs, folder_name, id, task_status):
     # Function to create a vector store from the documents and save it to disk
 
     if not os.path.exists(f"{folder_name}"):
@@ -38,7 +38,7 @@ def call_openai_api(docs, folder_name, task_status):
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            path=f"{folder_name}",
+            path=id,
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     # Uncomment for MPNet embeddings
diff --git a/application/worker.py b/application/worker.py
index 852d9785..2b3751d0 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -127,8 +127,9 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
     )
 
     docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+    id = ObjectId()
 
-    call_openai_api(docs, full_path, self)
+    call_openai_api(docs, full_path, id, self)
     tokens = count_tokens_docs(docs)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
@@ -138,7 +139,6 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
 
     # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
     # and send them to the server (provide user and name in form)
-    id = ObjectId()
     file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
     if settings.VECTOR_STORE == "faiss":
         files = {
@@ -184,7 +184,8 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
     )
     # docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
     tokens = count_tokens_docs(docs)
-    call_openai_api(docs, full_path, self)
+    id = ObjectId()
+    call_openai_api(docs, full_path, id, self)
     self.update_state(state="PROGRESS", meta={"current": 100})
 
     # Proceed with uploading and cleaning as in the original function

From 888e13e198ae7c1058bb3ddce197d3adee36ba91 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 13:01:58 +0100
Subject: [PATCH 13/18] feat: mongo vector migrate script

---
 scripts/migrate_to_v1_vectorstore.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/scripts/migrate_to_v1_vectorstore.py b/scripts/migrate_to_v1_vectorstore.py
index 5255d222..3a5a82f0 100644
--- a/scripts/migrate_to_v1_vectorstore.py
+++ b/scripts/migrate_to_v1_vectorstore.py
@@ -31,5 +31,20 @@ def migrate_faiss_to_v1_vectorstore():
 
     client.close()
 
+def migrate_mongo_atlas_vector_to_v1_vectorstore():
+    client = pymongo.MongoClient("mongodb+srv://<username>:<password>@<cluster>/<dbname>?retryWrites=true&w=majority")
+    db = client["docsgpt"]
+    vectors_collection = db["vectors"]
+
+    for vector in vectors_collection.find():
+        if "location" in vector:
+            del vector["location"]
+        if "retriever" not in vector:
+            vector["retriever"] = "classic"
+            vector["remote_data"] = None
+        vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector})
+
+    client.close()
+
 migrate_faiss_to_v1_vectorstore()
 migrate_to_v1_vectorstore_mongo()
\ No newline at end of file

From 1bb81614a5083a80de62e9a99113fcab6124ad09 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 13:37:11 +0100
Subject: [PATCH 14/18] fix: metadata things

---
 application/api/user/routes.py     | 17 ++++++++---------
 application/parser/open_ai_func.py |  8 +++++---
 application/worker.py              |  4 +---
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 43e532e1..2f422d4e 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -116,25 +116,24 @@ def delete_by_ids():
 def delete_old():
     """Delete old indexes."""
     import shutil
-    name = request.args.get("name")
-    user = request.args.get("user")
+    path = request.args.get("path")
     doc = vectors_collection.find_one({
-        "user":user,
-        "name":name
+        "_id": ObjectId(path),
+        "user": "local",
     })
-    print("user",user)
-    print("file",name)
     if(doc is None):
         return {"status":"not found"},404
-    path_clean = doc["location"]
     if settings.VECTOR_STORE == "faiss":
         try:
-            shutil.rmtree(os.path.join(current_dir, path_clean))
+            shutil.rmtree(os.path.join(current_dir, str(doc["_id"])))
         except FileNotFoundError:
             pass
     else:
-        vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean))
+        vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=str(doc["_id"]))
         vetorstore.delete_index()
+    vectors_collection.delete_one({
+        "_id": ObjectId(path),
+    })
 
     return {"status": "ok"}
 
diff --git a/application/parser/open_ai_func.py b/application/parser/open_ai_func.py
index d9871a17..30daee2e 100755
--- a/application/parser/open_ai_func.py
+++ b/application/parser/open_ai_func.py
@@ -11,7 +11,9 @@ from retry import retry
 
 
 @retry(tries=10, delay=60)
-def store_add_texts_with_retry(store, i):
+def store_add_texts_with_retry(store, i, id):
+    # add store to the metadata 
+    i.metadata["store"] = str(id)
     store.add_texts([i.page_content], metadatas=[i.metadata])
     # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
 
@@ -38,7 +40,7 @@ def call_openai_api(docs, folder_name, id, task_status):
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            path=id,
+            path=str(id),
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     # Uncomment for MPNet embeddings
@@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, id, task_status):
             task_status.update_state(
                 state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
             )
-            store_add_texts_with_retry(store, i)
+            store_add_texts_with_retry(store, i, id)
         except Exception as e:
             print(e)
             print("Error on ", i)
diff --git a/application/worker.py b/application/worker.py
index 2b3751d0..7abf0a02 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -18,8 +18,7 @@ from application.parser.token_func import group_split
 
 # Define a function to extract metadata from a given filename.
 def metadata_from_filename(title):
-    store = "/".join(title.split("/")[1:3])
-    return {"title": title, "store": store}
+    return {"title": title}
 
 
 # Define a function to generate a random string of a given length.
@@ -189,7 +188,6 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
     self.update_state(state="PROGRESS", meta={"current": 100})
 
     # Proceed with uploading and cleaning as in the original function
-    id = ObjectId()
     file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, 
                  "id": str(id), 'type': loader, 'remote_data': source_data}
     if settings.VECTOR_STORE == "faiss":

From 2f9c72c1cfb3a828bdcf5ac76467f79b5af92bc0 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 15:46:18 +0100
Subject: [PATCH 15/18] feat: migrate store to source_id

---
 application/api/answer/routes.py         |  4 ++--
 application/api/internal/routes.py       |  6 +++---
 application/api/user/routes.py           | 20 ++++++++++----------
 application/parser/open_ai_func.py       |  8 ++++----
 application/vectorstore/elasticsearch.py | 10 +++++-----
 application/vectorstore/mongodb.py       |  8 ++++----
 application/vectorstore/qdrant.py        |  6 +++---
 application/worker.py                    |  2 --
 frontend/src/api/endpoints.ts            |  2 +-
 scripts/migrate_to_v1_vectorstore.py     | 17 +++++++++++------
 10 files changed, 43 insertions(+), 40 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index caca7c67..de9b8bb3 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -21,7 +21,7 @@ logger = logging.getLogger(__name__)
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
-vectors_collection = db["vectors"]
+sources_collection = db["sources"]
 prompts_collection = db["prompts"]
 api_key_collection = db["api_keys"]
 answer = Blueprint("answer", __name__)
@@ -91,7 +91,7 @@ def get_data_from_api_key(api_key):
 
 
 def get_retriever(source_id: str):
-    doc = vectors_collection.find_one({"_id": ObjectId(source_id)})
+    doc = sources_collection.find_one({"_id": ObjectId(source_id)})
     if doc is None:
         raise Exception("Source document does not exist", 404)
     retriever_name = None if "retriever" not in doc else doc["retriever"]
diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py
index f6eef4c4..cea6c8ca 100755
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -9,7 +9,7 @@ from application.core.settings import settings
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
-vectors_collection = db["vectors"]
+sources_collection = db["sources"]
 
 current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
@@ -60,8 +60,8 @@ def upload_index_files():
             os.makedirs(save_dir)
         file_faiss.save(os.path.join(save_dir, "index.faiss"))
         file_pkl.save(os.path.join(save_dir, "index.pkl"))
-    # create entry in vectors_collection
-    vectors_collection.insert_one(
+    # create entry in sources_collection
+    sources_collection.insert_one(
         {
             "_id": ObjectId(id),
             "user": user,
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 2f422d4e..5ac6e741 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -17,7 +17,7 @@ from application.vectorstore.vector_creator import VectorCreator
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
-vectors_collection = db["vectors"]
+sources_collection = db["sources"]
 prompts_collection = db["prompts"]
 feedback_collection = db["feedback"]
 api_key_collection = db["api_keys"]
@@ -106,7 +106,7 @@ def delete_by_ids():
         return {"status": "error"}
 
     if settings.VECTOR_STORE == "faiss":
-        result = vectors_collection.delete_index(ids=ids)
+        result = sources_collection.delete_index(ids=ids)
         if result:
             return {"status": "ok"}
     return {"status": "error"}
@@ -116,9 +116,9 @@ def delete_by_ids():
 def delete_old():
     """Delete old indexes."""
     import shutil
-    path = request.args.get("path")
-    doc = vectors_collection.find_one({
-        "_id": ObjectId(path),
+    source_id = request.args.get("source_id")
+    doc = sources_collection.find_one({
+        "_id": ObjectId(source_id),
         "user": "local",
     })
     if(doc is None):
@@ -129,10 +129,10 @@ def delete_old():
         except FileNotFoundError:
             pass
     else:
-        vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=str(doc["_id"]))
+        vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, source_id=str(doc["_id"]))
         vetorstore.delete_index()
-    vectors_collection.delete_one({
-        "_id": ObjectId(path),
+    sources_collection.delete_one({
+        "_id": ObjectId(source_id),
     })
 
     return {"status": "ok"}
@@ -244,8 +244,8 @@ def combined_json():
         }
     ]
     # structure: name, language, version, description, fullName, date, docLink
-    # append data from vectors_collection in sorted order in descending order of date
-    for index in vectors_collection.find({"user": user}).sort("date", -1):
+    # append data from sources_collection in sorted order in descending order of date
+    for index in sources_collection.find({"user": user}).sort("date", -1):
         data.append(
             {
                 "id": str(index["_id"]),
diff --git a/application/parser/open_ai_func.py b/application/parser/open_ai_func.py
index 30daee2e..84f92db9 100755
--- a/application/parser/open_ai_func.py
+++ b/application/parser/open_ai_func.py
@@ -12,8 +12,8 @@ from retry import retry
 
 @retry(tries=10, delay=60)
 def store_add_texts_with_retry(store, i, id):
-    # add store to the metadata 
-    i.metadata["store"] = str(id)
+    # add source_id to the metadata 
+    i.metadata["source_id"] = str(id)
     store.add_texts([i.page_content], metadatas=[i.metadata])
     # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
 
@@ -34,13 +34,13 @@ def call_openai_api(docs, folder_name, id, task_status):
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
             docs_init=docs_init,
-            path=f"{folder_name}",
+            source_id=f"{folder_name}",
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            path=str(id),
+            source_id=str(id),
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     # Uncomment for MPNet embeddings
diff --git a/application/vectorstore/elasticsearch.py b/application/vectorstore/elasticsearch.py
index 061292b0..e393e4a5 100644
--- a/application/vectorstore/elasticsearch.py
+++ b/application/vectorstore/elasticsearch.py
@@ -9,9 +9,9 @@ import elasticsearch
 class ElasticsearchStore(BaseVectorStore):
     _es_connection = None  # Class attribute to hold the Elasticsearch connection
 
-    def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
+    def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
         super().__init__()
-        self.path = path.replace("application/indexes/", "").rstrip("/")
+        self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
         self.embeddings_key = embeddings_key
         self.index_name = index_name
         
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
         embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
         vector = embeddings.embed_query(question)
         knn = {
-            "filter": [{"match": {"metadata.store.keyword": self.path}}],
+            "filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
             "field": "vector",
             "k": k,
             "num_candidates": 100,
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
                             }
                         }
                     ],
-                    "filter": [{"match": {"metadata.store.keyword": self.path}}],
+                    "filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
                 }
             },
             "rank": {"rrf": {}},
@@ -209,4 +209,4 @@ class ElasticsearchStore(BaseVectorStore):
 
     def delete_index(self):
         self._es_connection.delete_by_query(index=self.index_name, query={"match": {
-                                      "metadata.store.keyword": self.path}},)
+                                      "metadata.source_id.keyword": self.source_id}},)
diff --git a/application/vectorstore/mongodb.py b/application/vectorstore/mongodb.py
index 337fc41f..32bca489 100644
--- a/application/vectorstore/mongodb.py
+++ b/application/vectorstore/mongodb.py
@@ -5,7 +5,7 @@ from application.vectorstore.document_class import Document
 class MongoDBVectorStore(BaseVectorStore):
     def __init__(
         self,
-        path: str = "",
+        source_id: str = "",
         embeddings_key: str = "embeddings",
         collection: str = "documents",
         index_name: str = "vector_search_index",
@@ -18,7 +18,7 @@ class MongoDBVectorStore(BaseVectorStore):
         self._embedding_key = embedding_key
         self._embeddings_key = embeddings_key
         self._mongo_uri = settings.MONGO_URI
-        self._path = path.replace("application/indexes/", "").rstrip("/")
+        self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
         self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
 
         try:
@@ -46,7 +46,7 @@ class MongoDBVectorStore(BaseVectorStore):
                     "numCandidates": k * 10, 
                     "index": self._index_name,
                     "filter": {
-                        "store": {"$eq": self._path}
+                        "source_id": {"$eq": self._source_id}
                     }
                 }
             }
@@ -123,4 +123,4 @@ class MongoDBVectorStore(BaseVectorStore):
         return result_ids
     
     def delete_index(self, *args, **kwargs):
-        self._collection.delete_many({"store": self._path})
\ No newline at end of file
+        self._collection.delete_many({"source_id": self._source_id})
\ No newline at end of file
diff --git a/application/vectorstore/qdrant.py b/application/vectorstore/qdrant.py
index 482d06a1..3f94505f 100644
--- a/application/vectorstore/qdrant.py
+++ b/application/vectorstore/qdrant.py
@@ -5,12 +5,12 @@ from qdrant_client import models
 
 
 class QdrantStore(BaseVectorStore):
-    def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
+    def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
         self._filter = models.Filter(
             must=[
                 models.FieldCondition(
-                    key="metadata.store",
-                    match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
+                    key="metadata.source_id",
+                    match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
                 )
             ]
         )
diff --git a/application/worker.py b/application/worker.py
index 7abf0a02..40e66431 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -145,7 +145,6 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
             "file_pkl": open(full_path + "/index.pkl", "rb"),
         }
         response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
-        response = requests.get(urljoin(settings.API_URL, "/api/delete_old?name=" + name_job + "&?user=" + user))
     else:
         response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
 
@@ -197,7 +196,6 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
         }
 
         requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
-        requests.get(urljoin(settings.API_URL, "/api/delete_old?name=" + name_job + "&?user=" + user))
     else:
         requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
 
diff --git a/frontend/src/api/endpoints.ts b/frontend/src/api/endpoints.ts
index af2fb920..c06ac3d2 100644
--- a/frontend/src/api/endpoints.ts
+++ b/frontend/src/api/endpoints.ts
@@ -10,7 +10,7 @@ const endpoints = {
     DELETE_PROMPT: '/api/delete_prompt',
     UPDATE_PROMPT: '/api/update_prompt',
     SINGLE_PROMPT: (id: string) => `/api/get_single_prompt?id=${id}`,
-    DELETE_PATH: (docPath: string) => `/api/delete_old?path=${docPath}`,
+    DELETE_PATH: (docPath: string) => `/api/delete_old?source_id=${docPath}`,
     TASK_STATUS: (task_id: string) => `/api/task_status?task_id=${task_id}`,
   },
   CONVERSATION: {
diff --git a/scripts/migrate_to_v1_vectorstore.py b/scripts/migrate_to_v1_vectorstore.py
index 3a5a82f0..9a709795 100644
--- a/scripts/migrate_to_v1_vectorstore.py
+++ b/scripts/migrate_to_v1_vectorstore.py
@@ -5,6 +5,7 @@ def migrate_to_v1_vectorstore_mongo():
     client = pymongo.MongoClient("mongodb://localhost:27017/")
     db = client["docsgpt"]
     vectors_collection = db["vectors"]
+    sources_collection = db["sources"]
 
     for vector in vectors_collection.find():
         if "location" in vector:
@@ -14,6 +15,12 @@ def migrate_to_v1_vectorstore_mongo():
             vector["remote_data"] = None
         vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector})
 
+    # move data from vectors_collection to sources_collection
+    for vector in vectors_collection.find():
+        sources_collection.insert_one(vector)
+
+    vectors_collection.drop()
+
     client.close()
 
 def migrate_faiss_to_v1_vectorstore():
@@ -36,13 +43,11 @@ def migrate_mongo_atlas_vector_to_v1_vectorstore():
     db = client["docsgpt"]
     vectors_collection = db["vectors"]
 
+    # mongodb atlas collection
+    documents_collection = db["documents"]
+
     for vector in vectors_collection.find():
-        if "location" in vector:
-            del vector["location"]
-        if "retriever" not in vector:
-            vector["retriever"] = "classic"
-            vector["remote_data"] = None
-        vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector})
+        documents_collection.update_many({"store": vector["user"] + "/" + vector["name"]}, {"$set": {"source_id": str(vector["_id"])}})
 
     client.close()
 

From 444d50f751fca1fc93f3a8d8c7a73cf0e15c0ab9 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 16:43:20 +0100
Subject: [PATCH 16/18] fix: faiss source name

---
 application/vectorstore/faiss.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py
index 957e61ef..b504ebf8 100644
--- a/application/vectorstore/faiss.py
+++ b/application/vectorstore/faiss.py
@@ -14,9 +14,9 @@ def get_vectorstore(path):
 
 class FaissStore(BaseVectorStore):
 
-    def __init__(self, path, embeddings_key, docs_init=None):
+    def __init__(self, source_id, embeddings_key, docs_init=None):
         super().__init__()
-        self.path = get_vectorstore(path)
+        self.path = get_vectorstore(source_id)
         embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
         if docs_init:
             self.docsearch = FAISS.from_documents(

From 90f64e2527c99e5bc9cb8845543c53285eb03d3a Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 20:22:03 +0100
Subject: [PATCH 17/18] fix: references on share / api keys

---
 application/api/answer/routes.py | 15 -------------
 application/api/user/routes.py   | 36 +++++---------------------------
 2 files changed, 5 insertions(+), 46 deletions(-)

diff --git a/application/api/answer/routes.py b/application/api/answer/routes.py
index de9b8bb3..15feeb3c 100644
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -98,21 +98,6 @@ def get_retriever(source_id: str):
     return retriever_name
 
 
-def get_vectorstore(data):
-    if "active_docs" in data:
-        if data["active_docs"].split("/")[0] == "default":
-            vectorstore = ""
-        elif data["active_docs"].split("/")[0] == "local":
-            vectorstore = "indexes/" + data["active_docs"]
-        else:
-            vectorstore = "vectors/" + data["active_docs"]
-        if data["active_docs"] == "default":
-            vectorstore = ""
-    else:
-        vectorstore = ""
-    vectorstore = os.path.join("application", vectorstore)
-    return vectorstore
-
 
 def is_azure_configured():
     return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 5ac6e741..e6e0cb7d 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -286,39 +286,13 @@ def combined_json():
 
 @user.route("/api/docs_check", methods=["POST"])
 def check_docs():
-    # check if docs exist in a vectorstore folder
     data = request.get_json()
-    # split docs on / and take first part
-    if data["docs"].split("/")[0] == "local":
-        return {"status": "exists"}
+
     vectorstore = "vectors/" + secure_filename(data["docs"])
-    base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
     if os.path.exists(vectorstore) or data["docs"] == "default":
         return {"status": "exists"}
     else:
-        file_url = urlparse(base_path + vectorstore + "index.faiss")
-
-        if (
-            file_url.scheme in ["https"]
-            and file_url.netloc == "raw.githubusercontent.com"
-            and file_url.path.startswith("/arc53/DocsHUB/main/")
-        ):
-            r = requests.get(file_url.geturl())
-            if r.status_code != 200:
-                return {"status": "null"}
-            else:
-                if not os.path.exists(vectorstore):
-                    os.makedirs(vectorstore)
-                with open(vectorstore + "index.faiss", "wb") as f:
-                    f.write(r.content)
-
-                r = requests.get(base_path + vectorstore + "index.pkl")
-                with open(vectorstore + "index.pkl", "wb") as f:
-                    f.write(r.content)
-        else:
-            return {"status": "null"}
-
-        return {"status": "loaded"}
+        return {"status": "not found"}
 
 
 @user.route("/api/create_prompt", methods=["POST"])
@@ -445,7 +419,7 @@ def create_api_key():
         "chunks": chunks,
     }
     if "source" in data and ObjectId.is_valid(data["source"]):
-        new_api_key["source"] = DBRef("vectors", ObjectId(data["source"]))
+        new_api_key["source"] = DBRef("sources", ObjectId(data["source"]))
     if "retriever" in data:
         new_api_key["retriever"] = data["retriever"]
     resp = api_key_collection.insert_one(new_api_key)
@@ -494,7 +468,7 @@ def share_conversation():
                     "user": user,
                 }
             if "source" in data and ObjectId.is_valid(data["source"]):
-                new_api_key_data["source"] = DBRef("vectors",ObjectId(data["source"]))
+                new_api_key_data["source"] = DBRef("sources",ObjectId(data["source"]))
             elif "retriever" in data:
                 new_api_key_data["retriever"] = data["retriever"]
                  
@@ -543,7 +517,7 @@ def share_conversation():
                 new_api_key_data["key"] = api_uuid
                 new_api_key_data["name"] = name
                 if "source" in data and ObjectId.is_valid(data["source"]):
-                    new_api_key_data["source"] = DBRef("vectors", ObjectId(data["source"]))
+                    new_api_key_data["source"] = DBRef("sources", ObjectId(data["source"]))
                 if "retriever" in data:
                     new_api_key_data["retriever"] = data["retriever"]
                 api_key_collection.insert_one(new_api_key_data)

From ca779bb0aff5ebe4ed13b6993f0964a489091d9d Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Mon, 9 Sep 2024 20:24:15 +0100
Subject: [PATCH 18/18] lint: ruff fix

---
 application/api/user/routes.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index e6e0cb7d..73023a89 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -2,8 +2,6 @@ import os
 import uuid
 import shutil
 from flask import Blueprint, request, jsonify
-from urllib.parse import urlparse
-import requests
 from pymongo import MongoClient
 from bson.objectid import ObjectId
 from bson.binary import Binary, UuidRepresentation