Merge branch 'main' into feat/analytics-and-logs

2025-11-29 08:33:20 +00:00 · 2024-09-11 17:58:04 +05:30
parent bea0bbfcdb c12d7a8d82
commit 72e68a163c
51 changed files with 1116 additions and 1498 deletions
--- a/application/api/answer/routes.py
+++ b/application/api/answer/routes.py
@@ -9,6 +9,7 @@ import traceback

 from pymongo import MongoClient
 from bson.objectid import ObjectId
+from bson.dbref import DBRef

 from application.core.settings import settings
 from application.llm.llm_creator import LLMCreator
@@ -20,7 +21,7 @@ logger = logging.getLogger(__name__)
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
-vectors_collection = db["vectors"]
+sources_collection = db["sources"]
 prompts_collection = db["prompts"]
 api_key_collection = db["api_keys"]
 user_logs_collection = db["user_logs"]
@@ -37,9 +38,7 @@ if settings.MODEL_NAME:  # in case there is particular model name configured
    gpt_model = settings.MODEL_NAME

 # load the prompts
-current_dir = os.path.dirname(
-    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-)
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
    chat_combine_template = f.read()

@@ -75,35 +74,34 @@ def run_async_chain(chain, question, chat_history):

 def get_data_from_api_key(api_key):
    data = api_key_collection.find_one({"key": api_key})
-
    # # Raise custom exception if the API key is not found
    if data is None:
        raise Exception("Invalid API Key, please generate new key", 401)
+
+    # Keys stored before retrievers existed carry no "retriever" field.
+    data.setdefault("retriever", None)
+    # A DBRef to a deleted source dereferences to None; treat it as "no source".
+    source_ref = data.get("source")
+    source_doc = db.dereference(source_ref) if isinstance(source_ref, DBRef) else None
+    if source_doc is None:
+        data["source"] = {}
+    else:
+        data["source"] = str(source_doc["_id"])
+        data["retriever"] = source_doc.get("retriever", data["retriever"])
    return data


-def get_vectorstore(data):
-    if "active_docs" in data:
-        if data["active_docs"].split("/")[0] == "default":
-            vectorstore = ""
-        elif data["active_docs"].split("/")[0] == "local":
-            vectorstore = "indexes/" + data["active_docs"]
-        else:
-            vectorstore = "vectors/" + data["active_docs"]
-        if data["active_docs"] == "default":
-            vectorstore = ""
-    else:
-        vectorstore = ""
-    vectorstore = os.path.join("application", vectorstore)
-    return vectorstore
+def get_retriever(source_id: str):
+    """Return the retriever name stored on a source document, or None if it has none."""
+    doc = sources_collection.find_one({"_id": ObjectId(source_id)})
+    if doc is None:
+        raise Exception("Source document does not exist", 404)
+    return doc.get("retriever")
+


 def is_azure_configured():
-    return (
-        settings.OPENAI_API_BASE
-        and settings.OPENAI_API_VERSION
-        and settings.AZURE_DEPLOYMENT_NAME
-    )
+    return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME


 def save_conversation(conversation_id, question, response, source_log_docs, llm):
@@ -263,29 +261,29 @@ def stream():
        else:
            token_limit = settings.DEFAULT_MAX_HISTORY

-        # check if active_docs or api_key is set
+        # retriever can be "brave_search", "duckduck_search" or "classic"
+        retriever_name = data["retriever"] if "retriever" in data else "classic"

+        # check if active_docs or api_key is set
        if "api_key" in data:
            data_key = get_data_from_api_key(data["api_key"])
            chunks = int(data_key["chunks"])
            prompt_id = data_key["prompt_id"]
            source = {"active_docs": data_key["source"]}
+            retriever_name = data_key["retriever"] or retriever_name
            user_api_key = data["api_key"]
+
        elif "active_docs" in data:
-            source = {"active_docs": data["active_docs"]}
+            source = {"active_docs" : data["active_docs"]}
+            retriever_name = get_retriever(data["active_docs"]) or retriever_name
            user_api_key = None
+
        else:
            source = {}
            user_api_key = None

-        if source["active_docs"].split("/")[0] in ["default", "local"]:
-            retriever_name = "classic"
-        else:
-            retriever_name = source["active_docs"]
-
-        current_app.logger.info(
-            f"/stream - request_data: {data}, source: {source}",
-            extra={"data": json.dumps({"request_data": data, "source": source})},
+        current_app.logger.info(f"/stream - request_data: {data}, source: {source}",
+            extra={"data": json.dumps({"request_data": data, "source": source})}
        )

        prompt = get_prompt(prompt_id)
@@ -369,6 +367,10 @@ def api_answer():
    else:
        token_limit = settings.DEFAULT_MAX_HISTORY

+    # retriever can be "brave_search", "duckduck_search" or "classic"
+    retriever_name = data["retriever"] if "retriever" in data else "classic"
+
+    # use try and except to check for exceptions
    try:
        # check if the vectorstore is set
        if "api_key" in data:
@@ -376,15 +378,15 @@ def api_answer():
            chunks = int(data_key["chunks"])
            prompt_id = data_key["prompt_id"]
            source = {"active_docs": data_key["source"]}
+            retriever_name = data_key["retriever"] or retriever_name
            user_api_key = data["api_key"]
-        else:
-            source = data
+        elif "active_docs" in data:
+            source = {"active_docs":data["active_docs"]}
+            retriever_name = get_retriever(data["active_docs"]) or retriever_name
            user_api_key = None
-
-        if source["active_docs"].split("/")[0] in ["default", "local"]:
-            retriever_name = "classic"
        else:
-            retriever_name = source["active_docs"]
+            source = {}
+            user_api_key = None

        prompt = get_prompt(prompt_id)

@@ -421,8 +423,8 @@ def api_answer():
        )

        result = {"answer": response_full, "sources": source_log_docs}
-        result["conversation_id"] = save_conversation(
-            conversation_id, question, response_full, source_log_docs, llm
+        result["conversation_id"] = str(
+            save_conversation(conversation_id, question, response_full, source_log_docs, llm)
        )
        retriever_params = retriever.get_params()
        user_logs_collection.insert_one(
@@ -459,19 +461,19 @@ def api_search():
    if "api_key" in data:
        data_key = get_data_from_api_key(data["api_key"])
        chunks = int(data_key["chunks"])
-        source = {"active_docs": data_key["source"]}
-        user_api_key = data["api_key"]
+        source = {"active_docs":data_key["source"]}
+        user_api_key = data["api_key"]  # key documents store the value under "key", not "api_key"
    elif "active_docs" in data:
-        source = {"active_docs": data["active_docs"]}
+        source = {"active_docs":data["active_docs"]}
        user_api_key = None
    else:
        source = {}
        user_api_key = None

-    if source["active_docs"].split("/")[0] in ["default", "local"]:
-        retriever_name = "classic"
+    if "retriever" in data:
+        retriever_name = data["retriever"]
    else:
-        retriever_name = source["active_docs"]
+        retriever_name = "classic"
    if "token_limit" in data:
        token_limit = data["token_limit"]
    else:
--- a/application/api/internal/routes.py
+++ b/application/api/internal/routes.py
@@ -3,13 +3,13 @@ import datetime
 from flask import Blueprint, request, send_from_directory
 from pymongo import MongoClient
 from werkzeug.utils import secure_filename
-
+from bson.objectid import ObjectId

 from application.core.settings import settings
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
-vectors_collection = db["vectors"]
+sources_collection = db["sources"]

 current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

@@ -35,7 +35,12 @@ def upload_index_files():
        return {"status": "no name"}
    job_name = secure_filename(request.form["name"])
    tokens = secure_filename(request.form["tokens"])
-    save_dir = os.path.join(current_dir, "indexes", user, job_name)
+    retriever = secure_filename(request.form["retriever"])
+    id = secure_filename(request.form["id"])
+    type = secure_filename(request.form["type"])
+    remote_data = secure_filename(request.form["remote_data"]) if "remote_data" in  request.form else None
+
+    save_dir = os.path.join(current_dir, "indexes", str(id))
    if settings.VECTOR_STORE == "faiss":
        if "file_faiss" not in request.files:
            print("No file part")
@@ -55,17 +60,19 @@ def upload_index_files():
            os.makedirs(save_dir)
        file_faiss.save(os.path.join(save_dir, "index.faiss"))
        file_pkl.save(os.path.join(save_dir, "index.pkl"))
-    # create entry in vectors_collection
-    vectors_collection.insert_one(
+    # create entry in sources_collection
+    sources_collection.insert_one(
        {
+            "_id": ObjectId(id),
            "user": user,
            "name": job_name,
            "language": job_name,
-            "location": save_dir,
            "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
            "model": settings.EMBEDDINGS_NAME,
-            "type": "local",
-            "tokens": tokens
+            "type": type,
+            "tokens": tokens,
+            "retriever": retriever,
+            "remote_data": remote_data
        }
    )
    return {"status": "ok"}
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -20,7 +20,7 @@ from application.vectorstore.vector_creator import VectorCreator
 mongo = MongoClient(settings.MONGO_URI)
 db = mongo["docsgpt"]
 conversations_collection = db["conversations"]
-vectors_collection = db["vectors"]
+sources_collection = db["sources"]
 prompts_collection = db["prompts"]
 feedback_collection = db["feedback"]
 api_key_collection = db["api_keys"]
@@ -30,9 +30,7 @@ user_logs_collection = db["user_logs"]

 user = Blueprint("user", __name__)

-current_dir = os.path.dirname(
-    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-)
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


 def generate_minute_range(start_date, end_date):
@@ -83,9 +81,7 @@ def get_conversations():
    conversations = conversations_collection.find().sort("date", -1).limit(30)
    list_conversations = []
    for conversation in conversations:
-        list_conversations.append(
-            {"id": str(conversation["_id"]), "name": conversation["name"]}
-        )
+        list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})

    # list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]

@@ -116,15 +112,10 @@ def api_feedback():
    question = data["question"]
    answer = data["answer"]
    feedback = data["feedback"]
-
-    feedback_collection.insert_one(
-        {
-            "question": question,
-            "answer": answer,
-            "feedback": feedback,
-            "timestamp": datetime.datetime.now(datetime.timezone.utc),
-        }
-    )
+    new_doc = {"question": question, "answer": answer, "feedback": feedback, "timestamp": datetime.datetime.now(datetime.timezone.utc)}
+    if "api_key" in data:
+        new_doc["api_key"] = data["api_key"]
+    feedback_collection.insert_one(new_doc)
    return {"status": "ok"}


@@ -137,7 +128,7 @@ def delete_by_ids():
        return {"status": "error"}

    if settings.VECTOR_STORE == "faiss":
-        result = vectors_collection.delete_index(ids=ids)
+        result = sources_collection.delete_index(ids=ids)
        if result:
            return {"status": "ok"}
    return {"status": "error"}
@@ -147,28 +138,24 @@ def delete_by_ids():
 def delete_old():
    """Delete old indexes."""
    import shutil
-
-    path = request.args.get("path")
-    dirs = path.split("/")
-    dirs_clean = []
-    for i in range(0, len(dirs)):
-        dirs_clean.append(secure_filename(dirs[i]))
-    # check that path strats with indexes or vectors
-
-    if dirs_clean[0] not in ["indexes", "vectors"]:
-        return {"status": "error"}
-    path_clean = "/".join(dirs_clean)
-    vectors_collection.delete_one({"name": dirs_clean[-1], "user": dirs_clean[-2]})
+    source_id = request.args.get("source_id")
+    # A missing or malformed id cannot match any source: answer 404 instead of crashing.
+    if not ObjectId.is_valid(source_id):
+        return {"status": "not found"}, 404
+    doc = sources_collection.find_one({"_id": ObjectId(source_id), "user": "local"})
+    if doc is None:
+        return {"status": "not found"}, 404
    if settings.VECTOR_STORE == "faiss":
        try:
-            shutil.rmtree(os.path.join(current_dir, path_clean))
+            shutil.rmtree(os.path.join(current_dir, "indexes", str(doc["_id"])))  # indexes are saved under indexes/<source id>
        except FileNotFoundError:
            pass
    else:
-        vetorstore = VectorCreator.create_vectorstore(
-            settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
-        )
+        vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, source_id=str(doc["_id"]))
        vetorstore.delete_index()
+    # Remove the metadata record only after the index data itself has been removed,
+    # reusing the _id of the document that was actually found.
+    sources_collection.delete_one({"_id": doc["_id"]})

    return {"status": "ok"}

@@ -202,9 +189,7 @@ def upload_file():
            file.save(os.path.join(temp_dir, filename))

        # Use shutil.make_archive to zip the temp directory
-        zip_path = shutil.make_archive(
-            base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir
-        )
+        zip_path = shutil.make_archive(base_name=os.path.join(save_dir, job_name), format="zip", root_dir=temp_dir)
        final_filename = os.path.basename(zip_path)

        # Clean up the temporary directory after zipping
@@ -246,9 +231,7 @@ def upload_remote():
    source_data = request.form["data"]

    if source_data:
-        task = ingest_remote.delay(
-            source_data=source_data, job_name=job_name, user=user, loader=source
-        )
+        task = ingest_remote.delay(source_data=source_data, job_name=job_name, user=user, loader=source)
        task_id = task.id
        return {"status": "ok", "task_id": task_id}
    else:
@@ -275,54 +258,36 @@ def combined_json():
    data = [
        {
            "name": "default",
-            "language": "default",
-            "version": "",
-            "description": "default",
-            "fullName": "default",
            "date": "default",
-            "docLink": "default",
            "model": settings.EMBEDDINGS_NAME,
            "location": "remote",
            "tokens": "",
+            "retriever": "classic",
        }
    ]
    # structure: name, language, version, description, fullName, date, docLink
-    # append data from vectors_collection in sorted order in descending order of date
-    for index in vectors_collection.find({"user": user}).sort("date", -1):
+    # append data from sources_collection in sorted order in descending order of date
+    for index in sources_collection.find({"user": user}).sort("date", -1):
        data.append(
            {
+                "id": str(index["_id"]),
                "name": index["name"],
-                "language": index["language"],
-                "version": "",
-                "description": index["name"],
-                "fullName": index["name"],
                "date": index["date"],
-                "docLink": index["location"],
                "model": settings.EMBEDDINGS_NAME,
                "location": "local",
                "tokens": index["tokens"] if ("tokens" in index.keys()) else "",
+                "retriever": index["retriever"] if ("retriever" in index.keys()) else "classic",
            }
        )
-    if settings.VECTOR_STORE == "faiss":
-        data_remote = requests.get(
-            "https://d3dg1063dc54p9.cloudfront.net/combined.json"
-        ).json()
-        for index in data_remote:
-            index["location"] = "remote"
-            data.append(index)
    if "duckduck_search" in settings.RETRIEVERS_ENABLED:
        data.append(
            {
                "name": "DuckDuckGo Search",
-                "language": "en",
-                "version": "",
-                "description": "duckduck_search",
-                "fullName": "DuckDuckGo Search",
                "date": "duckduck_search",
-                "docLink": "duckduck_search",
                "model": settings.EMBEDDINGS_NAME,
                "location": "custom",
                "tokens": "",
+                "retriever": "duckduck_search",
            }
        )
    if "brave_search" in settings.RETRIEVERS_ENABLED:
@@ -330,14 +295,11 @@ def combined_json():
            {
                "name": "Brave Search",
                "language": "en",
-                "version": "",
-                "description": "brave_search",
-                "fullName": "Brave Search",
                "date": "brave_search",
-                "docLink": "brave_search",
                "model": settings.EMBEDDINGS_NAME,
                "location": "custom",
                "tokens": "",
+                "retriever": "brave_search",
            }
        )

@@ -346,39 +308,13 @@ def combined_json():

@user.route("/api/docs_check", methods=["POST"])
 def check_docs():
-    # check if docs exist in a vectorstore folder
    data = request.get_json()
-    # split docs on / and take first part
-    if data["docs"].split("/")[0] == "local":
-        return {"status": "exists"}
+
    vectorstore = "vectors/" + secure_filename(data["docs"])
-    base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
    if os.path.exists(vectorstore) or data["docs"] == "default":
        return {"status": "exists"}
    else:
-        file_url = urlparse(base_path + vectorstore + "index.faiss")
-
-        if (
-            file_url.scheme in ["https"]
-            and file_url.netloc == "raw.githubusercontent.com"
-            and file_url.path.startswith("/arc53/DocsHUB/main/")
-        ):
-            r = requests.get(file_url.geturl())
-            if r.status_code != 200:
-                return {"status": "null"}
-            else:
-                if not os.path.exists(vectorstore):
-                    os.makedirs(vectorstore)
-                with open(vectorstore + "index.faiss", "wb") as f:
-                    f.write(r.content)
-
-                r = requests.get(base_path + vectorstore + "index.pkl")
-                with open(vectorstore + "index.pkl", "wb") as f:
-                    f.write(r.content)
-        else:
-            return {"status": "null"}
-
-        return {"status": "loaded"}
+        return {"status": "not found"}


@user.route("/api/create_prompt", methods=["POST"])
@@ -409,9 +345,7 @@ def get_prompts():
    list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
    list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
    for prompt in prompts:
-        list_prompts.append(
-            {"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"}
-        )
+        list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})

    return jsonify(list_prompts)

@@ -420,21 +354,15 @@ def get_prompts():
 def get_single_prompt():
    prompt_id = request.args.get("id")
    if prompt_id == "default":
-        with open(
-            os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r"
-        ) as f:
+        with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
            chat_combine_template = f.read()
        return jsonify({"content": chat_combine_template})
    elif prompt_id == "creative":
-        with open(
-            os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r"
-        ) as f:
+        with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
            chat_reduce_creative = f.read()
        return jsonify({"content": chat_reduce_creative})
    elif prompt_id == "strict":
-        with open(
-            os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r"
-        ) as f:
+        with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
            chat_reduce_strict = f.read()
        return jsonify({"content": chat_reduce_strict})

@@ -463,9 +391,7 @@ def update_prompt_name():
    # check if name is null
    if name == "":
        return {"status": "error"}
-    prompts_collection.update_one(
-        {"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}}
-    )
+    prompts_collection.update_one({"_id": ObjectId(id)}, {"$set": {"name": name, "content": content}})
    return {"status": "ok"}


@@ -475,12 +401,23 @@ def get_api_keys():
    keys = api_key_collection.find({"user": user})
    list_keys = []
    for key in keys:
+        if "source" in key and isinstance(key["source"],DBRef):
+            source = db.dereference(key["source"])
+            if source is None:
+                continue
+            else:
+                source_name = source["name"]
+        elif "retriever" in key:
+            source_name = key["retriever"]
+        else:
+            continue
+            
        list_keys.append(
            {
                "id": str(key["_id"]),
                "name": key["name"],
                "key": key["key"][:4] + "..." + key["key"][-4:],
-                "source": key["source"],
+                "source": source_name,
                "prompt_id": key["prompt_id"],
                "chunks": key["chunks"],
            }
@@ -492,21 +429,22 @@ def get_api_keys():
 def create_api_key():
    data = request.get_json()
    name = data["name"]
-    source = data["source"]
    prompt_id = data["prompt_id"]
    chunks = data["chunks"]
    key = str(uuid.uuid4())
    user = "local"
-    resp = api_key_collection.insert_one(
-        {
+    new_api_key = {
        "name": name,
        "key": key,
-            "source": source,
        "user": user,
        "prompt_id": prompt_id,
        "chunks": chunks,
    }
-    )
+    if "source" in data and ObjectId.is_valid(data["source"]):
+        new_api_key["source"] = DBRef("sources", ObjectId(data["source"]))
+    if "retriever" in data:
+        new_api_key["retriever"] = data["retriever"]
+    resp = api_key_collection.insert_one(new_api_key)
    new_id = str(resp.inserted_id)
    return {"id": new_id, "key": key}

@@ -533,36 +471,37 @@ def share_conversation():
        conversation_id = data["conversation_id"]
        isPromptable = request.args.get("isPromptable").lower() == "true"

-        conversation = conversations_collection.find_one(
-            {"_id": ObjectId(conversation_id)}
-        )
+        conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
+        if(conversation is None):
+            raise Exception("Conversation does not exist")
        current_n_queries = len(conversation["queries"])

        ##generate binary representation of uuid
        explicit_binary = Binary.from_uuid(uuid.uuid4(), UuidRepresentation.STANDARD)

        if isPromptable:
-            source = "default" if "source" not in data else data["source"]
            prompt_id = "default" if "prompt_id" not in data else data["prompt_id"]
            chunks = "2" if "chunks" not in data else data["chunks"]

            name = conversation["name"] + "(shared)"
-            pre_existing_api_document = api_key_collection.find_one(
-                {
+            new_api_key_data =  {
                    "prompt_id": prompt_id,
                    "chunks": chunks,
-                    "source": source,
                    "user": user,
                }
+            if "source" in data and ObjectId.is_valid(data["source"]):
+                new_api_key_data["source"] = DBRef("sources",ObjectId(data["source"]))
+            elif "retriever" in data:
+                new_api_key_data["retriever"] = data["retriever"]
+                 
+            pre_existing_api_document = api_key_collection.find_one(
+                new_api_key_data
            )
-            api_uuid = str(uuid.uuid4())
            if pre_existing_api_document:
                api_uuid = pre_existing_api_document["key"]
                pre_existing = shared_conversations_collections.find_one(
                    {
-                        "conversation_id": DBRef(
-                            "conversations", ObjectId(conversation_id)
-                        ),
+                        "conversation_id": DBRef("conversations", ObjectId(conversation_id)),
                        "isPromptable": isPromptable,
                        "first_n_queries": current_n_queries,
                        "user": user,
@@ -593,20 +532,17 @@ def share_conversation():
                            "api_key": api_uuid,
                        }
                    )
-                    return jsonify(
-                        {"success": True, "identifier": str(explicit_binary.as_uuid())}
-                    )
+                    return jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())})
            else:
-                api_key_collection.insert_one(
-                    {
-                        "name": name,
-                        "key": api_uuid,
-                        "source": source,
-                        "user": user,
-                        "prompt_id": prompt_id,
-                        "chunks": chunks,
-                    }
-                )
+                
+                api_uuid = str(uuid.uuid4())
+                new_api_key_data["key"] = api_uuid
+                new_api_key_data["name"] = name
+                if "source" in data and ObjectId.is_valid(data["source"]):
+                    new_api_key_data["source"] = DBRef("sources", ObjectId(data["source"]))
+                if "retriever" in data:
+                    new_api_key_data["retriever"] = data["retriever"]
+                api_key_collection.insert_one(new_api_key_data)
                shared_conversations_collections.insert_one(
                {
                    "uuid": explicit_binary,
@@ -622,9 +558,7 @@ def share_conversation():
              )
            ## Identifier as route parameter in frontend
            return (
-                jsonify(
-                    {"success": True, "identifier": str(explicit_binary.as_uuid())}
-                ),
+                jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
                201,
            )

@@ -639,9 +573,7 @@ def share_conversation():
        )
        if pre_existing is not None:
            return (
-                jsonify(
-                    {"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}
-                ),
+                jsonify({"success": True, "identifier": str(pre_existing["uuid"].as_uuid())}),
                200,
            )
        else:
@@ -659,9 +591,7 @@ def share_conversation():
            )
            ## Identifier as route parameter in frontend
            return (
-                jsonify(
-                    {"success": True, "identifier": str(explicit_binary.as_uuid())}
-                ),
+                jsonify({"success": True, "identifier": str(explicit_binary.as_uuid())}),
                201,
            )
    except Exception as err:
@@ -673,16 +603,10 @@ def share_conversation():
@user.route("/api/shared_conversation/<string:identifier>", methods=["GET"])
 def get_publicly_shared_conversations(identifier: str):
    try:
-        query_uuid = Binary.from_uuid(
-            uuid.UUID(identifier), UuidRepresentation.STANDARD
-        )
+        query_uuid = Binary.from_uuid(uuid.UUID(identifier), UuidRepresentation.STANDARD)
        shared = shared_conversations_collections.find_one({"uuid": query_uuid})
        conversation_queries = []
-        if (
-            shared
-            and "conversation_id" in shared
-            and isinstance(shared["conversation_id"], DBRef)
-        ):
+        if shared and "conversation_id" in shared and isinstance(shared["conversation_id"], DBRef):
            # Resolve the DBRef
            conversation_ref = shared["conversation_id"]
            conversation = db.dereference(conversation_ref)
@@ -696,9 +620,7 @@ def get_publicly_shared_conversations(identifier: str):
                    ),
                    404,
                )
-            conversation_queries = conversation["queries"][
-                : (shared["first_n_queries"])
-            ]
+            conversation_queries = conversation["queries"][: (shared["first_n_queries"])]
            for query in conversation_queries:
                query.pop("sources")  ## avoid exposing sources
        else:
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -18,7 +18,7 @@ class Settings(BaseSettings):
    DEFAULT_MAX_HISTORY: int = 150
    MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
    UPLOAD_FOLDER: str = "inputs"
-    VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch" or "qdrant"
+    VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch" or "qdrant" or "milvus"
    RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search

    API_URL: str = "http://localhost:7091"  # backend url for celery worker
@@ -62,6 +62,11 @@ class Settings(BaseSettings):
    QDRANT_PATH: Optional[str] = None
    QDRANT_DISTANCE_FUNC: str = "Cosine"

+    # Milvus vectorstore config
+    MILVUS_COLLECTION_NAME: Optional[str] = "docsgpt"
+    MILVUS_URI: Optional[str] = "./milvus_local.db"   # milvus lite version as default
+    MILVUS_TOKEN: Optional[str] = ""
+
    BRAVE_SEARCH_API_KEY: Optional[str] = None

    FLASK_DEBUG_MODE: bool = False
--- a/application/parser/open_ai_func.py
+++ b/application/parser/open_ai_func.py
@@ -11,12 +11,14 @@ from retry import retry


@retry(tries=10, delay=60)
-def store_add_texts_with_retry(store, i):
+def store_add_texts_with_retry(store, i, id):
+    # tag the chunk's metadata with the id of the source it belongs to
+    i.metadata["source_id"] = str(id)
    store.add_texts([i.page_content], metadatas=[i.metadata])
    # store_pine.add_texts([i.page_content], metadatas=[i.metadata])


-def call_openai_api(docs, folder_name, task_status):
+def call_openai_api(docs, folder_name, id, task_status):
    # Function to create a vector store from the documents and save it to disk

    if not os.path.exists(f"{folder_name}"):
@@ -32,13 +34,13 @@ def call_openai_api(docs, folder_name, task_status):
        store = VectorCreator.create_vectorstore(
            settings.VECTOR_STORE,
            docs_init=docs_init,
-            path=f"{folder_name}",
+            source_id=f"{folder_name}",
            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
        )
    else:
        store = VectorCreator.create_vectorstore(
            settings.VECTOR_STORE,
-            path=f"{folder_name}",
+            source_id=str(id),
            embeddings_key=os.getenv("EMBEDDINGS_KEY"),
        )
    # Uncomment for MPNet embeddings
@@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, task_status):
            task_status.update_state(
                state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
            )
-            store_add_texts_with_retry(store, i)
+            store_add_texts_with_retry(store, i, id)
        except Exception as e:
            print(e)
            print("Error on ", i)
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -9,13 +9,15 @@ EbookLib==0.18
 elasticsearch==8.14.0
 escodegen==1.0.11
 esprima==4.0.1
-Flask==3.0.1
-faiss-cpu==1.8.0
+Flask==3.0.3
+faiss-cpu==1.8.0.post1
 gunicorn==23.0.0
 html2text==2020.1.16
 javalang==0.13.0
-langchain==0.1.4
-langchain-openai==0.0.5
+langchain==0.2.16
+langchain-community==0.2.16
+langchain-core==0.2.38
+langchain-openai==0.1.23
 openapi3_parser==1.1.16
 pandas==2.2.2
 pydantic_settings==2.4.0
@@ -26,9 +28,9 @@ qdrant-client==1.11.0
 redis==5.0.1
 Requests==2.32.0
 retry==0.9.2
-sentence-transformers
-tiktoken
+sentence-transformers==3.0.1
+tiktoken==0.7.0
 torch
-tqdm==4.66.3
-transformers==4.44.0
-Werkzeug==3.0.3
+tqdm==4.66.5
+transformers==4.44.2
+Werkzeug==3.0.4
--- a/application/retriever/classic_rag.py
+++ b/application/retriever/classic_rag.py
@@ -1,4 +1,3 @@
-import os
 from application.retriever.base import BaseRetriever
 from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
@@ -21,7 +20,7 @@ class ClassicRAG(BaseRetriever):
        user_api_key=None,
    ):
        self.question = question
-        self.vectorstore = self._get_vectorstore(source=source)
+        self.vectorstore = source['active_docs'] if 'active_docs' in source else None
        self.chat_history = chat_history
        self.prompt = prompt
        self.chunks = chunks
@@ -38,21 +37,6 @@ class ClassicRAG(BaseRetriever):
        )
        self.user_api_key = user_api_key

-    def _get_vectorstore(self, source):
-        if "active_docs" in source:
-            if source["active_docs"].split("/")[0] == "default":
-                vectorstore = ""
-            elif source["active_docs"].split("/")[0] == "local":
-                vectorstore = "indexes/" + source["active_docs"]
-            else:
-                vectorstore = "vectors/" + source["active_docs"]
-            if source["active_docs"] == "default":
-                vectorstore = ""
-        else:
-            vectorstore = ""
-        vectorstore = os.path.join("application", vectorstore)
-        return vectorstore
-
    def _get_data(self):
        if self.chunks == 0:
            docs = []
--- a/application/retriever/retriever_creator.py
+++ b/application/retriever/retriever_creator.py
@@ -5,15 +5,16 @@ from application.retriever.brave_search import BraveRetSearch


 class RetrieverCreator:
-    retievers = {
+    retrievers = {
        'classic': ClassicRAG,
        'duckduck_search': DuckDuckSearch,
-        'brave_search': BraveRetSearch
+        'brave_search': BraveRetSearch,
+        'default': ClassicRAG
    }

    @classmethod
    def create_retriever(cls, type, *args, **kwargs):
-        retiever_class = cls.retievers.get(type.lower())
+        retiever_class = cls.retrievers.get(type.lower())
        if not retiever_class:
            raise ValueError(f"No retievers class found for type {type}")
        return retiever_class(*args, **kwargs)
--- a/application/vectorstore/base.py
+++ b/application/vectorstore/base.py
@@ -1,13 +1,30 @@
 from abc import ABC, abstractmethod
 import os
-from langchain_community.embeddings import (
-    HuggingFaceEmbeddings,
-    CohereEmbeddings,
-    HuggingFaceInstructEmbeddings,
-)
+from sentence_transformers import SentenceTransformer
 from langchain_openai import OpenAIEmbeddings
 from application.core.settings import settings

+class EmbeddingsWrapper:
+    def __init__(self, model_name, *args, **kwargs):
+        self.model = SentenceTransformer(model_name, config_kwargs={'allow_dangerous_deserialization': True}, *args, **kwargs)
+        self.dimension = self.model.get_sentence_embedding_dimension()
+
+    def embed_query(self, query: str):
+        return self.model.encode(query).tolist()
+    
+    def embed_documents(self, documents: list):
+        return self.model.encode(documents).tolist()
+    
+    def __call__(self, text):
+        if isinstance(text, str):
+            return self.embed_query(text)
+        elif isinstance(text, list):
+            return self.embed_documents(text)
+        else:
+            raise ValueError("Input must be a string or a list of strings")
+
+
+
 class EmbeddingsSingleton:
    _instances = {}

@@ -23,16 +40,15 @@ class EmbeddingsSingleton:
    def _create_instance(embeddings_name, *args, **kwargs):
        embeddings_factory = {
            "openai_text-embedding-ada-002": OpenAIEmbeddings,
-            "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings,
-            "huggingface_sentence-transformers-all-mpnet-base-v2": HuggingFaceEmbeddings,
-            "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings,
-            "cohere_medium": CohereEmbeddings
+            "huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
+            "huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
+            "huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
        }

-        if embeddings_name not in embeddings_factory:
-            raise ValueError(f"Invalid embeddings_name: {embeddings_name}")
-
+        if embeddings_name in embeddings_factory:
            return embeddings_factory[embeddings_name](*args, **kwargs)
+        else:
+            return EmbeddingsWrapper(embeddings_name, *args, **kwargs)

 class BaseVectorStore(ABC):
    def __init__(self):
@@ -58,22 +74,14 @@ class BaseVectorStore(ABC):
                    embeddings_name,
                    openai_api_key=embeddings_key
                )
-        elif embeddings_name == "cohere_medium":
-            embedding_instance = EmbeddingsSingleton.get_instance(
-                embeddings_name,
-                cohere_api_key=embeddings_key
-            )
        elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
            if os.path.exists("./model/all-mpnet-base-v2"):
                embedding_instance = EmbeddingsSingleton.get_instance(
-                    embeddings_name,
-                    model_name="./model/all-mpnet-base-v2",
-                    model_kwargs={"device": "cpu"}
+                    embeddings_name="./model/all-mpnet-base-v2",
                )
            else:
                embedding_instance = EmbeddingsSingleton.get_instance(
                    embeddings_name,
-                    model_kwargs={"device": "cpu"}
                )
        else:
            embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
--- a/application/vectorstore/elasticsearch.py
+++ b/application/vectorstore/elasticsearch.py
@@ -9,9 +9,9 @@ import elasticsearch
 class ElasticsearchStore(BaseVectorStore):
    _es_connection = None  # Class attribute to hold the Elasticsearch connection

-    def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
+    def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
        super().__init__()
-        self.path = path.replace("application/indexes/", "").rstrip("/")
+        self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
        self.embeddings_key = embeddings_key
        self.index_name = index_name
        
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
        embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
        vector = embeddings.embed_query(question)
        knn = {
-            "filter": [{"match": {"metadata.store.keyword": self.path}}],
+            "filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
            "field": "vector",
            "k": k,
            "num_candidates": 100,
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
                            }
                        }
                    ],
-                    "filter": [{"match": {"metadata.store.keyword": self.path}}],
+                    "filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
                }
            },
            "rank": {"rrf": {}},
@@ -209,5 +209,4 @@ class ElasticsearchStore(BaseVectorStore):

    def delete_index(self):
        self._es_connection.delete_by_query(index=self.index_name, query={"match": {
-                                      "metadata.store.keyword": self.path}},)
-
+                                      "metadata.source_id.keyword": self.source_id}},)
--- a/application/vectorstore/faiss.py
+++ b/application/vectorstore/faiss.py
@@ -1,12 +1,22 @@
 from langchain_community.vectorstores import FAISS
 from application.vectorstore.base import BaseVectorStore
 from application.core.settings import settings
+import os
+
+def get_vectorstore(path):
+    if path:
+        vectorstore = "indexes/"+path
+        vectorstore = os.path.join("application", vectorstore)
+    else:
+        vectorstore = os.path.join("application")
+
+    return vectorstore

 class FaissStore(BaseVectorStore):

-    def __init__(self, path, embeddings_key, docs_init=None):
+    def __init__(self, source_id, embeddings_key, docs_init=None):
        super().__init__()
-        self.path = path
+        self.path = get_vectorstore(source_id)
        embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
        if docs_init:
            self.docsearch = FAISS.from_documents(
@@ -14,7 +24,8 @@ class FaissStore(BaseVectorStore):
            )
        else:
            self.docsearch = FAISS.load_local(
-                self.path, embeddings
+                self.path, embeddings, 
+                allow_dangerous_deserialization=True
            )
        self.assert_embedding_dimensions(embeddings)

@@ -37,10 +48,10 @@ class FaissStore(BaseVectorStore):
        """
        if settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2":
            try:
-                word_embedding_dimension = embeddings.client[1].word_embedding_dimension
+                word_embedding_dimension = embeddings.dimension
            except AttributeError as e:
-                raise AttributeError("word_embedding_dimension not found in embeddings.client[1]") from e
+                raise AttributeError("'dimension' attribute not found in embeddings instance. Make sure the embeddings object is properly initialized.") from e
            docsearch_index_dimension = self.docsearch.index.d
            if word_embedding_dimension != docsearch_index_dimension:
-                raise ValueError(f"word_embedding_dimension ({word_embedding_dimension}) " +
-                                 f"!= docsearch_index_word_embedding_dimension ({docsearch_index_dimension})")
+                raise ValueError(f"Embedding dimension mismatch: embeddings.dimension ({word_embedding_dimension}) " +
+                                 f"!= docsearch index dimension ({docsearch_index_dimension})")
--- a/application/vectorstore/milvus.py
+++ b/application/vectorstore/milvus.py
@@ -0,0 +1,55 @@
+from typing import List, Optional
+from uuid import uuid4
+
+
+from application.core.settings import settings
+from application.vectorstore.base import BaseVectorStore
+
+
+class MilvusStore(BaseVectorStore):
+    """Vector store that reads and writes a Milvus collection via langchain_milvus."""
+
+    def __init__(self, path: str = "", embeddings_key: str = "embeddings", source_id: str = ""):
+        """Open the Milvus collection named in settings.
+
+        Args:
+            path: Legacy name for the value used to filter searches.
+            embeddings_key: Key passed to ``_get_embeddings``.
+            source_id: Value used to filter searches; wins over ``path`` when
+                both are given. Accepted so this store can be built with the
+                same ``source_id=`` keyword as the other vector stores.
+        """
+        super().__init__()
+        from langchain_milvus import Milvus
+
+        connection_args = {
+            "uri": settings.MILVUS_URI,
+            "token": settings.MILVUS_TOKEN,
+        }
+        self._docsearch = Milvus(
+            embedding_function=self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key),
+            collection_name=settings.MILVUS_COLLECTION_NAME,
+            connection_args=connection_args,
+        )
+        self._path = source_id or path
+
+    def search(self, question, k=2, *args, **kwargs):
+        """Run a similarity search for ``question`` requesting ``k`` results, filtered by this store's id."""
+        # NOTE(review): chunks are tagged with metadata["source_id"] at ingestion,
+        # but this filter matches on "path" - confirm which key Milvus stores.
+        search_filter = {"path": self._path}
+        return self._docsearch.similarity_search(query=question, k=k, filter=search_filter, *args, **kwargs)
+
+    def add_texts(self, texts: List[str], metadatas: Optional[List[dict]], *args, **kwargs):
+        """Add ``texts`` to the collection under newly generated UUID4 ids."""
+        ids = [str(uuid4()) for _ in range(len(texts))]
+
+        return self._docsearch.add_texts(texts=texts, metadatas=metadatas, ids=ids, *args, **kwargs)
+
+    def save_local(self, *args, **kwargs):
+        """Do nothing; any arguments are accepted and ignored."""
+        pass
+
+    def delete_index(self, *args, **kwargs):
+        """Do nothing; index deletion is not implemented for this store."""
+        pass
--- a/application/vectorstore/mongodb.py
+++ b/application/vectorstore/mongodb.py
@@ -5,7 +5,7 @@ from application.vectorstore.document_class import Document
 class MongoDBVectorStore(BaseVectorStore):
    def __init__(
        self,
-        path: str = "",
+        source_id: str = "",
        embeddings_key: str = "embeddings",
        collection: str = "documents",
        index_name: str = "vector_search_index",
@@ -18,7 +18,7 @@ class MongoDBVectorStore(BaseVectorStore):
        self._embedding_key = embedding_key
        self._embeddings_key = embeddings_key
        self._mongo_uri = settings.MONGO_URI
-        self._path = path.replace("application/indexes/", "").rstrip("/")
+        self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
        self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)

        try:
@@ -46,7 +46,7 @@ class MongoDBVectorStore(BaseVectorStore):
                    "numCandidates": k * 10, 
                    "index": self._index_name,
                    "filter": {
-                        "store": {"$eq": self._path}
+                        "source_id": {"$eq": self._source_id}
                    }
                }
            }
@@ -123,4 +123,4 @@ class MongoDBVectorStore(BaseVectorStore):
        return result_ids
    
    def delete_index(self, *args, **kwargs):
-        self._collection.delete_many({"store": self._path})
+        self._collection.delete_many({"source_id": self._source_id})
--- a/application/vectorstore/qdrant.py
+++ b/application/vectorstore/qdrant.py
@@ -5,12 +5,12 @@ from qdrant_client import models


 class QdrantStore(BaseVectorStore):
-    def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
+    def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
        self._filter = models.Filter(
            must=[
                models.FieldCondition(
-                    key="metadata.store",
-                    match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
+                    key="metadata.source_id",
+                    match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
                )
            ]
        )
--- a/application/vectorstore/vector_creator.py
+++ b/application/vectorstore/vector_creator.py
@@ -1,5 +1,6 @@
 from application.vectorstore.faiss import FaissStore
 from application.vectorstore.elasticsearch import ElasticsearchStore
+from application.vectorstore.milvus import MilvusStore
 from application.vectorstore.mongodb import MongoDBVectorStore
 from application.vectorstore.qdrant import QdrantStore

@@ -10,6 +11,7 @@ class VectorCreator:
        "elasticsearch": ElasticsearchStore,
        "mongodb": MongoDBVectorStore,
        "qdrant": QdrantStore,
+        "milvus": MilvusStore,
    }

    @classmethod
--- a/application/worker.py
+++ b/application/worker.py
@@ -6,6 +6,7 @@ from urllib.parse import urljoin
 import logging

 import requests
+from bson.objectid import ObjectId

 from application.core.settings import settings
 from application.parser.file.bulk import SimpleDirectoryReader
@@ -16,10 +17,10 @@ from application.parser.token_func import group_split
 from application.utils import count_tokens_docs


+
 # Define a function to extract metadata from a given filename.
 def metadata_from_filename(title):
-    store = "/".join(title.split("/")[1:3])
-    return {"title": title, "store": store}
+    return {"title": title}


 # Define a function to generate a random string of a given length.
@@ -27,9 +28,7 @@ def generate_random_string(length):
    return "".join([string.ascii_letters[i % 52] for i in range(length)])


-current_dir = os.path.dirname(
-    os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-)
+current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


 def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
@@ -60,7 +59,7 @@ def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):


 # Define the main function for ingesting and processing documents.
-def ingest_worker(self, directory, formats, name_job, filename, user):
+def ingest_worker(self, directory, formats, name_job, filename, user, retriever="classic"):
    """
    Ingest and process documents.

@@ -71,6 +70,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
        name_job (str): Name of the job for this ingestion task.
        filename (str): Name of the file to be ingested.
        user (str): Identifier for the user initiating the ingestion.
+        retriever (str): Type of retriever to use for processing the documents.

    Returns:
        dict: Information about the completed ingestion task, including input parameters and a "limited" flag.
@@ -106,9 +106,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):

    # check if file is .zip and extract it
    if filename.endswith(".zip"):
-        extract_zip_recursive(
-            os.path.join(full_path, filename), full_path, 0, recursion_depth
-        )
+        extract_zip_recursive(os.path.join(full_path, filename), full_path, 0, recursion_depth)

    self.update_state(state="PROGRESS", meta={"current": 1})

@@ -129,8 +127,9 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
    )

    docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+    id = ObjectId()

-    call_openai_api(docs, full_path, self)
+    call_openai_api(docs, full_path, id, self)
    tokens = count_tokens_docs(docs)
    self.update_state(state="PROGRESS", meta={"current": 100})

@@ -140,22 +139,15 @@ def ingest_worker(self, directory, formats, name_job, filename, user):

    # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
    # and send them to the server (provide user and name in form)
-    file_data = {"name": name_job, "user": user, "tokens":tokens}
+    file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
    if settings.VECTOR_STORE == "faiss":
        files = {
            "file_faiss": open(full_path + "/index.faiss", "rb"),
            "file_pkl": open(full_path + "/index.pkl", "rb"),
        }
-        response = requests.post(
-            urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
-        )
-        response = requests.get(
-            urljoin(settings.API_URL, "/api/delete_old?path=" + full_path)
-        )
+        response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
    else:
-        response = requests.post(
-            urljoin(settings.API_URL, "/api/upload_index"), data=file_data
-        )
+        response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)

    # delete local
    shutil.rmtree(full_path)
@@ -170,7 +162,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
    }


-def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
+def remote_worker(self, source_data, name_job, user, loader, directory="temp", retriever="classic"):
    token_check = True
    min_tokens = 150
    max_tokens = 1250
@@ -191,22 +183,21 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
        token_check=token_check,
    )
    # docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
-    call_openai_api(docs, full_path, self)
    tokens = count_tokens_docs(docs)
+    id = ObjectId()
+    call_openai_api(docs, full_path, id, self)
    self.update_state(state="PROGRESS", meta={"current": 100})

    # Proceed with uploading and cleaning as in the original function
-    file_data = {"name": name_job, "user": user, "tokens":tokens}
+    file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, 
+                 "id": str(id), 'type': loader, 'remote_data': source_data}
    if settings.VECTOR_STORE == "faiss":
        files = {
            "file_faiss": open(full_path + "/index.faiss", "rb"),
            "file_pkl": open(full_path + "/index.pkl", "rb"),
        }

-        requests.post(
-            urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data
-        )
-        requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
+        requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
    else:
        requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)

--- a/docker-compose-azure.yaml
+++ b/docker-compose-azure.yaml
@@ -1,5 +1,3 @@
-version: "3.9"
-
 services:
  frontend:
    build: ./frontend
--- a/docker-compose-dev.yaml
+++ b/docker-compose-dev.yaml
@@ -1,5 +1,3 @@
-version: "3.9"
-
 services:

  redis:
--- a/docker-compose-local.yaml
+++ b/docker-compose-local.yaml
@@ -1,5 +1,3 @@
-version: "3.9"
-
 services:
  frontend:
    build: ./frontend
--- a/docker-compose-mock.yaml
+++ b/docker-compose-mock.yaml
@@ -1,5 +1,3 @@
-version: "3.9"
-
 services:
  frontend:
    build: ./frontend
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,5 +1,3 @@
-version: "3.9"
-
 services:
  frontend:
    build: ./frontend
--- a/extensions/react-widget/package-lock.json
+++ b/extensions/react-widget/package-lock.json
@@ -1,12 +1,12 @@
 {
  "name": "docsgpt",
-  "version": "0.4.1",
+  "version": "0.4.2",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "docsgpt",
-      "version": "0.4.1",
+      "version": "0.4.2",
      "license": "Apache-2.0",
      "dependencies": {
        "@babel/plugin-transform-flow-strip-types": "^7.23.3",
--- a/extensions/react-widget/package.json
+++ b/extensions/react-widget/package.json
@@ -1,6 +1,6 @@
 {
  "name": "docsgpt",
-  "version": "0.4.1",
+  "version": "0.4.2",
  "private": false,
  "description": "DocsGPT 🦖 is an innovative open-source tool designed to simplify the retrieval of information from project documentation using advanced GPT models 🤖.",
  "source": "./src/index.html",
--- a/extensions/react-widget/publish.sh
+++ b/extensions/react-widget/publish.sh
@@ -2,6 +2,7 @@
 ## chmod +x publish.sh - to upgrade ownership
 set -e
 cat package.json >> package_copy.json
+cat package-lock.json >> package-lock_copy.json
 publish_package() {
  PACKAGE_NAME=$1
  BUILD_COMMAND=$2
@@ -24,6 +25,9 @@ publish_package() {

  # Publish to npm
  npm publish
+  # Clean up
+  mv package_copy.json package.json
+  mv package-lock_copy.json package-lock.json
  echo "Published ${PACKAGE_NAME}"
 }

@@ -33,7 +37,7 @@ publish_package "docsgpt" "build"
 # Publish docsgpt-react package
 publish_package "docsgpt-react" "build:react"

-# Clean up
-mv package_copy.json package.json
+
 rm -rf package_copy.json
+rm -rf package-lock_copy.json
 echo "---Process completed---"
--- a/extensions/react-widget/src/assets/dislike.svg
+++ b/extensions/react-widget/src/assets/dislike.svg
@@ -0,0 +1,4 @@
+<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
+<path d="M6.37776 10.1001V12.9C6.37776 13.457 6.599 13.9911 6.99282 14.3849C7.38664 14.7788 7.92077 15 8.47772 15L11.2777 8.70011V1.00025H3.38181C3.04419 0.996436 2.71656 1.11477 2.45929 1.33344C2.20203 1.55212 2.03246 1.8564 1.98184 2.19023L1.01585 8.49012C0.985398 8.69076 0.998931 8.89563 1.05551 9.09053C1.1121 9.28543 1.21038 9.46569 1.34355 9.61884C1.47671 9.77198 1.64159 9.89434 1.82674 9.97744C2.01189 10.0605 2.2129 10.1024 2.41583 10.1001H6.37776ZM11.2777 1.00025H13.1466C13.5428 0.993247 13.9277 1.13195 14.2284 1.39002C14.5291 1.64809 14.7245 2.00758 14.7776 2.40023V7.30014C14.7245 7.69279 14.5291 8.05227 14.2284 8.31035C13.9277 8.56842 13.5428 8.70712 13.1466 8.70011H11.2777" fill="none"/>
+<path d="M11.2777 8.70011L8.47772 15C7.92077 15 7.38664 14.7788 6.99282 14.3849C6.599 13.9911 6.37776 13.457 6.37776 12.9V10.1001H2.41583C2.2129 10.1024 2.01189 10.0605 1.82674 9.97744C1.64159 9.89434 1.47671 9.77198 1.34355 9.61884C1.21038 9.46569 1.1121 9.28543 1.05551 9.09053C0.998931 8.89563 0.985398 8.69076 1.01585 8.49012L1.98184 2.19023C2.03246 1.8564 2.20203 1.55212 2.45929 1.33344C2.71656 1.11477 3.04419 0.996436 3.38181 1.00025H11.2777M11.2777 8.70011V1.00025M11.2777 8.70011H13.1466C13.5428 8.70712 13.9277 8.56842 14.2284 8.31035C14.5291 8.05227 14.7245 7.69279 14.7776 7.30014V2.40023C14.7245 2.00758 14.5291 1.64809 14.2284 1.39002C13.9277 1.13195 13.5428 0.993247 13.1466 1.00025H11.2777" stroke="current" stroke-width="1.4" stroke-linecap="round" stroke-linejoin="round"/>
+</svg>
--- a/extensions/react-widget/src/assets/like.svg
+++ b/extensions/react-widget/src/assets/like.svg
@@ -0,0 +1,4 @@
+<svg width="14" height="14" viewBox="0 0 16 16" fill="current" xmlns="http://www.w3.org/2000/svg">
+<path d="M9.39995 5.89997V3.09999C9.39995 2.54304 9.1787 2.0089 8.78487 1.61507C8.39105 1.22125 7.85691 1 7.29996 1L4.49998 7.29996V14.9999H12.3959C12.7336 15.0037 13.0612 14.8854 13.3185 14.6667C13.5757 14.448 13.7453 14.1437 13.7959 13.8099L14.7619 7.50996C14.7924 7.30931 14.7788 7.10444 14.7222 6.90954C14.6657 6.71464 14.5674 6.53437 14.4342 6.38123C14.301 6.22808 14.1362 6.10572 13.951 6.02262C13.7659 5.93952 13.5649 5.89767 13.3619 5.89997H9.39995ZM4.49998 14.9999H2.39999C2.02869 14.9999 1.6726 14.8524 1.41005 14.5899C1.1475 14.3273 1 13.9712 1 13.5999V8.69995C1 8.32865 1.1475 7.97256 1.41005 7.71001C1.6726 7.44746 2.02869 7.29996 2.39999 7.29996H4.49998" fill="none"/>
+<path d="M4.49998 7.29996L7.29996 1C7.85691 1 8.39105 1.22125 8.78487 1.61507C9.1787 2.0089 9.39995 2.54304 9.39995 3.09999V5.89997H13.3619C13.5649 5.89767 13.7659 5.93952 13.951 6.02262C14.1362 6.10572 14.301 6.22808 14.4342 6.38123C14.5674 6.53437 14.6657 6.71464 14.7223 6.90954C14.7788 7.10444 14.7924 7.30931 14.7619 7.50996L13.7959 13.8099C13.7453 14.1437 13.5757 14.448 13.3185 14.6667C13.0612 14.8854 12.7336 15.0037 12.3959 14.9999H4.49998M4.49998 7.29996V14.9999M4.49998 7.29996H2.39999C2.02869 7.29996 1.6726 7.44746 1.41005 7.71001C1.1475 7.97256 1 8.32865 1 8.69995V13.5999C1 13.9712 1.1475 14.3273 1.41005 14.5899C1.6726 14.8524 2.02869 14.9999 2.39999 14.9999H4.49998" stroke="current" stroke-width="1.39999" stroke-linecap="round" stroke-linejoin="round"/>
+</svg>
--- a/extensions/react-widget/src/components/DocsGPTWidget.tsx
+++ b/extensions/react-widget/src/components/DocsGPTWidget.tsx
@@ -1,11 +1,13 @@
 "use client";
-import React from 'react'
+import React, { useRef } from 'react'
 import DOMPurify from 'dompurify';
 import styled, { keyframes, createGlobalStyle } from 'styled-components';
 import { PaperPlaneIcon, RocketIcon, ExclamationTriangleIcon, Cross2Icon } from '@radix-ui/react-icons';
-import { MESSAGE_TYPE, Query, Status, WidgetProps } from '../types/index';
-import { fetchAnswerStreaming } from '../requests/streamingApi';
+import { FEEDBACK, MESSAGE_TYPE, Query, Status, WidgetProps } from '../types/index';
+import { fetchAnswerStreaming, sendFeedback } from '../requests/streamingApi';
 import { ThemeProvider } from 'styled-components';
+import Like from "../assets/like.svg"
+import Dislike from "../assets/dislike.svg"
 import MarkdownIt from 'markdown-it';
 const themes = {
  dark: {
@@ -63,6 +65,10 @@ const GlobalStyles = createGlobalStyle`
  background-color: #646464;
  color: #fff !important;
 }
+.response code {
+  white-space: pre-wrap !important;
+  line-break: loose !important;
+}
 `;
 const Overlay = styled.div`
  position: fixed;
@@ -195,12 +201,24 @@ const Conversation = styled.div<{ size: string }>`
    width:${props => props.size === 'large' ? '90vw' : props.size === 'medium' ? '60vw' : '400px'} !important;
    }
 `;
-
-const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
+const Feedback = styled.div`
+  background-color: transparent;
+  font-weight: normal;
+  gap: 12px;
  display: flex;
+  padding: 6px;
+  clear: both;
+`;
+const MessageBubble = styled.div<{ type: MESSAGE_TYPE }>`
+    display: block;
    font-size: 16px;
-    justify-content: ${props => props.type === 'QUESTION' ? 'flex-end' : 'flex-start'};
-    margin: 0.5rem;
+    position: relative;
+    width: 100%;;
+    float: right;
+    margin: 0rem;
+    &:hover ${Feedback} * {
+    visibility: visible !important;
+  }
 `;
 const Message = styled.div<{ type: MESSAGE_TYPE }>`
    background: ${props => props.type === 'QUESTION' ?
@@ -208,6 +226,7 @@ const Message = styled.div<{ type: MESSAGE_TYPE }>`
    props.theme.secondary.bg};
    color: ${props => props.type === 'ANSWER' ? props.theme.primary.text : '#fff'};
    border: none;
+    float: ${props => props.type === 'QUESTION' ? 'right' : 'left'};
    max-width: ${props => props.type === 'ANSWER' ? '100%' : '80'};
    overflow: auto;
    margin: 4px;
@@ -315,6 +334,7 @@ const HeroDescription = styled.p`
  font-size: 14px;
  line-height: 1.5;
 `;
+
 const Hero = ({ title, description, theme }: { title: string, description: string, theme: string }) => {
  return (
    <>
@@ -345,7 +365,8 @@ export const DocsGPTWidget = ({
  size = 'small',
  theme = 'dark',
  buttonIcon = 'https://d3dg1063dc54p9.cloudfront.net/widget/message.svg',
-  buttonBg = 'linear-gradient(to bottom right, #5AF0EC, #E80D9D)'
+  buttonBg = 'linear-gradient(to bottom right, #5AF0EC, #E80D9D)',
+  collectFeedback = true
 }: WidgetProps) => {
  const [prompt, setPrompt] = React.useState('');
  const [status, setStatus] = React.useState<Status>('idle');
@@ -353,6 +374,7 @@ export const DocsGPTWidget = ({
  const [conversationId, setConversationId] = React.useState<string | null>(null)
  const [open, setOpen] = React.useState<boolean>(false)
  const [eventInterrupt, setEventInterrupt] = React.useState<boolean>(false); //click or scroll by user while autoScrolling
+  const isBubbleHovered = useRef<boolean>(false)
  const endMessageRef = React.useRef<HTMLDivElement | null>(null);
  const md = new MarkdownIt();

@@ -376,6 +398,36 @@ export const DocsGPTWidget = ({
    !eventInterrupt && scrollToBottom(endMessageRef.current);
  }, [queries.length, queries[queries.length - 1]?.response]);

+  // Send LIKE/DISLIKE feedback for the answer at `index`; picking the active choice again clears it locally.
+  async function handleFeedback(feedback: FEEDBACK, index: number) {
+    const query = queries[index]
+    if (!query.response) return;
+    if (query.feedback != feedback) {
+      sendFeedback({
+        question: query.prompt,
+        answer: query.response,
+        feedback: feedback,
+        apikey: apiKey
+      }, apiHost)
+        .then(res => {
+          if (res.status == 200) {
+            // Copy from the latest state instead of mutating the captured query object.
+            setQueries((prev: Query[]) =>
+              prev.map((q, i) => (i === index ? { ...q, feedback } : q)));
+          }
+        })
+        .catch(err => console.log("Connection failed",err))
+    }
+    else {
+      // Clearing is local only: no request is sent when feedback is removed.
+      setQueries((prev: Query[]) => prev.map((q, i) => {
+        if (i !== index) return q;
+        const { feedback: _cleared, ...rest } = q;
+        return rest;
+      }));
+    }
+  }
+
  async function stream(question: string) {
    setStatus('loading')
    try {
@@ -473,7 +525,7 @@ export const DocsGPTWidget = ({
                      </MessageBubble>
                    }
                    {
-                      query.response ? <MessageBubble type='ANSWER'>
+                      query.response ? <MessageBubble onMouseOver={() => { isBubbleHovered.current = true }} type='ANSWER'>
                        <Message
                          type='ANSWER'
                          ref={(index === queries.length - 1) ? endMessageRef : null}
@@ -483,6 +535,24 @@ export const DocsGPTWidget = ({
                            dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(md.render(query.response)) }}
                          />
                        </Message>
+
+                        {collectFeedback &&
+                          <Feedback>
+                            <Like
+                              style={{
+                                stroke: query.feedback == 'LIKE' ? '#8860DB' : '#c0c0c0',
+                                visibility: query.feedback == 'LIKE' ? 'visible' : 'hidden'
+                              }}
+                              fill='none'
+                              onClick={() => handleFeedback("LIKE", index)} />
+                            <Dislike
+                              style={{
+                                stroke: query.feedback == 'DISLIKE' ? '#ed8085' : '#c0c0c0',
+                                visibility: query.feedback == 'DISLIKE' ? 'visible' : 'hidden'
+                              }}
+                              fill='none'
+                              onClick={() => handleFeedback("DISLIKE", index)} />
+                          </Feedback>}
                      </MessageBubble>
                        : <div>
                          {
@@ -518,7 +588,7 @@ export const DocsGPTWidget = ({
              type='text' placeholder="What do you want to do?" />
            <StyledButton
              size={size}
-              disabled={prompt.length == 0 || status !== 'idle'}>
+              disabled={prompt.trim().length == 0 || status !== 'idle'}>
              <PaperPlaneIcon width={15} height={15} color='white' />
            </StyledButton>
          </PromptContainer>
--- a/extensions/react-widget/src/requests/streamingApi.ts
+++ b/extensions/react-widget/src/requests/streamingApi.ts
@@ -1,3 +1,4 @@
+import { FEEDBACK } from "@/types";
 interface HistoryItem {
  prompt: string;
  response?: string;
@@ -11,6 +12,12 @@ interface FetchAnswerStreamingProps {
  apiHost?: string;
  onEvent?: (event: MessageEvent) => void;
 }
+// Argument accepted by `sendFeedback`; its fields are copied into the JSON request body.
+interface FeedbackPayload {
+  question: string;
+  answer: string;
+  // Sent to the server under the key `api_key`.
+  apikey: string;
+  feedback: FEEDBACK;
+}
 export function fetchAnswerStreaming({
  question = '',
  apiKey = '',
@@ -20,12 +27,12 @@ export function fetchAnswerStreaming({
  onEvent = () => { console.log("Event triggered, but no handler provided."); }
 }: FetchAnswerStreamingProps): Promise<void> {
  return new Promise<void>((resolve, reject) => {
-    const body= {
+    const body = {
      question: question,
      history: JSON.stringify(history),
      conversation_id: conversationId,
      model: 'default',
-      api_key:apiKey
+      api_key: apiKey
    };
    fetch(apiHost + '/stream', {
      method: 'POST',
@@ -81,3 +88,19 @@ export function fetchAnswerStreaming({
      });
  });
 }
+
+
+/**
+ * POST a feedback verdict for one question/answer pair to
+ * `${apiHost}/api/feedback`.
+ *
+ * Returns the promise from `fetch` untouched: the caller is responsible for
+ * inspecting the response status and for handling network failures.
+ */
+export const sendFeedback = (payload: FeedbackPayload, apiHost: string): Promise<Response> => {
+  const { question, answer, feedback, apikey } = payload;
+  const endpoint = `${apiHost}/api/feedback`;
+  // The server expects the key under `api_key`, not `apikey`.
+  const requestBody = JSON.stringify({ question, answer, feedback, api_key: apikey });
+
+  return fetch(endpoint, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: requestBody,
+  });
+};
--- a/extensions/react-widget/src/types/index.ts
+++ b/extensions/react-widget/src/types/index.ts
@@ -23,4 +23,5 @@ export interface WidgetProps {
  theme?:THEME,
  buttonIcon?:string;
  buttonBg?:string;
+  collectFeedback?:boolean
 }
--- a/frontend/src/Navigation.tsx
+++ b/frontend/src/Navigation.tsx
@@ -24,9 +24,9 @@ import ConversationTile from './conversation/ConversationTile';
 import { useDarkTheme, useMediaQuery, useOutsideAlerter } from './hooks';
 import useDefaultDocument from './hooks/useDefaultDocument';
 import DeleteConvModal from './modals/DeleteConvModal';
-import { ActiveState } from './models/misc';
+import { ActiveState, Doc } from './models/misc';
 import APIKeyModal from './preferences/APIKeyModal';
-import { Doc, getConversations, getDocs } from './preferences/preferenceApi';
+import { getConversations, getDocs } from './preferences/preferenceApi';
 import {
  selectApiKeyStatus,
  selectConversationId,
@@ -124,10 +124,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) {
  };

  const handleDeleteClick = (doc: Doc) => {
-    const docPath = `indexes/local/${doc.name}`;
-
    userService
-      .deletePath(docPath)
+      .deletePath(doc.id ?? '')
      .then(() => {
        return getDocs();
      })
--- a/frontend/src/api/endpoints.ts
+++ b/frontend/src/api/endpoints.ts
@@ -10,7 +10,7 @@ const endpoints = {
    DELETE_PROMPT: '/api/delete_prompt',
    UPDATE_PROMPT: '/api/update_prompt',
    SINGLE_PROMPT: (id: string) => `/api/get_single_prompt?id=${id}`,
-    DELETE_PATH: (docPath: string) => `/api/delete_old?path=${docPath}`,
+    DELETE_PATH: (docPath: string) => `/api/delete_old?source_id=${docPath}`,
    TASK_STATUS: (task_id: string) => `/api/task_status?task_id=${task_id}`,
    MESSAGE_ANALYTICS: '/api/get_message_analytics',
    TOKEN_ANALYTICS: '/api/get_token_analytics',
--- a/frontend/src/components/Dropdown.tsx
+++ b/frontend/src/components/Dropdown.tsx
@@ -27,6 +27,7 @@ function Dropdown({
    | string
    | { label: string; value: string }
    | { value: number; description: string }
+    | { name: string; id: string; type: string }
    | null;
  onSelect:
    | ((value: string) => void)
--- a/frontend/src/components/SourceDropdown.tsx
+++ b/frontend/src/components/SourceDropdown.tsx
@@ -1,7 +1,7 @@
 import React from 'react';
 import Trash from '../assets/trash.svg';
 import Arrow2 from '../assets/dropdown-arrow.svg';
-import { Doc } from '../preferences/preferenceApi';
+import { Doc } from '../models/misc';
 import { useDispatch } from 'react-redux';
 import { useTranslation } from 'react-i18next';
 type Props = {
@@ -63,9 +63,6 @@ function SourceDropdown({
            <p className="max-w-3/4 truncate whitespace-nowrap">
              {selectedDocs?.name || 'None'}
            </p>
-            <p className="flex flex-col items-center justify-center">
-              {selectedDocs?.version}
-            </p>
          </div>
        </span>
        <img
--- a/frontend/src/conversation/Conversation.tsx
+++ b/frontend/src/conversation/Conversation.tsx
@@ -1,4 +1,4 @@
-import React, { Fragment, useEffect, useRef, useState } from 'react';
+import { Fragment, useEffect, useRef, useState } from 'react';
 import { useTranslation } from 'react-i18next';
 import { useDispatch, useSelector } from 'react-redux';

@@ -30,7 +30,7 @@ export default function Conversation() {
  const status = useSelector(selectStatus);
  const conversationId = useSelector(selectConversationId);
  const dispatch = useDispatch<AppDispatch>();
-  const endMessageRef = useRef<HTMLDivElement>(null);
+  const conversationRef = useRef<HTMLDivElement>(null);
  const inputRef = useRef<HTMLTextAreaElement>(null);
  const [isDarkTheme] = useDarkTheme();
  const [hasScrolledToLast, setHasScrolledToLast] = useState(true);
@@ -58,26 +58,6 @@ export default function Conversation() {
    fetchStream.current && fetchStream.current.abort();
  }, [conversationId]);

-  useEffect(() => {
-    const observerCallback: IntersectionObserverCallback = (entries) => {
-      entries.forEach((entry) => {
-        setHasScrolledToLast(entry.isIntersecting);
-      });
-    };
-
-    const observer = new IntersectionObserver(observerCallback, {
-      root: null,
-      threshold: [1, 0.8],
-    });
-    if (endMessageRef.current) {
-      observer.observe(endMessageRef.current);
-    }
-
-    return () => {
-      observer.disconnect();
-    };
-  }, [endMessageRef.current]);
-
  useEffect(() => {
    if (queries.length) {
      queries[queries.length - 1].error && setLastQueryReturnedErr(true);
@@ -86,10 +66,16 @@ export default function Conversation() {
  }, [queries[queries.length - 1]]);

  const scrollIntoView = () => {
-    endMessageRef?.current?.scrollIntoView({
+    if (!conversationRef?.current || eventInterrupt) return;
+
+    if (status === 'idle' || !queries[queries.length - 1].response) {
+      conversationRef.current.scrollTo({
        behavior: 'smooth',
-      block: 'start',
+        top: conversationRef.current.scrollHeight,
      });
+    } else {
+      conversationRef.current.scrollTop = conversationRef.current.scrollHeight;
+    }
  };

  const handleQuestion = ({
@@ -143,7 +129,6 @@ export default function Conversation() {
    if (query.response) {
      responseView = (
        <ConversationBubble
-          ref={endMessageRef}
          className={`${index === queries.length - 1 ? 'mb-32' : 'mb-7'}`}
          key={`${index}ANSWER`}
          message={query.response}
@@ -176,7 +161,6 @@ export default function Conversation() {
      );
      responseView = (
        <ConversationBubble
-          ref={endMessageRef}
          className={`${index === queries.length - 1 ? 'mb-32' : 'mb-7'} `}
          key={`${index}ERROR`}
          message={query.error}
@@ -234,6 +218,7 @@ export default function Conversation() {
        </>
      )}
      <div
+        ref={conversationRef}
        onWheel={handleUserInterruption}
        onTouchMove={handleUserInterruption}
        className="flex h-[90%] w-full flex-1 justify-center overflow-y-auto p-4 md:h-[83vh]"
--- a/frontend/src/conversation/ConversationBubble.tsx
+++ b/frontend/src/conversation/ConversationBubble.tsx
@@ -250,7 +250,10 @@ const ConversationBubble = forwardRef<
                      </div>
                    </div>
                  ) : (
-                    <code className={className ? className : ''} {...props}>
+                    <code
+                      className={className ? className : 'whitespace-pre-line'}
+                      {...props}
+                    >
                      {children}
                    </code>
                  );
--- a/frontend/src/conversation/conversationHandlers.ts
+++ b/frontend/src/conversation/conversationHandlers.ts
@@ -1,32 +1,6 @@
 import conversationService from '../api/services/conversationService';
-import { Doc } from '../preferences/preferenceApi';
-import { Answer, FEEDBACK } from './conversationModels';
-
-function getDocPath(selectedDocs: Doc | null): string {
-  let docPath = 'default';
-  if (selectedDocs) {
-    let namePath = selectedDocs.name;
-    if (selectedDocs.language === namePath) {
-      namePath = '.project';
-    }
-    if (selectedDocs.location === 'local') {
-      docPath = 'local' + '/' + selectedDocs.name + '/';
-    } else if (selectedDocs.location === 'remote') {
-      docPath =
-        selectedDocs.language +
-        '/' +
-        namePath +
-        '/' +
-        selectedDocs.version +
-        '/' +
-        selectedDocs.model +
-        '/';
-    } else if (selectedDocs.location === 'custom') {
-      docPath = selectedDocs.docLink;
-    }
-  }
-  return docPath;
-}
+import { Doc } from '../models/misc';
+import { Answer, FEEDBACK, RetrievalPayload } from './conversationModels';

 export function handleFetchAnswer(
  question: string,
@@ -54,23 +28,22 @@ export function handleFetchAnswer(
      title: any;
    }
 > {
-  const docPath = getDocPath(selectedDocs);
  history = history.map((item) => {
    return { prompt: item.prompt, response: item.response };
  });
-  return conversationService
-    .answer(
-      {
+  const payload: RetrievalPayload = {
    question: question,
-        history: history,
-        active_docs: docPath,
+    history: JSON.stringify(history),
    conversation_id: conversationId,
    prompt_id: promptId,
    chunks: chunks,
    token_limit: token_limit,
-      },
-      signal,
-    )
+  };
+  if (selectedDocs && 'id' in selectedDocs)
+    payload.active_docs = selectedDocs.id as string;
+  payload.retriever = selectedDocs?.retriever as string;
+  return conversationService
+    .answer(payload, signal)
    .then((response) => {
      if (response.ok) {
        return response.json();
@@ -101,16 +74,27 @@ export function handleFetchAnswerSteaming(
  token_limit: number,
  onEvent: (event: MessageEvent) => void,
 ): Promise<Answer> {
-  const docPath = getDocPath(selectedDocs);
  history = history.map((item) => {
    return { prompt: item.prompt, response: item.response };
  });
+  const payload: RetrievalPayload = {
+    question: question,
+    history: JSON.stringify(history),
+    conversation_id: conversationId,
+    prompt_id: promptId,
+    chunks: chunks,
+    token_limit: token_limit,
+  };
+  if (selectedDocs && 'id' in selectedDocs)
+    payload.active_docs = selectedDocs.id as string;
+  payload.retriever = selectedDocs?.retriever as string;
+
  return new Promise<Answer>((resolve, reject) => {
    conversationService
      .answerStream(
        {
          question: question,
-          active_docs: docPath,
+          active_docs: selectedDocs?.id as string,
          history: JSON.stringify(history),
          conversation_id: conversationId,
          prompt_id: promptId,
@@ -176,11 +160,23 @@ export function handleSearch(
  chunks: string,
  token_limit: number,
 ) {
-  const docPath = getDocPath(selectedDocs);
+  history = history.map((item) => {
+    return { prompt: item.prompt, response: item.response };
+  });
+  const payload: RetrievalPayload = {
+    question: question,
+    history: JSON.stringify(history),
+    conversation_id: conversation_id,
+    chunks: chunks,
+    token_limit: token_limit,
+  };
+  if (selectedDocs && 'id' in selectedDocs)
+    payload.active_docs = selectedDocs.id as string;
+  payload.retriever = selectedDocs?.retriever as string;
  return conversationService
    .search({
      question: question,
-      active_docs: docPath,
+      active_docs: selectedDocs?.id as string,
      conversation_id,
      history,
      chunks: chunks,
--- a/frontend/src/conversation/conversationModels.ts
+++ b/frontend/src/conversation/conversationModels.ts
@@ -31,3 +31,13 @@ export interface Query {
  conversationId?: string | null;
  title?: string | null;
 }
+// Request body built by the conversation handlers before calling the answer endpoints.
+export interface RetrievalPayload {
+  question: string;
+  // Id of the selected source document; left unset when no document with an id is selected.
+  active_docs?: string;
+  // Retriever name copied from the selected source document, if it has one.
+  retriever?: string;
+  // JSON-encoded array of { prompt, response } turns.
+  history: string;
+  conversation_id: string | null;
+  prompt_id?: string | null;
+  chunks: string;
+  token_limit: number;
+}
--- a/frontend/src/hooks/useDefaultDocument.ts
+++ b/frontend/src/hooks/useDefaultDocument.ts
@@ -1,7 +1,8 @@
 import React from 'react';
 import { useDispatch, useSelector } from 'react-redux';

-import { Doc, getDocs } from '../preferences/preferenceApi';
+import { getDocs } from '../preferences/preferenceApi';
+import { Doc } from '../models/misc';
 import {
  selectSelectedDocs,
  setSelectedDocs,
--- a/frontend/src/modals/CreateAPIKeyModal.tsx
+++ b/frontend/src/modals/CreateAPIKeyModal.tsx
@@ -22,8 +22,9 @@ export default function CreateAPIKeyModal({

  const [APIKeyName, setAPIKeyName] = React.useState<string>('');
  const [sourcePath, setSourcePath] = React.useState<{
-    label: string;
-    value: string;
+    name: string;
+    id: string;
+    type: string;
  } | null>(null);
  const [prompt, setPrompt] = React.useState<{
    name: string;
@@ -41,27 +42,17 @@ export default function CreateAPIKeyModal({
      ? docs
          .filter((doc) => doc.model === embeddingsName)
          .map((doc: Doc) => {
-            let namePath = doc.name;
-            if (doc.language === namePath) {
-              namePath = '.project';
-            }
-            let docPath = 'default';
-            if (doc.location === 'local') {
-              docPath = 'local' + '/' + doc.name + '/';
-            } else if (doc.location === 'remote') {
-              docPath =
-                doc.language +
-                '/' +
-                namePath +
-                '/' +
-                doc.version +
-                '/' +
-                doc.model +
-                '/';
+            if ('id' in doc) {
+              return {
+                name: doc.name,
+                id: doc.id as string,
+                type: 'local',
+              };
            }
            return {
-              label: doc.name,
-              value: docPath,
+              name: doc.name,
+              id: doc.id ?? 'default',
+              type: doc.type ?? 'default',
            };
          })
      : [];
@@ -107,9 +98,14 @@ export default function CreateAPIKeyModal({
          <Dropdown
            placeholder={t('modals.createAPIKey.sourceDoc')}
            selectedValue={sourcePath}
-            onSelect={(selection: { label: string; value: string }) =>
-              setSourcePath(selection)
-            }
+            onSelect={(selection: {
+              name: string;
+              id: string;
+              type: string;
+            }) => {
+              setSourcePath(selection);
+              console.log(selection);
+            }}
            options={extractDocPaths()}
            size="w-full"
            rounded="xl"
@@ -142,16 +138,22 @@ export default function CreateAPIKeyModal({
        </div>
        <button
          disabled={!sourcePath || APIKeyName.length === 0 || !prompt}
-          onClick={() =>
-            sourcePath &&
-            prompt &&
-            createAPIKey({
+          onClick={() => {
+            if (sourcePath && prompt) {
+              const payload: any = {
                name: APIKeyName,
-              source: sourcePath.value,
                prompt_id: prompt.id,
                chunks: chunk,
-            })
+              };
+              if (sourcePath.type === 'default') {
+                payload.retriever = sourcePath.id;
              }
+              if (sourcePath.type === 'local') {
+                payload.source = sourcePath.id;
+              }
+              createAPIKey(payload);
+            }
+          }}
          className="float-right mt-4 rounded-full bg-purple-30 px-5 py-2 text-sm text-white hover:bg-[#6F3FD1] disabled:opacity-50"
        >
          {t('modals.createAPIKey.create')}
--- a/frontend/src/modals/ShareConversationModal.tsx
+++ b/frontend/src/modals/ShareConversationModal.tsx
@@ -46,27 +46,9 @@ export const ShareConversationModal = ({
      ? docs
          .filter((doc) => doc.model === embeddingsName)
          .map((doc: Doc) => {
-            let namePath = doc.name;
-            if (doc.language === namePath) {
-              namePath = '.project';
-            }
-            let docPath = 'default';
-            if (doc.location === 'local') {
-              docPath = 'local' + '/' + doc.name + '/';
-            } else if (doc.location === 'remote') {
-              docPath =
-                doc.language +
-                '/' +
-                namePath +
-                '/' +
-                doc.version +
-                '/' +
-                doc.model +
-                '/';
-            }
            return {
              label: doc.name,
-              value: docPath,
+              value: doc.id ?? 'default',
            };
          })
      : [];
--- a/frontend/src/models/misc.ts
+++ b/frontend/src/models/misc.ts
@@ -4,16 +4,13 @@ export type User = {
  avatar: string;
 };
 export type Doc = {
-  location: string;
+  id?: string;
  name: string;
-  language: string;
-  version: string;
-  description: string;
-  fullName: string;
  date: string;
-  docLink: string;
  model: string;
  tokens?: string;
+  type?: string;
+  retriever?: string;
 };

 export type PromptProps = {
--- a/frontend/src/preferences/preferenceApi.ts
+++ b/frontend/src/preferences/preferenceApi.ts
@@ -1,18 +1,6 @@
 import conversationService from '../api/services/conversationService';
 import userService from '../api/services/userService';
-
-// not all properties in Doc are going to be present. Make some optional
-export type Doc = {
-  location: string;
-  name: string;
-  language: string;
-  version: string;
-  description: string;
-  fullName: string;
-  date: string;
-  docLink: string;
-  model: string;
-};
+import { Doc } from '../models/misc';

 //Fetches all JSON objects from the source. We only use the objects with the "model" property in SelectDocsModal.tsx. Hopefully can clean up the source file later.
 export async function getDocs(): Promise<Doc[] | null> {
@@ -78,17 +66,10 @@ export function setLocalPrompt(prompt: string): void {

 export function setLocalRecentDocs(doc: Doc | null): void {
  localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(doc));
-  let namePath = doc?.name;
-  if (doc?.language === namePath) {
-    namePath = '.project';
-  }

  let docPath = 'default';
-  if (doc?.location === 'local') {
+  if (doc?.type === 'local') {
    docPath = 'local' + '/' + doc.name + '/';
-  } else if (doc?.location === 'remote') {
-    docPath =
-      doc.language + '/' + namePath + '/' + doc.version + '/' + doc.model + '/';
  }
  userService
    .checkDocs({
--- a/frontend/src/preferences/preferenceSlice.ts
+++ b/frontend/src/preferences/preferenceSlice.ts
@@ -4,9 +4,9 @@ import {
  createSlice,
  isAnyOf,
 } from '@reduxjs/toolkit';
-import { Doc, setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
+import { setLocalApiKey, setLocalRecentDocs } from './preferenceApi';
 import { RootState } from '../store';
-import { ActiveState } from '../models/misc';
+import { ActiveState, Doc } from '../models/misc';

 interface Preference {
  apiKey: string;
@@ -25,15 +25,13 @@ const initialState: Preference = {
  chunks: '2',
  token_limit: 2000,
  selectedDocs: {
+    id: 'default',
    name: 'default',
-    language: 'default',
-    location: 'default',
-    version: 'default',
-    description: 'default',
-    fullName: 'default',
+    type: 'remote',
    date: 'default',
    docLink: 'default',
    model: 'openai_text-embedding-ada-002',
+    retriever: 'classic',
  } as Doc,
  sourceDocs: null,
  conversations: null,
--- a/frontend/src/settings/APIKeys.tsx
+++ b/frontend/src/settings/APIKeys.tsx
@@ -47,7 +47,8 @@ export default function APIKeys() {

  const handleCreateKey = (payload: {
    name: string;
-    source: string;
+    source?: string;
+    retriever?: string;
    prompt_id: string;
    chunks: string;
  }) => {
--- a/frontend/src/settings/Documents.tsx
+++ b/frontend/src/settings/Documents.tsx
@@ -61,12 +61,10 @@ const Documents: React.FC<DocumentsProps> = ({
                      {document.tokens ? formatTokens(+document.tokens) : ''}
                    </td>
                    <td className="border-r border-t px-4 py-2">
-                      {document.location === 'remote'
-                        ? 'Pre-loaded'
-                        : 'Private'}
+                      {document.type === 'remote' ? 'Pre-loaded' : 'Private'}
                    </td>
                    <td className="border-t px-4 py-2">
-                      {document.location !== 'remote' && (
+                      {document.type !== 'remote' && (
                        <img
                          src={Trash}
                          alt="Delete"
--- a/frontend/src/settings/index.tsx
+++ b/frontend/src/settings/index.tsx
@@ -6,7 +6,7 @@ import userService from '../api/services/userService';
 import ArrowLeft from '../assets/arrow-left.svg';
 import ArrowRight from '../assets/arrow-right.svg';
 import i18n from '../locale/i18n';
-import { Doc } from '../preferences/preferenceApi';
+import { Doc } from '../models/misc';
 import {
  selectSourceDocs,
  setSourceDocs,
@@ -39,9 +39,8 @@ export default function Settings() {
  };

  const handleDeleteClick = (index: number, doc: Doc) => {
-    const docPath = 'indexes/' + 'local' + '/' + doc.name;
    userService
-      .deletePath(docPath)
+      .deletePath(doc.id ?? '')
      .then((response) => {
        if (response.ok && documents) {
          const updatedDocuments = [
--- a/frontend/src/store.ts
+++ b/frontend/src/store.ts
@@ -26,15 +26,12 @@ const store = configureStore({
      conversations: null,
      sourceDocs: [
        {
-          location: '',
-          language: '',
          name: 'default',
-          version: '',
          date: '',
-          description: '',
-          docLink: '',
-          fullName: '',
          model: '1.0',
+          type: 'remote',
+          id: 'default',
+          // Spelled 'classic' to match the default retriever name used elsewhere in this change.
+          retriever: 'classic',
        },
      ],
      modalState: 'INACTIVE',
--- a/frontend/src/upload/Upload.tsx
+++ b/frontend/src/upload/Upload.tsx
@@ -120,7 +120,7 @@ function Upload({
                    dispatch(setSourceDocs(data));
                    dispatch(
                      setSelectedDocs(
-                        data?.find((d) => d.location.toLowerCase() === 'local'),
+                        data?.find((d) => d.type?.toLowerCase() === 'local'),
                      ),
                    );
                  });
@@ -137,7 +137,7 @@ function Upload({
                    dispatch(setSourceDocs(data));
                    dispatch(
                      setSelectedDocs(
-                        data?.find((d) => d.location.toLowerCase() === 'local'),
+                        data?.find((d) => d.type?.toLowerCase() === 'local'),
                      ),
                    );
                  });
--- a/mock-backend/package-lock.json
+++ b/mock-backend/package-lock.json
--- a/mock-backend/package.json
+++ b/mock-backend/package.json
@@ -13,7 +13,7 @@
  "license": "ISC",
  "dependencies": {
    "cors": "^2.8.5",
-    "json-server": "^0.17.4",
+    "json-server": "^1.0.0-beta.2",
    "uuid": "^9.0.1"
  },
  "devDependencies": {
--- a/scripts/migrate_to_v1_vectorstore.py
+++ b/scripts/migrate_to_v1_vectorstore.py
@@ -0,0 +1,55 @@
+import pymongo
+import os
+
+def migrate_to_v1_vectorstore_mongo():
+    client = pymongo.MongoClient("mongodb://localhost:27017/")
+    db = client["docsgpt"]
+    vectors_collection = db["vectors"]
+    sources_collection = db["sources"]
+
+    for vector in vectors_collection.find():
+        if "location" in vector:
+            del vector["location"]
+        if "retriever" not in vector:
+            vector["retriever"] = "classic"
+            vector["remote_data"] = None
+        vectors_collection.update_one({"_id": vector["_id"]}, {"$set": vector})
+
+    # move data from vectors_collection to sources_collection
+    for vector in vectors_collection.find():
+        sources_collection.insert_one(vector)
+
+    vectors_collection.drop()
+
+    client.close()
+
+def migrate_faiss_to_v1_vectorstore():
+    client = pymongo.MongoClient("mongodb://localhost:27017/")
+    db = client["docsgpt"]
+    vectors_collection = db["vectors"]
+
+    for vector in vectors_collection.find():
+        old_path = f"./application/indexes/{vector['user']}/{vector['name']}"
+        new_path = f"./application/indexes/{vector['_id']}"
+        try:
+            os.rename(old_path, new_path)
+        except OSError as e:
+            print(f"Error moving {old_path} to {new_path}: {e}")
+
+    client.close()
+
+def migrate_mongo_atlas_vector_to_v1_vectorstore():
+    client = pymongo.MongoClient("mongodb+srv://<username>:<password>@<cluster>/<dbname>?retryWrites=true&w=majority")
+    db = client["docsgpt"]
+    vectors_collection = db["vectors"]
+
+    # mongodb atlas collection
+    documents_collection = db["documents"]
+
+    for vector in vectors_collection.find():
+        documents_collection.update_many({"store": vector["user"] + "/" + vector["name"]}, {"$set": {"source_id": str(vector["_id"])}})
+
+    client.close()
+
+migrate_faiss_to_v1_vectorstore()
+migrate_to_v1_vectorstore_mongo()