From 1bb81614a5083a80de62e9a99113fcab6124ad09 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 9 Sep 2024 13:37:11 +0100 Subject: [PATCH] fix: metadata things --- application/api/user/routes.py | 17 ++++++++--------- application/parser/open_ai_func.py | 8 +++++--- application/worker.py | 4 +--- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 43e532e1..2f422d4e 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -116,25 +116,24 @@ def delete_by_ids(): def delete_old(): """Delete old indexes.""" import shutil - name = request.args.get("name") - user = request.args.get("user") + path = request.args.get("path") doc = vectors_collection.find_one({ - "user":user, - "name":name + "_id": ObjectId(path), + "user": "local", }) - print("user",user) - print("file",name) if(doc is None): return {"status":"not found"},404 - path_clean = doc["location"] if settings.VECTOR_STORE == "faiss": try: - shutil.rmtree(os.path.join(current_dir, path_clean)) + shutil.rmtree(os.path.join(current_dir, str(doc["_id"]))) except FileNotFoundError: pass else: - vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)) + vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=str(doc["_id"])) vetorstore.delete_index() + vectors_collection.delete_one({ + "_id": ObjectId(path), + }) return {"status": "ok"} diff --git a/application/parser/open_ai_func.py b/application/parser/open_ai_func.py index d9871a17..30daee2e 100755 --- a/application/parser/open_ai_func.py +++ b/application/parser/open_ai_func.py @@ -11,7 +11,9 @@ from retry import retry @retry(tries=10, delay=60) -def store_add_texts_with_retry(store, i): +def store_add_texts_with_retry(store, i, id): + # add store to the metadata + i.metadata["store"] = str(id) store.add_texts([i.page_content], metadatas=[i.metadata]) # store_pine.add_texts([i.page_content], metadatas=[i.metadata]) @@ -38,7 +40,7 @@ def call_openai_api(docs, folder_name, id, task_status): else: store = VectorCreator.create_vectorstore( settings.VECTOR_STORE, - path=id, + path=str(id), embeddings_key=os.getenv("EMBEDDINGS_KEY"), ) # Uncomment for MPNet embeddings @@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, id, task_status): task_status.update_state( state="PROGRESS", meta={"current": int((c1 / s1) * 100)} ) - store_add_texts_with_retry(store, i) + store_add_texts_with_retry(store, i, id) except Exception as e: print(e) print("Error on ", i) diff --git a/application/worker.py b/application/worker.py index 2b3751d0..7abf0a02 100755 --- a/application/worker.py +++ b/application/worker.py @@ -18,8 +18,7 @@ from application.parser.token_func import group_split # Define a function to extract metadata from a given filename. def metadata_from_filename(title): - store = "/".join(title.split("/")[1:3]) - return {"title": title, "store": store} + return {"title": title} # Define a function to generate a random string of a given length. @@ -189,7 +188,6 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r self.update_state(state="PROGRESS", meta={"current": 100}) # Proceed with uploading and cleaning as in the original function - id = ObjectId() file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': loader, 'remote_data': source_data} if settings.VECTOR_STORE == "faiss":