fix: metadata things (key vector index paths and the `store` metadata field by document id)

This commit is contained in:
Alex
2024-09-09 13:37:11 +01:00
parent 888e13e198
commit 1bb81614a5
3 changed files with 14 additions and 15 deletions

View File

@@ -116,25 +116,24 @@ def delete_by_ids():
def delete_old():
"""Delete old indexes."""
import shutil
name = request.args.get("name")
user = request.args.get("user")
path = request.args.get("path")
doc = vectors_collection.find_one({
"user":user,
"name":name
"_id": ObjectId(path),
"user": "local",
})
print("user",user)
print("file",name)
if(doc is None):
return {"status":"not found"},404
path_clean = doc["location"]
if settings.VECTOR_STORE == "faiss":
try:
shutil.rmtree(os.path.join(current_dir, path_clean))
shutil.rmtree(os.path.join(current_dir, str(doc["_id"])))
except FileNotFoundError:
pass
else:
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean))
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=str(doc["_id"]))
vetorstore.delete_index()
vectors_collection.delete_one({
"_id": ObjectId(path),
})
return {"status": "ok"}

View File

@@ -11,7 +11,9 @@ from retry import retry
@retry(tries=10, delay=60)
def store_add_texts_with_retry(store, i):
def store_add_texts_with_retry(store, i, id):
# add store to the metadata
i.metadata["store"] = str(id)
store.add_texts([i.page_content], metadatas=[i.metadata])
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
@@ -38,7 +40,7 @@ def call_openai_api(docs, folder_name, id, task_status):
else:
store = VectorCreator.create_vectorstore(
settings.VECTOR_STORE,
path=id,
path=str(id),
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
# Uncomment for MPNet embeddings
@@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, id, task_status):
task_status.update_state(
state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
)
store_add_texts_with_retry(store, i)
store_add_texts_with_retry(store, i, id)
except Exception as e:
print(e)
print("Error on ", i)

View File

@@ -18,8 +18,7 @@ from application.parser.token_func import group_split
# Define a function to extract metadata from a given filename.
def metadata_from_filename(title):
store = "/".join(title.split("/")[1:3])
return {"title": title, "store": store}
return {"title": title}
# Define a function to generate a random string of a given length.
@@ -189,7 +188,6 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
self.update_state(state="PROGRESS", meta={"current": 100})
# Proceed with uploading and cleaning as in the original function
id = ObjectId()
file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever,
"id": str(id), 'type': loader, 'remote_data': source_data}
if settings.VECTOR_STORE == "faiss":