mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-30 00:53:14 +00:00
fix: metadata things
This commit is contained in:
@@ -116,25 +116,24 @@ def delete_by_ids():
|
||||
def delete_old():
|
||||
"""Delete old indexes."""
|
||||
import shutil
|
||||
name = request.args.get("name")
|
||||
user = request.args.get("user")
|
||||
path = request.args.get("path")
|
||||
doc = vectors_collection.find_one({
|
||||
"user":user,
|
||||
"name":name
|
||||
"_id": ObjectId(path),
|
||||
"user": "local",
|
||||
})
|
||||
print("user",user)
|
||||
print("file",name)
|
||||
if(doc is None):
|
||||
return {"status":"not found"},404
|
||||
path_clean = doc["location"]
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
try:
|
||||
shutil.rmtree(os.path.join(current_dir, path_clean))
|
||||
shutil.rmtree(os.path.join(current_dir, str(doc["_id"])))
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
else:
|
||||
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean))
|
||||
vetorstore = VectorCreator.create_vectorstore(settings.VECTOR_STORE, path=str(doc["_id"]))
|
||||
vetorstore.delete_index()
|
||||
vectors_collection.delete_one({
|
||||
"_id": ObjectId(path),
|
||||
})
|
||||
|
||||
return {"status": "ok"}
|
||||
|
||||
|
||||
@@ -11,7 +11,9 @@ from retry import retry
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def store_add_texts_with_retry(store, i):
|
||||
def store_add_texts_with_retry(store, i, id):
|
||||
# add store to the metadata
|
||||
i.metadata["store"] = str(id)
|
||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
# store_pine.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
|
||||
@@ -38,7 +40,7 @@ def call_openai_api(docs, folder_name, id, task_status):
|
||||
else:
|
||||
store = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE,
|
||||
path=id,
|
||||
path=str(id),
|
||||
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
|
||||
)
|
||||
# Uncomment for MPNet embeddings
|
||||
@@ -57,7 +59,7 @@ def call_openai_api(docs, folder_name, id, task_status):
|
||||
task_status.update_state(
|
||||
state="PROGRESS", meta={"current": int((c1 / s1) * 100)}
|
||||
)
|
||||
store_add_texts_with_retry(store, i)
|
||||
store_add_texts_with_retry(store, i, id)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Error on ", i)
|
||||
|
||||
@@ -18,8 +18,7 @@ from application.parser.token_func import group_split
|
||||
|
||||
# Define a function to extract metadata from a given filename.
|
||||
def metadata_from_filename(title):
|
||||
store = "/".join(title.split("/")[1:3])
|
||||
return {"title": title, "store": store}
|
||||
return {"title": title}
|
||||
|
||||
|
||||
# Define a function to generate a random string of a given length.
|
||||
@@ -189,7 +188,6 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
|
||||
self.update_state(state="PROGRESS", meta={"current": 100})
|
||||
|
||||
# Proceed with uploading and cleaning as in the original function
|
||||
id = ObjectId()
|
||||
file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever,
|
||||
"id": str(id), 'type': loader, 'remote_data': source_data}
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
|
||||
Reference in New Issue
Block a user