fix: write id instead of old path on remote DBs

This commit is contained in:
Alex
2024-09-09 12:00:59 +01:00
parent f105fd1b2c
commit 8166642ff9
2 changed files with 6 additions and 5 deletions

View File

@@ -16,7 +16,7 @@ def store_add_texts_with_retry(store, i):
     # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
-def call_openai_api(docs, folder_name, task_status):
+def call_openai_api(docs, folder_name, id, task_status):
     # Function to create a vector store from the documents and save it to disk
     if not os.path.exists(f"{folder_name}"):
@@ -38,7 +38,7 @@ def call_openai_api(docs, folder_name, task_status):
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            path=f"{folder_name}",
+            path=id,
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     # Uncomment for MPNet embeddings

View File

@@ -127,8 +127,9 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
     )
     docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+    id = ObjectId()
-    call_openai_api(docs, full_path, self)
+    call_openai_api(docs, full_path, id, self)
     tokens = count_tokens_docs(docs)
     self.update_state(state="PROGRESS", meta={"current": 100})
@@ -138,7 +139,6 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
     # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
     # and send them to the server (provide user and name in form)
-    id = ObjectId()
     file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
     if settings.VECTOR_STORE == "faiss":
         files = {
@@ -184,7 +184,8 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
     )
     # docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
     tokens = count_tokens_docs(docs)
-    call_openai_api(docs, full_path, self)
+    id = ObjectId()
+    call_openai_api(docs, full_path, id, self)
     self.update_state(state="PROGRESS", meta={"current": 100})
     # Proceed with uploading and cleaning as in the original function