fix: write id instead of old path on remote DBs

This commit is contained in:
Alex
2024-09-09 12:00:59 +01:00
parent f105fd1b2c
commit 8166642ff9
2 changed files with 6 additions and 5 deletions

View File

@@ -16,7 +16,7 @@ def store_add_texts_with_retry(store, i):
     # store_pine.add_texts([i.page_content], metadatas=[i.metadata])
-def call_openai_api(docs, folder_name, task_status):
+def call_openai_api(docs, folder_name, id, task_status):
     # Function to create a vector store from the documents and save it to disk
     if not os.path.exists(f"{folder_name}"):
@@ -38,7 +38,7 @@ def call_openai_api(docs, folder_name, task_status):
     else:
         store = VectorCreator.create_vectorstore(
             settings.VECTOR_STORE,
-            path=f"{folder_name}",
+            path=id,
             embeddings_key=os.getenv("EMBEDDINGS_KEY"),
         )
     # Uncomment for MPNet embeddings

View File

@@ -127,8 +127,9 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
     )
     docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
+    id = ObjectId()
-    call_openai_api(docs, full_path, self)
+    call_openai_api(docs, full_path, id, self)
     tokens = count_tokens_docs(docs)
     self.update_state(state="PROGRESS", meta={"current": 100})
@@ -138,7 +139,6 @@ def ingest_worker(self, directory, formats, name_job, filename, user, retriever=
     # get files from outputs/inputs/index.faiss and outputs/inputs/index.pkl
     # and send them to the server (provide user and name in form)
-    id = ObjectId()
     file_data = {"name": name_job, "user": user, "tokens": tokens, "retriever": retriever, "id": str(id), 'type': 'local'}
     if settings.VECTOR_STORE == "faiss":
         files = {
@@ -184,7 +184,8 @@ def remote_worker(self, source_data, name_job, user, loader, directory="temp", r
     )
     # docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
     tokens = count_tokens_docs(docs)
-    call_openai_api(docs, full_path, self)
+    id = ObjectId()
+    call_openai_api(docs, full_path, id, self)
     self.update_state(state="PROGRESS", meta={"current": 100})
     # Proceed with uploading and cleaning as in the original function