feat: sync remote sources through celery periodic tasks

This commit is contained in:
Siddhant Rai
2024-09-25 15:20:11 +05:30
parent f92658de82
commit 3d292aa485
13 changed files with 404 additions and 88 deletions

View File

@@ -1,9 +1,11 @@
import os
from application.vectorstore.vector_creator import VectorCreator
from application.core.settings import settings
from retry import retry
from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.embeddings import HuggingFaceInstructEmbeddings
@@ -12,7 +14,7 @@ from retry import retry
@retry(tries=10, delay=60)
def store_add_texts_with_retry(store, i, id):
    """Tag one document with its source id and add it to the vector store.

    The ``retry`` decorator is configured with ``tries=10, delay=60``, so a
    call that raises is re-attempted according to those settings.

    Args:
        store: Object exposing ``add_texts(texts, metadatas=...)``.
        i: Document-like object with ``page_content`` and a mutable
            ``metadata`` mapping; the mapping is modified in place.
        id: Source identifier; stored under ``"source_id"`` as ``str(id)``.
    """
    text = i.page_content
    meta = i.metadata
    # Record which source this chunk belongs to before it is stored.
    meta["source_id"] = str(id)
    store.add_texts([text], metadatas=[meta])
@@ -43,6 +45,7 @@ def call_openai_api(docs, folder_name, id, task_status):
source_id=str(id),
embeddings_key=os.getenv("EMBEDDINGS_KEY"),
)
store.delete_index()
# Uncomment for MPNet embeddings
# model_name = "sentence-transformers/all-mpnet-base-v2"
# hf = HuggingFaceEmbeddings(model_name=model_name)
@@ -70,5 +73,3 @@ def call_openai_api(docs, folder_name, id, task_status):
c1 += 1
if settings.VECTOR_STORE == "faiss":
store.save_local(f"{folder_name}")