feat: migrate store to source_id

This commit is contained in:
Alex
2024-09-09 15:46:18 +01:00
parent 1bb81614a5
commit 2f9c72c1cf
10 changed files with 43 additions and 40 deletions

View File

@@ -9,9 +9,9 @@ import elasticsearch
class ElasticsearchStore(BaseVectorStore):
_es_connection = None # Class attribute to hold the Elasticsearch connection
def __init__(self, path, embeddings_key, index_name=settings.ELASTIC_INDEX):
def __init__(self, source_id, embeddings_key, index_name=settings.ELASTIC_INDEX):
super().__init__()
self.path = path.replace("application/indexes/", "").rstrip("/")
self.source_id = source_id.replace("application/indexes/", "").rstrip("/")
self.embeddings_key = embeddings_key
self.index_name = index_name
@@ -81,7 +81,7 @@ class ElasticsearchStore(BaseVectorStore):
embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, self.embeddings_key)
vector = embeddings.embed_query(question)
knn = {
"filter": [{"match": {"metadata.store.keyword": self.path}}],
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
"field": "vector",
"k": k,
"num_candidates": 100,
@@ -100,7 +100,7 @@ class ElasticsearchStore(BaseVectorStore):
}
}
],
"filter": [{"match": {"metadata.store.keyword": self.path}}],
"filter": [{"match": {"metadata.source_id.keyword": self.source_id}}],
}
},
"rank": {"rrf": {}},
@@ -209,4 +209,4 @@ class ElasticsearchStore(BaseVectorStore):
def delete_index(self):
self._es_connection.delete_by_query(index=self.index_name, query={"match": {
"metadata.store.keyword": self.path}},)
"metadata.source_id.keyword": self.source_id}},)

View File

@@ -5,7 +5,7 @@ from application.vectorstore.document_class import Document
class MongoDBVectorStore(BaseVectorStore):
def __init__(
self,
path: str = "",
source_id: str = "",
embeddings_key: str = "embeddings",
collection: str = "documents",
index_name: str = "vector_search_index",
@@ -18,7 +18,7 @@ class MongoDBVectorStore(BaseVectorStore):
self._embedding_key = embedding_key
self._embeddings_key = embeddings_key
self._mongo_uri = settings.MONGO_URI
self._path = path.replace("application/indexes/", "").rstrip("/")
self._source_id = source_id.replace("application/indexes/", "").rstrip("/")
self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
try:
@@ -46,7 +46,7 @@ class MongoDBVectorStore(BaseVectorStore):
"numCandidates": k * 10,
"index": self._index_name,
"filter": {
"store": {"$eq": self._path}
"source_id": {"$eq": self._source_id}
}
}
}
@@ -123,4 +123,4 @@ class MongoDBVectorStore(BaseVectorStore):
return result_ids
def delete_index(self, *args, **kwargs):
self._collection.delete_many({"store": self._path})
self._collection.delete_many({"source_id": self._source_id})

View File

@@ -5,12 +5,12 @@ from qdrant_client import models
class QdrantStore(BaseVectorStore):
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
def __init__(self, source_id: str = "", embeddings_key: str = "embeddings"):
self._filter = models.Filter(
must=[
models.FieldCondition(
key="metadata.store",
match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
key="metadata.source_id",
match=models.MatchValue(value=source_id.replace("application/indexes/", "").rstrip("/")),
)
]
)