Refactor MongoDBVectorStore and add delete_index method

This commit is contained in:
Alex
2024-01-06 17:59:01 +00:00
parent 293b7b09a9
commit 4ff834de76
2 changed files with 52 additions and 16 deletions

View File

@@ -5,21 +5,22 @@ from application.vectorstore.document_class import Document
class MongoDBVectorStore(BaseVectorStore):
def __init__(
self,
path: str = "",
embeddings_key: str = "embeddings",
collection: str = "documents",
index_name: str = "default",
index_name: str = "vector_search_index",
text_key: str = "text",
embedding_key: str = "embedding",
embedding_api_key: str = "embedding_api_key",
path: str = "",
database: str = "docsgpt",
):
self._collection = collection
self._embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embedding_api_key)
self._index_name = index_name
self._text_key = text_key
self._embedding_key = embedding_key
self._embeddings_key = embeddings_key
self._mongo_uri = settings.MONGO_URI
self._path = path
# import pymongo
self._path = path.replace("application/indexes/", "").rstrip("/")
self._embedding = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
try:
import pymongo
except ImportError:
@@ -27,30 +28,40 @@ class MongoDBVectorStore(BaseVectorStore):
"Could not import pymongo python package. "
"Please install it with `pip install pymongo`."
)
self._client = pymongo.MongoClient(self._mongo_uri)
self._database = self._client[database]
self._collection = self._database[collection]
def search(self, question, k=2, *args, **kwargs):
query_vector = self._embeddings.embed_query(question)
query_vector = self._embedding.embed_query(question)
pipeline = [
{
"$vectorSearch": {
"queryVector": query_vector,
"path": self._embedding_key,
"limit": k,
"index": self._index_name
"numCandidates": k * 10,
"index": self._index_name,
"filter": {
"store": {"$eq": self._path}
}
}
}
]
cursor = self._client._collection.aggregate(pipeline)
cursor = self._collection.aggregate(pipeline)
results = []
for doc in cursor:
text = doc[self._text_key]
doc.pop("_id")
doc.pop(self._text_key)
doc.pop(self._embedding_key)
metadata = doc
results.append(Document(text, metadata))
return results
def _insert_texts(self, texts, metadatas):
@@ -62,7 +73,7 @@ class MongoDBVectorStore(BaseVectorStore):
for t, m, embedding in zip(texts, metadatas, embeddings)
]
# insert the documents in MongoDB Atlas
insert_result = self.client._collection.insert_many(to_insert)
insert_result = self._collection.insert_many(to_insert)
return insert_result.inserted_ids
def add_texts(self,
@@ -75,6 +86,26 @@ class MongoDBVectorStore(BaseVectorStore):
**kwargs,):
#dims = self._embedding.client[1].word_embedding_dimension
# # check if index exists
# if create_index_if_not_exists:
# # check if index exists
# info = self._collection.index_information()
# if self._index_name not in info:
# index_mongo = {
# "fields": [{
# "type": "vector",
# "path": self._embedding_key,
# "numDimensions": dims,
# "similarity": "cosine",
# },
# {
# "type": "filter",
# "path": "store"
# }]
# }
# self._collection.create_index(self._index_name, index_mongo)
batch_size = 100
_metadatas = metadatas or ({} for _ in texts)
texts_batch = []
@@ -89,4 +120,7 @@ class MongoDBVectorStore(BaseVectorStore):
metadatas_batch = []
if texts_batch:
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
return result_ids
return result_ids
def delete_index(self, *args, **kwargs):
self._collection.delete_many({"store": self._path})

View File

@@ -1,11 +1,13 @@
from application.vectorstore.faiss import FaissStore
from application.vectorstore.elasticsearch import ElasticsearchStore
from application.vectorstore.mongodb import MongoDBVectorStore
class VectorCreator:
vectorstores = {
'faiss': FaissStore,
'elasticsearch':ElasticsearchStore
'elasticsearch':ElasticsearchStore,
'mongodb': MongoDBVectorStore,
}
@classmethod