From 1c4d7a6ad1f0dce36e7304e57172bf0587de79f5 Mon Sep 17 00:00:00 2001
From: Jacksonxhx
Date: Tue, 30 Jul 2024 17:44:27 +0800
Subject: [PATCH] integrated milvus db

---
 application/core/settings.py              |  7 ++++-
 application/vectorstore/milvus.py         | 47 +++++++++++++++++++++++
 application/vectorstore/vector_creator.py |  2 ++
 3 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 application/vectorstore/milvus.py

diff --git a/application/core/settings.py b/application/core/settings.py
index 6ae5475c..ff29bbb2 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -18,7 +18,7 @@ class Settings(BaseSettings):
     DEFAULT_MAX_HISTORY: int = 150
     MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
     UPLOAD_FOLDER: str = "inputs"
-    VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant"
+    VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus"
     RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
 
     API_URL: str = "http://localhost:7091" # backend url for celery worker
@@ -61,6 +61,11 @@ class Settings(BaseSettings):
     QDRANT_PATH: Optional[str] = None
     QDRANT_DISTANCE_FUNC: str = "Cosine"
 
+    # Milvus vectorstore config
+    MILVUS_COLLECTION_NAME: Optional[str] = "docsgpt"
+    MILVUS_URI: Optional[str] = "./milvus_local.db" # milvus lite version as default
+    MILVUS_TOKEN: Optional[str] = ""
+
     BRAVE_SEARCH_API_KEY: Optional[str] = None
 
     FLASK_DEBUG_MODE: bool = False
diff --git a/application/vectorstore/milvus.py b/application/vectorstore/milvus.py
new file mode 100644
index 00000000..0861f593
--- /dev/null
+++ b/application/vectorstore/milvus.py
@@ -0,0 +1,47 @@
+from typing import List, Optional
+from langchain_community.vectorstores.milvus import Milvus
+
+from application.core.settings import settings
+from application.vectorstore.base import BaseVectorStore
+
+
+class MilvusStore(BaseVectorStore):
+    """Vector store that delegates to a LangChain ``Milvus`` collection."""
+
+    def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
+        """Connect to the Milvus collection named in the settings.
+
+        Args:
+            path: Milvus URI to connect to; ``settings.MILVUS_URI`` is used
+                when this is empty.
+            embeddings_key: Passed through to ``self._get_embeddings``.
+        """
+        super().__init__()
+        connection_args = {
+            "uri": path or settings.MILVUS_URI,
+            "token": settings.MILVUS_TOKEN,
+        }
+        # drop_old is deliberately left at its default: dropping the
+        # collection on every instantiation would erase ingested documents.
+        self._docsearch = Milvus(
+            embedding_function=self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key),
+            collection_name=settings.MILVUS_COLLECTION_NAME,
+            connection_args=connection_args,
+        )
+
+    def search(self, question, k=2, *args, **kwargs):
+        """Return the result of a similarity search for ``question`` (top ``k``)."""
+        # Forward positionally so extra *args cannot collide with keyword names.
+        return self._docsearch.similarity_search(question, k, *args, **kwargs)
+
+    def add_texts(self, texts: List[str], metadatas: Optional[List[dict]], *args, **kwargs):
+        """Add ``texts`` and their ``metadatas`` to the underlying collection."""
+        return self._docsearch.add_texts(texts, metadatas, *args, **kwargs)
+
+    def save_local(self, *args, **kwargs):
+        """No-op; this method does not persist anything."""
+        pass
+
+    def delete_index(self, *args, **kwargs):
+        """No-op; index deletion is not implemented here."""
+        pass
diff --git a/application/vectorstore/vector_creator.py b/application/vectorstore/vector_creator.py
index 27b38645..259fa31f 100644
--- a/application/vectorstore/vector_creator.py
+++ b/application/vectorstore/vector_creator.py
@@ -1,5 +1,6 @@
 from application.vectorstore.faiss import FaissStore
 from application.vectorstore.elasticsearch import ElasticsearchStore
+from application.vectorstore.milvus import MilvusStore
 from application.vectorstore.mongodb import MongoDBVectorStore
 from application.vectorstore.qdrant import QdrantStore
 
@@ -10,6 +11,7 @@ class VectorCreator:
         "elasticsearch": ElasticsearchStore,
         "mongodb": MongoDBVectorStore,
         "qdrant": QdrantStore,
+        "milvus": MilvusStore,
     }
 
     @classmethod