mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Revert "(fix:indexes) look for the right path"
This reverts commit 5ad34e2216.
This commit is contained in:
@@ -1,45 +1,35 @@
|
||||
import os
|
||||
import tempfile
|
||||
import logging
|
||||
|
||||
from langchain_community.vectorstores import FAISS
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.schema.base import Document
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.storage.storage_creator import StorageCreator
|
||||
|
||||
|
||||
def get_vectorstore_path(source_id: str) -> str:
|
||||
if source_id:
|
||||
clean_id = source_id.replace("application/indexes/", "").rstrip("/")
|
||||
return f"indexes/{clean_id}"
|
||||
def get_vectorstore(path: str) -> str:
|
||||
if path:
|
||||
vectorstore = os.path.join("application", "indexes", path)
|
||||
else:
|
||||
return "indexes"
|
||||
vectorstore = os.path.join("application")
|
||||
return vectorstore
|
||||
|
||||
|
||||
class FaissStore(BaseVectorStore):
|
||||
def __init__(self, source_id: str, embeddings_key: str, docs_init=None):
|
||||
super().__init__()
|
||||
self.source_id = source_id
|
||||
self.storage = StorageCreator.get_storage()
|
||||
self.storage_path = get_vectorstore_path(source_id)
|
||||
self.path = get_vectorstore(source_id)
|
||||
self.embeddings = self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key)
|
||||
|
||||
try:
|
||||
if docs_init:
|
||||
self.docsearch = FAISS.from_documents(docs_init, self.embeddings)
|
||||
else:
|
||||
if self.storage.__class__.__name__ == "LocalStorage":
|
||||
# For local storage, we can use the path directly
|
||||
local_path = self.storage._get_full_path(self.storage_path)
|
||||
self.docsearch = FAISS.load_local(
|
||||
local_path, self.embeddings, allow_dangerous_deserialization=True
|
||||
)
|
||||
else:
|
||||
# For non-local storage (S3, etc.), download files to temp directory first
|
||||
self.docsearch = self._load_from_remote_storage()
|
||||
except Exception as e:
|
||||
logging.error(f"Error initializing FAISS store: {e}")
|
||||
self.docsearch = FAISS.load_local(
|
||||
self.path, self.embeddings, allow_dangerous_deserialization=True
|
||||
)
|
||||
except Exception:
|
||||
raise
|
||||
|
||||
self.assert_embedding_dimensions(self.embeddings)
|
||||
@@ -50,26 +40,8 @@ class FaissStore(BaseVectorStore):
|
||||
def add_texts(self, *args, **kwargs):
|
||||
return self.docsearch.add_texts(*args, **kwargs)
|
||||
|
||||
def save_local(self, folder_path=None):
|
||||
path_to_use = folder_path or self.storage_path
|
||||
|
||||
if folder_path or self.storage.__class__.__name__ == "LocalStorage":
|
||||
# If it's a local path or temp dir, save directly
|
||||
local_path = path_to_use
|
||||
if self.storage.__class__.__name__ == "LocalStorage" and not folder_path:
|
||||
local_path = self.storage._get_full_path(path_to_use)
|
||||
|
||||
os.makedirs(os.path.dirname(local_path) if os.path.dirname(local_path) else local_path, exist_ok=True)
|
||||
|
||||
self.docsearch.save_local(local_path)
|
||||
|
||||
if folder_path and self.storage.__class__.__name__ != "LocalStorage":
|
||||
self._upload_index_to_remote(folder_path)
|
||||
else:
|
||||
# For remote storage, save to temp dir first, then upload
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
self.docsearch.save_local(temp_dir)
|
||||
self._upload_index_to_remote(temp_dir)
|
||||
def save_local(self, *args, **kwargs):
|
||||
return self.docsearch.save_local(*args, **kwargs)
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
return self.docsearch.delete(*args, **kwargs)
|
||||
@@ -108,62 +80,10 @@ class FaissStore(BaseVectorStore):
|
||||
metadata = metadata or {}
|
||||
doc = Document(text=text, extra_info=metadata).to_langchain_format()
|
||||
doc_id = self.docsearch.add_documents([doc])
|
||||
self.save_local()
|
||||
self.save_local(self.path)
|
||||
return doc_id
|
||||
|
||||
def delete_chunk(self, chunk_id):
|
||||
self.delete_index([chunk_id])
|
||||
self.save_local()
|
||||
self.save_local(self.path)
|
||||
return True
|
||||
|
||||
def _load_from_remote_storage(self):
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
try:
|
||||
# Check if both index files exist in remote storage
|
||||
faiss_path = f"{self.storage_path}/index.faiss"
|
||||
pkl_path = f"{self.storage_path}/index.pkl"
|
||||
|
||||
if not self.storage.file_exists(faiss_path) or not self.storage.file_exists(pkl_path):
|
||||
raise FileNotFoundError(f"FAISS index files not found at {self.storage_path}")
|
||||
|
||||
# Download both files to temp directory
|
||||
faiss_file = self.storage.get_file(faiss_path)
|
||||
pkl_file = self.storage.get_file(pkl_path)
|
||||
|
||||
local_faiss_path = os.path.join(temp_dir, "index.faiss")
|
||||
local_pkl_path = os.path.join(temp_dir, "index.pkl")
|
||||
|
||||
with open(local_faiss_path, 'wb') as f:
|
||||
f.write(faiss_file.read())
|
||||
|
||||
with open(local_pkl_path, 'wb') as f:
|
||||
f.write(pkl_file.read())
|
||||
|
||||
# Load the index from the temp directory
|
||||
return FAISS.load_local(
|
||||
temp_dir, self.embeddings, allow_dangerous_deserialization=True
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading FAISS index from remote storage: {e}")
|
||||
raise
|
||||
|
||||
def _upload_index_to_remote(self, local_folder):
|
||||
try:
|
||||
# Get paths to the index files
|
||||
local_faiss_path = os.path.join(local_folder, "index.faiss")
|
||||
local_pkl_path = os.path.join(local_folder, "index.pkl")
|
||||
|
||||
remote_faiss_path = f"{self.storage_path}/index.faiss"
|
||||
remote_pkl_path = f"{self.storage_path}/index.pkl"
|
||||
|
||||
# Upload both files to remote storage
|
||||
with open(local_faiss_path, 'rb') as f:
|
||||
self.storage.save_file(f, remote_faiss_path)
|
||||
|
||||
with open(local_pkl_path, 'rb') as f:
|
||||
self.storage.save_file(f, remote_pkl_path)
|
||||
|
||||
logging.info(f"Successfully uploaded FAISS index to {self.storage_path}")
|
||||
except Exception as e:
|
||||
logging.error(f"Error uploading FAISS index to remote storage: {e}")
|
||||
raise
|
||||
|
||||
Reference in New Issue
Block a user