mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
(fix:update_chunk) data integrity, upload back faiss
This commit is contained in:
@@ -3695,35 +3695,45 @@ class UpdateChunk(Resource):
|
||||
)
|
||||
try:
|
||||
store = get_vector_store(doc_id)
|
||||
|
||||
chunks = store.get_chunks()
|
||||
existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None)
|
||||
if not existing_chunk:
|
||||
return make_response(jsonify({"error": "Chunk not found"}), 404)
|
||||
deleted = store.delete_chunk(chunk_id)
|
||||
if not deleted:
|
||||
return make_response(
|
||||
jsonify({"error": "Failed to delete existing chunk"}), 500
|
||||
)
|
||||
|
||||
new_text = text if text is not None else existing_chunk["text"]
|
||||
new_metadata = (
|
||||
metadata if metadata is not None else existing_chunk["metadata"]
|
||||
)
|
||||
|
||||
if text is not None and metadata is None:
|
||||
token_count = num_tokens_from_string(new_text)
|
||||
new_metadata["token_count"] = token_count
|
||||
if metadata is not None:
|
||||
new_metadata = existing_chunk["metadata"].copy()
|
||||
new_metadata.update(metadata)
|
||||
else:
|
||||
new_metadata = existing_chunk["metadata"].copy()
|
||||
|
||||
new_chunk_id = store.add_chunk(new_text, new_metadata)
|
||||
if text is not None:
|
||||
new_metadata["token_count"] = num_tokens_from_string(new_text)
|
||||
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"message": "Chunk updated successfully",
|
||||
"new_chunk_id": new_chunk_id,
|
||||
}
|
||||
),
|
||||
200,
|
||||
)
|
||||
try:
|
||||
new_chunk_id = store.add_chunk(new_text, new_metadata)
|
||||
|
||||
deleted = store.delete_chunk(chunk_id)
|
||||
if not deleted:
|
||||
current_app.logger.warning(f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created")
|
||||
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"message": "Chunk updated successfully",
|
||||
"chunk_id": new_chunk_id,
|
||||
"original_chunk_id": chunk_id,
|
||||
}
|
||||
),
|
||||
200,
|
||||
)
|
||||
except Exception as add_error:
|
||||
current_app.logger.error(f"Failed to add updated chunk: {add_error}")
|
||||
return make_response(
|
||||
jsonify({"error": "Failed to update chunk - addition failed"}), 500
|
||||
)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error updating chunk: {e}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 500)
|
||||
|
||||
@@ -67,25 +67,36 @@ class FaissStore(BaseVectorStore):
|
||||
def add_texts(self, *args, **kwargs):
|
||||
return self.docsearch.add_texts(*args, **kwargs)
|
||||
|
||||
def save_local(self, path):
|
||||
def _save_to_storage(self):
|
||||
"""
|
||||
Save the FAISS index to disk and upload to storage.
|
||||
|
||||
Args:
|
||||
path: Path where the index should be stored
|
||||
Save the FAISS index to storage using temporary directory pattern.
|
||||
Works consistently for both local and S3 storage.
|
||||
"""
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
self.docsearch.save_local(temp_dir)
|
||||
|
||||
with open(os.path.join(temp_dir, "index.faiss"), "rb") as f_faiss:
|
||||
|
||||
faiss_path = os.path.join(temp_dir, "index.faiss")
|
||||
pkl_path = os.path.join(temp_dir, "index.pkl")
|
||||
|
||||
with open(faiss_path, "rb") as f_faiss:
|
||||
faiss_data = f_faiss.read()
|
||||
|
||||
with open(os.path.join(temp_dir, "index.pkl"), "rb") as f_pkl:
|
||||
|
||||
with open(pkl_path, "rb") as f_pkl:
|
||||
pkl_data = f_pkl.read()
|
||||
|
||||
self.storage.save_file(io.BytesIO(faiss_data), f"{path}/index.faiss")
|
||||
self.storage.save_file(io.BytesIO(pkl_data), f"{path}/index.pkl")
|
||||
|
||||
|
||||
storage_path = get_vectorstore(self.source_id)
|
||||
self.storage.save_file(io.BytesIO(faiss_data), f"{storage_path}/index.faiss")
|
||||
self.storage.save_file(io.BytesIO(pkl_data), f"{storage_path}/index.pkl")
|
||||
|
||||
return True
|
||||
|
||||
def save_local(self, path=None):
|
||||
if path:
|
||||
os.makedirs(path, exist_ok=True)
|
||||
self.docsearch.save_local(path)
|
||||
|
||||
self._save_to_storage()
|
||||
|
||||
return True
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
@@ -122,13 +133,17 @@ class FaissStore(BaseVectorStore):
|
||||
return chunks
|
||||
|
||||
def add_chunk(self, text, metadata=None):
|
||||
"""Add a new chunk and save to storage."""
|
||||
metadata = metadata or {}
|
||||
doc = Document(text=text, extra_info=metadata).to_langchain_format()
|
||||
doc_id = self.docsearch.add_documents([doc])
|
||||
self.save_local(self.path)
|
||||
self._save_to_storage()
|
||||
return doc_id
|
||||
|
||||
|
||||
|
||||
def delete_chunk(self, chunk_id):
|
||||
"""Delete a chunk and save to storage."""
|
||||
self.delete_index([chunk_id])
|
||||
self.save_local(self.path)
|
||||
self._save_to_storage()
|
||||
return True
|
||||
|
||||
Reference in New Issue
Block a user