(fix:update_chunk) data integrity, uplod back faiss

This commit is contained in:
ManishMadan2882
2025-08-05 06:31:00 +05:30
parent b000b235a2
commit e349eb28b0
2 changed files with 61 additions and 36 deletions

View File

@@ -3695,35 +3695,45 @@ class UpdateChunk(Resource):
)
try:
store = get_vector_store(doc_id)
chunks = store.get_chunks()
existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None)
if not existing_chunk:
return make_response(jsonify({"error": "Chunk not found"}), 404)
deleted = store.delete_chunk(chunk_id)
if not deleted:
return make_response(
jsonify({"error": "Failed to delete existing chunk"}), 500
)
new_text = text if text is not None else existing_chunk["text"]
new_metadata = (
metadata if metadata is not None else existing_chunk["metadata"]
)
if text is not None and metadata is None:
token_count = num_tokens_from_string(new_text)
new_metadata["token_count"] = token_count
if metadata is not None:
new_metadata = existing_chunk["metadata"].copy()
new_metadata.update(metadata)
else:
new_metadata = existing_chunk["metadata"].copy()
new_chunk_id = store.add_chunk(new_text, new_metadata)
if text is not None:
new_metadata["token_count"] = num_tokens_from_string(new_text)
return make_response(
jsonify(
{
"message": "Chunk updated successfully",
"new_chunk_id": new_chunk_id,
}
),
200,
)
try:
new_chunk_id = store.add_chunk(new_text, new_metadata)
deleted = store.delete_chunk(chunk_id)
if not deleted:
current_app.logger.warning(f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created")
return make_response(
jsonify(
{
"message": "Chunk updated successfully",
"chunk_id": new_chunk_id,
"original_chunk_id": chunk_id,
}
),
200,
)
except Exception as add_error:
current_app.logger.error(f"Failed to add updated chunk: {add_error}")
return make_response(
jsonify({"error": "Failed to update chunk - addition failed"}), 500
)
except Exception as e:
current_app.logger.error(f"Error updating chunk: {e}", exc_info=True)
return make_response(jsonify({"success": False}), 500)

View File

@@ -67,25 +67,36 @@ class FaissStore(BaseVectorStore):
def add_texts(self, *args, **kwargs):
return self.docsearch.add_texts(*args, **kwargs)
def save_local(self, path):
def _save_to_storage(self):
"""
Save the FAISS index to disk and upload to storage.
Args:
path: Path where the index should be stored
Save the FAISS index to storage using temporary directory pattern.
Works consistently for both local and S3 storage.
"""
with tempfile.TemporaryDirectory() as temp_dir:
self.docsearch.save_local(temp_dir)
with open(os.path.join(temp_dir, "index.faiss"), "rb") as f_faiss:
faiss_path = os.path.join(temp_dir, "index.faiss")
pkl_path = os.path.join(temp_dir, "index.pkl")
with open(faiss_path, "rb") as f_faiss:
faiss_data = f_faiss.read()
with open(os.path.join(temp_dir, "index.pkl"), "rb") as f_pkl:
with open(pkl_path, "rb") as f_pkl:
pkl_data = f_pkl.read()
self.storage.save_file(io.BytesIO(faiss_data), f"{path}/index.faiss")
self.storage.save_file(io.BytesIO(pkl_data), f"{path}/index.pkl")
storage_path = get_vectorstore(self.source_id)
self.storage.save_file(io.BytesIO(faiss_data), f"{storage_path}/index.faiss")
self.storage.save_file(io.BytesIO(pkl_data), f"{storage_path}/index.pkl")
return True
def save_local(self, path=None):
if path:
os.makedirs(path, exist_ok=True)
self.docsearch.save_local(path)
self._save_to_storage()
return True
def delete_index(self, *args, **kwargs):
@@ -122,13 +133,17 @@ class FaissStore(BaseVectorStore):
return chunks
def add_chunk(self, text, metadata=None):
"""Add a new chunk and save to storage."""
metadata = metadata or {}
doc = Document(text=text, extra_info=metadata).to_langchain_format()
doc_id = self.docsearch.add_documents([doc])
self.save_local(self.path)
self._save_to_storage()
return doc_id
def delete_chunk(self, chunk_id):
"""Delete a chunk and save to storage."""
self.delete_index([chunk_id])
self.save_local(self.path)
self._save_to_storage()
return True