diff --git a/application/api/user/routes.py b/application/api/user/routes.py index d8a19913..da7ebe07 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -3695,35 +3695,45 @@ class UpdateChunk(Resource): ) try: store = get_vector_store(doc_id) + chunks = store.get_chunks() existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None) if not existing_chunk: return make_response(jsonify({"error": "Chunk not found"}), 404) - deleted = store.delete_chunk(chunk_id) - if not deleted: - return make_response( - jsonify({"error": "Failed to delete existing chunk"}), 500 - ) + new_text = text if text is not None else existing_chunk["text"] - new_metadata = ( - metadata if metadata is not None else existing_chunk["metadata"] - ) - if text is not None and metadata is None: - token_count = num_tokens_from_string(new_text) - new_metadata["token_count"] = token_count + if metadata is not None: + new_metadata = existing_chunk["metadata"].copy() + new_metadata.update(metadata) + else: + new_metadata = existing_chunk["metadata"].copy() - new_chunk_id = store.add_chunk(new_text, new_metadata) + if text is not None: + new_metadata["token_count"] = num_tokens_from_string(new_text) - return make_response( - jsonify( - { - "message": "Chunk updated successfully", - "new_chunk_id": new_chunk_id, - } - ), - 200, - ) + try: + new_chunk_id = store.add_chunk(new_text, new_metadata) + + deleted = store.delete_chunk(chunk_id) + if not deleted: + current_app.logger.warning(f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created") + + return make_response( + jsonify( + { + "message": "Chunk updated successfully", + "chunk_id": new_chunk_id, + "original_chunk_id": chunk_id, + } + ), + 200, + ) + except Exception as add_error: + current_app.logger.error(f"Failed to add updated chunk: {add_error}") + return make_response( + jsonify({"error": "Failed to update chunk - addition failed"}), 500 + ) except Exception as e: current_app.logger.error(f"Error updating chunk: {e}", exc_info=True) return make_response(jsonify({"success": False}), 500) diff --git a/application/vectorstore/faiss.py b/application/vectorstore/faiss.py index b9c63cc8..3e86203b 100644 --- a/application/vectorstore/faiss.py +++ b/application/vectorstore/faiss.py @@ -67,25 +67,36 @@ class FaissStore(BaseVectorStore): def add_texts(self, *args, **kwargs): return self.docsearch.add_texts(*args, **kwargs) - def save_local(self, path): + def _save_to_storage(self): """ - Save the FAISS index to disk and upload to storage. - - Args: - path: Path where the index should be stored + Save the FAISS index to storage using temporary directory pattern. + Works consistently for both local and S3 storage. """ with tempfile.TemporaryDirectory() as temp_dir: self.docsearch.save_local(temp_dir) - - with open(os.path.join(temp_dir, "index.faiss"), "rb") as f_faiss: + + faiss_path = os.path.join(temp_dir, "index.faiss") + pkl_path = os.path.join(temp_dir, "index.pkl") + + with open(faiss_path, "rb") as f_faiss: faiss_data = f_faiss.read() - - with open(os.path.join(temp_dir, "index.pkl"), "rb") as f_pkl: + + with open(pkl_path, "rb") as f_pkl: pkl_data = f_pkl.read() - - self.storage.save_file(io.BytesIO(faiss_data), f"{path}/index.faiss") - self.storage.save_file(io.BytesIO(pkl_data), f"{path}/index.pkl") - + + storage_path = get_vectorstore(self.source_id) + self.storage.save_file(io.BytesIO(faiss_data), f"{storage_path}/index.faiss") + self.storage.save_file(io.BytesIO(pkl_data), f"{storage_path}/index.pkl") + + return True + + def save_local(self, path=None): + if path: + os.makedirs(path, exist_ok=True) + self.docsearch.save_local(path) + + self._save_to_storage() + return True def delete_index(self, *args, **kwargs): @@ -122,13 +133,17 @@ class FaissStore(BaseVectorStore): return chunks def add_chunk(self, text, metadata=None): + """Add a new chunk and save to storage.""" metadata = metadata or {} doc = Document(text=text, extra_info=metadata).to_langchain_format() doc_id = self.docsearch.add_documents([doc]) - self.save_local(self.path) + self._save_to_storage() return doc_id + + def delete_chunk(self, chunk_id): + """Delete a chunk and save to storage.""" self.delete_index([chunk_id]) - self.save_local(self.path) + self._save_to_storage() return True