From 78d5ed2ed2c69f2e07377fbafc8c74aaadcfba4b Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Wed, 11 Jun 2025 21:04:50 +0530 Subject: [PATCH] (fix:ingestion) uuid for non-ascii filename --- application/api/internal/routes.py | 17 +++++++++-------- application/api/user/routes.py | 16 +++++++--------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/application/api/internal/routes.py b/application/api/internal/routes.py index 70c62877..2f60d8ea 100755 --- a/application/api/internal/routes.py +++ b/application/api/internal/routes.py @@ -7,6 +7,7 @@ import logging from application.core.mongo_db import MongoDB from application.core.settings import settings from application.storage.storage_creator import StorageCreator +from application.utils import safe_filename logger = logging.getLogger(__name__) @@ -37,18 +38,18 @@ def upload_index_files(): """Upload two files(index.faiss, index.pkl) to the user's folder.""" if "user" not in request.form: return {"status": "no user"} - user = secure_filename(request.form["user"]) + user = safe_filename(request.form["user"]) if "name" not in request.form: return {"status": "no name"} - job_name = secure_filename(request.form["name"]) - tokens = secure_filename(request.form["tokens"]) - retriever = secure_filename(request.form["retriever"]) - id = secure_filename(request.form["id"]) - type = secure_filename(request.form["type"]) + job_name = safe_filename(request.form["name"]) + tokens = request.form["tokens"] + retriever = request.form["retriever"] + id = request.form["id"] + type = request.form["type"] remote_data = request.form["remote_data"] if "remote_data" in request.form else None - sync_frequency = secure_filename(request.form["sync_frequency"]) if "sync_frequency" in request.form else None + sync_frequency = request.form["sync_frequency"] if "sync_frequency" in request.form else None - original_file_path = request.form.get("original_file_path") + original_file_path = request.form.get("original_file_path") # Already sanitized path storage = StorageCreator.get_storage() index_base_path = f"indexes/{id}" diff --git a/application/api/user/routes.py b/application/api/user/routes.py index d98e4092..dbcc751e 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -28,7 +28,7 @@ from application.core.settings import settings from application.extensions import api from application.storage.storage_creator import StorageCreator from application.tts.google_tts import GoogleTTS -from application.utils import check_required_fields, validate_function_name +from application.utils import check_required_fields, safe_filename, validate_function_name from application.vectorstore.vector_creator import VectorCreator storage = StorageCreator.get_storage() @@ -497,20 +497,17 @@ class UploadFile(Resource): ), 400, ) - user = secure_filename(decoded_token.get("sub")) - job_name = secure_filename(request.form["name"]) + user = safe_filename(decoded_token.get("sub")) + job_name = safe_filename(request.form["name"]) try: - from application.storage.storage_creator import StorageCreator - storage = StorageCreator.get_storage() - base_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}" if len(files) > 1: temp_files = [] for file in files: - filename = secure_filename(file.filename) + filename = safe_filename(file.filename) temp_path = f"{base_path}/temp/{filename}" storage.save_file(file, temp_path) temp_files.append(temp_path) @@ -604,7 +601,7 @@ class UploadFile(Resource): # For single file file = files[0] - filename = secure_filename(file.filename) + filename = safe_filename(file.filename) file_path = f"{base_path}/{filename}" storage.save_file(file, file_path) @@ -3457,7 +3454,8 @@ class StoreAttachment(Resource): jsonify({"status": "error", "message": "Missing file"}), 400, ) - user = secure_filename(decoded_token.get("sub")) + # Apply safe_filename to user ID + user = safe_filename(decoded_token.get("sub")) try: attachment_id = ObjectId()