diff --git a/application/api/user/routes.py b/application/api/user/routes.py
index 083f5f22..1e0d13fe 100644
--- a/application/api/user/routes.py
+++ b/application/api/user/routes.py
@@ -2488,50 +2488,49 @@ class StoreAttachment(Resource):
             },
         )
     )
-    @api.doc(description="Stores an attachment without vectorization or training")
+    @api.doc(description="Stores a single attachment without vectorization or training")
     def post(self):
         decoded_token = request.decoded_token
         if not decoded_token:
             return make_response(jsonify({"success": False}), 401)
 
-        files = request.files.getlist("file")
+        # Get single file instead of list
+        file = request.files.get("file")
 
-        if not files or all(file.filename == "" for file in files):
+        if not file or file.filename == "":
             return make_response(
-                jsonify({"status": "error", "message": "Missing files"}),
+                jsonify({"status": "error", "message": "Missing file"}),
                 400,
             )
 
         user = secure_filename(decoded_token.get("sub"))
-        saved_files = []
 
         try:
-            for file in files:
-                original_filename = secure_filename(file.filename)
-                folder_name = original_filename
-
-                # Create directory structure: user/attachments/filename/
-                base_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, "attachments", folder_name)
-                os.makedirs(base_dir, exist_ok=True)
-
-                file_path = os.path.join(base_dir, original_filename)
-
-                # Handle filename conflicts
-                if os.path.exists(file_path):
-                    name_parts = os.path.splitext(original_filename)
-                    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-                    new_filename = f"{name_parts[0]}_{timestamp}{name_parts[1]}"
-                    file_path = os.path.join(base_dir, new_filename)
-                    original_filename = new_filename
-
-                file.save(file_path)
-                saved_files.append({"folder": folder_name, "filename": original_filename})
-                current_app.logger.info(f"Saved file: {file_path}")
+            original_filename = secure_filename(file.filename)
+            folder_name = original_filename
 
-            # Start async task to process files
+            # Create directory structure: user/attachments/filename/
+            base_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, "attachments", folder_name)
+            os.makedirs(base_dir, exist_ok=True)
+
+            file_path = os.path.join(base_dir, original_filename)
+
+            # Handle filename conflicts
+            if os.path.exists(file_path):
+                name_parts = os.path.splitext(original_filename)
+                timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+                new_filename = f"{name_parts[0]}_{timestamp}{name_parts[1]}"
+                file_path = os.path.join(base_dir, new_filename)
+                original_filename = new_filename
+
+            file.save(file_path)
+            file_info = {"folder": folder_name, "filename": original_filename}
+            current_app.logger.info(f"Saved file: {file_path}")
+
+            # Start async task to process single file
             task = store_attachment.delay(
                 os.path.abspath(os.path.join(current_dir, settings.UPLOAD_FOLDER)),
-                saved_files,
+                file_info,
                 user
             )
 
@@ -2539,11 +2538,11 @@ class StoreAttachment(Resource):
                 jsonify({
                     "success": True,
                     "task_id": task.id,
-                    "message": "Files uploaded successfully. Processing started."
+                    "message": "File uploaded successfully. Processing started."
                 }),
                 200
             )
         except Exception as err:
             current_app.logger.error(f"Error storing attachment: {err}")
-            return make_response(jsonify({"success": False, "error": str(err)}), 400)
\ No newline at end of file
+            return make_response(jsonify({"success": False, "error": str(err)}), 400)
diff --git a/application/worker.py b/application/worker.py
index 260a1db5..4ee61421 100755
--- a/application/worker.py
+++ b/application/worker.py
@@ -313,18 +313,18 @@ def sync_worker(self, frequency):
         for key in ["total_sync_count", "sync_success", "sync_failure"]
     }
 
-def attachment_worker(self, directory, saved_files, user):
+def attachment_worker(self, directory, file_info, user):
     """
-    Process and store attachments without vectorization.
+    Process and store a single attachment without vectorization.
 
     Args:
         self: Reference to the instance of the task.
        directory (str): Base directory for storing files.
-        saved_files (list): List of dictionaries with folder and filename info.
+        file_info (dict): Dictionary with folder and filename info.
        user (str): User identifier.
 
     Returns:
-        dict: Information about processed attachments.
+        dict: Information about processed attachment.
     """
     import datetime
     import os
@@ -334,71 +334,63 @@ def attachment_worker(self, directory, saved_files, user):
     db = mongo["docsgpt"]
     attachments_collection = db["attachments"]
 
-    file_entries = []
-    total_tokens = 0
-    total_files = len(saved_files)
+    job_name = file_info["folder"]
+    logging.info(f"Processing attachment: {job_name}", extra={"user": user, "job": job_name})
 
-    job_name = saved_files[0]["folder"] if saved_files else "attachment_job"
-    logging.info(f"Processing attachments: {job_name}", extra={"user": user, "job": job_name})
+    self.update_state(state="PROGRESS", meta={"current": 10})
 
-    for idx, file_info in enumerate(saved_files):
-
-        progress = int(((idx + 1) / total_files) * 100)
-        self.update_state(state="PROGRESS", meta={"current": progress})
+    folder_name = file_info["folder"]
+    filename = file_info["filename"]
+
+    base_dir = os.path.join(directory, user, "attachments", folder_name)
+    file_path = os.path.join(base_dir, filename)
+
+    logging.info(f"Processing file: {file_path}", extra={"user": user, "job": job_name})
+
+    if not os.path.exists(file_path):
+        logging.warning(f"File not found: {file_path}", extra={"user": user, "job": job_name})
+        return {"error": "File not found"}
+
+    try:
+        reader = SimpleDirectoryReader(
+            input_files=[file_path]
+        )
 
-        folder_name = file_info["folder"]
-        filename = file_info["filename"]
+        documents = reader.load_data()
 
-        base_dir = os.path.join(directory, user, "attachments", folder_name)
-        file_path = os.path.join(base_dir, filename)
+        self.update_state(state="PROGRESS", meta={"current": 50})
 
-        logging.info(f"Processing file: {file_path}", extra={"user": user, "job": job_name})
-
-        if not os.path.exists(file_path):
-            logging.warning(f"File not found: {file_path}", extra={"user": user, "job": job_name})
-            continue
-
-        try:
-            reader = SimpleDirectoryReader(
-                input_files=[file_path]
-            )
+        if documents:
+            content = documents[0].text
+            token_count = num_tokens_from_string(content)
 
-            documents = reader.load_data()
+            file_path_relative = f"{user}/attachments/{folder_name}/{filename}"
 
-            if documents:
-                content = documents[0].text
-                token_count = num_tokens_from_string(content)
-                total_tokens += token_count
-
-                file_entries.append({
-                    "path": f"{user}/attachments/{folder_name}/{filename}",
-                    "content": content,
-                    "token_count": token_count
-                })
-                logging.info(f"Successfully processed {filename} with {token_count} tokens",
-                             extra={"user": user, "job": job_name})
-        except Exception as e:
-            logging.error(f"Error processing file {filename}: {e}",
-                          extra={"user": user, "job": job_name}, exc_info=True)
-
-    if file_entries:
-        attachment_id = attachments_collection.insert_one({
-            "user": user,
-            "files": file_entries,
-            "total_tokens": total_tokens,
-            "date": datetime.datetime.now(),
-        }).inserted_id
-
-        logging.info(f"Stored attachment with ID: {attachment_id}",
-                     extra={"user": user, "job": job_name})
-
-        return {
-            "attachment_id": str(attachment_id),
-            "files": [{"filename": fe["filename"], "folder": fe["folder"], "path": fe["path"]} for fe in file_entries],
-            "total_tokens": total_tokens,
-            "file_contents": [{"filename": fe["filename"], "token_count": fe["token_count"]} for fe in file_entries]
-        }
-    else:
-        logging.warning("No files were successfully processed",
-                        extra={"user": user, "job": job_name})
-        return {"error": "No files were successfully processed"}
+            attachment_id = attachments_collection.insert_one({
+                "user": user,
+                "path": file_path_relative,
+                "content": content,
+                "token_count": token_count,
+                "date": datetime.datetime.now(),
+            }).inserted_id
+
+            logging.info(f"Stored attachment with ID: {attachment_id}",
+                         extra={"user": user, "job": job_name})
+
+            self.update_state(state="PROGRESS", meta={"current": 100})
+
+            return {
+                "attachment_id": str(attachment_id),
+                "filename": filename,
+                "folder": folder_name,
+                "path": file_path_relative,
+                "token_count": token_count
+            }
+        else:
+            logging.warning("No content was extracted from the file",
+                            extra={"user": user, "job": job_name})
+            return {"error": "No content was extracted from the file"}
+    except Exception as e:
+        logging.error(f"Error processing file {filename}: {e}",
+                      extra={"user": user, "job": job_name}, exc_info=True)
+        return {"error": f"Error processing file: {str(e)}"}