(feat:attachment) upload single file

ManishMadan2882
2025-03-27 03:28:03 +05:30
parent 502dc9ec52
commit 55050a9f58
2 changed files with 86 additions and 95 deletions


@@ -2488,50 +2488,49 @@ class StoreAttachment(Resource):
             },
         )
     )
-    @api.doc(description="Stores an attachment without vectorization or training")
+    @api.doc(description="Stores a single attachment without vectorization or training")
     def post(self):
         decoded_token = request.decoded_token
         if not decoded_token:
             return make_response(jsonify({"success": False}), 401)
 
-        files = request.files.getlist("file")
-        if not files or all(file.filename == "" for file in files):
+        # Get single file instead of list
+        file = request.files.get("file")
+        if not file or file.filename == "":
             return make_response(
-                jsonify({"status": "error", "message": "Missing files"}),
+                jsonify({"status": "error", "message": "Missing file"}),
                 400,
             )
         user = secure_filename(decoded_token.get("sub"))
-        saved_files = []
 
         try:
-            for file in files:
-                original_filename = secure_filename(file.filename)
-                folder_name = original_filename
-
-                # Create directory structure: user/attachments/filename/
-                base_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, "attachments", folder_name)
-                os.makedirs(base_dir, exist_ok=True)
-                file_path = os.path.join(base_dir, original_filename)
-
-                # Handle filename conflicts
-                if os.path.exists(file_path):
-                    name_parts = os.path.splitext(original_filename)
-                    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-                    new_filename = f"{name_parts[0]}_{timestamp}{name_parts[1]}"
-                    file_path = os.path.join(base_dir, new_filename)
-                    original_filename = new_filename
-
-                file.save(file_path)
-                saved_files.append({"folder": folder_name, "filename": original_filename})
-                current_app.logger.info(f"Saved file: {file_path}")
-
-            # Start async task to process files
+            original_filename = secure_filename(file.filename)
+            folder_name = original_filename
+
+            # Create directory structure: user/attachments/filename/
+            base_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, "attachments", folder_name)
+            os.makedirs(base_dir, exist_ok=True)
+            file_path = os.path.join(base_dir, original_filename)
+
+            # Handle filename conflicts
+            if os.path.exists(file_path):
+                name_parts = os.path.splitext(original_filename)
+                timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+                new_filename = f"{name_parts[0]}_{timestamp}{name_parts[1]}"
+                file_path = os.path.join(base_dir, new_filename)
+                original_filename = new_filename
+
+            file.save(file_path)
+            file_info = {"folder": folder_name, "filename": original_filename}
+            current_app.logger.info(f"Saved file: {file_path}")
+
+            # Start async task to process single file
             task = store_attachment.delay(
                 os.path.abspath(os.path.join(current_dir, settings.UPLOAD_FOLDER)),
-                saved_files,
+                file_info,
                 user
             )
@@ -2539,11 +2538,11 @@ class StoreAttachment(Resource):
             return make_response(
                 jsonify({
                     "success": True,
                     "task_id": task.id,
-                    "message": "Files uploaded successfully. Processing started."
+                    "message": "File uploaded successfully. Processing started."
                 }),
                 200
            )
         except Exception as err:
             current_app.logger.error(f"Error storing attachment: {err}")
             return make_response(jsonify({"success": False, "error": str(err)}), 400)
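Note: after this change the endpoint reads exactly one multipart field named "file" and responds with a Celery task id. A minimal client sketch, assuming a requests-style call; the route path and the Authorization header are illustrative guesses, since neither appears in this diff:

import requests

def upload_attachment(path, token, base_url="http://localhost:7091"):
    # The handler reads a single file from the "file" field, so we send
    # one multipart part rather than a list of parts.
    with open(path, "rb") as f:
        resp = requests.post(
            f"{base_url}/api/store_attachment",  # assumed route path
            headers={"Authorization": token},     # assumed auth header
            files={"file": f},
        )
    resp.raise_for_status()
    # Expected body per the diff: {"success": True, "task_id": ..., "message": ...}
    return resp.json()["task_id"]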


@@ -313,18 +313,18 @@ def sync_worker(self, frequency):
         for key in ["total_sync_count", "sync_success", "sync_failure"]
     }
 
-def attachment_worker(self, directory, saved_files, user):
+def attachment_worker(self, directory, file_info, user):
     """
-    Process and store attachments without vectorization.
+    Process and store a single attachment without vectorization.
 
     Args:
         self: Reference to the instance of the task.
         directory (str): Base directory for storing files.
-        saved_files (list): List of dictionaries with folder and filename info.
+        file_info (dict): Dictionary with folder and filename info.
         user (str): User identifier.
 
     Returns:
-        dict: Information about processed attachments.
+        dict: Information about processed attachment.
     """
     import datetime
     import os
@@ -334,71 +334,63 @@ def attachment_worker(self, directory, saved_files, user):
     db = mongo["docsgpt"]
     attachments_collection = db["attachments"]
 
-    file_entries = []
-    total_tokens = 0
-    total_files = len(saved_files)
-
-    job_name = saved_files[0]["folder"] if saved_files else "attachment_job"
-    logging.info(f"Processing attachments: {job_name}", extra={"user": user, "job": job_name})
+    job_name = file_info["folder"]
+    logging.info(f"Processing attachment: {job_name}", extra={"user": user, "job": job_name})
 
     self.update_state(state="PROGRESS", meta={"current": 10})
 
-    for idx, file_info in enumerate(saved_files):
-        progress = int(((idx + 1) / total_files) * 100)
-        self.update_state(state="PROGRESS", meta={"current": progress})
-
-        folder_name = file_info["folder"]
-        filename = file_info["filename"]
-
-        base_dir = os.path.join(directory, user, "attachments", folder_name)
-        file_path = os.path.join(base_dir, filename)
-
-        logging.info(f"Processing file: {file_path}", extra={"user": user, "job": job_name})
-
-        if not os.path.exists(file_path):
-            logging.warning(f"File not found: {file_path}", extra={"user": user, "job": job_name})
-            continue
-
-        try:
-            reader = SimpleDirectoryReader(
-                input_files=[file_path]
-            )
-            documents = reader.load_data()
-
-            if documents:
-                content = documents[0].text
-                token_count = num_tokens_from_string(content)
-                total_tokens += token_count
-
-                file_entries.append({
-                    "path": f"{user}/attachments/{folder_name}/{filename}",
-                    "content": content,
-                    "token_count": token_count
-                })
-
-                logging.info(f"Successfully processed {filename} with {token_count} tokens",
-                             extra={"user": user, "job": job_name})
-        except Exception as e:
-            logging.error(f"Error processing file {filename}: {e}",
-                          extra={"user": user, "job": job_name}, exc_info=True)
-
-    if file_entries:
-        attachment_id = attachments_collection.insert_one({
-            "user": user,
-            "files": file_entries,
-            "total_tokens": total_tokens,
-            "date": datetime.datetime.now(),
-        }).inserted_id
-
-        logging.info(f"Stored attachment with ID: {attachment_id}",
-                     extra={"user": user, "job": job_name})
-
-        return {
-            "attachment_id": str(attachment_id),
-            "files": [{"filename": fe["filename"], "folder": fe["folder"], "path": fe["path"]} for fe in file_entries],
-            "total_tokens": total_tokens,
-            "file_contents": [{"filename": fe["filename"], "token_count": fe["token_count"]} for fe in file_entries]
-        }
-    else:
-        logging.warning("No files were successfully processed",
-                        extra={"user": user, "job": job_name})
-        return {"error": "No files were successfully processed"}
+    folder_name = file_info["folder"]
+    filename = file_info["filename"]
+
+    base_dir = os.path.join(directory, user, "attachments", folder_name)
+    file_path = os.path.join(base_dir, filename)
+
+    logging.info(f"Processing file: {file_path}", extra={"user": user, "job": job_name})
+
+    if not os.path.exists(file_path):
+        logging.warning(f"File not found: {file_path}", extra={"user": user, "job": job_name})
+        return {"error": "File not found"}
+
+    try:
+        reader = SimpleDirectoryReader(
+            input_files=[file_path]
+        )
+        documents = reader.load_data()
+
+        self.update_state(state="PROGRESS", meta={"current": 50})
+
+        if documents:
+            content = documents[0].text
+            token_count = num_tokens_from_string(content)
+
+            file_path_relative = f"{user}/attachments/{folder_name}/{filename}"
+
+            attachment_id = attachments_collection.insert_one({
+                "user": user,
+                "path": file_path_relative,
+                "content": content,
+                "token_count": token_count,
+                "date": datetime.datetime.now(),
+            }).inserted_id
+
+            logging.info(f"Stored attachment with ID: {attachment_id}",
+                         extra={"user": user, "job": job_name})
+
+            self.update_state(state="PROGRESS", meta={"current": 100})
+
+            return {
+                "attachment_id": str(attachment_id),
+                "filename": filename,
+                "folder": folder_name,
+                "path": file_path_relative,
+                "token_count": token_count
+            }
+        else:
+            logging.warning("No content was extracted from the file",
+                            extra={"user": user, "job": job_name})
+            return {"error": "No content was extracted from the file"}
+    except Exception as e:
+        logging.error(f"Error processing file {filename}: {e}",
+                      extra={"user": user, "job": job_name}, exc_info=True)
+        return {"error": f"Error processing file: {str(e)}"}
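Note: the worker now inserts one flat document per attachment instead of a "files" array with aggregate token counts. A sketch of the new document shape and the task's return value; the field names are taken from the diff above, while the values are made up for illustration:

import datetime

# Document inserted into the "attachments" collection (one per file):
attachment_doc = {
    "user": "local",                                    # illustrative user id
    "path": "local/attachments/report.pdf/report.pdf",  # user/attachments/<folder>/<filename>
    "content": "...extracted text...",
    "token_count": 1234,
    "date": datetime.datetime.now(),
}

# Corresponding result returned by attachment_worker:
task_result = {
    "attachment_id": "65f0c0ffee0123456789abcd",  # str(inserted ObjectId), illustrative
    "filename": "report.pdf",
    "folder": "report.pdf",
    "path": "local/attachments/report.pdf/report.pdf",
    "token_count": 1234,
}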