mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Merge pull request #917 from arc53/multiple-uploads
Multiple file upload
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
|
import shutil
|
||||||
from flask import Blueprint, request, jsonify
|
from flask import Blueprint, request, jsonify
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import requests
|
import requests
|
||||||
@@ -136,30 +137,43 @@ def upload_file():
|
|||||||
return {"status": "no name"}
|
return {"status": "no name"}
|
||||||
job_name = secure_filename(request.form["name"])
|
job_name = secure_filename(request.form["name"])
|
||||||
# check if the post request has the file part
|
# check if the post request has the file part
|
||||||
if "file" not in request.files:
|
files = request.files.getlist("file")
|
||||||
print("No file part")
|
|
||||||
return {"status": "no file"}
|
if not files or all(file.filename == '' for file in files):
|
||||||
file = request.files["file"]
|
|
||||||
if file.filename == "":
|
|
||||||
return {"status": "no file name"}
|
return {"status": "no file name"}
|
||||||
|
|
||||||
if file:
|
# Directory where files will be saved
|
||||||
filename = secure_filename(file.filename)
|
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
|
||||||
# save dir
|
os.makedirs(save_dir, exist_ok=True)
|
||||||
save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
|
|
||||||
# create dir if not exists
|
if len(files) > 1:
|
||||||
if not os.path.exists(save_dir):
|
# Multiple files; prepare them for zip
|
||||||
os.makedirs(save_dir)
|
temp_dir = os.path.join(save_dir, "temp")
|
||||||
|
os.makedirs(temp_dir, exist_ok=True)
|
||||||
file.save(os.path.join(save_dir, filename))
|
|
||||||
task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",
|
for file in files:
|
||||||
".csv", ".epub", ".html", ".mdx"],
|
filename = secure_filename(file.filename)
|
||||||
job_name, filename, user)
|
file.save(os.path.join(temp_dir, filename))
|
||||||
# task id
|
|
||||||
task_id = task.id
|
# Use shutil.make_archive to zip the temp directory
|
||||||
return {"status": "ok", "task_id": task_id}
|
zip_path = shutil.make_archive(base_name=os.path.join(save_dir, job_name), format='zip', root_dir=temp_dir)
|
||||||
|
final_filename = os.path.basename(zip_path)
|
||||||
|
|
||||||
|
# Clean up the temporary directory after zipping
|
||||||
|
shutil.rmtree(temp_dir)
|
||||||
else:
|
else:
|
||||||
return {"status": "error"}
|
# Single file
|
||||||
|
file = files[0]
|
||||||
|
final_filename = secure_filename(file.filename)
|
||||||
|
file_path = os.path.join(save_dir, final_filename)
|
||||||
|
file.save(file_path)
|
||||||
|
|
||||||
|
# Call ingest with the single file or zipped file
|
||||||
|
task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",
|
||||||
|
".csv", ".epub", ".html", ".mdx"],
|
||||||
|
job_name, final_filename, user)
|
||||||
|
|
||||||
|
return {"status": "ok", "task_id": task.id}
|
||||||
|
|
||||||
@user.route("/api/remote", methods=["POST"])
|
@user.route("/api/remote", methods=["POST"])
|
||||||
def upload_remote():
|
def upload_remote():
|
||||||
|
|||||||
@@ -36,6 +36,32 @@ current_dir = os.path.dirname(
|
|||||||
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def extract_zip_recursive(zip_path, extract_to, current_depth=0, max_depth=5):
    """
    Extract a zip file and, recursively, any zip files it contains.

    The archive at ``zip_path`` is extracted into ``extract_to`` and then
    deleted. Every member of that archive whose extracted name ends with
    ".zip" is in turn extracted into the directory it landed in, one level
    deeper. Only archives produced by this extraction are followed, so
    sibling archives are each handled exactly once and at their true
    nesting depth.

    Args:
        zip_path (str): Path to the zip file to be extracted.
        extract_to (str): Destination path for extracted files.
        current_depth (int): Current depth of recursion.
        max_depth (int): Maximum allowed depth of recursion to prevent infinite loops.
            An archive found deeper than this is left on disk unextracted.

    Returns:
        None

    Raises:
        zipfile.BadZipFile: If ``zip_path`` or a nested archive is not a valid zip.
        OSError: If an archive cannot be read, written out, or removed.
    """
    if current_depth > max_depth:
        print(f"Reached maximum recursion depth of {max_depth}")
        return

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        # extract() returns the normalised on-disk path of each member, which
        # tells us exactly what this archive produced without re-scanning the
        # destination tree.
        extracted_paths = [
            zip_ref.extract(member, extract_to) for member in zip_ref.infolist()
        ]
    os.remove(zip_path)  # Remove the zip file after extracting

    # Follow only the archives that came out of this zip. Re-walking the whole
    # destination here would let a nested call consume (and delete) a sibling
    # archive that the caller is still about to open.
    for extracted_path in extracted_paths:
        if not extracted_path.endswith(".zip"):
            continue
        # Skip directory entries named "*.zip" and duplicate members whose
        # file was already extracted and removed.
        if not os.path.isfile(extracted_path):
            continue
        extract_zip_recursive(
            extracted_path,
            os.path.dirname(extracted_path),
            current_depth + 1,
            max_depth,
        )
|
||||||
|
|
||||||
|
|
||||||
# Define the main function for ingesting and processing documents.
|
# Define the main function for ingesting and processing documents.
|
||||||
def ingest_worker(self, directory, formats, name_job, filename, user):
|
def ingest_worker(self, directory, formats, name_job, filename, user):
|
||||||
@@ -66,9 +92,11 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
|||||||
token_check = True
|
token_check = True
|
||||||
min_tokens = 150
|
min_tokens = 150
|
||||||
max_tokens = 1250
|
max_tokens = 1250
|
||||||
full_path = directory + "/" + user + "/" + name_job
|
recursion_depth = 2
|
||||||
|
full_path = os.path.join(directory, user, name_job)
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
print(full_path, file=sys.stderr)
|
print(full_path, file=sys.stderr)
|
||||||
# check if API_URL env variable is set
|
# check if API_URL env variable is set
|
||||||
file_data = {"name": name_job, "file": filename, "user": user}
|
file_data = {"name": name_job, "file": filename, "user": user}
|
||||||
@@ -81,14 +109,12 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
|||||||
|
|
||||||
if not os.path.exists(full_path):
|
if not os.path.exists(full_path):
|
||||||
os.makedirs(full_path)
|
os.makedirs(full_path)
|
||||||
with open(full_path + "/" + filename, "wb") as f:
|
with open(os.path.join(full_path, filename), "wb") as f:
|
||||||
f.write(file)
|
f.write(file)
|
||||||
|
|
||||||
# check if file is .zip and extract it
|
# check if file is .zip and extract it
|
||||||
if filename.endswith(".zip"):
|
if filename.endswith(".zip"):
|
||||||
with zipfile.ZipFile(full_path + "/" + filename, "r") as zip_ref:
|
extract_zip_recursive(os.path.join(full_path, filename), full_path, 0, recursion_depth)
|
||||||
zip_ref.extractall(full_path)
|
|
||||||
os.remove(full_path + "/" + filename)
|
|
||||||
|
|
||||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||||
|
|
||||||
|
|||||||
@@ -201,7 +201,7 @@ export default function Upload({
|
|||||||
|
|
||||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||||
onDrop,
|
onDrop,
|
||||||
multiple: false,
|
multiple: true,
|
||||||
onDragEnter: doNothing,
|
onDragEnter: doNothing,
|
||||||
onDragOver: doNothing,
|
onDragOver: doNothing,
|
||||||
onDragLeave: doNothing,
|
onDragLeave: doNothing,
|
||||||
|
|||||||
Reference in New Issue
Block a user