mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
(fix:ingestion) display names are separate
This commit is contained in:
@@ -38,10 +38,10 @@ def upload_index_files():
|
||||
"""Upload two files(index.faiss, index.pkl) to the user's folder."""
|
||||
if "user" not in request.form:
|
||||
return {"status": "no user"}
|
||||
user = safe_filename(request.form["user"])
|
||||
user = request.form["user"]
|
||||
if "name" not in request.form:
|
||||
return {"status": "no name"}
|
||||
job_name = safe_filename(request.form["name"])
|
||||
job_name = request.form["name"]
|
||||
tokens = request.form["tokens"]
|
||||
retriever = request.form["retriever"]
|
||||
id = request.form["id"]
|
||||
|
||||
@@ -497,12 +497,16 @@ class UploadFile(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
user = safe_filename(decoded_token.get("sub"))
|
||||
job_name = safe_filename(request.form["name"])
|
||||
user = decoded_token.get("sub")
|
||||
job_name = request.form["name"]
|
||||
|
||||
# Create safe versions for filesystem operations
|
||||
safe_user = safe_filename(user)
|
||||
dir_name = safe_filename(job_name)
|
||||
|
||||
try:
|
||||
storage = StorageCreator.get_storage()
|
||||
base_path = f"{settings.UPLOAD_FOLDER}/{user}/{job_name}"
|
||||
base_path = f"{settings.UPLOAD_FOLDER}/{safe_user}/{dir_name}"
|
||||
|
||||
if len(files) > 1:
|
||||
temp_files = []
|
||||
@@ -512,11 +516,11 @@ class UploadFile(Resource):
|
||||
storage.save_file(file, temp_path)
|
||||
temp_files.append(temp_path)
|
||||
print(f"Saved file: {filename}")
|
||||
zip_filename = f"{job_name}.zip"
|
||||
zip_filename = f"{dir_name}.zip"
|
||||
zip_path = f"{base_path}/{zip_filename}"
|
||||
zip_temp_path = None
|
||||
|
||||
def create_zip_archive(temp_paths, job_name, storage):
|
||||
def create_zip_archive(temp_paths, dir_name, storage):
|
||||
import tempfile
|
||||
|
||||
with tempfile.NamedTemporaryFile(
|
||||
@@ -556,7 +560,7 @@ class UploadFile(Resource):
|
||||
return zip_output_path
|
||||
|
||||
try:
|
||||
zip_temp_path = create_zip_archive(temp_files, job_name, storage)
|
||||
zip_temp_path = create_zip_archive(temp_files, dir_name, storage)
|
||||
with open(zip_temp_path, "rb") as zip_file:
|
||||
storage.save_file(zip_file, zip_path)
|
||||
task = ingest.delay(
|
||||
@@ -581,6 +585,8 @@ class UploadFile(Resource):
|
||||
job_name,
|
||||
zip_filename,
|
||||
user,
|
||||
dir_name,
|
||||
safe_user,
|
||||
)
|
||||
finally:
|
||||
# Clean up temporary files
|
||||
@@ -628,6 +634,8 @@ class UploadFile(Resource):
|
||||
job_name,
|
||||
filename, # Corrected variable for single-file case
|
||||
user,
|
||||
dir_name,
|
||||
safe_user,
|
||||
)
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error uploading file: {err}", exc_info=True)
|
||||
|
||||
@@ -11,8 +11,8 @@ from application.worker import (
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest(self, directory, formats, name_job, filename, user):
|
||||
resp = ingest_worker(self, directory, formats, name_job, filename, user)
|
||||
def ingest(self, directory, formats, job_name, filename, user, dir_name, user_dir):
|
||||
resp = ingest_worker(self, directory, formats, job_name, filename, user, dir_name, user_dir)
|
||||
return resp
|
||||
|
||||
|
||||
|
||||
@@ -194,7 +194,7 @@ def run_agent_logic(agent_config, input_data):
|
||||
|
||||
# Define the main function for ingesting and processing documents.
|
||||
def ingest_worker(
|
||||
self, directory, formats, name_job, filename, user, retriever="classic"
|
||||
self, directory, formats, job_name, filename, user, dir_name=None, user_dir=None, retriever="classic"
|
||||
):
|
||||
"""
|
||||
Ingest and process documents.
|
||||
@@ -203,9 +203,11 @@ def ingest_worker(
|
||||
self: Reference to the instance of the task.
|
||||
directory (str): Specifies the directory for ingesting ('inputs' or 'temp').
|
||||
formats (list of str): List of file extensions to consider for ingestion (e.g., [".rst", ".md"]).
|
||||
name_job (str): Name of the job for this ingestion task.
|
||||
job_name (str): Name of the job for this ingestion task (original, unsanitized).
|
||||
filename (str): Name of the file to be ingested.
|
||||
user (str): Identifier for the user initiating the ingestion.
|
||||
user (str): Identifier for the user initiating the ingestion (original, unsanitized).
|
||||
dir_name (str, optional): Sanitized directory name for filesystem operations.
|
||||
user_dir (str, optional): Sanitized user ID for filesystem operations.
|
||||
retriever (str): Type of retriever to use for processing the documents.
|
||||
|
||||
Returns:
|
||||
@@ -216,13 +218,13 @@ def ingest_worker(
|
||||
limit = None
|
||||
exclude = True
|
||||
sample = False
|
||||
|
||||
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
full_path = os.path.join(directory, user, name_job)
|
||||
full_path = os.path.join(directory, user_dir, dir_name)
|
||||
source_file_path = os.path.join(full_path, filename)
|
||||
|
||||
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
|
||||
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": job_name})
|
||||
|
||||
# Create temporary working directory
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
@@ -283,9 +285,9 @@ def ingest_worker(
|
||||
for i in range(min(5, len(raw_docs))):
|
||||
logging.info(f"Sample document {i}: {raw_docs[i]}")
|
||||
file_data = {
|
||||
"name": name_job,
|
||||
"name": job_name, # Use original job_name
|
||||
"file": filename,
|
||||
"user": user,
|
||||
"user": user, # Use original user
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
@@ -302,9 +304,9 @@ def ingest_worker(
|
||||
return {
|
||||
"directory": directory,
|
||||
"formats": formats,
|
||||
"name_job": name_job,
|
||||
"name_job": job_name, # Use original job_name
|
||||
"filename": filename,
|
||||
"user": user,
|
||||
"user": user, # Use original user
|
||||
"limited": False,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user