(fix:ingestion) keep display names separate from sanitized filesystem names

This commit is contained in:
ManishMadan2882
2025-06-12 00:57:46 +05:30
parent 44b6ec25a2
commit b8a10e0962
4 changed files with 30 additions and 20 deletions

View File

@@ -194,7 +194,7 @@ def run_agent_logic(agent_config, input_data):
# Define the main function for ingesting and processing documents.
def ingest_worker(
self, directory, formats, name_job, filename, user, retriever="classic"
self, directory, formats, job_name, filename, user, dir_name=None, user_dir=None, retriever="classic"
):
"""
Ingest and process documents.
@@ -203,9 +203,11 @@ def ingest_worker(
self: Reference to the instance of the task.
directory (str): Specifies the directory for ingesting ('inputs' or 'temp').
formats (list of str): List of file extensions to consider for ingestion (e.g., [".rst", ".md"]).
name_job (str): Name of the job for this ingestion task.
job_name (str): Name of the job for this ingestion task (original, unsanitized).
filename (str): Name of the file to be ingested.
user (str): Identifier for the user initiating the ingestion.
user (str): Identifier for the user initiating the ingestion (original, unsanitized).
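dir_name (str, optional): Sanitized directory name for filesystem operations. Joined into the ingest path via os.path.join, so the None default only works if it is replaced before that call (no fallback is visible in this diff; confirm).
user_dir (str, optional): Sanitized user ID for filesystem operations. Joined into the ingest path via os.path.join, so the None default only works if it is replaced before that call (no fallback is visible in this diff; confirm).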
retriever (str): Type of retriever to use for processing the documents.
Returns:
@@ -216,13 +218,13 @@ def ingest_worker(
limit = None
exclude = True
sample = False
storage = StorageCreator.get_storage()
full_path = os.path.join(directory, user, name_job)
full_path = os.path.join(directory, user_dir, dir_name)
source_file_path = os.path.join(full_path, filename)
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": name_job})
logging.info(f"Ingest file: {full_path}", extra={"user": user, "job": job_name})
# Create temporary working directory
with tempfile.TemporaryDirectory() as temp_dir:
@@ -283,9 +285,9 @@ def ingest_worker(
for i in range(min(5, len(raw_docs))):
logging.info(f"Sample document {i}: {raw_docs[i]}")
file_data = {
"name": name_job,
"name": job_name, # Use original job_name
"file": filename,
"user": user,
"user": user, # Use original user
"tokens": tokens,
"retriever": retriever,
"id": str(id),
@@ -302,9 +304,9 @@ def ingest_worker(
return {
"directory": directory,
"formats": formats,
"name_job": name_job,
"name_job": job_name, # Use original job_name
"filename": filename,
"user": user,
"user": user, # Use original user
"limited": False,
}