From 8b3b16bce4d834c2f26aad0f506eff0341459ed6 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 13 Oct 2023 08:46:35 +0100 Subject: [PATCH] inputs --- application/worker.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/application/worker.py b/application/worker.py index fe4e2615..444772d5 100644 --- a/application/worker.py +++ b/application/worker.py @@ -106,7 +106,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user): 'limited': False } -def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'): +def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'): sample = False token_check = True min_tokens = 150 @@ -117,9 +117,11 @@ def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url' os.makedirs(full_path) self.update_state(state='PROGRESS', meta={'current': 1}) - + + # inputs {"data": [url]} for url type task just urls + # Use RemoteCreator to load data from URL - remote_loader = RemoteCreator.create_loader(loader, urls) + remote_loader = RemoteCreator.create_loader(loader, inputs['data']) raw_docs = remote_loader.load_data() raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check) @@ -146,7 +148,7 @@ def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url' shutil.rmtree(full_path) return { - 'urls': urls, + 'urls': inputs['data'], 'name_job': name_job, 'user': user, 'limited': False