This commit is contained in:
Alex
2023-10-13 08:46:35 +01:00
parent 024674eef3
commit 8b3b16bce4

View File

@@ -106,7 +106,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
'limited': False 'limited': False
} }
def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'): def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'):
sample = False sample = False
token_check = True token_check = True
min_tokens = 150 min_tokens = 150
@@ -118,8 +118,10 @@ def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'
self.update_state(state='PROGRESS', meta={'current': 1}) self.update_state(state='PROGRESS', meta={'current': 1})
# inputs is {"data": [url]}; for a url-type task, "data" holds just the URLs
# Use RemoteCreator to load data from URL # Use RemoteCreator to load data from URL
remote_loader = RemoteCreator.create_loader(loader, urls) remote_loader = RemoteCreator.create_loader(loader, inputs['data'])
raw_docs = remote_loader.load_data() raw_docs = remote_loader.load_data()
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check) raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
@@ -146,7 +148,7 @@ def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'
shutil.rmtree(full_path) shutil.rmtree(full_path)
return { return {
'urls': urls, 'urls': inputs['data'],
'name_job': name_job, 'name_job': name_job,
'user': user, 'user': user,
'limited': False 'limited': False