upload_remote class

This commit is contained in:
Pavel
2024-02-13 23:41:36 +03:00
parent c144f30606
commit 030c2a740f
3 changed files with 37 additions and 6 deletions

View File

@@ -123,7 +123,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
'limited': False
}
def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'):
def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
sample = False
token_check = True
min_tokens = 150
@@ -135,10 +135,10 @@ def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'ur
self.update_state(state='PROGRESS', meta={'current': 1})
# inputs {"data": [url]} for url type task just urls
# source_data {"data": [url]} for url type task just urls
# Use RemoteCreator to load data from URL
remote_loader = RemoteCreator.create_loader(loader, inputs)
remote_loader = RemoteCreator.create_loader(loader, source_data)
raw_docs = remote_loader.load_data()
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
@@ -165,7 +165,7 @@ def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'ur
shutil.rmtree(full_path)
return {
'urls': inputs['data'],
'urls': source_data['data'],
'name_job': name_job,
'user': user,
'limited': False