mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
inputs
This commit is contained in:
@@ -106,7 +106,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
|
|||||||
'limited': False
|
'limited': False
|
||||||
}
|
}
|
||||||
|
|
||||||
def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'):
|
def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'):
|
||||||
sample = False
|
sample = False
|
||||||
token_check = True
|
token_check = True
|
||||||
min_tokens = 150
|
min_tokens = 150
|
||||||
@@ -118,8 +118,10 @@ def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'
|
|||||||
|
|
||||||
self.update_state(state='PROGRESS', meta={'current': 1})
|
self.update_state(state='PROGRESS', meta={'current': 1})
|
||||||
|
|
||||||
|
# inputs has the form {"data": [url]}; for a URL-type task, "data" holds just the URLs
|
||||||
|
|
||||||
# Use RemoteCreator to load data from URL
|
# Use RemoteCreator to load data from URL
|
||||||
remote_loader = RemoteCreator.create_loader(loader, urls)
|
remote_loader = RemoteCreator.create_loader(loader, inputs['data'])
|
||||||
raw_docs = remote_loader.load_data()
|
raw_docs = remote_loader.load_data()
|
||||||
|
|
||||||
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
||||||
@@ -146,7 +148,7 @@ def remote_worker(self, urls, name_job, user, directory = 'temp', loader = 'url'
|
|||||||
shutil.rmtree(full_path)
|
shutil.rmtree(full_path)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'urls': urls,
|
'urls': inputs['data'],
|
||||||
'name_job': name_job,
|
'name_job': name_job,
|
||||||
'user': user,
|
'user': user,
|
||||||
'limited': False
|
'limited': False
|
||||||
|
|||||||
Reference in New Issue
Block a user