mirror of https://github.com/arc53/DocsGPT.git (synced 2025-12-01 09:33:14 +00:00)

upload_remote class
@@ -5,7 +5,7 @@ from pymongo import MongoClient
 from bson.objectid import ObjectId
 from werkzeug.utils import secure_filename
 
-from application.api.user.tasks import ingest
+from application.api.user.tasks import ingest, ingest_remote
 
 from application.core.settings import settings
 from application.vectorstore.vector_creator import VectorCreator
@@ -157,6 +157,32 @@ def upload_file():
         return {"status": "ok", "task_id": task_id}
     else:
         return {"status": "error"}
 
+@user.route("/api/remote", methods=["POST"])
+def upload_remote():
+    """Upload a remote source to get vectorized and indexed."""
+    if "user" not in request.form:
+        return {"status": "no user"}
+    user = secure_filename(request.form["user"])
+    if "source" not in request.form:
+        return {"status": "no source"}
+    source = secure_filename(request.form["source"])
+    if "name" not in request.form:
+        return {"status": "no name"}
+    job_name = secure_filename(request.form["name"])
+    # check if the post request has the data part
+    if "data" not in request.form:
+        print("No data")
+        return {"status": "no data"}
+    source_data = request.form["data"]
+
+    if source_data:
+        task = ingest_remote.delay(source_data=source_data, job_name=job_name, user=user, loader=source)
+        # task id
+        task_id = task.id
+        return {"status": "ok", "task_id": task_id}
+    else:
+        return {"status": "error"}
+
 @user.route("/api/task_status", methods=["GET"])
 def task_status():
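
For context, a request against the new /api/remote route might look like the sketch below. The host and port, the user and name values, and the exact shape of the "data" field are illustrative assumptions (the remote_worker comment further down suggests a {"data": [url]} payload for the url loader); none of them are fixed by this commit.

# Hypothetical client call for the new /api/remote route; not part of the commit.
# Host/port and all field values below are assumptions for illustration only.
import json
import requests

form = {
    "user": "local",
    "source": "url",        # selects which remote loader RemoteCreator builds
    "name": "remote-ingest-demo",
    # payload shape assumed from the remote_worker comment: {"data": [url]}
    "data": json.dumps({"data": ["https://example.com/docs"]}),
}
resp = requests.post("http://localhost:7091/api/remote", data=form)
print(resp.json())          # expected on success: {"status": "ok", "task_id": "..."}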
@@ -1,7 +1,12 @@
-from application.worker import ingest_worker
+from application.worker import ingest_worker, remote_worker
 from application.celery import celery
 
 @celery.task(bind=True)
 def ingest(self, directory, formats, name_job, filename, user):
     resp = ingest_worker(self, directory, formats, name_job, filename, user)
     return resp
+
+@celery.task(bind=True)
+def ingest_remote(self, source_data, job_name, user, loader):
+    resp = remote_worker(self, source_data, job_name, user, loader)
+    return resp
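
As a rough illustration of the task wiring, the new ingest_remote task could be exercised synchronously (for example in a test) with Celery's apply(); the argument values below are placeholders, and actually running it still requires the worker-side dependencies (RemoteCreator, the vector store, etc.).

# A minimal sketch of invoking the new task eagerly via Celery's apply();
# argument values are placeholders, not taken from the commit.
from application.api.user.tasks import ingest_remote

result = ingest_remote.apply(
    kwargs={
        "source_data": '{"data": ["https://example.com/docs"]}',
        "job_name": "remote-ingest-demo",
        "user": "local",
        "loader": "url",
    }
)
print(result.get())   # the dict that remote_worker returns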
@@ -123,7 +123,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
         'limited': False
     }
 
-def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'):
+def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
     sample = False
     token_check = True
     min_tokens = 150
@@ -135,10 +135,10 @@ def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'):
 
     self.update_state(state='PROGRESS', meta={'current': 1})
 
-    # inputs {"data": [url]} for url type task just urls
+    # source_data {"data": [url]} for url type task just urls
 
     # Use RemoteCreator to load data from URL
-    remote_loader = RemoteCreator.create_loader(loader, inputs)
+    remote_loader = RemoteCreator.create_loader(loader, source_data)
     raw_docs = remote_loader.load_data()
 
     raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
@@ -165,7 +165,7 @@ def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'url'):
         shutil.rmtree(full_path)
 
     return {
-        'urls': inputs['data'],
+        'urls': source_data['data'],
         'name_job': name_job,
         'user': user,
         'limited': False
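
Once a task_id has been returned by /api/remote, progress can presumably be polled through the /api/task_status route shown in the diff context above. The sketch below assumes the id is passed as a task_id query parameter and that the response carries a Celery-style status string; neither detail is defined by this commit.

# Hypothetical polling loop against the existing /api/task_status route.
# The "task_id" query-parameter name and the response shape are assumptions.
import time
import requests

def wait_for_task(task_id, base_url="http://localhost:7091", delay=2):
    while True:
        status = requests.get(f"{base_url}/api/task_status", params={"task_id": task_id}).json()
        print(status)
        if status.get("status") in ("SUCCESS", "FAILURE"):
            return status
        time.sleep(delay)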