mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
change input
This commit is contained in:
@@ -9,7 +9,8 @@ class CrawlerLoader(BaseRemote):
|
||||
self.loader = WebBaseLoader # Initialize the document loader
|
||||
self.limit = limit # Set the limit for the number of pages to scrape
|
||||
|
||||
def load_data(self, url):
|
||||
def load_data(self, inputs):
|
||||
url = inputs['data']
|
||||
# Check if the input is a list and if it is, use the first element
|
||||
if isinstance(url, list) and url:
|
||||
url = url[0]
|
||||
|
||||
@@ -9,11 +9,12 @@ class SitemapLoader(BaseRemote):
|
||||
self.loader = WebBaseLoader
|
||||
self.limit = limit # Adding limit to control the number of URLs to process
|
||||
|
||||
def load_data(self, sitemap_url):
|
||||
def load_data(self, inputs):
|
||||
sitemap_url= inputs['data']
|
||||
# Check if the input is a list and if it is, use the first element
|
||||
if isinstance(sitemap_url, list) and sitemap_url:
|
||||
url = sitemap_url[0]
|
||||
|
||||
|
||||
urls = self._extract_urls(sitemap_url)
|
||||
if not urls:
|
||||
print(f"No URLs found in the sitemap: {sitemap_url}")
|
||||
|
||||
@@ -5,7 +5,9 @@ class WebLoader(BaseRemote):
|
||||
from langchain.document_loaders import WebBaseLoader
|
||||
self.loader = WebBaseLoader
|
||||
|
||||
def load_data(self, urls):
|
||||
def load_data(self, inputs):
|
||||
urls = inputs['data']
|
||||
|
||||
if isinstance(urls, str):
|
||||
urls = [urls] # Convert string to list if a single URL is passed
|
||||
|
||||
|
||||
@@ -121,7 +121,7 @@ def remote_worker(self, inputs, name_job, user, directory = 'temp', loader = 'ur
|
||||
# inputs has the form {"data": [url]}; for a url-type task, "data" holds just the URLs
|
||||
|
||||
# Use RemoteCreator to load data from URL
|
||||
remote_loader = RemoteCreator.create_loader(loader, inputs['data'])
|
||||
remote_loader = RemoteCreator.create_loader(loader, inputs)
|
||||
raw_docs = remote_loader.load_data()
|
||||
|
||||
raw_docs = group_split(documents=raw_docs, min_tokens=min_tokens, max_tokens=max_tokens, token_check=token_check)
|
||||
|
||||
Reference in New Issue
Block a user