No crawler, no sitemap

This commit is contained in:
Pavel
2023-10-12 01:03:40 +04:00
parent 8f2ad38503
commit 658867cb46
7 changed files with 76 additions and 1 deletions

View File

@@ -1,6 +1,6 @@
"""Base reader class."""
from abc import abstractmethod
from typing import Any, List
from typing import Any, List, Iterator
from langchain.docstore.document import Document as LCDocument
from application.parser.schema.base import Document

View File

@@ -0,0 +1,18 @@
# from sitemap_loader import SitemapLoader
# from crawler_loader import CrawlerLoader
from application.parser.remote.web_loader import WebLoader
class RemoteCreator:
    """Factory that instantiates a remote loader chosen by a type name."""

    # Registry mapping a lower-case type name to the loader class built for it.
    # The sitemap and crawler entries are commented out, so only 'url' resolves.
    loaders = {
        'url': WebLoader,
        # 'sitemap': SitemapLoader,
        # 'crawler': CrawlerLoader
    }

    @classmethod
    def create_loader(cls, type, *args, **kwargs):
        """Build the loader registered under ``type``.

        Args:
            type: Name of the loader to create; it is lower-cased before the
                registry lookup, so matching is case-insensitive. The name
                shadows the builtin but is kept for backward compatibility
                with keyword callers.
            *args: Positional arguments forwarded to the loader constructor.
            **kwargs: Keyword arguments forwarded to the loader constructor.

        Returns:
            A new instance of the loader class registered for ``type``.

        Raises:
            ValueError: If no loader class is registered for ``type``.
        """
        loader_class = cls.loaders.get(type.lower())
        if loader_class is None:
            # This factory resolves loaders, so the message names a loader
            # class rather than an LLM class.
            raise ValueError(f"No loader class found for type {type}")
        return loader_class(*args, **kwargs)

View File

@@ -0,0 +1,10 @@
from application.parser.remote.base import BaseRemote
class WebLoader(BaseRemote):
    """Remote loader that delegates to langchain's ``WebBaseLoader``."""

    def __init__(self):
        """Store the ``WebBaseLoader`` class for later instantiation."""
        # Imported inside the constructor, so langchain's document loaders
        # are only imported once a WebLoader is actually created.
        from langchain.document_loaders import WebBaseLoader

        self.loader = WebBaseLoader

    def load_data(self, urls):
        """Instantiate the stored loader with ``urls`` and return its ``load()`` result.

        Args:
            urls: Passed unchanged as the only argument to the stored loader
                class.

        Returns:
            Whatever the instantiated loader's ``load()`` method returns.
        """
        return self.loader(urls).load()