feat: added reddit loader

This commit is contained in:
Siddhant Rai
2024-03-16 20:22:05 +05:30
parent 80a4a094af
commit 60cfea1126
6 changed files with 117 additions and 58 deletions

View File

@@ -0,0 +1,27 @@
from application.parser.remote.base import BaseRemote
from langchain_community.document_loaders import RedditPostsLoader
class RedditPostsLoaderRemote(BaseRemote):
    """Remote loader that fetches documents via ``RedditPostsLoader``."""

    def load_data(self, inputs):
        """Build a ``RedditPostsLoader`` from *inputs* and return its documents.

        Args:
            inputs: Mapping of loader settings. Keys read here:
                ``client_id``, ``client_secret``, ``user_agent`` -- forwarded
                unchanged to ``RedditPostsLoader``;
                ``categories`` -- defaults to ``["new", "hot"]``;
                ``mode`` -- defaults to ``"subreddit"``;
                ``search_queries`` -- forwarded unchanged;
                ``max_documents`` -- optional cap on how many documents are
                returned; defaults to 5, ``None`` returns everything loaded.

        Returns:
            The loaded documents, truncated to ``max_documents``. An empty
            list is returned when loading raised, because the error is
            reported on stdout instead of being propagated.
        """
        # Previously a hard-coded ``[:5]``; the default keeps that behaviour.
        max_documents = inputs.get("max_documents", 5)

        # The loader is kept on the instance, as before.
        self.loader = RedditPostsLoader(
            client_id=inputs.get("client_id"),
            client_secret=inputs.get("client_secret"),
            user_agent=inputs.get("user_agent"),
            categories=inputs.get("categories", ["new", "hot"]),
            mode=inputs.get("mode", "subreddit"),
            search_queries=inputs.get("search_queries"),
        )

        documents = []
        try:
            documents.extend(self.loader.load())
        except Exception as e:
            # Deliberate best-effort: report the failure and fall through
            # with whatever was collected (nothing) rather than crashing.
            print(f"Error processing Data: {e}")

        # The count is reported before the cap is applied.
        print(f"Loaded {len(documents)} documents from Reddit")
        # Slicing with ``None`` as the stop keeps the whole list.
        return documents[:max_documents]

View File

@@ -1,13 +1,15 @@
from application.parser.remote.sitemap_loader import SitemapLoader
from application.parser.remote.crawler_loader import CrawlerLoader
from application.parser.remote.web_loader import WebLoader
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
class RemoteCreator:
# Maps a loader type name to a loader class and instantiates it (see lookup below).
# Registry of loader classes keyed by type name; read via cls.loaders.get(...).
loaders = {
# NOTE(review): the single-quoted and double-quoted entries look like the
# removed/added sides of a diff hunk -- confirm which set is current.
'url': WebLoader,
'sitemap': SitemapLoader,
'crawler': CrawlerLoader
"url": WebLoader,
"sitemap": SitemapLoader,
"crawler": CrawlerLoader,
"reddit": RedditPostsLoaderRemote,
}
@classmethod
@@ -15,4 +17,4 @@ class RemoteCreator:
# The type string is lower-cased before the registry lookup.
loader_class = cls.loaders.get(type.lower())
if not loader_class:
# NOTE(review): the message says "LLM class" although the lookup is over
# loaders -- looks like a copy-paste leftover; confirm before changing.
raise ValueError(f"No LLM class found for type {type}")
# Positional and keyword arguments are forwarded to the loader's constructor.
return loader_class(*args, **kwargs)
return loader_class(*args, **kwargs)