mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-12-02 01:53:14 +00:00
feat: added reddit loader
This commit is contained in:
27
application/parser/remote/reddit_loader.py
Normal file
27
application/parser/remote/reddit_loader.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from application.parser.remote.base import BaseRemote
|
||||
from langchain_community.document_loaders import RedditPostsLoader
|
||||
|
||||
|
||||
class RedditPostsLoaderRemote(BaseRemote):
    """Remote loader that pulls Reddit posts through ``RedditPostsLoader``."""

    # Upper bound on how many loaded documents are handed back to the caller.
    # Previously an unnamed ``[:5]`` slice on the return statement.
    MAX_DOCUMENTS = 5

    def load_data(self, inputs):
        """Load Reddit posts described by ``inputs``.

        Args:
            inputs: Object exposing ``.get`` (e.g. a dict) with the keys
                ``client_id``, ``client_secret``, ``user_agent`` and
                ``search_queries``, plus the optional keys ``categories``
                (defaults to ``["new", "hot"]``) and ``mode`` (defaults to
                ``"subreddit"``). All values are forwarded unchanged to
                ``RedditPostsLoader``.

        Returns:
            A list holding at most ``MAX_DOCUMENTS`` of the loaded
            documents. If loading raises, the error is printed and an
            empty list is returned (best-effort behaviour).
        """
        # The loader is kept on the instance, as in the original code.
        self.loader = RedditPostsLoader(
            client_id=inputs.get("client_id"),
            client_secret=inputs.get("client_secret"),
            user_agent=inputs.get("user_agent"),
            categories=inputs.get("categories", ["new", "hot"]),
            mode=inputs.get("mode", "subreddit"),
            # NOTE(review): no default is supplied here; presumably the
            # underlying loader needs a sequence of queries — confirm
            # callers always provide it.
            search_queries=inputs.get("search_queries"),
        )

        documents = []
        try:
            documents.extend(self.loader.load())
        except Exception as e:
            # Deliberately best-effort: report the failure and fall through
            # with whatever has been collected (an empty list).
            print(f"Error processing Data: {e}")

        print(f"Loaded {len(documents)} documents from Reddit")

        limited = documents[: self.MAX_DOCUMENTS]
        if len(limited) < len(documents):
            # Make the truncation visible; the count printed above refers to
            # everything that was loaded, not to what is returned.
            print(f"Returning only the first {len(limited)} documents")
        return limited
|
||||
@@ -1,13 +1,15 @@
|
||||
from application.parser.remote.sitemap_loader import SitemapLoader
|
||||
from application.parser.remote.crawler_loader import CrawlerLoader
|
||||
from application.parser.remote.web_loader import WebLoader
|
||||
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
loaders = {
|
||||
'url': WebLoader,
|
||||
'sitemap': SitemapLoader,
|
||||
'crawler': CrawlerLoader
|
||||
"url": WebLoader,
|
||||
"sitemap": SitemapLoader,
|
||||
"crawler": CrawlerLoader,
|
||||
"reddit": RedditPostsLoaderRemote,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@@ -15,4 +17,4 @@ class RemoteCreator:
|
||||
loader_class = cls.loaders.get(type.lower())
|
||||
if not loader_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
return loader_class(*args, **kwargs)
|
||||
return loader_class(*args, **kwargs)
|
||||
|
||||
Reference in New Issue
Block a user