From eed1bfbe50e191dbdf0e5d7aca15618cd77e7612 Mon Sep 17 00:00:00 2001 From: Siddhant Rai Date: Tue, 26 Mar 2024 16:07:44 +0530 Subject: [PATCH] feat: fields to handle reddit loader + minor changes --- application/parser/remote/reddit_loader.py | 22 ++-- application/worker.py | 2 +- frontend/src/upload/Upload.tsx | 134 +++++++++++++++++---- 3 files changed, 120 insertions(+), 38 deletions(-) diff --git a/application/parser/remote/reddit_loader.py b/application/parser/remote/reddit_loader.py index f377717b..3c9f93ea 100644 --- a/application/parser/remote/reddit_loader.py +++ b/application/parser/remote/reddit_loader.py @@ -4,12 +4,13 @@ from langchain_community.document_loaders import RedditPostsLoader class RedditPostsLoaderRemote(BaseRemote): def load_data(self, inputs): - client_id = inputs.get("client_id") - client_secret = inputs.get("client_secret") - user_agent = inputs.get("user_agent") - categories = inputs.get("categories", ["new", "hot"]) - mode = inputs.get("mode", "subreddit") - search_queries = inputs.get("search_queries") + data = eval(inputs) + client_id = data.get("client_id") + client_secret = data.get("client_secret") + user_agent = data.get("user_agent") + categories = data.get("categories", ["new", "hot"]) + mode = data.get("mode", "subreddit") + search_queries = data.get("search_queries") self.loader = RedditPostsLoader( client_id=client_id, client_secret=client_secret, @@ -17,11 +18,8 @@ class RedditPostsLoaderRemote(BaseRemote): categories=categories, mode=mode, search_queries=search_queries, + number_posts=10, ) - documents = [] - try: - documents.extend(self.loader.load()) - except Exception as e: - print(f"Error processing Data: {e}") + documents = self.loader.load() print(f"Loaded {len(documents)} documents from Reddit") - return documents[:5] + return documents diff --git a/application/worker.py b/application/worker.py index b783c335..3891fde9 100644 --- a/application/worker.py +++ b/application/worker.py @@ -149,7 +149,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user): } -def remote_worker(self, source_data, name_job, user, directory="temp", loader="url"): +def remote_worker(self, source_data, name_job, user, loader, directory="temp"): # sample = False token_check = True min_tokens = 150 diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index 6870ee26..1614375d 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -17,6 +17,12 @@ export default function Upload({ const [docName, setDocName] = useState(''); const [urlName, setUrlName] = useState(''); const [url, setUrl] = useState(''); + const [redditData, setRedditData] = useState({ + client_id: '', + client_secret: '', + user_agent: '', + search_queries: [''], + }); const urlOptions: { label: string; value: string }[] = [ { label: 'Crawler', value: 'crawler' }, // { label: 'Sitemap', value: 'sitemap' }, @@ -164,7 +170,6 @@ export default function Upload({ }; const uploadRemote = () => { - console.log('here'); const formData = new FormData(); formData.append('name', urlName); formData.append('user', 'local'); @@ -172,6 +177,13 @@ export default function Upload({ formData.append('source', urlType?.value); } formData.append('data', url); + if ( + redditData.client_id.length > 0 && + redditData.client_secret.length > 0 + ) { + formData.set('name', 'other'); + formData.set('data', JSON.stringify(redditData)); + } const apiHost = import.meta.env.VITE_API_HOST; const xhr = new XMLHttpRequest(); xhr.upload.addEventListener('progress', (event) => { @@ -203,6 +215,19 @@ export default function Upload({ ['.docx'], }, }); + const handleChange = (e: React.ChangeEvent) => { + const { name, value } = e.target; + if (name === 'search_queries' && value.length > 0) { + setRedditData({ + ...redditData, + [name]: value.split(',').map((item) => item.trim()), + }); + } else + setRedditData({ + ...redditData, + [name]: value, + }); + }; let view; if (progress?.type === 'UPLOAD') { view = ; @@ -282,30 +307,89 @@ export default function Upload({ setUrlType(value) } /> - setUrlName(e.target.value)} - > -
- - Name - -
- setUrl(e.target.value)} - > -
- - Link - -
+ {urlType.label !== 'Reddit' ? ( + <> + setUrlName(e.target.value)} + > +
+ + Name + +
+ setUrl(e.target.value)} + > +
+ + Link + +
+ + ) : ( + <> + +
+ + Client ID + +
+ +
+ + Client secret + +
+ +
+ + User agent + +
+ +
+ + Search queries + +
+ + )} )}