feat: fields to handle reddit loader + minor changes

This commit is contained in:
Siddhant Rai
2024-03-26 16:07:44 +05:30
parent 0c3970a266
commit eed1bfbe50
3 changed files with 120 additions and 38 deletions

View File

@@ -4,12 +4,13 @@ from langchain_community.document_loaders import RedditPostsLoader
class RedditPostsLoaderRemote(BaseRemote):
def load_data(self, inputs):
client_id = inputs.get("client_id")
client_secret = inputs.get("client_secret")
user_agent = inputs.get("user_agent")
categories = inputs.get("categories", ["new", "hot"])
mode = inputs.get("mode", "subreddit")
search_queries = inputs.get("search_queries")
data = eval(inputs)
client_id = data.get("client_id")
client_secret = data.get("client_secret")
user_agent = data.get("user_agent")
categories = data.get("categories", ["new", "hot"])
mode = data.get("mode", "subreddit")
search_queries = data.get("search_queries")
self.loader = RedditPostsLoader(
client_id=client_id,
client_secret=client_secret,
@@ -17,11 +18,8 @@ class RedditPostsLoaderRemote(BaseRemote):
categories=categories,
mode=mode,
search_queries=search_queries,
number_posts=10,
)
documents = []
try:
documents.extend(self.loader.load())
except Exception as e:
print(f"Error processing Data: {e}")
documents = self.loader.load()
print(f"Loaded {len(documents)} documents from Reddit")
return documents[:5]
return documents

View File

@@ -149,7 +149,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
}
def remote_worker(self, source_data, name_job, user, directory="temp", loader="url"):
def remote_worker(self, source_data, name_job, user, loader, directory="temp"):
# sample = False
token_check = True
min_tokens = 150

View File

@@ -17,6 +17,12 @@ export default function Upload({
const [docName, setDocName] = useState('');
const [urlName, setUrlName] = useState('');
const [url, setUrl] = useState('');
const [redditData, setRedditData] = useState({
client_id: '',
client_secret: '',
user_agent: '',
search_queries: [''],
});
const urlOptions: { label: string; value: string }[] = [
{ label: 'Crawler', value: 'crawler' },
// { label: 'Sitemap', value: 'sitemap' },
@@ -164,7 +170,6 @@ export default function Upload({
};
const uploadRemote = () => {
console.log('here');
const formData = new FormData();
formData.append('name', urlName);
formData.append('user', 'local');
@@ -172,6 +177,13 @@ export default function Upload({
formData.append('source', urlType?.value);
}
formData.append('data', url);
if (
redditData.client_id.length > 0 &&
redditData.client_secret.length > 0
) {
formData.set('name', 'other');
formData.set('data', JSON.stringify(redditData));
}
const apiHost = import.meta.env.VITE_API_HOST;
const xhr = new XMLHttpRequest();
xhr.upload.addEventListener('progress', (event) => {
@@ -203,6 +215,19 @@ export default function Upload({
['.docx'],
},
});
/**
 * Shared change handler for the Reddit form inputs.
 *
 * The input's `name` attribute selects which key of `redditData` is
 * updated. `search_queries` is stored as an array: the raw text is split on
 * commas and every entry is trimmed. All other fields store the raw string.
 *
 * @param e - change event fired by one of the Reddit `<input>` elements.
 */
const handleChange = (e: React.ChangeEvent<HTMLInputElement>) => {
  const { name, value } = e.target;
  // Split even when the input is empty: ''.split(',') yields [''], which
  // matches the initial state. Previously an emptied field fell through to
  // the raw-string branch and turned `search_queries` from an array into ''.
  const nextValue =
    name === 'search_queries'
      ? value.split(',').map((item) => item.trim())
      : value;
  // Functional update so the new value is merged into the latest state
  // instead of the snapshot captured when this render created the handler.
  setRedditData((prev) => ({
    ...prev,
    [name]: nextValue,
  }));
};
let view;
if (progress?.type === 'UPLOAD') {
view = <UploadProgress></UploadProgress>;
@@ -282,30 +307,89 @@ export default function Upload({
setUrlType(value)
}
/>
<input
placeholder="Enter name"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
value={urlName}
onChange={(e) => setUrlName(e.target.value)}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Name
</span>
</div>
<input
placeholder="URL Link"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
value={url}
onChange={(e) => setUrl(e.target.value)}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Link
</span>
</div>
{urlType.label !== 'Reddit' ? (
<>
<input
placeholder="Enter name"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
value={urlName}
onChange={(e) => setUrlName(e.target.value)}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Name
</span>
</div>
<input
placeholder="URL Link"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
value={url}
onChange={(e) => setUrl(e.target.value)}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Link
</span>
</div>
</>
) : (
<>
<input
placeholder="Enter client ID"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
name="client_id"
value={redditData.client_id}
onChange={handleChange}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Client ID
</span>
</div>
<input
placeholder="Enter client secret"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
name="client_secret"
value={redditData.client_secret}
onChange={handleChange}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Client secret
</span>
</div>
<input
placeholder="Enter user agent"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
name="user_agent"
value={redditData.user_agent}
onChange={handleChange}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
User agent
</span>
</div>
<input
placeholder="Enter search queries"
type="text"
className="h-10 w-full rounded-full border-2 border-silver px-3 outline-none dark:bg-transparent dark:text-silver"
name="search_queries"
value={redditData.search_queries}
onChange={handleChange}
></input>
<div className="relative bottom-12 left-2 mt-[-18.39px]">
<span className="bg-white px-2 text-xs text-silver dark:bg-outer-space dark:text-silver">
Search queries
</span>
</div>
</>
)}
</>
)}
<div className="flex flex-row-reverse">