Merge pull request #1621 from ManishMadan2882/main

Refactor Upload
This commit is contained in:
Alex
2025-02-10 13:04:53 +00:00
committed by GitHub
12 changed files with 268 additions and 182 deletions

View File

@@ -3,6 +3,7 @@ import math
import os
import shutil
import uuid
import json
from bson.binary import Binary, UuidRepresentation
from bson.dbref import DBRef
@@ -428,18 +429,22 @@ class UploadRemote(Resource):
return missing_fields
try:
if "repo_url" in data:
source_data = data["repo_url"]
loader = "github"
else:
source_data = data["data"]
loader = data["source"]
config = json.loads(data["data"])
source_data = None
task = ingest_remote.delay(
if data["source"] == "github":
source_data = config.get("repo_url")
elif data["source"] in ["crawler", "url"]:
source_data = config.get("url")
elif data["source"] == "reddit":
source_data = config
task = ingest_remote.delay(
source_data=source_data,
job_name=data["name"],
user=data["user"],
loader=loader,
loader=data["source"]
)
except Exception as err:
current_app.logger.error(f"Error uploading remote source: {err}")

View File

@@ -1635,7 +1635,7 @@
"version": "18.3.0",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.0.tgz",
"integrity": "sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==",
"devOptional": true,
"dev": true,
"dependencies": {
"@types/react": "*"
}
@@ -9376,7 +9376,7 @@
"version": "5.7.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.7.2.tgz",
"integrity": "sha512-i5t66RHxDvVN40HfDd1PsEThGNnlMCMT3jMUuoh9/0TaqWevNontacunWyN02LA9/fIbEWlcHZcgTKb9QoaLfg==",
"devOptional": true,
"dev": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"

View File

@@ -8,6 +8,7 @@ const Input = ({
isAutoFocused = false,
placeholder,
label,
required = false,
maxLength,
className,
colorVariant = 'silver',
@@ -40,13 +41,19 @@ const Input = ({
onChange={onChange}
onPaste={onPaste}
onKeyDown={onKeyDown}
required={required}
>
{children}
</input>
{label && (
<div className="absolute -top-2 left-2">
<span className="bg-white px-2 text-xs text-gray-4000 dark:bg-outer-space dark:text-silver">
<span className="bg-white px-2 text-xs text-gray-4000 dark:bg-outer-space dark:text-silver flex items-center">
{label}
{required && (
<span className="text-[#D30000] dark:text-[#D42626] ml-0.5">
*
</span>
)}
</span>
</div>
)}

View File

@@ -9,6 +9,7 @@ export type InputProps = {
name?: string;
placeholder?: string;
label?: string;
required?: boolean;
className?: string;
children?: React.ReactElement;
onChange: (

View File

@@ -156,7 +156,9 @@
"completed": "Training completed",
"wait": "This may take several minutes",
"tokenLimit": "Over the token limit, please consider uploading smaller document"
}
},
"showAdvanced": "Show advanced options",
"hideAdvanced": "Hide advanced options"
},
"createAPIKey": {
"label": "Create New API Key",

View File

@@ -156,7 +156,9 @@
"completed": "Entrenamiento completado",
"wait": "Esto puede tardar varios minutos",
"tokenLimit": "Excede el límite de tokens, considere cargar un documento más pequeño"
}
},
"showAdvanced": "Mostrar opciones avanzadas",
"hideAdvanced": "Ocultar opciones avanzadas"
},
"createAPIKey": {
"label": "Crear Nueva Clave de API",

View File

@@ -155,7 +155,9 @@
"completed": "トレーニング完了",
"wait": "数分かかる場合があります",
"tokenLimit": "トークン制限を超えています。より小さいドキュメントをアップロードしてください"
}
},
"showAdvanced": "詳細オプションを表示",
"hideAdvanced": "詳細オプションを非表示"
},
"createAPIKey": {
"label": "新しいAPIキーを作成",

View File

@@ -156,7 +156,9 @@
"completed": "Обучение завершено",
"wait": "Это может занять несколько минут",
"tokenLimit": "Превышен лимит токенов, рассмотрите возможность загрузки документа меньшего размера"
}
},
"showAdvanced": "Показать расширенные настройки",
"hideAdvanced": "Скрыть расширенные настройки"
},
"createAPIKey": {
"label": "Создать новый API ключ",

View File

@@ -156,7 +156,9 @@
"completed": "訓練完成",
"wait": "這可能需要幾分鐘",
"tokenLimit": "超出令牌限制,請考慮上傳較小的文檔"
}
},
"showAdvanced": "顯示進階選項",
"hideAdvanced": "隱藏進階選項"
},
"createAPIKey": {
"label": "建立新的 API 金鑰",

View File

@@ -156,7 +156,9 @@
"completed": "训练完成",
"wait": "这可能需要几分钟",
"tokenLimit": "超出令牌限制,请考虑上传较小的文档"
}
},
"showAdvanced": "显示高级选项",
"hideAdvanced": "隐藏高级选项"
},
"createAPIKey": {
"label": "创建新的 API 密钥",

View File

@@ -20,25 +20,11 @@ import WrapperModal from '../modals/WrapperModal';
import {
IngestorType,
IngestorConfig,
RedditIngestorConfig,
GithubIngestorConfig,
CrawlerIngestorConfig,
UrlIngestorConfig,
IngestorFormSchemas,
FormField,
} from './types/ingestor';
import { IngestorDefaultConfigs } from '../upload/types/ingestor';
type IngestorState = {
type: IngestorType;
name: string;
config:
| RedditIngestorConfig
| GithubIngestorConfig
| CrawlerIngestorConfig
| UrlIngestorConfig;
};
function Upload({
receivedFile = [],
setModalState,
@@ -55,93 +41,124 @@ function Upload({
onSuccessfulUpload?: () => void;
}) {
const [docName, setDocName] = useState(receivedFile[0]?.name);
const [remoteName, setRemoteName] = useState('');
const [files, setfiles] = useState<File[]>(receivedFile);
const [activeTab, setActiveTab] = useState<string | null>(renderTab);
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
/**
 * Renders the form inputs for the currently selected ingestor type.
 *
 * Fields whose schema entry is not flagged `advanced` are always rendered.
 * Fields flagged `advanced` are rendered below a divider, and only while
 * `showAdvancedOptions` is true. Rendering of each individual control is
 * delegated to `renderField`.
 *
 * @returns the rendered fields, or null when no schema exists for the type.
 */
const renderFormFields = () => {
  const schema = IngestorFormSchemas[ingestor.type];
  if (!schema) return null;

  const generalFields = schema.filter((field) => !field.advanced);
  const advancedFields = schema.filter((field) => field.advanced);

  return (
    <>
      {generalFields.map((field: FormField) => renderField(field))}
      {advancedFields.length > 0 && showAdvancedOptions && (
        <>
          <hr className="my-4 border-[#C4C4C4]/40 border-[1px]" />
          {advancedFields.map((field: FormField) => renderField(field))}
        </>
      )}
    </>
  );
};
/**
 * Renders the control matching a single schema field of the active ingestor.
 *
 * - `string` / `number` fields render an `Input`
 * - `enum` fields render a `Dropdown` built from `field.options`
 * - `boolean` fields render a `ToggleSwitch`
 *
 * Every change is forwarded to `handleIngestorChange` under the field's name.
 *
 * @param field schema entry describing the control to render
 * @returns the rendered control, or null for an unrecognised field type
 */
const renderField = (field: FormField) => {
  const isRequired = field.required ?? false;
  const configKey = field.name as keyof IngestorConfig['config'];
  const currentValue =
    ingestor.config[field.name as keyof typeof ingestor.config];
  // A key absent from the config must render as an empty input;
  // String(undefined) would otherwise display the literal text "undefined".
  const textValue = String(currentValue ?? '');

  switch (field.type) {
    case 'string':
      return (
        <Input
          key={field.name}
          placeholder={field.label}
          type="text"
          name={field.name}
          value={textValue}
          onChange={(e) => handleIngestorChange(configKey, e.target.value)}
          borderVariant="thin"
          label={field.label}
          required={isRequired}
          colorVariant="gray"
        />
      );
    case 'number':
      return (
        <Input
          key={field.name}
          placeholder={field.label}
          type="number"
          name={field.name}
          value={textValue}
          onChange={(e) =>
            handleIngestorChange(configKey, Number(e.target.value))
          }
          borderVariant="thin"
          label={field.label}
          required={isRequired}
          colorVariant="gray"
        />
      );
    case 'enum':
      return (
        <Dropdown
          key={field.name}
          options={field.options || []}
          selectedValue={
            field.options?.find((opt) => opt.value === currentValue) || null
          }
          onSelect={(selected: { label: string; value: string }) => {
            handleIngestorChange(configKey, selected.value);
          }}
          size="w-full"
          rounded="3xl"
          placeholder={field.label}
          border="border"
          borderColor="gray-5000"
        />
      );
    case 'boolean':
      return (
        <ToggleSwitch
          key={field.name}
          label={field.label}
          checked={Boolean(currentValue)}
          onChange={(checked: boolean) => {
            handleIngestorChange(configKey, checked);
          }}
          className="mt-2"
        />
      );
    default:
      return null;
  }
};
// New unified ingestor state
@@ -172,11 +189,6 @@ function Upload({
{ label: 'Reddit', value: 'reddit' },
];
const [urlType, setUrlType] = useState<{ label: string; value: string }>({
label: 'Crawler',
value: 'crawler',
});
const sourceDocs = useSelector(selectSourceDocs);
useEffect(() => {
if (setTimeoutRef.current) {
@@ -363,7 +375,7 @@ function Upload({
// Dropzone callback: stores the accepted files and seeds the document name
// from the first file.
const onDrop = useCallback((acceptedFiles: File[]) => {
  setfiles(acceptedFiles);
  // Fall back to '' so the name state is never set to undefined when the
  // drop contained no accepted file.
  setDocName(acceptedFiles[0]?.name || '');
}, []);
const doNothing = () => undefined;
@@ -374,7 +386,7 @@ function Upload({
formData.append('file', file);
});
formData.append('name', activeTab === 'file' ? docName : ingestor.name);
formData.append('name', docName);
formData.append('user', 'local');
const apiHost = import.meta.env.VITE_API_HOST;
const xhr = new XMLHttpRequest();
@@ -394,43 +406,58 @@ function Upload({
/**
 * Submits the remote-source form to `POST {VITE_API_HOST}/api/remote`.
 *
 * The request is multipart form data with:
 * - `name`   – the value of the remote name input
 * - `user`   – always 'local'
 * - `source` – the selected ingestor type
 * - `data`   – JSON of the ingestor config (defaults merged with user input)
 *
 * Upload progress is reported through `setProgress`; once the response
 * arrives, the progress state switches to 'TRAINING' after a 3 second delay.
 */
const uploadRemote = () => {
  const formData = new FormData();
  formData.append('name', remoteName);
  formData.append('user', 'local');
  formData.append('source', ingestor.type);

  // User-entered values override the defaults for the selected type. The
  // config held in state is only read here, never mutated.
  const defaultConfig = IngestorDefaultConfigs[ingestor.type].config;
  const mergedConfig = { ...defaultConfig, ...ingestor.config };
  const schema = IngestorFormSchemas[ingestor.type];

  const filteredConfig = Object.entries(mergedConfig).reduce(
    (acc, [key, value]) => {
      const field = schema.find((f) => f.name === key);
      // Include the field if:
      // 1. It's required, or
      // 2. It has a non-empty value
      if (
        field?.required ||
        (value !== undefined && value !== null && value !== '')
      ) {
        acc[key] = value;
      }
      return acc;
    },
    {} as Record<string, unknown>,
  );
  formData.append('data', JSON.stringify(filteredConfig));

  const apiHost: string = import.meta.env.VITE_API_HOST;
  const xhr = new XMLHttpRequest();

  xhr.upload.addEventListener('progress', (event: ProgressEvent) => {
    // Without a computable length, event.total is not usable as a divisor.
    if (event.lengthComputable) {
      const progressPercentage = +(
        (event.loaded / event.total) *
        100
      ).toFixed(2);
      setProgress({ type: 'UPLOAD', percentage: progressPercentage });
    }
  });

  xhr.onload = () => {
    const response = JSON.parse(xhr.responseText) as { task_id: string };
    setTimeoutRef.current = window.setTimeout(() => {
      setProgress({
        type: 'TRAINING',
        percentage: 0,
        taskId: response.task_id,
      });
    }, 3000);
  };

  xhr.open('POST', `${apiHost}/api/remote`);
  xhr.send(formData);
};
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
multiple: true,
@@ -461,43 +488,55 @@ function Upload({
},
});
/**
 * Decides whether the upload button must be disabled.
 *
 * - 'file' tab: disabled without a non-blank document name or without files.
 * - 'remote' tab: disabled without a non-blank remote name, or when any
 *   schema field marked `required` has no usable value.
 * - any other tab value: disabled.
 *
 * @returns true when the current form state must not be submitted
 */
const isUploadDisabled = (): boolean => {
  if (activeTab === 'file') {
    return !docName?.trim() || files.length === 0;
  }

  if (activeTab === 'remote') {
    if (!remoteName?.trim()) {
      return true;
    }

    const formFields: FormField[] = IngestorFormSchemas[ingestor.type];
    // Only required fields are validated; optional ones may stay empty.
    return formFields.some((field) => {
      if (!field.required) return false;

      const value = ingestor.config[field.name as keyof typeof ingestor.config];

      // A missing value must be rejected before the typeof checks: neither
      // null nor undefined matches 'string', 'number' or 'boolean'.
      if (value == null) return true;
      if (typeof value === 'string') return !value.trim();
      // NaN compares false against 0, so it needs its own check.
      if (typeof value === 'number') {
        return !Number.isFinite(value) || value <= 0;
      }
      // Any present boolean (true or false) is a valid answer.
      return false;
    });
  }

  return true;
};
/**
 * Writes a single value into the config of the current ingestor state.
 *
 * @param key config property to update
 * @param value new value for that property
 */
const handleIngestorChange = (
  key: keyof IngestorConfig['config'],
  value: string | number | boolean,
) => {
  setIngestor((current) => {
    // Copy the config first so the previous state object stays untouched.
    const nextConfig = { ...current.config, [key]: value };
    return { ...current, config: nextConfig };
  });
};
const handleIngestorTypeChange = (type: IngestorType) => {
// Updates the ingestor selected in the dropdown and resets the config to the default config for that type
const defaultConfig = IngestorDefaultConfigs[type];
setIngestor({
@@ -598,7 +637,7 @@ function Upload({
{activeTab === 'remote' && (
<>
<Dropdown
border="border"
border="border-2"
options={urlOptions}
selectedValue={
urlOptions.find((opt) => opt.value === ingestor.type) || null
@@ -614,15 +653,26 @@ function Upload({
<Input
type="text"
colorVariant="gray"
value={ingestor['name']}
onChange={(e) =>
setIngestor({ ...ingestor, name: e.target.value })
}
value={remoteName}
onChange={(e) => setRemoteName(e.target.value)}
borderVariant="thin"
placeholder="Name"
label="Name"
required={true}
/>
{renderFormFields()}
{IngestorFormSchemas[ingestor.type].some(
(field) => field.advanced,
) && (
<button
onClick={() => setShowAdvancedOptions(!showAdvancedOptions)}
className="text-purple-30 text-sm font-normal pl-0 py-2 bg-transparent hover:cursor-pointer text-left"
>
{showAdvancedOptions
? t('modals.uploadDoc.hideAdvanced')
: t('modals.uploadDoc.showAdvanced')}
</button>
)}
</>
)}
<div className="flex justify-between">
@@ -643,6 +693,7 @@ function Upload({
uploadRemote();
}
}}
disabled={isUploadDisabled()}
className={`rounded-3xl px-4 py-2 font-medium ${
isUploadDisabled()
? 'cursor-not-allowed bg-gray-300 text-gray-500'

View File

@@ -1,5 +1,5 @@
/**
 * Base shape for ingestor configuration objects; extended by the
 * per-ingestor config interfaces (e.g. RedditIngestorConfig).
 *
 * Besides the `name` property, any other string key is allowed and may hold
 * a string, number, boolean, or undefined value.
 */
export interface BaseIngestorConfig {
name: string;
// Index signature: permits dynamic access such as config[field.name].
[key: string]: string | number | boolean | undefined;
}
export interface RedditIngestorConfig extends BaseIngestorConfig {
@@ -44,9 +44,11 @@ export type IngestorFormData = {
/** Kinds of form control a schema field can be rendered as. */
export type FieldType = 'string' | 'number' | 'enum' | 'boolean';

/** Describes one input of an ingestor form. */
export interface FormField {
  /** Config key the field reads from and writes to. */
  name: string;
  /** Human-readable text used as the control's label and placeholder. */
  label: string;
  /** Selects which control is rendered for the field. */
  type: FieldType;
  /** When true, the field must be filled before the form can be submitted. */
  required?: boolean;
  /** When true, the field is only shown under the advanced options. */
  advanced?: boolean;
  /** Selectable choices; used by fields of type 'enum'. */
  options?: { label: string; value: string }[];
}
@@ -56,6 +58,7 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
name: 'url',
label: 'URL',
type: 'string',
required: true,
},
],
url: [
@@ -63,6 +66,7 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
name: 'url',
label: 'URL',
type: 'string',
required: true,
},
],
reddit: [
@@ -70,26 +74,31 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
name: 'client_id',
label: 'Client ID',
type: 'string',
required: true,
},
{
name: 'client_secret',
label: 'Client Secret',
type: 'string',
required: true,
},
{
name: 'user_agent',
label: 'User Agent',
type: 'string',
required: true,
},
{
name: 'search_queries',
label: 'Search Queries',
type: 'string',
required: true,
},
{
name: 'number_posts',
label: 'Number of Posts',
type: 'number',
required: true,
},
],
github: [
@@ -97,6 +106,7 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
name: 'repo_url',
label: 'Repository URL',
type: 'string',
required: true,
},
],
};