(feat:load_files) search feature

This commit is contained in:
ManishMadan2882
2025-09-05 10:35:23 +05:30
parent 5a9bc6d2bf
commit 7896526f19
3 changed files with 159 additions and 75 deletions

View File

@@ -339,8 +339,15 @@ class ConnectorRefresh(Resource):
@connectors_ns.route("/api/connectors/files") @connectors_ns.route("/api/connectors/files")
class ConnectorFiles(Resource): class ConnectorFiles(Resource):
@api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)})) @api.expect(api.model("ConnectorFilesModel", {
@api.doc(description="List files from a connector provider (supports pagination)") "provider": fields.String(required=True),
"session_token": fields.String(required=True),
"folder_id": fields.String(required=False),
"limit": fields.Integer(required=False),
"page_token": fields.String(required=False),
"search_query": fields.String(required=False)
}))
@api.doc(description="List files from a connector provider (supports pagination and search)")
def post(self): def post(self):
try: try:
data = request.get_json() data = request.get_json()
@@ -349,10 +356,11 @@ class ConnectorFiles(Resource):
folder_id = data.get('folder_id') folder_id = data.get('folder_id')
limit = data.get('limit', 10) limit = data.get('limit', 10)
page_token = data.get('page_token') page_token = data.get('page_token')
search_query = data.get('search_query')
if not provider or not session_token: if not provider or not session_token:
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
decoded_token = request.decoded_token decoded_token = request.decoded_token
if not decoded_token: if not decoded_token:
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
@@ -362,13 +370,17 @@ class ConnectorFiles(Resource):
return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401) return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)
loader = ConnectorCreator.create_connector(provider, session_token) loader = ConnectorCreator.create_connector(provider, session_token)
documents = loader.load_data({ input_config = {
'limit': limit, 'limit': limit,
'list_only': True, 'list_only': True,
'session_token': session_token, 'session_token': session_token,
'folder_id': folder_id, 'folder_id': folder_id,
'page_token': page_token 'page_token': page_token
}) }
if search_query:
input_config['search_query'] = search_query
documents = loader.load_data(input_config)
files = [] files = []
for doc in documents[:limit]: for doc in documents[:limit]:
@@ -386,13 +398,20 @@ class ConnectorFiles(Resource):
'name': metadata.get('file_name', 'Unknown File'), 'name': metadata.get('file_name', 'Unknown File'),
'type': metadata.get('mime_type', 'unknown'), 'type': metadata.get('mime_type', 'unknown'),
'size': metadata.get('size', None), 'size': metadata.get('size', None),
'modifiedTime': formatted_time 'modifiedTime': formatted_time,
'isFolder': metadata.get('is_folder', False)
}) })
next_token = getattr(loader, 'next_page_token', None) next_token = getattr(loader, 'next_page_token', None)
has_more = bool(next_token) has_more = bool(next_token)
return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200) return make_response(jsonify({
"success": True,
"files": files,
"total": len(files),
"next_page_token": next_token,
"has_more": has_more
}), 200)
except Exception as e: except Exception as e:
current_app.logger.error(f"Error loading connector files: {e}") current_app.logger.error(f"Error loading connector files: {e}")
return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500) return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)

View File

@@ -120,6 +120,7 @@ class GoogleDriveLoader(BaseConnectorLoader):
list_only = inputs.get('list_only', False) list_only = inputs.get('list_only', False)
load_content = not list_only load_content = not list_only
page_token = inputs.get('page_token') page_token = inputs.get('page_token')
search_query = inputs.get('search_query')
self.next_page_token = None self.next_page_token = None
if file_ids: if file_ids:
@@ -128,12 +129,18 @@ class GoogleDriveLoader(BaseConnectorLoader):
try: try:
doc = self._load_file_by_id(file_id, load_content=load_content) doc = self._load_file_by_id(file_id, load_content=load_content)
if doc: if doc:
documents.append(doc) if not search_query or (
search_query.lower() in doc.extra_info.get('file_name', '').lower()
):
documents.append(doc)
elif hasattr(self, '_credential_refreshed') and self._credential_refreshed: elif hasattr(self, '_credential_refreshed') and self._credential_refreshed:
self._credential_refreshed = False self._credential_refreshed = False
logging.info(f"Retrying load of file {file_id} after credential refresh") logging.info(f"Retrying load of file {file_id} after credential refresh")
doc = self._load_file_by_id(file_id, load_content=load_content) doc = self._load_file_by_id(file_id, load_content=load_content)
if doc: if doc and (
not search_query or
search_query.lower() in doc.extra_info.get('file_name', '').lower()
):
documents.append(doc) documents.append(doc)
except Exception as e: except Exception as e:
logging.error(f"Error loading file {file_id}: {e}") logging.error(f"Error loading file {file_id}: {e}")
@@ -141,7 +148,13 @@ class GoogleDriveLoader(BaseConnectorLoader):
else: else:
# Browsing mode: list immediate children of provided folder or root # Browsing mode: list immediate children of provided folder or root
parent_id = folder_id if folder_id else 'root' parent_id = folder_id if folder_id else 'root'
documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token) documents = self._list_items_in_parent(
parent_id,
limit=limit,
load_content=load_content,
page_token=page_token,
search_query=search_query
)
logging.info(f"Loaded {len(documents)} documents from Google Drive") logging.info(f"Loaded {len(documents)} documents from Google Drive")
return documents return documents
@@ -184,13 +197,18 @@ class GoogleDriveLoader(BaseConnectorLoader):
return None return None
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]: def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None, search_query: Optional[str] = None) -> List[Document]:
self._ensure_service() self._ensure_service()
documents: List[Document] = [] documents: List[Document] = []
try: try:
query = f"'{parent_id}' in parents and trashed=false" query = f"'{parent_id}' in parents and trashed=false"
if search_query:
safe_search = search_query.replace("'", "\\'")
query += f" and name contains '{safe_search}'"
next_token_out: Optional[str] = None next_token_out: Optional[str] = None
while True: while True:
@@ -205,7 +223,8 @@ class GoogleDriveLoader(BaseConnectorLoader):
q=query, q=query,
fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)', fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
pageToken=page_token, pageToken=page_token,
pageSize=page_size pageSize=page_size,
orderBy='name'
).execute() ).execute()
items = results.get('files', []) items = results.get('files', [])

View File

@@ -64,6 +64,7 @@ function Upload({
const [authError, setAuthError] = useState<string>(''); const [authError, setAuthError] = useState<string>('');
const [currentFolderId, setCurrentFolderId] = useState<string | null>(null); const [currentFolderId, setCurrentFolderId] = useState<string | null>(null);
const [folderPath, setFolderPath] = useState<Array<{id: string | null, name: string}>>([{id: null, name: 'My Drive'}]); const [folderPath, setFolderPath] = useState<Array<{id: string | null, name: string}>>([{id: null, name: 'My Drive'}]);
const [searchQuery, setSearchQuery] = useState<string>('');
const [nextPageToken, setNextPageToken] = useState<string | null>(null); const [nextPageToken, setNextPageToken] = useState<string | null>(null);
const [hasMoreFiles, setHasMoreFiles] = useState<boolean>(false); const [hasMoreFiles, setHasMoreFiles] = useState<boolean>(false);
@@ -549,7 +550,7 @@ function Upload({
setNextPageToken(null); setNextPageToken(null);
setHasMoreFiles(false); setHasMoreFiles(false);
loadGoogleDriveFiles(sessionToken, null, null, false); loadGoogleDriveFiles(sessionToken, null, undefined, '');
} else { } else {
removeSessionToken(ingestor.type); removeSessionToken(ingestor.type);
setIsGoogleDriveConnected(false); setIsGoogleDriveConnected(false);
@@ -562,57 +563,63 @@ function Upload({
} }
}; };
const loadGoogleDriveFiles = async ( const loadGoogleDriveFiles = useCallback(
sessionToken: string, (
folderId?: string | null, sessionToken: string,
pageToken?: string | null, folderId: string | null,
append: boolean = false, pageToken?: string,
) => { searchQuery: string = ''
setIsLoadingFiles(true); ) => {
setIsLoadingFiles(true);
try {
const apiHost = import.meta.env.VITE_API_HOST; const apiHost = import.meta.env.VITE_API_HOST;
const requestBody: any = { if (!pageToken) {
session_token: sessionToken, setGoogleDriveFiles([]);
limit: 10,
};
if (folderId) {
requestBody.folder_id = folderId;
}
if (pageToken) {
requestBody.page_token = pageToken;
} }
const filesResponse = await fetch(`${apiHost}/api/connectors/files`, { fetch(`${apiHost}/api/connectors/files`, {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': `Bearer ${token}` 'Authorization': `Bearer ${token}`
}, },
body: JSON.stringify({ ...requestBody, provider: 'google_drive' }) body: JSON.stringify({
}); provider: 'google_drive',
session_token: sessionToken,
if (!filesResponse.ok) { folder_id: folderId,
throw new Error(`Failed to load files: ${filesResponse.status}`); limit: 10,
} page_token: pageToken,
search_query: searchQuery
const filesData = await filesResponse.json(); })
})
if (filesData.success && Array.isArray(filesData.files)) { .then(response => response.json())
setGoogleDriveFiles(prev => append ? [...prev, ...filesData.files] : filesData.files); .then(data => {
setNextPageToken(filesData.next_page_token || null); if (data.success) {
setHasMoreFiles(Boolean(filesData.has_more)); setGoogleDriveFiles(prev =>
} else { pageToken ? [...prev, ...data.files] : data.files
throw new Error(filesData.error || 'Failed to load files'); );
} setNextPageToken(data.next_page_token);
setHasMoreFiles(!!data.next_page_token);
} catch (error) { } else {
console.error('Error loading Google Drive files:', error); console.error('Error loading files:', data.error);
setAuthError(error instanceof Error ? error.message : 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.'); if (!pageToken) {
} finally { setGoogleDriveFiles([]);
setIsLoadingFiles(false); }
} }
}; })
.catch(err => {
console.error('Error loading files:', err);
if (!pageToken) {
setGoogleDriveFiles([]);
}
})
.finally(() => {
setIsLoadingFiles(false);
});
},
[token]
);
@@ -628,33 +635,38 @@ function Upload({
}; };
const handleFolderClick = (folderId: string, folderName: string) => { const handleFolderClick = (folderId: string, folderName: string) => {
if (folderId === currentFolderId) {
return;
}
setIsLoadingFiles(true);
setCurrentFolderId(folderId);
setFolderPath(prev => [...prev, { id: folderId, name: folderName }]);
setSearchQuery('');
const sessionToken = getSessionToken(ingestor.type); const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) { if (sessionToken) {
setCurrentFolderId(folderId); loadGoogleDriveFiles(sessionToken, folderId, undefined, '');
setFolderPath(prev => [...prev, {id: folderId, name: folderName}]);
setGoogleDriveFiles([]);
setNextPageToken(null);
setHasMoreFiles(false);
setSelectedFiles([]);
loadGoogleDriveFiles(sessionToken, folderId, null, false);
} }
}; };
const navigateBack = (index: number) => { const navigateBack = (index: number) => {
const sessionToken = getSessionToken(ingestor.type); if (index < folderPath.length - 1) {
if (sessionToken) {
const newPath = folderPath.slice(0, index + 1); const newPath = folderPath.slice(0, index + 1);
const targetFolderId = newPath[newPath.length - 1]?.id; const targetFolderId = newPath[newPath.length - 1].id;
setIsLoadingFiles(true);
setCurrentFolderId(targetFolderId as string | null);
setFolderPath(newPath); setFolderPath(newPath);
setCurrentFolderId(targetFolderId);
setGoogleDriveFiles([]); setSearchQuery('');
setNextPageToken(null); const sessionToken = getSessionToken(ingestor.type);
setHasMoreFiles(false); if (sessionToken) {
setSelectedFiles([]); loadGoogleDriveFiles(sessionToken, targetFolderId, undefined, '');
loadGoogleDriveFiles(sessionToken, targetFolderId ?? null, null, false); }
} }
}; };
@@ -957,6 +969,40 @@ function Upload({
))} ))}
</div> </div>
{/* Search input */}
<div className="mb-3">
<div className="relative">
<input
type="text"
placeholder="Search files and folders..."
value={searchQuery}
onChange={(e) => setSearchQuery(e.target.value)}
onKeyDown={(e) => {
if (e.key === 'Enter') {
const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) {
loadGoogleDriveFiles(sessionToken, currentFolderId, undefined, searchQuery);
}
}
}}
className="w-full px-3 py-2 pr-10 text-sm border border-gray-300 rounded-lg focus:outline-none focus:ring-1 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:border-gray-600 dark:text-white"
/>
<button
onClick={() => {
const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) {
loadGoogleDriveFiles(sessionToken, currentFolderId, undefined, searchQuery);
}
}}
className="absolute inset-y-0 right-0 flex items-center px-3 text-gray-500 dark:text-gray-400"
>
<svg className="w-4 h-4" fill="none" stroke="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
</button>
</div>
</div>
<div className="flex items-center justify-between"> <div className="flex items-center justify-between">
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300"> <h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Select Files from Google Drive Select Files from Google Drive
@@ -1131,7 +1177,7 @@ function Upload({
if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) { if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) {
const sessionToken = getSessionToken(ingestor.type); const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) { if (sessionToken) {
loadGoogleDriveFiles(sessionToken, currentFolderId, nextPageToken, true); loadGoogleDriveFiles(sessionToken, currentFolderId, nextPageToken);
} }
} }
}; };