From 7896526f191517bf5c355cf201588c222510ea9c Mon Sep 17 00:00:00 2001 From: ManishMadan2882 Date: Fri, 5 Sep 2025 10:35:23 +0530 Subject: [PATCH] (feat:load_files) search feature --- application/api/connector/routes.py | 33 +++- .../parser/connectors/google_drive/loader.py | 29 ++- frontend/src/upload/Upload.tsx | 172 +++++++++++------- 3 files changed, 159 insertions(+), 75 deletions(-) diff --git a/application/api/connector/routes.py b/application/api/connector/routes.py index f203a703..5b12d8c4 100644 --- a/application/api/connector/routes.py +++ b/application/api/connector/routes.py @@ -339,8 +339,15 @@ class ConnectorRefresh(Resource): @connectors_ns.route("/api/connectors/files") class ConnectorFiles(Resource): - @api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)})) - @api.doc(description="List files from a connector provider (supports pagination)") + @api.expect(api.model("ConnectorFilesModel", { + "provider": fields.String(required=True), + "session_token": fields.String(required=True), + "folder_id": fields.String(required=False), + "limit": fields.Integer(required=False), + "page_token": fields.String(required=False), + "search_query": fields.String(required=False) + })) + @api.doc(description="List files from a connector provider (supports pagination and search)") def post(self): try: data = request.get_json() @@ -349,10 +356,11 @@ class ConnectorFiles(Resource): folder_id = data.get('folder_id') limit = data.get('limit', 10) page_token = data.get('page_token') + search_query = data.get('search_query') + if not provider or not session_token: return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) - decoded_token = request.decoded_token if not decoded_token: return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) @@ -362,13 +370,17 @@ class ConnectorFiles(Resource): return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401) loader = ConnectorCreator.create_connector(provider, session_token) - documents = loader.load_data({ + input_config = { 'limit': limit, 'list_only': True, 'session_token': session_token, 'folder_id': folder_id, 'page_token': page_token - }) + } + if search_query: + input_config['search_query'] = search_query + + documents = loader.load_data(input_config) files = [] for doc in documents[:limit]: @@ -386,13 +398,20 @@ class ConnectorFiles(Resource): 'name': metadata.get('file_name', 'Unknown File'), 'type': metadata.get('mime_type', 'unknown'), 'size': metadata.get('size', None), - 'modifiedTime': formatted_time + 'modifiedTime': formatted_time, + 'isFolder': metadata.get('is_folder', False) }) next_token = getattr(loader, 'next_page_token', None) has_more = bool(next_token) - return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200) + return make_response(jsonify({ + "success": True, + "files": files, + "total": len(files), + "next_page_token": next_token, + "has_more": has_more + }), 200) except Exception as e: current_app.logger.error(f"Error loading connector files: {e}") return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500) diff --git a/application/parser/connectors/google_drive/loader.py b/application/parser/connectors/google_drive/loader.py index 07219344..7705a3c1 100644 --- a/application/parser/connectors/google_drive/loader.py +++ b/application/parser/connectors/google_drive/loader.py @@ -120,6 +120,7 @@ class GoogleDriveLoader(BaseConnectorLoader): list_only = inputs.get('list_only', False) load_content = not list_only page_token = inputs.get('page_token') + search_query = inputs.get('search_query') self.next_page_token = None if file_ids: @@ -128,12 +129,18 @@ class GoogleDriveLoader(BaseConnectorLoader): try: doc = self._load_file_by_id(file_id, load_content=load_content) if doc: - documents.append(doc) + if not search_query or ( + search_query.lower() in doc.extra_info.get('file_name', '').lower() + ): + documents.append(doc) elif hasattr(self, '_credential_refreshed') and self._credential_refreshed: self._credential_refreshed = False logging.info(f"Retrying load of file {file_id} after credential refresh") doc = self._load_file_by_id(file_id, load_content=load_content) - if doc: + if doc and ( + not search_query or + search_query.lower() in doc.extra_info.get('file_name', '').lower() + ): documents.append(doc) except Exception as e: logging.error(f"Error loading file {file_id}: {e}") @@ -141,7 +148,13 @@ class GoogleDriveLoader(BaseConnectorLoader): else: # Browsing mode: list immediate children of provided folder or root parent_id = folder_id if folder_id else 'root' - documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token) + documents = self._list_items_in_parent( + parent_id, + limit=limit, + load_content=load_content, + page_token=page_token, + search_query=search_query + ) logging.info(f"Loaded {len(documents)} documents from Google Drive") return documents @@ -184,13 +197,18 @@ class GoogleDriveLoader(BaseConnectorLoader): return None - def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]: + def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None, search_query: Optional[str] = None) -> List[Document]: self._ensure_service() documents: List[Document] = [] try: query = f"'{parent_id}' in parents and trashed=false" + + if search_query: + safe_search = search_query.replace("'", "\\'") + query += f" and name contains '{safe_search}'" + next_token_out: Optional[str] = None while True: @@ -205,7 +223,8 @@ class GoogleDriveLoader(BaseConnectorLoader): q=query, fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)', pageToken=page_token, - pageSize=page_size + pageSize=page_size, + orderBy='name' ).execute() items = results.get('files', []) diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index 46a36f4c..3f0b5bfe 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -64,6 +64,7 @@ function Upload({ const [authError, setAuthError] = useState(''); const [currentFolderId, setCurrentFolderId] = useState(null); const [folderPath, setFolderPath] = useState>([{id: null, name: 'My Drive'}]); + const [searchQuery, setSearchQuery] = useState(''); const [nextPageToken, setNextPageToken] = useState(null); const [hasMoreFiles, setHasMoreFiles] = useState(false); @@ -549,7 +550,7 @@ function Upload({ setNextPageToken(null); setHasMoreFiles(false); - loadGoogleDriveFiles(sessionToken, null, null, false); + loadGoogleDriveFiles(sessionToken, null, undefined, ''); } else { removeSessionToken(ingestor.type); setIsGoogleDriveConnected(false); @@ -562,57 +563,63 @@ function Upload({ } }; - const loadGoogleDriveFiles = async ( - sessionToken: string, - folderId?: string | null, - pageToken?: string | null, - append: boolean = false, - ) => { - setIsLoadingFiles(true); + const loadGoogleDriveFiles = useCallback( + ( + sessionToken: string, + folderId: string | null, + pageToken?: string, + searchQuery: string = '' + ) => { - try { + setIsLoadingFiles(true); + const apiHost = import.meta.env.VITE_API_HOST; - const requestBody: any = { - session_token: sessionToken, - limit: 10, - }; - if (folderId) { - requestBody.folder_id = folderId; - } - if (pageToken) { - requestBody.page_token = pageToken; + if (!pageToken) { + setGoogleDriveFiles([]); } - const filesResponse = await fetch(`${apiHost}/api/connectors/files`, { + fetch(`${apiHost}/api/connectors/files`, { method: 'POST', headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${token}` }, - body: JSON.stringify({ ...requestBody, provider: 'google_drive' }) - }); - - if (!filesResponse.ok) { - throw new Error(`Failed to load files: ${filesResponse.status}`); - } - - const filesData = await filesResponse.json(); - - if (filesData.success && Array.isArray(filesData.files)) { - setGoogleDriveFiles(prev => append ? [...prev, ...filesData.files] : filesData.files); - setNextPageToken(filesData.next_page_token || null); - setHasMoreFiles(Boolean(filesData.has_more)); - } else { - throw new Error(filesData.error || 'Failed to load files'); - } - - } catch (error) { - console.error('Error loading Google Drive files:', error); - setAuthError(error instanceof Error ? error.message : 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.'); - } finally { - setIsLoadingFiles(false); - } - }; + body: JSON.stringify({ + provider: 'google_drive', + session_token: sessionToken, + folder_id: folderId, + limit: 10, + page_token: pageToken, + search_query: searchQuery + }) + }) + .then(response => response.json()) + .then(data => { + if (data.success) { + setGoogleDriveFiles(prev => + pageToken ? [...prev, ...data.files] : data.files + ); + setNextPageToken(data.next_page_token); + setHasMoreFiles(!!data.next_page_token); + } else { + console.error('Error loading files:', data.error); + if (!pageToken) { + setGoogleDriveFiles([]); + } + } + }) + .catch(err => { + console.error('Error loading files:', err); + if (!pageToken) { + setGoogleDriveFiles([]); + } + }) + .finally(() => { + setIsLoadingFiles(false); + }); + }, + [token] + ); @@ -628,33 +635,38 @@ function Upload({ }; const handleFolderClick = (folderId: string, folderName: string) => { + if (folderId === currentFolderId) { + return; + } + + setIsLoadingFiles(true); + + setCurrentFolderId(folderId); + setFolderPath(prev => [...prev, { id: folderId, name: folderName }]); + + setSearchQuery(''); + const sessionToken = getSessionToken(ingestor.type); if (sessionToken) { - setCurrentFolderId(folderId); - setFolderPath(prev => [...prev, {id: folderId, name: folderName}]); - - setGoogleDriveFiles([]); - setNextPageToken(null); - setHasMoreFiles(false); - setSelectedFiles([]); - loadGoogleDriveFiles(sessionToken, folderId, null, false); + loadGoogleDriveFiles(sessionToken, folderId, undefined, ''); } }; const navigateBack = (index: number) => { - const sessionToken = getSessionToken(ingestor.type); - if (sessionToken) { + if (index < folderPath.length - 1) { const newPath = folderPath.slice(0, index + 1); - const targetFolderId = newPath[newPath.length - 1]?.id; - - setCurrentFolderId(targetFolderId as string | null); + const targetFolderId = newPath[newPath.length - 1].id; + + setIsLoadingFiles(true); + setFolderPath(newPath); - - setGoogleDriveFiles([]); - setNextPageToken(null); - setHasMoreFiles(false); - setSelectedFiles([]); - loadGoogleDriveFiles(sessionToken, targetFolderId ?? null, null, false); + setCurrentFolderId(targetFolderId); + + setSearchQuery(''); + const sessionToken = getSessionToken(ingestor.type); + if (sessionToken) { + loadGoogleDriveFiles(sessionToken, targetFolderId, undefined, ''); + } } }; @@ -957,6 +969,40 @@ function Upload({ ))} + {/* Search input */} +
+
+ setSearchQuery(e.target.value)} + onKeyDown={(e) => { + if (e.key === 'Enter') { + const sessionToken = getSessionToken(ingestor.type); + if (sessionToken) { + loadGoogleDriveFiles(sessionToken, currentFolderId, undefined, searchQuery); + } + } + }} + className="w-full px-3 py-2 pr-10 text-sm border border-gray-300 rounded-lg focus:outline-none focus:ring-1 focus:ring-blue-500 focus:border-blue-500 dark:bg-gray-700 dark:border-gray-600 dark:text-white" + /> + +
+
+

Select Files from Google Drive @@ -1131,7 +1177,7 @@ function Upload({ if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) { const sessionToken = getSessionToken(ingestor.type); if (sessionToken) { - loadGoogleDriveFiles(sessionToken, currentFolderId, nextPageToken, true); + loadGoogleDriveFiles(sessionToken, currentFolderId, nextPageToken); } } };