diff --git a/application/api/connector/routes.py b/application/api/connector/routes.py index 821b2d91..f203a703 100644 --- a/application/api/connector/routes.py +++ b/application/api/connector/routes.py @@ -1,5 +1,6 @@ import datetime import json +import logging from bson.objectid import ObjectId @@ -338,15 +339,16 @@ class ConnectorRefresh(Resource): @connectors_ns.route("/api/connectors/files") class ConnectorFiles(Resource): - @api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False)})) - @api.doc(description="List files from a connector provider") + @api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)})) + @api.doc(description="List files from a connector provider (supports pagination)") def post(self): try: data = request.get_json() provider = data.get('provider') session_token = data.get('session_token') folder_id = data.get('folder_id') - limit = data.get('limit', 50) + limit = data.get('limit', 10) + page_token = data.get('page_token') if not provider or not session_token: return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) @@ -364,21 +366,33 @@ class ConnectorFiles(Resource): 'limit': limit, 'list_only': True, 'session_token': session_token, - 'folder_id': folder_id + 'folder_id': folder_id, + 'page_token': page_token }) files = [] for doc in documents[:limit]: metadata = doc.extra_info + modified_time = metadata.get('modified_time') + if modified_time: + date_part = modified_time.split('T')[0] + time_part = modified_time.split('T')[1].split('.')[0].split('Z')[0] + formatted_time = f"{date_part} {time_part}" + else: + formatted_time = None + files.append({ 'id': doc.doc_id, 'name': metadata.get('file_name', 'Unknown File'), 'type': metadata.get('mime_type', 'unknown'), - 'size': metadata.get('size', 'Unknown'), - 'modifiedTime': metadata.get('modified_time', 'Unknown') + 'size': metadata.get('size', None), + 'modifiedTime': formatted_time }) - return make_response(jsonify({"success": True, "files": files, "total": len(files)}), 200) + next_token = getattr(loader, 'next_page_token', None) + has_more = bool(next_token) + + return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200) except Exception as e: current_app.logger.error(f"Error loading connector files: {e}") return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500) diff --git a/application/parser/connectors/google_drive/loader.py b/application/parser/connectors/google_drive/loader.py index 22d6acc9..07219344 100644 --- a/application/parser/connectors/google_drive/loader.py +++ b/application/parser/connectors/google_drive/loader.py @@ -57,6 +57,8 @@ class GoogleDriveLoader(BaseConnectorLoader): logging.warning(f"Could not build Google Drive service: {e}") self.service = None + self.next_page_token = None + def _process_file(self, file_metadata: Dict[str, Any], load_content: bool = True) -> Optional[Document]: @@ -74,7 +76,7 @@ class GoogleDriveLoader(BaseConnectorLoader): doc_metadata = { 'file_name': file_name, 'mime_type': mime_type, - 'size': file_metadata.get('size', 'Unknown'), + 'size': file_metadata.get('size', None), 'created_time': file_metadata.get('createdTime'), 'modified_time': file_metadata.get('modifiedTime'), 'parents': file_metadata.get('parents', []), @@ -117,6 +119,8 @@ class GoogleDriveLoader(BaseConnectorLoader): limit = inputs.get('limit', 100) list_only = inputs.get('list_only', False) load_content = not list_only + page_token = inputs.get('page_token') + self.next_page_token = None if file_ids: # Specific files requested: load them @@ -137,7 +141,7 @@ class GoogleDriveLoader(BaseConnectorLoader): else: # Browsing mode: list immediate children of provided folder or root parent_id = folder_id if folder_id else 'root' - documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content) + documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token) logging.info(f"Loaded {len(documents)} documents from Google Drive") return documents @@ -180,14 +184,14 @@ class GoogleDriveLoader(BaseConnectorLoader): return None - def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False) -> List[Document]: + def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]: self._ensure_service() documents: List[Document] = [] try: query = f"'{parent_id}' in parents and trashed=false" - page_token = None + next_token_out: Optional[str] = None while True: page_size = 100 @@ -211,7 +215,7 @@ class GoogleDriveLoader(BaseConnectorLoader): doc_metadata = { 'file_name': item.get('name', 'Unknown'), 'mime_type': mime_type, - 'size': item.get('size', 'Unknown'), + 'size': item.get('size', None), 'created_time': item.get('createdTime'), 'modified_time': item.get('modifiedTime'), 'parents': item.get('parents', []), @@ -225,12 +229,15 @@ class GoogleDriveLoader(BaseConnectorLoader): documents.append(doc) if limit and len(documents) >= limit: + self.next_page_token = results.get('nextPageToken') return documents page_token = results.get('nextPageToken') + next_token_out = page_token if not page_token: break + self.next_page_token = next_token_out return documents except Exception as e: logging.error(f"Error listing items under parent {parent_id}: {e}") diff --git a/frontend/src/components/ConnectorTreeComponent.tsx b/frontend/src/components/ConnectorTreeComponent.tsx index dfd868b3..53900d0f 100644 --- a/frontend/src/components/ConnectorTreeComponent.tsx +++ b/frontend/src/components/ConnectorTreeComponent.tsx @@ -1,6 +1,7 @@ import React, { useState, useRef, useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { useSelector } from 'react-redux'; +import { formatBytes } from '../utils/stringUtils'; import { selectToken } from '../preferences/preferenceSlice'; import Chunks from './Chunks'; import ContextMenu, { MenuOption } from './ContextMenu'; @@ -226,13 +227,7 @@ const ConnectorTreeComponent: React.FC = ({ return current; }; - const formatBytes = (bytes: number): string => { - if (bytes === 0) return '0 Bytes'; - const k = 1024; - const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; - const i = Math.floor(Math.log(bytes) / Math.log(k)); - return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; - }; + const getMenuRef = (id: string) => { if (!menuRefs.current[id]) { diff --git a/frontend/src/components/FileTreeComponent.tsx b/frontend/src/components/FileTreeComponent.tsx index ad714869..3e97d4e9 100644 --- a/frontend/src/components/FileTreeComponent.tsx +++ b/frontend/src/components/FileTreeComponent.tsx @@ -2,6 +2,7 @@ import React, { useState, useRef, useEffect } from 'react'; import { useTranslation } from 'react-i18next'; import { useSelector } from 'react-redux'; import { selectToken } from '../preferences/preferenceSlice'; +import { formatBytes } from '../utils/stringUtils'; import Chunks from './Chunks'; import ContextMenu, { MenuOption } from './ContextMenu'; import userService from '../api/services/userService'; @@ -128,13 +129,7 @@ const FileTreeComponent: React.FC = ({ } }, [docId, token]); - const formatBytes = (bytes: number): string => { - if (bytes === 0) return '0 Bytes'; - const k = 1024; - const sizes = ['Bytes', 'KB', 'MB', 'GB']; - const i = Math.floor(Math.log(bytes) / Math.log(k)); - return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i]; - }; + const navigateToDirectory = (dirName: string) => { setCurrentPath((prev) => [...prev, dirName]); diff --git a/frontend/src/upload/Upload.tsx b/frontend/src/upload/Upload.tsx index c780e68c..a014bc21 100644 --- a/frontend/src/upload/Upload.tsx +++ b/frontend/src/upload/Upload.tsx @@ -5,6 +5,8 @@ import { useDispatch, useSelector } from 'react-redux'; import userService from '../api/services/userService'; import { getSessionToken, setSessionToken, removeSessionToken } from '../utils/providerUtils'; +import { formatDate } from '../utils/dateTimeUtils'; +import { formatBytes } from '../utils/stringUtils'; import FileUpload from '../assets/file_upload.svg'; import WebsiteCollect from '../assets/website_collect.svg'; import Dropdown from '../components/Dropdown'; @@ -63,7 +65,9 @@ function Upload({ const [currentFolderId, setCurrentFolderId] = useState(null); const [folderPath, setFolderPath] = useState>([{id: null, name: 'My Drive'}]); - + const [nextPageToken, setNextPageToken] = useState(null); + const [hasMoreFiles, setHasMoreFiles] = useState(false); + const scrollContainerRef = useRef(null); const renderFormFields = () => { const schema = IngestorFormSchemas[ingestor.type]; @@ -449,16 +453,16 @@ function Upload({ if (ingestor.type === 'google_drive') { const sessionToken = getSessionToken(ingestor.type); - + const selectedItems = googleDriveFiles.filter(file => selectedFiles.includes(file.id)); const selectedFolderIds = selectedItems .filter(item => item.type === 'application/vnd.google-apps.folder' || item.isFolder) .map(folder => folder.id); - + const selectedFileIds = selectedItems .filter(item => item.type !== 'application/vnd.google-apps.folder' && !item.isFolder) .map(file => file.id); - + configData = { file_ids: selectedFileIds, folder_ids: selectedFolderIds, @@ -466,7 +470,7 @@ function Upload({ session_token: sessionToken || null }; } else { - + configData = { ...ingestor.config }; } @@ -501,13 +505,14 @@ function Upload({ useEffect(() => { if (ingestor.type === 'google_drive') { const sessionToken = getSessionToken(ingestor.type); - + if (sessionToken) { // Auto-authenticate if session token exists setIsGoogleDriveConnected(true); setAuthError(''); - + // Fetch user email and files using the existing session token + fetchUserEmailAndLoadFiles(sessionToken); } } @@ -516,7 +521,7 @@ function Upload({ const fetchUserEmailAndLoadFiles = async (sessionToken: string) => { try { const apiHost = import.meta.env.VITE_API_HOST; - + const validateResponse = await fetch(`${apiHost}/api/connectors/validate-session`, { method: 'POST', headers: { @@ -525,19 +530,26 @@ function Upload({ }, body: JSON.stringify({ provider: 'google_drive', session_token: sessionToken }) }); - + if (!validateResponse.ok) { removeSessionToken(ingestor.type); setIsGoogleDriveConnected(false); setAuthError('Session expired. Please reconnect to Google Drive.'); return; } - + const validateData = await validateResponse.json(); - + if (validateData.success) { setUserEmail(validateData.user_email || 'Connected User'); - loadGoogleDriveFiles(sessionToken, null); + // reset pagination state and files + setGoogleDriveFiles([]); + + + + setNextPageToken(null); + setHasMoreFiles(false); + loadGoogleDriveFiles(sessionToken, null, null, false); } else { removeSessionToken(ingestor.type); setIsGoogleDriveConnected(false); @@ -550,18 +562,26 @@ function Upload({ } }; - const loadGoogleDriveFiles = async (sessionToken: string, folderId?: string | null) => { + const loadGoogleDriveFiles = async ( + sessionToken: string, + folderId?: string | null, + pageToken?: string | null, + append: boolean = false, + ) => { setIsLoadingFiles(true); try { const apiHost = import.meta.env.VITE_API_HOST; const requestBody: any = { session_token: sessionToken, - limit: 50 + limit: 10, }; if (folderId) { requestBody.folder_id = folderId; } + if (pageToken) { + requestBody.page_token = pageToken; + } const filesResponse = await fetch(`${apiHost}/api/connectors/files`, { method: 'POST', @@ -578,8 +598,10 @@ function Upload({ const filesData = await filesResponse.json(); - if (filesData.success && filesData.files) { - setGoogleDriveFiles(filesData.files); + if (filesData.success && Array.isArray(filesData.files)) { + setGoogleDriveFiles(prev => append ? [...prev, ...filesData.files] : filesData.files); + setNextPageToken(filesData.next_page_token || null); + setHasMoreFiles(Boolean(filesData.has_more)); } else { throw new Error(filesData.error || 'Failed to load files'); } @@ -587,50 +609,13 @@ function Upload({ } catch (error) { console.error('Error loading Google Drive files:', error); setAuthError(error instanceof Error ? error.message : 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.'); - - // Fallback to mock data for demo purposes - console.log('Using mock data as fallback...'); - const mockFiles = [ - { - id: '1', - name: 'Project Documentation.pdf', - type: 'application/pdf', - size: '2.5 MB', - modifiedTime: '2024-01-15', - iconUrl: '�' - }, - { - id: '2', - name: 'Meeting Notes.docx', - type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - size: '1.2 MB', - modifiedTime: '2024-01-14', - iconUrl: '�' - }, - { - id: '3', - name: 'Presentation.pptx', - type: 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - size: '5.8 MB', - modifiedTime: '2024-01-13', - iconUrl: '�' - }, - { - id: 'folder1', - name: 'Documents', - type: 'application/vnd.google-apps.folder', - size: '0 bytes', - modifiedTime: '2024-01-13', - iconUrl: '📁', - isFolder: true - } - ]; - setGoogleDriveFiles(mockFiles); } finally { setIsLoadingFiles(false); } }; + + // Handle file selection const handleFileSelect = (fileId: string) => { setSelectedFiles(prev => { @@ -647,7 +632,12 @@ function Upload({ if (sessionToken) { setCurrentFolderId(folderId); setFolderPath(prev => [...prev, {id: folderId, name: folderName}]); - loadGoogleDriveFiles(sessionToken, folderId); + + setGoogleDriveFiles([]); + setNextPageToken(null); + setHasMoreFiles(false); + setSelectedFiles([]); + loadGoogleDriveFiles(sessionToken, folderId, null, false); } }; @@ -657,9 +647,14 @@ function Upload({ const newPath = folderPath.slice(0, index + 1); const targetFolderId = newPath[newPath.length - 1]?.id; - setCurrentFolderId(targetFolderId); + setCurrentFolderId(targetFolderId as string | null); setFolderPath(newPath); - loadGoogleDriveFiles(sessionToken, targetFolderId); + + setGoogleDriveFiles([]); + setNextPageToken(null); + setHasMoreFiles(false); + setSelectedFiles([]); + loadGoogleDriveFiles(sessionToken, targetFolderId ?? null, null, false); } }; @@ -895,7 +890,7 @@ function Upload({ setIsGoogleDriveConnected(true); setIsAuthenticating(false); setAuthError(''); - + if (data.session_token) { setSessionToken(ingestor.type, data.session_token); loadGoogleDriveFiles(data.session_token, null); @@ -920,13 +915,13 @@ function Upload({ + )} + {isLoadingFiles && ( +
+
+ Loading more files... +
+ )} +{!hasMoreFiles && !isLoadingFiles && ( + All files loaded + )} + + + + )} + + + )} diff --git a/frontend/src/utils/stringUtils.ts b/frontend/src/utils/stringUtils.ts index e87a7af3..89c69df2 100644 --- a/frontend/src/utils/stringUtils.ts +++ b/frontend/src/utils/stringUtils.ts @@ -2,3 +2,12 @@ export function truncate(str: string, n: number) { // slices long strings and ends with ... return str.length > n ? str.slice(0, n - 1) + '...' : str; } + +export function formatBytes(bytes: number | null): string { + if (!bytes || bytes <= 0) return ''; + + const k = 1024; + const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB']; + const i = Math.floor(Math.log(bytes) / Math.log(k)); + return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`; +}