mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
(feat:connector) raw sync flow
This commit is contained in:
@@ -146,6 +146,116 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
logging.error(f"Error loading data from Google Drive: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def scan_drive_contents(self, file_ids: List[str], folder_ids: List[str],
                        modified_after: str = "2024-01-01T00:00:00Z") -> Dict[str, Any]:
    """
    Scan Google Drive contents and report items modified after a specific date.

    Each ID in ``file_ids`` and ``folder_ids`` is looked up individually and
    reported if its ``modifiedTime`` is later than ``modified_after``. For every
    folder in ``folder_ids`` the direct children (one level, not recursive)
    modified after the cutoff are listed as well and classified as file or
    folder by their MIME type.

    Timestamps are compared as points in time rather than as raw strings, so
    fractional seconds and UTC offsets in Drive's RFC 3339 values are handled
    correctly. An item that is reached more than once (e.g. listed explicitly
    and also found inside a scanned folder) is reported only once.

    Args:
        file_ids: List of specific file IDs to check
        folder_ids: List of folder IDs to check and to scan for modified children
        modified_after: ISO 8601 / RFC 3339 formatted date string
            (default: "2024-01-01T00:00:00Z"). A value without a UTC offset
            is interpreted as UTC.

    Returns:
        Dictionary containing:
        - 'modified_files': List of dicts with 'id', 'name', 'modifiedTime'
          and 'mimeType' for each file modified after the given date
        - 'modified_folders': List of dicts with the same keys for each
          folder modified after the given date
        - 'scan_summary': Dict with 'total_modified_files',
          'total_modified_folders' and 'scan_date' (the cutoff that was used)

    Raises:
        Exception: Any unexpected error outside the per-item handling is
            logged and re-raised. Failures on an individual file or folder
            are logged as warnings and that item is skipped.
    """
    # Imported locally so this method does not depend on the module's import block.
    from datetime import datetime, timezone

    self._ensure_service()

    folder_mime_type = 'application/vnd.google-apps.folder'
    metadata_fields = 'id,name,modifiedTime,mimeType'

    def _parse_timestamp(value):
        """Return an aware datetime for an RFC 3339 string, or None if unparseable."""
        try:
            text = value.strip()
            # datetime.fromisoformat() only accepts a trailing 'Z' from Python 3.11 on.
            if text[-1:] in ('Z', 'z'):
                text = text[:-1] + '+00:00'
            parsed = datetime.fromisoformat(text)
        except (AttributeError, TypeError, ValueError):
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    cutoff = _parse_timestamp(modified_after)

    def _is_after_cutoff(modified_time):
        """Tell whether modified_time is strictly later than modified_after."""
        if not modified_time:
            return False
        parsed = _parse_timestamp(modified_time)
        if cutoff is None or parsed is None:
            # Best effort: fall back to plain string ordering when either
            # value is not a timestamp we can parse.
            return modified_time > modified_after
        return parsed > cutoff

    modified_files = []
    modified_folders = []
    seen_file_ids = set()
    seen_folder_ids = set()

    def _record(entry, bucket, seen_ids):
        """Append entry to bucket unless an entry with the same ID is already there."""
        if entry['id'] in seen_ids:
            return
        seen_ids.add(entry['id'])
        bucket.append(entry)

    try:
        for file_id in file_ids:
            try:
                file_metadata = self.service.files().get(
                    fileId=file_id,
                    fields=metadata_fields
                ).execute()

                modified_time = file_metadata.get('modifiedTime', '')
                if _is_after_cutoff(modified_time):
                    _record({
                        'id': file_id,
                        'name': file_metadata.get('name', 'Unknown'),
                        'modifiedTime': modified_time,
                        'mimeType': file_metadata.get('mimeType', '')
                    }, modified_files, seen_file_ids)

            except Exception as e:
                logging.warning(f"Error checking file {file_id}: {e}")
                continue

        for folder_id in folder_ids:
            try:
                folder_metadata = self.service.files().get(
                    fileId=folder_id,
                    fields=metadata_fields
                ).execute()

                folder_modified_time = folder_metadata.get('modifiedTime', '')
                if _is_after_cutoff(folder_modified_time):
                    _record({
                        'id': folder_id,
                        'name': folder_metadata.get('name', 'Unknown'),
                        'modifiedTime': folder_modified_time,
                        'mimeType': folder_metadata.get('mimeType', '')
                    }, modified_folders, seen_folder_ids)

                # Backslashes and single quotes must be escaped inside a
                # Drive query string literal.
                escaped_folder_id = folder_id.replace("\\", "\\\\").replace("'", "\\'")
                query = f"'{escaped_folder_id}' in parents and modifiedTime > '{modified_after}'"

                page_token = None
                while True:
                    results = self.service.files().list(
                        q=query,
                        spaces='drive',
                        fields='nextPageToken, files(id, name, modifiedTime, mimeType)',
                        pageToken=page_token
                    ).execute()

                    for item in results.get('files', []):
                        item_info = {
                            'id': item['id'],
                            'name': item.get('name', 'Unknown'),
                            'modifiedTime': item.get('modifiedTime', ''),
                            'mimeType': item.get('mimeType', '')
                        }

                        if item_info['mimeType'] == folder_mime_type:
                            _record(item_info, modified_folders, seen_folder_ids)
                        else:
                            _record(item_info, modified_files, seen_file_ids)

                    page_token = results.get('nextPageToken')
                    if not page_token:
                        break

            except Exception as e:
                logging.warning(f"Error scanning folder {folder_id}: {e}")
                continue

        summary = {
            'total_modified_files': len(modified_files),
            'total_modified_folders': len(modified_folders),
            'scan_date': modified_after
        }

        logging.info(f"Drive scan completed: {summary['total_modified_files']} files and {summary['total_modified_folders']} folders modified after {modified_after}")

        return {
            'modified_files': modified_files,
            'modified_folders': modified_folders,
            'scan_summary': summary
        }

    except Exception as e:
        logging.error(f"Error scanning drive contents: {e}", exc_info=True)
        raise
|
||||
|
||||
def _load_file_by_id(self, file_id: str, load_content: bool = True) -> Optional[Document]:
|
||||
self._ensure_service()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user