class GoogleDriveAuth:
    """
    Handles Google OAuth 2.0 authentication for Google Drive access.

    The class builds authorization URLs, exchanges authorization codes for
    tokens, refreshes access tokens, turns stored token dictionaries back
    into ``Credentials`` objects and builds Drive API service clients.

    Token dictionaries produced and consumed by this class use the keys
    ``access_token``, ``refresh_token``, ``token_uri``, ``client_id``,
    ``client_secret``, ``scopes`` and ``expiry`` (ISO-8601 string or None).
    """

    SCOPES = [
        'https://www.googleapis.com/auth/drive.readonly',
        'https://www.googleapis.com/auth/drive.metadata.readonly'
    ]

    # Google OAuth 2.0 endpoints shared by every flow/credential built here.
    AUTH_URI = "https://accounts.google.com/o/oauth2/auth"
    TOKEN_URI = "https://oauth2.googleapis.com/token"

    # A token is reported as expired this long before its real expiry so a
    # caller never starts a request with a token about to lapse.
    EXPIRY_LEEWAY = datetime.timedelta(seconds=60)

    def __init__(self):
        """
        Read the OAuth client configuration from application settings.

        Raises:
            ValueError: If the client id or client secret is not configured.
        """
        self.client_id = settings.GOOGLE_CLIENT_ID
        self.client_secret = settings.GOOGLE_CLIENT_SECRET
        self.redirect_uri = settings.GOOGLE_REDIRECT_URI or "http://localhost:7091/api/google-drive/callback"

        if not self.client_id or not self.client_secret:
            raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")

    def _build_flow(self):
        """Create an OAuth ``Flow`` configured with this client's settings."""
        flow = Flow.from_client_config(
            {
                "web": {
                    "client_id": self.client_id,
                    "client_secret": self.client_secret,
                    "auth_uri": self.AUTH_URI,
                    "token_uri": self.TOKEN_URI,
                    "redirect_uris": [self.redirect_uri],
                }
            },
            scopes=self.SCOPES,
        )
        flow.redirect_uri = self.redirect_uri
        return flow

    def _token_info_from_credentials(self, credentials, refresh_token) -> Dict[str, Any]:
        """Serialize credentials into the token dictionary used by this class."""
        expiry = credentials.expiry
        return {
            'access_token': credentials.token,
            'refresh_token': refresh_token,
            # The credential attributes are read-only properties, so missing
            # values are filled in here instead of being assigned onto the
            # credentials object.
            'token_uri': credentials.token_uri or self.TOKEN_URI,
            'client_id': credentials.client_id or self.client_id,
            'client_secret': credentials.client_secret or self.client_secret,
            'scopes': credentials.scopes,
            'expiry': expiry.isoformat() if expiry else None,
        }

    @staticmethod
    def _log_time_until_expiry(expiry) -> None:
        """Log how long an access token remains valid (best effort only)."""
        if not expiry:
            logging.info("Token has no expiry information")
            return
        try:
            if expiry.tzinfo is None:
                # Offset-naive expiries are interpreted as UTC.
                expiry = expiry.replace(tzinfo=datetime.timezone.utc)
            remaining = expiry - datetime.datetime.now(datetime.timezone.utc)
            logging.info("Access token expires in %s", remaining)
        except Exception as e:
            # Purely informational; never let logging break the OAuth flow.
            logging.warning("Error calculating token expiry: %s", e)

    @staticmethod
    def _parse_expiry(expiry_input):
        """
        Convert a stored expiry into a timezone-aware ``datetime``.

        Args:
            expiry_input: ISO-8601 string or ``datetime`` instance.

        Returns:
            Timezone-aware datetime (naive values are interpreted as UTC), or
            None when ``expiry_input`` is neither a string nor a datetime.

        Raises:
            ValueError: If a string cannot be parsed.
            ImportError: If the string is not ISO-8601 and ``dateutil`` is
                not installed for the fallback parser.
        """
        if isinstance(expiry_input, datetime.datetime):
            expiry_dt = expiry_input
        elif isinstance(expiry_input, str):
            text = expiry_input.strip()
            if text.endswith(("Z", "z")):
                # fromisoformat() only accepts a trailing "Z" on Python 3.11+.
                text = text[:-1] + "+00:00"
            try:
                expiry_dt = datetime.datetime.fromisoformat(text)
            except ValueError:
                # Keep accepting the looser formats the previous
                # dateutil-only implementation understood.
                from dateutil import parser as date_parser
                expiry_dt = date_parser.parse(expiry_input)
        else:
            return None

        if expiry_dt.tzinfo is None:
            expiry_dt = expiry_dt.replace(tzinfo=datetime.timezone.utc)
        return expiry_dt

    def _naive_utc_expiry(self, expiry_input):
        """Return the expiry as an offset-naive UTC datetime, or None if unusable."""
        if not expiry_input:
            return None
        try:
            expiry_dt = self._parse_expiry(expiry_input)
        except Exception as e:
            logging.warning("Ignoring unparseable token expiry: %s", e)
            return None
        if expiry_dt is None:
            return None
        return expiry_dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    def get_authorization_url(self, state: Optional[str] = None) -> str:
        """
        Generate Google OAuth authorization URL.

        Args:
            state: Optional state parameter for CSRF protection

        Returns:
            Authorization URL for Google OAuth flow
        """
        try:
            flow = self._build_flow()
            authorization_url, _ = flow.authorization_url(
                access_type='offline',
                prompt='consent',
                include_granted_scopes='true',
                state=state
            )
            return authorization_url

        except Exception as e:
            logging.error("Error generating authorization URL: %s", e)
            raise

    def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
        """
        Exchange authorization code for access and refresh tokens.

        Args:
            authorization_code: Authorization code from OAuth callback

        Returns:
            Dictionary containing token information

        Raises:
            ValueError: If the code is empty, or the flow returned no access
                token or no refresh token.
        """
        try:
            if not authorization_code:
                raise ValueError("Authorization code is required")

            flow = self._build_flow()
            flow.fetch_token(code=authorization_code)
            credentials = flow.credentials

            if not credentials.token:
                raise ValueError("OAuth flow did not return an access token")

            if not credentials.refresh_token:
                raise ValueError(
                    "No refresh token received. This typically happens when offline access wasn't granted. "
                )

            self._log_time_until_expiry(credentials.expiry)
            return self._token_info_from_credentials(credentials, credentials.refresh_token)

        except Exception as e:
            logging.error("Error exchanging code for tokens: %s", e)
            raise

    def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
        """
        Obtain a new access token using a refresh token.

        Args:
            refresh_token: Refresh token previously issued for this client

        Returns:
            Dictionary containing the refreshed token information. The
            ``refresh_token`` entry is the one held by the credentials after
            the refresh, falling back to the one passed in.

        Raises:
            ValueError: If ``refresh_token`` is empty.
        """
        try:
            if not refresh_token:
                raise ValueError("Refresh token is required")

            credentials = Credentials(
                token=None,
                refresh_token=refresh_token,
                token_uri=self.TOKEN_URI,
                client_id=self.client_id,
                client_secret=self.client_secret
            )

            from google.auth.transport.requests import Request
            credentials.refresh(Request())

            # Prefer the token on the credentials object so a rotated refresh
            # token is not discarded.
            return self._token_info_from_credentials(
                credentials, credentials.refresh_token or refresh_token
            )
        except Exception as e:
            logging.error("Error refreshing access token: %s", e, exc_info=True)
            raise

    def create_credentials_from_token_info(self, token_info: Dict[str, Any]) -> "Credentials":
        """
        Build a ``Credentials`` object from a stored token dictionary.

        Args:
            token_info: Token dictionary; must contain ``access_token``

        Returns:
            Credentials carrying the access token, refresh token, scopes and,
            when it can be parsed, the stored expiry.

        Raises:
            ValueError: If ``token_info`` has no access token.
        """
        access_token = token_info.get('access_token')
        if not access_token:
            raise ValueError("No access token found in token_info")

        return Credentials(
            token=access_token,
            refresh_token=token_info.get('refresh_token'),
            token_uri=self.TOKEN_URI,
            client_id=self.client_id,
            client_secret=self.client_secret,
            scopes=token_info.get('scopes', ['https://www.googleapis.com/auth/drive.readonly']),
            # Without an expiry the credentials never report themselves as
            # expired, so build_drive_service() could not refresh them.
            expiry=self._naive_utc_expiry(token_info.get('expiry')),
        )

    def build_drive_service(self, credentials: "Credentials"):
        """
        Build a Google Drive v3 service client, refreshing credentials if needed.

        Args:
            credentials: Google credentials object

        Returns:
            Drive API service object

        Raises:
            ValueError: If credentials are missing, cannot be refreshed, or
                the service cannot be built.
        """
        try:
            if not credentials:
                raise ValueError("No credentials provided")

            if not credentials.token and not credentials.refresh_token:
                raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")

            needs_refresh = credentials.expired or not credentials.token
            if needs_refresh:
                if not credentials.refresh_token:
                    raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
                try:
                    from google.auth.transport.requests import Request
                    credentials.refresh(Request())
                except Exception as refresh_error:
                    raise ValueError(f"Failed to refresh credentials: {refresh_error}") from refresh_error

            return build('drive', 'v3', credentials=credentials)

        except HttpError as e:
            raise ValueError(f"Failed to build Google Drive service: HTTP {e.resp.status}") from e
        except Exception as e:
            raise ValueError(f"Failed to build Google Drive service: {str(e)}") from e

    def is_token_expired(self, token_info):
        """
        Report whether the access token in ``token_info`` should be treated as expired.

        Args:
            token_info: Token dictionary, optionally containing ``expiry``
                (ISO-8601 string or datetime) and ``access_token``

        Returns:
            True if the token expires within ``EXPIRY_LEEWAY``, if the expiry
            cannot be interpreted, or if there is neither an expiry nor an
            access token; False otherwise.
        """
        expiry_input = token_info.get('expiry')
        if expiry_input:
            try:
                expiry_dt = self._parse_expiry(expiry_input)
                if expiry_dt is None:
                    logging.warning(f"Unexpected expiry format: {type(expiry_input)}")
                    return True

                current_time = datetime.datetime.now(datetime.timezone.utc)
                return current_time >= expiry_dt - self.EXPIRY_LEEWAY
            except Exception as e:
                # Unknown expiry is deliberately treated as expired, but the
                # reason is logged instead of being swallowed silently.
                logging.warning("Could not evaluate token expiry: %s", e)
                return True

        # No expiry recorded: a present access token is assumed usable.
        return not token_info.get('access_token')

    def get_token_info_from_session(self, session_token: str) -> Dict[str, Any]:
        """
        Load the token dictionary stored for a Drive session.

        Args:
            session_token: Session identifier looked up in the
                ``drive_sessions`` collection

        Returns:
            Token dictionary with ``client_id``, ``client_secret`` and
            ``token_uri`` filled in when absent.

        Raises:
            ValueError: If the session is unknown, has no usable token
                information, or the lookup fails.
        """
        try:
            from application.core.mongo_db import MongoDB

            mongo = MongoDB.get_client()
            db = mongo[settings.MONGO_DB_NAME]
            sessions_collection = db["drive_sessions"]

            session = sessions_collection.find_one({"session_token": session_token})
            if not session:
                # The session token is a secret; keep it out of error text.
                raise ValueError("Invalid session token")

            if "token_info" not in session:
                raise ValueError("Session missing token information")

            token_info = session["token_info"]
            if not token_info:
                raise ValueError("Invalid token information")

            required_fields = ["access_token", "refresh_token"]
            missing_fields = [field for field in required_fields if not token_info.get(field)]
            if missing_fields:
                raise ValueError(f"Missing required token fields: {missing_fields}")

            token_info.setdefault('client_id', self.client_id)
            token_info.setdefault('client_secret', self.client_secret)
            token_info.setdefault('token_uri', self.TOKEN_URI)

            return token_info

        except Exception as e:
            raise ValueError(f"Failed to retrieve Google Drive token information: {str(e)}") from e

    def validate_credentials(self, credentials: "Credentials") -> bool:
        """
        Validate Google Drive credentials by making a test API call.

        Args:
            credentials: Google credentials object

        Returns:
            True if credentials are valid, False otherwise
        """
        try:
            service = self.build_drive_service(credentials)
            service.about().get(fields="user").execute()
            return True

        except HttpError as e:
            logging.error("HTTP error validating credentials: %s", e)
            return False
        except Exception as e:
            logging.error("Error validating credentials: %s", e)
            return False