feat: add Microsoft Entra ID integration

- Updated .env-template and settings.py for Microsoft Entra ID configuration.
- Enhanced ConnectorsCallback to support SharePoint authentication.
- Introduced SharePointAuth and SharePointLoader classes.
- Added required dependencies in requirements.txt.
This commit is contained in:
Abhishek Malviya
2025-10-07 15:23:32 +05:30
parent 8b8e616557
commit 8edb6dcf2a
7 changed files with 173 additions and 6 deletions

View File

@@ -1,5 +1,7 @@
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
from application.parser.connectors.share_point.auth import SharePointAuth
from application.parser.connectors.share_point.loader import SharePointLoader
class ConnectorCreator:
@@ -12,10 +14,12 @@ class ConnectorCreator:
connectors = {
"google_drive": GoogleDriveLoader,
"share_point": SharePointLoader,
}
auth_providers = {
"google_drive": GoogleDriveAuth,
"share_point": SharePointAuth,
}
@classmethod

View File

@@ -0,0 +1,91 @@
import logging
import datetime
from typing import Optional, Dict, Any
from msal import ConfidentialClientApplication
from application.core.settings import settings
from application.parser.connectors.base import BaseConnectorAuth
class SharePointAuth(BaseConnectorAuth):
    """
    Handles Microsoft OAuth 2.0 authentication (authorization-code flow) via MSAL.

    Documentation:
        - https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow
        - https://learn.microsoft.com/en-gb/entra/msal/python/
    """

    # Scopes requested from Microsoft Graph during sign-in.
    # NOTE(review): only "User.Read" is requested; reading SharePoint content
    # presumably needs additional scopes (e.g. Sites.Read.All) - confirm.
    SCOPES = [
        "User.Read",
    ]

    def __init__(self):
        """
        Build the MSAL confidential client from application settings.

        Raises:
            ValueError: If MICROSOFT_CLIENT_ID or MICROSOFT_CLIENT_SECRET is not set.
        """
        self.client_id = settings.MICROSOFT_CLIENT_ID
        self.client_secret = settings.MICROSOFT_CLIENT_SECRET

        if not self.client_id or not self.client_secret:
            raise ValueError(
                "Microsoft OAuth credentials not configured. Please set MICROSOFT_CLIENT_ID and MICROSOFT_CLIENT_SECRET in settings."
            )

        self.redirect_uri = settings.MICROSOFT_REDIRECT_URI
        self.tenant_id = settings.MICROSOFT_TENANT_ID
        # An explicit MICROSOFT_AUTHORITY setting wins; otherwise the authority
        # URL is derived from the tenant id using the ciamlogin.com host.
        self.authority = getattr(
            settings,
            "MICROSOFT_AUTHORITY",
            f"https://{self.tenant_id}.ciamlogin.com/{self.tenant_id}",
        )

        self.auth_app = ConfidentialClientApplication(
            client_id=self.client_id,
            client_credential=self.client_secret,
            authority=self.authority,
        )

    def get_authorization_url(self, state: Optional[str] = None) -> str:
        """
        Return the URL the user must visit to grant consent.

        Args:
            state: Optional opaque value forwarded to the authorization request.

        Returns:
            The authorization request URL produced by MSAL.
        """
        return self.auth_app.get_authorization_request_url(
            scopes=self.SCOPES, state=state, redirect_uri=self.redirect_uri
        )

    def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
        """
        Exchange an authorization code for tokens.

        Args:
            authorization_code: Code received on the redirect URI.

        Returns:
            Token information as produced by ``map_token_response``.

        Raises:
            ValueError: If the token response contains an ``error`` entry.
        """
        result = self.auth_app.acquire_token_by_authorization_code(
            code=authorization_code, scopes=self.SCOPES, redirect_uri=self.redirect_uri
        )
        self._raise_on_error(result)
        return self.map_token_response(result)

    def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
        """
        Obtain a new access token from a refresh token.

        Args:
            refresh_token: Refresh token previously issued to this application.

        Returns:
            Token information as produced by ``map_token_response``. When the
            response carries no new refresh token, the one passed in is kept.

        Raises:
            ValueError: If the token response contains an ``error`` entry.
        """
        result = self.auth_app.acquire_token_by_refresh_token(
            refresh_token=refresh_token, scopes=self.SCOPES
        )
        self._raise_on_error(result)

        token_info = self.map_token_response(result)
        if not token_info.get("refresh_token"):
            # Keep the caller's refresh token instead of overwriting it with
            # None when the provider did not issue a replacement.
            token_info["refresh_token"] = refresh_token
        return token_info

    def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
        """
        Tell whether the stored token should be treated as expired.

        Args:
            token_info: Mapping with an ``expiry`` entry holding a POSIX timestamp.

        Returns:
            True when the expiry is missing, None, not numeric, or not in the
            future; False otherwise.
        """
        expiry = token_info.get("expiry") if token_info else None
        if expiry is None:
            # No usable expiry information: consider the token expired to be safe.
            return True

        try:
            expiry_timestamp = float(expiry)
        except (TypeError, ValueError):
            # A malformed expiry must not crash the check; force a refresh instead.
            return True

        current_timestamp = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
        # Token is expired once the current time reaches the expiry time.
        return current_timestamp >= expiry_timestamp

    def map_token_response(self, result) -> Dict[str, Any]:
        """
        Convert an MSAL token response into the connector's token dictionary.

        Args:
            result: Dictionary returned by an MSAL ``acquire_token_*`` call.

        Returns:
            Dictionary with ``access_token``, ``refresh_token``, ``token_uri``,
            ``scopes``, ``expiry``, ``user_info`` and the unmodified ``raw_token``.
        """
        # The claims may be absent (or None) when no ID token was returned.
        claims = result.get("id_token_claims") or {}
        return {
            "access_token": result.get("access_token"),
            "refresh_token": result.get("refresh_token"),
            "token_uri": claims.get("iss"),
            "scopes": result.get("scope"),
            "expiry": self._compute_expiry(result, claims),
            "user_info": {
                "name": claims.get("name"),
                "email": claims.get("preferred_username"),
            },
            "raw_token": result,
        }

    @staticmethod
    def _raise_on_error(result: Dict[str, Any]) -> None:
        """Log and raise ValueError when the token response reports an error."""
        if "error" not in result:
            return
        # Fall back to the error code so the message is never just "None".
        detail = result.get("error_description") or result.get("error")
        logging.error("Error acquiring token: %s", detail)
        raise ValueError(f"Error acquiring token: {detail}")

    @staticmethod
    def _compute_expiry(result: Dict[str, Any], claims: Dict[str, Any]) -> Any:
        """Return the access token expiry as a POSIX timestamp, or None if unknown."""
        expires_in = result.get("expires_in")
        if expires_in is not None:
            try:
                # "expires_in" is the access token lifetime in seconds; this is
                # the value that governs when a refresh is actually required.
                now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
                return now + int(expires_in)
            except (TypeError, ValueError):
                pass  # Unusable lifetime: fall back to the ID token claim below.
        # Fallback: expiry of the ID token, which is what was reported before.
        return claims.get("exp")

View File

@@ -0,0 +1,44 @@
from typing import List, Dict, Any
from application.parser.connectors.base import BaseConnectorLoader
from application.parser.schema.base import Document
class SharePointLoader(BaseConnectorLoader):
    """Placeholder SharePoint loader: every method is an unimplemented stub."""

    def __init__(self, session_token: str):
        """
        Create the loader.

        Args:
            session_token: Accepted but currently ignored; nothing is stored.
        """

    def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
        """
        Load documents from the external knowledge base.

        Not implemented yet: the stub ignores ``inputs`` and returns None.

        Args:
            inputs: Configuration dictionary, intended to contain:
                - file_ids: Optional list of specific file IDs to load
                - folder_ids: Optional list of folder IDs to browse/download
                - limit: Maximum number of items to return
                - list_only: If True, return metadata without content
                - recursive: Whether to recursively process folders

        Returns:
            Intended to be a list of Document objects; currently None.
        """
        return None

    def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Download files/folders to a local directory.

        Not implemented yet: the stub ignores its arguments and returns None.

        Args:
            local_dir: Local directory path to download files to
            source_config: Configuration for what to download

        Returns:
            Intended to be a dictionary of download results; currently None:
                - files_downloaded: Number of files downloaded
                - directory_path: Path where files were downloaded
                - empty_result: Whether no files were downloaded
                - source_type: Type of connector
                - config_used: Configuration that was used
                - error: Error message if download failed (optional)
        """
        return None