feat: add Microsoft Entra ID integration

- Updated .env-template and settings.py for Microsoft Entra ID configuration.
- Enhanced ConnectorsCallback to support SharePoint authentication.
- Introduced SharePointAuth and SharePointLoader classes.
- Added required dependencies in requirements.txt.
This commit is contained in:
Abhishek Malviya
2025-10-07 15:23:32 +05:30
parent 8b8e616557
commit 8edb6dcf2a
7 changed files with 173 additions and 6 deletions

View File

@@ -6,4 +6,20 @@ VITE_API_STREAMING=true
OPENAI_API_BASE=
OPENAI_API_VERSION=
AZURE_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
#Azure AD Application (client) ID
MICROSOFT_CLIENT_ID=your-azure-ad-client-id
#Azure AD Application client secret
MICROSOFT_CLIENT_SECRET=your-azure-ad-client-secret
#Azure AD Tenant ID (or 'common' for multi-tenant)
MICROSOFT_TENANT_ID=your-azure-ad-tenant-id
#Your project's redirect URI that you registered in Azure Portal.
#For example: http://localhost:5000/redirect
MICROSOFT_REDIRECT_URI=http://localhost:7091/api/connectors/callback/ms_entra_id
#Set MICROSOFT_AUTHORITY to the sign-in authority of your tenant.
#For a Microsoft Entra ID (workforce) tenant, use
#"https://login.microsoftonline.com/TENANT_GUID"
#or "https://login.microsoftonline.com/contoso.onmicrosoft.com".
#Alternatively, use "https://login.microsoftonline.com/common" for a multi-tenant app.
#For a Microsoft Entra External ID (customer) tenant, use the ciamlogin.com form shown below.
MICROSOFT_AUTHORITY=https://{tenantId}.ciamlogin.com/{tenantId}

View File

@@ -298,10 +298,14 @@ class ConnectorsCallback(Resource):
session_token = str(uuid.uuid4())
try:
credentials = auth.create_credentials_from_token_info(token_info)
service = auth.build_drive_service(credentials)
user_info = service.about().get(fields="user").execute()
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
if provider == "google_drive":
credentials = auth.create_credentials_from_token_info(token_info)
service = auth.build_drive_service(credentials)
user_info = service.about().get(fields="user").execute()
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
else:
user_email = token_info.get('user_info', {}).get('email', 'Connected User')
except Exception as e:
current_app.logger.warning(f"Could not get user info: {e}")
user_email = 'Connected User'

View File

@@ -51,6 +51,13 @@ class Settings(BaseSettings):
"http://127.0.0.1:7091/api/connectors/callback" ##add redirect url as it is to your provider's console(gcp)
)
# Microsoft Entra ID (Azure AD) integration
MICROSOFT_CLIENT_ID: Optional[str] = None # Azure AD Application (client) ID
MICROSOFT_CLIENT_SECRET: Optional[str] = None # Azure AD Application client secret
MICROSOFT_TENANT_ID: Optional[str] = "common" # Azure AD Tenant ID (or 'common' for multi-tenant)
MICROSOFT_REDIRECT_URI: Optional[str] = "http://localhost:7091/api/connectors/callback" # Your project's redirect URI that you registered in Azure Portal.
MICROSOFT_AUTHORITY: Optional[str] = None # e.g., "https://login.microsoftonline.com/{tenant_id}"
# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"

View File

@@ -1,5 +1,7 @@
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
from application.parser.connectors.share_point.auth import SharePointAuth
from application.parser.connectors.share_point.loader import SharePointLoader
class ConnectorCreator:
@@ -12,10 +14,12 @@ class ConnectorCreator:
connectors = {
"google_drive": GoogleDriveLoader,
"share_point": SharePointLoader,
}
auth_providers = {
"google_drive": GoogleDriveAuth,
"share_point": SharePointAuth,
}
@classmethod

View File

@@ -0,0 +1,91 @@
import logging
import datetime
from typing import Optional, Dict, Any
from msal import ConfidentialClientApplication
from application.core.settings import settings
from application.parser.connectors.base import BaseConnectorAuth
class SharePointAuth(BaseConnectorAuth):
    """
    Handles Microsoft OAuth 2.0 authentication.

    Wraps an MSAL ``ConfidentialClientApplication`` to build the authorization
    URL, exchange an authorization code for tokens, refresh tokens, and map
    MSAL's response into the token-info dictionary used by the connectors.

    # Documentation:
    - https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow
    - https://learn.microsoft.com/en-gb/entra/msal/python/
    """

    # Microsoft Graph scopes requested at sign-in.
    # NOTE(review): only "User.Read" is requested; reading SharePoint content
    # presumably needs additional Graph scopes - confirm before the loader is
    # implemented.
    SCOPES = [
        "User.Read",
    ]

    def __init__(self):
        """
        Read the Microsoft settings and create the MSAL client.

        Raises:
            ValueError: If MICROSOFT_CLIENT_ID or MICROSOFT_CLIENT_SECRET is
                not configured.
        """
        self.client_id = settings.MICROSOFT_CLIENT_ID
        self.client_secret = settings.MICROSOFT_CLIENT_SECRET

        if not self.client_id or not self.client_secret:
            raise ValueError(
                "Microsoft OAuth credentials not configured. Please set MICROSOFT_CLIENT_ID and MICROSOFT_CLIENT_SECRET in settings."
            )

        self.redirect_uri = settings.MICROSOFT_REDIRECT_URI
        # An empty/unset tenant falls back to the multi-tenant endpoint.
        self.tenant_id = getattr(settings, "MICROSOFT_TENANT_ID", None) or "common"
        # The setting exists with a value of None when unset, so a getattr()
        # default alone would never apply; fall back explicitly so that
        # MICROSOFT_TENANT_ID is honoured when no authority is configured.
        self.authority = (
            getattr(settings, "MICROSOFT_AUTHORITY", None)
            or f"https://login.microsoftonline.com/{self.tenant_id}"
        )

        self.auth_app = ConfidentialClientApplication(
            client_id=self.client_id,
            client_credential=self.client_secret,
            authority=self.authority,
        )

    def get_authorization_url(self, state: Optional[str] = None) -> str:
        """
        Build the URL the user must visit to grant access.

        Args:
            state: Optional opaque value passed through the OAuth round trip.

        Returns:
            The authorization request URL.
        """
        return self.auth_app.get_authorization_request_url(
            scopes=self.SCOPES, state=state, redirect_uri=self.redirect_uri
        )

    def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
        """
        Exchange an authorization code for tokens.

        Args:
            authorization_code: Code received on the redirect URI.

        Returns:
            Token-info dictionary (see ``map_token_response``).

        Raises:
            ValueError: If the identity provider returns an error.
        """
        result = self.auth_app.acquire_token_by_authorization_code(
            code=authorization_code, scopes=self.SCOPES, redirect_uri=self.redirect_uri
        )
        self._raise_on_error(result)
        return self.map_token_response(result)

    def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
        """
        Obtain a new access token from a refresh token.

        Args:
            refresh_token: Refresh token from a previous token response.

        Returns:
            Token-info dictionary (see ``map_token_response``).

        Raises:
            ValueError: If the identity provider returns an error.
        """
        result = self.auth_app.acquire_token_by_refresh_token(refresh_token=refresh_token, scopes=self.SCOPES)
        self._raise_on_error(result)

        token_info = self.map_token_response(result)
        # A refresh response is not guaranteed to carry a new refresh token;
        # keep the current one so the session can still be refreshed later.
        if not token_info.get("refresh_token"):
            token_info["refresh_token"] = refresh_token
        return token_info

    def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
        """
        Tell whether the token described by ``token_info`` has expired.

        Args:
            token_info: Token-info dictionary holding an ``expiry`` Unix
                timestamp in seconds.

        Returns:
            True if the expiry is missing, unusable, or not in the future.
        """
        expiry = token_info.get("expiry") if token_info else None
        if expiry is None:
            # If no expiry info, consider token expired to be safe
            return True

        try:
            expiry_timestamp = float(expiry)
        except (TypeError, ValueError):
            # An unparsable expiry is treated like a missing one.
            return True

        current_timestamp = datetime.datetime.now(datetime.timezone.utc).timestamp()
        # Token is expired if current time is greater than or equal to expiry time
        return current_timestamp >= expiry_timestamp

    def map_token_response(self, result) -> Dict[str, Any]:
        """
        Convert an MSAL token response into the connector token-info format.

        Args:
            result: Dictionary returned by an MSAL ``acquire_token_*`` call.

        Returns:
            Dictionary with ``access_token``, ``refresh_token``, ``token_uri``,
            ``scopes``, ``expiry`` (Unix seconds), ``user_info`` and the
            unmodified response under ``raw_token``.
        """
        claims = result.get("id_token_claims") or {}

        # Prefer the access token's own lifetime; the ID token's "exp" claim
        # describes the ID token and is absent when no ID token is returned.
        expiry = claims.get("exp")
        expires_in = result.get("expires_in")
        if expires_in is not None:
            try:
                now = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
                expiry = now + int(expires_in)
            except (TypeError, ValueError):
                pass  # keep the ID-token based value

        return {
            "access_token": result.get("access_token"),
            "refresh_token": result.get("refresh_token"),
            # NOTE(review): this is the token issuer claim, not necessarily a
            # token endpoint URL - confirm what consumers expect here.
            "token_uri": claims.get("iss"),
            "scopes": result.get("scope"),
            "expiry": expiry,
            "user_info": {
                "name": claims.get("name"),
                "email": claims.get("preferred_username") or claims.get("email"),
            },
            "raw_token": result,
        }

    @staticmethod
    def _raise_on_error(result) -> None:
        """Log and raise ValueError when an MSAL response reports an error."""
        if "error" not in result:
            return
        # Fall back to the error code when no description is provided.
        description = result.get("error_description") or result.get("error")
        logging.error("Error acquiring token: %s", description)
        raise ValueError(f"Error acquiring token: {description}")

View File

@@ -0,0 +1,44 @@
from typing import List, Dict, Any
from application.parser.connectors.base import BaseConnectorLoader
from application.parser.schema.base import Document
class SharePointLoader(BaseConnectorLoader):
    """
    Placeholder loader for the SharePoint connector.

    Fetching content is not implemented yet. Until it is, each method returns
    an empty result in the shape its docstring describes instead of ``None``,
    so callers that inspect the result do not fail on a missing value.
    """

    # Connector key reported in download results.
    SOURCE_TYPE = "share_point"

    def __init__(self, session_token: str):
        """
        Args:
            session_token: Session token supplied by the caller; stored for
                use once loading is implemented.
        """
        self.session_token = session_token

    def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
        """
        Load documents from the external knowledge base.

        Args:
            inputs: Configuration dictionary containing:
                - file_ids: Optional list of specific file IDs to load
                - folder_ids: Optional list of folder IDs to browse/download
                - limit: Maximum number of items to return
                - list_only: If True, return metadata without content
                - recursive: Whether to recursively process folders

        Returns:
            List of Document objects (currently always empty).
        """
        # Not implemented yet: report "no documents" rather than None.
        return []

    def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Download files/folders to a local directory.

        Args:
            local_dir: Local directory path to download files to
            source_config: Configuration for what to download

        Returns:
            Dictionary containing download results:
                - files_downloaded: Number of files downloaded
                - directory_path: Path where files were downloaded
                - empty_result: Whether no files were downloaded
                - source_type: Type of connector
                - config_used: Configuration that was used
                - error: Error message if download failed (optional)
        """
        # Not implemented yet: return an explicit empty result with an error
        # message so the caller can see why nothing was downloaded.
        return {
            "files_downloaded": 0,
            "directory_path": local_dir,
            "empty_result": True,
            "source_type": self.SOURCE_TYPE,
            "config_used": source_config if source_config is not None else {},
            "error": "SharePoint download is not implemented yet",
        }

View File

@@ -40,6 +40,7 @@ markupsafe==3.0.2
marshmallow==3.26.1
mpmath==1.3.0
multidict==6.4.3
msal==1.34.0
mypy-extensions==1.0.0
networkx==3.4.2
numpy==2.2.1
@@ -87,4 +88,4 @@ werkzeug>=3.1.0,<3.1.2
yarl==1.20.0
markdownify==1.1.0
tldextract==5.1.3
websockets==14.1
websockets==14.1