mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-12-01 17:43:15 +00:00
Merge pull request #2029 from abfeb8/main
feat: add Microsoft Entra ID integration
This commit is contained in:
@@ -113,10 +113,14 @@ class ConnectorsCallback(Resource):
|
||||
session_token = str(uuid.uuid4())
|
||||
|
||||
try:
|
||||
credentials = auth.create_credentials_from_token_info(token_info)
|
||||
service = auth.build_drive_service(credentials)
|
||||
user_info = service.about().get(fields="user").execute()
|
||||
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
|
||||
if provider == "google_drive":
|
||||
credentials = auth.create_credentials_from_token_info(token_info)
|
||||
service = auth.build_drive_service(credentials)
|
||||
user_info = service.about().get(fields="user").execute()
|
||||
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
|
||||
else:
|
||||
user_email = token_info.get('user_info', {}).get('email', 'Connected User')
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.warning(f"Could not get user info: {e}")
|
||||
user_email = 'Connected User'
|
||||
|
||||
@@ -55,6 +55,11 @@ class Settings(BaseSettings):
|
||||
"http://127.0.0.1:7091/api/connectors/callback" ##add redirect url as it is to your provider's console(gcp)
|
||||
)
|
||||
|
||||
# Microsoft Entra ID (Azure AD) integration
|
||||
MICROSOFT_CLIENT_ID: Optional[str] = None # Azure AD Application (client) ID
|
||||
MICROSOFT_CLIENT_SECRET: Optional[str] = None # Azure AD Application client secret
|
||||
MICROSOFT_TENANT_ID: Optional[str] = "common" # Azure AD Tenant ID (or 'common' for multi-tenant)
|
||||
MICROSOFT_AUTHORITY: Optional[str] = None # e.g., "https://login.microsoftonline.com/{tenant_id}"
|
||||
# GitHub source
|
||||
GITHUB_ACCESS_TOKEN: Optional[str] = None # PAT token with read repo access
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
from application.parser.connectors.share_point.auth import SharePointAuth
|
||||
from application.parser.connectors.share_point.loader import SharePointLoader
|
||||
|
||||
|
||||
class ConnectorCreator:
|
||||
@@ -12,10 +14,12 @@ class ConnectorCreator:
|
||||
|
||||
connectors = {
|
||||
"google_drive": GoogleDriveLoader,
|
||||
"share_point": SharePointLoader,
|
||||
}
|
||||
|
||||
auth_providers = {
|
||||
"google_drive": GoogleDriveAuth,
|
||||
"share_point": SharePointAuth,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
10
application/parser/connectors/share_point/__init__.py
Normal file
10
application/parser/connectors/share_point/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
SharePoint connector package for DocsGPT.
|
||||
|
||||
This module provides authentication and document loading capabilities for SharePoint.
|
||||
"""
|
||||
|
||||
from .auth import SharePointAuth
|
||||
from .loader import SharePointLoader
|
||||
|
||||
__all__ = ['SharePointAuth', 'SharePointLoader']
|
||||
91
application/parser/connectors/share_point/auth.py
Normal file
91
application/parser/connectors/share_point/auth.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import logging
|
||||
import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from msal import ConfidentialClientApplication
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.connectors.base import BaseConnectorAuth
|
||||
|
||||
|
||||
class SharePointAuth(BaseConnectorAuth):
    """
    Handles Microsoft OAuth 2.0 authentication.

    Wraps an MSAL ``ConfidentialClientApplication`` to drive the
    authorization-code flow and converts MSAL token responses into the
    ``token_info`` dictionary produced by ``map_token_response``.

    # Documentation:
    - https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow
    - https://learn.microsoft.com/en-gb/entra/msal/python/
    """

    # Microsoft Graph scopes for SharePoint access
    SCOPES = [
        "User.Read",
    ]

    def __init__(self):
        """
        Read the Microsoft OAuth settings and build the MSAL client.

        Raises:
            ValueError: If MICROSOFT_CLIENT_ID or MICROSOFT_CLIENT_SECRET is
                not configured.
        """
        self.client_id = settings.MICROSOFT_CLIENT_ID
        self.client_secret = settings.MICROSOFT_CLIENT_SECRET

        if not self.client_id or not self.client_secret:
            raise ValueError(
                "Microsoft OAuth credentials not configured. Please set MICROSOFT_CLIENT_ID and MICROSOFT_CLIENT_SECRET in settings."
            )

        self.redirect_uri = settings.CONNECTOR_REDIRECT_BASE_URI
        self.tenant_id = settings.MICROSOFT_TENANT_ID

        # The setting is declared with a default of None, so a getattr()
        # default would never be used; fall back explicitly with `or`.
        # The fallback honours MICROSOFT_TENANT_ID and, for the default
        # "common" tenant, resolves to the same authority MSAL uses when
        # none is supplied.
        tenant = self.tenant_id or "common"
        self.authority = (
            getattr(settings, "MICROSOFT_AUTHORITY", None)
            or f"https://login.microsoftonline.com/{tenant}"
        )

        self.auth_app = ConfidentialClientApplication(
            client_id=self.client_id, client_credential=self.client_secret, authority=self.authority
        )

    def get_authorization_url(self, state: Optional[str] = None) -> str:
        """
        Build the URL the user must visit to grant access.

        Args:
            state: Optional opaque value passed through to MSAL's
                authorization request.

        Returns:
            The authorization request URL produced by MSAL.
        """
        return self.auth_app.get_authorization_request_url(
            scopes=self.SCOPES, state=state, redirect_uri=self.redirect_uri
        )

    def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
        """
        Exchange an authorization code for tokens.

        Args:
            authorization_code: Code received on the OAuth redirect.

        Returns:
            Token info dictionary (see ``map_token_response``).

        Raises:
            ValueError: If MSAL reports an error in its response.
        """
        result = self.auth_app.acquire_token_by_authorization_code(
            code=authorization_code, scopes=self.SCOPES, redirect_uri=self.redirect_uri
        )
        self._raise_on_error(result)
        return self.map_token_response(result)

    def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
        """
        Obtain a fresh token set from a refresh token.

        Args:
            refresh_token: Refresh token from a previous token response.

        Returns:
            Token info dictionary (see ``map_token_response``).

        Raises:
            ValueError: If MSAL reports an error in its response.
        """
        result = self.auth_app.acquire_token_by_refresh_token(refresh_token=refresh_token, scopes=self.SCOPES)
        self._raise_on_error(result)
        return self.map_token_response(result)

    def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
        """
        Report whether ``token_info`` should be treated as expired.

        Args:
            token_info: Dictionary with an ``expiry`` epoch timestamp.

        Returns:
            True when the expiry is missing, unusable, or not in the future.
        """
        if not token_info or "expiry" not in token_info:
            # If no expiry info, consider token expired to be safe
            return True

        # map_token_response can store None when the response carried no
        # expiry information; treat any unusable value as expired instead of
        # failing on the comparison below.
        try:
            expiry_timestamp = float(token_info["expiry"])
        except (TypeError, ValueError):
            return True

        current_timestamp = int(datetime.datetime.now().timestamp())

        # Token is expired if current time is greater than or equal to expiry time
        return current_timestamp >= expiry_timestamp

    def map_token_response(self, result) -> Dict[str, Any]:
        """
        Convert an MSAL token response into the token info dictionary.

        Args:
            result: Dictionary returned by an MSAL ``acquire_token_*`` call.

        Returns:
            Dictionary with ``access_token``, ``refresh_token``,
            ``token_uri``, ``scopes``, ``expiry``, ``user_info`` and the
            unmodified response under ``raw_token``.
        """
        # `or {}` also covers a response where the key is present but None.
        claims = result.get("id_token_claims") or {}

        expiry = claims.get("exp")
        if expiry is None and result.get("expires_in") is not None:
            # No ID token claims: derive an absolute expiry from the
            # relative lifetime in the token response.
            try:
                expiry = int(datetime.datetime.now().timestamp()) + int(result["expires_in"])
            except (TypeError, ValueError):
                expiry = None

        return {
            "access_token": result.get("access_token"),
            "refresh_token": result.get("refresh_token"),
            "token_uri": claims.get("iss"),
            "scopes": result.get("scope"),
            "expiry": expiry,
            "user_info": {
                "name": claims.get("name"),
                # Fall back to the optional `email` claim when
                # `preferred_username` is absent.
                "email": claims.get("preferred_username") or claims.get("email"),
            },
            "raw_token": result,
        }

    @staticmethod
    def _raise_on_error(result: Dict[str, Any]) -> None:
        """Log and raise ``ValueError`` when an MSAL response contains ``error``."""
        if "error" in result:
            message = f"Error acquiring token: {result.get('error_description')}"
            logging.error(message)
            raise ValueError(message)
|
||||
44
application/parser/connectors/share_point/loader.py
Normal file
44
application/parser/connectors/share_point/loader.py
Normal file
@@ -0,0 +1,44 @@
|
||||
from typing import List, Dict, Any
|
||||
from application.parser.connectors.base import BaseConnectorLoader
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class SharePointLoader(BaseConnectorLoader):
    """Placeholder SharePoint loader: every method is currently a stub."""

    def __init__(self, session_token: str):
        """Accept ``session_token``; the stub neither stores nor uses it."""
        return None

    def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
        """
        Load documents from the external knowledge base.

        Not implemented yet: the current body does nothing and returns None.

        Args:
            inputs: Configuration dictionary containing:
                - file_ids: Optional list of specific file IDs to load
                - folder_ids: Optional list of folder IDs to browse/download
                - limit: Maximum number of items to return
                - list_only: If True, return metadata without content
                - recursive: Whether to recursively process folders

        Returns:
            List of Document objects (intended contract).
        """
        return None

    def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Download files/folders to a local directory.

        Not implemented yet: the current body does nothing and returns None.

        Args:
            local_dir: Local directory path to download files to
            source_config: Configuration for what to download

        Returns:
            Dictionary containing download results (intended contract):
                - files_downloaded: Number of files downloaded
                - directory_path: Path where files were downloaded
                - empty_result: Whether no files were downloaded
                - source_type: Type of connector
                - config_used: Configuration that was used
                - error: Error message if download failed (optional)
        """
        return None
|
||||
@@ -40,6 +40,7 @@ markupsafe==3.0.2
|
||||
marshmallow==3.26.1
|
||||
mpmath==1.3.0
|
||||
multidict==6.4.3
|
||||
msal==1.34.0
|
||||
mypy-extensions==1.0.0
|
||||
networkx==3.4.2
|
||||
numpy==2.2.1
|
||||
@@ -87,4 +88,4 @@ werkzeug>=3.1.0,<3.1.2
|
||||
yarl==1.20.0
|
||||
markdownify==1.1.0
|
||||
tldextract==5.1.3
|
||||
websockets==14.1
|
||||
websockets==14.1
|
||||
Reference in New Issue
Block a user