mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
(feat:connectors) separate layer
This commit is contained in:
@@ -3995,7 +3995,7 @@ class GoogleDriveAuth(Resource):
|
||||
def get(self):
|
||||
"""Get Google Drive OAuth authorization URL"""
|
||||
try:
|
||||
from application.parser.remote.google_auth import GoogleDriveAuth
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
|
||||
auth = GoogleDriveAuth()
|
||||
|
||||
@@ -4029,7 +4029,7 @@ class GoogleDriveCallback(Resource):
|
||||
def get(self):
|
||||
"""Handle Google Drive OAuth callback"""
|
||||
try:
|
||||
from application.parser.remote.google_auth import GoogleDriveAuth
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
from flask import request
|
||||
import uuid
|
||||
|
||||
@@ -4193,7 +4193,7 @@ class GoogleDriveRefresh(Resource):
|
||||
def post(self):
|
||||
"""Refresh Google Drive access token"""
|
||||
try:
|
||||
from application.parser.remote.google_auth import GoogleDriveAuth
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
|
||||
data = request.get_json()
|
||||
refresh_token = data.get('refresh_token')
|
||||
@@ -4241,7 +4241,7 @@ class GoogleDriveFiles(Resource):
|
||||
def post(self):
|
||||
"""Get list of files from Google Drive"""
|
||||
try:
|
||||
from application.parser.remote.google_drive_loader import GoogleDriveLoader
|
||||
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
|
||||
|
||||
data = request.get_json()
|
||||
session_token = data.get('session_token')
|
||||
@@ -4329,7 +4329,7 @@ class GoogleDriveValidateSession(Resource):
|
||||
"""Validate Google Drive session token and return user info"""
|
||||
try:
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.parser.remote.google_auth import GoogleDriveAuth
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
|
||||
data = request.get_json()
|
||||
session_token = data.get('session_token')
|
||||
|
||||
11
application/parser/connectors/__init__.py
Normal file
11
application/parser/connectors/__init__.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
External knowledge base connectors for DocsGPT.
|
||||
|
||||
This module contains connectors for external knowledge bases and document storage systems
|
||||
that require authentication and specialized handling, separate from simple web scrapers.
|
||||
"""
|
||||
|
||||
from .connector_creator import ConnectorCreator
|
||||
from .google_drive import GoogleDriveAuth, GoogleDriveLoader
|
||||
|
||||
__all__ = ['ConnectorCreator', 'GoogleDriveAuth', 'GoogleDriveLoader']
|
||||
57
application/parser/connectors/connector_creator.py
Normal file
57
application/parser/connectors/connector_creator.py
Normal file
@@ -0,0 +1,57 @@
|
||||
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
|
||||
|
||||
|
||||
class ConnectorCreator:
|
||||
"""
|
||||
Factory class for creating external knowledge base connectors.
|
||||
|
||||
These are different from remote loaders as they typically require
|
||||
authentication and connect to external document storage systems.
|
||||
"""
|
||||
|
||||
connectors = {
|
||||
"google_drive": GoogleDriveLoader,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_connector(cls, connector_type, *args, **kwargs):
|
||||
"""
|
||||
Create a connector instance for the specified type.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector to create (e.g., 'google_drive')
|
||||
*args, **kwargs: Arguments to pass to the connector constructor
|
||||
|
||||
Returns:
|
||||
Connector instance
|
||||
|
||||
Raises:
|
||||
ValueError: If connector type is not supported
|
||||
"""
|
||||
connector_class = cls.connectors.get(connector_type.lower())
|
||||
if not connector_class:
|
||||
raise ValueError(f"No connector class found for type {connector_type}")
|
||||
return connector_class(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def get_supported_connectors(cls):
|
||||
"""
|
||||
Get list of supported connector types.
|
||||
|
||||
Returns:
|
||||
List of supported connector type strings
|
||||
"""
|
||||
return list(cls.connectors.keys())
|
||||
|
||||
@classmethod
|
||||
def is_supported(cls, connector_type):
|
||||
"""
|
||||
Check if a connector type is supported.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector to check
|
||||
|
||||
Returns:
|
||||
True if supported, False otherwise
|
||||
"""
|
||||
return connector_type.lower() in cls.connectors
|
||||
10
application/parser/connectors/google_drive/__init__.py
Normal file
10
application/parser/connectors/google_drive/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Google Drive connector for DocsGPT.
|
||||
|
||||
This module provides authentication and document loading capabilities for Google Drive.
|
||||
"""
|
||||
|
||||
from .auth import GoogleDriveAuth
|
||||
from .loader import GoogleDriveLoader
|
||||
|
||||
__all__ = ['GoogleDriveAuth', 'GoogleDriveLoader']
|
||||
@@ -12,7 +12,7 @@ from googleapiclient.http import MediaIoBaseDownload
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.parser.remote.base import BaseRemote
|
||||
from application.parser.remote.google_auth import GoogleDriveAuth
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
@@ -3,17 +3,25 @@ from application.parser.remote.crawler_loader import CrawlerLoader
|
||||
from application.parser.remote.web_loader import WebLoader
|
||||
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
|
||||
from application.parser.remote.github_loader import GitHubLoader
|
||||
from application.parser.remote.google_drive_loader import GoogleDriveLoader
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
"""
|
||||
Factory class for creating remote content loaders.
|
||||
|
||||
These loaders fetch content from remote web sources like URLs,
|
||||
sitemaps, web crawlers, social media platforms, etc.
|
||||
|
||||
For external knowledge base connectors (like Google Drive),
|
||||
use ConnectorCreator instead.
|
||||
"""
|
||||
|
||||
loaders = {
|
||||
"url": WebLoader,
|
||||
"sitemap": SitemapLoader,
|
||||
"crawler": CrawlerLoader,
|
||||
"reddit": RedditPostsLoaderRemote,
|
||||
"github": GitHubLoader,
|
||||
"google_drive": GoogleDriveLoader,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -874,8 +874,8 @@ def ingest_connector(
|
||||
if not session_token:
|
||||
raise ValueError("Google Drive connector requires session_token")
|
||||
|
||||
from application.parser.remote.google_drive_loader import GoogleDriveLoader
|
||||
remote_loader = GoogleDriveLoader(session_token)
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
remote_loader = ConnectorCreator.create_connector("google_drive", session_token)
|
||||
|
||||
# Create a clean config for storage that excludes the session token
|
||||
api_source_config = {
|
||||
@@ -895,19 +895,25 @@ def ingest_connector(
|
||||
}
|
||||
)
|
||||
else:
|
||||
# For other connectors, maintain backward compatibility
|
||||
source_config = {
|
||||
"session_token": session_token
|
||||
}
|
||||
if file_ids:
|
||||
source_config["file_ids"] = file_ids
|
||||
if folder_ids:
|
||||
source_config["folder_ids"] = folder_ids
|
||||
source_config["recursive"] = recursive
|
||||
# For other external knowledge base connectors (future: dropbox, onedrive, etc.)
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
|
||||
remote_loader = RemoteCreator.create_loader(source_type, source_config)
|
||||
api_source_config = source_config
|
||||
download_info = remote_loader.download_to_directory(temp_dir)
|
||||
if not ConnectorCreator.is_supported(source_type):
|
||||
raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")
|
||||
|
||||
# Create connector with session token and other parameters
|
||||
remote_loader = ConnectorCreator.create_connector(source_type, session_token)
|
||||
|
||||
api_source_config = {
|
||||
"file_ids": file_ids or [],
|
||||
"folder_ids": folder_ids or [],
|
||||
"recursive": recursive
|
||||
}
|
||||
|
||||
download_info = remote_loader.download_to_directory(
|
||||
temp_dir,
|
||||
api_source_config
|
||||
)
|
||||
|
||||
if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
|
||||
logging.warning(f"No files were downloaded from {source_type}")
|
||||
@@ -917,7 +923,7 @@ def ingest_connector(
|
||||
"user": user,
|
||||
"tokens": 0,
|
||||
"type": source_type,
|
||||
"source_config": source_config,
|
||||
"source_config": api_source_config,
|
||||
"directory_structure": "{}",
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user