(feat:connectors) separate layer

This commit is contained in:
ManishMadan2882
2025-08-26 01:38:36 +05:30
parent 15a9e97a1e
commit f09f1433a9
8 changed files with 125 additions and 33 deletions

View File

@@ -3995,7 +3995,7 @@ class GoogleDriveAuth(Resource):
def get(self):
"""Get Google Drive OAuth authorization URL"""
try:
from application.parser.remote.google_auth import GoogleDriveAuth
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
auth = GoogleDriveAuth()
@@ -4029,7 +4029,7 @@ class GoogleDriveCallback(Resource):
def get(self):
"""Handle Google Drive OAuth callback"""
try:
from application.parser.remote.google_auth import GoogleDriveAuth
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
from flask import request
import uuid
@@ -4193,7 +4193,7 @@ class GoogleDriveRefresh(Resource):
def post(self):
"""Refresh Google Drive access token"""
try:
from application.parser.remote.google_auth import GoogleDriveAuth
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
data = request.get_json()
refresh_token = data.get('refresh_token')
@@ -4241,7 +4241,7 @@ class GoogleDriveFiles(Resource):
def post(self):
"""Get list of files from Google Drive"""
try:
from application.parser.remote.google_drive_loader import GoogleDriveLoader
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
data = request.get_json()
session_token = data.get('session_token')
@@ -4329,7 +4329,7 @@ class GoogleDriveValidateSession(Resource):
"""Validate Google Drive session token and return user info"""
try:
from application.core.mongo_db import MongoDB
from application.parser.remote.google_auth import GoogleDriveAuth
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
data = request.get_json()
session_token = data.get('session_token')

View File

@@ -0,0 +1,11 @@
"""
External knowledge base connectors for DocsGPT.
This module contains connectors for external knowledge bases and document storage systems
that require authentication and specialized handling, separate from simple web scrapers.
"""
from .connector_creator import ConnectorCreator
from .google_drive import GoogleDriveAuth, GoogleDriveLoader
__all__ = ['ConnectorCreator', 'GoogleDriveAuth', 'GoogleDriveLoader']

View File

@@ -0,0 +1,57 @@
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
class ConnectorCreator:
    """
    Factory class for creating external knowledge base connectors.

    These are different from remote loaders as they typically require
    authentication and connect to external document storage systems.

    Connector type names are matched case-insensitively against the keys of
    ``connectors``.
    """

    # Registry of supported connectors: lower-case type name -> connector class.
    connectors = {
        "google_drive": GoogleDriveLoader,
    }

    @classmethod
    def _normalize_type(cls, connector_type):
        """
        Return the registry key for ``connector_type``.

        Args:
            connector_type: Value supplied by the caller as the connector type.

        Returns:
            The lower-cased type name, or None when the value is not a string
            (e.g. a missing/None type coming from a request payload).
        """
        if not isinstance(connector_type, str):
            return None
        return connector_type.lower()

    @classmethod
    def create_connector(cls, connector_type, *args, **kwargs):
        """
        Create a connector instance for the specified type.

        Args:
            connector_type: Type of connector to create (e.g., 'google_drive')
            *args, **kwargs: Arguments to pass to the connector constructor

        Returns:
            Connector instance

        Raises:
            ValueError: If connector type is not supported (including when it
                is None or not a string)
        """
        key = cls._normalize_type(connector_type)
        # A non-string type can never be registered; report it through the
        # documented ValueError instead of letting ``.lower()`` raise
        # AttributeError.
        connector_class = cls.connectors.get(key) if key is not None else None
        if not connector_class:
            raise ValueError(f"No connector class found for type {connector_type}")
        return connector_class(*args, **kwargs)

    @classmethod
    def get_supported_connectors(cls):
        """
        Get list of supported connector types.

        Returns:
            List of supported connector type strings
        """
        return list(cls.connectors)

    @classmethod
    def is_supported(cls, connector_type):
        """
        Check if a connector type is supported.

        Args:
            connector_type: Type of connector to check

        Returns:
            True if supported, False otherwise (a None or non-string type is
            reported as unsupported rather than raising)
        """
        key = cls._normalize_type(connector_type)
        return key is not None and key in cls.connectors

View File

@@ -0,0 +1,10 @@
"""
Google Drive connector for DocsGPT.
This module provides authentication and document loading capabilities for Google Drive.
"""
from .auth import GoogleDriveAuth
from .loader import GoogleDriveLoader
__all__ = ['GoogleDriveAuth', 'GoogleDriveLoader']

View File

@@ -12,7 +12,7 @@ from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
from application.parser.remote.base import BaseRemote
from application.parser.remote.google_auth import GoogleDriveAuth
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
from application.parser.schema.base import Document

View File

@@ -3,17 +3,25 @@ from application.parser.remote.crawler_loader import CrawlerLoader
from application.parser.remote.web_loader import WebLoader
from application.parser.remote.reddit_loader import RedditPostsLoaderRemote
from application.parser.remote.github_loader import GitHubLoader
from application.parser.remote.google_drive_loader import GoogleDriveLoader
class RemoteCreator:
"""
Factory class for creating remote content loaders.
These loaders fetch content from remote web sources like URLs,
sitemaps, web crawlers, social media platforms, etc.
For external knowledge base connectors (like Google Drive),
use ConnectorCreator instead.
"""
loaders = {
"url": WebLoader,
"sitemap": SitemapLoader,
"crawler": CrawlerLoader,
"reddit": RedditPostsLoaderRemote,
"github": GitHubLoader,
"google_drive": GoogleDriveLoader,
}
@classmethod

View File

@@ -874,8 +874,8 @@ def ingest_connector(
if not session_token:
raise ValueError("Google Drive connector requires session_token")
from application.parser.remote.google_drive_loader import GoogleDriveLoader
remote_loader = GoogleDriveLoader(session_token)
from application.parser.connectors.connector_creator import ConnectorCreator
remote_loader = ConnectorCreator.create_connector("google_drive", session_token)
# Create a clean config for storage that excludes the session token
api_source_config = {
@@ -895,19 +895,25 @@ def ingest_connector(
}
)
else:
# For other connectors, maintain backward compatibility
source_config = {
"session_token": session_token
}
if file_ids:
source_config["file_ids"] = file_ids
if folder_ids:
source_config["folder_ids"] = folder_ids
source_config["recursive"] = recursive
# For other external knowledge base connectors (future: dropbox, onedrive, etc.)
from application.parser.connectors.connector_creator import ConnectorCreator
remote_loader = RemoteCreator.create_loader(source_type, source_config)
api_source_config = source_config
download_info = remote_loader.download_to_directory(temp_dir)
if not ConnectorCreator.is_supported(source_type):
raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")
# Create connector with session token and other parameters
remote_loader = ConnectorCreator.create_connector(source_type, session_token)
api_source_config = {
"file_ids": file_ids or [],
"folder_ids": folder_ids or [],
"recursive": recursive
}
download_info = remote_loader.download_to_directory(
temp_dir,
api_source_config
)
if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
logging.warning(f"No files were downloaded from {source_type}")
@@ -917,7 +923,7 @@ def ingest_connector(
"user": user,
"tokens": 0,
"type": source_type,
"source_config": source_config,
"source_config": api_source_config,
"directory_structure": "{}",
}