mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-30 17:13:15 +00:00
Merge pull request #1937 from ManishMadan2882/main
Connectors: Google Drive Ingestion
This commit is contained in:
@@ -5,19 +5,17 @@ from typing import Dict, Generator, List, Optional
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from application.agents.tools.tool_action_parser import ToolActionParser
|
||||
from application.agents.tools.tool_manager import ToolManager
|
||||
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
|
||||
from application.llm.handlers.handler_creator import LLMHandlerCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.logging import build_stack_data, log_activity, LogContext
|
||||
from application.retriever.base import BaseRetriever
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseAgent(ABC):
|
||||
def __init__(
|
||||
@@ -157,7 +155,7 @@ class BaseAgent(ABC):
|
||||
}
|
||||
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
|
||||
self.tool_calls.append(tool_call_data)
|
||||
return f"Failed to parse tool call.", call_id
|
||||
return "Failed to parse tool call.", call_id
|
||||
|
||||
# Check if tool_id exists in available tools
|
||||
if tool_id not in tools_dict:
|
||||
|
||||
627
application/api/connector/routes.py
Normal file
627
application/api/connector/routes.py
Normal file
@@ -0,0 +1,627 @@
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
from flask import (
|
||||
Blueprint,
|
||||
current_app,
|
||||
jsonify,
|
||||
make_response,
|
||||
request
|
||||
)
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
|
||||
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest_connector_task,
|
||||
)
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.api import api
|
||||
|
||||
|
||||
from application.utils import (
|
||||
check_required_fields
|
||||
)
|
||||
|
||||
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
|
||||
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo[settings.MONGO_DB_NAME]
|
||||
sources_collection = db["sources"]
|
||||
sessions_collection = db["connector_sessions"]
|
||||
|
||||
connector = Blueprint("connector", __name__)
|
||||
connectors_ns = Namespace("connectors", description="Connector operations", path="/")
|
||||
api.add_namespace(connectors_ns)
|
||||
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/upload")
|
||||
class UploadConnector(Resource):
|
||||
@api.expect(
|
||||
api.model(
|
||||
"ConnectorUploadModel",
|
||||
{
|
||||
"user": fields.String(required=True, description="User ID"),
|
||||
"source": fields.String(
|
||||
required=True, description="Source type (google_drive, github, etc.)"
|
||||
),
|
||||
"name": fields.String(required=True, description="Job name"),
|
||||
"data": fields.String(required=True, description="Configuration data"),
|
||||
"repo_url": fields.String(description="GitHub repository URL"),
|
||||
},
|
||||
)
|
||||
)
|
||||
@api.doc(
|
||||
description="Uploads connector source for vectorization",
|
||||
)
|
||||
def post(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
data = request.form
|
||||
required_fields = ["user", "source", "name", "data"]
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
try:
|
||||
config = json.loads(data["data"])
|
||||
source_data = None
|
||||
sync_frequency = config.get("sync_frequency", "never")
|
||||
|
||||
if data["source"] == "github":
|
||||
source_data = config.get("repo_url")
|
||||
elif data["source"] in ["crawler", "url"]:
|
||||
source_data = config.get("url")
|
||||
elif data["source"] == "reddit":
|
||||
source_data = config
|
||||
elif data["source"] in ConnectorCreator.get_supported_connectors():
|
||||
session_token = config.get("session_token")
|
||||
if not session_token:
|
||||
return make_response(jsonify({
|
||||
"success": False,
|
||||
"error": f"Missing session_token in {data['source']} configuration"
|
||||
}), 400)
|
||||
|
||||
file_ids = config.get("file_ids", [])
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [id.strip() for id in file_ids.split(',') if id.strip()]
|
||||
elif not isinstance(file_ids, list):
|
||||
file_ids = []
|
||||
|
||||
folder_ids = config.get("folder_ids", [])
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [id.strip() for id in folder_ids.split(',') if id.strip()]
|
||||
elif not isinstance(folder_ids, list):
|
||||
folder_ids = []
|
||||
|
||||
config["file_ids"] = file_ids
|
||||
config["folder_ids"] = folder_ids
|
||||
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
source_type=data["source"],
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=config.get("recursive", False),
|
||||
retriever=config.get("retriever", "classic"),
|
||||
sync_frequency=sync_frequency
|
||||
)
|
||||
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
|
||||
task = ingest_connector_task.delay(
|
||||
source_data=source_data,
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
loader=data["source"],
|
||||
sync_frequency=sync_frequency
|
||||
)
|
||||
except Exception as err:
|
||||
current_app.logger.error(
|
||||
f"Error uploading connector source: {err}", exc_info=True
|
||||
)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/task_status")
|
||||
class ConnectorTaskStatus(Resource):
|
||||
task_status_model = api.model(
|
||||
"ConnectorTaskStatusModel",
|
||||
{"task_id": fields.String(required=True, description="Task ID")},
|
||||
)
|
||||
|
||||
@api.expect(task_status_model)
|
||||
@api.doc(description="Get connector task status")
|
||||
def get(self):
|
||||
task_id = request.args.get("task_id")
|
||||
if not task_id:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Task ID is required"}), 400
|
||||
)
|
||||
try:
|
||||
from application.celery_init import celery
|
||||
|
||||
task = celery.AsyncResult(task_id)
|
||||
task_meta = task.info
|
||||
print(f"Task status: {task.status}")
|
||||
if not isinstance(
|
||||
task_meta, (dict, list, str, int, float, bool, type(None))
|
||||
):
|
||||
task_meta = str(task_meta)
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify({"status": task.status, "result": task_meta}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/sources")
|
||||
class ConnectorSources(Resource):
|
||||
@api.doc(description="Get connector sources")
|
||||
def get(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
user = decoded_token.get("sub")
|
||||
try:
|
||||
sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1)
|
||||
connector_sources = []
|
||||
for source in sources:
|
||||
connector_sources.append({
|
||||
"id": str(source["_id"]),
|
||||
"name": source.get("name"),
|
||||
"date": source.get("date"),
|
||||
"type": source.get("type"),
|
||||
"source": source.get("source"),
|
||||
"tokens": source.get("tokens", ""),
|
||||
"retriever": source.get("retriever", "classic"),
|
||||
"syncFrequency": source.get("sync_frequency", ""),
|
||||
})
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving connector sources: {err}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify(connector_sources), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/delete")
|
||||
class DeleteConnectorSource(Resource):
|
||||
@api.doc(
|
||||
description="Delete a connector source",
|
||||
params={"source_id": "The source ID to delete"},
|
||||
)
|
||||
def delete(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
source_id = request.args.get("source_id")
|
||||
if not source_id:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "source_id is required"}), 400
|
||||
)
|
||||
try:
|
||||
result = sources_collection.delete_one(
|
||||
{"_id": ObjectId(source_id), "user": decoded_token.get("sub")}
|
||||
)
|
||||
if result.deleted_count == 0:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Source not found"}), 404
|
||||
)
|
||||
except Exception as err:
|
||||
current_app.logger.error(
|
||||
f"Error deleting connector source: {err}", exc_info=True
|
||||
)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/auth")
|
||||
class ConnectorAuth(Resource):
|
||||
@api.doc(description="Get connector OAuth authorization URL", params={"provider": "Connector provider (e.g., google_drive)"})
|
||||
def get(self):
|
||||
try:
|
||||
provider = request.args.get('provider') or request.args.get('source')
|
||||
if not provider:
|
||||
return make_response(jsonify({"success": False, "error": "Missing provider"}), 400)
|
||||
|
||||
if not ConnectorCreator.is_supported(provider):
|
||||
return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400)
|
||||
|
||||
import uuid
|
||||
state = str(uuid.uuid4())
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
authorization_url = auth.get_authorization_url(state=state)
|
||||
return make_response(jsonify({
|
||||
"success": True,
|
||||
"authorization_url": authorization_url,
|
||||
"state": state
|
||||
}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error generating connector auth URL: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/callback")
|
||||
class ConnectorsCallback(Resource):
|
||||
@api.doc(description="Handle OAuth callback for external connectors")
|
||||
def get(self):
|
||||
"""Handle OAuth callback for external connectors"""
|
||||
try:
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from flask import request, redirect
|
||||
import uuid
|
||||
|
||||
provider = request.args.get('provider', 'google_drive')
|
||||
authorization_code = request.args.get('code')
|
||||
_ = request.args.get('state')
|
||||
error = request.args.get('error')
|
||||
|
||||
if error:
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider={provider}")
|
||||
|
||||
if not authorization_code:
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Authorization+code+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider={provider}")
|
||||
|
||||
try:
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
token_info = auth.exchange_code_for_tokens(authorization_code)
|
||||
|
||||
session_token = str(uuid.uuid4())
|
||||
|
||||
|
||||
try:
|
||||
credentials = auth.create_credentials_from_token_info(token_info)
|
||||
service = auth.build_drive_service(credentials)
|
||||
user_info = service.about().get(fields="user").execute()
|
||||
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
|
||||
except Exception as e:
|
||||
current_app.logger.warning(f"Could not get user info: {e}")
|
||||
user_email = 'Connected User'
|
||||
|
||||
sanitized_token_info = {
|
||||
"access_token": token_info.get("access_token"),
|
||||
"refresh_token": token_info.get("refresh_token"),
|
||||
"token_uri": token_info.get("token_uri"),
|
||||
"expiry": token_info.get("expiry"),
|
||||
"scopes": token_info.get("scopes")
|
||||
}
|
||||
|
||||
user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None
|
||||
sessions_collection.insert_one({
|
||||
"session_token": session_token,
|
||||
"user": user_id,
|
||||
"token_info": sanitized_token_info,
|
||||
"created_at": datetime.datetime.now(datetime.timezone.utc),
|
||||
"user_email": user_email,
|
||||
"provider": provider
|
||||
})
|
||||
|
||||
# Redirect to success page with session token and user email
|
||||
return redirect(f"/api/connectors/callback-status?status=success&message=Authentication+successful&provider={provider}&session_token={session_token}&user_email={user_email}")
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True)
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+exchange+authorization+code+for+tokens:+{str(e)}&provider={provider}")
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error handling connector callback: {e}")
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+complete+connector+authentication:+{str(e)}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.")
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/refresh")
|
||||
class ConnectorRefresh(Resource):
|
||||
@api.expect(api.model("ConnectorRefreshModel", {"provider": fields.String(required=True), "refresh_token": fields.String(required=True)}))
|
||||
@api.doc(description="Refresh connector access token")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
refresh_token = data.get('refresh_token')
|
||||
|
||||
if not provider or not refresh_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and refresh_token are required"}), 400)
|
||||
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
token_info = auth.refresh_access_token(refresh_token)
|
||||
return make_response(jsonify({"success": True, "token_info": token_info}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error refreshing token for connector: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/files")
|
||||
class ConnectorFiles(Resource):
|
||||
@api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)}))
|
||||
@api.doc(description="List files from a connector provider (supports pagination)")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
session_token = data.get('session_token')
|
||||
folder_id = data.get('folder_id')
|
||||
limit = data.get('limit', 10)
|
||||
page_token = data.get('page_token')
|
||||
if not provider or not session_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
|
||||
|
||||
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
user = decoded_token.get('sub')
|
||||
session = sessions_collection.find_one({"session_token": session_token, "user": user})
|
||||
if not session:
|
||||
return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)
|
||||
|
||||
loader = ConnectorCreator.create_connector(provider, session_token)
|
||||
documents = loader.load_data({
|
||||
'limit': limit,
|
||||
'list_only': True,
|
||||
'session_token': session_token,
|
||||
'folder_id': folder_id,
|
||||
'page_token': page_token
|
||||
})
|
||||
|
||||
files = []
|
||||
for doc in documents[:limit]:
|
||||
metadata = doc.extra_info
|
||||
modified_time = metadata.get('modified_time')
|
||||
if modified_time:
|
||||
date_part = modified_time.split('T')[0]
|
||||
time_part = modified_time.split('T')[1].split('.')[0].split('Z')[0]
|
||||
formatted_time = f"{date_part} {time_part}"
|
||||
else:
|
||||
formatted_time = None
|
||||
|
||||
files.append({
|
||||
'id': doc.doc_id,
|
||||
'name': metadata.get('file_name', 'Unknown File'),
|
||||
'type': metadata.get('mime_type', 'unknown'),
|
||||
'size': metadata.get('size', None),
|
||||
'modifiedTime': formatted_time
|
||||
})
|
||||
|
||||
next_token = getattr(loader, 'next_page_token', None)
|
||||
has_more = bool(next_token)
|
||||
|
||||
return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error loading connector files: {e}")
|
||||
return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/validate-session")
|
||||
class ConnectorValidateSession(Resource):
|
||||
@api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)}))
|
||||
@api.doc(description="Validate connector session token and return user info")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
session_token = data.get('session_token')
|
||||
if not provider or not session_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
|
||||
|
||||
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
user = decoded_token.get('sub')
|
||||
|
||||
session = sessions_collection.find_one({"session_token": session_token, "user": user})
|
||||
if not session or "token_info" not in session:
|
||||
return make_response(jsonify({"success": False, "error": "Invalid or expired session"}), 401)
|
||||
|
||||
token_info = session["token_info"]
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
is_expired = auth.is_token_expired(token_info)
|
||||
|
||||
return make_response(jsonify({
|
||||
"success": True,
|
||||
"expired": is_expired,
|
||||
"user_email": session.get('user_email', 'Connected User')
|
||||
}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error validating connector session: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/disconnect")
|
||||
class ConnectorDisconnect(Resource):
|
||||
@api.expect(api.model("ConnectorDisconnectModel", {"provider": fields.String(required=True), "session_token": fields.String(required=False)}))
|
||||
@api.doc(description="Disconnect a connector session")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
session_token = data.get('session_token')
|
||||
if not provider:
|
||||
return make_response(jsonify({"success": False, "error": "provider is required"}), 400)
|
||||
|
||||
|
||||
if session_token:
|
||||
sessions_collection.delete_one({"session_token": session_token})
|
||||
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error disconnecting connector session: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/sync")
|
||||
class ConnectorSync(Resource):
|
||||
@api.expect(
|
||||
api.model(
|
||||
"ConnectorSyncModel",
|
||||
{
|
||||
"source_id": fields.String(required=True, description="Source ID to sync"),
|
||||
"session_token": fields.String(required=True, description="Authentication token")
|
||||
},
|
||||
)
|
||||
)
|
||||
@api.doc(description="Sync connector source to check for modifications")
|
||||
def post(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
|
||||
try:
|
||||
data = request.get_json()
|
||||
source_id = data.get('source_id')
|
||||
session_token = data.get('session_token')
|
||||
|
||||
if not all([source_id, session_token]):
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "source_id and session_token are required"
|
||||
}),
|
||||
400
|
||||
)
|
||||
source = sources_collection.find_one({"_id": ObjectId(source_id)})
|
||||
if not source:
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "Source not found"
|
||||
}),
|
||||
404
|
||||
)
|
||||
|
||||
if source.get('user') != decoded_token.get('sub'):
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "Unauthorized access to source"
|
||||
}),
|
||||
403
|
||||
)
|
||||
|
||||
remote_data = {}
|
||||
try:
|
||||
if source.get('remote_data'):
|
||||
remote_data = json.loads(source.get('remote_data'))
|
||||
except json.JSONDecodeError:
|
||||
current_app.logger.error(f"Invalid remote_data format for source {source_id}")
|
||||
remote_data = {}
|
||||
|
||||
source_type = remote_data.get('provider')
|
||||
if not source_type:
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "Source provider not found in remote_data"
|
||||
}),
|
||||
400
|
||||
)
|
||||
|
||||
# Extract configuration from remote_data
|
||||
file_ids = remote_data.get('file_ids', [])
|
||||
folder_ids = remote_data.get('folder_ids', [])
|
||||
recursive = remote_data.get('recursive', True)
|
||||
|
||||
# Start the sync task
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=source.get('name'),
|
||||
user=decoded_token.get('sub'),
|
||||
source_type=source_type,
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=recursive,
|
||||
retriever=source.get('retriever', 'classic'),
|
||||
operation_mode="sync",
|
||||
doc_id=source_id,
|
||||
sync_frequency=source.get('sync_frequency', 'never')
|
||||
)
|
||||
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": True,
|
||||
"task_id": task.id
|
||||
}),
|
||||
200
|
||||
)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(
|
||||
f"Error syncing connector source: {err}",
|
||||
exc_info=True
|
||||
)
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": str(err)
|
||||
}),
|
||||
400
|
||||
)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/callback-status")
|
||||
class ConnectorCallbackStatus(Resource):
|
||||
@api.doc(description="Return HTML page with connector authentication status")
|
||||
def get(self):
|
||||
"""Return HTML page with connector authentication status"""
|
||||
try:
|
||||
status = request.args.get('status', 'error')
|
||||
message = request.args.get('message', '')
|
||||
provider = request.args.get('provider', 'connector')
|
||||
session_token = request.args.get('session_token', '')
|
||||
user_email = request.args.get('user_email', '')
|
||||
|
||||
html_content = f"""
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>{provider.replace('_', ' ').title()} Authentication</title>
|
||||
<style>
|
||||
body {{ font-family: Arial, sans-serif; text-align: center; padding: 40px; }}
|
||||
.container {{ max-width: 600px; margin: 0 auto; }}
|
||||
.success {{ color: #4CAF50; }}
|
||||
.error {{ color: #F44336; }}
|
||||
</style>
|
||||
<script>
|
||||
window.onload = function() {{
|
||||
const status = "{status}";
|
||||
const sessionToken = "{session_token}";
|
||||
const userEmail = "{user_email}";
|
||||
|
||||
if (status === "success" && window.opener) {{
|
||||
window.opener.postMessage({{
|
||||
type: '{provider}_auth_success',
|
||||
session_token: sessionToken,
|
||||
user_email: userEmail
|
||||
}}, '*');
|
||||
|
||||
setTimeout(() => window.close(), 3000);
|
||||
}}
|
||||
}};
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h2>{provider.replace('_', ' ').title()} Authentication</h2>
|
||||
<div class="{status}">
|
||||
<p>{message}</p>
|
||||
{f'<p>Connected as: {user_email}</p>' if status == 'success' else ''}
|
||||
</div>
|
||||
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else ''}</small></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
return make_response(html_content, 200, {'Content-Type': 'text/html'})
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error rendering callback status page: {e}")
|
||||
return make_response("Authentication error occurred", 500, {'Content-Type': 'text/html'})
|
||||
|
||||
|
||||
@@ -28,6 +28,7 @@ from application.agents.tools.tool_manager import ToolManager
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest,
|
||||
ingest_connector_task,
|
||||
ingest_remote,
|
||||
process_agent_webhook,
|
||||
store_attachment,
|
||||
@@ -46,6 +47,7 @@ from application.utils import (
|
||||
)
|
||||
from application.utils import num_tokens_from_string
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
@@ -492,9 +494,9 @@ class DeleteOldIndexes(Resource):
|
||||
)
|
||||
if not doc:
|
||||
return make_response(jsonify({"status": "not found"}), 404)
|
||||
|
||||
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
|
||||
try:
|
||||
# Delete vector index
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
@@ -508,7 +510,7 @@ class DeleteOldIndexes(Resource):
|
||||
settings.VECTOR_STORE, source_id=str(doc["_id"])
|
||||
)
|
||||
vectorstore.delete_index()
|
||||
|
||||
|
||||
if "file_path" in doc and doc["file_path"]:
|
||||
file_path = doc["file_path"]
|
||||
if storage.is_directory(file_path):
|
||||
@@ -517,7 +519,7 @@ class DeleteOldIndexes(Resource):
|
||||
storage.delete_file(f)
|
||||
else:
|
||||
storage.delete_file(file_path)
|
||||
|
||||
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except Exception as err:
|
||||
@@ -525,7 +527,7 @@ class DeleteOldIndexes(Resource):
|
||||
f"Error deleting files and indexes: {err}", exc_info=True
|
||||
)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
|
||||
|
||||
sources_collection.delete_one({"_id": ObjectId(source_id)})
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
|
||||
@@ -573,30 +575,30 @@ class UploadFile(Resource):
|
||||
|
||||
try:
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
|
||||
|
||||
|
||||
for file in files:
|
||||
original_filename = file.filename
|
||||
safe_file = safe_filename(original_filename)
|
||||
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
temp_file_path = os.path.join(temp_dir, safe_file)
|
||||
file.save(temp_file_path)
|
||||
|
||||
|
||||
if zipfile.is_zipfile(temp_file_path):
|
||||
try:
|
||||
with zipfile.ZipFile(temp_file_path, 'r') as zip_ref:
|
||||
zip_ref.extractall(path=temp_dir)
|
||||
|
||||
|
||||
# Walk through extracted files and upload them
|
||||
for root, _, files in os.walk(temp_dir):
|
||||
for extracted_file in files:
|
||||
if os.path.join(root, extracted_file) == temp_file_path:
|
||||
continue
|
||||
|
||||
|
||||
rel_path = os.path.relpath(os.path.join(root, extracted_file), temp_dir)
|
||||
storage_path = f"{base_path}/{rel_path}"
|
||||
|
||||
|
||||
with open(os.path.join(root, extracted_file), 'rb') as f:
|
||||
storage.save_file(f, storage_path)
|
||||
except Exception as e:
|
||||
@@ -610,7 +612,7 @@ class UploadFile(Resource):
|
||||
file_path = f"{base_path}/{safe_file}"
|
||||
with open(temp_file_path, 'rb') as f:
|
||||
storage.save_file(f, file_path)
|
||||
|
||||
|
||||
task = ingest.delay(
|
||||
settings.UPLOAD_FOLDER,
|
||||
[
|
||||
@@ -686,8 +688,8 @@ class ManageSourceFiles(Resource):
|
||||
try:
|
||||
storage = StorageCreator.get_storage()
|
||||
source_file_path = source.get("file_path", "")
|
||||
parent_dir = request.form.get("parent_dir", "")
|
||||
|
||||
parent_dir = request.form.get("parent_dir", "")
|
||||
|
||||
if parent_dir and (parent_dir.startswith("/") or ".." in parent_dir):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Invalid parent directory path"}), 400
|
||||
@@ -701,7 +703,7 @@ class ManageSourceFiles(Resource):
|
||||
)
|
||||
|
||||
added_files = []
|
||||
|
||||
|
||||
target_dir = source_file_path
|
||||
if parent_dir:
|
||||
target_dir = f"{source_file_path}/{parent_dir}"
|
||||
@@ -877,6 +879,42 @@ class UploadRemote(Resource):
|
||||
source_data = config.get("url")
|
||||
elif data["source"] == "reddit":
|
||||
source_data = config
|
||||
elif data["source"] in ConnectorCreator.get_supported_connectors():
|
||||
session_token = config.get("session_token")
|
||||
if not session_token:
|
||||
return make_response(jsonify({
|
||||
"success": False,
|
||||
"error": f"Missing session_token in {data['source']} configuration"
|
||||
}), 400)
|
||||
|
||||
# Process file_ids
|
||||
file_ids = config.get("file_ids", [])
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [id.strip() for id in file_ids.split(',') if id.strip()]
|
||||
elif not isinstance(file_ids, list):
|
||||
file_ids = []
|
||||
|
||||
# Process folder_ids
|
||||
folder_ids = config.get("folder_ids", [])
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [id.strip() for id in folder_ids.split(',') if id.strip()]
|
||||
elif not isinstance(folder_ids, list):
|
||||
folder_ids = []
|
||||
|
||||
config["file_ids"] = file_ids
|
||||
config["folder_ids"] = folder_ids
|
||||
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
source_type=data["source"],
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=config.get("recursive", False),
|
||||
retriever=config.get("retriever", "classic")
|
||||
)
|
||||
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
|
||||
task = ingest_remote.delay(
|
||||
source_data=source_data,
|
||||
job_name=data["name"],
|
||||
@@ -984,7 +1022,8 @@ class PaginatedSources(Resource):
|
||||
"tokens": doc.get("tokens", ""),
|
||||
"retriever": doc.get("retriever", "classic"),
|
||||
"syncFrequency": doc.get("sync_frequency", ""),
|
||||
"isNested": bool(doc.get("directory_structure"))
|
||||
"isNested": bool(doc.get("directory_structure")),
|
||||
"type": doc.get("type", "file")
|
||||
}
|
||||
paginated_docs.append(doc_data)
|
||||
response = {
|
||||
@@ -1032,7 +1071,8 @@ class CombinedJson(Resource):
|
||||
"tokens": index.get("tokens", ""),
|
||||
"retriever": index.get("retriever", "classic"),
|
||||
"syncFrequency": index.get("sync_frequency", ""),
|
||||
"is_nested": bool(index.get("directory_structure"))
|
||||
"is_nested": bool(index.get("directory_structure")),
|
||||
"type": index.get("type", "file") # Add type field with default "file"
|
||||
}
|
||||
)
|
||||
except Exception as err:
|
||||
@@ -1407,7 +1447,7 @@ class CreateAgent(Resource):
|
||||
except json.JSONDecodeError:
|
||||
data["json_schema"] = None
|
||||
print(f"Received data: {data}")
|
||||
|
||||
|
||||
# Validate JSON schema if provided
|
||||
if data.get("json_schema"):
|
||||
try:
|
||||
@@ -1415,19 +1455,19 @@ class CreateAgent(Resource):
|
||||
json_schema = data.get("json_schema")
|
||||
if not isinstance(json_schema, dict):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "JSON schema must be a valid JSON object"}),
|
||||
jsonify({"success": False, "message": "JSON schema must be a valid JSON object"}),
|
||||
400
|
||||
)
|
||||
|
||||
|
||||
# Validate that it has either a 'schema' property or is itself a schema
|
||||
if "schema" not in json_schema and "type" not in json_schema:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "JSON schema must contain either a 'schema' property or be a valid JSON schema with 'type' property"}),
|
||||
jsonify({"success": False, "message": "JSON schema must contain either a 'schema' property or be a valid JSON schema with 'type' property"}),
|
||||
400
|
||||
)
|
||||
except Exception as e:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": f"Invalid JSON schema: {str(e)}"}),
|
||||
jsonify({"success": False, "message": f"Invalid JSON schema: {str(e)}"}),
|
||||
400
|
||||
)
|
||||
|
||||
@@ -3561,7 +3601,7 @@ class GetChunks(Resource):
|
||||
try:
|
||||
store = get_vector_store(doc_id)
|
||||
chunks = store.get_chunks()
|
||||
|
||||
|
||||
filtered_chunks = []
|
||||
for chunk in chunks:
|
||||
metadata = chunk.get("metadata", {})
|
||||
@@ -3582,9 +3622,9 @@ class GetChunks(Resource):
|
||||
continue
|
||||
|
||||
filtered_chunks.append(chunk)
|
||||
|
||||
|
||||
chunks = filtered_chunks
|
||||
|
||||
|
||||
total_chunks = len(chunks)
|
||||
start = (page - 1) * per_page
|
||||
end = start + per_page
|
||||
@@ -3905,35 +3945,47 @@ class DirectoryStructure(Resource):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
|
||||
|
||||
user = decoded_token.get("sub")
|
||||
doc_id = request.args.get("id")
|
||||
|
||||
|
||||
if not doc_id:
|
||||
return make_response(
|
||||
jsonify({"error": "Document ID is required"}), 400
|
||||
)
|
||||
|
||||
|
||||
if not ObjectId.is_valid(doc_id):
|
||||
return make_response(jsonify({"error": "Invalid document ID"}), 400)
|
||||
|
||||
|
||||
try:
|
||||
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
|
||||
if not doc:
|
||||
return make_response(
|
||||
jsonify({"error": "Document not found or access denied"}), 404
|
||||
)
|
||||
|
||||
|
||||
directory_structure = doc.get("directory_structure", {})
|
||||
|
||||
base_path = doc.get("file_path", "")
|
||||
|
||||
provider = None
|
||||
remote_data = doc.get("remote_data")
|
||||
try:
|
||||
if isinstance(remote_data, str) and remote_data:
|
||||
remote_data_obj = json.loads(remote_data)
|
||||
provider = remote_data_obj.get("provider")
|
||||
except Exception as e:
|
||||
current_app.logger.warning(
|
||||
f"Failed to parse remote_data for doc {doc_id}: {e}")
|
||||
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": True,
|
||||
"directory_structure": directory_structure,
|
||||
"base_path": doc.get("file_path", "")
|
||||
"base_path": base_path,
|
||||
"provider": provider,
|
||||
}), 200
|
||||
)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"Error retrieving directory structure: {e}", exc_info=True
|
||||
@@ -3941,3 +3993,6 @@ class DirectoryStructure(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "error": str(e)}), 500
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -47,6 +47,39 @@ def process_agent_webhook(self, agent_id, payload):
|
||||
return resp
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest_connector_task(
|
||||
self,
|
||||
job_name,
|
||||
user,
|
||||
source_type,
|
||||
session_token=None,
|
||||
file_ids=None,
|
||||
folder_ids=None,
|
||||
recursive=True,
|
||||
retriever="classic",
|
||||
operation_mode="upload",
|
||||
doc_id=None,
|
||||
sync_frequency="never"
|
||||
):
|
||||
from application.worker import ingest_connector
|
||||
resp = ingest_connector(
|
||||
self,
|
||||
job_name,
|
||||
user,
|
||||
source_type,
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=recursive,
|
||||
retriever=retriever,
|
||||
operation_mode=operation_mode,
|
||||
doc_id=doc_id,
|
||||
sync_frequency=sync_frequency
|
||||
)
|
||||
return resp
|
||||
|
||||
|
||||
@celery.on_after_configure.connect
|
||||
def setup_periodic_tasks(sender, **kwargs):
|
||||
sender.add_periodic_task(
|
||||
|
||||
@@ -16,6 +16,7 @@ from application.api import api # noqa: E402
|
||||
from application.api.answer import answer # noqa: E402
|
||||
from application.api.internal.routes import internal # noqa: E402
|
||||
from application.api.user.routes import user # noqa: E402
|
||||
from application.api.connector.routes import connector # noqa: E402
|
||||
from application.celery_init import celery # noqa: E402
|
||||
from application.core.settings import settings # noqa: E402
|
||||
|
||||
@@ -30,6 +31,7 @@ app = Flask(__name__)
|
||||
app.register_blueprint(user)
|
||||
app.register_blueprint(answer)
|
||||
app.register_blueprint(internal)
|
||||
app.register_blueprint(connector)
|
||||
app.config.update(
|
||||
UPLOAD_FOLDER="inputs",
|
||||
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
|
||||
|
||||
@@ -40,6 +40,13 @@ class Settings(BaseSettings):
|
||||
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
|
||||
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
|
||||
|
||||
# Google Drive integration
|
||||
GOOGLE_CLIENT_ID: Optional[str] = None # Replace with your actual Google OAuth client ID
|
||||
GOOGLE_CLIENT_SECRET: Optional[str] = None# Replace with your actual Google OAuth client secret
|
||||
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback"
|
||||
##append ?provider={provider_name} in your Provider console like http://127.0.0.1:7091/api/connectors/callback?provider=google_drive
|
||||
|
||||
|
||||
# LLM Cache
|
||||
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
|
||||
|
||||
|
||||
18
application/parser/connectors/__init__.py
Normal file
18
application/parser/connectors/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
External knowledge base connectors for DocsGPT.
|
||||
|
||||
This module contains connectors for external knowledge bases and document storage systems
|
||||
that require authentication and specialized handling, separate from simple web scrapers.
|
||||
"""
|
||||
|
||||
from .base import BaseConnectorAuth, BaseConnectorLoader
|
||||
from .connector_creator import ConnectorCreator
|
||||
from .google_drive import GoogleDriveAuth, GoogleDriveLoader
|
||||
|
||||
__all__ = [
|
||||
'BaseConnectorAuth',
|
||||
'BaseConnectorLoader',
|
||||
'ConnectorCreator',
|
||||
'GoogleDriveAuth',
|
||||
'GoogleDriveLoader'
|
||||
]
|
||||
129
application/parser/connectors/base.py
Normal file
129
application/parser/connectors/base.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Base classes for external knowledge base connectors.
|
||||
|
||||
This module provides minimal abstract base classes that define the essential
|
||||
interface for external knowledge base connectors.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseConnectorAuth(ABC):
|
||||
"""
|
||||
Abstract base class for connector authentication.
|
||||
|
||||
Defines the minimal interface that all connector authentication
|
||||
implementations must follow.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_authorization_url(self, state: Optional[str] = None) -> str:
|
||||
"""
|
||||
Generate authorization URL for OAuth flows.
|
||||
|
||||
Args:
|
||||
state: Optional state parameter for CSRF protection
|
||||
|
||||
Returns:
|
||||
Authorization URL
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Exchange authorization code for access tokens.
|
||||
|
||||
Args:
|
||||
authorization_code: Authorization code from OAuth callback
|
||||
|
||||
Returns:
|
||||
Dictionary containing token information
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Refresh an expired access token.
|
||||
|
||||
Args:
|
||||
refresh_token: Refresh token
|
||||
|
||||
Returns:
|
||||
Dictionary containing refreshed token information
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Check if a token is expired.
|
||||
|
||||
Args:
|
||||
token_info: Token information dictionary
|
||||
|
||||
Returns:
|
||||
True if token is expired, False otherwise
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class BaseConnectorLoader(ABC):
|
||||
"""
|
||||
Abstract base class for connector loaders.
|
||||
|
||||
Defines the minimal interface that all connector loader
|
||||
implementations must follow.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, session_token: str):
|
||||
"""
|
||||
Initialize the connector loader.
|
||||
|
||||
Args:
|
||||
session_token: Authentication session token
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
|
||||
"""
|
||||
Load documents from the external knowledge base.
|
||||
|
||||
Args:
|
||||
inputs: Configuration dictionary containing:
|
||||
- file_ids: Optional list of specific file IDs to load
|
||||
- folder_ids: Optional list of folder IDs to browse/download
|
||||
- limit: Maximum number of items to return
|
||||
- list_only: If True, return metadata without content
|
||||
- recursive: Whether to recursively process folders
|
||||
|
||||
Returns:
|
||||
List of Document objects
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Download files/folders to a local directory.
|
||||
|
||||
Args:
|
||||
local_dir: Local directory path to download files to
|
||||
source_config: Configuration for what to download
|
||||
|
||||
Returns:
|
||||
Dictionary containing download results:
|
||||
- files_downloaded: Number of files downloaded
|
||||
- directory_path: Path where files were downloaded
|
||||
- empty_result: Whether no files were downloaded
|
||||
- source_type: Type of connector
|
||||
- config_used: Configuration that was used
|
||||
- error: Error message if download failed (optional)
|
||||
"""
|
||||
pass
|
||||
81
application/parser/connectors/connector_creator.py
Normal file
81
application/parser/connectors/connector_creator.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
|
||||
|
||||
class ConnectorCreator:
|
||||
"""
|
||||
Factory class for creating external knowledge base connectors and auth providers.
|
||||
|
||||
These are different from remote loaders as they typically require
|
||||
authentication and connect to external document storage systems.
|
||||
"""
|
||||
|
||||
connectors = {
|
||||
"google_drive": GoogleDriveLoader,
|
||||
}
|
||||
|
||||
auth_providers = {
|
||||
"google_drive": GoogleDriveAuth,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_connector(cls, connector_type, *args, **kwargs):
|
||||
"""
|
||||
Create a connector instance for the specified type.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector to create (e.g., 'google_drive')
|
||||
*args, **kwargs: Arguments to pass to the connector constructor
|
||||
|
||||
Returns:
|
||||
Connector instance
|
||||
|
||||
Raises:
|
||||
ValueError: If connector type is not supported
|
||||
"""
|
||||
connector_class = cls.connectors.get(connector_type.lower())
|
||||
if not connector_class:
|
||||
raise ValueError(f"No connector class found for type {connector_type}")
|
||||
return connector_class(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def create_auth(cls, connector_type):
|
||||
"""
|
||||
Create an auth provider instance for the specified connector type.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector auth to create (e.g., 'google_drive')
|
||||
|
||||
Returns:
|
||||
Auth provider instance
|
||||
|
||||
Raises:
|
||||
ValueError: If connector type is not supported for auth
|
||||
"""
|
||||
auth_class = cls.auth_providers.get(connector_type.lower())
|
||||
if not auth_class:
|
||||
raise ValueError(f"No auth class found for type {connector_type}")
|
||||
return auth_class()
|
||||
|
||||
@classmethod
|
||||
def get_supported_connectors(cls):
|
||||
"""
|
||||
Get list of supported connector types.
|
||||
|
||||
Returns:
|
||||
List of supported connector type strings
|
||||
"""
|
||||
return list(cls.connectors.keys())
|
||||
|
||||
@classmethod
|
||||
def is_supported(cls, connector_type):
|
||||
"""
|
||||
Check if a connector type is supported.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector to check
|
||||
|
||||
Returns:
|
||||
True if supported, False otherwise
|
||||
"""
|
||||
return connector_type.lower() in cls.connectors
|
||||
10
application/parser/connectors/google_drive/__init__.py
Normal file
10
application/parser/connectors/google_drive/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Google Drive connector for DocsGPT.
|
||||
|
||||
This module provides authentication and document loading capabilities for Google Drive.
|
||||
"""
|
||||
|
||||
from .auth import GoogleDriveAuth
|
||||
from .loader import GoogleDriveLoader
|
||||
|
||||
__all__ = ['GoogleDriveAuth', 'GoogleDriveLoader']
|
||||
268
application/parser/connectors/google_drive/auth.py
Normal file
268
application/parser/connectors/google_drive/auth.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import logging
|
||||
import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import Flow
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.connectors.base import BaseConnectorAuth
|
||||
|
||||
|
||||
class GoogleDriveAuth(BaseConnectorAuth):
|
||||
"""
|
||||
Handles Google OAuth 2.0 authentication for Google Drive access.
|
||||
"""
|
||||
|
||||
SCOPES = [
|
||||
'https://www.googleapis.com/auth/drive.readonly',
|
||||
'https://www.googleapis.com/auth/drive.metadata.readonly'
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
self.client_id = settings.GOOGLE_CLIENT_ID
|
||||
self.client_secret = settings.GOOGLE_CLIENT_SECRET
|
||||
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}?provider=google_drive"
|
||||
|
||||
if not self.client_id or not self.client_secret:
|
||||
raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")
|
||||
|
||||
|
||||
|
||||
def get_authorization_url(self, state: Optional[str] = None) -> str:
|
||||
try:
|
||||
flow = Flow.from_client_config(
|
||||
{
|
||||
"web": {
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"redirect_uris": [self.redirect_uri]
|
||||
}
|
||||
},
|
||||
scopes=self.SCOPES
|
||||
)
|
||||
flow.redirect_uri = self.redirect_uri
|
||||
|
||||
authorization_url, _ = flow.authorization_url(
|
||||
access_type='offline',
|
||||
prompt='consent',
|
||||
include_granted_scopes='true',
|
||||
state=state
|
||||
)
|
||||
|
||||
return authorization_url
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error generating authorization URL: {e}")
|
||||
raise
|
||||
|
||||
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
|
||||
try:
|
||||
if not authorization_code:
|
||||
raise ValueError("Authorization code is required")
|
||||
|
||||
flow = Flow.from_client_config(
|
||||
{
|
||||
"web": {
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"redirect_uris": [self.redirect_uri]
|
||||
}
|
||||
},
|
||||
scopes=self.SCOPES
|
||||
)
|
||||
flow.redirect_uri = self.redirect_uri
|
||||
|
||||
flow.fetch_token(code=authorization_code)
|
||||
|
||||
credentials = flow.credentials
|
||||
|
||||
if not credentials.refresh_token:
|
||||
logging.warning("OAuth flow did not return a refresh_token.")
|
||||
if not credentials.token:
|
||||
raise ValueError("OAuth flow did not return an access token")
|
||||
|
||||
if not credentials.token_uri:
|
||||
credentials.token_uri = "https://oauth2.googleapis.com/token"
|
||||
|
||||
if not credentials.client_id:
|
||||
credentials.client_id = self.client_id
|
||||
|
||||
if not credentials.client_secret:
|
||||
credentials.client_secret = self.client_secret
|
||||
|
||||
if not credentials.refresh_token:
|
||||
raise ValueError(
|
||||
"No refresh token received. This typically happens when offline access wasn't granted. "
|
||||
)
|
||||
|
||||
return {
|
||||
'access_token': credentials.token,
|
||||
'refresh_token': credentials.refresh_token,
|
||||
'token_uri': credentials.token_uri,
|
||||
'client_id': credentials.client_id,
|
||||
'client_secret': credentials.client_secret,
|
||||
'scopes': credentials.scopes,
|
||||
'expiry': credentials.expiry.isoformat() if credentials.expiry else None
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error exchanging code for tokens: {e}")
|
||||
raise
|
||||
|
||||
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
|
||||
try:
|
||||
if not refresh_token:
|
||||
raise ValueError("Refresh token is required")
|
||||
|
||||
credentials = Credentials(
|
||||
token=None,
|
||||
refresh_token=refresh_token,
|
||||
token_uri="https://oauth2.googleapis.com/token",
|
||||
client_id=self.client_id,
|
||||
client_secret=self.client_secret
|
||||
)
|
||||
|
||||
from google.auth.transport.requests import Request
|
||||
credentials.refresh(Request())
|
||||
|
||||
return {
|
||||
'access_token': credentials.token,
|
||||
'refresh_token': refresh_token,
|
||||
'token_uri': credentials.token_uri,
|
||||
'client_id': credentials.client_id,
|
||||
'client_secret': credentials.client_secret,
|
||||
'scopes': credentials.scopes,
|
||||
'expiry': credentials.expiry.isoformat() if credentials.expiry else None
|
||||
}
|
||||
except Exception as e:
|
||||
logging.error(f"Error refreshing access token: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def create_credentials_from_token_info(self, token_info: Dict[str, Any]) -> Credentials:
|
||||
from application.core.settings import settings
|
||||
|
||||
access_token = token_info.get('access_token')
|
||||
if not access_token:
|
||||
raise ValueError("No access token found in token_info")
|
||||
|
||||
credentials = Credentials(
|
||||
token=access_token,
|
||||
refresh_token=token_info.get('refresh_token'),
|
||||
token_uri= 'https://oauth2.googleapis.com/token',
|
||||
client_id=settings.GOOGLE_CLIENT_ID,
|
||||
client_secret=settings.GOOGLE_CLIENT_SECRET,
|
||||
scopes=token_info.get('scopes', ['https://www.googleapis.com/auth/drive.readonly'])
|
||||
)
|
||||
|
||||
if not credentials.token:
|
||||
raise ValueError("Credentials created without valid access token")
|
||||
|
||||
return credentials
|
||||
|
||||
def build_drive_service(self, credentials: Credentials):
|
||||
try:
|
||||
if not credentials:
|
||||
raise ValueError("No credentials provided")
|
||||
|
||||
if not credentials.token and not credentials.refresh_token:
|
||||
raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
|
||||
|
||||
needs_refresh = credentials.expired or not credentials.token
|
||||
if needs_refresh:
|
||||
if credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
credentials.refresh(Request())
|
||||
except Exception as refresh_error:
|
||||
raise ValueError(f"Failed to refresh credentials: {refresh_error}")
|
||||
else:
|
||||
raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
|
||||
|
||||
return build('drive', 'v3', credentials=credentials)
|
||||
|
||||
except HttpError as e:
|
||||
raise ValueError(f"Failed to build Google Drive service: HTTP {e.resp.status}")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to build Google Drive service: {str(e)}")
|
||||
|
||||
def is_token_expired(self, token_info):
|
||||
if 'expiry' in token_info and token_info['expiry']:
|
||||
try:
|
||||
from dateutil import parser
|
||||
# Google Drive provides timezone-aware ISO8601 dates
|
||||
expiry_dt = parser.parse(token_info['expiry'])
|
||||
current_time = datetime.datetime.now(datetime.timezone.utc)
|
||||
return current_time >= expiry_dt - datetime.timedelta(seconds=60)
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
if 'access_token' in token_info and token_info['access_token']:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def get_token_info_from_session(self, session_token: str) -> Dict[str, Any]:
|
||||
try:
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo[settings.MONGO_DB_NAME]
|
||||
|
||||
sessions_collection = db["connector_sessions"]
|
||||
session = sessions_collection.find_one({"session_token": session_token})
|
||||
if not session:
|
||||
raise ValueError(f"Invalid session token: {session_token}")
|
||||
|
||||
if "token_info" not in session:
|
||||
raise ValueError("Session missing token information")
|
||||
|
||||
token_info = session["token_info"]
|
||||
if not token_info:
|
||||
raise ValueError("Invalid token information")
|
||||
|
||||
required_fields = ["access_token", "refresh_token"]
|
||||
missing_fields = [field for field in required_fields if field not in token_info or not token_info.get(field)]
|
||||
if missing_fields:
|
||||
raise ValueError(f"Missing required token fields: {missing_fields}")
|
||||
|
||||
if 'client_id' not in token_info:
|
||||
token_info['client_id'] = settings.GOOGLE_CLIENT_ID
|
||||
if 'client_secret' not in token_info:
|
||||
token_info['client_secret'] = settings.GOOGLE_CLIENT_SECRET
|
||||
if 'token_uri' not in token_info:
|
||||
token_info['token_uri'] = 'https://oauth2.googleapis.com/token'
|
||||
|
||||
return token_info
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to retrieve Google Drive token information: {str(e)}")
|
||||
|
||||
def validate_credentials(self, credentials: Credentials) -> bool:
|
||||
"""
|
||||
Validate Google Drive credentials by making a test API call.
|
||||
|
||||
Args:
|
||||
credentials: Google credentials object
|
||||
|
||||
Returns:
|
||||
True if credentials are valid, False otherwise
|
||||
"""
|
||||
try:
|
||||
service = self.build_drive_service(credentials)
|
||||
service.about().get(fields="user").execute()
|
||||
return True
|
||||
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error validating credentials: {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Error validating credentials: {e}")
|
||||
return False
|
||||
536
application/parser/connectors/google_drive/loader.py
Normal file
536
application/parser/connectors/google_drive/loader.py
Normal file
@@ -0,0 +1,536 @@
|
||||
"""
|
||||
Google Drive loader for DocsGPT.
|
||||
Loads documents from Google Drive using Google Drive API.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from googleapiclient.http import MediaIoBaseDownload
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.parser.connectors.base import BaseConnectorLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class GoogleDriveLoader(BaseConnectorLoader):
|
||||
|
||||
SUPPORTED_MIME_TYPES = {
|
||||
'application/pdf': '.pdf',
|
||||
'application/vnd.google-apps.document': '.docx',
|
||||
'application/vnd.google-apps.presentation': '.pptx',
|
||||
'application/vnd.google-apps.spreadsheet': '.xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
|
||||
'application/msword': '.doc',
|
||||
'application/vnd.ms-powerpoint': '.ppt',
|
||||
'application/vnd.ms-excel': '.xls',
|
||||
'text/plain': '.txt',
|
||||
'text/csv': '.csv',
|
||||
'text/html': '.html',
|
||||
'application/rtf': '.rtf',
|
||||
'image/jpeg': '.jpg',
|
||||
'image/jpg': '.jpg',
|
||||
'image/png': '.png',
|
||||
}
|
||||
|
||||
EXPORT_FORMATS = {
|
||||
'application/vnd.google-apps.document': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.google-apps.presentation': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'application/vnd.google-apps.spreadsheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||
}
|
||||
|
||||
def __init__(self, session_token: str):
|
||||
self.auth = GoogleDriveAuth()
|
||||
self.session_token = session_token
|
||||
|
||||
token_info = self.auth.get_token_info_from_session(session_token)
|
||||
self.credentials = self.auth.create_credentials_from_token_info(token_info)
|
||||
|
||||
try:
|
||||
self.service = self.auth.build_drive_service(self.credentials)
|
||||
except Exception as e:
|
||||
logging.warning(f"Could not build Google Drive service: {e}")
|
||||
self.service = None
|
||||
|
||||
self.next_page_token = None
|
||||
|
||||
|
||||
|
||||
def _process_file(self, file_metadata: Dict[str, Any], load_content: bool = True) -> Optional[Document]:
|
||||
try:
|
||||
file_id = file_metadata.get('id')
|
||||
file_name = file_metadata.get('name', 'Unknown')
|
||||
mime_type = file_metadata.get('mimeType', 'application/octet-stream')
|
||||
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
return None
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
logging.info(f"Skipping unsupported file type: {mime_type} for file {file_name}")
|
||||
return None
|
||||
# Google Drive provides timezone-aware ISO8601 dates
|
||||
doc_metadata = {
|
||||
'file_name': file_name,
|
||||
'mime_type': mime_type,
|
||||
'size': file_metadata.get('size', None),
|
||||
'created_time': file_metadata.get('createdTime'),
|
||||
'modified_time': file_metadata.get('modifiedTime'),
|
||||
'parents': file_metadata.get('parents', []),
|
||||
'source': 'google_drive'
|
||||
}
|
||||
|
||||
if not load_content:
|
||||
return Document(
|
||||
text="",
|
||||
doc_id=file_id,
|
||||
extra_info=doc_metadata
|
||||
)
|
||||
|
||||
content = self._download_file_content(file_id, mime_type)
|
||||
if content is None:
|
||||
logging.warning(f"Could not load content for file {file_name} ({file_id})")
|
||||
return None
|
||||
|
||||
return Document(
|
||||
text=content,
|
||||
doc_id=file_id,
|
||||
extra_info=doc_metadata
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing file: {e}")
|
||||
return None
|
||||
|
||||
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
|
||||
session_token = inputs.get('session_token')
|
||||
if session_token and session_token != self.session_token:
|
||||
logging.warning("Session token in inputs differs from loader's session token. Using loader's session token.")
|
||||
self.config = inputs
|
||||
|
||||
try:
|
||||
documents: List[Document] = []
|
||||
|
||||
folder_id = inputs.get('folder_id')
|
||||
file_ids = inputs.get('file_ids', [])
|
||||
limit = inputs.get('limit', 100)
|
||||
list_only = inputs.get('list_only', False)
|
||||
load_content = not list_only
|
||||
page_token = inputs.get('page_token')
|
||||
self.next_page_token = None
|
||||
|
||||
if file_ids:
|
||||
# Specific files requested: load them
|
||||
for file_id in file_ids:
|
||||
try:
|
||||
doc = self._load_file_by_id(file_id, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
elif hasattr(self, '_credential_refreshed') and self._credential_refreshed:
|
||||
self._credential_refreshed = False
|
||||
logging.info(f"Retrying load of file {file_id} after credential refresh")
|
||||
doc = self._load_file_by_id(file_id, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading file {file_id}: {e}")
|
||||
continue
|
||||
else:
|
||||
# Browsing mode: list immediate children of provided folder or root
|
||||
parent_id = folder_id if folder_id else 'root'
|
||||
documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token)
|
||||
|
||||
logging.info(f"Loaded {len(documents)} documents from Google Drive")
|
||||
return documents
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading data from Google Drive: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
|
||||
def _load_file_by_id(self, file_id: str, load_content: bool = True) -> Optional[Document]:
|
||||
self._ensure_service()
|
||||
|
||||
try:
|
||||
file_metadata = self.service.files().get(
|
||||
fileId=file_id,
|
||||
fields='id,name,mimeType,size,createdTime,modifiedTime,parents'
|
||||
).execute()
|
||||
|
||||
return self._process_file(file_metadata, load_content=load_content)
|
||||
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error loading file {file_id}: {e.resp.status} - {e.content}")
|
||||
|
||||
if e.resp.status in [401, 403]:
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
self._ensure_service()
|
||||
return None
|
||||
except Exception as refresh_error:
|
||||
raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
|
||||
else:
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading file {file_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]:
|
||||
self._ensure_service()
|
||||
|
||||
documents: List[Document] = []
|
||||
|
||||
try:
|
||||
query = f"'{parent_id}' in parents and trashed=false"
|
||||
next_token_out: Optional[str] = None
|
||||
|
||||
while True:
|
||||
page_size = 100
|
||||
if limit:
|
||||
remaining = max(0, limit - len(documents))
|
||||
if remaining == 0:
|
||||
break
|
||||
page_size = min(100, remaining)
|
||||
|
||||
results = self.service.files().list(
|
||||
q=query,
|
||||
fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
|
||||
pageToken=page_token,
|
||||
pageSize=page_size
|
||||
).execute()
|
||||
|
||||
items = results.get('files', [])
|
||||
for item in items:
|
||||
mime_type = item.get('mimeType')
|
||||
if mime_type == 'application/vnd.google-apps.folder':
|
||||
doc_metadata = {
|
||||
'file_name': item.get('name', 'Unknown'),
|
||||
'mime_type': mime_type,
|
||||
'size': item.get('size', None),
|
||||
'created_time': item.get('createdTime'),
|
||||
'modified_time': item.get('modifiedTime'),
|
||||
'parents': item.get('parents', []),
|
||||
'source': 'google_drive',
|
||||
'is_folder': True
|
||||
}
|
||||
documents.append(Document(text="", doc_id=item.get('id'), extra_info=doc_metadata))
|
||||
else:
|
||||
doc = self._process_file(item, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
|
||||
if limit and len(documents) >= limit:
|
||||
self.next_page_token = results.get('nextPageToken')
|
||||
return documents
|
||||
|
||||
page_token = results.get('nextPageToken')
|
||||
next_token_out = page_token
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
self.next_page_token = next_token_out
|
||||
return documents
|
||||
except Exception as e:
|
||||
logging.error(f"Error listing items under parent {parent_id}: {e}")
|
||||
return documents
|
||||
|
||||
|
||||
|
||||
|
||||
def _download_file_content(self, file_id: str, mime_type: str) -> Optional[str]:
|
||||
if not self.credentials.token:
|
||||
logging.warning("No access token in credentials, attempting to refresh")
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
logging.info("Credentials refreshed successfully")
|
||||
self._ensure_service()
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to refresh credentials: {e}")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing or invalid refresh_token")
|
||||
else:
|
||||
logging.error("No access token and no refresh_token available")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
if self.credentials.expired:
|
||||
logging.warning("Credentials are expired, attempting to refresh")
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
logging.info("Credentials refreshed successfully")
|
||||
self._ensure_service()
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to refresh expired credentials: {e}")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: expired credentials")
|
||||
else:
|
||||
logging.error("Credentials expired and no refresh_token available")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
try:
|
||||
if mime_type in self.EXPORT_FORMATS:
|
||||
export_mime_type = self.EXPORT_FORMATS[mime_type]
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType=export_mime_type
|
||||
)
|
||||
else:
|
||||
request = self.service.files().get_media(fileId=file_id)
|
||||
|
||||
file_io = io.BytesIO()
|
||||
downloader = MediaIoBaseDownload(file_io, request)
|
||||
|
||||
done = False
|
||||
while done is False:
|
||||
try:
|
||||
_, done = downloader.next_chunk()
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error during download of file {file_id}: {e}")
|
||||
return None
|
||||
|
||||
content_bytes = file_io.getvalue()
|
||||
|
||||
try:
|
||||
content = content_bytes.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
content = content_bytes.decode('latin-1')
|
||||
except UnicodeDecodeError:
|
||||
logging.error(f"Could not decode file {file_id} as text")
|
||||
return None
|
||||
|
||||
return content
|
||||
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")
|
||||
|
||||
if e.resp.status in [401, 403]:
|
||||
logging.error(f"Authentication error downloading file {file_id}")
|
||||
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
logging.info(f"Attempting to refresh credentials for file {file_id}")
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
logging.info("Credentials refreshed successfully")
|
||||
self._credential_refreshed = True
|
||||
self._ensure_service()
|
||||
return None
|
||||
except Exception as refresh_error:
|
||||
logging.error(f"Error refreshing credentials: {refresh_error}")
|
||||
raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
|
||||
else:
|
||||
logging.error("Cannot refresh credentials: missing refresh_token")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading file {file_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _download_file_to_directory(self, file_id: str, local_dir: str) -> bool:
|
||||
try:
|
||||
self._ensure_service()
|
||||
return self._download_single_file(file_id, local_dir)
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading file {file_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _ensure_service(self):
|
||||
if not self.service:
|
||||
try:
|
||||
self.service = self.auth.build_drive_service(self.credentials)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Cannot access Google Drive: {e}")
|
||||
|
||||
def _download_single_file(self, file_id: str, local_dir: str) -> bool:
|
||||
file_metadata = self.service.files().get(
|
||||
fileId=file_id,
|
||||
fields='name,mimeType'
|
||||
).execute()
|
||||
|
||||
file_name = file_metadata['name']
|
||||
mime_type = file_metadata['mimeType']
|
||||
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
return False
|
||||
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
full_path = os.path.join(local_dir, file_name)
|
||||
|
||||
if mime_type in self.EXPORT_FORMATS:
|
||||
export_mime_type = self.EXPORT_FORMATS[mime_type]
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType=export_mime_type
|
||||
)
|
||||
extension = self._get_extension_for_mime_type(export_mime_type)
|
||||
if not full_path.endswith(extension):
|
||||
full_path += extension
|
||||
else:
|
||||
request = self.service.files().get_media(fileId=file_id)
|
||||
|
||||
with open(full_path, 'wb') as f:
|
||||
downloader = MediaIoBaseDownload(f, request)
|
||||
done = False
|
||||
while not done:
|
||||
_, done = downloader.next_chunk()
|
||||
|
||||
return True
|
||||
|
||||
def _download_folder_recursive(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
|
||||
files_downloaded = 0
|
||||
try:
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
|
||||
query = f"'{folder_id}' in parents and trashed=false"
|
||||
page_token = None
|
||||
|
||||
while True:
|
||||
results = self.service.files().list(
|
||||
q=query,
|
||||
fields='nextPageToken, files(id, name, mimeType)',
|
||||
pageToken=page_token,
|
||||
pageSize=1000
|
||||
).execute()
|
||||
|
||||
items = results.get('files', [])
|
||||
logging.info(f"Found {len(items)} items in folder {folder_id}")
|
||||
|
||||
for item in items:
|
||||
item_name = item['name']
|
||||
item_id = item['id']
|
||||
mime_type = item['mimeType']
|
||||
|
||||
if mime_type == 'application/vnd.google-apps.folder':
|
||||
if recursive:
|
||||
# Create subfolder and recurse
|
||||
subfolder_path = os.path.join(local_dir, item_name)
|
||||
os.makedirs(subfolder_path, exist_ok=True)
|
||||
subfolder_files = self._download_folder_recursive(
|
||||
item_id,
|
||||
subfolder_path,
|
||||
recursive
|
||||
)
|
||||
files_downloaded += subfolder_files
|
||||
logging.info(f"Downloaded {subfolder_files} files from subfolder {item_name}")
|
||||
else:
|
||||
# Download file
|
||||
success = self._download_single_file(item_id, local_dir)
|
||||
if success:
|
||||
files_downloaded += 1
|
||||
logging.info(f"Downloaded file: {item_name}")
|
||||
else:
|
||||
logging.warning(f"Failed to download file: {item_name}")
|
||||
|
||||
page_token = results.get('nextPageToken')
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
return files_downloaded
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error in _download_folder_recursive for folder {folder_id}: {e}", exc_info=True)
|
||||
return files_downloaded
|
||||
|
||||
def _get_extension_for_mime_type(self, mime_type: str) -> str:
|
||||
extensions = {
|
||||
'application/pdf': '.pdf',
|
||||
'text/plain': '.txt',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
|
||||
'text/html': '.html',
|
||||
'text/markdown': '.md',
|
||||
}
|
||||
return extensions.get(mime_type, '.bin')
|
||||
|
||||
def _download_folder_contents(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
|
||||
try:
|
||||
self._ensure_service()
|
||||
return self._download_folder_recursive(folder_id, local_dir, recursive)
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
|
||||
return 0
|
||||
|
||||
def download_to_directory(self, local_dir: str, source_config: dict = None) -> dict:
|
||||
if source_config is None:
|
||||
source_config = {}
|
||||
|
||||
config = source_config if source_config else getattr(self, 'config', {})
|
||||
files_downloaded = 0
|
||||
|
||||
try:
|
||||
folder_ids = config.get('folder_ids', [])
|
||||
file_ids = config.get('file_ids', [])
|
||||
recursive = config.get('recursive', True)
|
||||
|
||||
self._ensure_service()
|
||||
|
||||
if file_ids:
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [file_ids]
|
||||
|
||||
for file_id in file_ids:
|
||||
if self._download_file_to_directory(file_id, local_dir):
|
||||
files_downloaded += 1
|
||||
|
||||
# Process folders
|
||||
if folder_ids:
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [folder_ids]
|
||||
|
||||
for folder_id in folder_ids:
|
||||
try:
|
||||
folder_metadata = self.service.files().get(
|
||||
fileId=folder_id,
|
||||
fields='name'
|
||||
).execute()
|
||||
folder_name = folder_metadata.get('name', '')
|
||||
folder_path = os.path.join(local_dir, folder_name)
|
||||
os.makedirs(folder_path, exist_ok=True)
|
||||
|
||||
folder_files = self._download_folder_recursive(
|
||||
folder_id,
|
||||
folder_path,
|
||||
recursive
|
||||
)
|
||||
files_downloaded += folder_files
|
||||
logging.info(f"Downloaded {folder_files} files from folder {folder_name}")
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
|
||||
|
||||
if not file_ids and not folder_ids:
|
||||
raise ValueError("No folder_ids or file_ids provided for download")
|
||||
|
||||
return {
|
||||
"files_downloaded": files_downloaded,
|
||||
"directory_path": local_dir,
|
||||
"empty_result": files_downloaded == 0,
|
||||
"source_type": "google_drive",
|
||||
"config_used": config
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"files_downloaded": files_downloaded,
|
||||
"directory_path": local_dir,
|
||||
"empty_result": True,
|
||||
"source_type": "google_drive",
|
||||
"config_used": config,
|
||||
"error": str(e)
|
||||
}
|
||||
@@ -6,6 +6,16 @@ from application.parser.remote.github_loader import GitHubLoader
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
"""
|
||||
Factory class for creating remote content loaders.
|
||||
|
||||
These loaders fetch content from remote web sources like URLs,
|
||||
sitemaps, web crawlers, social media platforms, etc.
|
||||
|
||||
For external knowledge base connectors (like Google Drive),
|
||||
use ConnectorCreator instead.
|
||||
"""
|
||||
|
||||
loaders = {
|
||||
"url": WebLoader,
|
||||
"sitemap": SitemapLoader,
|
||||
@@ -18,5 +28,5 @@ class RemoteCreator:
|
||||
def create_loader(cls, type, *args, **kwargs):
|
||||
loader_class = cls.loaders.get(type.lower())
|
||||
if not loader_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
raise ValueError(f"No loader class found for type {type}")
|
||||
return loader_class(*args, **kwargs)
|
||||
|
||||
@@ -13,6 +13,9 @@ Flask==3.1.1
|
||||
faiss-cpu==1.9.0.post1
|
||||
flask-restx==1.3.0
|
||||
google-genai==1.3.0
|
||||
google-api-python-client==2.179.0
|
||||
google-auth-httplib2==0.2.0
|
||||
google-auth-oauthlib==1.2.2
|
||||
gTTS==2.5.4
|
||||
gunicorn==23.0.0
|
||||
javalang==0.13.0
|
||||
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
import shutil
|
||||
import string
|
||||
import tempfile
|
||||
from typing import Any, Dict
|
||||
import zipfile
|
||||
|
||||
from collections import Counter
|
||||
@@ -21,6 +22,7 @@ from application.api.answer.services.stream_processor import get_prompt
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.parser.chunking import Chunker
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from application.parser.embedding_pipeline import embed_and_store_documents
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
@@ -649,8 +651,11 @@ def remote_worker(
|
||||
"id": str(id),
|
||||
"type": loader,
|
||||
"remote_data": source_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
"sync_frequency": sync_frequency
|
||||
}
|
||||
|
||||
if operation_mode == "sync":
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
upload_index(full_path, file_data)
|
||||
except Exception as e:
|
||||
logging.error("Error in remote_worker task: %s", str(e), exc_info=True)
|
||||
@@ -707,7 +712,7 @@ def sync_worker(self, frequency):
|
||||
self, source_data, name, user, source_type, frequency, retriever, doc_id
|
||||
)
|
||||
sync_counts["total_sync_count"] += 1
|
||||
sync_counts[
|
||||
sync_counts[
|
||||
"sync_success" if resp["status"] == "success" else "sync_failure"
|
||||
] += 1
|
||||
return {
|
||||
@@ -744,7 +749,7 @@ def attachment_worker(self, file_info, user):
|
||||
input_files=[local_path], exclude_hidden=True, errors="ignore"
|
||||
)
|
||||
.load_data()[0]
|
||||
.text,
|
||||
.text,
|
||||
)
|
||||
|
||||
|
||||
@@ -835,3 +840,174 @@ def agent_webhook_worker(self, agent_id, payload):
|
||||
f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
|
||||
)
|
||||
return {"status": "success", "result": result}
|
||||
|
||||
|
||||
def ingest_connector(
|
||||
self,
|
||||
job_name: str,
|
||||
user: str,
|
||||
source_type: str,
|
||||
session_token=None,
|
||||
file_ids=None,
|
||||
folder_ids=None,
|
||||
recursive=True,
|
||||
retriever: str = "classic",
|
||||
operation_mode: str = "upload",
|
||||
doc_id=None,
|
||||
sync_frequency: str = "never",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Ingestion for internal knowledge bases (GoogleDrive, etc.).
|
||||
|
||||
Args:
|
||||
job_name: Name of the ingestion job
|
||||
user: User identifier
|
||||
source_type: Type of remote source ("google_drive", "dropbox", etc.)
|
||||
session_token: Authentication token for the service
|
||||
file_ids: List of file IDs to download
|
||||
folder_ids: List of folder IDs to download
|
||||
recursive: Whether to recursively download folders
|
||||
retriever: Type of retriever to use
|
||||
operation_mode: "upload" for initial ingestion, "sync" for incremental sync
|
||||
doc_id: Document ID for sync operations (required when operation_mode="sync")
|
||||
sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")
|
||||
"""
|
||||
logging.info(f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}")
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
try:
|
||||
# Step 1: Initialize the appropriate loader
|
||||
self.update_state(state="PROGRESS", meta={"current": 10, "status": "Initializing connector"})
|
||||
|
||||
if not session_token:
|
||||
raise ValueError(f"{source_type} connector requires session_token")
|
||||
|
||||
if not ConnectorCreator.is_supported(source_type):
|
||||
raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")
|
||||
|
||||
remote_loader = ConnectorCreator.create_connector(source_type, session_token)
|
||||
|
||||
# Create a clean config for storage
|
||||
api_source_config = {
|
||||
"file_ids": file_ids or [],
|
||||
"folder_ids": folder_ids or [],
|
||||
"recursive": recursive
|
||||
}
|
||||
|
||||
# Step 2: Download files to temp directory
|
||||
self.update_state(state="PROGRESS", meta={"current": 20, "status": "Downloading files"})
|
||||
download_info = remote_loader.download_to_directory(
|
||||
temp_dir,
|
||||
api_source_config
|
||||
)
|
||||
|
||||
if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
|
||||
logging.warning(f"No files were downloaded from {source_type}")
|
||||
# Create empty result directly instead of calling a separate method
|
||||
return {
|
||||
"name": job_name,
|
||||
"user": user,
|
||||
"tokens": 0,
|
||||
"type": source_type,
|
||||
"source_config": api_source_config,
|
||||
"directory_structure": "{}",
|
||||
}
|
||||
|
||||
# Step 3: Use SimpleDirectoryReader to process downloaded files
|
||||
self.update_state(state="PROGRESS", meta={"current": 40, "status": "Processing files"})
|
||||
reader = SimpleDirectoryReader(
|
||||
input_dir=temp_dir,
|
||||
recursive=True,
|
||||
required_exts=[
|
||||
".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
|
||||
".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
|
||||
".jpg", ".jpeg",
|
||||
],
|
||||
exclude_hidden=True,
|
||||
file_metadata=metadata_from_filename,
|
||||
)
|
||||
raw_docs = reader.load_data()
|
||||
directory_structure = getattr(reader, 'directory_structure', {})
|
||||
|
||||
|
||||
|
||||
# Step 4: Process documents (chunking, embedding, etc.)
|
||||
self.update_state(state="PROGRESS", meta={"current": 60, "status": "Processing documents"})
|
||||
|
||||
chunker = Chunker(
|
||||
chunking_strategy="classic_chunk",
|
||||
max_tokens=MAX_TOKENS,
|
||||
min_tokens=MIN_TOKENS,
|
||||
duplicate_headers=False,
|
||||
)
|
||||
raw_docs = chunker.chunk(documents=raw_docs)
|
||||
|
||||
# Preserve source information in document metadata
|
||||
for doc in raw_docs:
|
||||
if hasattr(doc, 'extra_info') and doc.extra_info:
|
||||
source = doc.extra_info.get('source')
|
||||
if source and os.path.isabs(source):
|
||||
# Convert absolute path to relative path
|
||||
doc.extra_info['source'] = os.path.relpath(source, start=temp_dir)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
|
||||
if operation_mode == "upload":
|
||||
id = ObjectId()
|
||||
elif operation_mode == "sync":
|
||||
if not doc_id or not ObjectId.is_valid(doc_id):
|
||||
logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
|
||||
raise ValueError("doc_id must be provided for sync operation.")
|
||||
id = ObjectId(doc_id)
|
||||
else:
|
||||
raise ValueError(f"Invalid operation_mode: {operation_mode}")
|
||||
|
||||
vector_store_path = os.path.join(temp_dir, "vector_store")
|
||||
os.makedirs(vector_store_path, exist_ok=True)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing documents"})
|
||||
embed_and_store_documents(docs, vector_store_path, id, self)
|
||||
|
||||
tokens = count_tokens_docs(docs)
|
||||
|
||||
# Step 6: Upload index files
|
||||
file_data = {
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": "connector",
|
||||
"remote_data": json.dumps({
|
||||
"provider": source_type,
|
||||
**api_source_config
|
||||
}),
|
||||
"directory_structure": json.dumps(directory_structure),
|
||||
"sync_frequency": sync_frequency
|
||||
}
|
||||
|
||||
if operation_mode == "sync":
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
else:
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
|
||||
upload_index(vector_store_path, file_data)
|
||||
|
||||
# Ensure we mark the task as complete
|
||||
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
|
||||
|
||||
logging.info(f"Remote ingestion completed: {job_name}")
|
||||
|
||||
return {
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"tokens": tokens,
|
||||
"type": source_type,
|
||||
"id": str(id),
|
||||
"status": "complete"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error during remote ingestion: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
@@ -38,6 +38,7 @@ const endpoints = {
|
||||
UPDATE_TOOL_STATUS: '/api/update_tool_status',
|
||||
UPDATE_TOOL: '/api/update_tool',
|
||||
DELETE_TOOL: '/api/delete_tool',
|
||||
SYNC_CONNECTOR: '/api/connectors/sync',
|
||||
GET_CHUNKS: (
|
||||
docId: string,
|
||||
page: number,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import apiClient from '../client';
|
||||
import endpoints from '../endpoints';
|
||||
import { getSessionToken } from '../../utils/providerUtils';
|
||||
|
||||
const userService = {
|
||||
getConfig: (): Promise<any> => apiClient.get(endpoints.USER.CONFIG, null),
|
||||
@@ -104,6 +105,18 @@ const userService = {
|
||||
apiClient.get(endpoints.USER.DIRECTORY_STRUCTURE(docId), token),
|
||||
manageSourceFiles: (data: FormData, token: string | null): Promise<any> =>
|
||||
apiClient.postFormData(endpoints.USER.MANAGE_SOURCE_FILES, data, token),
|
||||
syncConnector: (docId: string, provider: string, token: string | null): Promise<any> => {
|
||||
const sessionToken = getSessionToken(provider);
|
||||
return apiClient.post(
|
||||
endpoints.USER.SYNC_CONNECTOR,
|
||||
{
|
||||
source_id: docId,
|
||||
session_token: sessionToken,
|
||||
provider: provider
|
||||
},
|
||||
token
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
export default userService;
|
||||
|
||||
112
frontend/src/components/ConnectorAuth.tsx
Normal file
112
frontend/src/components/ConnectorAuth.tsx
Normal file
@@ -0,0 +1,112 @@
|
||||
import React, { useRef } from 'react';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
|
||||
interface ConnectorAuthProps {
|
||||
provider: string;
|
||||
onSuccess: (data: { session_token: string; user_email: string }) => void;
|
||||
onError: (error: string) => void;
|
||||
label?: string;
|
||||
}
|
||||
|
||||
const providerLabel = (provider: string) => {
|
||||
const map: Record<string, string> = {
|
||||
google_drive: 'Google Drive',
|
||||
};
|
||||
return map[provider] || provider.replace(/_/g, ' ');
|
||||
};
|
||||
|
||||
const ConnectorAuth: React.FC<ConnectorAuthProps> = ({ provider, onSuccess, onError, label }) => {
|
||||
const token = useSelector(selectToken);
|
||||
const completedRef = useRef(false);
|
||||
const intervalRef = useRef<number | null>(null);
|
||||
|
||||
const cleanup = () => {
|
||||
if (intervalRef.current) {
|
||||
clearInterval(intervalRef.current);
|
||||
intervalRef.current = null;
|
||||
}
|
||||
window.removeEventListener('message', handleAuthMessage as any);
|
||||
};
|
||||
|
||||
const handleAuthMessage = (event: MessageEvent) => {
|
||||
const successGeneric = event.data?.type === 'connector_auth_success';
|
||||
const successProvider = event.data?.type === `${provider}_auth_success` || event.data?.type === 'google_drive_auth_success';
|
||||
const errorProvider = event.data?.type === `${provider}_auth_error` || event.data?.type === 'google_drive_auth_error';
|
||||
|
||||
if (successGeneric || successProvider) {
|
||||
completedRef.current = true;
|
||||
cleanup();
|
||||
onSuccess({
|
||||
session_token: event.data.session_token,
|
||||
user_email: event.data.user_email || 'Connected User',
|
||||
});
|
||||
} else if (errorProvider) {
|
||||
completedRef.current = true;
|
||||
cleanup();
|
||||
onError(event.data.error || 'Authentication failed');
|
||||
}
|
||||
};
|
||||
|
||||
const handleAuth = async () => {
|
||||
try {
|
||||
completedRef.current = false;
|
||||
cleanup();
|
||||
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
const authResponse = await fetch(`${apiHost}/api/connectors/auth?provider=${provider}`, {
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
});
|
||||
|
||||
if (!authResponse.ok) {
|
||||
throw new Error(`Failed to get authorization URL: ${authResponse.status}`);
|
||||
}
|
||||
|
||||
const authData = await authResponse.json();
|
||||
if (!authData.success || !authData.authorization_url) {
|
||||
throw new Error(authData.error || 'Failed to get authorization URL');
|
||||
}
|
||||
|
||||
const authWindow = window.open(
|
||||
authData.authorization_url,
|
||||
`${provider}-auth`,
|
||||
'width=500,height=600,scrollbars=yes,resizable=yes'
|
||||
);
|
||||
if (!authWindow) {
|
||||
throw new Error('Failed to open authentication window. Please allow popups.');
|
||||
}
|
||||
|
||||
window.addEventListener('message', handleAuthMessage as any);
|
||||
|
||||
const checkClosed = window.setInterval(() => {
|
||||
if (authWindow.closed) {
|
||||
clearInterval(checkClosed);
|
||||
window.removeEventListener('message', handleAuthMessage as any);
|
||||
if (!completedRef.current) {
|
||||
onError('Authentication was cancelled');
|
||||
}
|
||||
}
|
||||
}, 1000);
|
||||
intervalRef.current = checkClosed;
|
||||
} catch (error) {
|
||||
onError(error instanceof Error ? error.message : 'Authentication failed');
|
||||
}
|
||||
};
|
||||
|
||||
const buttonLabel = label || `Connect ${providerLabel(provider)}`;
|
||||
|
||||
return (
|
||||
<button
|
||||
onClick={handleAuth}
|
||||
className="w-full flex items-center justify-center gap-2 rounded-lg bg-blue-500 px-4 py-3 text-white hover:bg-blue-600 transition-colors"
|
||||
>
|
||||
<svg className="h-5 w-5" viewBox="0 0 24 24">
|
||||
<path fill="currentColor" d="M6.28 3l5.72 10H24l-5.72-10H6.28zm11.44 0L12 13l5.72 10H24L18.28 3h-.56zM0 13l5.72 10h5.72L5.72 13H0z"/>
|
||||
</svg>
|
||||
{buttonLabel}
|
||||
</button>
|
||||
);
|
||||
};
|
||||
|
||||
export default ConnectorAuth;
|
||||
|
||||
733
frontend/src/components/ConnectorTreeComponent.tsx
Normal file
733
frontend/src/components/ConnectorTreeComponent.tsx
Normal file
@@ -0,0 +1,733 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
import Chunks from './Chunks';
|
||||
import ContextMenu, { MenuOption } from './ContextMenu';
|
||||
import userService from '../api/services/userService';
|
||||
import FileIcon from '../assets/file.svg';
|
||||
import FolderIcon from '../assets/folder.svg';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ThreeDots from '../assets/three-dots.svg';
|
||||
import EyeView from '../assets/eye-view.svg';
|
||||
import SyncIcon from '../assets/sync.svg';
|
||||
import { useOutsideAlerter } from '../hooks';
|
||||
|
||||
interface FileNode {
|
||||
type?: string;
|
||||
token_count?: number;
|
||||
size_bytes?: number;
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
interface DirectoryStructure {
|
||||
[key: string]: FileNode;
|
||||
}
|
||||
|
||||
interface ConnectorTreeComponentProps {
|
||||
docId: string;
|
||||
sourceName: string;
|
||||
onBackToDocuments: () => void;
|
||||
}
|
||||
|
||||
interface SearchResult {
|
||||
name: string;
|
||||
path: string;
|
||||
isFile: boolean;
|
||||
}
|
||||
|
||||
const ConnectorTreeComponent: React.FC<ConnectorTreeComponentProps> = ({
|
||||
docId,
|
||||
sourceName,
|
||||
onBackToDocuments,
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [loading, setLoading] = useState<boolean>(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [directoryStructure, setDirectoryStructure] =
|
||||
useState<DirectoryStructure | null>(null);
|
||||
const [currentPath, setCurrentPath] = useState<string[]>([]);
|
||||
const token = useSelector(selectToken);
|
||||
const [activeMenuId, setActiveMenuId] = useState<string | null>(null);
|
||||
const menuRefs = useRef<{
|
||||
[key: string]: React.RefObject<HTMLDivElement | null>;
|
||||
}>({});
|
||||
const [selectedFile, setSelectedFile] = useState<{
|
||||
id: string;
|
||||
name: string;
|
||||
} | null>(null);
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [searchResults, setSearchResults] = useState<SearchResult[]>([]);
|
||||
const searchDropdownRef = useRef<HTMLDivElement>(null);
|
||||
const [isSyncing, setIsSyncing] = useState<boolean>(false);
|
||||
const [syncProgress, setSyncProgress] = useState<number>(0);
|
||||
const [sourceProvider, setSourceProvider] = useState<string>('');
|
||||
const [syncDone, setSyncDone] = useState<boolean>(false);
|
||||
|
||||
useOutsideAlerter(
|
||||
searchDropdownRef,
|
||||
() => {
|
||||
setSearchQuery('');
|
||||
setSearchResults([]);
|
||||
},
|
||||
[],
|
||||
false,
|
||||
);
|
||||
|
||||
const handleFileClick = (fileName: string) => {
|
||||
const fullPath = [...currentPath, fileName].join('/');
|
||||
setSelectedFile({
|
||||
id: fullPath,
|
||||
name: fileName,
|
||||
});
|
||||
};
|
||||
|
||||
const handleSync = async () => {
|
||||
if (isSyncing) return;
|
||||
|
||||
const provider = sourceProvider;
|
||||
|
||||
setIsSyncing(true);
|
||||
setSyncProgress(0);
|
||||
|
||||
try {
|
||||
const response = await userService.syncConnector(docId, provider, token);
|
||||
const data = await response.json();
|
||||
|
||||
if (data.success) {
|
||||
console.log('Sync started successfully:', data.task_id);
|
||||
setSyncProgress(10);
|
||||
|
||||
// Poll task status using userService
|
||||
const maxAttempts = 30;
|
||||
const pollInterval = 2000;
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
const statusResponse = await userService.getTaskStatus(
|
||||
data.task_id,
|
||||
token,
|
||||
);
|
||||
const statusData = await statusResponse.json();
|
||||
|
||||
console.log(
|
||||
`Task status (attempt ${attempt + 1}):`,
|
||||
statusData.status,
|
||||
);
|
||||
|
||||
if (statusData.status === 'SUCCESS') {
|
||||
setSyncProgress(100);
|
||||
console.log('Sync completed successfully');
|
||||
|
||||
// Refresh directory structure
|
||||
try {
|
||||
const refreshResponse = await userService.getDirectoryStructure(
|
||||
docId,
|
||||
token,
|
||||
);
|
||||
const refreshData = await refreshResponse.json();
|
||||
if (refreshData && refreshData.directory_structure) {
|
||||
setDirectoryStructure(refreshData.directory_structure);
|
||||
setCurrentPath([]);
|
||||
}
|
||||
if (refreshData && refreshData.provider) {
|
||||
setSourceProvider(refreshData.provider);
|
||||
}
|
||||
|
||||
setSyncDone(true);
|
||||
setTimeout(() => setSyncDone(false), 5000);
|
||||
} catch (err) {
|
||||
console.error('Error refreshing directory structure:', err);
|
||||
}
|
||||
break;
|
||||
} else if (statusData.status === 'FAILURE') {
|
||||
console.error('Sync task failed:', statusData.result);
|
||||
break;
|
||||
} else if (statusData.status === 'PROGRESS') {
|
||||
const progress = Number(
|
||||
statusData.result && statusData.result.current != null
|
||||
? statusData.result.current
|
||||
: statusData.meta && statusData.meta.current != null
|
||||
? statusData.meta.current
|
||||
: 0,
|
||||
);
|
||||
setSyncProgress(Math.max(10, progress));
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
||||
} catch (error) {
|
||||
console.error('Error polling task status:', error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.error('Sync failed:', data.error);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('Error syncing connector:', err);
|
||||
} finally {
|
||||
setIsSyncing(false);
|
||||
setSyncProgress(0);
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const fetchDirectoryStructure = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
|
||||
const directoryResponse = await userService.getDirectoryStructure(
|
||||
docId,
|
||||
token,
|
||||
);
|
||||
const directoryData = await directoryResponse.json();
|
||||
|
||||
if (directoryData && directoryData.directory_structure) {
|
||||
setDirectoryStructure(directoryData.directory_structure);
|
||||
} else {
|
||||
setError('Invalid response format');
|
||||
}
|
||||
|
||||
if (directoryData && directoryData.provider) {
|
||||
setSourceProvider(directoryData.provider);
|
||||
}
|
||||
} catch (err) {
|
||||
setError('Failed to load source information');
|
||||
console.error(err);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
if (docId) {
|
||||
fetchDirectoryStructure();
|
||||
}
|
||||
}, [docId, token]);
|
||||
|
||||
const navigateToDirectory = (dirName: string) => {
|
||||
setCurrentPath([...currentPath, dirName]);
|
||||
};
|
||||
|
||||
const navigateUp = () => {
|
||||
setCurrentPath(currentPath.slice(0, -1));
|
||||
};
|
||||
|
||||
const getCurrentDirectory = (): DirectoryStructure => {
|
||||
if (!directoryStructure) return {};
|
||||
|
||||
let current = directoryStructure;
|
||||
for (const dir of currentPath) {
|
||||
if (current[dir] && !current[dir].type) {
|
||||
current = current[dir] as DirectoryStructure;
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return current;
|
||||
};
|
||||
|
||||
|
||||
|
||||
const getMenuRef = (id: string) => {
|
||||
if (!menuRefs.current[id]) {
|
||||
menuRefs.current[id] = React.createRef();
|
||||
}
|
||||
return menuRefs.current[id];
|
||||
};
|
||||
|
||||
const handleMenuClick = (
|
||||
e: React.MouseEvent<HTMLButtonElement>,
|
||||
id: string,
|
||||
) => {
|
||||
e.stopPropagation();
|
||||
setActiveMenuId(activeMenuId === id ? null : id);
|
||||
};
|
||||
|
||||
const getActionOptions = (
|
||||
name: string,
|
||||
isFile: boolean,
|
||||
_itemId: string,
|
||||
): MenuOption[] => {
|
||||
const options: MenuOption[] = [];
|
||||
|
||||
options.push({
|
||||
icon: EyeView,
|
||||
label: t('settings.sources.view'),
|
||||
onClick: (event: React.SyntheticEvent) => {
|
||||
event.stopPropagation();
|
||||
if (isFile) {
|
||||
handleFileClick(name);
|
||||
} else {
|
||||
navigateToDirectory(name);
|
||||
}
|
||||
},
|
||||
iconWidth: 18,
|
||||
iconHeight: 18,
|
||||
variant: 'primary',
|
||||
});
|
||||
|
||||
return options;
|
||||
};
|
||||
|
||||
const calculateDirectoryStats = (
|
||||
structure: DirectoryStructure,
|
||||
): { totalSize: number; totalTokens: number } => {
|
||||
let totalSize = 0;
|
||||
let totalTokens = 0;
|
||||
|
||||
Object.entries(structure).forEach(([_, node]) => {
|
||||
if (node.type) {
|
||||
// It's a file
|
||||
totalSize += node.size_bytes || 0;
|
||||
totalTokens += node.token_count || 0;
|
||||
} else {
|
||||
// It's a directory, recurse
|
||||
const stats = calculateDirectoryStats(node);
|
||||
totalSize += stats.totalSize;
|
||||
totalTokens += stats.totalTokens;
|
||||
}
|
||||
});
|
||||
|
||||
return { totalSize, totalTokens };
|
||||
};
|
||||
|
||||
const handleBackNavigation = () => {
|
||||
if (selectedFile) {
|
||||
setSelectedFile(null);
|
||||
} else if (currentPath.length === 0) {
|
||||
if (onBackToDocuments) {
|
||||
onBackToDocuments();
|
||||
}
|
||||
} else {
|
||||
navigateUp();
|
||||
}
|
||||
};
|
||||
|
||||
const renderPathNavigation = () => {
|
||||
return (
|
||||
<div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
|
||||
{/* Left side with path navigation */}
|
||||
<div className="flex w-full items-center sm:w-auto">
|
||||
<button
|
||||
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
|
||||
onClick={handleBackNavigation}
|
||||
>
|
||||
<img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
|
||||
</button>
|
||||
|
||||
<div className="flex flex-wrap items-center">
|
||||
<span className="font-semibold break-words text-[#7D54D1]">
|
||||
{sourceName}
|
||||
</span>
|
||||
{currentPath.length > 0 && (
|
||||
<>
|
||||
<span className="mx-1 flex-shrink-0 text-gray-500">/</span>
|
||||
{currentPath.map((dir, index) => (
|
||||
<React.Fragment key={index}>
|
||||
<span className="break-words text-gray-700 dark:text-[#E0E0E0]">
|
||||
{dir}
|
||||
</span>
|
||||
{index < currentPath.length - 1 && (
|
||||
<span className="mx-1 flex-shrink-0 text-gray-500">
|
||||
/
|
||||
</span>
|
||||
)}
|
||||
</React.Fragment>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
|
||||
{renderFileSearch()}
|
||||
|
||||
{/* Sync button */}
|
||||
<button
|
||||
onClick={handleSync}
|
||||
disabled={isSyncing}
|
||||
className={`flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap transition-colors ${
|
||||
isSyncing
|
||||
? 'cursor-not-allowed bg-gray-300 text-gray-600 dark:bg-gray-600 dark:text-gray-400'
|
||||
: 'bg-purple-30 hover:bg-violets-are-blue text-white'
|
||||
}`}
|
||||
title={
|
||||
isSyncing
|
||||
? `${t('settings.sources.syncing')} ${syncProgress}%`
|
||||
: syncDone
|
||||
? 'Done'
|
||||
: t('settings.sources.sync')
|
||||
}
|
||||
>
|
||||
<img
|
||||
src={SyncIcon}
|
||||
alt={t('settings.sources.sync')}
|
||||
className={`mr-2 h-4 w-4 brightness-0 invert filter ${isSyncing ? 'animate-spin' : ''}`}
|
||||
/>
|
||||
{isSyncing
|
||||
? `${syncProgress}%`
|
||||
: syncDone
|
||||
? 'Done'
|
||||
: t('settings.sources.sync')}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const renderFileTree = (directory: DirectoryStructure) => {
|
||||
if (!directory) return [];
|
||||
|
||||
// Create parent directory row
|
||||
const parentRow =
|
||||
currentPath.length > 0
|
||||
? [
|
||||
<tr
|
||||
key="parent-dir"
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={navigateUp}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.parentFolderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
..
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
|
||||
</tr>,
|
||||
]
|
||||
: [];
|
||||
|
||||
// Sort entries: directories first, then files, both alphabetically
|
||||
const sortedEntries = Object.entries(directory).sort(
|
||||
([nameA, nodeA], [nameB, nodeB]) => {
|
||||
const isFileA = !!nodeA.type;
|
||||
const isFileB = !!nodeB.type;
|
||||
|
||||
if (isFileA !== isFileB) {
|
||||
return isFileA ? 1 : -1; // Directories first
|
||||
}
|
||||
|
||||
return nameA.localeCompare(nameB); // Alphabetical within each group
|
||||
},
|
||||
);
|
||||
|
||||
// Process directories
|
||||
const directoryRows = sortedEntries
|
||||
.filter(([_, node]) => !node.type)
|
||||
.map(([name, node]) => {
|
||||
const itemId = `dir-${name}`;
|
||||
const menuRef = getMenuRef(itemId);
|
||||
|
||||
// Calculate directory stats
|
||||
const dirStats = calculateDirectoryStats(node as DirectoryStructure);
|
||||
|
||||
return (
|
||||
<tr
|
||||
key={itemId}
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={() => navigateToDirectory(name)}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex min-w-0 items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.folderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
{name}
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
{dirStats.totalTokens > 0
|
||||
? dirStats.totalTokens.toLocaleString()
|
||||
: '-'}
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
{dirStats.totalSize > 0 ? formatBytes(dirStats.totalSize) : '-'}
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4">
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
src={ThreeDots}
|
||||
alt={t('settings.sources.menuAlt')}
|
||||
className="opacity-60 hover:opacity-100"
|
||||
/>
|
||||
</button>
|
||||
<ContextMenu
|
||||
isOpen={activeMenuId === itemId}
|
||||
setIsOpen={(isOpen) =>
|
||||
setActiveMenuId(isOpen ? itemId : null)
|
||||
}
|
||||
options={getActionOptions(name, false, itemId)}
|
||||
anchorRef={menuRef}
|
||||
position="bottom-left"
|
||||
offset={{ x: -4, y: 4 }}
|
||||
/>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
});
|
||||
|
||||
// Process files
|
||||
const fileRows = sortedEntries
|
||||
.filter(([_, node]) => !!node.type)
|
||||
.map(([name, node]) => {
|
||||
const itemId = `file-${name}`;
|
||||
const menuRef = getMenuRef(itemId);
|
||||
|
||||
return (
|
||||
<tr
|
||||
key={itemId}
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={() => handleFileClick(name)}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex min-w-0 items-center">
|
||||
<img
|
||||
src={FileIcon}
|
||||
alt={t('settings.sources.fileAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
{name}
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
{node.token_count?.toLocaleString() || '-'}
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm md:px-4 dark:text-[#E0E0E0]">
|
||||
{node.size_bytes ? formatBytes(node.size_bytes) : '-'}
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4">
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
src={ThreeDots}
|
||||
alt={t('settings.sources.menuAlt')}
|
||||
className="opacity-60 hover:opacity-100"
|
||||
/>
|
||||
</button>
|
||||
<ContextMenu
|
||||
isOpen={activeMenuId === itemId}
|
||||
setIsOpen={(isOpen) =>
|
||||
setActiveMenuId(isOpen ? itemId : null)
|
||||
}
|
||||
options={getActionOptions(name, true, itemId)}
|
||||
anchorRef={menuRef}
|
||||
position="bottom-left"
|
||||
offset={{ x: -4, y: 4 }}
|
||||
/>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
});
|
||||
|
||||
return [...parentRow, ...directoryRows, ...fileRows];
|
||||
};
|
||||
|
||||
const searchFiles = (
|
||||
query: string,
|
||||
structure: DirectoryStructure,
|
||||
currentPath: string[] = [],
|
||||
): SearchResult[] => {
|
||||
let results: SearchResult[] = [];
|
||||
|
||||
Object.entries(structure).forEach(([name, node]) => {
|
||||
const fullPath = [...currentPath, name].join('/');
|
||||
|
||||
if (name.toLowerCase().includes(query.toLowerCase())) {
|
||||
results.push({
|
||||
name,
|
||||
path: fullPath,
|
||||
isFile: !!node.type,
|
||||
});
|
||||
}
|
||||
|
||||
if (!node.type) {
|
||||
// If it's a directory, search recursively
|
||||
results = [
|
||||
...results,
|
||||
...searchFiles(query, node as DirectoryStructure, [
|
||||
...currentPath,
|
||||
name,
|
||||
]),
|
||||
];
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
};
|
||||
|
||||
const handleSearchSelect = (result: SearchResult) => {
|
||||
if (result.isFile) {
|
||||
const pathParts = result.path.split('/');
|
||||
const fileName = pathParts.pop() || '';
|
||||
setCurrentPath(pathParts);
|
||||
|
||||
setSelectedFile({
|
||||
id: result.path,
|
||||
name: fileName,
|
||||
});
|
||||
} else {
|
||||
setCurrentPath(result.path.split('/'));
|
||||
setSelectedFile(null);
|
||||
}
|
||||
setSearchQuery('');
|
||||
setSearchResults([]);
|
||||
};
|
||||
|
||||
const renderFileSearch = () => {
|
||||
return (
|
||||
<div className="relative w-52" ref={searchDropdownRef}>
|
||||
<input
|
||||
type="text"
|
||||
value={searchQuery}
|
||||
onChange={(e) => {
|
||||
setSearchQuery(e.target.value);
|
||||
if (directoryStructure) {
|
||||
setSearchResults(searchFiles(e.target.value, directoryStructure));
|
||||
}
|
||||
}}
|
||||
placeholder={t('settings.sources.searchFiles')}
|
||||
className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
|
||||
/>
|
||||
|
||||
{searchQuery && (
|
||||
<div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
|
||||
<div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
|
||||
{searchResults.length === 0 ? (
|
||||
<div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
{t('settings.sources.noResults')}
|
||||
</div>
|
||||
) : (
|
||||
searchResults.map((result, index) => (
|
||||
<div
|
||||
key={index}
|
||||
onClick={() => handleSearchSelect(result)}
|
||||
title={result.path}
|
||||
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${
|
||||
index !== searchResults.length - 1
|
||||
? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
<img
|
||||
src={result.isFile ? FileIcon : FolderIcon}
|
||||
alt={
|
||||
result.isFile
|
||||
? t('settings.sources.fileAlt')
|
||||
: t('settings.sources.folderAlt')
|
||||
}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
|
||||
{result.path.split('/').pop() || result.path}
|
||||
</span>
|
||||
</div>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const handleFileSearch = (searchQuery: string) => {
|
||||
if (directoryStructure) {
|
||||
return searchFiles(searchQuery, directoryStructure);
|
||||
}
|
||||
return [];
|
||||
};
|
||||
|
||||
const handleFileSelect = (path: string) => {
|
||||
const pathParts = path.split('/');
|
||||
const fileName = pathParts.pop() || '';
|
||||
setCurrentPath(pathParts);
|
||||
setSelectedFile({
|
||||
id: path,
|
||||
name: fileName,
|
||||
});
|
||||
};
|
||||
|
||||
const currentDirectory = getCurrentDirectory();
|
||||
|
||||
const navigateToPath = (index: number) => {
|
||||
setCurrentPath(currentPath.slice(0, index + 1));
|
||||
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
{selectedFile ? (
|
||||
<div className="flex">
|
||||
<div className="flex-1">
|
||||
<Chunks
|
||||
documentId={docId}
|
||||
documentName={sourceName}
|
||||
handleGoBack={() => setSelectedFile(null)}
|
||||
path={selectedFile.id}
|
||||
onFileSearch={handleFileSearch}
|
||||
onFileSelect={handleFileSelect}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex w-full max-w-full flex-col overflow-hidden">
|
||||
<div className="mb-2">{renderPathNavigation()}</div>
|
||||
|
||||
<div className="w-full">
|
||||
<div className="overflow-x-auto rounded-[6px] border border-[#D1D9E0] dark:border-[#6A6A6A]">
|
||||
<table className="w-full min-w-[600px] table-auto bg-transparent">
|
||||
<thead className="bg-gray-100 dark:bg-[#27282D]">
|
||||
<tr className="border-b border-[#D1D9E0] dark:border-[#6A6A6A]">
|
||||
<th className="min-w-[200px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.fileName')}
|
||||
</th>
|
||||
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.tokens')}
|
||||
</th>
|
||||
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.size')}
|
||||
</th>
|
||||
<th className="w-10 px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>{renderFileTree(getCurrentDirectory())}</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ConnectorTreeComponent;
|
||||
@@ -2,6 +2,7 @@ import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import Chunks from './Chunks';
|
||||
import ContextMenu, { MenuOption } from './ContextMenu';
|
||||
import userService from '../api/services/userService';
|
||||
@@ -128,13 +129,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
}, [docId, token]);
|
||||
|
||||
const formatBytes = (bytes: number): string => {
|
||||
if (bytes === 0) return '0 Bytes';
|
||||
const k = 1024;
|
||||
const sizes = ['Bytes', 'KB', 'MB', 'GB'];
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||||
};
|
||||
|
||||
|
||||
const navigateToDirectory = (dirName: string) => {
|
||||
setCurrentPath((prev) => [...prev, dirName]);
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Pre-loaded",
|
||||
"private": "Private",
|
||||
"sync": "Sync",
|
||||
"syncing": "Syncing...",
|
||||
"syncFrequency": {
|
||||
"never": "Never",
|
||||
"daily": "Daily",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Precargado",
|
||||
"private": "Privado",
|
||||
"sync": "Sincronizar",
|
||||
"syncing": "Sincronizando...",
|
||||
"syncFrequency": {
|
||||
"never": "Nunca",
|
||||
"daily": "Diario",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "プリロード済み",
|
||||
"private": "プライベート",
|
||||
"sync": "同期",
|
||||
"syncing": "同期中...",
|
||||
"syncFrequency": {
|
||||
"never": "なし",
|
||||
"daily": "毎日",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Предзагруженный",
|
||||
"private": "Частный",
|
||||
"sync": "Синхронизация",
|
||||
"syncing": "Синхронизация...",
|
||||
"syncFrequency": {
|
||||
"never": "Никогда",
|
||||
"daily": "Ежедневно",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "預載入",
|
||||
"private": "私人",
|
||||
"sync": "同步",
|
||||
"syncing": "同步中...",
|
||||
"syncFrequency": {
|
||||
"never": "從不",
|
||||
"daily": "每天",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "预加载",
|
||||
"private": "私有",
|
||||
"sync": "同步",
|
||||
"syncing": "同步中...",
|
||||
"syncFrequency": {
|
||||
"never": "从不",
|
||||
"daily": "每天",
|
||||
|
||||
@@ -29,6 +29,7 @@ import {
|
||||
import Upload from '../upload/Upload';
|
||||
import { formatDate } from '../utils/dateTimeUtils';
|
||||
import FileTreeComponent from '../components/FileTreeComponent';
|
||||
import ConnectorTreeComponent from '../components/ConnectorTreeComponent';
|
||||
import Chunks from '../components/Chunks';
|
||||
|
||||
const formatTokens = (tokens: number): string => {
|
||||
@@ -271,19 +272,27 @@ export default function Sources({
|
||||
|
||||
return documentToView ? (
|
||||
<div className="mt-8 flex flex-col">
|
||||
{documentToView.isNested ? (
|
||||
<FileTreeComponent
|
||||
{documentToView.isNested ? (
|
||||
documentToView.type === 'connector' ? (
|
||||
<ConnectorTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
) : (
|
||||
<Chunks
|
||||
documentId={documentToView.id || ''}
|
||||
documentName={documentToView.name}
|
||||
handleGoBack={() => setDocumentToView(undefined)}
|
||||
<FileTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
)}
|
||||
)
|
||||
) : (
|
||||
<Chunks
|
||||
documentId={documentToView.id || ''}
|
||||
documentName={documentToView.name}
|
||||
handleGoBack={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<div className="mt-8 flex w-full max-w-full flex-col overflow-hidden">
|
||||
|
||||
@@ -4,6 +4,9 @@ import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
import userService from '../api/services/userService';
|
||||
import { getSessionToken, setSessionToken, removeSessionToken } from '../utils/providerUtils';
|
||||
import { formatDate } from '../utils/dateTimeUtils';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import FileUpload from '../assets/file_upload.svg';
|
||||
import WebsiteCollect from '../assets/website_collect.svg';
|
||||
import Dropdown from '../components/Dropdown';
|
||||
@@ -25,6 +28,9 @@ import {
|
||||
IngestorFormSchemas,
|
||||
IngestorType,
|
||||
} from './types/ingestor';
|
||||
import FileIcon from '../assets/file.svg';
|
||||
import FolderIcon from '../assets/folder.svg';
|
||||
import ConnectorAuth from '../components/ConnectorAuth';
|
||||
|
||||
function Upload({
|
||||
receivedFile = [],
|
||||
@@ -48,6 +54,21 @@ function Upload({
|
||||
const [activeTab, setActiveTab] = useState<string | null>(renderTab);
|
||||
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
|
||||
|
||||
// Google Drive state
|
||||
const [isGoogleDriveConnected, setIsGoogleDriveConnected] = useState(false);
|
||||
const [googleDriveFiles, setGoogleDriveFiles] = useState<any[]>([]);
|
||||
const [selectedFiles, setSelectedFiles] = useState<string[]>([]);
|
||||
const [isLoadingFiles, setIsLoadingFiles] = useState(false);
|
||||
const [isAuthenticating, setIsAuthenticating] = useState(false);
|
||||
const [userEmail, setUserEmail] = useState<string>('');
|
||||
const [authError, setAuthError] = useState<string>('');
|
||||
const [currentFolderId, setCurrentFolderId] = useState<string | null>(null);
|
||||
const [folderPath, setFolderPath] = useState<Array<{id: string | null, name: string}>>([{id: null, name: 'My Drive'}]);
|
||||
|
||||
const [nextPageToken, setNextPageToken] = useState<string | null>(null);
|
||||
const [hasMoreFiles, setHasMoreFiles] = useState<boolean>(false);
|
||||
const scrollContainerRef = useRef<HTMLDivElement | null>(null);
|
||||
|
||||
const renderFormFields = () => {
|
||||
const schema = IngestorFormSchemas[ingestor.type];
|
||||
if (!schema) return null;
|
||||
@@ -204,6 +225,7 @@ function Upload({
|
||||
{ label: 'Link', value: 'url' },
|
||||
{ label: 'GitHub', value: 'github' },
|
||||
{ label: 'Reddit', value: 'reddit' },
|
||||
{ label: 'Google Drive', value: 'google_drive' },
|
||||
];
|
||||
|
||||
const sourceDocs = useSelector(selectSourceDocs);
|
||||
@@ -315,8 +337,7 @@ function Upload({
|
||||
data?.find(
|
||||
(d: Doc) => d.type?.toLowerCase() === 'local',
|
||||
),
|
||||
),
|
||||
);
|
||||
));
|
||||
});
|
||||
setProgress(
|
||||
(progress) =>
|
||||
@@ -428,29 +449,32 @@ function Upload({
|
||||
formData.append('user', 'local');
|
||||
formData.append('source', ingestor.type);
|
||||
|
||||
const defaultConfig = IngestorDefaultConfigs[ingestor.type].config;
|
||||
let configData;
|
||||
|
||||
const mergedConfig = { ...defaultConfig, ...ingestor.config };
|
||||
const filteredConfig = Object.entries(mergedConfig).reduce(
|
||||
(acc, [key, value]) => {
|
||||
const field = IngestorFormSchemas[ingestor.type].find(
|
||||
(f) => f.name === key,
|
||||
);
|
||||
// Include the field if:
|
||||
// 1. It's required, or
|
||||
// 2. It's optional and has a non-empty value
|
||||
if (
|
||||
field?.required ||
|
||||
(value !== undefined && value !== null && value !== '')
|
||||
) {
|
||||
acc[key] = value;
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
{} as Record<string, any>,
|
||||
);
|
||||
if (ingestor.type === 'google_drive') {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
|
||||
formData.append('data', JSON.stringify(filteredConfig));
|
||||
const selectedItems = googleDriveFiles.filter(file => selectedFiles.includes(file.id));
|
||||
const selectedFolderIds = selectedItems
|
||||
.filter(item => item.type === 'application/vnd.google-apps.folder' || item.isFolder)
|
||||
.map(folder => folder.id);
|
||||
|
||||
const selectedFileIds = selectedItems
|
||||
.filter(item => item.type !== 'application/vnd.google-apps.folder' && !item.isFolder)
|
||||
.map(file => file.id);
|
||||
|
||||
configData = {
|
||||
file_ids: selectedFileIds,
|
||||
folder_ids: selectedFolderIds,
|
||||
recursive: ingestor.config.recursive,
|
||||
session_token: sessionToken || null
|
||||
};
|
||||
} else {
|
||||
|
||||
configData = { ...ingestor.config };
|
||||
}
|
||||
|
||||
formData.append('data', JSON.stringify(configData));
|
||||
|
||||
const apiHost: string = import.meta.env.VITE_API_HOST;
|
||||
const xhr = new XMLHttpRequest();
|
||||
@@ -477,6 +501,171 @@ function Upload({
|
||||
xhr.setRequestHeader('Authorization', `Bearer ${token}`);
|
||||
xhr.send(formData);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (ingestor.type === 'google_drive') {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
|
||||
if (sessionToken) {
|
||||
// Auto-authenticate if session token exists
|
||||
setIsGoogleDriveConnected(true);
|
||||
setAuthError('');
|
||||
|
||||
// Fetch user email and files using the existing session token
|
||||
|
||||
fetchUserEmailAndLoadFiles(sessionToken);
|
||||
}
|
||||
}
|
||||
}, [ingestor.type]);
|
||||
|
||||
const fetchUserEmailAndLoadFiles = async (sessionToken: string) => {
|
||||
try {
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
|
||||
const validateResponse = await fetch(`${apiHost}/api/connectors/validate-session`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${token}`
|
||||
},
|
||||
body: JSON.stringify({ provider: 'google_drive', session_token: sessionToken })
|
||||
});
|
||||
|
||||
if (!validateResponse.ok) {
|
||||
removeSessionToken(ingestor.type);
|
||||
setIsGoogleDriveConnected(false);
|
||||
setAuthError('Session expired. Please reconnect to Google Drive.');
|
||||
return;
|
||||
}
|
||||
|
||||
const validateData = await validateResponse.json();
|
||||
|
||||
if (validateData.success) {
|
||||
setUserEmail(validateData.user_email || 'Connected User');
|
||||
// reset pagination state and files
|
||||
setGoogleDriveFiles([]);
|
||||
|
||||
|
||||
|
||||
setNextPageToken(null);
|
||||
setHasMoreFiles(false);
|
||||
loadGoogleDriveFiles(sessionToken, null, null, false);
|
||||
} else {
|
||||
removeSessionToken(ingestor.type);
|
||||
setIsGoogleDriveConnected(false);
|
||||
setAuthError(validateData.error || 'Session expired. Please reconnect your Google Drive account and make sure to grant offline access.');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error validating Google Drive session:', error);
|
||||
setAuthError('Failed to validate session. Please reconnect.');
|
||||
setIsGoogleDriveConnected(false);
|
||||
}
|
||||
};
|
||||
|
||||
const loadGoogleDriveFiles = async (
|
||||
sessionToken: string,
|
||||
folderId?: string | null,
|
||||
pageToken?: string | null,
|
||||
append: boolean = false,
|
||||
) => {
|
||||
setIsLoadingFiles(true);
|
||||
|
||||
try {
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
const requestBody: any = {
|
||||
session_token: sessionToken,
|
||||
limit: 10,
|
||||
};
|
||||
if (folderId) {
|
||||
requestBody.folder_id = folderId;
|
||||
}
|
||||
if (pageToken) {
|
||||
requestBody.page_token = pageToken;
|
||||
}
|
||||
|
||||
const filesResponse = await fetch(`${apiHost}/api/connectors/files`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${token}`
|
||||
},
|
||||
body: JSON.stringify({ ...requestBody, provider: 'google_drive' })
|
||||
});
|
||||
|
||||
if (!filesResponse.ok) {
|
||||
throw new Error(`Failed to load files: ${filesResponse.status}`);
|
||||
}
|
||||
|
||||
const filesData = await filesResponse.json();
|
||||
|
||||
if (filesData.success && Array.isArray(filesData.files)) {
|
||||
setGoogleDriveFiles(prev => append ? [...prev, ...filesData.files] : filesData.files);
|
||||
setNextPageToken(filesData.next_page_token || null);
|
||||
setHasMoreFiles(Boolean(filesData.has_more));
|
||||
} else {
|
||||
throw new Error(filesData.error || 'Failed to load files');
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('Error loading Google Drive files:', error);
|
||||
setAuthError(error instanceof Error ? error.message : 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.');
|
||||
} finally {
|
||||
setIsLoadingFiles(false);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Handle file selection
|
||||
const handleFileSelect = (fileId: string) => {
|
||||
setSelectedFiles(prev => {
|
||||
if (prev.includes(fileId)) {
|
||||
return prev.filter(id => id !== fileId);
|
||||
} else {
|
||||
return [...prev, fileId];
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
const handleFolderClick = (folderId: string, folderName: string) => {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
if (sessionToken) {
|
||||
setCurrentFolderId(folderId);
|
||||
setFolderPath(prev => [...prev, {id: folderId, name: folderName}]);
|
||||
|
||||
setGoogleDriveFiles([]);
|
||||
setNextPageToken(null);
|
||||
setHasMoreFiles(false);
|
||||
setSelectedFiles([]);
|
||||
loadGoogleDriveFiles(sessionToken, folderId, null, false);
|
||||
}
|
||||
};
|
||||
|
||||
const navigateBack = (index: number) => {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
if (sessionToken) {
|
||||
const newPath = folderPath.slice(0, index + 1);
|
||||
const targetFolderId = newPath[newPath.length - 1]?.id;
|
||||
|
||||
setCurrentFolderId(targetFolderId as string | null);
|
||||
setFolderPath(newPath);
|
||||
|
||||
setGoogleDriveFiles([]);
|
||||
setNextPageToken(null);
|
||||
setHasMoreFiles(false);
|
||||
setSelectedFiles([]);
|
||||
loadGoogleDriveFiles(sessionToken, targetFolderId ?? null, null, false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleSelectAll = () => {
|
||||
if (selectedFiles.length === googleDriveFiles.length) {
|
||||
setSelectedFiles([]);
|
||||
} else {
|
||||
setSelectedFiles(googleDriveFiles.map(file => file.id));
|
||||
}
|
||||
};
|
||||
|
||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||
onDrop,
|
||||
multiple: true,
|
||||
@@ -515,6 +704,10 @@ function Upload({
|
||||
if (!remoteName?.trim()) {
|
||||
return true;
|
||||
}
|
||||
if (ingestor.type === 'google_drive') {
|
||||
return !isGoogleDriveConnected || selectedFiles.length === 0;
|
||||
}
|
||||
|
||||
const formFields: FormField[] = IngestorFormSchemas[ingestor.type];
|
||||
for (const field of formFields) {
|
||||
if (field.required) {
|
||||
@@ -636,7 +829,7 @@ function Upload({
|
||||
{files.map((file) => (
|
||||
<p
|
||||
key={file.name}
|
||||
className="text-gray-6000 truncate overflow-hidden text-ellipsis"
|
||||
className="text-gray-6000 dark:text-[#ececf1] truncate overflow-hidden text-ellipsis"
|
||||
title={file.name}
|
||||
>
|
||||
{file.name}
|
||||
@@ -679,6 +872,202 @@ function Upload({
|
||||
required={true}
|
||||
labelBgClassName="bg-white dark:bg-charleston-green-2"
|
||||
/>
|
||||
{ingestor.type === 'google_drive' && (
|
||||
<div className="space-y-4">
|
||||
{authError && (
|
||||
<div className="rounded-lg border border-red-200 bg-red-50 p-3 dark:border-red-600 dark:bg-red-900/20">
|
||||
<p className="text-sm text-red-600 dark:text-red-400">
|
||||
⚠️ {authError}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!isGoogleDriveConnected ? (
|
||||
<ConnectorAuth
|
||||
provider="google_drive"
|
||||
onSuccess={(data) => {
|
||||
setUserEmail(data.user_email);
|
||||
setIsGoogleDriveConnected(true);
|
||||
setIsAuthenticating(false);
|
||||
setAuthError('');
|
||||
|
||||
if (data.session_token) {
|
||||
setSessionToken(ingestor.type, data.session_token);
|
||||
loadGoogleDriveFiles(data.session_token, null);
|
||||
}
|
||||
}}
|
||||
onError={(error) => {
|
||||
setAuthError(error);
|
||||
setIsAuthenticating(false);
|
||||
setIsGoogleDriveConnected(false);
|
||||
}}
|
||||
/>
|
||||
) : (
|
||||
<div className="space-y-4">
|
||||
{/* Connection Status */}
|
||||
<div className="w-full flex items-center justify-between rounded-lg bg-green-500 px-4 py-2 text-white text-sm">
|
||||
<div className="flex items-center gap-2">
|
||||
<svg className="h-4 w-4" viewBox="0 0 24 24">
|
||||
<path fill="currentColor" d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"/>
|
||||
</svg>
|
||||
<span>Connected as {userEmail}</span>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => {
|
||||
removeSessionToken(ingestor.type);
|
||||
|
||||
setIsGoogleDriveConnected(false);
|
||||
setGoogleDriveFiles([]);
|
||||
setSelectedFiles([]);
|
||||
setUserEmail('');
|
||||
setAuthError('');
|
||||
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
fetch(`${apiHost}/api/connectors/disconnect`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${token}`
|
||||
},
|
||||
body: JSON.stringify({ provider: ingestor.type, session_token: getSessionToken(ingestor.type) })
|
||||
}).catch(err => console.error('Error disconnecting from Google Drive:', err));
|
||||
}}
|
||||
className="text-white hover:text-gray-200 text-xs underline"
|
||||
>
|
||||
Disconnect
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* File Browser */}
|
||||
<div className="border border-gray-200 rounded-lg dark:border-gray-600">
|
||||
<div className="p-3 border-b border-gray-200 dark:border-gray-600 bg-gray-50 dark:bg-gray-800 rounded-t-lg">
|
||||
{/* Breadcrumb navigation */}
|
||||
<div className="flex items-center gap-1 mb-2">
|
||||
{folderPath.map((path, index) => (
|
||||
<div key={path.id || 'root'} className="flex items-center gap-1">
|
||||
{index > 0 && <span className="text-gray-400">/</span>}
|
||||
<button
|
||||
onClick={() => navigateBack(index)}
|
||||
className="text-sm text-blue-600 hover:text-blue-800 dark:text-blue-400 hover:underline"
|
||||
disabled={index === folderPath.length - 1}
|
||||
>
|
||||
{path.name}
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Select Files from Google Drive
|
||||
</h4>
|
||||
{googleDriveFiles.length > 0 && (
|
||||
<button
|
||||
onClick={handleSelectAll}
|
||||
className="text-xs text-blue-600 hover:text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
{selectedFiles.length === googleDriveFiles.length ? 'Deselect All' : 'Select All'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{selectedFiles.length > 0 && (
|
||||
<p className="text-xs text-gray-500 mt-1">
|
||||
{selectedFiles.length} file{selectedFiles.length !== 1 ? 's' : ''} selected
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="max-h-72 overflow-y-auto" ref={scrollContainerRef}>
|
||||
{isLoadingFiles && googleDriveFiles.length === 0 ? (
|
||||
<div className="p-4 text-center">
|
||||
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
|
||||
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
|
||||
Loading files...
|
||||
</div>
|
||||
</div>
|
||||
) : googleDriveFiles.length === 0 ? (
|
||||
<div className="p-4 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
No files found in your Google Drive
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="divide-y divide-gray-200 dark:divide-gray-600">
|
||||
{googleDriveFiles.map((file) => (
|
||||
<div
|
||||
key={file.id}
|
||||
className={`p-3 transition-colors ${
|
||||
selectedFiles.includes(file.id) ? 'bg-blue-50 dark:bg-blue-900/20' : ''
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex-shrink-0">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={selectedFiles.includes(file.id)}
|
||||
onChange={() => handleFileSelect(file.id)}
|
||||
className="h-4 w-4 text-blue-600 rounded border-gray-300 focus:ring-blue-500"
|
||||
/>
|
||||
</div>
|
||||
{file.type === 'application/vnd.google-apps.folder' || file.isFolder ? (
|
||||
<div
|
||||
className="text-lg cursor-pointer hover:text-blue-600"
|
||||
onClick={() => handleFolderClick(file.id, file.name)}
|
||||
>
|
||||
<img src={FolderIcon} alt="Folder" className="h-6 w-6" />
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-lg">
|
||||
<img src={FileIcon} alt="File" className="h-6 w-6" />
|
||||
</div>
|
||||
)}
|
||||
<div className="flex-1 min-w-0">
|
||||
<p
|
||||
className={`text-sm font-medium truncate dark:text-[#ececf1] ${
|
||||
file.type === 'application/vnd.google-apps.folder' || file.isFolder
|
||||
? 'cursor-pointer hover:text-blue-600'
|
||||
: ''
|
||||
}`}
|
||||
onClick={() => {
|
||||
if (file.type === 'application/vnd.google-apps.folder' || file.isFolder) {
|
||||
handleFolderClick(file.id, file.name);
|
||||
}
|
||||
}}
|
||||
>
|
||||
{file.name}
|
||||
</p>
|
||||
<p className="text-xs text-gray-500 dark:text-gray-400">
|
||||
{file.size && `${formatBytes(file.size)} • `}Modified {formatDate(file.modifiedTime)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className="p-4 flex items-center justify-center border-t border-gray-100 dark:border-gray-800">
|
||||
{isLoadingFiles && (
|
||||
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
|
||||
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
|
||||
Loading more files...
|
||||
</div>
|
||||
)}
|
||||
{!hasMoreFiles && !isLoadingFiles && (
|
||||
<span className="text-sm text-gray-500 dark:text-gray-400">All files loaded</span>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="hidden" aria-hidden="true">
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{renderFormFields()}
|
||||
{IngestorFormSchemas[ingestor.type].some(
|
||||
(field) => field.advanced,
|
||||
@@ -719,7 +1108,10 @@ function Upload({
|
||||
: 'bg-purple-30 hover:bg-violets-are-blue cursor-pointer text-white'
|
||||
}`}
|
||||
>
|
||||
{t('modals.uploadDoc.train')}
|
||||
{ingestor.type === 'google_drive' && selectedFiles.length > 0
|
||||
? `Train with ${selectedFiles.length} file${selectedFiles.length !== 1 ? 's' : ''}`
|
||||
: t('modals.uploadDoc.train')
|
||||
}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
@@ -727,6 +1119,30 @@ function Upload({
|
||||
);
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
const scrollContainer = scrollContainerRef.current;
|
||||
|
||||
const handleScroll = () => {
|
||||
if (!scrollContainer) return;
|
||||
|
||||
const { scrollTop, scrollHeight, clientHeight } = scrollContainer;
|
||||
const isNearBottom = scrollHeight - scrollTop - clientHeight < 50;
|
||||
|
||||
if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
if (sessionToken) {
|
||||
loadGoogleDriveFiles(sessionToken, currentFolderId, nextPageToken, true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
scrollContainer?.addEventListener('scroll', handleScroll);
|
||||
|
||||
return () => {
|
||||
scrollContainer?.removeEventListener('scroll', handleScroll);
|
||||
};
|
||||
}, [hasMoreFiles, isLoadingFiles, nextPageToken, currentFolderId, ingestor.type]);
|
||||
|
||||
return (
|
||||
<WrapperModal
|
||||
isPerformingTask={progress !== undefined && progress.percentage < 100}
|
||||
|
||||
@@ -22,7 +22,14 @@ export interface UrlIngestorConfig extends BaseIngestorConfig {
|
||||
url: string;
|
||||
}
|
||||
|
||||
export type IngestorType = 'crawler' | 'github' | 'reddit' | 'url';
|
||||
export interface GoogleDriveIngestorConfig extends BaseIngestorConfig {
|
||||
folder_id?: string;
|
||||
file_ids?: string;
|
||||
recursive?: boolean;
|
||||
token_info?: any;
|
||||
}
|
||||
|
||||
export type IngestorType = 'crawler' | 'github' | 'reddit' | 'url' | 'google_drive';
|
||||
|
||||
export interface IngestorConfig {
|
||||
type: IngestorType;
|
||||
@@ -31,7 +38,8 @@ export interface IngestorConfig {
|
||||
| RedditIngestorConfig
|
||||
| GithubIngestorConfig
|
||||
| CrawlerIngestorConfig
|
||||
| UrlIngestorConfig;
|
||||
| UrlIngestorConfig
|
||||
| GoogleDriveIngestorConfig;
|
||||
}
|
||||
|
||||
export type IngestorFormData = {
|
||||
@@ -109,6 +117,14 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
google_drive: [
|
||||
{
|
||||
name: 'recursive',
|
||||
label: 'Include subfolders',
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
export const IngestorDefaultConfigs: Record<
|
||||
@@ -143,4 +159,12 @@ export const IngestorDefaultConfigs: Record<
|
||||
repo_url: '',
|
||||
} as GithubIngestorConfig,
|
||||
},
|
||||
google_drive: {
|
||||
name: '',
|
||||
config: {
|
||||
folder_id: '',
|
||||
file_ids: '',
|
||||
recursive: true,
|
||||
} as GoogleDriveIngestorConfig,
|
||||
},
|
||||
};
|
||||
|
||||
17
frontend/src/utils/providerUtils.ts
Normal file
17
frontend/src/utils/providerUtils.ts
Normal file
@@ -0,0 +1,17 @@
|
||||
/**
|
||||
* Utility functions for managing session tokens for different cloud service providers.
|
||||
* Follows the convention: {provider}_session_token
|
||||
*/
|
||||
|
||||
|
||||
export const getSessionToken = (provider: string): string | null => {
|
||||
return localStorage.getItem(`${provider}_session_token`);
|
||||
};
|
||||
|
||||
export const setSessionToken = (provider: string, token: string): void => {
|
||||
localStorage.setItem(`${provider}_session_token`, token);
|
||||
};
|
||||
|
||||
export const removeSessionToken = (provider: string): void => {
|
||||
localStorage.removeItem(`${provider}_session_token`);
|
||||
};
|
||||
@@ -2,3 +2,12 @@ export function truncate(str: string, n: number) {
|
||||
// slices long strings and ends with ...
|
||||
return str.length > n ? str.slice(0, n - 1) + '...' : str;
|
||||
}
|
||||
|
||||
export function formatBytes(bytes: number | null): string {
|
||||
if (!bytes || bytes <= 0) return '';
|
||||
|
||||
const k = 1024;
|
||||
const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user