mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-30 09:03:15 +00:00
Merge branch 'main' into feat/remote-mcp
This commit is contained in:
@@ -5,19 +5,17 @@ from typing import Dict, Generator, List, Optional
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from application.agents.tools.tool_action_parser import ToolActionParser
|
||||
from application.agents.tools.tool_manager import ToolManager
|
||||
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
|
||||
from application.llm.handlers.handler_creator import LLMHandlerCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.logging import build_stack_data, log_activity, LogContext
|
||||
from application.retriever.base import BaseRetriever
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseAgent(ABC):
|
||||
def __init__(
|
||||
@@ -157,7 +155,7 @@ class BaseAgent(ABC):
|
||||
}
|
||||
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
|
||||
self.tool_calls.append(tool_call_data)
|
||||
return f"Failed to parse tool call.", call_id
|
||||
return "Failed to parse tool call.", call_id
|
||||
|
||||
# Check if tool_id exists in available tools
|
||||
if tool_id not in tools_dict:
|
||||
|
||||
@@ -69,11 +69,8 @@ class StreamProcessor:
|
||||
self.decoded_token.get("sub") if self.decoded_token is not None else None
|
||||
)
|
||||
self.conversation_id = self.data.get("conversation_id")
|
||||
self.source = (
|
||||
{"active_docs": self.data["active_docs"]}
|
||||
if "active_docs" in self.data
|
||||
else {}
|
||||
)
|
||||
self.source = {}
|
||||
self.all_sources = []
|
||||
self.attachments = []
|
||||
self.history = []
|
||||
self.agent_config = {}
|
||||
@@ -85,6 +82,8 @@ class StreamProcessor:
|
||||
|
||||
def initialize(self):
|
||||
"""Initialize all required components for processing"""
|
||||
self._configure_agent()
|
||||
self._configure_source()
|
||||
self._configure_retriever()
|
||||
self._configure_agent()
|
||||
self._load_conversation_history()
|
||||
@@ -171,13 +170,77 @@ class StreamProcessor:
|
||||
source = data.get("source")
|
||||
if isinstance(source, DBRef):
|
||||
source_doc = self.db.dereference(source)
|
||||
data["source"] = str(source_doc["_id"])
|
||||
data["retriever"] = source_doc.get("retriever", data.get("retriever"))
|
||||
data["chunks"] = source_doc.get("chunks", data.get("chunks"))
|
||||
if source_doc:
|
||||
data["source"] = str(source_doc["_id"])
|
||||
data["retriever"] = source_doc.get("retriever", data.get("retriever"))
|
||||
data["chunks"] = source_doc.get("chunks", data.get("chunks"))
|
||||
else:
|
||||
data["source"] = None
|
||||
elif source == "default":
|
||||
data["source"] = "default"
|
||||
else:
|
||||
data["source"] = None
|
||||
# Handle multiple sources
|
||||
|
||||
sources = data.get("sources", [])
|
||||
if sources and isinstance(sources, list):
|
||||
sources_list = []
|
||||
for i, source_ref in enumerate(sources):
|
||||
if source_ref == "default":
|
||||
processed_source = {
|
||||
"id": "default",
|
||||
"retriever": "classic",
|
||||
"chunks": data.get("chunks", "2"),
|
||||
}
|
||||
sources_list.append(processed_source)
|
||||
elif isinstance(source_ref, DBRef):
|
||||
source_doc = self.db.dereference(source_ref)
|
||||
if source_doc:
|
||||
processed_source = {
|
||||
"id": str(source_doc["_id"]),
|
||||
"retriever": source_doc.get("retriever", "classic"),
|
||||
"chunks": source_doc.get("chunks", data.get("chunks", "2")),
|
||||
}
|
||||
sources_list.append(processed_source)
|
||||
data["sources"] = sources_list
|
||||
else:
|
||||
data["sources"] = []
|
||||
return data
|
||||
|
||||
def _configure_source(self):
|
||||
"""Configure the source based on agent data"""
|
||||
api_key = self.data.get("api_key") or self.agent_key
|
||||
|
||||
if api_key:
|
||||
agent_data = self._get_data_from_api_key(api_key)
|
||||
|
||||
if agent_data.get("sources") and len(agent_data["sources"]) > 0:
|
||||
source_ids = [
|
||||
source["id"] for source in agent_data["sources"] if source.get("id")
|
||||
]
|
||||
if source_ids:
|
||||
self.source = {"active_docs": source_ids}
|
||||
else:
|
||||
self.source = {}
|
||||
self.all_sources = agent_data["sources"]
|
||||
elif agent_data.get("source"):
|
||||
self.source = {"active_docs": agent_data["source"]}
|
||||
self.all_sources = [
|
||||
{
|
||||
"id": agent_data["source"],
|
||||
"retriever": agent_data.get("retriever", "classic"),
|
||||
}
|
||||
]
|
||||
else:
|
||||
self.source = {}
|
||||
self.all_sources = []
|
||||
return
|
||||
if "active_docs" in self.data:
|
||||
self.source = {"active_docs": self.data["active_docs"]}
|
||||
return
|
||||
self.source = {}
|
||||
self.all_sources = []
|
||||
|
||||
def _configure_agent(self):
|
||||
"""Configure the agent based on request data"""
|
||||
agent_id = self.data.get("agent_id")
|
||||
@@ -203,7 +266,13 @@ class StreamProcessor:
|
||||
if data_key.get("retriever"):
|
||||
self.retriever_config["retriever_name"] = data_key["retriever"]
|
||||
if data_key.get("chunks") is not None:
|
||||
self.retriever_config["chunks"] = data_key["chunks"]
|
||||
try:
|
||||
self.retriever_config["chunks"] = int(data_key["chunks"])
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
f"Invalid chunks value: {data_key['chunks']}, using default value 2"
|
||||
)
|
||||
self.retriever_config["chunks"] = 2
|
||||
elif self.agent_key:
|
||||
data_key = self._get_data_from_api_key(self.agent_key)
|
||||
self.agent_config.update(
|
||||
@@ -224,7 +293,13 @@ class StreamProcessor:
|
||||
if data_key.get("retriever"):
|
||||
self.retriever_config["retriever_name"] = data_key["retriever"]
|
||||
if data_key.get("chunks") is not None:
|
||||
self.retriever_config["chunks"] = data_key["chunks"]
|
||||
try:
|
||||
self.retriever_config["chunks"] = int(data_key["chunks"])
|
||||
except (ValueError, TypeError):
|
||||
logger.warning(
|
||||
f"Invalid chunks value: {data_key['chunks']}, using default value 2"
|
||||
)
|
||||
self.retriever_config["chunks"] = 2
|
||||
else:
|
||||
self.agent_config.update(
|
||||
{
|
||||
@@ -243,7 +318,8 @@ class StreamProcessor:
|
||||
"token_limit": self.data.get("token_limit", settings.DEFAULT_MAX_HISTORY),
|
||||
}
|
||||
|
||||
if "isNoneDoc" in self.data and self.data["isNoneDoc"]:
|
||||
api_key = self.data.get("api_key") or self.agent_key
|
||||
if not api_key and "isNoneDoc" in self.data and self.data["isNoneDoc"]:
|
||||
self.retriever_config["chunks"] = 0
|
||||
|
||||
def create_agent(self):
|
||||
|
||||
626
application/api/connector/routes.py
Normal file
626
application/api/connector/routes.py
Normal file
@@ -0,0 +1,626 @@
|
||||
import datetime
|
||||
import json
|
||||
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
from flask import (
|
||||
Blueprint,
|
||||
current_app,
|
||||
jsonify,
|
||||
make_response,
|
||||
request
|
||||
)
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
|
||||
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest_connector_task,
|
||||
)
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.api import api
|
||||
|
||||
|
||||
from application.utils import (
|
||||
check_required_fields
|
||||
)
|
||||
|
||||
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
|
||||
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo[settings.MONGO_DB_NAME]
|
||||
sources_collection = db["sources"]
|
||||
sessions_collection = db["connector_sessions"]
|
||||
|
||||
connector = Blueprint("connector", __name__)
|
||||
connectors_ns = Namespace("connectors", description="Connector operations", path="/")
|
||||
api.add_namespace(connectors_ns)
|
||||
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/upload")
|
||||
class UploadConnector(Resource):
|
||||
@api.expect(
|
||||
api.model(
|
||||
"ConnectorUploadModel",
|
||||
{
|
||||
"user": fields.String(required=True, description="User ID"),
|
||||
"source": fields.String(
|
||||
required=True, description="Source type (google_drive, github, etc.)"
|
||||
),
|
||||
"name": fields.String(required=True, description="Job name"),
|
||||
"data": fields.String(required=True, description="Configuration data"),
|
||||
"repo_url": fields.String(description="GitHub repository URL"),
|
||||
},
|
||||
)
|
||||
)
|
||||
@api.doc(
|
||||
description="Uploads connector source for vectorization",
|
||||
)
|
||||
def post(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
data = request.form
|
||||
required_fields = ["user", "source", "name", "data"]
|
||||
missing_fields = check_required_fields(data, required_fields)
|
||||
if missing_fields:
|
||||
return missing_fields
|
||||
try:
|
||||
config = json.loads(data["data"])
|
||||
source_data = None
|
||||
sync_frequency = config.get("sync_frequency", "never")
|
||||
|
||||
if data["source"] == "github":
|
||||
source_data = config.get("repo_url")
|
||||
elif data["source"] in ["crawler", "url"]:
|
||||
source_data = config.get("url")
|
||||
elif data["source"] == "reddit":
|
||||
source_data = config
|
||||
elif data["source"] in ConnectorCreator.get_supported_connectors():
|
||||
session_token = config.get("session_token")
|
||||
if not session_token:
|
||||
return make_response(jsonify({
|
||||
"success": False,
|
||||
"error": f"Missing session_token in {data['source']} configuration"
|
||||
}), 400)
|
||||
|
||||
file_ids = config.get("file_ids", [])
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [id.strip() for id in file_ids.split(',') if id.strip()]
|
||||
elif not isinstance(file_ids, list):
|
||||
file_ids = []
|
||||
|
||||
folder_ids = config.get("folder_ids", [])
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [id.strip() for id in folder_ids.split(',') if id.strip()]
|
||||
elif not isinstance(folder_ids, list):
|
||||
folder_ids = []
|
||||
|
||||
config["file_ids"] = file_ids
|
||||
config["folder_ids"] = folder_ids
|
||||
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
source_type=data["source"],
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=config.get("recursive", False),
|
||||
retriever=config.get("retriever", "classic"),
|
||||
sync_frequency=sync_frequency
|
||||
)
|
||||
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
|
||||
task = ingest_connector_task.delay(
|
||||
source_data=source_data,
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
loader=data["source"],
|
||||
sync_frequency=sync_frequency
|
||||
)
|
||||
except Exception as err:
|
||||
current_app.logger.error(
|
||||
f"Error uploading connector source: {err}", exc_info=True
|
||||
)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/task_status")
|
||||
class ConnectorTaskStatus(Resource):
|
||||
task_status_model = api.model(
|
||||
"ConnectorTaskStatusModel",
|
||||
{"task_id": fields.String(required=True, description="Task ID")},
|
||||
)
|
||||
|
||||
@api.expect(task_status_model)
|
||||
@api.doc(description="Get connector task status")
|
||||
def get(self):
|
||||
task_id = request.args.get("task_id")
|
||||
if not task_id:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Task ID is required"}), 400
|
||||
)
|
||||
try:
|
||||
from application.celery_init import celery
|
||||
|
||||
task = celery.AsyncResult(task_id)
|
||||
task_meta = task.info
|
||||
print(f"Task status: {task.status}")
|
||||
if not isinstance(
|
||||
task_meta, (dict, list, str, int, float, bool, type(None))
|
||||
):
|
||||
task_meta = str(task_meta)
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify({"status": task.status, "result": task_meta}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/sources")
|
||||
class ConnectorSources(Resource):
|
||||
@api.doc(description="Get connector sources")
|
||||
def get(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
user = decoded_token.get("sub")
|
||||
try:
|
||||
sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1)
|
||||
connector_sources = []
|
||||
for source in sources:
|
||||
connector_sources.append({
|
||||
"id": str(source["_id"]),
|
||||
"name": source.get("name"),
|
||||
"date": source.get("date"),
|
||||
"type": source.get("type"),
|
||||
"source": source.get("source"),
|
||||
"tokens": source.get("tokens", ""),
|
||||
"retriever": source.get("retriever", "classic"),
|
||||
"syncFrequency": source.get("sync_frequency", ""),
|
||||
})
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving connector sources: {err}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify(connector_sources), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/delete")
|
||||
class DeleteConnectorSource(Resource):
|
||||
@api.doc(
|
||||
description="Delete a connector source",
|
||||
params={"source_id": "The source ID to delete"},
|
||||
)
|
||||
def delete(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
source_id = request.args.get("source_id")
|
||||
if not source_id:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "source_id is required"}), 400
|
||||
)
|
||||
try:
|
||||
result = sources_collection.delete_one(
|
||||
{"_id": ObjectId(source_id), "user": decoded_token.get("sub")}
|
||||
)
|
||||
if result.deleted_count == 0:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Source not found"}), 404
|
||||
)
|
||||
except Exception as err:
|
||||
current_app.logger.error(
|
||||
f"Error deleting connector source: {err}", exc_info=True
|
||||
)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/auth")
|
||||
class ConnectorAuth(Resource):
|
||||
@api.doc(description="Get connector OAuth authorization URL", params={"provider": "Connector provider (e.g., google_drive)"})
|
||||
def get(self):
|
||||
try:
|
||||
provider = request.args.get('provider') or request.args.get('source')
|
||||
if not provider:
|
||||
return make_response(jsonify({"success": False, "error": "Missing provider"}), 400)
|
||||
|
||||
if not ConnectorCreator.is_supported(provider):
|
||||
return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400)
|
||||
|
||||
import uuid
|
||||
state = str(uuid.uuid4())
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
authorization_url = auth.get_authorization_url(state=state)
|
||||
return make_response(jsonify({
|
||||
"success": True,
|
||||
"authorization_url": authorization_url,
|
||||
"state": state
|
||||
}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error generating connector auth URL: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/callback")
|
||||
class ConnectorsCallback(Resource):
|
||||
@api.doc(description="Handle OAuth callback for external connectors")
|
||||
def get(self):
|
||||
"""Handle OAuth callback for external connectors"""
|
||||
try:
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from flask import request, redirect
|
||||
import uuid
|
||||
|
||||
provider = request.args.get('provider', 'google_drive')
|
||||
authorization_code = request.args.get('code')
|
||||
_ = request.args.get('state')
|
||||
error = request.args.get('error')
|
||||
|
||||
if error:
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider={provider}")
|
||||
|
||||
if not authorization_code:
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Authorization+code+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider={provider}")
|
||||
|
||||
try:
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
token_info = auth.exchange_code_for_tokens(authorization_code)
|
||||
|
||||
session_token = str(uuid.uuid4())
|
||||
|
||||
|
||||
try:
|
||||
credentials = auth.create_credentials_from_token_info(token_info)
|
||||
service = auth.build_drive_service(credentials)
|
||||
user_info = service.about().get(fields="user").execute()
|
||||
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
|
||||
except Exception as e:
|
||||
current_app.logger.warning(f"Could not get user info: {e}")
|
||||
user_email = 'Connected User'
|
||||
|
||||
sanitized_token_info = {
|
||||
"access_token": token_info.get("access_token"),
|
||||
"refresh_token": token_info.get("refresh_token"),
|
||||
"token_uri": token_info.get("token_uri"),
|
||||
"expiry": token_info.get("expiry"),
|
||||
"scopes": token_info.get("scopes")
|
||||
}
|
||||
|
||||
user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None
|
||||
sessions_collection.insert_one({
|
||||
"session_token": session_token,
|
||||
"user": user_id,
|
||||
"token_info": sanitized_token_info,
|
||||
"created_at": datetime.datetime.now(datetime.timezone.utc),
|
||||
"user_email": user_email,
|
||||
"provider": provider
|
||||
})
|
||||
|
||||
# Redirect to success page with session token and user email
|
||||
return redirect(f"/api/connectors/callback-status?status=success&message=Authentication+successful&provider={provider}&session_token={session_token}&user_email={user_email}")
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True)
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+exchange+authorization+code+for+tokens:+{str(e)}&provider={provider}")
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error handling connector callback: {e}")
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+complete+connector+authentication:+{str(e)}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.")
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/refresh")
|
||||
class ConnectorRefresh(Resource):
|
||||
@api.expect(api.model("ConnectorRefreshModel", {"provider": fields.String(required=True), "refresh_token": fields.String(required=True)}))
|
||||
@api.doc(description="Refresh connector access token")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
refresh_token = data.get('refresh_token')
|
||||
|
||||
if not provider or not refresh_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and refresh_token are required"}), 400)
|
||||
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
token_info = auth.refresh_access_token(refresh_token)
|
||||
return make_response(jsonify({"success": True, "token_info": token_info}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error refreshing token for connector: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/files")
|
||||
class ConnectorFiles(Resource):
|
||||
@api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)}))
|
||||
@api.doc(description="List files from a connector provider (supports pagination)")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
session_token = data.get('session_token')
|
||||
folder_id = data.get('folder_id')
|
||||
limit = data.get('limit', 10)
|
||||
page_token = data.get('page_token')
|
||||
if not provider or not session_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
|
||||
|
||||
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
user = decoded_token.get('sub')
|
||||
session = sessions_collection.find_one({"session_token": session_token, "user": user})
|
||||
if not session:
|
||||
return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)
|
||||
|
||||
loader = ConnectorCreator.create_connector(provider, session_token)
|
||||
documents = loader.load_data({
|
||||
'limit': limit,
|
||||
'list_only': True,
|
||||
'session_token': session_token,
|
||||
'folder_id': folder_id,
|
||||
'page_token': page_token
|
||||
})
|
||||
|
||||
files = []
|
||||
for doc in documents[:limit]:
|
||||
metadata = doc.extra_info
|
||||
modified_time = metadata.get('modified_time')
|
||||
if modified_time:
|
||||
date_part = modified_time.split('T')[0]
|
||||
time_part = modified_time.split('T')[1].split('.')[0].split('Z')[0]
|
||||
formatted_time = f"{date_part} {time_part}"
|
||||
else:
|
||||
formatted_time = None
|
||||
|
||||
files.append({
|
||||
'id': doc.doc_id,
|
||||
'name': metadata.get('file_name', 'Unknown File'),
|
||||
'type': metadata.get('mime_type', 'unknown'),
|
||||
'size': metadata.get('size', None),
|
||||
'modifiedTime': formatted_time
|
||||
})
|
||||
|
||||
next_token = getattr(loader, 'next_page_token', None)
|
||||
has_more = bool(next_token)
|
||||
|
||||
return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error loading connector files: {e}")
|
||||
return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/validate-session")
|
||||
class ConnectorValidateSession(Resource):
|
||||
@api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)}))
|
||||
@api.doc(description="Validate connector session token and return user info")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
session_token = data.get('session_token')
|
||||
if not provider or not session_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
|
||||
|
||||
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
user = decoded_token.get('sub')
|
||||
|
||||
session = sessions_collection.find_one({"session_token": session_token, "user": user})
|
||||
if not session or "token_info" not in session:
|
||||
return make_response(jsonify({"success": False, "error": "Invalid or expired session"}), 401)
|
||||
|
||||
token_info = session["token_info"]
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
is_expired = auth.is_token_expired(token_info)
|
||||
|
||||
return make_response(jsonify({
|
||||
"success": True,
|
||||
"expired": is_expired,
|
||||
"user_email": session.get('user_email', 'Connected User')
|
||||
}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error validating connector session: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/disconnect")
|
||||
class ConnectorDisconnect(Resource):
|
||||
@api.expect(api.model("ConnectorDisconnectModel", {"provider": fields.String(required=True), "session_token": fields.String(required=False)}))
|
||||
@api.doc(description="Disconnect a connector session")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
provider = data.get('provider')
|
||||
session_token = data.get('session_token')
|
||||
if not provider:
|
||||
return make_response(jsonify({"success": False, "error": "provider is required"}), 400)
|
||||
|
||||
|
||||
if session_token:
|
||||
sessions_collection.delete_one({"session_token": session_token})
|
||||
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error disconnecting connector session: {e}")
|
||||
return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/sync")
|
||||
class ConnectorSync(Resource):
|
||||
@api.expect(
|
||||
api.model(
|
||||
"ConnectorSyncModel",
|
||||
{
|
||||
"source_id": fields.String(required=True, description="Source ID to sync"),
|
||||
"session_token": fields.String(required=True, description="Authentication token")
|
||||
},
|
||||
)
|
||||
)
|
||||
@api.doc(description="Sync connector source to check for modifications")
|
||||
def post(self):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
|
||||
try:
|
||||
data = request.get_json()
|
||||
source_id = data.get('source_id')
|
||||
session_token = data.get('session_token')
|
||||
|
||||
if not all([source_id, session_token]):
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "source_id and session_token are required"
|
||||
}),
|
||||
400
|
||||
)
|
||||
source = sources_collection.find_one({"_id": ObjectId(source_id)})
|
||||
if not source:
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "Source not found"
|
||||
}),
|
||||
404
|
||||
)
|
||||
|
||||
if source.get('user') != decoded_token.get('sub'):
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "Unauthorized access to source"
|
||||
}),
|
||||
403
|
||||
)
|
||||
|
||||
remote_data = {}
|
||||
try:
|
||||
if source.get('remote_data'):
|
||||
remote_data = json.loads(source.get('remote_data'))
|
||||
except json.JSONDecodeError:
|
||||
current_app.logger.error(f"Invalid remote_data format for source {source_id}")
|
||||
remote_data = {}
|
||||
|
||||
source_type = remote_data.get('provider')
|
||||
if not source_type:
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": "Source provider not found in remote_data"
|
||||
}),
|
||||
400
|
||||
)
|
||||
|
||||
# Extract configuration from remote_data
|
||||
file_ids = remote_data.get('file_ids', [])
|
||||
folder_ids = remote_data.get('folder_ids', [])
|
||||
recursive = remote_data.get('recursive', True)
|
||||
|
||||
# Start the sync task
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=source.get('name'),
|
||||
user=decoded_token.get('sub'),
|
||||
source_type=source_type,
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=recursive,
|
||||
retriever=source.get('retriever', 'classic'),
|
||||
operation_mode="sync",
|
||||
doc_id=source_id,
|
||||
sync_frequency=source.get('sync_frequency', 'never')
|
||||
)
|
||||
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": True,
|
||||
"task_id": task.id
|
||||
}),
|
||||
200
|
||||
)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(
|
||||
f"Error syncing connector source: {err}",
|
||||
exc_info=True
|
||||
)
|
||||
return make_response(
|
||||
jsonify({
|
||||
"success": False,
|
||||
"error": str(err)
|
||||
}),
|
||||
400
|
||||
)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/callback-status")
|
||||
class ConnectorCallbackStatus(Resource):
|
||||
@api.doc(description="Return HTML page with connector authentication status")
|
||||
def get(self):
|
||||
"""Return HTML page with connector authentication status"""
|
||||
try:
|
||||
status = request.args.get('status', 'error')
|
||||
message = request.args.get('message', '')
|
||||
provider = request.args.get('provider', 'connector')
|
||||
session_token = request.args.get('session_token', '')
|
||||
user_email = request.args.get('user_email', '')
|
||||
|
||||
html_content = f"""
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>{provider.replace('_', ' ').title()} Authentication</title>
|
||||
<style>
|
||||
body {{ font-family: Arial, sans-serif; text-align: center; padding: 40px; }}
|
||||
.container {{ max-width: 600px; margin: 0 auto; }}
|
||||
.success {{ color: #4CAF50; }}
|
||||
.error {{ color: #F44336; }}
|
||||
</style>
|
||||
<script>
|
||||
window.onload = function() {{
|
||||
const status = "{status}";
|
||||
const sessionToken = "{session_token}";
|
||||
const userEmail = "{user_email}";
|
||||
|
||||
if (status === "success" && window.opener) {{
|
||||
window.opener.postMessage({{
|
||||
type: '{provider}_auth_success',
|
||||
session_token: sessionToken,
|
||||
user_email: userEmail
|
||||
}}, '*');
|
||||
|
||||
setTimeout(() => window.close(), 3000);
|
||||
}}
|
||||
}};
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h2>{provider.replace('_', ' ').title()} Authentication</h2>
|
||||
<div class="{status}">
|
||||
<p>{message}</p>
|
||||
{f'<p>Connected as: {user_email}</p>' if status == 'success' else ''}
|
||||
</div>
|
||||
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else ''}</small></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
return make_response(html_content, 200, {'Content-Type': 'text/html'})
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error rendering callback status page: {e}")
|
||||
return make_response("Authentication error occurred", 500, {'Content-Type': 'text/html'})
|
||||
|
||||
|
||||
@@ -32,13 +32,15 @@ from application.api import api
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest,
|
||||
ingest_connector_task,
|
||||
ingest_remote,
|
||||
process_agent_webhook,
|
||||
store_attachment,
|
||||
)
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.security.encryption import encrypt_credentials, decrypt_credentials
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from application.security.encryption import decrypt_credentials, encrypt_credentials
|
||||
from application.storage.storage_creator import StorageCreator
|
||||
from application.tts.google_tts import GoogleTTS
|
||||
from application.utils import (
|
||||
@@ -76,7 +78,6 @@ try:
|
||||
users_collection.create_index("user_id", unique=True)
|
||||
except Exception as e:
|
||||
print("Error creating indexes:", e)
|
||||
|
||||
user = Blueprint("user", __name__)
|
||||
user_ns = Namespace("user", description="User related operations", path="/")
|
||||
api.add_namespace(user_ns)
|
||||
@@ -129,11 +130,9 @@ def ensure_user_doc(user_id):
|
||||
updates["agent_preferences.pinned"] = []
|
||||
if "shared_with_me" not in prefs:
|
||||
updates["agent_preferences.shared_with_me"] = []
|
||||
|
||||
if updates:
|
||||
users_collection.update_one({"user_id": user_id}, {"$set": updates})
|
||||
user_doc = users_collection.find_one({"user_id": user_id})
|
||||
|
||||
return user_doc
|
||||
|
||||
|
||||
@@ -185,7 +184,6 @@ def handle_image_upload(
|
||||
jsonify({"success": False, "message": "Image upload failed"}),
|
||||
400,
|
||||
)
|
||||
|
||||
return image_url, None
|
||||
|
||||
|
||||
@@ -299,8 +297,8 @@ class GetSingleConversation(Resource):
|
||||
)
|
||||
if not conversation:
|
||||
return make_response(jsonify({"status": "not found"}), 404)
|
||||
|
||||
# Process queries to include attachment names
|
||||
|
||||
queries = conversation["queries"]
|
||||
for query in queries:
|
||||
if "attachments" in query and query["attachments"]:
|
||||
@@ -501,6 +499,7 @@ class DeleteOldIndexes(Resource):
|
||||
|
||||
try:
|
||||
# Delete vector index
|
||||
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
index_path = f"indexes/{str(doc['_id'])}"
|
||||
if storage.file_exists(f"{index_path}/index.faiss"):
|
||||
@@ -571,6 +570,7 @@ class UploadFile(Resource):
|
||||
job_name = request.form["name"]
|
||||
|
||||
# Create safe versions for filesystem operations
|
||||
|
||||
safe_user = safe_filename(user)
|
||||
dir_name = safe_filename(job_name)
|
||||
base_path = f"{settings.UPLOAD_FOLDER}/{safe_user}/{dir_name}"
|
||||
@@ -592,6 +592,7 @@ class UploadFile(Resource):
|
||||
zip_ref.extractall(path=temp_dir)
|
||||
|
||||
# Walk through extracted files and upload them
|
||||
|
||||
for root, _, files in os.walk(temp_dir):
|
||||
for extracted_file in files:
|
||||
if (
|
||||
@@ -614,11 +615,13 @@ class UploadFile(Resource):
|
||||
f"Error extracting zip: {e}", exc_info=True
|
||||
)
|
||||
# If zip extraction fails, save the original zip file
|
||||
|
||||
file_path = f"{base_path}/{safe_file}"
|
||||
with open(temp_file_path, "rb") as f:
|
||||
storage.save_file(f, file_path)
|
||||
else:
|
||||
# For non-zip files, save directly
|
||||
|
||||
file_path = f"{base_path}/{safe_file}"
|
||||
with open(temp_file_path, "rb") as f:
|
||||
storage.save_file(f, file_path)
|
||||
@@ -709,7 +712,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if operation not in ["add", "remove", "remove_directory"]:
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -720,14 +722,12 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
try:
|
||||
ObjectId(source_id)
|
||||
except Exception:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Invalid source ID format"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
source = sources_collection.find_one(
|
||||
{"_id": ObjectId(source_id), "user": user}
|
||||
@@ -760,7 +760,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if operation == "add":
|
||||
files = request.files.getlist("file")
|
||||
if not files or all(file.filename == "" for file in files):
|
||||
@@ -773,23 +772,22 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
added_files = []
|
||||
|
||||
target_dir = source_file_path
|
||||
if parent_dir:
|
||||
target_dir = f"{source_file_path}/{parent_dir}"
|
||||
|
||||
for file in files:
|
||||
if file.filename:
|
||||
safe_filename_str = safe_filename(file.filename)
|
||||
file_path = f"{target_dir}/{safe_filename_str}"
|
||||
|
||||
# Save file to storage
|
||||
|
||||
storage.save_file(file, file_path)
|
||||
added_files.append(safe_filename_str)
|
||||
|
||||
# Trigger re-ingestion pipeline
|
||||
|
||||
from application.api.user.tasks import reingest_source_task
|
||||
|
||||
task = reingest_source_task.delay(source_id=source_id, user=user)
|
||||
@@ -819,7 +817,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
try:
|
||||
file_paths = (
|
||||
json.loads(file_paths_str)
|
||||
@@ -833,18 +830,19 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
# Remove files from storage and directory structure
|
||||
|
||||
removed_files = []
|
||||
for file_path in file_paths:
|
||||
full_path = f"{source_file_path}/{file_path}"
|
||||
|
||||
# Remove from storage
|
||||
|
||||
if storage.file_exists(full_path):
|
||||
storage.delete_file(full_path)
|
||||
removed_files.append(file_path)
|
||||
|
||||
# Trigger re-ingestion pipeline
|
||||
|
||||
from application.api.user.tasks import reingest_source_task
|
||||
|
||||
task = reingest_source_task.delay(source_id=source_id, user=user)
|
||||
@@ -873,8 +871,8 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
# Validate directory path (prevent path traversal)
|
||||
|
||||
if directory_path.startswith("/") or ".." in directory_path:
|
||||
current_app.logger.warning(
|
||||
f"Invalid directory path attempted for removal. "
|
||||
@@ -908,7 +906,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
404,
|
||||
)
|
||||
|
||||
success = storage.remove_directory(full_directory_path)
|
||||
|
||||
if not success:
|
||||
@@ -923,7 +920,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
500,
|
||||
)
|
||||
|
||||
current_app.logger.info(
|
||||
f"Successfully removed directory. "
|
||||
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
|
||||
@@ -931,6 +927,7 @@ class ManageSourceFiles(Resource):
|
||||
)
|
||||
|
||||
# Trigger re-ingestion pipeline
|
||||
|
||||
from application.api.user.tasks import reingest_source_task
|
||||
|
||||
task = reingest_source_task.delay(source_id=source_id, user=user)
|
||||
@@ -1005,6 +1002,50 @@ class UploadRemote(Resource):
|
||||
source_data = config.get("url")
|
||||
elif data["source"] == "reddit":
|
||||
source_data = config
|
||||
elif data["source"] in ConnectorCreator.get_supported_connectors():
|
||||
session_token = config.get("session_token")
|
||||
if not session_token:
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Missing session_token in {data['source']} configuration",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
# Process file_ids
|
||||
|
||||
file_ids = config.get("file_ids", [])
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [id.strip() for id in file_ids.split(",") if id.strip()]
|
||||
elif not isinstance(file_ids, list):
|
||||
file_ids = []
|
||||
# Process folder_ids
|
||||
|
||||
folder_ids = config.get("folder_ids", [])
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [
|
||||
id.strip() for id in folder_ids.split(",") if id.strip()
|
||||
]
|
||||
elif not isinstance(folder_ids, list):
|
||||
folder_ids = []
|
||||
config["file_ids"] = file_ids
|
||||
config["folder_ids"] = folder_ids
|
||||
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
source_type=data["source"],
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=config.get("recursive", False),
|
||||
retriever=config.get("retriever", "classic"),
|
||||
)
|
||||
return make_response(
|
||||
jsonify({"success": True, "task_id": task.id}), 200
|
||||
)
|
||||
task = ingest_remote.delay(
|
||||
source_data=source_data,
|
||||
job_name=data["name"],
|
||||
@@ -1113,6 +1154,7 @@ class PaginatedSources(Resource):
|
||||
"retriever": doc.get("retriever", "classic"),
|
||||
"syncFrequency": doc.get("sync_frequency", ""),
|
||||
"isNested": bool(doc.get("directory_structure")),
|
||||
"type": doc.get("type", "file"),
|
||||
}
|
||||
paginated_docs.append(doc_data)
|
||||
response = {
|
||||
@@ -1161,6 +1203,9 @@ class CombinedJson(Resource):
|
||||
"retriever": index.get("retriever", "classic"),
|
||||
"syncFrequency": index.get("sync_frequency", ""),
|
||||
"is_nested": bool(index.get("directory_structure")),
|
||||
"type": index.get(
|
||||
"type", "file"
|
||||
), # Add type field with default "file"
|
||||
}
|
||||
)
|
||||
except Exception as err:
|
||||
@@ -1376,17 +1421,14 @@ class GetAgent(Resource):
|
||||
def get(self):
|
||||
if not (decoded_token := request.decoded_token):
|
||||
return {"success": False}, 401
|
||||
|
||||
if not (agent_id := request.args.get("id")):
|
||||
return {"success": False, "message": "ID required"}, 400
|
||||
|
||||
try:
|
||||
agent = agents_collection.find_one(
|
||||
{"_id": ObjectId(agent_id), "user": decoded_token["sub"]}
|
||||
)
|
||||
if not agent:
|
||||
return {"status": "Not found"}, 404
|
||||
|
||||
data = {
|
||||
"id": str(agent["_id"]),
|
||||
"name": agent["name"],
|
||||
@@ -1400,6 +1442,16 @@ class GetAgent(Resource):
|
||||
and (source_doc := db.dereference(agent.get("source")))
|
||||
else ""
|
||||
),
|
||||
"sources": [
|
||||
(
|
||||
str(db.dereference(source_ref)["_id"])
|
||||
if isinstance(source_ref, DBRef) and db.dereference(source_ref)
|
||||
else source_ref
|
||||
)
|
||||
for source_ref in agent.get("sources", [])
|
||||
if (isinstance(source_ref, DBRef) and db.dereference(source_ref))
|
||||
or source_ref == "default"
|
||||
],
|
||||
"chunks": agent["chunks"],
|
||||
"retriever": agent.get("retriever", ""),
|
||||
"prompt_id": agent.get("prompt_id", ""),
|
||||
@@ -1422,7 +1474,6 @@ class GetAgent(Resource):
|
||||
"shared_token": agent.get("shared_token", ""),
|
||||
}
|
||||
return make_response(jsonify(data), 200)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Agent fetch error: {e}", exc_info=True)
|
||||
return {"success": False}, 400
|
||||
@@ -1434,7 +1485,6 @@ class GetAgents(Resource):
|
||||
def get(self):
|
||||
if not (decoded_token := request.decoded_token):
|
||||
return {"success": False}, 401
|
||||
|
||||
user = decoded_token.get("sub")
|
||||
try:
|
||||
user_doc = ensure_user_doc(user)
|
||||
@@ -1453,8 +1503,24 @@ class GetAgents(Resource):
|
||||
str(source_doc["_id"])
|
||||
if isinstance(agent.get("source"), DBRef)
|
||||
and (source_doc := db.dereference(agent.get("source")))
|
||||
else ""
|
||||
else (
|
||||
agent.get("source", "")
|
||||
if agent.get("source") == "default"
|
||||
else ""
|
||||
)
|
||||
),
|
||||
"sources": [
|
||||
(
|
||||
source_ref
|
||||
if source_ref == "default"
|
||||
else str(db.dereference(source_ref)["_id"])
|
||||
)
|
||||
for source_ref in agent.get("sources", [])
|
||||
if source_ref == "default"
|
||||
or (
|
||||
isinstance(source_ref, DBRef) and db.dereference(source_ref)
|
||||
)
|
||||
],
|
||||
"chunks": agent["chunks"],
|
||||
"retriever": agent.get("retriever", ""),
|
||||
"prompt_id": agent.get("prompt_id", ""),
|
||||
@@ -1497,7 +1563,14 @@ class CreateAgent(Resource):
|
||||
"image": fields.Raw(
|
||||
required=False, description="Image file upload", type="file"
|
||||
),
|
||||
"source": fields.String(required=True, description="Source ID"),
|
||||
"source": fields.String(
|
||||
required=False, description="Source ID (legacy single source)"
|
||||
),
|
||||
"sources": fields.List(
|
||||
fields.String,
|
||||
required=False,
|
||||
description="List of source identifiers for multiple sources",
|
||||
),
|
||||
"chunks": fields.Integer(required=True, description="Chunks count"),
|
||||
"retriever": fields.String(required=True, description="Retriever ID"),
|
||||
"prompt_id": fields.String(required=True, description="Prompt ID"),
|
||||
@@ -1530,6 +1603,11 @@ class CreateAgent(Resource):
|
||||
data["tools"] = json.loads(data["tools"])
|
||||
except json.JSONDecodeError:
|
||||
data["tools"] = []
|
||||
if "sources" in data:
|
||||
try:
|
||||
data["sources"] = json.loads(data["sources"])
|
||||
except json.JSONDecodeError:
|
||||
data["sources"] = []
|
||||
if "json_schema" in data:
|
||||
try:
|
||||
data["json_schema"] = json.loads(data["json_schema"])
|
||||
@@ -1538,9 +1616,11 @@ class CreateAgent(Resource):
|
||||
print(f"Received data: {data}")
|
||||
|
||||
# Validate JSON schema if provided
|
||||
|
||||
if data.get("json_schema"):
|
||||
try:
|
||||
# Basic validation - ensure it's a valid JSON structure
|
||||
|
||||
json_schema = data.get("json_schema")
|
||||
if not isinstance(json_schema, dict):
|
||||
return make_response(
|
||||
@@ -1554,6 +1634,7 @@ class CreateAgent(Resource):
|
||||
)
|
||||
|
||||
# Validate that it has either a 'schema' property or is itself a schema
|
||||
|
||||
if "schema" not in json_schema and "type" not in json_schema:
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -1571,7 +1652,6 @@ class CreateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if data.get("status") not in ["draft", "published"]:
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -1582,17 +1662,27 @@ class CreateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if data.get("status") == "published":
|
||||
required_fields = [
|
||||
"name",
|
||||
"description",
|
||||
"source",
|
||||
"chunks",
|
||||
"retriever",
|
||||
"prompt_id",
|
||||
"agent_type",
|
||||
]
|
||||
# Require either source or sources (but not both)
|
||||
|
||||
if not data.get("source") and not data.get("sources"):
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": "Either 'source' or 'sources' field is required for published agents",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
validate_fields = ["name", "description", "prompt_id", "agent_type"]
|
||||
else:
|
||||
required_fields = ["name"]
|
||||
@@ -1603,25 +1693,37 @@ class CreateAgent(Resource):
|
||||
return missing_fields
|
||||
if invalid_fields:
|
||||
return invalid_fields
|
||||
|
||||
image_url, error = handle_image_upload(request, "", user, storage)
|
||||
if error:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Image upload failed"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
key = str(uuid.uuid4()) if data.get("status") == "published" else ""
|
||||
|
||||
sources_list = []
|
||||
if data.get("sources") and len(data.get("sources", [])) > 0:
|
||||
for source_id in data.get("sources", []):
|
||||
if source_id == "default":
|
||||
sources_list.append("default")
|
||||
elif ObjectId.is_valid(source_id):
|
||||
sources_list.append(DBRef("sources", ObjectId(source_id)))
|
||||
source_field = ""
|
||||
else:
|
||||
source_value = data.get("source", "")
|
||||
if source_value == "default":
|
||||
source_field = "default"
|
||||
elif ObjectId.is_valid(source_value):
|
||||
source_field = DBRef("sources", ObjectId(source_value))
|
||||
else:
|
||||
source_field = ""
|
||||
new_agent = {
|
||||
"user": user,
|
||||
"name": data.get("name"),
|
||||
"description": data.get("description", ""),
|
||||
"image": image_url,
|
||||
"source": (
|
||||
DBRef("sources", ObjectId(data.get("source")))
|
||||
if ObjectId.is_valid(data.get("source"))
|
||||
else ""
|
||||
),
|
||||
"source": source_field,
|
||||
"sources": sources_list,
|
||||
"chunks": data.get("chunks", ""),
|
||||
"retriever": data.get("retriever", ""),
|
||||
"prompt_id": data.get("prompt_id", ""),
|
||||
@@ -1636,7 +1738,11 @@ class CreateAgent(Resource):
|
||||
}
|
||||
if new_agent["chunks"] == "":
|
||||
new_agent["chunks"] = "0"
|
||||
if new_agent["source"] == "" and new_agent["retriever"] == "":
|
||||
if (
|
||||
new_agent["source"] == ""
|
||||
and new_agent["retriever"] == ""
|
||||
and not new_agent["sources"]
|
||||
):
|
||||
new_agent["retriever"] = "classic"
|
||||
resp = agents_collection.insert_one(new_agent)
|
||||
new_id = str(resp.inserted_id)
|
||||
@@ -1658,7 +1764,14 @@ class UpdateAgent(Resource):
|
||||
"image": fields.String(
|
||||
required=False, description="New image URL or identifier"
|
||||
),
|
||||
"source": fields.String(required=True, description="Source ID"),
|
||||
"source": fields.String(
|
||||
required=False, description="Source ID (legacy single source)"
|
||||
),
|
||||
"sources": fields.List(
|
||||
fields.String,
|
||||
required=False,
|
||||
description="List of source identifiers for multiple sources",
|
||||
),
|
||||
"chunks": fields.Integer(required=True, description="Chunks count"),
|
||||
"retriever": fields.String(required=True, description="Retriever ID"),
|
||||
"prompt_id": fields.String(required=True, description="Prompt ID"),
|
||||
@@ -1691,12 +1804,16 @@ class UpdateAgent(Resource):
|
||||
data["tools"] = json.loads(data["tools"])
|
||||
except json.JSONDecodeError:
|
||||
data["tools"] = []
|
||||
if "sources" in data:
|
||||
try:
|
||||
data["sources"] = json.loads(data["sources"])
|
||||
except json.JSONDecodeError:
|
||||
data["sources"] = []
|
||||
if "json_schema" in data:
|
||||
try:
|
||||
data["json_schema"] = json.loads(data["json_schema"])
|
||||
except json.JSONDecodeError:
|
||||
data["json_schema"] = None
|
||||
|
||||
if not ObjectId.is_valid(agent_id):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Invalid agent ID format"}), 400
|
||||
@@ -1720,7 +1837,6 @@ class UpdateAgent(Resource):
|
||||
),
|
||||
404,
|
||||
)
|
||||
|
||||
image_url, error = handle_image_upload(
|
||||
request, existing_agent.get("image", ""), user, storage
|
||||
)
|
||||
@@ -1728,13 +1844,13 @@ class UpdateAgent(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Image upload failed"}), 400
|
||||
)
|
||||
|
||||
update_fields = {}
|
||||
allowed_fields = [
|
||||
"name",
|
||||
"description",
|
||||
"image",
|
||||
"source",
|
||||
"sources",
|
||||
"chunks",
|
||||
"retriever",
|
||||
"prompt_id",
|
||||
@@ -1758,7 +1874,11 @@ class UpdateAgent(Resource):
|
||||
update_fields[field] = new_status
|
||||
elif field == "source":
|
||||
source_id = data.get("source")
|
||||
if source_id and ObjectId.is_valid(source_id):
|
||||
if source_id == "default":
|
||||
# Handle special "default" source
|
||||
|
||||
update_fields[field] = "default"
|
||||
elif source_id and ObjectId.is_valid(source_id):
|
||||
update_fields[field] = DBRef("sources", ObjectId(source_id))
|
||||
elif source_id:
|
||||
return make_response(
|
||||
@@ -1772,6 +1892,30 @@ class UpdateAgent(Resource):
|
||||
)
|
||||
else:
|
||||
update_fields[field] = ""
|
||||
elif field == "sources":
|
||||
sources_list = data.get("sources", [])
|
||||
if sources_list and isinstance(sources_list, list):
|
||||
valid_sources = []
|
||||
for source_id in sources_list:
|
||||
if source_id == "default":
|
||||
valid_sources.append("default")
|
||||
elif ObjectId.is_valid(source_id):
|
||||
valid_sources.append(
|
||||
DBRef("sources", ObjectId(source_id))
|
||||
)
|
||||
else:
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"message": f"Invalid source ID format: {source_id}",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
update_fields[field] = valid_sources
|
||||
else:
|
||||
update_fields[field] = []
|
||||
elif field == "chunks":
|
||||
chunks_value = data.get("chunks")
|
||||
if chunks_value == "":
|
||||
@@ -1837,7 +1981,6 @@ class UpdateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if not existing_agent.get("key"):
|
||||
newly_generated_key = str(uuid.uuid4())
|
||||
update_fields["key"] = newly_generated_key
|
||||
@@ -1924,7 +2067,6 @@ class PinnedAgents(Resource):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
|
||||
user_id = decoded_token.get("sub")
|
||||
|
||||
try:
|
||||
@@ -1933,7 +2075,6 @@ class PinnedAgents(Resource):
|
||||
|
||||
if not pinned_ids:
|
||||
return make_response(jsonify([]), 200)
|
||||
|
||||
pinned_object_ids = [ObjectId(agent_id) for agent_id in pinned_ids]
|
||||
|
||||
pinned_agents_cursor = agents_collection.find(
|
||||
@@ -1943,6 +2084,7 @@ class PinnedAgents(Resource):
|
||||
existing_ids = {str(agent["_id"]) for agent in pinned_agents}
|
||||
|
||||
# Clean up any stale pinned IDs
|
||||
|
||||
stale_ids = [
|
||||
agent_id for agent_id in pinned_ids if agent_id not in existing_ids
|
||||
]
|
||||
@@ -1951,7 +2093,6 @@ class PinnedAgents(Resource):
|
||||
{"user_id": user_id},
|
||||
{"$pullAll": {"agent_preferences.pinned": stale_ids}},
|
||||
)
|
||||
|
||||
list_pinned_agents = [
|
||||
{
|
||||
"id": str(agent["_id"]),
|
||||
@@ -1988,11 +2129,9 @@ class PinnedAgents(Resource):
|
||||
for agent in pinned_agents
|
||||
if "source" in agent or "retriever" in agent
|
||||
]
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving pinned agents: {err}")
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
|
||||
return make_response(jsonify(list_pinned_agents), 200)
|
||||
|
||||
|
||||
@@ -2056,7 +2195,6 @@ class RemoveSharedAgent(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "ID is required"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
agent = agents_collection.find_one(
|
||||
{"_id": ObjectId(agent_id), "shared_publicly": True}
|
||||
@@ -2066,7 +2204,6 @@ class RemoveSharedAgent(Resource):
|
||||
jsonify({"success": False, "message": "Shared agent not found"}),
|
||||
404,
|
||||
)
|
||||
|
||||
ensure_user_doc(user_id)
|
||||
users_collection.update_one(
|
||||
{"user_id": user_id},
|
||||
@@ -2079,7 +2216,6 @@ class RemoveSharedAgent(Resource):
|
||||
)
|
||||
|
||||
return make_response(jsonify({"success": True, "action": "removed"}), 200)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error removing shared agent: {err}")
|
||||
return make_response(
|
||||
@@ -2102,7 +2238,6 @@ class SharedAgent(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Token or ID is required"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
query = {
|
||||
"shared_publicly": True,
|
||||
@@ -2114,7 +2249,6 @@ class SharedAgent(Resource):
|
||||
jsonify({"success": False, "message": "Shared agent not found"}),
|
||||
404,
|
||||
)
|
||||
|
||||
agent_id = str(shared_agent["_id"])
|
||||
data = {
|
||||
"id": agent_id,
|
||||
@@ -2154,7 +2288,6 @@ class SharedAgent(Resource):
|
||||
if tool_data:
|
||||
enriched_tools.append(tool_data.get("name", ""))
|
||||
data["tools"] = enriched_tools
|
||||
|
||||
decoded_token = getattr(request, "decoded_token", None)
|
||||
if decoded_token:
|
||||
user_id = decoded_token.get("sub")
|
||||
@@ -2166,9 +2299,7 @@ class SharedAgent(Resource):
|
||||
{"user_id": user_id},
|
||||
{"$addToSet": {"agent_preferences.shared_with_me": agent_id}},
|
||||
)
|
||||
|
||||
return make_response(jsonify(data), 200)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving shared agent: {err}")
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
@@ -2202,7 +2333,6 @@ class SharedAgents(Resource):
|
||||
{"user_id": user_id},
|
||||
{"$pullAll": {"agent_preferences.shared_with_me": stale_ids}},
|
||||
)
|
||||
|
||||
pinned_ids = set(user_doc.get("agent_preferences", {}).get("pinned", []))
|
||||
|
||||
list_shared_agents = [
|
||||
@@ -2229,7 +2359,6 @@ class SharedAgents(Resource):
|
||||
]
|
||||
|
||||
return make_response(jsonify(list_shared_agents), 200)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving shared agents: {err}")
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
@@ -3762,20 +3891,21 @@ class GetChunks(Resource):
|
||||
metadata = chunk.get("metadata", {})
|
||||
|
||||
# Filter by path if provided
|
||||
|
||||
if path:
|
||||
chunk_source = metadata.get("source", "")
|
||||
# Check if the chunk's source matches the requested path
|
||||
|
||||
if not chunk_source or not chunk_source.endswith(path):
|
||||
continue
|
||||
|
||||
# Filter by search term if provided
|
||||
|
||||
if search_term:
|
||||
text_match = search_term in chunk.get("text", "").lower()
|
||||
title_match = search_term in metadata.get("title", "").lower()
|
||||
|
||||
if not (text_match or title_match):
|
||||
continue
|
||||
|
||||
filtered_chunks.append(chunk)
|
||||
|
||||
chunks = filtered_chunks
|
||||
@@ -3937,7 +4067,6 @@ class UpdateChunk(Resource):
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
metadata["token_count"] = token_count
|
||||
|
||||
if not ObjectId.is_valid(doc_id):
|
||||
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
|
||||
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
|
||||
@@ -3952,7 +4081,6 @@ class UpdateChunk(Resource):
|
||||
existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None)
|
||||
if not existing_chunk:
|
||||
return make_response(jsonify({"error": "Chunk not found"}), 404)
|
||||
|
||||
new_text = text if text is not None else existing_chunk["text"]
|
||||
|
||||
if metadata is not None:
|
||||
@@ -3960,10 +4088,8 @@ class UpdateChunk(Resource):
|
||||
new_metadata.update(metadata)
|
||||
else:
|
||||
new_metadata = existing_chunk["metadata"].copy()
|
||||
|
||||
if text is not None:
|
||||
new_metadata["token_count"] = num_tokens_from_string(new_text)
|
||||
|
||||
try:
|
||||
new_chunk_id = store.add_chunk(new_text, new_metadata)
|
||||
|
||||
@@ -4019,7 +4145,6 @@ class StoreAttachment(Resource):
|
||||
jsonify({"status": "error", "message": "Missing file"}),
|
||||
400,
|
||||
)
|
||||
|
||||
user = None
|
||||
if decoded_token:
|
||||
user = safe_filename(decoded_token.get("sub"))
|
||||
@@ -4034,7 +4159,6 @@ class StoreAttachment(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Authentication required"}), 401
|
||||
)
|
||||
|
||||
try:
|
||||
attachment_id = ObjectId()
|
||||
original_filename = safe_filename(os.path.basename(file.filename))
|
||||
@@ -4076,7 +4200,6 @@ class ServeImage(Resource):
|
||||
content_type = f"image/{extension}"
|
||||
if extension == "jpg":
|
||||
content_type = "image/jpeg"
|
||||
|
||||
response = make_response(file_obj.read())
|
||||
response.headers.set("Content-Type", content_type)
|
||||
response.headers.set("Cache-Control", "max-age=86400")
|
||||
@@ -4121,18 +4244,29 @@ class DirectoryStructure(Resource):
|
||||
)
|
||||
|
||||
directory_structure = doc.get("directory_structure", {})
|
||||
base_path = doc.get("file_path", "")
|
||||
|
||||
provider = None
|
||||
remote_data = doc.get("remote_data")
|
||||
try:
|
||||
if isinstance(remote_data, str) and remote_data:
|
||||
remote_data_obj = json.loads(remote_data)
|
||||
provider = remote_data_obj.get("provider")
|
||||
except Exception as e:
|
||||
current_app.logger.warning(
|
||||
f"Failed to parse remote_data for doc {doc_id}: {e}"
|
||||
)
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": True,
|
||||
"directory_structure": directory_structure,
|
||||
"base_path": doc.get("file_path", ""),
|
||||
"base_path": base_path,
|
||||
"provider": provider,
|
||||
}
|
||||
),
|
||||
200,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"Error retrieving directory structure: {e}", exc_info=True
|
||||
|
||||
@@ -47,6 +47,39 @@ def process_agent_webhook(self, agent_id, payload):
|
||||
return resp
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def ingest_connector_task(
|
||||
self,
|
||||
job_name,
|
||||
user,
|
||||
source_type,
|
||||
session_token=None,
|
||||
file_ids=None,
|
||||
folder_ids=None,
|
||||
recursive=True,
|
||||
retriever="classic",
|
||||
operation_mode="upload",
|
||||
doc_id=None,
|
||||
sync_frequency="never"
|
||||
):
|
||||
from application.worker import ingest_connector
|
||||
resp = ingest_connector(
|
||||
self,
|
||||
job_name,
|
||||
user,
|
||||
source_type,
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=recursive,
|
||||
retriever=retriever,
|
||||
operation_mode=operation_mode,
|
||||
doc_id=doc_id,
|
||||
sync_frequency=sync_frequency
|
||||
)
|
||||
return resp
|
||||
|
||||
|
||||
@celery.on_after_configure.connect
|
||||
def setup_periodic_tasks(sender, **kwargs):
|
||||
sender.add_periodic_task(
|
||||
|
||||
@@ -16,6 +16,7 @@ from application.api import api # noqa: E402
|
||||
from application.api.answer import answer # noqa: E402
|
||||
from application.api.internal.routes import internal # noqa: E402
|
||||
from application.api.user.routes import user # noqa: E402
|
||||
from application.api.connector.routes import connector # noqa: E402
|
||||
from application.celery_init import celery # noqa: E402
|
||||
from application.core.settings import settings # noqa: E402
|
||||
|
||||
@@ -30,6 +31,7 @@ app = Flask(__name__)
|
||||
app.register_blueprint(user)
|
||||
app.register_blueprint(answer)
|
||||
app.register_blueprint(internal)
|
||||
app.register_blueprint(connector)
|
||||
app.config.update(
|
||||
UPLOAD_FOLDER="inputs",
|
||||
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
|
||||
|
||||
@@ -40,6 +40,13 @@ class Settings(BaseSettings):
|
||||
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
|
||||
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
|
||||
|
||||
# Google Drive integration
|
||||
GOOGLE_CLIENT_ID: Optional[str] = None # Replace with your actual Google OAuth client ID
|
||||
GOOGLE_CLIENT_SECRET: Optional[str] = None# Replace with your actual Google OAuth client secret
|
||||
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback"
|
||||
##append ?provider={provider_name} in your Provider console like http://127.0.0.1:7091/api/connectors/callback?provider=google_drive
|
||||
|
||||
|
||||
# LLM Cache
|
||||
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
|
||||
|
||||
|
||||
18
application/parser/connectors/__init__.py
Normal file
18
application/parser/connectors/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
External knowledge base connectors for DocsGPT.
|
||||
|
||||
This module contains connectors for external knowledge bases and document storage systems
|
||||
that require authentication and specialized handling, separate from simple web scrapers.
|
||||
"""
|
||||
|
||||
from .base import BaseConnectorAuth, BaseConnectorLoader
|
||||
from .connector_creator import ConnectorCreator
|
||||
from .google_drive import GoogleDriveAuth, GoogleDriveLoader
|
||||
|
||||
__all__ = [
|
||||
'BaseConnectorAuth',
|
||||
'BaseConnectorLoader',
|
||||
'ConnectorCreator',
|
||||
'GoogleDriveAuth',
|
||||
'GoogleDriveLoader'
|
||||
]
|
||||
129
application/parser/connectors/base.py
Normal file
129
application/parser/connectors/base.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Base classes for external knowledge base connectors.
|
||||
|
||||
This module provides minimal abstract base classes that define the essential
|
||||
interface for external knowledge base connectors.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseConnectorAuth(ABC):
|
||||
"""
|
||||
Abstract base class for connector authentication.
|
||||
|
||||
Defines the minimal interface that all connector authentication
|
||||
implementations must follow.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_authorization_url(self, state: Optional[str] = None) -> str:
|
||||
"""
|
||||
Generate authorization URL for OAuth flows.
|
||||
|
||||
Args:
|
||||
state: Optional state parameter for CSRF protection
|
||||
|
||||
Returns:
|
||||
Authorization URL
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Exchange authorization code for access tokens.
|
||||
|
||||
Args:
|
||||
authorization_code: Authorization code from OAuth callback
|
||||
|
||||
Returns:
|
||||
Dictionary containing token information
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Refresh an expired access token.
|
||||
|
||||
Args:
|
||||
refresh_token: Refresh token
|
||||
|
||||
Returns:
|
||||
Dictionary containing refreshed token information
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Check if a token is expired.
|
||||
|
||||
Args:
|
||||
token_info: Token information dictionary
|
||||
|
||||
Returns:
|
||||
True if token is expired, False otherwise
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class BaseConnectorLoader(ABC):
|
||||
"""
|
||||
Abstract base class for connector loaders.
|
||||
|
||||
Defines the minimal interface that all connector loader
|
||||
implementations must follow.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def __init__(self, session_token: str):
|
||||
"""
|
||||
Initialize the connector loader.
|
||||
|
||||
Args:
|
||||
session_token: Authentication session token
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
|
||||
"""
|
||||
Load documents from the external knowledge base.
|
||||
|
||||
Args:
|
||||
inputs: Configuration dictionary containing:
|
||||
- file_ids: Optional list of specific file IDs to load
|
||||
- folder_ids: Optional list of folder IDs to browse/download
|
||||
- limit: Maximum number of items to return
|
||||
- list_only: If True, return metadata without content
|
||||
- recursive: Whether to recursively process folders
|
||||
|
||||
Returns:
|
||||
List of Document objects
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Download files/folders to a local directory.
|
||||
|
||||
Args:
|
||||
local_dir: Local directory path to download files to
|
||||
source_config: Configuration for what to download
|
||||
|
||||
Returns:
|
||||
Dictionary containing download results:
|
||||
- files_downloaded: Number of files downloaded
|
||||
- directory_path: Path where files were downloaded
|
||||
- empty_result: Whether no files were downloaded
|
||||
- source_type: Type of connector
|
||||
- config_used: Configuration that was used
|
||||
- error: Error message if download failed (optional)
|
||||
"""
|
||||
pass
|
||||
81
application/parser/connectors/connector_creator.py
Normal file
81
application/parser/connectors/connector_creator.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
|
||||
|
||||
class ConnectorCreator:
|
||||
"""
|
||||
Factory class for creating external knowledge base connectors and auth providers.
|
||||
|
||||
These are different from remote loaders as they typically require
|
||||
authentication and connect to external document storage systems.
|
||||
"""
|
||||
|
||||
connectors = {
|
||||
"google_drive": GoogleDriveLoader,
|
||||
}
|
||||
|
||||
auth_providers = {
|
||||
"google_drive": GoogleDriveAuth,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def create_connector(cls, connector_type, *args, **kwargs):
|
||||
"""
|
||||
Create a connector instance for the specified type.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector to create (e.g., 'google_drive')
|
||||
*args, **kwargs: Arguments to pass to the connector constructor
|
||||
|
||||
Returns:
|
||||
Connector instance
|
||||
|
||||
Raises:
|
||||
ValueError: If connector type is not supported
|
||||
"""
|
||||
connector_class = cls.connectors.get(connector_type.lower())
|
||||
if not connector_class:
|
||||
raise ValueError(f"No connector class found for type {connector_type}")
|
||||
return connector_class(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def create_auth(cls, connector_type):
|
||||
"""
|
||||
Create an auth provider instance for the specified connector type.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector auth to create (e.g., 'google_drive')
|
||||
|
||||
Returns:
|
||||
Auth provider instance
|
||||
|
||||
Raises:
|
||||
ValueError: If connector type is not supported for auth
|
||||
"""
|
||||
auth_class = cls.auth_providers.get(connector_type.lower())
|
||||
if not auth_class:
|
||||
raise ValueError(f"No auth class found for type {connector_type}")
|
||||
return auth_class()
|
||||
|
||||
@classmethod
|
||||
def get_supported_connectors(cls):
|
||||
"""
|
||||
Get list of supported connector types.
|
||||
|
||||
Returns:
|
||||
List of supported connector type strings
|
||||
"""
|
||||
return list(cls.connectors.keys())
|
||||
|
||||
@classmethod
|
||||
def is_supported(cls, connector_type):
|
||||
"""
|
||||
Check if a connector type is supported.
|
||||
|
||||
Args:
|
||||
connector_type: Type of connector to check
|
||||
|
||||
Returns:
|
||||
True if supported, False otherwise
|
||||
"""
|
||||
return connector_type.lower() in cls.connectors
|
||||
10
application/parser/connectors/google_drive/__init__.py
Normal file
10
application/parser/connectors/google_drive/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Google Drive connector for DocsGPT.
|
||||
|
||||
This module provides authentication and document loading capabilities for Google Drive.
|
||||
"""
|
||||
|
||||
from .auth import GoogleDriveAuth
|
||||
from .loader import GoogleDriveLoader
|
||||
|
||||
__all__ = ['GoogleDriveAuth', 'GoogleDriveLoader']
|
||||
268
application/parser/connectors/google_drive/auth.py
Normal file
268
application/parser/connectors/google_drive/auth.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import logging
|
||||
import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import Flow
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.connectors.base import BaseConnectorAuth
|
||||
|
||||
|
||||
class GoogleDriveAuth(BaseConnectorAuth):
|
||||
"""
|
||||
Handles Google OAuth 2.0 authentication for Google Drive access.
|
||||
"""
|
||||
|
||||
SCOPES = [
|
||||
'https://www.googleapis.com/auth/drive.readonly',
|
||||
'https://www.googleapis.com/auth/drive.metadata.readonly'
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
self.client_id = settings.GOOGLE_CLIENT_ID
|
||||
self.client_secret = settings.GOOGLE_CLIENT_SECRET
|
||||
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}?provider=google_drive"
|
||||
|
||||
if not self.client_id or not self.client_secret:
|
||||
raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")
|
||||
|
||||
|
||||
|
||||
def get_authorization_url(self, state: Optional[str] = None) -> str:
|
||||
try:
|
||||
flow = Flow.from_client_config(
|
||||
{
|
||||
"web": {
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"redirect_uris": [self.redirect_uri]
|
||||
}
|
||||
},
|
||||
scopes=self.SCOPES
|
||||
)
|
||||
flow.redirect_uri = self.redirect_uri
|
||||
|
||||
authorization_url, _ = flow.authorization_url(
|
||||
access_type='offline',
|
||||
prompt='consent',
|
||||
include_granted_scopes='true',
|
||||
state=state
|
||||
)
|
||||
|
||||
return authorization_url
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error generating authorization URL: {e}")
|
||||
raise
|
||||
|
||||
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
|
||||
try:
|
||||
if not authorization_code:
|
||||
raise ValueError("Authorization code is required")
|
||||
|
||||
flow = Flow.from_client_config(
|
||||
{
|
||||
"web": {
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"redirect_uris": [self.redirect_uri]
|
||||
}
|
||||
},
|
||||
scopes=self.SCOPES
|
||||
)
|
||||
flow.redirect_uri = self.redirect_uri
|
||||
|
||||
flow.fetch_token(code=authorization_code)
|
||||
|
||||
credentials = flow.credentials
|
||||
|
||||
if not credentials.refresh_token:
|
||||
logging.warning("OAuth flow did not return a refresh_token.")
|
||||
if not credentials.token:
|
||||
raise ValueError("OAuth flow did not return an access token")
|
||||
|
||||
if not credentials.token_uri:
|
||||
credentials.token_uri = "https://oauth2.googleapis.com/token"
|
||||
|
||||
if not credentials.client_id:
|
||||
credentials.client_id = self.client_id
|
||||
|
||||
if not credentials.client_secret:
|
||||
credentials.client_secret = self.client_secret
|
||||
|
||||
if not credentials.refresh_token:
|
||||
raise ValueError(
|
||||
"No refresh token received. This typically happens when offline access wasn't granted. "
|
||||
)
|
||||
|
||||
return {
|
||||
'access_token': credentials.token,
|
||||
'refresh_token': credentials.refresh_token,
|
||||
'token_uri': credentials.token_uri,
|
||||
'client_id': credentials.client_id,
|
||||
'client_secret': credentials.client_secret,
|
||||
'scopes': credentials.scopes,
|
||||
'expiry': credentials.expiry.isoformat() if credentials.expiry else None
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error exchanging code for tokens: {e}")
|
||||
raise
|
||||
|
||||
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
|
||||
try:
|
||||
if not refresh_token:
|
||||
raise ValueError("Refresh token is required")
|
||||
|
||||
credentials = Credentials(
|
||||
token=None,
|
||||
refresh_token=refresh_token,
|
||||
token_uri="https://oauth2.googleapis.com/token",
|
||||
client_id=self.client_id,
|
||||
client_secret=self.client_secret
|
||||
)
|
||||
|
||||
from google.auth.transport.requests import Request
|
||||
credentials.refresh(Request())
|
||||
|
||||
return {
|
||||
'access_token': credentials.token,
|
||||
'refresh_token': refresh_token,
|
||||
'token_uri': credentials.token_uri,
|
||||
'client_id': credentials.client_id,
|
||||
'client_secret': credentials.client_secret,
|
||||
'scopes': credentials.scopes,
|
||||
'expiry': credentials.expiry.isoformat() if credentials.expiry else None
|
||||
}
|
||||
except Exception as e:
|
||||
logging.error(f"Error refreshing access token: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def create_credentials_from_token_info(self, token_info: Dict[str, Any]) -> Credentials:
|
||||
from application.core.settings import settings
|
||||
|
||||
access_token = token_info.get('access_token')
|
||||
if not access_token:
|
||||
raise ValueError("No access token found in token_info")
|
||||
|
||||
credentials = Credentials(
|
||||
token=access_token,
|
||||
refresh_token=token_info.get('refresh_token'),
|
||||
token_uri= 'https://oauth2.googleapis.com/token',
|
||||
client_id=settings.GOOGLE_CLIENT_ID,
|
||||
client_secret=settings.GOOGLE_CLIENT_SECRET,
|
||||
scopes=token_info.get('scopes', ['https://www.googleapis.com/auth/drive.readonly'])
|
||||
)
|
||||
|
||||
if not credentials.token:
|
||||
raise ValueError("Credentials created without valid access token")
|
||||
|
||||
return credentials
|
||||
|
||||
def build_drive_service(self, credentials: Credentials):
|
||||
try:
|
||||
if not credentials:
|
||||
raise ValueError("No credentials provided")
|
||||
|
||||
if not credentials.token and not credentials.refresh_token:
|
||||
raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
|
||||
|
||||
needs_refresh = credentials.expired or not credentials.token
|
||||
if needs_refresh:
|
||||
if credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
credentials.refresh(Request())
|
||||
except Exception as refresh_error:
|
||||
raise ValueError(f"Failed to refresh credentials: {refresh_error}")
|
||||
else:
|
||||
raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
|
||||
|
||||
return build('drive', 'v3', credentials=credentials)
|
||||
|
||||
except HttpError as e:
|
||||
raise ValueError(f"Failed to build Google Drive service: HTTP {e.resp.status}")
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to build Google Drive service: {str(e)}")
|
||||
|
||||
def is_token_expired(self, token_info):
|
||||
if 'expiry' in token_info and token_info['expiry']:
|
||||
try:
|
||||
from dateutil import parser
|
||||
# Google Drive provides timezone-aware ISO8601 dates
|
||||
expiry_dt = parser.parse(token_info['expiry'])
|
||||
current_time = datetime.datetime.now(datetime.timezone.utc)
|
||||
return current_time >= expiry_dt - datetime.timedelta(seconds=60)
|
||||
except Exception:
|
||||
return True
|
||||
|
||||
if 'access_token' in token_info and token_info['access_token']:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def get_token_info_from_session(self, session_token: str) -> Dict[str, Any]:
|
||||
try:
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
|
||||
mongo = MongoDB.get_client()
|
||||
db = mongo[settings.MONGO_DB_NAME]
|
||||
|
||||
sessions_collection = db["connector_sessions"]
|
||||
session = sessions_collection.find_one({"session_token": session_token})
|
||||
if not session:
|
||||
raise ValueError(f"Invalid session token: {session_token}")
|
||||
|
||||
if "token_info" not in session:
|
||||
raise ValueError("Session missing token information")
|
||||
|
||||
token_info = session["token_info"]
|
||||
if not token_info:
|
||||
raise ValueError("Invalid token information")
|
||||
|
||||
required_fields = ["access_token", "refresh_token"]
|
||||
missing_fields = [field for field in required_fields if field not in token_info or not token_info.get(field)]
|
||||
if missing_fields:
|
||||
raise ValueError(f"Missing required token fields: {missing_fields}")
|
||||
|
||||
if 'client_id' not in token_info:
|
||||
token_info['client_id'] = settings.GOOGLE_CLIENT_ID
|
||||
if 'client_secret' not in token_info:
|
||||
token_info['client_secret'] = settings.GOOGLE_CLIENT_SECRET
|
||||
if 'token_uri' not in token_info:
|
||||
token_info['token_uri'] = 'https://oauth2.googleapis.com/token'
|
||||
|
||||
return token_info
|
||||
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to retrieve Google Drive token information: {str(e)}")
|
||||
|
||||
def validate_credentials(self, credentials: Credentials) -> bool:
|
||||
"""
|
||||
Validate Google Drive credentials by making a test API call.
|
||||
|
||||
Args:
|
||||
credentials: Google credentials object
|
||||
|
||||
Returns:
|
||||
True if credentials are valid, False otherwise
|
||||
"""
|
||||
try:
|
||||
service = self.build_drive_service(credentials)
|
||||
service.about().get(fields="user").execute()
|
||||
return True
|
||||
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error validating credentials: {e}")
|
||||
return False
|
||||
except Exception as e:
|
||||
logging.error(f"Error validating credentials: {e}")
|
||||
return False
|
||||
536
application/parser/connectors/google_drive/loader.py
Normal file
536
application/parser/connectors/google_drive/loader.py
Normal file
@@ -0,0 +1,536 @@
|
||||
"""
|
||||
Google Drive loader for DocsGPT.
|
||||
Loads documents from Google Drive using Google Drive API.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from googleapiclient.http import MediaIoBaseDownload
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.parser.connectors.base import BaseConnectorLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class GoogleDriveLoader(BaseConnectorLoader):
|
||||
|
||||
SUPPORTED_MIME_TYPES = {
|
||||
'application/pdf': '.pdf',
|
||||
'application/vnd.google-apps.document': '.docx',
|
||||
'application/vnd.google-apps.presentation': '.pptx',
|
||||
'application/vnd.google-apps.spreadsheet': '.xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
|
||||
'application/msword': '.doc',
|
||||
'application/vnd.ms-powerpoint': '.ppt',
|
||||
'application/vnd.ms-excel': '.xls',
|
||||
'text/plain': '.txt',
|
||||
'text/csv': '.csv',
|
||||
'text/html': '.html',
|
||||
'application/rtf': '.rtf',
|
||||
'image/jpeg': '.jpg',
|
||||
'image/jpg': '.jpg',
|
||||
'image/png': '.png',
|
||||
}
|
||||
|
||||
EXPORT_FORMATS = {
|
||||
'application/vnd.google-apps.document': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.google-apps.presentation': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'application/vnd.google-apps.spreadsheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||
}
|
||||
|
||||
def __init__(self, session_token: str):
|
||||
self.auth = GoogleDriveAuth()
|
||||
self.session_token = session_token
|
||||
|
||||
token_info = self.auth.get_token_info_from_session(session_token)
|
||||
self.credentials = self.auth.create_credentials_from_token_info(token_info)
|
||||
|
||||
try:
|
||||
self.service = self.auth.build_drive_service(self.credentials)
|
||||
except Exception as e:
|
||||
logging.warning(f"Could not build Google Drive service: {e}")
|
||||
self.service = None
|
||||
|
||||
self.next_page_token = None
|
||||
|
||||
|
||||
|
||||
def _process_file(self, file_metadata: Dict[str, Any], load_content: bool = True) -> Optional[Document]:
|
||||
try:
|
||||
file_id = file_metadata.get('id')
|
||||
file_name = file_metadata.get('name', 'Unknown')
|
||||
mime_type = file_metadata.get('mimeType', 'application/octet-stream')
|
||||
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
return None
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
logging.info(f"Skipping unsupported file type: {mime_type} for file {file_name}")
|
||||
return None
|
||||
# Google Drive provides timezone-aware ISO8601 dates
|
||||
doc_metadata = {
|
||||
'file_name': file_name,
|
||||
'mime_type': mime_type,
|
||||
'size': file_metadata.get('size', None),
|
||||
'created_time': file_metadata.get('createdTime'),
|
||||
'modified_time': file_metadata.get('modifiedTime'),
|
||||
'parents': file_metadata.get('parents', []),
|
||||
'source': 'google_drive'
|
||||
}
|
||||
|
||||
if not load_content:
|
||||
return Document(
|
||||
text="",
|
||||
doc_id=file_id,
|
||||
extra_info=doc_metadata
|
||||
)
|
||||
|
||||
content = self._download_file_content(file_id, mime_type)
|
||||
if content is None:
|
||||
logging.warning(f"Could not load content for file {file_name} ({file_id})")
|
||||
return None
|
||||
|
||||
return Document(
|
||||
text=content,
|
||||
doc_id=file_id,
|
||||
extra_info=doc_metadata
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing file: {e}")
|
||||
return None
|
||||
|
||||
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
|
||||
session_token = inputs.get('session_token')
|
||||
if session_token and session_token != self.session_token:
|
||||
logging.warning("Session token in inputs differs from loader's session token. Using loader's session token.")
|
||||
self.config = inputs
|
||||
|
||||
try:
|
||||
documents: List[Document] = []
|
||||
|
||||
folder_id = inputs.get('folder_id')
|
||||
file_ids = inputs.get('file_ids', [])
|
||||
limit = inputs.get('limit', 100)
|
||||
list_only = inputs.get('list_only', False)
|
||||
load_content = not list_only
|
||||
page_token = inputs.get('page_token')
|
||||
self.next_page_token = None
|
||||
|
||||
if file_ids:
|
||||
# Specific files requested: load them
|
||||
for file_id in file_ids:
|
||||
try:
|
||||
doc = self._load_file_by_id(file_id, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
elif hasattr(self, '_credential_refreshed') and self._credential_refreshed:
|
||||
self._credential_refreshed = False
|
||||
logging.info(f"Retrying load of file {file_id} after credential refresh")
|
||||
doc = self._load_file_by_id(file_id, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading file {file_id}: {e}")
|
||||
continue
|
||||
else:
|
||||
# Browsing mode: list immediate children of provided folder or root
|
||||
parent_id = folder_id if folder_id else 'root'
|
||||
documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token)
|
||||
|
||||
logging.info(f"Loaded {len(documents)} documents from Google Drive")
|
||||
return documents
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading data from Google Drive: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
|
||||
|
||||
def _load_file_by_id(self, file_id: str, load_content: bool = True) -> Optional[Document]:
|
||||
self._ensure_service()
|
||||
|
||||
try:
|
||||
file_metadata = self.service.files().get(
|
||||
fileId=file_id,
|
||||
fields='id,name,mimeType,size,createdTime,modifiedTime,parents'
|
||||
).execute()
|
||||
|
||||
return self._process_file(file_metadata, load_content=load_content)
|
||||
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error loading file {file_id}: {e.resp.status} - {e.content}")
|
||||
|
||||
if e.resp.status in [401, 403]:
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
self._ensure_service()
|
||||
return None
|
||||
except Exception as refresh_error:
|
||||
raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
|
||||
else:
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading file {file_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]:
|
||||
self._ensure_service()
|
||||
|
||||
documents: List[Document] = []
|
||||
|
||||
try:
|
||||
query = f"'{parent_id}' in parents and trashed=false"
|
||||
next_token_out: Optional[str] = None
|
||||
|
||||
while True:
|
||||
page_size = 100
|
||||
if limit:
|
||||
remaining = max(0, limit - len(documents))
|
||||
if remaining == 0:
|
||||
break
|
||||
page_size = min(100, remaining)
|
||||
|
||||
results = self.service.files().list(
|
||||
q=query,
|
||||
fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
|
||||
pageToken=page_token,
|
||||
pageSize=page_size
|
||||
).execute()
|
||||
|
||||
items = results.get('files', [])
|
||||
for item in items:
|
||||
mime_type = item.get('mimeType')
|
||||
if mime_type == 'application/vnd.google-apps.folder':
|
||||
doc_metadata = {
|
||||
'file_name': item.get('name', 'Unknown'),
|
||||
'mime_type': mime_type,
|
||||
'size': item.get('size', None),
|
||||
'created_time': item.get('createdTime'),
|
||||
'modified_time': item.get('modifiedTime'),
|
||||
'parents': item.get('parents', []),
|
||||
'source': 'google_drive',
|
||||
'is_folder': True
|
||||
}
|
||||
documents.append(Document(text="", doc_id=item.get('id'), extra_info=doc_metadata))
|
||||
else:
|
||||
doc = self._process_file(item, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
|
||||
if limit and len(documents) >= limit:
|
||||
self.next_page_token = results.get('nextPageToken')
|
||||
return documents
|
||||
|
||||
page_token = results.get('nextPageToken')
|
||||
next_token_out = page_token
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
self.next_page_token = next_token_out
|
||||
return documents
|
||||
except Exception as e:
|
||||
logging.error(f"Error listing items under parent {parent_id}: {e}")
|
||||
return documents
|
||||
|
||||
|
||||
|
||||
|
||||
def _download_file_content(self, file_id: str, mime_type: str) -> Optional[str]:
|
||||
if not self.credentials.token:
|
||||
logging.warning("No access token in credentials, attempting to refresh")
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
logging.info("Credentials refreshed successfully")
|
||||
self._ensure_service()
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to refresh credentials: {e}")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing or invalid refresh_token")
|
||||
else:
|
||||
logging.error("No access token and no refresh_token available")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
if self.credentials.expired:
|
||||
logging.warning("Credentials are expired, attempting to refresh")
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
logging.info("Credentials refreshed successfully")
|
||||
self._ensure_service()
|
||||
except Exception as e:
|
||||
logging.error(f"Failed to refresh expired credentials: {e}")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: expired credentials")
|
||||
else:
|
||||
logging.error("Credentials expired and no refresh_token available")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
try:
|
||||
if mime_type in self.EXPORT_FORMATS:
|
||||
export_mime_type = self.EXPORT_FORMATS[mime_type]
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType=export_mime_type
|
||||
)
|
||||
else:
|
||||
request = self.service.files().get_media(fileId=file_id)
|
||||
|
||||
file_io = io.BytesIO()
|
||||
downloader = MediaIoBaseDownload(file_io, request)
|
||||
|
||||
done = False
|
||||
while done is False:
|
||||
try:
|
||||
_, done = downloader.next_chunk()
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error during download of file {file_id}: {e}")
|
||||
return None
|
||||
|
||||
content_bytes = file_io.getvalue()
|
||||
|
||||
try:
|
||||
content = content_bytes.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
content = content_bytes.decode('latin-1')
|
||||
except UnicodeDecodeError:
|
||||
logging.error(f"Could not decode file {file_id} as text")
|
||||
return None
|
||||
|
||||
return content
|
||||
|
||||
except HttpError as e:
|
||||
logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")
|
||||
|
||||
if e.resp.status in [401, 403]:
|
||||
logging.error(f"Authentication error downloading file {file_id}")
|
||||
|
||||
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
|
||||
logging.info(f"Attempting to refresh credentials for file {file_id}")
|
||||
try:
|
||||
from google.auth.transport.requests import Request
|
||||
self.credentials.refresh(Request())
|
||||
logging.info("Credentials refreshed successfully")
|
||||
self._credential_refreshed = True
|
||||
self._ensure_service()
|
||||
return None
|
||||
except Exception as refresh_error:
|
||||
logging.error(f"Error refreshing credentials: {refresh_error}")
|
||||
raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
|
||||
else:
|
||||
logging.error("Cannot refresh credentials: missing refresh_token")
|
||||
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
|
||||
|
||||
return None
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading file {file_id}: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def _download_file_to_directory(self, file_id: str, local_dir: str) -> bool:
|
||||
try:
|
||||
self._ensure_service()
|
||||
return self._download_single_file(file_id, local_dir)
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading file {file_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _ensure_service(self):
|
||||
if not self.service:
|
||||
try:
|
||||
self.service = self.auth.build_drive_service(self.credentials)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Cannot access Google Drive: {e}")
|
||||
|
||||
def _download_single_file(self, file_id: str, local_dir: str) -> bool:
|
||||
file_metadata = self.service.files().get(
|
||||
fileId=file_id,
|
||||
fields='name,mimeType'
|
||||
).execute()
|
||||
|
||||
file_name = file_metadata['name']
|
||||
mime_type = file_metadata['mimeType']
|
||||
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
return False
|
||||
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
full_path = os.path.join(local_dir, file_name)
|
||||
|
||||
if mime_type in self.EXPORT_FORMATS:
|
||||
export_mime_type = self.EXPORT_FORMATS[mime_type]
|
||||
request = self.service.files().export_media(
|
||||
fileId=file_id,
|
||||
mimeType=export_mime_type
|
||||
)
|
||||
extension = self._get_extension_for_mime_type(export_mime_type)
|
||||
if not full_path.endswith(extension):
|
||||
full_path += extension
|
||||
else:
|
||||
request = self.service.files().get_media(fileId=file_id)
|
||||
|
||||
with open(full_path, 'wb') as f:
|
||||
downloader = MediaIoBaseDownload(f, request)
|
||||
done = False
|
||||
while not done:
|
||||
_, done = downloader.next_chunk()
|
||||
|
||||
return True
|
||||
|
||||
def _download_folder_recursive(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
|
||||
files_downloaded = 0
|
||||
try:
|
||||
os.makedirs(local_dir, exist_ok=True)
|
||||
|
||||
query = f"'{folder_id}' in parents and trashed=false"
|
||||
page_token = None
|
||||
|
||||
while True:
|
||||
results = self.service.files().list(
|
||||
q=query,
|
||||
fields='nextPageToken, files(id, name, mimeType)',
|
||||
pageToken=page_token,
|
||||
pageSize=1000
|
||||
).execute()
|
||||
|
||||
items = results.get('files', [])
|
||||
logging.info(f"Found {len(items)} items in folder {folder_id}")
|
||||
|
||||
for item in items:
|
||||
item_name = item['name']
|
||||
item_id = item['id']
|
||||
mime_type = item['mimeType']
|
||||
|
||||
if mime_type == 'application/vnd.google-apps.folder':
|
||||
if recursive:
|
||||
# Create subfolder and recurse
|
||||
subfolder_path = os.path.join(local_dir, item_name)
|
||||
os.makedirs(subfolder_path, exist_ok=True)
|
||||
subfolder_files = self._download_folder_recursive(
|
||||
item_id,
|
||||
subfolder_path,
|
||||
recursive
|
||||
)
|
||||
files_downloaded += subfolder_files
|
||||
logging.info(f"Downloaded {subfolder_files} files from subfolder {item_name}")
|
||||
else:
|
||||
# Download file
|
||||
success = self._download_single_file(item_id, local_dir)
|
||||
if success:
|
||||
files_downloaded += 1
|
||||
logging.info(f"Downloaded file: {item_name}")
|
||||
else:
|
||||
logging.warning(f"Failed to download file: {item_name}")
|
||||
|
||||
page_token = results.get('nextPageToken')
|
||||
if not page_token:
|
||||
break
|
||||
|
||||
return files_downloaded
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error in _download_folder_recursive for folder {folder_id}: {e}", exc_info=True)
|
||||
return files_downloaded
|
||||
|
||||
def _get_extension_for_mime_type(self, mime_type: str) -> str:
|
||||
extensions = {
|
||||
'application/pdf': '.pdf',
|
||||
'text/plain': '.txt',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
|
||||
'text/html': '.html',
|
||||
'text/markdown': '.md',
|
||||
}
|
||||
return extensions.get(mime_type, '.bin')
|
||||
|
||||
def _download_folder_contents(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
|
||||
try:
|
||||
self._ensure_service()
|
||||
return self._download_folder_recursive(folder_id, local_dir, recursive)
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
|
||||
return 0
|
||||
|
||||
def download_to_directory(self, local_dir: str, source_config: dict = None) -> dict:
|
||||
if source_config is None:
|
||||
source_config = {}
|
||||
|
||||
config = source_config if source_config else getattr(self, 'config', {})
|
||||
files_downloaded = 0
|
||||
|
||||
try:
|
||||
folder_ids = config.get('folder_ids', [])
|
||||
file_ids = config.get('file_ids', [])
|
||||
recursive = config.get('recursive', True)
|
||||
|
||||
self._ensure_service()
|
||||
|
||||
if file_ids:
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [file_ids]
|
||||
|
||||
for file_id in file_ids:
|
||||
if self._download_file_to_directory(file_id, local_dir):
|
||||
files_downloaded += 1
|
||||
|
||||
# Process folders
|
||||
if folder_ids:
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [folder_ids]
|
||||
|
||||
for folder_id in folder_ids:
|
||||
try:
|
||||
folder_metadata = self.service.files().get(
|
||||
fileId=folder_id,
|
||||
fields='name'
|
||||
).execute()
|
||||
folder_name = folder_metadata.get('name', '')
|
||||
folder_path = os.path.join(local_dir, folder_name)
|
||||
os.makedirs(folder_path, exist_ok=True)
|
||||
|
||||
folder_files = self._download_folder_recursive(
|
||||
folder_id,
|
||||
folder_path,
|
||||
recursive
|
||||
)
|
||||
files_downloaded += folder_files
|
||||
logging.info(f"Downloaded {folder_files} files from folder {folder_name}")
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
|
||||
|
||||
if not file_ids and not folder_ids:
|
||||
raise ValueError("No folder_ids or file_ids provided for download")
|
||||
|
||||
return {
|
||||
"files_downloaded": files_downloaded,
|
||||
"directory_path": local_dir,
|
||||
"empty_result": files_downloaded == 0,
|
||||
"source_type": "google_drive",
|
||||
"config_used": config
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"files_downloaded": files_downloaded,
|
||||
"directory_path": local_dir,
|
||||
"empty_result": True,
|
||||
"source_type": "google_drive",
|
||||
"config_used": config,
|
||||
"error": str(e)
|
||||
}
|
||||
@@ -6,6 +6,16 @@ from application.parser.remote.github_loader import GitHubLoader
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
"""
|
||||
Factory class for creating remote content loaders.
|
||||
|
||||
These loaders fetch content from remote web sources like URLs,
|
||||
sitemaps, web crawlers, social media platforms, etc.
|
||||
|
||||
For external knowledge base connectors (like Google Drive),
|
||||
use ConnectorCreator instead.
|
||||
"""
|
||||
|
||||
loaders = {
|
||||
"url": WebLoader,
|
||||
"sitemap": SitemapLoader,
|
||||
@@ -18,5 +28,5 @@ class RemoteCreator:
|
||||
def create_loader(cls, type, *args, **kwargs):
|
||||
loader_class = cls.loaders.get(type.lower())
|
||||
if not loader_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
raise ValueError(f"No loader class found for type {type}")
|
||||
return loader_class(*args, **kwargs)
|
||||
|
||||
@@ -14,6 +14,9 @@ Flask==3.1.1
|
||||
faiss-cpu==1.9.0.post1
|
||||
flask-restx==1.3.0
|
||||
google-genai==1.3.0
|
||||
google-api-python-client==2.179.0
|
||||
google-auth-httplib2==0.2.0
|
||||
google-auth-oauthlib==1.2.2
|
||||
gTTS==2.5.4
|
||||
gunicorn==23.0.0
|
||||
javalang==0.13.0
|
||||
|
||||
@@ -5,10 +5,6 @@ class BaseRetriever(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def gen(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def search(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.retriever.base import BaseRetriever
|
||||
@@ -20,10 +21,20 @@ class ClassicRAG(BaseRetriever):
|
||||
api_key=settings.API_KEY,
|
||||
decoded_token=None,
|
||||
):
|
||||
self.original_question = ""
|
||||
"""Initialize ClassicRAG retriever with vectorstore sources and LLM configuration"""
|
||||
self.original_question = source.get("question", "")
|
||||
self.chat_history = chat_history if chat_history is not None else []
|
||||
self.prompt = prompt
|
||||
self.chunks = chunks
|
||||
if isinstance(chunks, str):
|
||||
try:
|
||||
self.chunks = int(chunks)
|
||||
except ValueError:
|
||||
logging.warning(
|
||||
f"Invalid chunks value '{chunks}', using default value 2"
|
||||
)
|
||||
self.chunks = 2
|
||||
else:
|
||||
self.chunks = chunks
|
||||
self.gpt_model = gpt_model
|
||||
self.token_limit = (
|
||||
token_limit
|
||||
@@ -44,25 +55,52 @@ class ClassicRAG(BaseRetriever):
|
||||
user_api_key=self.user_api_key,
|
||||
decoded_token=decoded_token,
|
||||
)
|
||||
self.vectorstore = source["active_docs"] if "active_docs" in source else None
|
||||
|
||||
if "active_docs" in source and source["active_docs"] is not None:
|
||||
if isinstance(source["active_docs"], list):
|
||||
self.vectorstores = source["active_docs"]
|
||||
else:
|
||||
self.vectorstores = [source["active_docs"]]
|
||||
else:
|
||||
self.vectorstores = []
|
||||
self.question = self._rephrase_query()
|
||||
self.decoded_token = decoded_token
|
||||
self._validate_vectorstore_config()
|
||||
|
||||
def _validate_vectorstore_config(self):
|
||||
"""Validate vectorstore IDs and remove any empty/invalid entries"""
|
||||
if not self.vectorstores:
|
||||
logging.warning("No vectorstores configured for retrieval")
|
||||
return
|
||||
invalid_ids = [
|
||||
vs_id for vs_id in self.vectorstores if not vs_id or not vs_id.strip()
|
||||
]
|
||||
if invalid_ids:
|
||||
logging.warning(f"Found invalid vectorstore IDs: {invalid_ids}")
|
||||
self.vectorstores = [
|
||||
vs_id for vs_id in self.vectorstores if vs_id and vs_id.strip()
|
||||
]
|
||||
|
||||
def _rephrase_query(self):
|
||||
"""Rephrase user query with chat history context for better retrieval"""
|
||||
if (
|
||||
not self.original_question
|
||||
or not self.chat_history
|
||||
or self.chat_history == []
|
||||
or self.chunks == 0
|
||||
or self.vectorstore is None
|
||||
or not self.vectorstores
|
||||
):
|
||||
return self.original_question
|
||||
|
||||
prompt = f"""Given the following conversation history:
|
||||
|
||||
{self.chat_history}
|
||||
|
||||
|
||||
|
||||
Rephrase the following user question to be a standalone search query
|
||||
|
||||
that captures all relevant context from the conversation:
|
||||
|
||||
"""
|
||||
|
||||
messages = [
|
||||
@@ -79,44 +117,62 @@ class ClassicRAG(BaseRetriever):
|
||||
return self.original_question
|
||||
|
||||
def _get_data(self):
|
||||
if self.chunks == 0 or self.vectorstore is None:
|
||||
docs = []
|
||||
else:
|
||||
docsearch = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE, self.vectorstore, settings.EMBEDDINGS_KEY
|
||||
)
|
||||
docs_temp = docsearch.search(self.question, k=self.chunks)
|
||||
docs = [
|
||||
{
|
||||
"title": i.metadata.get(
|
||||
"title", i.metadata.get("post_title", i.page_content)
|
||||
).split("/")[-1],
|
||||
"text": i.page_content,
|
||||
"source": (
|
||||
i.metadata.get("source")
|
||||
if i.metadata.get("source")
|
||||
else "local"
|
||||
),
|
||||
}
|
||||
for i in docs_temp
|
||||
]
|
||||
"""Retrieve relevant documents from configured vectorstores"""
|
||||
if self.chunks == 0 or not self.vectorstores:
|
||||
return []
|
||||
all_docs = []
|
||||
chunks_per_source = max(1, self.chunks // len(self.vectorstores))
|
||||
|
||||
return docs
|
||||
for vectorstore_id in self.vectorstores:
|
||||
if vectorstore_id:
|
||||
try:
|
||||
docsearch = VectorCreator.create_vectorstore(
|
||||
settings.VECTOR_STORE, vectorstore_id, settings.EMBEDDINGS_KEY
|
||||
)
|
||||
docs_temp = docsearch.search(self.question, k=chunks_per_source)
|
||||
|
||||
def gen():
|
||||
pass
|
||||
for doc in docs_temp:
|
||||
if hasattr(doc, "page_content") and hasattr(doc, "metadata"):
|
||||
page_content = doc.page_content
|
||||
metadata = doc.metadata
|
||||
else:
|
||||
page_content = doc.get("text", doc.get("page_content", ""))
|
||||
metadata = doc.get("metadata", {})
|
||||
title = metadata.get(
|
||||
"title", metadata.get("post_title", page_content)
|
||||
)
|
||||
if isinstance(title, str):
|
||||
title = title.split("/")[-1]
|
||||
else:
|
||||
title = str(title).split("/")[-1]
|
||||
all_docs.append(
|
||||
{
|
||||
"title": title,
|
||||
"text": page_content,
|
||||
"source": metadata.get("source") or vectorstore_id,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logging.error(
|
||||
f"Error searching vectorstore {vectorstore_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
continue
|
||||
return all_docs
|
||||
|
||||
def search(self, query: str = ""):
|
||||
"""Search for documents using optional query override"""
|
||||
if query:
|
||||
self.original_question = query
|
||||
self.question = self._rephrase_query()
|
||||
return self._get_data()
|
||||
|
||||
def get_params(self):
|
||||
"""Return current retriever configuration parameters"""
|
||||
return {
|
||||
"question": self.original_question,
|
||||
"rephrased_question": self.question,
|
||||
"source": self.vectorstore,
|
||||
"sources": self.vectorstores,
|
||||
"chunks": self.chunks,
|
||||
"token_limit": self.token_limit,
|
||||
"gpt_model": self.gpt_model,
|
||||
|
||||
@@ -1,20 +1,28 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
from sentence_transformers import SentenceTransformer
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
from application.core.settings import settings
|
||||
|
||||
|
||||
class EmbeddingsWrapper:
|
||||
def __init__(self, model_name, *args, **kwargs):
|
||||
self.model = SentenceTransformer(model_name, config_kwargs={'allow_dangerous_deserialization': True}, *args, **kwargs)
|
||||
self.model = SentenceTransformer(
|
||||
model_name,
|
||||
config_kwargs={"allow_dangerous_deserialization": True},
|
||||
*args,
|
||||
**kwargs
|
||||
)
|
||||
self.dimension = self.model.get_sentence_embedding_dimension()
|
||||
|
||||
def embed_query(self, query: str):
|
||||
return self.model.encode(query).tolist()
|
||||
|
||||
|
||||
def embed_documents(self, documents: list):
|
||||
return self.model.encode(documents).tolist()
|
||||
|
||||
|
||||
def __call__(self, text):
|
||||
if isinstance(text, str):
|
||||
return self.embed_query(text)
|
||||
@@ -24,15 +32,14 @@ class EmbeddingsWrapper:
|
||||
raise ValueError("Input must be a string or a list of strings")
|
||||
|
||||
|
||||
|
||||
class EmbeddingsSingleton:
|
||||
_instances = {}
|
||||
|
||||
@staticmethod
|
||||
def get_instance(embeddings_name, *args, **kwargs):
|
||||
if embeddings_name not in EmbeddingsSingleton._instances:
|
||||
EmbeddingsSingleton._instances[embeddings_name] = EmbeddingsSingleton._create_instance(
|
||||
embeddings_name, *args, **kwargs
|
||||
EmbeddingsSingleton._instances[embeddings_name] = (
|
||||
EmbeddingsSingleton._create_instance(embeddings_name, *args, **kwargs)
|
||||
)
|
||||
return EmbeddingsSingleton._instances[embeddings_name]
|
||||
|
||||
@@ -40,9 +47,15 @@ class EmbeddingsSingleton:
|
||||
def _create_instance(embeddings_name, *args, **kwargs):
|
||||
embeddings_factory = {
|
||||
"openai_text-embedding-ada-002": OpenAIEmbeddings,
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper("sentence-transformers/all-mpnet-base-v2"),
|
||||
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper("hkunlp/instructor-large"),
|
||||
"huggingface_sentence-transformers/all-mpnet-base-v2": lambda: EmbeddingsWrapper(
|
||||
"sentence-transformers/all-mpnet-base-v2"
|
||||
),
|
||||
"huggingface_sentence-transformers-all-mpnet-base-v2": lambda: EmbeddingsWrapper(
|
||||
"sentence-transformers/all-mpnet-base-v2"
|
||||
),
|
||||
"huggingface_hkunlp/instructor-large": lambda: EmbeddingsWrapper(
|
||||
"hkunlp/instructor-large"
|
||||
),
|
||||
}
|
||||
|
||||
if embeddings_name in embeddings_factory:
|
||||
@@ -50,34 +63,63 @@ class EmbeddingsSingleton:
|
||||
else:
|
||||
return EmbeddingsWrapper(embeddings_name, *args, **kwargs)
|
||||
|
||||
|
||||
class BaseVectorStore(ABC):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def search(self, *args, **kwargs):
|
||||
"""Search for similar documents/chunks in the vectorstore"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def add_texts(self, texts, metadatas=None, *args, **kwargs):
|
||||
"""Add texts with their embeddings to the vectorstore"""
|
||||
pass
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
"""Delete the entire index/collection"""
|
||||
pass
|
||||
|
||||
def save_local(self, *args, **kwargs):
|
||||
"""Save vectorstore to local storage"""
|
||||
pass
|
||||
|
||||
def get_chunks(self, *args, **kwargs):
|
||||
"""Get all chunks from the vectorstore"""
|
||||
pass
|
||||
|
||||
def add_chunk(self, text, metadata=None, *args, **kwargs):
|
||||
"""Add a single chunk to the vectorstore"""
|
||||
pass
|
||||
|
||||
def delete_chunk(self, chunk_id, *args, **kwargs):
|
||||
"""Delete a specific chunk from the vectorstore"""
|
||||
pass
|
||||
|
||||
def is_azure_configured(self):
|
||||
return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME
|
||||
return (
|
||||
settings.OPENAI_API_BASE
|
||||
and settings.OPENAI_API_VERSION
|
||||
and settings.AZURE_DEPLOYMENT_NAME
|
||||
)
|
||||
|
||||
def _get_embeddings(self, embeddings_name, embeddings_key=None):
|
||||
if embeddings_name == "openai_text-embedding-ada-002":
|
||||
if self.is_azure_configured():
|
||||
os.environ["OPENAI_API_TYPE"] = "azure"
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
embeddings_name, model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name,
|
||||
openai_api_key=embeddings_key
|
||||
embeddings_name, openai_api_key=embeddings_key
|
||||
)
|
||||
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
if os.path.exists("./models/all-mpnet-base-v2"):
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
embeddings_name = "./models/all-mpnet-base-v2",
|
||||
embeddings_name="./models/all-mpnet-base-v2",
|
||||
)
|
||||
else:
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(
|
||||
@@ -87,4 +129,3 @@ class BaseVectorStore(ABC):
|
||||
embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name)
|
||||
|
||||
return embedding_instance
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
import shutil
|
||||
import string
|
||||
import tempfile
|
||||
from typing import Any, Dict
|
||||
import zipfile
|
||||
|
||||
from collections import Counter
|
||||
@@ -21,6 +22,7 @@ from application.api.answer.services.stream_processor import get_prompt
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.parser.chunking import Chunker
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from application.parser.embedding_pipeline import embed_and_store_documents
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
@@ -649,8 +651,11 @@ def remote_worker(
|
||||
"id": str(id),
|
||||
"type": loader,
|
||||
"remote_data": source_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
"sync_frequency": sync_frequency
|
||||
}
|
||||
|
||||
if operation_mode == "sync":
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
upload_index(full_path, file_data)
|
||||
except Exception as e:
|
||||
logging.error("Error in remote_worker task: %s", str(e), exc_info=True)
|
||||
@@ -707,7 +712,7 @@ def sync_worker(self, frequency):
|
||||
self, source_data, name, user, source_type, frequency, retriever, doc_id
|
||||
)
|
||||
sync_counts["total_sync_count"] += 1
|
||||
sync_counts[
|
||||
sync_counts[
|
||||
"sync_success" if resp["status"] == "success" else "sync_failure"
|
||||
] += 1
|
||||
return {
|
||||
@@ -744,7 +749,7 @@ def attachment_worker(self, file_info, user):
|
||||
input_files=[local_path], exclude_hidden=True, errors="ignore"
|
||||
)
|
||||
.load_data()[0]
|
||||
.text,
|
||||
.text,
|
||||
)
|
||||
|
||||
|
||||
@@ -835,3 +840,174 @@ def agent_webhook_worker(self, agent_id, payload):
|
||||
f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
|
||||
)
|
||||
return {"status": "success", "result": result}
|
||||
|
||||
|
||||
def ingest_connector(
|
||||
self,
|
||||
job_name: str,
|
||||
user: str,
|
||||
source_type: str,
|
||||
session_token=None,
|
||||
file_ids=None,
|
||||
folder_ids=None,
|
||||
recursive=True,
|
||||
retriever: str = "classic",
|
||||
operation_mode: str = "upload",
|
||||
doc_id=None,
|
||||
sync_frequency: str = "never",
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Ingestion for internal knowledge bases (GoogleDrive, etc.).
|
||||
|
||||
Args:
|
||||
job_name: Name of the ingestion job
|
||||
user: User identifier
|
||||
source_type: Type of remote source ("google_drive", "dropbox", etc.)
|
||||
session_token: Authentication token for the service
|
||||
file_ids: List of file IDs to download
|
||||
folder_ids: List of folder IDs to download
|
||||
recursive: Whether to recursively download folders
|
||||
retriever: Type of retriever to use
|
||||
operation_mode: "upload" for initial ingestion, "sync" for incremental sync
|
||||
doc_id: Document ID for sync operations (required when operation_mode="sync")
|
||||
sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")
|
||||
"""
|
||||
logging.info(f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}")
|
||||
self.update_state(state="PROGRESS", meta={"current": 1})
|
||||
|
||||
with tempfile.TemporaryDirectory() as temp_dir:
|
||||
try:
|
||||
# Step 1: Initialize the appropriate loader
|
||||
self.update_state(state="PROGRESS", meta={"current": 10, "status": "Initializing connector"})
|
||||
|
||||
if not session_token:
|
||||
raise ValueError(f"{source_type} connector requires session_token")
|
||||
|
||||
if not ConnectorCreator.is_supported(source_type):
|
||||
raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")
|
||||
|
||||
remote_loader = ConnectorCreator.create_connector(source_type, session_token)
|
||||
|
||||
# Create a clean config for storage
|
||||
api_source_config = {
|
||||
"file_ids": file_ids or [],
|
||||
"folder_ids": folder_ids or [],
|
||||
"recursive": recursive
|
||||
}
|
||||
|
||||
# Step 2: Download files to temp directory
|
||||
self.update_state(state="PROGRESS", meta={"current": 20, "status": "Downloading files"})
|
||||
download_info = remote_loader.download_to_directory(
|
||||
temp_dir,
|
||||
api_source_config
|
||||
)
|
||||
|
||||
if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
|
||||
logging.warning(f"No files were downloaded from {source_type}")
|
||||
# Create empty result directly instead of calling a separate method
|
||||
return {
|
||||
"name": job_name,
|
||||
"user": user,
|
||||
"tokens": 0,
|
||||
"type": source_type,
|
||||
"source_config": api_source_config,
|
||||
"directory_structure": "{}",
|
||||
}
|
||||
|
||||
# Step 3: Use SimpleDirectoryReader to process downloaded files
|
||||
self.update_state(state="PROGRESS", meta={"current": 40, "status": "Processing files"})
|
||||
reader = SimpleDirectoryReader(
|
||||
input_dir=temp_dir,
|
||||
recursive=True,
|
||||
required_exts=[
|
||||
".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
|
||||
".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
|
||||
".jpg", ".jpeg",
|
||||
],
|
||||
exclude_hidden=True,
|
||||
file_metadata=metadata_from_filename,
|
||||
)
|
||||
raw_docs = reader.load_data()
|
||||
directory_structure = getattr(reader, 'directory_structure', {})
|
||||
|
||||
|
||||
|
||||
# Step 4: Process documents (chunking, embedding, etc.)
|
||||
self.update_state(state="PROGRESS", meta={"current": 60, "status": "Processing documents"})
|
||||
|
||||
chunker = Chunker(
|
||||
chunking_strategy="classic_chunk",
|
||||
max_tokens=MAX_TOKENS,
|
||||
min_tokens=MIN_TOKENS,
|
||||
duplicate_headers=False,
|
||||
)
|
||||
raw_docs = chunker.chunk(documents=raw_docs)
|
||||
|
||||
# Preserve source information in document metadata
|
||||
for doc in raw_docs:
|
||||
if hasattr(doc, 'extra_info') and doc.extra_info:
|
||||
source = doc.extra_info.get('source')
|
||||
if source and os.path.isabs(source):
|
||||
# Convert absolute path to relative path
|
||||
doc.extra_info['source'] = os.path.relpath(source, start=temp_dir)
|
||||
|
||||
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
|
||||
|
||||
if operation_mode == "upload":
|
||||
id = ObjectId()
|
||||
elif operation_mode == "sync":
|
||||
if not doc_id or not ObjectId.is_valid(doc_id):
|
||||
logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
|
||||
raise ValueError("doc_id must be provided for sync operation.")
|
||||
id = ObjectId(doc_id)
|
||||
else:
|
||||
raise ValueError(f"Invalid operation_mode: {operation_mode}")
|
||||
|
||||
vector_store_path = os.path.join(temp_dir, "vector_store")
|
||||
os.makedirs(vector_store_path, exist_ok=True)
|
||||
|
||||
self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing documents"})
|
||||
embed_and_store_documents(docs, vector_store_path, id, self)
|
||||
|
||||
tokens = count_tokens_docs(docs)
|
||||
|
||||
# Step 6: Upload index files
|
||||
file_data = {
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": "connector",
|
||||
"remote_data": json.dumps({
|
||||
"provider": source_type,
|
||||
**api_source_config
|
||||
}),
|
||||
"directory_structure": json.dumps(directory_structure),
|
||||
"sync_frequency": sync_frequency
|
||||
}
|
||||
|
||||
if operation_mode == "sync":
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
else:
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
|
||||
upload_index(vector_store_path, file_data)
|
||||
|
||||
# Ensure we mark the task as complete
|
||||
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
|
||||
|
||||
logging.info(f"Remote ingestion completed: {job_name}")
|
||||
|
||||
return {
|
||||
"user": user,
|
||||
"name": job_name,
|
||||
"tokens": tokens,
|
||||
"type": source_type,
|
||||
"id": str(id),
|
||||
"status": "complete"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error during remote ingestion: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
@@ -29,7 +29,7 @@ export default function Hero({
|
||||
</div>
|
||||
|
||||
{/* Demo Buttons Section */}
|
||||
<div className="mb-8 w-full max-w-full md:mb-16">
|
||||
<div className="mb-3 w-full max-w-full md:mb-3">
|
||||
<div className="grid grid-cols-1 gap-3 text-xs md:grid-cols-1 md:gap-4 lg:grid-cols-2">
|
||||
{demos?.map(
|
||||
(demo: { header: string; query: string }, key: number) =>
|
||||
|
||||
@@ -45,6 +45,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
description: '',
|
||||
image: '',
|
||||
source: '',
|
||||
sources: [],
|
||||
chunks: '',
|
||||
retriever: '',
|
||||
prompt_id: 'default',
|
||||
@@ -150,7 +151,41 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
const formData = new FormData();
|
||||
formData.append('name', agent.name);
|
||||
formData.append('description', agent.description);
|
||||
formData.append('source', agent.source);
|
||||
|
||||
if (selectedSourceIds.size > 1) {
|
||||
const sourcesArray = Array.from(selectedSourceIds)
|
||||
.map((id) => {
|
||||
const sourceDoc = sourceDocs?.find(
|
||||
(source) =>
|
||||
source.id === id || source.retriever === id || source.name === id,
|
||||
);
|
||||
if (sourceDoc?.name === 'Default' && !sourceDoc?.id) {
|
||||
return 'default';
|
||||
}
|
||||
return sourceDoc?.id || id;
|
||||
})
|
||||
.filter(Boolean);
|
||||
formData.append('sources', JSON.stringify(sourcesArray));
|
||||
formData.append('source', '');
|
||||
} else if (selectedSourceIds.size === 1) {
|
||||
const singleSourceId = Array.from(selectedSourceIds)[0];
|
||||
const sourceDoc = sourceDocs?.find(
|
||||
(source) =>
|
||||
source.id === singleSourceId ||
|
||||
source.retriever === singleSourceId ||
|
||||
source.name === singleSourceId,
|
||||
);
|
||||
let finalSourceId;
|
||||
if (sourceDoc?.name === 'Default' && !sourceDoc?.id)
|
||||
finalSourceId = 'default';
|
||||
else finalSourceId = sourceDoc?.id || singleSourceId;
|
||||
formData.append('source', String(finalSourceId));
|
||||
formData.append('sources', JSON.stringify([]));
|
||||
} else {
|
||||
formData.append('source', '');
|
||||
formData.append('sources', JSON.stringify([]));
|
||||
}
|
||||
|
||||
formData.append('chunks', agent.chunks);
|
||||
formData.append('retriever', agent.retriever);
|
||||
formData.append('prompt_id', agent.prompt_id);
|
||||
@@ -196,7 +231,41 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
const formData = new FormData();
|
||||
formData.append('name', agent.name);
|
||||
formData.append('description', agent.description);
|
||||
formData.append('source', agent.source);
|
||||
|
||||
if (selectedSourceIds.size > 1) {
|
||||
const sourcesArray = Array.from(selectedSourceIds)
|
||||
.map((id) => {
|
||||
const sourceDoc = sourceDocs?.find(
|
||||
(source) =>
|
||||
source.id === id || source.retriever === id || source.name === id,
|
||||
);
|
||||
if (sourceDoc?.name === 'Default' && !sourceDoc?.id) {
|
||||
return 'default';
|
||||
}
|
||||
return sourceDoc?.id || id;
|
||||
})
|
||||
.filter(Boolean);
|
||||
formData.append('sources', JSON.stringify(sourcesArray));
|
||||
formData.append('source', '');
|
||||
} else if (selectedSourceIds.size === 1) {
|
||||
const singleSourceId = Array.from(selectedSourceIds)[0];
|
||||
const sourceDoc = sourceDocs?.find(
|
||||
(source) =>
|
||||
source.id === singleSourceId ||
|
||||
source.retriever === singleSourceId ||
|
||||
source.name === singleSourceId,
|
||||
);
|
||||
let finalSourceId;
|
||||
if (sourceDoc?.name === 'Default' && !sourceDoc?.id)
|
||||
finalSourceId = 'default';
|
||||
else finalSourceId = sourceDoc?.id || singleSourceId;
|
||||
formData.append('source', String(finalSourceId));
|
||||
formData.append('sources', JSON.stringify([]));
|
||||
} else {
|
||||
formData.append('source', '');
|
||||
formData.append('sources', JSON.stringify([]));
|
||||
}
|
||||
|
||||
formData.append('chunks', agent.chunks);
|
||||
formData.append('retriever', agent.retriever);
|
||||
formData.append('prompt_id', agent.prompt_id);
|
||||
@@ -293,9 +362,33 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
throw new Error('Failed to fetch agent');
|
||||
}
|
||||
const data = await response.json();
|
||||
if (data.source) setSelectedSourceIds(new Set([data.source]));
|
||||
else if (data.retriever)
|
||||
|
||||
if (data.sources && data.sources.length > 0) {
|
||||
const mappedSources = data.sources.map((sourceId: string) => {
|
||||
if (sourceId === 'default') {
|
||||
const defaultSource = sourceDocs?.find(
|
||||
(source) => source.name === 'Default',
|
||||
);
|
||||
return defaultSource?.retriever || 'classic';
|
||||
}
|
||||
return sourceId;
|
||||
});
|
||||
setSelectedSourceIds(new Set(mappedSources));
|
||||
} else if (data.source) {
|
||||
if (data.source === 'default') {
|
||||
const defaultSource = sourceDocs?.find(
|
||||
(source) => source.name === 'Default',
|
||||
);
|
||||
setSelectedSourceIds(
|
||||
new Set([defaultSource?.retriever || 'classic']),
|
||||
);
|
||||
} else {
|
||||
setSelectedSourceIds(new Set([data.source]));
|
||||
}
|
||||
} else if (data.retriever) {
|
||||
setSelectedSourceIds(new Set([data.retriever]));
|
||||
}
|
||||
|
||||
if (data.tools) setSelectedToolIds(new Set(data.tools));
|
||||
if (data.status === 'draft') setEffectiveMode('draft');
|
||||
if (data.json_schema) {
|
||||
@@ -311,25 +404,57 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
}, [agentId, mode, token]);
|
||||
|
||||
useEffect(() => {
|
||||
const selectedSource = Array.from(selectedSourceIds).map((id) =>
|
||||
sourceDocs?.find(
|
||||
(source) =>
|
||||
source.id === id || source.retriever === id || source.name === id,
|
||||
),
|
||||
);
|
||||
if (selectedSource[0]?.model === embeddingsName) {
|
||||
if (selectedSource[0] && 'id' in selectedSource[0]) {
|
||||
const selectedSources = Array.from(selectedSourceIds)
|
||||
.map((id) =>
|
||||
sourceDocs?.find(
|
||||
(source) =>
|
||||
source.id === id || source.retriever === id || source.name === id,
|
||||
),
|
||||
)
|
||||
.filter(Boolean);
|
||||
|
||||
if (selectedSources.length > 0) {
|
||||
// Handle multiple sources
|
||||
if (selectedSources.length > 1) {
|
||||
// Multiple sources selected - store in sources array
|
||||
const sourceIds = selectedSources
|
||||
.map((source) => source?.id)
|
||||
.filter((id): id is string => Boolean(id));
|
||||
setAgent((prev) => ({
|
||||
...prev,
|
||||
source: selectedSource[0]?.id || 'default',
|
||||
sources: sourceIds,
|
||||
source: '', // Clear single source for multiple sources
|
||||
retriever: '',
|
||||
}));
|
||||
} else
|
||||
setAgent((prev) => ({
|
||||
...prev,
|
||||
source: '',
|
||||
retriever: selectedSource[0]?.retriever || 'classic',
|
||||
}));
|
||||
} else {
|
||||
// Single source selected - maintain backward compatibility
|
||||
const selectedSource = selectedSources[0];
|
||||
if (selectedSource?.model === embeddingsName) {
|
||||
if (selectedSource && 'id' in selectedSource) {
|
||||
setAgent((prev) => ({
|
||||
...prev,
|
||||
source: selectedSource?.id || 'default',
|
||||
sources: [], // Clear sources array for single source
|
||||
retriever: '',
|
||||
}));
|
||||
} else {
|
||||
setAgent((prev) => ({
|
||||
...prev,
|
||||
source: '',
|
||||
sources: [], // Clear sources array
|
||||
retriever: selectedSource?.retriever || 'classic',
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No sources selected
|
||||
setAgent((prev) => ({
|
||||
...prev,
|
||||
source: '',
|
||||
sources: [],
|
||||
retriever: '',
|
||||
}));
|
||||
}
|
||||
}, [selectedSourceIds]);
|
||||
|
||||
@@ -461,7 +586,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
onChange={(e) => setAgent({ ...agent, name: e.target.value })}
|
||||
/>
|
||||
<textarea
|
||||
className="border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-3 h-32 w-full rounded-3xl border bg-white px-5 py-4 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E]"
|
||||
className="border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-3 h-32 w-full rounded-xl border bg-white px-5 py-4 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E]"
|
||||
placeholder="Describe your agent"
|
||||
value={agent.description}
|
||||
onChange={(e) =>
|
||||
@@ -510,7 +635,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
)
|
||||
.filter(Boolean)
|
||||
.join(', ')
|
||||
: 'Select source'}
|
||||
: 'Select sources'}
|
||||
</button>
|
||||
<MultiSelectPopup
|
||||
isOpen={isSourcePopupOpen}
|
||||
@@ -526,12 +651,10 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
selectedIds={selectedSourceIds}
|
||||
onSelectionChange={(newSelectedIds: Set<string | number>) => {
|
||||
setSelectedSourceIds(newSelectedIds);
|
||||
setIsSourcePopupOpen(false);
|
||||
}}
|
||||
title="Select Source"
|
||||
title="Select Sources"
|
||||
searchPlaceholder="Search sources..."
|
||||
noOptionsMessage="No source available"
|
||||
singleSelect={true}
|
||||
noOptionsMessage="No sources available"
|
||||
/>
|
||||
</div>
|
||||
<div className="mt-3">
|
||||
|
||||
@@ -10,6 +10,7 @@ export type Agent = {
|
||||
description: string;
|
||||
image: string;
|
||||
source: string;
|
||||
sources?: string[];
|
||||
chunks: string;
|
||||
retriever: string;
|
||||
prompt_id: string;
|
||||
|
||||
@@ -38,6 +38,7 @@ const endpoints = {
|
||||
UPDATE_TOOL_STATUS: '/api/update_tool_status',
|
||||
UPDATE_TOOL: '/api/update_tool',
|
||||
DELETE_TOOL: '/api/delete_tool',
|
||||
SYNC_CONNECTOR: '/api/connectors/sync',
|
||||
GET_CHUNKS: (
|
||||
docId: string,
|
||||
page: number,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import apiClient from '../client';
|
||||
import endpoints from '../endpoints';
|
||||
import { getSessionToken } from '../../utils/providerUtils';
|
||||
|
||||
const userService = {
|
||||
getConfig: (): Promise<any> => apiClient.get(endpoints.USER.CONFIG, null),
|
||||
@@ -111,6 +112,22 @@ const userService = {
|
||||
apiClient.post(endpoints.USER.MCP_TEST_CONNECTION, data, token),
|
||||
saveMCPServer: (data: any, token: string | null): Promise<any> =>
|
||||
apiClient.post(endpoints.USER.MCP_SAVE_SERVER, data, token),
|
||||
syncConnector: (
|
||||
docId: string,
|
||||
provider: string,
|
||||
token: string | null,
|
||||
): Promise<any> => {
|
||||
const sessionToken = getSessionToken(provider);
|
||||
return apiClient.post(
|
||||
endpoints.USER.SYNC_CONNECTOR,
|
||||
{
|
||||
source_id: docId,
|
||||
session_token: sessionToken,
|
||||
provider: provider,
|
||||
},
|
||||
token,
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
export default userService;
|
||||
|
||||
@@ -1 +1 @@
|
||||
<svg width="16px" height="16px" viewBox="0 0 1024 1024" class="icon" version="1.1" xmlns="http://www.w3.org/2000/svg" fill="#11ee1c" stroke="#11ee1c" stroke-width="83.96799999999999"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M866.133333 258.133333L362.666667 761.6l-204.8-204.8L98.133333 618.666667 362.666667 881.066667l563.2-563.2z" fill="#11ee1c"></path></g></svg>
|
||||
<svg width="16px" height="16px" viewBox="0 0 1024 1024" class="icon" version="1.1" xmlns="http://www.w3.org/2000/svg" fill="#11ee1c" stroke="#11ee1c" stroke-width="83.96799999999999"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M866.133333 258.133333L362.666667 761.6l-204.8-204.8L98.133333 618.666667 362.666667 881.066667l563.2-563.2z" fill="#0C9D35"></path></g></svg>
|
||||
|
Before Width: | Height: | Size: 490 B After Width: | Height: | Size: 490 B |
@@ -1 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="white" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="black" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
|
Before Width: | Height: | Size: 454 B After Width: | Height: | Size: 454 B |
@@ -1 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="black" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="white" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
|
Before Width: | Height: | Size: 454 B After Width: | Height: | Size: 454 B |
130
frontend/src/components/ConnectorAuth.tsx
Normal file
130
frontend/src/components/ConnectorAuth.tsx
Normal file
@@ -0,0 +1,130 @@
|
||||
import React, { useRef } from 'react';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
|
||||
interface ConnectorAuthProps {
|
||||
provider: string;
|
||||
onSuccess: (data: { session_token: string; user_email: string }) => void;
|
||||
onError: (error: string) => void;
|
||||
label?: string;
|
||||
}
|
||||
|
||||
const providerLabel = (provider: string) => {
|
||||
const map: Record<string, string> = {
|
||||
google_drive: 'Google Drive',
|
||||
};
|
||||
return map[provider] || provider.replace(/_/g, ' ');
|
||||
};
|
||||
|
||||
const ConnectorAuth: React.FC<ConnectorAuthProps> = ({
|
||||
provider,
|
||||
onSuccess,
|
||||
onError,
|
||||
label,
|
||||
}) => {
|
||||
const token = useSelector(selectToken);
|
||||
const completedRef = useRef(false);
|
||||
const intervalRef = useRef<number | null>(null);
|
||||
|
||||
const cleanup = () => {
|
||||
if (intervalRef.current) {
|
||||
clearInterval(intervalRef.current);
|
||||
intervalRef.current = null;
|
||||
}
|
||||
window.removeEventListener('message', handleAuthMessage as any);
|
||||
};
|
||||
|
||||
const handleAuthMessage = (event: MessageEvent) => {
|
||||
const successGeneric = event.data?.type === 'connector_auth_success';
|
||||
const successProvider =
|
||||
event.data?.type === `${provider}_auth_success` ||
|
||||
event.data?.type === 'google_drive_auth_success';
|
||||
const errorProvider =
|
||||
event.data?.type === `${provider}_auth_error` ||
|
||||
event.data?.type === 'google_drive_auth_error';
|
||||
|
||||
if (successGeneric || successProvider) {
|
||||
completedRef.current = true;
|
||||
cleanup();
|
||||
onSuccess({
|
||||
session_token: event.data.session_token,
|
||||
user_email: event.data.user_email || 'Connected User',
|
||||
});
|
||||
} else if (errorProvider) {
|
||||
completedRef.current = true;
|
||||
cleanup();
|
||||
onError(event.data.error || 'Authentication failed');
|
||||
}
|
||||
};
|
||||
|
||||
const handleAuth = async () => {
|
||||
try {
|
||||
completedRef.current = false;
|
||||
cleanup();
|
||||
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
const authResponse = await fetch(
|
||||
`${apiHost}/api/connectors/auth?provider=${provider}`,
|
||||
{
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
},
|
||||
);
|
||||
|
||||
if (!authResponse.ok) {
|
||||
throw new Error(
|
||||
`Failed to get authorization URL: ${authResponse.status}`,
|
||||
);
|
||||
}
|
||||
|
||||
const authData = await authResponse.json();
|
||||
if (!authData.success || !authData.authorization_url) {
|
||||
throw new Error(authData.error || 'Failed to get authorization URL');
|
||||
}
|
||||
|
||||
const authWindow = window.open(
|
||||
authData.authorization_url,
|
||||
`${provider}-auth`,
|
||||
'width=500,height=600,scrollbars=yes,resizable=yes',
|
||||
);
|
||||
if (!authWindow) {
|
||||
throw new Error(
|
||||
'Failed to open authentication window. Please allow popups.',
|
||||
);
|
||||
}
|
||||
|
||||
window.addEventListener('message', handleAuthMessage as any);
|
||||
|
||||
const checkClosed = window.setInterval(() => {
|
||||
if (authWindow.closed) {
|
||||
clearInterval(checkClosed);
|
||||
window.removeEventListener('message', handleAuthMessage as any);
|
||||
if (!completedRef.current) {
|
||||
onError('Authentication was cancelled');
|
||||
}
|
||||
}
|
||||
}, 1000);
|
||||
intervalRef.current = checkClosed;
|
||||
} catch (error) {
|
||||
onError(error instanceof Error ? error.message : 'Authentication failed');
|
||||
}
|
||||
};
|
||||
|
||||
const buttonLabel = label || `Connect ${providerLabel(provider)}`;
|
||||
|
||||
return (
|
||||
<button
|
||||
onClick={handleAuth}
|
||||
className="flex w-full items-center justify-center gap-2 rounded-lg bg-blue-500 px-4 py-3 text-white transition-colors hover:bg-blue-600"
|
||||
>
|
||||
<svg className="h-5 w-5" viewBox="0 0 24 24">
|
||||
<path
|
||||
fill="currentColor"
|
||||
d="M6.28 3l5.72 10H24l-5.72-10H6.28zm11.44 0L12 13l5.72 10H24L18.28 3h-.56zM0 13l5.72 10h5.72L5.72 13H0z"
|
||||
/>
|
||||
</svg>
|
||||
{buttonLabel}
|
||||
</button>
|
||||
);
|
||||
};
|
||||
|
||||
export default ConnectorAuth;
|
||||
731
frontend/src/components/ConnectorTreeComponent.tsx
Normal file
731
frontend/src/components/ConnectorTreeComponent.tsx
Normal file
@@ -0,0 +1,731 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
import Chunks from './Chunks';
|
||||
import ContextMenu, { MenuOption } from './ContextMenu';
|
||||
import userService from '../api/services/userService';
|
||||
import FileIcon from '../assets/file.svg';
|
||||
import FolderIcon from '../assets/folder.svg';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ThreeDots from '../assets/three-dots.svg';
|
||||
import EyeView from '../assets/eye-view.svg';
|
||||
import SyncIcon from '../assets/sync.svg';
|
||||
import { useOutsideAlerter } from '../hooks';
|
||||
|
||||
interface FileNode {
|
||||
type?: string;
|
||||
token_count?: number;
|
||||
size_bytes?: number;
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
interface DirectoryStructure {
|
||||
[key: string]: FileNode;
|
||||
}
|
||||
|
||||
interface ConnectorTreeComponentProps {
|
||||
docId: string;
|
||||
sourceName: string;
|
||||
onBackToDocuments: () => void;
|
||||
}
|
||||
|
||||
interface SearchResult {
|
||||
name: string;
|
||||
path: string;
|
||||
isFile: boolean;
|
||||
}
|
||||
|
||||
const ConnectorTreeComponent: React.FC<ConnectorTreeComponentProps> = ({
|
||||
docId,
|
||||
sourceName,
|
||||
onBackToDocuments,
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [loading, setLoading] = useState<boolean>(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [directoryStructure, setDirectoryStructure] =
|
||||
useState<DirectoryStructure | null>(null);
|
||||
const [currentPath, setCurrentPath] = useState<string[]>([]);
|
||||
const token = useSelector(selectToken);
|
||||
const [activeMenuId, setActiveMenuId] = useState<string | null>(null);
|
||||
const menuRefs = useRef<{
|
||||
[key: string]: React.RefObject<HTMLDivElement | null>;
|
||||
}>({});
|
||||
const [selectedFile, setSelectedFile] = useState<{
|
||||
id: string;
|
||||
name: string;
|
||||
} | null>(null);
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [searchResults, setSearchResults] = useState<SearchResult[]>([]);
|
||||
const searchDropdownRef = useRef<HTMLDivElement>(null);
|
||||
const [isSyncing, setIsSyncing] = useState<boolean>(false);
|
||||
const [syncProgress, setSyncProgress] = useState<number>(0);
|
||||
const [sourceProvider, setSourceProvider] = useState<string>('');
|
||||
const [syncDone, setSyncDone] = useState<boolean>(false);
|
||||
|
||||
useOutsideAlerter(
|
||||
searchDropdownRef,
|
||||
() => {
|
||||
setSearchQuery('');
|
||||
setSearchResults([]);
|
||||
},
|
||||
[],
|
||||
false,
|
||||
);
|
||||
|
||||
const handleFileClick = (fileName: string) => {
|
||||
const fullPath = [...currentPath, fileName].join('/');
|
||||
setSelectedFile({
|
||||
id: fullPath,
|
||||
name: fileName,
|
||||
});
|
||||
};
|
||||
|
||||
const handleSync = async () => {
|
||||
if (isSyncing) return;
|
||||
|
||||
const provider = sourceProvider;
|
||||
|
||||
setIsSyncing(true);
|
||||
setSyncProgress(0);
|
||||
|
||||
try {
|
||||
const response = await userService.syncConnector(docId, provider, token);
|
||||
const data = await response.json();
|
||||
|
||||
if (data.success) {
|
||||
console.log('Sync started successfully:', data.task_id);
|
||||
setSyncProgress(10);
|
||||
|
||||
// Poll task status using userService
|
||||
const maxAttempts = 30;
|
||||
const pollInterval = 2000;
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
||||
try {
|
||||
const statusResponse = await userService.getTaskStatus(
|
||||
data.task_id,
|
||||
token,
|
||||
);
|
||||
const statusData = await statusResponse.json();
|
||||
|
||||
console.log(
|
||||
`Task status (attempt ${attempt + 1}):`,
|
||||
statusData.status,
|
||||
);
|
||||
|
||||
if (statusData.status === 'SUCCESS') {
|
||||
setSyncProgress(100);
|
||||
console.log('Sync completed successfully');
|
||||
|
||||
// Refresh directory structure
|
||||
try {
|
||||
const refreshResponse = await userService.getDirectoryStructure(
|
||||
docId,
|
||||
token,
|
||||
);
|
||||
const refreshData = await refreshResponse.json();
|
||||
if (refreshData && refreshData.directory_structure) {
|
||||
setDirectoryStructure(refreshData.directory_structure);
|
||||
setCurrentPath([]);
|
||||
}
|
||||
if (refreshData && refreshData.provider) {
|
||||
setSourceProvider(refreshData.provider);
|
||||
}
|
||||
|
||||
setSyncDone(true);
|
||||
setTimeout(() => setSyncDone(false), 5000);
|
||||
} catch (err) {
|
||||
console.error('Error refreshing directory structure:', err);
|
||||
}
|
||||
break;
|
||||
} else if (statusData.status === 'FAILURE') {
|
||||
console.error('Sync task failed:', statusData.result);
|
||||
break;
|
||||
} else if (statusData.status === 'PROGRESS') {
|
||||
const progress = Number(
|
||||
statusData.result && statusData.result.current != null
|
||||
? statusData.result.current
|
||||
: statusData.meta && statusData.meta.current != null
|
||||
? statusData.meta.current
|
||||
: 0,
|
||||
);
|
||||
setSyncProgress(Math.max(10, progress));
|
||||
}
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, pollInterval));
|
||||
} catch (error) {
|
||||
console.error('Error polling task status:', error);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.error('Sync failed:', data.error);
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('Error syncing connector:', err);
|
||||
} finally {
|
||||
setIsSyncing(false);
|
||||
setSyncProgress(0);
|
||||
}
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
const fetchDirectoryStructure = async () => {
|
||||
try {
|
||||
setLoading(true);
|
||||
|
||||
const directoryResponse = await userService.getDirectoryStructure(
|
||||
docId,
|
||||
token,
|
||||
);
|
||||
const directoryData = await directoryResponse.json();
|
||||
|
||||
if (directoryData && directoryData.directory_structure) {
|
||||
setDirectoryStructure(directoryData.directory_structure);
|
||||
} else {
|
||||
setError('Invalid response format');
|
||||
}
|
||||
|
||||
if (directoryData && directoryData.provider) {
|
||||
setSourceProvider(directoryData.provider);
|
||||
}
|
||||
} catch (err) {
|
||||
setError('Failed to load source information');
|
||||
console.error(err);
|
||||
} finally {
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
if (docId) {
|
||||
fetchDirectoryStructure();
|
||||
}
|
||||
}, [docId, token]);
|
||||
|
||||
const navigateToDirectory = (dirName: string) => {
|
||||
setCurrentPath([...currentPath, dirName]);
|
||||
};
|
||||
|
||||
const navigateUp = () => {
|
||||
setCurrentPath(currentPath.slice(0, -1));
|
||||
};
|
||||
|
||||
const getCurrentDirectory = (): DirectoryStructure => {
|
||||
if (!directoryStructure) return {};
|
||||
|
||||
let current = directoryStructure;
|
||||
for (const dir of currentPath) {
|
||||
if (current[dir] && !current[dir].type) {
|
||||
current = current[dir] as DirectoryStructure;
|
||||
} else {
|
||||
return {};
|
||||
}
|
||||
}
|
||||
return current;
|
||||
};
|
||||
|
||||
const getMenuRef = (id: string) => {
|
||||
if (!menuRefs.current[id]) {
|
||||
menuRefs.current[id] = React.createRef();
|
||||
}
|
||||
return menuRefs.current[id];
|
||||
};
|
||||
|
||||
const handleMenuClick = (
|
||||
e: React.MouseEvent<HTMLButtonElement>,
|
||||
id: string,
|
||||
) => {
|
||||
e.stopPropagation();
|
||||
setActiveMenuId(activeMenuId === id ? null : id);
|
||||
};
|
||||
|
||||
const getActionOptions = (
|
||||
name: string,
|
||||
isFile: boolean,
|
||||
_itemId: string,
|
||||
): MenuOption[] => {
|
||||
const options: MenuOption[] = [];
|
||||
|
||||
options.push({
|
||||
icon: EyeView,
|
||||
label: t('settings.sources.view'),
|
||||
onClick: (event: React.SyntheticEvent) => {
|
||||
event.stopPropagation();
|
||||
if (isFile) {
|
||||
handleFileClick(name);
|
||||
} else {
|
||||
navigateToDirectory(name);
|
||||
}
|
||||
},
|
||||
iconWidth: 18,
|
||||
iconHeight: 18,
|
||||
variant: 'primary',
|
||||
});
|
||||
|
||||
return options;
|
||||
};
|
||||
|
||||
const calculateDirectoryStats = (
|
||||
structure: DirectoryStructure,
|
||||
): { totalSize: number; totalTokens: number } => {
|
||||
let totalSize = 0;
|
||||
let totalTokens = 0;
|
||||
|
||||
Object.entries(structure).forEach(([_, node]) => {
|
||||
if (node.type) {
|
||||
// It's a file
|
||||
totalSize += node.size_bytes || 0;
|
||||
totalTokens += node.token_count || 0;
|
||||
} else {
|
||||
// It's a directory, recurse
|
||||
const stats = calculateDirectoryStats(node);
|
||||
totalSize += stats.totalSize;
|
||||
totalTokens += stats.totalTokens;
|
||||
}
|
||||
});
|
||||
|
||||
return { totalSize, totalTokens };
|
||||
};
|
||||
|
||||
const handleBackNavigation = () => {
|
||||
if (selectedFile) {
|
||||
setSelectedFile(null);
|
||||
} else if (currentPath.length === 0) {
|
||||
if (onBackToDocuments) {
|
||||
onBackToDocuments();
|
||||
}
|
||||
} else {
|
||||
navigateUp();
|
||||
}
|
||||
};
|
||||
|
||||
const renderPathNavigation = () => {
|
||||
return (
|
||||
<div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
|
||||
{/* Left side with path navigation */}
|
||||
<div className="flex w-full items-center sm:w-auto">
|
||||
<button
|
||||
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
|
||||
onClick={handleBackNavigation}
|
||||
>
|
||||
<img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
|
||||
</button>
|
||||
|
||||
<div className="flex flex-wrap items-center">
|
||||
<span className="font-semibold break-words text-[#7D54D1]">
|
||||
{sourceName}
|
||||
</span>
|
||||
{currentPath.length > 0 && (
|
||||
<>
|
||||
<span className="mx-1 flex-shrink-0 text-gray-500">/</span>
|
||||
{currentPath.map((dir, index) => (
|
||||
<React.Fragment key={index}>
|
||||
<span className="break-words text-gray-700 dark:text-[#E0E0E0]">
|
||||
{dir}
|
||||
</span>
|
||||
{index < currentPath.length - 1 && (
|
||||
<span className="mx-1 flex-shrink-0 text-gray-500">
|
||||
/
|
||||
</span>
|
||||
)}
|
||||
</React.Fragment>
|
||||
))}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
|
||||
{renderFileSearch()}
|
||||
|
||||
{/* Sync button */}
|
||||
<button
|
||||
onClick={handleSync}
|
||||
disabled={isSyncing}
|
||||
className={`flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap transition-colors ${
|
||||
isSyncing
|
||||
? 'cursor-not-allowed bg-gray-300 text-gray-600 dark:bg-gray-600 dark:text-gray-400'
|
||||
: 'bg-purple-30 hover:bg-violets-are-blue text-white'
|
||||
}`}
|
||||
title={
|
||||
isSyncing
|
||||
? `${t('settings.sources.syncing')} ${syncProgress}%`
|
||||
: syncDone
|
||||
? 'Done'
|
||||
: t('settings.sources.sync')
|
||||
}
|
||||
>
|
||||
<img
|
||||
src={SyncIcon}
|
||||
alt={t('settings.sources.sync')}
|
||||
className={`mr-2 h-4 w-4 brightness-0 invert filter ${isSyncing ? 'animate-spin' : ''}`}
|
||||
/>
|
||||
{isSyncing
|
||||
? `${syncProgress}%`
|
||||
: syncDone
|
||||
? 'Done'
|
||||
: t('settings.sources.sync')}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const renderFileTree = (directory: DirectoryStructure) => {
|
||||
if (!directory) return [];
|
||||
|
||||
// Create parent directory row
|
||||
const parentRow =
|
||||
currentPath.length > 0
|
||||
? [
|
||||
<tr
|
||||
key="parent-dir"
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={navigateUp}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.parentFolderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
..
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
|
||||
</tr>,
|
||||
]
|
||||
: [];
|
||||
|
||||
// Sort entries: directories first, then files, both alphabetically
|
||||
const sortedEntries = Object.entries(directory).sort(
|
||||
([nameA, nodeA], [nameB, nodeB]) => {
|
||||
const isFileA = !!nodeA.type;
|
||||
const isFileB = !!nodeB.type;
|
||||
|
||||
if (isFileA !== isFileB) {
|
||||
return isFileA ? 1 : -1; // Directories first
|
||||
}
|
||||
|
||||
return nameA.localeCompare(nameB); // Alphabetical within each group
|
||||
},
|
||||
);
|
||||
|
||||
// Process directories
|
||||
const directoryRows = sortedEntries
|
||||
.filter(([_, node]) => !node.type)
|
||||
.map(([name, node]) => {
|
||||
const itemId = `dir-${name}`;
|
||||
const menuRef = getMenuRef(itemId);
|
||||
|
||||
// Calculate directory stats
|
||||
const dirStats = calculateDirectoryStats(node as DirectoryStructure);
|
||||
|
||||
return (
|
||||
<tr
|
||||
key={itemId}
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={() => navigateToDirectory(name)}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex min-w-0 items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.folderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
{name}
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
{dirStats.totalTokens > 0
|
||||
? dirStats.totalTokens.toLocaleString()
|
||||
: '-'}
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
{dirStats.totalSize > 0 ? formatBytes(dirStats.totalSize) : '-'}
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4">
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
src={ThreeDots}
|
||||
alt={t('settings.sources.menuAlt')}
|
||||
className="opacity-60 hover:opacity-100"
|
||||
/>
|
||||
</button>
|
||||
<ContextMenu
|
||||
isOpen={activeMenuId === itemId}
|
||||
setIsOpen={(isOpen) =>
|
||||
setActiveMenuId(isOpen ? itemId : null)
|
||||
}
|
||||
options={getActionOptions(name, false, itemId)}
|
||||
anchorRef={menuRef}
|
||||
position="bottom-left"
|
||||
offset={{ x: -4, y: 4 }}
|
||||
/>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
});
|
||||
|
||||
// Process files
|
||||
const fileRows = sortedEntries
|
||||
.filter(([_, node]) => !!node.type)
|
||||
.map(([name, node]) => {
|
||||
const itemId = `file-${name}`;
|
||||
const menuRef = getMenuRef(itemId);
|
||||
|
||||
return (
|
||||
<tr
|
||||
key={itemId}
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={() => handleFileClick(name)}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex min-w-0 items-center">
|
||||
<img
|
||||
src={FileIcon}
|
||||
alt={t('settings.sources.fileAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
{name}
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
{node.token_count?.toLocaleString() || '-'}
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm md:px-4 dark:text-[#E0E0E0]">
|
||||
{node.size_bytes ? formatBytes(node.size_bytes) : '-'}
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4">
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
src={ThreeDots}
|
||||
alt={t('settings.sources.menuAlt')}
|
||||
className="opacity-60 hover:opacity-100"
|
||||
/>
|
||||
</button>
|
||||
<ContextMenu
|
||||
isOpen={activeMenuId === itemId}
|
||||
setIsOpen={(isOpen) =>
|
||||
setActiveMenuId(isOpen ? itemId : null)
|
||||
}
|
||||
options={getActionOptions(name, true, itemId)}
|
||||
anchorRef={menuRef}
|
||||
position="bottom-left"
|
||||
offset={{ x: -4, y: 4 }}
|
||||
/>
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
);
|
||||
});
|
||||
|
||||
return [...parentRow, ...directoryRows, ...fileRows];
|
||||
};
|
||||
|
||||
const searchFiles = (
|
||||
query: string,
|
||||
structure: DirectoryStructure,
|
||||
currentPath: string[] = [],
|
||||
): SearchResult[] => {
|
||||
let results: SearchResult[] = [];
|
||||
|
||||
Object.entries(structure).forEach(([name, node]) => {
|
||||
const fullPath = [...currentPath, name].join('/');
|
||||
|
||||
if (name.toLowerCase().includes(query.toLowerCase())) {
|
||||
results.push({
|
||||
name,
|
||||
path: fullPath,
|
||||
isFile: !!node.type,
|
||||
});
|
||||
}
|
||||
|
||||
if (!node.type) {
|
||||
// If it's a directory, search recursively
|
||||
results = [
|
||||
...results,
|
||||
...searchFiles(query, node as DirectoryStructure, [
|
||||
...currentPath,
|
||||
name,
|
||||
]),
|
||||
];
|
||||
}
|
||||
});
|
||||
|
||||
return results;
|
||||
};
|
||||
|
||||
const handleSearchSelect = (result: SearchResult) => {
|
||||
if (result.isFile) {
|
||||
const pathParts = result.path.split('/');
|
||||
const fileName = pathParts.pop() || '';
|
||||
setCurrentPath(pathParts);
|
||||
|
||||
setSelectedFile({
|
||||
id: result.path,
|
||||
name: fileName,
|
||||
});
|
||||
} else {
|
||||
setCurrentPath(result.path.split('/'));
|
||||
setSelectedFile(null);
|
||||
}
|
||||
setSearchQuery('');
|
||||
setSearchResults([]);
|
||||
};
|
||||
|
||||
const renderFileSearch = () => {
|
||||
return (
|
||||
<div className="relative w-52" ref={searchDropdownRef}>
|
||||
<input
|
||||
type="text"
|
||||
value={searchQuery}
|
||||
onChange={(e) => {
|
||||
setSearchQuery(e.target.value);
|
||||
if (directoryStructure) {
|
||||
setSearchResults(searchFiles(e.target.value, directoryStructure));
|
||||
}
|
||||
}}
|
||||
placeholder={t('settings.sources.searchFiles')}
|
||||
className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
|
||||
/>
|
||||
|
||||
{searchQuery && (
|
||||
<div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
|
||||
<div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
|
||||
{searchResults.length === 0 ? (
|
||||
<div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
{t('settings.sources.noResults')}
|
||||
</div>
|
||||
) : (
|
||||
searchResults.map((result, index) => (
|
||||
<div
|
||||
key={index}
|
||||
onClick={() => handleSearchSelect(result)}
|
||||
title={result.path}
|
||||
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${
|
||||
index !== searchResults.length - 1
|
||||
? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
<img
|
||||
src={result.isFile ? FileIcon : FolderIcon}
|
||||
alt={
|
||||
result.isFile
|
||||
? t('settings.sources.fileAlt')
|
||||
: t('settings.sources.folderAlt')
|
||||
}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
|
||||
{result.path.split('/').pop() || result.path}
|
||||
</span>
|
||||
</div>
|
||||
))
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
const handleFileSearch = (searchQuery: string) => {
|
||||
if (directoryStructure) {
|
||||
return searchFiles(searchQuery, directoryStructure);
|
||||
}
|
||||
return [];
|
||||
};
|
||||
|
||||
const handleFileSelect = (path: string) => {
|
||||
const pathParts = path.split('/');
|
||||
const fileName = pathParts.pop() || '';
|
||||
setCurrentPath(pathParts);
|
||||
setSelectedFile({
|
||||
id: path,
|
||||
name: fileName,
|
||||
});
|
||||
};
|
||||
|
||||
const currentDirectory = getCurrentDirectory();
|
||||
|
||||
const navigateToPath = (index: number) => {
|
||||
setCurrentPath(currentPath.slice(0, index + 1));
|
||||
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
{selectedFile ? (
|
||||
<div className="flex">
|
||||
<div className="flex-1">
|
||||
<Chunks
|
||||
documentId={docId}
|
||||
documentName={sourceName}
|
||||
handleGoBack={() => setSelectedFile(null)}
|
||||
path={selectedFile.id}
|
||||
onFileSearch={handleFileSearch}
|
||||
onFileSelect={handleFileSelect}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex w-full max-w-full flex-col overflow-hidden">
|
||||
<div className="mb-2">{renderPathNavigation()}</div>
|
||||
|
||||
<div className="w-full">
|
||||
<div className="overflow-x-auto rounded-[6px] border border-[#D1D9E0] dark:border-[#6A6A6A]">
|
||||
<table className="w-full min-w-[600px] table-auto bg-transparent">
|
||||
<thead className="bg-gray-100 dark:bg-[#27282D]">
|
||||
<tr className="border-b border-[#D1D9E0] dark:border-[#6A6A6A]">
|
||||
<th className="min-w-[200px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.fileName')}
|
||||
</th>
|
||||
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.tokens')}
|
||||
</th>
|
||||
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.size')}
|
||||
</th>
|
||||
<th className="w-10 px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>{renderFileTree(getCurrentDirectory())}</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ConnectorTreeComponent;
|
||||
@@ -2,6 +2,7 @@ import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import Chunks from './Chunks';
|
||||
import ContextMenu, { MenuOption } from './ContextMenu';
|
||||
import userService from '../api/services/userService';
|
||||
@@ -10,9 +11,7 @@ import FolderIcon from '../assets/folder.svg';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ThreeDots from '../assets/three-dots.svg';
|
||||
import EyeView from '../assets/eye-view.svg';
|
||||
import OutlineSource from '../assets/outline-source.svg';
|
||||
import Trash from '../assets/red-trash.svg';
|
||||
import SearchIcon from '../assets/search.svg';
|
||||
import { useOutsideAlerter } from '../hooks';
|
||||
import ConfirmationModal from '../modals/ConfirmationModal';
|
||||
|
||||
@@ -128,14 +127,6 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
}, [docId, token]);
|
||||
|
||||
const formatBytes = (bytes: number): string => {
|
||||
if (bytes === 0) return '0 Bytes';
|
||||
const k = 1024;
|
||||
const sizes = ['Bytes', 'KB', 'MB', 'GB'];
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||||
};
|
||||
|
||||
const navigateToDirectory = (dirName: string) => {
|
||||
setCurrentPath((prev) => [...prev, dirName]);
|
||||
};
|
||||
@@ -443,18 +434,18 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
|
||||
const renderPathNavigation = () => {
|
||||
return (
|
||||
<div className="mb-0 min-h-[38px] flex flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
|
||||
<div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
|
||||
{/* Left side with path navigation */}
|
||||
<div className="flex w-full items-center sm:w-auto">
|
||||
<button
|
||||
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34] font-medium"
|
||||
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
|
||||
onClick={handleBackNavigation}
|
||||
>
|
||||
<img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
|
||||
</button>
|
||||
|
||||
<div className="flex flex-wrap items-center">
|
||||
<span className="text-[#7D54D1] font-semibold break-words">
|
||||
<span className="font-semibold break-words text-[#7D54D1]">
|
||||
{sourceName}
|
||||
</span>
|
||||
{currentPath.length > 0 && (
|
||||
@@ -485,8 +476,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex relative flex-row flex-nowrap items-center gap-2 w-full sm:w-auto justify-end mt-2 sm:mt-0">
|
||||
|
||||
<div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
|
||||
{processingRef.current && (
|
||||
<div className="text-sm text-gray-500">
|
||||
{currentOpRef.current === 'add'
|
||||
@@ -495,13 +485,13 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
</div>
|
||||
)}
|
||||
|
||||
{renderFileSearch()}
|
||||
{renderFileSearch()}
|
||||
|
||||
{/* Add file button */}
|
||||
{!processingRef.current && (
|
||||
<button
|
||||
onClick={handleAddFile}
|
||||
className="bg-purple-30 hover:bg-violets-are-blue flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] whitespace-nowrap text-white font-medium"
|
||||
className="bg-purple-30 hover:bg-violets-are-blue flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap text-white"
|
||||
title={t('settings.sources.addFile')}
|
||||
>
|
||||
{t('settings.sources.addFile')}
|
||||
@@ -543,32 +533,32 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
const parentRow =
|
||||
currentPath.length > 0
|
||||
? [
|
||||
<tr
|
||||
key="parent-dir"
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={navigateUp}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.parentFolderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
..
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
|
||||
</tr>,
|
||||
]
|
||||
<tr
|
||||
key="parent-dir"
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={navigateUp}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.parentFolderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
..
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
|
||||
</tr>,
|
||||
]
|
||||
: [];
|
||||
|
||||
// Render directories first, then files
|
||||
@@ -609,7 +599,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E] font-medium"
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
@@ -665,7 +655,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E] font-medium"
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
@@ -757,14 +747,12 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
}}
|
||||
placeholder={t('settings.sources.searchFiles')}
|
||||
className={`w-full h-[38px] border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A]
|
||||
${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'}
|
||||
bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
|
||||
className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
|
||||
/>
|
||||
|
||||
{searchQuery && (
|
||||
<div className="absolute top-full left-0 right-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg dark:border-[#6A6A6A] dark:bg-[#1F2023] transition-all duration-200">
|
||||
<div className="max-h-[calc(100vh-200px)] overflow-y-auto overflow-x-hidden overscroll-contain">
|
||||
<div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
|
||||
<div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
|
||||
{searchResults.length === 0 ? (
|
||||
<div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
{t('settings.sources.noResults')}
|
||||
@@ -775,10 +763,11 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
key={index}
|
||||
onClick={() => handleSearchSelect(result)}
|
||||
title={result.path}
|
||||
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${index !== searchResults.length - 1
|
||||
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${
|
||||
index !== searchResults.length - 1
|
||||
? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
|
||||
: ''
|
||||
}`}
|
||||
}`}
|
||||
>
|
||||
<img
|
||||
src={result.isFile ? FileIcon : FolderIcon}
|
||||
@@ -789,7 +778,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="text-sm dark:text-[#E0E0E0] truncate flex-1">
|
||||
<span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
|
||||
{result.path.split('/').pop() || result.path}
|
||||
</span>
|
||||
</div>
|
||||
@@ -871,7 +860,9 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
message={
|
||||
itemToDelete?.isFile
|
||||
? t('settings.sources.confirmDelete')
|
||||
: t('settings.sources.deleteDirectoryWarning', { name: itemToDelete?.name })
|
||||
: t('settings.sources.deleteDirectoryWarning', {
|
||||
name: itemToDelete?.name,
|
||||
})
|
||||
}
|
||||
modalState={deleteModalState}
|
||||
setModalState={setDeleteModalState}
|
||||
|
||||
@@ -259,7 +259,7 @@ export default function MessageInput({
|
||||
return (
|
||||
<div className="mx-2 flex w-full flex-col">
|
||||
<div className="border-dark-gray bg-lotion dark:border-grey relative flex w-full flex-col rounded-[23px] border dark:bg-transparent">
|
||||
<div className="flex flex-wrap gap-1.5 px-4 pt-3 pb-0 sm:gap-2 sm:px-6">
|
||||
<div className="flex flex-wrap gap-1.5 px-2 py-2 sm:gap-2 sm:px-3">
|
||||
{attachments.map((attachment, index) => (
|
||||
<div
|
||||
key={index}
|
||||
@@ -353,14 +353,14 @@ export default function MessageInput({
|
||||
onChange={handleChange}
|
||||
tabIndex={1}
|
||||
placeholder={t('inputPlaceholder')}
|
||||
className="inputbox-style no-scrollbar bg-lotion dark:text-bright-gray dark:placeholder:text-bright-gray/50 w-full overflow-x-hidden overflow-y-auto rounded-t-[23px] px-4 py-3 text-base leading-tight whitespace-pre-wrap opacity-100 placeholder:text-gray-500 focus:outline-hidden sm:px-6 sm:py-5 dark:bg-transparent"
|
||||
className="inputbox-style no-scrollbar bg-lotion dark:text-bright-gray dark:placeholder:text-bright-gray/50 w-full overflow-x-hidden overflow-y-auto rounded-t-[23px] px-2 text-base leading-tight whitespace-pre-wrap opacity-100 placeholder:text-gray-500 focus:outline-hidden sm:px-3 dark:bg-transparent"
|
||||
onInput={handleInput}
|
||||
onKeyDown={handleKeyDown}
|
||||
aria-label={t('inputPlaceholder')}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center px-3 py-1.5 sm:px-4 sm:py-2">
|
||||
<div className="flex items-center px-2 pb-1.5 sm:px-3 sm:pb-2">
|
||||
<div className="flex grow flex-wrap gap-1 sm:gap-2">
|
||||
{showSourceButton && (
|
||||
<button
|
||||
@@ -368,8 +368,8 @@ export default function MessageInput({
|
||||
className="xs:px-3 xs:py-1.5 dark:border-purple-taupe flex max-w-[130px] items-center rounded-[32px] border border-[#AAAAAA] px-2 py-1 transition-colors hover:bg-gray-100 sm:max-w-[150px] dark:hover:bg-[#2C2E3C]"
|
||||
onClick={() => setIsSourcesPopupOpen(!isSourcesPopupOpen)}
|
||||
title={
|
||||
selectedDocs
|
||||
? selectedDocs.name
|
||||
selectedDocs && selectedDocs.length > 0
|
||||
? selectedDocs.map((doc) => doc.name).join(', ')
|
||||
: t('conversation.sources.title')
|
||||
}
|
||||
>
|
||||
@@ -379,8 +379,10 @@ export default function MessageInput({
|
||||
className="mr-1 h-3.5 w-3.5 shrink-0 sm:mr-1.5 sm:h-4"
|
||||
/>
|
||||
<span className="xs:text-[12px] dark:text-bright-gray truncate overflow-hidden text-[10px] font-medium text-[#5D5D5D] sm:text-[14px]">
|
||||
{selectedDocs
|
||||
? selectedDocs.name
|
||||
{selectedDocs && selectedDocs.length > 0
|
||||
? selectedDocs.length === 1
|
||||
? selectedDocs[0].name
|
||||
: `${selectedDocs.length} sources selected`
|
||||
: t('conversation.sources.title')}
|
||||
</span>
|
||||
{!isTouch && (
|
||||
@@ -428,18 +430,18 @@ export default function MessageInput({
|
||||
<button
|
||||
onClick={loading ? undefined : handleSubmit}
|
||||
aria-label={loading ? t('loading') : t('send')}
|
||||
className={`flex items-center justify-center rounded-full p-2 sm:p-2.5 ${loading ? 'bg-gray-300 dark:bg-gray-600' : 'bg-black dark:bg-white'} ml-auto shrink-0`}
|
||||
className={`flex h-7 w-7 items-center justify-center rounded-full sm:h-9 sm:w-9 ${loading || !value.trim() ? 'bg-black opacity-60 dark:bg-[#F0F3F4] dark:opacity-80' : 'bg-black opacity-100 dark:bg-[#F0F3F4]'} ml-auto shrink-0`}
|
||||
disabled={loading}
|
||||
>
|
||||
{loading ? (
|
||||
<img
|
||||
src={isDarkTheme ? SpinnerDark : Spinner}
|
||||
className="h-3.5 w-3.5 animate-spin sm:h-4 sm:w-4"
|
||||
className="mx-auto my-auto block h-3.5 w-3.5 animate-spin sm:h-4 sm:w-4"
|
||||
alt={t('loading')}
|
||||
/>
|
||||
) : (
|
||||
<img
|
||||
className={`h-3.5 w-3.5 sm:h-4 sm:w-4 ${isDarkTheme ? 'invert filter' : ''}`}
|
||||
className={`mx-auto my-auto block h-3.5 w-3.5 translate-x-[-0.9px] translate-y-[1.1px] sm:h-4 sm:w-4 ${isDarkTheme ? 'invert filter' : ''}`}
|
||||
src={PaperPlane}
|
||||
alt={t('send')}
|
||||
/>
|
||||
|
||||
@@ -248,7 +248,7 @@ export default function MultiSelectPopup({
|
||||
</div>
|
||||
<div className="shrink-0">
|
||||
<div
|
||||
className={`dark:bg-charleston-green-2 flex h-4 w-4 items-center justify-center rounded-xs border border-[#C6C6C6] bg-white dark:border-[#757783]`}
|
||||
className={`dark:bg-charleston-green-2 flex h-4 w-4 items-center justify-center rounded-xs border-2 border-[#C6C6C6] bg-white dark:border-[#757783]`}
|
||||
aria-hidden="true"
|
||||
>
|
||||
{isSelected && (
|
||||
|
||||
@@ -17,7 +17,7 @@ type SourcesPopupProps = {
|
||||
isOpen: boolean;
|
||||
onClose: () => void;
|
||||
anchorRef: React.RefObject<HTMLButtonElement | null>;
|
||||
handlePostDocumentSelect: (doc: Doc | null) => void;
|
||||
handlePostDocumentSelect: (doc: Doc[] | null) => void;
|
||||
setUploadModalState: React.Dispatch<React.SetStateAction<ActiveState>>;
|
||||
};
|
||||
|
||||
@@ -149,9 +149,13 @@ export default function SourcesPopup({
|
||||
if (option.model === embeddingsName) {
|
||||
const isSelected =
|
||||
selectedDocs &&
|
||||
(option.id
|
||||
? selectedDocs.id === option.id
|
||||
: selectedDocs.date === option.date);
|
||||
Array.isArray(selectedDocs) &&
|
||||
selectedDocs.length > 0 &&
|
||||
selectedDocs.some((doc) =>
|
||||
option.id
|
||||
? doc.id === option.id
|
||||
: doc.date === option.date,
|
||||
);
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -159,11 +163,29 @@ export default function SourcesPopup({
|
||||
className="border-opacity-80 dark:border-dim-gray flex cursor-pointer items-center border-b border-[#D9D9D9] p-3 transition-colors hover:bg-gray-100 dark:text-[14px] dark:hover:bg-[#2C2E3C]"
|
||||
onClick={() => {
|
||||
if (isSelected) {
|
||||
dispatch(setSelectedDocs(null));
|
||||
handlePostDocumentSelect(null);
|
||||
const updatedDocs =
|
||||
selectedDocs && Array.isArray(selectedDocs)
|
||||
? selectedDocs.filter((doc) =>
|
||||
option.id
|
||||
? doc.id !== option.id
|
||||
: doc.date !== option.date,
|
||||
)
|
||||
: [];
|
||||
dispatch(
|
||||
setSelectedDocs(
|
||||
updatedDocs.length > 0 ? updatedDocs : null,
|
||||
),
|
||||
);
|
||||
handlePostDocumentSelect(
|
||||
updatedDocs.length > 0 ? updatedDocs : null,
|
||||
);
|
||||
} else {
|
||||
dispatch(setSelectedDocs(option));
|
||||
handlePostDocumentSelect(option);
|
||||
const updatedDocs =
|
||||
selectedDocs && Array.isArray(selectedDocs)
|
||||
? [...selectedDocs, option]
|
||||
: [option];
|
||||
dispatch(setSelectedDocs(updatedDocs));
|
||||
handlePostDocumentSelect(updatedDocs);
|
||||
}
|
||||
}}
|
||||
>
|
||||
@@ -178,7 +200,7 @@ export default function SourcesPopup({
|
||||
{option.name}
|
||||
</span>
|
||||
<div
|
||||
className={`flex h-4 w-4 shrink-0 items-center justify-center border border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
className={`flex h-4 w-4 shrink-0 items-center justify-center rounded-xs border-2 border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
>
|
||||
{isSelected && (
|
||||
<img
|
||||
|
||||
@@ -46,7 +46,7 @@ const ToggleSwitch: React.FC<ToggleSwitchProps> = ({
|
||||
|
||||
return (
|
||||
<label
|
||||
className={`flex cursor-pointer select-none flex-row items-center ${
|
||||
className={`flex cursor-pointer flex-row items-center select-none ${
|
||||
labelPosition === 'right' ? 'flex-row-reverse' : ''
|
||||
} ${disabled ? 'cursor-not-allowed opacity-50' : ''} ${className}`}
|
||||
>
|
||||
@@ -75,7 +75,7 @@ const ToggleSwitch: React.FC<ToggleSwitchProps> = ({
|
||||
}`}
|
||||
></div>
|
||||
<div
|
||||
className={`absolute ${toggle} flex items-center justify-center rounded-full bg-white opacity-80 transition ${
|
||||
className={`absolute ${toggle} flex items-center justify-center rounded-full bg-white transition ${
|
||||
checked ? `${translate} bg-silver` : ''
|
||||
}`}
|
||||
></div>
|
||||
|
||||
@@ -207,7 +207,7 @@ export default function ToolsPopup({
|
||||
</div>
|
||||
<div className="flex shrink-0 items-center">
|
||||
<div
|
||||
className={`flex h-4 w-4 items-center justify-center border border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
className={`flex h-4 w-4 items-center justify-center rounded-xs border-2 border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
>
|
||||
{tool.status && (
|
||||
<img
|
||||
|
||||
@@ -7,7 +7,7 @@ export function handleFetchAnswer(
|
||||
question: string,
|
||||
signal: AbortSignal,
|
||||
token: string | null,
|
||||
selectedDocs: Doc | null,
|
||||
selectedDocs: Doc[] | null,
|
||||
conversationId: string | null,
|
||||
promptId: string | null,
|
||||
chunks: string,
|
||||
@@ -52,10 +52,17 @@ export function handleFetchAnswer(
|
||||
payload.attachments = attachments;
|
||||
}
|
||||
|
||||
if (selectedDocs && 'id' in selectedDocs) {
|
||||
payload.active_docs = selectedDocs.id as string;
|
||||
if (selectedDocs && Array.isArray(selectedDocs)) {
|
||||
if (selectedDocs.length > 1) {
|
||||
// Handle multiple documents
|
||||
payload.active_docs = selectedDocs.map((doc) => doc.id!);
|
||||
payload.retriever = selectedDocs[0]?.retriever as string;
|
||||
} else if (selectedDocs.length === 1 && 'id' in selectedDocs[0]) {
|
||||
// Handle single document (backward compatibility)
|
||||
payload.active_docs = selectedDocs[0].id as string;
|
||||
payload.retriever = selectedDocs[0].retriever as string;
|
||||
}
|
||||
}
|
||||
payload.retriever = selectedDocs?.retriever as string;
|
||||
return conversationService
|
||||
.answer(payload, token, signal)
|
||||
.then((response) => {
|
||||
@@ -84,7 +91,7 @@ export function handleFetchAnswerSteaming(
|
||||
question: string,
|
||||
signal: AbortSignal,
|
||||
token: string | null,
|
||||
selectedDocs: Doc | null,
|
||||
selectedDocs: Doc[] | null,
|
||||
conversationId: string | null,
|
||||
promptId: string | null,
|
||||
chunks: string,
|
||||
@@ -112,10 +119,17 @@ export function handleFetchAnswerSteaming(
|
||||
payload.attachments = attachments;
|
||||
}
|
||||
|
||||
if (selectedDocs && 'id' in selectedDocs) {
|
||||
payload.active_docs = selectedDocs.id as string;
|
||||
if (selectedDocs && Array.isArray(selectedDocs)) {
|
||||
if (selectedDocs.length > 1) {
|
||||
// Handle multiple documents
|
||||
payload.active_docs = selectedDocs.map((doc) => doc.id!);
|
||||
payload.retriever = selectedDocs[0]?.retriever as string;
|
||||
} else if (selectedDocs.length === 1 && 'id' in selectedDocs[0]) {
|
||||
// Handle single document (backward compatibility)
|
||||
payload.active_docs = selectedDocs[0].id as string;
|
||||
payload.retriever = selectedDocs[0].retriever as string;
|
||||
}
|
||||
}
|
||||
payload.retriever = selectedDocs?.retriever as string;
|
||||
|
||||
return new Promise<Answer>((resolve, reject) => {
|
||||
conversationService
|
||||
@@ -171,7 +185,7 @@ export function handleFetchAnswerSteaming(
|
||||
export function handleSearch(
|
||||
question: string,
|
||||
token: string | null,
|
||||
selectedDocs: Doc | null,
|
||||
selectedDocs: Doc[] | null,
|
||||
conversation_id: string | null,
|
||||
chunks: string,
|
||||
token_limit: number,
|
||||
@@ -183,9 +197,17 @@ export function handleSearch(
|
||||
token_limit: token_limit,
|
||||
isNoneDoc: selectedDocs === null,
|
||||
};
|
||||
if (selectedDocs && 'id' in selectedDocs)
|
||||
payload.active_docs = selectedDocs.id as string;
|
||||
payload.retriever = selectedDocs?.retriever as string;
|
||||
if (selectedDocs && Array.isArray(selectedDocs)) {
|
||||
if (selectedDocs.length > 1) {
|
||||
// Handle multiple documents
|
||||
payload.active_docs = selectedDocs.map((doc) => doc.id!);
|
||||
payload.retriever = selectedDocs[0]?.retriever as string;
|
||||
} else if (selectedDocs.length === 1 && 'id' in selectedDocs[0]) {
|
||||
// Handle single document (backward compatibility)
|
||||
payload.active_docs = selectedDocs[0].id as string;
|
||||
payload.retriever = selectedDocs[0].retriever as string;
|
||||
}
|
||||
}
|
||||
return conversationService
|
||||
.search(payload, token)
|
||||
.then((response) => response.json())
|
||||
|
||||
@@ -54,7 +54,7 @@ export interface Query {
|
||||
|
||||
export interface RetrievalPayload {
|
||||
question: string;
|
||||
active_docs?: string;
|
||||
active_docs?: string | string[];
|
||||
retriever?: string;
|
||||
conversation_id: string | null;
|
||||
prompt_id?: string | null;
|
||||
|
||||
@@ -18,11 +18,14 @@ export default function useDefaultDocument() {
|
||||
const fetchDocs = () => {
|
||||
getDocs(token).then((data) => {
|
||||
dispatch(setSourceDocs(data));
|
||||
if (!selectedDoc)
|
||||
if (
|
||||
!selectedDoc ||
|
||||
(Array.isArray(selectedDoc) && selectedDoc.length === 0)
|
||||
)
|
||||
Array.isArray(data) &&
|
||||
data?.forEach((doc: Doc) => {
|
||||
if (doc.model && doc.name === 'default') {
|
||||
dispatch(setSelectedDocs(doc));
|
||||
dispatch(setSelectedDocs([doc]));
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Pre-loaded",
|
||||
"private": "Private",
|
||||
"sync": "Sync",
|
||||
"syncing": "Syncing...",
|
||||
"syncFrequency": {
|
||||
"never": "Never",
|
||||
"daily": "Daily",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Precargado",
|
||||
"private": "Privado",
|
||||
"sync": "Sincronizar",
|
||||
"syncing": "Sincronizando...",
|
||||
"syncFrequency": {
|
||||
"never": "Nunca",
|
||||
"daily": "Diario",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "プリロード済み",
|
||||
"private": "プライベート",
|
||||
"sync": "同期",
|
||||
"syncing": "同期中...",
|
||||
"syncFrequency": {
|
||||
"never": "なし",
|
||||
"daily": "毎日",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Предзагруженный",
|
||||
"private": "Частный",
|
||||
"sync": "Синхронизация",
|
||||
"syncing": "Синхронизация...",
|
||||
"syncFrequency": {
|
||||
"never": "Никогда",
|
||||
"daily": "Ежедневно",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "預載入",
|
||||
"private": "私人",
|
||||
"sync": "同步",
|
||||
"syncing": "同步中...",
|
||||
"syncFrequency": {
|
||||
"never": "從不",
|
||||
"daily": "每天",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "预加载",
|
||||
"private": "私有",
|
||||
"sync": "同步",
|
||||
"syncing": "同步中...",
|
||||
"syncFrequency": {
|
||||
"never": "从不",
|
||||
"daily": "每天",
|
||||
|
||||
@@ -60,7 +60,7 @@ export const ShareConversationModal = ({
|
||||
const [sourcePath, setSourcePath] = useState<{
|
||||
label: string;
|
||||
value: string;
|
||||
} | null>(preSelectedDoc ? extractDocPaths([preSelectedDoc])[0] : null);
|
||||
} | null>(preSelectedDoc ? extractDocPaths(preSelectedDoc)[0] : null);
|
||||
|
||||
const handleCopyKey = (url: string) => {
|
||||
navigator.clipboard.writeText(url);
|
||||
@@ -105,14 +105,14 @@ export const ShareConversationModal = ({
|
||||
return (
|
||||
<WrapperModal close={close}>
|
||||
<div className="flex flex-col gap-2">
|
||||
<h2 className="text-xl font-medium text-eerie-black dark:text-chinese-white">
|
||||
<h2 className="text-eerie-black dark:text-chinese-white text-xl font-medium">
|
||||
{t('modals.shareConv.label')}
|
||||
</h2>
|
||||
<p className="text-sm text-eerie-black dark:text-silver/60">
|
||||
<p className="text-eerie-black dark:text-silver/60 text-sm">
|
||||
{t('modals.shareConv.note')}
|
||||
</p>
|
||||
<div className="flex items-center justify-between">
|
||||
<span className="text-lg text-eerie-black dark:text-white">
|
||||
<span className="text-eerie-black text-lg dark:text-white">
|
||||
{t('modals.shareConv.option')}
|
||||
</span>
|
||||
<ToggleSwitch
|
||||
@@ -136,19 +136,19 @@ export const ShareConversationModal = ({
|
||||
</div>
|
||||
)}
|
||||
<div className="flex items-baseline justify-between gap-2">
|
||||
<span className="no-scrollbar w-full overflow-x-auto whitespace-nowrap rounded-full border-2 border-silver px-4 py-3 text-eerie-black dark:border-silver/40 dark:text-white">
|
||||
<span className="no-scrollbar border-silver text-eerie-black dark:border-silver/40 w-full overflow-x-auto rounded-full border-2 px-4 py-3 whitespace-nowrap dark:text-white">
|
||||
{`${domain}/share/${identifier ?? '....'}`}
|
||||
</span>
|
||||
{status === 'fetched' ? (
|
||||
<button
|
||||
className="my-1 h-10 w-28 rounded-full bg-purple-30 p-2 text-sm text-white hover:bg-violets-are-blue"
|
||||
className="bg-purple-30 hover:bg-violets-are-blue my-1 h-10 w-28 rounded-full p-2 text-sm text-white"
|
||||
onClick={() => handleCopyKey(`${domain}/share/${identifier}`)}
|
||||
>
|
||||
{isCopied ? t('modals.saveKey.copied') : t('modals.saveKey.copy')}
|
||||
</button>
|
||||
) : (
|
||||
<button
|
||||
className="my-1 flex h-10 w-28 items-center justify-evenly rounded-full bg-purple-30 p-2 text-center text-sm font-normal text-white hover:bg-violets-are-blue"
|
||||
className="bg-purple-30 hover:bg-violets-are-blue my-1 flex h-10 w-28 items-center justify-evenly rounded-full p-2 text-center text-sm font-normal text-white"
|
||||
onClick={() => {
|
||||
shareCoversationPublicly(allowPrompt);
|
||||
}}
|
||||
|
||||
@@ -42,10 +42,10 @@ export default function WrapperModal({
|
||||
}, [close, isPerformingTask]);
|
||||
|
||||
const modalContent = (
|
||||
<div className="bg-gray-alpha bg-opacity-50 fixed top-0 left-0 z-30 flex h-screen w-screen items-center justify-center">
|
||||
<div className="fixed top-0 left-0 z-30 flex h-screen w-screen items-center justify-center">
|
||||
<div
|
||||
ref={modalRef}
|
||||
className={`relative w-11/12 rounded-2xl bg-white p-8 sm:w-[512px] dark:bg-[#26272E] ${className}`}
|
||||
className={`relative w-11/12 rounded-2xl bg-white p-8 shadow-2xl sm:w-[512px] dark:bg-[#26272E] ${className}`}
|
||||
>
|
||||
{!isPerformingTask && (
|
||||
<button
|
||||
|
||||
@@ -90,9 +90,9 @@ export function getLocalApiKey(): string | null {
|
||||
return key;
|
||||
}
|
||||
|
||||
export function getLocalRecentDocs(): string | null {
|
||||
const doc = localStorage.getItem('DocsGPTRecentDocs');
|
||||
return doc;
|
||||
export function getLocalRecentDocs(): Doc[] | null {
|
||||
const docs = localStorage.getItem('DocsGPTRecentDocs');
|
||||
return docs ? (JSON.parse(docs) as Doc[]) : null;
|
||||
}
|
||||
|
||||
export function getLocalPrompt(): string | null {
|
||||
@@ -108,19 +108,20 @@ export function setLocalPrompt(prompt: string): void {
|
||||
localStorage.setItem('DocsGPTPrompt', prompt);
|
||||
}
|
||||
|
||||
export function setLocalRecentDocs(doc: Doc | null): void {
|
||||
localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(doc));
|
||||
export function setLocalRecentDocs(docs: Doc[] | null): void {
|
||||
if (docs && docs.length > 0) {
|
||||
localStorage.setItem('DocsGPTRecentDocs', JSON.stringify(docs));
|
||||
|
||||
let docPath = 'default';
|
||||
if (doc?.type === 'local') {
|
||||
docPath = 'local' + '/' + doc.name + '/';
|
||||
docs.forEach((doc) => {
|
||||
let docPath = 'default';
|
||||
if (doc.type === 'local') {
|
||||
docPath = 'local' + '/' + doc.name + '/';
|
||||
}
|
||||
userService
|
||||
.checkDocs({ docs: docPath }, null)
|
||||
.then((response) => response.json());
|
||||
});
|
||||
} else {
|
||||
localStorage.removeItem('DocsGPTRecentDocs');
|
||||
}
|
||||
userService
|
||||
.checkDocs(
|
||||
{
|
||||
docs: docPath,
|
||||
},
|
||||
null,
|
||||
)
|
||||
.then((response) => response.json());
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ export interface Preference {
|
||||
prompt: { name: string; id: string; type: string };
|
||||
chunks: string;
|
||||
token_limit: number;
|
||||
selectedDocs: Doc | null;
|
||||
selectedDocs: Doc[] | null;
|
||||
sourceDocs: Doc[] | null;
|
||||
conversations: {
|
||||
data: { name: string; id: string }[] | null;
|
||||
@@ -34,15 +34,16 @@ const initialState: Preference = {
|
||||
prompt: { name: 'default', id: 'default', type: 'public' },
|
||||
chunks: '2',
|
||||
token_limit: 2000,
|
||||
selectedDocs: {
|
||||
id: 'default',
|
||||
name: 'default',
|
||||
type: 'remote',
|
||||
date: 'default',
|
||||
docLink: 'default',
|
||||
model: 'openai_text-embedding-ada-002',
|
||||
retriever: 'classic',
|
||||
} as Doc,
|
||||
selectedDocs: [
|
||||
{
|
||||
id: 'default',
|
||||
name: 'default',
|
||||
type: 'remote',
|
||||
date: 'default',
|
||||
model: 'openai_text-embedding-ada-002',
|
||||
retriever: 'classic',
|
||||
},
|
||||
] as Doc[],
|
||||
sourceDocs: null,
|
||||
conversations: {
|
||||
data: null,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
@@ -29,6 +28,7 @@ import {
|
||||
import Upload from '../upload/Upload';
|
||||
import { formatDate } from '../utils/dateTimeUtils';
|
||||
import FileTreeComponent from '../components/FileTreeComponent';
|
||||
import ConnectorTreeComponent from '../components/ConnectorTreeComponent';
|
||||
import Chunks from '../components/Chunks';
|
||||
|
||||
const formatTokens = (tokens: number): string => {
|
||||
@@ -272,11 +272,19 @@ export default function Sources({
|
||||
return documentToView ? (
|
||||
<div className="mt-8 flex flex-col">
|
||||
{documentToView.isNested ? (
|
||||
<FileTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
documentToView.type === 'connector' ? (
|
||||
<ConnectorTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
) : (
|
||||
<FileTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
)
|
||||
) : (
|
||||
<Chunks
|
||||
documentId={documentToView.id || ''}
|
||||
@@ -310,7 +318,7 @@ export default function Sources({
|
||||
setSearchTerm(e.target.value);
|
||||
setCurrentPage(1);
|
||||
}}
|
||||
className="w-full h-[32px] rounded-full border border-silver dark:border-silver/40 bg-transparent px-3 text-sm text-jet dark:text-bright-gray placeholder:text-gray-400 dark:placeholder:text-gray-500 outline-none focus:border-silver dark:focus:border-silver/60"
|
||||
className="border-silver dark:border-silver/40 text-jet dark:text-bright-gray focus:border-silver dark:focus:border-silver/60 h-[32px] w-full rounded-full border bg-transparent px-3 text-sm outline-none placeholder:text-gray-400 dark:placeholder:text-gray-500"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
@@ -327,7 +335,7 @@ export default function Sources({
|
||||
</div>
|
||||
<div className="relative w-full">
|
||||
{loading ? (
|
||||
<div className="w-full grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6 px-2 py-4">
|
||||
<div className="grid w-full grid-cols-1 gap-6 px-2 py-4 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4">
|
||||
<SkeletonLoader component="sourceCards" count={rowsPerPage} />
|
||||
</div>
|
||||
) : !currentDocuments?.length ? (
|
||||
@@ -342,19 +350,19 @@ export default function Sources({
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="w-full grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6 px-2 py-4">
|
||||
{currentDocuments.map((document, index) => {
|
||||
const docId = document.id ? document.id.toString() : '';
|
||||
<div className="grid w-full grid-cols-1 gap-6 px-2 py-4 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4">
|
||||
{currentDocuments.map((document, index) => {
|
||||
const docId = document.id ? document.id.toString() : '';
|
||||
|
||||
return (
|
||||
<div key={docId} className="relative">
|
||||
<div
|
||||
className={`flex h-[130px] w-full flex-col rounded-2xl bg-[#F9F9F9] p-3 transition-all duration-200 dark:bg-[#383838] ${
|
||||
activeMenuId === docId || syncMenuState.docId === docId
|
||||
? 'scale-[1.05]'
|
||||
: 'hover:scale-[1.05]'
|
||||
}`}
|
||||
>
|
||||
return (
|
||||
<div key={docId} className="relative">
|
||||
<div
|
||||
className={`flex h-[130px] w-full flex-col rounded-2xl bg-[#F9F9F9] p-3 transition-all duration-200 dark:bg-[#383838] ${
|
||||
activeMenuId === docId || syncMenuState.docId === docId
|
||||
? 'scale-[1.05]'
|
||||
: 'hover:scale-[1.05]'
|
||||
}`}
|
||||
>
|
||||
<div className="w-full flex-1">
|
||||
<div className="flex w-full items-center justify-between gap-2">
|
||||
<h3
|
||||
@@ -418,7 +426,7 @@ export default function Sources({
|
||||
<img
|
||||
src={CalendarIcon}
|
||||
alt=""
|
||||
className="w-[14px] h-[14px]"
|
||||
className="h-[14px] w-[14px]"
|
||||
/>
|
||||
<span className="font-inter text-[12px] leading-[18px] font-[500] text-[#848484] dark:text-[#848484]">
|
||||
{document.date ? formatDate(document.date) : ''}
|
||||
@@ -428,7 +436,7 @@ export default function Sources({
|
||||
<img
|
||||
src={DiscIcon}
|
||||
alt=""
|
||||
className="w-[14px] h-[14px]"
|
||||
className="h-[14px] w-[14px]"
|
||||
/>
|
||||
<span className="font-inter text-[12px] leading-[18px] font-[500] text-[#848484] dark:text-[#848484]">
|
||||
{document.tokens
|
||||
|
||||
@@ -4,6 +4,13 @@ import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
import userService from '../api/services/userService';
|
||||
import {
|
||||
getSessionToken,
|
||||
setSessionToken,
|
||||
removeSessionToken,
|
||||
} from '../utils/providerUtils';
|
||||
import { formatDate } from '../utils/dateTimeUtils';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import FileUpload from '../assets/file_upload.svg';
|
||||
import WebsiteCollect from '../assets/website_collect.svg';
|
||||
import Dropdown from '../components/Dropdown';
|
||||
@@ -25,6 +32,9 @@ import {
|
||||
IngestorFormSchemas,
|
||||
IngestorType,
|
||||
} from './types/ingestor';
|
||||
import FileIcon from '../assets/file.svg';
|
||||
import FolderIcon from '../assets/folder.svg';
|
||||
import ConnectorAuth from '../components/ConnectorAuth';
|
||||
|
||||
function Upload({
|
||||
receivedFile = [],
|
||||
@@ -48,6 +58,23 @@ function Upload({
|
||||
const [activeTab, setActiveTab] = useState<string | null>(renderTab);
|
||||
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
|
||||
|
||||
// Google Drive state
|
||||
const [isGoogleDriveConnected, setIsGoogleDriveConnected] = useState(false);
|
||||
const [googleDriveFiles, setGoogleDriveFiles] = useState<any[]>([]);
|
||||
const [selectedFiles, setSelectedFiles] = useState<string[]>([]);
|
||||
const [isLoadingFiles, setIsLoadingFiles] = useState(false);
|
||||
const [isAuthenticating, setIsAuthenticating] = useState(false);
|
||||
const [userEmail, setUserEmail] = useState<string>('');
|
||||
const [authError, setAuthError] = useState<string>('');
|
||||
const [currentFolderId, setCurrentFolderId] = useState<string | null>(null);
|
||||
const [folderPath, setFolderPath] = useState<
|
||||
Array<{ id: string | null; name: string }>
|
||||
>([{ id: null, name: 'My Drive' }]);
|
||||
|
||||
const [nextPageToken, setNextPageToken] = useState<string | null>(null);
|
||||
const [hasMoreFiles, setHasMoreFiles] = useState<boolean>(false);
|
||||
const scrollContainerRef = useRef<HTMLDivElement | null>(null);
|
||||
|
||||
const renderFormFields = () => {
|
||||
const schema = IngestorFormSchemas[ingestor.type];
|
||||
if (!schema) return null;
|
||||
@@ -204,6 +231,7 @@ function Upload({
|
||||
{ label: 'Link', value: 'url' },
|
||||
{ label: 'GitHub', value: 'github' },
|
||||
{ label: 'Reddit', value: 'reddit' },
|
||||
{ label: 'Google Drive', value: 'google_drive' },
|
||||
];
|
||||
|
||||
const sourceDocs = useSelector(selectSourceDocs);
|
||||
@@ -428,29 +456,40 @@ function Upload({
|
||||
formData.append('user', 'local');
|
||||
formData.append('source', ingestor.type);
|
||||
|
||||
const defaultConfig = IngestorDefaultConfigs[ingestor.type].config;
|
||||
let configData;
|
||||
|
||||
const mergedConfig = { ...defaultConfig, ...ingestor.config };
|
||||
const filteredConfig = Object.entries(mergedConfig).reduce(
|
||||
(acc, [key, value]) => {
|
||||
const field = IngestorFormSchemas[ingestor.type].find(
|
||||
(f) => f.name === key,
|
||||
);
|
||||
// Include the field if:
|
||||
// 1. It's required, or
|
||||
// 2. It's optional and has a non-empty value
|
||||
if (
|
||||
field?.required ||
|
||||
(value !== undefined && value !== null && value !== '')
|
||||
) {
|
||||
acc[key] = value;
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
{} as Record<string, any>,
|
||||
);
|
||||
if (ingestor.type === 'google_drive') {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
|
||||
formData.append('data', JSON.stringify(filteredConfig));
|
||||
const selectedItems = googleDriveFiles.filter((file) =>
|
||||
selectedFiles.includes(file.id),
|
||||
);
|
||||
const selectedFolderIds = selectedItems
|
||||
.filter(
|
||||
(item) =>
|
||||
item.type === 'application/vnd.google-apps.folder' || item.isFolder,
|
||||
)
|
||||
.map((folder) => folder.id);
|
||||
|
||||
const selectedFileIds = selectedItems
|
||||
.filter(
|
||||
(item) =>
|
||||
item.type !== 'application/vnd.google-apps.folder' &&
|
||||
!item.isFolder,
|
||||
)
|
||||
.map((file) => file.id);
|
||||
|
||||
configData = {
|
||||
file_ids: selectedFileIds,
|
||||
folder_ids: selectedFolderIds,
|
||||
recursive: ingestor.config.recursive,
|
||||
session_token: sessionToken || null,
|
||||
};
|
||||
} else {
|
||||
configData = { ...ingestor.config };
|
||||
}
|
||||
|
||||
formData.append('data', JSON.stringify(configData));
|
||||
|
||||
const apiHost: string = import.meta.env.VITE_API_HOST;
|
||||
const xhr = new XMLHttpRequest();
|
||||
@@ -477,6 +516,181 @@ function Upload({
|
||||
xhr.setRequestHeader('Authorization', `Bearer ${token}`);
|
||||
xhr.send(formData);
|
||||
};
|
||||
|
||||
useEffect(() => {
|
||||
if (ingestor.type === 'google_drive') {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
|
||||
if (sessionToken) {
|
||||
// Auto-authenticate if session token exists
|
||||
setIsGoogleDriveConnected(true);
|
||||
setAuthError('');
|
||||
|
||||
// Fetch user email and files using the existing session token
|
||||
|
||||
fetchUserEmailAndLoadFiles(sessionToken);
|
||||
}
|
||||
}
|
||||
}, [ingestor.type]);
|
||||
|
||||
const fetchUserEmailAndLoadFiles = async (sessionToken: string) => {
|
||||
try {
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
|
||||
const validateResponse = await fetch(
|
||||
`${apiHost}/api/connectors/validate-session`,
|
||||
{
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
provider: 'google_drive',
|
||||
session_token: sessionToken,
|
||||
}),
|
||||
},
|
||||
);
|
||||
|
||||
if (!validateResponse.ok) {
|
||||
removeSessionToken(ingestor.type);
|
||||
setIsGoogleDriveConnected(false);
|
||||
setAuthError('Session expired. Please reconnect to Google Drive.');
|
||||
return;
|
||||
}
|
||||
|
||||
const validateData = await validateResponse.json();
|
||||
|
||||
if (validateData.success) {
|
||||
setUserEmail(validateData.user_email || 'Connected User');
|
||||
// reset pagination state and files
|
||||
setGoogleDriveFiles([]);
|
||||
|
||||
setNextPageToken(null);
|
||||
setHasMoreFiles(false);
|
||||
loadGoogleDriveFiles(sessionToken, null, null, false);
|
||||
} else {
|
||||
removeSessionToken(ingestor.type);
|
||||
setIsGoogleDriveConnected(false);
|
||||
setAuthError(
|
||||
validateData.error ||
|
||||
'Session expired. Please reconnect your Google Drive account and make sure to grant offline access.',
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error validating Google Drive session:', error);
|
||||
setAuthError('Failed to validate session. Please reconnect.');
|
||||
setIsGoogleDriveConnected(false);
|
||||
}
|
||||
};
|
||||
|
||||
const loadGoogleDriveFiles = async (
|
||||
sessionToken: string,
|
||||
folderId?: string | null,
|
||||
pageToken?: string | null,
|
||||
append = false,
|
||||
) => {
|
||||
setIsLoadingFiles(true);
|
||||
|
||||
try {
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
const requestBody: any = {
|
||||
session_token: sessionToken,
|
||||
limit: 10,
|
||||
};
|
||||
if (folderId) {
|
||||
requestBody.folder_id = folderId;
|
||||
}
|
||||
if (pageToken) {
|
||||
requestBody.page_token = pageToken;
|
||||
}
|
||||
|
||||
const filesResponse = await fetch(`${apiHost}/api/connectors/files`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
body: JSON.stringify({ ...requestBody, provider: 'google_drive' }),
|
||||
});
|
||||
|
||||
if (!filesResponse.ok) {
|
||||
throw new Error(`Failed to load files: ${filesResponse.status}`);
|
||||
}
|
||||
|
||||
const filesData = await filesResponse.json();
|
||||
|
||||
if (filesData.success && Array.isArray(filesData.files)) {
|
||||
setGoogleDriveFiles((prev) =>
|
||||
append ? [...prev, ...filesData.files] : filesData.files,
|
||||
);
|
||||
setNextPageToken(filesData.next_page_token || null);
|
||||
setHasMoreFiles(Boolean(filesData.has_more));
|
||||
} else {
|
||||
throw new Error(filesData.error || 'Failed to load files');
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error loading Google Drive files:', error);
|
||||
setAuthError(
|
||||
error instanceof Error
|
||||
? error.message
|
||||
: 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.',
|
||||
);
|
||||
} finally {
|
||||
setIsLoadingFiles(false);
|
||||
}
|
||||
};
|
||||
|
||||
// Handle file selection
|
||||
const handleFileSelect = (fileId: string) => {
|
||||
setSelectedFiles((prev) => {
|
||||
if (prev.includes(fileId)) {
|
||||
return prev.filter((id) => id !== fileId);
|
||||
} else {
|
||||
return [...prev, fileId];
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
const handleFolderClick = (folderId: string, folderName: string) => {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
if (sessionToken) {
|
||||
setCurrentFolderId(folderId);
|
||||
setFolderPath((prev) => [...prev, { id: folderId, name: folderName }]);
|
||||
|
||||
setGoogleDriveFiles([]);
|
||||
setNextPageToken(null);
|
||||
setHasMoreFiles(false);
|
||||
setSelectedFiles([]);
|
||||
loadGoogleDriveFiles(sessionToken, folderId, null, false);
|
||||
}
|
||||
};
|
||||
|
||||
const navigateBack = (index: number) => {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
if (sessionToken) {
|
||||
const newPath = folderPath.slice(0, index + 1);
|
||||
const targetFolderId = newPath[newPath.length - 1]?.id;
|
||||
|
||||
setCurrentFolderId(targetFolderId as string | null);
|
||||
setFolderPath(newPath);
|
||||
|
||||
setGoogleDriveFiles([]);
|
||||
setNextPageToken(null);
|
||||
setHasMoreFiles(false);
|
||||
setSelectedFiles([]);
|
||||
loadGoogleDriveFiles(sessionToken, targetFolderId ?? null, null, false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleSelectAll = () => {
|
||||
if (selectedFiles.length === googleDriveFiles.length) {
|
||||
setSelectedFiles([]);
|
||||
} else {
|
||||
setSelectedFiles(googleDriveFiles.map((file) => file.id));
|
||||
}
|
||||
};
|
||||
|
||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||
onDrop,
|
||||
multiple: true,
|
||||
@@ -515,6 +729,10 @@ function Upload({
|
||||
if (!remoteName?.trim()) {
|
||||
return true;
|
||||
}
|
||||
if (ingestor.type === 'google_drive') {
|
||||
return !isGoogleDriveConnected || selectedFiles.length === 0;
|
||||
}
|
||||
|
||||
const formFields: FormField[] = IngestorFormSchemas[ingestor.type];
|
||||
for (const field of formFields) {
|
||||
if (field.required) {
|
||||
@@ -636,7 +854,7 @@ function Upload({
|
||||
{files.map((file) => (
|
||||
<p
|
||||
key={file.name}
|
||||
className="text-gray-6000 truncate overflow-hidden text-ellipsis"
|
||||
className="text-gray-6000 truncate overflow-hidden text-ellipsis dark:text-[#ececf1]"
|
||||
title={file.name}
|
||||
>
|
||||
{file.name}
|
||||
@@ -679,6 +897,253 @@ function Upload({
|
||||
required={true}
|
||||
labelBgClassName="bg-white dark:bg-charleston-green-2"
|
||||
/>
|
||||
{ingestor.type === 'google_drive' && (
|
||||
<div className="space-y-4">
|
||||
{authError && (
|
||||
<div className="rounded-lg border border-red-200 bg-red-50 p-3 dark:border-red-600 dark:bg-red-900/20">
|
||||
<p className="text-sm text-red-600 dark:text-red-400">
|
||||
⚠️ {authError}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!isGoogleDriveConnected ? (
|
||||
<ConnectorAuth
|
||||
provider="google_drive"
|
||||
onSuccess={(data) => {
|
||||
setUserEmail(data.user_email);
|
||||
setIsGoogleDriveConnected(true);
|
||||
setIsAuthenticating(false);
|
||||
setAuthError('');
|
||||
|
||||
if (data.session_token) {
|
||||
setSessionToken(ingestor.type, data.session_token);
|
||||
loadGoogleDriveFiles(data.session_token, null);
|
||||
}
|
||||
}}
|
||||
onError={(error) => {
|
||||
setAuthError(error);
|
||||
setIsAuthenticating(false);
|
||||
setIsGoogleDriveConnected(false);
|
||||
}}
|
||||
/>
|
||||
) : (
|
||||
<div className="space-y-4">
|
||||
{/* Connection Status */}
|
||||
<div className="flex w-full items-center justify-between rounded-lg bg-green-500 px-4 py-2 text-sm text-white">
|
||||
<div className="flex items-center gap-2">
|
||||
<svg className="h-4 w-4" viewBox="0 0 24 24">
|
||||
<path
|
||||
fill="currentColor"
|
||||
d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"
|
||||
/>
|
||||
</svg>
|
||||
<span>Connected as {userEmail}</span>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => {
|
||||
removeSessionToken(ingestor.type);
|
||||
|
||||
setIsGoogleDriveConnected(false);
|
||||
setGoogleDriveFiles([]);
|
||||
setSelectedFiles([]);
|
||||
setUserEmail('');
|
||||
setAuthError('');
|
||||
|
||||
const apiHost = import.meta.env.VITE_API_HOST;
|
||||
fetch(`${apiHost}/api/connectors/disconnect`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
provider: ingestor.type,
|
||||
session_token: getSessionToken(ingestor.type),
|
||||
}),
|
||||
}).catch((err) =>
|
||||
console.error(
|
||||
'Error disconnecting from Google Drive:',
|
||||
err,
|
||||
),
|
||||
);
|
||||
}}
|
||||
className="text-xs text-white underline hover:text-gray-200"
|
||||
>
|
||||
Disconnect
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* File Browser */}
|
||||
<div className="rounded-lg border border-gray-200 dark:border-gray-600">
|
||||
<div className="rounded-t-lg border-b border-gray-200 bg-gray-50 p-3 dark:border-gray-600 dark:bg-gray-800">
|
||||
{/* Breadcrumb navigation */}
|
||||
<div className="mb-2 flex items-center gap-1">
|
||||
{folderPath.map((path, index) => (
|
||||
<div
|
||||
key={path.id || 'root'}
|
||||
className="flex items-center gap-1"
|
||||
>
|
||||
{index > 0 && (
|
||||
<span className="text-gray-400">/</span>
|
||||
)}
|
||||
<button
|
||||
onClick={() => navigateBack(index)}
|
||||
className="text-sm text-blue-600 hover:text-blue-800 hover:underline dark:text-blue-400"
|
||||
disabled={index === folderPath.length - 1}
|
||||
>
|
||||
{path.name}
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Select Files from Google Drive
|
||||
</h4>
|
||||
{googleDriveFiles.length > 0 && (
|
||||
<button
|
||||
onClick={handleSelectAll}
|
||||
className="text-xs text-blue-600 hover:text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
{selectedFiles.length === googleDriveFiles.length
|
||||
? 'Deselect All'
|
||||
: 'Select All'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{selectedFiles.length > 0 && (
|
||||
<p className="mt-1 text-xs text-gray-500">
|
||||
{selectedFiles.length} file
|
||||
{selectedFiles.length !== 1 ? 's' : ''} selected
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div
|
||||
className="max-h-72 overflow-y-auto"
|
||||
ref={scrollContainerRef}
|
||||
>
|
||||
{isLoadingFiles && googleDriveFiles.length === 0 ? (
|
||||
<div className="p-4 text-center">
|
||||
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
|
||||
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
|
||||
Loading files...
|
||||
</div>
|
||||
</div>
|
||||
) : googleDriveFiles.length === 0 ? (
|
||||
<div className="p-4 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
No files found in your Google Drive
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="divide-y divide-gray-200 dark:divide-gray-600">
|
||||
{googleDriveFiles.map((file) => (
|
||||
<div
|
||||
key={file.id}
|
||||
className={`p-3 transition-colors ${
|
||||
selectedFiles.includes(file.id)
|
||||
? 'bg-blue-50 dark:bg-blue-900/20'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex-shrink-0">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={selectedFiles.includes(
|
||||
file.id,
|
||||
)}
|
||||
onChange={() =>
|
||||
handleFileSelect(file.id)
|
||||
}
|
||||
className="h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
|
||||
/>
|
||||
</div>
|
||||
{file.type ===
|
||||
'application/vnd.google-apps.folder' ||
|
||||
file.isFolder ? (
|
||||
<div
|
||||
className="cursor-pointer text-lg hover:text-blue-600"
|
||||
onClick={() =>
|
||||
handleFolderClick(file.id, file.name)
|
||||
}
|
||||
>
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt="Folder"
|
||||
className="h-6 w-6"
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-lg">
|
||||
<img
|
||||
src={FileIcon}
|
||||
alt="File"
|
||||
className="h-6 w-6"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<div className="min-w-0 flex-1">
|
||||
<p
|
||||
className={`truncate text-sm font-medium dark:text-[#ececf1] ${
|
||||
file.type ===
|
||||
'application/vnd.google-apps.folder' ||
|
||||
file.isFolder
|
||||
? 'cursor-pointer hover:text-blue-600'
|
||||
: ''
|
||||
}`}
|
||||
onClick={() => {
|
||||
if (
|
||||
file.type ===
|
||||
'application/vnd.google-apps.folder' ||
|
||||
file.isFolder
|
||||
) {
|
||||
handleFolderClick(
|
||||
file.id,
|
||||
file.name,
|
||||
);
|
||||
}
|
||||
}}
|
||||
>
|
||||
{file.name}
|
||||
</p>
|
||||
<p className="text-xs text-gray-500 dark:text-gray-400">
|
||||
{file.size &&
|
||||
`${formatBytes(file.size)} • `}
|
||||
Modified {formatDate(file.modifiedTime)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-center border-t border-gray-100 p-4 dark:border-gray-800">
|
||||
{isLoadingFiles && (
|
||||
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
|
||||
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
|
||||
Loading more files...
|
||||
</div>
|
||||
)}
|
||||
{!hasMoreFiles && !isLoadingFiles && (
|
||||
<span className="text-sm text-gray-500 dark:text-gray-400">
|
||||
All files loaded
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="hidden" aria-hidden="true"></div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{renderFormFields()}
|
||||
{IngestorFormSchemas[ingestor.type].some(
|
||||
(field) => field.advanced,
|
||||
@@ -719,7 +1184,9 @@ function Upload({
|
||||
: 'bg-purple-30 hover:bg-violets-are-blue cursor-pointer text-white'
|
||||
}`}
|
||||
>
|
||||
{t('modals.uploadDoc.train')}
|
||||
{ingestor.type === 'google_drive' && selectedFiles.length > 0
|
||||
? `Train with ${selectedFiles.length} file${selectedFiles.length !== 1 ? 's' : ''}`
|
||||
: t('modals.uploadDoc.train')}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
@@ -727,6 +1194,41 @@ function Upload({
|
||||
);
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
const scrollContainer = scrollContainerRef.current;
|
||||
|
||||
const handleScroll = () => {
|
||||
if (!scrollContainer) return;
|
||||
|
||||
const { scrollTop, scrollHeight, clientHeight } = scrollContainer;
|
||||
const isNearBottom = scrollHeight - scrollTop - clientHeight < 50;
|
||||
|
||||
if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
if (sessionToken) {
|
||||
loadGoogleDriveFiles(
|
||||
sessionToken,
|
||||
currentFolderId,
|
||||
nextPageToken,
|
||||
true,
|
||||
);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
scrollContainer?.addEventListener('scroll', handleScroll);
|
||||
|
||||
return () => {
|
||||
scrollContainer?.removeEventListener('scroll', handleScroll);
|
||||
};
|
||||
}, [
|
||||
hasMoreFiles,
|
||||
isLoadingFiles,
|
||||
nextPageToken,
|
||||
currentFolderId,
|
||||
ingestor.type,
|
||||
]);
|
||||
|
||||
return (
|
||||
<WrapperModal
|
||||
isPerformingTask={progress !== undefined && progress.percentage < 100}
|
||||
|
||||
@@ -22,7 +22,19 @@ export interface UrlIngestorConfig extends BaseIngestorConfig {
|
||||
url: string;
|
||||
}
|
||||
|
||||
export type IngestorType = 'crawler' | 'github' | 'reddit' | 'url';
|
||||
export interface GoogleDriveIngestorConfig extends BaseIngestorConfig {
|
||||
folder_id?: string;
|
||||
file_ids?: string;
|
||||
recursive?: boolean;
|
||||
token_info?: any;
|
||||
}
|
||||
|
||||
export type IngestorType =
|
||||
| 'crawler'
|
||||
| 'github'
|
||||
| 'reddit'
|
||||
| 'url'
|
||||
| 'google_drive';
|
||||
|
||||
export interface IngestorConfig {
|
||||
type: IngestorType;
|
||||
@@ -31,7 +43,8 @@ export interface IngestorConfig {
|
||||
| RedditIngestorConfig
|
||||
| GithubIngestorConfig
|
||||
| CrawlerIngestorConfig
|
||||
| UrlIngestorConfig;
|
||||
| UrlIngestorConfig
|
||||
| GoogleDriveIngestorConfig;
|
||||
}
|
||||
|
||||
export type IngestorFormData = {
|
||||
@@ -109,6 +122,14 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
google_drive: [
|
||||
{
|
||||
name: 'recursive',
|
||||
label: 'Include subfolders',
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
export const IngestorDefaultConfigs: Record<
|
||||
@@ -143,4 +164,12 @@ export const IngestorDefaultConfigs: Record<
|
||||
repo_url: '',
|
||||
} as GithubIngestorConfig,
|
||||
},
|
||||
google_drive: {
|
||||
name: '',
|
||||
config: {
|
||||
folder_id: '',
|
||||
file_ids: '',
|
||||
recursive: true,
|
||||
} as GoogleDriveIngestorConfig,
|
||||
},
|
||||
};
|
||||
|
||||
16
frontend/src/utils/providerUtils.ts
Normal file
16
frontend/src/utils/providerUtils.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* Utility functions for managing session tokens for different cloud service providers.
|
||||
* Follows the convention: {provider}_session_token
|
||||
*/
|
||||
|
||||
export const getSessionToken = (provider: string): string | null => {
|
||||
return localStorage.getItem(`${provider}_session_token`);
|
||||
};
|
||||
|
||||
export const setSessionToken = (provider: string, token: string): void => {
|
||||
localStorage.setItem(`${provider}_session_token`, token);
|
||||
};
|
||||
|
||||
export const removeSessionToken = (provider: string): void => {
|
||||
localStorage.removeItem(`${provider}_session_token`);
|
||||
};
|
||||
@@ -2,3 +2,12 @@ export function truncate(str: string, n: number) {
|
||||
// slices long strings and ends with ...
|
||||
return str.length > n ? str.slice(0, n - 1) + '...' : str;
|
||||
}
|
||||
|
||||
export function formatBytes(bytes: number | null): string {
|
||||
if (!bytes || bytes <= 0) return '';
|
||||
|
||||
const k = 1024;
|
||||
const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user