mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
Merge branch 'main' of https://github.com/siiddhantt/DocsGPT into pr/1930
This commit is contained in:
@@ -3,11 +3,11 @@
|
||||
</h1>
|
||||
|
||||
<p align="center">
|
||||
<strong>Open-Source RAG Assistant</strong>
|
||||
<strong>Private AI for agents, assistants and enterprise search</strong>
|
||||
</p>
|
||||
|
||||
<p align="left">
|
||||
<strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> is an open-source genAI tool that helps users get reliable answers from any knowledge source, while avoiding hallucinations. It enables quick and reliable information retrieval, with tooling and agentic system capability built in.
|
||||
<strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> is an open-source AI platform for building intelligent agents and assistants. It features an Agent Builder, deep research tools, document analysis (PDF, Office, web content), multi-model support (choose your provider or run locally), and rich API connectivity for agents with actionable tools and integrations. Deploy anywhere with complete privacy control.
|
||||
</p>
|
||||
|
||||
<div align="center">
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import logging
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Generator, List, Optional
|
||||
@@ -6,15 +7,15 @@ from bson.objectid import ObjectId
|
||||
|
||||
from application.agents.tools.tool_action_parser import ToolActionParser
|
||||
from application.agents.tools.tool_manager import ToolManager
|
||||
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
|
||||
from application.llm.handlers.handler_creator import LLMHandlerCreator
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
from application.logging import build_stack_data, log_activity, LogContext
|
||||
from application.retriever.base import BaseRetriever
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BaseAgent(ABC):
|
||||
def __init__(
|
||||
@@ -139,6 +140,40 @@ class BaseAgent(ABC):
|
||||
tool_id, action_name, call_args = parser.parse_args(call)
|
||||
|
||||
call_id = getattr(call, "id", None) or str(uuid.uuid4())
|
||||
|
||||
# Check if parsing failed
|
||||
if tool_id is None or action_name is None:
|
||||
error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
|
||||
logger.error(error_message)
|
||||
|
||||
tool_call_data = {
|
||||
"tool_name": "unknown",
|
||||
"call_id": call_id,
|
||||
"action_name": getattr(call, 'name', 'unknown'),
|
||||
"arguments": call_args or {},
|
||||
"result": f"Failed to parse tool call. Invalid tool name format: {getattr(call, 'name', 'unknown')}",
|
||||
}
|
||||
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
|
||||
self.tool_calls.append(tool_call_data)
|
||||
return "Failed to parse tool call.", call_id
|
||||
|
||||
# Check if tool_id exists in available tools
|
||||
if tool_id not in tools_dict:
|
||||
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
|
||||
logger.error(error_message)
|
||||
|
||||
# Return error result
|
||||
tool_call_data = {
|
||||
"tool_name": "unknown",
|
||||
"call_id": call_id,
|
||||
"action_name": f"{action_name}_{tool_id}",
|
||||
"arguments": call_args,
|
||||
"result": f"Tool with ID {tool_id} not found. Available tools: {list(tools_dict.keys())}",
|
||||
}
|
||||
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
|
||||
self.tool_calls.append(tool_call_data)
|
||||
return f"Tool with ID {tool_id} not found.", call_id
|
||||
|
||||
tool_call_data = {
|
||||
"tool_name": tools_dict[tool_id]["name"],
|
||||
"call_id": call_id,
|
||||
|
||||
@@ -19,8 +19,20 @@ class ToolActionParser:
|
||||
def _parse_openai_llm(self, call):
|
||||
try:
|
||||
call_args = json.loads(call.arguments)
|
||||
tool_id = call.name.split("_")[-1]
|
||||
action_name = call.name.rsplit("_", 1)[0]
|
||||
tool_parts = call.name.split("_")
|
||||
|
||||
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
|
||||
if len(tool_parts) < 2:
|
||||
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
|
||||
return None, None, None
|
||||
|
||||
tool_id = tool_parts[-1]
|
||||
action_name = "_".join(tool_parts[:-1])
|
||||
|
||||
# Validate that tool_id looks like a numerical ID
|
||||
if not tool_id.isdigit():
|
||||
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
|
||||
|
||||
except (AttributeError, TypeError) as e:
|
||||
logger.error(f"Error parsing OpenAI LLM call: {e}")
|
||||
return None, None, None
|
||||
@@ -29,8 +41,20 @@ class ToolActionParser:
|
||||
def _parse_google_llm(self, call):
|
||||
try:
|
||||
call_args = call.arguments
|
||||
tool_id = call.name.split("_")[-1]
|
||||
action_name = call.name.rsplit("_", 1)[0]
|
||||
tool_parts = call.name.split("_")
|
||||
|
||||
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
|
||||
if len(tool_parts) < 2:
|
||||
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
|
||||
return None, None, None
|
||||
|
||||
tool_id = tool_parts[-1]
|
||||
action_name = "_".join(tool_parts[:-1])
|
||||
|
||||
# Validate that tool_id looks like a numerical ID
|
||||
if not tool_id.isdigit():
|
||||
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
|
||||
|
||||
except (AttributeError, TypeError) as e:
|
||||
logger.error(f"Error parsing Google LLM call: {e}")
|
||||
return None, None, None
|
||||
|
||||
@@ -85,6 +85,7 @@ class StreamProcessor:
|
||||
self._configure_agent()
|
||||
self._configure_source()
|
||||
self._configure_retriever()
|
||||
self._configure_agent()
|
||||
self._load_conversation_history()
|
||||
self._process_attachments()
|
||||
|
||||
@@ -260,6 +261,12 @@ class StreamProcessor:
|
||||
)
|
||||
self.initial_user_id = data_key.get("user")
|
||||
self.decoded_token = {"sub": data_key.get("user")}
|
||||
if data_key.get("source"):
|
||||
self.source = {"active_docs": data_key["source"]}
|
||||
if data_key.get("retriever"):
|
||||
self.retriever_config["retriever_name"] = data_key["retriever"]
|
||||
if data_key.get("chunks") is not None:
|
||||
self.retriever_config["chunks"] = data_key["chunks"]
|
||||
elif self.agent_key:
|
||||
data_key = self._get_data_from_api_key(self.agent_key)
|
||||
self.agent_config.update(
|
||||
@@ -275,6 +282,12 @@ class StreamProcessor:
|
||||
if self.is_shared_usage
|
||||
else {"sub": data_key.get("user")}
|
||||
)
|
||||
if data_key.get("source"):
|
||||
self.source = {"active_docs": data_key["source"]}
|
||||
if data_key.get("retriever"):
|
||||
self.retriever_config["retriever_name"] = data_key["retriever"]
|
||||
if data_key.get("chunks") is not None:
|
||||
self.retriever_config["chunks"] = data_key["chunks"]
|
||||
else:
|
||||
self.agent_config.update(
|
||||
{
|
||||
|
||||
627
application/api/connector/routes.py
Normal file
627
application/api/connector/routes.py
Normal file
@@ -0,0 +1,627 @@
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
from flask import (
|
||||
Blueprint,
|
||||
current_app,
|
||||
jsonify,
|
||||
make_response,
|
||||
request
|
||||
)
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
|
||||
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest_connector_task,
|
||||
)
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.api import api
|
||||
|
||||
|
||||
from application.utils import (
|
||||
check_required_fields
|
||||
)
|
||||
|
||||
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
|
||||
|
||||
|
||||
# Module-level MongoDB handles shared by every connector endpoint below.
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
sources_collection, sessions_collection = db["sources"], db["connector_sessions"]

# Flask blueprint plus the flask-restx namespace the resources register on.
connector = Blueprint("connector", __name__)
connectors_ns = Namespace(
    "connectors",
    description="Connector operations",
    path="/",
)
api.add_namespace(connectors_ns)
|
||||
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/upload")
class UploadConnector(Resource):
    """Queue an ingestion task for a connector-backed source."""

    @staticmethod
    def _as_id_list(raw_ids):
        """Coerce ``raw_ids`` to a list.

        A string is split on commas (blank entries dropped), a list is
        returned unchanged, and any other type yields an empty list.
        """
        if isinstance(raw_ids, list):
            return raw_ids
        if isinstance(raw_ids, str):
            return [part.strip() for part in raw_ids.split(',') if part.strip()]
        return []

    @api.expect(
        api.model(
            "ConnectorUploadModel",
            {
                "user": fields.String(required=True, description="User ID"),
                "source": fields.String(
                    required=True, description="Source type (google_drive, github, etc.)"
                ),
                "name": fields.String(required=True, description="Job name"),
                "data": fields.String(required=True, description="Configuration data"),
                "repo_url": fields.String(description="GitHub repository URL"),
            },
        )
    )
    @api.doc(
        description="Uploads connector source for vectorization",
    )
    def post(self):
        """Validate the form payload and dispatch ``ingest_connector_task``.

        Returns 401 without a decoded token, the ``check_required_fields``
        response when form fields are missing, 400 on any processing error,
        and otherwise 200 with the queued task's ID.
        """
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)

        form = request.form
        missing_fields = check_required_fields(
            form, ["user", "source", "name", "data"]
        )
        if missing_fields:
            return missing_fields

        try:
            # "data" carries the connector configuration as a JSON string.
            config = json.loads(form["data"])
            source_kind = form["source"]
            source_data = None
            sync_frequency = config.get("sync_frequency", "never")

            if source_kind == "github":
                source_data = config.get("repo_url")
            elif source_kind in ["crawler", "url"]:
                source_data = config.get("url")
            elif source_kind == "reddit":
                source_data = config
            elif source_kind in ConnectorCreator.get_supported_connectors():
                session_token = config.get("session_token")
                if not session_token:
                    return make_response(jsonify({
                        "success": False,
                        "error": f"Missing session_token in {source_kind} configuration"
                    }), 400)

                file_ids = self._as_id_list(config.get("file_ids", []))
                folder_ids = self._as_id_list(config.get("folder_ids", []))
                config["file_ids"] = file_ids
                config["folder_ids"] = folder_ids

                queued = ingest_connector_task.delay(
                    job_name=form["name"],
                    user=decoded_token.get("sub"),
                    source_type=source_kind,
                    session_token=session_token,
                    file_ids=file_ids,
                    folder_ids=folder_ids,
                    recursive=config.get("recursive", False),
                    retriever=config.get("retriever", "classic"),
                    sync_frequency=sync_frequency
                )
                return make_response(
                    jsonify({"success": True, "task_id": queued.id}), 200
                )

            # NOTE(review): this fallback passes ``source_data``/``loader``
            # keywords, unlike the connector branch above — confirm the task
            # signature accepts them.
            task = ingest_connector_task.delay(
                source_data=source_data,
                job_name=form["name"],
                user=decoded_token.get("sub"),
                loader=source_kind,
                sync_frequency=sync_frequency
            )
        except Exception as err:
            current_app.logger.error(
                f"Error uploading connector source: {err}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 400)

        return make_response(jsonify({"success": True, "task_id": task.id}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/task_status")
class ConnectorTaskStatus(Resource):
    """Report the Celery status and result metadata of a connector task."""

    task_status_model = api.model(
        "ConnectorTaskStatusModel",
        {"task_id": fields.String(required=True, description="Task ID")},
    )

    @api.expect(task_status_model)
    @api.doc(description="Get connector task status")
    def get(self):
        """Return ``{"status", "result"}`` for the ``task_id`` query argument.

        Returns 400 when ``task_id`` is missing or the lookup raises.
        """
        task_id = request.args.get("task_id")
        if not task_id:
            return make_response(
                jsonify({"success": False, "message": "Task ID is required"}), 400
            )
        try:
            from application.celery_init import celery

            task = celery.AsyncResult(task_id)
            task_meta = task.info
            # Read the status inside the try block so a failing lookup is
            # reported as a 400 instead of escaping at the final return.
            task_status = task.status
            # Use the application logger rather than print() in a handler.
            current_app.logger.debug(
                "Connector task %s status: %s", task_id, task_status
            )
            # Task metadata may be an arbitrary object (e.g. an exception);
            # stringify anything jsonify cannot serialise directly.
            if not isinstance(
                task_meta, (dict, list, str, int, float, bool, type(None))
            ):
                task_meta = str(task_meta)
        except Exception as err:
            current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
            return make_response(jsonify({"success": False}), 400)
        return make_response(jsonify({"status": task_status, "result": task_meta}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/sources")
class ConnectorSources(Resource):
    """List the authenticated user's connector sources, newest first."""

    @api.doc(description="Get connector sources")
    def get(self):
        """Return a JSON array of connector sources for the token's ``sub``.

        Returns 401 without a decoded token and 400 if the query fails.
        """
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)

        user = decoded_token.get("sub")
        try:
            cursor = sources_collection.find(
                {"user": user, "type": "connector"}
            ).sort("date", -1)
            connector_sources = [
                {
                    "id": str(record["_id"]),
                    "name": record.get("name"),
                    "date": record.get("date"),
                    "type": record.get("type"),
                    "source": record.get("source"),
                    "tokens": record.get("tokens", ""),
                    "retriever": record.get("retriever", "classic"),
                    "syncFrequency": record.get("sync_frequency", ""),
                }
                for record in cursor
            ]
        except Exception as err:
            current_app.logger.error(f"Error retrieving connector sources: {err}", exc_info=True)
            return make_response(jsonify({"success": False}), 400)

        return make_response(jsonify(connector_sources), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/delete")
class DeleteConnectorSource(Resource):
    """Delete one connector source owned by the authenticated user."""

    @api.doc(
        description="Delete a connector source",
        params={"source_id": "The source ID to delete"},
    )
    def delete(self):
        """Remove the source named by the ``source_id`` query argument.

        The delete filter includes the caller's ``sub``. Returns 401 without
        a decoded token, 400 for a missing ID or any raised error, 404 when
        nothing matched, and 200 on success.
        """
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)

        source_id = request.args.get("source_id")
        if not source_id:
            return make_response(
                jsonify({"success": False, "message": "source_id is required"}), 400
            )

        try:
            query = {"_id": ObjectId(source_id), "user": decoded_token.get("sub")}
            outcome = sources_collection.delete_one(query)
            if outcome.deleted_count == 0:
                return make_response(
                    jsonify({"success": False, "message": "Source not found"}), 404
                )
        except Exception as err:
            current_app.logger.error(
                f"Error deleting connector source: {err}", exc_info=True
            )
            return make_response(jsonify({"success": False}), 400)

        return make_response(jsonify({"success": True}), 200)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/auth")
class ConnectorAuth(Resource):
    """Produce the OAuth authorization URL for a connector provider."""

    @api.doc(
        description="Get connector OAuth authorization URL",
        params={"provider": "Connector provider (e.g., google_drive)"},
    )
    def get(self):
        """Return the provider's authorization URL with a fresh ``state``.

        The provider is read from the ``provider`` query argument, falling
        back to ``source``. Returns 400 for a missing or unsupported
        provider and 500 if building the URL raises.
        """
        try:
            args = request.args
            provider = args.get('provider') or args.get('source')
            if not provider:
                return make_response(jsonify({"success": False, "error": "Missing provider"}), 400)

            if not ConnectorCreator.is_supported(provider):
                return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400)

            import uuid

            state = str(uuid.uuid4())
            authorization_url = ConnectorCreator.create_auth(
                provider
            ).get_authorization_url(state=state)
            payload = {
                "success": True,
                "authorization_url": authorization_url,
                "state": state
            }
            return make_response(jsonify(payload), 200)
        except Exception as e:
            current_app.logger.error(f"Error generating connector auth URL: {e}")
            return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/callback")
class ConnectorsCallback(Resource):
    """OAuth redirect target that stores a connector session."""

    @staticmethod
    def _status_redirect(status, message, **extra):
        """Redirect to the callback-status page with an encoded query string.

        Every value is percent-encoded via ``urlencode`` so provider names,
        e-mail addresses and exception text cannot break the URL or inject
        additional query parameters.
        """
        from urllib.parse import urlencode

        from flask import redirect

        params = {"status": status, "message": message}
        params.update(extra)
        return redirect(f"/api/connectors/callback-status?{urlencode(params)}")

    @api.doc(description="Handle OAuth callback for external connectors")
    def get(self):
        """Handle OAuth callback for external connectors.

        Exchanges the ``code`` query argument for tokens, stores a new
        session document in ``sessions_collection`` and redirects to the
        callback-status page. Every failure path also redirects there with
        ``status=error``.
        """
        try:
            import uuid

            provider = request.args.get('provider', 'google_drive')
            authorization_code = request.args.get('code')
            error = request.args.get('error')
            # NOTE(review): the ``state`` query argument is not validated
            # here — confirm whether CSRF protection is handled elsewhere.

            if error:
                return self._status_redirect(
                    "error",
                    f"OAuth error: {error}. Please try again and make sure to "
                    "grant all requested permissions, including offline access.",
                    provider=provider,
                )

            if not authorization_code:
                return self._status_redirect(
                    "error",
                    "Authorization code not provided. Please complete the "
                    "authorization process and make sure to grant offline access.",
                    provider=provider,
                )

            try:
                auth = ConnectorCreator.create_auth(provider)
                token_info = auth.exchange_code_for_tokens(authorization_code)

                session_token = str(uuid.uuid4())

                # Best effort: look up the account e-mail for display and
                # fall back to a placeholder if the lookup fails.
                try:
                    credentials = auth.create_credentials_from_token_info(token_info)
                    service = auth.build_drive_service(credentials)
                    user_info = service.about().get(fields="user").execute()
                    user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
                except Exception as e:
                    current_app.logger.warning(f"Could not get user info: {e}")
                    user_email = 'Connected User'

                # Persist only the token fields listed here.
                sanitized_token_info = {
                    "access_token": token_info.get("access_token"),
                    "refresh_token": token_info.get("refresh_token"),
                    "token_uri": token_info.get("token_uri"),
                    "expiry": token_info.get("expiry"),
                    "scopes": token_info.get("scopes")
                }

                user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None
                sessions_collection.insert_one({
                    "session_token": session_token,
                    "user": user_id,
                    "token_info": sanitized_token_info,
                    "created_at": datetime.datetime.now(datetime.timezone.utc),
                    "user_email": user_email,
                    "provider": provider
                })

                # Redirect to success page with session token and user email
                return self._status_redirect(
                    "success",
                    "Authentication successful",
                    provider=provider,
                    session_token=session_token,
                    user_email=user_email,
                )

            except Exception as e:
                current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True)
                return self._status_redirect(
                    "error",
                    f"Failed to exchange authorization code for tokens: {str(e)}",
                    provider=provider,
                )

        except Exception as e:
            current_app.logger.error(f"Error handling connector callback: {e}")
            return self._status_redirect(
                "error",
                f"Failed to complete connector authentication: {str(e)}. "
                "Please try again and make sure to grant all requested "
                "permissions, including offline access.",
            )
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/refresh")
class ConnectorRefresh(Resource):
    """Exchange a refresh token for new connector token info."""

    @api.expect(
        api.model(
            "ConnectorRefreshModel",
            {
                "provider": fields.String(required=True),
                "refresh_token": fields.String(required=True),
            },
        )
    )
    @api.doc(description="Refresh connector access token")
    def post(self):
        """Return refreshed ``token_info`` for the given provider.

        Returns 400 when ``provider`` or ``refresh_token`` is absent from
        the JSON body and 500 if anything raises.
        """
        try:
            payload = request.get_json()
            provider = payload.get('provider')
            refresh_token = payload.get('refresh_token')

            if not (provider and refresh_token):
                return make_response(jsonify({"success": False, "error": "provider and refresh_token are required"}), 400)

            token_info = ConnectorCreator.create_auth(provider).refresh_access_token(
                refresh_token
            )
            return make_response(jsonify({"success": True, "token_info": token_info}), 200)
        except Exception as e:
            current_app.logger.error(f"Error refreshing token for connector: {e}")
            return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/files")
class ConnectorFiles(Resource):
    """List files available through a connector session."""

    @staticmethod
    def _format_modified_time(modified_time):
        """Turn ``YYYY-MM-DDTHH:MM:SS[.fff][Z]`` into ``YYYY-MM-DD HH:MM:SS``.

        Returns ``None`` for an empty value. A value without a ``T``
        separator is returned as its date part only, instead of raising
        ``IndexError`` and failing the whole listing.
        """
        if not modified_time:
            return None
        date_part, _, time_part = modified_time.partition('T')
        # Drop fractional seconds and a trailing "Z" designator.
        time_part = time_part.split('.')[0].split('Z')[0]
        return f"{date_part} {time_part}" if time_part else date_part

    @api.expect(
        api.model(
            "ConnectorFilesModel",
            {
                "provider": fields.String(required=True),
                "session_token": fields.String(required=True),
                "folder_id": fields.String(required=False),
                "limit": fields.Integer(required=False),
                "page_token": fields.String(required=False),
            },
        )
    )
    @api.doc(description="List files from a connector provider (supports pagination)")
    def post(self):
        """Return one page of file metadata for the caller's session.

        Returns 400 when ``provider`` or ``session_token`` is missing, 401
        without a decoded token or when no session matches the token and
        user, and 500 if loading raises.
        """
        try:
            data = request.get_json()
            provider = data.get('provider')
            session_token = data.get('session_token')
            folder_id = data.get('folder_id')
            limit = data.get('limit', 10)
            page_token = data.get('page_token')
            if not provider or not session_token:
                return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)

            decoded_token = request.decoded_token
            if not decoded_token:
                return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
            user = decoded_token.get('sub')
            # The session must belong to the authenticated user.
            session = sessions_collection.find_one({"session_token": session_token, "user": user})
            if not session:
                return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)

            loader = ConnectorCreator.create_connector(provider, session_token)
            documents = loader.load_data({
                'limit': limit,
                'list_only': True,
                'session_token': session_token,
                'folder_id': folder_id,
                'page_token': page_token
            })

            files = []
            for doc in documents[:limit]:
                metadata = doc.extra_info
                files.append({
                    'id': doc.doc_id,
                    'name': metadata.get('file_name', 'Unknown File'),
                    'type': metadata.get('mime_type', 'unknown'),
                    'size': metadata.get('size', None),
                    'modifiedTime': self._format_modified_time(
                        metadata.get('modified_time')
                    )
                })

            # The loader may not define ``next_page_token``; treat as last page.
            next_token = getattr(loader, 'next_page_token', None)
            has_more = bool(next_token)

            return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200)
        except Exception as e:
            current_app.logger.error(f"Error loading connector files: {e}")
            return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/validate-session")
class ConnectorValidateSession(Resource):
    """Check whether a stored connector session is still usable."""

    @api.expect(
        api.model(
            "ConnectorValidateSessionModel",
            {
                "provider": fields.String(required=True),
                "session_token": fields.String(required=True),
            },
        )
    )
    @api.doc(description="Validate connector session token and return user info")
    def post(self):
        """Report token expiry and the stored e-mail for a session.

        Returns 400 when ``provider`` or ``session_token`` is missing, 401
        without a decoded token or when no session with ``token_info``
        matches the token and user, and 500 if anything raises.
        """
        try:
            payload = request.get_json()
            provider = payload.get('provider')
            session_token = payload.get('session_token')
            if not (provider and session_token):
                return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)

            decoded_token = request.decoded_token
            if not decoded_token:
                return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)

            session = sessions_collection.find_one(
                {"session_token": session_token, "user": decoded_token.get('sub')}
            )
            if not session or "token_info" not in session:
                return make_response(jsonify({"success": False, "error": "Invalid or expired session"}), 401)

            auth = ConnectorCreator.create_auth(provider)
            result = {
                "success": True,
                "expired": auth.is_token_expired(session["token_info"]),
                "user_email": session.get('user_email', 'Connected User')
            }
            return make_response(jsonify(result), 200)
        except Exception as e:
            current_app.logger.error(f"Error validating connector session: {e}")
            return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/disconnect")
class ConnectorDisconnect(Resource):
    """Remove a stored connector session."""

    @api.expect(
        api.model(
            "ConnectorDisconnectModel",
            {
                "provider": fields.String(required=True),
                "session_token": fields.String(required=False),
            },
        )
    )
    @api.doc(description="Disconnect a connector session")
    def post(self):
        """Delete the session document matching ``session_token``, if given.

        Returns 400 when ``provider`` is missing, 500 if anything raises,
        and 200 otherwise (including when no ``session_token`` was sent).
        """
        try:
            payload = request.get_json()
            session_token = payload.get('session_token')
            if not payload.get('provider'):
                return make_response(jsonify({"success": False, "error": "provider is required"}), 400)

            # NOTE(review): the delete filter is the token alone — it is not
            # scoped to a user. Confirm whether that is intended.
            if session_token:
                sessions_collection.delete_one({"session_token": session_token})

            return make_response(jsonify({"success": True}), 200)
        except Exception as e:
            current_app.logger.error(f"Error disconnecting connector session: {e}")
            return make_response(jsonify({"success": False, "error": str(e)}), 500)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/sync")
class ConnectorSync(Resource):
    """Queue a re-sync of an existing connector source."""

    @staticmethod
    def _failure(message, status_code):
        """Build a ``{"success": False, "error": message}`` JSON response."""
        return make_response(
            jsonify({"success": False, "error": message}), status_code
        )

    @api.expect(
        api.model(
            "ConnectorSyncModel",
            {
                "source_id": fields.String(required=True, description="Source ID to sync"),
                "session_token": fields.String(required=True, description="Authentication token")
            },
        )
    )
    @api.doc(description="Sync connector source to check for modifications")
    def post(self):
        """Dispatch ``ingest_connector_task`` in ``sync`` mode for a source.

        Returns 401 without a decoded token, 400 for missing fields, a
        missing provider or any raised error, 404 when the source does not
        exist, 403 when it belongs to another user, and 200 with the task
        ID on success.
        """
        decoded_token = request.decoded_token
        if not decoded_token:
            return make_response(jsonify({"success": False}), 401)

        try:
            payload = request.get_json()
            source_id = payload.get('source_id')
            session_token = payload.get('session_token')

            if not source_id or not session_token:
                return self._failure("source_id and session_token are required", 400)

            source = sources_collection.find_one({"_id": ObjectId(source_id)})
            if not source:
                return self._failure("Source not found", 404)

            if source.get('user') != decoded_token.get('sub'):
                return self._failure("Unauthorized access to source", 403)

            # ``remote_data`` is stored as a JSON string; malformed JSON is
            # logged and treated as an empty configuration.
            remote_data = {}
            stored_remote = source.get('remote_data')
            if stored_remote:
                try:
                    remote_data = json.loads(stored_remote)
                except json.JSONDecodeError:
                    current_app.logger.error(f"Invalid remote_data format for source {source_id}")

            source_type = remote_data.get('provider')
            if not source_type:
                return self._failure("Source provider not found in remote_data", 400)

            # Start the sync task with the configuration kept in remote_data
            task = ingest_connector_task.delay(
                job_name=source.get('name'),
                user=decoded_token.get('sub'),
                source_type=source_type,
                session_token=session_token,
                file_ids=remote_data.get('file_ids', []),
                folder_ids=remote_data.get('folder_ids', []),
                recursive=remote_data.get('recursive', True),
                retriever=source.get('retriever', 'classic'),
                operation_mode="sync",
                doc_id=source_id,
                sync_frequency=source.get('sync_frequency', 'never')
            )

            return make_response(
                jsonify({"success": True, "task_id": task.id}), 200
            )

        except Exception as err:
            current_app.logger.error(
                f"Error syncing connector source: {err}",
                exc_info=True
            )
            return self._failure(str(err), 400)
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/callback-status")
class ConnectorCallbackStatus(Resource):
    """Render the pop-up page shown after a connector OAuth attempt."""

    @staticmethod
    def _js_literal(value):
        """Serialise ``value`` as a JavaScript string literal for inline script.

        ``json.dumps`` handles quoting; ``<``, ``>`` and ``&`` are further
        replaced by unicode escapes so the value cannot close the
        surrounding ``<script>`` element.
        """
        return (
            json.dumps(value)
            .replace("<", "\\u003c")
            .replace(">", "\\u003e")
            .replace("&", "\\u0026")
        )

    @api.doc(description="Return HTML page with connector authentication status")
    def get(self):
        """Return HTML page with connector authentication status.

        All values come from query arguments and are therefore untrusted:
        they are HTML-escaped where placed in markup and emitted as escaped
        string literals where placed in the inline script. Returns 500 with
        a plain message if rendering raises.
        """
        try:
            import html

            status = request.args.get('status', 'error')
            message = request.args.get('message', '')
            provider = request.args.get('provider', 'connector')
            session_token = request.args.get('session_token', '')
            user_email = request.args.get('user_email', '')

            succeeded = status == 'success'

            # Escaped values for HTML text and attribute contexts.
            provider_title = html.escape(provider.replace('_', ' ').title())
            status_class = html.escape(status, quote=True)
            safe_message = html.escape(message)
            connected_line = (
                f'<p>Connected as: {html.escape(user_email)}</p>' if succeeded else ''
            )
            ready_line = (
                f"Your {provider_title} is now connected and ready to use."
                if succeeded
                else ''
            )

            # Escaped string literals for the inline <script> context.
            js_status = self._js_literal(status)
            js_session_token = self._js_literal(session_token)
            js_user_email = self._js_literal(user_email)
            js_event_type = self._js_literal(f"{provider}_auth_success")

            # NOTE(review): postMessage uses target origin '*', so the session
            # token is delivered to whatever window opened this page. Consider
            # restricting it to the known frontend origin.
            html_content = f"""
            <!DOCTYPE html>
            <html>
            <head>
                <title>{provider_title} Authentication</title>
                <style>
                    body {{ font-family: Arial, sans-serif; text-align: center; padding: 40px; }}
                    .container {{ max-width: 600px; margin: 0 auto; }}
                    .success {{ color: #4CAF50; }}
                    .error {{ color: #F44336; }}
                </style>
                <script>
                    window.onload = function() {{
                        const status = {js_status};
                        const sessionToken = {js_session_token};
                        const userEmail = {js_user_email};

                        if (status === "success" && window.opener) {{
                            window.opener.postMessage({{
                                type: {js_event_type},
                                session_token: sessionToken,
                                user_email: userEmail
                            }}, '*');

                            setTimeout(() => window.close(), 3000);
                        }}
                    }};
                </script>
            </head>
            <body>
                <div class="container">
                    <h2>{provider_title} Authentication</h2>
                    <div class="{status_class}">
                        <p>{safe_message}</p>
                        {connected_line}
                    </div>
                    <p><small>You can close this window. {ready_line}</small></p>
                </div>
            </body>
            </html>
            """

            return make_response(html_content, 200, {'Content-Type': 'text/html'})
        except Exception as e:
            current_app.logger.error(f"Error rendering callback status page: {e}")
            return make_response("Authentication error occurred", 500, {'Content-Type': 'text/html'})
|
||||
|
||||
|
||||
@@ -3,11 +3,12 @@ import json
|
||||
import math
|
||||
import os
|
||||
import secrets
|
||||
import tempfile
|
||||
import uuid
|
||||
import zipfile
|
||||
from functools import wraps
|
||||
from typing import Optional, Tuple
|
||||
import tempfile
|
||||
import zipfile
|
||||
|
||||
from bson.binary import Binary, UuidRepresentation
|
||||
from bson.dbref import DBRef
|
||||
from bson.objectid import ObjectId
|
||||
@@ -25,26 +26,28 @@ from pymongo import ReturnDocument
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from application.agents.tools.tool_manager import ToolManager
|
||||
from application.api import api
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest,
|
||||
ingest_connector_task,
|
||||
ingest_remote,
|
||||
process_agent_webhook,
|
||||
store_attachment,
|
||||
)
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.api import api
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from application.storage.storage_creator import StorageCreator
|
||||
from application.tts.google_tts import GoogleTTS
|
||||
from application.utils import (
|
||||
check_required_fields,
|
||||
generate_image_url,
|
||||
num_tokens_from_string,
|
||||
safe_filename,
|
||||
validate_function_name,
|
||||
validate_required_fields,
|
||||
)
|
||||
from application.utils import num_tokens_from_string
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
storage = StorageCreator.get_storage()
|
||||
@@ -72,7 +75,6 @@ try:
|
||||
users_collection.create_index("user_id", unique=True)
|
||||
except Exception as e:
|
||||
print("Error creating indexes:", e)
|
||||
|
||||
user = Blueprint("user", __name__)
|
||||
user_ns = Namespace("user", description="User related operations", path="/")
|
||||
api.add_namespace(user_ns)
|
||||
@@ -125,11 +127,9 @@ def ensure_user_doc(user_id):
|
||||
updates["agent_preferences.pinned"] = []
|
||||
if "shared_with_me" not in prefs:
|
||||
updates["agent_preferences.shared_with_me"] = []
|
||||
|
||||
if updates:
|
||||
users_collection.update_one({"user_id": user_id}, {"$set": updates})
|
||||
user_doc = users_collection.find_one({"user_id": user_id})
|
||||
|
||||
return user_doc
|
||||
|
||||
|
||||
@@ -181,7 +181,6 @@ def handle_image_upload(
|
||||
jsonify({"success": False, "message": "Image upload failed"}),
|
||||
400,
|
||||
)
|
||||
|
||||
return image_url, None
|
||||
|
||||
|
||||
@@ -295,8 +294,8 @@ class GetSingleConversation(Resource):
|
||||
)
|
||||
if not conversation:
|
||||
return make_response(jsonify({"status": "not found"}), 404)
|
||||
|
||||
# Process queries to include attachment names
|
||||
|
||||
queries = conversation["queries"]
|
||||
for query in queries:
|
||||
if "attachments" in query and query["attachments"]:
|
||||
@@ -492,11 +491,11 @@ class DeleteOldIndexes(Resource):
|
||||
)
|
||||
if not doc:
|
||||
return make_response(jsonify({"status": "not found"}), 404)
|
||||
|
||||
storage = StorageCreator.get_storage()
|
||||
|
||||
try:
|
||||
# Delete vector index
|
||||
|
||||
if settings.VECTOR_STORE == "faiss":
|
||||
index_path = f"indexes/{str(doc['_id'])}"
|
||||
if storage.file_exists(f"{index_path}/index.faiss"):
|
||||
@@ -508,7 +507,6 @@ class DeleteOldIndexes(Resource):
|
||||
settings.VECTOR_STORE, source_id=str(doc["_id"])
|
||||
)
|
||||
vectorstore.delete_index()
|
||||
|
||||
if "file_path" in doc and doc["file_path"]:
|
||||
file_path = doc["file_path"]
|
||||
if storage.is_directory(file_path):
|
||||
@@ -517,7 +515,6 @@ class DeleteOldIndexes(Resource):
|
||||
storage.delete_file(f)
|
||||
else:
|
||||
storage.delete_file(file_path)
|
||||
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
except Exception as err:
|
||||
@@ -525,7 +522,6 @@ class DeleteOldIndexes(Resource):
|
||||
f"Error deleting files and indexes: {err}", exc_info=True
|
||||
)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
|
||||
sources_collection.delete_one({"_id": ObjectId(source_id)})
|
||||
return make_response(jsonify({"success": True}), 200)
|
||||
|
||||
@@ -567,6 +563,7 @@ class UploadFile(Resource):
|
||||
job_name = request.form["name"]
|
||||
|
||||
# Create safe versions for filesystem operations
|
||||
|
||||
safe_user = safe_filename(user)
|
||||
dir_name = safe_filename(job_name)
|
||||
base_path = f"{settings.UPLOAD_FOLDER}/{safe_user}/{dir_name}"
|
||||
@@ -588,6 +585,7 @@ class UploadFile(Resource):
|
||||
zip_ref.extractall(path=temp_dir)
|
||||
|
||||
# Walk through extracted files and upload them
|
||||
|
||||
for root, _, files in os.walk(temp_dir):
|
||||
for extracted_file in files:
|
||||
if (
|
||||
@@ -595,7 +593,6 @@ class UploadFile(Resource):
|
||||
== temp_file_path
|
||||
):
|
||||
continue
|
||||
|
||||
rel_path = os.path.relpath(
|
||||
os.path.join(root, extracted_file), temp_dir
|
||||
)
|
||||
@@ -610,15 +607,16 @@ class UploadFile(Resource):
|
||||
f"Error extracting zip: {e}", exc_info=True
|
||||
)
|
||||
# If zip extraction fails, save the original zip file
|
||||
|
||||
file_path = f"{base_path}/{safe_file}"
|
||||
with open(temp_file_path, "rb") as f:
|
||||
storage.save_file(f, file_path)
|
||||
else:
|
||||
# For non-zip files, save directly
|
||||
|
||||
file_path = f"{base_path}/{safe_file}"
|
||||
with open(temp_file_path, "rb") as f:
|
||||
storage.save_file(f, file_path)
|
||||
|
||||
task = ingest.delay(
|
||||
settings.UPLOAD_FOLDER,
|
||||
[
|
||||
@@ -690,7 +688,6 @@ class ManageSourceFiles(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Unauthorized"}), 401
|
||||
)
|
||||
|
||||
user = decoded_token.get("sub")
|
||||
source_id = request.form.get("source_id")
|
||||
operation = request.form.get("operation")
|
||||
@@ -705,7 +702,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if operation not in ["add", "remove", "remove_directory"]:
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -716,14 +712,12 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
try:
|
||||
ObjectId(source_id)
|
||||
except Exception:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Invalid source ID format"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
source = sources_collection.find_one(
|
||||
{"_id": ObjectId(source_id), "user": user}
|
||||
@@ -743,7 +737,6 @@ class ManageSourceFiles(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Database error"}), 500
|
||||
)
|
||||
|
||||
try:
|
||||
storage = StorageCreator.get_storage()
|
||||
source_file_path = source.get("file_path", "")
|
||||
@@ -756,7 +749,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if operation == "add":
|
||||
files = request.files.getlist("file")
|
||||
if not files or all(file.filename == "" for file in files):
|
||||
@@ -769,23 +761,22 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
added_files = []
|
||||
|
||||
target_dir = source_file_path
|
||||
if parent_dir:
|
||||
target_dir = f"{source_file_path}/{parent_dir}"
|
||||
|
||||
for file in files:
|
||||
if file.filename:
|
||||
safe_filename_str = safe_filename(file.filename)
|
||||
file_path = f"{target_dir}/{safe_filename_str}"
|
||||
|
||||
# Save file to storage
|
||||
|
||||
storage.save_file(file, file_path)
|
||||
added_files.append(safe_filename_str)
|
||||
|
||||
# Trigger re-ingestion pipeline
|
||||
|
||||
from application.api.user.tasks import reingest_source_task
|
||||
|
||||
task = reingest_source_task.delay(source_id=source_id, user=user)
|
||||
@@ -802,7 +793,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
200,
|
||||
)
|
||||
|
||||
elif operation == "remove":
|
||||
file_paths_str = request.form.get("file_paths")
|
||||
if not file_paths_str:
|
||||
@@ -815,7 +805,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
try:
|
||||
file_paths = (
|
||||
json.loads(file_paths_str)
|
||||
@@ -829,18 +818,19 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
# Remove files from storage and directory structure
|
||||
|
||||
removed_files = []
|
||||
for file_path in file_paths:
|
||||
full_path = f"{source_file_path}/{file_path}"
|
||||
|
||||
# Remove from storage
|
||||
|
||||
if storage.file_exists(full_path):
|
||||
storage.delete_file(full_path)
|
||||
removed_files.append(file_path)
|
||||
|
||||
# Trigger re-ingestion pipeline
|
||||
|
||||
from application.api.user.tasks import reingest_source_task
|
||||
|
||||
task = reingest_source_task.delay(source_id=source_id, user=user)
|
||||
@@ -856,7 +846,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
200,
|
||||
)
|
||||
|
||||
elif operation == "remove_directory":
|
||||
directory_path = request.form.get("directory_path")
|
||||
if not directory_path:
|
||||
@@ -869,8 +858,8 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
# Validate directory path (prevent path traversal)
|
||||
|
||||
if directory_path.startswith("/") or ".." in directory_path:
|
||||
current_app.logger.warning(
|
||||
f"Invalid directory path attempted for removal. "
|
||||
@@ -882,7 +871,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
full_directory_path = (
|
||||
f"{source_file_path}/{directory_path}"
|
||||
if directory_path
|
||||
@@ -904,7 +892,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
404,
|
||||
)
|
||||
|
||||
success = storage.remove_directory(full_directory_path)
|
||||
|
||||
if not success:
|
||||
@@ -919,7 +906,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
500,
|
||||
)
|
||||
|
||||
current_app.logger.info(
|
||||
f"Successfully removed directory. "
|
||||
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
|
||||
@@ -927,6 +913,7 @@ class ManageSourceFiles(Resource):
|
||||
)
|
||||
|
||||
# Trigger re-ingestion pipeline
|
||||
|
||||
from application.api.user.tasks import reingest_source_task
|
||||
|
||||
task = reingest_source_task.delay(source_id=source_id, user=user)
|
||||
@@ -942,7 +929,6 @@ class ManageSourceFiles(Resource):
|
||||
),
|
||||
200,
|
||||
)
|
||||
|
||||
except Exception as err:
|
||||
error_context = f"operation={operation}, user={user}, source_id={source_id}"
|
||||
if operation == "remove_directory":
|
||||
@@ -954,7 +940,6 @@ class ManageSourceFiles(Resource):
|
||||
elif operation == "add":
|
||||
parent_dir = request.form.get("parent_dir", "")
|
||||
error_context += f", parent_dir={parent_dir}"
|
||||
|
||||
current_app.logger.error(
|
||||
f"Error managing source files: {err} ({error_context})", exc_info=True
|
||||
)
|
||||
@@ -1001,6 +986,50 @@ class UploadRemote(Resource):
|
||||
source_data = config.get("url")
|
||||
elif data["source"] == "reddit":
|
||||
source_data = config
|
||||
elif data["source"] in ConnectorCreator.get_supported_connectors():
|
||||
session_token = config.get("session_token")
|
||||
if not session_token:
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Missing session_token in {data['source']} configuration",
|
||||
}
|
||||
),
|
||||
400,
|
||||
)
|
||||
# Process file_ids
|
||||
|
||||
file_ids = config.get("file_ids", [])
|
||||
if isinstance(file_ids, str):
|
||||
file_ids = [id.strip() for id in file_ids.split(",") if id.strip()]
|
||||
elif not isinstance(file_ids, list):
|
||||
file_ids = []
|
||||
# Process folder_ids
|
||||
|
||||
folder_ids = config.get("folder_ids", [])
|
||||
if isinstance(folder_ids, str):
|
||||
folder_ids = [
|
||||
id.strip() for id in folder_ids.split(",") if id.strip()
|
||||
]
|
||||
elif not isinstance(folder_ids, list):
|
||||
folder_ids = []
|
||||
config["file_ids"] = file_ids
|
||||
config["folder_ids"] = folder_ids
|
||||
|
||||
task = ingest_connector_task.delay(
|
||||
job_name=data["name"],
|
||||
user=decoded_token.get("sub"),
|
||||
source_type=data["source"],
|
||||
session_token=session_token,
|
||||
file_ids=file_ids,
|
||||
folder_ids=folder_ids,
|
||||
recursive=config.get("recursive", False),
|
||||
retriever=config.get("retriever", "classic"),
|
||||
)
|
||||
return make_response(
|
||||
jsonify({"success": True, "task_id": task.id}), 200
|
||||
)
|
||||
task = ingest_remote.delay(
|
||||
source_data=source_data,
|
||||
job_name=data["name"],
|
||||
@@ -1109,6 +1138,7 @@ class PaginatedSources(Resource):
|
||||
"retriever": doc.get("retriever", "classic"),
|
||||
"syncFrequency": doc.get("sync_frequency", ""),
|
||||
"isNested": bool(doc.get("directory_structure")),
|
||||
"type": doc.get("type", "file"),
|
||||
}
|
||||
paginated_docs.append(doc_data)
|
||||
response = {
|
||||
@@ -1157,6 +1187,9 @@ class CombinedJson(Resource):
|
||||
"retriever": index.get("retriever", "classic"),
|
||||
"syncFrequency": index.get("sync_frequency", ""),
|
||||
"is_nested": bool(index.get("directory_structure")),
|
||||
"type": index.get(
|
||||
"type", "file"
|
||||
), # Add type field with default "file"
|
||||
}
|
||||
)
|
||||
except Exception as err:
|
||||
@@ -1372,17 +1405,14 @@ class GetAgent(Resource):
|
||||
def get(self):
|
||||
if not (decoded_token := request.decoded_token):
|
||||
return {"success": False}, 401
|
||||
|
||||
if not (agent_id := request.args.get("id")):
|
||||
return {"success": False, "message": "ID required"}, 400
|
||||
|
||||
try:
|
||||
agent = agents_collection.find_one(
|
||||
{"_id": ObjectId(agent_id), "user": decoded_token["sub"]}
|
||||
)
|
||||
if not agent:
|
||||
return {"status": "Not found"}, 404
|
||||
|
||||
data = {
|
||||
"id": str(agent["_id"]),
|
||||
"name": agent["name"],
|
||||
@@ -1428,7 +1458,6 @@ class GetAgent(Resource):
|
||||
"shared_token": agent.get("shared_token", ""),
|
||||
}
|
||||
return make_response(jsonify(data), 200)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Agent fetch error: {e}", exc_info=True)
|
||||
return {"success": False}, 400
|
||||
@@ -1440,7 +1469,6 @@ class GetAgents(Resource):
|
||||
def get(self):
|
||||
if not (decoded_token := request.decoded_token):
|
||||
return {"success": False}, 401
|
||||
|
||||
user = decoded_token.get("sub")
|
||||
try:
|
||||
user_doc = ensure_user_doc(user)
|
||||
@@ -1501,7 +1529,6 @@ class GetAgents(Resource):
|
||||
for agent in agents
|
||||
if "source" in agent or "retriever" in agent
|
||||
]
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving agents: {err}", exc_info=True)
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
@@ -1573,9 +1600,11 @@ class CreateAgent(Resource):
|
||||
print(f"Received data: {data}")
|
||||
|
||||
# Validate JSON schema if provided
|
||||
|
||||
if data.get("json_schema"):
|
||||
try:
|
||||
# Basic validation - ensure it's a valid JSON structure
|
||||
|
||||
json_schema = data.get("json_schema")
|
||||
if not isinstance(json_schema, dict):
|
||||
return make_response(
|
||||
@@ -1587,8 +1616,8 @@ class CreateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
# Validate that it has either a 'schema' property or is itself a schema
|
||||
|
||||
if "schema" not in json_schema and "type" not in json_schema:
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -1606,7 +1635,6 @@ class CreateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if data.get("status") not in ["draft", "published"]:
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -1617,7 +1645,6 @@ class CreateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if data.get("status") == "published":
|
||||
required_fields = [
|
||||
"name",
|
||||
@@ -1628,6 +1655,7 @@ class CreateAgent(Resource):
|
||||
"agent_type",
|
||||
]
|
||||
# Require either source or sources (but not both)
|
||||
|
||||
if not data.get("source") and not data.get("sources"):
|
||||
return make_response(
|
||||
jsonify(
|
||||
@@ -1648,13 +1676,11 @@ class CreateAgent(Resource):
|
||||
return missing_fields
|
||||
if invalid_fields:
|
||||
return invalid_fields
|
||||
|
||||
image_url, error = handle_image_upload(request, "", user, storage)
|
||||
if error:
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Image upload failed"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
key = str(uuid.uuid4()) if data.get("status") == "published" else ""
|
||||
|
||||
@@ -1674,7 +1700,6 @@ class CreateAgent(Resource):
|
||||
source_field = DBRef("sources", ObjectId(source_value))
|
||||
else:
|
||||
source_field = ""
|
||||
|
||||
new_agent = {
|
||||
"user": user,
|
||||
"name": data.get("name"),
|
||||
@@ -1772,7 +1797,6 @@ class UpdateAgent(Resource):
|
||||
data["json_schema"] = json.loads(data["json_schema"])
|
||||
except json.JSONDecodeError:
|
||||
data["json_schema"] = None
|
||||
|
||||
if not ObjectId.is_valid(agent_id):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Invalid agent ID format"}), 400
|
||||
@@ -1796,7 +1820,6 @@ class UpdateAgent(Resource):
|
||||
),
|
||||
404,
|
||||
)
|
||||
|
||||
image_url, error = handle_image_upload(
|
||||
request, existing_agent.get("image", ""), user, storage
|
||||
)
|
||||
@@ -1804,7 +1827,6 @@ class UpdateAgent(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Image upload failed"}), 400
|
||||
)
|
||||
|
||||
update_fields = {}
|
||||
allowed_fields = [
|
||||
"name",
|
||||
@@ -1837,6 +1859,7 @@ class UpdateAgent(Resource):
|
||||
source_id = data.get("source")
|
||||
if source_id == "default":
|
||||
# Handle special "default" source
|
||||
|
||||
update_fields[field] = "default"
|
||||
elif source_id and ObjectId.is_valid(source_id):
|
||||
update_fields[field] = DBRef("sources", ObjectId(source_id))
|
||||
@@ -1941,7 +1964,6 @@ class UpdateAgent(Resource):
|
||||
),
|
||||
400,
|
||||
)
|
||||
|
||||
if not existing_agent.get("key"):
|
||||
newly_generated_key = str(uuid.uuid4())
|
||||
update_fields["key"] = newly_generated_key
|
||||
@@ -2028,7 +2050,6 @@ class PinnedAgents(Resource):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
|
||||
user_id = decoded_token.get("sub")
|
||||
|
||||
try:
|
||||
@@ -2037,7 +2058,6 @@ class PinnedAgents(Resource):
|
||||
|
||||
if not pinned_ids:
|
||||
return make_response(jsonify([]), 200)
|
||||
|
||||
pinned_object_ids = [ObjectId(agent_id) for agent_id in pinned_ids]
|
||||
|
||||
pinned_agents_cursor = agents_collection.find(
|
||||
@@ -2047,6 +2067,7 @@ class PinnedAgents(Resource):
|
||||
existing_ids = {str(agent["_id"]) for agent in pinned_agents}
|
||||
|
||||
# Clean up any stale pinned IDs
|
||||
|
||||
stale_ids = [
|
||||
agent_id for agent_id in pinned_ids if agent_id not in existing_ids
|
||||
]
|
||||
@@ -2055,7 +2076,6 @@ class PinnedAgents(Resource):
|
||||
{"user_id": user_id},
|
||||
{"$pullAll": {"agent_preferences.pinned": stale_ids}},
|
||||
)
|
||||
|
||||
list_pinned_agents = [
|
||||
{
|
||||
"id": str(agent["_id"]),
|
||||
@@ -2092,11 +2112,9 @@ class PinnedAgents(Resource):
|
||||
for agent in pinned_agents
|
||||
if "source" in agent or "retriever" in agent
|
||||
]
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving pinned agents: {err}")
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
|
||||
return make_response(jsonify(list_pinned_agents), 200)
|
||||
|
||||
|
||||
@@ -2160,7 +2178,6 @@ class RemoveSharedAgent(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "ID is required"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
agent = agents_collection.find_one(
|
||||
{"_id": ObjectId(agent_id), "shared_publicly": True}
|
||||
@@ -2170,7 +2187,6 @@ class RemoveSharedAgent(Resource):
|
||||
jsonify({"success": False, "message": "Shared agent not found"}),
|
||||
404,
|
||||
)
|
||||
|
||||
ensure_user_doc(user_id)
|
||||
users_collection.update_one(
|
||||
{"user_id": user_id},
|
||||
@@ -2183,7 +2199,6 @@ class RemoveSharedAgent(Resource):
|
||||
)
|
||||
|
||||
return make_response(jsonify({"success": True, "action": "removed"}), 200)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error removing shared agent: {err}")
|
||||
return make_response(
|
||||
@@ -2206,7 +2221,6 @@ class SharedAgent(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Token or ID is required"}), 400
|
||||
)
|
||||
|
||||
try:
|
||||
query = {
|
||||
"shared_publicly": True,
|
||||
@@ -2218,7 +2232,6 @@ class SharedAgent(Resource):
|
||||
jsonify({"success": False, "message": "Shared agent not found"}),
|
||||
404,
|
||||
)
|
||||
|
||||
agent_id = str(shared_agent["_id"])
|
||||
data = {
|
||||
"id": agent_id,
|
||||
@@ -2230,7 +2243,12 @@ class SharedAgent(Resource):
|
||||
else ""
|
||||
),
|
||||
"description": shared_agent.get("description", ""),
|
||||
"source": shared_agent.get("source", ""),
|
||||
"source": (
|
||||
str(source_doc["_id"])
|
||||
if isinstance(shared_agent.get("source"), DBRef)
|
||||
and (source_doc := db.dereference(shared_agent.get("source")))
|
||||
else ""
|
||||
),
|
||||
"chunks": shared_agent.get("chunks", "0"),
|
||||
"retriever": shared_agent.get("retriever", "classic"),
|
||||
"prompt_id": shared_agent.get("prompt_id", "default"),
|
||||
@@ -2253,7 +2271,6 @@ class SharedAgent(Resource):
|
||||
if tool_data:
|
||||
enriched_tools.append(tool_data.get("name", ""))
|
||||
data["tools"] = enriched_tools
|
||||
|
||||
decoded_token = getattr(request, "decoded_token", None)
|
||||
if decoded_token:
|
||||
user_id = decoded_token.get("sub")
|
||||
@@ -2265,9 +2282,7 @@ class SharedAgent(Resource):
|
||||
{"user_id": user_id},
|
||||
{"$addToSet": {"agent_preferences.shared_with_me": agent_id}},
|
||||
)
|
||||
|
||||
return make_response(jsonify(data), 200)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving shared agent: {err}")
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
@@ -2301,7 +2316,6 @@ class SharedAgents(Resource):
|
||||
{"user_id": user_id},
|
||||
{"$pullAll": {"agent_preferences.shared_with_me": stale_ids}},
|
||||
)
|
||||
|
||||
pinned_ids = set(user_doc.get("agent_preferences", {}).get("pinned", []))
|
||||
|
||||
list_shared_agents = [
|
||||
@@ -2328,7 +2342,6 @@ class SharedAgents(Resource):
|
||||
]
|
||||
|
||||
return make_response(jsonify(list_shared_agents), 200)
|
||||
|
||||
except Exception as err:
|
||||
current_app.logger.error(f"Error retrieving shared agents: {err}")
|
||||
return make_response(jsonify({"success": False}), 400)
|
||||
@@ -3808,22 +3821,22 @@ class GetChunks(Resource):
|
||||
metadata = chunk.get("metadata", {})
|
||||
|
||||
# Filter by path if provided
|
||||
|
||||
if path:
|
||||
chunk_source = metadata.get("source", "")
|
||||
# Check if the chunk's source matches the requested path
|
||||
|
||||
if not chunk_source or not chunk_source.endswith(path):
|
||||
continue
|
||||
|
||||
# Filter by search term if provided
|
||||
|
||||
if search_term:
|
||||
text_match = search_term in chunk.get("text", "").lower()
|
||||
title_match = search_term in metadata.get("title", "").lower()
|
||||
|
||||
if not (text_match or title_match):
|
||||
continue
|
||||
|
||||
filtered_chunks.append(chunk)
|
||||
|
||||
chunks = filtered_chunks
|
||||
|
||||
total_chunks = len(chunks)
|
||||
@@ -3983,7 +3996,6 @@ class UpdateChunk(Resource):
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
metadata["token_count"] = token_count
|
||||
|
||||
if not ObjectId.is_valid(doc_id):
|
||||
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
|
||||
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
|
||||
@@ -3998,7 +4010,6 @@ class UpdateChunk(Resource):
|
||||
existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None)
|
||||
if not existing_chunk:
|
||||
return make_response(jsonify({"error": "Chunk not found"}), 404)
|
||||
|
||||
new_text = text if text is not None else existing_chunk["text"]
|
||||
|
||||
if metadata is not None:
|
||||
@@ -4006,10 +4017,8 @@ class UpdateChunk(Resource):
|
||||
new_metadata.update(metadata)
|
||||
else:
|
||||
new_metadata = existing_chunk["metadata"].copy()
|
||||
|
||||
if text is not None:
|
||||
new_metadata["token_count"] = num_tokens_from_string(new_text)
|
||||
|
||||
try:
|
||||
new_chunk_id = store.add_chunk(new_text, new_metadata)
|
||||
|
||||
@@ -4018,7 +4027,6 @@ class UpdateChunk(Resource):
|
||||
current_app.logger.warning(
|
||||
f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created"
|
||||
)
|
||||
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
@@ -4065,7 +4073,6 @@ class StoreAttachment(Resource):
|
||||
jsonify({"status": "error", "message": "Missing file"}),
|
||||
400,
|
||||
)
|
||||
|
||||
user = None
|
||||
if decoded_token:
|
||||
user = safe_filename(decoded_token.get("sub"))
|
||||
@@ -4080,7 +4087,6 @@ class StoreAttachment(Resource):
|
||||
return make_response(
|
||||
jsonify({"success": False, "message": "Authentication required"}), 401
|
||||
)
|
||||
|
||||
try:
|
||||
attachment_id = ObjectId()
|
||||
original_filename = safe_filename(os.path.basename(file.filename))
|
||||
@@ -4122,7 +4128,6 @@ class ServeImage(Resource):
|
||||
content_type = f"image/{extension}"
|
||||
if extension == "jpg":
|
||||
content_type = "image/jpeg"
|
||||
|
||||
response = make_response(file_obj.read())
|
||||
response.headers.set("Content-Type", content_type)
|
||||
response.headers.set("Cache-Control", "max-age=86400")
|
||||
@@ -4149,36 +4154,43 @@ class DirectoryStructure(Resource):
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
|
||||
user = decoded_token.get("sub")
|
||||
doc_id = request.args.get("id")
|
||||
|
||||
if not doc_id:
|
||||
return make_response(jsonify({"error": "Document ID is required"}), 400)
|
||||
|
||||
if not ObjectId.is_valid(doc_id):
|
||||
return make_response(jsonify({"error": "Invalid document ID"}), 400)
|
||||
|
||||
try:
|
||||
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
|
||||
if not doc:
|
||||
return make_response(
|
||||
jsonify({"error": "Document not found or access denied"}), 404
|
||||
)
|
||||
|
||||
directory_structure = doc.get("directory_structure", {})
|
||||
base_path = doc.get("file_path", "")
|
||||
|
||||
provider = None
|
||||
remote_data = doc.get("remote_data")
|
||||
try:
|
||||
if isinstance(remote_data, str) and remote_data:
|
||||
remote_data_obj = json.loads(remote_data)
|
||||
provider = remote_data_obj.get("provider")
|
||||
except Exception as e:
|
||||
current_app.logger.warning(
|
||||
f"Failed to parse remote_data for doc {doc_id}: {e}"
|
||||
)
|
||||
return make_response(
|
||||
jsonify(
|
||||
{
|
||||
"success": True,
|
||||
"directory_structure": directory_structure,
|
||||
"base_path": doc.get("file_path", ""),
|
||||
"base_path": base_path,
|
||||
"provider": provider,
|
||||
}
|
||||
),
|
||||
200,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(
|
||||
f"Error retrieving directory structure: {e}", exc_info=True
|
||||
|
||||
@@ -47,6 +47,39 @@ def process_agent_webhook(self, agent_id, payload):
|
||||
return resp
|
||||
|
||||
|
||||
@celery.task(bind=True)
def ingest_connector_task(
    self,
    job_name,
    user,
    source_type,
    session_token=None,
    file_ids=None,
    folder_ids=None,
    recursive=True,
    retriever="classic",
    operation_mode="upload",
    doc_id=None,
    sync_frequency="never",
):
    """Celery task that hands a connector ingestion job to the worker.

    Every argument is forwarded unchanged to
    ``application.worker.ingest_connector``: the bound task instance,
    ``job_name``, ``user`` and ``source_type`` positionally, and the
    remaining options by keyword.

    Returns:
        Whatever ``ingest_connector`` returns.
    """
    # Imported at call time rather than at module load, as in the original;
    # presumably to avoid a circular import with the worker module - TODO confirm.
    from application.worker import ingest_connector

    connector_options = {
        "session_token": session_token,
        "file_ids": file_ids,
        "folder_ids": folder_ids,
        "recursive": recursive,
        "retriever": retriever,
        "operation_mode": operation_mode,
        "doc_id": doc_id,
        "sync_frequency": sync_frequency,
    }
    return ingest_connector(
        self,
        job_name,
        user,
        source_type,
        **connector_options,
    )
|
||||
|
||||
|
||||
@celery.on_after_configure.connect
|
||||
def setup_periodic_tasks(sender, **kwargs):
|
||||
sender.add_periodic_task(
|
||||
|
||||
@@ -16,6 +16,7 @@ from application.api import api # noqa: E402
|
||||
from application.api.answer import answer # noqa: E402
|
||||
from application.api.internal.routes import internal # noqa: E402
|
||||
from application.api.user.routes import user # noqa: E402
|
||||
from application.api.connector.routes import connector # noqa: E402
|
||||
from application.celery_init import celery # noqa: E402
|
||||
from application.core.settings import settings # noqa: E402
|
||||
|
||||
@@ -30,6 +31,7 @@ app = Flask(__name__)
|
||||
app.register_blueprint(user)
|
||||
app.register_blueprint(answer)
|
||||
app.register_blueprint(internal)
|
||||
app.register_blueprint(connector)
|
||||
app.config.update(
|
||||
UPLOAD_FOLDER="inputs",
|
||||
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
|
||||
|
||||
@@ -40,6 +40,13 @@ class Settings(BaseSettings):
|
||||
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
|
||||
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
|
||||
|
||||
# Google Drive integration
|
||||
GOOGLE_CLIENT_ID: Optional[str] = None # Replace with your actual Google OAuth client ID
|
||||
GOOGLE_CLIENT_SECRET: Optional[str] = None# Replace with your actual Google OAuth client secret
|
||||
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback"
|
||||
# Append ?provider={provider_name} to the redirect URI registered in your provider console, e.g. http://127.0.0.1:7091/api/connectors/callback?provider=google_drive
|
||||
|
||||
|
||||
# LLM Cache
|
||||
CACHE_REDIS_URL: str = "redis://localhost:6379/2"
|
||||
|
||||
|
||||
18
application/parser/connectors/__init__.py
Normal file
18
application/parser/connectors/__init__.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
External knowledge base connectors for DocsGPT.
|
||||
|
||||
This module contains connectors for external knowledge bases and document storage systems
|
||||
that require authentication and specialized handling, separate from simple web scrapers.
|
||||
"""
|
||||
|
||||
from .base import BaseConnectorAuth, BaseConnectorLoader
|
||||
from .connector_creator import ConnectorCreator
|
||||
from .google_drive import GoogleDriveAuth, GoogleDriveLoader
|
||||
|
||||
__all__ = [
|
||||
'BaseConnectorAuth',
|
||||
'BaseConnectorLoader',
|
||||
'ConnectorCreator',
|
||||
'GoogleDriveAuth',
|
||||
'GoogleDriveLoader'
|
||||
]
|
||||
129
application/parser/connectors/base.py
Normal file
129
application/parser/connectors/base.py
Normal file
@@ -0,0 +1,129 @@
|
||||
"""
|
||||
Base classes for external knowledge base connectors.
|
||||
|
||||
This module provides minimal abstract base classes that define the essential
|
||||
interface for external knowledge base connectors.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseConnectorAuth(ABC):
    """
    Abstract base class for connector authentication.

    Defines the minimal interface that all connector authentication
    implementations must follow. Every method is abstract, so a subclass
    must override all four before it can be instantiated.
    """

    @abstractmethod
    def get_authorization_url(self, state: Optional[str] = None) -> str:
        """
        Generate authorization URL for OAuth flows.

        Args:
            state: Optional state parameter for CSRF protection

        Returns:
            Authorization URL
        """

    @abstractmethod
    def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
        """
        Exchange authorization code for access tokens.

        Args:
            authorization_code: Authorization code from OAuth callback

        Returns:
            Dictionary containing token information
        """

    @abstractmethod
    def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
        """
        Refresh an expired access token.

        Args:
            refresh_token: Refresh token

        Returns:
            Dictionary containing refreshed token information
        """

    @abstractmethod
    def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
        """
        Check if a token is expired.

        Args:
            token_info: Token information dictionary

        Returns:
            True if token is expired, False otherwise
        """
|
||||
|
||||
|
||||
class BaseConnectorLoader(ABC):
    """
    Abstract base class for connector loaders.

    Defines the minimal interface that all connector loader
    implementations must follow. Subclasses must override every
    abstract method (including ``__init__``) before they can be
    instantiated.
    """

    @abstractmethod
    def __init__(self, session_token: str):
        """
        Initialize the connector loader.

        Args:
            session_token: Authentication session token
        """
        pass

    @abstractmethod
    def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
        """
        Load documents from the external knowledge base.

        Args:
            inputs: Configuration dictionary. Typical keys are:
                - file_ids: Optional list of specific file IDs to load
                - folder_ids: Optional list of folder IDs to browse/download
                - limit: Maximum number of items to return
                - list_only: If True, return metadata without content
                - recursive: Whether to recursively process folders
                Which of these keys are honoured is up to the concrete
                loader.

        Returns:
            List of Document objects
        """
        pass

    @abstractmethod
    def download_to_directory(
        self,
        local_dir: str,
        source_config: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Download files/folders to a local directory.

        Args:
            local_dir: Local directory path to download files to
            source_config: Configuration for what to download; ``None``
                (the default) means no explicit configuration was given.

        Returns:
            Dictionary containing download results:
                - files_downloaded: Number of files downloaded
                - directory_path: Path where files were downloaded
                - empty_result: Whether no files were downloaded
                - source_type: Type of connector
                - config_used: Configuration that was used
                - error: Error message if download failed (optional)
        """
        pass
|
||||
81
application/parser/connectors/connector_creator.py
Normal file
81
application/parser/connectors/connector_creator.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
|
||||
|
||||
class ConnectorCreator:
    """
    Factory class for creating external knowledge base connectors and auth providers.

    These are different from remote loaders as they typically require
    authentication and connect to external document storage systems.
    """

    # Registry of loader classes, keyed by lower-case connector type.
    connectors = {
        "google_drive": GoogleDriveLoader,
    }

    # Registry of auth provider classes, keyed by lower-case connector type.
    auth_providers = {
        "google_drive": GoogleDriveAuth,
    }

    @staticmethod
    def _normalize(connector_type):
        """Return the registry key for *connector_type*, or None if it is not a string."""
        return connector_type.lower() if isinstance(connector_type, str) else None

    @classmethod
    def create_connector(cls, connector_type, *args, **kwargs):
        """
        Create a connector instance for the specified type.

        Args:
            connector_type: Type of connector to create (e.g., 'google_drive');
                matched case-insensitively
            *args, **kwargs: Arguments to pass to the connector constructor

        Returns:
            Connector instance

        Raises:
            ValueError: If connector type is not supported (including
                ``None`` and other non-string values)
        """
        connector_class = cls.connectors.get(cls._normalize(connector_type))
        if not connector_class:
            raise ValueError(f"No connector class found for type {connector_type}")
        return connector_class(*args, **kwargs)

    @classmethod
    def create_auth(cls, connector_type):
        """
        Create an auth provider instance for the specified connector type.

        Args:
            connector_type: Type of connector auth to create (e.g., 'google_drive');
                matched case-insensitively

        Returns:
            Auth provider instance

        Raises:
            ValueError: If connector type is not supported for auth
                (including ``None`` and other non-string values)
        """
        auth_class = cls.auth_providers.get(cls._normalize(connector_type))
        if not auth_class:
            raise ValueError(f"No auth class found for type {connector_type}")
        return auth_class()

    @classmethod
    def get_supported_connectors(cls):
        """
        Get list of supported connector types.

        Returns:
            List of supported connector type strings
        """
        return list(cls.connectors)

    @classmethod
    def is_supported(cls, connector_type):
        """
        Check if a connector type is supported.

        Args:
            connector_type: Type of connector to check

        Returns:
            True if supported, False otherwise (non-string values are
            never supported)
        """
        return cls._normalize(connector_type) in cls.connectors
|
||||
10
application/parser/connectors/google_drive/__init__.py
Normal file
10
application/parser/connectors/google_drive/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""
|
||||
Google Drive connector for DocsGPT.
|
||||
|
||||
This module provides authentication and document loading capabilities for Google Drive.
|
||||
"""
|
||||
|
||||
from .auth import GoogleDriveAuth
|
||||
from .loader import GoogleDriveLoader
|
||||
|
||||
__all__ = ['GoogleDriveAuth', 'GoogleDriveLoader']
|
||||
268
application/parser/connectors/google_drive/auth.py
Normal file
268
application/parser/connectors/google_drive/auth.py
Normal file
@@ -0,0 +1,268 @@
|
||||
import logging
|
||||
import datetime
|
||||
from typing import Optional, Dict, Any
|
||||
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import Flow
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.parser.connectors.base import BaseConnectorAuth
|
||||
|
||||
|
||||
class GoogleDriveAuth(BaseConnectorAuth):
|
||||
"""
|
||||
Handles Google OAuth 2.0 authentication for Google Drive access.
|
||||
"""
|
||||
|
||||
SCOPES = [
|
||||
'https://www.googleapis.com/auth/drive.readonly',
|
||||
'https://www.googleapis.com/auth/drive.metadata.readonly'
|
||||
]
|
||||
|
||||
def __init__(self):
    """Load the Google OAuth client settings.

    Raises:
        ValueError: If the client id or the client secret is not configured.
    """
    client_id = settings.GOOGLE_CLIENT_ID
    client_secret = settings.GOOGLE_CLIENT_SECRET

    self.client_id = client_id
    self.client_secret = client_secret
    # The provider is carried as a query parameter on the redirect base URI.
    self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}?provider=google_drive"

    if not (client_id and client_secret):
        raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")
|
||||
|
||||
|
||||
|
||||
def get_authorization_url(self, state: Optional[str] = None) -> str:
    """Build the Google consent-screen URL for this application.

    Args:
        state: Optional opaque value forwarded to the OAuth flow.

    Returns:
        The authorization URL produced by the OAuth flow.

    Raises:
        Exception: Any error from building the flow is logged and re-raised.
    """
    try:
        web_client = {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
            "token_uri": "https://oauth2.googleapis.com/token",
            "redirect_uris": [self.redirect_uri],
        }
        oauth_flow = Flow.from_client_config({"web": web_client}, scopes=self.SCOPES)
        oauth_flow.redirect_uri = self.redirect_uri

        # Offline access plus a forced consent prompt are requested on every call.
        url, _flow_state = oauth_flow.authorization_url(
            access_type='offline',
            prompt='consent',
            include_granted_scopes='true',
            state=state,
        )
        return url
    except Exception as e:
        logging.error(f"Error generating authorization URL: {e}")
        raise
|
||||
|
||||
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
    """Exchange an OAuth authorization code for access and refresh tokens.

    Args:
        authorization_code: Code received on the OAuth callback.

    Returns:
        Dictionary with ``access_token``, ``refresh_token``, ``token_uri``,
        ``client_id``, ``client_secret``, ``scopes`` and ``expiry`` (ISO
        string, or None when the credentials carry no expiry).

    Raises:
        ValueError: If the code is empty, or the flow returned no access
            token or no refresh token.
        Exception: Any other error from the OAuth flow is logged and re-raised.
    """
    try:
        if not authorization_code:
            raise ValueError("Authorization code is required")

        flow = Flow.from_client_config(
            {
                "web": {
                    "client_id": self.client_id,
                    "client_secret": self.client_secret,
                    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
                    "token_uri": "https://oauth2.googleapis.com/token",
                    "redirect_uris": [self.redirect_uri],
                }
            },
            scopes=self.SCOPES,
        )
        flow.redirect_uri = self.redirect_uri
        flow.fetch_token(code=authorization_code)

        credentials = flow.credentials

        if not credentials.token:
            raise ValueError("OAuth flow did not return an access token")

        if not credentials.refresh_token:
            raise ValueError(
                "No refresh token received. This typically happens when offline access wasn't granted."
            )

        # Fall back to our own configuration in the returned dict instead of
        # assigning to the credentials object: token_uri / client_id /
        # client_secret are exposed by google-auth as read-only properties.
        return {
            'access_token': credentials.token,
            'refresh_token': credentials.refresh_token,
            'token_uri': credentials.token_uri or "https://oauth2.googleapis.com/token",
            'client_id': credentials.client_id or self.client_id,
            'client_secret': credentials.client_secret or self.client_secret,
            'scopes': credentials.scopes,
            'expiry': credentials.expiry.isoformat() if credentials.expiry else None,
        }

    except Exception as e:
        logging.error(f"Error exchanging code for tokens: {e}")
        raise
|
||||
|
||||
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
    """Use a refresh token to obtain a fresh access token.

    Args:
        refresh_token: Refresh token previously issued for this client.

    Returns:
        Dictionary with the new ``access_token``, the ``refresh_token`` that
        was passed in, and the ``token_uri``, ``client_id``,
        ``client_secret``, ``scopes`` and ``expiry`` of the credentials.

    Raises:
        ValueError: If *refresh_token* is empty.
        Exception: Any refresh failure is logged (with traceback) and re-raised.
    """
    try:
        if not refresh_token:
            raise ValueError("Refresh token is required")

        from google.auth.transport.requests import Request

        refreshed = Credentials(
            token=None,
            refresh_token=refresh_token,
            token_uri="https://oauth2.googleapis.com/token",
            client_id=self.client_id,
            client_secret=self.client_secret,
        )
        refreshed.refresh(Request())

        expiry = refreshed.expiry
        token_info = {
            'access_token': refreshed.token,
            'refresh_token': refresh_token,
            'token_uri': refreshed.token_uri,
            'client_id': refreshed.client_id,
            'client_secret': refreshed.client_secret,
            'scopes': refreshed.scopes,
            'expiry': expiry.isoformat() if expiry else None,
        }
        return token_info
    except Exception as e:
        logging.error(f"Error refreshing access token: {e}", exc_info=True)
        raise
|
||||
|
||||
def create_credentials_from_token_info(self, token_info: Dict[str, Any]) -> Credentials:
    """Build a google-auth ``Credentials`` object from stored token data.

    The stored ``expiry`` (an ISO-8601 string, as written by
    ``exchange_code_for_tokens`` / ``refresh_access_token``) is handed to
    the credentials so that ``credentials.expired`` reflects the real token
    lifetime; without it the credentials never report themselves as expired.

    Args:
        token_info: Dictionary with at least ``access_token``; may also
            carry ``refresh_token``, ``scopes`` and ``expiry``.

    Returns:
        The constructed credentials.

    Raises:
        ValueError: If *token_info* has no access token.
    """
    access_token = token_info.get('access_token')
    if not access_token:
        raise ValueError("No access token found in token_info")

    expiry = None
    raw_expiry = token_info.get('expiry')
    if raw_expiry:
        try:
            text = str(raw_expiry)
            if text.endswith('Z'):
                # fromisoformat() only accepts a literal "Z" on newer Pythons.
                text = text[:-1] + '+00:00'
            parsed = datetime.datetime.fromisoformat(text)
            if parsed.tzinfo is not None:
                # google-auth compares expiry against a naive UTC "now".
                parsed = parsed.astimezone(datetime.timezone.utc).replace(tzinfo=None)
            expiry = parsed
        except ValueError:
            logging.warning("Ignoring unparseable token expiry in token_info")

    credentials = Credentials(
        token=access_token,
        refresh_token=token_info.get('refresh_token'),
        token_uri='https://oauth2.googleapis.com/token',
        client_id=settings.GOOGLE_CLIENT_ID,
        client_secret=settings.GOOGLE_CLIENT_SECRET,
        scopes=token_info.get('scopes', ['https://www.googleapis.com/auth/drive.readonly']),
        expiry=expiry,
    )

    if not credentials.token:
        raise ValueError("Credentials created without valid access token")

    return credentials
|
||||
|
||||
def build_drive_service(self, credentials: Credentials):
    """Return a Drive v3 service client for *credentials*.

    The credentials are refreshed first when they report themselves expired
    or carry no access token.

    Args:
        credentials: google-auth credentials to authorise the client with.

    Returns:
        The object returned by ``build('drive', 'v3', ...)``.

    Raises:
        ValueError: For every failure, including the validation errors
            raised inside this method, which are re-wrapped with a
            "Failed to build Google Drive service" prefix.
    """
    reauthorize_msg = "No access token or refresh token available. User must re-authorize with offline access."
    try:
        if not credentials:
            raise ValueError("No credentials provided")

        can_refresh = bool(credentials.refresh_token)
        if not (credentials.token or can_refresh):
            raise ValueError(reauthorize_msg)

        if credentials.expired or not credentials.token:
            if not can_refresh:
                raise ValueError(reauthorize_msg)
            try:
                from google.auth.transport.requests import Request
                credentials.refresh(Request())
            except Exception as refresh_error:
                raise ValueError(f"Failed to refresh credentials: {refresh_error}")

        return build('drive', 'v3', credentials=credentials)

    except HttpError as http_error:
        raise ValueError(f"Failed to build Google Drive service: HTTP {http_error.resp.status}")
    except Exception as error:
        raise ValueError(f"Failed to build Google Drive service: {str(error)}")
|
||||
|
||||
def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
    """Report whether the token described by *token_info* should be refreshed.

    Args:
        token_info: Token dictionary; ``expiry`` (ISO-8601 string or
            datetime) and ``access_token`` are consulted.

    Returns:
        True when the expiry lies in the past or within the next 60
        seconds, when the expiry cannot be interpreted, or when there is
        neither an expiry nor an access token. False otherwise.
    """
    expiry_raw = token_info.get('expiry')
    if expiry_raw:
        try:
            if isinstance(expiry_raw, datetime.datetime):
                expiry_dt = expiry_raw
            else:
                text = str(expiry_raw)
                if text.endswith('Z'):
                    # fromisoformat() only accepts a literal "Z" on newer Pythons.
                    text = text[:-1] + '+00:00'
                try:
                    expiry_dt = datetime.datetime.fromisoformat(text)
                except ValueError:
                    # Keep accepting the looser formats dateutil understands.
                    from dateutil import parser
                    expiry_dt = parser.parse(str(expiry_raw))

            # The expiry written by this class comes from
            # credentials.expiry.isoformat(), which has no UTC offset.
            # Treat such values as UTC; comparing a naive value with an
            # aware "now" raises TypeError and used to mark every token
            # as expired.
            if expiry_dt.tzinfo is None:
                expiry_dt = expiry_dt.replace(tzinfo=datetime.timezone.utc)

            current_time = datetime.datetime.now(datetime.timezone.utc)
            # 60 seconds of slack so a token is not used right before it lapses.
            return current_time >= expiry_dt - datetime.timedelta(seconds=60)
        except Exception:
            # Unreadable expiry: err on the side of refreshing.
            return True

    if token_info.get('access_token'):
        return False

    return True
|
||||
|
||||
def get_token_info_from_session(self, session_token: str) -> Dict[str, Any]:
    """Load the stored OAuth token data for a connector session.

    Looks up *session_token* in the ``connector_sessions`` collection and
    returns its ``token_info``, filling in ``client_id``, ``client_secret``
    and ``token_uri`` when the stored document lacks them.

    Args:
        session_token: Session token identifying the stored connection.

    Returns:
        The token dictionary, guaranteed to contain non-empty
        ``access_token`` and ``refresh_token`` entries.

    Raises:
        ValueError: If the session is unknown, has no usable token data,
            or the lookup itself fails. The session token is deliberately
            left out of the message because it is a secret.
    """
    try:
        from application.core.mongo_db import MongoDB

        mongo = MongoDB.get_client()
        db = mongo[settings.MONGO_DB_NAME]

        sessions_collection = db["connector_sessions"]
        session = sessions_collection.find_one({"session_token": session_token})
        if not session:
            # Do not echo the token: this message ends up in logs/responses.
            raise ValueError("Invalid session token")

        if "token_info" not in session:
            raise ValueError("Session missing token information")

        token_info = session["token_info"]
        if not token_info:
            raise ValueError("Invalid token information")

        required_fields = ["access_token", "refresh_token"]
        missing_fields = [field for field in required_fields if not token_info.get(field)]
        if missing_fields:
            raise ValueError(f"Missing required token fields: {missing_fields}")

        if 'client_id' not in token_info:
            token_info['client_id'] = settings.GOOGLE_CLIENT_ID
        if 'client_secret' not in token_info:
            token_info['client_secret'] = settings.GOOGLE_CLIENT_SECRET
        if 'token_uri' not in token_info:
            token_info['token_uri'] = 'https://oauth2.googleapis.com/token'

        return token_info

    except Exception as e:
        raise ValueError(f"Failed to retrieve Google Drive token information: {str(e)}") from e
|
||||
|
||||
def validate_credentials(self, credentials: Credentials) -> bool:
    """Check credentials by issuing a lightweight Drive API request.

    A service client is built for *credentials* and the ``about`` resource
    is fetched with ``fields="user"``.

    Args:
        credentials: Google credentials object.

    Returns:
        True when the request succeeds; False when anything fails (the
        error is logged).
    """
    try:
        drive = self.build_drive_service(credentials)
        probe = drive.about().get(fields="user")
        probe.execute()
    except HttpError as e:
        logging.error(f"HTTP error validating credentials: {e}")
        return False
    except Exception as e:
        logging.error(f"Error validating credentials: {e}")
        return False
    return True
|
||||
536
application/parser/connectors/google_drive/loader.py
Normal file
536
application/parser/connectors/google_drive/loader.py
Normal file
@@ -0,0 +1,536 @@
|
||||
"""
|
||||
Google Drive loader for DocsGPT.
|
||||
Loads documents from Google Drive using Google Drive API.
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
from googleapiclient.http import MediaIoBaseDownload
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from application.parser.connectors.base import BaseConnectorLoader
|
||||
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class GoogleDriveLoader(BaseConnectorLoader):
|
||||
|
||||
SUPPORTED_MIME_TYPES = {
|
||||
'application/pdf': '.pdf',
|
||||
'application/vnd.google-apps.document': '.docx',
|
||||
'application/vnd.google-apps.presentation': '.pptx',
|
||||
'application/vnd.google-apps.spreadsheet': '.xlsx',
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
|
||||
'application/msword': '.doc',
|
||||
'application/vnd.ms-powerpoint': '.ppt',
|
||||
'application/vnd.ms-excel': '.xls',
|
||||
'text/plain': '.txt',
|
||||
'text/csv': '.csv',
|
||||
'text/html': '.html',
|
||||
'application/rtf': '.rtf',
|
||||
'image/jpeg': '.jpg',
|
||||
'image/jpg': '.jpg',
|
||||
'image/png': '.png',
|
||||
}
|
||||
|
||||
EXPORT_FORMATS = {
|
||||
'application/vnd.google-apps.document': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.google-apps.presentation': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
|
||||
'application/vnd.google-apps.spreadsheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
|
||||
}
|
||||
|
||||
def __init__(self, session_token: str):
    """Set up credentials (and, if possible, a Drive client) for a session.

    Args:
        session_token: Session token whose stored token data is used to
            build the credentials.

    A failure to build the Drive service is only logged here; ``service``
    is left as None in that case.
    """
    self.auth = GoogleDriveAuth()
    self.session_token = session_token

    stored_tokens = self.auth.get_token_info_from_session(session_token)
    self.credentials = self.auth.create_credentials_from_token_info(stored_tokens)

    try:
        drive_service = self.auth.build_drive_service(self.credentials)
    except Exception as e:
        logging.warning(f"Could not build Google Drive service: {e}")
        drive_service = None
    self.service = drive_service

    # Updated by the listing code with the token of the following page.
    self.next_page_token = None
|
||||
|
||||
|
||||
|
||||
def _process_file(self, file_metadata: Dict[str, Any], load_content: bool = True) -> Optional[Document]:
|
||||
try:
|
||||
file_id = file_metadata.get('id')
|
||||
file_name = file_metadata.get('name', 'Unknown')
|
||||
mime_type = file_metadata.get('mimeType', 'application/octet-stream')
|
||||
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
return None
|
||||
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
|
||||
logging.info(f"Skipping unsupported file type: {mime_type} for file {file_name}")
|
||||
return None
|
||||
# Google Drive provides timezone-aware ISO8601 dates
|
||||
doc_metadata = {
|
||||
'file_name': file_name,
|
||||
'mime_type': mime_type,
|
||||
'size': file_metadata.get('size', None),
|
||||
'created_time': file_metadata.get('createdTime'),
|
||||
'modified_time': file_metadata.get('modifiedTime'),
|
||||
'parents': file_metadata.get('parents', []),
|
||||
'source': 'google_drive'
|
||||
}
|
||||
|
||||
if not load_content:
|
||||
return Document(
|
||||
text="",
|
||||
doc_id=file_id,
|
||||
extra_info=doc_metadata
|
||||
)
|
||||
|
||||
content = self._download_file_content(file_id, mime_type)
|
||||
if content is None:
|
||||
logging.warning(f"Could not load content for file {file_name} ({file_id})")
|
||||
return None
|
||||
|
||||
return Document(
|
||||
text=content,
|
||||
doc_id=file_id,
|
||||
extra_info=doc_metadata
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error processing file: {e}")
|
||||
return None
|
||||
|
||||
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
    """Load specific files, or list the children of a folder.

    Args:
        inputs: Configuration dictionary. Keys read here:
            - session_token: only compared with the loader's own token
            - file_ids: IDs of files to load; when non-empty, listing is skipped
            - folder_id: folder to list (the Drive root when absent)
            - limit: maximum number of listed items (default 100)
            - list_only: when True, documents carry metadata but no content
            - page_token: page to resume a listing from

    Returns:
        The loaded documents. Files that fail to load are logged and skipped.

    Raises:
        Exception: Any error outside the per-file handling is logged and
            re-raised.
    """
    supplied_token = inputs.get('session_token')
    if supplied_token and supplied_token != self.session_token:
        logging.warning("Session token in inputs differs from loader's session token. Using loader's session token.")
    self.config = inputs

    try:
        folder_id = inputs.get('folder_id')
        file_ids = inputs.get('file_ids', [])
        limit = inputs.get('limit', 100)
        load_content = not inputs.get('list_only', False)
        page_token = inputs.get('page_token')
        self.next_page_token = None

        if not file_ids:
            # Browsing mode: list immediate children of provided folder or root
            documents = self._list_items_in_parent(
                folder_id if folder_id else 'root',
                limit=limit,
                load_content=load_content,
                page_token=page_token,
            )
        else:
            # Specific files requested: load them
            documents: List[Document] = []
            for file_id in file_ids:
                try:
                    doc = self._load_file_by_id(file_id, load_content=load_content)
                    if not doc and getattr(self, '_credential_refreshed', False):
                        # A refresh happened during the failed attempt: try once more.
                        self._credential_refreshed = False
                        logging.info(f"Retrying load of file {file_id} after credential refresh")
                        doc = self._load_file_by_id(file_id, load_content=load_content)
                    if doc:
                        documents.append(doc)
                except Exception as e:
                    logging.error(f"Error loading file {file_id}: {e}")
                    continue

        logging.info(f"Loaded {len(documents)} documents from Google Drive")
        return documents

    except Exception as e:
        logging.error(f"Error loading data from Google Drive: {e}", exc_info=True)
        raise
|
||||
|
||||
|
||||
|
||||
def _load_file_by_id(self, file_id: str, load_content: bool = True) -> Optional[Document]:
    """Fetch one file's metadata from Drive and convert it to a Document.

    Args:
        file_id: Drive file ID.
        load_content: Forwarded to ``_process_file``.

    Returns:
        The Document, or None when the file could not be loaded. After a
        401/403 response the credentials are refreshed, the
        ``_credential_refreshed`` flag is set and None is returned, so that
        ``load_data`` (which checks that flag) retries the file once.

    Raises:
        ValueError: If the Drive service is unavailable, or a 401/403 is
            received and the credentials cannot be refreshed.
    """
    self._ensure_service()

    try:
        file_metadata = self.service.files().get(
            fileId=file_id,
            fields='id,name,mimeType,size,createdTime,modifiedTime,parents'
        ).execute()

        return self._process_file(file_metadata, load_content=load_content)

    except HttpError as e:
        logging.error(f"HTTP error loading file {file_id}: {e.resp.status} - {e.content}")

        if e.resp.status in [401, 403]:
            if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
                try:
                    from google.auth.transport.requests import Request
                    self.credentials.refresh(Request())
                    self._ensure_service()
                    # Signal the caller that a retry is worthwhile; without
                    # this the refreshed credentials were never used for
                    # the file that triggered the refresh.
                    self._credential_refreshed = True
                    return None
                except Exception as refresh_error:
                    raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
            else:
                raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")

        return None
    except Exception as e:
        logging.error(f"Error loading file {file_id}: {e}")
        return None
|
||||
|
||||
|
||||
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]:
    """List the non-trashed children of a Drive folder as Documents.

    Folders are returned as empty-text Documents flagged with
    ``is_folder``; other items go through ``_process_file`` and are
    dropped when it returns None.

    Args:
        parent_id: ID of the folder to list (``'root'`` for the Drive root).
        limit: Maximum number of documents to return; a falsy value
            disables the limit.
        load_content: Forwarded to ``_process_file`` for non-folder items.
        page_token: Drive page token to start from.

    Returns:
        The collected documents. On error, whatever was collected so far
        is returned and the error is logged. ``self.next_page_token`` is
        set to the token of the page following the last one fetched.

    Raises:
        ValueError: If the Drive service is unavailable.
    """
    self._ensure_service()

    documents: List[Document] = []

    try:
        # parent_id can come from request input: escape backslashes and
        # single quotes so it cannot break out of the quoted literal in
        # the Drive query string.
        escaped_parent = str(parent_id).replace("\\", "\\\\").replace("'", "\\'")
        query = f"'{escaped_parent}' in parents and trashed=false"
        next_token_out: Optional[str] = None

        while True:
            page_size = 100
            if limit:
                remaining = max(0, limit - len(documents))
                if remaining == 0:
                    break
                # Never request more than is still needed.
                page_size = min(100, remaining)

            results = self.service.files().list(
                q=query,
                fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
                pageToken=page_token,
                pageSize=page_size
            ).execute()

            for item in results.get('files', []):
                mime_type = item.get('mimeType')
                if mime_type == 'application/vnd.google-apps.folder':
                    doc_metadata = {
                        'file_name': item.get('name', 'Unknown'),
                        'mime_type': mime_type,
                        'size': item.get('size', None),
                        'created_time': item.get('createdTime'),
                        'modified_time': item.get('modifiedTime'),
                        'parents': item.get('parents', []),
                        'source': 'google_drive',
                        'is_folder': True
                    }
                    documents.append(Document(text="", doc_id=item.get('id'), extra_info=doc_metadata))
                else:
                    doc = self._process_file(item, load_content=load_content)
                    if doc:
                        documents.append(doc)

                if limit and len(documents) >= limit:
                    self.next_page_token = results.get('nextPageToken')
                    return documents

            page_token = results.get('nextPageToken')
            next_token_out = page_token
            if not page_token:
                break

        self.next_page_token = next_token_out
        return documents
    except Exception as e:
        logging.error(f"Error listing items under parent {parent_id}: {e}")
        return documents
|
||||
|
||||
|
||||
|
||||
|
||||
def _refresh_credentials_or_raise(self, failure_message: Optional[str] = None) -> None:
    """Refresh ``self.credentials`` in place, or raise ``ValueError``.

    Args:
        failure_message: Message for the ValueError raised when the refresh
            attempt fails; when None the underlying error is embedded.
    """
    if not getattr(self.credentials, 'refresh_token', None):
        logging.error("Cannot refresh credentials: missing refresh_token")
        raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")

    try:
        from google.auth.transport.requests import Request
        self.credentials.refresh(Request())
        logging.info("Credentials refreshed successfully")
        self._ensure_service()
    except Exception as refresh_error:
        logging.error(f"Failed to refresh credentials: {refresh_error}")
        if failure_message is None:
            failure_message = f"Authentication failed and could not be refreshed: {refresh_error}"
        raise ValueError(failure_message) from refresh_error


def _download_file_content(self, file_id: str, mime_type: str) -> Optional[str]:
    """Download (or export) a Drive file and return its bytes decoded as text.

    Types listed in ``EXPORT_FORMATS`` are exported to the mapped format;
    everything else is fetched with ``get_media``. The bytes are decoded
    as UTF-8, falling back to latin-1.
    NOTE(review): binary formats (PDF, Office, images) are also decoded
    this way, so their "text" is raw bytes mapped to characters; confirm
    that downstream code expects this.

    Args:
        file_id: Drive file ID.
        mime_type: MIME type reported by Drive for the file.

    Returns:
        The decoded content, or None when the download failed. After a
        401/403 response the credentials are refreshed,
        ``_credential_refreshed`` is set and None is returned so the
        caller can retry.

    Raises:
        ValueError: If credentials need refreshing and that is impossible
            or fails.
    """
    if not self.credentials.token:
        logging.warning("No access token in credentials, attempting to refresh")
        self._refresh_credentials_or_raise(
            "Authentication failed and cannot be refreshed: missing or invalid refresh_token"
        )

    if self.credentials.expired:
        logging.warning("Credentials are expired, attempting to refresh")
        self._refresh_credentials_or_raise(
            "Authentication failed and cannot be refreshed: expired credentials"
        )

    try:
        if mime_type in self.EXPORT_FORMATS:
            request = self.service.files().export_media(
                fileId=file_id,
                mimeType=self.EXPORT_FORMATS[mime_type]
            )
        else:
            request = self.service.files().get_media(fileId=file_id)

        file_io = io.BytesIO()
        downloader = MediaIoBaseDownload(file_io, request)

        # HttpError is deliberately not caught inside the loop: it has to
        # reach the handler below, which is the only place that refreshes
        # credentials after a 401/403 raised while downloading.
        done = False
        while not done:
            _, done = downloader.next_chunk()

    except HttpError as e:
        logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")

        if e.resp.status in [401, 403]:
            logging.error(f"Authentication error downloading file {file_id}")
            logging.info(f"Attempting to refresh credentials for file {file_id}")
            self._refresh_credentials_or_raise()
            # Checked by load_data to decide whether to retry this file.
            self._credential_refreshed = True

        return None
    except Exception as e:
        logging.error(f"Error downloading file {file_id}: {e}")
        return None

    content_bytes = file_io.getvalue()
    try:
        return content_bytes.decode('utf-8')
    except UnicodeDecodeError:
        # latin-1 maps every byte value, so this fallback cannot fail.
        return content_bytes.decode('latin-1')
|
||||
|
||||
|
||||
def _download_file_to_directory(self, file_id: str, local_dir: str) -> bool:
|
||||
try:
|
||||
self._ensure_service()
|
||||
return self._download_single_file(file_id, local_dir)
|
||||
except Exception as e:
|
||||
logging.error(f"Error downloading file {file_id}: {e}", exc_info=True)
|
||||
return False
|
||||
|
||||
def _ensure_service(self):
|
||||
if not self.service:
|
||||
try:
|
||||
self.service = self.auth.build_drive_service(self.credentials)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Cannot access Google Drive: {e}")
|
||||
|
||||
def _download_single_file(self, file_id: str, local_dir: str) -> bool:
    """Download (or export) one Drive file into *local_dir*.

    Args:
        file_id: Drive file ID.
        local_dir: Destination directory; created when missing.

    Returns:
        True when the file was written; False when its MIME type is not in
        ``SUPPORTED_MIME_TYPES`` (this now also covers Google-native types
        without an export format, which previously fell through to
        ``get_media``).

    Raises:
        Exception: Errors from the Drive API or the filesystem propagate;
            a partially written file is removed first.
    """
    file_metadata = self.service.files().get(
        fileId=file_id,
        fields='name,mimeType'
    ).execute()

    file_name = file_metadata['name']
    mime_type = file_metadata['mimeType']

    # Every exportable Google-native type is listed in SUPPORTED_MIME_TYPES;
    # other Google-native items (folders, forms, ...) have no binary content
    # to fetch, so they are skipped.
    if mime_type not in self.SUPPORTED_MIME_TYPES:
        return False

    # The name comes from Drive and is user-controlled: neutralise path
    # separators so it cannot escape local_dir (an absolute name would
    # otherwise make os.path.join discard local_dir entirely).
    safe_name = file_name.replace('/', '_').replace('\\', '_')
    if os.sep not in ('/', '\\'):
        safe_name = safe_name.replace(os.sep, '_')
    if safe_name in ('', '.', '..'):
        safe_name = str(file_id)

    os.makedirs(local_dir, exist_ok=True)
    full_path = os.path.join(local_dir, safe_name)

    if mime_type in self.EXPORT_FORMATS:
        export_mime_type = self.EXPORT_FORMATS[mime_type]
        request = self.service.files().export_media(
            fileId=file_id,
            mimeType=export_mime_type
        )
        # Exported files get the extension of the export format.
        extension = self._get_extension_for_mime_type(export_mime_type)
        if not full_path.endswith(extension):
            full_path += extension
    else:
        request = self.service.files().get_media(fileId=file_id)

    try:
        with open(full_path, 'wb') as f:
            downloader = MediaIoBaseDownload(f, request)
            done = False
            while not done:
                _, done = downloader.next_chunk()
    except Exception:
        # Do not leave a truncated file behind.
        if os.path.exists(full_path):
            os.remove(full_path)
        raise

    return True
|
||||
|
||||
def _download_folder_recursive(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
    """Download the files of a Drive folder into *local_dir*.

    Args:
        folder_id: ID of the Drive folder to download.
        local_dir: Destination directory; created when missing.
        recursive: When True, subfolders are recreated under *local_dir*
            and downloaded as well; when False they are ignored.

    Returns:
        Number of files downloaded. A file that fails to download is
        logged and skipped; a listing error is logged and ends the walk of
        this folder with the count reached so far.
    """
    files_downloaded = 0
    try:
        os.makedirs(local_dir, exist_ok=True)

        # Escape backslashes and single quotes so the ID cannot break out
        # of the quoted literal in the Drive query string.
        escaped_folder = str(folder_id).replace("\\", "\\\\").replace("'", "\\'")
        query = f"'{escaped_folder}' in parents and trashed=false"
        page_token = None

        while True:
            results = self.service.files().list(
                q=query,
                fields='nextPageToken, files(id, name, mimeType)',
                pageToken=page_token,
                pageSize=1000
            ).execute()

            items = results.get('files', [])
            logging.info(f"Found {len(items)} items in folder {folder_id}")

            for item in items:
                item_name = item['name']
                item_id = item['id']
                mime_type = item['mimeType']

                if mime_type == 'application/vnd.google-apps.folder':
                    if recursive:
                        # Folder names are user-controlled: neutralise
                        # separators and dot entries so the subfolder
                        # stays inside local_dir.
                        safe_name = item_name.replace('/', '_').replace('\\', '_')
                        if safe_name in ('', '.', '..'):
                            safe_name = str(item_id)

                        # Create subfolder and recurse
                        subfolder_path = os.path.join(local_dir, safe_name)
                        os.makedirs(subfolder_path, exist_ok=True)
                        subfolder_files = self._download_folder_recursive(
                            item_id,
                            subfolder_path,
                            recursive
                        )
                        files_downloaded += subfolder_files
                        logging.info(f"Downloaded {subfolder_files} files from subfolder {item_name}")
                else:
                    # Download file; a failure here must not abort the
                    # remaining items of the folder.
                    try:
                        success = self._download_single_file(item_id, local_dir)
                    except Exception as download_error:
                        logging.warning(f"Failed to download file: {item_name} ({download_error})")
                        continue

                    if success:
                        files_downloaded += 1
                        logging.info(f"Downloaded file: {item_name}")
                    else:
                        logging.warning(f"Failed to download file: {item_name}")

            page_token = results.get('nextPageToken')
            if not page_token:
                break

        return files_downloaded

    except Exception as e:
        logging.error(f"Error in _download_folder_recursive for folder {folder_id}: {e}", exc_info=True)
        return files_downloaded
|
||||
|
||||
def _get_extension_for_mime_type(self, mime_type: str) -> str:
    """Return the file extension associated with ``mime_type``.

    MIME types that are not in the lookup table map to ``'.bin'``.
    """
    known_extensions = {
        'application/pdf': '.pdf',
        'text/plain': '.txt',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
        'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
        'text/html': '.html',
        'text/markdown': '.md',
    }
    try:
        return known_extensions[mime_type]
    except KeyError:
        # Unknown type: fall back to a generic binary extension.
        return '.bin'
|
||||
|
||||
def _download_folder_contents(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
    """Download a folder after making sure the service is ready.

    Calls ``self._ensure_service()`` and then delegates to
    ``self._download_folder_recursive``. Any exception is logged and
    reported as ``0`` downloaded files instead of being raised.

    Returns:
        The count returned by ``_download_folder_recursive``, or ``0`` on error.
    """
    try:
        self._ensure_service()
        downloaded = self._download_folder_recursive(folder_id, local_dir, recursive)
    except Exception as e:
        logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
        downloaded = 0
    return downloaded
|
||||
|
||||
def download_to_directory(self, local_dir: str, source_config: dict = None) -> dict:
    """Download the configured Drive files and folders into ``local_dir``.

    Args:
        local_dir: Directory that receives the downloads.
        source_config: Optional mapping with ``file_ids``, ``folder_ids``
            (each a list or a single string) and ``recursive``. When empty
            or omitted, ``self.config`` is used if the attribute exists.

    Returns:
        A summary dict with ``files_downloaded``, ``directory_path``,
        ``empty_result``, ``source_type`` and ``config_used``. Failures are
        never raised: an ``error`` key carrying the message is added and
        ``empty_result`` is forced to ``True``.
    """
    if source_config is None:
        source_config = {}

    config = source_config if source_config else getattr(self, 'config', {})
    files_downloaded = 0

    try:
        folder_ids = config.get('folder_ids', [])
        file_ids = config.get('file_ids', [])
        recursive = config.get('recursive', True)

        self._ensure_service()

        if file_ids:
            if isinstance(file_ids, str):
                file_ids = [file_ids]

            for file_id in file_ids:
                if self._download_file_to_directory(file_id, local_dir):
                    files_downloaded += 1

        # Process folders
        if folder_ids:
            if isinstance(folder_ids, str):
                folder_ids = [folder_ids]

            for folder_id in folder_ids:
                try:
                    folder_metadata = self.service.files().get(
                        fileId=folder_id,
                        fields='name'
                    ).execute()
                    folder_name = folder_metadata.get('name', '')

                    # The name is supplied by the remote service: strip path
                    # separators and refuse '.'/'..' so the target directory
                    # cannot resolve outside local_dir.
                    safe_name = folder_name.replace('/', '_').replace('\\', '_').strip()
                    if safe_name in ('.', '..'):
                        safe_name = folder_id

                    folder_path = os.path.join(local_dir, safe_name)
                    os.makedirs(folder_path, exist_ok=True)

                    folder_files = self._download_folder_recursive(
                        folder_id,
                        folder_path,
                        recursive
                    )
                    files_downloaded += folder_files
                    logging.info(f"Downloaded {folder_files} files from folder {folder_name}")
                except Exception as e:
                    # One failing folder must not abort the remaining ones.
                    logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)

        if not file_ids and not folder_ids:
            raise ValueError("No folder_ids or file_ids provided for download")

        return {
            "files_downloaded": files_downloaded,
            "directory_path": local_dir,
            "empty_result": files_downloaded == 0,
            "source_type": "google_drive",
            "config_used": config
        }

    except Exception as e:
        # Keep the error visible in the logs as well as in the result dict.
        logging.error(f"Error downloading to directory {local_dir}: {e}", exc_info=True)
        return {
            "files_downloaded": files_downloaded,
            "directory_path": local_dir,
            "empty_result": True,
            "source_type": "google_drive",
            "config_used": config,
            "error": str(e)
        }
|
||||
@@ -6,6 +6,21 @@ from application.core.settings import settings
|
||||
from application.vectorstore.vector_creator import VectorCreator
|
||||
|
||||
|
||||
def sanitize_content(content: str) -> str:
    """
    Strip NUL characters, which can make vector store ingestion fail.

    Args:
        content (str): Raw content that may contain NUL characters

    Returns:
        str: The content without NUL characters; falsy input (empty string
        or ``None``) is returned unchanged.
    """
    return content.replace('\x00', '') if content else content
|
||||
|
||||
|
||||
@retry(tries=10, delay=60)
|
||||
def add_text_to_store_with_retry(store, doc, source_id):
|
||||
"""
|
||||
@@ -16,6 +31,9 @@ def add_text_to_store_with_retry(store, doc, source_id):
|
||||
source_id: Unique identifier for the source.
|
||||
"""
|
||||
try:
|
||||
# Sanitize content to remove NUL characters that cause ingestion failures
|
||||
doc.page_content = sanitize_content(doc.page_content)
|
||||
|
||||
doc.metadata["source_id"] = str(source_id)
|
||||
store.add_texts([doc.page_content], metadatas=[doc.metadata])
|
||||
except Exception as e:
|
||||
|
||||
@@ -6,6 +6,16 @@ from application.parser.remote.github_loader import GitHubLoader
|
||||
|
||||
|
||||
class RemoteCreator:
|
||||
"""
|
||||
Factory class for creating remote content loaders.
|
||||
|
||||
These loaders fetch content from remote web sources like URLs,
|
||||
sitemaps, web crawlers, social media platforms, etc.
|
||||
|
||||
For external knowledge base connectors (like Google Drive),
|
||||
use ConnectorCreator instead.
|
||||
"""
|
||||
|
||||
loaders = {
|
||||
"url": WebLoader,
|
||||
"sitemap": SitemapLoader,
|
||||
@@ -18,5 +28,5 @@ class RemoteCreator:
|
||||
def create_loader(cls, type, *args, **kwargs):
|
||||
loader_class = cls.loaders.get(type.lower())
|
||||
if not loader_class:
|
||||
raise ValueError(f"No LLM class found for type {type}")
|
||||
raise ValueError(f"No loader class found for type {type}")
|
||||
return loader_class(*args, **kwargs)
|
||||
|
||||
@@ -13,6 +13,9 @@ Flask==3.1.1
|
||||
faiss-cpu==1.9.0.post1
|
||||
flask-restx==1.3.0
|
||||
google-genai==1.3.0
|
||||
google-api-python-client==2.179.0
|
||||
google-auth-httplib2==0.2.0
|
||||
google-auth-oauthlib==1.2.2
|
||||
gTTS==2.5.4
|
||||
gunicorn==23.0.0
|
||||
javalang==0.13.0
|
||||
|
||||
@@ -6,6 +6,7 @@ import os
|
||||
import shutil
|
||||
import string
|
||||
import tempfile
|
||||
from typing import Any, Dict
|
||||
import zipfile
|
||||
|
||||
from collections import Counter
|
||||
@@ -21,6 +22,7 @@ from application.api.answer.services.stream_processor import get_prompt
|
||||
from application.core.mongo_db import MongoDB
|
||||
from application.core.settings import settings
|
||||
from application.parser.chunking import Chunker
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from application.parser.embedding_pipeline import embed_and_store_documents
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.remote.remote_creator import RemoteCreator
|
||||
@@ -649,8 +651,11 @@ def remote_worker(
|
||||
"id": str(id),
|
||||
"type": loader,
|
||||
"remote_data": source_data,
|
||||
"sync_frequency": sync_frequency,
|
||||
"sync_frequency": sync_frequency
|
||||
}
|
||||
|
||||
if operation_mode == "sync":
|
||||
file_data["last_sync"] = datetime.datetime.now()
|
||||
upload_index(full_path, file_data)
|
||||
except Exception as e:
|
||||
logging.error("Error in remote_worker task: %s", str(e), exc_info=True)
|
||||
@@ -707,7 +712,7 @@ def sync_worker(self, frequency):
|
||||
self, source_data, name, user, source_type, frequency, retriever, doc_id
|
||||
)
|
||||
sync_counts["total_sync_count"] += 1
|
||||
sync_counts[
|
||||
sync_counts[
|
||||
"sync_success" if resp["status"] == "success" else "sync_failure"
|
||||
] += 1
|
||||
return {
|
||||
@@ -744,7 +749,7 @@ def attachment_worker(self, file_info, user):
|
||||
input_files=[local_path], exclude_hidden=True, errors="ignore"
|
||||
)
|
||||
.load_data()[0]
|
||||
.text,
|
||||
.text,
|
||||
)
|
||||
|
||||
|
||||
@@ -835,3 +840,174 @@ def agent_webhook_worker(self, agent_id, payload):
|
||||
f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
|
||||
)
|
||||
return {"status": "success", "result": result}
|
||||
|
||||
|
||||
def ingest_connector(
    self,
    job_name: str,
    user: str,
    source_type: str,
    session_token=None,
    file_ids=None,
    folder_ids=None,
    recursive=True,
    retriever: str = "classic",
    operation_mode: str = "upload",
    doc_id=None,
    sync_frequency: str = "never",
) -> Dict[str, Any]:
    """
    Ingest documents from an external knowledge base connector (Google Drive, etc.).

    Downloads the selected files into a temporary directory, parses and
    chunks them, embeds them into a vector store and uploads the resulting
    index. Progress is reported through ``self.update_state``.

    Args:
        job_name: Name of the ingestion job
        user: User identifier
        source_type: Type of remote source ("google_drive", "dropbox", etc.)
        session_token: Authentication token for the service
        file_ids: List of file IDs to download
        folder_ids: List of folder IDs to download
        recursive: Whether to recursively download folders
        retriever: Type of retriever to use
        operation_mode: "upload" for initial ingestion, "sync" for incremental sync
        doc_id: Document ID for sync operations (required when operation_mode="sync")
        sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")

    Returns:
        A summary dict. On success it carries ``user``, ``name``, ``tokens``,
        ``type``, ``id`` and ``status``; when nothing was downloaded it
        carries ``name``, ``user``, ``tokens`` (0), ``type``,
        ``source_config`` and ``directory_structure``.

    Raises:
        ValueError: If ``session_token`` is missing, the connector type is
            unsupported, ``operation_mode`` is invalid, or ``doc_id`` is
            missing/invalid for a sync.
        Exception: Any other failure is logged and re-raised.
    """
    logging.info(f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}")
    self.update_state(state="PROGRESS", meta={"current": 1})

    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            # Step 1: Initialize the appropriate loader
            self.update_state(state="PROGRESS", meta={"current": 10, "status": "Initializing connector"})

            if not session_token:
                raise ValueError(f"{source_type} connector requires session_token")

            if not ConnectorCreator.is_supported(source_type):
                raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")

            remote_loader = ConnectorCreator.create_connector(source_type, session_token)

            # Create a clean config for storage
            api_source_config = {
                "file_ids": file_ids or [],
                "folder_ids": folder_ids or [],
                "recursive": recursive
            }

            # Step 2: Download files to temp directory
            self.update_state(state="PROGRESS", meta={"current": 20, "status": "Downloading files"})
            download_info = remote_loader.download_to_directory(
                temp_dir,
                api_source_config
            )

            if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
                # Surface a failure the connector reported in its result
                # instead of letting it pass as a plain "no files" outcome.
                download_error = download_info.get("error")
                if download_error:
                    logging.error(f"Connector {source_type} reported a download error: {download_error}")
                logging.warning(f"No files were downloaded from {source_type}")
                return {
                    "name": job_name,
                    "user": user,
                    "tokens": 0,
                    "type": source_type,
                    "source_config": api_source_config,
                    "directory_structure": "{}",
                }

            # Step 3: Use SimpleDirectoryReader to process downloaded files
            self.update_state(state="PROGRESS", meta={"current": 40, "status": "Processing files"})
            reader = SimpleDirectoryReader(
                input_dir=temp_dir,
                recursive=True,
                required_exts=[
                    ".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
                    ".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
                    ".jpg", ".jpeg",
                ],
                exclude_hidden=True,
                file_metadata=metadata_from_filename,
            )
            raw_docs = reader.load_data()
            directory_structure = getattr(reader, 'directory_structure', {})

            # Step 4: Process documents (chunking, embedding, etc.)
            self.update_state(state="PROGRESS", meta={"current": 60, "status": "Processing documents"})

            chunker = Chunker(
                chunking_strategy="classic_chunk",
                max_tokens=MAX_TOKENS,
                min_tokens=MIN_TOKENS,
                duplicate_headers=False,
            )
            raw_docs = chunker.chunk(documents=raw_docs)

            # Preserve source information in document metadata
            for doc in raw_docs:
                if hasattr(doc, 'extra_info') and doc.extra_info:
                    source = doc.extra_info.get('source')
                    if source and os.path.isabs(source):
                        # Store paths relative to the temp dir, which is
                        # deleted once the task finishes.
                        doc.extra_info['source'] = os.path.relpath(source, start=temp_dir)

            docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]

            # Resolve the identifier the index is stored under.
            if operation_mode == "upload":
                source_id = ObjectId()
            elif operation_mode == "sync":
                if not doc_id or not ObjectId.is_valid(doc_id):
                    logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
                    raise ValueError("doc_id must be provided for sync operation.")
                source_id = ObjectId(doc_id)
            else:
                raise ValueError(f"Invalid operation_mode: {operation_mode}")

            vector_store_path = os.path.join(temp_dir, "vector_store")
            os.makedirs(vector_store_path, exist_ok=True)

            # Step 5: Embed and store the chunks
            self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing documents"})
            embed_and_store_documents(docs, vector_store_path, source_id, self)

            tokens = count_tokens_docs(docs)

            # Step 6: Upload index files
            file_data = {
                "user": user,
                "name": job_name,
                "tokens": tokens,
                "retriever": retriever,
                "id": str(source_id),
                "type": "connector",
                "remote_data": json.dumps({
                    "provider": source_type,
                    **api_source_config
                }),
                "directory_structure": json.dumps(directory_structure),
                "sync_frequency": sync_frequency,
                # Recorded for uploads and syncs alike.
                "last_sync": datetime.datetime.now(),
            }

            upload_index(vector_store_path, file_data)

            # Ensure we mark the task as complete
            self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})

            logging.info(f"Remote ingestion completed: {job_name}")

            return {
                "user": user,
                "name": job_name,
                "tokens": tokens,
                "type": source_type,
                "id": str(source_id),
                "status": "complete"
            }

        except Exception as e:
            logging.error(f"Error during remote ingestion: {e}", exc_info=True)
            raise
|
||||
|
||||
74
deployment/docker-compose-hub.yaml
Normal file
74
deployment/docker-compose-hub.yaml
Normal file
@@ -0,0 +1,74 @@
|
||||
name: docsgpt-oss
|
||||
services:
|
||||
|
||||
frontend:
|
||||
image: arc53/docsgpt-fe:develop
|
||||
environment:
|
||||
- VITE_API_HOST=http://localhost:7091
|
||||
- VITE_API_STREAMING=$VITE_API_STREAMING
|
||||
ports:
|
||||
- "5173:5173"
|
||||
depends_on:
|
||||
- backend
|
||||
|
||||
|
||||
backend:
|
||||
user: root
|
||||
image: arc53/docsgpt:develop
|
||||
environment:
|
||||
- API_KEY=$API_KEY
|
||||
- EMBEDDINGS_KEY=$API_KEY
|
||||
- LLM_PROVIDER=$LLM_PROVIDER
|
||||
- LLM_NAME=$LLM_NAME
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- CACHE_REDIS_URL=redis://redis:6379/2
|
||||
- OPENAI_BASE_URL=$OPENAI_BASE_URL
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ../application/indexes:/app/indexes
|
||||
- ../application/inputs:/app/inputs
|
||||
- ../application/vectors:/app/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
|
||||
worker:
|
||||
user: root
|
||||
image: arc53/docsgpt:develop
|
||||
command: celery -A application.app.celery worker -l INFO -B
|
||||
environment:
|
||||
- API_KEY=$API_KEY
|
||||
- EMBEDDINGS_KEY=$API_KEY
|
||||
- LLM_PROVIDER=$LLM_PROVIDER
|
||||
- LLM_NAME=$LLM_NAME
|
||||
- CELERY_BROKER_URL=redis://redis:6379/0
|
||||
- CELERY_RESULT_BACKEND=redis://redis:6379/1
|
||||
- MONGO_URI=mongodb://mongo:27017/docsgpt
|
||||
- API_URL=http://backend:7091
|
||||
- CACHE_REDIS_URL=redis://redis:6379/2
|
||||
volumes:
|
||||
- ../application/indexes:/app/indexes
|
||||
- ../application/inputs:/app/inputs
|
||||
- ../application/vectors:/app/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
redis:
|
||||
image: redis:6-alpine
|
||||
ports:
|
||||
- 6379:6379
|
||||
|
||||
mongo:
|
||||
image: mongo:6
|
||||
ports:
|
||||
- 27017:27017
|
||||
volumes:
|
||||
- mongodb_data_container:/data/db
|
||||
|
||||
volumes:
|
||||
mongodb_data_container:
|
||||
@@ -5,6 +5,8 @@
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover" />
|
||||
<meta name="apple-mobile-web-app-capable" content="yes">
|
||||
<meta name="theme-color" content="#fbfbfb" media="(prefers-color-scheme: light)" />
|
||||
<meta name="theme-color" content="#161616" media="(prefers-color-scheme: dark)" />
|
||||
<title>DocsGPT</title>
|
||||
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
|
||||
</head>
|
||||
|
||||
@@ -29,7 +29,7 @@ export default function Hero({
|
||||
</div>
|
||||
|
||||
{/* Demo Buttons Section */}
|
||||
<div className="mb-8 w-full max-w-full md:mb-16">
|
||||
<div className="mb-3 w-full max-w-full md:mb-3">
|
||||
<div className="grid grid-cols-1 gap-3 text-xs md:grid-cols-1 md:gap-4 lg:grid-cols-2">
|
||||
{demos?.map(
|
||||
(demo: { header: string; query: string }, key: number) =>
|
||||
|
||||
@@ -586,7 +586,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
|
||||
onChange={(e) => setAgent({ ...agent, name: e.target.value })}
|
||||
/>
|
||||
<textarea
|
||||
className="border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-3 h-32 w-full rounded-3xl border bg-white px-5 py-4 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E]"
|
||||
className="border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-3 h-32 w-full rounded-xl border bg-white px-5 py-4 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E]"
|
||||
placeholder="Describe your agent"
|
||||
value={agent.description}
|
||||
onChange={(e) =>
|
||||
|
||||
@@ -38,6 +38,7 @@ const endpoints = {
|
||||
UPDATE_TOOL_STATUS: '/api/update_tool_status',
|
||||
UPDATE_TOOL: '/api/update_tool',
|
||||
DELETE_TOOL: '/api/delete_tool',
|
||||
SYNC_CONNECTOR: '/api/connectors/sync',
|
||||
GET_CHUNKS: (
|
||||
docId: string,
|
||||
page: number,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import apiClient from '../client';
|
||||
import endpoints from '../endpoints';
|
||||
import { getSessionToken } from '../../utils/providerUtils';
|
||||
|
||||
const userService = {
|
||||
getConfig: (): Promise<any> => apiClient.get(endpoints.USER.CONFIG, null),
|
||||
@@ -89,7 +90,10 @@ const userService = {
|
||||
path?: string,
|
||||
search?: string,
|
||||
): Promise<any> =>
|
||||
apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage, path, search), token),
|
||||
apiClient.get(
|
||||
endpoints.USER.GET_CHUNKS(docId, page, perPage, path, search),
|
||||
token,
|
||||
),
|
||||
addChunk: (data: any, token: string | null): Promise<any> =>
|
||||
apiClient.post(endpoints.USER.ADD_CHUNK, data, token),
|
||||
deleteChunk: (
|
||||
@@ -104,6 +108,22 @@ const userService = {
|
||||
apiClient.get(endpoints.USER.DIRECTORY_STRUCTURE(docId), token),
|
||||
manageSourceFiles: (data: FormData, token: string | null): Promise<any> =>
|
||||
apiClient.postFormData(endpoints.USER.MANAGE_SOURCE_FILES, data, token),
|
||||
syncConnector: (
|
||||
docId: string,
|
||||
provider: string,
|
||||
token: string | null,
|
||||
): Promise<any> => {
|
||||
const sessionToken = getSessionToken(provider);
|
||||
return apiClient.post(
|
||||
endpoints.USER.SYNC_CONNECTOR,
|
||||
{
|
||||
source_id: docId,
|
||||
session_token: sessionToken,
|
||||
provider: provider,
|
||||
},
|
||||
token,
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
export default userService;
|
||||
|
||||
@@ -1 +1 @@
|
||||
<svg width="16px" height="16px" viewBox="0 0 1024 1024" class="icon" version="1.1" xmlns="http://www.w3.org/2000/svg" fill="#11ee1c" stroke="#11ee1c" stroke-width="83.96799999999999"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M866.133333 258.133333L362.666667 761.6l-204.8-204.8L98.133333 618.666667 362.666667 881.066667l563.2-563.2z" fill="#11ee1c"></path></g></svg>
|
||||
<svg width="16px" height="16px" viewBox="0 0 1024 1024" class="icon" version="1.1" xmlns="http://www.w3.org/2000/svg" fill="#11ee1c" stroke="#11ee1c" stroke-width="83.96799999999999"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M866.133333 258.133333L362.666667 761.6l-204.8-204.8L98.133333 618.666667 362.666667 881.066667l563.2-563.2z" fill="#0C9D35"></path></g></svg>
|
||||
|
Before Width: | Height: | Size: 490 B After Width: | Height: | Size: 490 B |
@@ -1 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="white" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="black" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
|
Before Width: | Height: | Size: 454 B After Width: | Height: | Size: 454 B |
@@ -1 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="black" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="white" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
|
||||
|
Before Width: | Height: | Size: 454 B After Width: | Height: | Size: 454 B |
130
frontend/src/components/ConnectorAuth.tsx
Normal file
130
frontend/src/components/ConnectorAuth.tsx
Normal file
@@ -0,0 +1,130 @@
|
||||
import React, { useRef } from 'react';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
|
||||
/** Props accepted by the ConnectorAuth button. */
interface ConnectorAuthProps {
  /** Connector identifier, e.g. `google_drive`. */
  provider: string;
  /** Called with the session data once authentication succeeds. */
  onSuccess: (data: { session_token: string; user_email: string }) => void;
  /** Called with a message when authentication fails or is cancelled. */
  onError: (error: string) => void;
  /** Optional button text; defaults to "Connect <provider label>". */
  label?: string;
}
|
||||
|
||||
/**
 * Human-readable name for a connector id. Known providers use a fixed
 * label; anything else is the id with underscores turned into spaces.
 */
const providerLabel = (provider: string) => {
  const knownLabels: Record<string, string> = {
    google_drive: 'Google Drive',
  };
  const known = knownLabels[provider];
  if (known) {
    return known;
  }
  return provider.replace(/_/g, ' ');
};
|
||||
|
||||
/**
 * Button that runs a connector's OAuth flow in a popup window.
 *
 * Clicking it requests an authorization URL from the API, opens it in a
 * popup and waits for the popup to post an auth result back. `onSuccess`
 * receives the session token and e-mail; `onError` is called on failure or
 * when the popup is closed before a result arrives.
 */
const ConnectorAuth: React.FC<ConnectorAuthProps> = ({
  provider,
  onSuccess,
  onError,
  label,
}) => {
  const token = useSelector(selectToken);
  const completedRef = useRef(false);
  const intervalRef = useRef<number | null>(null);
  // The exact listener that is currently registered. The handler function
  // is re-created on every render, so removing "the current one" by name
  // would miss a listener added during an earlier render.
  const listenerRef = useRef<((event: MessageEvent) => void) | null>(null);
  // The popup opened for the attempt in flight.
  const popupRef = useRef<Window | null>(null);

  const cleanup = () => {
    if (intervalRef.current !== null) {
      clearInterval(intervalRef.current);
      intervalRef.current = null;
    }
    if (listenerRef.current) {
      window.removeEventListener('message', listenerRef.current);
      listenerRef.current = null;
    }
    popupRef.current = null;
  };

  // Stop polling and listening if the component unmounts mid-flow.
  // `cleanup` only touches refs, so the first render's closure stays valid.
  // eslint-disable-next-line react-hooks/exhaustive-deps
  React.useEffect(() => cleanup, []);

  const handleAuthMessage = (event: MessageEvent) => {
    // Any window can post a message here; only trust the popup we opened.
    if (popupRef.current && event.source !== popupRef.current) {
      return;
    }

    const messageType = event.data?.type;
    const succeeded =
      messageType === 'connector_auth_success' ||
      messageType === `${provider}_auth_success` ||
      messageType === 'google_drive_auth_success';
    const failed =
      messageType === `${provider}_auth_error` ||
      messageType === 'google_drive_auth_error';

    if (succeeded) {
      completedRef.current = true;
      cleanup();
      onSuccess({
        session_token: event.data.session_token,
        user_email: event.data.user_email || 'Connected User',
      });
    } else if (failed) {
      completedRef.current = true;
      cleanup();
      onError(event.data.error || 'Authentication failed');
    }
  };

  const handleAuth = async () => {
    try {
      completedRef.current = false;
      // Drop anything left over from a previous attempt.
      cleanup();

      const apiHost = import.meta.env.VITE_API_HOST;
      const authResponse = await fetch(
        `${apiHost}/api/connectors/auth?provider=${encodeURIComponent(provider)}`,
        {
          headers: { Authorization: `Bearer ${token}` },
        },
      );

      if (!authResponse.ok) {
        throw new Error(
          `Failed to get authorization URL: ${authResponse.status}`,
        );
      }

      const authData = await authResponse.json();
      if (!authData.success || !authData.authorization_url) {
        throw new Error(authData.error || 'Failed to get authorization URL');
      }

      const authWindow = window.open(
        authData.authorization_url,
        `${provider}-auth`,
        'width=500,height=600,scrollbars=yes,resizable=yes',
      );
      if (!authWindow) {
        throw new Error(
          'Failed to open authentication window. Please allow popups.',
        );
      }

      popupRef.current = authWindow;
      listenerRef.current = handleAuthMessage;
      window.addEventListener('message', handleAuthMessage);

      // The popup gives no close event across origins, so poll for it.
      intervalRef.current = window.setInterval(() => {
        if (authWindow.closed) {
          const finished = completedRef.current;
          cleanup();
          if (!finished) {
            onError('Authentication was cancelled');
          }
        }
      }, 1000);
    } catch (error) {
      onError(error instanceof Error ? error.message : 'Authentication failed');
    }
  };

  const buttonLabel = label || `Connect ${providerLabel(provider)}`;

  return (
    <button
      onClick={handleAuth}
      className="flex w-full items-center justify-center gap-2 rounded-lg bg-blue-500 px-4 py-3 text-white transition-colors hover:bg-blue-600"
    >
      <svg className="h-5 w-5" viewBox="0 0 24 24">
        <path
          fill="currentColor"
          d="M6.28 3l5.72 10H24l-5.72-10H6.28zm11.44 0L12 13l5.72 10H24L18.28 3h-.56zM0 13l5.72 10h5.72L5.72 13H0z"
        />
      </svg>
      {buttonLabel}
    </button>
  );
};
|
||||
|
||||
export default ConnectorAuth;
|
||||
731
frontend/src/components/ConnectorTreeComponent.tsx
Normal file
731
frontend/src/components/ConnectorTreeComponent.tsx
Normal file
@@ -0,0 +1,731 @@
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
import Chunks from './Chunks';
|
||||
import ContextMenu, { MenuOption } from './ContextMenu';
|
||||
import userService from '../api/services/userService';
|
||||
import FileIcon from '../assets/file.svg';
|
||||
import FolderIcon from '../assets/folder.svg';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ThreeDots from '../assets/three-dots.svg';
|
||||
import EyeView from '../assets/eye-view.svg';
|
||||
import SyncIcon from '../assets/sync.svg';
|
||||
import { useOutsideAlerter } from '../hooks';
|
||||
|
||||
interface FileNode {
|
||||
type?: string;
|
||||
token_count?: number;
|
||||
size_bytes?: number;
|
||||
[key: string]: any;
|
||||
}
|
||||
|
||||
interface DirectoryStructure {
|
||||
[key: string]: FileNode;
|
||||
}
|
||||
|
||||
interface ConnectorTreeComponentProps {
|
||||
docId: string;
|
||||
sourceName: string;
|
||||
onBackToDocuments: () => void;
|
||||
}
|
||||
|
||||
interface SearchResult {
|
||||
name: string;
|
||||
path: string;
|
||||
isFile: boolean;
|
||||
}
|
||||
|
||||
const ConnectorTreeComponent: React.FC<ConnectorTreeComponentProps> = ({
|
||||
docId,
|
||||
sourceName,
|
||||
onBackToDocuments,
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const [loading, setLoading] = useState<boolean>(true);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [directoryStructure, setDirectoryStructure] =
|
||||
useState<DirectoryStructure | null>(null);
|
||||
const [currentPath, setCurrentPath] = useState<string[]>([]);
|
||||
const token = useSelector(selectToken);
|
||||
const [activeMenuId, setActiveMenuId] = useState<string | null>(null);
|
||||
const menuRefs = useRef<{
|
||||
[key: string]: React.RefObject<HTMLDivElement | null>;
|
||||
}>({});
|
||||
const [selectedFile, setSelectedFile] = useState<{
|
||||
id: string;
|
||||
name: string;
|
||||
} | null>(null);
|
||||
const [searchQuery, setSearchQuery] = useState('');
|
||||
const [searchResults, setSearchResults] = useState<SearchResult[]>([]);
|
||||
const searchDropdownRef = useRef<HTMLDivElement>(null);
|
||||
const [isSyncing, setIsSyncing] = useState<boolean>(false);
|
||||
const [syncProgress, setSyncProgress] = useState<number>(0);
|
||||
const [sourceProvider, setSourceProvider] = useState<string>('');
|
||||
const [syncDone, setSyncDone] = useState<boolean>(false);
|
||||
|
||||
useOutsideAlerter(
|
||||
searchDropdownRef,
|
||||
() => {
|
||||
setSearchQuery('');
|
||||
setSearchResults([]);
|
||||
},
|
||||
[],
|
||||
false,
|
||||
);
|
||||
|
||||
const handleFileClick = (fileName: string) => {
|
||||
const fullPath = [...currentPath, fileName].join('/');
|
||||
setSelectedFile({
|
||||
id: fullPath,
|
||||
name: fileName,
|
||||
});
|
||||
};
|
||||
|
||||
// Starts a connector sync for `docId` and polls the resulting task until it
// reports SUCCESS or FAILURE, a poll request throws, or the attempt budget is
// used up. `isSyncing` and `syncProgress` are always reset on exit.
const handleSync = async () => {
  // A sync is already running; ignore repeated clicks.
  if (isSyncing) return;

  const provider = sourceProvider;
  const maxAttempts = 30;
  const pollInterval = 2000; // milliseconds between status polls

  setIsSyncing(true);
  setSyncProgress(0);

  try {
    const response = await userService.syncConnector(docId, provider, token);
    const data = await response.json();

    if (!data.success) {
      console.error('Sync failed:', data.error);
      return; // the `finally` below still resets the sync state
    }

    console.log('Sync started successfully:', data.task_id);
    setSyncProgress(10);

    // Poll task status using userService
    let settled = false;
    for (let attempt = 0; attempt < maxAttempts && !settled; attempt++) {
      try {
        const statusResponse = await userService.getTaskStatus(
          data.task_id,
          token,
        );
        const statusData = await statusResponse.json();

        console.log(
          `Task status (attempt ${attempt + 1}):`,
          statusData.status,
        );

        if (statusData.status === 'SUCCESS') {
          settled = true;
          setSyncProgress(100);
          console.log('Sync completed successfully');

          // Refresh directory structure
          try {
            const refreshResponse = await userService.getDirectoryStructure(
              docId,
              token,
            );
            const refreshData = await refreshResponse.json();
            if (refreshData && refreshData.directory_structure) {
              setDirectoryStructure(refreshData.directory_structure);
              setCurrentPath([]);
            }
            if (refreshData && refreshData.provider) {
              setSourceProvider(refreshData.provider);
            }

            // Show the "Done" state briefly, then fall back to the idle label.
            setSyncDone(true);
            setTimeout(() => setSyncDone(false), 5000);
          } catch (err) {
            console.error('Error refreshing directory structure:', err);
          }
        } else if (statusData.status === 'FAILURE') {
          settled = true;
          console.error('Sync task failed:', statusData.result);
        } else if (statusData.status === 'PROGRESS') {
          const reported = Number(
            statusData.result?.current ?? statusData.meta?.current ?? 0,
          );
          // Guard against non-numeric payloads (which would render "NaN%")
          // and keep the displayed value inside the 10–100 range.
          setSyncProgress(
            Number.isFinite(reported)
              ? Math.min(100, Math.max(10, reported))
              : 10,
          );
        }
      } catch (error) {
        settled = true;
        console.error('Error polling task status:', error);
      }

      // Only wait when another poll is actually going to follow.
      if (!settled && attempt < maxAttempts - 1) {
        await new Promise((resolve) => setTimeout(resolve, pollInterval));
      }
    }

    if (!settled) {
      // Previously the loop ended silently when the budget ran out.
      console.warn(
        `Sync task ${data.task_id} did not finish after ${maxAttempts} status checks`,
      );
    }
  } catch (err) {
    console.error('Error syncing connector:', err);
  } finally {
    setIsSyncing(false);
    setSyncProgress(0);
  }
};
|
||||
|
||||
// Loads the directory structure (and the provider, when the response carries
// one) for the current document whenever `docId` or `token` changes.
useEffect(() => {
  // Flipped by the cleanup below so a response that arrives after this effect
  // was torn down (unmount or changed deps) cannot overwrite newer state.
  let cancelled = false;

  const fetchDirectoryStructure = async () => {
    try {
      setLoading(true);

      const directoryResponse = await userService.getDirectoryStructure(
        docId,
        token,
      );
      const directoryData = await directoryResponse.json();

      // Stale response: a newer effect run owns the state now.
      if (cancelled) return;

      if (directoryData && directoryData.directory_structure) {
        setDirectoryStructure(directoryData.directory_structure);
      } else {
        setError('Invalid response format');
      }

      if (directoryData && directoryData.provider) {
        setSourceProvider(directoryData.provider);
      }
    } catch (err) {
      console.error(err);
      if (!cancelled) {
        setError('Failed to load source information');
      }
    } finally {
      // Left unguarded on purpose so the loading flag can never get stuck.
      setLoading(false);
    }
  };

  if (docId) {
    fetchDirectoryStructure();
  }

  return () => {
    cancelled = true;
  };
}, [docId, token]);
|
||||
|
||||
// Descends into the child directory `dirName` of the directory currently shown.
const navigateToDirectory = (dirName: string) => {
  // Functional updater: derive the next path from the latest state instead of
  // the `currentPath` captured by this render's closure.
  setCurrentPath((prev) => [...prev, dirName]);
};
|
||||
|
||||
// Moves one level up by dropping the last segment of the current path.
const navigateUp = () => {
  // Functional updater: derive the next path from the latest state instead of
  // the `currentPath` captured by this render's closure.
  setCurrentPath((prev) => prev.slice(0, -1));
};
|
||||
|
||||
// Resolves `currentPath` against the loaded structure and returns the
// directory it points at. Returns an empty object when nothing is loaded yet,
// or when a segment is missing or refers to a file (a node with a `type`).
const getCurrentDirectory = (): DirectoryStructure => {
  if (!directoryStructure) return {};

  let cursor = directoryStructure;
  for (const segment of currentPath) {
    const child = cursor[segment];
    const isDirectory = child && !child.type;
    if (!isDirectory) return {};
    cursor = child as DirectoryStructure;
  }
  return cursor;
};
|
||||
|
||||
// Returns the ref stored in `menuRefs` under `id`, creating it on first use so
// repeated calls with the same id hand back the same ref object.
const getMenuRef = (id: string) => {
  const refsById = menuRefs.current;
  if (!refsById[id]) {
    refsById[id] = React.createRef();
  }
  return refsById[id];
};
|
||||
|
||||
// Toggles the context menu identified by `id`: clicking the button of the
// menu that is already active closes it, any other id becomes the active one.
const handleMenuClick = (
  e: React.MouseEvent<HTMLButtonElement>,
  id: string,
) => {
  // Keep the click from reaching the row's own onClick handler.
  e.stopPropagation();

  const isAlreadyOpen = activeMenuId === id;
  setActiveMenuId(isAlreadyOpen ? null : id);
};
|
||||
|
||||
// Builds the context-menu entries for one row. Currently a single "view"
// action: it opens the file, or navigates into the directory, named `name`.
// `_itemId` is accepted but not used by this implementation.
const getActionOptions = (
  name: string,
  isFile: boolean,
  _itemId: string,
): MenuOption[] => {
  const openItem = (event: React.SyntheticEvent) => {
    event.stopPropagation();
    if (isFile) {
      handleFileClick(name);
      return;
    }
    navigateToDirectory(name);
  };

  return [
    {
      icon: EyeView,
      label: t('settings.sources.view'),
      onClick: openItem,
      iconWidth: 18,
      iconHeight: 18,
      variant: 'primary',
    },
  ];
};
|
||||
|
||||
// Recursively sums `size_bytes` and `token_count` over every file beneath
// `structure`. A node with a truthy `type` is a file; anything else is treated
// as a nested directory. Missing numbers count as 0.
const calculateDirectoryStats = (
  structure: DirectoryStructure,
): { totalSize: number; totalTokens: number } => {
  const totals = { totalSize: 0, totalTokens: 0 };

  for (const node of Object.values(structure)) {
    if (node.type) {
      // File: add its own numbers.
      totals.totalSize += node.size_bytes || 0;
      totals.totalTokens += node.token_count || 0;
    } else {
      // Directory: fold in the totals of its subtree.
      const nested = calculateDirectoryStats(node);
      totals.totalSize += nested.totalSize;
      totals.totalTokens += nested.totalTokens;
    }
  }

  return totals;
};
|
||||
|
||||
// Back button behaviour, in priority order: close the open file, otherwise go
// up one directory, otherwise (already at the root) hand control back to the
// documents list if the parent supplied `onBackToDocuments`.
const handleBackNavigation = () => {
  if (selectedFile) {
    setSelectedFile(null);
    return;
  }

  if (currentPath.length !== 0) {
    navigateUp();
    return;
  }

  if (onBackToDocuments) {
    onBackToDocuments();
  }
};
|
||||
|
||||
// Renders the header row: back button, breadcrumb for the current path, the
// file search box and the sync button.
const renderPathNavigation = () => {
  // Text used while no sync is running; shared by the tooltip and the label.
  const idleLabel = syncDone ? 'Done' : t('settings.sources.sync');
  const syncTitle = isSyncing
    ? `${t('settings.sources.syncing')} ${syncProgress}%`
    : idleLabel;
  const syncLabel = isSyncing ? `${syncProgress}%` : idleLabel;
  const syncButtonTone = isSyncing
    ? 'cursor-not-allowed bg-gray-300 text-gray-600 dark:bg-gray-600 dark:text-gray-400'
    : 'bg-purple-30 hover:bg-violets-are-blue text-white';

  // Every segment is emitted as "/ name", which yields the same element
  // sequence as one leading slash followed by slash-separated names.
  const breadcrumb = currentPath.map((dir, index) => (
    <React.Fragment key={index}>
      <span className="mx-1 flex-shrink-0 text-gray-500">/</span>
      <span className="break-words text-gray-700 dark:text-[#E0E0E0]">
        {dir}
      </span>
    </React.Fragment>
  ));

  return (
    <div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
      {/* Left side with path navigation */}
      <div className="flex w-full items-center sm:w-auto">
        <button
          className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
          onClick={handleBackNavigation}
        >
          <img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
        </button>

        <div className="flex flex-wrap items-center">
          <span className="font-semibold break-words text-[#7D54D1]">
            {sourceName}
          </span>
          {breadcrumb}
        </div>
      </div>

      {/* Right side with search and sync */}
      <div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
        {renderFileSearch()}

        {/* Sync button */}
        <button
          onClick={handleSync}
          disabled={isSyncing}
          className={`flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap transition-colors ${syncButtonTone}`}
          title={syncTitle}
        >
          <img
            src={SyncIcon}
            alt={t('settings.sources.sync')}
            className={`mr-2 h-4 w-4 brightness-0 invert filter ${isSyncing ? 'animate-spin' : ''}`}
          />
          {syncLabel}
        </button>
      </div>
    </div>
  );
};
|
||||
|
||||
// Renders the trailing actions cell (three-dots button plus its context menu)
// that directory rows and file rows share.
const renderRowMenuCell = (name: string, isFile: boolean, itemId: string) => {
  const menuRef = getMenuRef(itemId);

  return (
    <td className="w-10 px-2 py-2 text-sm lg:px-4">
      <div ref={menuRef} className="relative">
        <button
          onClick={(e) => handleMenuClick(e, itemId)}
          className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
          aria-label={t('settings.sources.menuAlt')}
        >
          <img
            src={ThreeDots}
            alt={t('settings.sources.menuAlt')}
            className="opacity-60 hover:opacity-100"
          />
        </button>
        <ContextMenu
          isOpen={activeMenuId === itemId}
          setIsOpen={(isOpen) => setActiveMenuId(isOpen ? itemId : null)}
          options={getActionOptions(name, isFile, itemId)}
          anchorRef={menuRef}
          position="bottom-left"
          offset={{ x: -4, y: 4 }}
        />
      </div>
    </td>
  );
};

// Builds the table rows for `directory`: an optional ".." row (when not at the
// root), then sub-directories, then files, each group sorted alphabetically.
// Returns an empty array when `directory` is falsy.
const renderFileTree = (directory: DirectoryStructure) => {
  if (!directory) return [];

  // Create parent directory row
  const parentRow =
    currentPath.length > 0
      ? [
          <tr
            key="parent-dir"
            className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
            onClick={navigateUp}
          >
            <td className="px-2 py-2 lg:px-4">
              <div className="flex items-center">
                <img
                  src={FolderIcon}
                  alt={t('settings.sources.parentFolderAlt')}
                  className="mr-2 h-4 w-4 flex-shrink-0"
                />
                <span className="truncate text-sm dark:text-[#E0E0E0]">
                  ..
                </span>
              </div>
            </td>
            <td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
              -
            </td>
            <td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
              -
            </td>
            <td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
          </tr>,
        ]
      : [];

  // Sort entries: directories first, then files, both alphabetically
  const sortedEntries = Object.entries(directory).sort(
    ([nameA, nodeA], [nameB, nodeB]) => {
      const isFileA = !!nodeA.type;
      const isFileB = !!nodeB.type;

      if (isFileA !== isFileB) {
        return isFileA ? 1 : -1; // Directories first
      }

      return nameA.localeCompare(nameB); // Alphabetical within each group
    },
  );

  // Process directories (nodes without a `type`)
  const directoryRows = sortedEntries
    .filter(([_, node]) => !node.type)
    .map(([name, node]) => {
      const itemId = `dir-${name}`;

      // Aggregate size and token totals over everything below this directory.
      const dirStats = calculateDirectoryStats(node as DirectoryStructure);

      return (
        <tr
          key={itemId}
          className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
          onClick={() => navigateToDirectory(name)}
        >
          <td className="px-2 py-2 lg:px-4">
            <div className="flex min-w-0 items-center">
              <img
                src={FolderIcon}
                alt={t('settings.sources.folderAlt')}
                className="mr-2 h-4 w-4 flex-shrink-0"
              />
              <span className="truncate text-sm dark:text-[#E0E0E0]">
                {name}
              </span>
            </div>
          </td>
          <td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
            {dirStats.totalTokens > 0
              ? dirStats.totalTokens.toLocaleString()
              : '-'}
          </td>
          <td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
            {dirStats.totalSize > 0 ? formatBytes(dirStats.totalSize) : '-'}
          </td>
          {renderRowMenuCell(name, false, itemId)}
        </tr>
      );
    });

  // Process files (nodes with a `type`)
  const fileRows = sortedEntries
    .filter(([_, node]) => !!node.type)
    .map(([name, node]) => {
      const itemId = `file-${name}`;

      return (
        <tr
          key={itemId}
          className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
          onClick={() => handleFileClick(name)}
        >
          <td className="px-2 py-2 lg:px-4">
            <div className="flex min-w-0 items-center">
              <img
                src={FileIcon}
                alt={t('settings.sources.fileAlt')}
                className="mr-2 h-4 w-4 flex-shrink-0"
              />
              <span className="truncate text-sm dark:text-[#E0E0E0]">
                {name}
              </span>
            </div>
          </td>
          <td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
            {node.token_count?.toLocaleString() || '-'}
          </td>
          <td className="px-2 py-2 text-sm md:px-4 dark:text-[#E0E0E0]">
            {node.size_bytes ? formatBytes(node.size_bytes) : '-'}
          </td>
          {renderRowMenuCell(name, true, itemId)}
        </tr>
      );
    });

  return [...parentRow, ...directoryRows, ...fileRows];
};
|
||||
|
||||
// Recursively collects every file and directory under `structure` whose name
// contains `query` (case-insensitive). `currentPath` is the list of parent
// segments prepended to each result's slash-joined `path`. Results come back
// in traversal order: an entry first, then the matches inside it.
const searchFiles = (
  query: string,
  structure: DirectoryStructure,
  currentPath: string[] = [],
): SearchResult[] => {
  // Lower-case the query once rather than once per visited entry.
  const needle = query.toLowerCase();
  // One shared accumulator avoids re-copying the results at every level.
  const results: SearchResult[] = [];

  const walk = (dir: DirectoryStructure, parents: string[]) => {
    Object.entries(dir).forEach(([name, node]) => {
      const segments = [...parents, name];

      if (name.toLowerCase().includes(needle)) {
        results.push({
          name,
          path: segments.join('/'),
          isFile: !!node.type,
        });
      }

      if (!node.type) {
        // If it's a directory, search recursively
        walk(node as DirectoryStructure, segments);
      }
    });
  };

  walk(structure, currentPath);
  return results;
};
|
||||
|
||||
// Applies a picked search result: a file is opened with the path pointing at
// its parent directory, a directory becomes the current path. Either way the
// search box and its result list are cleared afterwards.
const handleSearchSelect = (result: SearchResult) => {
  const segments = result.path.split('/');

  if (result.isFile) {
    // The last segment is the file itself; the rest is its parent path.
    const fileName = segments.pop() || '';
    setCurrentPath(segments);
    setSelectedFile({ id: result.path, name: fileName });
  } else {
    setCurrentPath(segments);
    setSelectedFile(null);
  }

  setSearchQuery('');
  setSearchResults([]);
};
|
||||
|
||||
// Renders the search input and, while a query is present, a dropdown listing
// the matching files and directories (or a "no results" message).
const renderFileSearch = () => {
  // Store the typed text and refresh the matches once a structure is loaded.
  const handleQueryChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    const nextQuery = e.target.value;
    setSearchQuery(nextQuery);
    if (directoryStructure) {
      setSearchResults(searchFiles(nextQuery, directoryStructure));
    }
  };

  // The input loses its bottom rounding while the dropdown hangs below it.
  const inputCorners = searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]';
  const lastIndex = searchResults.length - 1;

  const renderResult = (result: SearchResult, index: number) => {
    // Every row except the last one carries a divider line.
    const divider =
      index !== lastIndex
        ? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
        : '';
    const iconAlt = result.isFile
      ? t('settings.sources.fileAlt')
      : t('settings.sources.folderAlt');

    return (
      <div
        key={index}
        onClick={() => handleSearchSelect(result)}
        title={result.path}
        className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${divider}`}
      >
        <img
          src={result.isFile ? FileIcon : FolderIcon}
          alt={iconAlt}
          className="mr-2 h-4 w-4 flex-shrink-0"
        />
        <span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
          {result.path.split('/').pop() || result.path}
        </span>
      </div>
    );
  };

  return (
    <div className="relative w-52" ref={searchDropdownRef}>
      <input
        type="text"
        value={searchQuery}
        onChange={handleQueryChange}
        placeholder={t('settings.sources.searchFiles')}
        className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${inputCorners} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
      />

      {searchQuery && (
        <div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
          <div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
            {searchResults.length === 0 ? (
              <div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
                {t('settings.sources.noResults')}
              </div>
            ) : (
              searchResults.map(renderResult)
            )}
          </div>
        </div>
      )}
    </div>
  );
};
|
||||
|
||||
// Search callback handed to the chunk view: runs `searchFiles` over the whole
// loaded structure, or returns an empty list when nothing is loaded yet.
const handleFileSearch = (searchQuery: string) => {
  if (!directoryStructure) {
    return [];
  }
  return searchFiles(searchQuery, directoryStructure);
};
|
||||
|
||||
// Opens the file at the slash-separated `path`: the leading segments become
// the current directory path and the last segment becomes the file name.
const handleFileSelect = (path: string) => {
  const segments = path.split('/');
  const parentSegments = segments.slice(0, -1);
  const fileName = segments[segments.length - 1] || '';

  setCurrentPath(parentSegments);
  setSelectedFile({ id: path, name: fileName });
};
|
||||
|
||||
const currentDirectory = getCurrentDirectory();
|
||||
|
||||
// Jumps to the ancestor at position `index` of the current path, keeping
// segments 0..index and dropping everything deeper.
const navigateToPath = (index: number) => {
  // Functional updater: derive the next path from the latest state instead of
  // the `currentPath` captured by this render's closure.
  setCurrentPath((prev) => prev.slice(0, index + 1));
};
|
||||
|
||||
return (
|
||||
<div>
|
||||
{selectedFile ? (
|
||||
<div className="flex">
|
||||
<div className="flex-1">
|
||||
<Chunks
|
||||
documentId={docId}
|
||||
documentName={sourceName}
|
||||
handleGoBack={() => setSelectedFile(null)}
|
||||
path={selectedFile.id}
|
||||
onFileSearch={handleFileSearch}
|
||||
onFileSelect={handleFileSelect}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex w-full max-w-full flex-col overflow-hidden">
|
||||
<div className="mb-2">{renderPathNavigation()}</div>
|
||||
|
||||
<div className="w-full">
|
||||
<div className="overflow-x-auto rounded-[6px] border border-[#D1D9E0] dark:border-[#6A6A6A]">
|
||||
<table className="w-full min-w-[600px] table-auto bg-transparent">
|
||||
<thead className="bg-gray-100 dark:bg-[#27282D]">
|
||||
<tr className="border-b border-[#D1D9E0] dark:border-[#6A6A6A]">
|
||||
<th className="min-w-[200px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.fileName')}
|
||||
</th>
|
||||
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.tokens')}
|
||||
</th>
|
||||
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
|
||||
{t('settings.sources.size')}
|
||||
</th>
|
||||
<th className="w-10 px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]"></th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>{renderFileTree(getCurrentDirectory())}</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
export default ConnectorTreeComponent;
|
||||
@@ -2,6 +2,7 @@ import React, { useState, useRef, useEffect } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useSelector } from 'react-redux';
|
||||
import { selectToken } from '../preferences/preferenceSlice';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import Chunks from './Chunks';
|
||||
import ContextMenu, { MenuOption } from './ContextMenu';
|
||||
import userService from '../api/services/userService';
|
||||
@@ -10,9 +11,7 @@ import FolderIcon from '../assets/folder.svg';
|
||||
import ArrowLeft from '../assets/arrow-left.svg';
|
||||
import ThreeDots from '../assets/three-dots.svg';
|
||||
import EyeView from '../assets/eye-view.svg';
|
||||
import OutlineSource from '../assets/outline-source.svg';
|
||||
import Trash from '../assets/red-trash.svg';
|
||||
import SearchIcon from '../assets/search.svg';
|
||||
import { useOutsideAlerter } from '../hooks';
|
||||
import ConfirmationModal from '../modals/ConfirmationModal';
|
||||
|
||||
@@ -128,14 +127,6 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
}, [docId, token]);
|
||||
|
||||
const formatBytes = (bytes: number): string => {
|
||||
if (bytes === 0) return '0 Bytes';
|
||||
const k = 1024;
|
||||
const sizes = ['Bytes', 'KB', 'MB', 'GB'];
|
||||
const i = Math.floor(Math.log(bytes) / Math.log(k));
|
||||
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
|
||||
};
|
||||
|
||||
const navigateToDirectory = (dirName: string) => {
|
||||
setCurrentPath((prev) => [...prev, dirName]);
|
||||
};
|
||||
@@ -443,18 +434,18 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
|
||||
const renderPathNavigation = () => {
|
||||
return (
|
||||
<div className="mb-0 min-h-[38px] flex flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
|
||||
<div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
|
||||
{/* Left side with path navigation */}
|
||||
<div className="flex w-full items-center sm:w-auto">
|
||||
<button
|
||||
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34] font-medium"
|
||||
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
|
||||
onClick={handleBackNavigation}
|
||||
>
|
||||
<img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
|
||||
</button>
|
||||
|
||||
<div className="flex flex-wrap items-center">
|
||||
<span className="text-[#7D54D1] font-semibold break-words">
|
||||
<span className="font-semibold break-words text-[#7D54D1]">
|
||||
{sourceName}
|
||||
</span>
|
||||
{currentPath.length > 0 && (
|
||||
@@ -485,8 +476,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex relative flex-row flex-nowrap items-center gap-2 w-full sm:w-auto justify-end mt-2 sm:mt-0">
|
||||
|
||||
<div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
|
||||
{processingRef.current && (
|
||||
<div className="text-sm text-gray-500">
|
||||
{currentOpRef.current === 'add'
|
||||
@@ -495,13 +485,13 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
</div>
|
||||
)}
|
||||
|
||||
{renderFileSearch()}
|
||||
{renderFileSearch()}
|
||||
|
||||
{/* Add file button */}
|
||||
{!processingRef.current && (
|
||||
<button
|
||||
onClick={handleAddFile}
|
||||
className="bg-purple-30 hover:bg-violets-are-blue flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] whitespace-nowrap text-white font-medium"
|
||||
className="bg-purple-30 hover:bg-violets-are-blue flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap text-white"
|
||||
title={t('settings.sources.addFile')}
|
||||
>
|
||||
{t('settings.sources.addFile')}
|
||||
@@ -543,32 +533,32 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
const parentRow =
|
||||
currentPath.length > 0
|
||||
? [
|
||||
<tr
|
||||
key="parent-dir"
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={navigateUp}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.parentFolderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
..
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
|
||||
</tr>,
|
||||
]
|
||||
<tr
|
||||
key="parent-dir"
|
||||
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
|
||||
onClick={navigateUp}
|
||||
>
|
||||
<td className="px-2 py-2 lg:px-4">
|
||||
<div className="flex items-center">
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt={t('settings.sources.parentFolderAlt')}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="truncate text-sm dark:text-[#E0E0E0]">
|
||||
..
|
||||
</span>
|
||||
</div>
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
|
||||
-
|
||||
</td>
|
||||
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
|
||||
</tr>,
|
||||
]
|
||||
: [];
|
||||
|
||||
// Render directories first, then files
|
||||
@@ -609,7 +599,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E] font-medium"
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
@@ -665,7 +655,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
<div ref={menuRef} className="relative">
|
||||
<button
|
||||
onClick={(e) => handleMenuClick(e, itemId)}
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E] font-medium"
|
||||
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
|
||||
aria-label={t('settings.sources.menuAlt')}
|
||||
>
|
||||
<img
|
||||
@@ -757,14 +747,12 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
}}
|
||||
placeholder={t('settings.sources.searchFiles')}
|
||||
className={`w-full h-[38px] border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A]
|
||||
${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'}
|
||||
bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
|
||||
className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
|
||||
/>
|
||||
|
||||
{searchQuery && (
|
||||
<div className="absolute top-full left-0 right-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg dark:border-[#6A6A6A] dark:bg-[#1F2023] transition-all duration-200">
|
||||
<div className="max-h-[calc(100vh-200px)] overflow-y-auto overflow-x-hidden overscroll-contain">
|
||||
<div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
|
||||
<div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
|
||||
{searchResults.length === 0 ? (
|
||||
<div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
{t('settings.sources.noResults')}
|
||||
@@ -775,10 +763,11 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
key={index}
|
||||
onClick={() => handleSearchSelect(result)}
|
||||
title={result.path}
|
||||
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${index !== searchResults.length - 1
|
||||
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${
|
||||
index !== searchResults.length - 1
|
||||
? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
|
||||
: ''
|
||||
}`}
|
||||
}`}
|
||||
>
|
||||
<img
|
||||
src={result.isFile ? FileIcon : FolderIcon}
|
||||
@@ -789,7 +778,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
}
|
||||
className="mr-2 h-4 w-4 flex-shrink-0"
|
||||
/>
|
||||
<span className="text-sm dark:text-[#E0E0E0] truncate flex-1">
|
||||
<span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
|
||||
{result.path.split('/').pop() || result.path}
|
||||
</span>
|
||||
</div>
|
||||
@@ -871,7 +860,9 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
|
||||
message={
|
||||
itemToDelete?.isFile
|
||||
? t('settings.sources.confirmDelete')
|
||||
: t('settings.sources.deleteDirectoryWarning', { name: itemToDelete?.name })
|
||||
: t('settings.sources.deleteDirectoryWarning', {
|
||||
name: itemToDelete?.name,
|
||||
})
|
||||
}
|
||||
modalState={deleteModalState}
|
||||
setModalState={setDeleteModalState}
|
||||
|
||||
@@ -16,7 +16,9 @@ const MermaidRenderer: React.FC<MermaidRendererProps> = ({
|
||||
isLoading,
|
||||
}) => {
|
||||
const [isDarkTheme] = useDarkTheme();
|
||||
const diagramId = useRef(`mermaid-${crypto.randomUUID()}`);
|
||||
const diagramId = useRef(
|
||||
`mermaid-${Date.now()}-${Math.random().toString(36).substring(2)}`,
|
||||
);
|
||||
const status = useSelector(selectStatus);
|
||||
const [error, setError] = useState<string | null>(null);
|
||||
const [showCode, setShowCode] = useState<boolean>(false);
|
||||
|
||||
@@ -259,7 +259,7 @@ export default function MessageInput({
|
||||
return (
|
||||
<div className="mx-2 flex w-full flex-col">
|
||||
<div className="border-dark-gray bg-lotion dark:border-grey relative flex w-full flex-col rounded-[23px] border dark:bg-transparent">
|
||||
<div className="flex flex-wrap gap-1.5 px-4 pt-3 pb-0 sm:gap-2 sm:px-6">
|
||||
<div className="flex flex-wrap gap-1.5 px-2 py-2 sm:gap-2 sm:px-3">
|
||||
{attachments.map((attachment, index) => (
|
||||
<div
|
||||
key={index}
|
||||
@@ -353,14 +353,14 @@ export default function MessageInput({
|
||||
onChange={handleChange}
|
||||
tabIndex={1}
|
||||
placeholder={t('inputPlaceholder')}
|
||||
className="inputbox-style no-scrollbar bg-lotion dark:text-bright-gray dark:placeholder:text-bright-gray/50 w-full overflow-x-hidden overflow-y-auto rounded-t-[23px] px-4 py-3 text-base leading-tight whitespace-pre-wrap opacity-100 placeholder:text-gray-500 focus:outline-hidden sm:px-6 sm:py-5 dark:bg-transparent"
|
||||
className="inputbox-style no-scrollbar bg-lotion dark:text-bright-gray dark:placeholder:text-bright-gray/50 w-full overflow-x-hidden overflow-y-auto rounded-t-[23px] px-2 text-base leading-tight whitespace-pre-wrap opacity-100 placeholder:text-gray-500 focus:outline-hidden sm:px-3 dark:bg-transparent"
|
||||
onInput={handleInput}
|
||||
onKeyDown={handleKeyDown}
|
||||
aria-label={t('inputPlaceholder')}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div className="flex items-center px-3 py-1.5 sm:px-4 sm:py-2">
|
||||
<div className="flex items-center px-2 pb-1.5 sm:px-3 sm:pb-2">
|
||||
<div className="flex grow flex-wrap gap-1 sm:gap-2">
|
||||
{showSourceButton && (
|
||||
<button
|
||||
@@ -369,7 +369,7 @@ export default function MessageInput({
|
||||
onClick={() => setIsSourcesPopupOpen(!isSourcesPopupOpen)}
|
||||
title={
|
||||
selectedDocs && selectedDocs.length > 0
|
||||
? selectedDocs.map(doc => doc.name).join(', ')
|
||||
? selectedDocs.map((doc) => doc.name).join(', ')
|
||||
: t('conversation.sources.title')
|
||||
}
|
||||
>
|
||||
@@ -380,7 +380,7 @@ export default function MessageInput({
|
||||
/>
|
||||
<span className="xs:text-[12px] dark:text-bright-gray truncate overflow-hidden text-[10px] font-medium text-[#5D5D5D] sm:text-[14px]">
|
||||
{selectedDocs && selectedDocs.length > 0
|
||||
? selectedDocs.length === 1
|
||||
? selectedDocs.length === 1
|
||||
? selectedDocs[0].name
|
||||
: `${selectedDocs.length} sources selected`
|
||||
: t('conversation.sources.title')}
|
||||
@@ -430,18 +430,18 @@ export default function MessageInput({
|
||||
<button
|
||||
onClick={loading ? undefined : handleSubmit}
|
||||
aria-label={loading ? t('loading') : t('send')}
|
||||
className={`flex items-center justify-center rounded-full p-2 sm:p-2.5 ${loading ? 'bg-gray-300 dark:bg-gray-600' : 'bg-black dark:bg-white'} ml-auto shrink-0`}
|
||||
className={`flex h-7 w-7 items-center justify-center rounded-full sm:h-9 sm:w-9 ${loading || !value.trim() ? 'bg-black opacity-60 dark:bg-[#F0F3F4] dark:opacity-80' : 'bg-black opacity-100 dark:bg-[#F0F3F4]'} ml-auto shrink-0`}
|
||||
disabled={loading}
|
||||
>
|
||||
{loading ? (
|
||||
<img
|
||||
src={isDarkTheme ? SpinnerDark : Spinner}
|
||||
className="h-3.5 w-3.5 animate-spin sm:h-4 sm:w-4"
|
||||
className="mx-auto my-auto block h-3.5 w-3.5 animate-spin sm:h-4 sm:w-4"
|
||||
alt={t('loading')}
|
||||
/>
|
||||
) : (
|
||||
<img
|
||||
className={`h-3.5 w-3.5 sm:h-4 sm:w-4 ${isDarkTheme ? 'invert filter' : ''}`}
|
||||
className={`mx-auto my-auto block h-3.5 w-3.5 translate-x-[-0.9px] translate-y-[1.1px] sm:h-4 sm:w-4 ${isDarkTheme ? 'invert filter' : ''}`}
|
||||
src={PaperPlane}
|
||||
alt={t('send')}
|
||||
/>
|
||||
|
||||
@@ -248,7 +248,7 @@ export default function MultiSelectPopup({
|
||||
</div>
|
||||
<div className="shrink-0">
|
||||
<div
|
||||
className={`dark:bg-charleston-green-2 flex h-4 w-4 items-center justify-center rounded-xs border border-[#C6C6C6] bg-white dark:border-[#757783]`}
|
||||
className={`dark:bg-charleston-green-2 flex h-4 w-4 items-center justify-center rounded-xs border-2 border-[#C6C6C6] bg-white dark:border-[#757783]`}
|
||||
aria-hidden="true"
|
||||
>
|
||||
{isSelected && (
|
||||
|
||||
@@ -200,7 +200,7 @@ export default function SourcesPopup({
|
||||
{option.name}
|
||||
</span>
|
||||
<div
|
||||
className={`flex h-4 w-4 shrink-0 items-center justify-center border border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
className={`flex h-4 w-4 shrink-0 items-center justify-center rounded-xs border-2 border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
>
|
||||
{isSelected && (
|
||||
<img
|
||||
|
||||
@@ -46,7 +46,7 @@ const ToggleSwitch: React.FC<ToggleSwitchProps> = ({
|
||||
|
||||
return (
|
||||
<label
|
||||
className={`flex cursor-pointer select-none flex-row items-center ${
|
||||
className={`flex cursor-pointer flex-row items-center select-none ${
|
||||
labelPosition === 'right' ? 'flex-row-reverse' : ''
|
||||
} ${disabled ? 'cursor-not-allowed opacity-50' : ''} ${className}`}
|
||||
>
|
||||
@@ -75,7 +75,7 @@ const ToggleSwitch: React.FC<ToggleSwitchProps> = ({
|
||||
}`}
|
||||
></div>
|
||||
<div
|
||||
className={`absolute ${toggle} flex items-center justify-center rounded-full bg-white opacity-80 transition ${
|
||||
className={`absolute ${toggle} flex items-center justify-center rounded-full bg-white transition ${
|
||||
checked ? `${translate} bg-silver` : ''
|
||||
}`}
|
||||
></div>
|
||||
|
||||
@@ -207,7 +207,7 @@ export default function ToolsPopup({
|
||||
</div>
|
||||
<div className="flex shrink-0 items-center">
|
||||
<div
|
||||
className={`flex h-4 w-4 items-center justify-center border border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
className={`flex h-4 w-4 items-center justify-center rounded-xs border-2 border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
|
||||
>
|
||||
{tool.status && (
|
||||
<img
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Pre-loaded",
|
||||
"private": "Private",
|
||||
"sync": "Sync",
|
||||
"syncing": "Syncing...",
|
||||
"syncFrequency": {
|
||||
"never": "Never",
|
||||
"daily": "Daily",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Precargado",
|
||||
"private": "Privado",
|
||||
"sync": "Sincronizar",
|
||||
"syncing": "Sincronizando...",
|
||||
"syncFrequency": {
|
||||
"never": "Nunca",
|
||||
"daily": "Diario",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "プリロード済み",
|
||||
"private": "プライベート",
|
||||
"sync": "同期",
|
||||
"syncing": "同期中...",
|
||||
"syncFrequency": {
|
||||
"never": "なし",
|
||||
"daily": "毎日",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "Предзагруженный",
|
||||
"private": "Частный",
|
||||
"sync": "Синхронизация",
|
||||
"syncing": "Синхронизация...",
|
||||
"syncFrequency": {
|
||||
"never": "Никогда",
|
||||
"daily": "Ежедневно",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "預載入",
|
||||
"private": "私人",
|
||||
"sync": "同步",
|
||||
"syncing": "同步中...",
|
||||
"syncFrequency": {
|
||||
"never": "從不",
|
||||
"daily": "每天",
|
||||
|
||||
@@ -67,6 +67,7 @@
|
||||
"preLoaded": "预加载",
|
||||
"private": "私有",
|
||||
"sync": "同步",
|
||||
"syncing": "同步中...",
|
||||
"syncFrequency": {
|
||||
"never": "从不",
|
||||
"daily": "每天",
|
||||
|
||||
@@ -42,10 +42,10 @@ export default function WrapperModal({
|
||||
}, [close, isPerformingTask]);
|
||||
|
||||
const modalContent = (
|
||||
<div className="bg-gray-alpha bg-opacity-50 fixed top-0 left-0 z-30 flex h-screen w-screen items-center justify-center">
|
||||
<div className="fixed top-0 left-0 z-30 flex h-screen w-screen items-center justify-center">
|
||||
<div
|
||||
ref={modalRef}
|
||||
className={`relative w-11/12 rounded-2xl bg-white p-8 sm:w-[512px] dark:bg-[#26272E] ${className}`}
|
||||
className={`relative w-11/12 rounded-2xl bg-white p-8 shadow-2xl sm:w-[512px] dark:bg-[#26272E] ${className}`}
|
||||
>
|
||||
{!isPerformingTask && (
|
||||
<button
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
@@ -29,6 +28,7 @@ import {
|
||||
import Upload from '../upload/Upload';
|
||||
import { formatDate } from '../utils/dateTimeUtils';
|
||||
import FileTreeComponent from '../components/FileTreeComponent';
|
||||
import ConnectorTreeComponent from '../components/ConnectorTreeComponent';
|
||||
import Chunks from '../components/Chunks';
|
||||
|
||||
const formatTokens = (tokens: number): string => {
|
||||
@@ -272,11 +272,19 @@ export default function Sources({
|
||||
return documentToView ? (
|
||||
<div className="mt-8 flex flex-col">
|
||||
{documentToView.isNested ? (
|
||||
<FileTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
documentToView.type === 'connector' ? (
|
||||
<ConnectorTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
) : (
|
||||
<FileTreeComponent
|
||||
docId={documentToView.id || ''}
|
||||
sourceName={documentToView.name}
|
||||
onBackToDocuments={() => setDocumentToView(undefined)}
|
||||
/>
|
||||
)
|
||||
) : (
|
||||
<Chunks
|
||||
documentId={documentToView.id || ''}
|
||||
@@ -310,7 +318,7 @@ export default function Sources({
|
||||
setSearchTerm(e.target.value);
|
||||
setCurrentPage(1);
|
||||
}}
|
||||
className="w-full h-[32px] rounded-full border border-silver dark:border-silver/40 bg-transparent px-3 text-sm text-jet dark:text-bright-gray placeholder:text-gray-400 dark:placeholder:text-gray-500 outline-none focus:border-silver dark:focus:border-silver/60"
|
||||
className="border-silver dark:border-silver/40 text-jet dark:text-bright-gray focus:border-silver dark:focus:border-silver/60 h-[32px] w-full rounded-full border bg-transparent px-3 text-sm outline-none placeholder:text-gray-400 dark:placeholder:text-gray-500"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
@@ -327,7 +335,7 @@ export default function Sources({
|
||||
</div>
|
||||
<div className="relative w-full">
|
||||
{loading ? (
|
||||
<div className="w-full grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6 px-2 py-4">
|
||||
<div className="grid w-full grid-cols-1 gap-6 px-2 py-4 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4">
|
||||
<SkeletonLoader component="sourceCards" count={rowsPerPage} />
|
||||
</div>
|
||||
) : !currentDocuments?.length ? (
|
||||
@@ -342,19 +350,19 @@ export default function Sources({
|
||||
</p>
|
||||
</div>
|
||||
) : (
|
||||
<div className="w-full grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6 px-2 py-4">
|
||||
{currentDocuments.map((document, index) => {
|
||||
const docId = document.id ? document.id.toString() : '';
|
||||
<div className="grid w-full grid-cols-1 gap-6 px-2 py-4 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4">
|
||||
{currentDocuments.map((document, index) => {
|
||||
const docId = document.id ? document.id.toString() : '';
|
||||
|
||||
return (
|
||||
<div key={docId} className="relative">
|
||||
<div
|
||||
className={`flex h-[130px] w-full flex-col rounded-2xl bg-[#F9F9F9] p-3 transition-all duration-200 dark:bg-[#383838] ${
|
||||
activeMenuId === docId || syncMenuState.docId === docId
|
||||
? 'scale-[1.05]'
|
||||
: 'hover:scale-[1.05]'
|
||||
}`}
|
||||
>
|
||||
return (
|
||||
<div key={docId} className="relative">
|
||||
<div
|
||||
className={`flex h-[130px] w-full flex-col rounded-2xl bg-[#F9F9F9] p-3 transition-all duration-200 dark:bg-[#383838] ${
|
||||
activeMenuId === docId || syncMenuState.docId === docId
|
||||
? 'scale-[1.05]'
|
||||
: 'hover:scale-[1.05]'
|
||||
}`}
|
||||
>
|
||||
<div className="w-full flex-1">
|
||||
<div className="flex w-full items-center justify-between gap-2">
|
||||
<h3
|
||||
@@ -418,7 +426,7 @@ export default function Sources({
|
||||
<img
|
||||
src={CalendarIcon}
|
||||
alt=""
|
||||
className="w-[14px] h-[14px]"
|
||||
className="h-[14px] w-[14px]"
|
||||
/>
|
||||
<span className="font-inter text-[12px] leading-[18px] font-[500] text-[#848484] dark:text-[#848484]">
|
||||
{document.date ? formatDate(document.date) : ''}
|
||||
@@ -428,7 +436,7 @@ export default function Sources({
|
||||
<img
|
||||
src={DiscIcon}
|
||||
alt=""
|
||||
className="w-[14px] h-[14px]"
|
||||
className="h-[14px] w-[14px]"
|
||||
/>
|
||||
<span className="font-inter text-[12px] leading-[18px] font-[500] text-[#848484] dark:text-[#848484]">
|
||||
{document.tokens
|
||||
|
||||
@@ -4,6 +4,13 @@ import { useTranslation } from 'react-i18next';
|
||||
import { useDispatch, useSelector } from 'react-redux';
|
||||
|
||||
import userService from '../api/services/userService';
|
||||
import {
|
||||
getSessionToken,
|
||||
setSessionToken,
|
||||
removeSessionToken,
|
||||
} from '../utils/providerUtils';
|
||||
import { formatDate } from '../utils/dateTimeUtils';
|
||||
import { formatBytes } from '../utils/stringUtils';
|
||||
import FileUpload from '../assets/file_upload.svg';
|
||||
import WebsiteCollect from '../assets/website_collect.svg';
|
||||
import Dropdown from '../components/Dropdown';
|
||||
@@ -25,6 +32,9 @@ import {
|
||||
IngestorFormSchemas,
|
||||
IngestorType,
|
||||
} from './types/ingestor';
|
||||
import FileIcon from '../assets/file.svg';
|
||||
import FolderIcon from '../assets/folder.svg';
|
||||
import ConnectorAuth from '../components/ConnectorAuth';
|
||||
|
||||
function Upload({
|
||||
receivedFile = [],
|
||||
@@ -48,6 +58,23 @@ function Upload({
|
||||
const [activeTab, setActiveTab] = useState<string | null>(renderTab);
|
||||
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
|
||||
|
||||
// Google Drive state
|
||||
const [isGoogleDriveConnected, setIsGoogleDriveConnected] = useState(false);
|
||||
const [googleDriveFiles, setGoogleDriveFiles] = useState<any[]>([]);
|
||||
const [selectedFiles, setSelectedFiles] = useState<string[]>([]);
|
||||
const [isLoadingFiles, setIsLoadingFiles] = useState(false);
|
||||
const [isAuthenticating, setIsAuthenticating] = useState(false);
|
||||
const [userEmail, setUserEmail] = useState<string>('');
|
||||
const [authError, setAuthError] = useState<string>('');
|
||||
const [currentFolderId, setCurrentFolderId] = useState<string | null>(null);
|
||||
const [folderPath, setFolderPath] = useState<
|
||||
Array<{ id: string | null; name: string }>
|
||||
>([{ id: null, name: 'My Drive' }]);
|
||||
|
||||
const [nextPageToken, setNextPageToken] = useState<string | null>(null);
|
||||
const [hasMoreFiles, setHasMoreFiles] = useState<boolean>(false);
|
||||
const scrollContainerRef = useRef<HTMLDivElement | null>(null);
|
||||
|
||||
const renderFormFields = () => {
|
||||
const schema = IngestorFormSchemas[ingestor.type];
|
||||
if (!schema) return null;
|
||||
@@ -204,6 +231,7 @@ function Upload({
|
||||
{ label: 'Link', value: 'url' },
|
||||
{ label: 'GitHub', value: 'github' },
|
||||
{ label: 'Reddit', value: 'reddit' },
|
||||
{ label: 'Google Drive', value: 'google_drive' },
|
||||
];
|
||||
|
||||
const sourceDocs = useSelector(selectSourceDocs);
|
||||
@@ -428,29 +456,40 @@ function Upload({
|
||||
formData.append('user', 'local');
|
||||
formData.append('source', ingestor.type);
|
||||
|
||||
const defaultConfig = IngestorDefaultConfigs[ingestor.type].config;
|
||||
let configData;
|
||||
|
||||
const mergedConfig = { ...defaultConfig, ...ingestor.config };
|
||||
const filteredConfig = Object.entries(mergedConfig).reduce(
|
||||
(acc, [key, value]) => {
|
||||
const field = IngestorFormSchemas[ingestor.type].find(
|
||||
(f) => f.name === key,
|
||||
);
|
||||
// Include the field if:
|
||||
// 1. It's required, or
|
||||
// 2. It's optional and has a non-empty value
|
||||
if (
|
||||
field?.required ||
|
||||
(value !== undefined && value !== null && value !== '')
|
||||
) {
|
||||
acc[key] = value;
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
{} as Record<string, any>,
|
||||
);
|
||||
if (ingestor.type === 'google_drive') {
|
||||
const sessionToken = getSessionToken(ingestor.type);
|
||||
|
||||
formData.append('data', JSON.stringify(filteredConfig));
|
||||
const selectedItems = googleDriveFiles.filter((file) =>
|
||||
selectedFiles.includes(file.id),
|
||||
);
|
||||
const selectedFolderIds = selectedItems
|
||||
.filter(
|
||||
(item) =>
|
||||
item.type === 'application/vnd.google-apps.folder' || item.isFolder,
|
||||
)
|
||||
.map((folder) => folder.id);
|
||||
|
||||
const selectedFileIds = selectedItems
|
||||
.filter(
|
||||
(item) =>
|
||||
item.type !== 'application/vnd.google-apps.folder' &&
|
||||
!item.isFolder,
|
||||
)
|
||||
.map((file) => file.id);
|
||||
|
||||
configData = {
|
||||
file_ids: selectedFileIds,
|
||||
folder_ids: selectedFolderIds,
|
||||
recursive: ingestor.config.recursive,
|
||||
session_token: sessionToken || null,
|
||||
};
|
||||
} else {
|
||||
configData = { ...ingestor.config };
|
||||
}
|
||||
|
||||
formData.append('data', JSON.stringify(configData));
|
||||
|
||||
const apiHost: string = import.meta.env.VITE_API_HOST;
|
||||
const xhr = new XMLHttpRequest();
|
||||
@@ -477,6 +516,181 @@ function Upload({
|
||||
xhr.setRequestHeader('Authorization', `Bearer ${token}`);
|
||||
xhr.send(formData);
|
||||
};
|
||||
|
||||
// When the Google Drive ingestor is active and a session token is already
// stored, optimistically mark the connector as connected and then validate
// the token with the backend (which also loads the first page of files).
useEffect(() => {
  if (ingestor.type !== 'google_drive') return;

  const storedToken = getSessionToken(ingestor.type);
  if (!storedToken) return;

  // A stored token is treated as an existing connection until validation
  // says otherwise.
  setIsGoogleDriveConnected(true);
  setAuthError('');

  fetchUserEmailAndLoadFiles(storedToken);
}, [ingestor.type]);
|
||||
|
||||
/**
 * Validates a stored Google Drive session token against the backend.
 *
 * On success the connected account's email is recorded, the file list and
 * pagination state are reset, and the first page of root files is requested.
 * When the backend rejects the token it is removed from storage and the UI
 * returns to the disconnected state with an error message. A thrown error
 * (e.g. network failure) also disconnects the UI but leaves the stored token
 * in place.
 *
 * @param sessionToken Session token previously stored for the connector.
 */
const fetchUserEmailAndLoadFiles = async (sessionToken: string) => {
  // Shared handling for a token the backend refused.
  const rejectSession = (message: string) => {
    removeSessionToken(ingestor.type);
    setIsGoogleDriveConnected(false);
    setAuthError(message);
  };

  try {
    const apiHost = import.meta.env.VITE_API_HOST;
    const payload = JSON.stringify({
      provider: 'google_drive',
      session_token: sessionToken,
    });

    const validateResponse = await fetch(
      `${apiHost}/api/connectors/validate-session`,
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Authorization: `Bearer ${token}`,
        },
        body: payload,
      },
    );

    if (!validateResponse.ok) {
      rejectSession('Session expired. Please reconnect to Google Drive.');
      return;
    }

    const validateData = await validateResponse.json();

    if (!validateData.success) {
      rejectSession(
        validateData.error ||
          'Session expired. Please reconnect your Google Drive account and make sure to grant offline access.',
      );
      return;
    }

    setUserEmail(validateData.user_email || 'Connected User');

    // Start from a clean listing before requesting the root folder.
    setGoogleDriveFiles([]);
    setNextPageToken(null);
    setHasMoreFiles(false);
    loadGoogleDriveFiles(sessionToken, null, null, false);
  } catch (error) {
    console.error('Error validating Google Drive session:', error);
    setAuthError('Failed to validate session. Please reconnect.');
    setIsGoogleDriveConnected(false);
  }
};
|
||||
|
||||
/**
 * Requests one page of Google Drive entries from the backend and stores it.
 *
 * @param sessionToken Connector session token sent with the request.
 * @param folderId Folder to list; omitted from the request when falsy.
 * @param pageToken Pagination cursor; omitted from the request when falsy.
 * @param append When true the page is appended to the current list,
 *   otherwise it replaces the list.
 *
 * The loading flag is raised for the duration of the request. Failures are
 * logged and surfaced through `authError`; a successful load clears any
 * previously shown error so a stale banner does not outlive a later success.
 */
const loadGoogleDriveFiles = async (
  sessionToken: string,
  folderId?: string | null,
  pageToken?: string | null,
  append = false,
) => {
  setIsLoadingFiles(true);

  try {
    const apiHost = import.meta.env.VITE_API_HOST;
    const requestBody: Record<string, string | number> = {
      session_token: sessionToken,
      limit: 10,
    };
    if (folderId) {
      requestBody.folder_id = folderId;
    }
    if (pageToken) {
      requestBody.page_token = pageToken;
    }

    const filesResponse = await fetch(`${apiHost}/api/connectors/files`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${token}`,
      },
      body: JSON.stringify({ ...requestBody, provider: 'google_drive' }),
    });

    if (!filesResponse.ok) {
      throw new Error(`Failed to load files: ${filesResponse.status}`);
    }

    const filesData = await filesResponse.json();

    if (!filesData.success || !Array.isArray(filesData.files)) {
      throw new Error(filesData.error || 'Failed to load files');
    }

    // The listing worked, so an error left over from an earlier failed
    // request is no longer accurate.
    setAuthError('');
    setGoogleDriveFiles((prev) =>
      append ? [...prev, ...filesData.files] : filesData.files,
    );
    setNextPageToken(filesData.next_page_token || null);
    setHasMoreFiles(Boolean(filesData.has_more));
  } catch (error) {
    console.error('Error loading Google Drive files:', error);
    setAuthError(
      error instanceof Error
        ? error.message
        : 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.',
    );
  } finally {
    setIsLoadingFiles(false);
  }
};
|
||||
|
||||
// Toggle one Drive entry in the selection: an id that is already selected is
// removed, any other id is appended.
const handleFileSelect = (fileId: string) => {
  setSelectedFiles((current) =>
    current.includes(fileId)
      ? current.filter((selectedId) => selectedId !== fileId)
      : [...current, fileId],
  );
};
|
||||
|
||||
// Descend into a folder: extend the breadcrumb trail, clear the current
// listing, pagination and selection, then load the folder's first page.
// Does nothing when no session token is stored.
const handleFolderClick = (folderId: string, folderName: string) => {
  const sessionToken = getSessionToken(ingestor.type);
  if (!sessionToken) return;

  const crumb = { id: folderId, name: folderName };
  setCurrentFolderId(folderId);
  setFolderPath((trail) => [...trail, crumb]);

  setGoogleDriveFiles([]);
  setNextPageToken(null);
  setHasMoreFiles(false);
  setSelectedFiles([]);
  loadGoogleDriveFiles(sessionToken, folderId, null, false);
};
|
||||
|
||||
// Jump to the breadcrumb entry at `index`: truncate the trail to that entry,
// clear the current listing, pagination and selection, then load that
// folder's first page. Does nothing when no session token is stored.
const navigateBack = (index: number) => {
  const sessionToken = getSessionToken(ingestor.type);
  if (!sessionToken) return;

  const trimmedPath = folderPath.slice(0, index + 1);
  const destinationId = trimmedPath[trimmedPath.length - 1]?.id;

  setCurrentFolderId(destinationId as string | null);
  setFolderPath(trimmedPath);

  setGoogleDriveFiles([]);
  setNextPageToken(null);
  setHasMoreFiles(false);
  setSelectedFiles([]);
  loadGoogleDriveFiles(sessionToken, destinationId ?? null, null, false);
};
|
||||
|
||||
// Select-all toggle: when the selection count equals the number of listed
// entries the selection is cleared, otherwise every listed entry is selected.
const handleSelectAll = () => {
  const everythingSelected = selectedFiles.length === googleDriveFiles.length;
  setSelectedFiles(
    everythingSelected ? [] : googleDriveFiles.map((file) => file.id),
  );
};
|
||||
|
||||
const { getRootProps, getInputProps, isDragActive } = useDropzone({
|
||||
onDrop,
|
||||
multiple: true,
|
||||
@@ -515,6 +729,10 @@ function Upload({
|
||||
if (!remoteName?.trim()) {
|
||||
return true;
|
||||
}
|
||||
if (ingestor.type === 'google_drive') {
|
||||
return !isGoogleDriveConnected || selectedFiles.length === 0;
|
||||
}
|
||||
|
||||
const formFields: FormField[] = IngestorFormSchemas[ingestor.type];
|
||||
for (const field of formFields) {
|
||||
if (field.required) {
|
||||
@@ -636,7 +854,7 @@ function Upload({
|
||||
{files.map((file) => (
|
||||
<p
|
||||
key={file.name}
|
||||
className="text-gray-6000 truncate overflow-hidden text-ellipsis"
|
||||
className="text-gray-6000 truncate overflow-hidden text-ellipsis dark:text-[#ececf1]"
|
||||
title={file.name}
|
||||
>
|
||||
{file.name}
|
||||
@@ -679,6 +897,253 @@ function Upload({
|
||||
required={true}
|
||||
labelBgClassName="bg-white dark:bg-charleston-green-2"
|
||||
/>
|
||||
{ingestor.type === 'google_drive' && (
|
||||
<div className="space-y-4">
|
||||
{authError && (
|
||||
<div className="rounded-lg border border-red-200 bg-red-50 p-3 dark:border-red-600 dark:bg-red-900/20">
|
||||
<p className="text-sm text-red-600 dark:text-red-400">
|
||||
⚠️ {authError}
|
||||
</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!isGoogleDriveConnected ? (
|
||||
<ConnectorAuth
|
||||
provider="google_drive"
|
||||
onSuccess={(data) => {
|
||||
setUserEmail(data.user_email);
|
||||
setIsGoogleDriveConnected(true);
|
||||
setIsAuthenticating(false);
|
||||
setAuthError('');
|
||||
|
||||
if (data.session_token) {
|
||||
setSessionToken(ingestor.type, data.session_token);
|
||||
loadGoogleDriveFiles(data.session_token, null);
|
||||
}
|
||||
}}
|
||||
onError={(error) => {
|
||||
setAuthError(error);
|
||||
setIsAuthenticating(false);
|
||||
setIsGoogleDriveConnected(false);
|
||||
}}
|
||||
/>
|
||||
) : (
|
||||
<div className="space-y-4">
|
||||
{/* Connection Status */}
|
||||
<div className="flex w-full items-center justify-between rounded-lg bg-green-500 px-4 py-2 text-sm text-white">
|
||||
<div className="flex items-center gap-2">
|
||||
<svg className="h-4 w-4" viewBox="0 0 24 24">
|
||||
<path
|
||||
fill="currentColor"
|
||||
d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"
|
||||
/>
|
||||
</svg>
|
||||
<span>Connected as {userEmail}</span>
|
||||
</div>
|
||||
<button
|
||||
onClick={() => {
  // Read the token BEFORE clearing it from storage: the disconnect
  // request below has to carry it, and reading it after removal
  // would always send null.
  const sessionToken = getSessionToken(ingestor.type);
  removeSessionToken(ingestor.type);

  setIsGoogleDriveConnected(false);
  setGoogleDriveFiles([]);
  setSelectedFiles([]);
  setUserEmail('');
  setAuthError('');

  // Reconnecting loads the root folder, so folder navigation and
  // pagination state must not survive a disconnect.
  setCurrentFolderId(null);
  setFolderPath([{ id: null, name: 'My Drive' }]);
  setNextPageToken(null);
  setHasMoreFiles(false);

  // Best-effort notification to the backend; the local state is
  // already cleared, so a failure is only logged.
  const apiHost = import.meta.env.VITE_API_HOST;
  fetch(`${apiHost}/api/connectors/disconnect`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${token}`,
    },
    body: JSON.stringify({
      provider: ingestor.type,
      session_token: sessionToken,
    }),
  }).catch((err) =>
    console.error(
      'Error disconnecting from Google Drive:',
      err,
    ),
  );
}}
|
||||
className="text-xs text-white underline hover:text-gray-200"
|
||||
>
|
||||
Disconnect
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{/* File Browser */}
|
||||
<div className="rounded-lg border border-gray-200 dark:border-gray-600">
|
||||
<div className="rounded-t-lg border-b border-gray-200 bg-gray-50 p-3 dark:border-gray-600 dark:bg-gray-800">
|
||||
{/* Breadcrumb navigation */}
|
||||
<div className="mb-2 flex items-center gap-1">
|
||||
{folderPath.map((path, index) => (
|
||||
<div
|
||||
key={path.id || 'root'}
|
||||
className="flex items-center gap-1"
|
||||
>
|
||||
{index > 0 && (
|
||||
<span className="text-gray-400">/</span>
|
||||
)}
|
||||
<button
|
||||
onClick={() => navigateBack(index)}
|
||||
className="text-sm text-blue-600 hover:text-blue-800 hover:underline dark:text-blue-400"
|
||||
disabled={index === folderPath.length - 1}
|
||||
>
|
||||
{path.name}
|
||||
</button>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-between">
|
||||
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
|
||||
Select Files from Google Drive
|
||||
</h4>
|
||||
{googleDriveFiles.length > 0 && (
|
||||
<button
|
||||
onClick={handleSelectAll}
|
||||
className="text-xs text-blue-600 hover:text-blue-800 dark:text-blue-400"
|
||||
>
|
||||
{selectedFiles.length === googleDriveFiles.length
|
||||
? 'Deselect All'
|
||||
: 'Select All'}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{selectedFiles.length > 0 && (
|
||||
<p className="mt-1 text-xs text-gray-500">
|
||||
{selectedFiles.length} file
|
||||
{selectedFiles.length !== 1 ? 's' : ''} selected
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div
|
||||
className="max-h-72 overflow-y-auto"
|
||||
ref={scrollContainerRef}
|
||||
>
|
||||
{isLoadingFiles && googleDriveFiles.length === 0 ? (
|
||||
<div className="p-4 text-center">
|
||||
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
|
||||
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
|
||||
Loading files...
|
||||
</div>
|
||||
</div>
|
||||
) : googleDriveFiles.length === 0 ? (
|
||||
<div className="p-4 text-center text-sm text-gray-500 dark:text-gray-400">
|
||||
No files found in your Google Drive
|
||||
</div>
|
||||
) : (
|
||||
<>
|
||||
<div className="divide-y divide-gray-200 dark:divide-gray-600">
|
||||
{googleDriveFiles.map((file) => (
|
||||
<div
|
||||
key={file.id}
|
||||
className={`p-3 transition-colors ${
|
||||
selectedFiles.includes(file.id)
|
||||
? 'bg-blue-50 dark:bg-blue-900/20'
|
||||
: ''
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<div className="flex-shrink-0">
|
||||
<input
|
||||
type="checkbox"
|
||||
checked={selectedFiles.includes(
|
||||
file.id,
|
||||
)}
|
||||
onChange={() =>
|
||||
handleFileSelect(file.id)
|
||||
}
|
||||
className="h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
|
||||
/>
|
||||
</div>
|
||||
{file.type ===
|
||||
'application/vnd.google-apps.folder' ||
|
||||
file.isFolder ? (
|
||||
<div
|
||||
className="cursor-pointer text-lg hover:text-blue-600"
|
||||
onClick={() =>
|
||||
handleFolderClick(file.id, file.name)
|
||||
}
|
||||
>
|
||||
<img
|
||||
src={FolderIcon}
|
||||
alt="Folder"
|
||||
className="h-6 w-6"
|
||||
/>
|
||||
</div>
|
||||
) : (
|
||||
<div className="text-lg">
|
||||
<img
|
||||
src={FileIcon}
|
||||
alt="File"
|
||||
className="h-6 w-6"
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
<div className="min-w-0 flex-1">
|
||||
<p
|
||||
className={`truncate text-sm font-medium dark:text-[#ececf1] ${
|
||||
file.type ===
|
||||
'application/vnd.google-apps.folder' ||
|
||||
file.isFolder
|
||||
? 'cursor-pointer hover:text-blue-600'
|
||||
: ''
|
||||
}`}
|
||||
onClick={() => {
|
||||
if (
|
||||
file.type ===
|
||||
'application/vnd.google-apps.folder' ||
|
||||
file.isFolder
|
||||
) {
|
||||
handleFolderClick(
|
||||
file.id,
|
||||
file.name,
|
||||
);
|
||||
}
|
||||
}}
|
||||
>
|
||||
{file.name}
|
||||
</p>
|
||||
<p className="text-xs text-gray-500 dark:text-gray-400">
|
||||
{file.size &&
|
||||
`${formatBytes(file.size)} • `}
|
||||
Modified {formatDate(file.modifiedTime)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
|
||||
<div className="flex items-center justify-center border-t border-gray-100 p-4 dark:border-gray-800">
|
||||
{isLoadingFiles && (
|
||||
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
|
||||
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
|
||||
Loading more files...
|
||||
</div>
|
||||
)}
|
||||
{!hasMoreFiles && !isLoadingFiles && (
|
||||
<span className="text-sm text-gray-500 dark:text-gray-400">
|
||||
All files loaded
|
||||
</span>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="hidden" aria-hidden="true"></div>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{renderFormFields()}
|
||||
{IngestorFormSchemas[ingestor.type].some(
|
||||
(field) => field.advanced,
|
||||
@@ -719,7 +1184,9 @@ function Upload({
|
||||
: 'bg-purple-30 hover:bg-violets-are-blue cursor-pointer text-white'
|
||||
}`}
|
||||
>
|
||||
{t('modals.uploadDoc.train')}
|
||||
{ingestor.type === 'google_drive' && selectedFiles.length > 0
|
||||
? `Train with ${selectedFiles.length} file${selectedFiles.length !== 1 ? 's' : ''}`
|
||||
: t('modals.uploadDoc.train')}
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
@@ -727,6 +1194,41 @@ function Upload({
|
||||
);
|
||||
}
|
||||
|
||||
// Infinite scroll for the Drive file list: when the list is scrolled to
// within 50px of its bottom and another page is available, request it and
// append the results. The listener is re-bound whenever the pagination
// inputs it closes over change.
useEffect(() => {
  const container = scrollContainerRef.current;

  const onScroll = () => {
    if (!container) return;

    const remaining =
      container.scrollHeight - container.scrollTop - container.clientHeight;
    const nearBottom = remaining < 50;
    if (!nearBottom) return;

    // Only fetch when a next page exists and no request is in flight.
    const canLoadMore = hasMoreFiles && !isLoadingFiles && nextPageToken;
    if (!canLoadMore) return;

    const sessionToken = getSessionToken(ingestor.type);
    if (!sessionToken) return;

    loadGoogleDriveFiles(sessionToken, currentFolderId, nextPageToken, true);
  };

  container?.addEventListener('scroll', onScroll);

  return () => {
    container?.removeEventListener('scroll', onScroll);
  };
}, [
  hasMoreFiles,
  isLoadingFiles,
  nextPageToken,
  currentFolderId,
  ingestor.type,
]);
|
||||
|
||||
return (
|
||||
<WrapperModal
|
||||
isPerformingTask={progress !== undefined && progress.percentage < 100}
|
||||
|
||||
@@ -22,7 +22,19 @@ export interface UrlIngestorConfig extends BaseIngestorConfig {
|
||||
url: string;
|
||||
}
|
||||
|
||||
export type IngestorType = 'crawler' | 'github' | 'reddit' | 'url';
|
||||
export interface GoogleDriveIngestorConfig extends BaseIngestorConfig {
|
||||
folder_id?: string;
|
||||
file_ids?: string;
|
||||
recursive?: boolean;
|
||||
token_info?: any;
|
||||
}
|
||||
|
||||
export type IngestorType =
|
||||
| 'crawler'
|
||||
| 'github'
|
||||
| 'reddit'
|
||||
| 'url'
|
||||
| 'google_drive';
|
||||
|
||||
export interface IngestorConfig {
|
||||
type: IngestorType;
|
||||
@@ -31,7 +43,8 @@ export interface IngestorConfig {
|
||||
| RedditIngestorConfig
|
||||
| GithubIngestorConfig
|
||||
| CrawlerIngestorConfig
|
||||
| UrlIngestorConfig;
|
||||
| UrlIngestorConfig
|
||||
| GoogleDriveIngestorConfig;
|
||||
}
|
||||
|
||||
export type IngestorFormData = {
|
||||
@@ -109,6 +122,14 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
|
||||
required: true,
|
||||
},
|
||||
],
|
||||
google_drive: [
|
||||
{
|
||||
name: 'recursive',
|
||||
label: 'Include subfolders',
|
||||
type: 'boolean',
|
||||
required: false,
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
export const IngestorDefaultConfigs: Record<
|
||||
@@ -143,4 +164,12 @@ export const IngestorDefaultConfigs: Record<
|
||||
repo_url: '',
|
||||
} as GithubIngestorConfig,
|
||||
},
|
||||
google_drive: {
|
||||
name: '',
|
||||
config: {
|
||||
folder_id: '',
|
||||
file_ids: '',
|
||||
recursive: true,
|
||||
} as GoogleDriveIngestorConfig,
|
||||
},
|
||||
};
|
||||
|
||||
16
frontend/src/utils/providerUtils.ts
Normal file
16
frontend/src/utils/providerUtils.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* Utility functions for managing session tokens for different cloud service providers.
|
||||
* Follows the convention: {provider}_session_token
|
||||
*/
|
||||
|
||||
/**
 * Looks up the session token saved for a provider.
 *
 * @param provider - Provider identifier used as the storage-key prefix.
 * @returns The value stored in localStorage under `{provider}_session_token`,
 *   or `null` when no such entry exists.
 */
export const getSessionToken = (provider: string): string | null => {
  const storageKey = `${provider}_session_token`;
  return localStorage.getItem(storageKey);
};
|
||||
|
||||
/**
 * Saves a session token for a provider.
 *
 * @param provider - Provider identifier used as the storage-key prefix.
 * @param token - Token value written to localStorage under
 *   `{provider}_session_token`, replacing any previous value.
 */
export const setSessionToken = (provider: string, token: string): void => {
  const storageKey = `${provider}_session_token`;
  localStorage.setItem(storageKey, token);
};
|
||||
|
||||
/**
 * Deletes the session token saved for a provider.
 *
 * @param provider - Provider identifier used as the storage-key prefix; the
 *   localStorage entry `{provider}_session_token` is removed.
 */
export const removeSessionToken = (provider: string): void => {
  const storageKey = `${provider}_session_token`;
  localStorage.removeItem(storageKey);
};
|
||||
@@ -2,3 +2,12 @@ export function truncate(str: string, n: number) {
|
||||
// slices long strings and ends with ...
|
||||
return str.length > n ? str.slice(0, n - 1) + '...' : str;
|
||||
}
|
||||
|
||||
/**
 * Formats a byte count as a human-readable size string (e.g. `1.5 KB`).
 *
 * Uses 1024-based units from Bytes up to TB and at most two decimal places.
 *
 * @param bytes - Size in bytes. `null`, `0`, negative, `NaN` and infinite
 *   values yield an empty string.
 * @returns The formatted size, or `''` when there is nothing meaningful to show.
 */
export function formatBytes(bytes: number | null): string {
  if (!bytes || bytes <= 0 || !Number.isFinite(bytes)) return '';

  const k = 1024;
  const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];

  const exponent = Math.floor(Math.log(bytes) / Math.log(k));
  // Clamp the unit index: fractional inputs (< 1 byte) would otherwise give a
  // negative index, and values of 1024 TB or more would run past the last
  // unit, both of which rendered as "<n> undefined".
  const i = Math.min(Math.max(exponent, 0), sizes.length - 1);

  return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
}
|
||||
|
||||
49
setup.ps1
49
setup.ps1
@@ -9,7 +9,9 @@ $ErrorActionPreference = "Stop"
|
||||
|
||||
# Get current script directory
|
||||
$SCRIPT_DIR = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
||||
$COMPOSE_FILE = Join-Path -Path $SCRIPT_DIR -ChildPath "deployment\docker-compose.yaml"
|
||||
$COMPOSE_FILE_HUB = Join-Path -Path $SCRIPT_DIR -ChildPath "deployment\docker-compose-hub.yaml"
|
||||
$COMPOSE_FILE_LOCAL = Join-Path -Path $SCRIPT_DIR -ChildPath "deployment\docker-compose.yaml"
|
||||
$COMPOSE_FILE = $COMPOSE_FILE_HUB
|
||||
$ENV_FILE = Join-Path -Path $SCRIPT_DIR -ChildPath ".env"
|
||||
|
||||
# Function to write colored text
|
||||
@@ -223,12 +225,15 @@ function Prompt-MainMenu {
|
||||
Write-Host ""
|
||||
Write-ColorText "Welcome to DocsGPT Setup!" -ForegroundColor "White" -Bold
|
||||
Write-ColorText "How would you like to proceed?" -ForegroundColor "White"
|
||||
Write-ColorText "1) Use DocsGPT Public API Endpoint (simple and free)" -ForegroundColor "Yellow"
|
||||
Write-ColorText "1) Use DocsGPT Public API Endpoint (simple and free, uses pre-built Docker images from Docker Hub for fastest setup)" -ForegroundColor "Yellow"
|
||||
Write-ColorText "2) Serve Local (with Ollama)" -ForegroundColor "Yellow"
|
||||
Write-ColorText "3) Connect Local Inference Engine" -ForegroundColor "Yellow"
|
||||
Write-ColorText "4) Connect Cloud API Provider" -ForegroundColor "Yellow"
|
||||
Write-ColorText "5) Advanced: Build images locally (for developers)" -ForegroundColor "Yellow"
|
||||
Write-Host ""
|
||||
$script:main_choice = Read-Host "Choose option (1-4)"
|
||||
Write-ColorText "By default, DocsGPT uses pre-built images from Docker Hub for a fast, reliable, and consistent experience. This avoids local build errors and speeds up onboarding. Advanced users can choose to build images locally if needed." -ForegroundColor "White"
|
||||
Write-Host ""
|
||||
$script:main_choice = Read-Host "Choose option (1-5)"
|
||||
}
|
||||
|
||||
# Function to prompt for Local Inference Engine options
|
||||
@@ -304,9 +309,9 @@ function Use-DocsPublicAPIEndpoint {
|
||||
|
||||
# Run Docker compose commands
|
||||
try {
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" build
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
throw "Docker compose build failed with exit code $LASTEXITCODE"
|
||||
throw "Docker compose pull failed with exit code $LASTEXITCODE"
|
||||
}
|
||||
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
|
||||
@@ -415,10 +420,10 @@ function Serve-LocalOllama {
|
||||
Write-Host ""
|
||||
Write-ColorText "Starting Docker Compose with Ollama ($docker_compose_file_suffix)..." -ForegroundColor "White"
|
||||
|
||||
# Build the containers
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" -f "$optional_compose" build
|
||||
# Pull the containers
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" -f "$optional_compose" pull
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
throw "Docker compose build failed with exit code $LASTEXITCODE"
|
||||
throw "Docker compose pull failed with exit code $LASTEXITCODE"
|
||||
}
|
||||
|
||||
# Start the containers
|
||||
@@ -575,10 +580,10 @@ function Connect-LocalInferenceEngine {
|
||||
Write-Host ""
|
||||
Write-ColorText "Starting Docker Compose..." -ForegroundColor "White"
|
||||
|
||||
# Build the containers
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" build
|
||||
# Pull the containers
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
throw "Docker compose build failed with exit code $LASTEXITCODE"
|
||||
throw "Docker compose pull failed with exit code $LASTEXITCODE"
|
||||
}
|
||||
|
||||
# Start the containers
|
||||
@@ -706,10 +711,12 @@ function Connect-CloudAPIProvider {
|
||||
Write-ColorText "Starting Docker Compose..." -ForegroundColor "White"
|
||||
|
||||
# Run Docker compose commands
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d --build
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull
|
||||
if ($LASTEXITCODE -ne 0) {
|
||||
throw "Docker compose build or up failed with exit code $LASTEXITCODE"
|
||||
throw "Docker compose pull failed with exit code $LASTEXITCODE"
|
||||
}
|
||||
|
||||
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
|
||||
|
||||
Write-Host ""
|
||||
Write-ColorText "DocsGPT is now configured to use $provider_name on http://localhost:5173" -ForegroundColor "Green"
|
||||
@@ -735,13 +742,13 @@ while ($true) {
|
||||
|
||||
switch ($main_choice) {
|
||||
"1" {
|
||||
$COMPOSE_FILE = $COMPOSE_FILE_HUB
|
||||
Use-DocsPublicAPIEndpoint
|
||||
$exitLoop = $true # Set flag to true on completion
|
||||
break
|
||||
}
|
||||
"2" {
|
||||
Serve-LocalOllama
|
||||
# Only exit the loop if user didn't press "b" to go back
|
||||
if ($ollama_choice -ne "b" -and $ollama_choice -ne "B") {
|
||||
$exitLoop = $true
|
||||
}
|
||||
@@ -749,7 +756,6 @@ while ($true) {
|
||||
}
|
||||
"3" {
|
||||
Connect-LocalInferenceEngine
|
||||
# Only exit the loop if user didn't press "b" to go back
|
||||
if ($engine_choice -ne "b" -and $engine_choice -ne "B") {
|
||||
$exitLoop = $true
|
||||
}
|
||||
@@ -757,20 +763,25 @@ while ($true) {
|
||||
}
|
||||
"4" {
|
||||
Connect-CloudAPIProvider
|
||||
# Only exit the loop if user didn't press "b" to go back
|
||||
if ($provider_choice -ne "b" -and $provider_choice -ne "B") {
|
||||
$exitLoop = $true
|
||||
}
|
||||
break
|
||||
}
|
||||
"5" {
|
||||
Write-Host ""
|
||||
Write-ColorText "You have selected to build images locally. This is recommended for developers or if you want to test local changes." -ForegroundColor "Yellow"
|
||||
$COMPOSE_FILE = $COMPOSE_FILE_LOCAL
|
||||
Use-DocsPublicAPIEndpoint
|
||||
$exitLoop = $true
|
||||
break
|
||||
}
|
||||
default {
|
||||
Write-Host ""
|
||||
Write-ColorText "Invalid choice. Please choose 1-4." -ForegroundColor "Red"
|
||||
Write-ColorText "Invalid choice. Please choose 1-5." -ForegroundColor "Red"
|
||||
Start-Sleep -Seconds 1
|
||||
}
|
||||
}
|
||||
|
||||
# Only break out of the loop if a function completed successfully
|
||||
if ($exitLoop) {
|
||||
break
|
||||
}
|
||||
|
||||
29
setup.sh
29
setup.sh
@@ -9,7 +9,8 @@ NC='\033[0m'
|
||||
BOLD='\033[1m'
|
||||
|
||||
# Base Compose file (relative to script location)
|
||||
COMPOSE_FILE="$(dirname "$(readlink -f "$0")")/deployment/docker-compose.yaml"
|
||||
COMPOSE_FILE="$(dirname "$(readlink -f "$0")")/deployment/docker-compose-hub.yaml"
|
||||
COMPOSE_FILE_LOCAL="$(dirname "$(readlink -f "$0")")/deployment/docker-compose.yaml"
|
||||
ENV_FILE="$(dirname "$(readlink -f "$0")")/.env"
|
||||
|
||||
# Animation function
|
||||
@@ -111,12 +112,15 @@ check_and_start_docker() {
|
||||
prompt_main_menu() {
|
||||
echo -e "\n${DEFAULT_FG}${BOLD}Welcome to DocsGPT Setup!${NC}"
|
||||
echo -e "${DEFAULT_FG}How would you like to proceed?${NC}"
|
||||
echo -e "${YELLOW}1) Use DocsGPT Public API Endpoint (simple and free)${NC}"
|
||||
echo -e "${YELLOW}1) Use DocsGPT Public API Endpoint (simple and free, uses pre-built Docker images from Docker Hub for fastest setup)${NC}"
|
||||
echo -e "${YELLOW}2) Serve Local (with Ollama)${NC}"
|
||||
echo -e "${YELLOW}3) Connect Local Inference Engine${NC}"
|
||||
echo -e "${YELLOW}4) Connect Cloud API Provider${NC}"
|
||||
echo -e "${YELLOW}5) Advanced: Build images locally (for developers)${NC}"
|
||||
echo
|
||||
read -p "$(echo -e "${DEFAULT_FG}Choose option (1-4): ${NC}")" main_choice
|
||||
echo -e "${DEFAULT_FG}By default, DocsGPT uses pre-built images from Docker Hub for a fast, reliable, and consistent experience. This avoids local build errors and speeds up onboarding. Advanced users can choose to build images locally if needed.${NC}"
|
||||
echo
|
||||
read -p "$(echo -e "${DEFAULT_FG}Choose option (1-5): ${NC}")" main_choice
|
||||
}
|
||||
|
||||
# Function to prompt for Local Inference Engine options
|
||||
@@ -176,7 +180,7 @@ use_docs_public_api_endpoint() {
|
||||
check_and_start_docker
|
||||
|
||||
echo -e "\n${NC}Starting Docker Compose...${NC}"
|
||||
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" build && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d
|
||||
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d
|
||||
docker_compose_status=$? # Capture exit status of docker compose
|
||||
|
||||
echo "Docker Compose Exit Status: $docker_compose_status"
|
||||
@@ -252,7 +256,7 @@ serve_local_ollama() {
|
||||
)
|
||||
|
||||
echo -e "\n${NC}Starting Docker Compose with Ollama (${docker_compose_file_suffix})...${NC}"
|
||||
docker compose --env-file "${ENV_FILE}" "${compose_files[@]}" build
|
||||
docker compose --env-file "${ENV_FILE}" "${compose_files[@]}" pull
|
||||
docker compose --env-file "${ENV_FILE}" "${compose_files[@]}" up -d
|
||||
docker_compose_status=$?
|
||||
|
||||
@@ -360,7 +364,7 @@ connect_local_inference_engine() {
|
||||
check_and_start_docker
|
||||
|
||||
echo -e "\n${NC}Starting Docker Compose...${NC}"
|
||||
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" build && docker compose -f "${COMPOSE_FILE}" up -d
|
||||
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose -f "${COMPOSE_FILE}" up -d
|
||||
docker_compose_status=$?
|
||||
|
||||
echo "Docker Compose Exit Status: $docker_compose_status" # Debug output
|
||||
@@ -449,7 +453,7 @@ connect_cloud_api_provider() {
|
||||
check_and_start_docker
|
||||
|
||||
echo -e "\n${NC}Starting Docker Compose...${NC}"
|
||||
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d --build
|
||||
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d
|
||||
docker_compose_status=$?
|
||||
|
||||
echo "Docker Compose Exit Status: $docker_compose_status" # Debug output
|
||||
@@ -468,12 +472,14 @@ connect_cloud_api_provider() {
|
||||
# Main script execution
|
||||
animate_dino
|
||||
|
||||
|
||||
while true; do # Main menu loop
|
||||
clear # Clear screen before showing main menu again
|
||||
prompt_main_menu
|
||||
|
||||
case $main_choice in
|
||||
1) # Use DocsGPT Public API Endpoint
|
||||
1) # Use DocsGPT Public API Endpoint (Docker Hub images)
|
||||
COMPOSE_FILE="$(dirname "$(readlink -f "$0")")/deployment/docker-compose-hub.yaml"
|
||||
use_docs_public_api_endpoint
|
||||
break ;;
|
||||
2) # Serve Local (with Ollama)
|
||||
@@ -485,8 +491,13 @@ while true; do # Main menu loop
|
||||
4) # Connect Cloud API Provider
|
||||
connect_cloud_api_provider
|
||||
break ;;
|
||||
5) # Advanced: Build images locally
|
||||
echo -e "\n${YELLOW}You have selected to build images locally. This is recommended for developers or if you want to test local changes.${NC}"
|
||||
COMPOSE_FILE="$COMPOSE_FILE_LOCAL"
|
||||
use_docs_public_api_endpoint
|
||||
break ;;
|
||||
*)
|
||||
echo -e "\n${RED}Invalid choice. Please choose 1-4.${NC}" ; sleep 1 ;;
|
||||
echo -e "\n${RED}Invalid choice. Please choose 1-5.${NC}" ; sleep 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
||||
Reference in New Issue
Block a user