Merge branch 'main' of https://github.com/siiddhantt/DocsGPT into pr/1930

This commit is contained in:
Siddhant Rai
2025-09-10 20:15:20 +05:30
51 changed files with 3792 additions and 247 deletions

View File

@@ -3,11 +3,11 @@
</h1>
<p align="center">
<strong>Open-Source RAG Assistant</strong>
<strong>Private AI for agents, assistants and enterprise search</strong>
</p>
<p align="left">
<strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> is an open-source genAI tool that helps users get reliable answers from any knowledge source, while avoiding hallucinations. It enables quick and reliable information retrieval, with tooling and agentic system capability built in.
<strong><a href="https://www.docsgpt.cloud/">DocsGPT</a></strong> is an open-source AI platform for building intelligent agents and assistants. Features Agent Builder, deep research tools, document analysis (PDF, Office, web content), Multi-model support (choose your provider or run locally), and rich API connectivity for agents with actionable tools and integrations. Deploy anywhere with complete privacy control.
</p>
<div align="center">

View File

@@ -1,3 +1,4 @@
import logging
import uuid
from abc import ABC, abstractmethod
from typing import Dict, Generator, List, Optional
@@ -6,15 +7,15 @@ from bson.objectid import ObjectId
from application.agents.tools.tool_action_parser import ToolActionParser
from application.agents.tools.tool_manager import ToolManager
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.llm.handlers.handler_creator import LLMHandlerCreator
from application.llm.llm_creator import LLMCreator
from application.logging import build_stack_data, log_activity, LogContext
from application.retriever.base import BaseRetriever
logger = logging.getLogger(__name__)
class BaseAgent(ABC):
def __init__(
@@ -139,6 +140,40 @@ class BaseAgent(ABC):
tool_id, action_name, call_args = parser.parse_args(call)
call_id = getattr(call, "id", None) or str(uuid.uuid4())
# Check if parsing failed
if tool_id is None or action_name is None:
error_message = f"Error: Failed to parse LLM tool call. Tool name: {getattr(call, 'name', 'unknown')}"
logger.error(error_message)
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
"action_name": getattr(call, 'name', 'unknown'),
"arguments": call_args or {},
"result": f"Failed to parse tool call. Invalid tool name format: {getattr(call, 'name', 'unknown')}",
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return "Failed to parse tool call.", call_id
# Check if tool_id exists in available tools
if tool_id not in tools_dict:
error_message = f"Error: Tool ID '{tool_id}' extracted from LLM call not found in available tools_dict. Available IDs: {list(tools_dict.keys())}"
logger.error(error_message)
# Return error result
tool_call_data = {
"tool_name": "unknown",
"call_id": call_id,
"action_name": f"{action_name}_{tool_id}",
"arguments": call_args,
"result": f"Tool with ID {tool_id} not found. Available tools: {list(tools_dict.keys())}",
}
yield {"type": "tool_call", "data": {**tool_call_data, "status": "error"}}
self.tool_calls.append(tool_call_data)
return f"Tool with ID {tool_id} not found.", call_id
tool_call_data = {
"tool_name": tools_dict[tool_id]["name"],
"call_id": call_id,

View File

@@ -19,8 +19,20 @@ class ToolActionParser:
def _parse_openai_llm(self, call):
try:
call_args = json.loads(call.arguments)
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
tool_parts = call.name.split("_")
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
if len(tool_parts) < 2:
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
return None, None, None
tool_id = tool_parts[-1]
action_name = "_".join(tool_parts[:-1])
# Validate that tool_id looks like a numerical ID
if not tool_id.isdigit():
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
except (AttributeError, TypeError, json.JSONDecodeError) as e:  # json.loads above may raise JSONDecodeError on malformed arguments
logger.error(f"Error parsing OpenAI LLM call: {e}")
return None, None, None
@@ -29,8 +41,20 @@ class ToolActionParser:
def _parse_google_llm(self, call):
try:
call_args = call.arguments
tool_id = call.name.split("_")[-1]
action_name = call.name.rsplit("_", 1)[0]
tool_parts = call.name.split("_")
# If the tool name doesn't contain an underscore, it's likely a hallucinated tool
if len(tool_parts) < 2:
logger.warning(f"Invalid tool name format: {call.name}. Expected format: action_name_tool_id")
return None, None, None
tool_id = tool_parts[-1]
action_name = "_".join(tool_parts[:-1])
# Validate that tool_id looks like a numerical ID
if not tool_id.isdigit():
logger.warning(f"Tool ID '{tool_id}' is not numerical. This might be a hallucinated tool call.")
except (AttributeError, TypeError) as e:
logger.error(f"Error parsing Google LLM call: {e}")
return None, None, None
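
Both parsers rely on the same {action_name}_{tool_id} naming convention. A standalone sketch of the split logic, to make the edge cases concrete:

    def split_tool_name(name: str):
        """Split 'action_name_tool_id'; return (None, None) on malformed input."""
        parts = name.split("_")
        if len(parts) < 2:  # no underscore: likely a hallucinated tool
            return None, None
        return "_".join(parts[:-1]), parts[-1]  # (action_name, tool_id)

    assert split_tool_name("search_web_12345") == ("search_web", "12345")
    assert split_tool_name("badname") == (None, None)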

View File

@@ -85,6 +85,7 @@ class StreamProcessor:
self._configure_source()
self._configure_retriever()
self._configure_agent()
self._load_conversation_history()
self._process_attachments()
@@ -260,6 +261,12 @@ class StreamProcessor:
)
self.initial_user_id = data_key.get("user")
self.decoded_token = {"sub": data_key.get("user")}
if data_key.get("source"):
self.source = {"active_docs": data_key["source"]}
if data_key.get("retriever"):
self.retriever_config["retriever_name"] = data_key["retriever"]
if data_key.get("chunks") is not None:
self.retriever_config["chunks"] = data_key["chunks"]
elif self.agent_key:
data_key = self._get_data_from_api_key(self.agent_key)
self.agent_config.update(
@@ -275,6 +282,12 @@ class StreamProcessor:
if self.is_shared_usage
else {"sub": data_key.get("user")}
)
if data_key.get("source"):
self.source = {"active_docs": data_key["source"]}
if data_key.get("retriever"):
self.retriever_config["retriever_name"] = data_key["retriever"]
if data_key.get("chunks") is not None:
self.retriever_config["chunks"] = data_key["chunks"]
else:
self.agent_config.update(
{
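
In both key branches above, settings stored with the API or agent key take precedence over the request's defaults. A minimal sketch of that override pattern (the helper name is hypothetical):

    def apply_key_overrides(data_key, source, retriever_config):
        # Values stored with the key win over request defaults.
        if data_key.get("source"):
            source = {"active_docs": data_key["source"]}
        if data_key.get("retriever"):
            retriever_config["retriever_name"] = data_key["retriever"]
        if data_key.get("chunks") is not None:  # explicit None check: 0 is a valid chunk count
            retriever_config["chunks"] = data_key["chunks"]
        return source, retriever_config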

View File

@@ -0,0 +1,627 @@
import datetime
import json
import logging
from bson.objectid import ObjectId
from flask import (
Blueprint,
current_app,
jsonify,
make_response,
request
)
from flask_restx import fields, Namespace, Resource
from application.api.user.tasks import (
ingest_connector_task,
)
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.api import api
from application.utils import (
check_required_fields
)
from application.parser.connectors.connector_creator import ConnectorCreator
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
sources_collection = db["sources"]
sessions_collection = db["connector_sessions"]
connector = Blueprint("connector", __name__)
connectors_ns = Namespace("connectors", description="Connector operations", path="/")
api.add_namespace(connectors_ns)
@connectors_ns.route("/api/connectors/upload")
class UploadConnector(Resource):
@api.expect(
api.model(
"ConnectorUploadModel",
{
"user": fields.String(required=True, description="User ID"),
"source": fields.String(
required=True, description="Source type (google_drive, github, etc.)"
),
"name": fields.String(required=True, description="Job name"),
"data": fields.String(required=True, description="Configuration data"),
"repo_url": fields.String(description="GitHub repository URL"),
},
)
)
@api.doc(
description="Uploads connector source for vectorization",
)
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
data = request.form
required_fields = ["user", "source", "name", "data"]
missing_fields = check_required_fields(data, required_fields)
if missing_fields:
return missing_fields
try:
config = json.loads(data["data"])
source_data = None
sync_frequency = config.get("sync_frequency", "never")
if data["source"] == "github":
source_data = config.get("repo_url")
elif data["source"] in ["crawler", "url"]:
source_data = config.get("url")
elif data["source"] == "reddit":
source_data = config
elif data["source"] in ConnectorCreator.get_supported_connectors():
session_token = config.get("session_token")
if not session_token:
return make_response(jsonify({
"success": False,
"error": f"Missing session_token in {data['source']} configuration"
}), 400)
file_ids = config.get("file_ids", [])
if isinstance(file_ids, str):
file_ids = [id.strip() for id in file_ids.split(',') if id.strip()]
elif not isinstance(file_ids, list):
file_ids = []
folder_ids = config.get("folder_ids", [])
if isinstance(folder_ids, str):
folder_ids = [id.strip() for id in folder_ids.split(',') if id.strip()]
elif not isinstance(folder_ids, list):
folder_ids = []
config["file_ids"] = file_ids
config["folder_ids"] = folder_ids
task = ingest_connector_task.delay(
job_name=data["name"],
user=decoded_token.get("sub"),
source_type=data["source"],
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=config.get("recursive", False),
retriever=config.get("retriever", "classic"),
sync_frequency=sync_frequency
)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
task = ingest_connector_task.delay(
source_data=source_data,
job_name=data["name"],
user=decoded_token.get("sub"),
loader=data["source"],
sync_frequency=sync_frequency
)
except Exception as err:
current_app.logger.error(
f"Error uploading connector source: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True, "task_id": task.id}), 200)
@connectors_ns.route("/api/connectors/task_status")
class ConnectorTaskStatus(Resource):
task_status_model = api.model(
"ConnectorTaskStatusModel",
{"task_id": fields.String(required=True, description="Task ID")},
)
@api.expect(task_status_model)
@api.doc(description="Get connector task status")
def get(self):
task_id = request.args.get("task_id")
if not task_id:
return make_response(
jsonify({"success": False, "message": "Task ID is required"}), 400
)
try:
from application.celery_init import celery
task = celery.AsyncResult(task_id)
task_meta = task.info
print(f"Task status: {task.status}")
if not isinstance(
task_meta, (dict, list, str, int, float, bool, type(None))
):
task_meta = str(task_meta)
except Exception as err:
current_app.logger.error(f"Error getting task status: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"status": task.status, "result": task_meta}), 200)
@connectors_ns.route("/api/connectors/sources")
class ConnectorSources(Resource):
@api.doc(description="Get connector sources")
def get(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
try:
sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1)
connector_sources = []
for source in sources:
connector_sources.append({
"id": str(source["_id"]),
"name": source.get("name"),
"date": source.get("date"),
"type": source.get("type"),
"source": source.get("source"),
"tokens": source.get("tokens", ""),
"retriever": source.get("retriever", "classic"),
"syncFrequency": source.get("sync_frequency", ""),
})
except Exception as err:
current_app.logger.error(f"Error retrieving connector sources: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify(connector_sources), 200)
@connectors_ns.route("/api/connectors/delete")
class DeleteConnectorSource(Resource):
@api.doc(
description="Delete a connector source",
params={"source_id": "The source ID to delete"},
)
def delete(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
source_id = request.args.get("source_id")
if not source_id:
return make_response(
jsonify({"success": False, "message": "source_id is required"}), 400
)
try:
result = sources_collection.delete_one(
{"_id": ObjectId(source_id), "user": decoded_token.get("sub")}
)
if result.deleted_count == 0:
return make_response(
jsonify({"success": False, "message": "Source not found"}), 404
)
except Exception as err:
current_app.logger.error(
f"Error deleting connector source: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify({"success": True}), 200)
@connectors_ns.route("/api/connectors/auth")
class ConnectorAuth(Resource):
@api.doc(description="Get connector OAuth authorization URL", params={"provider": "Connector provider (e.g., google_drive)"})
def get(self):
try:
provider = request.args.get('provider') or request.args.get('source')
if not provider:
return make_response(jsonify({"success": False, "error": "Missing provider"}), 400)
if not ConnectorCreator.is_supported(provider):
return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400)
import uuid
state = str(uuid.uuid4())
auth = ConnectorCreator.create_auth(provider)
authorization_url = auth.get_authorization_url(state=state)
return make_response(jsonify({
"success": True,
"authorization_url": authorization_url,
"state": state
}), 200)
except Exception as e:
current_app.logger.error(f"Error generating connector auth URL: {e}")
return make_response(jsonify({"success": False, "error": str(e)}), 500)
@connectors_ns.route("/api/connectors/callback")
class ConnectorsCallback(Resource):
@api.doc(description="Handle OAuth callback for external connectors")
def get(self):
"""Handle OAuth callback for external connectors"""
try:
from flask import redirect
import uuid
provider = request.args.get('provider', 'google_drive')
authorization_code = request.args.get('code')
_ = request.args.get('state')
error = request.args.get('error')
if error:
return redirect(f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider={provider}")
if not authorization_code:
return redirect(f"/api/connectors/callback-status?status=error&message=Authorization+code+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider={provider}")
try:
auth = ConnectorCreator.create_auth(provider)
token_info = auth.exchange_code_for_tokens(authorization_code)
session_token = str(uuid.uuid4())
try:
credentials = auth.create_credentials_from_token_info(token_info)
service = auth.build_drive_service(credentials)
user_info = service.about().get(fields="user").execute()
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
except Exception as e:
current_app.logger.warning(f"Could not get user info: {e}")
user_email = 'Connected User'
sanitized_token_info = {
"access_token": token_info.get("access_token"),
"refresh_token": token_info.get("refresh_token"),
"token_uri": token_info.get("token_uri"),
"expiry": token_info.get("expiry"),
"scopes": token_info.get("scopes")
}
user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None
sessions_collection.insert_one({
"session_token": session_token,
"user": user_id,
"token_info": sanitized_token_info,
"created_at": datetime.datetime.now(datetime.timezone.utc),
"user_email": user_email,
"provider": provider
})
# Redirect to success page with session token and user email
return redirect(f"/api/connectors/callback-status?status=success&message=Authentication+successful&provider={provider}&session_token={session_token}&user_email={user_email}")
except Exception as e:
current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True)
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+exchange+authorization+code+for+tokens:+{str(e)}&provider={provider}")
except Exception as e:
current_app.logger.error(f"Error handling connector callback: {e}")
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+complete+connector+authentication:+{str(e)}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.")
@connectors_ns.route("/api/connectors/refresh")
class ConnectorRefresh(Resource):
@api.expect(api.model("ConnectorRefreshModel", {"provider": fields.String(required=True), "refresh_token": fields.String(required=True)}))
@api.doc(description="Refresh connector access token")
def post(self):
try:
data = request.get_json()
provider = data.get('provider')
refresh_token = data.get('refresh_token')
if not provider or not refresh_token:
return make_response(jsonify({"success": False, "error": "provider and refresh_token are required"}), 400)
auth = ConnectorCreator.create_auth(provider)
token_info = auth.refresh_access_token(refresh_token)
return make_response(jsonify({"success": True, "token_info": token_info}), 200)
except Exception as e:
current_app.logger.error(f"Error refreshing token for connector: {e}")
return make_response(jsonify({"success": False, "error": str(e)}), 500)
@connectors_ns.route("/api/connectors/files")
class ConnectorFiles(Resource):
@api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)}))
@api.doc(description="List files from a connector provider (supports pagination)")
def post(self):
try:
data = request.get_json()
provider = data.get('provider')
session_token = data.get('session_token')
folder_id = data.get('folder_id')
limit = data.get('limit', 10)
page_token = data.get('page_token')
if not provider or not session_token:
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
user = decoded_token.get('sub')
session = sessions_collection.find_one({"session_token": session_token, "user": user})
if not session:
return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)
loader = ConnectorCreator.create_connector(provider, session_token)
documents = loader.load_data({
'limit': limit,
'list_only': True,
'session_token': session_token,
'folder_id': folder_id,
'page_token': page_token
})
files = []
for doc in documents[:limit]:
metadata = doc.extra_info
modified_time = metadata.get('modified_time')
if modified_time:
date_part = modified_time.split('T')[0]
time_part = modified_time.split('T')[1].split('.')[0].split('Z')[0]
formatted_time = f"{date_part} {time_part}"
else:
formatted_time = None
files.append({
'id': doc.doc_id,
'name': metadata.get('file_name', 'Unknown File'),
'type': metadata.get('mime_type', 'unknown'),
'size': metadata.get('size', None),
'modifiedTime': formatted_time
})
next_token = getattr(loader, 'next_page_token', None)
has_more = bool(next_token)
return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200)
except Exception as e:
current_app.logger.error(f"Error loading connector files: {e}")
return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)
@connectors_ns.route("/api/connectors/validate-session")
class ConnectorValidateSession(Resource):
@api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)}))
@api.doc(description="Validate connector session token and return user info")
def post(self):
try:
data = request.get_json()
provider = data.get('provider')
session_token = data.get('session_token')
if not provider or not session_token:
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
user = decoded_token.get('sub')
session = sessions_collection.find_one({"session_token": session_token, "user": user})
if not session or "token_info" not in session:
return make_response(jsonify({"success": False, "error": "Invalid or expired session"}), 401)
token_info = session["token_info"]
auth = ConnectorCreator.create_auth(provider)
is_expired = auth.is_token_expired(token_info)
return make_response(jsonify({
"success": True,
"expired": is_expired,
"user_email": session.get('user_email', 'Connected User')
}), 200)
except Exception as e:
current_app.logger.error(f"Error validating connector session: {e}")
return make_response(jsonify({"success": False, "error": str(e)}), 500)
@connectors_ns.route("/api/connectors/disconnect")
class ConnectorDisconnect(Resource):
@api.expect(api.model("ConnectorDisconnectModel", {"provider": fields.String(required=True), "session_token": fields.String(required=False)}))
@api.doc(description="Disconnect a connector session")
def post(self):
try:
data = request.get_json()
provider = data.get('provider')
session_token = data.get('session_token')
if not provider:
return make_response(jsonify({"success": False, "error": "provider is required"}), 400)
if session_token:
sessions_collection.delete_one({"session_token": session_token})
return make_response(jsonify({"success": True}), 200)
except Exception as e:
current_app.logger.error(f"Error disconnecting connector session: {e}")
return make_response(jsonify({"success": False, "error": str(e)}), 500)
@connectors_ns.route("/api/connectors/sync")
class ConnectorSync(Resource):
@api.expect(
api.model(
"ConnectorSyncModel",
{
"source_id": fields.String(required=True, description="Source ID to sync"),
"session_token": fields.String(required=True, description="Authentication token")
},
)
)
@api.doc(description="Sync connector source to check for modifications")
def post(self):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
try:
data = request.get_json()
source_id = data.get('source_id')
session_token = data.get('session_token')
if not all([source_id, session_token]):
return make_response(
jsonify({
"success": False,
"error": "source_id and session_token are required"
}),
400
)
source = sources_collection.find_one({"_id": ObjectId(source_id)})
if not source:
return make_response(
jsonify({
"success": False,
"error": "Source not found"
}),
404
)
if source.get('user') != decoded_token.get('sub'):
return make_response(
jsonify({
"success": False,
"error": "Unauthorized access to source"
}),
403
)
remote_data = {}
try:
if source.get('remote_data'):
remote_data = json.loads(source.get('remote_data'))
except json.JSONDecodeError:
current_app.logger.error(f"Invalid remote_data format for source {source_id}")
remote_data = {}
source_type = remote_data.get('provider')
if not source_type:
return make_response(
jsonify({
"success": False,
"error": "Source provider not found in remote_data"
}),
400
)
# Extract configuration from remote_data
file_ids = remote_data.get('file_ids', [])
folder_ids = remote_data.get('folder_ids', [])
recursive = remote_data.get('recursive', True)
# Start the sync task
task = ingest_connector_task.delay(
job_name=source.get('name'),
user=decoded_token.get('sub'),
source_type=source_type,
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=recursive,
retriever=source.get('retriever', 'classic'),
operation_mode="sync",
doc_id=source_id,
sync_frequency=source.get('sync_frequency', 'never')
)
return make_response(
jsonify({
"success": True,
"task_id": task.id
}),
200
)
except Exception as err:
current_app.logger.error(
f"Error syncing connector source: {err}",
exc_info=True
)
return make_response(
jsonify({
"success": False,
"error": str(err)
}),
400
)
@connectors_ns.route("/api/connectors/callback-status")
class ConnectorCallbackStatus(Resource):
@api.doc(description="Return HTML page with connector authentication status")
def get(self):
"""Return HTML page with connector authentication status"""
try:
status = request.args.get('status', 'error')
message = request.args.get('message', '')
provider = request.args.get('provider', 'connector')
session_token = request.args.get('session_token', '')
user_email = request.args.get('user_email', '')
html_content = f"""
<!DOCTYPE html>
<html>
<head>
<title>{provider.replace('_', ' ').title()} Authentication</title>
<style>
body {{ font-family: Arial, sans-serif; text-align: center; padding: 40px; }}
.container {{ max-width: 600px; margin: 0 auto; }}
.success {{ color: #4CAF50; }}
.error {{ color: #F44336; }}
</style>
<script>
window.onload = function() {{
const status = "{status}";
const sessionToken = "{session_token}";
const userEmail = "{user_email}";
if (status === "success" && window.opener) {{
window.opener.postMessage({{
type: '{provider}_auth_success',
session_token: sessionToken,
user_email: userEmail
}}, '*');
setTimeout(() => window.close(), 3000);
}}
}};
</script>
</head>
<body>
<div class="container">
<h2>{provider.replace('_', ' ').title()} Authentication</h2>
<div class="{status}">
<p>{message}</p>
{f'<p>Connected as: {user_email}</p>' if status == 'success' else ''}
</div>
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else ''}</small></p>
</div>
</body>
</html>
"""
return make_response(html_content, 200, {'Content-Type': 'text/html'})
except Exception as e:
current_app.logger.error(f"Error rendering callback status page: {e}")
return make_response("Authentication error occurred", 500, {'Content-Type': 'text/html'})

View File

@@ -3,11 +3,12 @@ import json
import math
import os
import secrets
import tempfile
import uuid
import zipfile
from functools import wraps
from typing import Optional, Tuple
import tempfile
import zipfile
from bson.binary import Binary, UuidRepresentation
from bson.dbref import DBRef
from bson.objectid import ObjectId
@@ -25,26 +26,28 @@ from pymongo import ReturnDocument
from werkzeug.utils import secure_filename
from application.agents.tools.tool_manager import ToolManager
from application.api import api
from application.api.user.tasks import (
ingest,
ingest_connector_task,
ingest_remote,
process_agent_webhook,
store_attachment,
)
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.api import api
from application.parser.connectors.connector_creator import ConnectorCreator
from application.storage.storage_creator import StorageCreator
from application.tts.google_tts import GoogleTTS
from application.utils import (
check_required_fields,
generate_image_url,
num_tokens_from_string,
safe_filename,
validate_function_name,
validate_required_fields,
)
from application.utils import num_tokens_from_string
from application.vectorstore.vector_creator import VectorCreator
storage = StorageCreator.get_storage()
@@ -72,7 +75,6 @@ try:
users_collection.create_index("user_id", unique=True)
except Exception as e:
print("Error creating indexes:", e)
user = Blueprint("user", __name__)
user_ns = Namespace("user", description="User related operations", path="/")
api.add_namespace(user_ns)
@@ -125,11 +127,9 @@ def ensure_user_doc(user_id):
updates["agent_preferences.pinned"] = []
if "shared_with_me" not in prefs:
updates["agent_preferences.shared_with_me"] = []
if updates:
users_collection.update_one({"user_id": user_id}, {"$set": updates})
user_doc = users_collection.find_one({"user_id": user_id})
return user_doc
@@ -181,7 +181,6 @@ def handle_image_upload(
jsonify({"success": False, "message": "Image upload failed"}),
400,
)
return image_url, None
@@ -295,8 +294,8 @@ class GetSingleConversation(Resource):
)
if not conversation:
return make_response(jsonify({"status": "not found"}), 404)
# Process queries to include attachment names
queries = conversation["queries"]
for query in queries:
if "attachments" in query and query["attachments"]:
@@ -492,11 +491,11 @@ class DeleteOldIndexes(Resource):
)
if not doc:
return make_response(jsonify({"status": "not found"}), 404)
storage = StorageCreator.get_storage()
try:
# Delete vector index
if settings.VECTOR_STORE == "faiss":
index_path = f"indexes/{str(doc['_id'])}"
if storage.file_exists(f"{index_path}/index.faiss"):
@@ -508,7 +507,6 @@ class DeleteOldIndexes(Resource):
settings.VECTOR_STORE, source_id=str(doc["_id"])
)
vectorstore.delete_index()
if "file_path" in doc and doc["file_path"]:
file_path = doc["file_path"]
if storage.is_directory(file_path):
@@ -517,7 +515,6 @@ class DeleteOldIndexes(Resource):
storage.delete_file(f)
else:
storage.delete_file(file_path)
except FileNotFoundError:
pass
except Exception as err:
@@ -525,7 +522,6 @@ class DeleteOldIndexes(Resource):
f"Error deleting files and indexes: {err}", exc_info=True
)
return make_response(jsonify({"success": False}), 400)
sources_collection.delete_one({"_id": ObjectId(source_id)})
return make_response(jsonify({"success": True}), 200)
@@ -567,6 +563,7 @@ class UploadFile(Resource):
job_name = request.form["name"]
# Create safe versions for filesystem operations
safe_user = safe_filename(user)
dir_name = safe_filename(job_name)
base_path = f"{settings.UPLOAD_FOLDER}/{safe_user}/{dir_name}"
@@ -588,6 +585,7 @@ class UploadFile(Resource):
zip_ref.extractall(path=temp_dir)
# Walk through extracted files and upload them
for root, _, files in os.walk(temp_dir):
for extracted_file in files:
if (
@@ -595,7 +593,6 @@ class UploadFile(Resource):
== temp_file_path
):
continue
rel_path = os.path.relpath(
os.path.join(root, extracted_file), temp_dir
)
@@ -610,15 +607,16 @@ class UploadFile(Resource):
f"Error extracting zip: {e}", exc_info=True
)
# If zip extraction fails, save the original zip file
file_path = f"{base_path}/{safe_file}"
with open(temp_file_path, "rb") as f:
storage.save_file(f, file_path)
else:
# For non-zip files, save directly
file_path = f"{base_path}/{safe_file}"
with open(temp_file_path, "rb") as f:
storage.save_file(f, file_path)
task = ingest.delay(
settings.UPLOAD_FOLDER,
[
@@ -690,7 +688,6 @@ class ManageSourceFiles(Resource):
return make_response(
jsonify({"success": False, "message": "Unauthorized"}), 401
)
user = decoded_token.get("sub")
source_id = request.form.get("source_id")
operation = request.form.get("operation")
@@ -705,7 +702,6 @@ class ManageSourceFiles(Resource):
),
400,
)
if operation not in ["add", "remove", "remove_directory"]:
return make_response(
jsonify(
@@ -716,14 +712,12 @@ class ManageSourceFiles(Resource):
),
400,
)
try:
ObjectId(source_id)
except Exception:
return make_response(
jsonify({"success": False, "message": "Invalid source ID format"}), 400
)
try:
source = sources_collection.find_one(
{"_id": ObjectId(source_id), "user": user}
@@ -743,7 +737,6 @@ class ManageSourceFiles(Resource):
return make_response(
jsonify({"success": False, "message": "Database error"}), 500
)
try:
storage = StorageCreator.get_storage()
source_file_path = source.get("file_path", "")
@@ -756,7 +749,6 @@ class ManageSourceFiles(Resource):
),
400,
)
if operation == "add":
files = request.files.getlist("file")
if not files or all(file.filename == "" for file in files):
@@ -769,23 +761,22 @@ class ManageSourceFiles(Resource):
),
400,
)
added_files = []
target_dir = source_file_path
if parent_dir:
target_dir = f"{source_file_path}/{parent_dir}"
for file in files:
if file.filename:
safe_filename_str = safe_filename(file.filename)
file_path = f"{target_dir}/{safe_filename_str}"
# Save file to storage
storage.save_file(file, file_path)
added_files.append(safe_filename_str)
# Trigger re-ingestion pipeline
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(source_id=source_id, user=user)
@@ -802,7 +793,6 @@ class ManageSourceFiles(Resource):
),
200,
)
elif operation == "remove":
file_paths_str = request.form.get("file_paths")
if not file_paths_str:
@@ -815,7 +805,6 @@ class ManageSourceFiles(Resource):
),
400,
)
try:
file_paths = (
json.loads(file_paths_str)
@@ -829,18 +818,19 @@ class ManageSourceFiles(Resource):
),
400,
)
# Remove files from storage and directory structure
removed_files = []
for file_path in file_paths:
full_path = f"{source_file_path}/{file_path}"
# Remove from storage
if storage.file_exists(full_path):
storage.delete_file(full_path)
removed_files.append(file_path)
# Trigger re-ingestion pipeline
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(source_id=source_id, user=user)
@@ -856,7 +846,6 @@ class ManageSourceFiles(Resource):
),
200,
)
elif operation == "remove_directory":
directory_path = request.form.get("directory_path")
if not directory_path:
@@ -869,8 +858,8 @@ class ManageSourceFiles(Resource):
),
400,
)
# Validate directory path (prevent path traversal)
if directory_path.startswith("/") or ".." in directory_path:
current_app.logger.warning(
f"Invalid directory path attempted for removal. "
@@ -882,7 +871,6 @@ class ManageSourceFiles(Resource):
),
400,
)
full_directory_path = (
f"{source_file_path}/{directory_path}"
if directory_path
@@ -904,7 +892,6 @@ class ManageSourceFiles(Resource):
),
404,
)
success = storage.remove_directory(full_directory_path)
if not success:
@@ -919,7 +906,6 @@ class ManageSourceFiles(Resource):
),
500,
)
current_app.logger.info(
f"Successfully removed directory. "
f"User: {user}, Source ID: {source_id}, Directory path: {directory_path}, "
@@ -927,6 +913,7 @@ class ManageSourceFiles(Resource):
)
# Trigger re-ingestion pipeline
from application.api.user.tasks import reingest_source_task
task = reingest_source_task.delay(source_id=source_id, user=user)
@@ -942,7 +929,6 @@ class ManageSourceFiles(Resource):
),
200,
)
except Exception as err:
error_context = f"operation={operation}, user={user}, source_id={source_id}"
if operation == "remove_directory":
@@ -954,7 +940,6 @@ class ManageSourceFiles(Resource):
elif operation == "add":
parent_dir = request.form.get("parent_dir", "")
error_context += f", parent_dir={parent_dir}"
current_app.logger.error(
f"Error managing source files: {err} ({error_context})", exc_info=True
)
@@ -1001,6 +986,50 @@ class UploadRemote(Resource):
source_data = config.get("url")
elif data["source"] == "reddit":
source_data = config
elif data["source"] in ConnectorCreator.get_supported_connectors():
session_token = config.get("session_token")
if not session_token:
return make_response(
jsonify(
{
"success": False,
"error": f"Missing session_token in {data['source']} configuration",
}
),
400,
)
# Process file_ids
file_ids = config.get("file_ids", [])
if isinstance(file_ids, str):
file_ids = [id.strip() for id in file_ids.split(",") if id.strip()]
elif not isinstance(file_ids, list):
file_ids = []
# Process folder_ids
folder_ids = config.get("folder_ids", [])
if isinstance(folder_ids, str):
folder_ids = [
id.strip() for id in folder_ids.split(",") if id.strip()
]
elif not isinstance(folder_ids, list):
folder_ids = []
config["file_ids"] = file_ids
config["folder_ids"] = folder_ids
task = ingest_connector_task.delay(
job_name=data["name"],
user=decoded_token.get("sub"),
source_type=data["source"],
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=config.get("recursive", False),
retriever=config.get("retriever", "classic"),
)
return make_response(
jsonify({"success": True, "task_id": task.id}), 200
)
task = ingest_remote.delay(
source_data=source_data,
job_name=data["name"],
@@ -1109,6 +1138,7 @@ class PaginatedSources(Resource):
"retriever": doc.get("retriever", "classic"),
"syncFrequency": doc.get("sync_frequency", ""),
"isNested": bool(doc.get("directory_structure")),
"type": doc.get("type", "file"),
}
paginated_docs.append(doc_data)
response = {
@@ -1157,6 +1187,9 @@ class CombinedJson(Resource):
"retriever": index.get("retriever", "classic"),
"syncFrequency": index.get("sync_frequency", ""),
"is_nested": bool(index.get("directory_structure")),
"type": index.get(
"type", "file"
), # Add type field with default "file"
}
)
except Exception as err:
@@ -1372,17 +1405,14 @@ class GetAgent(Resource):
def get(self):
if not (decoded_token := request.decoded_token):
return {"success": False}, 401
if not (agent_id := request.args.get("id")):
return {"success": False, "message": "ID required"}, 400
try:
agent = agents_collection.find_one(
{"_id": ObjectId(agent_id), "user": decoded_token["sub"]}
)
if not agent:
return {"status": "Not found"}, 404
data = {
"id": str(agent["_id"]),
"name": agent["name"],
@@ -1428,7 +1458,6 @@ class GetAgent(Resource):
"shared_token": agent.get("shared_token", ""),
}
return make_response(jsonify(data), 200)
except Exception as e:
current_app.logger.error(f"Agent fetch error: {e}", exc_info=True)
return {"success": False}, 400
@@ -1440,7 +1469,6 @@ class GetAgents(Resource):
def get(self):
if not (decoded_token := request.decoded_token):
return {"success": False}, 401
user = decoded_token.get("sub")
try:
user_doc = ensure_user_doc(user)
@@ -1501,7 +1529,6 @@ class GetAgents(Resource):
for agent in agents
if "source" in agent or "retriever" in agent
]
except Exception as err:
current_app.logger.error(f"Error retrieving agents: {err}", exc_info=True)
return make_response(jsonify({"success": False}), 400)
@@ -1573,9 +1600,11 @@ class CreateAgent(Resource):
print(f"Received data: {data}")
# Validate JSON schema if provided
if data.get("json_schema"):
try:
# Basic validation - ensure it's a valid JSON structure
json_schema = data.get("json_schema")
if not isinstance(json_schema, dict):
return make_response(
@@ -1587,8 +1616,8 @@ class CreateAgent(Resource):
),
400,
)
# Validate that it has either a 'schema' property or is itself a schema
if "schema" not in json_schema and "type" not in json_schema:
return make_response(
jsonify(
@@ -1606,7 +1635,6 @@ class CreateAgent(Resource):
),
400,
)
if data.get("status") not in ["draft", "published"]:
return make_response(
jsonify(
@@ -1617,7 +1645,6 @@ class CreateAgent(Resource):
),
400,
)
if data.get("status") == "published":
required_fields = [
"name",
@@ -1628,6 +1655,7 @@ class CreateAgent(Resource):
"agent_type",
]
# Require either source or sources (but not both)
if not data.get("source") and not data.get("sources"):
return make_response(
jsonify(
@@ -1648,13 +1676,11 @@ class CreateAgent(Resource):
return missing_fields
if invalid_fields:
return invalid_fields
image_url, error = handle_image_upload(request, "", user, storage)
if error:
return make_response(
jsonify({"success": False, "message": "Image upload failed"}), 400
)
try:
key = str(uuid.uuid4()) if data.get("status") == "published" else ""
@@ -1674,7 +1700,6 @@ class CreateAgent(Resource):
source_field = DBRef("sources", ObjectId(source_value))
else:
source_field = ""
new_agent = {
"user": user,
"name": data.get("name"),
@@ -1772,7 +1797,6 @@ class UpdateAgent(Resource):
data["json_schema"] = json.loads(data["json_schema"])
except json.JSONDecodeError:
data["json_schema"] = None
if not ObjectId.is_valid(agent_id):
return make_response(
jsonify({"success": False, "message": "Invalid agent ID format"}), 400
@@ -1796,7 +1820,6 @@ class UpdateAgent(Resource):
),
404,
)
image_url, error = handle_image_upload(
request, existing_agent.get("image", ""), user, storage
)
@@ -1804,7 +1827,6 @@ class UpdateAgent(Resource):
return make_response(
jsonify({"success": False, "message": "Image upload failed"}), 400
)
update_fields = {}
allowed_fields = [
"name",
@@ -1837,6 +1859,7 @@ class UpdateAgent(Resource):
source_id = data.get("source")
if source_id == "default":
# Handle special "default" source
update_fields[field] = "default"
elif source_id and ObjectId.is_valid(source_id):
update_fields[field] = DBRef("sources", ObjectId(source_id))
@@ -1941,7 +1964,6 @@ class UpdateAgent(Resource):
),
400,
)
if not existing_agent.get("key"):
newly_generated_key = str(uuid.uuid4())
update_fields["key"] = newly_generated_key
@@ -2028,7 +2050,6 @@ class PinnedAgents(Resource):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user_id = decoded_token.get("sub")
try:
@@ -2037,7 +2058,6 @@ class PinnedAgents(Resource):
if not pinned_ids:
return make_response(jsonify([]), 200)
pinned_object_ids = [ObjectId(agent_id) for agent_id in pinned_ids]
pinned_agents_cursor = agents_collection.find(
@@ -2047,6 +2067,7 @@ class PinnedAgents(Resource):
existing_ids = {str(agent["_id"]) for agent in pinned_agents}
# Clean up any stale pinned IDs
stale_ids = [
agent_id for agent_id in pinned_ids if agent_id not in existing_ids
]
@@ -2055,7 +2076,6 @@ class PinnedAgents(Resource):
{"user_id": user_id},
{"$pullAll": {"agent_preferences.pinned": stale_ids}},
)
list_pinned_agents = [
{
"id": str(agent["_id"]),
@@ -2092,11 +2112,9 @@ class PinnedAgents(Resource):
for agent in pinned_agents
if "source" in agent or "retriever" in agent
]
except Exception as err:
current_app.logger.error(f"Error retrieving pinned agents: {err}")
return make_response(jsonify({"success": False}), 400)
return make_response(jsonify(list_pinned_agents), 200)
@@ -2160,7 +2178,6 @@ class RemoveSharedAgent(Resource):
return make_response(
jsonify({"success": False, "message": "ID is required"}), 400
)
try:
agent = agents_collection.find_one(
{"_id": ObjectId(agent_id), "shared_publicly": True}
@@ -2170,7 +2187,6 @@ class RemoveSharedAgent(Resource):
jsonify({"success": False, "message": "Shared agent not found"}),
404,
)
ensure_user_doc(user_id)
users_collection.update_one(
{"user_id": user_id},
@@ -2183,7 +2199,6 @@ class RemoveSharedAgent(Resource):
)
return make_response(jsonify({"success": True, "action": "removed"}), 200)
except Exception as err:
current_app.logger.error(f"Error removing shared agent: {err}")
return make_response(
@@ -2206,7 +2221,6 @@ class SharedAgent(Resource):
return make_response(
jsonify({"success": False, "message": "Token or ID is required"}), 400
)
try:
query = {
"shared_publicly": True,
@@ -2218,7 +2232,6 @@ class SharedAgent(Resource):
jsonify({"success": False, "message": "Shared agent not found"}),
404,
)
agent_id = str(shared_agent["_id"])
data = {
"id": agent_id,
@@ -2230,7 +2243,12 @@ class SharedAgent(Resource):
else ""
),
"description": shared_agent.get("description", ""),
"source": shared_agent.get("source", ""),
"source": (
str(source_doc["_id"])
if isinstance(shared_agent.get("source"), DBRef)
and (source_doc := db.dereference(shared_agent.get("source")))
else ""
),
"chunks": shared_agent.get("chunks", "0"),
"retriever": shared_agent.get("retriever", "classic"),
"prompt_id": shared_agent.get("prompt_id", "default"),
@@ -2253,7 +2271,6 @@ class SharedAgent(Resource):
if tool_data:
enriched_tools.append(tool_data.get("name", ""))
data["tools"] = enriched_tools
decoded_token = getattr(request, "decoded_token", None)
if decoded_token:
user_id = decoded_token.get("sub")
@@ -2265,9 +2282,7 @@ class SharedAgent(Resource):
{"user_id": user_id},
{"$addToSet": {"agent_preferences.shared_with_me": agent_id}},
)
return make_response(jsonify(data), 200)
except Exception as err:
current_app.logger.error(f"Error retrieving shared agent: {err}")
return make_response(jsonify({"success": False}), 400)
@@ -2301,7 +2316,6 @@ class SharedAgents(Resource):
{"user_id": user_id},
{"$pullAll": {"agent_preferences.shared_with_me": stale_ids}},
)
pinned_ids = set(user_doc.get("agent_preferences", {}).get("pinned", []))
list_shared_agents = [
@@ -2328,7 +2342,6 @@ class SharedAgents(Resource):
]
return make_response(jsonify(list_shared_agents), 200)
except Exception as err:
current_app.logger.error(f"Error retrieving shared agents: {err}")
return make_response(jsonify({"success": False}), 400)
@@ -3808,22 +3821,22 @@ class GetChunks(Resource):
metadata = chunk.get("metadata", {})
# Filter by path if provided
if path:
chunk_source = metadata.get("source", "")
# Check if the chunk's source matches the requested path
if not chunk_source or not chunk_source.endswith(path):
continue
# Filter by search term if provided
if search_term:
text_match = search_term in chunk.get("text", "").lower()
title_match = search_term in metadata.get("title", "").lower()
if not (text_match or title_match):
continue
filtered_chunks.append(chunk)
chunks = filtered_chunks
total_chunks = len(chunks)
@@ -3983,7 +3996,6 @@ class UpdateChunk(Resource):
if metadata is None:
metadata = {}
metadata["token_count"] = token_count
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid doc_id"}), 400)
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
@@ -3998,7 +4010,6 @@ class UpdateChunk(Resource):
existing_chunk = next((c for c in chunks if c["doc_id"] == chunk_id), None)
if not existing_chunk:
return make_response(jsonify({"error": "Chunk not found"}), 404)
new_text = text if text is not None else existing_chunk["text"]
if metadata is not None:
@@ -4006,10 +4017,8 @@ class UpdateChunk(Resource):
new_metadata.update(metadata)
else:
new_metadata = existing_chunk["metadata"].copy()
if text is not None:
new_metadata["token_count"] = num_tokens_from_string(new_text)
try:
new_chunk_id = store.add_chunk(new_text, new_metadata)
@@ -4018,7 +4027,6 @@ class UpdateChunk(Resource):
current_app.logger.warning(
f"Failed to delete old chunk {chunk_id}, but new chunk {new_chunk_id} was created"
)
return make_response(
jsonify(
{
@@ -4065,7 +4073,6 @@ class StoreAttachment(Resource):
jsonify({"status": "error", "message": "Missing file"}),
400,
)
user = None
if decoded_token:
user = safe_filename(decoded_token.get("sub"))
@@ -4080,7 +4087,6 @@ class StoreAttachment(Resource):
return make_response(
jsonify({"success": False, "message": "Authentication required"}), 401
)
try:
attachment_id = ObjectId()
original_filename = safe_filename(os.path.basename(file.filename))
@@ -4122,7 +4128,6 @@ class ServeImage(Resource):
content_type = f"image/{extension}"
if extension == "jpg":
content_type = "image/jpeg"
response = make_response(file_obj.read())
response.headers.set("Content-Type", content_type)
response.headers.set("Cache-Control", "max-age=86400")
@@ -4149,36 +4154,43 @@ class DirectoryStructure(Resource):
decoded_token = request.decoded_token
if not decoded_token:
return make_response(jsonify({"success": False}), 401)
user = decoded_token.get("sub")
doc_id = request.args.get("id")
if not doc_id:
return make_response(jsonify({"error": "Document ID is required"}), 400)
if not ObjectId.is_valid(doc_id):
return make_response(jsonify({"error": "Invalid document ID"}), 400)
try:
doc = sources_collection.find_one({"_id": ObjectId(doc_id), "user": user})
if not doc:
return make_response(
jsonify({"error": "Document not found or access denied"}), 404
)
directory_structure = doc.get("directory_structure", {})
base_path = doc.get("file_path", "")
provider = None
remote_data = doc.get("remote_data")
try:
if isinstance(remote_data, str) and remote_data:
remote_data_obj = json.loads(remote_data)
provider = remote_data_obj.get("provider")
except Exception as e:
current_app.logger.warning(
f"Failed to parse remote_data for doc {doc_id}: {e}"
)
return make_response(
jsonify(
{
"success": True,
"directory_structure": directory_structure,
"base_path": doc.get("file_path", ""),
"base_path": base_path,
"provider": provider,
}
),
200,
)
except Exception as e:
current_app.logger.error(
f"Error retrieving directory structure: {e}", exc_info=True

View File

@@ -47,6 +47,39 @@ def process_agent_webhook(self, agent_id, payload):
return resp
@celery.task(bind=True)
def ingest_connector_task(
self,
job_name,
user,
source_type,
session_token=None,
file_ids=None,
folder_ids=None,
recursive=True,
retriever="classic",
operation_mode="upload",
doc_id=None,
sync_frequency="never"
):
from application.worker import ingest_connector
resp = ingest_connector(
self,
job_name,
user,
source_type,
session_token=session_token,
file_ids=file_ids,
folder_ids=folder_ids,
recursive=recursive,
retriever=retriever,
operation_mode=operation_mode,
doc_id=doc_id,
sync_frequency=sync_frequency
)
return resp
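# Dispatch sketch (illustrative, not part of the committed module): queueing
# the task above and polling it, as /api/connectors/task_status does. The job
# name, user, and IDs are placeholders.
def _example_dispatch_and_poll():
    from application.celery_init import celery
    task = ingest_connector_task.delay(
        job_name="My Drive docs",
        user="local",
        source_type="google_drive",
        session_token="<session token>",
        file_ids=["fileId1"],
    )
    result = celery.AsyncResult(task.id)
    return result.status, result.info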
@celery.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
sender.add_periodic_task(

View File

@@ -16,6 +16,7 @@ from application.api import api # noqa: E402
from application.api.answer import answer # noqa: E402
from application.api.internal.routes import internal # noqa: E402
from application.api.user.routes import user # noqa: E402
from application.api.connector.routes import connector # noqa: E402
from application.celery_init import celery # noqa: E402
from application.core.settings import settings # noqa: E402
@@ -30,6 +31,7 @@ app = Flask(__name__)
app.register_blueprint(user)
app.register_blueprint(answer)
app.register_blueprint(internal)
app.register_blueprint(connector)
app.config.update(
UPLOAD_FOLDER="inputs",
CELERY_BROKER_URL=settings.CELERY_BROKER_URL,

View File

@@ -40,6 +40,13 @@ class Settings(BaseSettings):
FALLBACK_LLM_NAME: Optional[str] = None # model name for fallback llm
FALLBACK_LLM_API_KEY: Optional[str] = None # api key for fallback llm
# Google Drive integration
GOOGLE_CLIENT_ID: Optional[str] = None  # your Google OAuth client ID
GOOGLE_CLIENT_SECRET: Optional[str] = None  # your Google OAuth client secret
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback"
# Register the redirect URI with ?provider={provider_name} appended in your provider's console,
# e.g. http://127.0.0.1:7091/api/connectors/callback?provider=google_drive
# LLM Cache
CACHE_REDIS_URL: str = "redis://localhost:6379/2"

View File

@@ -0,0 +1,18 @@
"""
External knowledge base connectors for DocsGPT.
This module contains connectors for external knowledge bases and document storage systems
that require authentication and specialized handling, separate from simple web scrapers.
"""
from .base import BaseConnectorAuth, BaseConnectorLoader
from .connector_creator import ConnectorCreator
from .google_drive import GoogleDriveAuth, GoogleDriveLoader
__all__ = [
'BaseConnectorAuth',
'BaseConnectorLoader',
'ConnectorCreator',
'GoogleDriveAuth',
'GoogleDriveLoader'
]

View File

@@ -0,0 +1,129 @@
"""
Base classes for external knowledge base connectors.
This module provides minimal abstract base classes that define the essential
interface for external knowledge base connectors.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from application.parser.schema.base import Document
class BaseConnectorAuth(ABC):
"""
Abstract base class for connector authentication.
Defines the minimal interface that all connector authentication
implementations must follow.
"""
@abstractmethod
def get_authorization_url(self, state: Optional[str] = None) -> str:
"""
Generate authorization URL for OAuth flows.
Args:
state: Optional state parameter for CSRF protection
Returns:
Authorization URL
"""
pass
@abstractmethod
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
"""
Exchange authorization code for access tokens.
Args:
authorization_code: Authorization code from OAuth callback
Returns:
Dictionary containing token information
"""
pass
@abstractmethod
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
"""
Refresh an expired access token.
Args:
refresh_token: Refresh token
Returns:
Dictionary containing refreshed token information
"""
pass
@abstractmethod
def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
"""
Check if a token is expired.
Args:
token_info: Token information dictionary
Returns:
True if token is expired, False otherwise
"""
pass
class BaseConnectorLoader(ABC):
"""
Abstract base class for connector loaders.
Defines the minimal interface that all connector loader
implementations must follow.
"""
@abstractmethod
def __init__(self, session_token: str):
"""
Initialize the connector loader.
Args:
session_token: Authentication session token
"""
pass
@abstractmethod
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
"""
Load documents from the external knowledge base.
Args:
inputs: Configuration dictionary containing:
- file_ids: Optional list of specific file IDs to load
- folder_ids: Optional list of folder IDs to browse/download
- limit: Maximum number of items to return
- list_only: If True, return metadata without content
- recursive: Whether to recursively process folders
Returns:
List of Document objects
"""
pass
@abstractmethod
def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Download files/folders to a local directory.
Args:
local_dir: Local directory path to download files to
source_config: Configuration for what to download
Returns:
Dictionary containing download results:
- files_downloaded: Number of files downloaded
- directory_path: Path where files were downloaded
- empty_result: Whether no files were downloaded
- source_type: Type of connector
- config_used: Configuration that was used
- error: Error message if download failed (optional)
"""
pass
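
A minimal sketch of a conforming loader, to make the interface concrete. It is a toy that serves documents from an in-memory dict; the Document constructor arguments are assumptions inferred from how doc_id and extra_info are read elsewhere in this changeset.

    import os
    from typing import Any, Dict, List

    from application.parser.connectors.base import BaseConnectorLoader
    from application.parser.schema.base import Document

    class InMemoryLoader(BaseConnectorLoader):
        """Toy loader backed by a dict keyed by file ID."""

        def __init__(self, session_token: str):
            self.session_token = session_token
            self.store = {"doc-1": "hello world"}

        def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
            ids = inputs.get("file_ids") or list(self.store)
            limit = inputs.get("limit", len(ids))
            return [
                Document(text=self.store[i], doc_id=i, extra_info={"file_name": i})
                for i in ids[:limit]
            ]

        def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
            os.makedirs(local_dir, exist_ok=True)
            for i, text in self.store.items():
                with open(os.path.join(local_dir, f"{i}.txt"), "w") as f:
                    f.write(text)
            return {
                "files_downloaded": len(self.store),
                "directory_path": local_dir,
                "empty_result": not self.store,
                "source_type": "in_memory",
                "config_used": source_config or {},
            }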

View File

@@ -0,0 +1,81 @@
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
class ConnectorCreator:
"""
Factory class for creating external knowledge base connectors and auth providers.
These are different from remote loaders as they typically require
authentication and connect to external document storage systems.
"""
connectors = {
"google_drive": GoogleDriveLoader,
}
auth_providers = {
"google_drive": GoogleDriveAuth,
}
@classmethod
def create_connector(cls, connector_type, *args, **kwargs):
"""
Create a connector instance for the specified type.
Args:
connector_type: Type of connector to create (e.g., 'google_drive')
*args, **kwargs: Arguments to pass to the connector constructor
Returns:
Connector instance
Raises:
ValueError: If connector type is not supported
"""
connector_class = cls.connectors.get(connector_type.lower())
if not connector_class:
raise ValueError(f"No connector class found for type {connector_type}")
return connector_class(*args, **kwargs)
@classmethod
def create_auth(cls, connector_type):
"""
Create an auth provider instance for the specified connector type.
Args:
connector_type: Type of connector auth to create (e.g., 'google_drive')
Returns:
Auth provider instance
Raises:
ValueError: If connector type is not supported for auth
"""
auth_class = cls.auth_providers.get(connector_type.lower())
if not auth_class:
raise ValueError(f"No auth class found for type {connector_type}")
return auth_class()
@classmethod
def get_supported_connectors(cls):
"""
Get list of supported connector types.
Returns:
List of supported connector type strings
"""
return list(cls.connectors.keys())
@classmethod
def is_supported(cls, connector_type):
"""
Check if a connector type is supported.
Args:
connector_type: Type of connector to check
Returns:
True if supported, False otherwise
"""
return connector_type.lower() in cls.connectors
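
Putting the factory together, a short usage sketch (the session token is a placeholder):

    from application.parser.connectors.connector_creator import ConnectorCreator

    provider = "google_drive"
    if ConnectorCreator.is_supported(provider):
        auth = ConnectorCreator.create_auth(provider)  # GoogleDriveAuth()
        loader = ConnectorCreator.create_connector(provider, "<session token>")
        print(ConnectorCreator.get_supported_connectors())  # ['google_drive']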

View File

@@ -0,0 +1,10 @@
"""
Google Drive connector for DocsGPT.
This module provides authentication and document loading capabilities for Google Drive.
"""
from .auth import GoogleDriveAuth
from .loader import GoogleDriveLoader
__all__ = ['GoogleDriveAuth', 'GoogleDriveLoader']

View File

@@ -0,0 +1,268 @@
import logging
import datetime
from typing import Optional, Dict, Any
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import Flow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from application.core.settings import settings
from application.parser.connectors.base import BaseConnectorAuth
class GoogleDriveAuth(BaseConnectorAuth):
"""
Handles Google OAuth 2.0 authentication for Google Drive access.
"""
SCOPES = [
'https://www.googleapis.com/auth/drive.readonly',
'https://www.googleapis.com/auth/drive.metadata.readonly'
]
def __init__(self):
self.client_id = settings.GOOGLE_CLIENT_ID
self.client_secret = settings.GOOGLE_CLIENT_SECRET
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}?provider=google_drive"
if not self.client_id or not self.client_secret:
raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")
def get_authorization_url(self, state: Optional[str] = None) -> str:
try:
flow = Flow.from_client_config(
{
"web": {
"client_id": self.client_id,
"client_secret": self.client_secret,
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"redirect_uris": [self.redirect_uri]
}
},
scopes=self.SCOPES
)
flow.redirect_uri = self.redirect_uri
authorization_url, _ = flow.authorization_url(
access_type='offline',
prompt='consent',
include_granted_scopes='true',
state=state
)
return authorization_url
except Exception as e:
logging.error(f"Error generating authorization URL: {e}")
raise
def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
try:
if not authorization_code:
raise ValueError("Authorization code is required")
flow = Flow.from_client_config(
{
"web": {
"client_id": self.client_id,
"client_secret": self.client_secret,
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"redirect_uris": [self.redirect_uri]
}
},
scopes=self.SCOPES
)
flow.redirect_uri = self.redirect_uri
flow.fetch_token(code=authorization_code)
credentials = flow.credentials
if not credentials.token:
raise ValueError("OAuth flow did not return an access token")
if not credentials.refresh_token:
raise ValueError(
"No refresh token received. This typically happens when offline access wasn't granted."
)
if not credentials.token_uri:
credentials.token_uri = "https://oauth2.googleapis.com/token"
if not credentials.client_id:
credentials.client_id = self.client_id
if not credentials.client_secret:
credentials.client_secret = self.client_secret
return {
'access_token': credentials.token,
'refresh_token': credentials.refresh_token,
'token_uri': credentials.token_uri,
'client_id': credentials.client_id,
'client_secret': credentials.client_secret,
'scopes': credentials.scopes,
'expiry': credentials.expiry.isoformat() if credentials.expiry else None
}
except Exception as e:
logging.error(f"Error exchanging code for tokens: {e}")
raise
def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
try:
if not refresh_token:
raise ValueError("Refresh token is required")
credentials = Credentials(
token=None,
refresh_token=refresh_token,
token_uri="https://oauth2.googleapis.com/token",
client_id=self.client_id,
client_secret=self.client_secret
)
from google.auth.transport.requests import Request
credentials.refresh(Request())
return {
'access_token': credentials.token,
'refresh_token': refresh_token,
'token_uri': credentials.token_uri,
'client_id': credentials.client_id,
'client_secret': credentials.client_secret,
'scopes': credentials.scopes,
'expiry': credentials.expiry.isoformat() if credentials.expiry else None
}
except Exception as e:
logging.error(f"Error refreshing access token: {e}", exc_info=True)
raise
def create_credentials_from_token_info(self, token_info: Dict[str, Any]) -> Credentials:
access_token = token_info.get('access_token')
if not access_token:
raise ValueError("No access token found in token_info")
credentials = Credentials(
token=access_token,
refresh_token=token_info.get('refresh_token'),
token_uri='https://oauth2.googleapis.com/token',
client_id=settings.GOOGLE_CLIENT_ID,
client_secret=settings.GOOGLE_CLIENT_SECRET,
scopes=token_info.get('scopes', ['https://www.googleapis.com/auth/drive.readonly'])
)
if not credentials.token:
raise ValueError("Credentials created without valid access token")
return credentials
def build_drive_service(self, credentials: Credentials):
try:
if not credentials:
raise ValueError("No credentials provided")
if not credentials.token and not credentials.refresh_token:
raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
needs_refresh = credentials.expired or not credentials.token
if needs_refresh:
if credentials.refresh_token:
try:
from google.auth.transport.requests import Request
credentials.refresh(Request())
except Exception as refresh_error:
raise ValueError(f"Failed to refresh credentials: {refresh_error}")
else:
raise ValueError("No access token or refresh token available. User must re-authorize with offline access.")
return build('drive', 'v3', credentials=credentials)
except HttpError as e:
raise ValueError(f"Failed to build Google Drive service: HTTP {e.resp.status}")
except Exception as e:
raise ValueError(f"Failed to build Google Drive service: {str(e)}")
def is_token_expired(self, token_info):
if 'expiry' in token_info and token_info['expiry']:
try:
from dateutil import parser
# Google Drive provides timezone-aware ISO8601 dates
expiry_dt = parser.parse(token_info['expiry'])
current_time = datetime.datetime.now(datetime.timezone.utc)
return current_time >= expiry_dt - datetime.timedelta(seconds=60)
except Exception:
return True
if 'access_token' in token_info and token_info['access_token']:
return False
return True
def get_token_info_from_session(self, session_token: str) -> Dict[str, Any]:
try:
from application.core.mongo_db import MongoDB
from application.core.settings import settings
mongo = MongoDB.get_client()
db = mongo[settings.MONGO_DB_NAME]
sessions_collection = db["connector_sessions"]
session = sessions_collection.find_one({"session_token": session_token})
if not session:
raise ValueError(f"Invalid session token: {session_token}")
if "token_info" not in session:
raise ValueError("Session missing token information")
token_info = session["token_info"]
if not token_info:
raise ValueError("Invalid token information")
required_fields = ["access_token", "refresh_token"]
missing_fields = [field for field in required_fields if field not in token_info or not token_info.get(field)]
if missing_fields:
raise ValueError(f"Missing required token fields: {missing_fields}")
if 'client_id' not in token_info:
token_info['client_id'] = settings.GOOGLE_CLIENT_ID
if 'client_secret' not in token_info:
token_info['client_secret'] = settings.GOOGLE_CLIENT_SECRET
if 'token_uri' not in token_info:
token_info['token_uri'] = 'https://oauth2.googleapis.com/token'
return token_info
except Exception as e:
raise ValueError(f"Failed to retrieve Google Drive token information: {str(e)}")
def validate_credentials(self, credentials: Credentials) -> bool:
"""
Validate Google Drive credentials by making a test API call.
Args:
credentials: Google credentials object
Returns:
True if credentials are valid, False otherwise
"""
try:
service = self.build_drive_service(credentials)
service.about().get(fields="user").execute()
return True
except HttpError as e:
logging.error(f"HTTP error validating credentials: {e}")
return False
except Exception as e:
logging.error(f"Error validating credentials: {e}")
return False
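A rough sketch of the intended end-to-end flow for this auth class; the state value and authorization code are placeholders that would normally arrive via the OAuth redirect.
# Sketch only: "example-state" and "example-auth-code" are hypothetical values.
auth = GoogleDriveAuth()
url = auth.get_authorization_url(state="example-state")        # user consents at this URL
tokens = auth.exchange_code_for_tokens("example-auth-code")    # code arrives on the redirect URI
creds = auth.create_credentials_from_token_info(tokens)
if auth.is_token_expired(tokens):
    tokens = auth.refresh_access_token(tokens["refresh_token"])
if auth.validate_credentials(creds):
    service = auth.build_drive_service(creds)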

View File

@@ -0,0 +1,536 @@
"""
Google Drive loader for DocsGPT.
Loads documents from Google Drive using the Google Drive API.
"""
import io
import logging
import os
from typing import List, Dict, Any, Optional
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
from application.parser.connectors.base import BaseConnectorLoader
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
from application.parser.schema.base import Document
class GoogleDriveLoader(BaseConnectorLoader):
SUPPORTED_MIME_TYPES = {
'application/pdf': '.pdf',
'application/vnd.google-apps.document': '.docx',
'application/vnd.google-apps.presentation': '.pptx',
'application/vnd.google-apps.spreadsheet': '.xlsx',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
'application/msword': '.doc',
'application/vnd.ms-powerpoint': '.ppt',
'application/vnd.ms-excel': '.xls',
'text/plain': '.txt',
'text/csv': '.csv',
'text/html': '.html',
'application/rtf': '.rtf',
'image/jpeg': '.jpg',
'image/jpg': '.jpg',
'image/png': '.png',
}
EXPORT_FORMATS = {
'application/vnd.google-apps.document': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.google-apps.presentation': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
'application/vnd.google-apps.spreadsheet': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
}
def __init__(self, session_token: str):
self.auth = GoogleDriveAuth()
self.session_token = session_token
token_info = self.auth.get_token_info_from_session(session_token)
self.credentials = self.auth.create_credentials_from_token_info(token_info)
try:
self.service = self.auth.build_drive_service(self.credentials)
except Exception as e:
logging.warning(f"Could not build Google Drive service: {e}")
self.service = None
self.next_page_token = None
def _process_file(self, file_metadata: Dict[str, Any], load_content: bool = True) -> Optional[Document]:
try:
file_id = file_metadata.get('id')
file_name = file_metadata.get('name', 'Unknown')
mime_type = file_metadata.get('mimeType', 'application/octet-stream')
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
logging.info(f"Skipping unsupported file type: {mime_type} for file {file_name}")
return None
# Google Drive provides timezone-aware ISO8601 dates
doc_metadata = {
'file_name': file_name,
'mime_type': mime_type,
'size': file_metadata.get('size', None),
'created_time': file_metadata.get('createdTime'),
'modified_time': file_metadata.get('modifiedTime'),
'parents': file_metadata.get('parents', []),
'source': 'google_drive'
}
if not load_content:
return Document(
text="",
doc_id=file_id,
extra_info=doc_metadata
)
content = self._download_file_content(file_id, mime_type)
if content is None:
logging.warning(f"Could not load content for file {file_name} ({file_id})")
return None
return Document(
text=content,
doc_id=file_id,
extra_info=doc_metadata
)
except Exception as e:
logging.error(f"Error processing file: {e}")
return None
def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
session_token = inputs.get('session_token')
if session_token and session_token != self.session_token:
logging.warning("Session token in inputs differs from loader's session token. Using loader's session token.")
self.config = inputs
try:
documents: List[Document] = []
folder_id = inputs.get('folder_id')
file_ids = inputs.get('file_ids', [])
limit = inputs.get('limit', 100)
list_only = inputs.get('list_only', False)
load_content = not list_only
page_token = inputs.get('page_token')
self.next_page_token = None
if file_ids:
# Specific files requested: load them
for file_id in file_ids:
try:
doc = self._load_file_by_id(file_id, load_content=load_content)
if doc:
documents.append(doc)
elif hasattr(self, '_credential_refreshed') and self._credential_refreshed:
self._credential_refreshed = False
logging.info(f"Retrying load of file {file_id} after credential refresh")
doc = self._load_file_by_id(file_id, load_content=load_content)
if doc:
documents.append(doc)
except Exception as e:
logging.error(f"Error loading file {file_id}: {e}")
continue
else:
# Browsing mode: list immediate children of provided folder or root
parent_id = folder_id if folder_id else 'root'
documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token)
logging.info(f"Loaded {len(documents)} documents from Google Drive")
return documents
except Exception as e:
logging.error(f"Error loading data from Google Drive: {e}", exc_info=True)
raise
def _load_file_by_id(self, file_id: str, load_content: bool = True) -> Optional[Document]:
self._ensure_service()
try:
file_metadata = self.service.files().get(
fileId=file_id,
fields='id,name,mimeType,size,createdTime,modifiedTime,parents'
).execute()
return self._process_file(file_metadata, load_content=load_content)
except HttpError as e:
logging.error(f"HTTP error loading file {file_id}: {e.resp.status} - {e.content}")
if e.resp.status in [401, 403]:
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
try:
from google.auth.transport.requests import Request
self.credentials.refresh(Request())
self._ensure_service()
return None
except Exception as refresh_error:
raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
else:
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
return None
except Exception as e:
logging.error(f"Error loading file {file_id}: {e}")
return None
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]:
self._ensure_service()
documents: List[Document] = []
try:
query = f"'{parent_id}' in parents and trashed=false"
next_token_out: Optional[str] = None
while True:
page_size = 100
if limit:
remaining = max(0, limit - len(documents))
if remaining == 0:
break
page_size = min(100, remaining)
results = self.service.files().list(
q=query,
fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
pageToken=page_token,
pageSize=page_size
).execute()
items = results.get('files', [])
for item in items:
mime_type = item.get('mimeType')
if mime_type == 'application/vnd.google-apps.folder':
doc_metadata = {
'file_name': item.get('name', 'Unknown'),
'mime_type': mime_type,
'size': item.get('size', None),
'created_time': item.get('createdTime'),
'modified_time': item.get('modifiedTime'),
'parents': item.get('parents', []),
'source': 'google_drive',
'is_folder': True
}
documents.append(Document(text="", doc_id=item.get('id'), extra_info=doc_metadata))
else:
doc = self._process_file(item, load_content=load_content)
if doc:
documents.append(doc)
if limit and len(documents) >= limit:
self.next_page_token = results.get('nextPageToken')
return documents
page_token = results.get('nextPageToken')
next_token_out = page_token
if not page_token:
break
self.next_page_token = next_token_out
return documents
except Exception as e:
logging.error(f"Error listing items under parent {parent_id}: {e}")
return documents
def _download_file_content(self, file_id: str, mime_type: str) -> Optional[str]:
if not self.credentials.token:
logging.warning("No access token in credentials, attempting to refresh")
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
try:
from google.auth.transport.requests import Request
self.credentials.refresh(Request())
logging.info("Credentials refreshed successfully")
self._ensure_service()
except Exception as e:
logging.error(f"Failed to refresh credentials: {e}")
raise ValueError("Authentication failed and cannot be refreshed: missing or invalid refresh_token")
else:
logging.error("No access token and no refresh_token available")
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
if self.credentials.expired:
logging.warning("Credentials are expired, attempting to refresh")
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
try:
from google.auth.transport.requests import Request
self.credentials.refresh(Request())
logging.info("Credentials refreshed successfully")
self._ensure_service()
except Exception as e:
logging.error(f"Failed to refresh expired credentials: {e}")
raise ValueError("Authentication failed and cannot be refreshed: expired credentials")
else:
logging.error("Credentials expired and no refresh_token available")
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
try:
if mime_type in self.EXPORT_FORMATS:
export_mime_type = self.EXPORT_FORMATS[mime_type]
request = self.service.files().export_media(
fileId=file_id,
mimeType=export_mime_type
)
else:
request = self.service.files().get_media(fileId=file_id)
file_io = io.BytesIO()
downloader = MediaIoBaseDownload(file_io, request)
done = False
while not done:
try:
_, done = downloader.next_chunk()
except HttpError as e:
logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")
return None
except Exception as e:
logging.error(f"Error during download of file {file_id}: {e}")
return None
content_bytes = file_io.getvalue()
try:
content = content_bytes.decode('utf-8')
except UnicodeDecodeError:
try:
content = content_bytes.decode('latin-1')
except UnicodeDecodeError:
logging.error(f"Could not decode file {file_id} as text")
return None
return content
except HttpError as e:
logging.error(f"HTTP error downloading file {file_id}: {e.resp.status} - {e.content}")
if e.resp.status in [401, 403]:
logging.error(f"Authentication error downloading file {file_id}")
if hasattr(self.credentials, 'refresh_token') and self.credentials.refresh_token:
logging.info(f"Attempting to refresh credentials for file {file_id}")
try:
from google.auth.transport.requests import Request
self.credentials.refresh(Request())
logging.info("Credentials refreshed successfully")
self._credential_refreshed = True
self._ensure_service()
return None
except Exception as refresh_error:
logging.error(f"Error refreshing credentials: {refresh_error}")
raise ValueError(f"Authentication failed and could not be refreshed: {refresh_error}")
else:
logging.error("Cannot refresh credentials: missing refresh_token")
raise ValueError("Authentication failed and cannot be refreshed: missing refresh_token")
return None
except Exception as e:
logging.error(f"Error downloading file {file_id}: {e}")
return None
def _download_file_to_directory(self, file_id: str, local_dir: str) -> bool:
try:
self._ensure_service()
return self._download_single_file(file_id, local_dir)
except Exception as e:
logging.error(f"Error downloading file {file_id}: {e}", exc_info=True)
return False
def _ensure_service(self):
if not self.service:
try:
self.service = self.auth.build_drive_service(self.credentials)
except Exception as e:
raise ValueError(f"Cannot access Google Drive: {e}")
def _download_single_file(self, file_id: str, local_dir: str) -> bool:
file_metadata = self.service.files().get(
fileId=file_id,
fields='name,mimeType'
).execute()
file_name = file_metadata['name']
mime_type = file_metadata['mimeType']
if mime_type not in self.SUPPORTED_MIME_TYPES and not mime_type.startswith('application/vnd.google-apps.'):
return False
os.makedirs(local_dir, exist_ok=True)
full_path = os.path.join(local_dir, file_name)
if mime_type in self.EXPORT_FORMATS:
export_mime_type = self.EXPORT_FORMATS[mime_type]
request = self.service.files().export_media(
fileId=file_id,
mimeType=export_mime_type
)
extension = self._get_extension_for_mime_type(export_mime_type)
if not full_path.endswith(extension):
full_path += extension
else:
request = self.service.files().get_media(fileId=file_id)
with open(full_path, 'wb') as f:
downloader = MediaIoBaseDownload(f, request)
done = False
while not done:
_, done = downloader.next_chunk()
return True
def _download_folder_recursive(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
files_downloaded = 0
try:
os.makedirs(local_dir, exist_ok=True)
query = f"'{folder_id}' in parents and trashed=false"
page_token = None
while True:
results = self.service.files().list(
q=query,
fields='nextPageToken, files(id, name, mimeType)',
pageToken=page_token,
pageSize=1000
).execute()
items = results.get('files', [])
logging.info(f"Found {len(items)} items in folder {folder_id}")
for item in items:
item_name = item['name']
item_id = item['id']
mime_type = item['mimeType']
if mime_type == 'application/vnd.google-apps.folder':
if recursive:
# Create subfolder and recurse
subfolder_path = os.path.join(local_dir, item_name)
os.makedirs(subfolder_path, exist_ok=True)
subfolder_files = self._download_folder_recursive(
item_id,
subfolder_path,
recursive
)
files_downloaded += subfolder_files
logging.info(f"Downloaded {subfolder_files} files from subfolder {item_name}")
else:
# Download file
success = self._download_single_file(item_id, local_dir)
if success:
files_downloaded += 1
logging.info(f"Downloaded file: {item_name}")
else:
logging.warning(f"Failed to download file: {item_name}")
page_token = results.get('nextPageToken')
if not page_token:
break
return files_downloaded
except Exception as e:
logging.error(f"Error in _download_folder_recursive for folder {folder_id}: {e}", exc_info=True)
return files_downloaded
def _get_extension_for_mime_type(self, mime_type: str) -> str:
extensions = {
'application/pdf': '.pdf',
'text/plain': '.txt',
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
'application/vnd.openxmlformats-officedocument.presentationml.presentation': '.pptx',
'text/html': '.html',
'text/markdown': '.md',
}
return extensions.get(mime_type, '.bin')
def _download_folder_contents(self, folder_id: str, local_dir: str, recursive: bool = True) -> int:
try:
self._ensure_service()
return self._download_folder_recursive(folder_id, local_dir, recursive)
except Exception as e:
logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
return 0
def download_to_directory(self, local_dir: str, source_config: dict = None) -> dict:
if source_config is None:
source_config = {}
config = source_config if source_config else getattr(self, 'config', {})
files_downloaded = 0
try:
folder_ids = config.get('folder_ids', [])
file_ids = config.get('file_ids', [])
recursive = config.get('recursive', True)
self._ensure_service()
if file_ids:
if isinstance(file_ids, str):
file_ids = [file_ids]
for file_id in file_ids:
if self._download_file_to_directory(file_id, local_dir):
files_downloaded += 1
# Process folders
if folder_ids:
if isinstance(folder_ids, str):
folder_ids = [folder_ids]
for folder_id in folder_ids:
try:
folder_metadata = self.service.files().get(
fileId=folder_id,
fields='name'
).execute()
folder_name = folder_metadata.get('name', '')
folder_path = os.path.join(local_dir, folder_name)
os.makedirs(folder_path, exist_ok=True)
folder_files = self._download_folder_recursive(
folder_id,
folder_path,
recursive
)
files_downloaded += folder_files
logging.info(f"Downloaded {folder_files} files from folder {folder_name}")
except Exception as e:
logging.error(f"Error downloading folder {folder_id}: {e}", exc_info=True)
if not file_ids and not folder_ids:
raise ValueError("No folder_ids or file_ids provided for download")
return {
"files_downloaded": files_downloaded,
"directory_path": local_dir,
"empty_result": files_downloaded == 0,
"source_type": "google_drive",
"config_used": config
}
except Exception as e:
return {
"files_downloaded": files_downloaded,
"directory_path": local_dir,
"empty_result": True,
"source_type": "google_drive",
"config_used": config,
"error": str(e)
}
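A minimal sketch of the two loader entry points, assuming a valid connector session stored in MongoDB; the session token, folder ID, file ID, and local path are placeholders.
# Sketch only: all IDs and the session token below are hypothetical.
loader = GoogleDriveLoader("example-session-token")

# Browse a folder without downloading file content (one page of up to 50 items).
listing = loader.load_data({"folder_id": "example-folder-id", "list_only": True, "limit": 50})
next_page = loader.next_page_token

# Download selected files into a local directory for ingestion.
result = loader.download_to_directory(
    "/tmp/gdrive",
    {"file_ids": ["example-file-id"], "folder_ids": [], "recursive": True},
)
print(result["files_downloaded"], result["empty_result"])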

View File

@@ -6,6 +6,21 @@ from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
def sanitize_content(content: str) -> str:
"""
Remove NUL characters that can cause vector store ingestion to fail.
Args:
content (str): Raw content that may contain NUL characters
Returns:
str: Sanitized content with NUL characters removed
"""
if not content:
return content
return content.replace('\x00', '')
@retry(tries=10, delay=60)
def add_text_to_store_with_retry(store, doc, source_id):
"""
@@ -16,6 +31,9 @@ def add_text_to_store_with_retry(store, doc, source_id):
source_id: Unique identifier for the source.
"""
try:
# Sanitize content to remove NUL characters that cause ingestion failures
doc.page_content = sanitize_content(doc.page_content)
doc.metadata["source_id"] = str(source_id)
store.add_texts([doc.page_content], metadatas=[doc.metadata])
except Exception as e:

View File

@@ -6,6 +6,16 @@ from application.parser.remote.github_loader import GitHubLoader
class RemoteCreator:
"""
Factory class for creating remote content loaders.
These loaders fetch content from remote web sources like URLs,
sitemaps, web crawlers, social media platforms, etc.
For external knowledge base connectors (like Google Drive),
use ConnectorCreator instead.
"""
loaders = {
"url": WebLoader,
"sitemap": SitemapLoader,
@@ -18,5 +28,5 @@ class RemoteCreator:
def create_loader(cls, type, *args, **kwargs):
loader_class = cls.loaders.get(type.lower())
if not loader_class:
raise ValueError(f"No LLM class found for type {type}")
raise ValueError(f"No loader class found for type {type}")
return loader_class(*args, **kwargs)

View File

@@ -13,6 +13,9 @@ Flask==3.1.1
faiss-cpu==1.9.0.post1
flask-restx==1.3.0
google-genai==1.3.0
google-api-python-client==2.179.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.2
gTTS==2.5.4
gunicorn==23.0.0
javalang==0.13.0

View File

@@ -6,6 +6,7 @@ import os
import shutil
import string
import tempfile
from typing import Any, Dict
import zipfile
from collections import Counter
@@ -21,6 +22,7 @@ from application.api.answer.services.stream_processor import get_prompt
from application.core.mongo_db import MongoDB
from application.core.settings import settings
from application.parser.chunking import Chunker
from application.parser.connectors.connector_creator import ConnectorCreator
from application.parser.embedding_pipeline import embed_and_store_documents
from application.parser.file.bulk import SimpleDirectoryReader
from application.parser.remote.remote_creator import RemoteCreator
@@ -649,8 +651,11 @@ def remote_worker(
"id": str(id),
"type": loader,
"remote_data": source_data,
"sync_frequency": sync_frequency,
"sync_frequency": sync_frequency
}
if operation_mode == "sync":
file_data["last_sync"] = datetime.datetime.now()
upload_index(full_path, file_data)
except Exception as e:
logging.error("Error in remote_worker task: %s", str(e), exc_info=True)
@@ -707,7 +712,7 @@ def sync_worker(self, frequency):
self, source_data, name, user, source_type, frequency, retriever, doc_id
)
sync_counts["total_sync_count"] += 1
sync_counts[
"sync_success" if resp["status"] == "success" else "sync_failure"
] += 1
return {
@@ -744,7 +749,7 @@ def attachment_worker(self, file_info, user):
input_files=[local_path], exclude_hidden=True, errors="ignore"
)
.load_data()[0]
.text,
)
@@ -835,3 +840,174 @@ def agent_webhook_worker(self, agent_id, payload):
f"Webhook processed for agent {agent_id}", extra={"agent_id": agent_id}
)
return {"status": "success", "result": result}
def ingest_connector(
self,
job_name: str,
user: str,
source_type: str,
session_token=None,
file_ids=None,
folder_ids=None,
recursive=True,
retriever: str = "classic",
operation_mode: str = "upload",
doc_id=None,
sync_frequency: str = "never",
) -> Dict[str, Any]:
"""
Ingestion for external knowledge base connectors (Google Drive, etc.).
Args:
job_name: Name of the ingestion job
user: User identifier
source_type: Type of remote source ("google_drive", "dropbox", etc.)
session_token: Authentication token for the service
file_ids: List of file IDs to download
folder_ids: List of folder IDs to download
recursive: Whether to recursively download folders
retriever: Type of retriever to use
operation_mode: "upload" for initial ingestion, "sync" for incremental sync
doc_id: Document ID for sync operations (required when operation_mode="sync")
sync_frequency: How often to sync ("never", "daily", "weekly", "monthly")
"""
logging.info(f"Starting remote ingestion from {source_type} for user: {user}, job: {job_name}")
self.update_state(state="PROGRESS", meta={"current": 1})
with tempfile.TemporaryDirectory() as temp_dir:
try:
# Step 1: Initialize the appropriate loader
self.update_state(state="PROGRESS", meta={"current": 10, "status": "Initializing connector"})
if not session_token:
raise ValueError(f"{source_type} connector requires session_token")
if not ConnectorCreator.is_supported(source_type):
raise ValueError(f"Unsupported connector type: {source_type}. Supported types: {ConnectorCreator.get_supported_connectors()}")
remote_loader = ConnectorCreator.create_connector(source_type, session_token)
# Create a clean config for storage
api_source_config = {
"file_ids": file_ids or [],
"folder_ids": folder_ids or [],
"recursive": recursive
}
# Step 2: Download files to temp directory
self.update_state(state="PROGRESS", meta={"current": 20, "status": "Downloading files"})
download_info = remote_loader.download_to_directory(
temp_dir,
api_source_config
)
if download_info.get("empty_result", False) or not download_info.get("files_downloaded", 0):
logging.warning(f"No files were downloaded from {source_type}")
# Create empty result directly instead of calling a separate method
return {
"name": job_name,
"user": user,
"tokens": 0,
"type": source_type,
"source_config": api_source_config,
"directory_structure": "{}",
}
# Step 3: Use SimpleDirectoryReader to process downloaded files
self.update_state(state="PROGRESS", meta={"current": 40, "status": "Processing files"})
reader = SimpleDirectoryReader(
input_dir=temp_dir,
recursive=True,
required_exts=[
".rst", ".md", ".pdf", ".txt", ".docx", ".csv", ".epub",
".html", ".mdx", ".json", ".xlsx", ".pptx", ".png",
".jpg", ".jpeg",
],
exclude_hidden=True,
file_metadata=metadata_from_filename,
)
raw_docs = reader.load_data()
directory_structure = getattr(reader, 'directory_structure', {})
# Step 4: Process documents (chunking, embedding, etc.)
self.update_state(state="PROGRESS", meta={"current": 60, "status": "Processing documents"})
chunker = Chunker(
chunking_strategy="classic_chunk",
max_tokens=MAX_TOKENS,
min_tokens=MIN_TOKENS,
duplicate_headers=False,
)
raw_docs = chunker.chunk(documents=raw_docs)
# Preserve source information in document metadata
for doc in raw_docs:
if hasattr(doc, 'extra_info') and doc.extra_info:
source = doc.extra_info.get('source')
if source and os.path.isabs(source):
# Convert absolute path to relative path
doc.extra_info['source'] = os.path.relpath(source, start=temp_dir)
docs = [Document.to_langchain_format(raw_doc) for raw_doc in raw_docs]
if operation_mode == "upload":
id = ObjectId()
elif operation_mode == "sync":
if not doc_id or not ObjectId.is_valid(doc_id):
logging.error("Invalid doc_id provided for sync operation: %s", doc_id)
raise ValueError("doc_id must be provided for sync operation.")
id = ObjectId(doc_id)
else:
raise ValueError(f"Invalid operation_mode: {operation_mode}")
vector_store_path = os.path.join(temp_dir, "vector_store")
os.makedirs(vector_store_path, exist_ok=True)
self.update_state(state="PROGRESS", meta={"current": 80, "status": "Storing documents"})
embed_and_store_documents(docs, vector_store_path, id, self)
tokens = count_tokens_docs(docs)
# Step 6: Upload index files
file_data = {
"user": user,
"name": job_name,
"tokens": tokens,
"retriever": retriever,
"id": str(id),
"type": "connector",
"remote_data": json.dumps({
"provider": source_type,
**api_source_config
}),
"directory_structure": json.dumps(directory_structure),
"sync_frequency": sync_frequency
}
file_data["last_sync"] = datetime.datetime.now()
upload_index(vector_store_path, file_data)
# Ensure we mark the task as complete
self.update_state(state="PROGRESS", meta={"current": 100, "status": "Complete"})
logging.info(f"Remote ingestion completed: {job_name}")
return {
"user": user,
"name": job_name,
"tokens": tokens,
"type": source_type,
"id": str(id),
"status": "complete"
}
except Exception as e:
logging.error(f"Error during remote ingestion: {e}", exc_info=True)
raise
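A hedged sketch of dispatching this ingestion, assuming the function is registered as a bound Celery task like the other workers in this module (the decorator sits outside this hunk); all argument values are placeholders.
# Sketch only: queued from an API route rather than called directly.
task = ingest_connector.delay(
    job_name="Drive import",
    user="example-user",
    source_type="google_drive",
    session_token="example-session-token",
    folder_ids=["example-folder-id"],
    recursive=True,
    retriever="classic",
    operation_mode="upload",
    sync_frequency="weekly",
)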

View File

@@ -0,0 +1,74 @@
name: docsgpt-oss
services:
frontend:
image: arc53/docsgpt-fe:develop
environment:
- VITE_API_HOST=http://localhost:7091
- VITE_API_STREAMING=$VITE_API_STREAMING
ports:
- "5173:5173"
depends_on:
- backend
backend:
user: root
image: arc53/docsgpt:develop
environment:
- API_KEY=$API_KEY
- EMBEDDINGS_KEY=$API_KEY
- LLM_PROVIDER=$LLM_PROVIDER
- LLM_NAME=$LLM_NAME
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
- CACHE_REDIS_URL=redis://redis:6379/2
- OPENAI_BASE_URL=$OPENAI_BASE_URL
ports:
- "7091:7091"
volumes:
- ../application/indexes:/app/indexes
- ../application/inputs:/app/inputs
- ../application/vectors:/app/vectors
depends_on:
- redis
- mongo
worker:
user: root
image: arc53/docsgpt:develop
command: celery -A application.app.celery worker -l INFO -B
environment:
- API_KEY=$API_KEY
- EMBEDDINGS_KEY=$API_KEY
- LLM_PROVIDER=$LLM_PROVIDER
- LLM_NAME=$LLM_NAME
- CELERY_BROKER_URL=redis://redis:6379/0
- CELERY_RESULT_BACKEND=redis://redis:6379/1
- MONGO_URI=mongodb://mongo:27017/docsgpt
- API_URL=http://backend:7091
- CACHE_REDIS_URL=redis://redis:6379/2
volumes:
- ../application/indexes:/app/indexes
- ../application/inputs:/app/inputs
- ../application/vectors:/app/vectors
depends_on:
- redis
- mongo
redis:
image: redis:6-alpine
ports:
- 6379:6379
mongo:
image: mongo:6
ports:
- 27017:27017
volumes:
- mongodb_data_container:/data/db
volumes:
mongodb_data_container:

View File

@@ -5,6 +5,8 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover" />
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="theme-color" content="#fbfbfb" media="(prefers-color-scheme: light)" />
<meta name="theme-color" content="#161616" media="(prefers-color-scheme: dark)" />
<title>DocsGPT</title>
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
</head>

View File

@@ -29,7 +29,7 @@ export default function Hero({
</div>
{/* Demo Buttons Section */}
<div className="mb-8 w-full max-w-full md:mb-16">
<div className="mb-3 w-full max-w-full md:mb-3">
<div className="grid grid-cols-1 gap-3 text-xs md:grid-cols-1 md:gap-4 lg:grid-cols-2">
{demos?.map(
(demo: { header: string; query: string }, key: number) =>

View File

@@ -586,7 +586,7 @@ export default function NewAgent({ mode }: { mode: 'new' | 'edit' | 'draft' }) {
onChange={(e) => setAgent({ ...agent, name: e.target.value })}
/>
<textarea
className="border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-3 h-32 w-full rounded-3xl border bg-white px-5 py-4 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E]"
className="border-silver text-jet dark:bg-raisin-black dark:text-bright-gray dark:placeholder:text-silver mt-3 h-32 w-full rounded-xl border bg-white px-5 py-4 text-sm outline-hidden placeholder:text-gray-400 dark:border-[#7E7E7E]"
placeholder="Describe your agent"
value={agent.description}
onChange={(e) =>

View File

@@ -38,6 +38,7 @@ const endpoints = {
UPDATE_TOOL_STATUS: '/api/update_tool_status',
UPDATE_TOOL: '/api/update_tool',
DELETE_TOOL: '/api/delete_tool',
SYNC_CONNECTOR: '/api/connectors/sync',
GET_CHUNKS: (
docId: string,
page: number,

View File

@@ -1,5 +1,6 @@
import apiClient from '../client';
import endpoints from '../endpoints';
import { getSessionToken } from '../../utils/providerUtils';
const userService = {
getConfig: (): Promise<any> => apiClient.get(endpoints.USER.CONFIG, null),
@@ -89,7 +90,10 @@ const userService = {
path?: string,
search?: string,
): Promise<any> =>
apiClient.get(endpoints.USER.GET_CHUNKS(docId, page, perPage, path, search), token),
apiClient.get(
endpoints.USER.GET_CHUNKS(docId, page, perPage, path, search),
token,
),
addChunk: (data: any, token: string | null): Promise<any> =>
apiClient.post(endpoints.USER.ADD_CHUNK, data, token),
deleteChunk: (
@@ -104,6 +108,22 @@ const userService = {
apiClient.get(endpoints.USER.DIRECTORY_STRUCTURE(docId), token),
manageSourceFiles: (data: FormData, token: string | null): Promise<any> =>
apiClient.postFormData(endpoints.USER.MANAGE_SOURCE_FILES, data, token),
syncConnector: (
docId: string,
provider: string,
token: string | null,
): Promise<any> => {
const sessionToken = getSessionToken(provider);
return apiClient.post(
endpoints.USER.SYNC_CONNECTOR,
{
source_id: docId,
session_token: sessionToken,
provider: provider,
},
token,
);
},
};
export default userService;

View File

@@ -1 +1 @@
<svg width="16px" height="16px" viewBox="0 0 1024 1024" class="icon" version="1.1" xmlns="http://www.w3.org/2000/svg" fill="#11ee1c" stroke="#11ee1c" stroke-width="83.96799999999999"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M866.133333 258.133333L362.666667 761.6l-204.8-204.8L98.133333 618.666667 362.666667 881.066667l563.2-563.2z" fill="#11ee1c"></path></g></svg>
<svg width="16px" height="16px" viewBox="0 0 1024 1024" class="icon" version="1.1" xmlns="http://www.w3.org/2000/svg" fill="#11ee1c" stroke="#11ee1c" stroke-width="83.96799999999999"><g id="SVGRepo_bgCarrier" stroke-width="0"></g><g id="SVGRepo_tracerCarrier" stroke-linecap="round" stroke-linejoin="round"></g><g id="SVGRepo_iconCarrier"><path d="M866.133333 258.133333L362.666667 761.6l-204.8-204.8L98.133333 618.666667 362.666667 881.066667l563.2-563.2z" fill="#0C9D35"></path></g></svg>

View File

@@ -1 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="white" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="black" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>

View File

@@ -1 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="black" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24"><path fill="white" d="M10.72,19.9a8,8,0,0,1-6.5-9.79A7.77,7.77,0,0,1,10.4,4.16a8,8,0,0,1,9.49,6.52A1.54,1.54,0,0,0,21.38,12h.13a1.37,1.37,0,0,0,1.38-1.54,11,11,0,1,0-12.7,12.39A1.54,1.54,0,0,0,12,21.34h0A1.47,1.47,0,0,0,10.72,19.9Z"><animateTransform attributeName="transform" dur="0.75s" repeatCount="indefinite" type="rotate" values="0 12 12;360 12 12"/></path></svg>

View File

@@ -0,0 +1,130 @@
import React, { useRef } from 'react';
import { useSelector } from 'react-redux';
import { selectToken } from '../preferences/preferenceSlice';
interface ConnectorAuthProps {
provider: string;
onSuccess: (data: { session_token: string; user_email: string }) => void;
onError: (error: string) => void;
label?: string;
}
const providerLabel = (provider: string) => {
const map: Record<string, string> = {
google_drive: 'Google Drive',
};
return map[provider] || provider.replace(/_/g, ' ');
};
const ConnectorAuth: React.FC<ConnectorAuthProps> = ({
provider,
onSuccess,
onError,
label,
}) => {
const token = useSelector(selectToken);
const completedRef = useRef(false);
const intervalRef = useRef<number | null>(null);
const cleanup = () => {
if (intervalRef.current) {
clearInterval(intervalRef.current);
intervalRef.current = null;
}
window.removeEventListener('message', handleAuthMessage as any);
};
const handleAuthMessage = (event: MessageEvent) => {
const successGeneric = event.data?.type === 'connector_auth_success';
const successProvider =
event.data?.type === `${provider}_auth_success` ||
event.data?.type === 'google_drive_auth_success';
const errorProvider =
event.data?.type === `${provider}_auth_error` ||
event.data?.type === 'google_drive_auth_error';
if (successGeneric || successProvider) {
completedRef.current = true;
cleanup();
onSuccess({
session_token: event.data.session_token,
user_email: event.data.user_email || 'Connected User',
});
} else if (errorProvider) {
completedRef.current = true;
cleanup();
onError(event.data.error || 'Authentication failed');
}
};
const handleAuth = async () => {
try {
completedRef.current = false;
cleanup();
const apiHost = import.meta.env.VITE_API_HOST;
const authResponse = await fetch(
`${apiHost}/api/connectors/auth?provider=${provider}`,
{
headers: { Authorization: `Bearer ${token}` },
},
);
if (!authResponse.ok) {
throw new Error(
`Failed to get authorization URL: ${authResponse.status}`,
);
}
const authData = await authResponse.json();
if (!authData.success || !authData.authorization_url) {
throw new Error(authData.error || 'Failed to get authorization URL');
}
const authWindow = window.open(
authData.authorization_url,
`${provider}-auth`,
'width=500,height=600,scrollbars=yes,resizable=yes',
);
if (!authWindow) {
throw new Error(
'Failed to open authentication window. Please allow popups.',
);
}
window.addEventListener('message', handleAuthMessage as any);
const checkClosed = window.setInterval(() => {
if (authWindow.closed) {
clearInterval(checkClosed);
window.removeEventListener('message', handleAuthMessage as any);
if (!completedRef.current) {
onError('Authentication was cancelled');
}
}
}, 1000);
intervalRef.current = checkClosed;
} catch (error) {
onError(error instanceof Error ? error.message : 'Authentication failed');
}
};
const buttonLabel = label || `Connect ${providerLabel(provider)}`;
return (
<button
onClick={handleAuth}
className="flex w-full items-center justify-center gap-2 rounded-lg bg-blue-500 px-4 py-3 text-white transition-colors hover:bg-blue-600"
>
<svg className="h-5 w-5" viewBox="0 0 24 24">
<path
fill="currentColor"
d="M6.28 3l5.72 10H24l-5.72-10H6.28zm11.44 0L12 13l5.72 10H24L18.28 3h-.56zM0 13l5.72 10h5.72L5.72 13H0z"
/>
</svg>
{buttonLabel}
</button>
);
};
export default ConnectorAuth;

View File

@@ -0,0 +1,731 @@
import React, { useState, useRef, useEffect } from 'react';
import { useTranslation } from 'react-i18next';
import { useSelector } from 'react-redux';
import { formatBytes } from '../utils/stringUtils';
import { selectToken } from '../preferences/preferenceSlice';
import Chunks from './Chunks';
import ContextMenu, { MenuOption } from './ContextMenu';
import userService from '../api/services/userService';
import FileIcon from '../assets/file.svg';
import FolderIcon from '../assets/folder.svg';
import ArrowLeft from '../assets/arrow-left.svg';
import ThreeDots from '../assets/three-dots.svg';
import EyeView from '../assets/eye-view.svg';
import SyncIcon from '../assets/sync.svg';
import { useOutsideAlerter } from '../hooks';
interface FileNode {
type?: string;
token_count?: number;
size_bytes?: number;
[key: string]: any;
}
interface DirectoryStructure {
[key: string]: FileNode;
}
interface ConnectorTreeComponentProps {
docId: string;
sourceName: string;
onBackToDocuments: () => void;
}
interface SearchResult {
name: string;
path: string;
isFile: boolean;
}
const ConnectorTreeComponent: React.FC<ConnectorTreeComponentProps> = ({
docId,
sourceName,
onBackToDocuments,
}) => {
const { t } = useTranslation();
const [loading, setLoading] = useState<boolean>(true);
const [error, setError] = useState<string | null>(null);
const [directoryStructure, setDirectoryStructure] =
useState<DirectoryStructure | null>(null);
const [currentPath, setCurrentPath] = useState<string[]>([]);
const token = useSelector(selectToken);
const [activeMenuId, setActiveMenuId] = useState<string | null>(null);
const menuRefs = useRef<{
[key: string]: React.RefObject<HTMLDivElement | null>;
}>({});
const [selectedFile, setSelectedFile] = useState<{
id: string;
name: string;
} | null>(null);
const [searchQuery, setSearchQuery] = useState('');
const [searchResults, setSearchResults] = useState<SearchResult[]>([]);
const searchDropdownRef = useRef<HTMLDivElement>(null);
const [isSyncing, setIsSyncing] = useState<boolean>(false);
const [syncProgress, setSyncProgress] = useState<number>(0);
const [sourceProvider, setSourceProvider] = useState<string>('');
const [syncDone, setSyncDone] = useState<boolean>(false);
useOutsideAlerter(
searchDropdownRef,
() => {
setSearchQuery('');
setSearchResults([]);
},
[],
false,
);
const handleFileClick = (fileName: string) => {
const fullPath = [...currentPath, fileName].join('/');
setSelectedFile({
id: fullPath,
name: fileName,
});
};
const handleSync = async () => {
if (isSyncing) return;
const provider = sourceProvider;
setIsSyncing(true);
setSyncProgress(0);
try {
const response = await userService.syncConnector(docId, provider, token);
const data = await response.json();
if (data.success) {
console.log('Sync started successfully:', data.task_id);
setSyncProgress(10);
// Poll task status using userService
const maxAttempts = 30;
const pollInterval = 2000;
for (let attempt = 0; attempt < maxAttempts; attempt++) {
try {
const statusResponse = await userService.getTaskStatus(
data.task_id,
token,
);
const statusData = await statusResponse.json();
console.log(
`Task status (attempt ${attempt + 1}):`,
statusData.status,
);
if (statusData.status === 'SUCCESS') {
setSyncProgress(100);
console.log('Sync completed successfully');
// Refresh directory structure
try {
const refreshResponse = await userService.getDirectoryStructure(
docId,
token,
);
const refreshData = await refreshResponse.json();
if (refreshData && refreshData.directory_structure) {
setDirectoryStructure(refreshData.directory_structure);
setCurrentPath([]);
}
if (refreshData && refreshData.provider) {
setSourceProvider(refreshData.provider);
}
setSyncDone(true);
setTimeout(() => setSyncDone(false), 5000);
} catch (err) {
console.error('Error refreshing directory structure:', err);
}
break;
} else if (statusData.status === 'FAILURE') {
console.error('Sync task failed:', statusData.result);
break;
} else if (statusData.status === 'PROGRESS') {
const progress = Number(
statusData.result && statusData.result.current != null
? statusData.result.current
: statusData.meta && statusData.meta.current != null
? statusData.meta.current
: 0,
);
setSyncProgress(Math.max(10, progress));
}
await new Promise((resolve) => setTimeout(resolve, pollInterval));
} catch (error) {
console.error('Error polling task status:', error);
break;
}
}
} else {
console.error('Sync failed:', data.error);
}
} catch (err) {
console.error('Error syncing connector:', err);
} finally {
setIsSyncing(false);
setSyncProgress(0);
}
};
useEffect(() => {
const fetchDirectoryStructure = async () => {
try {
setLoading(true);
const directoryResponse = await userService.getDirectoryStructure(
docId,
token,
);
const directoryData = await directoryResponse.json();
if (directoryData && directoryData.directory_structure) {
setDirectoryStructure(directoryData.directory_structure);
} else {
setError('Invalid response format');
}
if (directoryData && directoryData.provider) {
setSourceProvider(directoryData.provider);
}
} catch (err) {
setError('Failed to load source information');
console.error(err);
} finally {
setLoading(false);
}
};
if (docId) {
fetchDirectoryStructure();
}
}, [docId, token]);
const navigateToDirectory = (dirName: string) => {
setCurrentPath([...currentPath, dirName]);
};
const navigateUp = () => {
setCurrentPath(currentPath.slice(0, -1));
};
const getCurrentDirectory = (): DirectoryStructure => {
if (!directoryStructure) return {};
let current = directoryStructure;
for (const dir of currentPath) {
if (current[dir] && !current[dir].type) {
current = current[dir] as DirectoryStructure;
} else {
return {};
}
}
return current;
};
const getMenuRef = (id: string) => {
if (!menuRefs.current[id]) {
menuRefs.current[id] = React.createRef();
}
return menuRefs.current[id];
};
const handleMenuClick = (
e: React.MouseEvent<HTMLButtonElement>,
id: string,
) => {
e.stopPropagation();
setActiveMenuId(activeMenuId === id ? null : id);
};
const getActionOptions = (
name: string,
isFile: boolean,
_itemId: string,
): MenuOption[] => {
const options: MenuOption[] = [];
options.push({
icon: EyeView,
label: t('settings.sources.view'),
onClick: (event: React.SyntheticEvent) => {
event.stopPropagation();
if (isFile) {
handleFileClick(name);
} else {
navigateToDirectory(name);
}
},
iconWidth: 18,
iconHeight: 18,
variant: 'primary',
});
return options;
};
const calculateDirectoryStats = (
structure: DirectoryStructure,
): { totalSize: number; totalTokens: number } => {
let totalSize = 0;
let totalTokens = 0;
Object.entries(structure).forEach(([_, node]) => {
if (node.type) {
// It's a file
totalSize += node.size_bytes || 0;
totalTokens += node.token_count || 0;
} else {
// It's a directory, recurse
const stats = calculateDirectoryStats(node);
totalSize += stats.totalSize;
totalTokens += stats.totalTokens;
}
});
return { totalSize, totalTokens };
};
const handleBackNavigation = () => {
if (selectedFile) {
setSelectedFile(null);
} else if (currentPath.length === 0) {
if (onBackToDocuments) {
onBackToDocuments();
}
} else {
navigateUp();
}
};
const renderPathNavigation = () => {
return (
<div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
{/* Left side with path navigation */}
<div className="flex w-full items-center sm:w-auto">
<button
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
onClick={handleBackNavigation}
>
<img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
</button>
<div className="flex flex-wrap items-center">
<span className="font-semibold break-words text-[#7D54D1]">
{sourceName}
</span>
{currentPath.length > 0 && (
<>
<span className="mx-1 flex-shrink-0 text-gray-500">/</span>
{currentPath.map((dir, index) => (
<React.Fragment key={index}>
<span className="break-words text-gray-700 dark:text-[#E0E0E0]">
{dir}
</span>
{index < currentPath.length - 1 && (
<span className="mx-1 flex-shrink-0 text-gray-500">
/
</span>
)}
</React.Fragment>
))}
</>
)}
</div>
</div>
<div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
{renderFileSearch()}
{/* Sync button */}
<button
onClick={handleSync}
disabled={isSyncing}
className={`flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap transition-colors ${
isSyncing
? 'cursor-not-allowed bg-gray-300 text-gray-600 dark:bg-gray-600 dark:text-gray-400'
: 'bg-purple-30 hover:bg-violets-are-blue text-white'
}`}
title={
isSyncing
? `${t('settings.sources.syncing')} ${syncProgress}%`
: syncDone
? 'Done'
: t('settings.sources.sync')
}
>
<img
src={SyncIcon}
alt={t('settings.sources.sync')}
className={`mr-2 h-4 w-4 brightness-0 invert filter ${isSyncing ? 'animate-spin' : ''}`}
/>
{isSyncing
? `${syncProgress}%`
: syncDone
? 'Done'
: t('settings.sources.sync')}
</button>
</div>
</div>
);
};
const renderFileTree = (directory: DirectoryStructure) => {
if (!directory) return [];
// Create parent directory row
const parentRow =
currentPath.length > 0
? [
<tr
key="parent-dir"
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
onClick={navigateUp}
>
<td className="px-2 py-2 lg:px-4">
<div className="flex items-center">
<img
src={FolderIcon}
alt={t('settings.sources.parentFolderAlt')}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="truncate text-sm dark:text-[#E0E0E0]">
..
</span>
</div>
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
-
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
-
</td>
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
</tr>,
]
: [];
// Sort entries: directories first, then files, both alphabetically
const sortedEntries = Object.entries(directory).sort(
([nameA, nodeA], [nameB, nodeB]) => {
const isFileA = !!nodeA.type;
const isFileB = !!nodeB.type;
if (isFileA !== isFileB) {
return isFileA ? 1 : -1; // Directories first
}
return nameA.localeCompare(nameB); // Alphabetical within each group
},
);
// Process directories
const directoryRows = sortedEntries
.filter(([_, node]) => !node.type)
.map(([name, node]) => {
const itemId = `dir-${name}`;
const menuRef = getMenuRef(itemId);
// Calculate directory stats
const dirStats = calculateDirectoryStats(node as DirectoryStructure);
return (
<tr
key={itemId}
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
onClick={() => navigateToDirectory(name)}
>
<td className="px-2 py-2 lg:px-4">
<div className="flex min-w-0 items-center">
<img
src={FolderIcon}
alt={t('settings.sources.folderAlt')}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="truncate text-sm dark:text-[#E0E0E0]">
{name}
</span>
</div>
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
{dirStats.totalTokens > 0
? dirStats.totalTokens.toLocaleString()
: '-'}
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
{dirStats.totalSize > 0 ? formatBytes(dirStats.totalSize) : '-'}
</td>
<td className="w-10 px-2 py-2 text-sm lg:px-4">
<div ref={menuRef} className="relative">
<button
onClick={(e) => handleMenuClick(e, itemId)}
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
aria-label={t('settings.sources.menuAlt')}
>
<img
src={ThreeDots}
alt={t('settings.sources.menuAlt')}
className="opacity-60 hover:opacity-100"
/>
</button>
<ContextMenu
isOpen={activeMenuId === itemId}
setIsOpen={(isOpen) =>
setActiveMenuId(isOpen ? itemId : null)
}
options={getActionOptions(name, false, itemId)}
anchorRef={menuRef}
position="bottom-left"
offset={{ x: -4, y: 4 }}
/>
</div>
</td>
</tr>
);
});
// Process files
const fileRows = sortedEntries
.filter(([_, node]) => !!node.type)
.map(([name, node]) => {
const itemId = `file-${name}`;
const menuRef = getMenuRef(itemId);
return (
<tr
key={itemId}
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
onClick={() => handleFileClick(name)}
>
<td className="px-2 py-2 lg:px-4">
<div className="flex min-w-0 items-center">
<img
src={FileIcon}
alt={t('settings.sources.fileAlt')}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="truncate text-sm dark:text-[#E0E0E0]">
{name}
</span>
</div>
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
{node.token_count?.toLocaleString() || '-'}
</td>
<td className="px-2 py-2 text-sm md:px-4 dark:text-[#E0E0E0]">
{node.size_bytes ? formatBytes(node.size_bytes) : '-'}
</td>
<td className="w-10 px-2 py-2 text-sm lg:px-4">
<div ref={menuRef} className="relative">
<button
onClick={(e) => handleMenuClick(e, itemId)}
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
aria-label={t('settings.sources.menuAlt')}
>
<img
src={ThreeDots}
alt={t('settings.sources.menuAlt')}
className="opacity-60 hover:opacity-100"
/>
</button>
<ContextMenu
isOpen={activeMenuId === itemId}
setIsOpen={(isOpen) =>
setActiveMenuId(isOpen ? itemId : null)
}
options={getActionOptions(name, true, itemId)}
anchorRef={menuRef}
position="bottom-left"
offset={{ x: -4, y: 4 }}
/>
</div>
</td>
</tr>
);
});
return [...parentRow, ...directoryRows, ...fileRows];
};
const searchFiles = (
query: string,
structure: DirectoryStructure,
currentPath: string[] = [],
): SearchResult[] => {
let results: SearchResult[] = [];
Object.entries(structure).forEach(([name, node]) => {
const fullPath = [...currentPath, name].join('/');
if (name.toLowerCase().includes(query.toLowerCase())) {
results.push({
name,
path: fullPath,
isFile: !!node.type,
});
}
if (!node.type) {
// If it's a directory, search recursively
results = [
...results,
...searchFiles(query, node as DirectoryStructure, [
...currentPath,
name,
]),
];
}
});
return results;
};
const handleSearchSelect = (result: SearchResult) => {
if (result.isFile) {
const pathParts = result.path.split('/');
const fileName = pathParts.pop() || '';
setCurrentPath(pathParts);
setSelectedFile({
id: result.path,
name: fileName,
});
} else {
setCurrentPath(result.path.split('/'));
setSelectedFile(null);
}
setSearchQuery('');
setSearchResults([]);
};
const renderFileSearch = () => {
return (
<div className="relative w-52" ref={searchDropdownRef}>
<input
type="text"
value={searchQuery}
onChange={(e) => {
setSearchQuery(e.target.value);
if (directoryStructure) {
setSearchResults(searchFiles(e.target.value, directoryStructure));
}
}}
placeholder={t('settings.sources.searchFiles')}
className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
/>
{searchQuery && (
<div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
<div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
{searchResults.length === 0 ? (
<div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
{t('settings.sources.noResults')}
</div>
) : (
searchResults.map((result, index) => (
<div
key={index}
onClick={() => handleSearchSelect(result)}
title={result.path}
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${
index !== searchResults.length - 1
? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
: ''
}`}
>
<img
src={result.isFile ? FileIcon : FolderIcon}
alt={
result.isFile
? t('settings.sources.fileAlt')
: t('settings.sources.folderAlt')
}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
{result.path.split('/').pop() || result.path}
</span>
</div>
))
)}
</div>
</div>
)}
</div>
);
};
const handleFileSearch = (searchQuery: string) => {
if (directoryStructure) {
return searchFiles(searchQuery, directoryStructure);
}
return [];
};
const handleFileSelect = (path: string) => {
const pathParts = path.split('/');
const fileName = pathParts.pop() || '';
setCurrentPath(pathParts);
setSelectedFile({
id: path,
name: fileName,
});
};
const currentDirectory = getCurrentDirectory();
const navigateToPath = (index: number) => {
setCurrentPath(currentPath.slice(0, index + 1));
};
return (
<div>
{selectedFile ? (
<div className="flex">
<div className="flex-1">
<Chunks
documentId={docId}
documentName={sourceName}
handleGoBack={() => setSelectedFile(null)}
path={selectedFile.id}
onFileSearch={handleFileSearch}
onFileSelect={handleFileSelect}
/>
</div>
</div>
) : (
<div className="flex w-full max-w-full flex-col overflow-hidden">
<div className="mb-2">{renderPathNavigation()}</div>
<div className="w-full">
<div className="overflow-x-auto rounded-[6px] border border-[#D1D9E0] dark:border-[#6A6A6A]">
<table className="w-full min-w-[600px] table-auto bg-transparent">
<thead className="bg-gray-100 dark:bg-[#27282D]">
<tr className="border-b border-[#D1D9E0] dark:border-[#6A6A6A]">
<th className="min-w-[200px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
{t('settings.sources.fileName')}
</th>
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
{t('settings.sources.tokens')}
</th>
<th className="min-w-[80px] px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]">
{t('settings.sources.size')}
</th>
<th className="w-10 px-2 py-3 text-left text-sm font-medium text-gray-700 lg:px-4 dark:text-[#59636E]"></th>
</tr>
</thead>
<tbody>{renderFileTree(getCurrentDirectory())}</tbody>
</table>
</div>
</div>
</div>
)}
</div>
);
};
export default ConnectorTreeComponent;

View File

@@ -2,6 +2,7 @@ import React, { useState, useRef, useEffect } from 'react';
import { useTranslation } from 'react-i18next';
import { useSelector } from 'react-redux';
import { selectToken } from '../preferences/preferenceSlice';
import { formatBytes } from '../utils/stringUtils';
import Chunks from './Chunks';
import ContextMenu, { MenuOption } from './ContextMenu';
import userService from '../api/services/userService';
@@ -10,9 +11,7 @@ import FolderIcon from '../assets/folder.svg';
import ArrowLeft from '../assets/arrow-left.svg';
import ThreeDots from '../assets/three-dots.svg';
import EyeView from '../assets/eye-view.svg';
import OutlineSource from '../assets/outline-source.svg';
import Trash from '../assets/red-trash.svg';
import SearchIcon from '../assets/search.svg';
import { useOutsideAlerter } from '../hooks';
import ConfirmationModal from '../modals/ConfirmationModal';
@@ -128,14 +127,6 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
}
}, [docId, token]);
const formatBytes = (bytes: number): string => {
if (bytes === 0) return '0 Bytes';
const k = 1024;
const sizes = ['Bytes', 'KB', 'MB', 'GB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + ' ' + sizes[i];
};
const navigateToDirectory = (dirName: string) => {
setCurrentPath((prev) => [...prev, dirName]);
};
@@ -443,18 +434,18 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
const renderPathNavigation = () => {
return (
<div className="mb-0 min-h-[38px] flex flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
<div className="mb-0 flex min-h-[38px] flex-col gap-2 text-base sm:flex-row sm:items-center sm:justify-between">
{/* Left side with path navigation */}
<div className="flex w-full items-center sm:w-auto">
<button
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34] font-medium"
className="mr-3 flex h-[29px] w-[29px] items-center justify-center rounded-full border p-2 text-sm font-medium text-gray-400 dark:border-0 dark:bg-[#28292D] dark:text-gray-500 dark:hover:bg-[#2E2F34]"
onClick={handleBackNavigation}
>
<img src={ArrowLeft} alt="left-arrow" className="h-3 w-3" />
</button>
<div className="flex flex-wrap items-center">
<span className="text-[#7D54D1] font-semibold break-words">
<span className="font-semibold break-words text-[#7D54D1]">
{sourceName}
</span>
{currentPath.length > 0 && (
@@ -485,8 +476,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
</div>
</div>
<div className="flex relative flex-row flex-nowrap items-center gap-2 w-full sm:w-auto justify-end mt-2 sm:mt-0">
<div className="relative mt-2 flex w-full flex-row flex-nowrap items-center justify-end gap-2 sm:mt-0 sm:w-auto">
{processingRef.current && (
<div className="text-sm text-gray-500">
{currentOpRef.current === 'add'
@@ -495,13 +485,13 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
</div>
)}
{renderFileSearch()}
{renderFileSearch()}
{/* Add file button */}
{!processingRef.current && (
<button
onClick={handleAddFile}
className="bg-purple-30 hover:bg-violets-are-blue flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] whitespace-nowrap text-white font-medium"
className="bg-purple-30 hover:bg-violets-are-blue flex h-[38px] min-w-[108px] items-center justify-center rounded-full px-4 text-[14px] font-medium whitespace-nowrap text-white"
title={t('settings.sources.addFile')}
>
{t('settings.sources.addFile')}
@@ -543,32 +533,32 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
const parentRow =
currentPath.length > 0
? [
<tr
key="parent-dir"
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
onClick={navigateUp}
>
<td className="px-2 py-2 lg:px-4">
<div className="flex items-center">
<img
src={FolderIcon}
alt={t('settings.sources.parentFolderAlt')}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="truncate text-sm dark:text-[#E0E0E0]">
..
</span>
</div>
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
-
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
-
</td>
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
</tr>,
]
<tr
key="parent-dir"
className="cursor-pointer border-b border-[#D1D9E0] hover:bg-[#ECEEEF] dark:border-[#6A6A6A] dark:hover:bg-[#27282D]"
onClick={navigateUp}
>
<td className="px-2 py-2 lg:px-4">
<div className="flex items-center">
<img
src={FolderIcon}
alt={t('settings.sources.parentFolderAlt')}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="truncate text-sm dark:text-[#E0E0E0]">
..
</span>
</div>
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
-
</td>
<td className="px-2 py-2 text-sm lg:px-4 dark:text-[#E0E0E0]">
-
</td>
<td className="w-10 px-2 py-2 text-sm lg:px-4"></td>
</tr>,
]
: [];
// Render directories first, then files
@@ -609,7 +599,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
<div ref={menuRef} className="relative">
<button
onClick={(e) => handleMenuClick(e, itemId)}
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E] font-medium"
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
aria-label={t('settings.sources.menuAlt')}
>
<img
@@ -665,7 +655,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
<div ref={menuRef} className="relative">
<button
onClick={(e) => handleMenuClick(e, itemId)}
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E] font-medium"
className="inline-flex h-[35px] w-[24px] shrink-0 items-center justify-center rounded-md font-medium transition-colors hover:bg-[#EBEBEB] dark:hover:bg-[#26272E]"
aria-label={t('settings.sources.menuAlt')}
>
<img
@@ -757,14 +747,12 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
}
}}
placeholder={t('settings.sources.searchFiles')}
className={`w-full h-[38px] border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A]
${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'}
bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
className={`h-[38px] w-full border border-[#D1D9E0] px-4 py-2 dark:border-[#6A6A6A] ${searchQuery ? 'rounded-t-[24px]' : 'rounded-[24px]'} bg-transparent focus:outline-none dark:text-[#E0E0E0]`}
/>
{searchQuery && (
<div className="absolute top-full left-0 right-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg dark:border-[#6A6A6A] dark:bg-[#1F2023] transition-all duration-200">
<div className="max-h-[calc(100vh-200px)] overflow-y-auto overflow-x-hidden overscroll-contain">
<div className="absolute top-full right-0 left-0 z-10 max-h-[calc(100vh-200px)] w-full overflow-hidden rounded-b-[12px] border border-t-0 border-[#D1D9E0] bg-white shadow-lg transition-all duration-200 dark:border-[#6A6A6A] dark:bg-[#1F2023]">
<div className="max-h-[calc(100vh-200px)] overflow-x-hidden overflow-y-auto overscroll-contain">
{searchResults.length === 0 ? (
<div className="py-2 text-center text-sm text-gray-500 dark:text-gray-400">
{t('settings.sources.noResults')}
@@ -775,10 +763,11 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
key={index}
onClick={() => handleSearchSelect(result)}
title={result.path}
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${index !== searchResults.length - 1
className={`flex min-w-0 cursor-pointer items-center px-3 py-2 hover:bg-[#ECEEEF] dark:hover:bg-[#27282D] ${
index !== searchResults.length - 1
? 'border-b border-[#D1D9E0] dark:border-[#6A6A6A]'
: ''
}`}
}`}
>
<img
src={result.isFile ? FileIcon : FolderIcon}
@@ -789,7 +778,7 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
}
className="mr-2 h-4 w-4 flex-shrink-0"
/>
<span className="text-sm dark:text-[#E0E0E0] truncate flex-1">
<span className="flex-1 truncate text-sm dark:text-[#E0E0E0]">
{result.path.split('/').pop() || result.path}
</span>
</div>
@@ -871,7 +860,9 @@ const FileTreeComponent: React.FC<FileTreeComponentProps> = ({
message={
itemToDelete?.isFile
? t('settings.sources.confirmDelete')
: t('settings.sources.deleteDirectoryWarning', { name: itemToDelete?.name })
: t('settings.sources.deleteDirectoryWarning', {
name: itemToDelete?.name,
})
}
modalState={deleteModalState}
setModalState={setDeleteModalState}

View File

@@ -16,7 +16,9 @@ const MermaidRenderer: React.FC<MermaidRendererProps> = ({
isLoading,
}) => {
const [isDarkTheme] = useDarkTheme();
const diagramId = useRef(`mermaid-${crypto.randomUUID()}`);
const diagramId = useRef(
`mermaid-${Date.now()}-${Math.random().toString(36).substring(2)}`,
);
const status = useSelector(selectStatus);
const [error, setError] = useState<string | null>(null);
const [showCode, setShowCode] = useState<boolean>(false);

View File

@@ -259,7 +259,7 @@ export default function MessageInput({
return (
<div className="mx-2 flex w-full flex-col">
<div className="border-dark-gray bg-lotion dark:border-grey relative flex w-full flex-col rounded-[23px] border dark:bg-transparent">
<div className="flex flex-wrap gap-1.5 px-4 pt-3 pb-0 sm:gap-2 sm:px-6">
<div className="flex flex-wrap gap-1.5 px-2 py-2 sm:gap-2 sm:px-3">
{attachments.map((attachment, index) => (
<div
key={index}
@@ -353,14 +353,14 @@ export default function MessageInput({
onChange={handleChange}
tabIndex={1}
placeholder={t('inputPlaceholder')}
className="inputbox-style no-scrollbar bg-lotion dark:text-bright-gray dark:placeholder:text-bright-gray/50 w-full overflow-x-hidden overflow-y-auto rounded-t-[23px] px-4 py-3 text-base leading-tight whitespace-pre-wrap opacity-100 placeholder:text-gray-500 focus:outline-hidden sm:px-6 sm:py-5 dark:bg-transparent"
className="inputbox-style no-scrollbar bg-lotion dark:text-bright-gray dark:placeholder:text-bright-gray/50 w-full overflow-x-hidden overflow-y-auto rounded-t-[23px] px-2 text-base leading-tight whitespace-pre-wrap opacity-100 placeholder:text-gray-500 focus:outline-hidden sm:px-3 dark:bg-transparent"
onInput={handleInput}
onKeyDown={handleKeyDown}
aria-label={t('inputPlaceholder')}
/>
</div>
<div className="flex items-center px-3 py-1.5 sm:px-4 sm:py-2">
<div className="flex items-center px-2 pb-1.5 sm:px-3 sm:pb-2">
<div className="flex grow flex-wrap gap-1 sm:gap-2">
{showSourceButton && (
<button
@@ -369,7 +369,7 @@ export default function MessageInput({
onClick={() => setIsSourcesPopupOpen(!isSourcesPopupOpen)}
title={
selectedDocs && selectedDocs.length > 0
? selectedDocs.map(doc => doc.name).join(', ')
? selectedDocs.map((doc) => doc.name).join(', ')
: t('conversation.sources.title')
}
>
@@ -380,7 +380,7 @@ export default function MessageInput({
/>
<span className="xs:text-[12px] dark:text-bright-gray truncate overflow-hidden text-[10px] font-medium text-[#5D5D5D] sm:text-[14px]">
{selectedDocs && selectedDocs.length > 0
? selectedDocs.length === 1
? selectedDocs.length === 1
? selectedDocs[0].name
: `${selectedDocs.length} sources selected`
: t('conversation.sources.title')}
@@ -430,18 +430,18 @@ export default function MessageInput({
<button
onClick={loading ? undefined : handleSubmit}
aria-label={loading ? t('loading') : t('send')}
className={`flex items-center justify-center rounded-full p-2 sm:p-2.5 ${loading ? 'bg-gray-300 dark:bg-gray-600' : 'bg-black dark:bg-white'} ml-auto shrink-0`}
className={`flex h-7 w-7 items-center justify-center rounded-full sm:h-9 sm:w-9 ${loading || !value.trim() ? 'bg-black opacity-60 dark:bg-[#F0F3F4] dark:opacity-80' : 'bg-black opacity-100 dark:bg-[#F0F3F4]'} ml-auto shrink-0`}
disabled={loading}
>
{loading ? (
<img
src={isDarkTheme ? SpinnerDark : Spinner}
className="h-3.5 w-3.5 animate-spin sm:h-4 sm:w-4"
className="mx-auto my-auto block h-3.5 w-3.5 animate-spin sm:h-4 sm:w-4"
alt={t('loading')}
/>
) : (
<img
className={`h-3.5 w-3.5 sm:h-4 sm:w-4 ${isDarkTheme ? 'invert filter' : ''}`}
className={`mx-auto my-auto block h-3.5 w-3.5 translate-x-[-0.9px] translate-y-[1.1px] sm:h-4 sm:w-4 ${isDarkTheme ? 'invert filter' : ''}`}
src={PaperPlane}
alt={t('send')}
/>

View File

@@ -248,7 +248,7 @@ export default function MultiSelectPopup({
</div>
<div className="shrink-0">
<div
className={`dark:bg-charleston-green-2 flex h-4 w-4 items-center justify-center rounded-xs border border-[#C6C6C6] bg-white dark:border-[#757783]`}
className={`dark:bg-charleston-green-2 flex h-4 w-4 items-center justify-center rounded-xs border-2 border-[#C6C6C6] bg-white dark:border-[#757783]`}
aria-hidden="true"
>
{isSelected && (

View File

@@ -200,7 +200,7 @@ export default function SourcesPopup({
{option.name}
</span>
<div
className={`flex h-4 w-4 shrink-0 items-center justify-center border border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
className={`flex h-4 w-4 shrink-0 items-center justify-center rounded-xs border-2 border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
>
{isSelected && (
<img

View File

@@ -46,7 +46,7 @@ const ToggleSwitch: React.FC<ToggleSwitchProps> = ({
return (
<label
className={`flex cursor-pointer select-none flex-row items-center ${
className={`flex cursor-pointer flex-row items-center select-none ${
labelPosition === 'right' ? 'flex-row-reverse' : ''
} ${disabled ? 'cursor-not-allowed opacity-50' : ''} ${className}`}
>
@@ -75,7 +75,7 @@ const ToggleSwitch: React.FC<ToggleSwitchProps> = ({
}`}
></div>
<div
className={`absolute ${toggle} flex items-center justify-center rounded-full bg-white opacity-80 transition ${
className={`absolute ${toggle} flex items-center justify-center rounded-full bg-white transition ${
checked ? `${translate} bg-silver` : ''
}`}
></div>

View File

@@ -207,7 +207,7 @@ export default function ToolsPopup({
</div>
<div className="flex shrink-0 items-center">
<div
className={`flex h-4 w-4 items-center justify-center border border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
className={`flex h-4 w-4 items-center justify-center rounded-xs border-2 border-[#C6C6C6] p-[0.5px] dark:border-[#757783]`}
>
{tool.status && (
<img

View File

@@ -67,6 +67,7 @@
"preLoaded": "Pre-loaded",
"private": "Private",
"sync": "Sync",
"syncing": "Syncing...",
"syncFrequency": {
"never": "Never",
"daily": "Daily",

View File

@@ -67,6 +67,7 @@
"preLoaded": "Precargado",
"private": "Privado",
"sync": "Sincronizar",
"syncing": "Sincronizando...",
"syncFrequency": {
"never": "Nunca",
"daily": "Diario",

View File

@@ -67,6 +67,7 @@
"preLoaded": "プリロード済み",
"private": "プライベート",
"sync": "同期",
"syncing": "同期中...",
"syncFrequency": {
"never": "なし",
"daily": "毎日",

View File

@@ -67,6 +67,7 @@
"preLoaded": "Предзагруженный",
"private": "Частный",
"sync": "Синхронизация",
"syncing": "Синхронизация...",
"syncFrequency": {
"never": "Никогда",
"daily": "Ежедневно",

View File

@@ -67,6 +67,7 @@
"preLoaded": "預載入",
"private": "私人",
"sync": "同步",
"syncing": "同步中...",
"syncFrequency": {
"never": "從不",
"daily": "每天",

View File

@@ -67,6 +67,7 @@
"preLoaded": "预加载",
"private": "私有",
"sync": "同步",
"syncing": "同步中...",
"syncFrequency": {
"never": "从不",
"daily": "每天",

View File

@@ -42,10 +42,10 @@ export default function WrapperModal({
}, [close, isPerformingTask]);
const modalContent = (
<div className="bg-gray-alpha bg-opacity-50 fixed top-0 left-0 z-30 flex h-screen w-screen items-center justify-center">
<div className="fixed top-0 left-0 z-30 flex h-screen w-screen items-center justify-center">
<div
ref={modalRef}
className={`relative w-11/12 rounded-2xl bg-white p-8 sm:w-[512px] dark:bg-[#26272E] ${className}`}
className={`relative w-11/12 rounded-2xl bg-white p-8 shadow-2xl sm:w-[512px] dark:bg-[#26272E] ${className}`}
>
{!isPerformingTask && (
<button

View File

@@ -1,4 +1,3 @@
import React, { useCallback, useEffect, useRef, useState } from 'react';
import { useTranslation } from 'react-i18next';
import { useDispatch, useSelector } from 'react-redux';
@@ -29,6 +28,7 @@ import {
import Upload from '../upload/Upload';
import { formatDate } from '../utils/dateTimeUtils';
import FileTreeComponent from '../components/FileTreeComponent';
import ConnectorTreeComponent from '../components/ConnectorTreeComponent';
import Chunks from '../components/Chunks';
const formatTokens = (tokens: number): string => {
@@ -272,11 +272,19 @@ export default function Sources({
return documentToView ? (
<div className="mt-8 flex flex-col">
{documentToView.isNested ? (
<FileTreeComponent
docId={documentToView.id || ''}
sourceName={documentToView.name}
onBackToDocuments={() => setDocumentToView(undefined)}
/>
documentToView.type === 'connector' ? (
<ConnectorTreeComponent
docId={documentToView.id || ''}
sourceName={documentToView.name}
onBackToDocuments={() => setDocumentToView(undefined)}
/>
) : (
<FileTreeComponent
docId={documentToView.id || ''}
sourceName={documentToView.name}
onBackToDocuments={() => setDocumentToView(undefined)}
/>
)
) : (
<Chunks
documentId={documentToView.id || ''}
@@ -310,7 +318,7 @@ export default function Sources({
setSearchTerm(e.target.value);
setCurrentPage(1);
}}
className="w-full h-[32px] rounded-full border border-silver dark:border-silver/40 bg-transparent px-3 text-sm text-jet dark:text-bright-gray placeholder:text-gray-400 dark:placeholder:text-gray-500 outline-none focus:border-silver dark:focus:border-silver/60"
className="border-silver dark:border-silver/40 text-jet dark:text-bright-gray focus:border-silver dark:focus:border-silver/60 h-[32px] w-full rounded-full border bg-transparent px-3 text-sm outline-none placeholder:text-gray-400 dark:placeholder:text-gray-500"
/>
</div>
</div>
@@ -327,7 +335,7 @@ export default function Sources({
</div>
<div className="relative w-full">
{loading ? (
<div className="w-full grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6 px-2 py-4">
<div className="grid w-full grid-cols-1 gap-6 px-2 py-4 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4">
<SkeletonLoader component="sourceCards" count={rowsPerPage} />
</div>
) : !currentDocuments?.length ? (
@@ -342,19 +350,19 @@ export default function Sources({
</p>
</div>
) : (
<div className="w-full grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4 gap-6 px-2 py-4">
{currentDocuments.map((document, index) => {
const docId = document.id ? document.id.toString() : '';
<div className="grid w-full grid-cols-1 gap-6 px-2 py-4 sm:grid-cols-2 lg:grid-cols-3 xl:grid-cols-4">
{currentDocuments.map((document, index) => {
const docId = document.id ? document.id.toString() : '';
return (
<div key={docId} className="relative">
<div
className={`flex h-[130px] w-full flex-col rounded-2xl bg-[#F9F9F9] p-3 transition-all duration-200 dark:bg-[#383838] ${
activeMenuId === docId || syncMenuState.docId === docId
? 'scale-[1.05]'
: 'hover:scale-[1.05]'
}`}
>
return (
<div key={docId} className="relative">
<div
className={`flex h-[130px] w-full flex-col rounded-2xl bg-[#F9F9F9] p-3 transition-all duration-200 dark:bg-[#383838] ${
activeMenuId === docId || syncMenuState.docId === docId
? 'scale-[1.05]'
: 'hover:scale-[1.05]'
}`}
>
<div className="w-full flex-1">
<div className="flex w-full items-center justify-between gap-2">
<h3
@@ -418,7 +426,7 @@ export default function Sources({
<img
src={CalendarIcon}
alt=""
className="w-[14px] h-[14px]"
className="h-[14px] w-[14px]"
/>
<span className="font-inter text-[12px] leading-[18px] font-[500] text-[#848484] dark:text-[#848484]">
{document.date ? formatDate(document.date) : ''}
@@ -428,7 +436,7 @@ export default function Sources({
<img
src={DiscIcon}
alt=""
className="w-[14px] h-[14px]"
className="h-[14px] w-[14px]"
/>
<span className="font-inter text-[12px] leading-[18px] font-[500] text-[#848484] dark:text-[#848484]">
{document.tokens

View File

@@ -4,6 +4,13 @@ import { useTranslation } from 'react-i18next';
import { useDispatch, useSelector } from 'react-redux';
import userService from '../api/services/userService';
import {
getSessionToken,
setSessionToken,
removeSessionToken,
} from '../utils/providerUtils';
import { formatDate } from '../utils/dateTimeUtils';
import { formatBytes } from '../utils/stringUtils';
import FileUpload from '../assets/file_upload.svg';
import WebsiteCollect from '../assets/website_collect.svg';
import Dropdown from '../components/Dropdown';
@@ -25,6 +32,9 @@ import {
IngestorFormSchemas,
IngestorType,
} from './types/ingestor';
import FileIcon from '../assets/file.svg';
import FolderIcon from '../assets/folder.svg';
import ConnectorAuth from '../components/ConnectorAuth';
function Upload({
receivedFile = [],
@@ -48,6 +58,23 @@ function Upload({
const [activeTab, setActiveTab] = useState<string | null>(renderTab);
const [showAdvancedOptions, setShowAdvancedOptions] = useState(false);
// Google Drive state
const [isGoogleDriveConnected, setIsGoogleDriveConnected] = useState(false);
const [googleDriveFiles, setGoogleDriveFiles] = useState<any[]>([]);
const [selectedFiles, setSelectedFiles] = useState<string[]>([]);
const [isLoadingFiles, setIsLoadingFiles] = useState(false);
const [isAuthenticating, setIsAuthenticating] = useState(false);
const [userEmail, setUserEmail] = useState<string>('');
const [authError, setAuthError] = useState<string>('');
const [currentFolderId, setCurrentFolderId] = useState<string | null>(null);
const [folderPath, setFolderPath] = useState<
Array<{ id: string | null; name: string }>
>([{ id: null, name: 'My Drive' }]);
const [nextPageToken, setNextPageToken] = useState<string | null>(null);
const [hasMoreFiles, setHasMoreFiles] = useState<boolean>(false);
const scrollContainerRef = useRef<HTMLDivElement | null>(null);
const renderFormFields = () => {
const schema = IngestorFormSchemas[ingestor.type];
if (!schema) return null;
@@ -204,6 +231,7 @@ function Upload({
{ label: 'Link', value: 'url' },
{ label: 'GitHub', value: 'github' },
{ label: 'Reddit', value: 'reddit' },
{ label: 'Google Drive', value: 'google_drive' },
];
const sourceDocs = useSelector(selectSourceDocs);
@@ -428,29 +456,40 @@ function Upload({
formData.append('user', 'local');
formData.append('source', ingestor.type);
const defaultConfig = IngestorDefaultConfigs[ingestor.type].config;
let configData;
const mergedConfig = { ...defaultConfig, ...ingestor.config };
const filteredConfig = Object.entries(mergedConfig).reduce(
(acc, [key, value]) => {
const field = IngestorFormSchemas[ingestor.type].find(
(f) => f.name === key,
);
// Include the field if:
// 1. It's required, or
// 2. It's optional and has a non-empty value
if (
field?.required ||
(value !== undefined && value !== null && value !== '')
) {
acc[key] = value;
}
return acc;
},
{} as Record<string, any>,
);
if (ingestor.type === 'google_drive') {
const sessionToken = getSessionToken(ingestor.type);
formData.append('data', JSON.stringify(filteredConfig));
const selectedItems = googleDriveFiles.filter((file) =>
selectedFiles.includes(file.id),
);
const selectedFolderIds = selectedItems
.filter(
(item) =>
item.type === 'application/vnd.google-apps.folder' || item.isFolder,
)
.map((folder) => folder.id);
const selectedFileIds = selectedItems
.filter(
(item) =>
item.type !== 'application/vnd.google-apps.folder' &&
!item.isFolder,
)
.map((file) => file.id);
configData = {
file_ids: selectedFileIds,
folder_ids: selectedFolderIds,
recursive: ingestor.config.recursive,
session_token: sessionToken || null,
};
} else {
configData = { ...ingestor.config };
}
formData.append('data', JSON.stringify(configData));
const apiHost: string = import.meta.env.VITE_API_HOST;
const xhr = new XMLHttpRequest();
@@ -477,6 +516,181 @@ function Upload({
xhr.setRequestHeader('Authorization', `Bearer ${token}`);
xhr.send(formData);
};
useEffect(() => {
if (ingestor.type === 'google_drive') {
const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) {
// Auto-authenticate if session token exists
setIsGoogleDriveConnected(true);
setAuthError('');
// Fetch user email and files using the existing session token
fetchUserEmailAndLoadFiles(sessionToken);
}
}
}, [ingestor.type]);
const fetchUserEmailAndLoadFiles = async (sessionToken: string) => {
try {
const apiHost = import.meta.env.VITE_API_HOST;
const validateResponse = await fetch(
`${apiHost}/api/connectors/validate-session`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${token}`,
},
body: JSON.stringify({
provider: 'google_drive',
session_token: sessionToken,
}),
},
);
if (!validateResponse.ok) {
removeSessionToken(ingestor.type);
setIsGoogleDriveConnected(false);
setAuthError('Session expired. Please reconnect to Google Drive.');
return;
}
const validateData = await validateResponse.json();
if (validateData.success) {
setUserEmail(validateData.user_email || 'Connected User');
// reset pagination state and files
setGoogleDriveFiles([]);
setNextPageToken(null);
setHasMoreFiles(false);
loadGoogleDriveFiles(sessionToken, null, null, false);
} else {
removeSessionToken(ingestor.type);
setIsGoogleDriveConnected(false);
setAuthError(
validateData.error ||
'Session expired. Please reconnect your Google Drive account and make sure to grant offline access.',
);
}
} catch (error) {
console.error('Error validating Google Drive session:', error);
setAuthError('Failed to validate session. Please reconnect.');
setIsGoogleDriveConnected(false);
}
};
const loadGoogleDriveFiles = async (
sessionToken: string,
folderId?: string | null,
pageToken?: string | null,
append = false,
) => {
setIsLoadingFiles(true);
try {
const apiHost = import.meta.env.VITE_API_HOST;
const requestBody: any = {
session_token: sessionToken,
limit: 10,
};
if (folderId) {
requestBody.folder_id = folderId;
}
if (pageToken) {
requestBody.page_token = pageToken;
}
const filesResponse = await fetch(`${apiHost}/api/connectors/files`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${token}`,
},
body: JSON.stringify({ ...requestBody, provider: 'google_drive' }),
});
if (!filesResponse.ok) {
throw new Error(`Failed to load files: ${filesResponse.status}`);
}
const filesData = await filesResponse.json();
if (filesData.success && Array.isArray(filesData.files)) {
setGoogleDriveFiles((prev) =>
append ? [...prev, ...filesData.files] : filesData.files,
);
setNextPageToken(filesData.next_page_token || null);
setHasMoreFiles(Boolean(filesData.has_more));
} else {
throw new Error(filesData.error || 'Failed to load files');
}
} catch (error) {
console.error('Error loading Google Drive files:', error);
setAuthError(
error instanceof Error
? error.message
: 'Failed to load files. Please make sure your Google Drive account is properly connected and you granted offline access during authorization.',
);
} finally {
setIsLoadingFiles(false);
}
};
// Handle file selection
const handleFileSelect = (fileId: string) => {
setSelectedFiles((prev) => {
if (prev.includes(fileId)) {
return prev.filter((id) => id !== fileId);
} else {
return [...prev, fileId];
}
});
};
const handleFolderClick = (folderId: string, folderName: string) => {
const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) {
setCurrentFolderId(folderId);
setFolderPath((prev) => [...prev, { id: folderId, name: folderName }]);
setGoogleDriveFiles([]);
setNextPageToken(null);
setHasMoreFiles(false);
setSelectedFiles([]);
loadGoogleDriveFiles(sessionToken, folderId, null, false);
}
};
const navigateBack = (index: number) => {
const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) {
const newPath = folderPath.slice(0, index + 1);
const targetFolderId = newPath[newPath.length - 1]?.id;
setCurrentFolderId(targetFolderId as string | null);
setFolderPath(newPath);
setGoogleDriveFiles([]);
setNextPageToken(null);
setHasMoreFiles(false);
setSelectedFiles([]);
loadGoogleDriveFiles(sessionToken, targetFolderId ?? null, null, false);
}
};
const handleSelectAll = () => {
if (selectedFiles.length === googleDriveFiles.length) {
setSelectedFiles([]);
} else {
setSelectedFiles(googleDriveFiles.map((file) => file.id));
}
};
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
multiple: true,
@@ -515,6 +729,10 @@ function Upload({
if (!remoteName?.trim()) {
return true;
}
if (ingestor.type === 'google_drive') {
return !isGoogleDriveConnected || selectedFiles.length === 0;
}
const formFields: FormField[] = IngestorFormSchemas[ingestor.type];
for (const field of formFields) {
if (field.required) {
@@ -636,7 +854,7 @@ function Upload({
{files.map((file) => (
<p
key={file.name}
className="text-gray-6000 truncate overflow-hidden text-ellipsis"
className="text-gray-6000 truncate overflow-hidden text-ellipsis dark:text-[#ececf1]"
title={file.name}
>
{file.name}
@@ -679,6 +897,253 @@ function Upload({
required={true}
labelBgClassName="bg-white dark:bg-charleston-green-2"
/>
{ingestor.type === 'google_drive' && (
<div className="space-y-4">
{authError && (
<div className="rounded-lg border border-red-200 bg-red-50 p-3 dark:border-red-600 dark:bg-red-900/20">
<p className="text-sm text-red-600 dark:text-red-400">
{authError}
</p>
</div>
)}
{!isGoogleDriveConnected ? (
<ConnectorAuth
provider="google_drive"
onSuccess={(data) => {
setUserEmail(data.user_email);
setIsGoogleDriveConnected(true);
setIsAuthenticating(false);
setAuthError('');
if (data.session_token) {
setSessionToken(ingestor.type, data.session_token);
loadGoogleDriveFiles(data.session_token, null);
}
}}
onError={(error) => {
setAuthError(error);
setIsAuthenticating(false);
setIsGoogleDriveConnected(false);
}}
/>
) : (
<div className="space-y-4">
{/* Connection Status */}
<div className="flex w-full items-center justify-between rounded-lg bg-green-500 px-4 py-2 text-sm text-white">
<div className="flex items-center gap-2">
<svg className="h-4 w-4" viewBox="0 0 24 24">
<path
fill="currentColor"
d="M9 16.17L4.83 12l-1.42 1.41L9 19 21 7l-1.41-1.41z"
/>
</svg>
<span>Connected as {userEmail}</span>
</div>
<button
onClick={() => {
removeSessionToken(ingestor.type);
setIsGoogleDriveConnected(false);
setGoogleDriveFiles([]);
setSelectedFiles([]);
setUserEmail('');
setAuthError('');
const apiHost = import.meta.env.VITE_API_HOST;
fetch(`${apiHost}/api/connectors/disconnect`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${token}`,
},
body: JSON.stringify({
provider: ingestor.type,
session_token: getSessionToken(ingestor.type),
}),
}).catch((err) =>
console.error(
'Error disconnecting from Google Drive:',
err,
),
);
}}
className="text-xs text-white underline hover:text-gray-200"
>
Disconnect
</button>
</div>
{/* File Browser */}
<div className="rounded-lg border border-gray-200 dark:border-gray-600">
<div className="rounded-t-lg border-b border-gray-200 bg-gray-50 p-3 dark:border-gray-600 dark:bg-gray-800">
{/* Breadcrumb navigation */}
<div className="mb-2 flex items-center gap-1">
{folderPath.map((path, index) => (
<div
key={path.id || 'root'}
className="flex items-center gap-1"
>
{index > 0 && (
<span className="text-gray-400">/</span>
)}
<button
onClick={() => navigateBack(index)}
className="text-sm text-blue-600 hover:text-blue-800 hover:underline dark:text-blue-400"
disabled={index === folderPath.length - 1}
>
{path.name}
</button>
</div>
))}
</div>
<div className="flex items-center justify-between">
<h4 className="text-sm font-medium text-gray-700 dark:text-gray-300">
Select Files from Google Drive
</h4>
{googleDriveFiles.length > 0 && (
<button
onClick={handleSelectAll}
className="text-xs text-blue-600 hover:text-blue-800 dark:text-blue-400"
>
{selectedFiles.length === googleDriveFiles.length
? 'Deselect All'
: 'Select All'}
</button>
)}
</div>
{selectedFiles.length > 0 && (
<p className="mt-1 text-xs text-gray-500">
{selectedFiles.length} file
{selectedFiles.length !== 1 ? 's' : ''} selected
</p>
)}
</div>
<div
className="max-h-72 overflow-y-auto"
ref={scrollContainerRef}
>
{isLoadingFiles && googleDriveFiles.length === 0 ? (
<div className="p-4 text-center">
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
Loading files...
</div>
</div>
) : googleDriveFiles.length === 0 ? (
<div className="p-4 text-center text-sm text-gray-500 dark:text-gray-400">
No files found in your Google Drive
</div>
) : (
<>
<div className="divide-y divide-gray-200 dark:divide-gray-600">
{googleDriveFiles.map((file) => (
<div
key={file.id}
className={`p-3 transition-colors ${
selectedFiles.includes(file.id)
? 'bg-blue-50 dark:bg-blue-900/20'
: ''
}`}
>
<div className="flex items-center gap-3">
<div className="flex-shrink-0">
<input
type="checkbox"
checked={selectedFiles.includes(
file.id,
)}
onChange={() =>
handleFileSelect(file.id)
}
className="h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
/>
</div>
{file.type ===
'application/vnd.google-apps.folder' ||
file.isFolder ? (
<div
className="cursor-pointer text-lg hover:text-blue-600"
onClick={() =>
handleFolderClick(file.id, file.name)
}
>
<img
src={FolderIcon}
alt="Folder"
className="h-6 w-6"
/>
</div>
) : (
<div className="text-lg">
<img
src={FileIcon}
alt="File"
className="h-6 w-6"
/>
</div>
)}
<div className="min-w-0 flex-1">
<p
className={`truncate text-sm font-medium dark:text-[#ececf1] ${
file.type ===
'application/vnd.google-apps.folder' ||
file.isFolder
? 'cursor-pointer hover:text-blue-600'
: ''
}`}
onClick={() => {
if (
file.type ===
'application/vnd.google-apps.folder' ||
file.isFolder
) {
handleFolderClick(
file.id,
file.name,
);
}
}}
>
{file.name}
</p>
<p className="text-xs text-gray-500 dark:text-gray-400">
{file.size &&
`${formatBytes(file.size)}`}
Modified {formatDate(file.modifiedTime)}
</p>
</div>
</div>
</div>
))}
</div>
<div className="flex items-center justify-center border-t border-gray-100 p-4 dark:border-gray-800">
{isLoadingFiles && (
<div className="inline-flex items-center gap-2 text-sm text-gray-600 dark:text-gray-400">
<div className="h-4 w-4 animate-spin rounded-full border-2 border-blue-500 border-t-transparent"></div>
Loading more files...
</div>
)}
{!hasMoreFiles && !isLoadingFiles && (
<span className="text-sm text-gray-500 dark:text-gray-400">
All files loaded
</span>
)}
</div>
</>
)}
</div>
<div className="hidden" aria-hidden="true"></div>
</div>
</div>
)}
</div>
)}
{renderFormFields()}
{IngestorFormSchemas[ingestor.type].some(
(field) => field.advanced,
@@ -719,7 +1184,9 @@ function Upload({
: 'bg-purple-30 hover:bg-violets-are-blue cursor-pointer text-white'
}`}
>
{t('modals.uploadDoc.train')}
{ingestor.type === 'google_drive' && selectedFiles.length > 0
? `Train with ${selectedFiles.length} file${selectedFiles.length !== 1 ? 's' : ''}`
: t('modals.uploadDoc.train')}
</button>
)}
</div>
@@ -727,6 +1194,41 @@ function Upload({
);
}
useEffect(() => {
const scrollContainer = scrollContainerRef.current;
const handleScroll = () => {
if (!scrollContainer) return;
const { scrollTop, scrollHeight, clientHeight } = scrollContainer;
const isNearBottom = scrollHeight - scrollTop - clientHeight < 50;
if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) {
const sessionToken = getSessionToken(ingestor.type);
if (sessionToken) {
loadGoogleDriveFiles(
sessionToken,
currentFolderId,
nextPageToken,
true,
);
}
}
};
scrollContainer?.addEventListener('scroll', handleScroll);
return () => {
scrollContainer?.removeEventListener('scroll', handleScroll);
};
}, [
hasMoreFiles,
isLoadingFiles,
nextPageToken,
currentFolderId,
ingestor.type,
]);
return (
<WrapperModal
isPerformingTask={progress !== undefined && progress.percentage < 100}

View File

@@ -22,7 +22,19 @@ export interface UrlIngestorConfig extends BaseIngestorConfig {
url: string;
}
export type IngestorType = 'crawler' | 'github' | 'reddit' | 'url';
export interface GoogleDriveIngestorConfig extends BaseIngestorConfig {
folder_id?: string;
file_ids?: string;
recursive?: boolean;
token_info?: any;
}
export type IngestorType =
| 'crawler'
| 'github'
| 'reddit'
| 'url'
| 'google_drive';
export interface IngestorConfig {
type: IngestorType;
@@ -31,7 +43,8 @@ export interface IngestorConfig {
| RedditIngestorConfig
| GithubIngestorConfig
| CrawlerIngestorConfig
| UrlIngestorConfig;
| UrlIngestorConfig
| GoogleDriveIngestorConfig;
}
export type IngestorFormData = {
@@ -109,6 +122,14 @@ export const IngestorFormSchemas: Record<IngestorType, FormField[]> = {
required: true,
},
],
google_drive: [
{
name: 'recursive',
label: 'Include subfolders',
type: 'boolean',
required: false,
},
],
};
export const IngestorDefaultConfigs: Record<
@@ -143,4 +164,12 @@ export const IngestorDefaultConfigs: Record<
repo_url: '',
} as GithubIngestorConfig,
},
google_drive: {
name: '',
config: {
folder_id: '',
file_ids: '',
recursive: true,
} as GoogleDriveIngestorConfig,
},
};
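For reference, a minimal sketch of the data payload that Upload.tsx assembles for a Google Drive ingestion (field names mirror the configData object built earlier in this change set; the IDs and session token here are placeholders, not real values):
const googleDriveData = {
  file_ids: ['<selected-file-id>'], // Drive IDs of individually selected files (placeholder)
  folder_ids: ['<selected-folder-id>'], // Drive IDs of selected folders (placeholder)
  recursive: true, // the 'Include subfolders' toggle from IngestorFormSchemas.google_drive
  session_token: '<session-token-or-null>', // stored connector session token, or null when not connected
};
// Sent as: formData.append('data', JSON.stringify(googleDriveData));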

View File

@@ -0,0 +1,16 @@
/**
* Utility functions for managing session tokens for different cloud service providers.
* Follows the convention: {provider}_session_token
*/
export const getSessionToken = (provider: string): string | null => {
return localStorage.getItem(`${provider}_session_token`);
};
export const setSessionToken = (provider: string, token: string): void => {
localStorage.setItem(`${provider}_session_token`, token);
};
export const removeSessionToken = (provider: string): void => {
localStorage.removeItem(`${provider}_session_token`);
};
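A minimal usage sketch of these helpers, assuming the google_drive provider string used elsewhere in this PR (the token value is a placeholder); the underlying localStorage key would be google_drive_session_token:
import {
  getSessionToken,
  setSessionToken,
  removeSessionToken,
} from '../utils/providerUtils';
// After a successful connector auth, persist the session token
setSessionToken('google_drive', 'placeholder-session-token');
// On later visits, reuse it if present (returns null when nothing is stored)
const sessionToken = getSessionToken('google_drive');
// On disconnect, clear it so the UI falls back to the connect flow
removeSessionToken('google_drive');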

View File

@@ -2,3 +2,12 @@ export function truncate(str: string, n: number) {
// slices long strings and ends with ...
return str.length > n ? str.slice(0, n - 1) + '...' : str;
}
export function formatBytes(bytes: number | null): string {
if (!bytes || bytes <= 0) return '';
const k = 1024;
const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return `${parseFloat((bytes / Math.pow(k, i)).toFixed(2))} ${sizes[i]}`;
}
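A few illustrative calls; the outputs follow directly from the implementation above:
formatBytes(0); // '' (zero or null sizes render empty; callers display '-' instead)
formatBytes(1536); // '1.5 KB'
formatBytes(1073741824); // '1 GB'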

View File

@@ -9,7 +9,9 @@ $ErrorActionPreference = "Stop"
# Get current script directory
$SCRIPT_DIR = Split-Path -Parent $MyInvocation.MyCommand.Definition
$COMPOSE_FILE = Join-Path -Path $SCRIPT_DIR -ChildPath "deployment\docker-compose.yaml"
$COMPOSE_FILE_HUB = Join-Path -Path $SCRIPT_DIR -ChildPath "deployment\docker-compose-hub.yaml"
$COMPOSE_FILE_LOCAL = Join-Path -Path $SCRIPT_DIR -ChildPath "deployment\docker-compose.yaml"
$COMPOSE_FILE = $COMPOSE_FILE_HUB
$ENV_FILE = Join-Path -Path $SCRIPT_DIR -ChildPath ".env"
# Function to write colored text
@@ -223,12 +225,15 @@ function Prompt-MainMenu {
Write-Host ""
Write-ColorText "Welcome to DocsGPT Setup!" -ForegroundColor "White" -Bold
Write-ColorText "How would you like to proceed?" -ForegroundColor "White"
Write-ColorText "1) Use DocsGPT Public API Endpoint (simple and free)" -ForegroundColor "Yellow"
Write-ColorText "1) Use DocsGPT Public API Endpoint (simple and free, uses pre-built Docker images from Docker Hub for fastest setup)" -ForegroundColor "Yellow"
Write-ColorText "2) Serve Local (with Ollama)" -ForegroundColor "Yellow"
Write-ColorText "3) Connect Local Inference Engine" -ForegroundColor "Yellow"
Write-ColorText "4) Connect Cloud API Provider" -ForegroundColor "Yellow"
Write-ColorText "5) Advanced: Build images locally (for developers)" -ForegroundColor "Yellow"
Write-Host ""
$script:main_choice = Read-Host "Choose option (1-4)"
Write-ColorText "By default, DocsGPT uses pre-built images from Docker Hub for a fast, reliable, and consistent experience. This avoids local build errors and speeds up onboarding. Advanced users can choose to build images locally if needed." -ForegroundColor "White"
Write-Host ""
$script:main_choice = Read-Host "Choose option (1-5)"
}
# Function to prompt for Local Inference Engine options
@@ -304,9 +309,9 @@ function Use-DocsPublicAPIEndpoint {
# Run Docker compose commands
try {
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" build
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull
if ($LASTEXITCODE -ne 0) {
throw "Docker compose build failed with exit code $LASTEXITCODE"
throw "Docker compose pull failed with exit code $LASTEXITCODE"
}
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
@@ -415,10 +420,10 @@ function Serve-LocalOllama {
Write-Host ""
Write-ColorText "Starting Docker Compose with Ollama ($docker_compose_file_suffix)..." -ForegroundColor "White"
# Build the containers
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" -f "$optional_compose" build
# Pull the containers
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" -f "$optional_compose" pull
if ($LASTEXITCODE -ne 0) {
throw "Docker compose build failed with exit code $LASTEXITCODE"
throw "Docker compose pull failed with exit code $LASTEXITCODE"
}
# Start the containers
@@ -575,10 +580,10 @@ function Connect-LocalInferenceEngine {
Write-Host ""
Write-ColorText "Starting Docker Compose..." -ForegroundColor "White"
# Build the containers
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" build
# Pull the containers
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull
if ($LASTEXITCODE -ne 0) {
throw "Docker compose build failed with exit code $LASTEXITCODE"
throw "Docker compose pull failed with exit code $LASTEXITCODE"
}
# Start the containers
@@ -706,10 +711,12 @@ function Connect-CloudAPIProvider {
Write-ColorText "Starting Docker Compose..." -ForegroundColor "White"
# Run Docker compose commands
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d --build
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" pull
if ($LASTEXITCODE -ne 0) {
throw "Docker compose build or up failed with exit code $LASTEXITCODE"
throw "Docker compose pull failed with exit code $LASTEXITCODE"
}
& docker compose --env-file "$ENV_FILE" -f "$COMPOSE_FILE" up -d
Write-Host ""
Write-ColorText "DocsGPT is now configured to use $provider_name on http://localhost:5173" -ForegroundColor "Green"
@@ -735,13 +742,13 @@ while ($true) {
switch ($main_choice) {
"1" {
$COMPOSE_FILE = $COMPOSE_FILE_HUB
Use-DocsPublicAPIEndpoint
$exitLoop = $true # Set flag to true on completion
break
}
"2" {
Serve-LocalOllama
# Only exit the loop if user didn't press "b" to go back
if ($ollama_choice -ne "b" -and $ollama_choice -ne "B") {
$exitLoop = $true
}
@@ -749,7 +756,6 @@ while ($true) {
}
"3" {
Connect-LocalInferenceEngine
# Only exit the loop if user didn't press "b" to go back
if ($engine_choice -ne "b" -and $engine_choice -ne "B") {
$exitLoop = $true
}
@@ -757,20 +763,25 @@ while ($true) {
}
"4" {
Connect-CloudAPIProvider
# Only exit the loop if user didn't press "b" to go back
if ($provider_choice -ne "b" -and $provider_choice -ne "B") {
$exitLoop = $true
}
break
}
"5" {
Write-Host ""
Write-ColorText "You have selected to build images locally. This is recommended for developers or if you want to test local changes." -ForegroundColor "Yellow"
$COMPOSE_FILE = $COMPOSE_FILE_LOCAL
Use-DocsPublicAPIEndpoint
$exitLoop = $true
break
}
default {
Write-Host ""
Write-ColorText "Invalid choice. Please choose 1-4." -ForegroundColor "Red"
Write-ColorText "Invalid choice. Please choose 1-5." -ForegroundColor "Red"
Start-Sleep -Seconds 1
}
}
# Only break out of the loop if a function completed successfully
if ($exitLoop) {
break
}

View File

@@ -9,7 +9,8 @@ NC='\033[0m'
BOLD='\033[1m'
# Base Compose file (relative to script location)
COMPOSE_FILE="$(dirname "$(readlink -f "$0")")/deployment/docker-compose.yaml"
COMPOSE_FILE="$(dirname "$(readlink -f "$0")")/deployment/docker-compose-hub.yaml"
COMPOSE_FILE_LOCAL="$(dirname "$(readlink -f "$0")")/deployment/docker-compose.yaml"
ENV_FILE="$(dirname "$(readlink -f "$0")")/.env"
# Animation function
@@ -111,12 +112,15 @@ check_and_start_docker() {
prompt_main_menu() {
echo -e "\n${DEFAULT_FG}${BOLD}Welcome to DocsGPT Setup!${NC}"
echo -e "${DEFAULT_FG}How would you like to proceed?${NC}"
echo -e "${YELLOW}1) Use DocsGPT Public API Endpoint (simple and free)${NC}"
echo -e "${YELLOW}1) Use DocsGPT Public API Endpoint (simple and free, uses pre-built Docker images from Docker Hub for fastest setup)${NC}"
echo -e "${YELLOW}2) Serve Local (with Ollama)${NC}"
echo -e "${YELLOW}3) Connect Local Inference Engine${NC}"
echo -e "${YELLOW}4) Connect Cloud API Provider${NC}"
echo -e "${YELLOW}5) Advanced: Build images locally (for developers)${NC}"
echo
read -p "$(echo -e "${DEFAULT_FG}Choose option (1-4): ${NC}")" main_choice
echo -e "${DEFAULT_FG}By default, DocsGPT uses pre-built images from Docker Hub for a fast, reliable, and consistent experience. This avoids local build errors and speeds up onboarding. Advanced users can choose to build images locally if needed.${NC}"
echo
read -p "$(echo -e "${DEFAULT_FG}Choose option (1-5): ${NC}")" main_choice
}
# Function to prompt for Local Inference Engine options
@@ -176,7 +180,7 @@ use_docs_public_api_endpoint() {
check_and_start_docker
echo -e "\n${NC}Starting Docker Compose...${NC}"
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" build && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d
docker_compose_status=$? # Capture exit status of docker compose
echo "Docker Compose Exit Status: $docker_compose_status"
@@ -252,7 +256,7 @@ serve_local_ollama() {
)
echo -e "\n${NC}Starting Docker Compose with Ollama (${docker_compose_file_suffix})...${NC}"
docker compose --env-file "${ENV_FILE}" "${compose_files[@]}" build
docker compose --env-file "${ENV_FILE}" "${compose_files[@]}" pull
docker compose --env-file "${ENV_FILE}" "${compose_files[@]}" up -d
docker_compose_status=$?
@@ -360,7 +364,7 @@ connect_local_inference_engine() {
check_and_start_docker
echo -e "\n${NC}Starting Docker Compose...${NC}"
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" build && docker compose -f "${COMPOSE_FILE}" up -d
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose -f "${COMPOSE_FILE}" up -d
docker_compose_status=$?
echo "Docker Compose Exit Status: $docker_compose_status" # Debug output
@@ -449,7 +453,7 @@ connect_cloud_api_provider() {
check_and_start_docker
echo -e "\n${NC}Starting Docker Compose...${NC}"
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d --build
docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" pull && docker compose --env-file "${ENV_FILE}" -f "${COMPOSE_FILE}" up -d
docker_compose_status=$?
echo "Docker Compose Exit Status: $docker_compose_status" # Debug output
@@ -468,12 +472,14 @@ connect_cloud_api_provider() {
# Main script execution
animate_dino
while true; do # Main menu loop
clear # Clear screen before showing main menu again
prompt_main_menu
case $main_choice in
1) # Use DocsGPT Public API Endpoint
1) # Use DocsGPT Public API Endpoint (Docker Hub images)
COMPOSE_FILE="$(dirname "$(readlink -f "$0")")/deployment/docker-compose-hub.yaml"
use_docs_public_api_endpoint
break ;;
2) # Serve Local (with Ollama)
@@ -485,8 +491,13 @@ while true; do # Main menu loop
4) # Connect Cloud API Provider
connect_cloud_api_provider
break ;;
5) # Advanced: Build images locally
echo -e "\n${YELLOW}You have selected to build images locally. This is recommended for developers or if you want to test local changes.${NC}"
COMPOSE_FILE="$COMPOSE_FILE_LOCAL"
use_docs_public_api_endpoint
break ;;
*)
echo -e "\n${RED}Invalid choice. Please choose 1-4.${NC}" ; sleep 1 ;;
echo -e "\n${RED}Invalid choice. Please choose 1-5.${NC}" ; sleep 1 ;;
esac
done