diff --git a/application/api/connector/routes.py b/application/api/connector/routes.py index 1647aa78..09b6c0c9 100644 --- a/application/api/connector/routes.py +++ b/application/api/connector/routes.py @@ -172,7 +172,7 @@ class ConnectorSources(Resource): return make_response(jsonify({"success": False}), 401) user = decoded_token.get("sub") try: - sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1) + sources = sources_collection.find({"user": user, "type": "connector:file"}).sort("date", -1) connector_sources = [] for source in sources: connector_sources.append({ @@ -289,8 +289,7 @@ class ConnectorsCallback(Resource): "access_token": token_info.get("access_token"), "refresh_token": token_info.get("refresh_token"), "token_uri": token_info.get("token_uri"), - "expiry": token_info.get("expiry"), - "scopes": token_info.get("scopes") + "expiry": token_info.get("expiry") } user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None @@ -338,8 +337,15 @@ class ConnectorRefresh(Resource): @connectors_ns.route("/api/connectors/files") class ConnectorFiles(Resource): - @api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)})) - @api.doc(description="List files from a connector provider (supports pagination)") + @api.expect(api.model("ConnectorFilesModel", { + "provider": fields.String(required=True), + "session_token": fields.String(required=True), + "folder_id": fields.String(required=False), + "limit": fields.Integer(required=False), + "page_token": fields.String(required=False), + "search_query": fields.String(required=False) + })) + @api.doc(description="List files from a connector provider (supports pagination and search)") def post(self): try: data = request.get_json() @@ -348,10 +354,11 @@ 
class ConnectorFiles(Resource): folder_id = data.get('folder_id') limit = data.get('limit', 10) page_token = data.get('page_token') + search_query = data.get('search_query') + if not provider or not session_token: return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) - decoded_token = request.decoded_token if not decoded_token: return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) @@ -361,13 +368,17 @@ class ConnectorFiles(Resource): return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401) loader = ConnectorCreator.create_connector(provider, session_token) - documents = loader.load_data({ + input_config = { 'limit': limit, 'list_only': True, 'session_token': session_token, 'folder_id': folder_id, 'page_token': page_token - }) + } + if search_query: + input_config['search_query'] = search_query + + documents = loader.load_data(input_config) files = [] for doc in documents[:limit]: @@ -385,13 +396,20 @@ class ConnectorFiles(Resource): 'name': metadata.get('file_name', 'Unknown File'), 'type': metadata.get('mime_type', 'unknown'), 'size': metadata.get('size', None), - 'modifiedTime': formatted_time + 'modifiedTime': formatted_time, + 'isFolder': metadata.get('is_folder', False) }) next_token = getattr(loader, 'next_page_token', None) has_more = bool(next_token) - return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200) + return make_response(jsonify({ + "success": True, + "files": files, + "total": len(files), + "next_page_token": next_token, + "has_more": has_more + }), 200) except Exception as e: current_app.logger.error(f"Error loading connector files: {e}") return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500) @@ -400,7 +418,7 @@ class ConnectorFiles(Resource): 
@connectors_ns.route("/api/connectors/validate-session") class ConnectorValidateSession(Resource): @api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)})) - @api.doc(description="Validate connector session token and return user info") + @api.doc(description="Validate connector session token and return user info and access token") def post(self): try: data = request.get_json() @@ -409,7 +427,6 @@ class ConnectorValidateSession(Resource): if not provider or not session_token: return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) - decoded_token = request.decoded_token if not decoded_token: return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) @@ -423,10 +440,36 @@ class ConnectorValidateSession(Resource): auth = ConnectorCreator.create_auth(provider) is_expired = auth.is_token_expired(token_info) + if is_expired and token_info.get('refresh_token'): + try: + refreshed_token_info = auth.refresh_access_token(token_info.get('refresh_token')) + sanitized_token_info = { + "access_token": refreshed_token_info.get("access_token"), + "refresh_token": refreshed_token_info.get("refresh_token"), + "token_uri": refreshed_token_info.get("token_uri"), + "expiry": refreshed_token_info.get("expiry") + } + sessions_collection.update_one( + {"session_token": session_token}, + {"$set": {"token_info": sanitized_token_info}} + ) + token_info = sanitized_token_info + is_expired = False + except Exception as refresh_error: + current_app.logger.error(f"Failed to refresh token: {refresh_error}") + + if is_expired: + return make_response(jsonify({ + "success": False, + "expired": True, + "error": "Session token has expired. Please reconnect." 
+ }), 401) + return make_response(jsonify({ "success": True, - "expired": is_expired, - "user_email": session.get('user_email', 'Connected User') + "expired": False, + "user_email": session.get('user_email', 'Connected User'), + "access_token": token_info.get('access_token') }), 200) except Exception as e: current_app.logger.error(f"Error validating connector session: {e}") diff --git a/application/parser/connectors/google_drive/auth.py b/application/parser/connectors/google_drive/auth.py index 37d55dcc..c282279e 100644 --- a/application/parser/connectors/google_drive/auth.py +++ b/application/parser/connectors/google_drive/auth.py @@ -17,8 +17,7 @@ class GoogleDriveAuth(BaseConnectorAuth): """ SCOPES = [ - 'https://www.googleapis.com/auth/drive.readonly', - 'https://www.googleapis.com/auth/drive.metadata.readonly' + 'https://www.googleapis.com/auth/drive.file' ] def __init__(self): @@ -50,7 +49,7 @@ class GoogleDriveAuth(BaseConnectorAuth): authorization_url, _ = flow.authorization_url( access_type='offline', prompt='consent', - include_granted_scopes='true', + include_granted_scopes='false', state=state ) diff --git a/application/parser/connectors/google_drive/loader.py b/application/parser/connectors/google_drive/loader.py index 07219344..c96a08be 100644 --- a/application/parser/connectors/google_drive/loader.py +++ b/application/parser/connectors/google_drive/loader.py @@ -32,6 +32,10 @@ class GoogleDriveLoader(BaseConnectorLoader): 'text/plain': '.txt', 'text/csv': '.csv', 'text/html': '.html', + 'text/markdown': '.md', + 'text/x-rst': '.rst', + 'application/json': '.json', + 'application/epub+zip': '.epub', 'application/rtf': '.rtf', 'image/jpeg': '.jpg', 'image/jpg': '.jpg', @@ -120,6 +124,7 @@ class GoogleDriveLoader(BaseConnectorLoader): list_only = inputs.get('list_only', False) load_content = not list_only page_token = inputs.get('page_token') + search_query = inputs.get('search_query') self.next_page_token = None if file_ids: @@ -128,12 +133,18 @@ class 
GoogleDriveLoader(BaseConnectorLoader): try: doc = self._load_file_by_id(file_id, load_content=load_content) if doc: - documents.append(doc) + if not search_query or ( + search_query.lower() in doc.extra_info.get('file_name', '').lower() + ): + documents.append(doc) elif hasattr(self, '_credential_refreshed') and self._credential_refreshed: self._credential_refreshed = False logging.info(f"Retrying load of file {file_id} after credential refresh") doc = self._load_file_by_id(file_id, load_content=load_content) - if doc: + if doc and ( + not search_query or + search_query.lower() in doc.extra_info.get('file_name', '').lower() + ): documents.append(doc) except Exception as e: logging.error(f"Error loading file {file_id}: {e}") @@ -141,7 +152,13 @@ class GoogleDriveLoader(BaseConnectorLoader): else: # Browsing mode: list immediate children of provided folder or root parent_id = folder_id if folder_id else 'root' - documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token) + documents = self._list_items_in_parent( + parent_id, + limit=limit, + load_content=load_content, + page_token=page_token, + search_query=search_query + ) logging.info(f"Loaded {len(documents)} documents from Google Drive") return documents @@ -184,13 +201,18 @@ class GoogleDriveLoader(BaseConnectorLoader): return None - def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]: + def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None, search_query: Optional[str] = None) -> List[Document]: self._ensure_service() documents: List[Document] = [] try: query = f"'{parent_id}' in parents and trashed=false" + + if search_query: + safe_search = search_query.replace("'", "\\'") + query += f" and name contains '{safe_search}'" + next_token_out: Optional[str] = None while True: @@ -205,7 +227,8 @@ 
class GoogleDriveLoader(BaseConnectorLoader): q=query, fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)', pageToken=page_token, - pageSize=page_size + pageSize=page_size, + orderBy='name' ).execute() items = results.get('files', []) diff --git a/application/worker.py b/application/worker.py index 10fb6c2b..5a29d00a 100755 --- a/application/worker.py +++ b/application/worker.py @@ -978,13 +978,13 @@ def ingest_connector( "tokens": tokens, "retriever": retriever, "id": str(id), - "type": "connector", + "type": "connector:file", "remote_data": json.dumps({ "provider": source_type, **api_source_config }), "directory_structure": json.dumps(directory_structure), - "sync_frequency": sync_frequency + "sync_frequency": sync_frequency } if operation_mode == "sync": diff --git a/deployment/docker-compose-hub.yaml b/deployment/docker-compose-hub.yaml index ddfaf26c..fe6a6bcb 100644 --- a/deployment/docker-compose-hub.yaml +++ b/deployment/docker-compose-hub.yaml @@ -6,6 +6,7 @@ services: environment: - VITE_API_HOST=http://localhost:7091 - VITE_API_STREAMING=$VITE_API_STREAMING + - VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID ports: - "5173:5173" depends_on: diff --git a/deployment/docker-compose.yaml b/deployment/docker-compose.yaml index f1181469..2eef387b 100644 --- a/deployment/docker-compose.yaml +++ b/deployment/docker-compose.yaml @@ -7,6 +7,7 @@ services: environment: - VITE_API_HOST=http://localhost:7091 - VITE_API_STREAMING=$VITE_API_STREAMING + - VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID ports: - "5173:5173" depends_on: diff --git a/docs/pages/Guides/Integrations/_meta.json b/docs/pages/Guides/Integrations/_meta.json new file mode 100644 index 00000000..4b77ab8d --- /dev/null +++ b/docs/pages/Guides/Integrations/_meta.json @@ -0,0 +1,6 @@ +{ + "google-drive-connector": { + "title": "🔗 Google Drive", + "href": "/Guides/Integrations/google-drive-connector" + } +} diff --git 
a/docs/pages/Guides/Integrations/google-drive-connector.mdx b/docs/pages/Guides/Integrations/google-drive-connector.mdx new file mode 100644 index 00000000..70d002a2 --- /dev/null +++ b/docs/pages/Guides/Integrations/google-drive-connector.mdx @@ -0,0 +1,212 @@ +--- +title: Google Drive Connector +description: Connect your Google Drive as an external knowledge base to upload and process files directly from your Google Drive account. +--- + +import { Callout } from 'nextra/components' +import { Steps } from 'nextra/components' + +# Google Drive Connector + +The Google Drive Connector allows you to seamlessly connect your Google Drive account as an external knowledge base. This integration enables you to upload and process files directly from your Google Drive without manually downloading and uploading them to DocsGPT. + +## Features + +- **Direct File Access**: Browse and select files directly from your Google Drive +- **Comprehensive File Support**: Supports all major document formats including: + - Google Workspace files (Docs, Sheets, Slides) + - Microsoft Office files (.docx, .xlsx, .pptx, .doc, .ppt, .xls) + - PDF documents + - Text files (.txt, .md, .rst, .html, .rtf) + - Data files (.csv, .json) + - Image files (.png, .jpg, .jpeg) + - E-books (.epub) +- **Secure Authentication**: Uses OAuth 2.0 for secure access to your Google Drive +- **Real-time Sync**: Process files directly from Google Drive without local downloads + + +The Google Drive Connector requires proper configuration of Google API credentials. Follow the setup instructions below to enable this feature. + + +## Prerequisites + +Before setting up the Google Drive Connector, you'll need: + +1. A Google Cloud Platform (GCP) project +2. Google Drive API enabled +3. OAuth 2.0 credentials configured +4. DocsGPT instance with proper environment variables + +## Setup Instructions + + + +### Step 1: Create a Google Cloud Project + +1. Go to the [Google Cloud Console](https://console.cloud.google.com/) +2. 
Create a new project or select an existing one +3. Note down your Project ID for later use + +### Step 2: Enable Google Drive API + +1. In the Google Cloud Console, navigate to **APIs & Services** > **Library** +2. Search for "Google Drive API" +3. Click on "Google Drive API" and click **Enable** + +### Step 3: Create OAuth 2.0 Credentials + +1. Go to **APIs & Services** > **Credentials** +2. Click **Create Credentials** > **OAuth client ID** +3. If prompted, configure the OAuth consent screen: + - Choose **External** user type (unless you're using Google Workspace) + - Fill in the required fields (App name, User support email, Developer contact) + - Add your domain to **Authorized domains** if deploying publicly +4. For Application type, select **Web application** +5. Add your DocsGPT frontend URL to **Authorized JavaScript origins**: + - For local development: `http://localhost:5173` + - For production: `https://yourdomain.com` +6. Add your DocsGPT callback URL to **Authorized redirect URIs**: + - For local development: `http://localhost:7091/api/connectors/callback?provider=google_drive` + - For production: `https://yourdomain.com/api/connectors/callback?provider=google_drive` +7. Click **Create** and note down the **Client ID** and **Client Secret** + + + +### Step 4: Configure Backend Environment Variables + +Add the following environment variables to your backend configuration: + +**For Docker deployment**, add to your `.env` file in the root directory: + +```env +# Google Drive Connector Configuration +GOOGLE_CLIENT_ID=your_google_client_id_here +GOOGLE_CLIENT_SECRET=your_google_client_secret_here +``` + +**For manual deployment**, set these environment variables in your system or application configuration. 
+ +### Step 5: Configure Frontend Environment Variables + +Add the following environment variables to your frontend `.env` file: + +```env +# Google Drive Frontend Configuration +VITE_GOOGLE_CLIENT_ID=your_google_client_id_here +``` + + +Make sure to use the same Google Client ID in both backend and frontend configurations. + + +### Step 6: Restart Your Application + +After configuring the environment variables: + +1. **For Docker**: Restart your Docker containers + ```bash + docker-compose down + docker-compose up -d + ``` + +2. **For manual deployment**: Restart both backend and frontend services + + + +## Using the Google Drive Connector + +Once configured, you can use the Google Drive Connector to upload files: + + + +### Step 1: Access the Upload Interface + +1. Navigate to the DocsGPT interface +2. Go to the upload/training section +3. You should now see "Google Drive" as an available upload option + +### Step 2: Connect Your Google Account + +1. Select "Google Drive" as your upload method +2. Click "Connect to Google Drive" +3. You'll be redirected to Google's OAuth consent screen +4. Grant the necessary permissions to DocsGPT +5. You'll be redirected back to DocsGPT with a successful connection + +### Step 3: Select Files + +1. Once connected, click "Select Files" +2. The Google Drive picker will open +3. Browse your Google Drive and select the files you want to process +4. Click "Select" to confirm your choices + +### Step 4: Process Files + +1. Review your selected files +2. Click "Train" or "Upload" to process the files +3. DocsGPT will download and process the files from your Google Drive +4. 
Once processing is complete, the files will be available in your knowledge base + + + +## Supported File Types + +The Google Drive Connector supports the following file types: + +| File Type | Extensions | Description | +|-----------|------------|-------------| +| **Google Workspace** | - | Google Docs, Sheets, Slides (automatically converted) | +| **Microsoft Office** | .docx, .xlsx, .pptx | Modern Office formats | +| **Legacy Office** | .doc, .ppt, .xls | Older Office formats | +| **PDF Documents** | .pdf | Portable Document Format | +| **Text Files** | .txt, .md, .rst, .html, .rtf | Various text formats | +| **Data Files** | .csv, .json | Structured data formats | +| **Images** | .png, .jpg, .jpeg | Image files (with OCR if enabled) | +| **E-books** | .epub | Electronic publication format | + +## Troubleshooting + +### Common Issues + +**"Google Drive option not appearing"** +- Verify that `VITE_GOOGLE_CLIENT_ID` is set in your frontend `.env` file +- If you deploy with Docker Compose, check that `VITE_GOOGLE_CLIENT_ID` is also passed through to the frontend container's environment +- Check browser console for any JavaScript errors +- Ensure the frontend has been restarted after adding environment variables + +**"Authentication failed"** +- Verify that your OAuth 2.0 credentials are correctly configured +- Check that the redirect URI `http://<your-domain>/api/connectors/callback?provider=google_drive` is correctly added in GCP console +- Ensure the Google Drive API is enabled in your GCP project + +**"Permission denied" errors** +- Verify that the OAuth consent screen is properly configured +- Check that your Google account has access to the files you're trying to select +- Ensure the required scopes are granted during authentication + +**"Files not processing"** +- Check that the backend environment variables are correctly set +- Verify that the OAuth credentials have the necessary permissions +- Check the backend logs for any error messages + +### Environment Variable Checklist + +**Backend (.env in root 
directory):** +- ✅ `GOOGLE_CLIENT_ID` +- ✅ `GOOGLE_CLIENT_SECRET` + +**Frontend (.env in frontend directory):** +- ✅ `VITE_GOOGLE_CLIENT_ID` + +### Security Considerations + +- Keep your Google Client Secret secure and never expose it in frontend code +- Regularly rotate your OAuth credentials +- Use HTTPS in production to protect authentication tokens +- Ensure proper OAuth consent screen configuration for production use + + +For production deployments, make sure to add your actual domain to the OAuth consent screen and authorized origins/redirect URIs. + + + diff --git a/docs/pages/Guides/_meta.json b/docs/pages/Guides/_meta.json index a88202d1..065cd8d8 100644 --- a/docs/pages/Guides/_meta.json +++ b/docs/pages/Guides/_meta.json @@ -20,5 +20,8 @@ "Architecture": { "title": "🏗️ Architecture", "href": "/Guides/Architecture" + }, + "Integrations": { + "title": "🔗 Integrations" } } \ No newline at end of file diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 934f9e57..5a746dcb 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -21,6 +21,7 @@ "react-chartjs-2": "^5.3.0", "react-dom": "^19.0.0", "react-dropzone": "^14.3.8", + "react-google-drive-picker": "^1.2.2", "react-i18next": "^15.4.0", "react-markdown": "^9.0.1", "react-redux": "^9.2.0", @@ -9382,6 +9383,16 @@ "react": ">= 16.8 || 18.0.0" } }, + "node_modules/react-google-drive-picker": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/react-google-drive-picker/-/react-google-drive-picker-1.2.2.tgz", + "integrity": "sha512-x30mYkt9MIwPCgL+fyK75HZ8E6G5L/WGW0bfMG6kbD4NG2kmdlmV9oH5lPa6P6d46y9hj5Y3btAMrZd4JRRkSA==", + "license": "MIT", + "peerDependencies": { + "react": ">=17.0.0", + "react-dom": ">=17.0.0" + } + }, "node_modules/react-i18next": { "version": "15.4.0", "resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-15.4.0.tgz", diff --git a/frontend/package.json b/frontend/package.json index 3b869b2d..fe6ce59f 100644 --- 
a/frontend/package.json +++ b/frontend/package.json @@ -32,6 +32,7 @@ "react-chartjs-2": "^5.3.0", "react-dom": "^19.0.0", "react-dropzone": "^14.3.8", + "react-google-drive-picker": "^1.2.2", "react-i18next": "^15.4.0", "react-markdown": "^9.0.1", "react-redux": "^9.2.0", diff --git a/frontend/src/Navigation.tsx b/frontend/src/Navigation.tsx index 2adaa69b..aed38181 100644 --- a/frontend/src/Navigation.tsx +++ b/frontend/src/Navigation.tsx @@ -10,7 +10,7 @@ import Add from './assets/add.svg'; import DocsGPT3 from './assets/cute_docsgpt3.svg'; import Discord from './assets/discord.svg'; import Expand from './assets/expand.svg'; -import Github from './assets/github.svg'; +import Github from './assets/git_nav.svg'; import Hamburger from './assets/hamburger.svg'; import openNewChat from './assets/openNewChat.svg'; import Pin from './assets/pin.svg'; @@ -568,6 +568,8 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { > Join Discord community @@ -581,8 +583,10 @@ export default function Navigation({ navOpen, setNavOpen }: NavigationProps) { > Follow us on Twitter diff --git a/frontend/src/assets/crawler.svg b/frontend/src/assets/crawler.svg new file mode 100644 index 00000000..863405ad --- /dev/null +++ b/frontend/src/assets/crawler.svg @@ -0,0 +1,3 @@ + + + diff --git a/frontend/src/assets/drive.svg b/frontend/src/assets/drive.svg new file mode 100644 index 00000000..8ec4dc9b --- /dev/null +++ b/frontend/src/assets/drive.svg @@ -0,0 +1,3 @@ + + + diff --git a/frontend/src/assets/file_upload.svg b/frontend/src/assets/file_upload.svg index 159b580d..cb945f1a 100644 --- a/frontend/src/assets/file_upload.svg +++ b/frontend/src/assets/file_upload.svg @@ -1,3 +1,10 @@ - - - \ No newline at end of file + + + + + + + + + + diff --git a/frontend/src/assets/git_nav.svg b/frontend/src/assets/git_nav.svg new file mode 100644 index 00000000..2de0d6eb --- /dev/null +++ b/frontend/src/assets/git_nav.svg @@ -0,0 +1,3 @@ + + + diff --git 
a/frontend/src/assets/github.svg b/frontend/src/assets/github.svg index 40fc238c..db294339 100644 --- a/frontend/src/assets/github.svg +++ b/frontend/src/assets/github.svg @@ -1,5 +1,3 @@ - - github - - + + diff --git a/frontend/src/assets/reddit.svg b/frontend/src/assets/reddit.svg new file mode 100644 index 00000000..705ef638 --- /dev/null +++ b/frontend/src/assets/reddit.svg @@ -0,0 +1,4 @@ + + + + diff --git a/frontend/src/assets/url.svg b/frontend/src/assets/url.svg new file mode 100644 index 00000000..e51397f0 --- /dev/null +++ b/frontend/src/assets/url.svg @@ -0,0 +1,3 @@ + + + diff --git a/frontend/src/components/ConnectorAuth.tsx b/frontend/src/components/ConnectorAuth.tsx index 61b6e895..af7e6540 100644 --- a/frontend/src/components/ConnectorAuth.tsx +++ b/frontend/src/components/ConnectorAuth.tsx @@ -1,5 +1,6 @@ import React, { useRef } from 'react'; import { useSelector } from 'react-redux'; +import { useDarkTheme } from '../hooks'; import { selectToken } from '../preferences/preferenceSlice'; interface ConnectorAuthProps { @@ -7,22 +8,24 @@ interface ConnectorAuthProps { onSuccess: (data: { session_token: string; user_email: string }) => void; onError: (error: string) => void; label?: string; + isConnected?: boolean; + userEmail?: string; + onDisconnect?: () => void; + errorMessage?: string; } -const providerLabel = (provider: string) => { - const map: Record = { - google_drive: 'Google Drive', - }; - return map[provider] || provider.replace(/_/g, ' '); -}; - const ConnectorAuth: React.FC = ({ provider, onSuccess, onError, label, + isConnected = false, + userEmail = '', + onDisconnect, + errorMessage, }) => { const token = useSelector(selectToken); + const [isDarkTheme] = useDarkTheme(); const completedRef = useRef(false); const intervalRef = useRef(null); @@ -36,12 +39,8 @@ const ConnectorAuth: React.FC = ({ const handleAuthMessage = (event: MessageEvent) => { const successGeneric = event.data?.type === 'connector_auth_success'; - const 
successProvider = - event.data?.type === `${provider}_auth_success` || - event.data?.type === 'google_drive_auth_success'; - const errorProvider = - event.data?.type === `${provider}_auth_error` || - event.data?.type === 'google_drive_auth_error'; + const successProvider = event.data?.type === `${provider}_auth_success`; + const errorProvider = event.data?.type === `${provider}_auth_error`; if (successGeneric || successProvider) { completedRef.current = true; @@ -109,22 +108,58 @@ const ConnectorAuth: React.FC = ({ } }; - const buttonLabel = label || `Connect ${providerLabel(provider)}`; - return ( - + <> + {errorMessage && ( +
+ + + + + + {errorMessage} + +
+ )} + + {isConnected ? ( +
+
+
+ + + + Connected as {userEmail} +
+ {onDisconnect && ( + + )} +
+
+ ) : ( + + )} + ); }; -export default ConnectorAuth; +export default ConnectorAuth; \ No newline at end of file diff --git a/frontend/src/components/ConnectorTreeComponent.tsx b/frontend/src/components/ConnectorTreeComponent.tsx index 9249145c..73cf6ae0 100644 --- a/frontend/src/components/ConnectorTreeComponent.tsx +++ b/frontend/src/components/ConnectorTreeComponent.tsx @@ -3,8 +3,10 @@ import { useTranslation } from 'react-i18next'; import { useSelector } from 'react-redux'; import { formatBytes } from '../utils/stringUtils'; import { selectToken } from '../preferences/preferenceSlice'; +import { ActiveState } from '../models/misc'; import Chunks from './Chunks'; import ContextMenu, { MenuOption } from './ContextMenu'; +import ConfirmationModal from '../modals/ConfirmationModal'; import userService from '../api/services/userService'; import FileIcon from '../assets/file.svg'; import FolderIcon from '../assets/folder.svg'; @@ -12,7 +14,17 @@ import ArrowLeft from '../assets/arrow-left.svg'; import ThreeDots from '../assets/three-dots.svg'; import EyeView from '../assets/eye-view.svg'; import SyncIcon from '../assets/sync.svg'; +import CheckmarkIcon from '../assets/checkMark2.svg'; import { useOutsideAlerter } from '../hooks'; +import { + Table, + TableContainer, + TableHead, + TableBody, + TableRow, + TableHeader, + TableCell, +} from './Table'; interface FileNode { type?: string; @@ -64,6 +76,7 @@ const ConnectorTreeComponent: React.FC = ({ const [syncProgress, setSyncProgress] = useState(0); const [sourceProvider, setSourceProvider] = useState(''); const [syncDone, setSyncDone] = useState(false); + const [syncConfirmationModal, setSyncConfirmationModal] = useState('INACTIVE'); useOutsideAlerter( searchDropdownRef, @@ -343,7 +356,7 @@ const ConnectorTreeComponent: React.FC = ({ {/* Sync button */} + + ))} + + +
+ Select Files from {getProviderConfig(provider).displayName} +
+ +
+ handleSearchChange(e.target.value)} + colorVariant="silver" + borderVariant="thin" + labelBgClassName="bg-[#EEE6FF78] dark:bg-[#2A262E]" + leftIcon={Search} + /> +
+ + {/* Selected Files Message */} +
+ {selectedFiles.length + selectedFolders.length} selected +
+ + +
+ + {( + <> + + + + + Name + Last Modified + Size + + + + {files.map((file, index) => ( + { + if (isFolder(file)) { + handleFolderClick(file.id, file.name); + } else { + handleFileSelect(file.id, false); + } + }} + > + +
{ + e.stopPropagation(); + handleFileSelect(file.id, isFolder(file)); + }} + > + {(isFolder(file) ? selectedFolders : selectedFiles).includes(file.id) && ( + Selected + )} +
+
+ +
+
+ {isFolder(file) +
+ {file.name} +
+
+ + {formatDate(file.modifiedTime)} + + + {file.size ? formatBytes(file.size) : '-'} + +
+ ))} +
+
+ + {isLoading && ( +
+
+
+ Loading more files... +
+
+ )} + + )} +
+
+ + + )} + + ); +}; diff --git a/frontend/src/components/FileTreeComponent.tsx b/frontend/src/components/FileTreeComponent.tsx index 724ca233..32b0839b 100644 --- a/frontend/src/components/FileTreeComponent.tsx +++ b/frontend/src/components/FileTreeComponent.tsx @@ -14,6 +14,15 @@ import EyeView from '../assets/eye-view.svg'; import Trash from '../assets/red-trash.svg'; import { useOutsideAlerter } from '../hooks'; import ConfirmationModal from '../modals/ConfirmationModal'; +import { + Table, + TableContainer, + TableHead, + TableBody, + TableRow, + TableHeader, + TableCell, +} from './Table'; interface FileNode { type?: string; @@ -533,32 +542,31 @@ const FileTreeComponent: React.FC = ({ const parentRow = currentPath.length > 0 ? [ - - -
- {t('settings.sources.parentFolderAlt')} - - .. - -
- - - - - - - - - - - , - ] + + +
+ {t('settings.sources.parentFolderAlt')} + + .. + +
+
+ + - + + + - + + +
, + ] : []; // Render directories first, then files @@ -570,32 +578,31 @@ const FileTreeComponent: React.FC = ({ const dirStats = calculateDirectoryStats(node as DirectoryStructure); return ( - navigateToDirectory(name)} > - +
{t('settings.sources.folderAlt')} - + {name}
- - +
+ + {dirStats.totalSize > 0 ? formatBytes(dirStats.totalSize) : '-'} + + {dirStats.totalTokens > 0 ? dirStats.totalTokens.toLocaleString() : '-'} - - - {dirStats.totalSize > 0 ? formatBytes(dirStats.totalSize) : '-'} - - + +
- - +
+ ); }), ...files.map(([name, node]) => { @@ -628,30 +635,29 @@ const FileTreeComponent: React.FC = ({ const menuRef = getMenuRef(itemId); return ( - handleFileClick(name)} > - +
{t('settings.sources.fileAlt')} - + {name}
- - - {node.token_count?.toLocaleString() || '-'} - - +
+ {node.size_bytes ? formatBytes(node.size_bytes) : '-'} - - + + + {node.token_count?.toLocaleString() || '-'} + +
- - +
+ ); }), ]; @@ -828,31 +834,31 @@ const FileTreeComponent: React.FC = ({
{renderPathNavigation()}
-
- - - -
+ + + + + {t('settings.sources.fileName')} - - - - - - - + + + + {renderFileTree(currentDirectory)} - -
- {t('settings.sources.tokens')} - + + {t('settings.sources.size')} - + + + {t('settings.sources.tokens')} + + {t('settings.sources.actions')} -
- + +
+
)}
diff --git a/frontend/src/components/GoogleDrivePicker.tsx b/frontend/src/components/GoogleDrivePicker.tsx
new file mode 100644
index 00000000..5112789c
--- /dev/null
+++ b/frontend/src/components/GoogleDrivePicker.tsx
@@ -0,0 +1,342 @@
+import React, { useState, useEffect } from 'react';
+import useDrivePicker from 'react-google-drive-picker';
+
+import ConnectorAuth from './ConnectorAuth';
+import { getSessionToken, setSessionToken, removeSessionToken } from '../utils/providerUtils';
+
+
+/** A picked Drive item as kept in local selection state (fields copied from the picker's docs). */
+interface PickerFile {
+  id: string;
+  name: string;
+  mimeType: string;
+  iconUrl: string;
+  description?: string;
+  sizeBytes?: string;
+}
+
+interface GoogleDrivePickerProps {
+  /** Sent as the Bearer token on the connector API requests made by this component. */
+  token: string | null;
+  /** Invoked with the selected file IDs and folder IDs whenever the selection changes. */
+  onSelectionChange: (fileIds: string[], folderIds?: string[]) => void;
+}
+
+/** MIME type the picker reports for folders; used to split picked docs into files and folders. */
+const FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder';
+
+/**
+ * Return `existing` followed by every item of `incoming` whose `id` has not been
+ * seen yet, preserving order. Neither input array is mutated.
+ */
+const mergeUniqueById = (existing: PickerFile[], incoming: PickerFile[]): PickerFile[] => {
+  const seenIds = new Set(existing.map((item) => item.id));
+  const merged = [...existing];
+  incoming.forEach((item) => {
+    if (!seenIds.has(item.id)) {
+      seenIds.add(item.id);
+      merged.push(item);
+    }
+  });
+  return merged;
+};
+
+/**
+ * Google Drive connection and file-selection widget.
+ *
+ * Validates a stored 'google_drive' session token against the backend, opens the
+ * Google Drive picker with the access token returned by that validation, keeps the
+ * picked files/folders in local state and reports their IDs via `onSelectionChange`.
+ */
+const GoogleDrivePicker: React.FC<GoogleDrivePickerProps> = ({
+  token,
+  onSelectionChange,
+}) => {
+  // Explicit element types: a bare `useState([])` would infer `never[]`.
+  const [selectedFiles, setSelectedFiles] = useState<PickerFile[]>([]);
+  const [selectedFolders, setSelectedFolders] = useState<PickerFile[]>([]);
+  const [isLoading, setIsLoading] = useState(false);
+  const [userEmail, setUserEmail] = useState('');
+  const [isConnected, setIsConnected] = useState(false);
+  const [authError, setAuthError] = useState('');
+  const [accessToken, setAccessToken] = useState<string | null>(null);
+  const [isValidating, setIsValidating] = useState(false);
+
+  const [openPicker] = useDrivePicker();
+
+  // Re-validate the stored session on mount and whenever the auth token changes.
+  useEffect(() => {
+    const sessionToken = getSessionToken('google_drive');
+    if (sessionToken) {
+      setIsValidating(true);
+      setIsConnected(true); // Optimistically set as connected for skeleton
+      validateSession(sessionToken);
+    }
+  }, [token]);
+
+  /**
+   * POST the session token to the validate-session endpoint and sync connection state.
+   *
+   * Resolves to true when the response is OK and reports `success`; in every other
+   * case (HTTP error, unsuccessful payload, thrown error) it marks the component as
+   * disconnected, sets an error message and resolves to false.
+   */
+  const validateSession = async (sessionToken: string) => {
+    try {
+      const apiHost = import.meta.env.VITE_API_HOST;
+      const validateResponse = await fetch(`${apiHost}/api/connectors/validate-session`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          'Authorization': `Bearer ${token}`
+        },
+        body: JSON.stringify({ provider: 'google_drive', session_token: sessionToken })
+      });
+
+      if (!validateResponse.ok) {
+        setIsConnected(false);
+        setAuthError('Session expired. Please reconnect to Google Drive.');
+        return false;
+      }
+
+      const validateData = await validateResponse.json();
+      if (!validateData.success) {
+        setIsConnected(false);
+        setAuthError(validateData.error || 'Session expired. Please reconnect your account.');
+        return false;
+      }
+
+      setUserEmail(validateData.user_email || 'Connected User');
+      setIsConnected(true);
+      setAuthError('');
+      setAccessToken(validateData.access_token || null);
+      return true;
+    } catch (error) {
+      console.error('Error validating session:', error);
+      setAuthError('Failed to validate session. Please reconnect.');
+      setIsConnected(false);
+      return false;
+    } finally {
+      // Single exit point for the validating flag, whichever branch returned.
+      setIsValidating(false);
+    }
+  };
+
+  /**
+   * Open the Google Drive picker and merge whatever the user picks into the selection.
+   *
+   * Bails out with a visible error when there is no stored session token, no access
+   * token, or no Google client ID configured.
+   */
+  const handleOpenPicker = async () => {
+    setIsLoading(true);
+
+    const sessionToken = getSessionToken('google_drive');
+
+    if (!sessionToken) {
+      setAuthError('No valid session found. Please reconnect to Google Drive.');
+      setIsLoading(false);
+      return;
+    }
+
+    if (!accessToken) {
+      setAuthError('No access token available. Please reconnect to Google Drive.');
+      setIsLoading(false);
+      return;
+    }
+
+    try {
+      const clientId: string = import.meta.env.VITE_GOOGLE_CLIENT_ID;
+
+      // Derive appId from clientId (extract numeric part before first dash)
+      const appId = clientId ? clientId.split('-')[0] : null;
+
+      if (!clientId || !appId) {
+        console.error('Missing Google Drive configuration');
+        // Surface the problem to the user instead of silently doing nothing.
+        setAuthError('Google Drive is not configured. Please contact your administrator.');
+        setIsLoading(false);
+        return;
+      }
+
+      openPicker({
+        clientId: clientId,
+        developerKey: "",
+        appId: appId,
+        setSelectFolderEnabled: false,
+        viewId: "DOCS",
+        showUploadView: false,
+        showUploadFolders: false,
+        supportDrives: false,
+        multiselect: true,
+        token: accessToken,
+        viewMimeTypes: 'application/vnd.google-apps.document,application/vnd.google-apps.presentation,application/vnd.google-apps.spreadsheet,application/pdf,application/vnd.openxmlformats-officedocument.wordprocessingml.document,application/vnd.openxmlformats-officedocument.presentationml.presentation,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/msword,application/vnd.ms-powerpoint,application/vnd.ms-excel,text/plain,text/csv,text/html,text/markdown,text/x-rst,application/json,application/epub+zip,application/rtf,image/jpeg,image/jpg,image/png',
+        callbackFunction: (data: any) => {
+          setIsLoading(false);
+          if (data.action === 'picked') {
+            const docs = data.docs || [];
+
+            const newFiles: PickerFile[] = [];
+            const newFolders: PickerFile[] = [];
+
+            docs.forEach((doc: any) => {
+              const item = {
+                id: doc.id,
+                name: doc.name,
+                mimeType: doc.mimeType,
+                iconUrl: doc.iconUrl || '',
+                description: doc.description,
+                sizeBytes: doc.sizeBytes
+              };
+
+              if (doc.mimeType === FOLDER_MIME_TYPE) {
+                newFolders.push(item);
+              } else {
+                newFiles.push(item);
+              }
+            });
+
+            setSelectedFiles((prevFiles) => mergeUniqueById(prevFiles, newFiles));
+            setSelectedFolders((prevFolders) => mergeUniqueById(prevFolders, newFolders));
+
+            // Notify the parent with the same de-duplicated merge that is applied to
+            // local state; a plain concatenation would report an ID twice when an
+            // already-selected item is picked again.
+            onSelectionChange(
+              mergeUniqueById(selectedFiles, newFiles).map((file) => file.id),
+              mergeUniqueById(selectedFolders, newFolders).map((folder) => folder.id)
+            );
+          }
+        },
+      });
+    } catch (error) {
+      console.error('Error opening picker:', error);
+      setAuthError('Failed to open file picker. Please try again.');
+      setIsLoading(false);
+    }
+  };
+
+  /**
+   * Disconnect from Google Drive: ask the backend to drop the session (best effort,
+   * failures are only logged), then clear the stored token, all local state and the
+   * parent's selection.
+   */
+  const handleDisconnect = async () => {
+    const sessionToken = getSessionToken('google_drive');
+    if (sessionToken) {
+      try {
+        const apiHost = import.meta.env.VITE_API_HOST;
+        await fetch(`${apiHost}/api/connectors/disconnect`, {
+          method: 'POST',
+          headers: {
+            'Content-Type': 'application/json',
+            'Authorization': `Bearer ${token}`
+          },
+          body: JSON.stringify({ provider: 'google_drive', session_token: sessionToken })
+        });
+      } catch (err) {
+        console.error('Error disconnecting from Google Drive:', err);
+      }
+    }
+
+    removeSessionToken('google_drive');
+    setIsConnected(false);
+    setSelectedFiles([]);
+    setSelectedFolders([]);
+    setAccessToken(null);
+    setUserEmail('');
+    setAuthError('');
+    onSelectionChange([], []);
+  };
+
+  const ConnectedStateSkeleton = () => (
+
+
+
+
+
+
+
+
+
+ ); + + const FilesSectionSkeleton = () => ( +
+
+
+
+
+
+
+
+
+ ); + + return ( +
+ {isValidating ? ( + <> + + + + ) : ( + <> + { + setUserEmail(data.user_email || 'Connected User'); + setIsConnected(true); + setAuthError(''); + + if (data.session_token) { + setSessionToken('google_drive', data.session_token); + validateSession(data.session_token); + } + }} + onError={(error) => { + setAuthError(error); + setIsConnected(false); + }} + isConnected={isConnected} + userEmail={userEmail} + onDisconnect={handleDisconnect} + errorMessage={authError} + /> + + {isConnected && ( +
+
+
+

Selected Files

+ +
+ + {selectedFiles.length === 0 && selectedFolders.length === 0 ? ( +

No files or folders selected

+ ) : ( +
+ {selectedFolders.length > 0 && ( +
+

Folders

+ {selectedFolders.map((folder) => ( +
+ Folder + {folder.name} + +
+ ))} +
+ )} + + {selectedFiles.length > 0 && ( +
+

Files

+ {selectedFiles.map((file) => ( +
+ File + {file.name} + +
+ ))} +
+ )} +
+ )} +
+
+ )} + + )} +
+ ); +}; + +export default GoogleDrivePicker; diff --git a/frontend/src/components/Input.tsx b/frontend/src/components/Input.tsx index dbc517cb..b9f9d93a 100644 --- a/frontend/src/components/Input.tsx +++ b/frontend/src/components/Input.tsx @@ -16,6 +16,7 @@ const Input = ({ textSize = 'medium', children, labelBgClassName = 'bg-white dark:bg-raisin-black', + leftIcon, onChange, onPaste, onKeyDown, @@ -42,7 +43,7 @@ const Input = ({
{children} + {leftIcon && ( +
+ {leftIcon} +
+ )} {placeholder && (
@@ -1194,51 +815,21 @@ function Upload({ ); } - useEffect(() => { - const scrollContainer = scrollContainerRef.current; + - const handleScroll = () => { - if (!scrollContainer) return; - - const { scrollTop, scrollHeight, clientHeight } = scrollContainer; - const isNearBottom = scrollHeight - scrollTop - clientHeight < 50; - - if (isNearBottom && hasMoreFiles && !isLoadingFiles && nextPageToken) { - const sessionToken = getSessionToken(ingestor.type); - if (sessionToken) { - loadGoogleDriveFiles( - sessionToken, - currentFolderId, - nextPageToken, - true, - ); - } - } - }; - - scrollContainer?.addEventListener('scroll', handleScroll); - - return () => { - scrollContainer?.removeEventListener('scroll', handleScroll); - }; - }, [ - hasMoreFiles, - isLoadingFiles, - nextPageToken, - currentFolderId, - ingestor.type, - ]); + return ( { close(); - setDocName(''); + setIngestor({ type: null, name: '', config: {} }); setfiles([]); setModalState('INACTIVE'); - setActiveTab(null); }} + className="w-11/12 sm:w-auto sm:min-w-[600px] md:min-w-[700px] max-h-[90vh] sm:max-h-none" + contentClassName="max-h-[80vh] sm:max-h-none" > {view} diff --git a/frontend/src/upload/types/ingestor.ts b/frontend/src/upload/types/ingestor.ts index e915b8e2..e9342d2d 100644 --- a/frontend/src/upload/types/ingestor.ts +++ b/frontend/src/upload/types/ingestor.ts @@ -1,50 +1,16 @@ -export interface BaseIngestorConfig { - [key: string]: string | number | boolean | undefined; -} +import CrawlerIcon from '../../assets/crawler.svg'; +import FileUploadIcon from '../../assets/file_upload.svg'; +import UrlIcon from '../../assets/url.svg'; +import GithubIcon from '../../assets/github.svg'; +import RedditIcon from '../../assets/reddit.svg'; +import DriveIcon from '../../assets/drive.svg'; -export interface RedditIngestorConfig extends BaseIngestorConfig { - client_id: string; - client_secret: string; - user_agent: string; - search_queries: string; - number_posts: number; -} - -export interface 
GithubIngestorConfig extends BaseIngestorConfig { - repo_url: string; -} - -export interface CrawlerIngestorConfig extends BaseIngestorConfig { - url: string; -} - -export interface UrlIngestorConfig extends BaseIngestorConfig { - url: string; -} - -export interface GoogleDriveIngestorConfig extends BaseIngestorConfig { - folder_id?: string; - file_ids?: string; - recursive?: boolean; - token_info?: any; -} - -export type IngestorType = - | 'crawler' - | 'github' - | 'reddit' - | 'url' - | 'google_drive'; +export type IngestorType = 'crawler' | 'github' | 'reddit' | 'url' | 'google_drive' | 'local_file'; export interface IngestorConfig { - type: IngestorType; + type: IngestorType | null; name: string; - config: - | RedditIngestorConfig - | GithubIngestorConfig - | CrawlerIngestorConfig - | UrlIngestorConfig - | GoogleDriveIngestorConfig; + config: Record; } export type IngestorFormData = { @@ -54,7 +20,7 @@ export type IngestorFormData = { data: string; }; -export type FieldType = 'string' | 'number' | 'enum' | 'boolean'; +export type FieldType = 'string' | 'number' | 'enum' | 'boolean' | 'local_file_picker' | 'remote_file_picker' | 'google_drive_picker'; export interface FormField { name: string; @@ -65,89 +31,82 @@ export interface FormField { options?: { label: string; value: string }[]; } -export const IngestorFormSchemas: Record = { - crawler: [ - { - name: 'url', - label: 'URL', - type: 'string', - required: true, - }, - ], - url: [ - { - name: 'url', - label: 'URL', - type: 'string', - required: true, - }, - ], - reddit: [ - { - name: 'client_id', - label: 'Client ID', - type: 'string', - required: true, - }, - { - name: 'client_secret', - label: 'Client Secret', - type: 'string', - required: true, - }, - { - name: 'user_agent', - label: 'User Agent', - type: 'string', - required: true, - }, - { - name: 'search_queries', - label: 'Search Queries', - type: 'string', - required: true, - }, - { - name: 'number_posts', - label: 'Number of Posts', - type: 
'number', - required: true, - }, - ], - github: [ - { - name: 'repo_url', - label: 'Repository URL', - type: 'string', - required: true, - }, - ], - google_drive: [ - { - name: 'recursive', - label: 'Include subfolders', - type: 'boolean', - required: false, - }, - ], -}; +export interface IngestorSchema { + key: IngestorType; + label: string; + icon: string; + heading: string; + validate?: () => boolean; + fields: FormField[]; +} -export const IngestorDefaultConfigs: Record< - IngestorType, - Omit -> = { - crawler: { - name: '', - config: { - url: '', - } as CrawlerIngestorConfig, +export const IngestorFormSchemas: IngestorSchema[] = [ + { + key: 'local_file', + label: 'Upload File', + icon: FileUploadIcon, + heading: 'Upload new document', + fields: [ + { name: 'files', label: 'Select files', type: 'local_file_picker', required: true }, + ] }, - url: { - name: '', - config: { - url: '', - } as UrlIngestorConfig, + { + key: 'crawler', + label: 'Crawler', + icon: CrawlerIcon, + heading: 'Add content with Web Crawler', + fields: [{ name: 'url', label: 'URL', type: 'string', required: true }] }, + { + key: 'url', + label: 'Link', + icon: UrlIcon, + heading: 'Add content from URL', + fields: [{ name: 'url', label: 'URL', type: 'string', required: true }] + }, + { + key: 'github', + label: 'GitHub', + icon: GithubIcon, + heading: 'Add content from GitHub', + fields: [{ name: 'repo_url', label: 'Repository URL', type: 'string', required: true }] + }, + { + key: 'reddit', + label: 'Reddit', + icon: RedditIcon, + heading: 'Add content from Reddit', + fields: [ + { name: 'client_id', label: 'Client ID', type: 'string', required: true }, + { name: 'client_secret', label: 'Client Secret', type: 'string', required: true }, + { name: 'user_agent', label: 'User Agent', type: 'string', required: true }, + { name: 'search_queries', label: 'Search Queries', type: 'string', required: true }, + { name: 'number_posts', label: 'Number of Posts', type: 'number', required: true }, + ] + 
}, + { + key: 'google_drive', + label: 'Google Drive', + icon: DriveIcon, + heading: 'Upload from Google Drive', + validate: () => { + const googleClientId = import.meta.env.VITE_GOOGLE_CLIENT_ID; + return !!(googleClientId); + }, + fields: [ + { + name: 'files', + label: 'Select Files from Google Drive', + type: 'google_drive_picker', + required: true, + } + ] + }, +]; + +export const IngestorDefaultConfigs: Record> = { + crawler: { name: '', config: { url: '' } }, + url: { name: '', config: { url: '' } }, reddit: { name: '', config: { @@ -155,21 +114,30 @@ export const IngestorDefaultConfigs: Record< client_secret: '', user_agent: '', search_queries: '', - number_posts: 10, - } as RedditIngestorConfig, - }, - github: { - name: '', - config: { - repo_url: '', - } as GithubIngestorConfig, + number_posts: 10 + } }, + github: { name: '', config: { repo_url: '' } }, google_drive: { name: '', config: { - folder_id: '', file_ids: '', - recursive: true, - } as GoogleDriveIngestorConfig, + folder_ids: '', + recursive: true + } }, + local_file: { name: '', config: { files: [] } }, }; + +export interface IngestorOption { + label: string; + value: IngestorType; + icon: string; + heading: string; +} + +export const getIngestorSchema = (key: IngestorType): IngestorSchema | undefined => { + return IngestorFormSchemas.find(schema => schema.key === key); +}; + +