diff --git a/README.md b/README.md index 90bc7736..8d431881 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,11 @@ - [x] Agent optimisations (May 2025) - [x] Filesystem sources update (July 2025) - [x] Json Responses (August 2025) -- [ ] Sharepoint integration (August 2025) -- [ ] MCP support (August 2025) -- [ ] Add OAuth 2.0 authentication for tools and sources (August 2025) +- [x] MCP support (August 2025) +- [x] Google Drive integration (September 2025) +- [ ] Add OAuth 2.0 authentication for MCP (September 2025) +- [ ] Sharepoint integration (October 2025) +- [ ] Deep Agents (October 2025) - [ ] Agent scheduling You can find our full roadmap [here](https://github.com/orgs/arc53/projects/2). Please don't hesitate to contribute or create issues, it helps us improve DocsGPT! diff --git a/application/agents/base.py b/application/agents/base.py index 77729fe6..134de1c3 100644 --- a/application/agents/base.py +++ b/application/agents/base.py @@ -264,7 +264,15 @@ class BaseAgent(ABC): query: str, retrieved_data: List[Dict], ) -> List[Dict]: - docs_together = "\n".join([doc["text"] for doc in retrieved_data]) + docs_with_filenames = [] + for doc in retrieved_data: + filename = doc.get("filename") or doc.get("title") or doc.get("source") + if filename: + chunk_header = str(filename) + docs_with_filenames.append(f"{chunk_header}\n{doc['text']}") + else: + docs_with_filenames.append(doc["text"]) + docs_together = "\n\n".join(docs_with_filenames) p_chat_combine = system_prompt.replace("{summaries}", docs_together) messages_combine = [{"role": "system", "content": p_chat_combine}] diff --git a/application/api/connector/routes.py b/application/api/connector/routes.py index 1647aa78..49307058 100644 --- a/application/api/connector/routes.py +++ b/application/api/connector/routes.py @@ -1,5 +1,7 @@ +import base64 import datetime import json +import uuid from bson.objectid import ObjectId @@ -13,8 +15,6 @@ from flask import ( from flask_restx import fields, Namespace, Resource - - from application.api.user.tasks import ( ingest_connector_task, ) @@ -172,7 +172,7 @@ class ConnectorSources(Resource): return make_response(jsonify({"success": False}), 401) user = decoded_token.get("sub") try: - sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1) + sources = sources_collection.find({"user": user, "type": "connector:file"}).sort("date", -1) connector_sources = [] for source in sources: connector_sources.append({ @@ -234,8 +234,24 @@ class ConnectorAuth(Resource): if not ConnectorCreator.is_supported(provider): return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400) - import uuid - state = str(uuid.uuid4()) + decoded_token = request.decoded_token + if not decoded_token: + return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) + user_id = decoded_token.get('sub') + + now = datetime.datetime.now(datetime.timezone.utc) + result = sessions_collection.insert_one({ + "provider": provider, + "user": user_id, + "status": "pending", + "created_at": now + }) + state_dict = { + "provider": provider, + "object_id": str(result.inserted_id) + } + state = base64.urlsafe_b64encode(json.dumps(state_dict).encode()).decode() + auth = ConnectorCreator.create_auth(provider) authorization_url = auth.get_authorization_url(state=state) return make_response(jsonify({ @@ -256,25 +272,30 @@ class ConnectorsCallback(Resource): try: from application.parser.connectors.connector_creator import ConnectorCreator from flask import request, redirect - import uuid - provider = request.args.get('provider', 'google_drive') authorization_code = request.args.get('code') - _ = request.args.get('state') + state = request.args.get('state') error = request.args.get('error') + state_dict = json.loads(base64.urlsafe_b64decode(state.encode()).decode()) + provider = state_dict["provider"] + state_object_id = state_dict["object_id"] + if error: - return redirect(f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider={provider}") + if error == "access_denied": + return redirect(f"/api/connectors/callback-status?status=cancelled&message=Authentication+was+cancelled.+You+can+try+again+if+you'd+like+to+connect+your+account.&provider={provider}") + else: + current_app.logger.warning(f"OAuth error in callback: {error}") + return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}") if not authorization_code: - return redirect(f"/api/connectors/callback-status?status=error&message=Authorization+code+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider={provider}") + return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}") try: auth = ConnectorCreator.create_auth(provider) token_info = auth.exchange_code_for_tokens(authorization_code) session_token = str(uuid.uuid4()) - try: credentials = auth.create_credentials_from_token_info(token_info) @@ -289,30 +310,31 @@ class ConnectorsCallback(Resource): "access_token": token_info.get("access_token"), "refresh_token": token_info.get("refresh_token"), "token_uri": token_info.get("token_uri"), - "expiry": token_info.get("expiry"), - "scopes": token_info.get("scopes") + "expiry": token_info.get("expiry") } - user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None - sessions_collection.insert_one({ - "session_token": session_token, - "user": user_id, - "token_info": sanitized_token_info, - "created_at": datetime.datetime.now(datetime.timezone.utc), - "user_email": user_email, - "provider": provider - }) + sessions_collection.find_one_and_update( + {"_id": ObjectId(state_object_id), "provider": provider}, + { + "$set": { + "session_token": session_token, + "token_info": sanitized_token_info, + "user_email": user_email, + "status": "authorized" + } + } + ) # Redirect to success page with session token and user email return redirect(f"/api/connectors/callback-status?status=success&message=Authentication+successful&provider={provider}&session_token={session_token}&user_email={user_email}") except Exception as e: current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True) - return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+exchange+authorization+code+for+tokens:+{str(e)}&provider={provider}") + return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}") except Exception as e: current_app.logger.error(f"Error handling connector callback: {e}") - return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+complete+connector+authentication:+{str(e)}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.") + return redirect("/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.") @connectors_ns.route("/api/connectors/refresh") @@ -338,8 +360,15 @@ class ConnectorRefresh(Resource): @connectors_ns.route("/api/connectors/files") class ConnectorFiles(Resource): - @api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)})) - @api.doc(description="List files from a connector provider (supports pagination)") + @api.expect(api.model("ConnectorFilesModel", { + "provider": fields.String(required=True), + "session_token": fields.String(required=True), + "folder_id": fields.String(required=False), + "limit": fields.Integer(required=False), + "page_token": fields.String(required=False), + "search_query": fields.String(required=False) + })) + @api.doc(description="List files from a connector provider (supports pagination and search)") def post(self): try: data = request.get_json() @@ -348,10 +377,11 @@ class ConnectorFiles(Resource): folder_id = data.get('folder_id') limit = data.get('limit', 10) page_token = data.get('page_token') + search_query = data.get('search_query') + if not provider or not session_token: return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) - decoded_token = request.decoded_token if not decoded_token: return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) @@ -361,13 +391,17 @@ class ConnectorFiles(Resource): return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401) loader = ConnectorCreator.create_connector(provider, session_token) - documents = loader.load_data({ + input_config = { 'limit': limit, 'list_only': True, 'session_token': session_token, 'folder_id': folder_id, 'page_token': page_token - }) + } + if search_query: + input_config['search_query'] = search_query + + documents = loader.load_data(input_config) files = [] for doc in documents[:limit]: @@ -385,13 +419,20 @@ class ConnectorFiles(Resource): 'name': metadata.get('file_name', 'Unknown File'), 'type': metadata.get('mime_type', 'unknown'), 'size': metadata.get('size', None), - 'modifiedTime': formatted_time + 'modifiedTime': formatted_time, + 'isFolder': metadata.get('is_folder', False) }) next_token = getattr(loader, 'next_page_token', None) has_more = bool(next_token) - return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200) + return make_response(jsonify({ + "success": True, + "files": files, + "total": len(files), + "next_page_token": next_token, + "has_more": has_more + }), 200) except Exception as e: current_app.logger.error(f"Error loading connector files: {e}") return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500) @@ -400,7 +441,7 @@ class ConnectorFiles(Resource): @connectors_ns.route("/api/connectors/validate-session") class ConnectorValidateSession(Resource): @api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)})) - @api.doc(description="Validate connector session token and return user info") + @api.doc(description="Validate connector session token and return user info and access token") def post(self): try: data = request.get_json() @@ -409,7 +450,6 @@ class ConnectorValidateSession(Resource): if not provider or not session_token: return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400) - decoded_token = request.decoded_token if not decoded_token: return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401) @@ -423,10 +463,36 @@ class ConnectorValidateSession(Resource): auth = ConnectorCreator.create_auth(provider) is_expired = auth.is_token_expired(token_info) + if is_expired and token_info.get('refresh_token'): + try: + refreshed_token_info = auth.refresh_access_token(token_info.get('refresh_token')) + sanitized_token_info = { + "access_token": refreshed_token_info.get("access_token"), + "refresh_token": refreshed_token_info.get("refresh_token"), + "token_uri": refreshed_token_info.get("token_uri"), + "expiry": refreshed_token_info.get("expiry") + } + sessions_collection.update_one( + {"session_token": session_token}, + {"$set": {"token_info": sanitized_token_info}} + ) + token_info = sanitized_token_info + is_expired = False + except Exception as refresh_error: + current_app.logger.error(f"Failed to refresh token: {refresh_error}") + + if is_expired: + return make_response(jsonify({ + "success": False, + "expired": True, + "error": "Session token has expired. Please reconnect." + }), 401) + return make_response(jsonify({ "success": True, - "expired": is_expired, - "user_email": session.get('user_email', 'Connected User') + "expired": False, + "user_email": session.get('user_email', 'Connected User'), + "access_token": token_info.get('access_token') }), 200) except Exception as e: current_app.logger.error(f"Error validating connector session: {e}") @@ -586,20 +652,23 @@ class ConnectorCallbackStatus(Resource): .container {{ max-width: 600px; margin: 0 auto; }} .success {{ color: #4CAF50; }} .error {{ color: #F44336; }} + .cancelled {{ color: #FF9800; }}