mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Merge branch 'refine/mcp-tool' of https://github.com/siiddhantt/DocsGPT into pr/1964
This commit is contained in:
@@ -264,7 +264,15 @@ class BaseAgent(ABC):
|
||||
query: str,
|
||||
retrieved_data: List[Dict],
|
||||
) -> List[Dict]:
|
||||
docs_together = "\n".join([doc["text"] for doc in retrieved_data])
|
||||
docs_with_filenames = []
|
||||
for doc in retrieved_data:
|
||||
filename = doc.get("filename") or doc.get("title") or doc.get("source")
|
||||
if filename:
|
||||
chunk_header = str(filename)
|
||||
docs_with_filenames.append(f"{chunk_header}\n{doc['text']}")
|
||||
else:
|
||||
docs_with_filenames.append(doc["text"])
|
||||
docs_together = "\n\n".join(docs_with_filenames)
|
||||
p_chat_combine = system_prompt.replace("{summaries}", docs_together)
|
||||
messages_combine = [{"role": "system", "content": p_chat_combine}]
|
||||
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import base64
|
||||
import datetime
|
||||
import json
|
||||
import uuid
|
||||
|
||||
|
||||
from bson.objectid import ObjectId
|
||||
@@ -13,8 +15,6 @@ from flask import (
|
||||
from flask_restx import fields, Namespace, Resource
|
||||
|
||||
|
||||
|
||||
|
||||
from application.api.user.tasks import (
|
||||
ingest_connector_task,
|
||||
)
|
||||
@@ -172,7 +172,7 @@ class ConnectorSources(Resource):
|
||||
return make_response(jsonify({"success": False}), 401)
|
||||
user = decoded_token.get("sub")
|
||||
try:
|
||||
sources = sources_collection.find({"user": user, "type": "connector"}).sort("date", -1)
|
||||
sources = sources_collection.find({"user": user, "type": "connector:file"}).sort("date", -1)
|
||||
connector_sources = []
|
||||
for source in sources:
|
||||
connector_sources.append({
|
||||
@@ -234,8 +234,24 @@ class ConnectorAuth(Resource):
|
||||
if not ConnectorCreator.is_supported(provider):
|
||||
return make_response(jsonify({"success": False, "error": f"Unsupported provider: {provider}"}), 400)
|
||||
|
||||
import uuid
|
||||
state = str(uuid.uuid4())
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
user_id = decoded_token.get('sub')
|
||||
|
||||
now = datetime.datetime.now(datetime.timezone.utc)
|
||||
result = sessions_collection.insert_one({
|
||||
"provider": provider,
|
||||
"user": user_id,
|
||||
"status": "pending",
|
||||
"created_at": now
|
||||
})
|
||||
state_dict = {
|
||||
"provider": provider,
|
||||
"object_id": str(result.inserted_id)
|
||||
}
|
||||
state = base64.urlsafe_b64encode(json.dumps(state_dict).encode()).decode()
|
||||
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
authorization_url = auth.get_authorization_url(state=state)
|
||||
return make_response(jsonify({
|
||||
@@ -256,25 +272,30 @@ class ConnectorsCallback(Resource):
|
||||
try:
|
||||
from application.parser.connectors.connector_creator import ConnectorCreator
|
||||
from flask import request, redirect
|
||||
import uuid
|
||||
|
||||
provider = request.args.get('provider', 'google_drive')
|
||||
authorization_code = request.args.get('code')
|
||||
_ = request.args.get('state')
|
||||
state = request.args.get('state')
|
||||
error = request.args.get('error')
|
||||
|
||||
state_dict = json.loads(base64.urlsafe_b64decode(state.encode()).decode())
|
||||
provider = state_dict["provider"]
|
||||
state_object_id = state_dict["object_id"]
|
||||
|
||||
if error:
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=OAuth+error:+{error}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.&provider={provider}")
|
||||
if error == "access_denied":
|
||||
return redirect(f"/api/connectors/callback-status?status=cancelled&message=Authentication+was+cancelled.+You+can+try+again+if+you'd+like+to+connect+your+account.&provider={provider}")
|
||||
else:
|
||||
current_app.logger.warning(f"OAuth error in callback: {error}")
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}")
|
||||
|
||||
if not authorization_code:
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Authorization+code+not+provided.+Please+complete+the+authorization+process+and+make+sure+to+grant+offline+access.&provider={provider}")
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}")
|
||||
|
||||
try:
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
token_info = auth.exchange_code_for_tokens(authorization_code)
|
||||
|
||||
session_token = str(uuid.uuid4())
|
||||
|
||||
|
||||
try:
|
||||
credentials = auth.create_credentials_from_token_info(token_info)
|
||||
@@ -289,30 +310,31 @@ class ConnectorsCallback(Resource):
|
||||
"access_token": token_info.get("access_token"),
|
||||
"refresh_token": token_info.get("refresh_token"),
|
||||
"token_uri": token_info.get("token_uri"),
|
||||
"expiry": token_info.get("expiry"),
|
||||
"scopes": token_info.get("scopes")
|
||||
"expiry": token_info.get("expiry")
|
||||
}
|
||||
|
||||
user_id = request.decoded_token.get("sub") if getattr(request, "decoded_token", None) else None
|
||||
sessions_collection.insert_one({
|
||||
"session_token": session_token,
|
||||
"user": user_id,
|
||||
"token_info": sanitized_token_info,
|
||||
"created_at": datetime.datetime.now(datetime.timezone.utc),
|
||||
"user_email": user_email,
|
||||
"provider": provider
|
||||
})
|
||||
sessions_collection.find_one_and_update(
|
||||
{"_id": ObjectId(state_object_id), "provider": provider},
|
||||
{
|
||||
"$set": {
|
||||
"session_token": session_token,
|
||||
"token_info": sanitized_token_info,
|
||||
"user_email": user_email,
|
||||
"status": "authorized"
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
# Redirect to success page with session token and user email
|
||||
return redirect(f"/api/connectors/callback-status?status=success&message=Authentication+successful&provider={provider}&session_token={session_token}&user_email={user_email}")
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error exchanging code for tokens: {str(e)}", exc_info=True)
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+exchange+authorization+code+for+tokens:+{str(e)}&provider={provider}")
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.&provider={provider}")
|
||||
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error handling connector callback: {e}")
|
||||
return redirect(f"/api/connectors/callback-status?status=error&message=Failed+to+complete+connector+authentication:+{str(e)}.+Please+try+again+and+make+sure+to+grant+all+requested+permissions,+including+offline+access.")
|
||||
return redirect("/api/connectors/callback-status?status=error&message=Authentication+failed.+Please+try+again+and+make+sure+to+grant+all+requested+permissions.")
|
||||
|
||||
|
||||
@connectors_ns.route("/api/connectors/refresh")
|
||||
@@ -338,8 +360,15 @@ class ConnectorRefresh(Resource):
|
||||
|
||||
@connectors_ns.route("/api/connectors/files")
|
||||
class ConnectorFiles(Resource):
|
||||
@api.expect(api.model("ConnectorFilesModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True), "folder_id": fields.String(required=False), "limit": fields.Integer(required=False), "page_token": fields.String(required=False)}))
|
||||
@api.doc(description="List files from a connector provider (supports pagination)")
|
||||
@api.expect(api.model("ConnectorFilesModel", {
|
||||
"provider": fields.String(required=True),
|
||||
"session_token": fields.String(required=True),
|
||||
"folder_id": fields.String(required=False),
|
||||
"limit": fields.Integer(required=False),
|
||||
"page_token": fields.String(required=False),
|
||||
"search_query": fields.String(required=False)
|
||||
}))
|
||||
@api.doc(description="List files from a connector provider (supports pagination and search)")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
@@ -348,10 +377,11 @@ class ConnectorFiles(Resource):
|
||||
folder_id = data.get('folder_id')
|
||||
limit = data.get('limit', 10)
|
||||
page_token = data.get('page_token')
|
||||
search_query = data.get('search_query')
|
||||
|
||||
if not provider or not session_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
|
||||
|
||||
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
@@ -361,13 +391,17 @@ class ConnectorFiles(Resource):
|
||||
return make_response(jsonify({"success": False, "error": "Invalid or unauthorized session"}), 401)
|
||||
|
||||
loader = ConnectorCreator.create_connector(provider, session_token)
|
||||
documents = loader.load_data({
|
||||
input_config = {
|
||||
'limit': limit,
|
||||
'list_only': True,
|
||||
'session_token': session_token,
|
||||
'folder_id': folder_id,
|
||||
'page_token': page_token
|
||||
})
|
||||
}
|
||||
if search_query:
|
||||
input_config['search_query'] = search_query
|
||||
|
||||
documents = loader.load_data(input_config)
|
||||
|
||||
files = []
|
||||
for doc in documents[:limit]:
|
||||
@@ -385,13 +419,20 @@ class ConnectorFiles(Resource):
|
||||
'name': metadata.get('file_name', 'Unknown File'),
|
||||
'type': metadata.get('mime_type', 'unknown'),
|
||||
'size': metadata.get('size', None),
|
||||
'modifiedTime': formatted_time
|
||||
'modifiedTime': formatted_time,
|
||||
'isFolder': metadata.get('is_folder', False)
|
||||
})
|
||||
|
||||
next_token = getattr(loader, 'next_page_token', None)
|
||||
has_more = bool(next_token)
|
||||
|
||||
return make_response(jsonify({"success": True, "files": files, "total": len(files), "next_page_token": next_token, "has_more": has_more}), 200)
|
||||
return make_response(jsonify({
|
||||
"success": True,
|
||||
"files": files,
|
||||
"total": len(files),
|
||||
"next_page_token": next_token,
|
||||
"has_more": has_more
|
||||
}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error loading connector files: {e}")
|
||||
return make_response(jsonify({"success": False, "error": f"Failed to load files: {str(e)}"}), 500)
|
||||
@@ -400,7 +441,7 @@ class ConnectorFiles(Resource):
|
||||
@connectors_ns.route("/api/connectors/validate-session")
|
||||
class ConnectorValidateSession(Resource):
|
||||
@api.expect(api.model("ConnectorValidateSessionModel", {"provider": fields.String(required=True), "session_token": fields.String(required=True)}))
|
||||
@api.doc(description="Validate connector session token and return user info")
|
||||
@api.doc(description="Validate connector session token and return user info and access token")
|
||||
def post(self):
|
||||
try:
|
||||
data = request.get_json()
|
||||
@@ -409,7 +450,6 @@ class ConnectorValidateSession(Resource):
|
||||
if not provider or not session_token:
|
||||
return make_response(jsonify({"success": False, "error": "provider and session_token are required"}), 400)
|
||||
|
||||
|
||||
decoded_token = request.decoded_token
|
||||
if not decoded_token:
|
||||
return make_response(jsonify({"success": False, "error": "Unauthorized"}), 401)
|
||||
@@ -423,10 +463,36 @@ class ConnectorValidateSession(Resource):
|
||||
auth = ConnectorCreator.create_auth(provider)
|
||||
is_expired = auth.is_token_expired(token_info)
|
||||
|
||||
if is_expired and token_info.get('refresh_token'):
|
||||
try:
|
||||
refreshed_token_info = auth.refresh_access_token(token_info.get('refresh_token'))
|
||||
sanitized_token_info = {
|
||||
"access_token": refreshed_token_info.get("access_token"),
|
||||
"refresh_token": refreshed_token_info.get("refresh_token"),
|
||||
"token_uri": refreshed_token_info.get("token_uri"),
|
||||
"expiry": refreshed_token_info.get("expiry")
|
||||
}
|
||||
sessions_collection.update_one(
|
||||
{"session_token": session_token},
|
||||
{"$set": {"token_info": sanitized_token_info}}
|
||||
)
|
||||
token_info = sanitized_token_info
|
||||
is_expired = False
|
||||
except Exception as refresh_error:
|
||||
current_app.logger.error(f"Failed to refresh token: {refresh_error}")
|
||||
|
||||
if is_expired:
|
||||
return make_response(jsonify({
|
||||
"success": False,
|
||||
"expired": True,
|
||||
"error": "Session token has expired. Please reconnect."
|
||||
}), 401)
|
||||
|
||||
return make_response(jsonify({
|
||||
"success": True,
|
||||
"expired": is_expired,
|
||||
"user_email": session.get('user_email', 'Connected User')
|
||||
"expired": False,
|
||||
"user_email": session.get('user_email', 'Connected User'),
|
||||
"access_token": token_info.get('access_token')
|
||||
}), 200)
|
||||
except Exception as e:
|
||||
current_app.logger.error(f"Error validating connector session: {e}")
|
||||
@@ -586,20 +652,23 @@ class ConnectorCallbackStatus(Resource):
|
||||
.container {{ max-width: 600px; margin: 0 auto; }}
|
||||
.success {{ color: #4CAF50; }}
|
||||
.error {{ color: #F44336; }}
|
||||
.cancelled {{ color: #FF9800; }}
|
||||
</style>
|
||||
<script>
|
||||
window.onload = function() {{
|
||||
const status = "{status}";
|
||||
const sessionToken = "{session_token}";
|
||||
const userEmail = "{user_email}";
|
||||
|
||||
|
||||
if (status === "success" && window.opener) {{
|
||||
window.opener.postMessage({{
|
||||
type: '{provider}_auth_success',
|
||||
session_token: sessionToken,
|
||||
user_email: userEmail
|
||||
}}, '*');
|
||||
|
||||
|
||||
setTimeout(() => window.close(), 3000);
|
||||
}} else if (status === "cancelled" || status === "error") {{
|
||||
setTimeout(() => window.close(), 3000);
|
||||
}}
|
||||
}};
|
||||
@@ -612,7 +681,7 @@ class ConnectorCallbackStatus(Resource):
|
||||
<p>{message}</p>
|
||||
{f'<p>Connected as: {user_email}</p>' if status == 'success' else ''}
|
||||
</div>
|
||||
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else ''}</small></p>
|
||||
<p><small>You can close this window. {f"Your {provider.replace('_', ' ').title()} is now connected and ready to use." if status == 'success' else "Feel free to close this window."}</small></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -43,8 +43,7 @@ class Settings(BaseSettings):
|
||||
# Google Drive integration
|
||||
GOOGLE_CLIENT_ID: Optional[str] = None # Replace with your actual Google OAuth client ID
|
||||
GOOGLE_CLIENT_SECRET: Optional[str] = None# Replace with your actual Google OAuth client secret
|
||||
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback"
|
||||
##append ?provider={provider_name} in your Provider console like http://127.0.0.1:7091/api/connectors/callback?provider=google_drive
|
||||
CONNECTOR_REDIRECT_BASE_URI: Optional[str] = "http://127.0.0.1:7091/api/connectors/callback" ##add redirect url as it is to your provider's console(gcp)
|
||||
|
||||
|
||||
# LLM Cache
|
||||
|
||||
@@ -17,14 +17,13 @@ class GoogleDriveAuth(BaseConnectorAuth):
|
||||
"""
|
||||
|
||||
SCOPES = [
|
||||
'https://www.googleapis.com/auth/drive.readonly',
|
||||
'https://www.googleapis.com/auth/drive.metadata.readonly'
|
||||
'https://www.googleapis.com/auth/drive.file'
|
||||
]
|
||||
|
||||
def __init__(self):
|
||||
self.client_id = settings.GOOGLE_CLIENT_ID
|
||||
self.client_secret = settings.GOOGLE_CLIENT_SECRET
|
||||
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}?provider=google_drive"
|
||||
self.redirect_uri = f"{settings.CONNECTOR_REDIRECT_BASE_URI}"
|
||||
|
||||
if not self.client_id or not self.client_secret:
|
||||
raise ValueError("Google OAuth credentials not configured. Please set GOOGLE_CLIENT_ID and GOOGLE_CLIENT_SECRET in settings.")
|
||||
@@ -50,7 +49,7 @@ class GoogleDriveAuth(BaseConnectorAuth):
|
||||
authorization_url, _ = flow.authorization_url(
|
||||
access_type='offline',
|
||||
prompt='consent',
|
||||
include_granted_scopes='true',
|
||||
include_granted_scopes='false',
|
||||
state=state
|
||||
)
|
||||
|
||||
|
||||
@@ -32,6 +32,10 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
'text/plain': '.txt',
|
||||
'text/csv': '.csv',
|
||||
'text/html': '.html',
|
||||
'text/markdown': '.md',
|
||||
'text/x-rst': '.rst',
|
||||
'application/json': '.json',
|
||||
'application/epub+zip': '.epub',
|
||||
'application/rtf': '.rtf',
|
||||
'image/jpeg': '.jpg',
|
||||
'image/jpg': '.jpg',
|
||||
@@ -120,6 +124,7 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
list_only = inputs.get('list_only', False)
|
||||
load_content = not list_only
|
||||
page_token = inputs.get('page_token')
|
||||
search_query = inputs.get('search_query')
|
||||
self.next_page_token = None
|
||||
|
||||
if file_ids:
|
||||
@@ -128,12 +133,18 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
try:
|
||||
doc = self._load_file_by_id(file_id, load_content=load_content)
|
||||
if doc:
|
||||
documents.append(doc)
|
||||
if not search_query or (
|
||||
search_query.lower() in doc.extra_info.get('file_name', '').lower()
|
||||
):
|
||||
documents.append(doc)
|
||||
elif hasattr(self, '_credential_refreshed') and self._credential_refreshed:
|
||||
self._credential_refreshed = False
|
||||
logging.info(f"Retrying load of file {file_id} after credential refresh")
|
||||
doc = self._load_file_by_id(file_id, load_content=load_content)
|
||||
if doc:
|
||||
if doc and (
|
||||
not search_query or
|
||||
search_query.lower() in doc.extra_info.get('file_name', '').lower()
|
||||
):
|
||||
documents.append(doc)
|
||||
except Exception as e:
|
||||
logging.error(f"Error loading file {file_id}: {e}")
|
||||
@@ -141,7 +152,13 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
else:
|
||||
# Browsing mode: list immediate children of provided folder or root
|
||||
parent_id = folder_id if folder_id else 'root'
|
||||
documents = self._list_items_in_parent(parent_id, limit=limit, load_content=load_content, page_token=page_token)
|
||||
documents = self._list_items_in_parent(
|
||||
parent_id,
|
||||
limit=limit,
|
||||
load_content=load_content,
|
||||
page_token=page_token,
|
||||
search_query=search_query
|
||||
)
|
||||
|
||||
logging.info(f"Loaded {len(documents)} documents from Google Drive")
|
||||
return documents
|
||||
@@ -184,13 +201,18 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
return None
|
||||
|
||||
|
||||
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None) -> List[Document]:
|
||||
def _list_items_in_parent(self, parent_id: str, limit: int = 100, load_content: bool = False, page_token: Optional[str] = None, search_query: Optional[str] = None) -> List[Document]:
|
||||
self._ensure_service()
|
||||
|
||||
documents: List[Document] = []
|
||||
|
||||
try:
|
||||
query = f"'{parent_id}' in parents and trashed=false"
|
||||
|
||||
if search_query:
|
||||
safe_search = search_query.replace("'", "\\'")
|
||||
query += f" and name contains '{safe_search}'"
|
||||
|
||||
next_token_out: Optional[str] = None
|
||||
|
||||
while True:
|
||||
@@ -205,7 +227,8 @@ class GoogleDriveLoader(BaseConnectorLoader):
|
||||
q=query,
|
||||
fields='nextPageToken,files(id,name,mimeType,size,createdTime,modifiedTime,parents)',
|
||||
pageToken=page_token,
|
||||
pageSize=page_size
|
||||
pageSize=page_size,
|
||||
orderBy='name'
|
||||
).execute()
|
||||
|
||||
items = results.get('files', [])
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
import os
|
||||
|
||||
from application.core.settings import settings
|
||||
from application.llm.llm_creator import LLMCreator
|
||||
@@ -141,15 +142,28 @@ class ClassicRAG(BaseRetriever):
|
||||
title = metadata.get(
|
||||
"title", metadata.get("post_title", page_content)
|
||||
)
|
||||
if isinstance(title, str):
|
||||
title = title.split("/")[-1]
|
||||
if not isinstance(title, str):
|
||||
title = str(title)
|
||||
title = title.split("/")[-1]
|
||||
|
||||
filename = (
|
||||
metadata.get("filename")
|
||||
or metadata.get("file_name")
|
||||
or metadata.get("source")
|
||||
)
|
||||
if isinstance(filename, str):
|
||||
filename = os.path.basename(filename) or filename
|
||||
else:
|
||||
title = str(title).split("/")[-1]
|
||||
filename = title
|
||||
if not filename:
|
||||
filename = title
|
||||
source_path = metadata.get("source") or vectorstore_id
|
||||
all_docs.append(
|
||||
{
|
||||
"title": title,
|
||||
"text": page_content,
|
||||
"source": metadata.get("source") or vectorstore_id,
|
||||
"source": source_path,
|
||||
"filename": filename,
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@@ -1085,7 +1085,7 @@ def ingest_connector(
|
||||
"tokens": tokens,
|
||||
"retriever": retriever,
|
||||
"id": str(id),
|
||||
"type": "connector",
|
||||
"type": "connector:file",
|
||||
"remote_data": json.dumps(
|
||||
{"provider": source_type, **api_source_config}
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user