mirror of
https://github.com/arc53/DocsGPT.git
synced 2026-03-02 16:21:50 +00:00
(feat:oneDrive) file loading for ingestion
This commit is contained in:
197
tests/parser/remote/test_share_point_loader.py
Normal file
197
tests/parser/remote/test_share_point_loader.py
Normal file
@@ -0,0 +1,197 @@
|
||||
"""Tests for SharePoint loader."""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from application.parser.connectors.share_point.loader import SharePointLoader
|
||||
|
||||
|
||||
def make_response(json_data=None, status_code=200, raise_error=None):
    """Build a ``MagicMock`` that stands in for an HTTP response object.

    Args:
        json_data: Value that the mock's ``json()`` call returns.
        status_code: Value exposed as the mock's ``status_code`` attribute.
        raise_error: When not ``None``, installed as the ``side_effect`` of
            ``raise_for_status``; otherwise ``raise_for_status()`` returns
            ``None``.

    Returns:
        The configured mock. Its ``content`` attribute is always
        ``b"test content"``.
    """
    fake = MagicMock(status_code=status_code, content=b"test content")
    fake.json.return_value = json_data

    if raise_error is None:
        fake.raise_for_status.return_value = None
    else:
        fake.raise_for_status.side_effect = raise_error

    return fake
|
||||
|
||||
|
||||
class TestSharePointLoaderProcessFile:
    """Checks on ``SharePointLoader._process_file`` using hand-built metadata."""

    def test_size_retrieved_from_root_level(self):
        """``size`` is read from the top level of the metadata, not from ``file``."""
        # Allocate via __new__ so __init__ is not run for this test.
        bare_loader = SharePointLoader.__new__(SharePointLoader)
        metadata = {
            "id": "test-id",
            "name": "test.txt",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 1024,
            "file": {"mimeType": "text/plain"},
        }

        document = bare_loader._process_file(metadata, load_content=False)

        assert document is not None
        info = document.extra_info
        assert info["size"] == 1024
        assert info["file_name"] == "test.txt"
        assert info["mime_type"] == "text/plain"

    def test_size_null_when_missing(self):
        """``size`` is reported as ``None`` when the metadata has no such key."""
        # Allocate via __new__ so __init__ is not run for this test.
        bare_loader = SharePointLoader.__new__(SharePointLoader)
        metadata = {
            "id": "test-id",
            "name": "test.txt",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "file": {"mimeType": "text/plain"},
        }

        document = bare_loader._process_file(metadata, load_content=False)

        assert document is not None
        assert document.extra_info["size"] is None
|
||||
|
||||
|
||||
class TestSharePointLoaderLoadFileById:
    """Checks on ``SharePointLoader._load_file_by_id`` with HTTP and auth patched out."""

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_load_file_by_id_includes_size_in_select(self, mock_ensure_token, mock_get_token, mock_get):
        """The request's ``$select`` parameter mentions ``size``."""
        token_info = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        api_item = {
            "id": "test-id",
            "name": "test.txt",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 2048,
            "file": {"mimeType": "text/plain"},
        }
        mock_get_token.return_value = token_info
        mock_get.return_value = make_response(api_item)

        sp_loader = SharePointLoader("test-session")
        document = sp_loader._load_file_by_id("test-id", load_content=False)

        assert document is not None
        assert document.extra_info["size"] == 2048

        # call_args of the most recent requests.get call unpacks as (args, kwargs).
        _, request_kwargs = mock_get.call_args
        query = request_kwargs["params"]
        assert "size" in query["$select"]

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_load_file_by_id_returns_document_with_size(self, mock_ensure_token, mock_get_token, mock_get):
        """The returned document carries the fields of the mocked API response."""
        token_info = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        api_item = {
            "id": "test-id",
            "name": "document.pdf",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-06-15T10:30:00Z",
            "size": 56789,
            "file": {"mimeType": "application/pdf"},
        }
        mock_get_token.return_value = token_info
        mock_get.return_value = make_response(api_item)

        sp_loader = SharePointLoader("test-session")
        document = sp_loader._load_file_by_id("test-id", load_content=False)

        assert document is not None
        assert document.doc_id == "test-id"
        assert document.extra_info["file_name"] == "document.pdf"
        assert document.extra_info["mime_type"] == "application/pdf"
        assert document.extra_info["size"] == 56789
        assert document.extra_info["created_time"] == "2024-01-01T00:00:00Z"
        assert document.extra_info["modified_time"] == "2024-06-15T10:30:00Z"
        assert document.extra_info["source"] == "share_point"
|
||||
|
||||
|
||||
class TestSharePointLoaderListItems:
    """Checks on ``SharePointLoader._list_items_in_parent`` with HTTP and auth patched out."""

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_list_items_includes_size_in_select(self, mock_ensure_token, mock_get_token, mock_get):
        """Listing items sends a ``$select`` parameter that mentions ``size``."""
        token_info = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        file_item = {
            "id": "file-1",
            "name": "file1.txt",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 12345,
            "file": {"mimeType": "text/plain"},
        }
        mock_get_token.return_value = token_info
        mock_get.return_value = make_response({"value": [file_item]})

        sp_loader = SharePointLoader("test-session")
        listed = sp_loader._list_items_in_parent("parent-id", limit=10, load_content=False)

        assert len(listed) == 1
        assert listed[0].extra_info["size"] == 12345

        # call_args of the most recent requests.get call unpacks as (args, kwargs).
        _, request_kwargs = mock_get.call_args
        query = request_kwargs["params"]
        assert "size" in query["$select"]

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_list_items_folders_include_size(self, mock_ensure_token, mock_get_token, mock_get):
        """Folder entries are flagged as folders and still report ``size``."""
        token_info = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        folder_item = {
            "id": "folder-1",
            "name": "MyFolder",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 0,
            "folder": {},
        }
        mock_get_token.return_value = token_info
        mock_get.return_value = make_response({"value": [folder_item]})

        sp_loader = SharePointLoader("test-session")
        listed = sp_loader._list_items_in_parent("parent-id", limit=10, load_content=False)

        assert len(listed) == 1
        folder_info = listed[0].extra_info
        assert folder_info["is_folder"] is True
        assert folder_info["size"] == 0
|
||||
|
||||
Reference in New Issue
Block a user