# Mirror of https://github.com/arc53/DocsGPT.git
# Synced 2026-03-02 08:12:06 +00:00
# 198 lines, 7.4 KiB, Python
"""Tests for SharePoint loader."""
|
|
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from application.parser.connectors.share_point.loader import SharePointLoader
|
|
|
|
|
|
def make_response(json_data=None, status_code=200, raise_error=None, content=b"test content"):
    """Build a ``MagicMock`` that stands in for an HTTP response object.

    Args:
        json_data: Value returned by the mock's ``json()`` method.
        status_code: Value exposed through the ``status_code`` attribute.
        raise_error: Optional exception. When it is not ``None`` it is
            installed as the side effect of ``raise_for_status()``;
            otherwise ``raise_for_status()`` returns ``None``.
        content: Bytes exposed through the ``content`` attribute. Defaults
            to ``b"test content"``, the value existing callers rely on.

    Returns:
        The configured ``MagicMock`` instance.
    """
    resp = MagicMock()
    resp.status_code = status_code
    resp.json.return_value = json_data
    resp.content = content
    if raise_error is not None:
        resp.raise_for_status.side_effect = raise_error
    else:
        resp.raise_for_status.return_value = None
    return resp
|
|
|
|
|
|
class TestSharePointLoaderProcessFile:
    """Checks how ``_process_file`` maps item metadata onto a document."""

    def test_size_retrieved_from_root_level(self):
        """The top-level ``size`` key (not the nested ``file`` object) feeds ``extra_info``."""
        # __new__ creates the instance without running __init__.
        sharepoint_loader = SharePointLoader.__new__(SharePointLoader)
        timestamp = "2024-01-01T00:00:00Z"
        item = {
            "id": "test-id",
            "name": "test.txt",
            "createdDateTime": timestamp,
            "lastModifiedDateTime": timestamp,
            "size": 1024,
            "file": {"mimeType": "text/plain"},
        }

        document = sharepoint_loader._process_file(item, load_content=False)

        assert document is not None
        info = document.extra_info
        assert info["size"] == 1024
        assert info["file_name"] == "test.txt"
        assert info["mime_type"] == "text/plain"

    def test_size_null_when_missing(self):
        """Metadata without a ``size`` key yields ``extra_info["size"] is None``."""
        # __new__ creates the instance without running __init__.
        sharepoint_loader = SharePointLoader.__new__(SharePointLoader)
        timestamp = "2024-01-01T00:00:00Z"
        item = {
            "id": "test-id",
            "name": "test.txt",
            "createdDateTime": timestamp,
            "lastModifiedDateTime": timestamp,
            "file": {"mimeType": "text/plain"},
        }

        document = sharepoint_loader._process_file(item, load_content=False)

        assert document is not None
        assert document.extra_info["size"] is None
|
|
|
|
|
|
class TestSharePointLoaderLoadFileById:
    """Checks ``_load_file_by_id`` with ``requests.get`` and token handling patched."""

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_load_file_by_id_includes_size_in_select(self, mock_ensure_token, mock_get_token, mock_get):
        """The request's ``$select`` parameter must mention ``size``."""
        mock_get_token.return_value = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        api_item = {
            "id": "test-id",
            "name": "test.txt",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 2048,
            "file": {"mimeType": "text/plain"},
        }
        mock_get.return_value = make_response(api_item)

        document = SharePointLoader("test-session")._load_file_by_id("test-id", load_content=False)

        assert document is not None
        assert document.extra_info["size"] == 2048

        # call_args unpacks into (positional args, keyword args) of the last call.
        _, request_kwargs = mock_get.call_args
        query = request_kwargs["params"]
        assert "size" in query["$select"]

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_load_file_by_id_returns_document_with_size(self, mock_ensure_token, mock_get_token, mock_get):
        """Every field of the mocked API response is reflected in the returned document."""
        mock_get_token.return_value = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        api_item = {
            "id": "test-id",
            "name": "document.pdf",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-06-15T10:30:00Z",
            "size": 56789,
            "file": {"mimeType": "application/pdf"},
        }
        mock_get.return_value = make_response(api_item)

        document = SharePointLoader("test-session")._load_file_by_id("test-id", load_content=False)

        assert document is not None
        assert document.doc_id == "test-id"
        info = document.extra_info
        assert info["file_name"] == "document.pdf"
        assert info["mime_type"] == "application/pdf"
        assert info["size"] == 56789
        assert info["created_time"] == "2024-01-01T00:00:00Z"
        assert info["modified_time"] == "2024-06-15T10:30:00Z"
        assert info["source"] == "share_point"
|
|
|
|
|
|
class TestSharePointLoaderListItems:
    """Checks ``_list_items_in_parent`` with ``requests.get`` and token handling patched."""

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_list_items_includes_size_in_select(self, mock_ensure_token, mock_get_token, mock_get):
        """Listing items must request ``size`` through the ``$select`` parameter."""
        mock_get_token.return_value = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        file_entry = {
            "id": "file-1",
            "name": "file1.txt",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 12345,
            "file": {"mimeType": "text/plain"},
        }
        mock_get.return_value = make_response({"value": [file_entry]})

        listed = SharePointLoader("test-session")._list_items_in_parent(
            "parent-id", limit=10, load_content=False
        )

        assert len(listed) == 1
        assert listed[0].extra_info["size"] == 12345

        # call_args unpacks into (positional args, keyword args) of the last call.
        _, request_kwargs = mock_get.call_args
        query = request_kwargs["params"]
        assert "size" in query["$select"]

    @patch("application.parser.connectors.share_point.loader.requests.get")
    @patch("application.parser.connectors.share_point.loader.SharePointAuth.get_token_info_from_session")
    @patch("application.parser.connectors.share_point.loader.SharePointLoader._ensure_valid_token")
    def test_list_items_folders_include_size(self, mock_ensure_token, mock_get_token, mock_get):
        """Folder entries are flagged as folders and also carry their ``size``."""
        mock_get_token.return_value = {
            "access_token": "test-token",
            "refresh_token": "test-refresh",
        }
        folder_entry = {
            "id": "folder-1",
            "name": "MyFolder",
            "createdDateTime": "2024-01-01T00:00:00Z",
            "lastModifiedDateTime": "2024-01-01T00:00:00Z",
            "size": 0,
            "folder": {},
        }
        mock_get.return_value = make_response({"value": [folder_entry]})

        listed = SharePointLoader("test-session")._list_items_in_parent(
            "parent-id", limit=10, load_content=False
        )

        assert len(listed) == 1
        folder_info = listed[0].extra_info
        assert folder_info["is_folder"] is True
        assert folder_info["size"] == 0
|
|
|