mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-12-03 10:33:17 +00:00
tests:storage
This commit is contained in:
382
tests/storage/test_s3_storage.py
Normal file
382
tests/storage/test_s3_storage.py
Normal file
@@ -0,0 +1,382 @@
|
||||
"""Tests for S3 storage implementation.
|
||||
"""
|
||||
|
||||
import io
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from application.storage.s3 import S3Storage
|
||||
|
||||
|
||||
@pytest.fixture
def mock_boto3_client():
    """Patch ``boto3.client`` and yield the mock it will return.

    While the fixture is active, every call to ``boto3.client`` hands back
    the same ``MagicMock``, so tests can program and inspect the S3 calls
    made through it. The patch is undone when the test finishes.
    """
    fake_s3 = MagicMock()
    with patch('boto3.client') as client_factory:
        client_factory.return_value = fake_s3
        yield fake_s3
|
||||
|
||||
|
||||
@pytest.fixture
def s3_storage(mock_boto3_client):
    """Return an ``S3Storage`` bound to the bucket ``"test-bucket"``.

    Requesting ``mock_boto3_client`` keeps ``boto3.client`` patched while the
    storage object is constructed and for the rest of the test.
    """
    storage = S3Storage(bucket_name="test-bucket")
    return storage
|
||||
|
||||
|
||||
class TestS3StorageInitialization:
    """Constructor checks: bucket name selection and boto3 client creation."""

    def test_init_with_default_bucket(self):
        """Without an argument the bucket name is ``"docsgpt-test-bucket"``."""
        with patch('boto3.client'):
            default_storage = S3Storage()
            assert default_storage.bucket_name == "docsgpt-test-bucket"

    def test_init_with_custom_bucket(self):
        """An explicit ``bucket_name`` is stored unchanged on the instance."""
        with patch('boto3.client'):
            custom_storage = S3Storage(bucket_name="custom-bucket")
            assert custom_storage.bucket_name == "custom-bucket"

    def test_init_creates_boto3_client(self):
        """``boto3.client`` is called exactly once with the settings' credentials."""
        with patch('boto3.client') as client_factory:
            with patch('application.storage.s3.settings') as fake_settings:
                # Values the constructor is expected to forward to boto3.
                fake_settings.SAGEMAKER_ACCESS_KEY = "test-key"
                fake_settings.SAGEMAKER_SECRET_KEY = "test-secret"
                fake_settings.SAGEMAKER_REGION = "us-west-2"

                S3Storage()

                client_factory.assert_called_once_with(
                    "s3",
                    aws_access_key_id="test-key",
                    aws_secret_access_key="test-secret",
                    region_name="us-west-2",
                )
|
||||
|
||||
|
||||
class TestS3StorageSaveFile:
    """Checks for ``save_file``: upload arguments, returned metadata, failures."""

    def test_save_file_uploads_to_s3(self, s3_storage, mock_boto3_client):
        """A default save uploads with INTELLIGENT_TIERING and returns the metadata dict."""
        object_path = "documents/test.txt"
        payload = io.BytesIO(b"test content")

        with patch('application.storage.s3.settings') as fake_settings:
            fake_settings.SAGEMAKER_REGION = "us-east-1"
            metadata = s3_storage.save_file(payload, object_path)

        mock_boto3_client.upload_fileobj.assert_called_once_with(
            payload,
            "test-bucket",
            object_path,
            ExtraArgs={"StorageClass": "INTELLIGENT_TIERING"},
        )

        expected_metadata = {
            "storage_type": "s3",
            "bucket_name": "test-bucket",
            "uri": "s3://test-bucket/documents/test.txt",
            "region": "us-east-1",
        }
        assert metadata == expected_metadata

    def test_save_file_with_custom_storage_class(self, s3_storage, mock_boto3_client):
        """A caller-supplied ``storage_class`` is forwarded in ``ExtraArgs``."""
        object_path = "documents/test.txt"
        payload = io.BytesIO(b"test content")

        with patch('application.storage.s3.settings') as fake_settings:
            fake_settings.SAGEMAKER_REGION = "us-east-1"
            s3_storage.save_file(payload, object_path, storage_class="STANDARD")

        mock_boto3_client.upload_fileobj.assert_called_once_with(
            payload,
            "test-bucket",
            object_path,
            ExtraArgs={"StorageClass": "STANDARD"},
        )

    def test_save_file_propagates_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` raised by the upload reaches the caller."""
        object_path = "documents/test.txt"
        payload = io.BytesIO(b"test content")

        error_response = {"Error": {"Code": "AccessDenied", "Message": "Access denied"}}
        mock_boto3_client.upload_fileobj.side_effect = ClientError(
            error_response, "upload_fileobj"
        )

        with pytest.raises(ClientError):
            s3_storage.save_file(payload, object_path)
|
||||
|
||||
|
||||
class TestS3StorageFileExists:
    """Checks for ``file_exists`` against a mocked ``head_object``."""

    def test_file_exists_returns_true_when_file_found(self, s3_storage, mock_boto3_client):
        """A successful ``head_object`` call yields ``True``."""
        object_path = "documents/test.txt"
        mock_boto3_client.head_object.return_value = {"ContentLength": 100}

        found = s3_storage.file_exists(object_path)

        assert found is True
        mock_boto3_client.head_object.assert_called_once_with(
            Bucket="test-bucket",
            Key=object_path,
        )

    def test_file_exists_returns_false_on_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` from ``head_object`` yields ``False``."""
        object_path = "documents/nonexistent.txt"
        error_response = {"Error": {"Code": "NoSuchKey", "Message": "Not found"}}
        mock_boto3_client.head_object.side_effect = ClientError(
            error_response, "head_object"
        )

        found = s3_storage.file_exists(object_path)

        assert found is False
|
||||
|
||||
|
||||
class TestS3StorageGetFile:
    """Checks for ``get_file``: successful download and missing-object handling."""

    def test_get_file_downloads_and_returns_file_object(self, s3_storage, mock_boto3_client):
        """The result is a ``BytesIO`` whose ``read()`` returns the downloaded bytes."""
        object_path = "documents/test.txt"
        expected_bytes = b"file content"

        mock_boto3_client.head_object.return_value = {}

        # Stand-in for the real download: write the payload into the
        # file object that get_file passes in.
        def mock_download(bucket, key, file_obj):
            file_obj.write(expected_bytes)

        mock_boto3_client.download_fileobj.side_effect = mock_download

        fetched = s3_storage.get_file(object_path)

        assert isinstance(fetched, io.BytesIO)
        assert fetched.read() == expected_bytes
        mock_boto3_client.download_fileobj.assert_called_once()

    def test_get_file_raises_error_when_file_not_found(self, s3_storage, mock_boto3_client):
        """``FileNotFoundError`` is raised when ``head_object`` fails with ``ClientError``."""
        object_path = "documents/nonexistent.txt"
        error_response = {"Error": {"Code": "NoSuchKey", "Message": "Not found"}}
        mock_boto3_client.head_object.side_effect = ClientError(
            error_response, "head_object"
        )

        with pytest.raises(FileNotFoundError, match="File not found"):
            s3_storage.get_file(object_path)
|
||||
|
||||
|
||||
class TestS3StorageDeleteFile:
    """Checks for ``delete_file`` against a mocked ``delete_object``."""

    def test_delete_file_returns_true_on_success(self, s3_storage, mock_boto3_client):
        """A successful ``delete_object`` call yields ``True``."""
        object_path = "documents/test.txt"
        mock_boto3_client.delete_object.return_value = {}

        deleted = s3_storage.delete_file(object_path)

        assert deleted is True
        mock_boto3_client.delete_object.assert_called_once_with(
            Bucket="test-bucket",
            Key=object_path,
        )

    def test_delete_file_returns_false_on_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` from ``delete_object`` yields ``False`` instead of raising."""
        object_path = "documents/test.txt"
        error_response = {"Error": {"Code": "AccessDenied", "Message": "Access denied"}}
        mock_boto3_client.delete_object.side_effect = ClientError(
            error_response, "delete_object"
        )

        deleted = s3_storage.delete_file(object_path)

        assert deleted is False
|
||||
|
||||
|
||||
class TestS3StorageListFiles:
    """Checks for ``list_files`` using a mocked ``list_objects_v2`` paginator."""

    def test_list_files_returns_all_keys_with_prefix(self, s3_storage, mock_boto3_client):
        """Every key on the single mocked page appears in the result."""
        directory = "documents/"
        expected_keys = [
            "documents/file1.txt",
            "documents/file2.txt",
            "documents/subdir/file3.txt",
        ]

        pager = MagicMock()
        mock_boto3_client.get_paginator.return_value = pager
        # One page holding all three objects.
        pager.paginate.return_value = [
            {"Contents": [{"Key": key} for key in expected_keys]}
        ]

        listed = s3_storage.list_files(directory)

        assert len(listed) == 3
        for key in expected_keys:
            assert key in listed

        mock_boto3_client.get_paginator.assert_called_once_with('list_objects_v2')
        pager.paginate.assert_called_once_with(
            Bucket="test-bucket",
            Prefix="documents/",
        )

    def test_list_files_returns_empty_list_when_no_contents(self, s3_storage, mock_boto3_client):
        """A page with no ``"Contents"`` entry produces an empty list."""
        directory = "empty/"

        pager = MagicMock()
        mock_boto3_client.get_paginator.return_value = pager
        pager.paginate.return_value = [{}]

        listed = s3_storage.list_files(directory)

        assert listed == []
|
||||
|
||||
|
||||
class TestS3StorageProcessFile:
    """Checks for ``process_file``: temp-file download and processor invocation."""

    def test_process_file_downloads_and_processes_file(self, s3_storage, mock_boto3_client):
        """The processor gets the temp file's path plus extra kwargs; its result is returned."""
        object_path = "documents/test.txt"

        mock_boto3_client.head_object.return_value = {}

        with patch('tempfile.NamedTemporaryFile') as tempfile_factory:
            # The context-managed temp file exposes a fixed, known name.
            temp_handle = MagicMock()
            temp_handle.name = "/tmp/test_file"
            tempfile_factory.return_value.__enter__.return_value = temp_handle

            processor = MagicMock(return_value="processed")
            outcome = s3_storage.process_file(object_path, processor, extra_arg="value")

            assert outcome == "processed"
            processor.assert_called_once_with(local_path="/tmp/test_file", extra_arg="value")
            mock_boto3_client.download_fileobj.assert_called_once()

    def test_process_file_raises_error_when_file_not_found(self, s3_storage, mock_boto3_client):
        """``FileNotFoundError`` is raised when ``head_object`` fails with ``ClientError``."""
        object_path = "documents/nonexistent.txt"
        error_response = {"Error": {"Code": "NoSuchKey", "Message": "Not found"}}
        mock_boto3_client.head_object.side_effect = ClientError(
            error_response, "head_object"
        )

        processor = MagicMock()

        with pytest.raises(FileNotFoundError, match="File not found in S3"):
            s3_storage.process_file(object_path, processor)
|
||||
|
||||
|
||||
class TestS3StorageIsDirectory:
    """Checks for ``is_directory`` against a mocked ``list_objects_v2``."""

    def test_is_directory_returns_true_when_objects_exist(self, s3_storage, mock_boto3_client):
        """A listing that contains ``"Contents"`` yields ``True``."""
        prefix = "documents/"
        listing = {"Contents": [{"Key": "documents/file1.txt"}]}
        mock_boto3_client.list_objects_v2.return_value = listing

        is_dir = s3_storage.is_directory(prefix)

        assert is_dir is True
        mock_boto3_client.list_objects_v2.assert_called_once_with(
            Bucket="test-bucket",
            Prefix="documents/",
            MaxKeys=1,
        )

    def test_is_directory_returns_false_when_no_objects_exist(self, s3_storage, mock_boto3_client):
        """An empty listing response yields ``False``."""
        prefix = "nonexistent/"
        mock_boto3_client.list_objects_v2.return_value = {}

        is_dir = s3_storage.is_directory(prefix)

        assert is_dir is False
|
||||
|
||||
|
||||
class TestS3StorageRemoveDirectory:
    """Checks for ``remove_directory``: bulk delete, empty prefix, and failure."""

    def test_remove_directory_deletes_all_objects(self, s3_storage, mock_boto3_client):
        """Both listed objects go to one ``delete_objects`` call and ``True`` is returned."""
        directory = "documents/"
        object_keys = ["documents/file1.txt", "documents/file2.txt"]

        pager = MagicMock()
        mock_boto3_client.get_paginator.return_value = pager
        pager.paginate.return_value = [
            {"Contents": [{"Key": key} for key in object_keys]}
        ]
        mock_boto3_client.delete_objects.return_value = {
            "Deleted": [{"Key": key} for key in object_keys]
        }

        removed = s3_storage.remove_directory(directory)

        assert removed is True
        mock_boto3_client.delete_objects.assert_called_once()
        # call_args is an (args, kwargs) pair; only the keyword arguments matter here.
        _, delete_kwargs = mock_boto3_client.delete_objects.call_args
        assert delete_kwargs["Bucket"] == "test-bucket"
        assert len(delete_kwargs["Delete"]["Objects"]) == 2

    def test_remove_directory_returns_false_when_empty(self, s3_storage, mock_boto3_client):
        """With nothing listed, ``False`` is returned and ``delete_objects`` is never called."""
        directory = "empty/"

        pager = MagicMock()
        mock_boto3_client.get_paginator.return_value = pager
        pager.paginate.return_value = [{}]

        removed = s3_storage.remove_directory(directory)

        assert removed is False
        mock_boto3_client.delete_objects.assert_not_called()

    def test_remove_directory_returns_false_on_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` from ``delete_objects`` yields ``False`` instead of raising."""
        directory = "documents/"

        pager = MagicMock()
        mock_boto3_client.get_paginator.return_value = pager
        pager.paginate.return_value = [
            {"Contents": [{"Key": "documents/file1.txt"}]}
        ]

        error_response = {"Error": {"Code": "AccessDenied", "Message": "Access denied"}}
        mock_boto3_client.delete_objects.side_effect = ClientError(
            error_response, "delete_objects"
        )

        removed = s3_storage.remove_directory(directory)

        assert removed is False
|
||||
Reference in New Issue
Block a user