Files
DocsGPT/tests/storage/test_s3_storage.py
ManishMadan2882 42fc771833 tests:storage
2025-10-02 02:33:12 +05:30

383 lines
13 KiB
Python

"""Tests for S3 storage implementation.
"""
import io
import pytest
from unittest.mock import patch, MagicMock
from botocore.exceptions import ClientError
from application.storage.s3 import S3Storage
@pytest.fixture
def mock_boto3_client():
    """Patch ``boto3.client`` and yield the stand-in S3 client it returns.

    The patch stays active while the requesting test runs, so every
    ``boto3.client(...)`` call made in that window receives the yielded mock.
    """
    fake_s3 = MagicMock()
    with patch('boto3.client', return_value=fake_s3):
        yield fake_s3
@pytest.fixture
def s3_storage(mock_boto3_client):
    """Build an ``S3Storage`` for "test-bucket" while ``boto3.client`` is patched.

    Requesting ``mock_boto3_client`` guarantees the patch is in place before
    the storage object is constructed.
    """
    storage = S3Storage(bucket_name="test-bucket")
    return storage
class TestS3StorageInitialization:
    """Constructor checks: bucket name selection and boto3 client creation."""

    def test_init_with_default_bucket(self):
        """Omitting the bucket argument yields the default bucket name."""
        with patch('boto3.client'):
            default_storage = S3Storage()
            assert default_storage.bucket_name == "docsgpt-test-bucket"

    def test_init_with_custom_bucket(self):
        """An explicit bucket argument is stored as ``bucket_name``."""
        with patch('boto3.client'):
            custom_storage = S3Storage(bucket_name="custom-bucket")
            assert custom_storage.bucket_name == "custom-bucket"

    def test_init_creates_boto3_client(self):
        """The S3 client is created once, using credentials read from settings."""
        expected_kwargs = {
            "aws_access_key_id": "test-key",
            "aws_secret_access_key": "test-secret",
            "region_name": "us-west-2",
        }
        with patch('boto3.client') as client_factory:
            with patch('application.storage.s3.settings') as fake_settings:
                fake_settings.SAGEMAKER_ACCESS_KEY = "test-key"
                fake_settings.SAGEMAKER_SECRET_KEY = "test-secret"
                fake_settings.SAGEMAKER_REGION = "us-west-2"

                S3Storage()

                client_factory.assert_called_once_with("s3", **expected_kwargs)
class TestS3StorageSaveFile:
    """Checks for ``S3Storage.save_file``."""

    def test_save_file_uploads_to_s3(self, s3_storage, mock_boto3_client):
        """A plain save uploads with the default storage class and returns metadata."""
        payload = io.BytesIO(b"test content")
        key = "documents/test.txt"

        with patch('application.storage.s3.settings') as fake_settings:
            fake_settings.SAGEMAKER_REGION = "us-east-1"
            metadata = s3_storage.save_file(payload, key)

        upload = mock_boto3_client.upload_fileobj
        upload.assert_called_once_with(
            payload,
            "test-bucket",
            key,
            ExtraArgs={"StorageClass": "INTELLIGENT_TIERING"},
        )
        expected_metadata = {
            "storage_type": "s3",
            "bucket_name": "test-bucket",
            "uri": "s3://test-bucket/documents/test.txt",
            "region": "us-east-1",
        }
        assert metadata == expected_metadata

    def test_save_file_with_custom_storage_class(self, s3_storage, mock_boto3_client):
        """A caller-supplied ``storage_class`` is forwarded in ``ExtraArgs``."""
        payload = io.BytesIO(b"test content")
        key = "documents/test.txt"

        with patch('application.storage.s3.settings') as fake_settings:
            fake_settings.SAGEMAKER_REGION = "us-east-1"
            s3_storage.save_file(payload, key, storage_class="STANDARD")

        upload = mock_boto3_client.upload_fileobj
        upload.assert_called_once_with(
            payload,
            "test-bucket",
            key,
            ExtraArgs={"StorageClass": "STANDARD"},
        )

    def test_save_file_propagates_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` raised by the upload reaches the caller unchanged."""
        payload = io.BytesIO(b"test content")
        key = "documents/test.txt"
        access_denied = ClientError(
            {"Error": {"Code": "AccessDenied", "Message": "Access denied"}},
            "upload_fileobj",
        )
        mock_boto3_client.upload_fileobj.side_effect = access_denied

        with pytest.raises(ClientError):
            s3_storage.save_file(payload, key)
class TestS3StorageFileExists:
    """Checks for ``S3Storage.file_exists``."""

    def test_file_exists_returns_true_when_file_found(self, s3_storage, mock_boto3_client):
        """A successful ``head_object`` call is reported as ``True``."""
        key = "documents/test.txt"
        head_object = mock_boto3_client.head_object
        head_object.return_value = {"ContentLength": 100}

        exists = s3_storage.file_exists(key)

        assert exists is True
        head_object.assert_called_once_with(Bucket="test-bucket", Key=key)

    def test_file_exists_returns_false_on_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` from ``head_object`` is reported as ``False``."""
        key = "documents/nonexistent.txt"
        not_found = ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "Not found"}},
            "head_object",
        )
        mock_boto3_client.head_object.side_effect = not_found

        exists = s3_storage.file_exists(key)

        assert exists is False
class TestS3StorageGetFile:
    """Checks for ``S3Storage.get_file``."""

    def test_get_file_downloads_and_returns_file_object(self, s3_storage, mock_boto3_client):
        """The downloaded bytes come back in a readable ``io.BytesIO``."""
        key = "documents/test.txt"
        expected_bytes = b"file content"
        mock_boto3_client.head_object.return_value = {}

        def write_expected_bytes(bucket, key, file_obj):
            # Stand in for the real download by filling the supplied buffer.
            file_obj.write(expected_bytes)

        download = mock_boto3_client.download_fileobj
        download.side_effect = write_expected_bytes

        fetched = s3_storage.get_file(key)

        assert isinstance(fetched, io.BytesIO)
        assert fetched.read() == expected_bytes
        download.assert_called_once()

    def test_get_file_raises_error_when_file_not_found(self, s3_storage, mock_boto3_client):
        """A failing ``head_object`` lookup surfaces as ``FileNotFoundError``."""
        key = "documents/nonexistent.txt"
        not_found = ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "Not found"}},
            "head_object",
        )
        mock_boto3_client.head_object.side_effect = not_found

        with pytest.raises(FileNotFoundError, match="File not found"):
            s3_storage.get_file(key)
class TestS3StorageDeleteFile:
    """Checks for ``S3Storage.delete_file``."""

    def test_delete_file_returns_true_on_success(self, s3_storage, mock_boto3_client):
        """A successful ``delete_object`` call is reported as ``True``."""
        key = "documents/test.txt"
        delete_object = mock_boto3_client.delete_object
        delete_object.return_value = {}

        deleted = s3_storage.delete_file(key)

        assert deleted is True
        delete_object.assert_called_once_with(Bucket="test-bucket", Key=key)

    def test_delete_file_returns_false_on_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` from ``delete_object`` is reported as ``False``."""
        key = "documents/test.txt"
        access_denied = ClientError(
            {"Error": {"Code": "AccessDenied", "Message": "Access denied"}},
            "delete_object",
        )
        mock_boto3_client.delete_object.side_effect = access_denied

        deleted = s3_storage.delete_file(key)

        assert deleted is False
class TestS3StorageListFiles:
    """Checks for ``S3Storage.list_files``."""

    def test_list_files_returns_all_keys_with_prefix(self, s3_storage, mock_boto3_client):
        """Every key on the returned page appears in the listing."""
        prefix = "documents/"
        expected_keys = [
            "documents/file1.txt",
            "documents/file2.txt",
            "documents/subdir/file3.txt",
        ]
        # The auto-created return value acts as the paginator object.
        paginator = mock_boto3_client.get_paginator.return_value
        paginator.paginate.return_value = [
            {"Contents": [{"Key": name} for name in expected_keys]},
        ]

        listed = s3_storage.list_files(prefix)

        assert len(listed) == len(expected_keys)
        for name in expected_keys:
            assert name in listed
        mock_boto3_client.get_paginator.assert_called_once_with('list_objects_v2')
        paginator.paginate.assert_called_once_with(
            Bucket="test-bucket",
            Prefix="documents/",
        )

    def test_list_files_returns_empty_list_when_no_contents(self, s3_storage, mock_boto3_client):
        """A page without a "Contents" entry produces an empty list."""
        prefix = "empty/"
        paginator = mock_boto3_client.get_paginator.return_value
        paginator.paginate.return_value = [{}]

        listed = s3_storage.list_files(prefix)

        assert listed == []
class TestS3StorageProcessFile:
    """Checks for ``S3Storage.process_file``."""

    def test_process_file_downloads_and_processes_file(self, s3_storage, mock_boto3_client):
        """The processor receives the temp file path plus the extra keyword args."""
        key = "documents/test.txt"
        mock_boto3_client.head_object.return_value = {}
        processor = MagicMock(return_value="processed")

        with patch('tempfile.NamedTemporaryFile') as temp_factory:
            # Object handed out by ``with NamedTemporaryFile(...) as f``.
            temp_handle = MagicMock()
            temp_handle.name = "/tmp/test_file"
            temp_factory.return_value.__enter__.return_value = temp_handle

            outcome = s3_storage.process_file(key, processor, extra_arg="value")

            assert outcome == "processed"
            processor.assert_called_once_with(
                local_path="/tmp/test_file",
                extra_arg="value",
            )
            mock_boto3_client.download_fileobj.assert_called_once()

    def test_process_file_raises_error_when_file_not_found(self, s3_storage, mock_boto3_client):
        """A failing ``head_object`` lookup surfaces as ``FileNotFoundError``."""
        key = "documents/nonexistent.txt"
        not_found = ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "Not found"}},
            "head_object",
        )
        mock_boto3_client.head_object.side_effect = not_found
        processor = MagicMock()

        with pytest.raises(FileNotFoundError, match="File not found in S3"):
            s3_storage.process_file(key, processor)
class TestS3StorageIsDirectory:
    """Checks for ``S3Storage.is_directory``."""

    def test_is_directory_returns_true_when_objects_exist(self, s3_storage, mock_boto3_client):
        """A listing that contains objects is reported as ``True``."""
        prefix = "documents/"
        list_objects = mock_boto3_client.list_objects_v2
        list_objects.return_value = {
            "Contents": [{"Key": "documents/file1.txt"}],
        }

        is_dir = s3_storage.is_directory(prefix)

        assert is_dir is True
        list_objects.assert_called_once_with(
            Bucket="test-bucket",
            Prefix="documents/",
            MaxKeys=1,
        )

    def test_is_directory_returns_false_when_no_objects_exist(self, s3_storage, mock_boto3_client):
        """A listing without a "Contents" entry is reported as ``False``."""
        prefix = "nonexistent/"
        mock_boto3_client.list_objects_v2.return_value = {}

        is_dir = s3_storage.is_directory(prefix)

        assert is_dir is False
class TestS3StorageRemoveDirectory:
    """Checks for ``S3Storage.remove_directory``."""

    def test_remove_directory_deletes_all_objects(self, s3_storage, mock_boto3_client):
        """Both listed keys are sent to a single ``delete_objects`` call."""
        prefix = "documents/"
        listed_objects = [
            {"Key": "documents/file1.txt"},
            {"Key": "documents/file2.txt"},
        ]
        paginator = mock_boto3_client.get_paginator.return_value
        paginator.paginate.return_value = [{"Contents": listed_objects}]
        delete_objects = mock_boto3_client.delete_objects
        delete_objects.return_value = {
            "Deleted": [
                {"Key": "documents/file1.txt"},
                {"Key": "documents/file2.txt"},
            ],
        }

        removed = s3_storage.remove_directory(prefix)

        assert removed is True
        delete_objects.assert_called_once()
        # ``call_args`` unpacks into (positional args, keyword args).
        _, delete_kwargs = delete_objects.call_args
        assert delete_kwargs["Bucket"] == "test-bucket"
        assert len(delete_kwargs["Delete"]["Objects"]) == 2

    def test_remove_directory_returns_false_when_empty(self, s3_storage, mock_boto3_client):
        """With nothing listed, the result is ``False`` and no delete is issued."""
        prefix = "empty/"
        paginator = mock_boto3_client.get_paginator.return_value
        paginator.paginate.return_value = [{}]

        removed = s3_storage.remove_directory(prefix)

        assert removed is False
        mock_boto3_client.delete_objects.assert_not_called()

    def test_remove_directory_returns_false_on_client_error(self, s3_storage, mock_boto3_client):
        """A ``ClientError`` from ``delete_objects`` is reported as ``False``."""
        prefix = "documents/"
        paginator = mock_boto3_client.get_paginator.return_value
        paginator.paginate.return_value = [
            {"Contents": [{"Key": "documents/file1.txt"}]},
        ]
        access_denied = ClientError(
            {"Error": {"Code": "AccessDenied", "Message": "Access denied"}},
            "delete_objects",
        )
        mock_boto3_client.delete_objects.side_effect = access_denied

        removed = s3_storage.remove_directory(prefix)

        assert removed is False