Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changes/next-release/feature-download-58503.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "feature",
"category": "s3 download",
"description": "Validate requested range matches content range in response for multipart downloads"
}
31 changes: 30 additions & 1 deletion awscli/s3transfer/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@

from botocore.exceptions import ClientError
from s3transfer.compat import seekable
from s3transfer.exceptions import RetriesExceededError, S3DownloadFailedError
from s3transfer.exceptions import (
RetriesExceededError,
S3DownloadFailedError,
S3ValidationError,
)
from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG
from s3transfer.tasks import SubmissionTask, Task
from s3transfer.utils import (
Expand Down Expand Up @@ -577,6 +581,10 @@ def _main(
response = client.get_object(
Bucket=bucket, Key=key, **extra_args
)
self._validate_content_range(
extra_args.get('Range'),
response.get('ContentRange'),
)
streaming_body = StreamReaderProgress(
response['Body'], callbacks
)
Expand Down Expand Up @@ -634,6 +642,27 @@ def _main(
def _handle_io(self, download_output_manager, fileobj, chunk, index):
download_output_manager.queue_file_io_task(fileobj, chunk, index)

def _validate_content_range(self, requested_range, content_range):
if not requested_range or not content_range:
return
# Unparsed `ContentRange` looks like `bytes 0-8388607/39542919`,
# where `0-8388607` is the fetched range and `39542919` is
# the total object size.
response_range, total_size = content_range.split('/')
# Subtract `1` because range is 0-indexed.
final_byte = str(int(total_size) - 1)
# If it's the last part, the requested range will not include
# the final byte, eg `bytes=33554432-`.
if requested_range.endswith('-'):
requested_range += final_byte
# Request looks like `bytes=0-8388607`.
# Parsed response looks like `bytes 0-8388607`.
if requested_range[6:] != response_range[6:]:
raise S3ValidationError(
f"Requested range: `{requested_range[6:]}` does not match "
f"content range in response: `{response_range[6:]}`"
)


class ImmediatelyWriteIOGetObjectTask(GetObjectTask):
"""GetObjectTask that immediately writes to the provided file object
Expand Down
4 changes: 4 additions & 0 deletions awscli/s3transfer/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,7 @@ class FatalError(CancelledError):
"""A CancelledError raised from an error in the TransferManager"""

pass


class S3ValidationError(Exception):
pass
37 changes: 34 additions & 3 deletions tests/functional/s3transfer/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@

from botocore.exceptions import ClientError
from s3transfer.compat import SOCKET_ERROR
from s3transfer.exceptions import RetriesExceededError, S3DownloadFailedError
from s3transfer.exceptions import (
RetriesExceededError,
S3DownloadFailedError,
S3ValidationError,
)
from s3transfer.manager import TransferConfig, TransferManager

from tests import (
Expand Down Expand Up @@ -109,7 +113,7 @@ def add_head_object_response(self, expected_params=None):
self.stubber.add_response(**head_response)

def add_successful_get_object_responses(
self, expected_params=None, expected_ranges=None
self, expected_params=None, expected_ranges=None, extras=None
):
# Add all get_object responses needed to complete the download.
# Should account for both ranged and nonranged downloads.
Expand All @@ -124,6 +128,8 @@ def add_successful_get_object_responses(
stubbed_response['expected_params']['Range'] = (
expected_ranges[i]
)
if extras:
stubbed_response['service_response'].update(extras[i])
self.stubber.add_response(**stubbed_response)

def add_n_retryable_get_object_responses(self, n, num_reads=0):
Expand Down Expand Up @@ -511,9 +517,12 @@ def test_download(self):
'RequestPayer': 'requester',
}
expected_ranges = ['bytes=0-3', 'bytes=4-7', 'bytes=8-']
stubbed_ranges = ['bytes 0-3/10', 'bytes 4-7/10', 'bytes 8-9/10']
self.add_head_object_response(expected_params)
self.add_successful_get_object_responses(
{**expected_params, 'IfMatch': self.etag}, expected_ranges
{**expected_params, 'IfMatch': self.etag},
expected_ranges,
[{"ContentRange": r} for r in stubbed_ranges],
)

future = self.manager.download(
Expand Down Expand Up @@ -547,6 +556,28 @@ def test_download_with_checksum_enabled(self):
with open(self.filename, 'rb') as f:
self.assertEqual(self.content, f.read())

def test_download_raises_if_content_range_mismatch(self):
expected_params = {
'Bucket': self.bucket,
'Key': self.key,
}
expected_ranges = ['bytes=0-3', 'bytes=4-7', 'bytes=8-']
# Note that the final retrieved range should be `bytes 8-9/10`.
stubbed_ranges = ['bytes 0-3/10', 'bytes 4-7/10', 'bytes 7-8/10']
self.add_head_object_response(expected_params)
self.add_successful_get_object_responses(
{**expected_params, 'IfMatch': self.etag},
expected_ranges,
[{"ContentRange": r} for r in stubbed_ranges],
)

future = self.manager.download(
self.bucket, self.key, self.filename, self.extra_args
)
with self.assertRaises(S3ValidationError) as e:
future.result()
self.assertIn('does not match content range', str(e.exception))

def test_download_raises_if_etag_validation_fails(self):
expected_params = {
'Bucket': self.bucket,
Expand Down
Loading