From 9f5aa1fdf3ec58f88278384da2ff40f07538535b Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Fri, 22 Aug 2025 13:08:40 -0400 Subject: [PATCH 1/2] Validate multipart download content ranges --- awscli/s3transfer/download.py | 31 +++++++++++++++- awscli/s3transfer/exceptions.py | 4 +++ tests/functional/s3transfer/test_download.py | 37 ++++++++++++++++++-- 3 files changed, 68 insertions(+), 4 deletions(-) diff --git a/awscli/s3transfer/download.py b/awscli/s3transfer/download.py index 1cb0a65c2320..9307e48fa551 100644 --- a/awscli/s3transfer/download.py +++ b/awscli/s3transfer/download.py @@ -16,7 +16,11 @@ from botocore.exceptions import ClientError from s3transfer.compat import seekable -from s3transfer.exceptions import RetriesExceededError, S3DownloadFailedError +from s3transfer.exceptions import ( + RetriesExceededError, + S3DownloadFailedError, + S3ValidationError, +) from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.tasks import SubmissionTask, Task from s3transfer.utils import ( @@ -577,6 +581,10 @@ def _main( response = client.get_object( Bucket=bucket, Key=key, **extra_args ) + self._validate_content_range( + extra_args.get('Range'), + response.get('ContentRange'), + ) streaming_body = StreamReaderProgress( response['Body'], callbacks ) @@ -634,6 +642,27 @@ def _main( def _handle_io(self, download_output_manager, fileobj, chunk, index): download_output_manager.queue_file_io_task(fileobj, chunk, index) + def _validate_content_range(self, requested_range, content_range): + if not requested_range or not content_range: + return + # Unparsed `ContentRange` looks like `bytes 0-8388607/39542919`, + # where `0-8388607` is the fetched range and `39542919` is + # the total object size. + response_range, total_size = content_range.split('/') + # Subtract `1` because range is 0-indexed. + final_byte = str(int(total_size) - 1) + # If it's the last part, the requested range will not include + # the final byte, eg `bytes=33554432-`. + if requested_range.endswith('-'): + requested_range += final_byte + # Request looks like `bytes=0-8388607`. + # Parsed response looks like `bytes 0-8388607`. + if requested_range[6:] != response_range[6:]: + raise S3ValidationError( + f"Requested range: `{requested_range[6:]}` does not match " + f"content range in response: `{response_range[6:]}`" + ) + class ImmediatelyWriteIOGetObjectTask(GetObjectTask): """GetObjectTask that immediately writes to the provided file object diff --git a/awscli/s3transfer/exceptions.py b/awscli/s3transfer/exceptions.py index 857a6bf379ba..c46b248b080f 100644 --- a/awscli/s3transfer/exceptions.py +++ b/awscli/s3transfer/exceptions.py @@ -39,3 +39,7 @@ class FatalError(CancelledError): """A CancelledError raised from an error in the TransferManager""" pass + + +class S3ValidationError(Exception): + pass diff --git a/tests/functional/s3transfer/test_download.py b/tests/functional/s3transfer/test_download.py index 8e6f28809e59..66976117a145 100644 --- a/tests/functional/s3transfer/test_download.py +++ b/tests/functional/s3transfer/test_download.py @@ -20,7 +20,11 @@ from botocore.exceptions import ClientError from s3transfer.compat import SOCKET_ERROR -from s3transfer.exceptions import RetriesExceededError, S3DownloadFailedError +from s3transfer.exceptions import ( + RetriesExceededError, + S3DownloadFailedError, + S3ValidationError, +) from s3transfer.manager import TransferConfig, TransferManager from tests import ( @@ -109,7 +113,7 @@ def add_head_object_response(self, expected_params=None): self.stubber.add_response(**head_response) def add_successful_get_object_responses( - self, expected_params=None, expected_ranges=None + self, expected_params=None, expected_ranges=None, extras=None ): # Add all get_object responses needed to complete the download. # Should account for both ranged and nonranged downloads. @@ -124,6 +128,8 @@ def add_successful_get_object_responses( stubbed_response['expected_params']['Range'] = ( expected_ranges[i] ) + if extras: + stubbed_response['service_response'].update(extras[i]) self.stubber.add_response(**stubbed_response) def add_n_retryable_get_object_responses(self, n, num_reads=0): @@ -511,9 +517,12 @@ def test_download(self): 'RequestPayer': 'requester', } expected_ranges = ['bytes=0-3', 'bytes=4-7', 'bytes=8-'] + stubbed_ranges = ['bytes 0-3/10', 'bytes 4-7/10', 'bytes 8-9/10'] self.add_head_object_response(expected_params) self.add_successful_get_object_responses( - {**expected_params, 'IfMatch': self.etag}, expected_ranges + {**expected_params, 'IfMatch': self.etag}, + expected_ranges, + [{"ContentRange": r} for r in stubbed_ranges], ) future = self.manager.download( @@ -547,6 +556,28 @@ def test_download_with_checksum_enabled(self): with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) + def test_download_raises_if_content_range_mismatch(self): + expected_params = { + 'Bucket': self.bucket, + 'Key': self.key, + } + expected_ranges = ['bytes=0-3', 'bytes=4-7', 'bytes=8-'] + # Note that the final retrieved range should be `bytes 8-9/10`. + stubbed_ranges = ['bytes 0-3/10', 'bytes 4-7/10', 'bytes 7-8/10'] + self.add_head_object_response(expected_params) + self.add_successful_get_object_responses( + {**expected_params, 'IfMatch': self.etag}, + expected_ranges, + [{"ContentRange": r} for r in stubbed_ranges], + ) + + future = self.manager.download( + self.bucket, self.key, self.filename, self.extra_args + ) + with self.assertRaises(S3ValidationError) as e: + future.result() + self.assertIn('does not match content range', str(e.exception)) + def test_download_raises_if_etag_validation_fails(self): expected_params = { 'Bucket': self.bucket, From b55c60fba03e9c3e8600d20147190a05abf1df4e Mon Sep 17 00:00:00 2001 From: Steve Yoo Date: Wed, 27 Aug 2025 14:07:47 -0400 Subject: [PATCH 2/2] Add changelog entry --- .changes/next-release/feature-download-58503.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changes/next-release/feature-download-58503.json diff --git a/.changes/next-release/feature-download-58503.json b/.changes/next-release/feature-download-58503.json new file mode 100644 index 000000000000..2a80f010e83b --- /dev/null +++ b/.changes/next-release/feature-download-58503.json @@ -0,0 +1,5 @@ +{ + "type": "feature", + "category": "s3 download", + "description": "Validate requested range matches content range in response for multipart downloads" +}