diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py index 52cd9c7d3..04212ce24 100644 --- a/google/cloud/storage/blob.py +++ b/google/cloud/storage/blob.py @@ -3434,6 +3434,11 @@ def open( latest generation number and set it; or, if the generation is known, set it manually, for instance with bucket.blob(generation=123456). + Checksumming (hashing) to verify data integrity is disabled for reads + using this feature because reads are implemented using request ranges, + which do not provide checksums to validate. See + https://cloud.google.com/storage/docs/hashes-etags for details. + :type mode: str :param mode: (Optional) A mode string, as per standard Python `open()` semantics.The first diff --git a/google/cloud/storage/fileio.py b/google/cloud/storage/fileio.py index 07f2f11a3..53d3d14ab 100644 --- a/google/cloud/storage/fileio.py +++ b/google/cloud/storage/fileio.py @@ -14,6 +14,8 @@ import io +from google.api_core.exceptions import RequestRangeNotSatisfiable + # Resumable uploads require a chunk size of precisely a multiple of 256 KiB. CHUNK_SIZE_MULTIPLE = 256 * 1024 # 256 KiB DEFAULT_CHUNK_SIZE = 40 * 1024 * 1024 # 40 MiB @@ -92,10 +94,20 @@ def read(self, size=-1): else: fetch_end = None - # Download the blob. - result += self._blob.download_as_bytes( - start=fetch_start, end=fetch_end, **self._download_kwargs - ) + # Download the blob. Checksumming must be disabled as we are using + # chunked downloads, and the server only knows the checksum of the + # entire file. + try: + result += self._blob.download_as_bytes( + start=fetch_start, + end=fetch_end, + checksum=None, + **self._download_kwargs + ) + except RequestRangeNotSatisfiable: + # We've reached the end of the file. Python file objects should + # return an empty response in this case, not raise an error. + pass # If more bytes were read than is immediately needed, buffer the # remainder and then trim the result. diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 31ffd9c8b..6ec1c2a68 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -1134,6 +1134,9 @@ def test_blobwriter_and_blobreader(self): file_obj.read(256 * 1024 * 2), reader.read(256 * 1024 * 2) ) self.assertEqual(file_obj.read(), reader.read()) + # End of file reached; further reads should be blank but not + # raise an error. + self.assertEqual(b"", reader.read()) def test_blobwriter_and_blobreader_text_mode(self): blob = self.bucket.blob("MultibyteTextFile") diff --git a/tests/unit/test_fileio.py b/tests/unit/test_fileio.py index 09fa58f8d..8056c045f 100644 --- a/tests/unit/test_fileio.py +++ b/tests/unit/test_fileio.py @@ -17,9 +17,11 @@ import unittest import mock import io -from google.cloud.storage.fileio import BlobReader, BlobWriter, SlidingBuffer import string +from google.cloud.storage.fileio import BlobReader, BlobWriter, SlidingBuffer +from google.api_core.exceptions import RequestRangeNotSatisfiable + TEST_TEXT_DATA = string.ascii_lowercase + "\n" + string.ascii_uppercase + "\n" TEST_BINARY_DATA = TEST_TEXT_DATA.encode("utf-8") TEST_MULTIBYTE_TEXT_DATA = u"あいうえおかきくけこさしすせそたちつてと" @@ -50,7 +52,7 @@ def read_from_fake_data(start=0, end=None, **_): # Read and trigger the first download of chunk_size. self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1]) blob.download_as_bytes.assert_called_once_with( - start=0, end=8, **download_kwargs + start=0, end=8, checksum=None, **download_kwargs ) # Read from buffered data only. @@ -61,21 +63,36 @@ def read_from_fake_data(start=0, end=None, **_): self.assertEqual(reader.read(8), TEST_BINARY_DATA[4:12]) self.assertEqual(reader._pos, 12) self.assertEqual(blob.download_as_bytes.call_count, 2) - blob.download_as_bytes.assert_called_with(start=8, end=16, **download_kwargs) + blob.download_as_bytes.assert_called_with( + start=8, end=16, checksum=None, **download_kwargs + ) # Read a larger amount, requiring a download larger than chunk_size. self.assertEqual(reader.read(16), TEST_BINARY_DATA[12:28]) self.assertEqual(reader._pos, 28) self.assertEqual(blob.download_as_bytes.call_count, 3) - blob.download_as_bytes.assert_called_with(start=16, end=28, **download_kwargs) + blob.download_as_bytes.assert_called_with( + start=16, end=28, checksum=None, **download_kwargs + ) # Read all remaining data. self.assertEqual(reader.read(), TEST_BINARY_DATA[28:]) self.assertEqual(blob.download_as_bytes.call_count, 4) - blob.download_as_bytes.assert_called_with(start=28, end=None, **download_kwargs) + blob.download_as_bytes.assert_called_with( + start=28, end=None, checksum=None, **download_kwargs + ) reader.close() + def test_416_error_handled(self): + blob = mock.Mock() + blob.download_as_bytes = mock.Mock( + side_effect=RequestRangeNotSatisfiable("message") + ) + + reader = BlobReader(blob) + self.assertEqual(reader.read(), b"") + def test_readline(self): blob = mock.Mock() @@ -87,12 +104,12 @@ def read_from_fake_data(start=0, end=None, **_): # Read a line. With chunk_size=10, expect three chunks downloaded. self.assertEqual(reader.readline(), TEST_BINARY_DATA[:27]) - blob.download_as_bytes.assert_called_with(start=20, end=30) + blob.download_as_bytes.assert_called_with(start=20, end=30, checksum=None) self.assertEqual(blob.download_as_bytes.call_count, 3) # Read another line. self.assertEqual(reader.readline(), TEST_BINARY_DATA[27:]) - blob.download_as_bytes.assert_called_with(start=50, end=60) + blob.download_as_bytes.assert_called_with(start=50, end=60, checksum=None) self.assertEqual(blob.download_as_bytes.call_count, 6) blob.size = len(TEST_BINARY_DATA) @@ -101,7 +118,7 @@ def read_from_fake_data(start=0, end=None, **_): # Read all lines. The readlines algorithm will attempt to read past the end of the last line once to verify there is no more to read. self.assertEqual(b"".join(reader.readlines()), TEST_BINARY_DATA) blob.download_as_bytes.assert_called_with( - start=len(TEST_BINARY_DATA), end=len(TEST_BINARY_DATA) + 10 + start=len(TEST_BINARY_DATA), end=len(TEST_BINARY_DATA) + 10, checksum=None ) self.assertEqual(blob.download_as_bytes.call_count, 13)