fix: silence expected errors for routine operations on BlobReader #400

Merged · 2 commits · Mar 29, 2021
5 changes: 5 additions & 0 deletions google/cloud/storage/blob.py
@@ -3434,6 +3434,11 @@ def open(
latest generation number and set it; or, if the generation is known, set
it manually, for instance with bucket.blob(generation=123456).

Checksumming (hashing) to verify data integrity is disabled for reads
using this feature because reads are implemented using request ranges,
which do not provide checksums to validate. See
https://cloud.google.com/storage/docs/hashes-etags for details.

:type mode: str
:param mode:
(Optional) A mode string, as per standard Python `open()` semantics. The first
…
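
For context, a minimal sketch of the reader behavior this change documents and fixes, assuming hypothetical bucket and object names ("my-bucket", "my-object"): reads through blob.open("rb") are served with ranged requests, so per-read checksum validation is not available, and reading past the end of the object returns empty bytes rather than raising.

from google.cloud import storage

client = storage.Client()
# "my-bucket" and "my-object" are placeholder names for illustration only.
blob = client.bucket("my-bucket").blob("my-object")

with blob.open("rb") as reader:
    first = reader.read(256 * 1024)  # served via a ranged request; no per-read checksum
    rest = reader.read()             # remainder of the object
    assert reader.read() == b""      # past EOF: empty bytes, not an exception
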
20 changes: 16 additions & 4 deletions google/cloud/storage/fileio.py
@@ -14,6 +14,8 @@

import io

from google.api_core.exceptions import RequestRangeNotSatisfiable

# Resumable uploads require a chunk size of precisely a multiple of 256 KiB.
CHUNK_SIZE_MULTIPLE = 256 * 1024 # 256 KiB
DEFAULT_CHUNK_SIZE = 40 * 1024 * 1024 # 40 MiB
@@ -92,10 +94,20 @@ def read(self, size=-1):
else:
fetch_end = None

# Download the blob.
result += self._blob.download_as_bytes(
start=fetch_start, end=fetch_end, **self._download_kwargs
)
# Download the blob. Checksumming must be disabled as we are using
# chunked downloads, and the server only knows the checksum of the
# entire file.
try:
result += self._blob.download_as_bytes(
start=fetch_start,
end=fetch_end,
checksum=None,
**self._download_kwargs
)
except RequestRangeNotSatisfiable:
# We've reached the end of the file. Python file objects should
# return an empty response in this case, not raise an error.
pass

# If more bytes were read than is immediately needed, buffer the
# remainder and then trim the result.
…
3 changes: 3 additions & 0 deletions tests/system/test_system.py
@@ -1134,6 +1134,9 @@ def test_blobwriter_and_blobreader(self):
file_obj.read(256 * 1024 * 2), reader.read(256 * 1024 * 2)
)
self.assertEqual(file_obj.read(), reader.read())
# End of file reached; further reads should be blank but not
# raise an error.
self.assertEqual(b"", reader.read())

def test_blobwriter_and_blobreader_text_mode(self):
blob = self.bucket.blob("MultibyteTextFile")
…
33 changes: 25 additions & 8 deletions tests/unit/test_fileio.py
@@ -17,9 +17,11 @@
import unittest
import mock
import io
from google.cloud.storage.fileio import BlobReader, BlobWriter, SlidingBuffer
import string

from google.cloud.storage.fileio import BlobReader, BlobWriter, SlidingBuffer
from google.api_core.exceptions import RequestRangeNotSatisfiable

TEST_TEXT_DATA = string.ascii_lowercase + "\n" + string.ascii_uppercase + "\n"
TEST_BINARY_DATA = TEST_TEXT_DATA.encode("utf-8")
TEST_MULTIBYTE_TEXT_DATA = u"あいうえおかきくけこさしすせそたちつてと"
@@ -50,7 +52,7 @@ def read_from_fake_data(start=0, end=None, **_):
# Read and trigger the first download of chunk_size.
self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1])
blob.download_as_bytes.assert_called_once_with(
start=0, end=8, **download_kwargs
start=0, end=8, checksum=None, **download_kwargs
)

# Read from buffered data only.
@@ -61,21 +63,36 @@ def read_from_fake_data(start=0, end=None, **_):
self.assertEqual(reader.read(8), TEST_BINARY_DATA[4:12])
self.assertEqual(reader._pos, 12)
self.assertEqual(blob.download_as_bytes.call_count, 2)
blob.download_as_bytes.assert_called_with(start=8, end=16, **download_kwargs)
blob.download_as_bytes.assert_called_with(
start=8, end=16, checksum=None, **download_kwargs
)

# Read a larger amount, requiring a download larger than chunk_size.
self.assertEqual(reader.read(16), TEST_BINARY_DATA[12:28])
self.assertEqual(reader._pos, 28)
self.assertEqual(blob.download_as_bytes.call_count, 3)
blob.download_as_bytes.assert_called_with(start=16, end=28, **download_kwargs)
blob.download_as_bytes.assert_called_with(
start=16, end=28, checksum=None, **download_kwargs
)

# Read all remaining data.
self.assertEqual(reader.read(), TEST_BINARY_DATA[28:])
self.assertEqual(blob.download_as_bytes.call_count, 4)
blob.download_as_bytes.assert_called_with(start=28, end=None, **download_kwargs)
blob.download_as_bytes.assert_called_with(
start=28, end=None, checksum=None, **download_kwargs
)

reader.close()

def test_416_error_handled(self):
blob = mock.Mock()
blob.download_as_bytes = mock.Mock(
side_effect=RequestRangeNotSatisfiable("message")
)

reader = BlobReader(blob)
self.assertEqual(reader.read(), b"")

def test_readline(self):
blob = mock.Mock()

@@ -87,12 +104,12 @@ def read_from_fake_data(start=0, end=None, **_):

# Read a line. With chunk_size=10, expect three chunks downloaded.
self.assertEqual(reader.readline(), TEST_BINARY_DATA[:27])
blob.download_as_bytes.assert_called_with(start=20, end=30)
blob.download_as_bytes.assert_called_with(start=20, end=30, checksum=None)
self.assertEqual(blob.download_as_bytes.call_count, 3)

# Read another line.
self.assertEqual(reader.readline(), TEST_BINARY_DATA[27:])
blob.download_as_bytes.assert_called_with(start=50, end=60)
blob.download_as_bytes.assert_called_with(start=50, end=60, checksum=None)
self.assertEqual(blob.download_as_bytes.call_count, 6)

blob.size = len(TEST_BINARY_DATA)
@@ -101,7 +118,7 @@ def read_from_fake_data(start=0, end=None, **_):
# Read all lines. The readlines algorithm will attempt to read past the end of the last line once to verify there is no more to read.
self.assertEqual(b"".join(reader.readlines()), TEST_BINARY_DATA)
blob.download_as_bytes.assert_called_with(
start=len(TEST_BINARY_DATA), end=len(TEST_BINARY_DATA) + 10
start=len(TEST_BINARY_DATA), end=len(TEST_BINARY_DATA) + 10, checksum=None
)
self.assertEqual(blob.download_as_bytes.call_count, 13)

…