Skip to content

Commit

Permalink
fix: silence expected errors for routine operations on BlobReader (#400)
Browse files Browse the repository at this point in the history
Two fixes for BlobReader:
- Checksums are not supported for BlobReader's chunked downloads, so set checksum=None to silence log warnings (and add a note to the docstring explaining this).
- In Python, read() on files at EOF should return an empty result, but not raise an error. Stop BlobReader from emitting RequestRangeNotSatisfiable errors at EOF.

Fixes: #399
  • Loading branch information
andrewsg committed Mar 29, 2021
1 parent d10f842 commit d52853b
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 12 deletions.
5 changes: 5 additions & 0 deletions google/cloud/storage/blob.py
Expand Up @@ -3434,6 +3434,11 @@ def open(
latest generation number and set it; or, if the generation is known, set
it manually, for instance with bucket.blob(generation=123456).
Checksumming (hashing) to verify data integrity is disabled for reads
using this feature because reads are implemented using request ranges,
which do not provide checksums to validate. See
https://cloud.google.com/storage/docs/hashes-etags for details.
:type mode: str
:param mode:
(Optional) A mode string, as per standard Python `open()` semantics.The first
Expand Down
20 changes: 16 additions & 4 deletions google/cloud/storage/fileio.py
Expand Up @@ -14,6 +14,8 @@

import io

from google.api_core.exceptions import RequestRangeNotSatisfiable

# Resumable uploads require a chunk size of precisely a multiple of 256 KiB.
CHUNK_SIZE_MULTIPLE = 256 * 1024 # 256 KiB
DEFAULT_CHUNK_SIZE = 40 * 1024 * 1024 # 40 MiB
Expand Down Expand Up @@ -92,10 +94,20 @@ def read(self, size=-1):
else:
fetch_end = None

# Download the blob.
result += self._blob.download_as_bytes(
start=fetch_start, end=fetch_end, **self._download_kwargs
)
# Download the blob. Checksumming must be disabled as we are using
# chunked downloads, and the server only knows the checksum of the
# entire file.
try:
result += self._blob.download_as_bytes(
start=fetch_start,
end=fetch_end,
checksum=None,
**self._download_kwargs
)
except RequestRangeNotSatisfiable:
# We've reached the end of the file. Python file objects should
# return an empty response in this case, not raise an error.
pass

# If more bytes were read than is immediately needed, buffer the
# remainder and then trim the result.
Expand Down
3 changes: 3 additions & 0 deletions tests/system/test_system.py
Expand Up @@ -1134,6 +1134,9 @@ def test_blobwriter_and_blobreader(self):
file_obj.read(256 * 1024 * 2), reader.read(256 * 1024 * 2)
)
self.assertEqual(file_obj.read(), reader.read())
# End of file reached; further reads should be blank but not
# raise an error.
self.assertEqual(b"", reader.read())

def test_blobwriter_and_blobreader_text_mode(self):
blob = self.bucket.blob("MultibyteTextFile")
Expand Down
33 changes: 25 additions & 8 deletions tests/unit/test_fileio.py
Expand Up @@ -17,9 +17,11 @@
import unittest
import mock
import io
from google.cloud.storage.fileio import BlobReader, BlobWriter, SlidingBuffer
import string

from google.cloud.storage.fileio import BlobReader, BlobWriter, SlidingBuffer
from google.api_core.exceptions import RequestRangeNotSatisfiable

TEST_TEXT_DATA = string.ascii_lowercase + "\n" + string.ascii_uppercase + "\n"
TEST_BINARY_DATA = TEST_TEXT_DATA.encode("utf-8")
TEST_MULTIBYTE_TEXT_DATA = u"あいうえおかきくけこさしすせそたちつてと"
Expand Down Expand Up @@ -50,7 +52,7 @@ def read_from_fake_data(start=0, end=None, **_):
# Read and trigger the first download of chunk_size.
self.assertEqual(reader.read(1), TEST_BINARY_DATA[0:1])
blob.download_as_bytes.assert_called_once_with(
start=0, end=8, **download_kwargs
start=0, end=8, checksum=None, **download_kwargs
)

# Read from buffered data only.
Expand All @@ -61,21 +63,36 @@ def read_from_fake_data(start=0, end=None, **_):
self.assertEqual(reader.read(8), TEST_BINARY_DATA[4:12])
self.assertEqual(reader._pos, 12)
self.assertEqual(blob.download_as_bytes.call_count, 2)
blob.download_as_bytes.assert_called_with(start=8, end=16, **download_kwargs)
blob.download_as_bytes.assert_called_with(
start=8, end=16, checksum=None, **download_kwargs
)

# Read a larger amount, requiring a download larger than chunk_size.
self.assertEqual(reader.read(16), TEST_BINARY_DATA[12:28])
self.assertEqual(reader._pos, 28)
self.assertEqual(blob.download_as_bytes.call_count, 3)
blob.download_as_bytes.assert_called_with(start=16, end=28, **download_kwargs)
blob.download_as_bytes.assert_called_with(
start=16, end=28, checksum=None, **download_kwargs
)

# Read all remaining data.
self.assertEqual(reader.read(), TEST_BINARY_DATA[28:])
self.assertEqual(blob.download_as_bytes.call_count, 4)
blob.download_as_bytes.assert_called_with(start=28, end=None, **download_kwargs)
blob.download_as_bytes.assert_called_with(
start=28, end=None, checksum=None, **download_kwargs
)

reader.close()

def test_416_error_handled(self):
blob = mock.Mock()
blob.download_as_bytes = mock.Mock(
side_effect=RequestRangeNotSatisfiable("message")
)

reader = BlobReader(blob)
self.assertEqual(reader.read(), b"")

def test_readline(self):
blob = mock.Mock()

Expand All @@ -87,12 +104,12 @@ def read_from_fake_data(start=0, end=None, **_):

# Read a line. With chunk_size=10, expect three chunks downloaded.
self.assertEqual(reader.readline(), TEST_BINARY_DATA[:27])
blob.download_as_bytes.assert_called_with(start=20, end=30)
blob.download_as_bytes.assert_called_with(start=20, end=30, checksum=None)
self.assertEqual(blob.download_as_bytes.call_count, 3)

# Read another line.
self.assertEqual(reader.readline(), TEST_BINARY_DATA[27:])
blob.download_as_bytes.assert_called_with(start=50, end=60)
blob.download_as_bytes.assert_called_with(start=50, end=60, checksum=None)
self.assertEqual(blob.download_as_bytes.call_count, 6)

blob.size = len(TEST_BINARY_DATA)
Expand All @@ -101,7 +118,7 @@ def read_from_fake_data(start=0, end=None, **_):
# Read all lines. The readlines algorithm will attempt to read past the end of the last line once to verify there is no more to read.
self.assertEqual(b"".join(reader.readlines()), TEST_BINARY_DATA)
blob.download_as_bytes.assert_called_with(
start=len(TEST_BINARY_DATA), end=len(TEST_BINARY_DATA) + 10
start=len(TEST_BINARY_DATA), end=len(TEST_BINARY_DATA) + 10, checksum=None
)
self.assertEqual(blob.download_as_bytes.call_count, 13)

Expand Down

0 comments on commit d52853b

Please sign in to comment.