From d8432cd65a4e9b38eebd1ade2ff00f2f44bb0ef6 Mon Sep 17 00:00:00 2001
From: William Silversmith
Date: Thu, 23 Jul 2020 14:46:04 -0400
Subject: [PATCH] feat(blob.py): auto-populate standard headers for non-chunked downloads (#204)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-storage/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)

Fixes #24 🦕

This PR autopopulates the following fields for non-chunked downloads based on the server header response:

```
blob.content_encoding
blob.content_type
blob.cache_control
blob.storage_class
blob.content_language
blob.md5_hash
blob.crc32c
```
---
Review note: X-Goog-Hash digests are standard base64, so the parser now
accepts '+' and '/', keeps the '=' padding (so md5_hash / crc32c hold the
same base64 strings the header carries), accepts zero to two padding
characters, and tolerates whitespace after the comma, which appears when
two X-Goog-Hash headers are folded into one value.

 google/cloud/storage/blob.py | 33 +++++++++++++++++++++++++++++++--
 tests/unit/test_blob.py      | 31 +++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py
index efad9ae39..07a17867c 100644
--- a/google/cloud/storage/blob.py
+++ b/google/cloud/storage/blob.py
@@ -31,6 +31,7 @@
 from io import BytesIO
 import mimetypes
 import os
+import re
 import warnings
 
 import six
@@ -783,6 +784,34 @@ def _get_download_url(
         )
         return _add_query_parameters(base_url, name_value_pairs)
 
+    def _extract_headers_from_download(self, response):
+        """Extract headers from a non-chunked request's http object.
+
+        This avoids the need to make a second request for commonly used
+        headers.
+
+        :type response:
+            :class:`requests.models.Response`
+        :param response: The server response from downloading a non-chunked file
+        """
+        self.content_encoding = response.headers.get("Content-Encoding", None)
+        self.content_type = response.headers.get("Content-Type", None)
+        self.cache_control = response.headers.get("Cache-Control", None)
+        self.storage_class = response.headers.get("X-Goog-Storage-Class", None)
+        self.content_language = response.headers.get("Content-Language", None)
+        # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
+        x_goog_hash = response.headers.get("X-Goog-Hash", "")
+
+        digests = {}
+        for encoded_digest in x_goog_hash.split(","):
+            match = re.match(r"\s*(crc32c|md5)=([A-Za-z0-9+/]+={0,2})", encoded_digest)
+            if match:
+                method, digest = match.groups()
+                digests[method] = digest
+
+        self.crc32c = digests.get("crc32c", None)
+        self.md5_hash = digests.get("md5", None)
+
     def _do_download(
         self,
         transport,
@@ -840,8 +869,8 @@ def _do_download(
             download = klass(
                 download_url, stream=file_obj, headers=headers, start=start, end=end
             )
-            download.consume(transport, timeout=timeout)
-
+            response = download.consume(transport, timeout=timeout)
+            self._extract_headers_from_download(response)
         else:
 
             if raw_download:
diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py
index 4635b050e..54aeae671 100644
--- a/tests/unit/test_blob.py
+++ b/tests/unit/test_blob.py
@@ -1451,6 +1451,37 @@ def _download_as_string_helper(self, raw_download, timeout=None):
         stream = blob._do_download.mock_calls[0].args[1]
         self.assertIsInstance(stream, io.BytesIO)
 
+    def test_download_as_string_w_response_headers(self):
+        blob_name = "blob-name"
+        client = mock.Mock(spec=["_http"])
+        bucket = _Bucket(client)
+        media_link = "http://example.com/media/"
+        properties = {"mediaLink": media_link}
+        blob = self._make_one(blob_name, bucket=bucket, properties=properties)
+
+        response = self._mock_requests_response(
+            http_client.OK,
+            headers={
+                "Content-Type": "application/json",
+                "Content-Language": "ko-kr",
+                "Cache-Control": "max-age=1337;public",
+                "Content-Encoding": "gzip",
+                "X-Goog-Storage-Class": "STANDARD",
+                "X-Goog-Hash": "crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==",
+            },
+            # { "x": 5 } gzipped
+            content=b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00",
+        )
+        blob._extract_headers_from_download(response)
+
+        self.assertEqual(blob.content_type, "application/json")
+        self.assertEqual(blob.content_language, "ko-kr")
+        self.assertEqual(blob.content_encoding, "gzip")
+        self.assertEqual(blob.cache_control, "max-age=1337;public")
+        self.assertEqual(blob.storage_class, "STANDARD")
+        self.assertEqual(blob.md5_hash, "CS9tHYTtyFntzj7B9nkkJQ==")
+        self.assertEqual(blob.crc32c, "4gcgLQ==")
+
     def test_download_as_string_w_generation_match(self):
         GENERATION_NUMBER = 6
         MEDIA_LINK = "http://example.com/media/"