From d8432cd65a4e9b38eebd1ade2ff00f2f44bb0ef6 Mon Sep 17 00:00:00 2001
From: William Silversmith <william.silversmith@gmail.com>
Date: Thu, 23 Jul 2020 14:46:04 -0400
Subject: [PATCH] feat(blob.py): auto-populate standard headers for non-chunked
 downloads (#204)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-storage/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)

Fixes #24  🦕

This PR auto-populates the following blob properties for non-chunked downloads, using the headers of the server's download response:
```
blob.content_encoding
blob.content_type
blob.cache_control
blob.storage_class
blob.content_language
blob.md5_hash
blob.crc32c
```
---
 google/cloud/storage/blob.py | 33 +++++++++++++++++++++++++++++++--
 tests/unit/test_blob.py      | 31 +++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/google/cloud/storage/blob.py b/google/cloud/storage/blob.py
index efad9ae39..07a17867c 100644
--- a/google/cloud/storage/blob.py
+++ b/google/cloud/storage/blob.py
@@ -31,6 +31,7 @@
 from io import BytesIO
 import mimetypes
 import os
+import re
 import warnings
 import six
 
@@ -783,6 +784,34 @@ def _get_download_url(
         )
         return _add_query_parameters(base_url, name_value_pairs)
 
+    def _extract_headers_from_download(self, response):
+        """Populate blob properties from a non-chunked download's response headers.
+
+        This avoids the need to make a second request for commonly used
+        headers. A property whose header is absent is assigned ``None``.
+
+        :type response: :class:`requests.models.Response`
+        :param response: The server response from downloading a non-chunked file
+        """
+        self.content_encoding = response.headers.get("Content-Encoding", None)
+        self.content_type = response.headers.get("Content-Type", None)
+        self.cache_control = response.headers.get("Cache-Control", None)
+        self.storage_class = response.headers.get("X-Goog-Storage-Class", None)
+        self.content_language = response.headers.get("Content-Language", None)
+        #  'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
+        # Only the part of each digest before the "==" padding is stored.
+        x_goog_hash = response.headers.get("X-Goog-Hash", "")
+        digests = {}
+        for encoded_digest in x_goog_hash.split(","):
+            # Base64 digests may contain "+" or "/"; tolerate spaces around entries.
+            match = re.match(r"(crc32c|md5)=([\w/+]+)==", encoded_digest.strip())
+            if match:
+                method, digest = match.groups()
+                digests[method] = digest
+
+        self.crc32c = digests.get("crc32c", None)
+        self.md5_hash = digests.get("md5", None)
+
     def _do_download(
         self,
         transport,
@@ -840,8 +869,8 @@ def _do_download(
             download = klass(
                 download_url, stream=file_obj, headers=headers, start=start, end=end
             )
-            download.consume(transport, timeout=timeout)
-
+            response = download.consume(transport, timeout=timeout)
+            self._extract_headers_from_download(response)
         else:
 
             if raw_download:
diff --git a/tests/unit/test_blob.py b/tests/unit/test_blob.py
index 4635b050e..54aeae671 100644
--- a/tests/unit/test_blob.py
+++ b/tests/unit/test_blob.py
@@ -1451,6 +1451,37 @@ def _download_as_string_helper(self, raw_download, timeout=None):
         stream = blob._do_download.mock_calls[0].args[1]
         self.assertIsInstance(stream, io.BytesIO)
 
+    def test_download_as_string_w_response_headers(self):
+        blob_name = "blob-name"
+        client = mock.Mock(spec=["_http"])
+        bucket = _Bucket(client)
+        media_link = "http://example.com/media/"
+        properties = {"mediaLink": media_link}
+        blob = self._make_one(blob_name, bucket=bucket, properties=properties)
+        # Fake response; it is passed straight to the helper, no download is made.
+        response = self._mock_requests_response(
+            http_client.OK,
+            headers={
+                "Content-Type": "application/json",
+                "Content-Language": "ko-kr",
+                "Cache-Control": "max-age=1337;public",
+                "Content-Encoding": "gzip",
+                "X-Goog-Storage-Class": "STANDARD",
+                "X-Goog-Hash": "crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==",
+            },
+            # Body is the text { "x": 5 }, gzip-compressed.
+            content=b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00",
+        )
+        blob._extract_headers_from_download(response)
+        # Each header maps to one property; hashes are expected without "==".
+        self.assertEqual(blob.content_type, "application/json")
+        self.assertEqual(blob.content_language, "ko-kr")
+        self.assertEqual(blob.content_encoding, "gzip")
+        self.assertEqual(blob.cache_control, "max-age=1337;public")
+        self.assertEqual(blob.storage_class, "STANDARD")
+        self.assertEqual(blob.md5_hash, "CS9tHYTtyFntzj7B9nkkJQ")
+        self.assertEqual(blob.crc32c, "4gcgLQ")
+
+
     def test_download_as_string_w_generation_match(self):
         GENERATION_NUMBER = 6
         MEDIA_LINK = "http://example.com/media/"