Skip to content

Commit

Permalink
feat(blob.py): auto-populate standard headers for non-chunked downloa…
Browse files Browse the repository at this point in the history
…ds (#204)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-storage/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [x] Ensure the tests and linter pass
- [x] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)

Fixes #24  🦕

This PR auto-populates the following blob properties for non-chunked downloads from the headers of the server's response:
```
blob.content_encoding
blob.content_type
blob.cache_control
blob.storage_class
blob.content_language
blob.md5_hash
blob.crc32c
```
  • Loading branch information
william-silversmith committed Jul 23, 2020
1 parent 6eeb855 commit d8432cd
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
33 changes: 31 additions & 2 deletions google/cloud/storage/blob.py
Expand Up @@ -31,6 +31,7 @@
from io import BytesIO
import mimetypes
import os
import re
import warnings
import six

Expand Down Expand Up @@ -783,6 +784,34 @@ def _get_download_url(
)
return _add_query_parameters(base_url, name_value_pairs)

def _extract_headers_from_download(self, response):
    """Extract headers from a non-chunked request's http object.

    This avoids the need to make a second request for commonly used
    headers. Every property listed below is overwritten; a header that is
    absent from ``response`` sets the matching property to ``None``.

    :type response:
        :class requests.models.Response
    :param response: The server response from downloading a non-chunked file
    """
    headers = response.headers
    self.content_encoding = headers.get("Content-Encoding", None)
    self.content_type = headers.get("Content-Type", None)
    self.cache_control = headers.get("Cache-Control", None)
    self.storage_class = headers.get("X-Goog-Storage-Class", None)
    self.content_language = headers.get("Content-Language", None)
    # 'X-Goog-Hash': 'crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==',
    x_goog_hash = headers.get("X-Goog-Hash", "")

    digests = {}
    for encoded_digest in x_goog_hash.split(","):
        # Standard base64 uses '+' and '/' in addition to alphanumerics, so
        # they must be accepted or such digests would be silently dropped.
        # Entries are stripped because repeated headers are commonly joined
        # with ", " rather than a bare ",". The trailing "==" padding is
        # deliberately left out of the captured digest.
        match = re.match(r"(crc32c|md5)=([\w+/]+)==", encoded_digest.strip())
        if match:
            method, digest = match.groups()
            digests[method] = digest

    self.crc32c = digests.get("crc32c", None)
    self.md5_hash = digests.get("md5", None)

def _do_download(
self,
transport,
Expand Down Expand Up @@ -840,8 +869,8 @@ def _do_download(
download = klass(
download_url, stream=file_obj, headers=headers, start=start, end=end
)
download.consume(transport, timeout=timeout)

response = download.consume(transport, timeout=timeout)
self._extract_headers_from_download(response)
else:

if raw_download:
Expand Down
31 changes: 31 additions & 0 deletions tests/unit/test_blob.py
Expand Up @@ -1451,6 +1451,37 @@ def _download_as_string_helper(self, raw_download, timeout=None):
stream = blob._do_download.mock_calls[0].args[1]
self.assertIsInstance(stream, io.BytesIO)

def test_download_as_string_w_response_headers(self):
    """Check that response headers are copied onto the blob's properties."""
    name = "blob-name"
    http_only_client = mock.Mock(spec=["_http"])
    owning_bucket = _Bucket(http_only_client)
    link = "http://example.com/media/"
    blob = self._make_one(
        name, bucket=owning_bucket, properties={"mediaLink": link}
    )

    server_headers = {
        "Content-Type": "application/json",
        "Content-Language": "ko-kr",
        "Cache-Control": "max-age=1337;public",
        "Content-Encoding": "gzip",
        "X-Goog-Storage-Class": "STANDARD",
        "X-Goog-Hash": "crc32c=4gcgLQ==,md5=CS9tHYTtyFntzj7B9nkkJQ==",
    }
    # { "x": 5 } gzipped
    payload = b"\x1f\x8b\x08\x00\xcfo\x17_\x02\xff\xabVP\xaaP\xb2R0U\xa8\x05\x00\xa1\xcaQ\x93\n\x00\x00\x00"
    response = self._mock_requests_response(
        http_client.OK, headers=server_headers, content=payload
    )

    blob._extract_headers_from_download(response)

    # (property name, expected value) pairs, checked in order.
    expectations = (
        ("content_type", "application/json"),
        ("content_language", "ko-kr"),
        ("content_encoding", "gzip"),
        ("cache_control", "max-age=1337;public"),
        ("storage_class", "STANDARD"),
        ("md5_hash", "CS9tHYTtyFntzj7B9nkkJQ"),
        ("crc32c", "4gcgLQ"),
    )
    for attribute, expected in expectations:
        self.assertEqual(getattr(blob, attribute), expected)

def test_download_as_string_w_generation_match(self):
GENERATION_NUMBER = 6
MEDIA_LINK = "http://example.com/media/"
Expand Down

0 comments on commit d8432cd

Please sign in to comment.