From d84c0ddfd00fa731acfe9899c668041456b08ab7 Mon Sep 17 00:00:00 2001 From: Andrew Gorcester Date: Tue, 5 May 2020 09:59:01 -0700 Subject: [PATCH] feat: add offset and includeTrailingDelimiter options to list_blobs (#125) * feat: add offset and includeTrailingDelimiter options to list_blobs * lint * fix comment typo in system tests Co-authored-by: Frank Natividad --- google/cloud/storage/bucket.py | 32 ++++++++++++++++++++++ google/cloud/storage/client.py | 24 ++++++++++++++++ tests/system/test_system.py | 50 ++++++++++++++++++++++++++++++++-- tests/unit/test_bucket.py | 9 ++++++ tests/unit/test_client.py | 9 ++++++ 5 files changed, 121 insertions(+), 3 deletions(-) diff --git a/google/cloud/storage/bucket.py b/google/cloud/storage/bucket.py index 69fa321bb..8540bef6e 100644 --- a/google/cloud/storage/bucket.py +++ b/google/cloud/storage/bucket.py @@ -894,6 +894,9 @@ def list_blobs( page_token=None, prefix=None, delimiter=None, + start_offset=None, + end_offset=None, + include_trailing_delimiter=None, versions=None, projection="noAcl", fields=None, @@ -926,6 +929,26 @@ def list_blobs( :param delimiter: (Optional) Delimiter, used with ``prefix`` to emulate hierarchy. + :type start_offset: str + :param start_offset: + (Optional) Filter results to objects whose names are + lexicographically equal to or after ``startOffset``. If + ``endOffset`` is also set, the objects listed will have names + between ``startOffset`` (inclusive) and ``endOffset`` (exclusive). + + :type end_offset: str + :param end_offset: + (Optional) Filter results to objects whose names are + lexicographically before ``endOffset``. If ``startOffset`` is also + set, the objects listed will have names between ``startOffset`` + (inclusive) and ``endOffset`` (exclusive). + + :type include_trailing_delimiter: boolean + :param include_trailing_delimiter: + (Optional) If true, objects that end in exactly one instance of + ``delimiter`` will have their metadata included in ``items`` in + addition to ``prefixes``.
+ :type versions: bool :param versions: (Optional) Whether object versions should be returned as separate blobs. @@ -967,6 +990,15 @@ def list_blobs( if delimiter is not None: extra_params["delimiter"] = delimiter + if start_offset is not None: + extra_params["startOffset"] = start_offset + + if end_offset is not None: + extra_params["endOffset"] = end_offset + + if include_trailing_delimiter is not None: + extra_params["includeTrailingDelimiter"] = include_trailing_delimiter + if versions is not None: extra_params["versions"] = versions diff --git a/google/cloud/storage/client.py b/google/cloud/storage/client.py index 1a7711552..4a4bbe733 100644 --- a/google/cloud/storage/client.py +++ b/google/cloud/storage/client.py @@ -540,6 +540,9 @@ def list_blobs( page_token=None, prefix=None, delimiter=None, + start_offset=None, + end_offset=None, + include_trailing_delimiter=None, versions=None, projection="noAcl", fields=None, @@ -573,6 +576,24 @@ def list_blobs( (Optional) Delimiter, used with ``prefix`` to emulate hierarchy. + start_offset (str): + (Optional) Filter results to objects whose names are + lexicographically equal to or after ``startOffset``. If + ``endOffset`` is also set, the objects listed will have names + between ``startOffset`` (inclusive) and ``endOffset`` + (exclusive). + + end_offset (str): + (Optional) Filter results to objects whose names are + lexicographically before ``endOffset``. If ``startOffset`` is + also set, the objects listed will have names between + ``startOffset`` (inclusive) and ``endOffset`` (exclusive). + + include_trailing_delimiter (boolean): + (Optional) If true, objects that end in exactly one instance of + ``delimiter`` will have their metadata included in ``items`` in + addition to ``prefixes``. + versions (bool): (Optional) Whether object versions should be returned as separate blobs. 
@@ -606,6 +627,9 @@ def list_blobs( page_token=page_token, prefix=prefix, delimiter=delimiter, + start_offset=start_offset, + end_offset=end_offset, + include_trailing_delimiter=include_trailing_delimiter, versions=versions, projection=projection, fields=fields, diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 675758794..82f0cb98b 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -761,7 +761,7 @@ def test_fetch_object_and_check_content(self): class TestStorageListFiles(TestStorageFiles): - FILENAMES = ("CloudLogo1", "CloudLogo2", "CloudLogo3") + FILENAMES = ("CloudLogo1", "CloudLogo2", "CloudLogo3", "CloudLogo4") @classmethod def setUpClass(cls): @@ -818,18 +818,49 @@ def test_paginate_files(self): # Technically the iterator is exhausted. self.assertEqual(iterator.num_results, iterator.max_results) # But we modify the iterator to continue paging after - # articially stopping after ``count`` items. + # artificially stopping after ``count`` items. iterator.max_results = None page2 = six.next(page_iter) last_blobs = list(page2) self.assertEqual(len(last_blobs), truncation_size) + @RetryErrors(unittest.TestCase.failureException) + def test_paginate_files_with_offset(self): + truncation_size = 1 + inclusive_start_offset = self.FILENAMES[1] + exclusive_end_offset = self.FILENAMES[-1] + desired_files = self.FILENAMES[1:-1] + count = len(desired_files) - truncation_size + iterator = self.bucket.list_blobs( + max_results=count, + start_offset=inclusive_start_offset, + end_offset=exclusive_end_offset, + ) + page_iter = iterator.pages + + page1 = six.next(page_iter) + blobs = list(page1) + self.assertEqual(len(blobs), count) + self.assertEqual(blobs[0].name, desired_files[0]) + self.assertIsNotNone(iterator.next_page_token) + # Technically the iterator is exhausted. 
+ self.assertEqual(iterator.num_results, iterator.max_results) + # But we modify the iterator to continue paging after + # artificially stopping after ``count`` items. + iterator.max_results = None + + page2 = six.next(page_iter) + last_blobs = list(page2) + self.assertEqual(len(last_blobs), truncation_size) + self.assertEqual(last_blobs[-1].name, desired_files[-1]) + class TestStoragePseudoHierarchy(TestStorageFiles): FILENAMES = ( "file01.txt", + "parent/", "parent/file11.txt", "parent/child/file21.txt", "parent/child/file22.txt", @@ -877,7 +908,9 @@ def test_first_level(self): iterator = self.bucket.list_blobs(delimiter="/", prefix="parent/") page = six.next(iterator.pages) blobs = list(page) - self.assertEqual([blob.name for blob in blobs], ["parent/file11.txt"]) + self.assertEqual( + [blob.name for blob in blobs], ["parent/", "parent/file11.txt"] + ) self.assertIsNone(iterator.next_page_token) self.assertEqual(iterator.prefixes, set(["parent/child/"])) @@ -909,6 +942,17 @@ def test_third_level(self): self.assertIsNone(iterator.next_page_token) self.assertEqual(iterator.prefixes, set()) + @RetryErrors(unittest.TestCase.failureException) + def test_include_trailing_delimiter(self): + iterator = self.bucket.list_blobs( + delimiter="/", include_trailing_delimiter=True + ) + page = six.next(iterator.pages) + blobs = list(page) + self.assertEqual([blob.name for blob in blobs], ["file01.txt", "parent/"]) + self.assertIsNone(iterator.next_page_token) + self.assertEqual(iterator.prefixes, set(["parent/"])) + class TestStorageSignURLs(unittest.TestCase): BLOB_CONTENT = b"This time for sure, Rocky!" 
diff --git a/tests/unit/test_bucket.py b/tests/unit/test_bucket.py index 365e1f0e1..7bbcf73df 100644 --- a/tests/unit/test_bucket.py +++ b/tests/unit/test_bucket.py @@ -733,6 +733,9 @@ def test_list_blobs_w_all_arguments_and_user_project(self): PAGE_TOKEN = "ABCD" PREFIX = "subfolder" DELIMITER = "/" + START_OFFSET = "c" + END_OFFSET = "g" + INCLUDE_TRAILING_DELIMITER = True VERSIONS = True PROJECTION = "full" FIELDS = "items/contentLanguage,nextPageToken" @@ -741,6 +744,9 @@ def test_list_blobs_w_all_arguments_and_user_project(self): "pageToken": PAGE_TOKEN, "prefix": PREFIX, "delimiter": DELIMITER, + "startOffset": START_OFFSET, + "endOffset": END_OFFSET, + "includeTrailingDelimiter": INCLUDE_TRAILING_DELIMITER, "versions": VERSIONS, "projection": PROJECTION, "fields": FIELDS, @@ -754,6 +760,9 @@ def test_list_blobs_w_all_arguments_and_user_project(self): page_token=PAGE_TOKEN, prefix=PREFIX, delimiter=DELIMITER, + start_offset=START_OFFSET, + end_offset=END_OFFSET, + include_trailing_delimiter=INCLUDE_TRAILING_DELIMITER, versions=VERSIONS, projection=PROJECTION, fields=FIELDS, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 7acba35fa..c38d87979 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -970,6 +970,9 @@ def test_list_blobs_w_all_arguments_and_user_project(self): PAGE_TOKEN = "ABCD" PREFIX = "subfolder" DELIMITER = "/" + START_OFFSET = "c" + END_OFFSET = "g" + INCLUDE_TRAILING_DELIMITER = True VERSIONS = True PROJECTION = "full" FIELDS = "items/contentLanguage,nextPageToken" @@ -978,6 +981,9 @@ def test_list_blobs_w_all_arguments_and_user_project(self): "pageToken": PAGE_TOKEN, "prefix": PREFIX, "delimiter": DELIMITER, + "startOffset": START_OFFSET, + "endOffset": END_OFFSET, + "includeTrailingDelimiter": INCLUDE_TRAILING_DELIMITER, "versions": VERSIONS, "projection": PROJECTION, "fields": FIELDS, @@ -1001,6 +1007,9 @@ def test_list_blobs_w_all_arguments_and_user_project(self): page_token=PAGE_TOKEN, 
prefix=PREFIX, delimiter=DELIMITER, + start_offset=START_OFFSET, + end_offset=END_OFFSET, + include_trailing_delimiter=INCLUDE_TRAILING_DELIMITER, versions=VERSIONS, projection=PROJECTION, fields=FIELDS,