From 1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 6 Jun 2021 10:18:13 -0400 Subject: [PATCH] feat: list_tables, list_projects, list_datasets, list_models, list_routines, and list_jobs now accept a page_size parameter to control page size (#686) --- google/cloud/bigquery/client.py | 39 ++- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/test_client.py | 494 ------------------------------- tests/unit/test_list_datasets.py | 124 ++++++++ tests/unit/test_list_jobs.py | 291 ++++++++++++++++++ tests/unit/test_list_models.py | 11 +- tests/unit/test_list_projects.py | 119 ++++++++ tests/unit/test_list_routines.py | 11 +- tests/unit/test_list_tables.py | 19 ++ 10 files changed, 605 insertions(+), 507 deletions(-) create mode 100644 tests/unit/test_list_datasets.py create mode 100644 tests/unit/test_list_jobs.py create mode 100644 tests/unit/test_list_projects.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 7ef3795a8..2b7a5273e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -286,6 +286,7 @@ def list_projects( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -294,8 +295,8 @@ def list_projects( Args: max_results (Optional[int]): - Maximum number of projects to return, If not passed, - defaults to a value set by the API. + Maximum number of projects to return. + Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the projects. If not passed, @@ -310,6 +311,10 @@ def list_projects( The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of projects to return in each page. + Defaults to a value set by the API. + Returns: google.api_core.page_iterator.Iterator: Iterator of :class:`~google.cloud.bigquery.client.Project` @@ -335,6 +340,7 @@ def api_request(*args, **kwargs): items_key="projects", page_token=page_token, max_results=max_results, + page_size=page_size, ) def list_datasets( @@ -346,6 +352,7 @@ def list_datasets( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -375,6 +382,8 @@ def list_datasets( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of datasets to return per page. Returns: google.api_core.page_iterator.Iterator: @@ -414,6 +423,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: @@ -1270,6 +1280,7 @@ def list_models( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1288,7 +1299,7 @@ def list_models( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of models to return. If not passed, defaults to a + Maximum number of models to return. Defaults to a value set by the API. 
page_token (Optional[str]): Token representing a cursor into the models. If not passed, @@ -1301,6 +1312,9 @@ def list_models( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of models to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1331,6 +1345,7 @@ def api_request(*args, **kwargs): items_key="models", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1342,6 +1357,7 @@ def list_routines( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1360,7 +1376,7 @@ def list_routines( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of routines to return. If not passed, defaults + Maximum number of routines to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the routines. If not passed, @@ -1373,6 +1389,9 @@ def list_routines( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of routines to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1403,6 +1422,7 @@ def api_request(*args, **kwargs): items_key="routines", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1414,6 +1434,7 @@ def list_tables( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1432,7 +1453,7 @@ def list_tables( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of tables to return. If not passed, defaults + Maximum number of tables to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the tables. If not passed, @@ -1445,6 +1466,9 @@ def list_tables( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of tables to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1474,6 +1498,7 @@ def api_request(*args, **kwargs): items_key="tables", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -2112,6 +2137,7 @@ def list_jobs( timeout: float = None, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, + page_size: int = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2157,6 +2183,8 @@ def list_jobs( Max value for job creation time. If set, only jobs created before or at this timestamp are returned. If the datetime has no time zone assumes UTC time. + page_size (Optional[int]): + Maximum number of jobs to return per page. 
Returns: google.api_core.page_iterator.Iterator: @@ -2208,6 +2236,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def load_table_from_uri( diff --git a/setup.py b/setup.py index 6a6202ef9..963eb73ec 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 322373eba..71c9ff49a 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,7 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -google-api-core==1.23.0 +google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 1346a1ef6..7a28ef248 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -471,221 +471,6 @@ def test_get_service_account_email_w_custom_retry(self): ], ) - def test_list_projects_defaults(self): - from google.cloud.bigquery.client import Project - - PROJECT_1 = "PROJECT_ONE" - PROJECT_2 = "PROJECT_TWO" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [ - { - "kind": "bigquery#project", - "id": PROJECT_1, - "numericId": 1, - "projectReference": {"projectId": PROJECT_1}, - "friendlyName": "One", - }, - { - "kind": "bigquery#project", - "id": PROJECT_2, - "numericId": 2, - "projectReference": {"projectId": PROJECT_2}, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = make_connection(DATA) - iterator = client.list_projects() - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), len(DATA["projects"])) - for found, expected in zip(projects, DATA["projects"]): - self.assertIsInstance(found, Project) - self.assertEqual(found.project_id, expected["id"]) - self.assertEqual(found.numeric_id, expected["numericId"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=None - ) - - def test_list_projects_w_timeout(self): - PROJECT_1 = "PROJECT_ONE" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(timeout=7.5) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=7.5 - ) - - def 
test_list_projects_explicit_response_missing_projects_key(self): - TOKEN = "TOKEN" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(max_results=3, page_token=TOKEN) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects", - query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, - ) - - def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import DatasetListItem - - DATASET_1 = "dataset_one" - DATASET_2 = "dataset_two" - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "datasets": [ - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_1), - "datasetReference": { - "datasetId": DATASET_1, - "projectId": self.PROJECT, - }, - "friendlyName": None, - }, - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_2), - "datasetReference": { - "datasetId": DATASET_2, - "projectId": self.PROJECT, - }, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), len(DATA["datasets"])) - for found, expected in zip(datasets, DATA["datasets"]): - self.assertIsInstance(found, DatasetListItem) - self.assertEqual(found.full_dataset_id, expected["id"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None - ) - - def test_list_datasets_w_project_and_timeout(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - list(client.list_datasets(project="other-project", timeout=7.5)) - - final_attributes.assert_called_once_with( - {"path": "/projects/other-project/datasets"}, client, None - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/datasets", - query_params={}, - timeout=7.5, - ) - - def test_list_datasets_explicit_response_missing_datasets_key(self): - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - FILTER = "FILTER" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets( - include_all=True, filter=FILTER, max_results=3, page_token=TOKEN - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = 
next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "all": True, - "filter": FILTER, - "maxResults": 3, - "pageToken": TOKEN, - }, - timeout=None, - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3239,285 +3024,6 @@ def test_cancel_job_w_timeout(self): timeout=7.5, ) - def test_list_jobs_defaults(self): - from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import ExtractJob - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import QueryJob - from google.cloud.bigquery.job import WriteDisposition - - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - QUERY_DESTINATION_TABLE = "query_destination_table" - SOURCE_URI = "gs://test_bucket/src_object*" - DESTINATION_URI = "gs://test_bucket/dst_object*" - JOB_TYPES = { - "load_job": LoadJob, - "copy_job": CopyJob, - "extract_job": ExtractJob, - "query_job": QueryJob, - } - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - QUERY = "SELECT * from test_dataset:test_table" - ASYNC_QUERY_DATA = { - "id": "%s:%s" % (self.PROJECT, "query_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, - "state": "DONE", - "configuration": { - "query": { - "query": QUERY, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": QUERY_DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_IF_NEEDED, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - }, - } - EXTRACT_DATA = { - "id": "%s:%s" % (self.PROJECT, "extract_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "extract_job"}, - "state": "DONE", - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "destinationUris": [DESTINATION_URI], - } - }, - } - COPY_DATA = { - "id": "%s:%s" % (self.PROJECT, "copy_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "copy_job"}, - "state": "DONE", - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": DESTINATION_TABLE, - }, - } - }, - } - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "sourceUris": [SOURCE_URI], - } - }, - } - DATA = { - "nextPageToken": TOKEN, - "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = 
iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_load_job_wo_sourceUris(self): - from google.cloud.bigquery.job import LoadJob - - SOURCE_TABLE = "source_table" - JOB_TYPES = {"load_job": LoadJob} - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - } - }, - } - DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_explicit_missing(self): - PATH = "projects/%s/jobs" % self.PROJECT - DATA = {} - TOKEN = "TOKEN" - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs( - max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "projection": "full", - "maxResults": 1000, - "pageToken": TOKEN, - "allUsers": True, - "stateFilter": "done", - }, - timeout=None, - ) - - def test_list_jobs_w_project(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - list(client.list_jobs(project="other-project")) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs", - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_w_timeout(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - list(client.list_jobs(timeout=7.5)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/{}/jobs".format(self.PROJECT), - query_params={"projection": 
"full"}, - timeout=7.5, - ) - - def test_list_jobs_w_time_filter(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - # One millisecond after the unix epoch. - start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) - # One millisecond after the the 2038 31-bit signed int rollover - end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) - end_time_millis = (((2 ** 31) - 1) * 1000) + 1 - - list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={ - "projection": "full", - "minCreationTime": "1", - "maxCreationTime": str(end_time_millis), - }, - timeout=None, - ) - - def test_list_jobs_w_parent_job_filter(self): - from google.cloud.bigquery import job - - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}, {}) - - parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] - - for parent_job in parent_job_args: - list(client.list_jobs(parent_job=parent_job)) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, - ) - conn.api_request.reset_mock() - def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob, LoadJobConfig diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py new file mode 100644 index 000000000..7793a7ba6 --- /dev/null +++ b/tests/unit/test_list_datasets.py @@ -0,0 +1,124 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_datasets_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.dataset import DatasetListItem + + DATASET_1 = "dataset_one" + DATASET_2 = "dataset_two" + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "datasets": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_1), + "datasetReference": {"datasetId": DATASET_1, "projectId": PROJECT}, + "friendlyName": None, + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_2), + "datasetReference": {"datasetId": DATASET_2, "projectId": PROJECT}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == len(DATA["datasets"]) + for found, expected in zip(datasets, DATA["datasets"]): + assert isinstance(found, DatasetListItem) + assert found.full_dataset_id == expected["id"] + assert found.friendly_name == expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params=query, timeout=None + ) + + +def test_list_datasets_w_project_and_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + list(client.list_datasets(project="other-project", timeout=7.5)) + + final_attributes.assert_called_once_with( + {"path": "/projects/other-project/datasets"}, client, None + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/datasets", + query_params={}, + timeout=7.5, + ) + + +def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + FILTER = "FILTER" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets( + include_all=True, filter=FILTER, max_results=3, page_token=TOKEN + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "all": True, + "filter": FILTER, + "maxResults": 3, + "pageToken": TOKEN, + }, + timeout=None, + ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py new file mode 100644 index 000000000..f348be724 --- /dev/null +++ b/tests/unit/test_list_jobs.py @@ -0,0 +1,291 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import WriteDisposition + + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + QUERY_DESTINATION_TABLE = "query_destination_table" + SOURCE_URI = "gs://test_bucket/src_object*" + DESTINATION_URI = "gs://test_bucket/dst_object*" + JOB_TYPES = { + "load_job": LoadJob, + "copy_job": CopyJob, + "extract_job": ExtractJob, + "query_job": QueryJob, + } + PATH = "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + QUERY = "SELECT * from test_dataset:test_table" + ASYNC_QUERY_DATA = { + "id": "%s:%s" % (PROJECT, "query_job"), + "jobReference": {"projectId": PROJECT, "jobId": "query_job"}, + "state": "DONE", + "configuration": { + "query": { + "query": QUERY, + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": QUERY_DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_IF_NEEDED, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + }, + } + EXTRACT_DATA = { + "id": "%s:%s" % (PROJECT, "extract_job"), + "jobReference": {"projectId": PROJECT, "jobId": "extract_job"}, + "state": "DONE", + "configuration": { + "extract": { + "sourceTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "destinationUris": [DESTINATION_URI], + } + }, + } + COPY_DATA = { + "id": "%s:%s" % (PROJECT, "copy_job"), + "jobReference": {"projectId": PROJECT, "jobId": "copy_job"}, + "state": "DONE", + "configuration": { + "copy": { + "sourceTables": [ + {"projectId": PROJECT, "datasetId": DS_ID, "tableId": SOURCE_TABLE} + ], + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": DESTINATION_TABLE, + }, + } + }, + } + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "sourceUris": [SOURCE_URI], + } + }, + } + DATA = { + "nextPageToken": TOKEN, + "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert 
isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params=dict({"projection": "full"}, **query), + timeout=None, + ) + + +def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): + from google.cloud.bigquery.job import LoadJob + + SOURCE_TABLE = "source_table" + JOB_TYPES = {"load_job": LoadJob} + PATH = "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + } + } + }, + } + DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_explicit_missing(client, PROJECT): + PATH = "projects/%s/jobs" % PROJECT + DATA = {} + TOKEN = "TOKEN" + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs( + max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "projection": "full", + "maxResults": 1000, + "pageToken": TOKEN, + "allUsers": True, + "stateFilter": "done", + }, + timeout=None, + ) + + +def test_list_jobs_w_project(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(project="other-project")) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs", + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_w_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(timeout=7.5)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs".format(PROJECT), + query_params={"projection": "full"}, + timeout=7.5, + ) + + +def test_list_jobs_w_time_filter(client, PROJECT): + conn = client._connection = make_connection({}) + + # One millisecond after the unix epoch. 
+ start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) + # One millisecond after the 2038 31-bit signed int rollover + end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) + end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + + list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={ + "projection": "full", + "minCreationTime": "1", + "maxCreationTime": str(end_time_millis), + }, + timeout=None, + ) + + +def test_list_jobs_w_parent_job_filter(client, PROJECT): + from google.cloud.bigquery import job + + conn = client._connection = make_connection({}, {}) + + parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] + + for parent_job in parent_job_args: + list(client.list_jobs(parent_job=parent_job)) + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={"projection": "full", "parentJobId": "parent-job-123"}, + timeout=None, + ) + conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 56aa66126..4ede9a7dd 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -33,8 +33,13 @@ def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): ) + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): +def test_list_models_defaults( + make_dataset, get_reference, client, PROJECT, DS_ID, extra, query, +): from google.cloud.bigquery.model import Model MODEL_1 = "model_one" @@ -64,7 +69,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I conn = client._connection = make_connection(DATA) dataset = make_dataset(PROJECT, DS_ID) - iterator = client.list_models(dataset) + iterator = client.list_models(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) models = list(page) @@ -77,7 +82,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=None ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py new file mode 100644 index 000000000..a88540dd5 --- /dev/null +++ b/tests/unit/test_list_projects.py @@ -0,0 +1,119 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+ +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_projects_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.client import Project + + PROJECT_2 = "PROJECT_TWO" + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [ + { + "kind": "bigquery#project", + "id": PROJECT, + "numericId": 1, + "projectReference": {"projectId": PROJECT}, + "friendlyName": "One", + }, + { + "kind": "bigquery#project", + "id": PROJECT_2, + "numericId": 2, + "projectReference": {"projectId": PROJECT_2}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + iterator = client.list_projects(**extra) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == len(DATA["projects"]) + for found, expected in zip(projects, DATA["projects"]): + assert isinstance(found, Project) + assert found.project_id == expected["id"] + assert found.numeric_id == expected["numericId"] + assert found.friendly_name == expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params=query, timeout=None + ) + + +def test_list_projects_w_timeout(client): + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(timeout=7.5) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params={}, timeout=7.5 + ) + + +def test_list_projects_explicit_response_missing_projects_key(client): + TOKEN = "TOKEN" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(max_results=3, page_token=TOKEN) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects", + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 714ede0d4..069966542 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -34,8 +34,13 @@ def test_list_routines_empty_w_timeout(client): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): +def test_list_routines_defaults( + make_dataset, get_reference, client, PROJECT, extra, query +): from google.cloud.bigquery.routine import Routine project_id = PROJECT @@ -67,7 +72,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): conn = 
client._connection = make_connection(resource) dataset = make_dataset(client.project, dataset_id) - iterator = client.list_routines(dataset) + iterator = client.list_routines(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) routines = list(page) @@ -80,7 +85,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="GET", path=path, query_params=query, timeout=None ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 9acee9580..45d15bed3 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -157,3 +157,22 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): def test_list_tables_wrong_type(client): with pytest.raises(TypeError): client.list_tables(42) + + +@dataset_polymorphic +def test_list_tables_page_size(make_dataset, get_reference, client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5, page_size=42) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params=dict(maxResults=42), timeout=7.5 + )
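
A minimal usage sketch of the new parameter, assuming a hypothetical project "my-project" and dataset "my_dataset" with credentials already configured. As the tests above assert, page_size maps to the BigQuery REST API's maxResults query parameter on each list request:

    from google.cloud import bigquery

    client = bigquery.Client(project="my-project")  # hypothetical project

    # Each underlying tables.list request returns at most 42 items;
    # iterating .pages surfaces one API response per page.
    for page in client.list_tables("my_dataset", page_size=42).pages:
        for table in page:
            print(table.table_id)

    # list_projects, list_datasets, list_models, list_routines, and
    # list_jobs accept the same keyword argument, e.g.:
    for dataset in client.list_datasets(page_size=42):
        print(dataset.dataset_id)

Unlike max_results, which caps the total number of items the iterator yields, page_size only caps how many items come back per request; iteration continues across pages until the listing is exhausted. This relies on the page_size support added to google-api-core's HTTP iterators, hence the bump to google-api-core >= 1.29.0 in setup.py and constraints above.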