From cd519709228cda3bbcf2fd978d37ccd04ef27c82 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 15 Dec 2020 14:04:45 -0600 Subject: [PATCH] docs: add scheduled query samples (#83) * docs: add scheduled query samples * test: opt-out of type annotations for now * test: use environment variable for project ID * set quota project * consolidate config creation to conserve quota --- samples/snippets/conftest.py | 75 +++++++- samples/snippets/copy_dataset_test.py | 11 +- samples/snippets/manage_transfer_configs.py | 171 ++++++++++++++++++ .../snippets/manage_transfer_configs_test.py | 70 +++++++ samples/snippets/noxfile_config.py | 38 ++++ samples/snippets/quickstart_test.py | 6 +- samples/snippets/scheduled_query.py | 80 ++++++++ .../scheduled_query_test.py} | 18 +- samples/tests/__init__.py | 0 samples/tests/conftest.py | 74 -------- samples/tests/test_update_transfer_config.py | 30 --- 11 files changed, 448 insertions(+), 125 deletions(-) create mode 100644 samples/snippets/manage_transfer_configs.py create mode 100644 samples/snippets/manage_transfer_configs_test.py create mode 100644 samples/snippets/noxfile_config.py create mode 100644 samples/snippets/scheduled_query.py rename samples/{tests/test_create_scheduled_query.py => snippets/scheduled_query_test.py} (57%) delete mode 100644 samples/tests/__init__.py delete mode 100644 samples/tests/conftest.py delete mode 100644 samples/tests/test_update_transfer_config.py diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 44a8fb49..998d5ea7 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import os +import uuid + +from google.api_core import client_options import google.api_core.exceptions import google.auth from google.cloud import bigquery @@ -19,27 +24,81 @@ import pytest +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +@pytest.fixture(scope="session") +def bigquery_client(default_credentials): + credentials, project_id = default_credentials + return bigquery.Client(credentials=credentials, project=project_id) + + +@pytest.fixture(scope="session") +def dataset_id(bigquery_client, project_id): + dataset_id = f"bqdts_{temp_suffix()}" + bigquery_client.create_dataset(f"{project_id}.{dataset_id}") + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + @pytest.fixture(scope="session") def default_credentials(): return google.auth.default(["https://www.googleapis.com/auth/cloud-platform"]) @pytest.fixture(scope="session") -def project_id(default_credentials): - _, project_id = default_credentials - return project_id +def project_id(): + return os.environ["GOOGLE_CLOUD_PROJECT"] @pytest.fixture(scope="session") -def bigquery_client(default_credentials): - credentials, project_id = default_credentials - return bigquery.Client(credentials=credentials, project=project_id) +def service_account_name(default_credentials): + credentials, _ = default_credentials + # Note: this property is not available when running with user account + # credentials, but only service account credentials are used in our test + # infrastructure. + return credentials.service_account_email @pytest.fixture(scope="session") -def transfer_client(default_credentials): +def transfer_client(default_credentials, project_id): credentials, _ = default_credentials - return bigquery_datatransfer.DataTransferServiceClient(credentials=credentials) + options = client_options.ClientOptions(quota_project_id=project_id) + + transfer_client = bigquery_datatransfer.DataTransferServiceClient( + credentials=credentials, client_options=options + ) + + # Ensure quota is always attributed to the correct project. + bigquery_datatransfer.DataTransferServiceClient = lambda: transfer_client + + return transfer_client + + +@pytest.fixture(scope="session") +def transfer_config_name(transfer_client, project_id, dataset_id, service_account_name): + from . import manage_transfer_configs, scheduled_query + + # Use the transfer_client fixture so we know quota is attributed to the + # correct project. + assert transfer_client is not None + + # To conserve limited BQ-DTS quota, this fixture creates only one transfer + # config for a whole session and is used to test the scheduled_query.py and + # the delete operation in manage_transfer_configs.py. + transfer_config = scheduled_query.create_scheduled_query( + { + "project_id": project_id, + "dataset_id": dataset_id, + "service_account_name": service_account_name, + } + ) + yield transfer_config.name + manage_transfer_configs.delete_config( + {"transfer_config_name": transfer_config.name} + ) @pytest.fixture diff --git a/samples/snippets/copy_dataset_test.py b/samples/snippets/copy_dataset_test.py index 00a5e560..349f05ce 100644 --- a/samples/snippets/copy_dataset_test.py +++ b/samples/snippets/copy_dataset_test.py @@ -42,8 +42,17 @@ def source_dataset_id(bigquery_client, project_id): def test_copy_dataset( - capsys, project_id, destination_dataset_id, source_dataset_id, to_delete_configs + capsys, + transfer_client, + project_id, + destination_dataset_id, + source_dataset_id, + to_delete_configs, ): + # Use the transfer_client fixture so we know quota is attributed to the + # correct project. + assert transfer_client is not None + transfer_config = copy_dataset.copy_dataset( { "destination_project_id": project_id, diff --git a/samples/snippets/manage_transfer_configs.py b/samples/snippets/manage_transfer_configs.py new file mode 100644 index 00000000..6b4abd78 --- /dev/null +++ b/samples/snippets/manage_transfer_configs.py @@ -0,0 +1,171 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_configs(override_values={}): + # [START bigquerydatatransfer_list_configs] + from google.cloud import bigquery_datatransfer + + transfer_client = bigquery_datatransfer.DataTransferServiceClient() + + project_id = "my-project" + # [END bigquerydatatransfer_list_configs] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + project_id = override_values.get("project_id", project_id) + # [START bigquerydatatransfer_list_configs] + parent = transfer_client.common_project_path(project_id) + + configs = transfer_client.list_transfer_configs(parent=parent) + print("Got the following configs:") + for config in configs: + print(f"\tID: {config.name}, Schedule: {config.schedule}") + # [END bigquerydatatransfer_list_configs] + + +def update_config(override_values={}): + # [START bigquerydatatransfer_update_config] + from google.cloud import bigquery_datatransfer + from google.protobuf import field_mask_pb2 + + transfer_client = bigquery_datatransfer.DataTransferServiceClient() + + transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd" + new_display_name = "My Transfer Config" + # [END bigquerydatatransfer_update_config] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + new_display_name = override_values.get("new_display_name", new_display_name) + transfer_config_name = override_values.get( + "transfer_config_name", transfer_config_name + ) + # [START bigquerydatatransfer_update_config] + + transfer_config = bigquery_datatransfer.TransferConfig(name=transfer_config_name) + transfer_config.display_name = new_display_name + + transfer_config = transfer_client.update_transfer_config( + { + "transfer_config": transfer_config, + "update_mask": field_mask_pb2.FieldMask(paths=["display_name"]), + } + ) + + print(f"Updated config: '{transfer_config.name}'") + print(f"New display name: '{transfer_config.display_name}'") + # [END bigquerydatatransfer_update_config] + # Return the config name for testing purposes, so that it can be deleted. + return transfer_config + + +def update_credentials_with_service_account(override_values={}): + # [START bigquerydatatransfer_update_credentials] + from google.cloud import bigquery_datatransfer + from google.protobuf import field_mask_pb2 + + transfer_client = bigquery_datatransfer.DataTransferServiceClient() + + service_account_name = "abcdef-test-sa@abcdef-test.iam.gserviceaccount.com" + transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd" + # [END bigquerydatatransfer_update_credentials] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + service_account_name = override_values.get( + "service_account_name", service_account_name + ) + transfer_config_name = override_values.get( + "transfer_config_name", transfer_config_name + ) + # [START bigquerydatatransfer_update_credentials] + + transfer_config = bigquery_datatransfer.TransferConfig(name=transfer_config_name) + + transfer_config = transfer_client.update_transfer_config( + { + "transfer_config": transfer_config, + "update_mask": field_mask_pb2.FieldMask(paths=["service_account_name"]), + "service_account_name": service_account_name, + } + ) + + print("Updated config: '{}'".format(transfer_config.name)) + # [END bigquerydatatransfer_update_credentials] + # Return the config name for testing purposes, so that it can be deleted. + return transfer_config + + +def schedule_backfill(override_values={}): + # [START bigquerydatatransfer_schedule_backfill] + import datetime + + from google.cloud import bigquery_datatransfer + + transfer_client = bigquery_datatransfer.DataTransferServiceClient() + + transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd" + # [END bigquerydatatransfer_schedule_backfill] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + transfer_config_name = override_values.get( + "transfer_config_name", transfer_config_name + ) + # [START bigquerydatatransfer_schedule_backfill] + now = datetime.datetime.now(datetime.timezone.utc) + start_time = now - datetime.timedelta(days=5) + end_time = now - datetime.timedelta(days=2) + + # Some data sources, such as scheduled_query only support daily run. + # Truncate start_time and end_time to midnight time (00:00AM UTC). + start_time = datetime.datetime( + start_time.year, start_time.month, start_time.day, tzinfo=datetime.timezone.utc + ) + end_time = datetime.datetime( + end_time.year, end_time.month, end_time.day, tzinfo=datetime.timezone.utc + ) + + response = transfer_client.schedule_transfer_runs( + parent=transfer_config_name, + start_time=start_time, + end_time=end_time, + ) + + print("Started transfer runs:") + for run in response.runs: + print(f"backfill: {run.run_time} run: {run.name}") + # [END bigquerydatatransfer_schedule_backfill] + return response.runs + + +def delete_config(override_values={}): + # [START bigquerydatatransfer_delete_transfer] + import google.api_core.exceptions + from google.cloud import bigquery_datatransfer + + transfer_client = bigquery_datatransfer.DataTransferServiceClient() + + transfer_config_name = "projects/1234/locations/us/transferConfigs/abcd" + # [END bigquerydatatransfer_delete_transfer] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + transfer_config_name = override_values.get( + "transfer_config_name", transfer_config_name + ) + # [START bigquerydatatransfer_delete_transfer] + try: + transfer_client.delete_transfer_config(name=transfer_config_name) + except google.api_core.exceptions.NotFound: + print("Transfer config not found.") + else: + print(f"Deleted transfer config: {transfer_config_name}") + # [END bigquerydatatransfer_delete_transfer] diff --git a/samples/snippets/manage_transfer_configs_test.py b/samples/snippets/manage_transfer_configs_test.py new file mode 100644 index 00000000..de31c713 --- /dev/null +++ b/samples/snippets/manage_transfer_configs_test.py @@ -0,0 +1,70 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import manage_transfer_configs + + +def test_list_configs(capsys, project_id, transfer_config_name): + manage_transfer_configs.list_configs({"project_id": project_id}) + out, _ = capsys.readouterr() + assert "Got the following configs:" in out + assert transfer_config_name in out + + +def test_update_config(capsys, transfer_config_name): + manage_transfer_configs.update_config( + { + "new_display_name": "name from test_update_config", + "transfer_config_name": transfer_config_name, + } + ) + out, _ = capsys.readouterr() + assert "Updated config:" in out + assert transfer_config_name in out + assert "name from test_update_config" in out + + +def test_update_credentials_with_service_account( + capsys, project_id, service_account_name, transfer_config_name +): + manage_transfer_configs.update_credentials_with_service_account( + { + "project_id": project_id, + "service_account_name": service_account_name, + "transfer_config_name": transfer_config_name, + } + ) + out, _ = capsys.readouterr() + assert "Updated config:" in out + assert transfer_config_name in out + + +def test_schedule_backfill(capsys, transfer_config_name): + runs = manage_transfer_configs.schedule_backfill( + { + "transfer_config_name": transfer_config_name, + } + ) + out, _ = capsys.readouterr() + assert "Started transfer runs:" in out + # Run IDs should include the transfer name in their path. + assert transfer_config_name in out + # Check that there are runs for 5, 4, 3, and 2 days ago. + assert len(runs) == 4 + + +def test_delete_config(capsys, transfer_config_name): + # transfer_config_name fixture in conftest.py calls the delete config + # sample. To conserve limited BQ-DTS quota we only make basic checks. + assert len(transfer_config_name) != 0 diff --git a/samples/snippets/noxfile_config.py b/samples/snippets/noxfile_config.py new file mode 100644 index 00000000..57b25e58 --- /dev/null +++ b/samples/snippets/noxfile_config.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py index 570d5181..46398b0f 100644 --- a/samples/snippets/quickstart_test.py +++ b/samples/snippets/quickstart_test.py @@ -15,7 +15,11 @@ from . import quickstart -def test_quickstart(capsys, project_id): +def test_quickstart(capsys, transfer_client, project_id): + # Use the transfer_client fixture so we know quota is attributed to the + # correct project. + assert transfer_client is not None + quickstart.run_quickstart(override_values={"project_id": project_id}) out, _ = capsys.readouterr() assert "Supported Data Sources:" in out diff --git a/samples/snippets/scheduled_query.py b/samples/snippets/scheduled_query.py new file mode 100644 index 00000000..ab85c515 --- /dev/null +++ b/samples/snippets/scheduled_query.py @@ -0,0 +1,80 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_scheduled_query(override_values={}): + # [START bigquerydatatransfer_create_scheduled_query] + # [START bigquerydatatransfer_create_scheduled_query_with_service_account] + from google.cloud import bigquery_datatransfer + + transfer_client = bigquery_datatransfer.DataTransferServiceClient() + + # The project where the query job runs is the same as the project + # containing the destination dataset. + project_id = "your-project-id" + dataset_id = "your_dataset_id" + + # This service account will be used to execute the scheduled queries. Omit + # this request parameter to run the query as the user with the credentials + # associated with this client. + service_account_name = "abcdef-test-sa@abcdef-test.iam.gserviceaccount.com" + # [END bigquerydatatransfer_create_scheduled_query_with_service_account] + # [END bigquerydatatransfer_create_scheduled_query] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + project_id = override_values.get("project_id", project_id) + dataset_id = override_values.get("dataset_id", dataset_id) + service_account_name = override_values.get( + "service_account_name", service_account_name + ) + # [START bigquerydatatransfer_create_scheduled_query] + # [START bigquerydatatransfer_create_scheduled_query_with_service_account] + + # Use standard SQL syntax for the query. + query_string = """ + SELECT + CURRENT_TIMESTAMP() as current_time, + @run_time as intended_run_time, + @run_date as intended_run_date, + 17 as some_integer + """ + + parent = transfer_client.common_project_path(project_id) + + transfer_config = bigquery_datatransfer.TransferConfig( + destination_dataset_id=dataset_id, + display_name="Your Scheduled Query Name", + data_source_id="scheduled_query", + params={ + "query": query_string, + "destination_table_name_template": "your_table_{run_date}", + "write_disposition": "WRITE_TRUNCATE", + "partitioning_field": "", + }, + schedule="every 24 hours", + ) + + transfer_config = transfer_client.create_transfer_config( + bigquery_datatransfer.CreateTransferConfigRequest( + parent=parent, + transfer_config=transfer_config, + service_account_name=service_account_name, + ) + ) + + print("Created scheduled query '{}'".format(transfer_config.name)) + # [END bigquerydatatransfer_create_scheduled_query_with_service_account] + # [END bigquerydatatransfer_create_scheduled_query] + # Return the config name for testing purposes, so that it can be deleted. + return transfer_config diff --git a/samples/tests/test_create_scheduled_query.py b/samples/snippets/scheduled_query_test.py similarity index 57% rename from samples/tests/test_create_scheduled_query.py rename to samples/snippets/scheduled_query_test.py index 9d885e3f..ef841824 100644 --- a/samples/tests/test_create_scheduled_query.py +++ b/samples/snippets/scheduled_query_test.py @@ -1,12 +1,10 @@ -# -*- coding: utf-8 -*- -# # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, @@ -14,13 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .. import create_scheduled_query +def test_create_scheduled_query(transfer_config_name): + from . import scheduled_query -def test_sample(project_id, dataset_id, capsys, to_delete): - config_name = create_scheduled_query.sample_create_transfer_config( - project_id, dataset_id - ) - to_delete.append(config_name) - out, err = capsys.readouterr() - assert config_name in out + # transfer_config_name fixture in conftest.py calls the scheduled query + # sample. To conserve limited BQ-DTS quota we only make basic checks. + assert hasattr(scheduled_query, "create_scheduled_query") + assert len(transfer_config_name) != 0 diff --git a/samples/tests/__init__.py b/samples/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py deleted file mode 100644 index 90589e8b..00000000 --- a/samples/tests/conftest.py +++ /dev/null @@ -1,74 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import datetime -import os -import uuid - -import google.auth -import google.cloud.bigquery -import pytest - - -@pytest.fixture -def project_id(): - return os.environ["GOOGLE_CLOUD_PROJECT"] - - -@pytest.fixture(scope="module") -def credentials(): - # If using a service account, the BQ DTS robot associated with your project - # requires the roles/iam.serviceAccountShortTermTokenMinter permission to - # act on behalf of the account. - creds, _ = google.auth.default(["https://www.googleapis.com/auth/cloud-platform"]) - return creds - - -@pytest.fixture(scope="module") -def bqdts_client(credentials): - from google.cloud import bigquery_datatransfer - - return bigquery_datatransfer.DataTransferServiceClient(credentials=credentials) - - -@pytest.fixture(scope="module") -def bigquery_client(credentials): - return google.cloud.bigquery.Client(credentials=credentials) - - -@pytest.fixture(scope="module") -def dataset_id(bigquery_client): - # Ensure the test account has owner permissions on the dataset by creating - # one from scratch. - now = datetime.datetime.now() - temp_ds_id = "bqdts_{}_{}".format( - now.strftime("%Y%m%d%H%M%S"), uuid.uuid4().hex[:8] - ) - bigquery_client.create_dataset(temp_ds_id) - yield temp_ds_id - bigquery_client.delete_dataset(temp_ds_id) - - -@pytest.fixture -def to_delete(bqdts_client): - doomed = [] - yield doomed - - for resource_name in doomed: - try: - bqdts_client.delete_transfer_config(name=resource_name) - except Exception: - pass diff --git a/samples/tests/test_update_transfer_config.py b/samples/tests/test_update_transfer_config.py deleted file mode 100644 index 827d8023..00000000 --- a/samples/tests/test_update_transfer_config.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .. import create_scheduled_query, update_transfer_config - - -def test_update_config_sample(project_id, dataset_id, capsys, to_delete): - config_name = create_scheduled_query.sample_create_transfer_config( - project_id, dataset_id - ) - - display_name = "Transfer config updated" - config = update_transfer_config.sample_update_transfer_config(config_name, display_name) - to_delete.append(config.name) - out, err = capsys.readouterr() - assert config.name in out - assert config.display_name == display_name