diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py
index d22a33318..31c6ba104 100644
--- a/samples/snippets/conftest.py
+++ b/samples/snippets/conftest.py
@@ -12,10 +12,43 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import datetime
+import random
+
 from google.cloud import bigquery
 import pytest
 
+RESOURCE_PREFIX = "python_bigquery_samples_snippets"
+RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S"
+
+
+def resource_prefix() -> str:
+    timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT)
+    random_string = hex(random.randrange(1000000))[2:]
+    return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}"
+
+
+def resource_name_to_date(resource_name: str) -> datetime.datetime:
+    # resource_prefix() embeds the creation time in the resource name; parse it
+    # back out, because DatasetListItem does not expose a created timestamp.
+    start = len(RESOURCE_PREFIX) + 1
+    date_string = resource_name[start : start + len("YYYYMMDD_HHMMSS")]
+    return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT)
+
+
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_datasets(bigquery_client: bigquery.Client):
+    yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1)
+    for dataset in bigquery_client.list_datasets():
+        if (
+            dataset.dataset_id.startswith(RESOURCE_PREFIX)
+            and resource_name_to_date(dataset.dataset_id) < yesterday
+        ):
+            bigquery_client.delete_dataset(
+                dataset, delete_contents=True, not_found_ok=True
+            )
+
 
 @pytest.fixture(scope="session")
 def bigquery_client():
     bigquery_client = bigquery.Client()
@@ -25,3 +58,18 @@ def bigquery_client():
 @pytest.fixture(scope="session")
 def project_id(bigquery_client):
     return bigquery_client.project
+
+
+@pytest.fixture(scope="session")
+def dataset_id(bigquery_client: bigquery.Client, project_id: str):
+    dataset_id = resource_prefix()
+    full_dataset_id = f"{project_id}.{dataset_id}"
+    dataset = bigquery.Dataset(full_dataset_id)
+    bigquery_client.create_dataset(dataset)
+    yield dataset_id
+    bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
+
+
+@pytest.fixture
+def bigquery_client_patch(monkeypatch, bigquery_client):
+    monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client)
diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py
new file mode 100644
index 000000000..3cca7a649
--- /dev/null
+++ b/samples/snippets/test_update_with_dml.py
@@ -0,0 +1,36 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+import pytest
+
+from conftest import resource_prefix
+import update_with_dml
+
+
+@pytest.fixture
+def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
+    table_id = f"{resource_prefix()}_update_with_dml"
+    yield table_id
+    full_table_id = f"{project_id}.{dataset_id}.{table_id}"
+    bigquery_client.delete_table(full_table_id, not_found_ok=True)
+
+
+def test_update_with_dml(bigquery_client_patch, dataset_id, table_id):
+    override_values = {
+        "dataset_id": dataset_id,
+        "table_id": table_id,
+    }
+    num_rows = update_with_dml.run_sample(override_values=override_values)
+    assert num_rows > 0
diff --git a/samples/snippets/update_with_dml.py b/samples/snippets/update_with_dml.py
new file mode 100644
index 000000000..7fd09dd80
--- /dev/null
+++ b/samples/snippets/update_with_dml.py
@@ -0,0 +1,82 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# [START bigquery_update_with_dml]
+import pathlib
+
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
+def load_from_newline_delimited_json(
+    client: bigquery.Client,
+    filepath: pathlib.Path,
+    project_id: str,
+    dataset_id: str,
+    table_id: str,
+):
+    full_table_id = f"{project_id}.{dataset_id}.{table_id}"
+    job_config = bigquery.LoadJobConfig()
+    job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
+    job_config.schema = [
+        bigquery.SchemaField("id", enums.SqlTypeNames.STRING),
+        bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER),
+        bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP),
+        bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP),
+        bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING),
+    ]
+
+    with open(filepath, "rb") as json_file:
+        load_job = client.load_table_from_file(
+            json_file, full_table_id, job_config=job_config
+        )
+
+    # Wait for load job to finish.
+    load_job.result()
+
+
+def update_with_dml(
+    client: bigquery.Client, project_id: str, dataset_id: str, table_id: str
+):
+    query_text = f"""
+    UPDATE `{project_id}.{dataset_id}.{table_id}`
+    SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0")
+    WHERE TRUE
+    """
+    query_job = client.query(query_text)
+
+    # Wait for query job to finish.
+    query_job.result()
+
+    print(f"DML query modified {query_job.num_dml_affected_rows} rows.")
+    return query_job.num_dml_affected_rows
+
+
+def run_sample(override_values={}):
+    client = bigquery.Client()
+    filepath = pathlib.Path(__file__).parent / "user_sessions_data.json"
+    project_id = client.project
+    dataset_id = "sample_db"
+    table_id = "UserSessions"
+    # [END bigquery_update_with_dml]
+    # To facilitate testing, we replace values with alternatives
+    # provided by the testing harness.
+    dataset_id = override_values.get("dataset_id", dataset_id)
+    table_id = override_values.get("table_id", table_id)
+    # [START bigquery_update_with_dml]
+    load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id)
+    return update_with_dml(client, project_id, dataset_id, table_id)
+
+
+# [END bigquery_update_with_dml]
diff --git a/samples/snippets/user_sessions_data.json b/samples/snippets/user_sessions_data.json
new file mode 100644
index 000000000..7ea3715ad
--- /dev/null
+++ b/samples/snippets/user_sessions_data.json
@@ -0,0 +1,10 @@
+{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"}
+{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"}
+{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"}
+{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"}
+{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"}
+{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"}
+{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"}
+{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"}
+{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"}
+{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"}
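
For local experimentation, a minimal driver along these lines could exercise the new sample outside of pytest. This is a sketch rather than part of the change: it assumes application default credentials, that it is run from samples/snippets/ (so conftest.py and user_sessions_data.json are importable and readable), and it reuses the resource_prefix() naming convention so the cleanup_datasets fixture would also sweep up anything it leaves behind.

# run_update_with_dml_sample.py -- hypothetical driver, not part of this diff.
from google.cloud import bigquery

from conftest import resource_prefix
import update_with_dml


def main():
    client = bigquery.Client()  # Application default credentials and project.
    dataset_id = resource_prefix()  # Scratch dataset named like the test fixtures.
    dataset = client.create_dataset(bigquery.Dataset(f"{client.project}.{dataset_id}"))
    try:
        # run_sample() loads user_sessions_data.json into UserSessions and then
        # issues the UPDATE; only the dataset name is overridden here.
        num_rows = update_with_dml.run_sample(override_values={"dataset_id": dataset_id})
        print(f"Modified {num_rows} rows in {dataset_id}.UserSessions")
    finally:
        # Mirror the teardown done by the dataset_id fixture in conftest.py.
        client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)


if __name__ == "__main__":
    main()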
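
As a rough check on what the UPDATE actually did, one could count rows whose ip_address was not rewritten to end in ".0". Again a sketch, assuming the table loaded by run_sample() still exists:

from google.cloud import bigquery


def count_unmasked_rows(client: bigquery.Client, full_table_id: str) -> int:
    """Return the number of rows whose ip_address does not end in '.0'."""
    query_text = f"""
    SELECT COUNT(*) AS unmasked
    FROM `{full_table_id}`
    WHERE NOT ENDS_WITH(ip_address, ".0")
    """
    rows = list(client.query(query_text).result())  # result() waits for the job.
    return rows[0]["unmasked"]


# Example: after update_with_dml.run_sample(), this count should be zero.
# client = bigquery.Client()
# assert count_unmasked_rows(client, f"{client.project}.sample_db.UserSessions") == 0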