From 809e4a27b94ba30c10e0c9a7e89576a9de9fda2b Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 18 Nov 2020 03:04:07 +1100 Subject: [PATCH] docs(samples): add more clustering code snippets (#330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sample code for creating a clustered table from a query result. File: samples/client_query_destination_table_clustered.py Section: [https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_from_a_query_result](https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_from_a_query_result) Add sample code for creating a clustered table when you load data. File: samples/load_table_clustered.py Section: [https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_when_you_load_data](https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_when_you_load_data) Fixes #329 🦕 --- docs/usage/tables.rst | 17 ++++++ google/cloud/bigquery/__init__.py | 4 +- ...lient_query_destination_table_clustered.py | 43 +++++++++++++++ samples/load_table_clustered.py | 55 +++++++++++++++++++ ...lient_query_destination_table_clustered.py | 27 +++++++++ samples/tests/test_load_table_clustered.py | 27 +++++++++ 6 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 samples/client_query_destination_table_clustered.py create mode 100644 samples/load_table_clustered.py create mode 100644 samples/tests/test_client_query_destination_table_clustered.py create mode 100644 samples/tests/test_load_table_clustered.py diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst index 7afca05e2..d924fe214 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -85,6 +85,23 @@ Load table data from a file with the :start-after: [START bigquery_load_from_file] :end-before: [END bigquery_load_from_file] +Creating a clustered table from a query result: + +.. 
literalinclude:: ../samples/client_query_destination_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_clustered_table] + :end-before: [END bigquery_query_clustered_table] + +Creating a clustered table when you load data with the +:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: + +.. literalinclude:: ../samples/load_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_clustered] + :end-before: [END bigquery_load_table_clustered] + Load a CSV file from Cloud Storage with the :func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index b8d1cc4d7..41f987228 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions @@ -137,8 +138,9 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", - "StandardSqlDataTypes", "SourceFormat", + "SqlTypeNames", + "StandardSqlDataTypes", "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py new file mode 100644 index 000000000..5a109ed10 --- /dev/null +++ b/samples/client_query_destination_table_clustered.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def client_query_destination_table_clustered(table_id): + + # [START bigquery_query_clustered_table] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`" + cluster_fields = ["corpus"] + + job_config = bigquery.QueryJobConfig( + clustering_fields=cluster_fields, destination=table_id + ) + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + query_job.result() # Wait for the job to complete. + + table = client.get_table(table_id) # Make an API request. + if table.clustering_fields == cluster_fields: + print( + "The destination table is written using the cluster_fields configuration." + ) + # [END bigquery_query_clustered_table] diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py new file mode 100644 index 000000000..20d412cb3 --- /dev/null +++ b/samples/load_table_clustered.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_clustered(table_id): + + # [START bigquery_load_table_clustered] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + skip_leading_rows=1, + source_format=bigquery.SourceFormat.CSV, + schema=[ + bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC), + ], + time_partitioning=bigquery.TimePartitioning(field="timestamp"), + clustering_fields=["origin", "destination"], + ) + + job = client.load_table_from_uri( + ["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"], + table_id, + job_config=job_config, + ) + + job.result() # Waits for the job to complete. + + table = client.get_table(table_id) # Make an API request.
+ print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_table_clustered] + return table diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py new file mode 100644 index 000000000..b4bdd588c --- /dev/null +++ b/samples/tests/test_client_query_destination_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import client_query_destination_table_clustered + + +def test_client_query_destination_table_clustered(capsys, random_table_id): + + client_query_destination_table_clustered.client_query_destination_table_clustered( + random_table_id + ) + out, err = capsys.readouterr() + assert ( + "The destination table is written using the cluster_fields configuration." + in out + ) diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py new file mode 100644 index 000000000..bafdc2051 --- /dev/null +++ b/samples/tests/test_load_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_clustered + + +def test_load_table_clustered(capsys, random_table_id, client): + + table = load_table_clustered.load_table_clustered(random_table_id) + + out, _ = capsys.readouterr() + assert "rows and 4 columns" in out + + rows = list(client.list_rows(table)) # Make an API request. + assert len(rows) > 0 + assert table.clustering_fields == ["origin", "destination"]