docs(samples): add more clustering code snippets #330

Merged
17 changes: 17 additions & 0 deletions docs/usage/tables.rst
@@ -85,6 +85,23 @@ Load table data from a file with the
:start-after: [START bigquery_load_from_file]
:end-before: [END bigquery_load_from_file]

Create a clustered table from a query result with the
:func:`~google.cloud.bigquery.client.Client.query` method:

.. literalinclude:: ../samples/client_query_destination_table_clustered.py
:language: python
:dedent: 4
:start-after: [START bigquery_query_clustered_table]
:end-before: [END bigquery_query_clustered_table]

Create a clustered table when you load data with the
:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method:

.. literalinclude:: ../samples/load_table_clustered.py
:language: python
:dedent: 4
:start-after: [START bigquery_load_table_clustered]
:end-before: [END bigquery_load_table_clustered]

Load a CSV file from Cloud Storage with the
:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method:

4 changes: 3 additions & 1 deletion google/cloud/bigquery/__init__.py
@@ -37,6 +37,7 @@
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlDataTypes
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import BigtableOptions
@@ -137,8 +138,9 @@
"Encoding",
"QueryPriority",
"SchemaUpdateOption",
"StandardSqlDataTypes",
"SourceFormat",
"SqlTypeNames",
"StandardSqlDataTypes",
"WriteDisposition",
# EncryptionConfiguration
"EncryptionConfiguration",
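
A quick illustration of why this export matters: with SqlTypeNames re-exported at the package root, schema definitions (as in the new samples below) can use the enum instead of raw type strings. A minimal sketch; the field names here are arbitrary placeholders:

from google.cloud import bigquery

# The enum values behave like the corresponding type strings,
# so SchemaField accepts them directly.
schema = [
    bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING),
    bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC),
]
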
43 changes: 43 additions & 0 deletions samples/client_query_destination_table_clustered.py
@@ -0,0 +1,43 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def client_query_destination_table_clustered(table_id):

# [START bigquery_query_clustered_table]
from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the destination table.
# table_id = "your-project.your_dataset.your_table_name"

sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`"
cluster_fields = ["corpus"]

job_config = bigquery.QueryJobConfig(
clustering_fields=cluster_fields, destination=table_id
)

# Start the query, passing in the extra configuration.
query_job = client.query(sql, job_config=job_config) # Make an API request.
query_job.result() # Wait for the job to complete.

table = client.get_table(table_id) # Make an API request.
if table.clustering_fields == cluster_fields:
print(
"The destination table is written using the cluster_fields configuration."
)
# [END bigquery_query_clustered_table]
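
For reference, the snippet can be exercised end to end by passing a fully qualified destination table ID, just as the accompanying test below does (the ID here is a placeholder):

client_query_destination_table_clustered(
    "your-project.your_dataset.clustered_shakespeare"
)
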
55 changes: 55 additions & 0 deletions samples/load_table_clustered.py
@@ -0,0 +1,55 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def load_table_clustered(table_id):

# [START bigquery_load_table_clustered]
from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the table to create.
# table_id = "your-project.your_dataset.your_table_name"

job_config = bigquery.LoadJobConfig(
skip_leading_rows=1,
source_format=bigquery.SourceFormat.CSV,
schema=[
bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP),
bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING),
bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING),
bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC),
],
time_partitioning=bigquery.TimePartitioning(field="timestamp"),
clustering_fields=["origin", "destination"],
)

job = client.load_table_from_uri(
["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"],
table_id,
job_config=job_config,
)

job.result() # Wait for the job to complete.

table = client.get_table(table_id) # Make an API request.
print(
"Loaded {} rows and {} columns to {}".format(
table.num_rows, len(table.schema), table_id
)
)
# [END bigquery_load_table_clustered]
return table
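
Loading is not the only way to end up with a clustered table. For comparison, the same clustering_fields property can also be set when creating an empty table directly. This sketch is not part of the PR, and the table ID is a placeholder:

from google.cloud import bigquery

client = bigquery.Client()

table = bigquery.Table(
    "your-project.your_dataset.your_table_name",
    schema=[
        bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING),
        bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING),
    ],
)
table.clustering_fields = ["origin", "destination"]
table = client.create_table(table)  # Make an API request.
print("Created clustered table {}".format(table.full_table_id))
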
27 changes: 27 additions & 0 deletions samples/tests/test_client_query_destination_table_clustered.py
@@ -0,0 +1,27 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .. import client_query_destination_table_clustered


def test_client_query_destination_table_clustered(capsys, random_table_id):

client_query_destination_table_clustered.client_query_destination_table_clustered(
random_table_id
)
out, err = capsys.readouterr()
assert (
"The destination table is written using the cluster_fields configuration."
in out
)
27 changes: 27 additions & 0 deletions samples/tests/test_load_table_clustered.py
@@ -0,0 +1,27 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .. import load_table_clustered


def test_load_table_clustered(capsys, random_table_id, client):

table = load_table_clustered.load_table_clustered(random_table_id)

out, _ = capsys.readouterr()
assert "rows and 4 columns" in out

rows = list(client.list_rows(table)) # Make an API request.
assert len(rows) > 0
assert table.clustering_fields == ["origin", "destination"]
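
Both tests lean on shared pytest fixtures (client and random_table_id) defined in the samples' conftest, which is not part of this diff. A rough sketch of what such fixtures might look like, purely as an assumption for readers of this page (the dataset name is hypothetical, and the real conftest likely also handles cleanup):

import uuid

import pytest
from google.cloud import bigquery


@pytest.fixture
def client():
    return bigquery.Client()


@pytest.fixture
def random_table_id(client):
    # Unique per run so concurrent test sessions do not collide.
    return "{}.samples_tests.table_{}".format(client.project, uuid.uuid4().hex)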