From 2ca69be83744dfa7b03735118bf5c74120a0475e Mon Sep 17 00:00:00 2001
From: Ryan Yuan <ryan.yuan@outlook.com>
Date: Sun, 18 Oct 2020 16:29:22 +1100
Subject: [PATCH 1/4] docs(samples): add more clustering code snippets

---
 docs/usage/tables.rst                         | 17 +++++++
 ...lient_query_destination_table_clustered.py | 41 +++++++++++++++
 samples/load_table_clustered.py               | 50 +++++++++++++++++++
 ...lient_query_destination_table_clustered.py | 27 ++++++++++
 samples/tests/test_load_table_clustered.py    | 40 +++++++++++++++
 5 files changed, 175 insertions(+)
 create mode 100644 samples/client_query_destination_table_clustered.py
 create mode 100644 samples/load_table_clustered.py
 create mode 100644 samples/tests/test_client_query_destination_table_clustered.py
 create mode 100644 samples/tests/test_load_table_clustered.py

diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst
index 7afca05e2..976bb84e7 100644
--- a/docs/usage/tables.rst
+++ b/docs/usage/tables.rst
@@ -85,6 +85,23 @@ Load table data from a file with the
    :start-after: [START bigquery_load_from_file]
    :end-before: [END bigquery_load_from_file]
 
+Creating a clustered table from a query result:
+
+.. literalinclude:: ../samples/client_query_destination_table_clustered.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_query_destination_table_clustered]
+   :end-before: [END bigquery_query_destination_table_clustered]
+
+Creating a clustered table when you load data
+:func:`~google.cloud.bigquery.client.Client.load_table_from_file` method:
+
+.. literalinclude:: ../samples/load_table_clustered.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_load_table_clustered]
+   :end-before: [END bigquery_load_table_clustered]
+
 Load a CSV file from Cloud Storage with the
 :func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method:
 
diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py
new file mode 100644
index 000000000..cee63d0db
--- /dev/null
+++ b/samples/client_query_destination_table_clustered.py
@@ -0,0 +1,41 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def client_query_destination_table_clustered(table_id):
+
+    # [START bigquery_query_destination_table_clustered]
+    from google.cloud import bigquery
+
+    # Construct a BigQuery client object.
+    client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the destination table.
+    # table_id = "your-project.your_dataset.your_table_name"
+
+    sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`"
+    cluster_fields = ["corpus"]
+
+    job_config = bigquery.QueryJobConfig(
+        clustering_fields=cluster_fields, destination=table_id
+    )
+
+    # Start the query, passing in the extra configuration.
+    query_job = client.query(sql, job_config=job_config)  # Make an API request.
+    query_job.result()  # Wait for the job to complete.
+
+    table = client.get_table(table_id)  # Make an API request.
+    if table.clustering_fields == cluster_fields:
+        print("The destination table is written using the cluster_fields configuration.")
+    # [END bigquery_query_destination_table_clustered]
diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py
new file mode 100644
index 000000000..7fc8d0455
--- /dev/null
+++ b/samples/load_table_clustered.py
@@ -0,0 +1,50 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def load_table_clustered(file_path, table_id):
+
+    # [START bigquery_load_table_clustered]
+    from google.cloud import bigquery
+
+    # Construct a BigQuery client object.
+    client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the table to create.
+    # table_id = "your-project.your_dataset.your_table_name"
+
+    job_config = bigquery.LoadJobConfig(
+        schema=[
+            bigquery.SchemaField("full_name", "STRING"),
+            bigquery.SchemaField("age", "INTEGER"),
+        ],
+        clustering_fields=["age"],
+        skip_leading_rows=1,
+        # The source format defaults to CSV, so the line below is optional.
+        source_format=bigquery.SourceFormat.CSV,
+    )
+
+    with open(file_path, "rb") as source_file:
+        job = client.load_table_from_file(source_file, table_id, job_config=job_config)
+
+    job.result()  # Waits for the job to complete.
+
+    table = client.get_table(table_id)  # Make an API request.
+    print(
+        "Loaded {} rows and {} columns to {}".format(
+            table.num_rows, len(table.schema), table_id
+        )
+    )
+    # [END bigquery_load_table_clustered]
+    return table
diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py
new file mode 100644
index 000000000..b4bdd588c
--- /dev/null
+++ b/samples/tests/test_client_query_destination_table_clustered.py
@@ -0,0 +1,27 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .. import client_query_destination_table_clustered
+
+
+def test_client_query_destination_table_clustered(capsys, random_table_id):
+
+    client_query_destination_table_clustered.client_query_destination_table_clustered(
+        random_table_id
+    )
+    out, err = capsys.readouterr()
+    assert (
+        "The destination table is written using the cluster_fields configuration."
+        in out
+    )
diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py
new file mode 100644
index 000000000..94c2beeae
--- /dev/null
+++ b/samples/tests/test_load_table_clustered.py
@@ -0,0 +1,40 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+
+from google.cloud import bigquery
+
+from .. import load_table_clustered
+
+
+def test_load_table_clustered(capsys, random_table_id, client):
+
+    samples_test_dir = os.path.abspath(os.path.dirname(__file__))
+    file_path = os.path.join(
+        samples_test_dir, "..", "..", "tests", "data", "people.csv"
+    )
+    table = load_table_clustered.load_table_clustered(file_path, random_table_id)
+
+    out, _ = capsys.readouterr()
+    assert "Loaded 2 rows and 2 columns" in out
+
+    rows = list(client.list_rows(table))  # Make an API request.
+    assert len(rows) == 2
+    # Order is not preserved, so compare individually
+    row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1})
+    assert row1 in rows
+    row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1})
+    assert row2 in rows
+    assert table.clustering_fields == ["age"]

From 307fb42914caa0e800940ae4be49ca4256e8441a Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 4 Nov 2020 10:49:41 -0600
Subject: [PATCH 2/4] docs: use clustered source file from GCS

---
 docs/usage/tables.rst                         |  4 ++--
 google/cloud/bigquery/__init__.py             |  4 +++-
 ...lient_query_destination_table_clustered.py |  4 ++--
 samples/load_table_clustered.py               | 23 +++++++++++--------
 samples/tests/test_load_table_clustered.py    | 21 ++++-------------
 5 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst
index 976bb84e7..291e947a2 100644
--- a/docs/usage/tables.rst
+++ b/docs/usage/tables.rst
@@ -90,8 +90,8 @@ Creating a clustered table from a query result:
 .. literalinclude:: ../samples/client_query_destination_table_clustered.py
    :language: python
    :dedent: 4
-   :start-after: [START bigquery_query_destination_table_clustered]
-   :end-before: [END bigquery_query_destination_table_clustered]
+   :start-after: [START bigquery_query_clustered_table]
+   :end-before: [END bigquery_query_clustered_table]
 
 Creating a clustered table when you load data
 :func:`~google.cloud.bigquery.client.Client.load_table_from_file` method:
diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py
index b8d1cc4d7..41f987228 100644
--- a/google/cloud/bigquery/__init__.py
+++ b/google/cloud/bigquery/__init__.py
@@ -37,6 +37,7 @@
 from google.cloud.bigquery.dataset import Dataset
 from google.cloud.bigquery.dataset import DatasetReference
 from google.cloud.bigquery import enums
+from google.cloud.bigquery.enums import SqlTypeNames
 from google.cloud.bigquery.enums import StandardSqlDataTypes
 from google.cloud.bigquery.external_config import ExternalConfig
 from google.cloud.bigquery.external_config import BigtableOptions
@@ -137,8 +138,9 @@
     "Encoding",
     "QueryPriority",
     "SchemaUpdateOption",
-    "StandardSqlDataTypes",
     "SourceFormat",
+    "SqlTypeNames",
+    "StandardSqlDataTypes",
     "WriteDisposition",
     # EncryptionConfiguration
     "EncryptionConfiguration",
diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py
index cee63d0db..2fd8b0cad 100644
--- a/samples/client_query_destination_table_clustered.py
+++ b/samples/client_query_destination_table_clustered.py
@@ -15,7 +15,7 @@
 
 def client_query_destination_table_clustered(table_id):
 
-    # [START bigquery_query_destination_table_clustered]
+    # [START bigquery_query_clustered_table]
     from google.cloud import bigquery
 
     # Construct a BigQuery client object.
@@ -38,4 +38,4 @@ def client_query_destination_table_clustered(table_id):
     table = client.get_table(table_id)  # Make an API request.
     if table.clustering_fields == cluster_fields:
         print("The destination table is written using the cluster_fields configuration.")
-    # [END bigquery_query_destination_table_clustered]
+    # [END bigquery_query_clustered_table]
diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py
index 7fc8d0455..20d412cb3 100644
--- a/samples/load_table_clustered.py
+++ b/samples/load_table_clustered.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 
-def load_table_clustered(file_path, table_id):
+def load_table_clustered(table_id):
 
     # [START bigquery_load_table_clustered]
     from google.cloud import bigquery
@@ -25,18 +25,23 @@ def load_table_clustered(file_path, table_id):
     # table_id = "your-project.your_dataset.your_table_name"
 
     job_config = bigquery.LoadJobConfig(
-        schema=[
-            bigquery.SchemaField("full_name", "STRING"),
-            bigquery.SchemaField("age", "INTEGER"),
-        ],
-        clustering_fields=["age"],
         skip_leading_rows=1,
-        # The source format defaults to CSV, so the line below is optional.
         source_format=bigquery.SourceFormat.CSV,
+        schema=[
+            bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP),
+            bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING),
+            bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING),
+            bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC),
+        ],
+        time_partitioning=bigquery.TimePartitioning(field="timestamp"),
+        clustering_fields=["origin", "destination"],
     )
 
-    with open(file_path, "rb") as source_file:
-        job = client.load_table_from_file(source_file, table_id, job_config=job_config)
+    job = client.load_table_from_uri(
+        ["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"],
+        table_id,
+        job_config=job_config,
+    )
 
     job.result()  # Waits for the job to complete.
 
diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py
index 94c2beeae..bafdc2051 100644
--- a/samples/tests/test_load_table_clustered.py
+++ b/samples/tests/test_load_table_clustered.py
@@ -12,29 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import os
-
-from google.cloud import bigquery
-
 from .. import load_table_clustered
 
 
 def test_load_table_clustered(capsys, random_table_id, client):
 
-    samples_test_dir = os.path.abspath(os.path.dirname(__file__))
-    file_path = os.path.join(
-        samples_test_dir, "..", "..", "tests", "data", "people.csv"
-    )
-    table = load_table_clustered.load_table_clustered(file_path, random_table_id)
+    table = load_table_clustered.load_table_clustered(random_table_id)
 
     out, _ = capsys.readouterr()
-    assert "Loaded 2 rows and 2 columns" in out
+    assert "rows and 4 columns" in out
 
     rows = list(client.list_rows(table))  # Make an API request.
-    assert len(rows) == 2
-    # Order is not preserved, so compare individually
-    row1 = bigquery.Row(("Wylma Phlyntstone", 29), {"full_name": 0, "age": 1})
-    assert row1 in rows
-    row2 = bigquery.Row(("Phred Phlyntstone", 32), {"full_name": 0, "age": 1})
-    assert row2 in rows
-    assert table.clustering_fields == ["age"]
+    assert len(rows) > 0
+    assert table.clustering_fields == ["origin", "destination"]

From 39d0c28e3b64e79b56109f8fad73d7f2bd624f30 Mon Sep 17 00:00:00 2001
From: Tim Swast <swast@google.com>
Date: Wed, 4 Nov 2020 11:41:27 -0600
Subject: [PATCH 3/4] blacken

---
 samples/client_query_destination_table_clustered.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py
index 2fd8b0cad..5a109ed10 100644
--- a/samples/client_query_destination_table_clustered.py
+++ b/samples/client_query_destination_table_clustered.py
@@ -37,5 +37,7 @@ def client_query_destination_table_clustered(table_id):
 
     table = client.get_table(table_id)  # Make an API request.
     if table.clustering_fields == cluster_fields:
-        print("The destination table is written using the cluster_fields configuration.")
+        print(
+            "The destination table is written using the cluster_fields configuration."
+        )
     # [END bigquery_query_clustered_table]

From 0293145f77974ddeed2da016942466af838dc741 Mon Sep 17 00:00:00 2001
From: Ryan Yuan <ryan.yuan@outlook.com>
Date: Thu, 5 Nov 2020 08:16:33 +1100
Subject: [PATCH 4/4] docs: reference load_table_from_uri for the clustered load sample

---
 docs/usage/tables.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst
index 291e947a2..d924fe214 100644
--- a/docs/usage/tables.rst
+++ b/docs/usage/tables.rst
@@ -93,8 +93,8 @@ Creating a clustered table from a query result:
    :start-after: [START bigquery_query_clustered_table]
    :end-before: [END bigquery_query_clustered_table]
 
-Creating a clustered table when you load data
-:func:`~google.cloud.bigquery.client.Client.load_table_from_file` method:
+Creating a clustered table when you load data with the
+:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method:
 
 .. literalinclude:: ../samples/load_table_clustered.py
    :language: python