Skip to content

Commit

Permalink
docs: pandas DataFrame samples are more standalone (#224)
Browse files Browse the repository at this point in the history
* docs: pandas DataFrame samples are more standalone

* fix region tag

* fix region tag

* remove unused imports

* blacken

* remove session from call to rows/to_dataframe
  • Loading branch information
tswast committed Jul 13, 2021
1 parent 7b086ba commit 4026997
Show file tree
Hide file tree
Showing 10 changed files with 248 additions and 13 deletions.
22 changes: 22 additions & 0 deletions samples/conftest.py
@@ -0,0 +1,22 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import pytest


@pytest.fixture(scope="session")
def project_id():
    """Return the Google Cloud project ID from the GOOGLE_CLOUD_PROJECT env var.

    Session-scoped so the environment is read once for the whole test run.
    Raises KeyError if the variable is not set, failing tests early with a
    clear signal that the sample environment is unconfigured.
    """
    return os.environ["GOOGLE_CLOUD_PROJECT"]
8 changes: 0 additions & 8 deletions samples/quickstart/quickstart_test.py
Expand Up @@ -13,9 +13,6 @@
# limitations under the License.

import datetime
import os

import pytest

from . import quickstart

Expand All @@ -27,11 +24,6 @@ def now_millis():
)


@pytest.fixture()
def project_id():
return os.environ["GOOGLE_CLOUD_PROJECT"]


def test_quickstart_wo_snapshot(capsys, project_id):
quickstart.main(project_id)
out, _ = capsys.readouterr()
Expand Down
2 changes: 1 addition & 1 deletion samples/to_dataframe/noxfile.py
Expand Up @@ -226,7 +226,7 @@ def py(session: nox.sessions.Session) -> None:


def _get_repo_root() -> Optional[str]:
""" Returns the root folder of the project. """
"""Returns the root folder of the project."""
# Get root of this repository. Assume we don't have directories nested deeper than 10 items.
p = Path(os.getcwd())
for i in range(10):
Expand Down
47 changes: 47 additions & 0 deletions samples/to_dataframe/read_query_results.py
@@ -0,0 +1,47 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def read_query_results():
    """Run a query over a public Stack Overflow dataset and download the
    results into a pandas DataFrame.

    Returns:
        pandas.DataFrame: most-viewed questions tagged google-bigquery.
    """
    # [START bigquerystorage_pandas_tutorial_read_query_results]
    from google.cloud import bigquery

    bqclient = bigquery.Client()

    # Download query results.
    query_string = """
    SELECT
    CONCAT(
        'https://stackoverflow.com/questions/',
        CAST(id as STRING)) as url,
    view_count
    FROM `bigquery-public-data.stackoverflow.posts_questions`
    WHERE tags like '%google-bigquery%'
    ORDER BY view_count DESC
    """

    query_job = bqclient.query(query_string)
    results = query_job.result()
    # Optionally, explicitly request to use the BigQuery Storage API. As of
    # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage
    # API is used by default.
    dataframe = results.to_dataframe(create_bqstorage_client=True)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_query_results]

    return dataframe
21 changes: 21 additions & 0 deletions samples/to_dataframe/read_query_results_test.py
@@ -0,0 +1,21 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import read_query_results


def test_read_query_results(capsys):
    """The sample prints query rows; the public dataset guarantees the
    'stackoverflow' URL prefix appears in the head of the DataFrame."""
    read_query_results.read_query_results()
    captured = capsys.readouterr()
    assert "stackoverflow" in captured.out
42 changes: 42 additions & 0 deletions samples/to_dataframe/read_table_bigquery.py
@@ -0,0 +1,42 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def read_table():
    """Download two columns of a public BigQuery table into a pandas DataFrame.

    Returns:
        pandas.DataFrame: country_name and fips_code columns.
    """
    # [START bigquerystorage_pandas_tutorial_read_table]
    from google.cloud import bigquery

    bqclient = bigquery.Client()

    # Download a table.
    table_ref = bigquery.TableReference.from_string(
        "bigquery-public-data.utility_us.country_code_iso"
    )
    # Restrict the download to just the columns the sample needs.
    columns = [
        bigquery.SchemaField("country_name", "STRING"),
        bigquery.SchemaField("fips_code", "STRING"),
    ]
    row_iterator = bqclient.list_rows(table_ref, selected_fields=columns)
    # Optionally, explicitly request to use the BigQuery Storage API. As of
    # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage
    # API is used by default.
    dataframe = row_iterator.to_dataframe(create_bqstorage_client=True)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_table]

    return dataframe
21 changes: 21 additions & 0 deletions samples/to_dataframe/read_table_bigquery_test.py
@@ -0,0 +1,21 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import read_table_bigquery


def test_read_table(capsys):
    """The sample prints the head of the DataFrame, whose column header
    includes 'country_name'."""
    read_table_bigquery.read_table()
    captured = capsys.readouterr()
    assert "country_name" in captured.out
69 changes: 69 additions & 0 deletions samples/to_dataframe/read_table_bqstorage.py
@@ -0,0 +1,69 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def read_table(your_project_id):
    """Stream a public table into a pandas DataFrame with the BigQuery Storage API.

    Args:
        your_project_id: Project that owns (and is billed for) the read session.

    Returns:
        pandas.DataFrame: species_common_name and fall_color columns of the
        public tree_species table.
    """
    # The next four lines exist only for documentation extraction: the region
    # tag captures a realistic-looking placeholder project ID for the published
    # snippet, then the caller's real project ID is restored before use.
    original_your_project_id = your_project_id
    # [START bigquerystorage_pandas_tutorial_read_session]
    your_project_id = "project-for-read-session"
    # [END bigquerystorage_pandas_tutorial_read_session]
    your_project_id = original_your_project_id

    # [START bigquerystorage_pandas_tutorial_read_session]
    from google.cloud import bigquery_storage
    from google.cloud.bigquery_storage import types
    import pandas

    bqstorageclient = bigquery_storage.BigQueryReadClient()

    project_id = "bigquery-public-data"
    dataset_id = "new_york_trees"
    table_id = "tree_species"
    table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"

    # Select columns to read with read options. If no read options are
    # specified, the whole table is read.
    read_options = types.ReadSession.TableReadOptions(
        selected_fields=["species_common_name", "fall_color"]
    )

    parent = "projects/{}".format(your_project_id)

    requested_session = types.ReadSession(
        table=table,
        # Avro is also supported, but the Arrow data format is optimized to
        # work well with column-oriented data structures such as pandas
        # DataFrames.
        data_format=types.DataFormat.ARROW,
        read_options=read_options,
    )
    read_session = bqstorageclient.create_read_session(
        parent=parent, read_session=requested_session, max_stream_count=1,
    )

    # This example reads from only a single stream. Read from multiple streams
    # to fetch data faster. Note that the session may not contain any streams
    # if there are no rows to read.
    # NOTE(review): indexing [0] raises IndexError if streams is empty —
    # acceptable for this sample because the public table is non-empty.
    stream = read_session.streams[0]
    reader = bqstorageclient.read_rows(stream.name)

    # Parse all Arrow blocks and create a dataframe.
    frames = []
    for message in reader.rows().pages:
        frames.append(message.to_dataframe())
    dataframe = pandas.concat(frames)
    print(dataframe.head())
    # [END bigquerystorage_pandas_tutorial_read_session]

    return dataframe
21 changes: 21 additions & 0 deletions samples/to_dataframe/read_table_bqstorage_test.py
@@ -0,0 +1,21 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from . import read_table_bqstorage


def test_read_table(capsys, project_id):
    """The sample prints the head of the DataFrame, whose column header
    includes 'species_common_name'."""
    read_table_bqstorage.read_table(your_project_id=project_id)
    captured = capsys.readouterr()
    assert "species_common_name" in captured.out
8 changes: 4 additions & 4 deletions samples/to_dataframe/requirements.txt
Expand Up @@ -2,7 +2,7 @@ google-auth==1.32.1
google-cloud-bigquery-storage==2.6.0
google-cloud-bigquery==2.20.0
pyarrow==4.0.1
ipython==7.10.2; python_version > '3.0'
ipython==5.9.0; python_version < '3.0'
pandas==0.25.3; python_version > '3.0'
pandas==0.24.2; python_version < '3.0'
ipython==7.24.0; python_version > '3.6'
ipython==7.16.1; python_version <= '3.6'
pandas==1.2.5; python_version > '3.6'
pandas==1.1.5; python_version <= '3.6'

0 comments on commit 4026997

Please sign in to comment.