diff --git a/samples/to_dataframe/main_test.py b/samples/to_dataframe/main_test.py
deleted file mode 100644
index bda6d601..00000000
--- a/samples/to_dataframe/main_test.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Copyright 2019 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-
-@pytest.fixture
-def clients():
-    # [START bigquerystorage_pandas_tutorial_all]
-    # [START bigquerystorage_pandas_tutorial_create_client]
-    import google.auth
-    from google.cloud import bigquery
-    from google.cloud import bigquery_storage
-
-    # Explicitly create a credentials object. This allows you to use the same
-    # credentials for both the BigQuery and BigQuery Storage clients, avoiding
-    # unnecessary API calls to fetch duplicate authentication tokens.
-    credentials, your_project_id = google.auth.default(
-        scopes=["https://www.googleapis.com/auth/cloud-platform"]
-    )
-
-    # Make clients.
-    bqclient = bigquery.Client(credentials=credentials, project=your_project_id,)
-    bqstorageclient = bigquery_storage.BigQueryReadClient(credentials=credentials)
-    # [END bigquerystorage_pandas_tutorial_create_client]
-    # [END bigquerystorage_pandas_tutorial_all]
-    return bqclient, bqstorageclient
-
-
-def test_table_to_dataframe(capsys, clients):
-    from google.cloud import bigquery
-
-    bqclient, bqstorageclient = clients
-
-    # [START bigquerystorage_pandas_tutorial_all]
-    # [START bigquerystorage_pandas_tutorial_read_table]
-    # Download a table.
-    table = bigquery.TableReference.from_string(
-        "bigquery-public-data.utility_us.country_code_iso"
-    )
-    rows = bqclient.list_rows(
-        table,
-        selected_fields=[
-            bigquery.SchemaField("country_name", "STRING"),
-            bigquery.SchemaField("fips_code", "STRING"),
-        ],
-    )
-    dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
-    print(dataframe.head())
-    # [END bigquerystorage_pandas_tutorial_read_table]
-    # [END bigquerystorage_pandas_tutorial_all]
-
-    out, _ = capsys.readouterr()
-    assert "country_name" in out
-
-
-def test_query_to_dataframe(capsys, clients):
-    bqclient, bqstorageclient = clients
-
-    # [START bigquerystorage_pandas_tutorial_all]
-    # [START bigquerystorage_pandas_tutorial_read_query_results]
-    # Download query results.
-    query_string = """
-SELECT
-CONCAT(
-    'https://stackoverflow.com/questions/',
-    CAST(id as STRING)) as url,
-view_count
-FROM `bigquery-public-data.stackoverflow.posts_questions`
-WHERE tags like '%google-bigquery%'
-ORDER BY view_count DESC
-"""
-
-    dataframe = (
-        bqclient.query(query_string)
-        .result()
-        .to_dataframe(bqstorage_client=bqstorageclient)
-    )
-    print(dataframe.head())
-    # [END bigquerystorage_pandas_tutorial_read_query_results]
-    # [END bigquerystorage_pandas_tutorial_all]
-
-    out, _ = capsys.readouterr()
-    assert "stackoverflow" in out
-
-
-def test_session_to_dataframe(capsys, clients):
-    from google.cloud.bigquery_storage import types
-
-    bqclient, bqstorageclient = clients
-    your_project_id = bqclient.project
-
-    # [START bigquerystorage_pandas_tutorial_all]
-    # [START bigquerystorage_pandas_tutorial_read_session]
-    project_id = "bigquery-public-data"
-    dataset_id = "new_york_trees"
-    table_id = "tree_species"
-    table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"
-
-    # Select columns to read with read options. If no read options are
-    # specified, the whole table is read.
-    read_options = types.ReadSession.TableReadOptions(
-        selected_fields=["species_common_name", "fall_color"]
-    )
-
-    parent = "projects/{}".format(your_project_id)
-
-    requested_session = types.ReadSession(
-        table=table,
-        # This API can also deliver data serialized in Apache Avro format.
-        # This example leverages Apache Arrow.
-        data_format=types.DataFormat.ARROW,
-        read_options=read_options,
-    )
-    read_session = bqstorageclient.create_read_session(
-        parent=parent, read_session=requested_session, max_stream_count=1,
-    )
-
-    # This example reads from only a single stream. Read from multiple streams
-    # to fetch data faster. Note that the session may not contain any streams
-    # if there are no rows to read.
-    stream = read_session.streams[0]
-    reader = bqstorageclient.read_rows(stream.name)
-
-    # Parse all Arrow blocks and create a dataframe. This call requires a
-    # session, because the session contains the schema for the row blocks.
-    dataframe = reader.to_dataframe(read_session)
-    print(dataframe.head())
-    # [END bigquerystorage_pandas_tutorial_read_session]
-    # [END bigquerystorage_pandas_tutorial_all]
-
-    out, _ = capsys.readouterr()
-    assert "species_common_name" in out