Skip to content

Commit

Permalink
feat: use BigQuery Storage client by default
Browse files Browse the repository at this point in the history
  • Loading branch information
plamut committed Mar 9, 2020
1 parent a17be5f commit 25572ab
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 51 deletions.
28 changes: 13 additions & 15 deletions google/cloud/bigquery/job.py
Expand Up @@ -3220,7 +3220,7 @@ def to_arrow(
self,
progress_bar_type=None,
bqstorage_client=None,
create_bqstorage_client=False,
create_bqstorage_client=True,
):
"""[Beta] Create a :class:`pyarrow.Table` by loading all pages of a
table or query.
Expand Down Expand Up @@ -3255,11 +3255,10 @@ def to_arrow(
Reading from a specific partition or snapshot is not
currently supported by this method.
create_bqstorage_client (bool):
**Beta Feature** Optional. If ``True``, create a BigQuery
Storage API client using the default API settings. The
BigQuery Storage API is a faster way to fetch rows from
BigQuery. See the ``bqstorage_client`` parameter for more
information.
Optional. If ``True`` (default), create a BigQuery Storage API
client using the default API settings. The BigQuery Storage API
is a faster way to fetch rows from BigQuery. See the
``bqstorage_client`` parameter for more information.
This argument does nothing if ``bqstorage_client`` is supplied.
Expand Down Expand Up @@ -3290,15 +3289,15 @@ def to_dataframe(
bqstorage_client=None,
dtypes=None,
progress_bar_type=None,
create_bqstorage_client=False,
create_bqstorage_client=True,
):
"""Return a pandas DataFrame from a QueryJob
Args:
bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient):
**Alpha Feature** Optional. A BigQuery Storage API client. If
supplied, use the faster BigQuery Storage API to fetch rows
from BigQuery. This API is a billable API.
Optional. A BigQuery Storage API client. If supplied, use the
faster BigQuery Storage API to fetch rows from BigQuery. This
API is a billable API.
This method requires the ``fastavro`` and
``google-cloud-bigquery-storage`` libraries.
Expand All @@ -3325,11 +3324,10 @@ def to_dataframe(
.. versionadded:: 1.11.0
create_bqstorage_client (bool):
**Beta Feature** Optional. If ``True``, create a BigQuery
Storage API client using the default API settings. The
BigQuery Storage API is a faster way to fetch rows from
BigQuery. See the ``bqstorage_client`` parameter for more
information.
Optional. If ``True`` (default), create a BigQuery Storage API
client using the default API settings. The BigQuery Storage API
is a faster way to fetch rows from BigQuery. See the
``bqstorage_client`` parameter for more information.
This argument does nothing if ``bqstorage_client`` is supplied.
Expand Down
37 changes: 17 additions & 20 deletions google/cloud/bigquery/table.py
Expand Up @@ -1467,7 +1467,7 @@ def to_arrow(
self,
progress_bar_type=None,
bqstorage_client=None,
create_bqstorage_client=False,
create_bqstorage_client=True,
):
"""[Beta] Create a :class:`pyarrow.Table` by loading all pages of a
table or query.
Expand All @@ -1492,21 +1492,20 @@ def to_arrow(
Use the :func:`tqdm.tqdm_gui` function to display a
progress bar as a graphical dialog box.
bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient):
**Beta Feature** Optional. A BigQuery Storage API client. If
supplied, use the faster BigQuery Storage API to fetch rows
from BigQuery. This API is a billable API.
Optional. A BigQuery Storage API client. If supplied, use the
faster BigQuery Storage API to fetch rows from BigQuery. This
API is a billable API.
This method requires the ``pyarrow`` and
``google-cloud-bigquery-storage`` libraries.
Reading from a specific partition or snapshot is not
currently supported by this method.
create_bqstorage_client (bool):
**Beta Feature** Optional. If ``True``, create a BigQuery
Storage API client using the default API settings. The
BigQuery Storage API is a faster way to fetch rows from
BigQuery. See the ``bqstorage_client`` parameter for more
information.
Optional. If ``True`` (default), create a BigQuery Storage API
client using the default API settings. The BigQuery Storage API
is a faster way to fetch rows from BigQuery. See the
``bqstorage_client`` parameter for more information.
This argument does nothing if ``bqstorage_client`` is supplied.
Expand Down Expand Up @@ -1640,15 +1639,14 @@ def to_dataframe(
bqstorage_client=None,
dtypes=None,
progress_bar_type=None,
create_bqstorage_client=False,
create_bqstorage_client=True,
):
"""Create a pandas DataFrame by loading all pages of a query.
Args:
bqstorage_client (google.cloud.bigquery_storage_v1beta1.BigQueryStorageClient):
**Beta Feature** Optional. A BigQuery Storage API client. If
supplied, use the faster BigQuery Storage API to fetch rows
from BigQuery.
Optional. A BigQuery Storage API client. If supplied, use the
faster BigQuery Storage API to fetch rows from BigQuery.
This method requires the ``pyarrow`` and
``google-cloud-bigquery-storage`` libraries.
Expand Down Expand Up @@ -1686,11 +1684,10 @@ def to_dataframe(
.. versionadded:: 1.11.0
create_bqstorage_client (bool):
**Beta Feature** Optional. If ``True``, create a BigQuery
Storage API client using the default API settings. The
BigQuery Storage API is a faster way to fetch rows from
BigQuery. See the ``bqstorage_client`` parameter for more
information.
Optional. If ``True`` (default), create a BigQuery Storage API
client using the default API settings. The BigQuery Storage API
is a faster way to fetch rows from BigQuery. See the
``bqstorage_client`` parameter for more information.
This argument does nothing if ``bqstorage_client`` is supplied.
Expand Down Expand Up @@ -1782,7 +1779,7 @@ def to_arrow(
self,
progress_bar_type=None,
bqstorage_client=None,
create_bqstorage_client=False,
create_bqstorage_client=True,
):
"""[Beta] Create an empty :class:`pyarrow.Table`.
Expand All @@ -1803,7 +1800,7 @@ def to_dataframe(
bqstorage_client=None,
dtypes=None,
progress_bar_type=None,
create_bqstorage_client=False,
create_bqstorage_client=True,
):
"""Create an empty dataframe.
Expand Down
44 changes: 28 additions & 16 deletions tests/unit/test_table.py
Expand Up @@ -1492,7 +1492,7 @@ def test_to_dataframe_error_if_pandas_is_none(self):
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe(self):
row_iterator = self._make_one()
df = row_iterator.to_dataframe()
df = row_iterator.to_dataframe(create_bqstorage_client=False)
self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 0) # verify the number of rows

Expand Down Expand Up @@ -1687,7 +1687,7 @@ def test_to_arrow(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

tbl = row_iterator.to_arrow()
tbl = row_iterator.to_arrow(create_bqstorage_client=False)

self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 2)
Expand Down Expand Up @@ -1737,7 +1737,7 @@ def test_to_arrow_w_nulls(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

tbl = row_iterator.to_arrow()
tbl = row_iterator.to_arrow(create_bqstorage_client=False)

self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 4)
Expand Down Expand Up @@ -1772,7 +1772,7 @@ def test_to_arrow_w_unknown_type(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

tbl = row_iterator.to_arrow()
tbl = row_iterator.to_arrow(create_bqstorage_client=False)

self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 2)
Expand Down Expand Up @@ -1815,7 +1815,7 @@ def test_to_arrow_w_empty_table(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

tbl = row_iterator.to_arrow()
tbl = row_iterator.to_arrow(create_bqstorage_client=False)

self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 0)
Expand Down Expand Up @@ -2059,7 +2059,9 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc

for progress_bar_type, progress_bar_mock in progress_bars:
row_iterator = self._make_one(_mock_client(), api_request, path, schema)
tbl = row_iterator.to_arrow(progress_bar_type=progress_bar_type)
tbl = row_iterator.to_arrow(
progress_bar_type=progress_bar_type, create_bqstorage_client=False,
)

progress_bar_mock.assert_called()
progress_bar_mock().update.assert_called()
Expand Down Expand Up @@ -2231,7 +2233,7 @@ def test_to_dataframe(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

df = row_iterator.to_dataframe()
df = row_iterator.to_dataframe(create_bqstorage_client=False)

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 4) # verify the number of rows
Expand Down Expand Up @@ -2270,7 +2272,9 @@ def test_to_dataframe_progress_bar(

for progress_bar_type, progress_bar_mock in progress_bars:
row_iterator = self._make_one(_mock_client(), api_request, path, schema)
df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type)
df = row_iterator.to_dataframe(
progress_bar_type=progress_bar_type, create_bqstorage_client=False,
)

progress_bar_mock.assert_called()
progress_bar_mock().update.assert_called()
Expand Down Expand Up @@ -2336,7 +2340,7 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self):
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

with warnings.catch_warnings(record=True) as warned:
df = row_iterator.to_dataframe()
df = row_iterator.to_dataframe(create_bqstorage_client=False)

self.assertEqual(len(warned), 0)
self.assertEqual(len(df), 4)
Expand All @@ -2361,7 +2365,9 @@ def test_to_dataframe_no_tqdm(self):
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

with warnings.catch_warnings(record=True) as warned:
df = row_iterator.to_dataframe(progress_bar_type="tqdm")
df = row_iterator.to_dataframe(
progress_bar_type="tqdm", create_bqstorage_client=False,
)

self.assertEqual(len(warned), 1)
for warning in warned:
Expand Down Expand Up @@ -2396,7 +2402,9 @@ def test_to_dataframe_tqdm_error(self):
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

with warnings.catch_warnings(record=True) as warned:
df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type)
df = row_iterator.to_dataframe(
progress_bar_type=progress_bar_type, create_bqstorage_client=False,
)

self.assertEqual(len(df), 4) # all should be well

Expand All @@ -2416,7 +2424,7 @@ def test_to_dataframe_w_empty_results(self):
api_request = mock.Mock(return_value={"rows": []})
row_iterator = self._make_one(_mock_client(), api_request, schema=schema)

df = row_iterator.to_dataframe()
df = row_iterator.to_dataframe(create_bqstorage_client=False)

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 0) # verify the number of rows
Expand Down Expand Up @@ -2474,7 +2482,7 @@ def test_to_dataframe_logs_tabledata_list(self):
)

with mock.patch("google.cloud.bigquery.table._LOGGER", mock_logger):
row_iterator.to_dataframe()
row_iterator.to_dataframe(create_bqstorage_client=False)

mock_logger.debug.assert_any_call(
"Started reading table 'debug-proj.debug_dset.debug_tbl' with tabledata.list."
Expand Down Expand Up @@ -2504,7 +2512,7 @@ def test_to_dataframe_w_various_types_nullable(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

df = row_iterator.to_dataframe()
df = row_iterator.to_dataframe(create_bqstorage_client=False)

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 4) # verify the number of rows
Expand Down Expand Up @@ -2544,7 +2552,9 @@ def test_to_dataframe_column_dtypes(self):
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(_mock_client(), api_request, path, schema)

df = row_iterator.to_dataframe(dtypes={"km": "float16"})
df = row_iterator.to_dataframe(
dtypes={"km": "float16"}, create_bqstorage_client=False,
)

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 3) # verify the number of rows
Expand Down Expand Up @@ -3169,7 +3179,9 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self):
table=mut.Table("proj.dset.tbl"),
)

df = row_iterator.to_dataframe(bqstorage_client=None)
df = row_iterator.to_dataframe(
bqstorage_client=None, create_bqstorage_client=False,
)

self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 2)
Expand Down

0 comments on commit 25572ab

Please sign in to comment.