From bd7dbdae5c972b16bafc53c67911eeaa3255a880 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 20 May 2021 09:56:53 +0200 Subject: [PATCH] feat: detect obsolete BQ Storage extra at runtime (#666) * feat: detect obsolete BQ Storage extra at runtime * Cover the changes with unit tests * Skip BQ Storage version tests if extra missing * Rename and improve _create_bqstorage_client() The method is renamed to _ensure_bqstorage_client() and now performs a check if BQ Storage dependency is recent enough. * Remove BQ Storage check from dbapi.Cursor The check is now performed in dbapi.Connection, which is sufficient. * Remove BQ Storage check in _pandas_helpers The methods in higher layers already do the same check before a BQ Storage client instance is passed to _pandas_helpers._download_table_bqstorage() helper. * Simplify BQ Storage client factory in magics Lean more heavily on client._ensure_bqstorage_client() to de-duplicate logic. * Cover missing code lines with tests --- google/cloud/bigquery/__init__.py | 3 + google/cloud/bigquery/_helpers.py | 30 +++++++++ google/cloud/bigquery/client.py | 57 +++++++++++++++-- google/cloud/bigquery/dbapi/connection.py | 6 +- google/cloud/bigquery/exceptions.py | 21 +++++++ google/cloud/bigquery/magics/magics.py | 11 ++-- google/cloud/bigquery/table.py | 14 ++++- tests/unit/test__helpers.py | 38 ++++++++++++ tests/unit/test_client.py | 76 +++++++++++++++++++++-- tests/unit/test_dbapi_connection.py | 20 +++++- tests/unit/test_dbapi_cursor.py | 12 +++- tests/unit/test_magics.py | 44 ++++++++++++- tests/unit/test_table.py | 61 +++++++++++++++--- 13 files changed, 357 insertions(+), 36 deletions(-) create mode 100644 google/cloud/bigquery/exceptions.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index f609468f5..ec08b2c84 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery import enums from 
google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -152,6 +153,8 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 4fe29291d..7602483c2 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -25,6 +25,10 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +import pkg_resources + +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -36,6 +40,32 @@ re.VERBOSE, ) +_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") + + +def _verify_bq_storage_version(): + """Verify that a recent enough version of BigQuery Storage extra is installed. + + The function assumes that google-cloud-bigquery-storage extra is installed, and + should thus be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the constraints + in setup.py, the calling code can use this helper to verify the version + compatibility at runtime. + """ + from google.cloud import bigquery_storage + + installed_version = pkg_resources.parse_version( + getattr(bigquery_storage, "__version__", "legacy") + ) + + if installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {installed_version})." 
+ ) + raise LegacyBigQueryStorageError(msg) + def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8d0acb867..7ef3795a8 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -50,16 +50,25 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None + from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import _verify_bq_storage_version from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -445,15 +454,38 @@ def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: ) return DatasetReference(project, dataset_id) - def _create_bqstorage_client(self): + def _ensure_bqstorage_client( + self, + bqstorage_client: Optional[ + "google.cloud.bigquery_storage.BigQueryReadClient" + ] = None, + client_options: Optional[google.api_core.client_options.ClientOptions] = None, + client_info: Optional[ + 
"google.api_core.gapic_v1.client_info.ClientInfo" + ] = DEFAULT_BQSTORAGE_CLIENT_INFO, + ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to missing dependencies, raise a - warning and return ``None``. + If a client cannot be created due to a missing or outdated dependency + `google-cloud-bigquery-storage`, raise a warning and return ``None``. + + If the `bqstorage_client` argument is not ``None``, still perform the version + check and return the argument back to the caller if the check passes. If it + fails, raise a warning and return ``None``. + + Args: + bqstorage_client: + An existing BigQuery Storage client instance to check for version + compatibility. If ``None``, a new instance is created and returned. + client_options: + Custom options used with a new BigQuery Storage client instance if one + is created. + client_info: + The client info used with a new BigQuery Storage client instance if one + is created. Returns: - Optional[google.cloud.bigquery_storage.BigQueryReadClient]: - A BigQuery Storage API client. + A BigQuery Storage API client. 
""" try: from google.cloud import bigquery_storage except ImportError: warnings.warn( ) return None - return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + try: + _verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None + + if bqstorage_client is None: + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=self._credentials, + client_options=client_options, + client_info=client_info, + ) + + return bqstorage_client def _dataset_from_arg(self, dataset): if isinstance(dataset, str): diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 459fc82aa..66dee7dfb 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -47,12 +47,14 @@ def __init__(self, client=None, bqstorage_client=None): else: self._owns_client = False + # A warning is already raised by the BQ Storage client factory if + # instantiation fails, or if the given BQ Storage client instance is outdated. if bqstorage_client is None: - # A warning is already raised by the factory if instantiation fails. - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self._owns_bqstorage_client = bqstorage_client is not None else: self._owns_bqstorage_client = False + bqstorage_client = client._ensure_bqstorage_client(bqstorage_client) self._client = client self._bqstorage_client = bqstorage_client diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000..6e5c27eb1 --- /dev/null +++ b/google/cloud/bigquery/exceptions.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 474d9a74a..2b8c2928e 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -644,7 +644,7 @@ def _cell_magic(line, query): bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( - use_bqstorage_api, context.credentials, bqstorage_client_options, + client, use_bqstorage_api, bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -762,12 +762,12 @@ def _split_args_line(line): return params_option_value, rest_of_args -def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): +def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage + from google.cloud import bigquery_storage # noqa: F401 except ImportError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " @@ -785,10 +785,9 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): ) raise customized_error from err - return bigquery_storage.BigQueryReadClient( - credentials=credentials, - 
client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + return client._ensure_bqstorage_client( client_options=client_options, + client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b91c91a39..b12209252 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -41,6 +41,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1519,6 +1520,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers._verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1655,7 +1667,7 @@ def to_arrow( owns_bqstorage_client = False if not bqstorage_client and create_bqstorage_client: - bqstorage_client = self.client._create_bqstorage_client() + bqstorage_client = self.client._ensure_bqstorage_client() owns_bqstorage_client = bqstorage_client is not None try: diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 2437f3568..0ac76d424 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,6 +19,44 @@ import mock +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") +class Test_verify_bq_storage_version(unittest.TestCase): + def _call_fut(self): + 
from google.cloud.bigquery._helpers import _verify_bq_storage_version + + return _verify_bq_storage_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: legacy" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8f535145b..1346a1ef6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -822,7 +822,7 @@ def test_get_dataset(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_create_bqstorage_client(self): + def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance @@ -832,12 +832,19 @@ def test_create_bqstorage_client(self): with mock.patch( "google.cloud.bigquery_storage.BigQueryReadClient", mock_client ): - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = 
client._ensure_bqstorage_client( + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) self.assertIs(bqstorage_client, mock_client_instance) - mock_client.assert_called_once_with(credentials=creds) + mock_client.assert_called_once_with( + credentials=creds, + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) - def test_create_bqstorage_client_missing_dependency(self): + def test_ensure_bqstorage_client_missing_dependency(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -850,7 +857,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self.assertIsNone(bqstorage_client) matching_warnings = [ @@ -861,6 +868,65 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_passes(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + bqstorage_client = client._ensure_bqstorage_client( + bqstorage_client=mock_storage_client + ) + + self.assertIs(bqstorage_client, mock_storage_client) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 74da318bf..0576cad38 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -51,7 +51,7 @@ def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) @@ -66,9 +66,15 @@ def test_ctor_w_bqstorage_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = self._make_one( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -92,9 +98,11 @@ def test_connect_w_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() - mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = connect(client=mock_client) + + mock_client._ensure_bqstorage_client.assert_called_once_with() self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -108,9 +116,15 @@ def test_connect_w_both_clients(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() 
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = connect( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -140,7 +154,7 @@ def test_close_closes_all_created_bigquery_clients(self): return_value=client, ) bqstorage_client_patcher = mock.patch.object( - client, "_create_bqstorage_client", return_value=bqstorage_client, + client, "_ensure_bqstorage_client", return_value=bqstorage_client, ) with client_patcher, bqstorage_client_patcher: diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 55e453254..a2d6693d0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -72,7 +72,7 @@ def _mock_client( mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. 
- mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None return mock_client @@ -311,6 +311,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -341,6 +342,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): mock_client = self._mock_client(rows=[]) mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -365,7 +367,11 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=row_data, ) @@ -396,7 +402,11 @@ def test_fetchall_w_bqstorage_client_no_arrow_compression(self): row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index ff41fe720..5e9bf28a9 100644 --- a/tests/unit/test_magics.py +++ 
b/tests/unit/test_magics.py @@ -317,7 +317,10 @@ def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(False, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, False, {}) assert got is None @@ -328,7 +331,10 @@ def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(True, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, True, {}) assert isinstance(got, bigquery_storage.BigQueryReadClient) @@ -336,15 +342,46 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(True, credentials_mock, {}) + magics._make_bqstorage_client(test_client, True, {}) error_msg = str(exc_context.value) assert "google-cloud-bigquery-storage" in error_msg assert "pyarrow" in error_msg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = 
mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -887,6 +924,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): table_id = "bigquery-public-data.samples.shakespeare" with default_patch, client_patch as client_mock, bqstorage_client_patch: + client_mock()._ensure_bqstorage_client.return_value = bqstorage_instance_mock client_mock().list_rows.return_value = row_iterator_mock ip.run_cell_magic("bigquery", "--max_results=5", table_id) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ce4a15761..0f2ab00c1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -24,6 +24,7 @@ import pytz import google.api_core.exceptions +from test_utils.imports import maybe_fail_import try: from google.cloud import bigquery_storage @@ -1768,6 +1769,48 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + 
@unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2003,7 +2046,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( @@ -2099,7 +2142,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2114,11 +2157,11 @@ def test_to_arrow_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) 
row_iterator.to_arrow(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): + def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2133,14 +2176,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) tbl = row_iterator.to_arrow(create_bqstorage_client=True) # The client attempted to create a BQ Storage client, and even though # that was not possible, results were still returned without errors. 
- mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -2824,7 +2867,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( @@ -2839,7 +2882,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2854,7 +2897,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) row_iterator.to_dataframe(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`")